Apply Functions
tapply
The documentation definition for tapply
is a bit more specific than the others, where the arguments are now (X, INDEX, FUN)
, with X
being an object for which a split
method exists, INDEX
is a factor by which X
is grouped, and FUN
is a function, as before.
To simplify this definition, we can say tapply
applies FUN
to X
when X
is grouped by INDEX
.
Examples
Using the itcont1980 data, show the top 20 city-and-state pairs, according to the amount of money donated.
Click to see solution
# fread comes from data.table.
library(data.table)

# Read the 1980 individual-contributions file and label its 21 columns.
myDF <- fread("/anvil/projects/tdm/data/election/itcont1980.txt", quote="")
names(myDF) <- c(
  "CMTE_ID", "AMNDT_IND", "RPT_TP", "TRANSACTION_PGI", "IMAGE_NUM",
  "TRANSACTION_TP", "ENTITY_TP", "NAME", "CITY", "STATE", "ZIP_CODE",
  "EMPLOYER", "OCCUPATION", "TRANSACTION_DT", "TRANSACTION_AMT",
  "OTHER_ID", "TRAN_ID", "FILE_NUM", "MEMO_CD", "MEMO_TEXT", "SUB_ID"
)

# Build one "CITY, STATE" label per row to use as the grouping key.
city_state <- paste(myDF$CITY, myDF$STATE, sep=", ")

# Add up the amounts within each city-and-state pair, ignoring missing amounts.
total_by_place <- tapply(myDF$TRANSACTION_AMT, city_state, sum, na.rm=TRUE)

# Largest totals first; show the top 20.
head(sort(total_by_place, decreasing=TRUE), n=20)
, 17299729 NEW YORK, NY 11345027 HOUSTON, TX 7606806 DALLAS, TX 4748262 LOS ANGELES, CA 4569952 WASHINGTON, DC 4273606 CHICAGO, IL 3179470 SAN FRANCISCO, CA 2061441 BEVERLY HILLS, CA 2053148 ATLANTA, GA 1892356 OKLAHOMA CITY, OK 1751431 ST LOUIS, MO 1639570 SAN ANTONIO, TX 1583292 MIAMI, FL 1541867 TULSA, OK 1341956 GREENWICH, CT 1340112 WASHINGTOM, DC 1300341 MIDLAND, TX 1210584 DENVER, CO 1183471 CINCINNATI, OH 1096983
Use the sapply function to run this function on each election year (in other words, the even numbered years) from 1980 to 2018 to get the total amount of all the donations from all of the states during an election year. Use the tapply function to add up the amount of donations in each state, across all of the years.
# Total the donations made from each state in one election year.
#
# myyear: the election year; it is pasted into the itcont<year>.txt file name.
# Returns the per-state sums computed by tapply, named by state.
mydonations <- function(myyear) {
  # Only columns 10 and 15 of the file are read; they are named state and
  # donation on the next line.
  myDF <- fread(paste0("/anvil/projects/tdm/data/election/itcont", myyear, ".txt"), quote="", select = c(10,15))
  names(myDF) <- c("state", "donation")
  # na.rm = TRUE keeps a single missing amount from turning a state's
  # whole total into NA.
  myvalue <- tapply(myDF$donation, myDF$state, sum, na.rm = TRUE)
  return(myvalue)
}
Click to see solution
# Total the donations made from each state in one election year.
#
# myyear: the election year; it is pasted into the itcont<year>.txt file name.
# Returns the per-state sums computed by tapply, named by state.
mydonations <- function(myyear) {
  # Only columns 10 and 15 of the file are read; they are named state and
  # donation on the next line.
  myDF <- fread(paste0("/anvil/projects/tdm/data/election/itcont", myyear, ".txt"), quote="", select = c(10,15))
  names(myDF) <- c("state", "donation")
  # na.rm = TRUE keeps a single missing amount from turning a state's
  # whole total into NA.
  myvalue <- tapply(myDF$donation, myDF$state, sum, na.rm = TRUE)
  return(myvalue)
}
library(data.table)
# simplify = FALSE makes sapply always return a list with one element per
# year. Without it, sapply collapses the results into a matrix whenever every
# year has the same number of states, and the state names used for grouping
# below would no longer be available through names(v).
myresults <- sapply( seq(1980,2018,by=2), mydonations, simplify = FALSE )
# Flatten the per-year results into one long vector; each value keeps the
# state name it had in its year's result.
v <- unlist(myresults)
# Add up the values that share a state name, i.e. sum each state across years.
tapply(v, names(v), sum)
sapply
sapply
will function identically to lapply
unless the output can be simplified, in which case sapply
executes that simplification. The following occurs when we run sapply
in place of lapply
on our squares
vector.
Examples
Use the sapply function to run this function on each election year (in other words, the even numbered years) from 1980 to 2018 to obtain the total amount of the donations from Indiana during federal election campaigns.
# Total the donations made from Indiana in one election year.
#
# myyear: the election year; it is pasted into the itcont<year>.txt file name.
# Returns a single value, the "IN" total, whose name is the year.
myindydonations <- function(myyear) {
  # Only columns 10 and 15 of the file are read; they are named state and
  # donation on the next line.
  myDF <- fread(paste0("/anvil/projects/tdm/data/election/itcont", myyear, ".txt"), quote="", select = c(10,15))
  names(myDF) <- c("state", "donation")
  # Sum per state, then keep only Indiana. na.rm = TRUE keeps a single
  # missing amount from turning the Indiana total into NA.
  myvalue <- tapply(myDF$donation, myDF$state, sum, na.rm = TRUE)["IN"]
  # Label the value with the year so results from several years can be
  # told apart once they are combined.
  names(myvalue) <- myyear
  return(myvalue)
}
Click to see solution
# Total the donations made from Indiana in one election year.
#
# myyear: the election year; it is pasted into the itcont<year>.txt file name.
# Returns a single value, the "IN" total, whose name is the year.
myindydonations <- function(myyear) {
  # Only columns 10 and 15 of the file are read; they are named state and
  # donation on the next line.
  myDF <- fread(paste0("/anvil/projects/tdm/data/election/itcont", myyear, ".txt"), quote="", select = c(10,15))
  names(myDF) <- c("state", "donation")
  # Sum per state, then keep only Indiana. na.rm = TRUE keeps a single
  # missing amount from turning the Indiana total into NA.
  myvalue <- tapply(myDF$donation, myDF$state, sum, na.rm = TRUE)["IN"]
  # Label the value with the year so results from several years can be
  # told apart once they are combined.
  names(myvalue) <- myyear
  return(myvalue)
}
# fread, used inside myindydonations, comes from data.table.
library(data.table)
# Call myindydonations once for each even-numbered year from 1980 to 2018
# and collect the values it returns.
myresults <- sapply(
  X = seq(from = 1980, to = 2018, by = 2),
  FUN = myindydonations
)
Use the sapply function to run this function on each election year (in other words, the even numbered years) from 1980 to 2018 to get the total amount of all the donations from all of the states during an election year.
# Total the donations made from each state in one election year.
#
# myyear: the election year; it is pasted into the itcont<year>.txt file name.
# Returns the per-state sums computed by tapply, named by state.
mydonations <- function(myyear) {
  # Only columns 10 and 15 of the file are read; they are named state and
  # donation on the next line.
  myDF <- fread(paste0("/anvil/projects/tdm/data/election/itcont", myyear, ".txt"), quote="", select = c(10,15))
  names(myDF) <- c("state", "donation")
  # na.rm = TRUE keeps a single missing amount from turning a state's
  # whole total into NA.
  myvalue <- tapply(myDF$donation, myDF$state, sum, na.rm = TRUE)
  return(myvalue)
}
Click to see solution
# Total the donations made from each state in one election year.
#
# myyear: the election year; it is pasted into the itcont<year>.txt file name.
# Returns the per-state sums computed by tapply, named by state.
mydonations <- function(myyear) {
  # Only columns 10 and 15 of the file are read; they are named state and
  # donation on the next line.
  myDF <- fread(paste0("/anvil/projects/tdm/data/election/itcont", myyear, ".txt"), quote="", select = c(10,15))
  names(myDF) <- c("state", "donation")
  # na.rm = TRUE keeps a single missing amount from turning a state's
  # whole total into NA.
  myvalue <- tapply(myDF$donation, myDF$state, sum, na.rm = TRUE)
  return(myvalue)
}
# fread, used inside mydonations, comes from data.table.
library(data.table)
# Call mydonations once for each even-numbered year from 1980 to 2018 and
# keep whatever sapply builds from the per-year results.
myresults <- sapply(
  X = seq(from = 1980, to = 2018, by = 2),
  FUN = mydonations
)
Use the sapply function to plot the results of myindycities for each of the even-numbered election years from 1984 to 1994.
# Find the ten Indiana cities with the largest donation totals in one year.
#
# myyear: the election year; it is pasted into the itcont<year>.txt file name.
# Returns up to ten per-city totals, largest first, named by city.
myindycities <- function(myyear) {
  # Only columns 9, 10 and 15 of the file are read; they are named city,
  # state and donation on the next line.
  myDF <- fread(paste0("/anvil/projects/tdm/data/election/itcont", myyear, ".txt"), quote="", select = c(9, 10,15))
  names(myDF) <- c("city", "state", "donation")
  # Keep only the rows whose state is Indiana.
  myDF <- myDF[myDF$state == "IN", ]
  # na.rm = TRUE matters here: without it a city with one missing amount
  # gets an NA total, and sort() below silently drops NA entries, so the
  # city would vanish from the ranking.
  city_donations <- tapply(myDF$donation, myDF$city, sum, na.rm = TRUE)
  sorted_cities <- sort(city_donations, decreasing = TRUE)
  return(head(sorted_cities, 10))
}
# Run myindycities for 1984, 1986, ..., 1994; lapply returns one list
# element per year.
myresults <- lapply(X = seq(from = 1984, to = 1994, by = 2), FUN = myindycities)
# Name each element after its year so it can be looked up by year.
names(myresults) <- as.character(seq(from = 1984, to = 1994, by = 2))
Click to see solution
# Find the ten Indiana cities with the largest donation totals in one year.
#
# myyear: the election year; it is pasted into the itcont<year>.txt file name.
# Returns up to ten per-city totals, largest first, named by city.
myindycities <- function(myyear) {
  # Only columns 9, 10 and 15 of the file are read; they are named city,
  # state and donation on the next line.
  myDF <- fread(paste0("/anvil/projects/tdm/data/election/itcont", myyear, ".txt"), quote="", select = c(9, 10,15))
  names(myDF) <- c("city", "state", "donation")
  # Keep only the rows whose state is Indiana.
  myDF <- myDF[myDF$state == "IN", ]
  # na.rm = TRUE matters here: without it a city with one missing amount
  # gets an NA total, and sort() below silently drops NA entries, so the
  # city would vanish from the ranking.
  city_donations <- tapply(myDF$donation, myDF$city, sum, na.rm = TRUE)
  sorted_cities <- sort(city_donations, decreasing = TRUE)
  return(head(sorted_cities, 10))
}
# Top Indiana cities for each even-numbered year 1984-1994, named by year.
myresults <- lapply(seq(1984, 1994, by=2), myindycities)
names(myresults) <- seq(1984, 1994, by=2)
# Arrange the six yearly charts on a 3 x 2 grid. par() returns the previous
# setting, which is kept so the layout can be put back afterwards.
oldpar <- par(mfrow = c(3, 2))
# sapply is used only for its side effect (drawing); invisible() keeps its
# return value from being printed after the plots.
invisible(sapply(names(myresults), function(year) {
  totals <- myresults[[year]]
  # tapply returns a one-dimensional array, whereas dotchart is documented
  # to take a vector or a matrix, so hand it a plain named numeric vector.
  city_totals <- as.numeric(totals)
  names(city_totals) <- names(totals)
  dotchart(city_totals, main = paste("Donations by City -", year), xlab = "Total Donations", ylab = "City")
}))
# Restore the plotting layout that was in effect before this example.
par(oldpar)
lapply
lapply
applies a function to each element of a list (or vector) and returns a new list holding the result for each element; the original list is not modified. Since there is only one dimension in a list, the MARGIN
parameter does not apply.
Examples
Use the list apply function (lapply) to run the function myindycities on each of the even-numbered election years 1984 to 1994.
# Find the ten Indiana cities with the largest donation totals in one year.
#
# myyear: the election year; it is pasted into the itcont<year>.txt file name.
# Returns up to ten per-city totals, largest first, named by city.
myindycities <- function(myyear) {
  # Only columns 9, 10 and 15 of the file are read; they are named city,
  # state and donation on the next line.
  myDF <- fread(paste0("/anvil/projects/tdm/data/election/itcont", myyear, ".txt"), quote="", select = c(9, 10,15))
  names(myDF) <- c("city", "state", "donation")
  # Keep only the rows whose state is Indiana.
  myDF <- myDF[myDF$state == "IN", ]
  # na.rm = TRUE matters here: without it a city with one missing amount
  # gets an NA total, and sort() below silently drops NA entries, so the
  # city would vanish from the ranking.
  city_donations <- tapply(myDF$donation, myDF$city, sum, na.rm = TRUE)
  sorted_cities <- sort(city_donations, decreasing = TRUE)
  return(head(sorted_cities, 10))
}
Click to see solution
# Find the ten Indiana cities with the largest donation totals in one year.
#
# myyear: the election year; it is pasted into the itcont<year>.txt file name.
# Returns up to ten per-city totals, largest first, named by city.
myindycities <- function(myyear) {
  # Only columns 9, 10 and 15 of the file are read; they are named city,
  # state and donation on the next line.
  myDF <- fread(paste0("/anvil/projects/tdm/data/election/itcont", myyear, ".txt"), quote="", select = c(9, 10,15))
  names(myDF) <- c("city", "state", "donation")
  # Keep only the rows whose state is Indiana.
  myDF <- myDF[myDF$state == "IN", ]
  # na.rm = TRUE matters here: without it a city with one missing amount
  # gets an NA total, and sort() below silently drops NA entries, so the
  # city would vanish from the ranking.
  city_donations <- tapply(myDF$donation, myDF$city, sum, na.rm = TRUE)
  sorted_cities <- sort(city_donations, decreasing = TRUE)
  return(head(sorted_cities, 10))
}
# Apply myindycities to each even-numbered year from 1984 to 1994; lapply
# returns a list with one element per year.
myresults <- lapply(
  X = seq(from = 1984, to = 1994, by = 2),
  FUN = myindycities
)
# Label each list element with the year it belongs to.
names(myresults) <- as.character(seq(from = 1984, to = 1994, by = 2))
# Display the named list.
myresults