Apply Functions

tapply

The documentation definition for tapply is a bit more specific than the others: the arguments are now (X, INDEX, FUN), where X is an object to which the split function can be applied, INDEX is a factor (or a list of factors) by which X is grouped, and FUN is the function to apply, as before.

To simplify this definition, we can say tapply applies FUN to X when X is grouped by INDEX.

Examples

Using the itcont1980 data, show the top 20 city-and-state pairs, according to the amount of money donated.

Click to see solution
library(data.table)
# Read the 1980 contributions file and label its 21 columns.
myDF <- fread("/anvil/projects/tdm/data/election/itcont1980.txt", quote = "")
names(myDF) <- c(
    "CMTE_ID", "AMNDT_IND", "RPT_TP", "TRANSACTION_PGI", "IMAGE_NUM",
    "TRANSACTION_TP", "ENTITY_TP", "NAME", "CITY", "STATE",
    "ZIP_CODE", "EMPLOYER", "OCCUPATION", "TRANSACTION_DT", "TRANSACTION_AMT",
    "OTHER_ID", "TRAN_ID", "FILE_NUM", "MEMO_CD", "MEMO_TEXT",
    "SUB_ID"
)

# Sum TRANSACTION_AMT within each "CITY, STATE" label (NA amounts ignored),
# order the totals from largest to smallest, and show the first 20.
head(
    sort(
        tapply(X = myDF$TRANSACTION_AMT,
               INDEX = paste(myDF$CITY, myDF$STATE, sep = ", "),
               FUN = sum,
               na.rm = TRUE),
        decreasing = TRUE
    ),
    n = 20
)
,
    17299729
NEW YORK, NY
    11345027
HOUSTON, TX
    7606806
DALLAS, TX
    4748262
LOS ANGELES, CA
    4569952
WASHINGTON, DC
    4273606
CHICAGO, IL
    3179470
SAN FRANCISCO, CA
    2061441
BEVERLY HILLS, CA
    2053148
ATLANTA, GA
    1892356
OKLAHOMA CITY, OK
    1751431
ST LOUIS, MO
    1639570
SAN ANTONIO, TX
    1583292
MIAMI, FL
    1541867
TULSA, OK
    1341956
GREENWICH, CT
    1340112
WASHINGTOM, DC
    1300341
MIDLAND, TX
    1210584
DENVER, CO
    1183471
CINCINNATI, OH
    1096983

Use the sapply function to run the mydonations function shown below on each election year (in other words, the even-numbered years) from 1980 to 2018 to get the total amount of the donations from each state during each election year. Then use the tapply function to add up the amount of donations in each state, across all of the years.

# Total donation amount per state for one election year.
#
# myyear: year used to build the file name itcont<myyear>.txt.
# Returns the tapply() result: one sum per distinct value of the state column,
# named by that value.
mydonations <- function(myyear) {
    # Only columns 10 and 15 are read; they are renamed to state and donation.
    myDF <- fread(paste0("/anvil/projects/tdm/data/election/itcont", myyear, ".txt"), quote="", select = c(10,15))
    names(myDF) <- c("state", "donation")
    # na.rm = TRUE keeps a single missing amount from turning a state's
    # whole total into NA.
    myvalue <- tapply(myDF$donation, myDF$state, sum, na.rm = TRUE)
    return(myvalue)
}
Click to see solution
# Total donation amount per state for one election year.
#
# myyear: year used to build the file name itcont<myyear>.txt.
# Returns the tapply() result: one sum per distinct value of the state column,
# named by that value.
mydonations <- function(myyear) {
    # Only columns 10 and 15 are read; they are renamed to state and donation.
    myDF <- fread(paste0("/anvil/projects/tdm/data/election/itcont", myyear, ".txt"), quote="", select = c(10,15))
    names(myDF) <- c("state", "donation")
    # na.rm = TRUE keeps a single missing amount from turning a state's
    # whole total into NA.
    myvalue <- tapply(myDF$donation, myDF$state, sum, na.rm = TRUE)
    return(myvalue)
}

library(data.table)
# simplify = FALSE makes sapply always return a list with one named vector per
# year. Without it, sapply would build a matrix whenever every year happened to
# yield the same number of states, and the unlist()/names() step below would
# then no longer see per-state names.
myresults <- sapply( seq(1980,2018,by=2), mydonations, simplify = FALSE )

# Flatten the per-year vectors; each entry keeps its state abbreviation as name.
v <- unlist(myresults)
# Add up the yearly totals within each state. as.numeric() makes the sums
# accumulate as doubles, so multi-year totals cannot run into the integer
# limit (where sum() may warn about integer overflow and give NA).
tapply(as.numeric(v), names(v), sum)

sapply

sapply functions identically to lapply unless the output can be simplified (for example, into a vector or matrix instead of a list), in which case sapply performs that simplification. The following occurs when we run sapply in place of lapply on our squares vector.

Examples

Use the sapply function to run this function on each election year (in other words, the even numbered years) from 1980 to 2018 to obtain the total amount of the donations from Indiana during federal election campaigns.

# Total donation amount from Indiana for one election year.
#
# myyear: year used to build the file name itcont<myyear>.txt.
# Returns a length-one result holding the 'IN' total, named by myyear.
myindydonations <- function(myyear) {
    # Only columns 10 and 15 are read; they are renamed to state and donation.
    myDF <- fread(paste0("/anvil/projects/tdm/data/election/itcont", myyear, ".txt"), quote="", select = c(10,15))
    names(myDF) <- c("state", "donation")
    # Sum per state, then keep only the 'IN' entry. na.rm = TRUE keeps a single
    # missing amount from turning the Indiana total into NA.
    myvalue <- tapply(myDF$donation, myDF$state, sum, na.rm = TRUE)['IN']
    # Relabel the value with the year so results from several years can be told apart.
    names(myvalue) <- myyear
    return(myvalue)
}
Click to see solution
# Total donation amount from Indiana for one election year.
#
# myyear: year used to build the file name itcont<myyear>.txt.
# Returns a length-one result holding the 'IN' total, named by myyear.
myindydonations <- function(myyear) {
    # Only columns 10 and 15 are read; they are renamed to state and donation.
    myDF <- fread(paste0("/anvil/projects/tdm/data/election/itcont", myyear, ".txt"), quote="", select = c(10,15))
    names(myDF) <- c("state", "donation")
    # Sum per state, then keep only the 'IN' entry. na.rm = TRUE keeps a single
    # missing amount from turning the Indiana total into NA.
    myvalue <- tapply(myDF$donation, myDF$state, sum, na.rm = TRUE)['IN']
    # Relabel the value with the year so results from several years can be told apart.
    names(myvalue) <- myyear
    return(myvalue)
}

library(data.table)
# Run myindydonations once for every even-numbered year from 1980 through 2018.
myresults <- sapply(X = seq(from = 1980, to = 2018, by = 2),
                    FUN = myindydonations)

Use the sapply function to run this function on each election year (in other words, the even numbered years) from 1980 to 2018 to get the total amount of all the donations from all of the states during an election year.

# Total donation amount per state for one election year.
#
# myyear: year used to build the file name itcont<myyear>.txt.
# Returns the tapply() result: one sum per distinct value of the state column,
# named by that value.
mydonations <- function(myyear) {
    # Only columns 10 and 15 are read; they are renamed to state and donation.
    myDF <- fread(paste0("/anvil/projects/tdm/data/election/itcont", myyear, ".txt"), quote="", select = c(10,15))
    names(myDF) <- c("state", "donation")
    # na.rm = TRUE keeps a single missing amount from turning a state's
    # whole total into NA.
    myvalue <- tapply(myDF$donation, myDF$state, sum, na.rm = TRUE)
    return(myvalue)
}
Click to see solution
# Total donation amount per state for one election year.
#
# myyear: year used to build the file name itcont<myyear>.txt.
# Returns the tapply() result: one sum per distinct value of the state column,
# named by that value.
mydonations <- function(myyear) {
    # Only columns 10 and 15 are read; they are renamed to state and donation.
    myDF <- fread(paste0("/anvil/projects/tdm/data/election/itcont", myyear, ".txt"), quote="", select = c(10,15))
    names(myDF) <- c("state", "donation")
    # na.rm = TRUE keeps a single missing amount from turning a state's
    # whole total into NA.
    myvalue <- tapply(myDF$donation, myDF$state, sum, na.rm = TRUE)
    return(myvalue)
}

library(data.table)
# Run mydonations once for every even-numbered year from 1980 through 2018.
myresults <- sapply(X = seq(from = 1980, to = 2018, by = 2),
                    FUN = mydonations)

Use the sapply function to plot myindycities for each of the even-numbered election years 1984 to 1994.

# Ten largest Indiana city totals for one election year.
#
# myyear: year used to build the file name itcont<myyear>.txt.
# Returns up to ten per-city donation sums, largest first, named by city.
myindycities <- function(myyear) {
    # Only columns 9, 10 and 15 are read; they are renamed to city, state and donation.
    myDF <- fread(paste0("/anvil/projects/tdm/data/election/itcont", myyear, ".txt"), quote="", select = c(9, 10,15))
    names(myDF) <- c("city", "state", "donation")
    # Keep only the rows whose state is "IN".
    myDF <- myDF[myDF$state == "IN", ]
    # na.rm = TRUE matters here: a city whose sum is NA would be dropped by
    # sort() below and silently vanish from the ranking.
    city_donations <- tapply(myDF$donation, myDF$city, sum, na.rm = TRUE)
    sorted_cities <- sort(city_donations, decreasing = TRUE)
    return(head(sorted_cities, 10))
}
# One myindycities() result per even-numbered year 1984-1994, each list
# element labelled with its year.
myresults <- setNames(
    lapply(seq(1984, 1994, by = 2), myindycities),
    seq(1984, 1994, by = 2)
)
Click to see solution
# Ten largest Indiana city totals for one election year.
#
# myyear: year used to build the file name itcont<myyear>.txt.
# Returns up to ten per-city donation sums, largest first, named by city.
myindycities <- function(myyear) {
    # Only columns 9, 10 and 15 are read; they are renamed to city, state and donation.
    myDF <- fread(paste0("/anvil/projects/tdm/data/election/itcont", myyear, ".txt"), quote="", select = c(9, 10,15))
    names(myDF) <- c("city", "state", "donation")
    # Keep only the rows whose state is "IN".
    myDF <- myDF[myDF$state == "IN", ]
    # na.rm = TRUE matters here: a city whose sum is NA would be dropped by
    # sort() below and silently vanish from the ranking.
    city_donations <- tapply(myDF$donation, myDF$city, sum, na.rm = TRUE)
    sorted_cities <- sort(city_donations, decreasing = TRUE)
    return(head(sorted_cities, 10))
}

# One myindycities() result per even-numbered year 1984-1994, labelled by year.
myresults <- lapply(seq(1984, 1994, by=2), myindycities)
names(myresults) <- seq(1984, 1994, by=2)
# Lay the plots out on a 3 x 2 grid, one panel per year; par() hands back the
# previous setting so it can be restored once the plots are drawn.
oldpar <- par(mfrow = c(3, 2))
# sapply is used only for its plotting side effect; invisible() keeps the
# collected return values of dotchart() from being auto-printed.
invisible(sapply(names(myresults), function(year) {
  dotchart(myresults[[year]], main = paste("Donations by City -", year), xlab = "Total Donations", ylab = "City")
}))
par(oldpar)

lapply

lapply applies a function to each element of a list (or vector), then returns a new list of the same length that holds the results. Since there is only one dimension in a list, the MARGIN parameter does not apply.

Examples

Use the list apply function (lapply) to run the function myindycities on each of the even-numbered election years 1984 to 1994.

# Ten largest Indiana city totals for one election year.
#
# myyear: year used to build the file name itcont<myyear>.txt.
# Returns up to ten per-city donation sums, largest first, named by city.
myindycities <- function(myyear) {
    # Only columns 9, 10 and 15 are read; they are renamed to city, state and donation.
    myDF <- fread(paste0("/anvil/projects/tdm/data/election/itcont", myyear, ".txt"), quote="", select = c(9, 10,15))
    names(myDF) <- c("city", "state", "donation")
    # Keep only the rows whose state is "IN".
    myDF <- myDF[myDF$state == "IN", ]
    # na.rm = TRUE matters here: a city whose sum is NA would be dropped by
    # sort() below and silently vanish from the ranking.
    city_donations <- tapply(myDF$donation, myDF$city, sum, na.rm = TRUE)
    sorted_cities <- sort(city_donations, decreasing = TRUE)
    return(head(sorted_cities, 10))
}
Click to see solution
# Ten largest Indiana city totals for one election year.
#
# myyear: year used to build the file name itcont<myyear>.txt.
# Returns up to ten per-city donation sums, largest first, named by city.
myindycities <- function(myyear) {
    # Only columns 9, 10 and 15 are read; they are renamed to city, state and donation.
    myDF <- fread(paste0("/anvil/projects/tdm/data/election/itcont", myyear, ".txt"), quote="", select = c(9, 10,15))
    names(myDF) <- c("city", "state", "donation")
    # Keep only the rows whose state is "IN".
    myDF <- myDF[myDF$state == "IN", ]
    # na.rm = TRUE matters here: a city whose sum is NA would be dropped by
    # sort() below and silently vanish from the ranking.
    city_donations <- tapply(myDF$donation, myDF$city, sum, na.rm = TRUE)
    sorted_cities <- sort(city_donations, decreasing = TRUE)
    return(head(sorted_cities, 10))
}

# One myindycities() result per even-numbered year 1984-1994, each list
# element labelled with its year; the final line displays the list.
myresults <- setNames(
    lapply(seq(1984, 1994, by = 2), myindycities),
    seq(1984, 1994, by = 2)
)
myresults