ECON 413
Factors, lists, functionals

Erol Taymaz
Department of Economics
Middle East Technical University

Factors

aa <- c("small", "large", "medium")
aa
## [1] "small"  "large"  "medium"
bb <- sample(3, 10, replace = TRUE)
bb
##  [1] 3 1 2 2 1 1 3 1 1 1
aa <- aa[bb]
aa
##  [1] "medium" "small"  "large"  "large"  "small"  "small"  "medium" "small" 
##  [9] "small"  "small"
class(aa)
## [1] "character"
aaf <- factor(aa)
aaf
##  [1] medium small  large  large  small  small  medium small  small  small 
## Levels: large medium small
as.numeric(aaf)
##  [1] 2 3 1 1 3 3 2 3 3 3
as.numeric(aa)
##  [1] NA NA NA NA NA NA NA NA NA NA
aaof <- ordered(aaf, levels=c("small", "medium", "large"))
aaof
##  [1] medium small  large  large  small  small  medium small  small  small 
## Levels: small < medium < large
as.numeric(aaf)
##  [1] 2 3 1 1 3 3 2 3 3 3
as.numeric(aaof)
##  [1] 2 1 3 3 1 1 2 1 1 1
# Converting characters to numeric values
aa <- factor(c("3", "11", "2", "23", "313", "2"))
aa
## [1] 3   11  2   23  313 2  
## Levels: 11 2 23 3 313
aan <- as.numeric(aa)
aan
## [1] 4 1 2 3 5 2
aan <- as.numeric(as.character(aa))
aan
## [1]   3  11   2  23 313   2
rm(list=ls())

Lists

library(ggplot2)

a <- data.frame(x = rnorm(100))
b <- letters[1:5]
d <- c(TRUE, FALSE, TRUE, FALSE, FALSE) 
g <- ggplot(a, aes(x=x)) + geom_density()
x <- list(a = a, b = b, d = d)

class(x)
## [1] "list"
str(x)
## List of 3
##  $ a:'data.frame':   100 obs. of  1 variable:
##   ..$ x: num [1:100] -0.59 -0.904 1.303 -0.568 -1.883 ...
##  $ b: chr [1:5] "a" "b" "c" "d" ...
##  $ d: logi [1:5] TRUE FALSE TRUE FALSE FALSE
class(x[1])
## [1] "list"
class(x[[1]])
## [1] "data.frame"
class(x$a)
## [1] "data.frame"
# x[[1]][1:5]
x[[1]][1:5,]
## [1] -0.5901170 -0.9038400  1.3033041 -0.5679117 -1.8828967
x$a[1:5,]
## [1] -0.5901170 -0.9038400  1.3033041 -0.5679117 -1.8828967
y <- list(a, g)
z <- list(y, x)

class(y)
## [1] "list"
length(y)
## [1] 2
class(y[1])
## [1] "list"
class(y[[1]])
## [1] "data.frame"
class(y[2])
## [1] "list"
class(y[[2]])
## [1] "gg"     "ggplot"
y[[2]]

# Empty vectors
vlogical <- vector(mode = "logical", length = 3)
vlist <- vector(mode = "list", length = 7)
length(vlist)
## [1] 7

Functionals

A higher-order function is a function that takes a function as an input or returns a function as output.

descriptive <- function(x, f) {
  f(x, na.rm=T)
}

set.seed(123)
aa <- rnorm(100)
descriptive(aa, mean)
## [1] 0.09040591
descriptive(aa, max)
## [1] 2.187333

lapply function

myFun <- function(x, f, ...) {
  out <- vector("list", length(x))
  for (i in seq_along(x)) {
    out[[i]] <- f(x[[i]], ...)
  }
  out
}

str(mtcars)
## 'data.frame':    32 obs. of  11 variables:
##  $ mpg : num  21 21 22.8 21.4 18.7 18.1 14.3 24.4 22.8 19.2 ...
##  $ cyl : num  6 6 4 6 8 6 8 4 4 6 ...
##  $ disp: num  160 160 108 258 360 ...
##  $ hp  : num  110 110 93 110 175 105 245 62 95 123 ...
##  $ drat: num  3.9 3.9 3.85 3.08 3.15 2.76 3.21 3.69 3.92 3.92 ...
##  $ wt  : num  2.62 2.88 2.32 3.21 3.44 ...
##  $ qsec: num  16.5 17 18.6 19.4 17 ...
##  $ vs  : num  0 0 1 1 0 1 0 1 1 1 ...
##  $ am  : num  1 1 1 0 0 0 0 0 0 0 ...
##  $ gear: num  4 4 4 3 3 3 3 4 4 4 ...
##  $ carb: num  4 4 1 1 2 1 4 2 2 4 ...
myFun(mtcars, mean)
## [[1]]
## [1] 20.09062
## 
## [[2]]
## [1] 6.1875
## 
## [[3]]
## [1] 230.7219
## 
## [[4]]
## [1] 146.6875
## 
## [[5]]
## [1] 3.596563
## 
## [[6]]
## [1] 3.21725
## 
## [[7]]
## [1] 17.84875
## 
## [[8]]
## [1] 0.4375
## 
## [[9]]
## [1] 0.40625
## 
## [[10]]
## [1] 3.6875
## 
## [[11]]
## [1] 2.8125
unlist(myFun(mtcars, mean))
##  [1]  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250
##  [7]  17.848750   0.437500   0.406250   3.687500   2.812500
unlist(myFun(mtcars, class))
##  [1] "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric"
##  [8] "numeric" "numeric" "numeric" "numeric"
unlist(lapply(mtcars, mean))
##        mpg        cyl       disp         hp       drat         wt       qsec 
##  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250  17.848750 
##         vs         am       gear       carb 
##   0.437500   0.406250   3.687500   2.812500
unlist(lapply(mtcars, class))
##       mpg       cyl      disp        hp      drat        wt      qsec        vs 
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" 
##        am      gear      carb 
## "numeric" "numeric" "numeric"
unlist(lapply(mtcars, function(x) sum((x -mean(x))^2)/length(x)))
##          mpg          cyl         disp           hp         drat           wt 
## 3.518897e+01 3.089844e+00 1.488077e+04 4.553965e+03 2.769476e-01 9.274609e-01 
##         qsec           vs           am         gear         carb 
## 3.093380e+00 2.460938e-01 2.412109e-01 5.273438e-01 2.527344e+00
unlist(lapply(c(1:11), function(x) sum(mtcars[,x])))
##  [1]  642.900  198.000 7383.100 4694.000  115.090  102.952  571.160   14.000
##  [9]   13.000  118.000   90.000

sapply function

sapply(mtcars, mean)
##        mpg        cyl       disp         hp       drat         wt       qsec 
##  20.090625   6.187500 230.721875 146.687500   3.596563   3.217250  17.848750 
##         vs         am       gear       carb 
##   0.437500   0.406250   3.687500   2.812500
sapply(mtcars, class)
##       mpg       cyl      disp        hp      drat        wt      qsec        vs 
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" 
##        am      gear      carb 
## "numeric" "numeric" "numeric"

Map function

xs <- replicate(5, runif(10), simplify = FALSE)
ws <- replicate(5, c(1:10), simplify = FALSE)

# Unweighted means
unlist(lapply(xs, mean))
## [1] 0.4595448 0.4874797 0.5758510 0.4291318 0.6112719
# Weighted means
lapply(c(1:length(xs)), function(i) weighted.mean(xs[[i]], ws[[i]]))
## [[1]]
## [1] 0.3897107
## 
## [[2]]
## [1] 0.5652652
## 
## [[3]]
## [1] 0.6007108
## 
## [[4]]
## [1] 0.4449518
## 
## [[5]]
## [1] 0.6790272
# Use __Map__ function
Map(weighted.mean, xs, ws)
## [[1]]
## [1] 0.3897107
## 
## [[2]]
## [1] 0.5652652
## 
## [[3]]
## [1] 0.6007108
## 
## [[4]]
## [1] 0.4449518
## 
## [[5]]
## [1] 0.6790272

mclapply and mcMap

mclapply() and mcMap() are parallel versions of lapply() and Map()