The function for computing the mean is bound the name mean
When running things through loops, you may often want to apply a function without binding it to a name
vapply(mtcars, function(x) length(unique(x)), FUN.VALUE = double(1))
## mpg cyl disp hp drat wt qsec vs am gear carb ## 25 3 27 22 22 29 30 2 2 3 6
If you have a bunch of functions, you might consider storing them all in a list.
You can then access the functions in the same way you would subset any list
funs <- list( quarter = function(x) x / 4, half = function(x) x / 2, double = function(x) x * 2, quadruple = function(x) x * 4)
If you have a bunch of functions, you might consider storing them all in a list.
You can then access the functions in the same way you would subset any list
funs <- list( quarter = function(x) x / 4, half = function(x) x / 2, double = function(x) x * 2, quadruple = function(x) x * 4)
This is kind of weird...
map_df
map_df(smry, ~.x(mtcars$mpg))
## # A tibble: 1 x 5## n n_miss n_valid mean sd## <int> <int> <int> <dbl> <dbl>## 1 32 0 32 20.09062 6.026948
map_df(smry, ~.x(mtcars$cyl))
## # A tibble: 1 x 5## n n_miss n_valid mean sd## <int> <int> <int> <dbl> <dbl>## 1 32 0 32 6.1875 1.785922
05:00
map_df(mtcars, function(col) map_df(smry, ~.x(col)), .id = "column")
## # A tibble: 11 x 6## column n n_miss n_valid mean sd## <chr> <int> <int> <int> <dbl> <dbl>## 1 mpg 32 0 32 20.09062 6.026948 ## 2 cyl 32 0 32 6.1875 1.785922 ## 3 disp 32 0 32 230.7219 123.9387 ## 4 hp 32 0 32 146.6875 68.56287 ## 5 drat 32 0 32 3.596562 0.5346787## 6 wt 32 0 32 3.21725 0.9784574## 7 qsec 32 0 32 17.84875 1.786943 ## 8 vs 32 0 32 0.4375 0.5040161## 9 am 32 0 32 0.40625 0.4989909## 10 gear 32 0 32 3.6875 0.7378041## 11 carb 32 0 32 2.8125 1.615200
map_df(mtcars, summarize_col, .id = "column")
## # A tibble: 11 x 6## column n n_miss n_valid mean sd## <chr> <int> <int> <int> <dbl> <dbl>## 1 mpg 32 0 32 20.09062 6.026948 ## 2 cyl 32 0 32 6.1875 1.785922 ## 3 disp 32 0 32 230.7219 123.9387 ## 4 hp 32 0 32 146.6875 68.56287 ## 5 drat 32 0 32 3.596562 0.5346787## 6 wt 32 0 32 3.21725 0.9784574## 7 qsec 32 0 32 17.84875 1.786943 ## 8 vs 32 0 32 0.4375 0.5040161## 9 am 32 0 32 0.40625 0.4989909## 10 gear 32 0 32 3.6875 0.7378041## 11 carb 32 0 32 2.8125 1.615200
summarize_df <- function(df) { map_df(df, summarize_col, .id = "column")}
summarize_df(airquality)
## # A tibble: 6 x 6## column n n_miss n_valid mean sd## <chr> <int> <int> <int> <dbl> <dbl>## 1 Ozone 153 37 116 NA NA ## 2 Solar.R 153 7 146 NA NA ## 3 Wind 153 0 153 9.957516 3.523001## 4 Temp 153 0 153 77.88235 9.465270## 5 Month 153 0 153 6.993464 1.416522## 6 Day 153 0 153 15.80392 8.864520
summarize_df <- function(df) { map_df(df, summarize_col, .id = "column")}
summarize_df(airquality)
## # A tibble: 6 x 6## column n n_miss n_valid mean sd## <chr> <int> <int> <int> <dbl> <dbl>## 1 Ozone 153 37 116 NA NA ## 2 Solar.R 153 7 146 NA NA ## 3 Wind 153 0 153 9.957516 3.523001## 4 Temp 153 0 153 77.88235 9.465270## 5 Month 153 0 153 6.993464 1.416522## 6 Day 153 0 153 15.80392 8.864520
Notice the missing data. Why? What should we do?
The arguments supplied to the function
What's one way to identify the formals for a function - say, lm
?
?
: Help documentation!
Alternative - use a function!
formals(lm)
## $formula## ## ## $data## ## ## $subset## ## ## $weights## ## ## $na.action## ## ## $method## [1] "qr"## ## $model## [1] TRUE## ## $x## [1] FALSE## ## $y## [1] FALSE## ## $qr## [1] TRUE## ## $singular.ok## [1] TRUE## ## $contrasts## NULL## ## $offset## ## ## $...
body
body(lm)
## {## ret.x <- x## ret.y <- y## cl <- match.call()## mf <- match.call(expand.dots = FALSE)## m <- match(c("formula", "data", "subset", "weights", "na.action", ## "offset"), names(mf), 0L)## mf <- mf[c(1L, m)]## mf$drop.unused.levels <- TRUE## mf[[1L]] <- quote(stats::model.frame)## mf <- eval(mf, parent.frame())## if (method == "model.frame") ## return(mf)## else if (method != "qr") ## warning(gettextf("method = '%s' is not supported. Using 'qr'", ## method), domain = NA)## mt <- attr(mf, "terms")## y <- model.response(mf, "numeric")## w <- as.vector(model.weights(mf))## if (!is.null(w) && !is.numeric(w)) ## stop("'weights' must be a numeric vector")## offset <- model.offset(mf)## mlm <- is.matrix(y)## ny <- if (mlm) ## nrow(y)## else length(y)## if (!is.null(offset)) {## if (!mlm) ## offset <- as.vector(offset)## if (NROW(offset) != ny) ## stop(gettextf("number of offsets is %d, should equal %d (number of observations)", ## NROW(offset), ny), domain = NA)## }## if (is.empty.model(mt)) {## x <- NULL## z <- list(coefficients = if (mlm) matrix(NA_real_, 0, ## ncol(y)) else numeric(), residuals = y, fitted.values = 0 * ## y, weights = w, rank = 0L, df.residual = if (!is.null(w)) sum(w != ## 0) else ny)## if (!is.null(offset)) {## z$fitted.values <- offset## z$residuals <- y - offset## }## }## else {## x <- model.matrix(mt, mf, contrasts)## z <- if (is.null(w)) ## lm.fit(x, y, offset = offset, singular.ok = singular.ok, ## ...)## else lm.wfit(x, y, w, offset = offset, singular.ok = singular.ok, ## ...)## }## class(z) <- c(if (mlm) "mlm", "lm")## z$na.action <- attr(mf, "na.action")## z$offset <- offset## z$contrasts <- attr(x, "contrasts")## z$xlevels <- .getXlevels(mt, mf)## z$call <- cl## z$terms <- mt## if (model) ## z$model <- mf## if (ret.x) ## z$x <- x## if (ret.y) ## z$y <- y## if (!qr) ## z$qr <- NULL## z## }
extract_grades <- function(dif_mod, items) { item_names <- names(items) delta <- -2.35*(log(dif_mod$alphaMH)) grades <- symnum(abs(delta), c(0, 1, 1.5, Inf), symbols = c("A", "B", "C")) tibble(item = item_names, delta, grades) %>% mutate(grades = as.character(grades))}
read_sub_files <- function(file) { read_csv(file) %>% mutate( content_area = str_extract( file, "[Ee][Ll][Aa]|[Rr]dg|[Ww]ri|[Mm]ath|[Ss]ci" ), grade = gsub(".+g(\\d\\d*).+", "\\1", file), grade = as.numeric(grade) ) %>% select(content_area, grade, everything()) %>% clean_names()}ifiles <- map_df(ifiles, read_sub_files)
mods <- mtcars %>% group_by(cyl) %>% nest() %>% mutate( model = map( data, ~lm(mpg ~ disp + hp + drat, data = .x) ) )mods
## # A tibble: 3 x 3## # Groups: cyl [3]## cyl data model ## <dbl> <list> <list>## 1 6 <tibble[,10] [7 × 10]> <lm> ## 2 4 <tibble[,10] [11 × 10]> <lm> ## 3 8 <tibble[,10] [14 × 10]> <lm>
pull_coef <- function(model, coef_name) {
coef(model)[coef_name]
}
mods %>% mutate(intercept = map_dbl(model, pull_coef, "(Intercept)"), disp = map_dbl(model, pull_coef, "disp"), hp = map_dbl(model, pull_coef, "hp"), drat = map_dbl(model, pull_coef, "drat"))
## # A tibble: 3 x 7## # Groups: cyl [3]## cyl data model intercept disp hp drat## <dbl> <list> <lis> <dbl> <dbl> <dbl> <dbl>## 1 6 <tibble[,10] [7 × 1… <lm> 6.284507 0.02635410 0.006229086 2.193577 ## 2 4 <tibble[,10] [11 × … <lm> 46.08662 -0.1225361 -0.04937771 -0.6041857## 3 8 <tibble[,10] [14 × … <lm> 19.00162 -0.01671461 -0.02140236 2.006011
pull_coef <- function(model, coef_name = "(Intercept)") { coef(model)[coef_name]}mods %>% mutate(intercept = map_dbl(model, pull_coef))
## # A tibble: 3 x 4## # Groups: cyl [3]## cyl data model intercept## <dbl> <list> <list> <dbl>## 1 6 <tibble[,10] [7 × 10]> <lm> 6.284507## 2 4 <tibble[,10] [11 × 10]> <lm> 46.08662 ## 3 8 <tibble[,10] [14 × 10]> <lm> 19.00162
pull_coef <- function(model) { coefs <- coef(model) data.frame(coefficient = names(coefs), estimate = coefs)}mods %>% mutate(coefs = map(model, pull_coef))
## # A tibble: 3 x 4## # Groups: cyl [3]## cyl data model coefs ## <dbl> <list> <list> <list> ## 1 6 <tibble[,10] [7 × 10]> <lm> <df[,2] [4 × 2]>## 2 4 <tibble[,10] [11 × 10]> <lm> <df[,2] [4 × 2]>## 3 8 <tibble[,10] [14 × 10]> <lm> <df[,2] [4 × 2]>
mods %>% mutate(coefs = map(model, pull_coef)) %>% unnest(coefs)
## # A tibble: 12 x 5## # Groups: cyl [3]## cyl data model coefficient estimate## <dbl> <list> <list> <chr> <dbl>## 1 6 <tibble[,10] [7 × 10]> <lm> (Intercept) 6.284507 ## 2 6 <tibble[,10] [7 × 10]> <lm> disp 0.02635410 ## 3 6 <tibble[,10] [7 × 10]> <lm> hp 0.006229086## 4 6 <tibble[,10] [7 × 10]> <lm> drat 2.193577 ## 5 4 <tibble[,10] [11 × 10]> <lm> (Intercept) 46.08662 ## 6 4 <tibble[,10] [11 × 10]> <lm> disp -0.1225361 ## 7 4 <tibble[,10] [11 × 10]> <lm> hp -0.04937771 ## 8 4 <tibble[,10] [11 × 10]> <lm> drat -0.6041857 ## 9 8 <tibble[,10] [14 × 10]> <lm> (Intercept) 19.00162 ## 10 8 <tibble[,10] [14 × 10]> <lm> disp -0.01671461 ## 11 8 <tibble[,10] [14 × 10]> <lm> hp -0.02140236 ## 12 8 <tibble[,10] [14 × 10]> <lm> drat 2.006011
mods %>% mutate(coefs = map(model, pull_coef)) %>% select(cyl, coefs) %>% unnest(coefs)
## # A tibble: 12 x 3## # Groups: cyl [3]## cyl coefficient estimate## <dbl> <chr> <dbl>## 1 6 (Intercept) 6.284507 ## 2 6 disp 0.02635410 ## 3 6 hp 0.006229086## 4 6 drat 2.193577 ## 5 4 (Intercept) 46.08662 ## 6 4 disp -0.1225361 ## 7 4 hp -0.04937771 ## 8 4 drat -0.6041857 ## 9 8 (Intercept) 19.00162 ## 10 8 disp -0.01671461 ## 11 8 hp -0.02140236 ## 12 8 drat 2.006011
mods %>% mutate(coefs = map(model, pull_coef)) %>% select(cyl, coefs) %>% unnest(coefs) %>% pivot_wider(names_from = "coefficient", values_from = "estimate") %>% arrange(cyl)
## # A tibble: 3 x 5## # Groups: cyl [3]## cyl `(Intercept)` disp hp drat## <dbl> <dbl> <dbl> <dbl> <dbl>## 1 4 46.08662 -0.1225361 -0.04937771 -0.6041857## 2 6 6.284507 0.02635410 0.006229086 2.193577 ## 3 8 19.00162 -0.01671461 -0.02140236 2.006011
set.seed(42)df <- tibble::tibble( a = rnorm(10, 100, 150), b = rnorm(10, 100, 150), c = rnorm(10, 100, 150), d = rnorm(10, 100, 150))df
## # A tibble: 10 x 4## a b c d## <dbl> <dbl> <dbl> <dbl>## 1 305.6438 295.7304 54.00421 168.3175 ## 2 15.29527 442.9968 -167.1963 205.7256 ## 3 154.4693 -108.3291 74.21240 255.2655 ## 4 194.9294 58.18168 282.2012 8.661044## 5 160.6402 80.00180 384.2790 175.7433 ## 6 84.08132 195.3926 35.42963 -157.5513 ## 7 326.7283 57.36206 61.40959 -17.66885 ## 8 85.80114 -298.4683 -164.4745 -27.63614 ## 9 402.7636 -266.0700 169.0146 -262.1311 ## 10 90.59289 298.0170 4.000769 105.4184
df %>% mutate(a = (a - min(a, na.rm = TRUE)) / (max(a, na.rm = TRUE) - min(a, na.rm = TRUE)))
## # A tibble: 10 x 4## a b c d## <dbl> <dbl> <dbl> <dbl>## 1 0.7493478 295.7304 54.00421 168.3175 ## 2 0 442.9968 -167.1963 205.7256 ## 3 0.3591881 -108.3291 74.21240 255.2655 ## 4 0.4636099 58.18168 282.2012 8.661044## 5 0.3751145 80.00180 384.2790 175.7433 ## 6 0.1775269 195.3926 35.42963 -157.5513 ## 7 0.8037639 57.36206 61.40959 -17.66885 ## 8 0.1819655 -298.4683 -164.4745 -27.63614 ## 9 1 -266.0700 169.0146 -262.1311 ## 10 0.1943323 298.0170 4.000769 105.4184
df %>% mutate(a = (a - min(a, na.rm = TRUE)) / (max(a, na.rm = TRUE) - min(a, na.rm = TRUE)), b = (b - min(b, na.rm = TRUE)) / (max(b, na.rm = TRUE) - min(b, na.rm = TRUE)), c = (c - min(c, na.rm = TRUE)) / (max(c, na.rm = TRUE) - min(c, na.rm = TRUE)), d = (d - min(d, na.rm = TRUE)) / (max(d, na.rm = TRUE) - min(d, na.rm = TRUE)))
## # A tibble: 10 x 4## a b c d## <dbl> <dbl> <dbl> <dbl>## 1 0.7493478 0.8013846 0.4011068 0.8319510## 2 0 1 0 0.9042516## 3 0.3591881 0.2564372 0.4377506 1 ## 4 0.4636099 0.4810071 0.8149005 0.5233744## 5 0.3751145 0.5104355 1 0.8463031## 6 0.1775269 0.6660608 0.3674252 0.2021270## 7 0.8037639 0.4799017 0.4145351 0.4724852## 8 0.1819655 0 0.004935493 0.4532209## 9 1 0.04369494 0.6096572 0 ## 10 0.1943323 0.8044685 0.3104346 0.7103825
map_df(df, ~(.x - min(.x, na.rm = TRUE)) / (max(.x, na.rm = TRUE) - min(.x, na.rm = TRUE)))
## # A tibble: 10 x 4## a b c d## <dbl> <dbl> <dbl> <dbl>## 1 0.7493478 0.8013846 0.4011068 0.8319510## 2 0 1 0 0.9042516## 3 0.3591881 0.2564372 0.4377506 1 ## 4 0.4636099 0.4810071 0.8149005 0.5233744## 5 0.3751145 0.5104355 1 0.8463031## 6 0.1775269 0.6660608 0.3674252 0.2021270## 7 0.8037639 0.4799017 0.4145351 0.4724852## 8 0.1819655 0 0.004935493 0.4532209## 9 1 0.04369494 0.6096572 0 ## 10 0.1943323 0.8044685 0.3104346 0.7103825
modify
here toomap_df(df, rescale01b)
## # A tibble: 10 x 4## a b c d## <dbl> <dbl> <dbl> <dbl>## 1 0.7493478 0.8013846 0.4011068 0.8319510## 2 0 1 0 0.9042516## 3 0.3591881 0.2564372 0.4377506 1 ## 4 0.4636099 0.4810071 0.8149005 0.5233744## 5 0.3751145 0.5104355 1 0.8463031## 6 0.1775269 0.6660608 0.3674252 0.2021270## 7 0.8037639 0.4799017 0.4145351 0.4724852## 8 0.1819655 0 0.004935493 0.4532209## 9 1 0.04369494 0.6096572 0 ## 10 0.1943323 0.8044685 0.3104346 0.7103825
means_df <- function(df) { means <- map(df, mean2) # calculate means nulls <- map_lgl(means, is.null) # find null values means_l <- means[!nulls] # subset list to remove nulls as.data.frame(means_l) # return a df}
head(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species## 1 5.1 3.5 1.4 0.2 setosa## 2 4.9 3.0 1.4 0.2 setosa## 3 4.7 3.2 1.3 0.2 setosa## 4 4.6 3.1 1.5 0.2 setosa## 5 5.0 3.6 1.4 0.2 setosa## 6 5.4 3.9 1.7 0.4 setosa
means_df(iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width## 1 5.843333 3.057333 3.758 1.199333
head(airquality)
## Ozone Solar.R Wind Temp Month Day## 1 41 190 7.4 67 5 1## 2 36 118 8.0 72 5 2## 3 12 149 12.6 74 5 3## 4 18 313 11.5 62 5 4## 5 NA NA 14.3 56 5 5## 6 28 NA 14.9 66 5 6
means_df(airquality)
## Ozone Solar.R Wind Temp Month Day## 1 NA NA 9.957516 77.88235 6.993464 15.80392
Keyboard shortcuts
↑, ←, Pg Up, k | Go to previous slide |
↓, →, Pg Dn, Space, j | Go to next slide |
Home | Go to first slide |
End | Go to last slide |
Number + Return | Go to specific slide |
b / m / f | Toggle blackout / mirrored / fullscreen mode |
c | Clone slideshow |
p | Toggle presenter mode |
t | Restart the presentation timer |
?, h | Toggle this help |
Esc | Back to slideshow |