# A list can bundle objects of different types and shapes under one name:
# an integer vector, a matrix, a logical vector, and a summary object.
l <- list(
  vec_numeric = 5:8,
  mat = matrix(1:8, nrow = 2, ncol = 4),
  vec_logical = c(TRUE, FALSE),
  summary = summary(rnorm(n = 1000))
)
l
## $vec_numeric
## [1] 5 6 7 8
##
## $mat
## [,1] [,2] [,3] [,4]
## [1,] 1 3 5 7
## [2,] 2 4 6 8
##
## $vec_logical
## [1] TRUE FALSE
##
## $summary
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -3.00805 -0.69737 -0.03532 -0.01165 0.68843 3.81028
# For loop:
# Four numeric vectors of 20 normal draws each, with different means and
# standard deviations, stored together in a named list.
df <- list(
  a = rnorm(n = 20, mean = 3, sd = 1),
  b = rnorm(n = 20, mean = 0, sd = 5),
  c = rnorm(n = 20, mean = 10, sd = 0.2),
  d = rnorm(n = 20, mean = -3, sd = 1)
)
#' Compute the mean and standard deviation of a numeric vector
#'
#' @param x A numeric vector with at least two elements.
#' @return A one-row tibble with columns `mean` and `sd`.
#' @details Stops with an error if `x` is not numeric or has fewer than two
#'   elements.
mean_and_sd <- function(x) {
  if (!is.numeric(x)) {
    stop("Argument x should be numeric")
  }
  # sd() needs at least two observations. Reject empty input as well as
  # length-1 input instead of silently returning NaN / NA for an empty vector.
  if (length(x) < 2) {
    stop("Cannot be computed for length ", length(x), " vectors")
  }

  tibble(
    mean = mean(x),
    sd = sd(x)
  )
}
# Try the helper on the first element of `df` before iterating over all of them.
mean_and_sd(x = df[[1]])
## # A tibble: 1 x 2
## mean sd
## <dbl> <dbl>
## 1 2.70 1.12
# Preallocate one slot per element of `df`, then fill each slot in turn.
# Sizing from `df` (instead of a hard-coded 4) keeps the loop correct if the
# number of elements in `df` changes.
output <- vector("list", length = length(df))
for (i in seq_along(df)) {
  output[[i]] <- mean_and_sd(df[[i]])
}
# Map function:
# Same iteration as the for loop, expressed with purrr::map().
output <- map(df, mean_and_sd)

# Variants applying median(): a list, a double vector, and a row-bound data frame.
output_median <- map(df, median)
output_median_1 <- map_dbl(df, median)
output_median_2 <- map_dfr(df, median)

# Formula shorthand that names the mapped element explicitly as `.x`.
output <- map(df, ~ mean_and_sd(.x))
library(rvest)
## Loading required package: xml2
##
## Attaching package: 'rvest'
## The following object is masked from 'package:purrr':
##
## pluck
## The following object is masked from 'package:readr':
##
## guess_encoding
## Loading required package: xml2
##
## Attaching package: 'rvest'
## The following object is masked from 'package:purrr':
##
## pluck
## The following object is masked from 'package:readr':
##
## guess_encoding
#' Scrape the reviews shown on one product-review page
#'
#' @param url URL of the review page to read.
#' @return A tibble with columns `title`, `stars`, and `text`, built from the
#'   nodes matched by the CSS selectors below.
read_page_reviews <- function(url) {
  h <- read_html(url)

  title <- h %>%
    html_nodes("#cm_cr-review_list .review-title") %>%
    html_text()

  # Keep only the first digit of the rating text and store it as a number.
  stars <- h %>%
    html_nodes("#cm_cr-review_list .review-rating") %>%
    html_text() %>%
    str_extract("\\d") %>%
    as.numeric()

  text <- h %>%
    html_nodes(".review-data:nth-child(5)") %>%
    html_text()

  # tibble() replaces the deprecated data_frame() constructor.
  tibble(title, stars, text)
}
# Build the URLs for pages 1 through 5 by appending the page number to the
# base URL, then scrape each page and row-bind the results.
url_base <- "https://www.amazon.com/product-reviews/B00005JNBQ/ref=cm_cr_arp_d_viewopt_rvwer?ie=UTF8&reviewerType=avp_only_reviews&sortBy=recent&pageNumber="
vec_urls <- str_c(url_base, 1:5)
dynamite_reviews <- vec_urls %>%
  map_df(read_page_reviews)
# Pull PRCP / TMIN / TMAX for three stations over calendar year 2016, attach a
# readable station name, and move the identifying columns to the front.
weather <-
  rnoaa::meteo_pull_monitors(
    c("USW00094728", "USC00519397", "USS0023B17S"),
    var = c("PRCP", "TMIN", "TMAX"),
    date_min = "2016-01-01",
    date_max = "2016-12-31"
  ) %>%
  mutate(
    name = recode(
      id,
      USW00094728 = "CentralPark_NY",
      USC00519397 = "Waikiki_HA",
      USS0023B17S = "Waterhole_WA"
    ),
    # NOTE(review): dividing by 10 presumably converts tenths of a degree to
    # degrees -- confirm against the data source's units.
    tmin = tmin / 10,
    tmax = tmax / 10
  ) %>%
  select(name, id, everything())
# Collapse the columns from `date` through `tmin` into a single list-column
# named `data`, leaving one row per remaining (name, id) combination.
weather_nest <- nest(weather, data = date:tmin)
weather_nest
## # A tibble: 3 x 3
## name id data
## <chr> <chr> <list<df[,4]>>
## 1 CentralPark_NY USW00094728 [366 × 4]
## 2 Waikiki_HA USC00519397 [366 × 4]
## 3 Waterhole_WA USS0023B17S [366 × 4]
# Extract the list-column and look at its first element.
pull(weather_nest, data)[[1]]
## # A tibble: 366 x 4
## date prcp tmax tmin
## <date> <dbl> <dbl> <dbl>
## 1 2016-01-01 0 5.6 1.1
## 2 2016-01-02 0 4.4 0
## 3 2016-01-03 0 7.2 1.7
## # … with 363 more rows
# Same element, reached by indexing the list-column directly.
weather_nest[["data"]][[1]]
## # A tibble: 366 x 4
## date prcp tmax tmin
## <date> <dbl> <dbl> <dbl>
## 1 2016-01-01 0 5.6 1.1
## 2 2016-01-02 0 4.4 0
## 3 2016-01-03 0 7.2 1.7
## # … with 363 more rows
# Expand the list-column back into regular rows. Name the column explicitly:
# calling unnest() without `cols` is deprecated and emits a warning.
weather_nest %>%
  unnest(cols = data)
## # A tibble: 1,098 x 6
## name id date prcp tmax tmin
## <chr> <chr> <date> <dbl> <dbl> <dbl>
## 1 CentralPark_NY USW00094728 2016-01-01 0 5.6 1.1
## 2 CentralPark_NY USW00094728 2016-01-02 0 4.4 0
## 3 CentralPark_NY USW00094728 2016-01-03 0 7.2 1.7
## # … with 1,095 more rows
# Fit tmax on tmin using only the first nested data frame.
central_park_df <- weather_nest$data[[1]]
lm(formula = tmax ~ tmin, data = central_park_df)
##
## Call:
## lm(formula = tmax ~ tmin, data = central_park_df)
##
## Coefficients:
## (Intercept) tmin
## 7.779 1.045
# Same fit, passing the nested element directly without an intermediate name.
lm(formula = tmax ~ tmin, data = weather_nest$data[[1]])
##
## Call:
## lm(formula = tmax ~ tmin, data = weather_nest$data[[1]])
##
## Coefficients:
## (Intercept) tmin
## 7.779 1.045
# Wrap the model fit in a function so it can be used in a map statement
#' Fit a linear regression of `tmax` on `tmin`
#'
#' @param df A data frame containing `tmax` and `tmin` columns.
#' @return The fitted `lm` object.
weather_lm <- function(df) {
  lm(formula = tmax ~ tmin, data = df)
}
# Fit the model to every nested data frame, first as a standalone list ...
output <- weather_nest$data %>%
  map(weather_lm)

# ... then as a new list-column stored alongside the data it was fit to.
mutate(
  weather_nest,
  lin_model = map(data, weather_lm)
)
## # A tibble: 3 x 4
## name id data lin_model
## <chr> <chr> <list<df[,4]>> <list>
## 1 CentralPark_NY USW00094728 [366 × 4] <lm>
## 2 Waikiki_HA USC00519397 [366 × 4] <lm>
## 3 Waterhole_WA USS0023B17S [366 × 4] <lm>