dplyr
# Use suppressPackageStartupMessages() to keep the output clean when loading packages.
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(gapminder))
suppressPackageStartupMessages(library(tsibble))
suppressPackageStartupMessages(library(DT))
This worksheet contains exercises for practicing with dplyr.
# summarise() collapses each group defined by group_by() into a summary row.
# Columns created inside summarise() can be used immediately by later
# arguments of the same call: min_life is reused below to look up the country.
gapminder %>%
  group_by(continent, year) %>%
  summarise(
    min_life = min(lifeExp),
    # first() guarantees exactly one row per group even if several countries
    # tie for the minimum; returning more than one row per group from
    # summarise() is deprecated in recent dplyr releases.
    country = first(country[lifeExp == min_life]),
    # Keep the default behaviour (result stays grouped by continent) but state
    # it explicitly so summarise() does not print its regrouping message.
    .groups = "drop_last"
  ) %>%
  # arrange() ignores the remaining grouping, so this sorts across continents.
  arrange(min_life)
## # A tibble: 60 x 4
## # Groups: continent [5]
## continent year min_life country
## <fct> <int> <dbl> <fct>
## 1 Africa 1992 23.6 Rwanda
## 2 Asia 1952 28.8 Afghanistan
## 3 Africa 1952 30 Gambia
## 4 Asia 1957 30.3 Afghanistan
## 5 Asia 1977 31.2 Cambodia
## 6 Africa 1957 31.6 Sierra Leone
## 7 Asia 1962 32.0 Afghanistan
## 8 Africa 1962 32.8 Sierra Leone
## 9 Asia 1967 34.0 Afghanistan
## 10 Africa 1967 34.1 Sierra Leone
## # … with 50 more rows
Rearrange the following lines and fill in the FILL_THIS_IN. Here’s another convenience function for you: dplyr::first().
mutate(rel_growth = FILL_THIS_IN) %>%
arrange(FILL_THIS_IN) %>%
gapminder %>%
DT::datatable()
group_by(country) %>%
# Population growth of each country since its first year on record.
# first() returns the first value in the current row order, so the rows are
# sorted by year beforehand (alternatively, first(pop, order_by = year) picks
# the earliest year without re-sorting the data).
# arrange() ignores grouping, so sorting before group_by() is equivalent to
# sorting after it.
gapminder %>%
  arrange(year) %>%
  group_by(country) %>%
  mutate(rel_growth = pop - dplyr::first(pop)) %>%
  DT::datatable()
Ensure there are no NA’s. Instead of using lag(), use the convenience function provided by the tsibble package, tsibble::difference():
drop_na() %>%
ungroup() %>%
arrange(year) %>%
filter(inc_life_exp == min(inc_life_exp)) %>%
gapminder %>%
mutate(inc_life_exp = FILL_THIS_IN) %>%
arrange(inc_life_exp) %>%
group_by(country) %>%
group_by(continent) %>%
knitr::kable()
# For each continent, find the country-year with the sharpest drop in life
# expectancy relative to that country's previous record.
#
# tsibble::difference() returns a vector minus its lag, i.e. it is equivalent
# to lifeExp - lag(lifeExp); the first value within each group is therefore NA.
# arrange() works INDEPENDENTLY OF GROUPS: pass .by_group = TRUE if the sort
# should take the grouping into account.
gapminder %>%
  group_by(country) %>%
  # Rows must be in year order within each country before differencing.
  arrange(year) %>%
  mutate(inc_life_exp = tsibble::difference(lifeExp)) %>%
  # Only drop the rows whose difference is undefined (first year per country),
  # rather than any row that has an NA in any column.
  drop_na(inc_life_exp) %>%
  # group_by() replaces the existing grouping, so no ungroup() is needed first.
  group_by(continent) %>%
  filter(inc_life_exp == min(inc_life_exp)) %>%
  # Drop the grouping once the per-continent filter is done.
  ungroup() %>%
  arrange(inc_life_exp) %>%
  knitr::kable()
| country | continent | year | lifeExp | pop | gdpPercap | inc_life_exp |
|---|---|---|---|---|---|---|
| Rwanda | Africa | 1992 | 23.599 | 7290203 | 737.0686 | -20.421 |
| Cambodia | Asia | 1977 | 31.220 | 6978607 | 524.9722 | -9.097 |
| El Salvador | Americas | 1977 | 56.696 | 4282586 | 5138.9224 | -1.511 |
| Montenegro | Europe | 2002 | 73.981 | 720230 | 6557.1943 | -1.464 |
| Australia | Oceania | 1967 | 71.100 | 11872264 | 14526.1246 | 0.170 |