library(tidyverse)
lotr <- read_csv("https://raw.githubusercontent.com/jennybc/lotr-tidy/master/data/lotr_tidy.csv")
guest <- read_csv("https://raw.githubusercontent.com/STAT545-UBC/Classroom/master/data/wedding/attend.csv")
email <- read_csv("https://raw.githubusercontent.com/STAT545-UBC/Classroom/master/data/wedding/emails.csv")
library(DT)
Exercise 1: Univariate Pivoting
Consider the Lord of the Rings data:
datatable(lotr)
- Would you say this data is in tidy format?
- Quite tidy: Each observation is a combination of film, race, and gender. And each observation is how many words spoken.
- Widen the data so that we see the words spoken by each race, by putting race as its own column.
# pivot_wider/pivot_longer equivalent gather/spread
(lotr_wide <- lotr %>%
pivot_wider(id_cols = c(-Race, -Words),
names_from = Race,
values_from = Words))
- Re-lengthen the wide LOTR data from Question 2 above.
lotr_wide %>%
pivot_longer(cols = c(-Film, -Gender),
names_to = "Race",
values_to = "Words")
lotr_wide %>%
pivot_longer(cols = -c(Film, Gender),
names_to = "Race",
values_to = "Words")
Exercise 2: Multivariate Pivoting
Congratulations, you’re getting married! In addition to the wedding, you’ve decided to hold two other events: a day-of brunch and a day-before round of golf. You’ve made a guestlist of attendance so far, along with food preference for the food events (wedding and brunch).
guest %>%
DT::datatable(rownames = FALSE)
- Put “meal” and “attendance” as their own columns, with the events living in a new column.
(guest_long <- guest %>%
pivot_longer(cols = c(-party, -name),
names_to = c(".value", "event"),
names_sep = "_"))
- Use
tidyr::separate() to split the name into two columns: “first” and “last”. Then, re-unite them with tidyr::unite().
guest_long %>%
separate(name, into = c("first_name", "last_name"), sep=" ")
guest_long %>%
separate(name, into = c("first_name", "last_name"), sep=" ") %>%
unite(col = "name", c("first_name", "last_name"), sep = " ")
- Which parties still have a “PENDING” status for all members and all events?
guest_long %>%
group_by(party) %>%
summarize(all_pending = all(attendance == "PENDING"))
# all returns true for that party if attendance == "PENDING" for each party member
# all returns false for that party if attendance != "PEnDING" for one member of a party
- Which parties still have a “PENDING” status for all members for the wedding?
guest %>%
group_by(party) %>%
summarize(pending_wedding = all(attendance_wedding == "PENDING"))
- Put the data back to the way it was.
guest_long %>%
pivot_wider(id_cols = c(party,name),
names_from = event,
names_sep = "_",
values_from = c(meal,attendance))
- You also have a list of emails for each party, in this worksheet under the variable
email. Change this so that each person gets their own row. Use tidyr::separate_rows()
Exercise 3: Making tibbles
- Create a tibble that has the following columns:
- A
label column with "Sample A" in its entries.
- 100 random observations drawn from the N(0,1) distribution in the column
x
y calculated as the x values + N(0,1) error.
n <- 100
tibble(label = "Sample A",
x = rnorm(n),
y = x + rnorm(n))
- Generate a Gaussian sample of size 100 for each combination of the following means (
mu) and standard deviations (sd).
- Fix the
experiment tibble below (originally defined in the documentation of the tidyr::expand() function) so that all three repeats are displayed for each person, and the measurements are kept. The code is given, but needs one adjustment. What is it?
(experiment <- tibble(
name = rep(c("Alex", "Robert", "Sam"), c(3, 2, 1)),
trt = rep(c("a", "b", "a"), c(3, 2, 1)),
rep = c(1, 2, 3, 1, 2, 1),
measurement_1 = runif(6),
measurement_2 = runif(6)
))
experiment %>% complete(name,trt,rep)
experiment %>% complete(nesting(name, trt), rep)
experiment %>% expand(name, trt, rep)
# expand gives all possible combinations of inputs, even if not present in the dataset
# complete (without nesting) gives all possible combos with NAs
# complete (with nesting), completes the reps based on the existing name, trt combo
LS0tCnRpdGxlOiAnY20wMDkgRXhlcmNpc2VzOiB0aWR5IGRhdGEnCm91dHB1dDogCiAgaHRtbF9ub3RlYm9vazoKICAgIHRoZW1lOiBwYXBlcgplZGl0b3Jfb3B0aW9uczogCiAgY2h1bmtfb3V0cHV0X3R5cGU6IGlubGluZQotLS0KCmBgYHtyLCB3YXJuaW5nID0gRkFMU0UsIG1lc3NhZ2UgPSBGQUxTRX0KbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoRFQpCmxvdHIgIDwtIHJlYWRfY3N2KCJodHRwczovL3Jhdy5naXRodWJ1c2VyY29udGVudC5jb20vamVubnliYy9sb3RyLXRpZHkvbWFzdGVyL2RhdGEvbG90cl90aWR5LmNzdiIpCmd1ZXN0IDwtIHJlYWRfY3N2KCJodHRwczovL3Jhdy5naXRodWJ1c2VyY29udGVudC5jb20vU1RBVDU0NS1VQkMvQ2xhc3Nyb29tL21hc3Rlci9kYXRhL3dlZGRpbmcvYXR0ZW5kLmNzdiIpCmVtYWlsIDwtIHJlYWRfY3N2KCJodHRwczovL3Jhdy5naXRodWJ1c2VyY29udGVudC5jb20vU1RBVDU0NS1VQkMvQ2xhc3Nyb29tL21hc3Rlci9kYXRhL3dlZGRpbmcvZW1haWxzLmNzdiIpCmBgYAoKPCEtLS1UaGUgZm9sbG93aW5nIGNodW5rIGFsbG93cyBlcnJvcnMgd2hlbiBrbml0dGluZy0tLT4KCmBgYHtyIGFsbG93IGVycm9ycywgZWNobyA9IEZBTFNFfQprbml0cjo6b3B0c19jaHVuayRzZXQoZXJyb3IgPSBUUlVFLCB3YXJuaW5nID0gRkFMU0UpCmBgYAoKIyMgRXhlcmNpc2UgMTogVW5pdmFyaWF0ZSBQaXZvdGluZwoKQ29uc2lkZXIgdGhlIExvcmQgb2YgdGhlIFJpbmdzIGRhdGE6CgpgYGB7cn0KZGF0YXRhYmxlKGxvdHIpCmBgYAoKMS4gV291bGQgeW91IHNheSB0aGlzIGRhdGEgaXMgaW4gdGlkeSBmb3JtYXQ/CgoqIFF1aXRlIHRpZHk6IEVhY2ggb2JzZXJ2YXRpb24gaXMgYSBjb21iaW5hdGlvbiBvZiBmaWxtLCByYWNlLCBhbmQgZ2VuZGVyLiBBbmQgZWFjaCBvYnNlcnZhdGlvbiBpcyBob3cgbWFueSB3b3JkcyBzcG9rZW4uCgoyLiBXaWRlbiB0aGUgZGF0YSBzbyB0aGF0IHdlIHNlZSB0aGUgd29yZHMgc3Bva2VuIGJ5IGVhY2ggcmFjZSwgYnkgcHV0dGluZyByYWNlIGFzIGl0cyBvd24gY29sdW1uLgoKYGBge3J9CiMgcGl2b3Rfd2lkZXIvcGl2b3RfbG9uZ2VyIGVxdWl2YWxlbnQgZ2F0aGVyL3NwcmVhZAoobG90cl93aWRlIDwtIGxvdHIgJT4lIAogIHBpdm90X3dpZGVyKGlkX2NvbHMgPSBjKC1SYWNlLCAtV29yZHMpLCAKICAgICAgICAgICAgICBuYW1lc19mcm9tID0gUmFjZSwgCiAgICAgICAgICAgICAgdmFsdWVzX2Zyb20gPSBXb3JkcykpCmBgYAoKMy4gUmUtbGVuZ3RoZW4gdGhlIHdpZGUgTE9UUiBkYXRhIGZyb20gUXVlc3Rpb24gMiBhYm92ZS4KCmBgYHtyfQojIG5lZ2F0aXZlIHNpZ24gY2FuIGdvIG91dHNpZGUgb3IgaW5zaWRlIG9mIHRoZSB2ZWN0b3IKbG90cl93aWRlICU+JSAKICBwaXZvdF9sb25nZXIoY29scyA9IGMoLUZpbG0sIC1HZW5kZXIpLCAKICAgICAgICAgICAgICAgbmFtZXNfdG8gID0gIlJhY2UiLCAKICAgICAgICAgICAgICAgdmFsdWVzX3RvID0gIldvcmRzIikKCmxvdHJfd2lkZSAlPiUKICBwaXZvdF9sb25nZXIoY29scyA9IC1jKEZpbG0sIEdlbmRlciksCiAgICAgICAgICAgICAgIG5hbWVzX3RvID0gIlJhY2UiLAogICAgICAgICAgICAgICB2YWx1ZXNfdG8gPSAiV29yZHMiKQpgYGAKCiMjIEV4ZXJjaXNlIDI6IE11bHRpdmFyaWF0ZSBQaXZvdGluZwoKQ29uZ3JhdHVsYXRpb25zLCB5b3UncmUgZ2V0dGluZyBtYXJyaWVkISBJbiBhZGRpdGlvbiB0byB0aGUgd2VkZGluZywgeW91J3ZlIGRlY2lkZWQgdG8gaG9sZCB0d28gb3RoZXIgZXZlbnRzOiBhIGRheS1vZiBicnVuY2ggYW5kIGEgZGF5LWJlZm9yZSByb3VuZCBvZiBnb2xmLiAgWW91J3ZlIG1hZGUgYSBndWVzdGxpc3Qgb2YgYXR0ZW5kYW5jZSBzbyBmYXIsIGFsb25nIHdpdGggZm9vZCBwcmVmZXJlbmNlIGZvciB0aGUgZm9vZCBldmVudHMgKHdlZGRpbmcgYW5kIGJydW5jaCkuCgpgYGB7cn0KZ3Vlc3QgJT4lIAogIERUOjpkYXRhdGFibGUocm93bmFtZXMgPSBGQUxTRSkKYGBgCgoxLiBQdXQgIm1lYWwiIGFuZCAiYXR0ZW5kYW5jZSIgYXMgdGhlaXIgb3duIGNvbHVtbnMsIHdpdGggdGhlIGV2ZW50cyBsaXZpbmcgaW4gYSBuZXcgY29sdW1uLgoKYGBge3J9CihndWVzdF9sb25nIDwtIGd1ZXN0ICU+JSAKICBwaXZvdF9sb25nZXIoY29scyAgICAgID0gYygtcGFydHksIC1uYW1lKSwgCiAgICAgICAgICAgICAgIG5hbWVzX3RvICA9IGMoIi52YWx1ZSIsICJldmVudCIpLAogICAgICAgICAgICAgICBuYW1lc19zZXAgPSAiXyIpKQpgYGAKCjIuIFVzZSBgdGlkeXI6OnNlcGFyYXRlKClgIHRvIHNwbGl0IHRoZSBuYW1lIGludG8gdHdvIGNvbHVtbnM6ICJmaXJzdCIgYW5kICJsYXN0Ii4gVGhlbiwgcmUtdW5pdGUgdGhlbSB3aXRoIGB0aWR5cjo6dW5pdGUoKWAuCgpgYGB7cn0KZ3Vlc3RfbG9uZyAlPiUgCiAgc2VwYXJhdGUobmFtZSwgaW50byA9IGMoImZpcnN0X25hbWUiLCAibGFzdF9uYW1lIiksIHNlcD0iICIpCgpndWVzdF9sb25nICU+JSAKICBzZXBhcmF0ZShuYW1lLCBpbnRvID0gYygiZmlyc3RfbmFtZSIsICJsYXN0X25hbWUiKSwgc2VwPSIgIikgJT4lCiAgdW5pdGUoY29sID0gIm5hbWUiLCBjKCJmaXJzdF9uYW1lIiwgImxhc3RfbmFtZSIpLCBzZXAgPSAiICIpCmBgYAoKMy4gV2hpY2ggcGFydGllcyBzdGlsbCBoYXZlIGEgIlBFTkRJTkciIHN0YXR1cyBmb3IgYWxsIG1lbWJlcnMgYW5kIGFsbCBldmVudHM/CgpgYGB7cn0KIyBhbGwgcmV0dXJucyB0cnVlIGZvciB0aGF0IHBhcnR5IGlmIGF0dGVuZGFuY2UgPT0gIlBFTkRJTkciIGZvciBlYWNoIHBhcnR5IG1lbWJlcgojIGFsbCByZXR1cm5zIGZhbHNlIGZvciB0aGF0IHBhcnR5IGlmIGF0dGVuZGFuY2UgIT0gIlBFTkRJTkciIGZvciBvbmUgbWVtYmVyIG9mIGEgcGFydHkgCmd1ZXN0X2xvbmcgJT4lIAogIGdyb3VwX2J5KHBhcnR5KSAlPiUgCiAgc3VtbWFyaXplKGFsbF9wZW5kaW5nID0gYWxsKGF0dGVuZGFuY2UgPT0gIlBFTkRJTkciKSkKYGBgCgo0LiBXaGljaCBwYXJ0aWVzIHN0aWxsIGhhdmUgYSAiUEVORElORyIgc3RhdHVzIGZvciBhbGwgbWVtYmVycyBmb3IgdGhlIHdlZGRpbmc/CgpgYGB7cn0KZ3Vlc3QgJT4lIAogIGdyb3VwX2J5KHBhcnR5KSAlPiUgCiAgc3VtbWFyaXplKHBlbmRpbmdfd2VkZGluZyA9IGFsbChhdHRlbmRhbmNlX3dlZGRpbmcgPT0gIlBFTkRJTkciKSkKYGBgCgoKNS4gUHV0IHRoZSBkYXRhIGJhY2sgdG8gdGhlIHdheSBpdCB3YXMuCgpgYGB7cn0KZ3Vlc3RfbG9uZyAlPiUgCiAgcGl2b3Rfd2lkZXIoaWRfY29scyAgICAgPSBjKHBhcnR5LG5hbWUpLCAKICAgICAgICAgICAgICBuYW1lc19mcm9tICA9IGV2ZW50LCAKICAgICAgICAgICAgICBuYW1lc19zZXAgICA9ICJfIiwgCiAgICAgICAgICAgICAgdmFsdWVzX2Zyb20gPSBjKG1lYWwsYXR0ZW5kYW5jZSkpCmBgYAoKNi4gWW91IGFsc28gaGF2ZSBhIGxpc3Qgb2YgZW1haWxzIGZvciBlYWNoIHBhcnR5LCBpbiB0aGlzIHdvcmtzaGVldCB1bmRlciB0aGUgdmFyaWFibGUgYGVtYWlsYC4gQ2hhbmdlIHRoaXMgc28gdGhhdCBlYWNoIHBlcnNvbiBnZXRzIHRoZWlyIG93biByb3cuIFVzZSBgdGlkeXI6OnNlcGFyYXRlX3Jvd3MoKWAKCmBgYHtyfQplbWFpbCAlPiUgCiAgc2VwYXJhdGVfcm93cyhndWVzdCwgc2VwID0gIiwgIikKYGBgCgojIyBFeGVyY2lzZSAzOiBNYWtpbmcgdGliYmxlcwoKMS4gQ3JlYXRlIGEgdGliYmxlIHRoYXQgaGFzIHRoZSBmb2xsb3dpbmcgY29sdW1uczoKCi0gQSBgbGFiZWxgIGNvbHVtbiB3aXRoIGAiU2FtcGxlIEEiYCBpbiBpdHMgZW50cmllcy4KLSAxMDAgcmFuZG9tIG9ic2VydmF0aW9ucyBkcmF3biBmcm9tIHRoZSBOKDAsMSkgZGlzdHJpYnV0aW9uIGluIHRoZSBjb2x1bW4gYHhgCi0gYHlgIGNhbGN1bGF0ZWQgYXMgdGhlIGB4YCB2YWx1ZXMgKyBOKDAsMSkgZXJyb3IuIAoKYGBge3J9Cm4gPC0gMTAwCnRpYmJsZShsYWJlbCA9ICJTYW1wbGUgQSIsCiAgICAgICAgICAgICB4ID0gcm5vcm0obiksCiAgICAgICAgICAgICB5ID0geCArIHJub3JtKG4pKQpgYGAKCgoyLiBHZW5lcmF0ZSBhIEdhdXNzaWFuIHNhbXBsZSBvZiBzaXplIDEwMCBmb3IgZWFjaCBjb21iaW5hdGlvbiBvZiB0aGUgZm9sbG93aW5nIG1lYW5zIChgbXVgKSBhbmQgc3RhbmRhcmQgZGV2aWF0aW9ucyAoYHNkYCkuCgpgYGB7cn0KIyBncm91cF9ieV9hbGw6IGdyb3VwIGJ5IGV2ZXJ5IHZhcmlhYmxlCiMgcm5vcm06IG5vcm1hbCBkaXN0cmlidXRpb24KIyB1bm5lc3Q6IHVucmF2ZWwgdGhlICdsaXN0JwpuIDwtIDEwMAptdSA8LSBjKC01LCAwLCA1KQpzZCA8LSBjKDEsIDMsIDEwKQp0aWJibGUobXUgPSBtdSwgc2QgPSBzZCkgJT4lIAogIGdyb3VwX2J5X2FsbCgpICU+JSAKICBtdXRhdGUoeiA9IGxpc3Qocm5vcm0obiwgbXUsIHNkKSkpICU+JSAKICB1bm5lc3QoKQpgYGAKCjMuIEZpeCB0aGUgYGV4cGVyaW1lbnRgIHRpYmJsZSBiZWxvdyAob3JpZ2luYWxseSBkZWZpbmVkIGluIHRoZSBkb2N1bWVudGF0aW9uIG9mIHRoZSBgdGlkeXI6OmV4cGFuZCgpYCBmdW5jdGlvbikgc28gdGhhdCBhbGwgdGhyZWUgcmVwZWF0cyBhcmUgZGlzcGxheWVkIGZvciBlYWNoIHBlcnNvbiwgYW5kIHRoZSBtZWFzdXJlbWVudHMgYXJlIGtlcHQuIFRoZSBjb2RlIGlzIGdpdmVuLCBidXQgbmVlZHMgb25lIGFkanVzdG1lbnQuIFdoYXQgaXMgaXQ/CgpgYGB7cn0KIyBleHBhbmQgZ2l2ZXMgYWxsIHBvc3NpYmxlIGNvbWJpbmF0aW9ucyBvZiBpbnB1dHMsIGV2ZW4gaWYgbm90IHByZXNlbnQgaW4gdGhlIGRhdGFzZXQsIGFuZCBvbmx5IGluY2x1ZGVzIHRoZSBjb2x1bW5zIGdpdmVuLCB3aXRoIG5vIE5BcwojIGNvbXBsZXRlICh3aXRob3V0IG5lc3RpbmcpIGdpdmVzIGFsbCBwb3NzaWJsZSBjb21ib3Mgd2l0aCBOQXMKIyBjb21wbGV0ZSAod2l0aCBuZXN0aW5nKSwgY29tcGxldGVzIHRoZSByZXBzIGJhc2VkIG9uIHRoZSBleGlzdGluZyBuYW1lLCB0cnQgY29tYm8KKGV4cGVyaW1lbnQgPC0gdGliYmxlKAogIG5hbWUgPSByZXAoYygiQWxleCIsICJSb2JlcnQiLCAiU2FtIiksIGMoMywgMiwgMSkpLAogIHRydCAgPSByZXAoYygiYSIsICJiIiwgImEiKSwgYygzLCAyLCAxKSksCiAgcmVwID0gYygxLCAyLCAzLCAxLCAyLCAxKSwKICBtZWFzdXJlbWVudF8xID0gcnVuaWYoNiksCiAgbWVhc3VyZW1lbnRfMiA9IHJ1bmlmKDYpCikpCmV4cGVyaW1lbnQgJT4lIGNvbXBsZXRlKG5hbWUsdHJ0LHJlcCkKZXhwZXJpbWVudCAlPiUgY29tcGxldGUobmVzdGluZyhuYW1lLCB0cnQpLCByZXApCmV4cGVyaW1lbnQgJT4lIGV4cGFuZChuYW1lLCB0cnQsIHJlcCkKYGBg