# Package setup: bootstrap pacman if it is not available yet, then use it
# to load (installing first when missing) the packages this tutorial needs.
if (!require("pacman")) {
  install.packages("pacman")
}
## Loading required package: pacman
pacman::p_load(
  tidyverse, # tidyverse pkgs including purrr
  tictoc     # performance test
)
Use reduce() to automate joining multiple data frames.
Note: dplyr::left_join(x, y) is the dplyr counterpart of base R's merge(x, y, all.x = TRUE).
# Three toy tibbles that share the columns x, y and z. Every column holds
# three integers drawn with replacement from 1..10. No seed is set, so the
# values (and therefore the join results below) change from run to run.
df1 <- tibble(
  x = sample(1:10, size = 3, replace = TRUE),
  y = sample(1:10, size = 3, replace = TRUE),
  z = sample(1:10, size = 3, replace = TRUE)
)
df2 <- tibble(
  x = sample(1:10, size = 3, replace = TRUE),
  y = sample(1:10, size = 3, replace = TRUE),
  z = sample(1:10, size = 3, replace = TRUE)
)
df3 <- tibble(
  x = sample(1:10, size = 3, replace = TRUE),
  y = sample(1:10, size = 3, replace = TRUE),
  z = sample(1:10, size = 3, replace = TRUE)
)
# Manual approach: chain the tibbles together one left join at a time.
# No `by` is supplied, so dplyr joins on every shared column and reports
# which ones it used (echoed below each call).
first_join <- df1 %>% left_join(df2)
## Joining, by = c("x", "y", "z")
second_join <- first_join %>% left_join(df3)
## Joining, by = c("x", "y", "z")
second_join
| x <int> | y <int> | z <int> |
|---|---|---|
| 10 | 3 | 4 |
| 1 | 7 | 9 |
| 6 | 4 | 1 |
Challenge: Why is the above solution not efficient?
How reduce() works.
- Input: Takes a vector of length n
- Computation: Calls a function with a pair of values at a time
- Output: Returns a vector of length 1
# Same result as the manual chain: reduce() folds left_join over the list,
# joining the running result with the next tibble at each step.
reduced <- purrr::reduce(
  .x = list(df1, df2, df3),
  .f = dplyr::left_join
)
## Joining, by = c("x", "y", "z")
## Joining, by = c("x", "y", "z")
reduced
| x <int> | y <int> | z <int> |
|---|---|---|
| 10 | 3 | 4 |
| 1 | 7 | 9 |
| 6 | 4 | 1 |