# Install packages
# Make sure pacman itself is available first, then hand the package list for
# this tutorial to pacman::p_load().
if (!require("pacman")) {
  install.packages("pacman")
}
## Loading required package: pacman
pacman::p_load(
  tidyverse, # tidyverse pkgs including purrr
  tictoc,    # performance test
  furrr      # parallel processing
)
slowly() and future_ to make the automation process either slower or faster
walk() works the same as map() but doesn’t store its output.
If you’re web scraping, one problem with this approach is that it’s too fast by human standards.
If you want to make the function run slowly …
“slowly() takes a function and modifies it to wait a given amount of time between each call.” - purrr package vignette
In a different situation, you want to make your function run faster. This is a common situation when you collect and analyze data at a large scale. You can solve this problem using parallel processing. For more on parallel processing in R, read this review.
Parallel processing setup
Step 1: Determine the maximum number of workers (availableCores())
Step 2: Determine the parallel processing mode (plan())
# Setup
# Leave one core free for the rest of the system, but never ask for fewer
# than one worker: on a single-core machine availableCores() - 1 would be 0,
# which is not a valid worker count.
n_cores <- max(1L, availableCores() - 1L)
n_cores # This number depends on your computer spec.
## [1] 7
# The `multiprocess` backend is deprecated in the future package, so the
# backend is named explicitly. multisession (background R sessions) is what
# plan(multiprocess) already fell back to when R is run from RStudio, where
# forked processing ('multicore') is disabled because it is considered
# unstable. For details on controlling forked processing, see
# ?future::supportsMulticore
plan(
  multisession,
  workers = n_cores # the maximum number of workers
)
# Sequential baseline: apply mean() to each of one million integers with
# map() and time the whole call with tic()/toc().
# Timing noted for this step: 4.931 sec elapsed
tic()
mean100 <- map(seq_len(1e6), mean)
toc()
## 4.869 sec elapsed
# Parallel counterpart: the same one million mean() calls, dispatched through
# future_map() and timed with tic()/toc() for comparison.
# Timing noted for this step: 2.536 sec elapsed
tic()
mean100 <- future_map(seq_len(1e6), mean)
toc()
## 3.487 sec elapsed