---
title: "B_02_RandomForest_split_data.R"
format:
  html:
    self-contained: true
    embed-resources: true
    toc: true          # optional: adds a table of contents
    theme: cosmo       # optional: Bootstrap theme
    code-fold: show    # optional: collapsible code blocks
    code-tools: true   # optional: adds copy/paste buttons
    toc-depth: 4
editor:
  markdown:
    wrap: 72
---
# B - 02: RandomForest (split models)

```{r}
#| echo: false
#| message: false
#| warning: false
#| results: "hide"
#| output: false
rm(list = ls())
gc()
```
## Libraries

```{r}
#| message: false
#| warning: false
#| results: "hide"
#| output: false
suppressPackageStartupMessages({
  library(here)
  source(here::here("Code/00_Configuration.R"))
  package_list <-
    c(
      package_list,
      "ranger",
      "tidymodels",
      "caret",
      "skimr",
      "DALEXtra",
      "DALEX",
      "ggthemes",
      "hstats"
    )
  lapply(package_list, require, character.only = TRUE)
  tidymodels_prefer()
})
rm(list = ls())
set.seed(123)
```
## Read data

```{r}
#| message: false
#| warning: false
#| error: false
dta <-
  readRDS(
    here::here(
      "Data/output/1_all_predictors_merged.rds"
    )
  ) %>%
  filter(samplingPeriodID == 1)
```
## Set variable vectors for hypotheses

```{r}
sp_id <-
  c(
    "verbatimIdentification",
    "scientificName"
  )

H1 <-
  c(
    "Mass",
    "GlobRangeSize_km2",
    "Migration",
    "Habitat_5",
    "Generalism",
    "Threatened",
    "pd"
  )

H2 <-
  c(
    "D_AOO_a",
    "mean_lnLac",
    "AOO",
    "joincount_delta",
    "circNorm",
    "minDist_toBorder_centr"
  )

H3 <-
  c("datasetID")

predictors <-
  c(H1, H2, H3)

responses <-
  c(
    "Jaccard_dissim",
    "log_R2_1",
    "log_R2_1_per_year" # yearly rate
  )
```
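Before reducing the data, a quick sanity check (my addition, not part of the original pipeline) can confirm that every configured column actually exists in `dta`:

```{r}
#| eval: false
# Hypothetical guard: stop early if any configured column is absent
missing_vars <- setdiff(c(sp_id, predictors, responses), names(dta))
if (length(missing_vars) > 0) {
  stop("Missing columns in `dta`: ", paste(missing_vars, collapse = ", "))
}
```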
## Make model data

Reduce to model variables:

```{r}
dta_new <-
  dta %>%
  select(all_of(c(sp_id, responses, H3, H1, H2))) %>%
  ungroup()
```
Check correct coding:

```{r}
str(dta_new) # 14 predictors
```
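Since `skimr` is already on the package list, a fuller overview than `str()` is one call away; a minimal sketch:

```{r}
#| eval: false
# Completeness, types, and distribution summaries per model variable
skimr::skim(dta_new)
```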
Split by dataset:

```{r}
dta_atlas_split <-
  dta_new %>%
  group_split(datasetID)
```
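It may be worth confirming that each atlas holds enough species for an 80/20 split plus repeated 10-fold CV; a quick check (not in the original pipeline):

```{r}
#| eval: false
# Species counts per atlas; very small groups would make the
# resampling scheme below fragile
dta_new %>% count(datasetID)
```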
Set model specifications & indicate tuning:

```{r}
set.seed(123)
mod_spec <-
  rand_forest(
    mtry = tune(),
    trees = tune(),
    min_n = tune()
  ) %>%
  set_engine("ranger",
    importance = "permutation",
    respect.unordered.factors = TRUE
  ) %>%
  set_mode("regression")

rf_params <-
  parameters(
    mtry(range = c(2L, 10L)),
    min_n(range = c(5L, 15L)),
    trees(range = c(1000L, 5000L))
  )
```
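To get a feel for the search space before committing to Bayesian tuning, one can draw a small space-filling design from `rf_params`; a sketch, assuming the `dials` grid helpers are available:

```{r}
#| eval: false
# Five candidate hyperparameter combinations sampled from the
# declared ranges (illustration only; tune_bayes() draws its own)
set.seed(123)
dials::grid_latin_hypercube(rf_params, size = 5)
```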
Create empty lists for results:

```{r}
#| eval: false
tuned_res <-
  replicate(length(dta_atlas_split), list())
predictions <-
  replicate(length(dta_atlas_split), list())
res <-
  replicate(length(dta_atlas_split), list())
list_split_data <-
  replicate(length(dta_atlas_split), list())
results_response <-
  replicate(length(responses), list())
results_atlas_i <-
  replicate(length(dta_atlas_split), list()) # one slot per atlas
```
### START LOOP

- Level 1: for each atlas (1-4)
- Level 2: for each response (1-3)

```{r}
#| label: save-cv-folds
list_split_data <- list()

for (atlas_i in seq_along(dta_atlas_split)) {
  this_atlas <-
    dta_atlas_split[[atlas_i]] %>%
    select(-datasetID)

  # Split data into training and testing (80/20)
  set.seed(123)
  split_info <-
    initial_split(this_atlas, prop = 0.8)
  train <-
    training(split_info)
  test <-
    testing(split_info)

  # Cross-validation folds on training data
  set.seed(123)
  folds <-
    vfold_cv(train, v = 10, repeats = 3)

  list_split_data[[atlas_i]] <- list(
    split_info = split_info,
    train = train,
    test = test,
    folds = folds
  )
}
```
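A quick way to verify the resampling structure for one atlas (illustrative only): each of the 30 splits (10 folds × 3 repeats) carries its own analysis/assessment partition.

```{r}
#| eval: false
# Print the resample object and the size of one analysis set
list_split_data[[1]]$folds
dim(rsample::analysis(list_split_data[[1]]$folds$splits[[1]]))
```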
```{r}
#| eval: false
for (atlas_i in seq_along(dta_atlas_split)) {
  this_atlas <-
    dta_atlas_split[[atlas_i]] %>%
    select(-datasetID)

  # Split data into training and testing (80/20)
  set.seed(123)
  split_info <-
    initial_split(this_atlas, prop = 0.8)
  train <-
    training(split_info)
  test <-
    testing(split_info)

  # Cross-validation folds on training data
  set.seed(123)
  folds <-
    vfold_cv(train, v = 10, repeats = 3)

  # Save split data to list for later reference
  list_split_data[[atlas_i]] <-
    list(
      split_info = split_info,
      train = train,
      test = test,
      folds = folds
    )

  # Loop through responses
  for (resp_i in seq_along(responses)) {
    resp <-
      responses[resp_i]

    # Recipe: current response as outcome; species IDs and the
    # remaining responses demoted to non-predictor roles
    mod_rec <-
      recipe(as.formula(paste(resp, "~ .")), data = train) %>%
      update_role(all_of(sp_id),
        new_role = "speciesID",
        old_role = "predictor"
      ) %>%
      update_role(all_of(responses[-resp_i]),
        old_role = "predictor",
        new_role = "alt_resp"
      )

    # Model workflow
    set.seed(123)
    mod_wf <- workflow() %>%
      add_recipe(mod_rec) %>%
      add_model(mod_spec)

    # Bayesian hyperparameter tuning
    tictoc::tic()
    set.seed(123)
    tuned_bayes <-
      tune_bayes(
        mod_wf,
        resamples = folds,
        param_info = rf_params,
        initial = 5,
        iter = 50,
        control = control_bayes(
          verbose = TRUE,
          no_improve = 10,
          seed = 123
        )
      )
    tictoc::toc()

    # Best parameters
    best <-
      tuned_bayes %>%
      select_best(metric = "rmse")

    # Final model fit
    final_wf <-
      mod_wf %>%
      finalize_workflow(best)

    # Performs the model evaluation on the testing data directly
    final_fit <-
      final_wf %>%
      last_fit(split_info)

    # Predictions on test data
    p <-
      predict(
        final_fit %>% tune::extract_workflow(),
        new_data = test
      ) %>%
      bind_cols(test %>%
        select(
          all_of(responses[resp_i]),
          verbatimIdentification,
          scientificName
        )) %>%
      setNames(
        c(
          ".pred",
          responses[resp_i],
          "verbatimIdentification",
          "scientificName"
        )
      ) %>%
      mutate(
        resid = .[[responses[resp_i]]] - .pred
      )

    # Explainable AI (xAI)
    fit <-
      final_fit %>%
      extract_fit_parsnip()

    explainer <-
      DALEXtra::explain_tidymodels(
        label = paste0(responses[resp_i], "_", atlas_i),
        fit,
        data = train %>%
          select(!any_of(c(
            responses,
            "verbatimIdentification",
            "scientificName"
          ))),
        y = train %>%
          pull(responses[resp_i])
      )

    # Save results to list
    results_response[[resp_i]] <-
      list(
        tuned_bayes = tuned_bayes,
        best = best,
        final_fit = final_fit,
        predictions = p,
        explainer = explainer,
        pd = model_profile(explainer,
          type = "partial",
          N = NULL,
          variables = explainer$data %>% names()
        ),
        md = model_diagnostics(explainer),
        mp = model_performance(explainer),
        vi = model_parts(explainer)
      )
  }
  results_atlas_i[[atlas_i]] <-
    results_response
}

saveRDS(results_atlas_i, here::here("Data/output/B_models/B_02_rf_res_atlas_split.rds"))
```
```{r}
#| echo: false
#| eval: true
results_atlas_i <-
  readRDS(here::here("Data/output/B_models/B_02_rf_res_atlas_split.rds"))
```
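For orientation, each element of `results_atlas_i` is one atlas and each sub-element one response; a sketch of how to inspect a single combination:

```{r}
#| eval: false
# Top-level components saved per model: tuning results, best
# parameters, final fit, predictions, and DALEX objects
str(results_atlas_i[[1]][[1]], max.level = 1)
results_atlas_i[[1]][[1]]$best
```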
## xAI plots

```{r}
#| eval: false
plot_list <- replicate(length(results_atlas_i), list())

for (atlas_i in seq_along(results_atlas_i)) {
  plot_list[[atlas_i]] <- list()
  for (resp_i in seq_along(results_atlas_i[[atlas_i]])) {
    this_response <- responses[resp_i]
    datasetID <- c(5, 6, 13, 26)[atlas_i]
    this_res <- results_atlas_i[[atlas_i]][[resp_i]]

    # List with all plots for saving as pdf
    plot_list[[atlas_i]][[resp_i]] <-
      list(
        pd =
          plot(this_res$pd) +
            ggthemes::theme_few(),
        mp =
          plot(this_res$mp) +
            ggthemes::theme_few(),
        md =
          plot(this_res$md %>%
            mutate(label = paste0(datasetID, "_", this_response))) +
            ggthemes::theme_few(),
        vi =
          plot(this_res$vi %>%
            mutate(label = paste0(datasetID, "_", this_response))) +
            ggthemes::theme_few()
      )
  }
}

pdf(
  file = here::here("Figures/B_models/xAI/B_02_xAI_separate_per_atlas.pdf"),
  onefile = TRUE
)
print(plot_list)
dev.off()
```
## Save results to csv

### Jaccard detailed performances

```{r}
# Packages
library(tidymodels) # rsample, workflows, yardstick, tune, etc.
library(ranger)     # for OOB stats
library(dplyr)
library(purrr)
library(tibble)
library(here)

# MSE helper; works across yardstick versions
mse_vec <- function(truth, estimate, na_rm = TRUE, ...) {
  mean((truth - estimate)^2, na.rm = na_rm)
}

# Helper to compute R2 / RMSE / MSE on *any* data frame
compute_set_metrics <- function(wflow, data, outcome_chr) {
  preds <- predict(wflow, data) %>% bind_cols(data)
  truth <- preds[[outcome_chr]]
  est <- preds$.pred
  tibble(
    r2 = yardstick::rsq_vec(truth, est),
    rmse = yardstick::rmse_vec(truth, est),
    mse = mse_vec(truth, est)
  )
}

# Summarize function
extract_all_metrics <- function(split, obj, outcome_chr, external_data = NULL) {
  # (a) get fitted workflow & OOB stats -----------------------------
  wf <- tune::extract_workflow(obj$final_fit)
  rf_fit <- extract_fit_parsnip(wf)$fit # ranger model
  oob_mse <- rf_fit$prediction.error
  oob_r2 <- rf_fit$r.squared
  oob_rmse <- sqrt(oob_mse)

  # (b) training / testing data from the split inside final_fit -----
  train_set <- split$train
  test_set <- split$test
  cv_folds <- split$folds

  # (c) "raw" (apparent) metrics ------------------------------------
  raw <- compute_set_metrics(wf, train_set, outcome_chr)

  # (d) CV metrics for each study region ----------------------------
  cv_tbl <-
    fit_resamples(
      wf,
      resamples = cv_folds,
      metrics = metric_set(rsq, rmse)
    ) %>%
    collect_metrics()
  cv_rmse <- cv_tbl %>% filter(.metric == "rmse") %>% pull(mean)
  cv_r2 <- cv_tbl %>% filter(.metric == "rsq") %>% pull(mean)
  cv_mse <- cv_rmse^2

  # (e) train / test -------------------------------------------------
  tr <- compute_set_metrics(wf, train_set, outcome_chr)
  te <- compute_set_metrics(wf, test_set, outcome_chr)

  # (f) optional external -------------------------------------------
  ext <- if (!is.null(external_data)) {
    compute_set_metrics(wf, external_data, outcome_chr)
  } else {
    tibble(r2 = NA_real_, rmse = NA_real_, mse = NA_real_)
  }

  # (g) tidy one row per context ------------------------------------
  tibble::tribble(
    ~context, ~r2, ~rmse, ~mse,
    "raw", raw$r2, raw$rmse, raw$mse,
    "oob", oob_r2, oob_rmse, oob_mse,
    "cv", cv_r2, cv_rmse, cv_mse,
    "train", tr$r2, tr$rmse, tr$mse,
    "test", te$r2, te$rmse, te$mse,
    "external", ext$r2, ext$rmse, ext$mse
  )
}
```
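An example call for a single atlas/response pair (assuming the objects from the modelling loop are still in memory):

```{r}
#| eval: false
# One row per validation context (raw, oob, cv, train, test, external)
extract_all_metrics(
  split = list_split_data[[1]],
  obj = results_atlas_i[[1]][[1]],
  outcome_chr = "Jaccard_dissim"
)
```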
```{r}
#| eval: false
# Set up variables for mapping performance results:
results_atlas <- results_atlas_i
responses <- c("Jaccard_dissim", "log_R2_1", "log_R2_1_per_year")

# external_df valid for the first two responses (adjust if needed)
external_df <-
  readRDS(here("Data/output/", "1_all_predictors_merged.rds")) %>%
  filter(samplingPeriodID == 2) %>%
  mutate(
    across(
      c(samplingPeriodID, datasetID), ~ as.factor(as.character(.))
    )
  ) %>%
  # join response columns & rename
  left_join(
    readRDS(here("Data/output/", "2_big_table_3.rds")) %>%
      select(
        datasetID, samplingPeriodID, verbatimIdentification,
        Jaccard_dissim, log_R3_2
      ) %>%
      rename(log_R2_1 = log_R3_2) %>%
      mutate(
        across(
          c(samplingPeriodID, datasetID), ~ as.factor(as.character(.))
        )
      )
  )

# Map performance results:
all_metrics <- purrr::imap_dfr(
  results_atlas, # iterate over atlases
  function(resp_list, atlas_idx) {
    purrr::imap_dfr(
      resp_list, # iterate over responses inside atlas
      function(obj, resp_i) {
        resp_name <- responses[resp_i]
        datasetID <- c("5", "6", "13", "26")[atlas_idx]
        message(datasetID)
        ext_data <-
          if (resp_name %in% c("Jaccard_dissim", "log_R2_1") &&
            datasetID %in% c("5", "13")) {
            external_df
          } else {
            NULL
          }
        split <- list_split_data[[atlas_idx]]
        extract_all_metrics(
          split = split,
          obj,
          outcome_chr = resp_name,
          external_data = ext_data
        ) %>%
          mutate(
            atlas = paste0("atlas_", atlas_idx),
            response = resp_name
          )
      }
    )
  }
)

# Set vectors for naming:
contexts <- c("raw", "oob", "cv", "test", "external")
metrics <- c("r2", "mse", "rmse")
cols <-
  map(contexts, ~ paste0(metrics, "_", .x)) %>%
  unlist()

all_metrics2 <-
  all_metrics %>%
  filter(context != "train") %>%
  pivot_wider(
    id_cols = c(response, atlas),
    names_from = context,
    values_from = c(r2, rmse, mse)
  ) %>%
  mutate(
    datasetID = as.factor(case_when(
      atlas == "atlas_1" ~ 5,
      atlas == "atlas_2" ~ 6,
      atlas == "atlas_3" ~ 13,
      atlas == "atlas_4" ~ 26,
      .default = NA
    ))
  ) %>%
  select(response, datasetID, all_of(cols)) %>%
  arrange(response)

all_metrics2

write.csv(
  all_metrics2,
  here("Data/output/results/B_02_detailed_rf_results_split.csv")
)
```
```{r}
#| echo: false
all_metrics2 <-
  read.csv(
    here("Data/output/results/B_02_detailed_rf_results_split.csv")
  )
```
```{r}
all_metrics2 %>%
  mutate(across(where(is.numeric), \(x) round(x, 2))) %>%
  kableExtra::kable()
```
```{r}
all_metrics2 %>%
  select(-X) %>% # drop the row-name column added by write.csv/read.csv
  pivot_longer(
    cols = c(-response, -datasetID),
    names_to = c(".value", "context"),
    names_sep = "_"
  ) %>%
  ggplot(
    aes(context, r2, colour = response, group = response)
  ) +
  geom_point(size = 3) +
  geom_line(linewidth = 0.8) +
  scale_y_continuous(limits = c(0, 1)) +
  labs(
    title = "Model Generalisation Across Validation Contexts",
    y = "R²",
    x = NULL
  ) +
  theme_minimal() +
  facet_wrap(~datasetID)
```
### Log ratio detailed performances

### Log ratio per year detailed performances

### Less detailed
```{r}
# Best tuning parameters per atlas x response, in loop order
# (atlas outer, response inner), so labels can be assigned with rep()
tuned_res_df <-
  purrr::map_dfr(
    results_atlas_i,
    \(atlas) purrr::map_dfr(atlas, "best")
  ) %>%
  select(-.config) %>%
  mutate(
    response = rep(responses, times = 4),
    datasetID = rep(c(5, 6, 13, 26), each = 3)
  )

# Test-set metrics from last_fit(): two rows (rmse, rsq) per model
mod_res <-
  purrr::map_dfr(
    results_atlas_i,
    \(atlas) purrr::map_dfr(atlas, \(res) res$final_fit$.metrics[[1]])
  ) %>%
  mutate(
    response = rep(rep(responses, each = 2), times = 4),
    datasetID = rep(c(5, 6, 13, 26), each = 6)
  ) %>%
  pivot_wider(
    id_cols = c(response, datasetID),
    names_from = .metric,
    values_from = .estimate
  ) %>%
  full_join(tuned_res_df) %>%
  mutate(
    cv = "3x10folds",
    tuning_method = "bayesian",
    model_type = "ranger"
  )
```
Check:

```{r}
kable(mod_res)
```

```{r}
#| eval: false
write.csv2(mod_res, here::here("Data/output/B_02_ranger_atlas_separate.csv"))
```
Interactions with H-stats:
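For reference, the statistic `hstats` estimates is Friedman's $H^2$; for a feature pair $(j, k)$ it compares the joint partial dependence with the sum of the marginal ones (centered versions, evaluated over the data):

$$
H^2_{jk} = \frac{\sum_{i=1}^{n}\left[\hat{F}_{jk}(x_{ij}, x_{ik}) - \hat{F}_{j}(x_{ij}) - \hat{F}_{k}(x_{ik})\right]^{2}}{\sum_{i=1}^{n}\hat{F}_{jk}(x_{ij}, x_{ik})^{2}}
$$

A value near 0 means the two features act additively; values near 1 mean that almost all of their joint effect's variability comes from the interaction.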
```{r}
#| eval: false
interaction_res <-
  replicate(length(results_atlas_i), list())

for (atlas_i in seq_along(results_atlas_i)) {
  interaction_res[[atlas_i]] <-
    list()
  this_atlas <-
    results_atlas_i[[atlas_i]]

  for (response_i in seq_along(this_atlas)) {
    # Prepare data for hstats function
    dd <-
      this_atlas[[response_i]]
    fit <-
      dd$final_fit %>%
      extract_fit_parsnip()
    preds <-
      dd$explainer$data

    # Run hstats
    interactions <-
      hstats(fit,
        X = preds,
        threeway_m = 5
      )

    # Save results
    interaction_res[[atlas_i]][[response_i]] <- interactions

    # Create the plot
    interaction_plot <- plot(interactions, which = 1:100) + theme_few()

    datasetID <- c(5, 6, 13, 26)[atlas_i]

    ggsave(
      filename =
        here::here(
          "Figures/B_models/interactions/",
          paste0(
            "B_02_",
            responses[response_i],
            "_",
            datasetID,
            "_interactions_hstats.svg"
          )
        ),
      plot = interaction_plot,
      device = "svg",
      width = 10,
      height = 10
    )
  }
}

interaction_res
saveRDS(interaction_res, here::here("Data/output/temp/B_02_Interaction_res_atlas_split.rds"))
```
```{r}
#| echo: false
#| eval: true
interaction_res <-
  readRDS(here::here("Data/output/temp/B_02_Interaction_res_atlas_split.rds"))
```
Save interactions in numbers (csv) for documentation:

```{r}
#| message: false
#| warning: false
#| error: false
res_list <- replicate(length(interaction_res), list())

for (atlas_i in seq_along(interaction_res)) {
  this_atlas <- interaction_res[[atlas_i]]
  res_list[[atlas_i]] <- list()
  datasetID <- c(5, 6, 13, 26)[atlas_i]

  for (resp_i in seq_along(this_atlas)) {
    this_response <- responses[resp_i]
    response_res <- this_atlas[[resp_i]]

    H2_all_df <-
      data.frame(
        row.names = NULL,
        datasetID = datasetID,
        response = this_response,
        variable = "all",
        test = "total_interaction_strength",
        H2 = summary(response_res)$h2[[1]]
      )
    H2_overall_df <-
      data.frame(
        row.names = NULL,
        datasetID = datasetID,
        response = this_response,
        variable = row.names(summary(response_res)$h2_overall[[1]]),
        test = "overall",
        H2 = summary(response_res)$h2_overall[[1]]
      )
    H2_pairwise_df <-
      data.frame(
        row.names = NULL,
        datasetID = datasetID,
        response = this_response,
        variable = row.names(summary(response_res)$h2_pairwise[[1]]),
        test = "pairwise",
        H2 = summary(response_res)$h2_pairwise[[1]]
      )
    H2_threeway_df <-
      data.frame(
        row.names = NULL,
        datasetID = datasetID,
        response = this_response,
        variable = row.names(summary(response_res)$h2_threeway[[1]]),
        test = "threeway",
        H2 = summary(response_res)$h2_threeway[[1]]
      )

    temp <-
      full_join(H2_all_df, H2_overall_df) %>%
      full_join(H2_pairwise_df) %>%
      full_join(H2_threeway_df)

    res_list[[atlas_i]][[resp_i]] <- temp
  }
}

res_df <- bind_rows(res_list)
kable(res_df)
```
```{r}
#| eval: false
write.csv2(res_df, here::here("Data/output/results/B_02_Hstats_split_data.csv"))
```
Variable importances:

```{r}
#| eval: false
vimp_list <- replicate(length(results_atlas_i), list())

for (atlas_i in seq_along(results_atlas_i)) {
  vimp_list[[atlas_i]] <- list()
  this_atlas <- results_atlas_i[[atlas_i]]
  datasetID <- c(5, 6, 13, 26)[atlas_i]

  for (i in seq_along(this_atlas)) {
    this_response <- responses[i]
    fit <-
      this_atlas[[i]]$final_fit %>%
      extract_fit_engine()
    vimp <-
      fit$variable.importance

    vimp_list[[atlas_i]][[i]] <- data.frame(
      datasetID = datasetID,
      variable = names(vimp),
      importance = vimp,
      row.names = NULL,
      response = this_response,
      importance_mode = fit$importance.mode
    )
  }
}

imp_df <-
  vimp_list %>%
  bind_rows()

write.csv2(imp_df, here::here("Data/output/results/B_02_ranger_vimp_split_data.csv"))
```
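A quick visual check of the permutation importances per atlas and response (a sketch, not saved output; assumes `imp_df` is in memory):

```{r}
#| eval: false
# One panel per atlas x response; bars ordered by importance
imp_df %>%
  ggplot(aes(importance, reorder(variable, importance))) +
  geom_col() +
  facet_grid(datasetID ~ response, scales = "free_x") +
  labs(x = "Permutation importance", y = NULL)
```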