B_02_RandomForest_split_data.R

B - 02: RandomForest (split models)

Libraries

Code
suppressPackageStartupMessages({
  library(here)

  source(here::here("Code/00_Configuration.R"))

  package_list <-
    c(
      package_list,
      "ranger",
      "tidymodels",
      "caret",
      "skimr",
      "DALEXtra",
      "DALEX",
      "ggthemes",
      "hstats"
    )

  lapply(package_list, require, character = TRUE)

  tidymodels_prefer()
})


rm(list = ls())

set.seed(123)

Read data

Code
dta <-
  readRDS(
    here::here(
      "Data/output/1_all_predictors_merged.rds"
    )
  ) %>%
  filter(samplingPeriodID == 1)

Set variable vectors for hypotheses

Code
sp_id <-
  c(
    "verbatimIdentification",
    "scientificName"
  )

H1 <-
  c(
    "Mass",
    "GlobRangeSize_km2",
    "Migration",
    "Habitat_5",
    "Generalism",
    "Threatened",
    "pd"
  )

H2 <-
  c(
    "D_AOO_a",
    "mean_lnLac",
    "AOO",
    "joincount_delta",
    "circNorm",
    "minDist_toBorder_centr"
  )

H3 <-
  c("datasetID")

predictors <-
  c(H1, H2, H3)

responses <-
  c(
    "Jaccard_dissim",
    "log_R2_1",
    "log_R2_1_per_year" # yearly rate
  )

Make model data

Reduce to model variables

Code
dta_new <-
  dta %>%
  select(all_of(c(sp_id, responses, H3, H1, H2))) %>%
  ungroup()

check correct coding

Code
str(dta_new) # 14 predictors
'data.frame':   1054 obs. of  19 variables:
 $ verbatimIdentification: chr  "Ciconia nigra" "Carduelis carduelis" "Passer montanus" "Sylvia borin" ...
 $ scientificName        : chr  "Ciconia nigra" "Carduelis carduelis" "Passer montanus" "Sylvia borin" ...
 $ Jaccard_dissim        : num  0.369 0.024 0.063 0.14 0.686 0.53 0.117 0.18 0.315 0.011 ...
 $ log_R2_1              : num  0.231 0.009 -0.017 0.018 0.177 -0.275 -0.001 0.043 -0.042 0.001 ...
 $ log_R2_1_per_year     : num  0.012 0 -0.001 0.001 0.009 -0.014 0 0.002 -0.002 0 ...
 $ datasetID             : Factor w/ 4 levels "5","6","13","26": 1 1 1 1 1 1 1 1 1 1 ...
 $ Mass                  : num  2926 16 21.4 18.2 10.8 ...
 $ GlobRangeSize_km2     : num  18362593 11851257 39064405 9628175 5995144 ...
 $ Migration             : Factor w/ 4 levels "1","2","3","NA": 3 1 1 3 3 3 1 1 1 3 ...
 $ Habitat_5             : Factor w/ 5 levels "closed","freshwater",..: 1 1 1 1 1 2 1 1 2 1 ...
 $ Generalism            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ Threatened            : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
 $ pd                    : num  25.28 8.51 5.72 7.69 3.97 ...
 $ D_AOO_a               : num  1.63 1.99 1.97 1.86 1.19 ...
 $ mean_lnLac            : num  0.14 0.0906 0.0949 0.1005 0.3207 ...
 $ AOO                   : num  48682 77329 76027 71762 18278 ...
 $ joincount_delta       : num  0.2551 0.0238 0.0493 0.0661 0.4199 ...
 $ circNorm              : num  56.23 4.01 5.5 12.37 81.02 ...
 $ minDist_toBorder_centr: num  83214 84680 83847 86413 70037 ...

split by dataset

Code
dta_atlas_split <-
  dta_new %>%
  group_split(datasetID)

Set model specifications & indicate tuning

Code
set.seed(123)
mod_spec <-
  rand_forest(
    mtry = tune(),
    trees = tune(),
    min_n = tune()
  ) %>%
  set_engine("ranger",
    importance = "permutation",
    respect.unordered.factors = TRUE
  ) %>%
  set_mode("regression")

rf_params <-
  parameters(
    mtry(range = c(2L, 10L)),
    min_n(range = c(5L, 15L)),
    trees(range = c(1000L, 5000L))
  )

Create empty lists for results

Code
tuned_res <-
  replicate(length(dta_atlas_split), list())

predictions <-
  replicate(length(dta_atlas_split), list())

res <-
  replicate(length(dta_atlas_split), list())

list_split_data <-
  replicate(length(dta_atlas_split), list())

results_response <-
  replicate(3, list())

results_atlas_i <-
  replicate(3, list())

START LOOP

– Level 1: for each atlas (1-4) – Level 2: for each response (1-3)

Code
list_split_data <- list()
for (atlas_i in seq_along(dta_atlas_split)) {
  this_atlas <-
    dta_atlas_split[[atlas_i]] %>%
    select(-datasetID)

  set.seed(123)

  split_info <-
    initial_split(this_atlas, prop = 0.8)

  train <-
    training(split_info)

  test <-
    testing(split_info)

  # Cross validation folds on training data
  set.seed(123)
  folds <-
    vfold_cv(train, v = 10, repeats = 3)
  
  
    list_split_data[[atlas_i]] <- list(
    split_info = split_info,
    train = train,
    test = test,
    folds = folds
  )
}
Code
for (atlas_i in seq_along(dta_atlas_split)) {
  this_atlas <-
    dta_atlas_split[[atlas_i]] %>%
    select(-datasetID)

  # split data into training and testing (80/20)
  set.seed(123)

  split_info <-
    initial_split(this_atlas, prop = 0.8)

  train <-
    training(split_info)

  test <-
    testing(split_info)

  # Cross validation folds on training data
  set.seed(123)
  folds <-
    vfold_cv(train, v = 10, repeats = 3)


  # Save split data to list for later reference
  list_split_data[[atlas_i]] <-
    list(
      split_info = split_info,
      train = train,
      test = test,
      folds = folds
    )

  # Loop through responses
  for (resp_i in seq_along(responses)) {
    resp <-
      responses[resp_i]

    if (responses[resp_i] == "Jaccard_dissim") {
      mod_rec <-
        recipe(Jaccard_dissim ~ ., data = train) %>%
        update_role(all_of(sp_id),
          new_role = "speciesID",
          old_role = "predictor"
        ) %>%
        update_role(all_of(responses[-resp_i]),
          old_role = "predictor",
          new_role = "alt_resp"
        )
    } else if (responses[resp_i] == "log_R2_1") {
      mod_rec <-
        recipe(log_R2_1 ~ ., data = train) %>%
        update_role(all_of(sp_id),
          new_role = "speciesID",
          old_role = "predictor"
        ) %>%
        update_role(all_of(responses[-resp_i]),
          old_role = "predictor",
          new_role = "alt_resp"
        )
    } else if (responses[resp_i] == "log_R2_1_per_year") {
      mod_rec <-
        recipe(log_R2_1_per_year ~ ., data = train) %>%
        update_role(all_of(sp_id),
          new_role = "speciesID",
          old_role = "predictor"
        ) %>%
        update_role(all_of(responses[-resp_i]),
          old_role = "predictor",
          new_role = "alt_resp"
        )
    }

    # Model workflow
    set.seed(123)
    mod_wf <- workflow() %>%
      add_recipe(mod_rec) %>%
      add_model(mod_spec)

    # Bayesian hyperparameter tuning
    tictoc::tic()

    set.seed(123)
    tuned_bayes <-
      tune_bayes(
        mod_wf,
        resamples = folds,
        param_info = rf_params,
        initial = 5,
        iter = 50,
        control = control_bayes(
          verbose = T,
          no_improve = 10,
          seed = 123
        )
      )
    tictoc::toc()

    # Best parameters
    best <-
      tuned_bayes %>%
      select_best(metric = "rmse")

    # Final model fit
    final_wf <-
      mod_wf %>%
      finalize_workflow(best)

    # performs the model evaluation on the testing data directly
    final_fit <-
      final_wf %>%
      last_fit(split_info)

    # Predictions on test data
    p <-
      predict(
        final_fit
        %>% tune::extract_workflow(),
        new_data = test
      ) %>%
      bind_cols(test %>%
        select(
          responses[resp_i],
          verbatimIdentification,
          scientificName
        )) %>%
      setNames(
        c(
          ".pred",
          responses[resp_i],
          "verbatimIdentification",
          "scientificName"
        )
      ) %>%
      mutate(
        resid = .[[responses[resp_i]]] - .pred
      )

    # Explainable AI (xAI)
    fit <-
      final_fit %>%
      extract_fit_parsnip()

    explainer <-
      DALEXtra::explain_tidymodels(
        label = paste0(responses[resp_i], "_", atlas_i),
        fit,
        data = train %>%
          select(!any_of(c(
            responses,
            "verbatimIdentification",
            "scientificName"
          ))),
        y = train %>%
          pull(responses[resp_i])
      )

    # save results to list
    results_response[[resp_i]] <-
      list(
        tuned_bayes = tuned_bayes,
        best = best,
        final_fit = final_fit,
        predictions = p,
        explainer = explainer,
        pd = model_profile(explainer,
          type = "partial",
          N = NULL,
          variables = explainer$data %>% names()
        ),
        md = model_diagnostics(explainer),
        mp = model_performance(explainer),
        vi = model_parts(explainer)
      )
  }


  results_atlas_i[[atlas_i]] <-
    results_response
}

saveRDS(results_atlas_i, here::here("Data/output/B_models/B_02_rf_res_atlas_split.rds"))

xAI plots

Code
plot_list <- replicate(2, list())

for (atlas_i in seq_along(results_atlas_i)) {
  plot_list[[atlas_i]] <- list()

  for (resp_i in seq_along(results_atlas_i[[atlas_i]])) {
    this_response <- responses[resp_i]

    if (atlas_i == 1) datasetID <- 5 else 
      if (atlas_i == 2) datasetID <- 6 else 
        if (atlas_i == 3) datasetID <- 13 else 
          if (atlas_i == 4) datasetID <- 26

    subset <- results_atlas_i[[atlas_i]][[resp_i]]

    # List with all plots for saving as pdf
    plot_list[[atlas_i]][[resp_i]] <-
      list(
        pd =
          plot(subset$pd) +
            ggthemes::theme_few(),
        mp =
          plot(subset$mp) +
            ggthemes::theme_few(),
        md =
          plot(subset$md %>%
            mutate(label = paste0(datasetID, "_", this_response))) +
            ggthemes::theme_few(),
        vi = 
          plot(subset$vi %>%
            mutate(label = paste0(datasetID, "_", this_response))) +
            ggthemes::theme_few()
      )
  }
}

pdf(
  file = paste0(here::here("Figures/B_models/xAI/B_02_xAI_separate_per_atlas.pdf")),
  onefile = TRUE
)
plot_list
dev.off()

Save results to csv

jaccard detailed performances

Code
# Packages
library(tidymodels)   # rsample, workflows, yardstick, tune, etc.
library(ranger)       # for OOB stats
library(dplyr)
library(purrr)
library(tibble)
library(here)

# mse helper works across yardstick versions
mse_vec <- function(truth, estimate, na_rm = TRUE, ...) {
  mean((truth - estimate)^2, na.rm = na_rm)
}

# helper to compute R² / RMSE / MSE on *any* data frame
compute_set_metrics <- function(wflow, data, outcome_chr) {
  preds <- predict(wflow, data) %>% bind_cols(data)
  truth <- preds[[outcome_chr]]
  est   <- preds$.pred
  tibble(
    r2   = yardstick::rsq_vec (truth, est),
    rmse = yardstick::rmse_vec(truth, est),
    mse  = mse_vec            (truth, est)
  )
}

# summarize function
extract_all_metrics <- function(split, obj, outcome_chr, external_data = NULL) {

  # (a) get fitted workflow & OOB stats -----------------------------
  wf <- tune::extract_workflow(obj$final_fit)
  rf_fit <- extract_fit_parsnip(wf)$fit        # ranger model
  oob_mse  <- rf_fit$prediction.error
  oob_r2   <- rf_fit$r.squared
  oob_rmse <- sqrt(oob_mse)
  
  # (b) training / testing data from the split inside final_fit -----
  train_set <- split$train
  test_set  <- split$test
  cv_folds <- split$folds
  
  # (c) "raw" (apparent) metrics ------------------------------------
  raw <- compute_set_metrics(wf, train_set, outcome_chr)
  
  # (d) CV metrics for each study region
  cv_tbl <- 
    fit_resamples(
      wf,
      resamples = cv_folds,
      metrics = metric_set(rsq, rmse)
    ) %>%
    collect_metrics()
  
  cv_rmse <- cv_tbl %>% filter(.metric == "rmse") %>% pull(mean)
  cv_r2   <- cv_tbl %>% filter(.metric == "rsq")  %>% pull(mean)
  cv_mse  <- cv_rmse^2
  
  # (e) train / test -------------------------------------------------
  tr <- compute_set_metrics(wf, train_set, outcome_chr)
  te <- compute_set_metrics(wf, test_set,  outcome_chr)
  
  # (f) optional external -------------------------------------------
  ext <- if (!is.null(external_data)) {
           compute_set_metrics(wf, external_data, outcome_chr)
         } else tibble(r2 = NA_real_, rmse = NA_real_, mse = NA_real_)
  
  # (g) tidy one row per context ------------------------------------
  tibble::tribble(
    ~context,   ~r2,            ~rmse,        ~mse,
    "raw",      raw$r2,         raw$rmse,     raw$mse,
    "oob",      oob_r2,         oob_rmse,     oob_mse,
    "cv",       cv_r2,          cv_rmse,      cv_mse,
    "train",    tr$r2,          tr$rmse,      tr$mse,
    "test",     te$r2,          te$rmse,      te$mse,
    "external", ext$r2,         ext$rmse,     ext$mse
  )
}
Code
# Set up variables for mapping performance results:
results_atlas <- results_atlas_i

responses <- c("Jaccard_dissim", "log_R2_1", "log_R2_1_per_year")

# external_df valid for the first two responses (adjust if needed)
external_df <-
  readRDS(here("Data/output/", "1_all_predictors_merged.rds")) %>%
  filter(samplingPeriodID == 2) %>%
  mutate(
    across(
      c(samplingPeriodID, datasetID), ~ as.factor(as.character(.)))) %>%
  # join response columns & rename
  left_join(
    readRDS(here("Data/output/", "2_big_table_3.rds")) %>%
      select(datasetID, samplingPeriodID, verbatimIdentification,
             Jaccard_dissim, log_R3_2) %>%
      rename(log_R2_1 = log_R3_2) %>%
      mutate(
        across(
          c(samplingPeriodID, datasetID), ~ as.factor(as.character(.))))
  )

# Map performance results:
all_metrics <- purrr::imap_dfr(
  results_atlas,                      # iterate over atlases
  function(resp_list, atlas_idx) {
    purrr::imap_dfr(
      resp_list,                      # iterate over responses inside atlas
      function(obj, resp_i) {
        resp_name <- responses[resp_i]
        datasetID <- c("5", "6", "13", "26")[atlas_idx]
        message(print(datasetID))
        ext_data  <- 
          if (resp_name %in% c("Jaccard_dissim", "log_R2_1") && 
              datasetID %in% c(5,13))
            external_df else NULL
        split <- list_split_data[[atlas_idx]]
        
        extract_all_metrics(
          split = split,
          obj,
          outcome_chr  = resp_name,
          external_data = ext_data
        ) %>%
        mutate(atlas    = paste0("atlas_", atlas_idx),
               response = resp_name)
      }
    )
  }
)

# set vectors for naming:
contexts <- c("raw", "oob", "cv", "test", "external")
metrics <- c("r2", "mse", "rmse")
cols <-
  map(contexts, ~ paste0(metrics, "_", .x)) %>%
  unlist()


all_metrics2 <-
  all_metrics %>%
  filter(context != "train") %>%
  pivot_wider(
    id_cols = c(response, atlas),
    names_from = context,
    values_from = c(r2, rmse, mse)
  ) %>%  
  mutate(
    datasetID = as.factor(case_when(atlas == "atlas_1" ~ 5,
                                    atlas == "atlas_2" ~ 6,
                                    atlas == "atlas_3" ~ 13,
                                    atlas == "atlas_4" ~ 26,
                                    .default  = NA))) %>%
  select(response, datasetID, all_of(cols)) %>%
  arrange(response)


all_metrics2
write.csv(all_metrics2,
          here("Data/output/results/B_02_detailed_rf_results_split.csv"))
Code
all_metrics2 %>% 
  mutate(across(c(3:17), round, 2)) %>% 
  kableExtra::kable()
Warning: There was 1 warning in `mutate()`.
ℹ In argument: `across(c(3:17), round, 2)`.
Caused by warning:
! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
Supply arguments directly to `.fns` through an anonymous function instead.

  # Previously
  across(a:b, mean, na.rm = TRUE)

  # Now
  across(a:b, \(x) mean(x, na.rm = TRUE))
X response datasetID r2_raw mse_raw rmse_raw r2_oob mse_oob rmse_oob r2_cv mse_cv rmse_cv r2_test mse_test rmse_test r2_external mse_external rmse_external
1 Jaccard_dissim 5 0.98 0.00 0.05 0.89 0.01 0.11 0.88 0.01 0.10 0.90 0.01 0.10 0.63 0.03 0.1781903
2 Jaccard_dissim 6 0.98 0.00 0.05 0.85 0.01 0.11 0.86 0.01 0.10 0.85 0.01 0.11 NA NA NA
3 Jaccard_dissim 13 0.95 0.00 0.05 0.70 0.02 0.12 0.70 0.01 0.12 0.66 0.02 0.15 0.66 0.04 0.2003646
4 Jaccard_dissim 26 0.96 0.00 0.05 0.72 0.01 0.11 0.73 0.01 0.11 0.72 0.01 0.12 NA NA NA
5 log_R2_1 5 0.76 0.07 0.27 0.10 0.19 0.44 0.26 0.18 0.42 0.07 0.09 0.30 0.02 0.46 0.6755954
6 log_R2_1 6 0.74 0.15 0.38 0.15 0.38 0.62 0.21 0.37 0.61 0.02 0.52 0.72 NA NA NA
7 log_R2_1 13 0.93 0.11 0.32 0.58 0.51 0.71 0.49 0.47 0.68 0.28 0.52 0.72 0.01 0.74 0.8582190
8 log_R2_1 26 0.91 0.05 0.23 0.06 0.20 0.45 0.14 0.18 0.43 0.22 0.20 0.45 NA NA NA
9 log_R2_1_per_year 5 0.76 0.00 0.01 0.09 0.00 0.02 0.26 0.00 0.02 0.07 0.00 0.02 NA NA NA
10 log_R2_1_per_year 6 0.73 0.00 0.02 0.15 0.00 0.02 0.20 0.00 0.02 0.02 0.00 0.03 NA NA NA
11 log_R2_1_per_year 13 0.93 0.00 0.01 0.58 0.00 0.02 0.49 0.00 0.02 0.29 0.00 0.02 NA NA NA
12 log_R2_1_per_year 26 0.91 0.00 0.00 0.06 0.00 0.01 0.14 0.00 0.01 0.22 0.00 0.01 NA NA NA
Code
all_metrics2 %>%
  #select(-X) %>%
  pivot_longer(
    cols = c(-response, -datasetID),
    names_to = c(".value", "context"),
    names_sep = "_"
  ) %>%

ggplot(
  aes(context, r2, colour = response, group = response)
) +
  geom_point(size = 3) +
  geom_line(linewidth = 0.8) +
  scale_y_continuous(limits = c(0, 1)) +
  labs(
    title = "Model Generalisation Across Validation Contexts",
    y = "R²",
    x = NULL
  ) +
  theme_minimal()+
  facet_wrap(~datasetID)
Warning: Expected 2 pieces. Missing pieces filled with `NA` in 1 rows [1].
Warning: Removed 20 rows containing missing values or values outside the scale range
(`geom_point()`).
Warning: Removed 3 rows containing missing values or values outside the scale range
(`geom_line()`).

log ratio detailed performances

log ratio per year detailed performances

Less detailed:

Code
tuned_res_df <-
  list(
    results_atlas_i[[1]][[1]]$best,
    results_atlas_i[[1]][[2]]$best,
    results_atlas_i[[1]][[3]]$best,
    results_atlas_i[[2]][[1]]$best,
    results_atlas_i[[2]][[2]]$best,
    results_atlas_i[[2]][[3]]$best,
    results_atlas_i[[3]][[1]]$best,
    results_atlas_i[[3]][[2]]$best,
    results_atlas_i[[3]][[3]]$best,
    results_atlas_i[[4]][[1]]$best,
    results_atlas_i[[4]][[2]]$best,
    results_atlas_i[[4]][[3]]$best
  ) %>%
  bind_rows() %>%
  select(-.config) %>%
  mutate(
    response = c(
      "Jaccard_dissim",
      "log_R2_1",
      "log_R2_1_per_year",
      "Jaccard_dissim",
      "log_R2_1",
      "log_R2_1_per_year",
      "Jaccard_dissim",
      "log_R2_1",
      "log_R2_1_per_year",
      "Jaccard_dissim",
      "log_R2_1",
      "log_R2_1_per_year"
    ),
    datasetID = c(5, 5, 5, 6, 6, 6, 13, 13, 13, 26, 26, 26)
  )


mod_res <-
  list(
    results_atlas_i[[1]][[1]]$final_fit$.metrics,
    results_atlas_i[[1]][[2]]$final_fit$.metrics,
    results_atlas_i[[1]][[3]]$final_fit$.metrics,
    results_atlas_i[[2]][[1]]$final_fit$.metrics,
    results_atlas_i[[2]][[2]]$final_fit$.metrics,
    results_atlas_i[[2]][[3]]$final_fit$.metrics,
    results_atlas_i[[3]][[1]]$final_fit$.metrics,
    results_atlas_i[[3]][[2]]$final_fit$.metrics,
    results_atlas_i[[3]][[3]]$final_fit$.metrics,
    results_atlas_i[[4]][[1]]$final_fit$.metrics,
    results_atlas_i[[4]][[2]]$final_fit$.metrics,
    results_atlas_i[[4]][[3]]$final_fit$.metrics
  ) %>%
  bind_rows() %>%
  mutate(
    response = c(
      "Jaccard_dissim",
      "Jaccard_dissim",
      "log_R2_1",
      "log_R2_1",
      "log_R2_1_per_year",
      "log_R2_1_per_year",
      "Jaccard_dissim",
      "Jaccard_dissim",
      "log_R2_1",
      "log_R2_1",
      "log_R2_1_per_year",
      "log_R2_1_per_year",
      "Jaccard_dissim",
      "Jaccard_dissim",
      "log_R2_1",
      "log_R2_1",
      "log_R2_1_per_year",
      "log_R2_1_per_year",
      "Jaccard_dissim",
      "Jaccard_dissim",
      "log_R2_1",
      "log_R2_1",
      "log_R2_1_per_year",
      "log_R2_1_per_year"
    ),
    datasetID = c(
      5, 5, 5, 5, 5, 5,
      6, 6, 6, 6, 6, 6,
      13, 13, 13, 13, 13, 13,
      26, 26, 26, 26, 26, 26
    )
  ) %>%
  pivot_wider(
    id_cols = c(response, datasetID),
    names_from = c(".metric"),
    values_from = c(".estimate")
  ) %>%
  full_join(tuned_res_df) %>%
  mutate(
    cv = "3x10folds",
    tuning_method = "bayesian",
    model_type = "ranger"
  )
Joining with `by = join_by(response, datasetID)`

check:

Code
kable(mod_res)
response datasetID rmse rsq mtry min_n trees cv tuning_method model_type
Jaccard_dissim 5 0.0988881 0.9012123 5 5 1420 3x10folds bayesian ranger
log_R2_1 5 0.2956381 0.0722593 2 15 4967 3x10folds bayesian ranger
log_R2_1_per_year 5 0.0155612 0.0729791 2 15 4688 3x10folds bayesian ranger
Jaccard_dissim 6 0.1075795 0.8504765 9 5 1804 3x10folds bayesian ranger
log_R2_1 6 0.7197388 0.0218604 5 13 2968 3x10folds bayesian ranger
log_R2_1_per_year 6 0.0271482 0.0223793 4 15 1173 3x10folds bayesian ranger
Jaccard_dissim 13 0.1477504 0.6606242 6 5 3005 3x10folds bayesian ranger
log_R2_1 13 0.7187413 0.2812205 10 5 1118 3x10folds bayesian ranger
log_R2_1_per_year 13 0.0246676 0.2917952 10 5 1945 3x10folds bayesian ranger
Jaccard_dissim 26 0.1182506 0.7201068 6 5 4976 3x10folds bayesian ranger
log_R2_1 26 0.4450751 0.2248997 2 6 1008 3x10folds bayesian ranger
log_R2_1_per_year 26 0.0096819 0.2243523 2 6 4921 3x10folds bayesian ranger
Code
write.csv2(mod_res, here::here("Data/output/B_02_ranger_atlas_separate.csv"))

Interactions with H-stats

Code
interaction_res <-
  replicate(length(results_atlas_i), list())

for (atlas_i in seq_along(results_atlas_i)) {
  interaction_res[[atlas_i]] <-
    list()

  this_atlas <-
    results_atlas_i[[atlas_i]]

  for (response_i in seq_along(this_atlas)) {
    # prepare data for hstats function
    dd <-
      this_atlas[[response_i]]

    fit <-
      dd$final_fit %>%
      extract_fit_parsnip()

    preds <-
      dd$explainer$data

    # run hstats
    interactions <-
      hstats(fit,
        X = preds,
        threeway_m = 5
      )

    # save results
    interaction_res[[atlas_i]][[response_i]] <- interactions

    # create the plot
    interaction_plot <- plot(interactions, which = 1:100) + theme_few()

    if (atlas_i == 1) datasetID <- 5 else 
      if (atlas_i == 2) datasetID <- 6 else 
        if (atlas_i == 3) datasetID <- 13 else 
          if (atlas_i == 4) datasetID <- 26

    ggsave(
      filename =
        here::here(
          "Figures/B_models/interactions/",
          paste0(
            "B_02_",
            responses[response_i],
            "_",
            datasetID,
            "_interactions_hstats.svg"
          )
        ),
      plot = interaction_plot,
      device = "svg",
      width = 10,
      height = 10
    )
  }
}

interaction_res
saveRDS(interaction_res, here::here("Data/output/temp/B_02_Interaction_res_atlas_split.rds"))

Save interactions in numbers (csv) for Documentation

Code
res_list <- replicate(4, list)

for (atlas_i in seq_along(interaction_res)) {
  this_atlas <- interaction_res[[atlas_i]]
  res_list[[atlas_i]] <- list()

  if (atlas_i == 1) datasetID <- 5 else 
    if (atlas_i == 2) datasetID <- 6 else 
      if (atlas_i == 3) datasetID <- 13 else 
        if (atlas_i == 4) datasetID <- 26

  for (resp_i in seq_along(this_atlas)) {
    if (resp_i == 1) this_response <- "Jaccard_dissim" else 
      if (resp_i == 2) this_response <- "log_R2_1" else 
        if (resp_i == 3) this_response <- "log_R2_1_per_year"

    response_res <- this_atlas[[resp_i]]

    H2_all_df <-
      data.frame(
        row.names = NULL,
        datasetID = datasetID,
        response = this_response,
        variable = "all",
        test = "total_interaction_strength",
        H2 = summary(response_res)$h2[[1]]
      )

    H2_overall_df <-
      data.frame(
        row.names = NULL,
        datasetID = datasetID,
        response = this_response,
        variable = row.names(summary(response_res)$h2_overall[[1]]),
        test = "overall",
        H2 = summary(response_res)$h2_overall[[1]]
      )

    H2_pairwise_df <-
      data.frame(
        row.names = NULL,
        datasetID = datasetID,
        response = this_response,
        variable = row.names(summary(response_res)$h2_pairwise[[1]]),
        test = "pairwise",
        H2 = summary(response_res)$h2_pairwise[[1]]
      )

    H2_threeway_df <-
      data.frame(
        row.names = NULL,
        datasetID = datasetID,
        response = this_response,
        variable = row.names(summary(response_res)$h2_threeway[[1]]),
        test = "threeway",
        H2 = summary(response_res)$h2_threeway[[1]]
      )

    temp <-
      full_join(H2_all_df, H2_overall_df) %>%
      full_join(H2_pairwise_df) %>%
      full_join(H2_threeway_df)

    res_list[[atlas_i]][[resp_i]] <- temp
  }
}

res_df <- bind_rows(res_list)

kable(res_df)
datasetID response variable test H2
5 Jaccard_dissim all total_interaction_strength 0.0598871
5 Jaccard_dissim AOO overall 0.0138498
5 Jaccard_dissim D_AOO_a overall 0.0136075
5 Jaccard_dissim joincount_delta overall 0.0068304
5 Jaccard_dissim circNorm overall 0.0047233
5 Jaccard_dissim mean_lnLac overall 0.0035305
5 Jaccard_dissim Migration overall 0.0029290
5 Jaccard_dissim minDist_toBorder_centr overall 0.0025918
5 Jaccard_dissim pd overall 0.0019147
5 Jaccard_dissim Mass overall 0.0016965
5 Jaccard_dissim Habitat_5 overall 0.0016391
5 Jaccard_dissim GlobRangeSize_km2 overall 0.0005129
5 Jaccard_dissim Threatened overall 0.0000108
5 Jaccard_dissim Generalism overall 0.0000064
5 Jaccard_dissim mean_lnLac:joincount_delta pairwise 0.0136793
5 Jaccard_dissim joincount_delta:circNorm pairwise 0.0131594
5 Jaccard_dissim mean_lnLac:circNorm pairwise 0.0058165
5 Jaccard_dissim D_AOO_a:circNorm pairwise 0.0055413
5 Jaccard_dissim AOO:circNorm pairwise 0.0040775
5 Jaccard_dissim D_AOO_a:joincount_delta pairwise 0.0033556
5 Jaccard_dissim D_AOO_a:AOO pairwise 0.0032179
5 Jaccard_dissim AOO:joincount_delta pairwise 0.0029552
5 Jaccard_dissim mean_lnLac:AOO pairwise 0.0010200
5 Jaccard_dissim D_AOO_a:mean_lnLac pairwise 0.0006726
5 Jaccard_dissim AOO:joincount_delta:circNorm threeway 0.0003732
5 Jaccard_dissim D_AOO_a:joincount_delta:circNorm threeway 0.0002239
5 Jaccard_dissim mean_lnLac:joincount_delta:circNorm threeway 0.0000766
5 Jaccard_dissim D_AOO_a:mean_lnLac:AOO threeway 0.0000740
5 Jaccard_dissim D_AOO_a:AOO:circNorm threeway 0.0000626
5 Jaccard_dissim D_AOO_a:AOO:joincount_delta threeway 0.0000494
5 Jaccard_dissim D_AOO_a:mean_lnLac:circNorm threeway 0.0000397
5 Jaccard_dissim mean_lnLac:AOO:circNorm threeway 0.0000365
5 Jaccard_dissim mean_lnLac:AOO:joincount_delta threeway 0.0000321
5 Jaccard_dissim D_AOO_a:mean_lnLac:joincount_delta threeway 0.0000198
5 log_R2_1 all total_interaction_strength 0.1556536
5 log_R2_1 minDist_toBorder_centr overall 0.0238857
5 log_R2_1 mean_lnLac overall 0.0162175
5 log_R2_1 AOO overall 0.0146771
5 log_R2_1 D_AOO_a overall 0.0123982
5 log_R2_1 Migration overall 0.0078661
5 log_R2_1 Habitat_5 overall 0.0048621
5 log_R2_1 circNorm overall 0.0047958
5 log_R2_1 GlobRangeSize_km2 overall 0.0046309
5 log_R2_1 pd overall 0.0045913
5 log_R2_1 Mass overall 0.0045726
5 log_R2_1 joincount_delta overall 0.0033420
5 log_R2_1 Threatened overall 0.0009752
5 log_R2_1 Generalism overall 0.0003754
5 log_R2_1 mean_lnLac:minDist_toBorder_centr pairwise 0.0264172
5 log_R2_1 Migration:minDist_toBorder_centr pairwise 0.0145257
5 log_R2_1 D_AOO_a:minDist_toBorder_centr pairwise 0.0130751
5 log_R2_1 AOO:minDist_toBorder_centr pairwise 0.0130093
5 log_R2_1 Migration:mean_lnLac pairwise 0.0064310
5 log_R2_1 Migration:AOO pairwise 0.0034315
5 log_R2_1 Migration:D_AOO_a pairwise 0.0032363
5 log_R2_1 D_AOO_a:mean_lnLac pairwise 0.0022992
5 log_R2_1 mean_lnLac:AOO pairwise 0.0013320
5 log_R2_1 D_AOO_a:AOO pairwise 0.0011658
5 log_R2_1 D_AOO_a:mean_lnLac:minDist_toBorder_centr threeway 0.0002648
5 log_R2_1 D_AOO_a:mean_lnLac:AOO threeway 0.0000988
5 log_R2_1 Migration:mean_lnLac:AOO threeway 0.0000669
5 log_R2_1 D_AOO_a:AOO:minDist_toBorder_centr threeway 0.0000655
5 log_R2_1 Migration:D_AOO_a:minDist_toBorder_centr threeway 0.0000532
5 log_R2_1 Migration:AOO:minDist_toBorder_centr threeway 0.0000463
5 log_R2_1 Migration:mean_lnLac:minDist_toBorder_centr threeway 0.0000290
5 log_R2_1 Migration:D_AOO_a:AOO threeway 0.0000230
5 log_R2_1 Migration:D_AOO_a:mean_lnLac threeway 0.0000226
5 log_R2_1 mean_lnLac:AOO:minDist_toBorder_centr threeway 0.0000197
5 log_R2_1_per_year all total_interaction_strength 0.1565922
5 log_R2_1_per_year minDist_toBorder_centr overall 0.0235293
5 log_R2_1_per_year mean_lnLac overall 0.0168624
5 log_R2_1_per_year AOO overall 0.0146081
5 log_R2_1_per_year D_AOO_a overall 0.0126080
5 log_R2_1_per_year Migration overall 0.0081611
5 log_R2_1_per_year Habitat_5 overall 0.0049102
5 log_R2_1_per_year circNorm overall 0.0048329
5 log_R2_1_per_year GlobRangeSize_km2 overall 0.0047544
5 log_R2_1_per_year pd overall 0.0045985
5 log_R2_1_per_year Mass overall 0.0044342
5 log_R2_1_per_year joincount_delta overall 0.0033666
5 log_R2_1_per_year Threatened overall 0.0010092
5 log_R2_1_per_year Generalism overall 0.0003760
5 log_R2_1_per_year mean_lnLac:minDist_toBorder_centr pairwise 0.0270227
5 log_R2_1_per_year Migration:minDist_toBorder_centr pairwise 0.0160174
5 log_R2_1_per_year D_AOO_a:minDist_toBorder_centr pairwise 0.0131184
5 log_R2_1_per_year AOO:minDist_toBorder_centr pairwise 0.0115159
5 log_R2_1_per_year Migration:mean_lnLac pairwise 0.0064058
5 log_R2_1_per_year Migration:AOO pairwise 0.0037614
5 log_R2_1_per_year Migration:D_AOO_a pairwise 0.0029700
5 log_R2_1_per_year D_AOO_a:mean_lnLac pairwise 0.0025627
5 log_R2_1_per_year mean_lnLac:AOO pairwise 0.0012379
5 log_R2_1_per_year D_AOO_a:AOO pairwise 0.0009550
5 log_R2_1_per_year D_AOO_a:mean_lnLac:minDist_toBorder_centr threeway 0.0002724
5 log_R2_1_per_year D_AOO_a:mean_lnLac:AOO threeway 0.0000721
5 log_R2_1_per_year Migration:mean_lnLac:AOO threeway 0.0000701
5 log_R2_1_per_year Migration:D_AOO_a:minDist_toBorder_centr threeway 0.0000572
5 log_R2_1_per_year Migration:AOO:minDist_toBorder_centr threeway 0.0000455
5 log_R2_1_per_year D_AOO_a:AOO:minDist_toBorder_centr threeway 0.0000453
5 log_R2_1_per_year Migration:mean_lnLac:minDist_toBorder_centr threeway 0.0000283
5 log_R2_1_per_year Migration:D_AOO_a:mean_lnLac threeway 0.0000262
5 log_R2_1_per_year Migration:D_AOO_a:AOO threeway 0.0000193
5 log_R2_1_per_year mean_lnLac:AOO:minDist_toBorder_centr threeway 0.0000185
6 Jaccard_dissim all total_interaction_strength 0.1010645
6 Jaccard_dissim D_AOO_a overall 0.0297196
6 Jaccard_dissim circNorm overall 0.0295028
6 Jaccard_dissim AOO overall 0.0220913
6 Jaccard_dissim joincount_delta overall 0.0207221
6 Jaccard_dissim GlobRangeSize_km2 overall 0.0123461
6 Jaccard_dissim mean_lnLac overall 0.0017711
6 Jaccard_dissim pd overall 0.0009105
6 Jaccard_dissim Mass overall 0.0008897
6 Jaccard_dissim Habitat_5 overall 0.0002703
6 Jaccard_dissim minDist_toBorder_centr overall 0.0001580
6 Jaccard_dissim Migration overall 0.0000555
6 Jaccard_dissim Threatened overall 0.0000075
6 Jaccard_dissim Generalism overall 0.0000059
6 Jaccard_dissim D_AOO_a:circNorm pairwise 0.0563727
6 Jaccard_dissim AOO:circNorm pairwise 0.0218717
6 Jaccard_dissim D_AOO_a:joincount_delta pairwise 0.0191333
6 Jaccard_dissim AOO:joincount_delta pairwise 0.0104372
6 Jaccard_dissim GlobRangeSize_km2:joincount_delta pairwise 0.0069210
6 Jaccard_dissim GlobRangeSize_km2:AOO pairwise 0.0053861
6 Jaccard_dissim GlobRangeSize_km2:D_AOO_a pairwise 0.0043218
6 Jaccard_dissim joincount_delta:circNorm pairwise 0.0041067
6 Jaccard_dissim GlobRangeSize_km2:circNorm pairwise 0.0022205
6 Jaccard_dissim D_AOO_a:AOO pairwise 0.0014190
6 Jaccard_dissim GlobRangeSize_km2:D_AOO_a:AOO threeway 0.0003994
6 Jaccard_dissim GlobRangeSize_km2:D_AOO_a:joincount_delta threeway 0.0002870
6 Jaccard_dissim D_AOO_a:AOO:circNorm threeway 0.0002711
6 Jaccard_dissim AOO:joincount_delta:circNorm threeway 0.0002697
6 Jaccard_dissim D_AOO_a:AOO:joincount_delta threeway 0.0002187
6 Jaccard_dissim GlobRangeSize_km2:AOO:joincount_delta threeway 0.0002154
6 Jaccard_dissim D_AOO_a:joincount_delta:circNorm threeway 0.0001885
6 Jaccard_dissim GlobRangeSize_km2:D_AOO_a:circNorm threeway 0.0001482
6 Jaccard_dissim GlobRangeSize_km2:joincount_delta:circNorm threeway 0.0000896
6 Jaccard_dissim GlobRangeSize_km2:AOO:circNorm threeway 0.0000557
6 log_R2_1 all total_interaction_strength 0.1387404
6 log_R2_1 Mass overall 0.0275937
6 log_R2_1 joincount_delta overall 0.0149367
6 log_R2_1 D_AOO_a overall 0.0143788
6 log_R2_1 AOO overall 0.0134166
6 log_R2_1 mean_lnLac overall 0.0102506
6 log_R2_1 GlobRangeSize_km2 overall 0.0078742
6 log_R2_1 circNorm overall 0.0078237
6 log_R2_1 Habitat_5 overall 0.0056561
6 log_R2_1 pd overall 0.0045713
6 log_R2_1 minDist_toBorder_centr overall 0.0038501
6 log_R2_1 Migration overall 0.0011115
6 log_R2_1 Threatened overall 0.0004300
6 log_R2_1 Generalism overall 0.0001340
6 log_R2_1 Mass:joincount_delta pairwise 0.0148149
6 log_R2_1 Mass:AOO pairwise 0.0127990
6 log_R2_1 Mass:D_AOO_a pairwise 0.0104496
6 log_R2_1 mean_lnLac:AOO pairwise 0.0099040
6 log_R2_1 mean_lnLac:joincount_delta pairwise 0.0096607
6 log_R2_1 Mass:mean_lnLac pairwise 0.0058988
6 log_R2_1 D_AOO_a:AOO pairwise 0.0037788
6 log_R2_1 AOO:joincount_delta pairwise 0.0029392
6 log_R2_1 D_AOO_a:joincount_delta pairwise 0.0024472
6 log_R2_1 D_AOO_a:mean_lnLac pairwise 0.0017026
6 log_R2_1 D_AOO_a:mean_lnLac:AOO threeway 0.0011866
6 log_R2_1 mean_lnLac:AOO:joincount_delta threeway 0.0003804
6 log_R2_1 Mass:D_AOO_a:joincount_delta threeway 0.0003795
6 log_R2_1 Mass:mean_lnLac:AOO threeway 0.0002328
6 log_R2_1 Mass:mean_lnLac:joincount_delta threeway 0.0002276
6 log_R2_1 Mass:D_AOO_a:AOO threeway 0.0001965
6 log_R2_1 D_AOO_a:mean_lnLac:joincount_delta threeway 0.0001409
6 log_R2_1 Mass:AOO:joincount_delta threeway 0.0000936
6 log_R2_1 Mass:D_AOO_a:mean_lnLac threeway 0.0000882
6 log_R2_1 D_AOO_a:AOO:joincount_delta threeway 0.0000608
6 log_R2_1_per_year all total_interaction_strength 0.1250904
6 log_R2_1_per_year Mass overall 0.0205397
6 log_R2_1_per_year joincount_delta overall 0.0134652
6 log_R2_1_per_year AOO overall 0.0126629
6 log_R2_1_per_year D_AOO_a overall 0.0106287
6 log_R2_1_per_year mean_lnLac overall 0.0101753
6 log_R2_1_per_year circNorm overall 0.0087529
6 log_R2_1_per_year GlobRangeSize_km2 overall 0.0062331
6 log_R2_1_per_year Habitat_5 overall 0.0059114
6 log_R2_1_per_year minDist_toBorder_centr overall 0.0040371
6 log_R2_1_per_year pd overall 0.0033470
6 log_R2_1_per_year Migration overall 0.0009265
6 log_R2_1_per_year Threatened overall 0.0004192
6 log_R2_1_per_year Generalism overall 0.0001648
6 log_R2_1_per_year Mass:AOO pairwise 0.0114985
6 log_R2_1_per_year Mass:joincount_delta pairwise 0.0093539
6 log_R2_1_per_year mean_lnLac:joincount_delta pairwise 0.0078681
6 log_R2_1_per_year mean_lnLac:AOO pairwise 0.0072352
6 log_R2_1_per_year Mass:D_AOO_a pairwise 0.0065471
6 log_R2_1_per_year Mass:mean_lnLac pairwise 0.0035956
6 log_R2_1_per_year AOO:joincount_delta pairwise 0.0035016
6 log_R2_1_per_year D_AOO_a:joincount_delta pairwise 0.0034719
6 log_R2_1_per_year D_AOO_a:AOO pairwise 0.0022115
6 log_R2_1_per_year D_AOO_a:mean_lnLac pairwise 0.0013901
6 log_R2_1_per_year D_AOO_a:mean_lnLac:AOO threeway 0.0005287
6 log_R2_1_per_year Mass:D_AOO_a:joincount_delta threeway 0.0002592
6 log_R2_1_per_year D_AOO_a:mean_lnLac:joincount_delta threeway 0.0002587
6 log_R2_1_per_year mean_lnLac:AOO:joincount_delta threeway 0.0002521
6 log_R2_1_per_year Mass:mean_lnLac:joincount_delta threeway 0.0001308
6 log_R2_1_per_year Mass:D_AOO_a:AOO threeway 0.0001210
6 log_R2_1_per_year D_AOO_a:AOO:joincount_delta threeway 0.0000990
6 log_R2_1_per_year Mass:AOO:joincount_delta threeway 0.0000764
6 log_R2_1_per_year Mass:mean_lnLac:AOO threeway 0.0000560
6 log_R2_1_per_year Mass:D_AOO_a:mean_lnLac threeway 0.0000445
13 Jaccard_dissim all total_interaction_strength 0.1121384
13 Jaccard_dissim joincount_delta overall 0.0217049
13 Jaccard_dissim AOO overall 0.0207696
13 Jaccard_dissim D_AOO_a overall 0.0200154
13 Jaccard_dissim Mass overall 0.0095850
13 Jaccard_dissim mean_lnLac overall 0.0081323
13 Jaccard_dissim circNorm overall 0.0053420
13 Jaccard_dissim Habitat_5 overall 0.0037584
13 Jaccard_dissim GlobRangeSize_km2 overall 0.0036670
13 Jaccard_dissim pd overall 0.0035652
13 Jaccard_dissim Migration overall 0.0034860
13 Jaccard_dissim minDist_toBorder_centr overall 0.0015387
13 Jaccard_dissim Threatened overall 0.0001717
13 Jaccard_dissim Generalism overall 0.0000333
13 Jaccard_dissim D_AOO_a:joincount_delta pairwise 0.0140928
13 Jaccard_dissim AOO:joincount_delta pairwise 0.0098289
13 Jaccard_dissim mean_lnLac:joincount_delta pairwise 0.0091204
13 Jaccard_dissim Mass:mean_lnLac pairwise 0.0086317
13 Jaccard_dissim Mass:joincount_delta pairwise 0.0061875
13 Jaccard_dissim D_AOO_a:AOO pairwise 0.0051294
13 Jaccard_dissim mean_lnLac:AOO pairwise 0.0036168
13 Jaccard_dissim D_AOO_a:mean_lnLac pairwise 0.0028201
13 Jaccard_dissim Mass:AOO pairwise 0.0023650
13 Jaccard_dissim Mass:D_AOO_a pairwise 0.0022295
13 Jaccard_dissim Mass:D_AOO_a:joincount_delta threeway 0.0001281
13 Jaccard_dissim D_AOO_a:AOO:joincount_delta threeway 0.0001257
13 Jaccard_dissim Mass:D_AOO_a:mean_lnLac threeway 0.0001255
13 Jaccard_dissim Mass:mean_lnLac:joincount_delta threeway 0.0001031
13 Jaccard_dissim Mass:AOO:joincount_delta threeway 0.0000960
13 Jaccard_dissim Mass:mean_lnLac:AOO threeway 0.0000918
13 Jaccard_dissim D_AOO_a:mean_lnLac:AOO threeway 0.0000740
13 Jaccard_dissim D_AOO_a:mean_lnLac:joincount_delta threeway 0.0000518
13 Jaccard_dissim mean_lnLac:AOO:joincount_delta threeway 0.0000429
13 Jaccard_dissim Mass:D_AOO_a:AOO threeway 0.0000384
13 log_R2_1 all total_interaction_strength 0.0791008
13 log_R2_1 Mass overall 0.0299547
13 log_R2_1 D_AOO_a overall 0.0166392
13 log_R2_1 AOO overall 0.0099497
13 log_R2_1 GlobRangeSize_km2 overall 0.0071454
13 log_R2_1 Threatened overall 0.0059631
13 log_R2_1 circNorm overall 0.0054693
13 log_R2_1 pd overall 0.0035636
13 log_R2_1 joincount_delta overall 0.0032695
13 log_R2_1 mean_lnLac overall 0.0029345
13 log_R2_1 minDist_toBorder_centr overall 0.0024854
13 log_R2_1 Habitat_5 overall 0.0004812
13 log_R2_1 Migration overall 0.0001793
13 log_R2_1 Generalism overall 0.0000913
13 log_R2_1 Threatened:D_AOO_a pairwise 0.0371994
13 log_R2_1 Mass:GlobRangeSize_km2 pairwise 0.0361499
13 log_R2_1 Mass:Threatened pairwise 0.0250424
13 log_R2_1 Mass:D_AOO_a pairwise 0.0161187
13 log_R2_1 Mass:AOO pairwise 0.0074672
13 log_R2_1 GlobRangeSize_km2:D_AOO_a pairwise 0.0058205
13 log_R2_1 Threatened:AOO pairwise 0.0007571
13 log_R2_1 D_AOO_a:AOO pairwise 0.0006532
13 log_R2_1 GlobRangeSize_km2:AOO pairwise 0.0001514
13 log_R2_1 GlobRangeSize_km2:Threatened pairwise 0.0000487
13 log_R2_1 Mass:Threatened:D_AOO_a threeway 0.0008060
13 log_R2_1 Mass:D_AOO_a:AOO threeway 0.0002521
13 log_R2_1 Mass:GlobRangeSize_km2:AOO threeway 0.0002247
13 log_R2_1 Threatened:D_AOO_a:AOO threeway 0.0001796
13 log_R2_1 Mass:GlobRangeSize_km2:D_AOO_a threeway 0.0001720
13 log_R2_1 Mass:Threatened:AOO threeway 0.0000391
13 log_R2_1 GlobRangeSize_km2:D_AOO_a:AOO threeway 0.0000196
13 log_R2_1 GlobRangeSize_km2:Threatened:D_AOO_a threeway 0.0000108
13 log_R2_1 Mass:GlobRangeSize_km2:Threatened threeway 0.0000023
13 log_R2_1 GlobRangeSize_km2:Threatened:AOO threeway 0.0000004
13 log_R2_1_per_year all total_interaction_strength 0.0811756
13 log_R2_1_per_year Mass overall 0.0315904
13 log_R2_1_per_year D_AOO_a overall 0.0160063
13 log_R2_1_per_year AOO overall 0.0105841
13 log_R2_1_per_year GlobRangeSize_km2 overall 0.0070064
13 log_R2_1_per_year circNorm overall 0.0058178
13 log_R2_1_per_year Threatened overall 0.0055658
13 log_R2_1_per_year pd overall 0.0033994
13 log_R2_1_per_year joincount_delta overall 0.0031782
13 log_R2_1_per_year mean_lnLac overall 0.0031601
13 log_R2_1_per_year minDist_toBorder_centr overall 0.0025260
13 log_R2_1_per_year Habitat_5 overall 0.0006292
13 log_R2_1_per_year Migration overall 0.0001475
13 log_R2_1_per_year Generalism overall 0.0000752
13 log_R2_1_per_year Mass:GlobRangeSize_km2 pairwise 0.0326397
13 log_R2_1_per_year Mass:D_AOO_a pairwise 0.0163533
13 log_R2_1_per_year Mass:AOO pairwise 0.0072449
13 log_R2_1_per_year D_AOO_a:circNorm pairwise 0.0064975
13 log_R2_1_per_year GlobRangeSize_km2:D_AOO_a pairwise 0.0052061
13 log_R2_1_per_year Mass:circNorm pairwise 0.0043993
13 log_R2_1_per_year GlobRangeSize_km2:circNorm pairwise 0.0015335
13 log_R2_1_per_year AOO:circNorm pairwise 0.0010755
13 log_R2_1_per_year D_AOO_a:AOO pairwise 0.0006856
13 log_R2_1_per_year GlobRangeSize_km2:AOO pairwise 0.0001600
13 log_R2_1_per_year Mass:GlobRangeSize_km2:circNorm threeway 0.0005227
13 log_R2_1_per_year Mass:D_AOO_a:AOO threeway 0.0002897
13 log_R2_1_per_year Mass:D_AOO_a:circNorm threeway 0.0001924
13 log_R2_1_per_year Mass:GlobRangeSize_km2:AOO threeway 0.0001728
13 log_R2_1_per_year Mass:GlobRangeSize_km2:D_AOO_a threeway 0.0001610
13 log_R2_1_per_year D_AOO_a:AOO:circNorm threeway 0.0001224
13 log_R2_1_per_year Mass:AOO:circNorm threeway 0.0000653
13 log_R2_1_per_year GlobRangeSize_km2:D_AOO_a:circNorm threeway 0.0000403
13 log_R2_1_per_year GlobRangeSize_km2:D_AOO_a:AOO threeway 0.0000273
13 log_R2_1_per_year GlobRangeSize_km2:AOO:circNorm threeway 0.0000017
26 Jaccard_dissim all total_interaction_strength 0.1565636
26 Jaccard_dissim D_AOO_a overall 0.0566946
26 Jaccard_dissim AOO overall 0.0395430
26 Jaccard_dissim joincount_delta overall 0.0179824
26 Jaccard_dissim circNorm overall 0.0174994
26 Jaccard_dissim mean_lnLac overall 0.0127689
26 Jaccard_dissim Habitat_5 overall 0.0058893
26 Jaccard_dissim pd overall 0.0037535
26 Jaccard_dissim Mass overall 0.0037507
26 Jaccard_dissim minDist_toBorder_centr overall 0.0035816
26 Jaccard_dissim GlobRangeSize_km2 overall 0.0028431
26 Jaccard_dissim Migration overall 0.0020993
26 Jaccard_dissim Threatened overall 0.0001222
26 Jaccard_dissim Generalism overall 0.0001034
26 Jaccard_dissim mean_lnLac:joincount_delta pairwise 0.0238481
26 Jaccard_dissim D_AOO_a:circNorm pairwise 0.0167878
26 Jaccard_dissim AOO:circNorm pairwise 0.0116036
26 Jaccard_dissim AOO:joincount_delta pairwise 0.0112521
26 Jaccard_dissim mean_lnLac:circNorm pairwise 0.0095564
26 Jaccard_dissim D_AOO_a:joincount_delta pairwise 0.0080105
26 Jaccard_dissim D_AOO_a:AOO pairwise 0.0074041
26 Jaccard_dissim joincount_delta:circNorm pairwise 0.0045932
26 Jaccard_dissim D_AOO_a:mean_lnLac pairwise 0.0041073
26 Jaccard_dissim mean_lnLac:AOO pairwise 0.0036380
26 Jaccard_dissim mean_lnLac:joincount_delta:circNorm threeway 0.0007889
26 Jaccard_dissim D_AOO_a:AOO:joincount_delta threeway 0.0004160
26 Jaccard_dissim D_AOO_a:mean_lnLac:AOO threeway 0.0003658
26 Jaccard_dissim D_AOO_a:mean_lnLac:joincount_delta threeway 0.0002476
26 Jaccard_dissim D_AOO_a:joincount_delta:circNorm threeway 0.0002393
26 Jaccard_dissim D_AOO_a:AOO:circNorm threeway 0.0002340
26 Jaccard_dissim mean_lnLac:AOO:circNorm threeway 0.0002065
26 Jaccard_dissim AOO:joincount_delta:circNorm threeway 0.0001455
26 Jaccard_dissim D_AOO_a:mean_lnLac:circNorm threeway 0.0001451
26 Jaccard_dissim mean_lnLac:AOO:joincount_delta threeway 0.0001109
26 log_R2_1 all total_interaction_strength 0.3801135
26 log_R2_1 AOO overall 0.0392737
26 log_R2_1 D_AOO_a overall 0.0350926
26 log_R2_1 mean_lnLac overall 0.0312374
26 log_R2_1 circNorm overall 0.0239790
26 log_R2_1 Habitat_5 overall 0.0219475
26 log_R2_1 Mass overall 0.0217341
26 log_R2_1 GlobRangeSize_km2 overall 0.0203280
26 log_R2_1 pd overall 0.0171080
26 log_R2_1 joincount_delta overall 0.0169687
26 log_R2_1 minDist_toBorder_centr overall 0.0160733
26 log_R2_1 Threatened overall 0.0087302
26 log_R2_1 Migration overall 0.0086608
26 log_R2_1 Generalism overall 0.0078238
26 log_R2_1 Habitat_5:circNorm pairwise 0.0435408
26 log_R2_1 Habitat_5:D_AOO_a pairwise 0.0415548
26 log_R2_1 Habitat_5:mean_lnLac pairwise 0.0200283
26 log_R2_1 Habitat_5:AOO pairwise 0.0171760
26 log_R2_1 D_AOO_a:AOO pairwise 0.0165601
26 log_R2_1 mean_lnLac:AOO pairwise 0.0121818
26 log_R2_1 D_AOO_a:mean_lnLac pairwise 0.0082198
26 log_R2_1 AOO:circNorm pairwise 0.0072119
26 log_R2_1 mean_lnLac:circNorm pairwise 0.0045865
26 log_R2_1 D_AOO_a:circNorm pairwise 0.0041146
26 log_R2_1 D_AOO_a:mean_lnLac:circNorm threeway 0.0015448
26 log_R2_1 Habitat_5:D_AOO_a:AOO threeway 0.0015268
26 log_R2_1 Habitat_5:mean_lnLac:AOO threeway 0.0012519
26 log_R2_1 D_AOO_a:mean_lnLac:AOO threeway 0.0011213
26 log_R2_1 D_AOO_a:AOO:circNorm threeway 0.0010279
26 log_R2_1 Habitat_5:D_AOO_a:circNorm threeway 0.0006194
26 log_R2_1 Habitat_5:mean_lnLac:circNorm threeway 0.0004994
26 log_R2_1 Habitat_5:D_AOO_a:mean_lnLac threeway 0.0004746
26 log_R2_1 mean_lnLac:AOO:circNorm threeway 0.0004316
26 log_R2_1 Habitat_5:AOO:circNorm threeway 0.0000931
26 log_R2_1_per_year all total_interaction_strength 0.3825850
26 log_R2_1_per_year AOO overall 0.0399449
26 log_R2_1_per_year D_AOO_a overall 0.0357572
26 log_R2_1_per_year mean_lnLac overall 0.0326375
26 log_R2_1_per_year Mass overall 0.0240901
26 log_R2_1_per_year Habitat_5 overall 0.0237107
26 log_R2_1_per_year circNorm overall 0.0221451
26 log_R2_1_per_year GlobRangeSize_km2 overall 0.0195584
26 log_R2_1_per_year minDist_toBorder_centr overall 0.0169696
26 log_R2_1_per_year pd overall 0.0169347
26 log_R2_1_per_year joincount_delta overall 0.0165741
26 log_R2_1_per_year Threatened overall 0.0085758
26 log_R2_1_per_year Migration overall 0.0074302
26 log_R2_1_per_year Generalism overall 0.0060294
26 log_R2_1_per_year Habitat_5:D_AOO_a pairwise 0.0397811
26 log_R2_1_per_year D_AOO_a:AOO pairwise 0.0223430
26 log_R2_1_per_year Habitat_5:AOO pairwise 0.0203670
26 log_R2_1_per_year Habitat_5:mean_lnLac pairwise 0.0169598
26 log_R2_1_per_year Mass:AOO pairwise 0.0131936
26 log_R2_1_per_year Mass:D_AOO_a pairwise 0.0122461
26 log_R2_1_per_year D_AOO_a:mean_lnLac pairwise 0.0112894
26 log_R2_1_per_year mean_lnLac:AOO pairwise 0.0108299
26 log_R2_1_per_year Mass:mean_lnLac pairwise 0.0098152
26 log_R2_1_per_year Mass:Habitat_5 pairwise 0.0080655
26 log_R2_1_per_year Habitat_5:D_AOO_a:AOO threeway 0.0030368
26 log_R2_1_per_year D_AOO_a:mean_lnLac:AOO threeway 0.0018347
26 log_R2_1_per_year Habitat_5:mean_lnLac:AOO threeway 0.0012165
26 log_R2_1_per_year Mass:Habitat_5:AOO threeway 0.0006008
26 log_R2_1_per_year Mass:mean_lnLac:AOO threeway 0.0005147
26 log_R2_1_per_year Mass:D_AOO_a:AOO threeway 0.0004537
26 log_R2_1_per_year Mass:Habitat_5:mean_lnLac threeway 0.0004507
26 log_R2_1_per_year Habitat_5:D_AOO_a:mean_lnLac threeway 0.0003663
26 log_R2_1_per_year Mass:Habitat_5:D_AOO_a threeway 0.0003407
26 log_R2_1_per_year Mass:D_AOO_a:mean_lnLac threeway 0.0002742
Code
write.csv2(res_df, here::here("Data/output/results/B_02_Hstats_split_data.csv"))

Variable importances:

Code
vimp_list <- replicate(4, list)

for (atlas_i in seq_along(results_atlas_i)) {
  vimp_list[[atlas_i]] <- list()
  this_atlas <- results_atlas_i[[atlas_i]]

  if (atlas_i == 1) datasetID <- 5 else 
    if (atlas_i == 2) datasetID <- 6 else 
      if (atlas_i == 3) datasetID <- 13 else 
        if (atlas_i == 4) datasetID <- 26

  for (i in seq_along(this_atlas)) {
    if (i == 1) this_response <- "Jaccard_dissim" else 
      if (i == 2) this_response <- "log_R2_1" else 
        if (i == 3) this_response <- "log_R2_1_per_year"

    fit <- 
      this_atlas[[i]]$final_fit %>% 
      extract_fit_engine()
    
    vimp <- 
      fit$variable.importance

    vimp_list[[atlas_i]][[i]] <- data.frame(
      datasetID = datasetID,
      variable = names(vimp),
      importance = vimp,
      row.names = NULL,
      response = this_response,
      importance_mode = fit$importance.mode
    )
  }
}

imp_df <-
  vimp_list %>%
  bind_rows()

write.csv2(imp_df, here::here("Data/output/results/B_02_ranger_vimp_split_data.csv"))