---
title: "B_02_RandomForest_split_data.R"
format:
  html:
    self-contained: true
    embed-resources: true
    toc: true          # optional: adds a table of contents
    theme: cosmo       # optional: Bootstrap theme
    code-fold: show    # optional: collapsible code blocks
    code-tools: true   # optional: adds copy/paste buttons
    toc-depth: 4
editor:
  markdown:
    wrap: 72
---
# B - 02: RandomForest (split models)

```{r}
#| echo: false
#| message: false
#| warning: false
#| results: "hide"
#| output: false
rm(list = ls())
gc()
```
## Libraries

```{r}
#| message: false
#| warning: false
#| results: "hide"
#| output: false
suppressPackageStartupMessages({
  library(here)
  source(here::here("Code/00_Configuration.R"))
  package_list <-
    c(
      package_list,
      "ranger",
      "tidymodels",
      "caret",
      "skimr",
      "DALEXtra",
      "DALEX",
      "ggthemes",
      "hstats"
    )
  lapply(package_list, require, character.only = TRUE)
  tidymodels_prefer()
})
rm(list = ls())
set.seed(123)
```
## Read data

```{r}
#| message: false
#| warning: false
#| error: false
dta <-
  readRDS(
    here::here(
      "Data/output/1_all_predictors_merged.rds"
    )
  ) %>%
  filter(samplingPeriodID == 1)
```
## Set variable vectors for hypotheses

```{r}
sp_id <-
  c(
    "verbatimIdentification",
    "scientificName"
  )

H1 <-
  c(
    "Mass",
    "GlobRangeSize_km2",
    "Migration",
    "Habitat_5",
    "Generalism",
    "Threatened",
    "pd"
  )

H2 <-
  c(
    "D_AOO_a",
    "mean_lnLac",
    "AOO",
    "joincount_delta",
    "circNorm",
    "minDist_toBorder_centr"
  )

H3 <-
  c("datasetID")

predictors <-
  c(H1, H2, H3)

responses <-
  c(
    "Jaccard_dissim",
    "log_R2_1",
    "log_R2_1_per_year" # yearly rate
  )
```
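Before reducing the data, a quick sanity check (my addition, not part of the original pipeline) can confirm that every configured column actually exists in `dta`:

```{r}
#| eval: false
# Hypothetical guard: stop early if any configured column is absent
missing_vars <- setdiff(c(sp_id, predictors, responses), names(dta))
if (length(missing_vars) > 0) {
  stop("Missing columns in `dta`: ", paste(missing_vars, collapse = ", "))
}
```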
## Make model data

Reduce to model variables:

```{r}
dta_new <-
  dta %>%
  select(all_of(c(sp_id, responses, H3, H1, H2))) %>%
  ungroup()
```
Check correct coding:

```{r}
str(dta_new) # 14 predictors
```
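Since `skimr` is already on the package list, a fuller overview than `str()` is one call away; a minimal sketch:

```{r}
#| eval: false
# Completeness, types, and distribution summaries per model variable
skimr::skim(dta_new)
```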
Split by dataset:

```{r}
dta_atlas_split <-
  dta_new %>%
  group_split(datasetID)
```
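It may be worth confirming that each atlas holds enough species for an 80/20 split plus repeated 10-fold CV; a quick check (not in the original pipeline):

```{r}
#| eval: false
# Species counts per atlas; very small groups would make the
# resampling scheme below fragile
dta_new %>% count(datasetID)
```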
Set model specifications & indicate tuning:

```{r}
set.seed(123)
mod_spec <-
  rand_forest(
    mtry = tune(),
    trees = tune(),
    min_n = tune()
  ) %>%
  set_engine("ranger",
    importance = "permutation",
    respect.unordered.factors = TRUE
  ) %>%
  set_mode("regression")

rf_params <-
  parameters(
    mtry(range = c(2L, 10L)),
    min_n(range = c(5L, 15L)),
    trees(range = c(1000L, 5000L))
  )
```
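To get a feel for the search space before committing to Bayesian tuning, one can draw a small space-filling design from `rf_params`; a sketch, assuming the `dials` grid helpers are available:

```{r}
#| eval: false
# Five candidate hyperparameter combinations sampled from the
# declared ranges (illustration only; tune_bayes() draws its own)
set.seed(123)
dials::grid_latin_hypercube(rf_params, size = 5)
```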
Create empty lists for results:

```{r}
#| eval: false
tuned_res <-
  replicate(length(dta_atlas_split), list())
predictions <-
  replicate(length(dta_atlas_split), list())
res <-
  replicate(length(dta_atlas_split), list())
list_split_data <-
  replicate(length(dta_atlas_split), list())
results_response <-
  replicate(length(responses), list())
results_atlas_i <-
  replicate(length(dta_atlas_split), list()) # one slot per atlas
```
### START LOOP

- Level 1: for each atlas (1-4)
- Level 2: for each response (1-3)

```{r}
#| label: save-cv-folds
list_split_data <- list()

for (atlas_i in seq_along(dta_atlas_split)) {
  this_atlas <-
    dta_atlas_split[[atlas_i]] %>%
    select(-datasetID)

  # Split data into training and testing (80/20)
  set.seed(123)
  split_info <-
    initial_split(this_atlas, prop = 0.8)
  train <-
    training(split_info)
  test <-
    testing(split_info)

  # Cross-validation folds on training data
  set.seed(123)
  folds <-
    vfold_cv(train, v = 10, repeats = 3)

  list_split_data[[atlas_i]] <- list(
    split_info = split_info,
    train = train,
    test = test,
    folds = folds
  )
}
```
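A quick way to verify the resampling structure for one atlas (illustrative only): each of the 30 splits (10 folds × 3 repeats) carries its own analysis/assessment partition.

```{r}
#| eval: false
# Print the resample object and the size of one analysis set
list_split_data[[1]]$folds
dim(rsample::analysis(list_split_data[[1]]$folds$splits[[1]]))
```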
```{r}
#| eval: false
for (atlas_i in seq_along(dta_atlas_split)) {
  this_atlas <-
    dta_atlas_split[[atlas_i]] %>%
    select(-datasetID)

  # Split data into training and testing (80/20)
  set.seed(123)
  split_info <-
    initial_split(this_atlas, prop = 0.8)
  train <-
    training(split_info)
  test <-
    testing(split_info)

  # Cross-validation folds on training data
  set.seed(123)
  folds <-
    vfold_cv(train, v = 10, repeats = 3)

  # Save split data to list for later reference
  list_split_data[[atlas_i]] <-
    list(
      split_info = split_info,
      train = train,
      test = test,
      folds = folds
    )

  # Loop through responses
  for (resp_i in seq_along(responses)) {
    resp <-
      responses[resp_i]

    # Recipe: current response as outcome; species IDs and the
    # remaining responses demoted to non-predictor roles
    mod_rec <-
      recipe(as.formula(paste(resp, "~ .")), data = train) %>%
      update_role(all_of(sp_id),
        new_role = "speciesID",
        old_role = "predictor"
      ) %>%
      update_role(all_of(responses[-resp_i]),
        old_role = "predictor",
        new_role = "alt_resp"
      )

    # Model workflow
    set.seed(123)
    mod_wf <- workflow() %>%
      add_recipe(mod_rec) %>%
      add_model(mod_spec)

    # Bayesian hyperparameter tuning
    tictoc::tic()
    set.seed(123)
    tuned_bayes <-
      tune_bayes(
        mod_wf,
        resamples = folds,
        param_info = rf_params,
        initial = 5,
        iter = 50,
        control = control_bayes(
          verbose = TRUE,
          no_improve = 10,
          seed = 123
        )
      )
    tictoc::toc()

    # Best parameters
    best <-
      tuned_bayes %>%
      select_best(metric = "rmse")

    # Final model fit
    final_wf <-
      mod_wf %>%
      finalize_workflow(best)

    # Performs the model evaluation on the testing data directly
    final_fit <-
      final_wf %>%
      last_fit(split_info)

    # Predictions on test data
    p <-
      predict(
        final_fit %>% tune::extract_workflow(),
        new_data = test
      ) %>%
      bind_cols(test %>%
        select(
          all_of(responses[resp_i]),
          verbatimIdentification,
          scientificName
        )) %>%
      setNames(
        c(
          ".pred",
          responses[resp_i],
          "verbatimIdentification",
          "scientificName"
        )
      ) %>%
      mutate(
        resid = .[[responses[resp_i]]] - .pred
      )

    # Explainable AI (xAI)
    fit <-
      final_fit %>%
      extract_fit_parsnip()

    explainer <-
      DALEXtra::explain_tidymodels(
        label = paste0(responses[resp_i], "_", atlas_i),
        fit,
        data = train %>%
          select(!any_of(c(
            responses,
            "verbatimIdentification",
            "scientificName"
          ))),
        y = train %>%
          pull(responses[resp_i])
      )

    # Save results to list
    results_response[[resp_i]] <-
      list(
        tuned_bayes = tuned_bayes,
        best = best,
        final_fit = final_fit,
        predictions = p,
        explainer = explainer,
        pd = model_profile(explainer,
          type = "partial",
          N = NULL,
          variables = explainer$data %>% names()
        ),
        md = model_diagnostics(explainer),
        mp = model_performance(explainer),
        vi = model_parts(explainer)
      )
  }
  results_atlas_i[[atlas_i]] <-
    results_response
}

saveRDS(results_atlas_i, here::here("Data/output/B_models/B_02_rf_res_atlas_split.rds"))
```
```{r}
#| echo: false
#| eval: true
results_atlas_i <-
  readRDS(here::here("Data/output/B_models/B_02_rf_res_atlas_split.rds"))
```
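For orientation, each element of `results_atlas_i` is one atlas and each sub-element one response; a sketch of how to inspect a single combination:

```{r}
#| eval: false
# Top-level components saved per model: tuning results, best
# parameters, final fit, predictions, and DALEX objects
str(results_atlas_i[[1]][[1]], max.level = 1)
results_atlas_i[[1]][[1]]$best
```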
## xAI plots

```{r}
#| eval: false
plot_list <- replicate(length(results_atlas_i), list())

for (atlas_i in seq_along(results_atlas_i)) {
  plot_list[[atlas_i]] <- list()
  for (resp_i in seq_along(results_atlas_i[[atlas_i]])) {
    this_response <- responses[resp_i]
    datasetID <- c(5, 6, 13, 26)[atlas_i]
    this_res <- results_atlas_i[[atlas_i]][[resp_i]]

    # List with all plots for saving as pdf
    plot_list[[atlas_i]][[resp_i]] <-
      list(
        pd =
          plot(this_res$pd) +
            ggthemes::theme_few(),
        mp =
          plot(this_res$mp) +
            ggthemes::theme_few(),
        md =
          plot(this_res$md %>%
            mutate(label = paste0(datasetID, "_", this_response))) +
            ggthemes::theme_few(),
        vi =
          plot(this_res$vi %>%
            mutate(label = paste0(datasetID, "_", this_response))) +
            ggthemes::theme_few()
      )
  }
}

pdf(
  file = here::here("Figures/B_models/xAI/B_02_xAI_separate_per_atlas.pdf"),
  onefile = TRUE
)
print(plot_list)
dev.off()
```
## Save results to csv

### Jaccard detailed performances

```{r}
# Packages
library(tidymodels) # rsample, workflows, yardstick, tune, etc.
library(ranger)     # for OOB stats
library(dplyr)
library(purrr)
library(tibble)
library(here)

# MSE helper; works across yardstick versions
mse_vec <- function(truth, estimate, na_rm = TRUE, ...) {
  mean((truth - estimate)^2, na.rm = na_rm)
}

# Helper to compute R2 / RMSE / MSE on *any* data frame
compute_set_metrics <- function(wflow, data, outcome_chr) {
  preds <- predict(wflow, data) %>% bind_cols(data)
  truth <- preds[[outcome_chr]]
  est <- preds$.pred
  tibble(
    r2 = yardstick::rsq_vec(truth, est),
    rmse = yardstick::rmse_vec(truth, est),
    mse = mse_vec(truth, est)
  )
}

# Summarize function
extract_all_metrics <- function(split, obj, outcome_chr, external_data = NULL) {
  # (a) get fitted workflow & OOB stats -----------------------------
  wf <- tune::extract_workflow(obj$final_fit)
  rf_fit <- extract_fit_parsnip(wf)$fit # ranger model
  oob_mse <- rf_fit$prediction.error
  oob_r2 <- rf_fit$r.squared
  oob_rmse <- sqrt(oob_mse)

  # (b) training / testing data from the split inside final_fit -----
  train_set <- split$train
  test_set <- split$test
  cv_folds <- split$folds

  # (c) "raw" (apparent) metrics ------------------------------------
  raw <- compute_set_metrics(wf, train_set, outcome_chr)

  # (d) CV metrics for each study region ----------------------------
  cv_tbl <-
    fit_resamples(
      wf,
      resamples = cv_folds,
      metrics = metric_set(rsq, rmse)
    ) %>%
    collect_metrics()
  cv_rmse <- cv_tbl %>% filter(.metric == "rmse") %>% pull(mean)
  cv_r2 <- cv_tbl %>% filter(.metric == "rsq") %>% pull(mean)
  cv_mse <- cv_rmse^2

  # (e) train / test -------------------------------------------------
  tr <- compute_set_metrics(wf, train_set, outcome_chr)
  te <- compute_set_metrics(wf, test_set, outcome_chr)

  # (f) optional external -------------------------------------------
  ext <- if (!is.null(external_data)) {
    compute_set_metrics(wf, external_data, outcome_chr)
  } else {
    tibble(r2 = NA_real_, rmse = NA_real_, mse = NA_real_)
  }

  # (g) tidy one row per context ------------------------------------
  tibble::tribble(
    ~context, ~r2, ~rmse, ~mse,
    "raw", raw$r2, raw$rmse, raw$mse,
    "oob", oob_r2, oob_rmse, oob_mse,
    "cv", cv_r2, cv_rmse, cv_mse,
    "train", tr$r2, tr$rmse, tr$mse,
    "test", te$r2, te$rmse, te$mse,
    "external", ext$r2, ext$rmse, ext$mse
  )
}
```
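An example call for a single atlas/response pair (assuming the objects from the modelling loop are still in memory):

```{r}
#| eval: false
# One row per validation context (raw, oob, cv, train, test, external)
extract_all_metrics(
  split = list_split_data[[1]],
  obj = results_atlas_i[[1]][[1]],
  outcome_chr = "Jaccard_dissim"
)
```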
```{r}
#| eval: false
# Set up variables for mapping performance results:
results_atlas <- results_atlas_i
responses <- c("Jaccard_dissim", "log_R2_1", "log_R2_1_per_year")

# external_df valid for the first two responses (adjust if needed)
external_df <-
  readRDS(here("Data/output/", "1_all_predictors_merged.rds")) %>%
  filter(samplingPeriodID == 2) %>%
  mutate(
    across(
      c(samplingPeriodID, datasetID), ~ as.factor(as.character(.))
    )
  ) %>%
  # join response columns & rename
  left_join(
    readRDS(here("Data/output/", "2_big_table_3.rds")) %>%
      select(
        datasetID, samplingPeriodID, verbatimIdentification,
        Jaccard_dissim, log_R3_2
      ) %>%
      rename(log_R2_1 = log_R3_2) %>%
      mutate(
        across(
          c(samplingPeriodID, datasetID), ~ as.factor(as.character(.))
        )
      )
  )

# Map performance results:
all_metrics <- purrr::imap_dfr(
  results_atlas, # iterate over atlases
  function(resp_list, atlas_idx) {
    purrr::imap_dfr(
      resp_list, # iterate over responses inside atlas
      function(obj, resp_i) {
        resp_name <- responses[resp_i]
        datasetID <- c("5", "6", "13", "26")[atlas_idx]
        message(datasetID)
        ext_data <-
          if (resp_name %in% c("Jaccard_dissim", "log_R2_1") &&
            datasetID %in% c("5", "13")) {
            external_df
          } else {
            NULL
          }
        split <- list_split_data[[atlas_idx]]
        extract_all_metrics(
          split = split,
          obj,
          outcome_chr = resp_name,
          external_data = ext_data
        ) %>%
          mutate(
            atlas = paste0("atlas_", atlas_idx),
            response = resp_name
          )
      }
    )
  }
)

# Set vectors for naming:
contexts <- c("raw", "oob", "cv", "test", "external")
metrics <- c("r2", "mse", "rmse")
cols <-
  map(contexts, ~ paste0(metrics, "_", .x)) %>%
  unlist()

all_metrics2 <-
  all_metrics %>%
  filter(context != "train") %>%
  pivot_wider(
    id_cols = c(response, atlas),
    names_from = context,
    values_from = c(r2, rmse, mse)
  ) %>%
  mutate(
    datasetID = as.factor(case_when(
      atlas == "atlas_1" ~ 5,
      atlas == "atlas_2" ~ 6,
      atlas == "atlas_3" ~ 13,
      atlas == "atlas_4" ~ 26,
      .default = NA
    ))
  ) %>%
  select(response, datasetID, all_of(cols)) %>%
  arrange(response)

all_metrics2

write.csv(
  all_metrics2,
  here("Data/output/results/B_02_detailed_rf_results_split.csv")
)
```
```{r}
#| echo: false
all_metrics2 <-
  read.csv(
    here("Data/output/results/B_02_detailed_rf_results_split.csv")
  )
```
```{r}
all_metrics2 %>%
  mutate(across(where(is.numeric), \(x) round(x, 2))) %>%
  kableExtra::kable()
```
```{r}
all_metrics2 %>%
  select(-X) %>% # drop the row-name column added by write.csv/read.csv
  pivot_longer(
    cols = c(-response, -datasetID),
    names_to = c(".value", "context"),
    names_sep = "_"
  ) %>%
  ggplot(
    aes(context, r2, colour = response, group = response)
  ) +
  geom_point(size = 3) +
  geom_line(linewidth = 0.8) +
  scale_y_continuous(limits = c(0, 1)) +
  labs(
    title = "Model Generalisation Across Validation Contexts",
    y = "R²",
    x = NULL
  ) +
  theme_minimal() +
  facet_wrap(~datasetID)
```
### Log ratio detailed performances

### Log ratio per year detailed performances

### Less detailed
```{r}
# Best tuning parameters per atlas x response, in loop order
# (atlas outer, response inner), so labels can be assigned with rep()
tuned_res_df <-
  purrr::map_dfr(
    results_atlas_i,
    \(atlas) purrr::map_dfr(atlas, "best")
  ) %>%
  select(-.config) %>%
  mutate(
    response = rep(responses, times = 4),
    datasetID = rep(c(5, 6, 13, 26), each = 3)
  )

# Test-set metrics from last_fit(): two rows (rmse, rsq) per model
mod_res <-
  purrr::map_dfr(
    results_atlas_i,
    \(atlas) purrr::map_dfr(atlas, \(res) res$final_fit$.metrics[[1]])
  ) %>%
  mutate(
    response = rep(rep(responses, each = 2), times = 4),
    datasetID = rep(c(5, 6, 13, 26), each = 6)
  ) %>%
  pivot_wider(
    id_cols = c(response, datasetID),
    names_from = .metric,
    values_from = .estimate
  ) %>%
  full_join(tuned_res_df) %>%
  mutate(
    cv = "3x10folds",
    tuning_method = "bayesian",
    model_type = "ranger"
  )
```
Check:

```{r}
kable(mod_res)
```

```{r}
#| eval: false
write.csv2(mod_res, here::here("Data/output/B_02_ranger_atlas_separate.csv"))
```
Interactions with H-stats:
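For reference, the statistic `hstats` estimates is Friedman's $H^2$; for a feature pair $(j, k)$ it compares the joint partial dependence with the sum of the marginal ones (centered versions, evaluated over the data):

$$
H^2_{jk} = \frac{\sum_{i=1}^{n}\left[\hat{F}_{jk}(x_{ij}, x_{ik}) - \hat{F}_{j}(x_{ij}) - \hat{F}_{k}(x_{ik})\right]^{2}}{\sum_{i=1}^{n}\hat{F}_{jk}(x_{ij}, x_{ik})^{2}}
$$

A value near 0 means the two features act additively; values near 1 mean that almost all of their joint effect's variability comes from the interaction.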
```{r}
#| eval: false
interaction_res <-
  replicate(length(results_atlas_i), list())

for (atlas_i in seq_along(results_atlas_i)) {
  interaction_res[[atlas_i]] <-
    list()
  this_atlas <-
    results_atlas_i[[atlas_i]]

  for (response_i in seq_along(this_atlas)) {
    # Prepare data for hstats function
    dd <-
      this_atlas[[response_i]]
    fit <-
      dd$final_fit %>%
      extract_fit_parsnip()
    preds <-
      dd$explainer$data

    # Run hstats
    interactions <-
      hstats(fit,
        X = preds,
        threeway_m = 5
      )

    # Save results
    interaction_res[[atlas_i]][[response_i]] <- interactions

    # Create the plot
    interaction_plot <- plot(interactions, which = 1:100) + theme_few()

    datasetID <- c(5, 6, 13, 26)[atlas_i]

    ggsave(
      filename =
        here::here(
          "Figures/B_models/interactions/",
          paste0(
            "B_02_",
            responses[response_i],
            "_",
            datasetID,
            "_interactions_hstats.svg"
          )
        ),
      plot = interaction_plot,
      device = "svg",
      width = 10,
      height = 10
    )
  }
}

interaction_res
saveRDS(interaction_res, here::here("Data/output/temp/B_02_Interaction_res_atlas_split.rds"))
```
```{r}
#| echo: false
#| eval: true
interaction_res <-
  readRDS(here::here("Data/output/temp/B_02_Interaction_res_atlas_split.rds"))
```
Save interactions in numbers (csv) for documentation:

```{r}
#| message: false
#| warning: false
#| error: false
res_list <- replicate(length(interaction_res), list())

for (atlas_i in seq_along(interaction_res)) {
  this_atlas <- interaction_res[[atlas_i]]
  res_list[[atlas_i]] <- list()
  datasetID <- c(5, 6, 13, 26)[atlas_i]

  for (resp_i in seq_along(this_atlas)) {
    this_response <- responses[resp_i]
    response_res <- this_atlas[[resp_i]]

    H2_all_df <-
      data.frame(
        row.names = NULL,
        datasetID = datasetID,
        response = this_response,
        variable = "all",
        test = "total_interaction_strength",
        H2 = summary(response_res)$h2[[1]]
      )
    H2_overall_df <-
      data.frame(
        row.names = NULL,
        datasetID = datasetID,
        response = this_response,
        variable = row.names(summary(response_res)$h2_overall[[1]]),
        test = "overall",
        H2 = summary(response_res)$h2_overall[[1]]
      )
    H2_pairwise_df <-
      data.frame(
        row.names = NULL,
        datasetID = datasetID,
        response = this_response,
        variable = row.names(summary(response_res)$h2_pairwise[[1]]),
        test = "pairwise",
        H2 = summary(response_res)$h2_pairwise[[1]]
      )
    H2_threeway_df <-
      data.frame(
        row.names = NULL,
        datasetID = datasetID,
        response = this_response,
        variable = row.names(summary(response_res)$h2_threeway[[1]]),
        test = "threeway",
        H2 = summary(response_res)$h2_threeway[[1]]
      )

    temp <-
      full_join(H2_all_df, H2_overall_df) %>%
      full_join(H2_pairwise_df) %>%
      full_join(H2_threeway_df)

    res_list[[atlas_i]][[resp_i]] <- temp
  }
}

res_df <- bind_rows(res_list)
kable(res_df)
```
```{r}
#| eval: false
write.csv2(res_df, here::here("Data/output/results/B_02_Hstats_split_data.csv"))
```
Variable importances:

```{r}
#| eval: false
vimp_list <- replicate(length(results_atlas_i), list())

for (atlas_i in seq_along(results_atlas_i)) {
  vimp_list[[atlas_i]] <- list()
  this_atlas <- results_atlas_i[[atlas_i]]
  datasetID <- c(5, 6, 13, 26)[atlas_i]

  for (i in seq_along(this_atlas)) {
    this_response <- responses[i]
    fit <-
      this_atlas[[i]]$final_fit %>%
      extract_fit_engine()
    vimp <-
      fit$variable.importance

    vimp_list[[atlas_i]][[i]] <- data.frame(
      datasetID = datasetID,
      variable = names(vimp),
      importance = vimp,
      row.names = NULL,
      response = this_response,
      importance_mode = fit$importance.mode
    )
  }
}

imp_df <-
  vimp_list %>%
  bind_rows()

write.csv2(imp_df, here::here("Data/output/results/B_02_ranger_vimp_split_data.csv"))
```
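A quick visual check of the permutation importances per atlas and response (a sketch, not saved output; assumes `imp_df` is in memory):

```{r}
#| eval: false
# One panel per atlas x response; bars ordered by importance
imp_df %>%
  ggplot(aes(importance, reorder(variable, importance))) +
  geom_col() +
  facet_grid(datasetID ~ response, scales = "free_x") +
  labs(x = "Permutation importance", y = NULL)
```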