D - 02: Figure 1

Libraries

Code
suppressPackageStartupMessages({
  
library(here)

source(here::here("Code/00_Configuration.R"))

package_list <- 
  c(package_list,  
    "ranger", 
    "tidymodels", 
    "caret", 
    "skimr", 
    "DALEX",
    "DALEXtra")

lapply(package_list, require, character = TRUE)
tidymodels_prefer()

})

rm(list = ls())

Plot settings

Color vector for Log Ratio

Code
my_cols_LR <- c(
  "strong decrease [< log(0.5)]" = "#d7191c",
  "weak decrease [log(0.5) - log (0.9)]" = "#fdae61",
  "stable [log(0.9) - log(1.1)]" = "#e0e0e0",
  "weak increase [log(1.1) - log(2)]" = "#a6d96a",
  "strong increase [> log(2)]" = "#1a9641"
)

Color vector for Jaccard

Code
my_cols_J <- c(
  "stable (< 0.1)" = "#B35806",
  "weak turnover (0.1 - 0.25)" = "#F1A340",
  "weak intermediate turnover (0.25 - 0.5)" = "#FEE0B6",
  "strong intermediate turnover (0.5 - 0.75)" = "#D8DAEB",
  "strong turnover (> 0.75)" = "#998EC3",
  "complete turnover (> 0.9)" = "#542788"
)

Standard Theme

Code
common_theme <- theme_classic() +
  theme(
    axis.line = element_line(colour = "azure4", linetype = "solid"),
    axis.ticks = element_line(colour = "gray13"),
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 16),
    axis.text.x = element_text(vjust = 0.9, hjust = 0.9),
    legend.position = "none"
  )

Part 1: Get model results

Set variable vectors for hypotheses

Code
sp_id <-
  c("verbatimIdentification", 
    "scientificName")

H1 <-
  c("Mass", 
    "GlobRangeSize_km2", 
    "Migration", 
    "Habitat_5", 
    "Generalism", 
    "Threatened", 
    "pd")

H2 <-
  c("D_AOO_a", 
    "mean_lnLac", 
    "AOO", 
    "joincount_delta", 
    "circNorm", 
    "minDist_toBorder_centr")

H3 <-
  c("datasetID")

predictors <-
  c(H1, H2, H3)

responses <-
  c("Jaccard_dissim", 
    "log_R2_1", 
    "log_R2_1_per_year" # yearly rate
    )

Read/Make model data

Reduce to model variables

Code
dta <- 
  readRDS(
    here::here(
      "Data/output/1_all_predictors_merged.rds")
    ) %>%
  select(all_of(c(sp_id, responses, H3, H1, H2, "samplingPeriodID"))) %>%
  ungroup()

Load models

Code
all_models <-
  readRDS(here::here("Data/output/B_models/B_01_list_all_results_rf.rds"))

fit_J <- 
  all_models$res[[1]] %>%
  extract_fit_parsnip()

fit_lnRR <- 
  all_models$res[[2]] %>%
  extract_fit_parsnip()

fit_lnRR_y <- 
  all_models$res[[3]] %>%
  extract_fit_parsnip()

Predictions from the models:

Code
comp_df <- dta %>%
  filter(samplingPeriodID == 2) %>%
  select(
    -samplingPeriodID,
    -Jaccard_dissim,
    -log_R2_1,
    -log_R2_1_per_year)

comp_df$Jacc_pred <-
  round(
    stats::predict(
      all_models$res[[1]] %>%
        tune::extract_workflow(),
      new_data = comp_df
    ),
    4
  )$.pred

comp_df$LogRatio_pred <-
  round(
    predict(
      all_models$res[[2]] %>%
        tune::extract_workflow(),
      new_data = comp_df
    ),
    4
  )$.pred

comp_df$LogRatio_y_pred <-
  round(
    predict(
      all_models$res[[3]] %>%
        tune::extract_workflow(),
      new_data = comp_df
    ),
    4
  )$.pred

checks:

Code
comp_df %>%
  select(verbatimIdentification, datasetID, Jacc_pred, LogRatio_y_pred, LogRatio_pred) %>%
  head()
   verbatimIdentification datasetID Jacc_pred LogRatio_y_pred LogRatio_pred
1 Nucifraga caryocatactes         5    0.4337          0.0017        0.0305
2      Anas platyrhynchos         5    0.0653          0.0034        0.0683
3         Aythya fuligula         5    0.2890          0.0018        0.0153
4 Acrocephalus scirpaceus         5    0.2798          0.0030        0.0600
5       Dryocopus martius         5    0.0637          0.0007        0.0081
6         Serinus serinus         5    0.0107          0.0002        0.0011

Merge predictions to the data

Code
dat_mod <- full_join(comp_df, dta) %>%
  mutate(
    Jaccard = case_when(
      samplingPeriodID == 2 ~ Jacc_pred,
      .default = Jaccard_dissim
    ),
    log_R2_1 = case_when(
      samplingPeriodID == 2 ~ LogRatio_pred,
      .default = log_R2_1
    ),
    log_R2_1_y = case_when(
      samplingPeriodID == 2 ~ LogRatio_y_pred,
      .default = log_R2_1_per_year
    )
  ) %>%
  select(datasetID, samplingPeriodID, verbatimIdentification, log_R2_1, log_R2_1_y, Jaccard)
Joining with `by = join_by(verbatimIdentification, scientificName, datasetID,
Mass, GlobRangeSize_km2, Migration, Habitat_5, Generalism, Threatened, pd,
D_AOO_a, mean_lnLac, AOO, joincount_delta, circNorm, minDist_toBorder_centr)`

Checks:

Code
dat_mod %>%
  filter(samplingPeriodID == 1 & Jaccard != 0) %>%
  glimpse()
Rows: 1,045
Columns: 6
$ datasetID              <fct> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,…
$ samplingPeriodID       <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ verbatimIdentification <chr> "Ciconia nigra", "Carduelis carduelis", "Passer…
$ log_R2_1               <dbl> 0.231, 0.009, -0.017, 0.018, 0.177, -0.275, -0.…
$ log_R2_1_y             <dbl> 0.012, 0.000, -0.001, 0.001, 0.009, -0.014, 0.0…
$ Jaccard                <dbl> 0.369, 0.024, 0.063, 0.140, 0.686, 0.530, 0.117…