D_02_Figure_1.R

D - 02: Figure 1

Libraries

Code

suppressPackageStartupMessages({
  
library(here)

source(here::here("Code/00_Configuration.R"))

package_list <- 
  c(package_list,  
    "ranger", 
    "tidymodels", 
    "caret", 
    "skimr", 
    "DALEX",
    "DALEXtra")

lapply(package_list, require, character = TRUE)
tidymodels_prefer()

})

rm(list = ls())

Plot settings

Color vector for Log Ratio

Code

my_cols_LR <- c(
  "strong decrease [< log(0.5)]" = "#d7191c",
  "weak decrease [log(0.5) - log (0.9)]" = "#fdae61",
  "stable [log(0.9) - log(1.1)]" = "#e0e0e0",
  "weak increase [log(1.1) - log(2)]" = "#a6d96a",
  "strong increase [> log(2)]" = "#1a9641"
)

Color vector for Jaccard

Code

my_cols_J <- c(
  "stable (< 0.1)" = "#B35806",
  "weak turnover (0.1 - 0.25)" = "#F1A340",
  "weak intermediate turnover (0.25 - 0.5)" = "#FEE0B6",
  "strong intermediate turnover (0.5 - 0.75)" = "#D8DAEB",
  "strong turnover (> 0.75)" = "#998EC3",
  "complete turnover (> 0.9)" = "#542788"
)

Standard Theme

Code

common_theme <- theme_classic() +
  theme(
    axis.line = element_line(colour = "azure4", linetype = "solid"),
    axis.ticks = element_line(colour = "gray13"),
    axis.title = element_text(size = 20),
    axis.text = element_text(size = 16),
    axis.text.x = element_text(vjust = 0.9, hjust = 0.9),
    legend.position = "none"
  )

Part 1: Get model results

Set variable vectors for hypotheses

Code

sp_id <-
  c("verbatimIdentification", 
    "scientificName")

H1 <-
  c("Mass", 
    "GlobRangeSize_km2", 
    "Migration", 
    "Habitat_5", 
    "Generalism", 
    "Threatened", 
    "pd")

H2 <-
  c("D_AOO_a", 
    "mean_lnLac", 
    "AOO", 
    "joincount_delta", 
    "circNorm", 
    "minDist_toBorder_centr")

H3 <-
  c("datasetID")

predictors <-
  c(H1, H2, H3)

responses <-
  c("Jaccard_dissim", 
    "log_R2_1", 
    "log_R2_1_per_year" # yearly rate
    )

Read/Make model data

Reduce to model variables

Code

dta <- 
  readRDS(
    here::here(
      "Data/output/1_all_predictors_merged.rds")
    ) %>%
  select(all_of(c(sp_id, responses, H3, H1, H2, "samplingPeriodID"))) %>%
  ungroup()

Load models

Code

all_models <-
  readRDS(here::here("Data/output/B_models/B_01_list_all_results_rf.rds"))

fit_J <- 
  all_models$res[[1]] %>%
  extract_fit_parsnip()

fit_lnRR <- 
  all_models$res[[2]] %>%
  extract_fit_parsnip()

fit_lnRR_y <- 
  all_models$res[[3]] %>%
  extract_fit_parsnip()

Predictions from the models:

Code

comp_df <- dta %>%
  filter(samplingPeriodID == 2) %>%
  select(
    -samplingPeriodID,
    -Jaccard_dissim,
    -log_R2_1,
    -log_R2_1_per_year)

comp_df$Jacc_pred <-
  round(
    stats::predict(
      all_models$res[[1]] %>%
        tune::extract_workflow(),
      new_data = comp_df
    ),
    4
  )$.pred

comp_df$LogRatio_pred <-
  round(
    predict(
      all_models$res[[2]] %>%
        tune::extract_workflow(),
      new_data = comp_df
    ),
    4
  )$.pred

comp_df$LogRatio_y_pred <-
  round(
    predict(
      all_models$res[[3]] %>%
        tune::extract_workflow(),
      new_data = comp_df
    ),
    4
  )$.pred

checks:

Code

comp_df %>%
  select(verbatimIdentification, datasetID, Jacc_pred, LogRatio_y_pred, LogRatio_pred) %>%
  head()

   verbatimIdentification datasetID Jacc_pred LogRatio_y_pred LogRatio_pred
1 Nucifraga caryocatactes         5    0.4337          0.0017        0.0305
2      Anas platyrhynchos         5    0.0653          0.0034        0.0683
3         Aythya fuligula         5    0.2890          0.0018        0.0153
4 Acrocephalus scirpaceus         5    0.2798          0.0030        0.0600
5       Dryocopus martius         5    0.0637          0.0007        0.0081
6         Serinus serinus         5    0.0107          0.0002        0.0011

Merge predictions to the data

Code

dat_mod <- full_join(comp_df, dta) %>%
  mutate(
    Jaccard = case_when(
      samplingPeriodID == 2 ~ Jacc_pred,
      .default = Jaccard_dissim
    ),
    log_R2_1 = case_when(
      samplingPeriodID == 2 ~ LogRatio_pred,
      .default = log_R2_1
    ),
    log_R2_1_y = case_when(
      samplingPeriodID == 2 ~ LogRatio_y_pred,
      .default = log_R2_1_per_year
    )
  ) %>%
  select(datasetID, samplingPeriodID, verbatimIdentification, log_R2_1, log_R2_1_y, Jaccard)

Joining with `by = join_by(verbatimIdentification, scientificName, datasetID,
Mass, GlobRangeSize_km2, Migration, Habitat_5, Generalism, Threatened, pd,
D_AOO_a, mean_lnLac, AOO, joincount_delta, circNorm, minDist_toBorder_centr)`

Checks:

Code

dat_mod %>%
  filter(samplingPeriodID == 1 & Jaccard != 0) %>%
  glimpse()

Rows: 1,045
Columns: 6
$ datasetID              <fct> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,…
$ samplingPeriodID       <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ verbatimIdentification <chr> "Ciconia nigra", "Carduelis carduelis", "Passer…
$ log_R2_1               <dbl> 0.231, 0.009, -0.017, 0.018, 0.177, -0.275, -0.…
$ log_R2_1_y             <dbl> 0.012, 0.000, -0.001, 0.001, 0.009, -0.014, 0.0…
$ Jaccard                <dbl> 0.369, 0.024, 0.063, 0.140, 0.686, 0.530, 0.117…

Part 2: Make trends categorical

set factors and cutoff values for categories:

Define factor levels

Code

factor_levels_LR <- c(
  "strong decrease [< log(0.5)]",
  "weak decrease [log(0.5) - log (0.9)]",
  "stable [log(0.9) - log(1.1)]",
  "weak increase [log(1.1) - log(2)]",
  "strong increase [> log(2)]"
)

Define the cutoff points for the ranges in log scale

Code

cutoff_points_LR <- c(-Inf, log(0.5), log(0.9), log(1.1), log(2), Inf)

Define factor levels

Code

factor_levels_J <- c(
  "stable (< 0.1)",
  "weak turnover (0.1 - 0.25)",
  "weak intermediate turnover (0.25 - 0.5)",
  "strong intermediate turnover (0.5 - 0.75)",
  "strong turnover (> 0.75)",
  "complete turnover (> 0.9)"
)

Define the cutoff points for the ranges in log scale

Code

cutoff_points_J <- c(-Inf, 0.1, 0.25, 0.5, 0.75, 0.9, Inf)

make copy from data:

Code

dat_cat <- dta

prepare data for plotting:

Convert the log ratios and Jaccard to factor variables based on the ranges

Code

dat_cat$trend_LR <-
  cut(dat_cat$log_R2_1,
    breaks = cutoff_points_LR,
    labels = factor_levels_LR
  )

dat_cat$trend_J <-
  cut(dat_cat$Jaccard,
    breaks = cutoff_points_J,
    labels = factor_levels_J
  )

dat_cat <- dat_cat %>%
  rstatix::reorder_levels(trend_LR, factor_levels_LR) %>%
  rstatix::reorder_levels(trend_J, factor_levels_J) %>%
  na.omit()

dat_cat_wide <- dat_cat %>%
  tidyr::pivot_wider(
    id_cols = c(datasetID, verbatimIdentification),
    names_from = samplingPeriodID,
    values_from = c(log_R2_1, trend_LR, Jaccard_dissim, trend_J)
  )

print(colnames(dat_cat_wide))

 [1] "datasetID"              "verbatimIdentification" "log_R2_1_2"            
 [4] "log_R2_1_1"             "trend_LR_2"             "trend_LR_1"            
 [7] "Jaccard_dissim_2"       "Jaccard_dissim_1"       "trend_J_2"             
[10] "trend_J_1"

Code

lookup <- c(
  "LR_Observed" = "log_R2_1_1",
  "trend_LR_Observed" = "trend_LR_1",
  "LR_Predicted" = "log_R2_1_2",
  "trend_LR_Predicted" = "trend_LR_2",
  "J_Observed" = "Jaccard_dissim_1",
  "trend_J_Observed" = "trend_J_1",
  "J_Predicted" = "Jaccard_dissim_2",
  "trend_J_Predicted" = "trend_J_2"
)
dat_cat_wide <- dat_cat_wide %>%
  dplyr::rename(all_of(lookup)) %>%
  arrange(datasetID)



names_data <- unique(dat_cat_wide$datasetID)

Reorder columns

Code

dat_cat_wide <- dat_cat_wide[, c(
  "datasetID",
  "verbatimIdentification",
  "J_Predicted",
  "trend_J_Predicted",
  "J_Observed",
  "trend_J_Observed",
  "LR_Predicted",
  "trend_LR_Predicted",
  "LR_Observed",
  "trend_LR_Observed"
)]

Plots:

Plot number of species per trend per dataset

Code

fig1_LR_past <- dat_cat %>%
  filter(samplingPeriodID == 1) %>%
  group_by(datasetID, trend_LR) %>%
  summarise(n_sp = n_distinct(verbatimIdentification)) %>%
  ggplot(aes(y = n_sp, x = trend_LR, fill = trend_LR)) +
  geom_col(col = "lightgrey", show.legend = FALSE) +
  ylim(0, 260) +
  facet_grid(~datasetID) +
  ggthemes::theme_few() +
  scale_fill_manual(values = my_cols_LR) +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    strip.text = element_text(size = 10)
  ) +
  labs(
    x = "Occupancy trend (Log Ratio)", y = "Number of Species",
    fill = NULL
  )

`summarise()` has grouped output by 'datasetID'. You can override using the
`.groups` argument.

Code

fig1_LR_past

Code

# export::graph2ppt(fig1_LR_past, here::here("Figures/A_data/D_02_Fig1_LnRR_Histograms.pptx"), width = 8, height = 4) ggsave("Figures/A_data/D_02_LogRatio_Histogram.svg", fig1_LR_past, width = 4, height = 2)

Code

fig1_J_past <- dat_cat %>%
  filter(samplingPeriodID == 1) %>%
  group_by(datasetID, trend_J) %>%
  summarise(n_sp = n_distinct(verbatimIdentification)) %>%
  ggplot(aes(y = n_sp, x = trend_J, fill = trend_J)) +
  geom_col(col = "lightgrey", show.legend = FALSE) +
  facet_grid(~datasetID) +
  ggthemes::theme_few() +
  ylim(0, 260) +
  scale_fill_manual(values = my_cols_J) +
  theme(
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank(),
    strip.text = element_text(size = 10)
  ) +
  labs(
    x = "Occupancy trend (Jaccard)", y = "Number of Species",
    fill = NULL
  )

`summarise()` has grouped output by 'datasetID'. You can override using the
`.groups` argument.

Code

fig1_J_past

Code

# export::graph2ppt(fig1_J_past, here::here("Figures/A_data/D_02_Fig1_J_Histograms.pptx"), width = 8, height = 4) ggsave("Figures/A_data/D_02_J_Histogram.svg", fig1_J_past, width = 4, height = 2)

Code

legend_J <- ggpubr::get_legend(
  dat_cat %>%
    filter(samplingPeriodID == 1) %>%
    group_by(datasetID, trend_J) %>%
    summarise(n_sp = n_distinct(verbatimIdentification)) %>%
    ggplot(aes(y = n_sp, x = trend_J, fill = trend_J)) +
    geom_col(col = "lightgrey", show.legend = TRUE) +
    facet_grid(~datasetID) +
    ggthemes::theme_few() +
    ylim(0, 260) +
    scale_fill_manual(values = my_cols_J) +
    theme(
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank(),
      strip.text = element_text(size = 10)
    ) +
    labs(
      x = "Occupancy trend (Jaccard)", y = "Number of Species",
      fill = NULL
    ) +
    theme(legend.position = "bottom")
)

`summarise()` has grouped output by 'datasetID'. You can override using the
`.groups` argument.

Code

plot(legend_J)

Code

# ggsave(filename = "D_02_Fig1_b_legend.svg", path = here::here("Figures/C_validation/"), width = 15)

Code

legend_lnR <- ggpubr::get_legend(
  dat_cat %>%
    filter(samplingPeriodID == 1) %>%
    group_by(datasetID, trend_LR) %>%
    summarise(n_sp = n_distinct(verbatimIdentification)) %>%
    ggplot(aes(y = n_sp, x = trend_LR, fill = trend_LR)) +
    geom_col(col = "lightgrey", show.legend = TRUE) +
    ylim(0, 260) +
    facet_grid(~datasetID) +
    ggthemes::theme_few() +
    scale_fill_manual(values = my_cols_LR) +
    theme(
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank(),
      strip.text = element_text(size = 10)
    ) +
    labs(
      x = "Occupancy trend (Log Ratio)", y = "Number of Species",
      fill = NULL
    ) +
    theme(legend.position = "bottom")
)

`summarise()` has grouped output by 'datasetID'. You can override using the
`.groups` argument.

Code

plot(legend_lnR)

Code

# ggsave(filename = "D_02_Fig1_a_legend.svg", path = here::here("Figures/C_validation/"), width = 15)