---
title: "D_02_Figure_1.R"
format:
html:
self-contained: true
embed-resources: true
toc: true # optional: adds a table of contents
theme: cosmo # optional: Bootstrap theme
code-fold: show # optional: collapsible code blocks
code-tools: true # optional: adds copy/paste buttons
toc-depth: 4
editor:
markdown:
wrap: 72
---
# D - 02: Figure 1
```{r}
#| echo: false
#| message: false
#| warning: false
#| results: "hide"
#| output: false
rm(list = ls())
gc()
```
## Libraries
```{r}
#| message: false
#| warning: false
#| results: "hide"
#| output: false
suppressPackageStartupMessages({
library(here)
source(here::here("Code/00_Configuration.R"))
package_list <-
c(package_list,
"ranger",
"tidymodels",
"caret",
"skimr",
"DALEX",
"DALEXtra")
lapply(package_list, require, character = TRUE)
tidymodels_prefer()
})
rm(list = ls())
```
## Plot settings
Color vector for Log Ratio
```{r}
my_cols_LR <- c(
"strong decrease [< log(0.5)]" = "#d7191c",
"weak decrease [log(0.5) - log (0.9)]" = "#fdae61",
"stable [log(0.9) - log(1.1)]" = "#e0e0e0",
"weak increase [log(1.1) - log(2)]" = "#a6d96a",
"strong increase [> log(2)]" = "#1a9641"
)
```
Color vector for Jaccard
```{r}
my_cols_J <- c(
"stable (< 0.1)" = "#B35806",
"weak turnover (0.1 - 0.25)" = "#F1A340",
"weak intermediate turnover (0.25 - 0.5)" = "#FEE0B6",
"strong intermediate turnover (0.5 - 0.75)" = "#D8DAEB",
"strong turnover (> 0.75)" = "#998EC3",
"complete turnover (> 0.9)" = "#542788"
)
```
Standard Theme
```{r}
common_theme <- theme_classic() +
theme(
axis.line = element_line(colour = "azure4", linetype = "solid"),
axis.ticks = element_line(colour = "gray13"),
axis.title = element_text(size = 20),
axis.text = element_text(size = 16),
axis.text.x = element_text(vjust = 0.9, hjust = 0.9),
legend.position = "none"
)
```
# Part 1: Get model results
## Set variable vectors for hypotheses
```{r}
sp_id <-
c("verbatimIdentification",
"scientificName")
H1 <-
c("Mass",
"GlobRangeSize_km2",
"Migration",
"Habitat_5",
"Generalism",
"Threatened",
"pd")
H2 <-
c("D_AOO_a",
"mean_lnLac",
"AOO",
"joincount_delta",
"circNorm",
"minDist_toBorder_centr")
H3 <-
c("datasetID")
predictors <-
c(H1, H2, H3)
responses <-
c("Jaccard_dissim",
"log_R2_1",
"log_R2_1_per_year" # yearly rate
)
```
## Read/Make model data
Reduce to model variables
```{r}
#| message: false
#| warning: false
#| error: false
dta <-
readRDS(
here::here(
"Data/output/1_all_predictors_merged.rds")
) %>%
select(all_of(c(sp_id, responses, H3, H1, H2, "samplingPeriodID"))) %>%
ungroup()
```
## Load models
```{r}
all_models <-
readRDS(here::here("Data/output/B_models/B_01_list_all_results_rf.rds"))
fit_J <-
all_models$res[[1]] %>%
extract_fit_parsnip()
fit_lnRR <-
all_models$res[[2]] %>%
extract_fit_parsnip()
fit_lnRR_y <-
all_models$res[[3]] %>%
extract_fit_parsnip()
```
Predictions from the models:
```{r}
comp_df <- dta %>%
filter(samplingPeriodID == 2) %>%
select(
-samplingPeriodID,
-Jaccard_dissim,
-log_R2_1,
-log_R2_1_per_year)
comp_df$Jacc_pred <-
round(
stats::predict(
all_models$res[[1]] %>%
tune::extract_workflow(),
new_data = comp_df
),
4
)$.pred
comp_df$LogRatio_pred <-
round(
predict(
all_models$res[[2]] %>%
tune::extract_workflow(),
new_data = comp_df
),
4
)$.pred
comp_df$LogRatio_y_pred <-
round(
predict(
all_models$res[[3]] %>%
tune::extract_workflow(),
new_data = comp_df
),
4
)$.pred
```
checks:
```{r}
comp_df %>%
select(verbatimIdentification, datasetID, Jacc_pred, LogRatio_y_pred, LogRatio_pred) %>%
head()
```
Merge predictions to the data
```{r}
dat_mod <- full_join(comp_df, dta) %>%
mutate(
Jaccard = case_when(
samplingPeriodID == 2 ~ Jacc_pred,
.default = Jaccard_dissim
),
log_R2_1 = case_when(
samplingPeriodID == 2 ~ LogRatio_pred,
.default = log_R2_1
),
log_R2_1_y = case_when(
samplingPeriodID == 2 ~ LogRatio_y_pred,
.default = log_R2_1_per_year
)
) %>%
select(datasetID, samplingPeriodID, verbatimIdentification, log_R2_1, log_R2_1_y, Jaccard)
```
Checks:
```{r}
dat_mod %>%
filter(samplingPeriodID == 1 & Jaccard != 0) %>%
glimpse()
```
# Part 2: Make trends categorical
set factors and cutoff values for categories:
Define factor levels
```{r}
factor_levels_LR <- c(
"strong decrease [< log(0.5)]",
"weak decrease [log(0.5) - log (0.9)]",
"stable [log(0.9) - log(1.1)]",
"weak increase [log(1.1) - log(2)]",
"strong increase [> log(2)]"
)
```
Define the cutoff points for the ranges in log scale
```{r}
cutoff_points_LR <- c(-Inf, log(0.5), log(0.9), log(1.1), log(2), Inf)
```
Define factor levels
```{r}
factor_levels_J <- c(
"stable (< 0.1)",
"weak turnover (0.1 - 0.25)",
"weak intermediate turnover (0.25 - 0.5)",
"strong intermediate turnover (0.5 - 0.75)",
"strong turnover (> 0.75)",
"complete turnover (> 0.9)"
)
```
Define the cutoff points for the ranges in log scale
```{r}
cutoff_points_J <- c(-Inf, 0.1, 0.25, 0.5, 0.75, 0.9, Inf)
```
make copy from data:
```{r}
dat_cat <- dta
```
prepare data for plotting:
Convert the log ratios and Jaccard to factor variables based on the ranges
```{r}
dat_cat$trend_LR <-
cut(dat_cat$log_R2_1,
breaks = cutoff_points_LR,
labels = factor_levels_LR
)
dat_cat$trend_J <-
cut(dat_cat$Jaccard,
breaks = cutoff_points_J,
labels = factor_levels_J
)
dat_cat <- dat_cat %>%
rstatix::reorder_levels(trend_LR, factor_levels_LR) %>%
rstatix::reorder_levels(trend_J, factor_levels_J) %>%
na.omit()
dat_cat_wide <- dat_cat %>%
tidyr::pivot_wider(
id_cols = c(datasetID, verbatimIdentification),
names_from = samplingPeriodID,
values_from = c(log_R2_1, trend_LR, Jaccard_dissim, trend_J)
)
print(colnames(dat_cat_wide))
lookup <- c(
"LR_Observed" = "log_R2_1_1",
"trend_LR_Observed" = "trend_LR_1",
"LR_Predicted" = "log_R2_1_2",
"trend_LR_Predicted" = "trend_LR_2",
"J_Observed" = "Jaccard_dissim_1",
"trend_J_Observed" = "trend_J_1",
"J_Predicted" = "Jaccard_dissim_2",
"trend_J_Predicted" = "trend_J_2"
)
dat_cat_wide <- dat_cat_wide %>%
dplyr::rename(all_of(lookup)) %>%
arrange(datasetID)
names_data <- unique(dat_cat_wide$datasetID)
```
Reorder columns
```{r}
dat_cat_wide <- dat_cat_wide[, c(
"datasetID",
"verbatimIdentification",
"J_Predicted",
"trend_J_Predicted",
"J_Observed",
"trend_J_Observed",
"LR_Predicted",
"trend_LR_Predicted",
"LR_Observed",
"trend_LR_Observed"
)]
```
## Plots:
Plot number of species per trend per dataset
```{r}
fig1_LR_past <- dat_cat %>%
filter(samplingPeriodID == 1) %>%
group_by(datasetID, trend_LR) %>%
summarise(n_sp = n_distinct(verbatimIdentification)) %>%
ggplot(aes(y = n_sp, x = trend_LR, fill = trend_LR)) +
geom_col(col = "lightgrey", show.legend = FALSE) +
ylim(0, 260) +
facet_grid(~datasetID) +
ggthemes::theme_few() +
scale_fill_manual(values = my_cols_LR) +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
strip.text = element_text(size = 10)
) +
labs(
x = "Occupancy trend (Log Ratio)", y = "Number of Species",
fill = NULL
)
fig1_LR_past
# export::graph2ppt(fig1_LR_past, here::here("Figures/A_data/D_02_Fig1_LnRR_Histograms.pptx"), width = 8, height = 4) ggsave("Figures/A_data/D_02_LogRatio_Histogram.svg", fig1_LR_past, width = 4, height = 2)
```
```{r}
fig1_J_past <- dat_cat %>%
filter(samplingPeriodID == 1) %>%
group_by(datasetID, trend_J) %>%
summarise(n_sp = n_distinct(verbatimIdentification)) %>%
ggplot(aes(y = n_sp, x = trend_J, fill = trend_J)) +
geom_col(col = "lightgrey", show.legend = FALSE) +
facet_grid(~datasetID) +
ggthemes::theme_few() +
ylim(0, 260) +
scale_fill_manual(values = my_cols_J) +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
strip.text = element_text(size = 10)
) +
labs(
x = "Occupancy trend (Jaccard)", y = "Number of Species",
fill = NULL
)
fig1_J_past
# export::graph2ppt(fig1_J_past, here::here("Figures/A_data/D_02_Fig1_J_Histograms.pptx"), width = 8, height = 4) ggsave("Figures/A_data/D_02_J_Histogram.svg", fig1_J_past, width = 4, height = 2)
```
```{r}
legend_J <- ggpubr::get_legend(
dat_cat %>%
filter(samplingPeriodID == 1) %>%
group_by(datasetID, trend_J) %>%
summarise(n_sp = n_distinct(verbatimIdentification)) %>%
ggplot(aes(y = n_sp, x = trend_J, fill = trend_J)) +
geom_col(col = "lightgrey", show.legend = TRUE) +
facet_grid(~datasetID) +
ggthemes::theme_few() +
ylim(0, 260) +
scale_fill_manual(values = my_cols_J) +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
strip.text = element_text(size = 10)
) +
labs(
x = "Occupancy trend (Jaccard)", y = "Number of Species",
fill = NULL
) +
theme(legend.position = "bottom")
)
plot(legend_J)
# ggsave(filename = "D_02_Fig1_b_legend.svg", path = here::here("Figures/C_validation/"), width = 15)
```
```{r}
legend_lnR <- ggpubr::get_legend(
dat_cat %>%
filter(samplingPeriodID == 1) %>%
group_by(datasetID, trend_LR) %>%
summarise(n_sp = n_distinct(verbatimIdentification)) %>%
ggplot(aes(y = n_sp, x = trend_LR, fill = trend_LR)) +
geom_col(col = "lightgrey", show.legend = TRUE) +
ylim(0, 260) +
facet_grid(~datasetID) +
ggthemes::theme_few() +
scale_fill_manual(values = my_cols_LR) +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
strip.text = element_text(size = 10)
) +
labs(
x = "Occupancy trend (Log Ratio)", y = "Number of Species",
fill = NULL
) +
theme(legend.position = "bottom")
)
plot(legend_lnR)
# ggsave(filename = "D_02_Fig1_a_legend.svg", path = here::here("Figures/C_validation/"), width = 15)
```