B_04_Model_performance_figures.R

B - 04: Model performance figures

Libraries

Code

suppressPackageStartupMessages({
  library(here)

  source(here::here("Code/00_Configuration.R"))

  package_list <-
    c(
      package_list,
      "ranger",
      "tidymodels",
      "caret",
      "skimr",
      "DALEXtra",
      "DALEX",
      "ggthemes",
      "hstats",
      "tidytext"
    )

  lapply(package_list, require, character = TRUE)

  tidymodels_prefer()
})

rm(list = ls())

Set variable vectors for hypotheses

Code

sp_id <-
  c(
    "verbatimIdentification",
    "scientificName"
  )

H1 <-
  c(
    "Mass",
    "GlobRangeSize_km2",
    "Migration",
    "Habitat_5",
    "Generalism",
    "Threatened",
    "pd"
  )

H2 <-
  c(
    "D_AOO_a",
    "mean_lnLac",
    "AOO",
    "joincount_delta",
    "circNorm",
    "minDist_toBorder_centr"
  )

H3 <-
  c("datasetID")

predictors <-
  c(H1, H2, H3)

responses <-
  c(
    "Jaccard_dissim",
    "log_R2_1",
    "log_R2_1_per_year" # yearly rate
  )

Part 1: Mp, Vip, Hstats

Read result .csvs back in (csv2 with ; as separator)

Code

# pooled model results
mp_full <-
  read.csv2(
    here::here(
      "Data/output/results/",
      "B_01_ranger_all_data.csv"
    )
  )
vip_full <-
  read.csv2(
    here::here(
      "Data/output/results/",
      "B_01_ranger_vimp_all_data.csv"
    )
  )
int_full <-
  read.csv2(
    here::here(
      "Data/output/results/",
      "B_01_Hstats_all_data.csv"
    )
  )


# split by dataset
mp_split <-
  read.csv2(
    here::here(
      "Data/output/results/",
      "B_02_ranger_atlas_separate.csv"
    )
  )
vip_split <-
  read.csv2(
    here::here(
      "Data/output/results/",
      "B_02_ranger_vimp_split_data.csv"
    )
  )
int_split <-
  read.csv2(
    here::here(
      "Data/output/results/",
      "B_02_Hstats_split_data.csv"
    )
  )

1.1 Pooled Model

1.1.1 Model Performance

Code

p_1 <-
  mp_full %>%
  # reformat to long:
  pivot_longer(
    cols = c(rmse, rsq)
  ) %>%
  # Plot
  ggplot() +
  geom_point(
    aes(x = response, y = value, shape = name),
    cex = 2,
    col = "black",
    bg = "black"
  ) +
  ggthemes::theme_few() +
  scale_shape_manual(
    values = c(rmse = 21, rsq = 1)
  )

p_1

1.1.2 Variable Importance

Code

vip_full <-
  vip_full %>%
  group_by(variable) %>%
  mutate(
    mean_importance_jaccard =
      mean(importance[response == "Jaccard_dissim"],
        na.rm = TRUE
      )
  ) %>%
  ungroup() %>%
  mutate(
    variable =
      fct_reorder(
        variable,
        mean_importance_jaccard
      )
  ) %>%
  mutate(
    hypo =
      case_when(
        variable %in% H1 ~ "H1",
        variable %in% H2 ~ "H2",
        variable %in% H3 ~ "H3"
      )
  )

p_3 <-
  vip_full %>%
  ggplot() +
  geom_col(
    aes(
      x = importance,
      y = variable,
      fill = hypo
    )
  ) +
  facet_wrap(~response,
    scales = "free_x"
  ) +
  labs(
    y = "Variable",
    x = "Importance"
  ) +
  ggthemes::theme_few()


p_3

1.1.3 Variable Interactions

Code

p_5 <-
  int_full %>%
  filter(
    !c(test == "total_interaction_strength")
  ) %>%
  ggplot() +
  geom_col(
    aes(
      x = H2,
      y = reorder_within(variable, H2, response)
    )
  ) +
  facet_wrap(~ test + response,
    scales = "free"
  ) +
  scale_y_reordered() +
  ggthemes::theme_few()

p_5 # saved as 1200x800

1.2 Split Model

1.2.1 Model Performance

Code

p_2 <-
  mp_split %>%
  # reformat to long:
  pivot_longer(
    cols = c(rmse, rsq)
  ) %>%
  # Plot
  ggplot() +
  geom_point(
    aes(
      x = factor(datasetID),
      y = value, shape = name
    ),
    cex = 2,
    col = "black",
    bg = "black"
  ) +
  facet_wrap(~response) +
  ggthemes::theme_few() +
  scale_shape_manual(
    values = c(rmse = 21, rsq = 1)
  ) +
  xlab("DatasetID")

p_2

1.2.2 Variable Importance

Code

vip_split <-
  vip_split %>%
  group_by(variable, datasetID, response) %>%
  mutate(
    mean_importance_jaccard =
      mean(importance[response == "Jaccard_dissim"],
        na.rm = TRUE
      )
  ) %>%
  ungroup() %>%
  mutate(
    variable =
      fct_reorder(variable, mean_importance_jaccard)
  ) %>%
  mutate(
    hypo =
      case_when(
        variable %in% H1 ~ "H1",
        variable %in% H2 ~ "H2",
        variable %in% H3 ~ "H3"
      )
  )

Warning: There was 1 warning in `mutate()`.
ℹ In argument: `variable = fct_reorder(variable, mean_importance_jaccard)`.
Caused by warning:
! `fct_reorder()` removing 104 missing values.
ℹ Use `.na_rm = TRUE` to silence this message.
ℹ Use `.na_rm = FALSE` to preserve NAs.

Code

p_4 <-
  vip_split %>%
  ggplot() +
  geom_col(
    aes(
      x = importance,
      y = variable,
      fill = hypo
    )
  ) +
  facet_wrap(~ response + datasetID,
    scales = "free_x"
  ) +
  labs(
    y = "Variable",
    x = "Importance"
  ) +
  ggthemes::theme_few()


p_4 # saved as 1200x800

1.2.3 Variable Interactions

Code

p_6 <-
  int_split %>%
  filter(
    !c(test == "total_interaction_strength")
  ) %>%
  ggplot() +
  geom_col(
    aes(
      x = H2,
      y = reorder_within(variable, H2, response),
      fill = factor(datasetID)
    )
  ) +
  facet_wrap(~ test + response,
    scales = "free"
  ) +
  scale_y_reordered() +
  ggthemes::theme_few()
p_6 # saved as 1500x1500

save as ppt for postediting

Code

esquisse::ggplot_to_ppt()

Part 2: Partial dependence plots

Start fresh:

Libraries

Code

suppressPackageStartupMessages({
  library(here)

  source(here::here("Code/00_Configuration.R"))

  package_list <-
    c(
      package_list,
      "ranger",
      "tidymodels",
      "caret",
      "skimr",
      "DALEXtra",
      "DALEX",
      "ggthemes",
      "hstats",
      "tidytext",
      "pdp"
    )

  lapply(package_list, require, character = TRUE)

  tidymodels_prefer()
})

rm(list = ls())

Set variable vectors for hypotheses

Code

sp_id <-
  c(
    "verbatimIdentification",
    "scientificName"
  )

H1 <-
  c(
    "Mass",
    "GlobRangeSize_km2",
    "Migration",
    "Habitat_5",
    "Generalism",
    "Threatened",
    "pd"
  )

H2 <-
  c(
    "D_AOO_a",
    "mean_lnLac",
    "AOO",
    "joincount_delta",
    "circNorm",
    "minDist_toBorder_centr"
  )

H3 <-
  c("datasetID")

predictors <-
  c(H1, H2, H3)

responses <-
  c(
    "Jaccard_dissim",
    "log_R2_1",
    "log_R2_1_per_year" # yearly rate
  )

Create lookup labels for clean plotting:

Code

lookup <-
  c(
    "Mass" = "Body mass",
    "GlobRangeSize_km2" = "Global range size",
    "Migration" = "Migration",
    "Habitat_5" = "Habitat type",
    "Generalism" = "Generalism",
    "Threatened" = "Threatened",
    "pd" = "Phylogenetic distinctiveness",
    "D_AOO_a" = "Fractal dimension",
    "mean_lnLac" = "Mean log lacunarity",
    "AOO" = "Area of occupancy",
    "joincount_delta" = "Spatial autocorrelation",
    "circNorm" = "Normalized circularity",
    "minDist_toBorder_centr" = "Smallest distance to the border"
  )

Categorical plot:

Code

cat_vars <- c(
  "Migration",
  "Habitat_5",
  "Generalism",
  "Threatened",
  "datasetID"
)

2.1 Model results:

Read result objects from .rds

Code

all <-
  readRDS(
    here::here(
      "Data/output/B_models/",
      "B_01_list_all_results_rf.rds"
    )
  )

split <- readRDS(
  here::here(
    "Data/output/B_models/",
    "B_02_rf_res_atlas_split.rds"
  )
)

Extract all fit objects into a named list: /- Naming convention: response_atlas

Code

all_models_list <-
  list(

    # full models:
    Jaccard = all$res[[1]] %>% extract_fit_parsnip(),
    log_Ratio = all$res[[2]] %>% extract_fit_parsnip(),
    log_Ratio_y = all$res[[3]] %>% extract_fit_parsnip(),

    # split by datasetID:
    Jaccard_5 = split[[1]][[1]]$final_fit %>%
      extract_fit_parsnip(),
    Jaccard_6 = split[[2]][[1]]$final_fit %>%
      extract_fit_parsnip(),
    Jaccard_13 = split[[3]][[1]]$final_fit %>%
      extract_fit_parsnip(),
    Jaccard_26 = split[[4]][[1]]$final_fit %>%
      extract_fit_parsnip(),
    log_Ratio_5 = split[[1]][[2]]$final_fit %>%
      extract_fit_parsnip(),
    log_Ratio_6 = split[[2]][[2]]$final_fit %>%
      extract_fit_parsnip(),
    log_Ratio_13 = split[[3]][[2]]$final_fit %>%
      extract_fit_parsnip(),
    log_Ratio_26 = split[[4]][[2]]$final_fit %>%
      extract_fit_parsnip(),
    log_Ratio_y_5 = split[[1]][[3]]$final_fit %>%
      extract_fit_parsnip(),
    log_Ratio_y_6 = split[[2]][[3]]$final_fit %>%
      extract_fit_parsnip(),
    log_Ratio_y_13 = split[[3]][[3]]$final_fit %>%
      extract_fit_parsnip(),
    log_Ratio_y_26 = split[[4]][[3]]$final_fit %>%
      extract_fit_parsnip()
  )

# Extract all training data to a list (note: these are identical for different responses)

train_data <-
  list(

    # full data
    all = all$list_split_data[[1]]$train,

    # split by dataset
    split_5 = split[[1]][[1]]$final_fit$splits[[1]] %>%
      training(),
    split_6 = split[[2]][[1]]$final_fit$splits[[1]] %>%
      training(),
    split_13 = split[[3]][[1]]$final_fit$splits[[1]] %>%
      training(),
    split_26 = split[[4]][[1]]$final_fit$splits[[1]] %>%
      training()
  )

create empty list:

Code

pdp_list <- replicate(15, list)

Code

for (model_i in seq_along(all_models_list)) {
  if (model_i %in% c(1, 4, 5, 6, 7)) response <- "Jaccard_dissim" else if (model_i %in% c(2, 8, 9, 10, 11)) response <- "log_R2_1" else if (model_i %in% c(3, 12, 13, 14, 15)) response <- "log_R2_1_per_year"

  # select model

  this_model <- all_models_list[[model_i]]

  # dynamically remove datasetID as predictor depending on model

  if (model_i %in% c(4:15)) {
    predictors <- predictors[1:13]
  } else {
    predictors
  }

  # create empty list for each model_i
  pdp_list[[model_i]] <- list()

  # match the model to its train_data (it was previously saved in the model results list)
  if (model_i %in% c(1:3)) train_i <- 1 else if (model_i %in% c(4:7)) train_i <- 2 else if (model_i %in% c(8:11)) train_i <- 3 else if (model_i %in% c(12:15)) train_i <- 4

  if (model_i %in% c(1, 2, 3)) obj_ident <- "full" else if (model_i %in% c(4:7)) obj_ident <- "Jaccard_split" else if (model_i %in% c(8:11)) obj_ident <- "log_R2_1_split" else if (model_i %in% c(12:15)) obj_ident <- "log_R2_1_y_ split"

  # set current training data
  this_train <- train_data[[train_i]]

  # loop through all variables in the data (all predictors)
  for (var_i in seq_along(predictors)) {
    # select current variable to plot
    this_variable <- predictors[var_i]

    print(paste0("object = ", obj_ident, ", Response = ", response, ", Var = ", this_variable))

    pdp_list[[model_i]][[var_i]] <-
      pdp::partial(
        object = this_model,
        train = this_train,
        pred.var = this_variable,
        plot = FALSE
      )
  }
}

# save to temporary folder
saveRDS(pdp_list, here::here("Data/output/temp/B_04_partial_dependencies.rds"))

Merge to plotting df:

Code

pdp_df_list <- replicate(15, list)
pdp_df_list_cat <- replicate(15, list)

for (model_i in seq_along(pdp_list)) {
  this_model <- pdp_list[[model_i]]
  pdp_df_list[[model_i]] <- list()
  pdp_df_list_cat[[model_i]] <- list()

  if (model_i %in% c(1, 4, 5, 6, 7)) response <- "Jaccard_dissim" else if (model_i %in% c(2, 8, 9, 10, 11)) response <- "log_R2_1" else if (model_i %in% c(3, 12, 13, 14, 15)) response <- "log_R2_1_per_year"

  if (model_i %in% c(1, 2, 3)) obj_ident <- "full" else if (model_i %in% c(4:7)) obj_ident <- "Jaccard_split" else if (model_i %in% c(8:11)) obj_ident <- "log_R2_1_split" else if (model_i %in% c(12:15)) obj_ident <- "log_R2_1_y_split"

  if (model_i %in% c(1, 2, 3)) datasetID <- "all" else if (model_i %in% c(4, 8, 12)) datasetID <- "5" else if (model_i %in% c(5, 9, 13)) datasetID <- "6" else if (model_i %in% c(6, 10, 14)) datasetID <- "13" else if (model_i %in% c(7, 11, 15)) datasetID <- "26"

  for (var_i in seq_along(this_model)) {
    pdp_df_i <- this_model[[var_i]]
    pdp_df_i$variable <- names(pdp_df_i)[1]

    pdp_df_i$value <- pdp_df_i[[1]]
    pdp_df_i$response <- response
    pdp_df_i$datasetID <- datasetID
    pdp_df_i$model <- obj_ident
    pdp_df_i[[1]] <- NULL

    if (unique(pdp_df_i$variable) %in% cat_vars) {
      pdp_df_list_cat[[model_i]][[var_i]] <- pdp_df_i
    } else {
      pdp_df_list[[model_i]][[var_i]] <- pdp_df_i
    }
  }
}

pdp_num <- pdp_df_list %>% bind_rows()
pdp_cat <- pdp_df_list_cat %>% bind_rows()

pdp_res <- list(
  numeric = pdp_num,
  categorical = pdp_cat
)

Plots (ggplot2)

Code

for (num_cat_i in seq_along(pdp_res)) { # there are 2 sub-lists (num, cat)

  if (num_cat_i == 1) { # numeric

    # chose sublist from results
    pdp_subset <- pdp_res[[num_cat_i]]

    # extract results from models dynamically

    for (model_i in seq_along(unique(pdp_subset$model))) { # there are 4 models

      this_model <- unique(pdp_subset$model)[model_i]
      model_dd <- pdp_subset %>% filter(model == this_model)

      for (resp_i in seq_along(unique(model_dd$response))) {
        this_resp <- unique(model_dd$response)[resp_i]

        print(paste0("Num/Cat = ", num_cat_i, "; model = ", this_model, "; response = ", this_resp))

        response_dd <- model_dd %>%
          filter(response == this_resp)

        p0 <- response_dd %>%
          ggplot() +
          geom_line(
            aes(
              y = yhat,
              x = value,
              col = datasetID,
              group = interaction(variable, datasetID)
            ),
            linewidth = 0.8,
            alpha = 0.8
          ) +
          labs(
            x = NULL,
            y = bquote(italic(.(this_resp)) ~ "(Partial)")
          ) +
          facet_wrap(~variable, scales = "free_x", labeller = labeller(variable = lookup)) +
          ggthemes::theme_par() +
          ggtitle(paste(this_resp), paste0(this_model, "-model")) +
          scale_color_brewer(palette = "PuOr")

        # adjust axes and plot hline

        if (this_resp == "Jaccard_dissim") {
          p1 <-
            p0 +
            ylim(0, 1) +
            geom_hline(
              yintercept = 0.5,
              linewidth = 0.3,
              col = "darkgrey"
            )
        } else if (this_resp == "log_R2_1") {
          p1 <-
            p0 +
            # ylim(min(temp$log_R2_1), max(temp$log_R2_1)) +
            geom_hline(
              yintercept = 0,
              linewidth = 0.3,
              col = "darkgrey"
            )
        } else if (this_resp == "log_R2_1_per_year") {
          p1 <-
            p0 +
            # ylim(min(temp$log_R2_1_per_year), max(temp$log_R2_1_per_year)) +
            geom_hline(
              yintercept = 0,
              linewidth = 0.3,
              col = "darkgrey"
            )
        }

        print(p1)
      } # responses loop closing
    } # models loop closing
  } else { # Categorical variables

    # chose sublist from results

    pdp_subset <- pdp_res[[num_cat_i]]

    # extract results from models dynamically

    for (model_i in seq_along(unique(pdp_subset$model))) { # there are 4 models

      this_model <- unique(pdp_subset$model)[model_i]
      model_dd <- pdp_subset %>% filter(model == this_model)

      for (resp_i in seq_along(unique(model_dd$response))) {
        this_resp <- unique(model_dd$response)[resp_i]

        print(paste0("Num/Cat = ", num_cat_i, "; model = ", this_model, "; response = ", this_resp))

        response_dd <- model_dd %>%
          filter(response == this_resp)

        p0 <- response_dd %>%
          ggplot() +
          geom_point(
            aes(
              y = yhat,
              x = value,
              col = datasetID
            ),
            alpha = 0.8
          ) +
          ggthemes::theme_par() +
          labs(y = bquote(italic(.(this_resp)) ~ "(Partial)")) +
          facet_wrap(~variable, scales = "free_x", labeller = labeller(variable = lookup)) +
          scale_color_brewer(palette = "PuOr")

        # adjust axes and plot hline

        if (this_resp == "Jaccard_dissim") {
          p1 <-
            p0 +
            ylim(0, 1) +
            geom_hline(
              yintercept = 0.5,
              linewidth = 0.3,
              col = "darkgrey"
            )
        } else if (this_resp == "log_R2_1") {
          p1 <-
            p0 +
            # ylim(min(temp$log_R2_1), max(temp$log_R2_1)) +
            geom_hline(
              yintercept = 0,
              linewidth = 0.3,
              col = "darkgrey"
            )
        } else if (this_resp == "log_R2_1_per_year") {
          p1 <-
            p0 +
            # ylim(min(temp$log_R2_1_per_year), max(temp$log_R2_1_per_year)) +
            geom_hline(
              yintercept = 0,
              linewidth = 0.3,
              col = "darkgrey"
            )
        }

        print(p1)
      } # responses loop closing
    } # models loop closing
  } # categorical variables loop closing
} # closing the whole thing

[1] "Num/Cat = 1; model = full; response = Jaccard_dissim"

[1] "Num/Cat = 1; model = full; response = log_R2_1"

[1] "Num/Cat = 1; model = full; response = log_R2_1_per_year"

[1] "Num/Cat = 1; model = Jaccard_split; response = Jaccard_dissim"

[1] "Num/Cat = 1; model = log_R2_1_split; response = log_R2_1"

[1] "Num/Cat = 1; model = log_R2_1_y_split; response = log_R2_1_per_year"

[1] "Num/Cat = 2; model = full; response = Jaccard_dissim"

Warning: Removed 4 rows containing missing values or values outside the scale range
(`geom_point()`).

[1] "Num/Cat = 2; model = full; response = log_R2_1"

Warning: Removed 4 rows containing missing values or values outside the scale range
(`geom_point()`).

[1] "Num/Cat = 2; model = full; response = log_R2_1_per_year"

Warning: Removed 4 rows containing missing values or values outside the scale range
(`geom_point()`).

[1] "Num/Cat = 2; model = Jaccard_split; response = Jaccard_dissim"

[1] "Num/Cat = 2; model = log_R2_1_split; response = log_R2_1"

[1] "Num/Cat = 2; model = log_R2_1_y_split; response = log_R2_1_per_year"

For maintext:

2.1.1 Jaccard per model:

Numeric Jaccard plots

Code

plots_numeric <- list()
for (var_i in seq_along(unique(pdp_res[[1]]$variable))) {
  this_var <- unique(pdp_res[[1]]$variable)[var_i]
  this_resp <- "Jaccard_dissim"
  this_model <- "Jaccard_split"

  p0 <- pdp_res[[1]] %>%
    filter(model == "Jaccard_split" & variable == this_var) %>%
    ggplot() +
    geom_hline(yintercept = 0.5, linewidth = 0.3, col = "darkgrey") +
    geom_line(
      aes(
        x = value,
        y = yhat,
        group = datasetID,
        col = datasetID
      ),
      linewidth = 0.8
    ) +
    ggthemes::theme_few() +
    scale_color_manual(values = c(
      "all" = "black",
      "5" = "#E66101",
      "6" = "#FDB863",
      "13" = "#B2ABD2",
      "26" = "#5E3C99"
    )) +
    labs(
      x = lookup[this_var],
      y = bquote(italic(.(this_resp)) ~ "(Partial)"),
      col = "Dataset"
    ) +
    ggtitle(NULL, paste0(this_model, "-model")) +
    ylim(0, 1)

  p0
  plots_numeric[[var_i]] <- p0
}

plots_numeric

[[1]]


[[2]]


[[3]]


[[4]]


[[5]]


[[6]]


[[7]]


[[8]]


[[9]]

Code

ggsave(
  plot = plots_numeric[[1]],
  path = here::here("Figures/B_models/performance/J/"),
  filename = paste0("B_04_PartialPlot_Jaccard_split_", "mass", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[2]],
  path = here::here("Figures/B_models/performance/J/"),
  filename = paste0("B_04_PartialPlot_Jaccard_split_", "globalrangesize", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[3]],
  path = here::here("Figures/B_models/performance/J/"),
  filename = paste0("B_04_PartialPlot_Jaccard_split_", "phylodist", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[4]],
  path = here::here("Figures/B_models/performance/J/"),
  filename = paste0("B_04_PartialPlot_Jaccard_split_", "D", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[5]],
  path = here::here("Figures/B_models/performance/J/"),
  filename = paste0("B_04_PartialPlot_Jaccard_split_", "Lacunarity", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[6]],
  path = here::here("Figures/B_models/performance/J/"),
  filename = paste0("B_04_PartialPlot_Jaccard_split_", "AOO", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[7]],
  path = here::here("Figures/B_models/performance/J/"),
  filename = paste0("B_04_PartialPlot_Jaccard_split_", "SAC", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[8]],
  path = here::here("Figures/B_models/performance/J/"),
  filename = paste0("B_04_PartialPlot_Jaccard_split_", "Circ", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[9]],
  path = here::here("Figures/B_models/performance/J/"),
  filename = paste0("B_04_PartialPlot_Jaccard_split_", "DistBorder", ".svg"),
  width = 3.5, height = 2.8
)

Categorical Jaccard plots

Code

plots_categorical <- list()
for (var_i in seq_along(unique(pdp_res[[2]]$variable))) {
  this_var <- unique(pdp_res[[2]]$variable)[var_i]
  this_resp <- "Jaccard_dissim"
  this_model <- "Jaccard_split"

  # Categorical Jaccard plots

  p1 <- pdp_res[[2]] %>%
    filter(model == "Jaccard_split" & variable == this_var) %>%
    ggplot() +
    geom_hline(yintercept = 0.5, linewidth = 0.3, col = "darkgrey") +
    geom_point(
      aes(
        x = value,
        y = yhat,
        col = datasetID
      ),
      size = 3,
      alpha = 0.75
    ) +
    ggthemes::theme_few() +
    scale_color_brewer(palette = "PuOr") +
    labs(
      x = lookup[this_var],
      y = bquote(italic(.(this_resp)) ~ "(Partial)"),
      col = "Dataset"
    ) +
    ggtitle(NULL, paste0(this_model, "-model")) +
    ylim(0, 1)

  plots_categorical[[var_i]] <- p1
}

plots_categorical <- plots_categorical[c(1:4)]
plots_categorical

[[1]]


[[2]]


[[3]]


[[4]]

save to svg

Code

ggsave(
  plot = plots_categorical[[1]],
  path = here::here("Figures/B_models/performance/J/"),
  filename = paste0("B_04_PartialPlot_Jaccard_split_", "Migration", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_categorical[[2]],
  path = here::here("Figures/B_models/performance/J/"),
  filename = paste0("B_04_PartialPlot_Jaccard_split_", "Habitat", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_categorical[[3]],
  path = here::here("Figures/B_models/performance/J/"),
  filename = paste0("B_04_PartialPlot_Jaccard_split_", "Generalism", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_categorical[[4]],
  path = here::here("Figures/B_models/performance/J/"),
  filename = paste0("B_04_PartialPlot_Jaccard_split_", "Threat", ".svg"),
  width = 3.5, height = 2.8
)

Code

patchwork::wrap_plots(plots_numeric, ncol = 3) +
  patchwork::plot_layout(tag_level = "keep", guides = "collect", axes = "collect_y") +
  patchwork::plot_annotation(tag_levels = "a", tag_suffix = ")")

Code

# export::graph2ppt(
#   file=here::here("Figures/B_models/performance/B_04_PartialSep_Jaccard.ppt"), width=8, height=10)

Code

patchwork::wrap_plots(plots_categorical, ncol = 3) +
  patchwork::plot_layout(tag_level = "keep", guides = "collect", axes = "collect_y") +
  patchwork::plot_annotation(tag_levels = "a", tag_suffix = ")")

Code

# export::graph2ppt(
#   file=here::here("Figures/B_models/performance/B_04_PartialSep_Categorical_Jaccard.ppt"), width=8, height=(10/3)*2)

2.1.2 Log Ratio per model:

Code

plots_numeric <- list()
for (var_i in seq_along(unique(pdp_res[[1]]$variable))) {
  this_var <- unique(pdp_res[[1]]$variable)[var_i]
  this_resp <- "log_R2_1"
  this_model <- "log_R2_1_split"


  # Numeric log ratio plots
  p0 <- pdp_res[[1]] %>%
    filter(model == "log_R2_1_split" & variable == this_var) %>%
    ggplot() +
    geom_hline(yintercept = 0, linewidth = 0.3, col = "darkgrey") +
    geom_line(
      aes(
        x = value,
        y = yhat,
        group = datasetID,
        col = datasetID
      ),
      linewidth = 0.8
    ) +
    ggthemes::theme_few() +
    scale_color_manual(values = c(
      "all" = "black",
      "5" = "#E66101",
      "6" = "#FDB863",
      "13" = "#B2ABD2",
      "26" = "#5E3C99"
    )) +
    labs(
      x = lookup[this_var],
      y = bquote(italic(.(this_resp)) ~ "(Partial)"),
      col = "Dataset"
    ) +
    ggtitle(NULL, paste0(this_model, "-model"))

  p0
  plots_numeric[[var_i]] <- p0
}

plots_numeric

[[1]]


[[2]]


[[3]]


[[4]]


[[5]]


[[6]]


[[7]]


[[8]]


[[9]]

Code

ggsave(
  plot = plots_numeric[[1]],
  path = here::here("Figures/B_models/performance/lnR/"),
  filename = paste0("B_04_PartialPlot_lnR_split_", "mass", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[2]],
  path = here::here("Figures/B_models/performance/lnR/"),
  filename = paste0("B_04_PartialPlot_lnR_split_", "globalrangesize", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[3]],
  path = here::here("Figures/B_models/performance/lnR/"),
  filename = paste0("B_04_PartialPlot_lnR_split_", "phylodist", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[4]],
  path = here::here("Figures/B_models/performance/lnR/"),
  filename = paste0("B_04_PartialPlot_lnR_split_", "D", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[5]],
  path = here::here("Figures/B_models/performance/lnR/"),
  filename = paste0("B_04_PartialPlot_lnR_split_", "Lacunarity", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[6]],
  path = here::here("Figures/B_models/performance/lnR/"),
  filename = paste0("B_04_PartialPlot_lnR_split_", "AOO", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[7]],
  path = here::here("Figures/B_models/performance/lnR/"),
  filename = paste0("B_04_PartialPlot_lnR_split_", "SAC", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[8]],
  path = here::here("Figures/B_models/performance/lnR/"),
  filename = paste0("B_04_PartialPlot_lnR_split_", "Circ", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[9]],
  path = here::here("Figures/B_models/performance/lnR/"),
  filename = paste0("B_04_PartialPlot_lnR_split_", "DistBorder", ".svg"),
  width = 3.5, height = 2.8
)

Code

plots_categorical <- list()
for (var_i in seq_along(unique(pdp_res[[2]]$variable))) {
  this_var <- unique(pdp_res[[2]]$variable)[var_i]
  this_resp <- "log_R2_1"
  this_model <- "log_R2_1_split"

  # Categorical log ratio plots
  p1 <- pdp_res[[2]] %>%
    filter(model == "log_R2_1_split" & variable == this_var) %>%
    ggplot() +
    geom_hline(yintercept = 0, linewidth = 0.3, col = "darkgrey") +
    geom_point(
      aes(
        x = value,
        y = yhat,
        col = datasetID
      ),
      size = 3,
      alpha = 0.75
    ) +
    ggthemes::theme_few() +
    scale_color_brewer(palette = "PuOr") +
    labs(
      x = lookup[this_var],
      y = bquote(italic(.(this_resp)) ~ "(Partial)"),
      col = "Dataset"
    ) +
    ggtitle(NULL, paste0(this_model, "-model"))
  plots_categorical[[var_i]] <- p1
}

plots_categorical <- plots_categorical[c(1:4)]
plots_categorical

[[1]]


[[2]]


[[3]]


[[4]]

Code

ggsave(
  plot = plots_categorical[[1]],
  path = here::here("Figures/B_models/performance/lnR/"),
  filename = paste0("B_04_PartialPlot_lnR_split_", "Migration", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_categorical[[2]],
  path = here::here("Figures/B_models/performance/lnR/"),
  filename = paste0("B_04_PartialPlot_lnR_split_", "Habitat", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_categorical[[3]],
  path = here::here("Figures/B_models/performance/lnR/"),
  filename = paste0("B_04_PartialPlot_lnR_split_", "Generalism", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_categorical[[4]],
  path = here::here("Figures/B_models/performance/lnR/"),
  filename = paste0("B_04_PartialPlot_lnR_split_", "Threat", ".svg"),
  width = 3.5, height = 2.8
)

Code

patchwork::wrap_plots(plots_numeric, ncol = 3) +
  patchwork::plot_layout(tag_level = "keep", guides = "collect", axes = "collect_y") +
  patchwork::plot_annotation(tag_levels = "a", tag_suffix = ")")

Code

# export::graph2ppt(
#   file=here::here("Figures/B_models/performance/B_04_PartialSep_logR.ppt"), width=8, height=10)

Code

patchwork::wrap_plots(plots_categorical, ncol = 3) +
  patchwork::plot_layout(tag_level = "keep", guides = "collect", axes = "collect_y") +
  patchwork::plot_annotation(tag_levels = "a", tag_suffix = ")")

Code

# export::graph2ppt(
#   file=here::here("Figures/B_models/performance/B_04_PartialSep_Categorical_logR.ppt"), width=8, height=(10/3)*2)

2.1.3 Log ratio per year per model:

Code

plots_numeric <- list()
for (var_i in seq_along(unique(pdp_res[[1]]$variable))) {
  this_var <- unique(pdp_res[[1]]$variable)[var_i]
  this_resp <- "log_R2_1_per_year"
  this_model <- "log_R2_1_y_split"

  # Numeric yearly rate plots

  p0 <- pdp_res[[1]] %>%
    filter(model == "log_R2_1_y_split" & variable == this_var) %>%
    ggplot() +
    geom_hline(yintercept = 0, linewidth = 0.3, col = "darkgrey") +
    geom_line(
      aes(
        x = value,
        y = yhat,
        group = datasetID,
        col = datasetID
      ),
      linewidth = 0.8
    ) +
    ggthemes::theme_few() +
    scale_color_manual(values = c(
      "all" = "black",
      "5" = "#E66101",
      "6" = "#FDB863",
      "13" = "#B2ABD2",
      "26" = "#5E3C99"
    )) +
    labs(
      x = lookup[this_var],
      y = bquote(italic(.(this_resp)) ~ "(Partial)"),
      col = "Dataset"
    ) +
    ggtitle(NULL, paste0(this_model, "-model"))

  p0
  plots_numeric[[var_i]] <- p0
}

plots_numeric

[[1]]


[[2]]


[[3]]


[[4]]


[[5]]


[[6]]


[[7]]


[[8]]


[[9]]

Code

ggsave(
  plot = plots_numeric[[1]],
  path = here::here("Figures/B_models/performance/lnR_y/"),
  filename = paste0("B_04_PartialPlot_lnR_y_split_", "mass", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[2]],
  path = here::here("Figures/B_models/performance/lnR_y/"),
  filename = paste0("B_04_PartialPlot_lnR_y_split_", "globalrangesize", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[3]],
  path = here::here("Figures/B_models/performance/lnR_y/"),
  filename = paste0("B_04_PartialPlot_lnR_y_split_", "phylodist", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[4]],
  path = here::here("Figures/B_models/performance/lnR_y/"),
  filename = paste0("B_04_PartialPlot_lnR_y_split_", "D", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[5]],
  path = here::here("Figures/B_models/performance/lnR_y/"),
  filename = paste0("B_04_PartialPlot_lnR_y_split_", "Lacunarity", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[6]],
  path = here::here("Figures/B_models/performance/lnR_y/"),
  filename = paste0("B_04_PartialPlot_lnR_y_split_", "AOO", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[7]],
  path = here::here("Figures/B_models/performance/lnR_y/"),
  filename = paste0("B_04_PartialPlot_lnR_y_split_", "SAC", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[8]],
  path = here::here("Figures/B_models/performance/lnR_y/"),
  filename = paste0("B_04_PartialPlot_lnR_y_split_", "Circ", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_numeric[[9]],
  path = here::here("Figures/B_models/performance/lnR_y/"),
  filename = paste0("B_04_PartialPlot_lnR_y_split_", "DistBorder", ".svg"),
  width = 3.5, height = 2.8
)

Code

plots_categorical <- list()
for (var_i in seq_along(unique(pdp_res[[2]]$variable))) {
  this_var <- unique(pdp_res[[2]]$variable)[var_i]
  this_resp <- "log_R2_1_per_year"
  this_model <- "log_R2_1_y_split"

  # Categorical yearly rate plots
  p1 <- pdp_res[[2]] %>%
    filter(model == "log_R2_1_y_split" & variable == this_var) %>%
    ggplot() +
    geom_hline(yintercept = 0, linewidth = 0.3, col = "darkgrey") +
    geom_point(
      aes(
        x = value,
        y = yhat,
        col = datasetID
      ),
      size = 3,
      alpha = 0.75
    ) +
    ggthemes::theme_few() +
    scale_color_brewer(palette = "PuOr") +
    labs(
      x = lookup[this_var],
      y = bquote(italic(.(this_resp)) ~ "(Partial)"),
      col = "Dataset"
    ) +
    ggtitle(NULL, paste0(this_model, "-model"))

  plots_categorical[[var_i]] <- p1
}

plots_categorical <- plots_categorical[c(1:4)]
plots_categorical

[[1]]


[[2]]


[[3]]


[[4]]

Code

ggsave(
  plot = plots_categorical[[1]],
  path = here::here("Figures/B_models/performance/lnR_y/"),
  filename = paste0("B_04_PartialPlot_lnR_y_split_", "Migration", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_categorical[[2]],
  path = here::here("Figures/B_models/performance/lnR_y/"),
  filename = paste0("B_04_PartialPlot_lnR_y_split_", "Habitat", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_categorical[[3]],
  path = here::here("Figures/B_models/performance/lnR_y/"),
  filename = paste0("B_04_PartialPlot_lnR_y_split_", "Generalism", ".svg"),
  width = 3.5, height = 2.8
)
ggsave(
  plot = plots_categorical[[4]],
  path = here::here("Figures/B_models/performance/lnR_y/"),
  filename = paste0("B_04_PartialPlot_lnR_y_split_", "Threat", ".svg"),
  width = 3.5, height = 2.8
)

Code

patchwork::wrap_plots(plots_numeric, ncol = 3) +
  patchwork::plot_layout(tag_level = "keep", guides = "collect", axes = "collect_y") +
  patchwork::plot_annotation(tag_levels = "a", tag_suffix = ")")

Code

# export::graph2ppt(
#   file=here::here("Figures/B_models/performance/B_04_PartialSep_LnR_y.ppt"), width=8, height=10)

Code

patchwork::wrap_plots(plots_categorical, ncol = 3) +
  patchwork::plot_layout(tag_level = "keep", guides = "collect", axes = "collect_y") +
  patchwork::plot_annotation(tag_levels = "a", tag_suffix = ")")

Code

# export::graph2ppt(
#   file=here::here("Figures/B_models/performance/B_04_PartialSep_Categorical_LnR_y.ppt"), width=8, height=(10/3)*2)

2.2 Interactions & their partial dependencies

Moved also to Script D_04_Figure_3

Code

data <-
  readRDS(
    here::here(
      "Data/output/B_models/",
      "B_01_list_all_results_rf.rds"
    )
  )

fit_1 <-
  data$res[[1]] %>%
  extract_fit_parsnip()

fit_2 <-
  data$res[[2]] %>%
  extract_fit_parsnip()

fit_3 <-
  data$res[[3]] %>%
  extract_fit_parsnip()

Jaccard ~ lac partial

Code

partial_dep(
  object = fit_1,
  v = "mean_lnLac",
  X = data$list_split_data[[1]]$train,
  BY = "joincount_delta"
) %>%
  plot(show_points = FALSE) +
  ylim(0, 1)

Warning: Removed 49 rows containing missing values or values outside the scale range
(`geom_line()`).

Jaccard: lac ~ jc Heatmap

Code

partial_dep(
  object = fit_1,
  v = c("mean_lnLac", "joincount_delta"),
  X = data$list_split_data[[1]]$train,
  # BY = "joincount_delta",
  grid_size = 100
) %>%
  plot()

Jaccard: D ~ AOO Heatmap

Code

partial_dep(
  object = fit_1,
  v = c("D_AOO_a", "AOO"),
  X = data$list_split_data[[1]]$train,
  # BY = "AOO",
  grid_size = 100
) %>%
  plot()

Log Ratio: Body mass ~ AOO Heatmap

Code

partial_dep(
  object = fit_2,
  v = c("Mass", "AOO"),
  X = data$list_split_data[[1]]$train,
  grid_size = 100
) %>%
  plot()

Yearly rate: Mass ~ Habitat

Code

partial_dep(
  object = fit_3,
  v = c("Mass", "Habitat_5"),
  BY = "datasetID",
  X = data$list_split_data[[1]]$train,
  grid_size = 100
) %>%
  plot()

Yearly rate:

Code

partial_dep(
  object = fit_3,
  v = c("AOO", "D_AOO_a"),
  BY = "datasetID",
  X = data$list_split_data[[1]]$train,
  grid_size = 100
) %>%
  plot()

--- title: "B_04_Model_performance_figures.R" format: html: self-contained: true embed-resources: true toc: true # optional: adds a table of contents theme: cosmo # optional: Bootstrap theme code-fold: show # optional: collapsible code blocks code-tools: true # optional: adds copy/paste buttons toc-depth: 4 editor: markdown: wrap: 72 --- # B - 04: Model performance figures ```{r} #| echo: false #| message: false #| warning: false #| results: "hide" #| output: false rm(list = ls()) gc() ``` ## Libraries ```{r} #| message: false #| warning: false #| results: "hide" #| output: false suppressPackageStartupMessages({ library(here) source(here::here("Code/00_Configuration.R")) package_list <- c( package_list, "ranger", "tidymodels", "caret", "skimr", "DALEXtra", "DALEX", "ggthemes", "hstats", "tidytext" ) lapply(package_list, require, character = TRUE) tidymodels_prefer() }) rm(list = ls()) ``` ## Set variable vectors for hypotheses ```{r} sp_id <- c( "verbatimIdentification", "scientificName" ) H1 <- c( "Mass", "GlobRangeSize_km2", "Migration", "Habitat_5", "Generalism", "Threatened", "pd" ) H2 <- c( "D_AOO_a", "mean_lnLac", "AOO", "joincount_delta", "circNorm", "minDist_toBorder_centr" ) H3 <- c("datasetID") predictors <- c(H1, H2, H3) responses <- c( "Jaccard_dissim", "log_R2_1", "log_R2_1_per_year" # yearly rate ) ``` # Part 1: Mp, Vip, Hstats Read result .csvs back in (csv2 with ; as separator) ```{r} # pooled model results mp_full <- read.csv2( here::here( "Data/output/results/", "B_01_ranger_all_data.csv" ) ) vip_full <- read.csv2( here::here( "Data/output/results/", "B_01_ranger_vimp_all_data.csv" ) ) int_full <- read.csv2( here::here( "Data/output/results/", "B_01_Hstats_all_data.csv" ) ) # split by dataset mp_split <- read.csv2( here::here( "Data/output/results/", "B_02_ranger_atlas_separate.csv" ) ) vip_split <- read.csv2( here::here( "Data/output/results/", "B_02_ranger_vimp_split_data.csv" ) ) int_split <- read.csv2( here::here( "Data/output/results/", "B_02_Hstats_split_data.csv" ) ) ``` ## 1.1 Pooled Model ### 1.1.1 Model Performance ```{r} #| fig-width: 6 #| fig-height: 3 p_1 <- mp_full %>% # reformat to long: pivot_longer( cols = c(rmse, rsq) ) %>% # Plot ggplot() + geom_point( aes(x = response, y = value, shape = name), cex = 2, col = "black", bg = "black" ) + ggthemes::theme_few() + scale_shape_manual( values = c(rmse = 21, rsq = 1) ) p_1 ``` ### 1.1.2 Variable Importance ```{r} vip_full <- vip_full %>% group_by(variable) %>% mutate( mean_importance_jaccard = mean(importance[response == "Jaccard_dissim"], na.rm = TRUE ) ) %>% ungroup() %>% mutate( variable = fct_reorder( variable, mean_importance_jaccard ) ) %>% mutate( hypo = case_when( variable %in% H1 ~ "H1", variable %in% H2 ~ "H2", variable %in% H3 ~ "H3" ) ) p_3 <- vip_full %>% ggplot() + geom_col( aes( x = importance, y = variable, fill = hypo ) ) + facet_wrap(~response, scales = "free_x" ) + labs( y = "Variable", x = "Importance" ) + ggthemes::theme_few() p_3 ``` ### 1.1.3 Variable Interactions ```{r} #| fig-width: 30 #| fig-height: 20 p_5 <- int_full %>% filter( !c(test == "total_interaction_strength") ) %>% ggplot() + geom_col( aes( x = H2, y = reorder_within(variable, H2, response) ) ) + facet_wrap(~ test + response, scales = "free" ) + scale_y_reordered() + ggthemes::theme_few() p_5 # saved as 1200x800 ``` ## 1.2 Split Model ### 1.2.1 Model Performance ```{r} #| fig-width: 6 #| fig-height: 3 p_2 <- mp_split %>% # reformat to long: pivot_longer( cols = c(rmse, rsq) ) %>% # Plot ggplot() + geom_point( aes( x = factor(datasetID), y = value, shape = name ), cex = 2, col = "black", bg = "black" ) + facet_wrap(~response) + ggthemes::theme_few() + scale_shape_manual( values = c(rmse = 21, rsq = 1) ) + xlab("DatasetID") p_2 ``` ### 1.2.2 Variable Importance ```{r} #| fig-width: 10 #| fig-height: 10 vip_split <- vip_split %>% group_by(variable, datasetID, response) %>% mutate( mean_importance_jaccard = mean(importance[response == "Jaccard_dissim"], na.rm = TRUE ) ) %>% ungroup() %>% mutate( variable = fct_reorder(variable, mean_importance_jaccard) ) %>% mutate( hypo = case_when( variable %in% H1 ~ "H1", variable %in% H2 ~ "H2", variable %in% H3 ~ "H3" ) ) p_4 <- vip_split %>% ggplot() + geom_col( aes( x = importance, y = variable, fill = hypo ) ) + facet_wrap(~ response + datasetID, scales = "free_x" ) + labs( y = "Variable", x = "Importance" ) + ggthemes::theme_few() p_4 # saved as 1200x800 ``` ### 1.2.3 Variable Interactions ```{r} #| fig-width: 30 #| fig-height: 30 p_6 <- int_split %>% filter( !c(test == "total_interaction_strength") ) %>% ggplot() + geom_col( aes( x = H2, y = reorder_within(variable, H2, response), fill = factor(datasetID) ) ) + facet_wrap(~ test + response, scales = "free" ) + scale_y_reordered() + ggthemes::theme_few() p_6 # saved as 1500x1500 ``` save as ppt for postediting ```{r} #| eval: false esquisse::ggplot_to_ppt() ``` # Part 2: Partial dependence plots Start fresh: ```{r} #| echo: false #| message: false #| warning: false #| results: "hide" #| output: false rm(list = ls()) gc() ``` Libraries ```{r} #| message: false #| warning: false #| results: "hide" #| output: false suppressPackageStartupMessages({ library(here) source(here::here("Code/00_Configuration.R")) package_list <- c( package_list, "ranger", "tidymodels", "caret", "skimr", "DALEXtra", "DALEX", "ggthemes", "hstats", "tidytext", "pdp" ) lapply(package_list, require, character = TRUE) tidymodels_prefer() }) rm(list = ls()) ``` Set variable vectors for hypotheses ```{r} sp_id <- c( "verbatimIdentification", "scientificName" ) H1 <- c( "Mass", "GlobRangeSize_km2", "Migration", "Habitat_5", "Generalism", "Threatened", "pd" ) H2 <- c( "D_AOO_a", "mean_lnLac", "AOO", "joincount_delta", "circNorm", "minDist_toBorder_centr" ) H3 <- c("datasetID") predictors <- c(H1, H2, H3) responses <- c( "Jaccard_dissim", "log_R2_1", "log_R2_1_per_year" # yearly rate ) ``` Create lookup labels for clean plotting: ```{r} lookup <- c( "Mass" = "Body mass", "GlobRangeSize_km2" = "Global range size", "Migration" = "Migration", "Habitat_5" = "Habitat type", "Generalism" = "Generalism", "Threatened" = "Threatened", "pd" = "Phylogenetic distinctiveness", "D_AOO_a" = "Fractal dimension", "mean_lnLac" = "Mean log lacunarity", "AOO" = "Area of occupancy", "joincount_delta" = "Spatial autocorrelation", "circNorm" = "Normalized circularity", "minDist_toBorder_centr" = "Smallest distance to the border" ) ``` Categorical plot: ```{r} cat_vars <- c( "Migration", "Habitat_5", "Generalism", "Threatened", "datasetID" ) ``` ## 2.1 Model results: Read result objects from .rds ```{r} all <- readRDS( here::here( "Data/output/B_models/", "B_01_list_all_results_rf.rds" ) ) split <- readRDS( here::here( "Data/output/B_models/", "B_02_rf_res_atlas_split.rds" ) ) ``` Extract all fit objects into a named list: /- Naming convention: response_atlas ```{r} all_models_list <- list( # full models: Jaccard = all$res[[1]] %>% extract_fit_parsnip(), log_Ratio = all$res[[2]] %>% extract_fit_parsnip(), log_Ratio_y = all$res[[3]] %>% extract_fit_parsnip(), # split by datasetID: Jaccard_5 = split[[1]][[1]]$final_fit %>% extract_fit_parsnip(), Jaccard_6 = split[[2]][[1]]$final_fit %>% extract_fit_parsnip(), Jaccard_13 = split[[3]][[1]]$final_fit %>% extract_fit_parsnip(), Jaccard_26 = split[[4]][[1]]$final_fit %>% extract_fit_parsnip(), log_Ratio_5 = split[[1]][[2]]$final_fit %>% extract_fit_parsnip(), log_Ratio_6 = split[[2]][[2]]$final_fit %>% extract_fit_parsnip(), log_Ratio_13 = split[[3]][[2]]$final_fit %>% extract_fit_parsnip(), log_Ratio_26 = split[[4]][[2]]$final_fit %>% extract_fit_parsnip(), log_Ratio_y_5 = split[[1]][[3]]$final_fit %>% extract_fit_parsnip(), log_Ratio_y_6 = split[[2]][[3]]$final_fit %>% extract_fit_parsnip(), log_Ratio_y_13 = split[[3]][[3]]$final_fit %>% extract_fit_parsnip(), log_Ratio_y_26 = split[[4]][[3]]$final_fit %>% extract_fit_parsnip() ) # Extract all training data to a list (note: these are identical for different responses) train_data <- list( # full data all = all$list_split_data[[1]]$train, # split by dataset split_5 = split[[1]][[1]]$final_fit$splits[[1]] %>% training(), split_6 = split[[2]][[1]]$final_fit$splits[[1]] %>% training(), split_13 = split[[3]][[1]]$final_fit$splits[[1]] %>% training(), split_26 = split[[4]][[1]]$final_fit$splits[[1]] %>% training() ) ``` create empty list: ```{r} #| eval: false pdp_list <- replicate(15, list) ``` ```{r} #| eval: false #| include: true for (model_i in seq_along(all_models_list)) { if (model_i %in% c(1, 4, 5, 6, 7)) response <- "Jaccard_dissim" else if (model_i %in% c(2, 8, 9, 10, 11)) response <- "log_R2_1" else if (model_i %in% c(3, 12, 13, 14, 15)) response <- "log_R2_1_per_year" # select model this_model <- all_models_list[[model_i]] # dynamically remove datasetID as predictor depending on model if (model_i %in% c(4:15)) { predictors <- predictors[1:13] } else { predictors } # create empty list for each model_i pdp_list[[model_i]] <- list() # match the model to its train_data (it was previously saved in the model results list) if (model_i %in% c(1:3)) train_i <- 1 else if (model_i %in% c(4:7)) train_i <- 2 else if (model_i %in% c(8:11)) train_i <- 3 else if (model_i %in% c(12:15)) train_i <- 4 if (model_i %in% c(1, 2, 3)) obj_ident <- "full" else if (model_i %in% c(4:7)) obj_ident <- "Jaccard_split" else if (model_i %in% c(8:11)) obj_ident <- "log_R2_1_split" else if (model_i %in% c(12:15)) obj_ident <- "log_R2_1_y_ split" # set current training data this_train <- train_data[[train_i]] # loop through all variables in the data (all predictors) for (var_i in seq_along(predictors)) { # select current variable to plot this_variable <- predictors[var_i] print(paste0("object = ", obj_ident, ", Response = ", response, ", Var = ", this_variable)) pdp_list[[model_i]][[var_i]] <- pdp::partial( object = this_model, train = this_train, pred.var = this_variable, plot = FALSE ) } } # save to temporary folder saveRDS(pdp_list, here::here("Data/output/temp/B_04_partial_dependencies.rds")) ``` ```{r} #| echo: false #| eval: true pdp_list <- readRDS(here::here("Data/output/temp/B_04_partial_dependencies.rds")) ``` Merge to plotting df: ```{r} pdp_df_list <- replicate(15, list) pdp_df_list_cat <- replicate(15, list) for (model_i in seq_along(pdp_list)) { this_model <- pdp_list[[model_i]] pdp_df_list[[model_i]] <- list() pdp_df_list_cat[[model_i]] <- list() if (model_i %in% c(1, 4, 5, 6, 7)) response <- "Jaccard_dissim" else if (model_i %in% c(2, 8, 9, 10, 11)) response <- "log_R2_1" else if (model_i %in% c(3, 12, 13, 14, 15)) response <- "log_R2_1_per_year" if (model_i %in% c(1, 2, 3)) obj_ident <- "full" else if (model_i %in% c(4:7)) obj_ident <- "Jaccard_split" else if (model_i %in% c(8:11)) obj_ident <- "log_R2_1_split" else if (model_i %in% c(12:15)) obj_ident <- "log_R2_1_y_split" if (model_i %in% c(1, 2, 3)) datasetID <- "all" else if (model_i %in% c(4, 8, 12)) datasetID <- "5" else if (model_i %in% c(5, 9, 13)) datasetID <- "6" else if (model_i %in% c(6, 10, 14)) datasetID <- "13" else if (model_i %in% c(7, 11, 15)) datasetID <- "26" for (var_i in seq_along(this_model)) { pdp_df_i <- this_model[[var_i]] pdp_df_i$variable <- names(pdp_df_i)[1] pdp_df_i$value <- pdp_df_i[[1]] pdp_df_i$response <- response pdp_df_i$datasetID <- datasetID pdp_df_i$model <- obj_ident pdp_df_i[[1]] <- NULL if (unique(pdp_df_i$variable) %in% cat_vars) { pdp_df_list_cat[[model_i]][[var_i]] <- pdp_df_i } else { pdp_df_list[[model_i]][[var_i]] <- pdp_df_i } } } pdp_num <- pdp_df_list %>% bind_rows() pdp_cat <- pdp_df_list_cat %>% bind_rows() pdp_res <- list( numeric = pdp_num, categorical = pdp_cat ) ``` Plots (ggplot2) ```{r} #| fig-width: 12 #| fig-heigh: 12 for (num_cat_i in seq_along(pdp_res)) { # there are 2 sub-lists (num, cat) if (num_cat_i == 1) { # numeric # chose sublist from results pdp_subset <- pdp_res[[num_cat_i]] # extract results from models dynamically for (model_i in seq_along(unique(pdp_subset$model))) { # there are 4 models this_model <- unique(pdp_subset$model)[model_i] model_dd <- pdp_subset %>% filter(model == this_model) for (resp_i in seq_along(unique(model_dd$response))) { this_resp <- unique(model_dd$response)[resp_i] print(paste0("Num/Cat = ", num_cat_i, "; model = ", this_model, "; response = ", this_resp)) response_dd <- model_dd %>% filter(response == this_resp) p0 <- response_dd %>% ggplot() + geom_line( aes( y = yhat, x = value, col = datasetID, group = interaction(variable, datasetID) ), linewidth = 0.8, alpha = 0.8 ) + labs( x = NULL, y = bquote(italic(.(this_resp)) ~ "(Partial)") ) + facet_wrap(~variable, scales = "free_x", labeller = labeller(variable = lookup)) + ggthemes::theme_par() + ggtitle(paste(this_resp), paste0(this_model, "-model")) + scale_color_brewer(palette = "PuOr") # adjust axes and plot hline if (this_resp == "Jaccard_dissim") { p1 <- p0 + ylim(0, 1) + geom_hline( yintercept = 0.5, linewidth = 0.3, col = "darkgrey" ) } else if (this_resp == "log_R2_1") { p1 <- p0 + # ylim(min(temp$log_R2_1), max(temp$log_R2_1)) + geom_hline( yintercept = 0, linewidth = 0.3, col = "darkgrey" ) } else if (this_resp == "log_R2_1_per_year") { p1 <- p0 + # ylim(min(temp$log_R2_1_per_year), max(temp$log_R2_1_per_year)) + geom_hline( yintercept = 0, linewidth = 0.3, col = "darkgrey" ) } print(p1) } # responses loop closing } # models loop closing } else { # Categorical variables # chose sublist from results pdp_subset <- pdp_res[[num_cat_i]] # extract results from models dynamically for (model_i in seq_along(unique(pdp_subset$model))) { # there are 4 models this_model <- unique(pdp_subset$model)[model_i] model_dd <- pdp_subset %>% filter(model == this_model) for (resp_i in seq_along(unique(model_dd$response))) { this_resp <- unique(model_dd$response)[resp_i] print(paste0("Num/Cat = ", num_cat_i, "; model = ", this_model, "; response = ", this_resp)) response_dd <- model_dd %>% filter(response == this_resp) p0 <- response_dd %>% ggplot() + geom_point( aes( y = yhat, x = value, col = datasetID ), alpha = 0.8 ) + ggthemes::theme_par() + labs(y = bquote(italic(.(this_resp)) ~ "(Partial)")) + facet_wrap(~variable, scales = "free_x", labeller = labeller(variable = lookup)) + scale_color_brewer(palette = "PuOr") # adjust axes and plot hline if (this_resp == "Jaccard_dissim") { p1 <- p0 + ylim(0, 1) + geom_hline( yintercept = 0.5, linewidth = 0.3, col = "darkgrey" ) } else if (this_resp == "log_R2_1") { p1 <- p0 + # ylim(min(temp$log_R2_1), max(temp$log_R2_1)) + geom_hline( yintercept = 0, linewidth = 0.3, col = "darkgrey" ) } else if (this_resp == "log_R2_1_per_year") { p1 <- p0 + # ylim(min(temp$log_R2_1_per_year), max(temp$log_R2_1_per_year)) + geom_hline( yintercept = 0, linewidth = 0.3, col = "darkgrey" ) } print(p1) } # responses loop closing } # models loop closing } # categorical variables loop closing } # closing the whole thing ``` ## For maintext: ### 2.1.1 Jaccard per model: Numeric Jaccard plots ```{r} plots_numeric <- list() for (var_i in seq_along(unique(pdp_res[[1]]$variable))) { this_var <- unique(pdp_res[[1]]$variable)[var_i] this_resp <- "Jaccard_dissim" this_model <- "Jaccard_split" p0 <- pdp_res[[1]] %>% filter(model == "Jaccard_split" & variable == this_var) %>% ggplot() + geom_hline(yintercept = 0.5, linewidth = 0.3, col = "darkgrey") + geom_line( aes( x = value, y = yhat, group = datasetID, col = datasetID ), linewidth = 0.8 ) + ggthemes::theme_few() + scale_color_manual(values = c( "all" = "black", "5" = "#E66101", "6" = "#FDB863", "13" = "#B2ABD2", "26" = "#5E3C99" )) + labs( x = lookup[this_var], y = bquote(italic(.(this_resp)) ~ "(Partial)"), col = "Dataset" ) + ggtitle(NULL, paste0(this_model, "-model")) + ylim(0, 1) p0 plots_numeric[[var_i]] <- p0 } plots_numeric ``` ```{r} #| eval: false ggsave( plot = plots_numeric[[1]], path = here::here("Figures/B_models/performance/J/"), filename = paste0("B_04_PartialPlot_Jaccard_split_", "mass", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[2]], path = here::here("Figures/B_models/performance/J/"), filename = paste0("B_04_PartialPlot_Jaccard_split_", "globalrangesize", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[3]], path = here::here("Figures/B_models/performance/J/"), filename = paste0("B_04_PartialPlot_Jaccard_split_", "phylodist", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[4]], path = here::here("Figures/B_models/performance/J/"), filename = paste0("B_04_PartialPlot_Jaccard_split_", "D", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[5]], path = here::here("Figures/B_models/performance/J/"), filename = paste0("B_04_PartialPlot_Jaccard_split_", "Lacunarity", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[6]], path = here::here("Figures/B_models/performance/J/"), filename = paste0("B_04_PartialPlot_Jaccard_split_", "AOO", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[7]], path = here::here("Figures/B_models/performance/J/"), filename = paste0("B_04_PartialPlot_Jaccard_split_", "SAC", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[8]], path = here::here("Figures/B_models/performance/J/"), filename = paste0("B_04_PartialPlot_Jaccard_split_", "Circ", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[9]], path = here::here("Figures/B_models/performance/J/"), filename = paste0("B_04_PartialPlot_Jaccard_split_", "DistBorder", ".svg"), width = 3.5, height = 2.8 ) ``` Categorical Jaccard plots ```{r} plots_categorical <- list() for (var_i in seq_along(unique(pdp_res[[2]]$variable))) { this_var <- unique(pdp_res[[2]]$variable)[var_i] this_resp <- "Jaccard_dissim" this_model <- "Jaccard_split" # Categorical Jaccard plots p1 <- pdp_res[[2]] %>% filter(model == "Jaccard_split" & variable == this_var) %>% ggplot() + geom_hline(yintercept = 0.5, linewidth = 0.3, col = "darkgrey") + geom_point( aes( x = value, y = yhat, col = datasetID ), size = 3, alpha = 0.75 ) + ggthemes::theme_few() + scale_color_brewer(palette = "PuOr") + labs( x = lookup[this_var], y = bquote(italic(.(this_resp)) ~ "(Partial)"), col = "Dataset" ) + ggtitle(NULL, paste0(this_model, "-model")) + ylim(0, 1) plots_categorical[[var_i]] <- p1 } plots_categorical <- plots_categorical[c(1:4)] plots_categorical ``` save to svg ```{r} #| eval: false ggsave( plot = plots_categorical[[1]], path = here::here("Figures/B_models/performance/J/"), filename = paste0("B_04_PartialPlot_Jaccard_split_", "Migration", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_categorical[[2]], path = here::here("Figures/B_models/performance/J/"), filename = paste0("B_04_PartialPlot_Jaccard_split_", "Habitat", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_categorical[[3]], path = here::here("Figures/B_models/performance/J/"), filename = paste0("B_04_PartialPlot_Jaccard_split_", "Generalism", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_categorical[[4]], path = here::here("Figures/B_models/performance/J/"), filename = paste0("B_04_PartialPlot_Jaccard_split_", "Threat", ".svg"), width = 3.5, height = 2.8 ) ``` ```{r} #| fig-width: 12 #| fig-height: 12 patchwork::wrap_plots(plots_numeric, ncol = 3) + patchwork::plot_layout(tag_level = "keep", guides = "collect", axes = "collect_y") + patchwork::plot_annotation(tag_levels = "a", tag_suffix = ")") # export::graph2ppt( # file=here::here("Figures/B_models/performance/B_04_PartialSep_Jaccard.ppt"), width=8, height=10) ``` ```{r} #| fig-width: 12 #| fig-height: 8 patchwork::wrap_plots(plots_categorical, ncol = 3) + patchwork::plot_layout(tag_level = "keep", guides = "collect", axes = "collect_y") + patchwork::plot_annotation(tag_levels = "a", tag_suffix = ")") # export::graph2ppt( # file=here::here("Figures/B_models/performance/B_04_PartialSep_Categorical_Jaccard.ppt"), width=8, height=(10/3)*2) ``` ### 2.1.2 Log Ratio per model: ```{r} plots_numeric <- list() for (var_i in seq_along(unique(pdp_res[[1]]$variable))) { this_var <- unique(pdp_res[[1]]$variable)[var_i] this_resp <- "log_R2_1" this_model <- "log_R2_1_split" # Numeric log ratio plots p0 <- pdp_res[[1]] %>% filter(model == "log_R2_1_split" & variable == this_var) %>% ggplot() + geom_hline(yintercept = 0, linewidth = 0.3, col = "darkgrey") + geom_line( aes( x = value, y = yhat, group = datasetID, col = datasetID ), linewidth = 0.8 ) + ggthemes::theme_few() + scale_color_manual(values = c( "all" = "black", "5" = "#E66101", "6" = "#FDB863", "13" = "#B2ABD2", "26" = "#5E3C99" )) + labs( x = lookup[this_var], y = bquote(italic(.(this_resp)) ~ "(Partial)"), col = "Dataset" ) + ggtitle(NULL, paste0(this_model, "-model")) p0 plots_numeric[[var_i]] <- p0 } plots_numeric ``` ```{r} #| eval: false ggsave( plot = plots_numeric[[1]], path = here::here("Figures/B_models/performance/lnR/"), filename = paste0("B_04_PartialPlot_lnR_split_", "mass", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[2]], path = here::here("Figures/B_models/performance/lnR/"), filename = paste0("B_04_PartialPlot_lnR_split_", "globalrangesize", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[3]], path = here::here("Figures/B_models/performance/lnR/"), filename = paste0("B_04_PartialPlot_lnR_split_", "phylodist", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[4]], path = here::here("Figures/B_models/performance/lnR/"), filename = paste0("B_04_PartialPlot_lnR_split_", "D", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[5]], path = here::here("Figures/B_models/performance/lnR/"), filename = paste0("B_04_PartialPlot_lnR_split_", "Lacunarity", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[6]], path = here::here("Figures/B_models/performance/lnR/"), filename = paste0("B_04_PartialPlot_lnR_split_", "AOO", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[7]], path = here::here("Figures/B_models/performance/lnR/"), filename = paste0("B_04_PartialPlot_lnR_split_", "SAC", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[8]], path = here::here("Figures/B_models/performance/lnR/"), filename = paste0("B_04_PartialPlot_lnR_split_", "Circ", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[9]], path = here::here("Figures/B_models/performance/lnR/"), filename = paste0("B_04_PartialPlot_lnR_split_", "DistBorder", ".svg"), width = 3.5, height = 2.8 ) ``` ```{r} plots_categorical <- list() for (var_i in seq_along(unique(pdp_res[[2]]$variable))) { this_var <- unique(pdp_res[[2]]$variable)[var_i] this_resp <- "log_R2_1" this_model <- "log_R2_1_split" # Categorical log ratio plots p1 <- pdp_res[[2]] %>% filter(model == "log_R2_1_split" & variable == this_var) %>% ggplot() + geom_hline(yintercept = 0, linewidth = 0.3, col = "darkgrey") + geom_point( aes( x = value, y = yhat, col = datasetID ), size = 3, alpha = 0.75 ) + ggthemes::theme_few() + scale_color_brewer(palette = "PuOr") + labs( x = lookup[this_var], y = bquote(italic(.(this_resp)) ~ "(Partial)"), col = "Dataset" ) + ggtitle(NULL, paste0(this_model, "-model")) plots_categorical[[var_i]] <- p1 } plots_categorical <- plots_categorical[c(1:4)] plots_categorical ``` ```{r} #| eval: false ggsave( plot = plots_categorical[[1]], path = here::here("Figures/B_models/performance/lnR/"), filename = paste0("B_04_PartialPlot_lnR_split_", "Migration", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_categorical[[2]], path = here::here("Figures/B_models/performance/lnR/"), filename = paste0("B_04_PartialPlot_lnR_split_", "Habitat", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_categorical[[3]], path = here::here("Figures/B_models/performance/lnR/"), filename = paste0("B_04_PartialPlot_lnR_split_", "Generalism", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_categorical[[4]], path = here::here("Figures/B_models/performance/lnR/"), filename = paste0("B_04_PartialPlot_lnR_split_", "Threat", ".svg"), width = 3.5, height = 2.8 ) ``` ```{r} #| fig-width: 12 #| fig-height: 12 patchwork::wrap_plots(plots_numeric, ncol = 3) + patchwork::plot_layout(tag_level = "keep", guides = "collect", axes = "collect_y") + patchwork::plot_annotation(tag_levels = "a", tag_suffix = ")") # export::graph2ppt( # file=here::here("Figures/B_models/performance/B_04_PartialSep_logR.ppt"), width=8, height=10) ``` ```{r} #| fig-width: 12 #| fig-height: 8 patchwork::wrap_plots(plots_categorical, ncol = 3) + patchwork::plot_layout(tag_level = "keep", guides = "collect", axes = "collect_y") + patchwork::plot_annotation(tag_levels = "a", tag_suffix = ")") # export::graph2ppt( # file=here::here("Figures/B_models/performance/B_04_PartialSep_Categorical_logR.ppt"), width=8, height=(10/3)*2) ``` ### 2.1.3 Log ratio per year per model: ```{r} plots_numeric <- list() for (var_i in seq_along(unique(pdp_res[[1]]$variable))) { this_var <- unique(pdp_res[[1]]$variable)[var_i] this_resp <- "log_R2_1_per_year" this_model <- "log_R2_1_y_split" # Numeric yearly rate plots p0 <- pdp_res[[1]] %>% filter(model == "log_R2_1_y_split" & variable == this_var) %>% ggplot() + geom_hline(yintercept = 0, linewidth = 0.3, col = "darkgrey") + geom_line( aes( x = value, y = yhat, group = datasetID, col = datasetID ), linewidth = 0.8 ) + ggthemes::theme_few() + scale_color_manual(values = c( "all" = "black", "5" = "#E66101", "6" = "#FDB863", "13" = "#B2ABD2", "26" = "#5E3C99" )) + labs( x = lookup[this_var], y = bquote(italic(.(this_resp)) ~ "(Partial)"), col = "Dataset" ) + ggtitle(NULL, paste0(this_model, "-model")) p0 plots_numeric[[var_i]] <- p0 } plots_numeric ``` ```{r} #| eval: false ggsave( plot = plots_numeric[[1]], path = here::here("Figures/B_models/performance/lnR_y/"), filename = paste0("B_04_PartialPlot_lnR_y_split_", "mass", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[2]], path = here::here("Figures/B_models/performance/lnR_y/"), filename = paste0("B_04_PartialPlot_lnR_y_split_", "globalrangesize", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[3]], path = here::here("Figures/B_models/performance/lnR_y/"), filename = paste0("B_04_PartialPlot_lnR_y_split_", "phylodist", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[4]], path = here::here("Figures/B_models/performance/lnR_y/"), filename = paste0("B_04_PartialPlot_lnR_y_split_", "D", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[5]], path = here::here("Figures/B_models/performance/lnR_y/"), filename = paste0("B_04_PartialPlot_lnR_y_split_", "Lacunarity", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[6]], path = here::here("Figures/B_models/performance/lnR_y/"), filename = paste0("B_04_PartialPlot_lnR_y_split_", "AOO", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[7]], path = here::here("Figures/B_models/performance/lnR_y/"), filename = paste0("B_04_PartialPlot_lnR_y_split_", "SAC", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[8]], path = here::here("Figures/B_models/performance/lnR_y/"), filename = paste0("B_04_PartialPlot_lnR_y_split_", "Circ", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_numeric[[9]], path = here::here("Figures/B_models/performance/lnR_y/"), filename = paste0("B_04_PartialPlot_lnR_y_split_", "DistBorder", ".svg"), width = 3.5, height = 2.8 ) ``` ```{r} plots_categorical <- list() for (var_i in seq_along(unique(pdp_res[[2]]$variable))) { this_var <- unique(pdp_res[[2]]$variable)[var_i] this_resp <- "log_R2_1_per_year" this_model <- "log_R2_1_y_split" # Categorical yearly rate plots p1 <- pdp_res[[2]] %>% filter(model == "log_R2_1_y_split" & variable == this_var) %>% ggplot() + geom_hline(yintercept = 0, linewidth = 0.3, col = "darkgrey") + geom_point( aes( x = value, y = yhat, col = datasetID ), size = 3, alpha = 0.75 ) + ggthemes::theme_few() + scale_color_brewer(palette = "PuOr") + labs( x = lookup[this_var], y = bquote(italic(.(this_resp)) ~ "(Partial)"), col = "Dataset" ) + ggtitle(NULL, paste0(this_model, "-model")) plots_categorical[[var_i]] <- p1 } plots_categorical <- plots_categorical[c(1:4)] plots_categorical ``` ```{r} #| eval: false ggsave( plot = plots_categorical[[1]], path = here::here("Figures/B_models/performance/lnR_y/"), filename = paste0("B_04_PartialPlot_lnR_y_split_", "Migration", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_categorical[[2]], path = here::here("Figures/B_models/performance/lnR_y/"), filename = paste0("B_04_PartialPlot_lnR_y_split_", "Habitat", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_categorical[[3]], path = here::here("Figures/B_models/performance/lnR_y/"), filename = paste0("B_04_PartialPlot_lnR_y_split_", "Generalism", ".svg"), width = 3.5, height = 2.8 ) ggsave( plot = plots_categorical[[4]], path = here::here("Figures/B_models/performance/lnR_y/"), filename = paste0("B_04_PartialPlot_lnR_y_split_", "Threat", ".svg"), width = 3.5, height = 2.8 ) ``` ```{r} #| fig-width: 12 #| fig-height: 12 patchwork::wrap_plots(plots_numeric, ncol = 3) + patchwork::plot_layout(tag_level = "keep", guides = "collect", axes = "collect_y") + patchwork::plot_annotation(tag_levels = "a", tag_suffix = ")") # export::graph2ppt( # file=here::here("Figures/B_models/performance/B_04_PartialSep_LnR_y.ppt"), width=8, height=10) ``` ```{r} #| fig-width: 12 #| fig-height: 8 patchwork::wrap_plots(plots_categorical, ncol = 3) + patchwork::plot_layout(tag_level = "keep", guides = "collect", axes = "collect_y") + patchwork::plot_annotation(tag_levels = "a", tag_suffix = ")") # export::graph2ppt( # file=here::here("Figures/B_models/performance/B_04_PartialSep_Categorical_LnR_y.ppt"), width=8, height=(10/3)*2) ``` ## 2.2 Interactions & their partial dependencies Moved also to Script D_04_Figure_3 ```{r} data <- readRDS( here::here( "Data/output/B_models/", "B_01_list_all_results_rf.rds" ) ) fit_1 <- data$res[[1]] %>% extract_fit_parsnip() fit_2 <- data$res[[2]] %>% extract_fit_parsnip() fit_3 <- data$res[[3]] %>% extract_fit_parsnip() ``` Jaccard ~ lac partial ```{r} partial_dep( object = fit_1, v = "mean_lnLac", X = data$list_split_data[[1]]$train, BY = "joincount_delta" ) %>% plot(show_points = FALSE) + ylim(0, 1) ``` Jaccard: lac ~ jc Heatmap ```{r} partial_dep( object = fit_1, v = c("mean_lnLac", "joincount_delta"), X = data$list_split_data[[1]]$train, # BY = "joincount_delta", grid_size = 100 ) %>% plot() ``` Jaccard: D ~ AOO Heatmap ```{r} partial_dep( object = fit_1, v = c("D_AOO_a", "AOO"), X = data$list_split_data[[1]]$train, # BY = "AOO", grid_size = 100 ) %>% plot() ``` Log Ratio: Body mass ~ AOO Heatmap ```{r} partial_dep( object = fit_2, v = c("Mass", "AOO"), X = data$list_split_data[[1]]$train, grid_size = 100 ) %>% plot() ``` Yearly rate: Mass ~ Habitat ```{r} partial_dep( object = fit_3, v = c("Mass", "Habitat_5"), BY = "datasetID", X = data$list_split_data[[1]]$train, grid_size = 100 ) %>% plot() ``` Yearly rate: ```{r} partial_dep( object = fit_3, v = c("AOO", "D_AOO_a"), BY = "datasetID", X = data$list_split_data[[1]]$train, grid_size = 100 ) %>% plot() ```