Simulating temporal J dissimilarity and log ratio under different scenarios of scale, occupancy, and magnitude of temporal change

C - 02: Temporal change simulations

Parameters to simulate (previewed in the sketch after this list):

  • universe size

  • occupancy at time 1 (occu1)

  • flip type: directional/random

  • number of changes

  • number of runs
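
To make the design concrete, the sketch below (with a truncated occupancy vector, purely for display) previews how the first three parameters are expanded into a full factorial grid; each combination is then repeated n_runs times, and the number of changes is swept from 1 to the universe size inside the simulation function.

Code
# Illustrative preview of the simulated design grid (truncated occu1 values)
head(expand.grid(
  length    = c(10, 100, 1000),
  occu1     = c(1, 2, 4, 8),              # the full run uses 2^(0:9) plus 999
  flip_type = c("random", "from_1_to_0"),
  stringsAsFactors = FALSE
))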

⚠️ Preparations

📚 Libraries

Code
suppressPackageStartupMessages({
  source(here::here("Code/00_Configuration.R"))

  package_list <-
    c(
      package_list,
      "skimr",
      "ggthemes",
      "plotly",
      "ggplot2",
      "mgcv",
      "GGally",
      "corrr",
      "broom"
    )

  lapply(package_list, require, character.only = TRUE)
})


rm(list = ls())

🔍 Jaccard formula
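
The index is built from the classic a/b/c decomposition of two occupancy sets: a = cells occupied at both times, b = cells occupied only at time 2 (colonizations), and c = cells occupied only at time 1 (extirpations). Jaccard similarity is then J = a / (a + b + c), and the temporal dissimilarity analysed later is 1 - J.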

Code
my_jaccard_abc <- function(set1, set2) {
  a <- length(intersect(set1, set2)) # shared occupied cells
  b <- length(setdiff(set2, set1))   # cells unique to time 2 (colonizations)
  c <- length(setdiff(set1, set2))   # cells unique to time 1 (extirpations)
  res <- data.frame(
    j = a / (a + b + c),
    intersect = a,
    unique_1 = c,
    unique_2 = b,
    N_occu_1 = length(set1),         # number of occupied cells at time 1
    N_occu_2 = sum(!is.na(set2))     # number of occupied cells at time 2 (0 if set2 is NA)
  )
  return(res)
}
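
A quick toy call (made-up sets, just to show the output format):

Code
# Cells 2 and 3 persist, cell 1 is lost, cell 4 is gained
my_jaccard_abc(set1 = c(1, 2, 3), set2 = c(2, 3, 4))
# -> j = 0.5, intersect = 2, unique_1 = 1, unique_2 = 1, N_occu_1 = 3, N_occu_2 = 3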

❇️ Simulation function

Code
set.seed(123) # For reproducibility

simulate_jaccard <- function(lengths, occu1, flip_types, n_runs = 1) {
  # Expand grid for parameter combinations
  params <- expand.grid(lengths, occu1, flip_types, stringsAsFactors = FALSE)
  colnames(params) <- c("length", "occu1", "flip_type")

  # Initialize storage
  jaccard_results <- list()

  # Loop through parameter combinations
  for (row_i in seq_len(nrow(params))) {
    length_i <- params$length[row_i]
    occu_1_i <- params$occu1[row_i]
    flip_type <- params$flip_type[row_i]

    # Ensure the number of occupied sites does not exceed the vector length
    occu_1_i <- min(occu_1_i, length_i)

    # Repeat the simulation `n_runs` times
    for (run in seq_len(n_runs)) {
      # Generate set1 (initially occupied sites)
      set1_i <- sample(1:length_i, occu_1_i, replace = FALSE)

      # Sequence of number of changes (from 1 to max_vector_length)
      n_changes <- seq(from = 1, to = length_i, by = 1)

      # Dataframe to store results for this run
      jaccard_temp <- data.frame(num_changes = n_changes, jaccard = NA, run = run)

      set2_temp_list <- vector("list", length(n_changes))

      # Modify set1 stepwise and store set2 states
      for (n_i in seq_along(n_changes)) {
        set2_temp <- set1_i
        changes_i <- n_changes[n_i]

        if (flip_type == "random") {
          flip_sites <- sample(1:length_i, size = changes_i, replace = FALSE)

          set2_i <- union( # Combine left overs after 1 to 0 flips with new cells from 0 to 1 flips
            setdiff(set2_temp, flip_sites), # Remove indices in `flip_sites` already in `set2_temp`
            intersect(flip_sites, setdiff(1:length_i, set2_temp)) # Add indices not in `set2_temp` (0 to 1; colonizations)
          )
        } else if (flip_type == "from_1_to_0") {
          # Guard: sample() treats a length-1 numeric x as 1:x, so duplicate the single site
          sample_mat <- if (length(set1_i) == 1) rep(set1_i, 2) else set1_i
          flip_sites <- sample(x = sample_mat, size = min(changes_i, length(set1_i)), replace = FALSE)
          set2_i <- setdiff(set2_temp, flip_sites) # Remove flipped sites from set1
          set2_i <- if (length(set2_i) == 0) NA else set2_i
        }

        # Store modified set2
        set2_temp_list[[n_i]] <- set2_i

        # Compute Jaccard similarity
        jaccard_res <- my_jaccard_abc(set1_i, set2_i)

        jaccard_temp$jaccard[n_i] <- jaccard_res$j
        jaccard_temp$intersect[n_i] <- jaccard_res$intersect
        jaccard_temp$unique_1[n_i] <- jaccard_res$unique_1
        jaccard_temp$unique_2[n_i] <- jaccard_res$unique_2
        jaccard_temp$N_occu_1[n_i] <- jaccard_res$N_occu_1
        jaccard_temp$N_occu_2[n_i] <- jaccard_res$N_occu_2
        jaccard_temp$length[n_i] <- length_i
        jaccard_temp$occu1[n_i] <- occu_1_i
        jaccard_temp$flip_type[n_i] <- flip_type
      }

      # Append results for this run
      jaccard_results[[length(jaccard_results) + 1]] <- jaccard_temp
    }
  }

  # Combine Jaccard results into a single dataframe for visualization
  jaccard_df <- do.call(rbind, jaccard_results)
  return(jaccard_df)
}
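
As a minimal sanity check (illustrative only, not part of the main run), the simulation function can be exercised on a single tiny universe:

Code
# One random-flip run on a 5-cell universe with 2 initially occupied cells
test_df <- simulate_jaccard(lengths = 5, occu1 = 2, flip_types = "random", n_runs = 1)
head(test_df)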

🪄 Run simulation

Code
# Parameters
lengths <- c(10, 100, 1000)
occu1 <- c(2^(0:floor(log2(1000))), 999) # 1, 2, 4, ..., 512, plus 999
flip_types <- c("random", "from_1_to_0")
n_runs <- 100 # Repeat each parameter combination 100 times

# Run simulations
jaccard_df <-
  simulate_jaccard(lengths, occu1, flip_types, n_runs)

# save as rds
saveRDS(jaccard_df, here(
  "Data/output/temp",
  "C_02_Jaccard_simulations_100.rds"
))

🦾 Prepare analysis data

Organize the data frame for analysis.

Code
sim_df <- jaccard_df %>%
  filter(
    N_occu_1  != 0,
    N_occu_2  != 0,
    !(N_occu_1 == 0 & N_occu_2 == 0),
    !(N_occu_1 == 1 & N_occu_2 == 1) 
  ) %>%
  mutate(
    universe_size = length,
    change_type   = factor(flip_type),
    n_changes     = num_changes,
    j_dissim      = 1 - jaccard,
    aoo_1         = N_occu_1,
    aoo_2         = N_occu_2,
    a             = intersect,
    b             = unique_2,
    c             = unique_1,
    d             = universe_size - a - b - c,
    log_ratio     = log(aoo_2 / aoo_1),
    rel_persist   = a / universe_size,
    change_int    = (b + c) / aoo_1,
    trend         = factor(
      case_when(
        b > c ~ "expanding",
        b < c ~ "declining",
        TRUE  ~ "net_zero"
      ),
      levels = c("declining", "net_zero", "expanding")
    )
  ) %>%
  select(run, universe_size, change_type, n_changes, j_dissim, aoo_1, aoo_2,
         a, b, c, d, log_ratio, rel_persist, change_int, trend)
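
As a worked example of the derived metrics (made-up numbers): with universe_size = 10, a = 3 persisting cells, b = 3 colonizations and c = 1 extirpation, we get aoo_1 = a + c = 4, aoo_2 = a + b = 6, j_dissim = 1 - 3/7 ≈ 0.57, log_ratio = log(6/4) ≈ 0.41, rel_persist = 3/10 = 0.30, change_int = (3 + 1)/4 = 1, d = 10 - 3 - 3 - 1 = 3, and the trend is "expanding" because b > c.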

Create the expanding trend for the directional simulations from the declining (from_1_to_0) simulations: because the process is symmetrical, we can simply swap aoo_1 with aoo_2 (and b with c) and recalculate the change metrics.

Code
expanding <-
  sim_df %>%
  filter(change_type == "from_1_to_0") %>%
  mutate(change_type = "from_0_to_1") %>%
  rename(
    "b" = "c", #switch up number of colonizations (b) and extirpations (c)
    "c" = "b",
    "aoo_1" = "aoo_2", # switch up aoo 1 and aoo 2
    "aoo_2" = "aoo_1"
  ) %>%
  mutate(log_ratio = log(aoo_2 / aoo_1)) %>% # re-calculate log ratio
  mutate(change_int = (b + c) / aoo_1) %>% # re-calculate change intensity
  mutate(
    trend = as.factor(
      case_when(
        b > c ~ "expanding",
        b < c ~ "declining",
        b == c ~ "net_zero",
        # added A to sort levels for regression
        .default = NA
    ))
  )

skimr::skim(expanding)
Data summary
Name expanding
Number of rows 1074000
Number of columns 15
_______________________
Column type frequency:
character 1
factor 1
numeric 13
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
change_type 0 1 11 11 0 1 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
trend 0 1 FALSE 1 exp: 1074000

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
run 0 1 50.50 28.87 1 25.75 50.50 75.25 100.00 ▇▇▇▇▇
universe_size 0 1 937.65 229.25 10 1000.00 1000.00 1000.00 1000.00 ▁▁▁▁▇
n_changes 0 1 469.33 301.31 1 195.00 463.50 732.00 1000.00 ▇▆▆▆▆
j_dissim 0 1 0.90 0.24 0 1.00 1.00 1.00 1.00 ▁▁▁▁▇
aoo_2 0 1 190.30 298.99 2 8.00 32.00 256.00 999.00 ▇▁▁▁▁
aoo_1 0 1 64.15 177.67 1 1.00 1.00 1.00 998.00 ▇▁▁▁▁
a 0 1 63.36 177.95 0 0.00 0.00 0.00 998.00 ▇▁▁▁▁
c 0 1 0.79 0.41 0 1.00 1.00 1.00 1.00 ▂▁▁▁▇
b 0 1 126.95 195.12 1 8.00 32.00 128.00 999.00 ▇▁▁▁▁
d 0 1 746.56 349.10 -1 488.00 935.00 991.00 998.00 ▂▁▁▁▇
log_ratio 0 1 2.68 1.79 0 1.20 2.50 4.16 6.91 ▇▇▆▆▃
rel_persist 0 1 0.07 0.19 0 0.00 0.00 0.00 1.00 ▇▁▁▁▁
change_int 0 1 65.55 120.59 0 3.00 11.19 65.00 1000.00 ▇▁▁▁▁
Code
simulations_final <-
  sim_df %>%
  full_join(expanding) %>%
  mutate(trend = as.factor(trend),
         # "A_" prefix sorts random first so it becomes the reference level in the regressions
         change_type = case_when(change_type == "random" ~ "A_random",
                                 change_type != "random" ~ change_type)) %>%
  mutate(
    change_type2 =
      as.factor(
      case_when(change_type == "A_random" ~ "A_random",
        change_type %in% c("from_1_to_0", "from_0_to_1") ~ "directional",
        .default = NA
      ))
  )

🧳 Exploratory data analysis

Below we see that:

  1. for random change, a large initial AOO produces declining trends, while small and medium initial AOO lead to expansion; net-zero outcomes are rare.
Code
#
simulations_final %>%
  group_by(change_type) %>%
  summarize(n_trend = n_distinct(trend))
# A tibble: 3 × 2
  change_type n_trend
  <chr>         <int>
1 A_random          3
2 from_0_to_1       1
3 from_1_to_0       1
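
As a more direct check of observation (1) above (a sketch; the occupancy-class cut points are arbitrary and only for illustration), the trend frequencies can be tabulated against relative initial occupancy:

Code
# Trend frequencies by relative initial occupancy, random change only
simulations_final %>%
  filter(change_type == "A_random") %>%
  mutate(occ_class = cut(aoo_1 / universe_size,
                         breaks = c(0, 0.1, 0.5, 1),
                         labels = c("small", "medium", "large"))) %>%
  count(occ_class, trend)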
Code
simulations_final %>%
  filter(run <=1) %>%
  ggplot(
    aes(x = trend, y = change_type, fill = aoo_1 / universe_size)
  ) +
  geom_tile(alpha = 0.8) +
  facet_wrap(~ universe_size)+
  ggthemes::theme_few()

Code
simulations_final %>%
  filter(run <=1) %>%
  ggplot(
    aes(x = trend, y = change_type, col = aoo_1 / universe_size)
  ) +
  geom_jitter(alpha = 0.8) +
  facet_wrap(~ universe_size)+
  ggthemes::theme_few()

1. Univariate distributions

Code
library(patchwork)

Attaching package: 'patchwork'
The following object is masked from 'package:terra':

    area
Code
g1 <- ggplot(simulations_final, aes(j_dissim)) + 
  geom_histogram(bins = 20) + 
  labs(title = "Jaccard dissimilarity")+
  ggthemes::theme_few()

g2 <- ggplot(simulations_final, aes(log_ratio)) + 
  geom_histogram(bins = 20) + 
  labs(title = "log ratio AOO")+
  ggthemes::theme_few()

g3 <- ggplot(simulations_final, aes(change_int)) + 
  geom_histogram(bins = 10) + 
  labs(title = "Change intensity")+
  ggthemes::theme_few()

(g1 | g2) / g3

2. Correlation matrix

Below we see that:

  • Jaccard dissimilarity correlates most strongly with relative persistence (r = -0.86) and with a (r = -0.82): persistence drives J dissimilarity.
  • aoo_1 is also important (r = -0.62): the higher the initial occupancy, the lower the dissimilarity.
  • Surprisingly, d also shows a fairly high correlation (r = 0.39, similar to the correlation with n_changes, r = 0.42), indicating that the number of unoccupied cells plays an important role in determining how much change can occur.
  • In contrast, the log ratio is driven by change intensity (r = 0.58), aoo_2 (r = 0.42), c (r = -0.42) and b (r = 0.56), while persistence plays a subordinate role (r = -0.12 and r = -0.11 for relative persistence and a, respectively).
  • Again, d is also weakly correlated with the log ratio (r = -0.14).
Code
# Select numeric columns and compute correlation with targets
cor_df_focus <- 
  simulations_final %>%
  select(-run) %>%
  select(where(is.numeric)) %>%
  correlate() %>%
  corrr::focus(j_dissim, log_ratio) %>%
  mutate(
    j_dissim = round(j_dissim, 2),
    log_ratio = round(log_ratio, 2)
  )
# print correlations with log ratio and j
kableExtra::kable(cor_df_focus)
term j_dissim log_ratio
universe_size 0.07 0.03
n_changes 0.42 0.06
aoo_1 -0.62 -0.33
aoo_2 -0.35 0.42
a -0.82 -0.11
b 0.14 0.56
c -0.14 -0.42
d 0.39 -0.14
rel_persist -0.86 -0.12
change_int 0.20 0.58
Code
cor_df <- simulations_final %>%
  select(where(is.numeric)) %>%
  select(-run) %>%
  correlate() %>%
  rearrange()

# shaved plot
p <- cor_df %>%
  shave() %>%
  rplot(print_cor = TRUE)

# Rotate x-axis labels by 45 degrees for readability
# (apply theme_few() first so it does not override the label rotation)
p +
  ggthemes::theme_few() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

3. Pair plot

Below we see that change intensity is stronger for expansions than for declines.

Code
library(GGally)

# a) small universe
simulations_final %>%
  filter(run <= 3 & universe_size == 10) %>%
  select(j_dissim, log_ratio, rel_persist, change_int, change_type) %>%
  ggpairs(aes(alpha = 0.05, col = as.factor(as.character(change_type))),
          progress = FALSE,
          columns = c("j_dissim", "log_ratio", "rel_persist", "change_int"))+
  ggthemes::theme_few()+
  ggtitle("small universe (N = 10)")

Code
# b) medium universe
simulations_final %>%
  filter(run <= 3 & universe_size == 100) %>%
  select(j_dissim, log_ratio, rel_persist, change_int, change_type) %>%
  ggpairs(aes(alpha = 0.05, col = as.factor(as.character(change_type))),
          progress = FALSE,
          columns = c("j_dissim", "log_ratio", "rel_persist", "change_int"))+
  ggthemes::theme_few()+
  ggtitle("medium universe (N = 100)")

Code
# c) large universe
simulations_final %>%
  filter(run <= 3 & universe_size == 1000) %>%
  select(j_dissim, log_ratio, rel_persist, change_int, change_type) %>%
  ggpairs(aes(alpha = 0.05, col = as.factor(as.character(change_type))),
          progress = FALSE,
          columns = c("j_dissim", "log_ratio", "rel_persist", "change_int"))+
  ggthemes::theme_few()+
  ggtitle("large universe (N = 1000)")

4. Linear models

Question: which of the parameters drive J dissimilarity, and which drive the log ratio?

Code
# scale vars for lms:
df_scaled <- 
  simulations_final %>% 
  select(j_dissim, 
         log_ratio,
         universe_size, 
         aoo_1, 
         n_changes, 
         change_type) %>% 
  mutate(across(c(universe_size, aoo_1, n_changes),
                scale, .names = "z_{.col}"), .keep = "unused")
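
Side note: `scale()` returns a one-column matrix, which is why the augmented output further below shows columns like `z_universe_size[,1]`. If plain numeric columns are preferred, a variant like the following sketch (a drop-in replacement for the chunk above) coerces them with `as.numeric()`; the fitted coefficients are unchanged either way.

Code
# Variant: coerce the z-scored predictors to plain numeric vectors
df_scaled <- 
  simulations_final %>% 
  select(j_dissim, 
         log_ratio,
         universe_size, 
         aoo_1, 
         n_changes, 
         change_type) %>% 
  mutate(across(c(universe_size, aoo_1, n_changes),
                \(x) as.numeric(scale(x)), .names = "z_{.col}"), .keep = "unused")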

Jaccard model

Code
# Jaccard parameter model
m_j <-
  lm(
    j_dissim ~
      z_universe_size +
      z_aoo_1 *
        z_n_changes +
      change_type,
    data = df_scaled
  )

# J results:
summary(m_j)

Call:
lm(formula = j_dissim ~ z_universe_size + z_aoo_1 * z_n_changes + 
    change_type, data = df_scaled)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.85866 -0.02673  0.01706  0.06964  0.26638 

Coefficients:
                         Estimate Std. Error  t value Pr(>|t|)    
(Intercept)             8.519e-01  1.213e-04  7023.73   <2e-16 ***
z_universe_size         1.372e-02  8.009e-05   171.25   <2e-16 ***
z_aoo_1                -1.554e-01  7.440e-05 -2088.20   <2e-16 ***
z_n_changes             9.930e-02  7.825e-05  1269.07   <2e-16 ***
change_typefrom_0_to_1  1.076e-02  1.786e-04    60.22   <2e-16 ***
change_typefrom_1_to_0  6.471e-02  1.755e-04   368.80   <2e-16 ***
z_aoo_1:z_n_changes     9.181e-02  7.541e-05  1217.49   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.1316 on 3332754 degrees of freedom
Multiple R-squared:  0.7088,    Adjusted R-squared:  0.7088 
F-statistic: 1.352e+06 on 6 and 3332754 DF,  p-value: < 2.2e-16
Code
broom::glance(m_j)
# A tibble: 1 × 12
  r.squared adj.r.squared sigma statistic p.value    df   logLik     AIC     BIC
      <dbl>         <dbl> <dbl>     <dbl>   <dbl> <dbl>    <dbl>   <dbl>   <dbl>
1     0.709         0.709 0.132  1351850.       0     6 2029000. -4.06e6 -4.06e6
# ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
Code
d <- broom::tidy(m_j, conf.int = TRUE) %>% 
  arrange(desc(abs(statistic)))   # biggest |t| first
augment(m_j, data = df_scaled)
# A tibble: 3,332,761 × 12
   j_dissim log_ratio change_type z_universe_size[,1] z_aoo_1[,1]
      <dbl>     <dbl> <chr>                     <dbl>       <dbl>
 1    0.667     1.10  A_random                  -4.02      -0.531
 2    0.75      1.39  A_random                  -4.02      -0.531
 3    1         1.10  A_random                  -4.02      -0.531
 4    1         1.39  A_random                  -4.02      -0.531
 5    1         1.61  A_random                  -4.02      -0.531
 6    1         1.79  A_random                  -4.02      -0.531
 7    1         1.95  A_random                  -4.02      -0.531
 8    1         2.08  A_random                  -4.02      -0.531
 9    1         2.20  A_random                  -4.02      -0.531
10    0.5       0.693 A_random                  -4.02      -0.531
# ℹ 3,332,751 more rows
# ℹ 7 more variables: z_n_changes <dbl[,1]>, .fitted <dbl>, .resid <dbl>,
#   .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>
Code
ggplot(d, 
       aes(estimate, term, xmin = conf.low, xmax = conf.high, height = 0)) +
  geom_point() +
  geom_vline(xintercept = 0, lty = 4) +
  geom_errorbarh()+
  ggthemes::theme_few()

log ratio model

Code
# log ratio parameter model
m_lr <-
  lm(
    log_ratio ~
      z_universe_size +
      z_aoo_1 *
        z_n_changes +
      change_type,
    data = df_scaled
  )

# log ratio results:
summary(m_lr)

Call:
lm(formula = log_ratio ~ z_universe_size + z_aoo_1 * z_n_changes + 
    change_type, data = df_scaled)

Residuals:
    Min      1Q  Median      3Q     Max 
-6.7296 -1.4053 -0.3464  1.2683  4.6171 

Coefficients:
                         Estimate Std. Error  t value Pr(>|t|)    
(Intercept)             2.5643723  0.0016258  1577.27   <2e-16 ***
z_universe_size         0.0824272  0.0010736    76.77   <2e-16 ***
z_aoo_1                -0.7632115  0.0009974  -765.24   <2e-16 ***
z_n_changes             0.1499282  0.0010489   142.94   <2e-16 ***
change_typefrom_0_to_1 -0.1653424  0.0023943   -69.06   <2e-16 ***
change_typefrom_1_to_0 -5.0906715  0.0023519 -2164.52   <2e-16 ***
z_aoo_1:z_n_changes    -0.4051572  0.0010108  -400.82   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.765 on 3332754 degrees of freedom
Multiple R-squared:  0.6849,    Adjusted R-squared:  0.6849 
F-statistic: 1.207e+06 on 6 and 3332754 DF,  p-value: < 2.2e-16
Code
broom::glance(m_lr)
# A tibble: 1 × 12
  r.squared adj.r.squared sigma statistic p.value    df    logLik     AIC    BIC
      <dbl>         <dbl> <dbl>     <dbl>   <dbl> <dbl>     <dbl>   <dbl>  <dbl>
1     0.685         0.685  1.76  1207248.       0     6 -6621586.  1.32e7 1.32e7
# ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
Code
e <- broom::tidy(m_lr, conf.int = TRUE) %>% 
  arrange(desc(abs(statistic)))

ggplot(e, 
       aes(estimate, term, xmin = conf.low, xmax = conf.high, height = 0)) +
  geom_point() +
  geom_vline(xintercept = 0, lty = 4) +
  geom_errorbarh()+
  ggthemes::theme_few()

Code
# tidy merged results
bind_rows(
  tidy(m_j)  %>% mutate(model = "J dissim ~ ..."),
  tidy(m_lr) %>% mutate(model = "log ratio ~ ...")
) %>%
  select(model, term, estimate, std.error, statistic, p.value) %>%
  kableExtra::kable()
model term estimate std.error statistic p.value
J dissim ~ … (Intercept) 0.8518821 0.0001213 7023.73433 0
J dissim ~ … z_universe_size 0.0137158 0.0000801 171.24987 0
J dissim ~ … z_aoo_1 -0.1553669 0.0000744 -2088.20530 0
J dissim ~ … z_n_changes 0.0993003 0.0000782 1269.07146 0
J dissim ~ … change_typefrom_0_to_1 0.0107563 0.0001786 60.22100 0
J dissim ~ … change_typefrom_1_to_0 0.0647061 0.0001754 368.80429 0
J dissim ~ … z_aoo_1:z_n_changes 0.0918062 0.0000754 1217.48742 0
log ratio ~ … (Intercept) 2.5643723 0.0016258 1577.26832 0
log ratio ~ … z_universe_size 0.0824272 0.0010736 76.77425 0
log ratio ~ … z_aoo_1 -0.7632115 0.0009974 -765.23621 0
log ratio ~ … z_n_changes 0.1499282 0.0010489 142.94027 0
log ratio ~ … change_typefrom_0_to_1 -0.1653424 0.0023943 -69.05691 0
log ratio ~ … change_typefrom_1_to_0 -5.0906715 0.0023519 -2164.51994 0
log ratio ~ … z_aoo_1:z_n_changes -0.4051572 0.0010108 -400.82237 0

5. Summary by change type

Notes:

  • random-expanding: higher J dissimilarity than random-declining (J for random-expanding is similar to directional-expanding)
  • random-declining: lower J dissimilarity than directional-declining
  • random-expanding: more positive log ratio than directional-expanding
  • random-declining: less negative log ratio than directional-declining

Code
simulations_final %>%
  group_by(change_type, trend) %>%
  summarise(
    across(c(j_dissim, log_ratio, change_int, rel_persist), 
           list(mean = mean, 
                sd = sd), 
           .names = "{.col}_{.fn}"),
    .groups = "drop"
  ) %>%
  mutate(across(3:10, \(x) round(x, 3)))
# A tibble: 5 × 10
  change_type trend     j_dissim_mean j_dissim_sd log_ratio_mean log_ratio_sd
  <chr>       <fct>             <dbl>       <dbl>          <dbl>        <dbl>
1 A_random    declining         0.569       0.29          -0.584        0.866
2 A_random    net_zero          0.363       0.285          0            0    
3 A_random    expanding         0.896       0.188          3.08         1.91 
4 from_0_to_1 expanding         0.895       0.242          2.68         1.79 
5 from_1_to_0 declining         0.895       0.242         -2.68         1.79 
# ℹ 4 more variables: change_int_mean <dbl>, change_int_sd <dbl>,
#   rel_persist_mean <dbl>, rel_persist_sd <dbl>

6. 3D Surface Plots

jaccard

Code
# make 3D surface
gam_mod <- gam(j_dissim ~ 
                 te(aoo_1, n_changes), 
               data = simulations_final %>% 
                 filter(change_type == "A_random" & universe_size == 100))


summary(gam_mod)

Family: gaussian 
Link function: identity 

Formula:
j_dissim ~ te(aoo_1, n_changes)

Parametric coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 0.7778970  0.0001139    6827   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Approximate significance of smooth terms:
                    edf Ref.df      F p-value    
te(aoo_1,n_changes)  24     24 222969  <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

R-sq.(adj) =  0.985   Deviance explained = 98.5%
GCV = 0.0010377  Scale est. = 0.0010374  n = 79897
Code
# Predict over a grid
grid_df <- expand.grid(
  aoo_1 = seq(1,100, length = 100),
  n_changes = seq(1,100, length = 100)
)

grid_df$pred_jaccard <- predict(gam_mod, newdata = grid_df)
range(grid_df$pred_jaccard)
[1] 0.01025627 1.00786810
Code
# Reshape into matrix for surface plot
z_matrix <- matrix(grid_df$pred_jaccard, nrow = 100, ncol = 100)
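# Note (assumption about plotly's surface convention): rows of z map to the y axis and
# columns to x. Because aoo_1 varies fastest in expand.grid(), rows of z_matrix
# correspond to aoo_1 here; if the rendered axes look swapped, plot t(z_matrix) instead.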

p <- plot_ly(
  x = ~ unique(grid_df$aoo_1),
  y = ~ unique(grid_df$n_changes),
  z = ~z_matrix,
  showlegend = FALSE
) %>%
  add_surface(opacity = 0.7, showscale = FALSE) %>%
  layout(
    title = "Jaccard Surface",
    scene = list(
      xaxis = list(title = "Initial Occupancy"),
      yaxis = list(title = "Number of Changes"),
      zaxis = list(title = "Predicted Jaccard")
    )
  )%>%
  config(
    toImageButtonOptions = list(
      format = "svg",
      filename = "3d_plot_J",
      width = 1000,
      height = 700
    )
  )

## add boxes outside of cube
# Compute axis limits
xlim <- range(grid_df$aoo_1)
ylim <- range(grid_df$n_changes)
zlim <- range(grid_df$pred_jaccard)

# Define cube corners
x_vals <- xlim
y_vals <- ylim
z_vals <- zlim

# Function to create lines between pairs of corners
add_box_edges <- function(plot) {
  lines <- list(
    # bottom rectangle
    list(x = x_vals, y = rep(y_vals[1], 2), z = rep(z_vals[1], 2)),
    list(x = x_vals, y = rep(y_vals[2], 2), z = rep(z_vals[1], 2)),
    list(x = rep(x_vals[1], 2), y = y_vals, z = rep(z_vals[1], 2)),
    list(x = rep(x_vals[2], 2), y = y_vals, z = rep(z_vals[1], 2)),

    # top rectangle
    list(x = x_vals, y = rep(y_vals[1], 2), z = rep(z_vals[2], 2)),
    list(x = x_vals, y = rep(y_vals[2], 2), z = rep(z_vals[2], 2)),
    list(x = rep(x_vals[1], 2), y = y_vals, z = rep(z_vals[2], 2)),
    list(x = rep(x_vals[2], 2), y = y_vals, z = rep(z_vals[2], 2)),

    # vertical lines
    list(x = rep(x_vals[1], 2), y = rep(y_vals[1], 2), z = z_vals),
    list(x = rep(x_vals[1], 2), y = rep(y_vals[2], 2), z = z_vals),
    list(x = rep(x_vals[2], 2), y = rep(y_vals[1], 2), z = z_vals),
    list(x = rep(x_vals[2], 2), y = rep(y_vals[2], 2), z = z_vals)
  )

  for (edge in lines) {
    plot <- plot %>% add_trace(
      type = "scatter3d",
      mode = "lines",
      x = edge$x,
      y = edge$y,
      z = edge$z,
      line = list(color = "black", width = 2),
      showlegend = FALSE
    )
  }

  return(plot)
}

# Add the box outlines
p_with_box <- add_box_edges(p)


# Add contours
fig <- p_with_box %>%
  add_surface(
    opacity = 0.7,
    contours = list(
      z = list(
        show = TRUE,
        usecolormap = TRUE,
        highlightcolor = "#ff0000",
        project = list(z = TRUE)
      )
    )
  )

fig %>%
  layout(scene = list(
    camera = list(
      eye = list(
        x = -1.25,
        y = 1.25,
        z = 1.25
      )
    )
  ))

log ratio

Code
# Create smoothed surface with GAM
gam_mod <- gam(log_ratio ~                  
                 te(aoo_1, n_changes), 
               data = simulations_final %>% 
                 filter(change_type == "A_random" & universe_size == 100))

summary(gam_mod)

Family: gaussian 
Link function: identity 

Formula:
log_ratio ~ te(aoo_1, n_changes)

Parametric coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 1.3659201  0.0004397    3106   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Approximate significance of smooth terms:
                    edf Ref.df      F p-value    
te(aoo_1,n_changes)  24     24 574834  <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

R-sq.(adj) =  0.994   Deviance explained = 99.4%
GCV = 0.015455  Scale est. = 0.01545   n = 79897
Code
# Predict over a grid
grid_df <- expand.grid(
  aoo_1 = seq(1,100, length = 100),
  n_changes = seq(1,100, length = 100)
)

grid_df$pred_lr <- predict(gam_mod, newdata = grid_df)

# Reshape into matrix for surface plot
z_matrix <- matrix(grid_df$pred_lr, nrow = 100, ncol = 100)

p <- plot_ly(
  x = ~ unique(grid_df$aoo_1),
  y = ~ unique(grid_df$n_changes),
  z = ~z_matrix
) %>%
  add_surface(opacity = 0.7,
              showscale = FALSE) %>%
  layout(
    title = "Log Ratio Surface",
    scene = list(
      xaxis = list(title = "Initial Occupancy"),
      yaxis = list(title = "Number of Changes"),
      zaxis = list(title = "Predicted log ratio")
    )
  )%>%
  config(
    toImageButtonOptions = list(
      format = "svg",
      filename = "3d_plot_J",
      width = 1000,
      height = 700
    )
  )



## add boxes outside of cube
# Compute axis limits
xlim <- range(grid_df$aoo_1)
ylim <- range(grid_df$n_changes)
zlim <- range(grid_df$pred_lr)

# Define cube corners
x_vals <- xlim
y_vals <- ylim
z_vals <- zlim

# Function to create lines between pairs of corners
add_box_edges <- function(plot) {
  lines <- list(
    # bottom rectangle
    list(x = x_vals, y = rep(y_vals[1], 2), z = rep(z_vals[1], 2)),
    list(x = x_vals, y = rep(y_vals[2], 2), z = rep(z_vals[1], 2)),
    list(x = rep(x_vals[1], 2), y = y_vals, z = rep(z_vals[1], 2)),
    list(x = rep(x_vals[2], 2), y = y_vals, z = rep(z_vals[1], 2)),

    # top rectangle
    list(x = x_vals, y = rep(y_vals[1], 2), z = rep(z_vals[2], 2)),
    list(x = x_vals, y = rep(y_vals[2], 2), z = rep(z_vals[2], 2)),
    list(x = rep(x_vals[1], 2), y = y_vals, z = rep(z_vals[2], 2)),
    list(x = rep(x_vals[2], 2), y = y_vals, z = rep(z_vals[2], 2)),

    # vertical lines
    list(x = rep(x_vals[1], 2), y = rep(y_vals[1], 2), z = z_vals),
    list(x = rep(x_vals[1], 2), y = rep(y_vals[2], 2), z = z_vals),
    list(x = rep(x_vals[2], 2), y = rep(y_vals[1], 2), z = z_vals),
    list(x = rep(x_vals[2], 2), y = rep(y_vals[2], 2), z = z_vals)
  )

  for (edge in lines) {
    plot <- plot %>% add_trace(
      type = "scatter3d",
      mode = "lines",
      x = edge$x,
      y = edge$y,
      z = edge$z,
      line = list(color = "black", width = 2),
      showlegend = FALSE
    )
  }

  return(plot)
}

# Add the box outlines
p_with_box <- add_box_edges(p)
p_with_box
Code
# Add contours
fig <- p_with_box %>%
  add_surface(
    opacity = 0.7,
    contours = list(
      z = list(
        show = TRUE,
        usecolormap = TRUE,
        highlightcolor = "#ff0000",
        project = list(z = TRUE)
      )
    )
  )


fig %>%
  layout(
    scene = list(
      camera = list(
        eye = list(x = -1.25, y = 1.25, z = 1.25)
      )
    )
  )

7. Data summary

Code
# quick check:
simulations_final %>%
  filter(change_type == "A_random") %>%
  group_by(universe_size, aoo_1, n_changes, b, c) %>%
  summarize(
    mean_j = mean(j_dissim),
    sd_j = sd(j_dissim),
    n = n()
  )
`summarise()` has grouped output by 'universe_size', 'aoo_1', 'n_changes', 'b'.
You can override using the `.groups` argument.
# A tibble: 123,891 × 8
# Groups:   universe_size, aoo_1, n_changes, b [123,891]
   universe_size aoo_1 n_changes     b     c mean_j  sd_j     n
           <dbl> <int>     <dbl> <int> <int>  <dbl> <dbl> <int>
 1            10     1         1     1     0  0.5       0    92
 2            10     1         2     2     0  0.667     0    83
 3            10     1         3     2     1  1         0    32
 4            10     1         3     3     0  0.75      0    68
 5            10     1         4     3     1  1         0    51
 6            10     1         4     4     0  0.8       0    49
 7            10     1         5     4     1  1         0    56
 8            10     1         5     5     0  0.833     0    44
 9            10     1         6     5     1  1         0    65
10            10     1         6     6     0  0.857     0    35
# ℹ 123,881 more rows
Code
# --> for directional change, there is no variation in J.
simulations_final %>%
  filter(change_type == "A_random" & universe_size == 1000) %>%
  group_by(trend) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 1100000
Number of columns 16
_______________________
Column type frequency:
character 1
factor 1
numeric 13
________________________
Group variables trend

Variable type: character

skim_variable trend n_missing complete_rate min max empty n_unique whitespace
change_type declining 0 1 8 8 0 1 0
change_type net_zero 0 1 8 8 0 1 0
change_type expanding 0 1 8 8 0 1 0

Variable type: factor

skim_variable trend n_missing complete_rate ordered n_unique top_counts
change_type2 declining 0 1 FALSE 1 A_r: 177637, dir: 0
change_type2 net_zero 0 1 FALSE 1 A_r: 2376, dir: 0
change_type2 expanding 0 1 FALSE 1 A_r: 919987, dir: 0

Variable type: numeric

skim_variable trend n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
run declining 0 1 50.48 28.88 1.00 25.00 51.00 75.00 100.00 ▇▇▇▇▇
run net_zero 0 1 50.12 29.16 1.00 25.00 49.00 76.00 100.00 ▇▇▇▇▇
run expanding 0 1 50.50 28.86 1.00 26.00 50.00 76.00 100.00 ▇▇▇▇▇
universe_size declining 0 1 1000.00 0.00 1000.00 1000.00 1000.00 1000.00 1000.00 ▁▁▇▁▁
universe_size net_zero 0 1 1000.00 0.00 1000.00 1000.00 1000.00 1000.00 1000.00 ▁▁▇▁▁
universe_size expanding 0 1 1000.00 0.00 1000.00 1000.00 1000.00 1000.00 1000.00 ▁▁▇▁▁
n_changes declining 0 1 524.12 287.63 1.00 279.00 536.00 776.00 1000.00 ▆▇▇▇▇
n_changes net_zero 0 1 272.53 249.35 2.00 46.00 202.00 456.50 934.00 ▇▃▂▂▁
n_changes expanding 0 1 496.53 288.52 1.00 246.00 494.00 746.00 1000.00 ▇▇▇▇▇
j_dissim declining 0 1 0.57 0.29 0.00 0.33 0.61 0.83 1.00 ▅▅▅▆▇
j_dissim net_zero 0 1 0.37 0.28 0.00 0.09 0.33 0.62 0.95 ▇▅▃▃▃
j_dissim expanding 0 1 0.90 0.19 0.00 0.90 0.98 1.00 1.00 ▁▁▁▁▇
aoo_1 declining 0 1 785.93 241.96 16.00 512.00 999.00 999.00 999.00 ▁▁▆▁▇
aoo_1 net_zero 0 1 486.47 89.11 8.00 512.00 512.00 512.00 512.00 ▁▁▁▁▇
aoo_1 expanding 0 1 66.78 103.98 1.00 4.00 16.00 64.00 999.00 ▇▁▁▁▁
aoo_2 declining 0 1 497.02 216.44 1.00 442.00 495.00 555.00 998.00 ▂▂▇▂▂
aoo_2 net_zero 0 1 486.47 89.11 8.00 512.00 512.00 512.00 512.00 ▁▁▁▁▇
aoo_2 expanding 0 1 500.98 257.25 2.00 294.00 510.00 707.00 1000.00 ▅▇▇▇▅
a declining 0 1 379.41 272.47 0.00 154.00 327.00 555.00 998.00 ▇▇▅▃▃
a net_zero 0 1 350.21 129.03 7.00 253.00 377.00 467.00 511.00 ▁▂▃▃▇
a expanding 0 1 35.61 70.73 0.00 1.00 7.00 33.00 999.00 ▇▁▁▁▁
b declining 0 1 117.60 161.80 0.00 0.00 1.00 245.00 488.00 ▇▁▁▁▂
b net_zero 0 1 136.26 124.68 1.00 23.00 101.00 228.25 467.00 ▇▃▂▂▁
b expanding 0 1 465.37 277.38 1.00 225.00 458.00 697.00 999.00 ▇▇▇▇▆
c declining 0 1 406.51 259.11 1.00 203.00 374.00 556.00 999.00 ▇▇▆▃▃
c net_zero 0 1 136.26 124.68 1.00 23.00 101.00 228.25 467.00 ▇▃▂▂▁
c expanding 0 1 31.16 54.59 0.00 1.00 7.00 32.00 470.00 ▇▁▁▁▁
d declining 0 1 96.47 143.41 0.00 0.00 1.00 175.00 984.00 ▇▂▁▁▁
d net_zero 0 1 377.26 174.13 21.00 259.75 387.00 465.00 991.00 ▃▇▇▁▁
d expanding 0 1 467.86 274.44 0.00 234.00 459.00 695.00 998.00 ▇▇▇▇▆
log_ratio declining 0 1 -0.58 0.88 -6.91 -0.81 -0.12 -0.04 0.00 ▁▁▁▁▇
log_ratio net_zero 0 1 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ▁▁▇▁▁
log_ratio expanding 0 1 3.16 1.92 0.00 1.51 3.10 4.73 6.91 ▇▇▇▇▅
rel_persist declining 0 1 0.38 0.27 0.00 0.15 0.33 0.56 1.00 ▇▇▅▃▃
rel_persist net_zero 0 1 0.35 0.13 0.01 0.25 0.38 0.47 0.51 ▁▂▃▃▇
rel_persist expanding 0 1 0.04 0.07 0.00 0.00 0.01 0.03 1.00 ▇▁▁▁▁
change_int declining 0 1 0.76 0.51 0.00 0.35 0.68 1.00 1.95 ▇▇▆▂▃
change_int net_zero 0 1 0.53 0.48 0.00 0.09 0.39 0.89 1.82 ▇▃▂▂▁
change_int expanding 0 1 108.61 191.28 0.00 4.33 22.25 113.50 1000.00 ▇▁▁▁▁

8. J by net area trend

For SI (Fig S9)

Code
# Boxplot for j_dissim
ggplot(simulations_final, aes(x = trend, y = j_dissim, fill = trend)) +
  geom_hline(yintercept = 0.5, col = "darkgrey") +
  geom_boxplot(outlier.colour = "darkgrey") +
  ggthemes::theme_par() +
  labs(title = "Jaccard Dissimilarity by Trend", 
       x = "Trend", 
       y = "j_dissim") +
  facet_grid(universe_size ~ change_type2) +
  scale_fill_manual(values = c("#d7191c","#ffffbf", "#1a9641")) +
  theme(
    axis.text.x = element_text(
      angle = 45,
      vjust = 0.9,
      hjust = 1
    )
  )

Code
# ggsave(filename = "C_02_J_simulations_j_trends_boxplots.bmp", path = here::here("Figures/C_validation/"), width = 10, height = 12)

9. log ratio boxplot

For SI (Fig S8)

Code
# Boxplot for log_ratio
ggplot(simulations_final, aes(x = trend, y = log_ratio, fill = trend)) +
  geom_hline(yintercept = 0, col = "darkgrey") +
  geom_boxplot(outlier.colour = "darkgrey") +
  ggthemes::theme_par() +
  labs(title = "Log Ratio by Trend", x = "Trend", y = "log_ratio") +
  facet_grid(universe_size ~ change_type2) +
  scale_fill_manual(values = c("#d7191c","#ffffbf", "#1a9641")) +
  theme(
    axis.text.x = element_text(
      angle = 45,
      vjust = 0.9,
      hjust = 1
    )
  )

Code
# ggsave(filename = "C_02_J_simulations_logRatio_trends_boxplots.bmp", path = here::here("Figures/C_validation/"), width = 10, height = 12)
Code
simulations_final %>%
  filter(change_type != "random") %>%
  filter(j_dissim != 1) %>%
  ggplot() +
  geom_histogram(aes(j_dissim))
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Code
simulations_final %>%
  filter(change_type == "A_random") %>%
  ggplot(
    aes(
      x = log_ratio,
      y = j_dissim
    )
  ) +
  geom_point(alpha = 0.5, size = 0.1) +
  facet_wrap(~universe_size) +
  ggthemes::theme_par()

Code
# Mean values (faster)
simulations_final %>%
  group_by(universe_size, change_type, n_changes) %>%
  summarize(
    mean_a = mean(a),
    mean_b = mean(b),
    mean_c = mean(c),
    mean_d = mean(d),
    mean_j = mean(j_dissim),
    mean_log = mean(log_ratio)
  ) %>%
  ggplot() +
  geom_point(
    aes(
      x = log(n_changes),
      y = mean_j
    )
  ) +
  facet_wrap(~ interaction(universe_size, change_type), scales = "free_x") +
  ggthemes::theme_few()
`summarise()` has grouped output by 'universe_size', 'change_type'. You can
override using the `.groups` argument.

Code
library(mgcv)

# fit gam
gam_mod <- gam(j_dissim ~ s(a) + s(b) + s(c) + s(d) + s(universe_size, k = 3) + change_type, data = simulations_final %>% filter(run < 3))

# plot smoothed effects
plot(gam_mod, pages = 1)

Code
#install.packages("gratia") # if not already installed
library(gratia)
Warning: package 'gratia' was built under R version 4.4.3

Attaching package: 'gratia'
The following object is masked from 'package:dials':

    penalty
The following object is masked from 'package:terra':

    draw
The following object is masked from 'package:stringr':

    boundary
Code
draw(gam_mod)