Simulating temporal J dissimilarity and log ratio under different scenarios of scale, occupancy, and magnitude of temporal change

C - 02: Temporal change simulations

Parameters to simulate (previewed in the sketch after this list):

  • universe size

  • occupancy at time 1 (occu1)

  • flip type: directional/random

  • number of changes

  • number of runs
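
To make the design concrete, the sketch below (with a truncated occupancy vector, purely for display) previews how the first three parameters are expanded into a full factorial grid; each combination is then repeated n_runs times, and the number of changes is swept from 1 to the universe size inside the simulation function.

Code
# Illustrative preview of the simulated design grid (truncated occu1 values)
head(expand.grid(
  length    = c(10, 100, 1000),
  occu1     = c(1, 2, 4, 8),              # the full run uses 2^(0:9) plus 999
  flip_type = c("random", "from_1_to_0"),
  stringsAsFactors = FALSE
))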

⚠️ Preparations

📚 Libraries

Code
suppressPackageStartupMessages({
  source(here::here("Code/00_Configuration.R"))

  package_list <-
    c(
      package_list,
      "skimr",
      "ggthemes",
      "plotly",
      "ggplot2",
      "mgcv",
      "GGally",
      "corrr",
      "broom"
    )

  lapply(package_list, require, character.only = TRUE)
})


rm(list = ls())

🔍 Jaccard formula
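
The index is built from the classic a/b/c decomposition of two occupancy sets: a = cells occupied at both times, b = cells occupied only at time 2 (colonizations), and c = cells occupied only at time 1 (extirpations). Jaccard similarity is then J = a / (a + b + c), and the temporal dissimilarity analysed later is 1 - J.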

Code
my_jaccard_abc <- function(set1, set2) {
  a <- length(intersect(set1, set2)) # shared occupied cells
  b <- length(setdiff(set2, set1))   # cells unique to time 2 (colonizations)
  c <- length(setdiff(set1, set2))   # cells unique to time 1 (extirpations)
  res <- data.frame(
    j = a / (a + b + c),
    intersect = a,
    unique_1 = c,
    unique_2 = b,
    N_occu_1 = length(set1),         # number of occupied cells at time 1
    N_occu_2 = sum(!is.na(set2))     # number of occupied cells at time 2 (0 if set2 is NA)
  )
  return(res)
}
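
A quick toy call (made-up sets, just to show the output format):

Code
# Cells 2 and 3 persist, cell 1 is lost, cell 4 is gained
my_jaccard_abc(set1 = c(1, 2, 3), set2 = c(2, 3, 4))
# -> j = 0.5, intersect = 2, unique_1 = 1, unique_2 = 1, N_occu_1 = 3, N_occu_2 = 3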

❇️ Simulation function

Code
set.seed(123) # For reproducibility

simulate_jaccard <- function(lengths, occu1, flip_types, n_runs = 1) {
  # Expand grid for parameter combinations
  params <- expand.grid(lengths, occu1, flip_types, stringsAsFactors = FALSE)
  colnames(params) <- c("length", "occu1", "flip_type")

  # Initialize storage
  jaccard_results <- list()

  # Loop through parameter combinations
  for (row_i in seq_len(nrow(params))) {
    length_i <- params$length[row_i]
    occu_1_i <- params$occu1[row_i]
    flip_type <- params$flip_type[row_i]

    # Ensure the number of occupied sites does not exceed the vector length
    occu_1_i <- min(occu_1_i, length_i)

    # Repeat the simulation `n_runs` times
    for (run in seq_len(n_runs)) {
      # Generate set1 (initially occupied sites)
      set1_i <- sample(1:length_i, occu_1_i, replace = FALSE)

      # Sequence of number of changes (from 1 to max_vector_length)
      n_changes <- seq(from = 1, to = length_i, by = 1)

      # Dataframe to store results for this run
      jaccard_temp <- data.frame(num_changes = n_changes, jaccard = NA, run = run)

      set2_temp_list <- vector("list", length(n_changes))

      # Modify set1 stepwise and store set2 states
      for (n_i in seq_along(n_changes)) {
        set2_temp <- set1_i
        changes_i <- n_changes[n_i]

        if (flip_type == "random") {
          flip_sites <- sample(1:length_i, size = changes_i, replace = FALSE)

          set2_i <- union( # Combine left overs after 1 to 0 flips with new cells from 0 to 1 flips
            setdiff(set2_temp, flip_sites), # Remove indices in `flip_sites` already in `set2_temp`
            intersect(flip_sites, setdiff(1:length_i, set2_temp)) # Add indices not in `set2_temp` (0 to 1; colonizations)
          )
        } else if (flip_type == "from_1_to_0") {
          # Guard: sample() treats a length-1 numeric x as 1:x, so duplicate the single site
          sample_mat <- if (length(set1_i) == 1) rep(set1_i, 2) else set1_i
          flip_sites <- sample(x = sample_mat, size = min(changes_i, length(set1_i)), replace = FALSE)
          set2_i <- setdiff(set2_temp, flip_sites) # Remove flipped sites from set1
          set2_i <- if (length(set2_i) == 0) NA else set2_i
        }

        # Store modified set2
        set2_temp_list[[n_i]] <- set2_i

        # Compute Jaccard similarity
        jaccard_res <- my_jaccard_abc(set1_i, set2_i)

        jaccard_temp$jaccard[n_i] <- jaccard_res$j
        jaccard_temp$intersect[n_i] <- jaccard_res$intersect
        jaccard_temp$unique_1[n_i] <- jaccard_res$unique_1
        jaccard_temp$unique_2[n_i] <- jaccard_res$unique_2
        jaccard_temp$N_occu_1[n_i] <- jaccard_res$N_occu_1
        jaccard_temp$N_occu_2[n_i] <- jaccard_res$N_occu_2
        jaccard_temp$length[n_i] <- length_i
        jaccard_temp$occu1[n_i] <- occu_1_i
        jaccard_temp$flip_type[n_i] <- flip_type
      }

      # Append results for this run
      jaccard_results[[length(jaccard_results) + 1]] <- jaccard_temp
    }
  }

  # Combine Jaccard results into a single dataframe for visualization
  jaccard_df <- do.call(rbind, jaccard_results)
  return(jaccard_df)
}
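
As a minimal sanity check (illustrative only, not part of the main run), the simulation function can be exercised on a single tiny universe:

Code
# One random-flip run on a 5-cell universe with 2 initially occupied cells
test_df <- simulate_jaccard(lengths = 5, occu1 = 2, flip_types = "random", n_runs = 1)
head(test_df)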

🪄 Run simulation

Code
# Parameters
lengths <- c(10, 100, 1000)
occu1 <- c(2^(0:floor(log2(1000))), 999) # 1, 2, 4, ..., 512, plus 999
flip_types <- c("random", "from_1_to_0")
n_runs <- 100 # Repeat each parameter combination 100 times

# Run simulations
jaccard_df <-
  simulate_jaccard(lengths, occu1, flip_types, n_runs)

# save as rds
saveRDS(jaccard_df, here(
  "Data/output/temp",
  "C_02_Jaccard_simulations_100.rds"
))

🦾 Prepare analysis data

Organize the data frame for analysis.

Code
sim_df <- jaccard_df %>%
  filter(
    N_occu_1  != 0,
    N_occu_2  != 0,
    !(N_occu_1 == 0 & N_occu_2 == 0),
    !(N_occu_1 == 1 & N_occu_2 == 1) 
  ) %>%
  mutate(
    universe_size = length,
    change_type   = factor(flip_type),
    n_changes     = num_changes,
    j_dissim      = 1 - jaccard,
    aoo_1         = N_occu_1,
    aoo_2         = N_occu_2,
    a             = intersect,
    b             = unique_2,
    c             = unique_1,
    d             = universe_size - a - b - c,
    log_ratio     = log(aoo_2 / aoo_1),
    rel_persist   = a / universe_size,
    change_int    = (b + c) / aoo_1,
    trend         = factor(
      case_when(
        b > c ~ "expanding",
        b < c ~ "declining",
        TRUE  ~ "net_zero"
      ),
      levels = c("declining", "net_zero", "expanding")
    )
  ) %>%
  select(run, universe_size, change_type, n_changes, j_dissim, aoo_1, aoo_2,
         a, b, c, d, log_ratio, rel_persist, change_int, trend)
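
As a worked example of the derived metrics (made-up numbers): with universe_size = 10, a = 3 persisting cells, b = 3 colonizations and c = 1 extirpation, we get aoo_1 = a + c = 4, aoo_2 = a + b = 6, j_dissim = 1 - 3/7 ≈ 0.57, log_ratio = log(6/4) ≈ 0.41, rel_persist = 3/10 = 0.30, change_int = (3 + 1)/4 = 1, d = 10 - 3 - 3 - 1 = 3, and the trend is "expanding" because b > c.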

Create the expanding trend for the directional simulations from the declining (from_1_to_0) simulations: because the process is symmetrical, we can simply swap aoo_1 with aoo_2 (and b with c) and recalculate the change metrics.

Code
expanding <-
  sim_df %>%
  filter(change_type == "from_1_to_0") %>%
  mutate(change_type = "from_0_to_1") %>%
  rename(
    "b" = "c", #switch up number of colonizations (b) and extirpations (c)
    "c" = "b",
    "aoo_1" = "aoo_2", # switch up aoo 1 and aoo 2
    "aoo_2" = "aoo_1"
  ) %>%
  mutate(log_ratio = log(aoo_2 / aoo_1)) %>% # re-calculate log ratio
  mutate(change_int = (b + c) / aoo_1) %>% # re-calculate change intensity
  mutate(
    trend = as.factor(
      case_when(
        b > c ~ "expanding",
        b < c ~ "declining",
        b == c ~ "net_zero",
        # added A to sort levels for regression
        .default = NA
    ))
  )

skimr::skim(expanding)
Data summary
Name expanding
Number of rows 1074000
Number of columns 15
_______________________
Column type frequency:
character 1
factor 1
numeric 13
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
change_type 0 1 11 11 0 1 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
trend 0 1 FALSE 1 exp: 1074000

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
run 0 1 50.50 28.87 1 25.75 50.50 75.25 100.00 ▇▇▇▇▇
universe_size 0 1 937.65 229.25 10 1000.00 1000.00 1000.00 1000.00 ▁▁▁▁▇
n_changes 0 1 469.33 301.31 1 195.00 463.50 732.00 1000.00 ▇▆▆▆▆
j_dissim 0 1 0.90 0.24 0 1.00 1.00 1.00 1.00 ▁▁▁▁▇
aoo_2 0 1 190.30 298.99 2 8.00 32.00 256.00 999.00 ▇▁▁▁▁
aoo_1 0 1 64.15 177.67 1 1.00 1.00 1.00 998.00 ▇▁▁▁▁
a 0 1 63.36 177.95 0 0.00 0.00 0.00 998.00 ▇▁▁▁▁
c 0 1 0.79 0.41 0 1.00 1.00 1.00 1.00 ▂▁▁▁▇
b 0 1 126.95 195.12 1 8.00 32.00 128.00 999.00 ▇▁▁▁▁
d 0 1 746.56 349.10 -1 488.00 935.00 991.00 998.00 ▂▁▁▁▇
log_ratio 0 1 2.68 1.79 0 1.20 2.50 4.16 6.91 ▇▇▆▆▃
rel_persist 0 1 0.07 0.19 0 0.00 0.00 0.00 1.00 ▇▁▁▁▁
change_int 0 1 65.55 120.59 0 3.00 11.19 65.00 1000.00 ▇▁▁▁▁
Code
simulations_final <-
  sim_df %>%
  full_join(expanding) %>%
  mutate(trend = as.factor(trend),
         # "A_" prefix sorts random first so it becomes the reference level in the regressions
         change_type = case_when(change_type == "random" ~ "A_random",
                                 change_type != "random" ~ change_type)) %>%
  mutate(
    change_type2 =
      as.factor(
      case_when(change_type == "A_random" ~ "A_random",
        change_type %in% c("from_1_to_0", "from_0_to_1") ~ "directional",
        .default = NA
      ))
  )

🧳 Exploratory data analysis

Below we see that:

  1. for random change, a large initial AOO produces declining trends, while small and medium initial AOO lead to expansion; net-zero outcomes are rare.
Code
#
simulations_final %>%
  group_by(change_type) %>%
  summarize(n_trend = n_distinct(trend))
# A tibble: 3 × 2
  change_type n_trend
  <chr>         <int>
1 A_random          3
2 from_0_to_1       1
3 from_1_to_0       1
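
As a more direct check of observation (1) above (a sketch; the occupancy-class cut points are arbitrary and only for illustration), the trend frequencies can be tabulated against relative initial occupancy:

Code
# Trend frequencies by relative initial occupancy, random change only
simulations_final %>%
  filter(change_type == "A_random") %>%
  mutate(occ_class = cut(aoo_1 / universe_size,
                         breaks = c(0, 0.1, 0.5, 1),
                         labels = c("small", "medium", "large"))) %>%
  count(occ_class, trend)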
Code
simulations_final %>%
  filter(run <=1) %>%
  ggplot(
    aes(x = trend, y = change_type, fill = aoo_1 / universe_size)
  ) +
  geom_tile(alpha = 0.8) +
  facet_wrap(~ universe_size)+
  ggthemes::theme_few()

Code
simulations_final %>%
  filter(run <=1) %>%
  ggplot(
    aes(x = trend, y = change_type, col = aoo_1 / universe_size)
  ) +
  geom_jitter(alpha = 0.8) +
  facet_wrap(~ universe_size)+
  ggthemes::theme_few()

1. Univariate distributions

Code
library(patchwork)

Attaching package: 'patchwork'
The following object is masked from 'package:terra':

    area
Code
g1 <- ggplot(simulations_final, aes(j_dissim)) + 
  geom_histogram(bins = 20) + 
  labs(title = "Jaccard dissimilarity")+
  ggthemes::theme_few()

g2 <- ggplot(simulations_final, aes(log_ratio)) + 
  geom_histogram(bins = 20) + 
  labs(title = "log ratio AOO")+
  ggthemes::theme_few()

g3 <- ggplot(simulations_final, aes(change_int)) + 
  geom_histogram(bins = 10) + 
  labs(title = "Change intensity")+
  ggthemes::theme_few()

(g1 | g2) / g3

2. Correlation matrix

Below we see that:

  • Jaccard dissimilarity correlates most strongly with relative persistence (r = -0.86) and with a (r = -0.82): persistence drives J dissimilarity.
  • aoo_1 is also important (r = -0.62): the higher the initial occupancy, the lower the dissimilarity.
  • Surprisingly, d also shows a fairly high correlation (r = 0.39, similar to the correlation with n_changes, r = 0.42), indicating that the number of unoccupied cells plays an important role in determining how much change can occur.
  • In contrast, the log ratio is driven by change intensity (r = 0.58), aoo_2 (r = 0.42), c (r = -0.42) and b (r = 0.56), while persistence plays a subordinate role (r = -0.12 and r = -0.11 for relative persistence and a, respectively).
  • Again, d is also weakly correlated with the log ratio (r = -0.14).
Code
# Select numeric columns and compute correlation with targets
cor_df_focus <- 
  simulations_final %>%
  select(-run) %>%
  select(where(is.numeric)) %>%
  correlate() %>%
  corrr::focus(j_dissim, log_ratio) %>%
  mutate(
    j_dissim = round(j_dissim, 2),
    log_ratio = round(log_ratio, 2)
  )
# print correlations with log ratio and j
kableExtra::kable(cor_df_focus)
term j_dissim log_ratio
universe_size 0.07 0.03
n_changes 0.42 0.06
aoo_1 -0.62 -0.33
aoo_2 -0.35 0.42
a -0.82 -0.11
b 0.14 0.56
c -0.14 -0.42
d 0.39 -0.14
rel_persist -0.86 -0.12
change_int 0.20 0.58
Code
cor_df <- simulations_final %>%
  select(where(is.numeric)) %>%
  select(-run) %>%
  correlate() %>%
  rearrange()

# shaved plot
p <- cor_df %>%
  shave() %>%
  rplot(print_cor = TRUE)

# Rotate x-axis labels by 45 degrees for readability
# (apply theme_few() first so it does not override the label rotation)
p +
  ggthemes::theme_few() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1))

3. Pair plot

Below we see that change intensity is stronger for expansions than for declines.

Code
library(GGally)

# a) small universe
simulations_final %>%
  filter(run <= 3 & universe_size == 10) %>%
  select(j_dissim, log_ratio, rel_persist, change_int, change_type) %>%
  ggpairs(aes(alpha = 0.05, col = as.factor(as.character(change_type))),
          progress = FALSE,
          columns = c("j_dissim", "log_ratio", "rel_persist", "change_int"))+
  ggthemes::theme_few()+
  ggtitle("small universe (N = 10)")

Code
# b) medium universe
simulations_final %>%
  filter(run <= 3 & universe_size == 100) %>%
  select(j_dissim, log_ratio, rel_persist, change_int, change_type) %>%
  ggpairs(aes(alpha = 0.05, col = as.factor(as.character(change_type))),
          progress = FALSE,
          columns = c("j_dissim", "log_ratio", "rel_persist", "change_int"))+
  ggthemes::theme_few()+
  ggtitle("medium universe (N = 100)")

Code
# c) large universe
simulations_final %>%
  filter(run <= 3 & universe_size == 1000) %>%
  select(j_dissim, log_ratio, rel_persist, change_int, change_type) %>%
  ggpairs(aes(alpha = 0.05, col = as.factor(as.character(change_type))),
          progress = FALSE,
          columns = c("j_dissim", "log_ratio", "rel_persist", "change_int"))+
  ggthemes::theme_few()+
  ggtitle("large universe (N = 1000)")

4. Linear models

Question: which of the parameters drive J dissimilarity, and which drive the log ratio?

Code
# scale vars for lms:
df_scaled <- 
  simulations_final %>% 
  select(j_dissim, 
         log_ratio,
         universe_size, 
         aoo_1, 
         n_changes, 
         change_type) %>% 
  mutate(across(c(universe_size, aoo_1, n_changes),
                scale, .names = "z_{.col}"), .keep = "unused")
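
Side note: `scale()` returns a one-column matrix, which is why the augmented output further below shows columns like `z_universe_size[,1]`. If plain numeric columns are preferred, a variant like the following sketch (a drop-in replacement for the chunk above) coerces them with `as.numeric()`; the fitted coefficients are unchanged either way.

Code
# Variant: coerce the z-scored predictors to plain numeric vectors
df_scaled <- 
  simulations_final %>% 
  select(j_dissim, 
         log_ratio,
         universe_size, 
         aoo_1, 
         n_changes, 
         change_type) %>% 
  mutate(across(c(universe_size, aoo_1, n_changes),
                \(x) as.numeric(scale(x)), .names = "z_{.col}"), .keep = "unused")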

Jaccard model

Code
# Jaccard parameter model
m_j <-
  lm(
    j_dissim ~
      z_universe_size +
      z_aoo_1 *
        z_n_changes +
      change_type,
    data = df_scaled
  )

# J results:
summary(m_j)

Call:
lm(formula = j_dissim ~ z_universe_size + z_aoo_1 * z_n_changes + 
    change_type, data = df_scaled)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.85866 -0.02673  0.01706  0.06964  0.26638 

Coefficients:
                         Estimate Std. Error  t value Pr(>|t|)    
(Intercept)             8.519e-01  1.213e-04  7023.73   <2e-16 ***
z_universe_size         1.372e-02  8.009e-05   171.25   <2e-16 ***
z_aoo_1                -1.554e-01  7.440e-05 -2088.20   <2e-16 ***
z_n_changes             9.930e-02  7.825e-05  1269.07   <2e-16 ***
change_typefrom_0_to_1  1.076e-02  1.786e-04    60.22   <2e-16 ***
change_typefrom_1_to_0  6.471e-02  1.755e-04   368.80   <2e-16 ***
z_aoo_1:z_n_changes     9.181e-02  7.541e-05  1217.49   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.1316 on 3332754 degrees of freedom
Multiple R-squared:  0.7088,    Adjusted R-squared:  0.7088 
F-statistic: 1.352e+06 on 6 and 3332754 DF,  p-value: < 2.2e-16
Code
broom::glance(m_j)
# A tibble: 1 × 12
  r.squared adj.r.squared sigma statistic p.value    df   logLik     AIC     BIC
      <dbl>         <dbl> <dbl>     <dbl>   <dbl> <dbl>    <dbl>   <dbl>   <dbl>
1     0.709         0.709 0.132  1351850.       0     6 2029000. -4.06e6 -4.06e6
# ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
Code
d <- broom::tidy(m_j, conf.int = TRUE) %>% 
  arrange(desc(abs(statistic)))   # biggest |t| first
augment(m_j, data = df_scaled)
# A tibble: 3,332,761 × 12
   j_dissim log_ratio change_type z_universe_size[,1] z_aoo_1[,1]
      <dbl>     <dbl> <chr>                     <dbl>       <dbl>
 1    0.667     1.10  A_random                  -4.02      -0.531
 2    0.75      1.39  A_random                  -4.02      -0.531
 3    1         1.10  A_random                  -4.02      -0.531
 4    1         1.39  A_random                  -4.02      -0.531
 5    1         1.61  A_random                  -4.02      -0.531
 6    1         1.79  A_random                  -4.02      -0.531
 7    1         1.95  A_random                  -4.02      -0.531
 8    1         2.08  A_random                  -4.02      -0.531
 9    1         2.20  A_random                  -4.02      -0.531
10    0.5       0.693 A_random                  -4.02      -0.531
# ℹ 3,332,751 more rows
# ℹ 7 more variables: z_n_changes <dbl[,1]>, .fitted <dbl>, .resid <dbl>,
#   .hat <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>
Code
ggplot(d, 
       aes(estimate, term, xmin = conf.low, xmax = conf.high, height = 0)) +
  geom_point() +
  geom_vline(xintercept = 0, lty = 4) +
  geom_errorbarh()+
  ggthemes::theme_few()

log ratio model

Code
# log ratio parameter model
m_lr <-
  lm(
    log_ratio ~
      z_universe_size +
      z_aoo_1 *
        z_n_changes +
      change_type,
    data = df_scaled
  )

# log ratio results:
summary(m_lr)

Call:
lm(formula = log_ratio ~ z_universe_size + z_aoo_1 * z_n_changes + 
    change_type, data = df_scaled)

Residuals:
    Min      1Q  Median      3Q     Max 
-6.7296 -1.4053 -0.3464  1.2683  4.6171 

Coefficients:
                         Estimate Std. Error  t value Pr(>|t|)    
(Intercept)             2.5643723  0.0016258  1577.27   <2e-16 ***
z_universe_size         0.0824272  0.0010736    76.77   <2e-16 ***
z_aoo_1                -0.7632115  0.0009974  -765.24   <2e-16 ***
z_n_changes             0.1499282  0.0010489   142.94   <2e-16 ***
change_typefrom_0_to_1 -0.1653424  0.0023943   -69.06   <2e-16 ***
change_typefrom_1_to_0 -5.0906715  0.0023519 -2164.52   <2e-16 ***
z_aoo_1:z_n_changes    -0.4051572  0.0010108  -400.82   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.765 on 3332754 degrees of freedom
Multiple R-squared:  0.6849,    Adjusted R-squared:  0.6849 
F-statistic: 1.207e+06 on 6 and 3332754 DF,  p-value: < 2.2e-16
Code
broom::glance(m_lr)
# A tibble: 1 × 12
  r.squared adj.r.squared sigma statistic p.value    df    logLik     AIC    BIC
      <dbl>         <dbl> <dbl>     <dbl>   <dbl> <dbl>     <dbl>   <dbl>  <dbl>
1     0.685         0.685  1.76  1207248.       0     6 -6621586.  1.32e7 1.32e7
# ℹ 3 more variables: deviance <dbl>, df.residual <int>, nobs <int>
Code
e <- broom::tidy(m_lr, conf.int = TRUE) %>% 
  arrange(desc(abs(statistic)))

ggplot(e, 
       aes(estimate, term, xmin = conf.low, xmax = conf.high, height = 0)) +
  geom_point() +
  geom_vline(xintercept = 0, lty = 4) +
  geom_errorbarh()+
  ggthemes::theme_few()

Code
# tidy merged results
bind_rows(
  tidy(m_j)  %>% mutate(model = "J dissim ~ ..."),
  tidy(m_lr) %>% mutate(model = "log ratio ~ ...")
) %>%
  select(model, term, estimate, std.error, statistic, p.value) %>%
  kableExtra::kable()
model term estimate std.error statistic p.value
J dissim ~ … (Intercept) 0.8518821 0.0001213 7023.73433 0
J dissim ~ … z_universe_size 0.0137158 0.0000801 171.24987 0
J dissim ~ … z_aoo_1 -0.1553669 0.0000744 -2088.20530 0
J dissim ~ … z_n_changes 0.0993003 0.0000782 1269.07146 0
J dissim ~ … change_typefrom_0_to_1 0.0107563 0.0001786 60.22100 0
J dissim ~ … change_typefrom_1_to_0 0.0647061 0.0001754 368.80429 0
J dissim ~ … z_aoo_1:z_n_changes 0.0918062 0.0000754 1217.48742 0
log ratio ~ … (Intercept) 2.5643723 0.0016258 1577.26832 0
log ratio ~ … z_universe_size 0.0824272 0.0010736 76.77425 0
log ratio ~ … z_aoo_1 -0.7632115 0.0009974 -765.23621 0
log ratio ~ … z_n_changes 0.1499282 0.0010489 142.94027 0
log ratio ~ … change_typefrom_0_to_1 -0.1653424 0.0023943 -69.05691 0
log ratio ~ … change_typefrom_1_to_0 -5.0906715 0.0023519 -2164.51994 0
log ratio ~ … z_aoo_1:z_n_changes -0.4051572 0.0010108 -400.82237 0

5. Summary by change type

Notes:

  • random-expanding: higher J dissimilarity than random-declining (J for random-expanding is similar to directional-expanding)
  • random-declining: lower J dissimilarity than directional-declining
  • random-expanding: more positive log ratio than directional-expanding
  • random-declining: less negative log ratio than directional-declining

Code
simulations_final %>%
  group_by(change_type, trend) %>%
  summarise(
    across(c(j_dissim, log_ratio, change_int, rel_persist), 
           list(mean = mean, 
                sd = sd), 
           .names = "{.col}_{.fn}"),
    .groups = "drop"
  ) %>%
  mutate(across(3:10, \(x) round(x, 3)))
# A tibble: 5 × 10
  change_type trend     j_dissim_mean j_dissim_sd log_ratio_mean log_ratio_sd
  <chr>       <fct>             <dbl>       <dbl>          <dbl>        <dbl>
1 A_random    declining         0.569       0.29          -0.584        0.866
2 A_random    net_zero          0.363       0.285          0            0    
3 A_random    expanding         0.896       0.188          3.08         1.91 
4 from_0_to_1 expanding         0.895       0.242          2.68         1.79 
5 from_1_to_0 declining         0.895       0.242         -2.68         1.79 
# ℹ 4 more variables: change_int_mean <dbl>, change_int_sd <dbl>,
#   rel_persist_mean <dbl>, rel_persist_sd <dbl>

6. 3D Surface Plots

jaccard

Code
# make 3D surface
gam_mod <- gam(j_dissim ~ 
                 te(aoo_1, n_changes), 
               data = simulations_final %>% 
                 filter(change_type == "A_random" & universe_size == 100))


summary(gam_mod)

Family: gaussian 
Link function: identity 

Formula:
j_dissim ~ te(aoo_1, n_changes)

Parametric coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 0.7778970  0.0001139    6827   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Approximate significance of smooth terms:
                    edf Ref.df      F p-value    
te(aoo_1,n_changes)  24     24 222969  <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

R-sq.(adj) =  0.985   Deviance explained = 98.5%
GCV = 0.0010377  Scale est. = 0.0010374  n = 79897
Code
# Predict over a grid
grid_df <- expand.grid(
  aoo_1 = seq(1,100, length = 100),
  n_changes = seq(1,100, length = 100)
)

grid_df$pred_jaccard <- predict(gam_mod, newdata = grid_df)
range(grid_df$pred_jaccard)
[1] 0.01025627 1.00786810
Code
# Reshape into matrix for surface plot
z_matrix <- matrix(grid_df$pred_jaccard, nrow = 100, ncol = 100)
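# Note (assumption about plotly's surface convention): rows of z map to the y axis and
# columns to x. Because aoo_1 varies fastest in expand.grid(), rows of z_matrix
# correspond to aoo_1 here; if the rendered axes look swapped, plot t(z_matrix) instead.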

p <- plot_ly(
  x = ~ unique(grid_df$aoo_1),
  y = ~ unique(grid_df$n_changes),
  z = ~z_matrix,
  showlegend = FALSE
) %>%
  add_surface(opacity = 0.7, showscale = FALSE) %>%
  layout(
    title = "Jaccard Surface",
    scene = list(
      xaxis = list(title = "Initial Occupancy"),
      yaxis = list(title = "Number of Changes"),
      zaxis = list(title = "Predicted Jaccard")
    )
  )%>%
  config(
    toImageButtonOptions = list(
      format = "svg",
      filename = "3d_plot_J",
      width = 1000,
      height = 700
    )
  )

## add boxes outside of cube
# Compute axis limits
xlim <- range(grid_df$aoo_1)
ylim <- range(grid_df$n_changes)
zlim <- range(grid_df$pred_jaccard)

# Define cube corners
x_vals <- xlim
y_vals <- ylim
z_vals <- zlim

# Function to create lines between pairs of corners
add_box_edges <- function(plot) {
  lines <- list(
    # bottom rectangle
    list(x = x_vals, y = rep(y_vals[1], 2), z = rep(z_vals[1], 2)),
    list(x = x_vals, y = rep(y_vals[2], 2), z = rep(z_vals[1], 2)),
    list(x = rep(x_vals[1], 2), y = y_vals, z = rep(z_vals[1], 2)),
    list(x = rep(x_vals[2], 2), y = y_vals, z = rep(z_vals[1], 2)),

    # top rectangle
    list(x = x_vals, y = rep(y_vals[1], 2), z = rep(z_vals[2], 2)),
    list(x = x_vals, y = rep(y_vals[2], 2), z = rep(z_vals[2], 2)),
    list(x = rep(x_vals[1], 2), y = y_vals, z = rep(z_vals[2], 2)),
    list(x = rep(x_vals[2], 2), y = y_vals, z = rep(z_vals[2], 2)),

    # vertical lines
    list(x = rep(x_vals[1], 2), y = rep(y_vals[1], 2), z = z_vals),
    list(x = rep(x_vals[1], 2), y = rep(y_vals[2], 2), z = z_vals),
    list(x = rep(x_vals[2], 2), y = rep(y_vals[1], 2), z = z_vals),
    list(x = rep(x_vals[2], 2), y = rep(y_vals[2], 2), z = z_vals)
  )

  for (edge in lines) {
    plot <- plot %>% add_trace(
      type = "scatter3d",
      mode = "lines",
      x = edge$x,
      y = edge$y,
      z = edge$z,
      line = list(color = "black", width = 2),
      showlegend = FALSE
    )
  }

  return(plot)
}

# Add the box outlines
p_with_box <- add_box_edges(p)


# Add contours
fig <- p_with_box %>%
  add_surface(
    opacity = 0.7,
    contours = list(
      z = list(
        show = TRUE,
        usecolormap = TRUE,
        highlightcolor = "#ff0000",
        project = list(z = TRUE)
      )
    )
  )

fig %>%
  layout(scene = list(
    camera = list(
      eye = list(
        x = -1.25,
        y = 1.25,
        z = 1.25
      )
    )
  ))

log ratio

Code
# Create smoothed surface with GAM
gam_mod <- gam(log_ratio ~                  
                 te(aoo_1, n_changes), 
               data = simulations_final %>% 
                 filter(change_type == "A_random" & universe_size == 100))

summary(gam_mod)

Family: gaussian 
Link function: identity 

Formula:
log_ratio ~ te(aoo_1, n_changes)

Parametric coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 1.3659201  0.0004397    3106   <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Approximate significance of smooth terms:
                    edf Ref.df      F p-value    
te(aoo_1,n_changes)  24     24 574834  <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

R-sq.(adj) =  0.994   Deviance explained = 99.4%
GCV = 0.015455  Scale est. = 0.01545   n = 79897
Code
# Predict over a grid
grid_df <- expand.grid(
  aoo_1 = seq(1,100, length = 100),
  n_changes = seq(1,100, length = 100)
)

grid_df$pred_lr <- predict(gam_mod, newdata = grid_df)

# Reshape into matrix for surface plot
z_matrix <- matrix(grid_df$pred_lr, nrow = 100, ncol = 100)

p <- plot_ly(
  x = ~ unique(grid_df$aoo_1),
  y = ~ unique(grid_df$n_changes),
  z = ~z_matrix
) %>%
  add_surface(opacity = 0.7,
              showscale = FALSE) %>%
  layout(
    title = "Log Ratio Surface",
    scene = list(
      xaxis = list(title = "Initial Occupancy"),
      yaxis = list(title = "Number of Changes"),
      zaxis = list(title = "Predicted log ratio")
    )
  )%>%
  config(
    toImageButtonOptions = list(
      format = "svg",
      filename = "3d_plot_J",
      width = 1000,
      height = 700
    )
  )



## add boxes outside of cube
# Compute axis limits
xlim <- range(grid_df$aoo_1)
ylim <- range(grid_df$n_changes)
zlim <- range(grid_df$pred_lr)

# Define cube corners
x_vals <- xlim
y_vals <- ylim
z_vals <- zlim

# Function to create lines between pairs of corners
add_box_edges <- function(plot) {
  lines <- list(
    # bottom rectangle
    list(x = x_vals, y = rep(y_vals[1], 2), z = rep(z_vals[1], 2)),
    list(x = x_vals, y = rep(y_vals[2], 2), z = rep(z_vals[1], 2)),
    list(x = rep(x_vals[1], 2), y = y_vals, z = rep(z_vals[1], 2)),
    list(x = rep(x_vals[2], 2), y = y_vals, z = rep(z_vals[1], 2)),

    # top rectangle
    list(x = x_vals, y = rep(y_vals[1], 2), z = rep(z_vals[2], 2)),
    list(x = x_vals, y = rep(y_vals[2], 2), z = rep(z_vals[2], 2)),
    list(x = rep(x_vals[1], 2), y = y_vals, z = rep(z_vals[2], 2)),
    list(x = rep(x_vals[2], 2), y = y_vals, z = rep(z_vals[2], 2)),

    # vertical lines
    list(x = rep(x_vals[1], 2), y = rep(y_vals[1], 2), z = z_vals),
    list(x = rep(x_vals[1], 2), y = rep(y_vals[2], 2), z = z_vals),
    list(x = rep(x_vals[2], 2), y = rep(y_vals[1], 2), z = z_vals),
    list(x = rep(x_vals[2], 2), y = rep(y_vals[2], 2), z = z_vals)
  )

  for (edge in lines) {
    plot <- plot %>% add_trace(
      type = "scatter3d",
      mode = "lines",
      x = edge$x,
      y = edge$y,
      z = edge$z,
      line = list(color = "black", width = 2),
      showlegend = FALSE
    )
  }

  return(plot)
}

# Add the box outlines
p_with_box <- add_box_edges(p)
p_with_box
Code
# Add contours
fig <- p_with_box %>%
  add_surface(
    opacity = 0.7,
    contours = list(
      z = list(
        show = TRUE,
        usecolormap = TRUE,
        highlightcolor = "#ff0000",
        project = list(z = TRUE)
      )
    )
  )


fig %>%
  layout(
    scene = list(
      camera = list(
        eye = list(x = -1.25, y = 1.25, z = 1.25)
      )
    )
  )

7. Data summary

Code
# quick check:
simulations_final %>%
  filter(change_type == "A_random") %>%
  group_by(universe_size, aoo_1, n_changes, b, c) %>%
  summarize(
    mean_j = mean(j_dissim),
    sd_j = sd(j_dissim),
    n = n()
  )
`summarise()` has grouped output by 'universe_size', 'aoo_1', 'n_changes', 'b'.
You can override using the `.groups` argument.
# A tibble: 123,891 × 8
# Groups:   universe_size, aoo_1, n_changes, b [123,891]
   universe_size aoo_1 n_changes     b     c mean_j  sd_j     n
           <dbl> <int>     <dbl> <int> <int>  <dbl> <dbl> <int>
 1            10     1         1     1     0  0.5       0    92
 2            10     1         2     2     0  0.667     0    83
 3            10     1         3     2     1  1         0    32
 4            10     1         3     3     0  0.75      0    68
 5            10     1         4     3     1  1         0    51
 6            10     1         4     4     0  0.8       0    49
 7            10     1         5     4     1  1         0    56
 8            10     1         5     5     0  0.833     0    44
 9            10     1         6     5     1  1         0    65
10            10     1         6     6     0  0.857     0    35
# ℹ 123,881 more rows
Code
# --> for directional change, there is no variation in J.
simulations_final %>%
  filter(change_type == "A_random" & universe_size == 1000) %>%
  group_by(trend) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 1100000
Number of columns 16
_______________________
Column type frequency:
character 1
factor 1
numeric 13
________________________
Group variables trend

Variable type: character

skim_variable trend n_missing complete_rate min max empty n_unique whitespace
change_type declining 0 1 8 8 0 1 0
change_type net_zero 0 1 8 8 0 1 0
change_type expanding 0 1 8 8 0 1 0

Variable type: factor

skim_variable trend n_missing complete_rate ordered n_unique top_counts
change_type2 declining 0 1 FALSE 1 A_r: 177637, dir: 0
change_type2 net_zero 0 1 FALSE 1 A_r: 2376, dir: 0
change_type2 expanding 0 1 FALSE 1 A_r: 919987, dir: 0

Variable type: numeric

skim_variable trend n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
run declining 0 1 50.48 28.88 1.00 25.00 51.00 75.00 100.00 ▇▇▇▇▇
run net_zero 0 1 50.12 29.16 1.00 25.00 49.00 76.00 100.00 ▇▇▇▇▇
run expanding 0 1 50.50 28.86 1.00 26.00 50.00 76.00 100.00 ▇▇▇▇▇
universe_size declining 0 1 1000.00 0.00 1000.00 1000.00 1000.00 1000.00 1000.00 ▁▁▇▁▁
universe_size net_zero 0 1 1000.00 0.00 1000.00 1000.00 1000.00 1000.00 1000.00 ▁▁▇▁▁
universe_size expanding 0 1 1000.00 0.00 1000.00 1000.00 1000.00 1000.00 1000.00 ▁▁▇▁▁
n_changes declining 0 1 524.12 287.63 1.00 279.00 536.00 776.00 1000.00 ▆▇▇▇▇
n_changes net_zero 0 1 272.53 249.35 2.00 46.00 202.00 456.50 934.00 ▇▃▂▂▁
n_changes expanding 0 1 496.53 288.52 1.00 246.00 494.00 746.00 1000.00 ▇▇▇▇▇
j_dissim declining 0 1 0.57 0.29 0.00 0.33 0.61 0.83 1.00 ▅▅▅▆▇
j_dissim net_zero 0 1 0.37 0.28 0.00 0.09 0.33 0.62 0.95 ▇▅▃▃▃
j_dissim expanding 0 1 0.90 0.19 0.00 0.90 0.98 1.00 1.00 ▁▁▁▁▇
aoo_1 declining 0 1 785.93 241.96 16.00 512.00 999.00 999.00 999.00 ▁▁▆▁▇
aoo_1 net_zero 0 1 486.47 89.11 8.00 512.00 512.00 512.00 512.00 ▁▁▁▁▇
aoo_1 expanding 0 1 66.78 103.98 1.00 4.00 16.00 64.00 999.00 ▇▁▁▁▁
aoo_2 declining 0 1 497.02 216.44 1.00 442.00 495.00 555.00 998.00 ▂▂▇▂▂
aoo_2 net_zero 0 1 486.47 89.11 8.00 512.00 512.00 512.00 512.00 ▁▁▁▁▇
aoo_2 expanding 0 1 500.98 257.25 2.00 294.00 510.00 707.00 1000.00 ▅▇▇▇▅
a declining 0 1 379.41 272.47 0.00 154.00 327.00 555.00 998.00 ▇▇▅▃▃
a net_zero 0 1 350.21 129.03 7.00 253.00 377.00 467.00 511.00 ▁▂▃▃▇
a expanding 0 1 35.61 70.73 0.00 1.00 7.00 33.00 999.00 ▇▁▁▁▁
b declining 0 1 117.60 161.80 0.00 0.00 1.00 245.00 488.00 ▇▁▁▁▂
b net_zero 0 1 136.26 124.68 1.00 23.00 101.00 228.25 467.00 ▇▃▂▂▁
b expanding 0 1 465.37 277.38 1.00 225.00 458.00 697.00 999.00 ▇▇▇▇▆
c declining 0 1 406.51 259.11 1.00 203.00 374.00 556.00 999.00 ▇▇▆▃▃
c net_zero 0 1 136.26 124.68 1.00 23.00 101.00 228.25 467.00 ▇▃▂▂▁
c expanding 0 1 31.16 54.59 0.00 1.00 7.00 32.00 470.00 ▇▁▁▁▁
d declining 0 1 96.47 143.41 0.00 0.00 1.00 175.00 984.00 ▇▂▁▁▁
d net_zero 0 1 377.26 174.13 21.00 259.75 387.00 465.00 991.00 ▃▇▇▁▁
d expanding 0 1 467.86 274.44 0.00 234.00 459.00 695.00 998.00 ▇▇▇▇▆
log_ratio declining 0 1 -0.58 0.88 -6.91 -0.81 -0.12 -0.04 0.00 ▁▁▁▁▇
log_ratio net_zero 0 1 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ▁▁▇▁▁
log_ratio expanding 0 1 3.16 1.92 0.00 1.51 3.10 4.73 6.91 ▇▇▇▇▅
rel_persist declining 0 1 0.38 0.27 0.00 0.15 0.33 0.56 1.00 ▇▇▅▃▃
rel_persist net_zero 0 1 0.35 0.13 0.01 0.25 0.38 0.47 0.51 ▁▂▃▃▇
rel_persist expanding 0 1 0.04 0.07 0.00 0.00 0.01 0.03 1.00 ▇▁▁▁▁
change_int declining 0 1 0.76 0.51 0.00 0.35 0.68 1.00 1.95 ▇▇▆▂▃
change_int net_zero 0 1 0.53 0.48 0.00 0.09 0.39 0.89 1.82 ▇▃▂▂▁
change_int expanding 0 1 108.61 191.28 0.00 4.33 22.25 113.50 1000.00 ▇▁▁▁▁

8. J by net area trend

For SI (Fig S9)

Code
# Boxplot for j_dissim
ggplot(simulations_final, aes(x = trend, y = j_dissim, fill = trend)) +
  geom_hline(yintercept = 0.5, col = "darkgrey") +
  geom_boxplot(outlier.colour = "darkgrey") +
  ggthemes::theme_par() +
  labs(title = "Jaccard Dissimilarity by Trend", 
       x = "Trend", 
       y = "j_dissim") +
  facet_grid(universe_size ~ change_type2) +
  scale_fill_manual(values = c("#d7191c","#ffffbf", "#1a9641")) +
  theme(
    axis.text.x = element_text(
      angle = 45,
      vjust = 0.9,
      hjust = 1
    )
  )

Code
# ggsave(filename = "C_02_J_simulations_j_trends_boxplots.bmp", path = here::here("Figures/C_validation/"), width = 10, height = 12)

9. log ratio boxplot

For SI (Fig S8)

Code
# Boxplot for log_ratio
ggplot(simulations_final, aes(x = trend, y = log_ratio, fill = trend)) +
  geom_hline(yintercept = 0, col = "darkgrey") +
  geom_boxplot(outlier.colour = "darkgrey") +
  ggthemes::theme_par() +
  labs(title = "Log Ratio by Trend", x = "Trend", y = "log_ratio") +
  facet_grid(universe_size ~ change_type2) +
  scale_fill_manual(values = c("#d7191c","#ffffbf", "#1a9641")) +
  theme(
    axis.text.x = element_text(
      angle = 45,
      vjust = 0.9,
      hjust = 1
    )
  )

Code
# ggsave(filename = "C_02_J_simulations_logRatio_trends_boxplots.bmp", path = here::here("Figures/C_validation/"), width = 10, height = 12)
Code
simulations_final %>%
  filter(change_type != "random") %>%
  filter(j_dissim != 1) %>%
  ggplot() +
  geom_histogram(aes(j_dissim))
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Code
simulations_final %>%
  filter(change_type == "A_random") %>%
  ggplot(
    aes(
      x = log_ratio,
      y = j_dissim
    )
  ) +
  geom_point(alpha = 0.5, size = 0.1) +
  facet_wrap(~universe_size) +
  ggthemes::theme_par()

Code
# Mean values (faster)
simulations_final %>%
  group_by(universe_size, change_type, n_changes) %>%
  summarize(
    mean_a = mean(a),
    mean_b = mean(b),
    mean_c = mean(c),
    mean_d = mean(d),
    mean_j = mean(j_dissim),
    mean_log = mean(log_ratio)
  ) %>%
  ggplot() +
  geom_point(
    aes(
      x = log(n_changes),
      y = mean_j
    )
  ) +
  facet_wrap(~ interaction(universe_size, change_type), scales = "free_x") +
  ggthemes::theme_few()
`summarise()` has grouped output by 'universe_size', 'change_type'. You can
override using the `.groups` argument.

Code
library(mgcv)

# fit gam
gam_mod <- gam(j_dissim ~ s(a) + s(b) + s(c) + s(d) + s(universe_size, k = 3) + change_type, data = simulations_final %>% filter(run < 3))

# plot smoothed effects
plot(gam_mod, pages = 1)

Code
#install.packages("gratia") # if not already installed
library(gratia)
Warning: package 'gratia' was built under R version 4.4.3

Attaching package: 'gratia'
The following object is masked from 'package:dials':

    penalty
The following object is masked from 'package:terra':

    draw
The following object is masked from 'package:stringr':

    boundary
Code
draw(gam_mod)