Simulating temporal J dissimilarity & log Ratio under different scenarios of scale, occupancy, temporal change magnitude
Parameters to simulate:
universe size
occu in time 1
flip type: directional/random
number of changes
number of runs
⚠️Preparations
📚 Libraries
Code
suppressPackageStartupMessages({source(here::here("Code/00_Configuration.R")) package_list <-c( package_list,"skimr","ggthemes","plotly","ggplot2","mgcv","GGally","corrr","broom" )lapply(package_list, require, character =TRUE)})rm(list =ls())
🔍 Jaccard formula
Code
my_jaccard_abc <-function(set1, set2) { a <-length(intersect(set1, set2)) b <-length(setdiff(set2, set1)) c <-length(setdiff(set1, set2)) res <-data.frame(j = a / (a + b + c),intersect = a,unique_1 = c,unique2 = b )return(res)}
❇️ Simulation function
Code
set.seed(123) # For reproducibilitysimulate_jaccard <-function(lengths, occu1, flip_types, n_runs =1) {# Expand grid for parameter combinations params <-expand.grid(lengths, occu1, flip_types, stringsAsFactors =FALSE)colnames(params) <-c("length", "occu1", "flip_type")# Initialize storage jaccard_results <-list()# Loop through parameter combinationsfor (row_i inseq_len(nrow(params))) { length_i <- params$length[row_i] occu_1_i <- params$occu1[row_i] flip_type <- params$flip_type[row_i]# Ensure the number of occupied sites does not exceed the vector length occu_1_i <-min(occu_1_i, length_i)# Repeat the simulation `n_runs` timesfor (run inseq_len(n_runs)) {# Generate set1 (initially occupied sites) set1_i <-sample(1:length_i, occu_1_i, replace =FALSE)# Sequence of number of changes (from 1 to max_vector_length) n_changes <-seq(from =1, to = length_i, by =1)# Dataframe to store results for this run jaccard_temp <-data.frame(num_changes = n_changes, jaccard =NA, run = run) set2_temp_list <-vector("list", length(n_changes))# Modify set1 stepwise and store set2 statesfor (n_i inseq_along(n_changes)) { set2_temp <- set1_i changes_i <- n_changes[n_i]if (flip_type =="random") { flip_sites <-sample(1:length_i, size = changes_i, replace =FALSE) set2_i <-union( # Combine left overs after 1 to 0 flips with new cells from 0 to 1 flipssetdiff(set2_temp, flip_sites), # Remove indices in `flip_sites` already in `set2_temp`intersect(flip_sites, setdiff(1:length_i, set2_temp)) # Add indices not in `set2_temp` (0 to 1; colonizations) ) } elseif (flip_type =="from_1_to_0") { sample_mat <-if (length(set1_i) ==1) rep(set1_i, 2) else set1_i flip_sites <-sample(x = sample_mat, size =min(changes_i, length(set1_i)), replace =FALSE) set2_i <-setdiff(set2_temp, flip_sites) # Remove flipped sites from set1 set2_i <-if (length(set2_i) ==0) NAelse set2_i }# Store modified set2 set2_temp_list[[n_i]] <- set2_i# Compute Jaccard similarity jaccard_res <-my_jaccard_abc(set1_i, set2_i) jaccard_temp$jaccard[n_i] <- jaccard_res$j jaccard_temp$intersect[n_i] <- jaccard_res$intersect jaccard_temp$unique_1[n_i] <- jaccard_res$unique_1 jaccard_temp$unique_2[n_i] <- jaccard_res$unique_2 jaccard_temp$N_occu_1[n_i] <- jaccard_res$N_occu_1 jaccard_temp$N_occu_2[n_i] <- jaccard_res$N_occu_2 jaccard_temp$length[n_i] <- length_i jaccard_temp$occu1[n_i] <- occu_1_i jaccard_temp$flip_type[n_i] <- flip_type }# Append results for this run jaccard_results[[length(jaccard_results) +1]] <- jaccard_temp } }# Combine Jaccard results into a single dataframe for visualization jaccard_df <-do.call(rbind, jaccard_results)return(jaccard_df)}
🪄 ️Run simulation
Code
# Parameterslengths <-c(10, 100, 1000)occu1 <-c(2^(0:floor(log2(1000))), 999)flip_types <-c("random", "from_1_to_0")n_runs <-100# Repeat each parameter combination 100 times# Run simulationsjaccard_df <-simulate_jaccard(lengths, occu1, flip_types, n_runs)# save as rdssaveRDS(jaccard_df, here("Data/output/temp","C_02_Jaccard_simulations_100.rds"))
🦾 Prepare analysis data
organize data frame for analysis
Code
sim_df <- jaccard_df %>%filter( N_occu_1 !=0, N_occu_2 !=0,!(N_occu_1 ==0& N_occu_2 ==0),!(N_occu_1 ==1& N_occu_2 ==1) ) %>%mutate(universe_size = length,change_type =factor(flip_type),n_changes = num_changes,j_dissim =1- jaccard,aoo_1 = N_occu_1,aoo_2 = N_occu_2,a = intersect,b = unique_2,c = unique_1,d = universe_size - a - b - c,log_ratio =log(aoo_2 / aoo_1),rel_persist = a / universe_size,change_int = (b + c) / aoo_1,trend =factor(case_when( b > c ~"expanding", b < c ~"declining",TRUE~"net_zero" ),levels =c("declining", "net_zero", "expanding") ) ) %>%select(run, universe_size, change_type, n_changes, j_dissim, aoo_1, aoo_2, a, b, c, d, log_ratio, rel_persist, change_int, trend)
create expanding trend for directional simulations from declining simulations (because it is symmetrical, we can just switch up aoo_1 and aoo_2 and recalculate change)
Code
expanding <- sim_df %>%filter(change_type =="from_1_to_0") %>%mutate(change_type ="from_0_to_1") %>%rename("b"="c", #switch up number of colonizations (b) and extirpations (c)"c"="b","aoo_1"="aoo_2", # switch up aoo 1 and aoo 2"aoo_2"="aoo_1" ) %>%mutate(log_ratio =log(aoo_2 / aoo_1)) %>%# re-calculate log ratiomutate(change_int = (b + c) / aoo_1) %>%# re-calculate change intensitymutate(trend =as.factor(case_when( b > c ~"expanding", b < c ~"declining", b == c ~"net_zero",# added A to sort levels for regression.default =NA )) )skimr::skim(expanding)
Below we see that: - we have the highest correlation between Jaccard dissim and relative persistence (r=-0.86), or a (r=-0.82) –> persistence drives J dissim - aoo 1 is also important (r=-0.62) –> the higher the initial occupancy, the lower the dissimilarity - surprisingly, d shows also a high correlation (r=0.39 - similarly high as the correlation with n_changes r = 0.42) –> indicating that the number of unoccupied cells does play an important role in determining the magnitude of change that can occur
in contrast: log ratio is driven by change intensity (r=0.58), aoo 2 (r=0.42), c (r=-0.42) and b (r=0.56), while persistence plays a subordinate role (r=-0.12; r=-0.11 foe rel persistence and a respectively)
again, d is also weakly correlated (r=-0.14)
Code
# Select numeric columns and compute correlation with targetscor_df_focus <- simulations_final %>%select(-run) %>%select(where(is.numeric)) %>%correlate() %>% corrr::focus(j_dissim, log_ratio) %>%mutate(j_dissim =round(j_dissim, 2),log_ratio =round(log_ratio, 2) )# print correlations with log ratio and jkableExtra::kable(cor_df_focus)
Notes: random-expanding: higher J dissim than random-decline (similar J for random-expanding to directional-expanding) random-declining: lower J dissim than directional-declining random-expanding: more positive log ratio than direcitonal-expanding random-declining: less negative log ratio than directional-decline
# make 3D surfacegam_mod <-gam(j_dissim ~te(aoo_1, n_changes), data = simulations_final %>%filter(change_type =="A_random"& universe_size ==100))summary(gam_mod)
# --> for directional change, there is no variation in J.simulations_final %>%filter(change_type =="A_random"& universe_size ==1000) %>%group_by(trend) %>% skimr::skim()
Data summary
Name
Piped data
Number of rows
1100000
Number of columns
16
_______________________
Column type frequency:
character
1
factor
1
numeric
13
________________________
Group variables
trend
Variable type: character
skim_variable
trend
n_missing
complete_rate
min
max
empty
n_unique
whitespace
change_type
declining
0
1
8
8
0
1
0
change_type
net_zero
0
1
8
8
0
1
0
change_type
expanding
0
1
8
8
0
1
0
Variable type: factor
skim_variable
trend
n_missing
complete_rate
ordered
n_unique
top_counts
change_type2
declining
0
1
FALSE
1
A_r: 177637, dir: 0
change_type2
net_zero
0
1
FALSE
1
A_r: 2376, dir: 0
change_type2
expanding
0
1
FALSE
1
A_r: 919987, dir: 0
Variable type: numeric
skim_variable
trend
n_missing
complete_rate
mean
sd
p0
p25
p50
p75
p100
hist
run
declining
0
1
50.48
28.88
1.00
25.00
51.00
75.00
100.00
▇▇▇▇▇
run
net_zero
0
1
50.12
29.16
1.00
25.00
49.00
76.00
100.00
▇▇▇▇▇
run
expanding
0
1
50.50
28.86
1.00
26.00
50.00
76.00
100.00
▇▇▇▇▇
universe_size
declining
0
1
1000.00
0.00
1000.00
1000.00
1000.00
1000.00
1000.00
▁▁▇▁▁
universe_size
net_zero
0
1
1000.00
0.00
1000.00
1000.00
1000.00
1000.00
1000.00
▁▁▇▁▁
universe_size
expanding
0
1
1000.00
0.00
1000.00
1000.00
1000.00
1000.00
1000.00
▁▁▇▁▁
n_changes
declining
0
1
524.12
287.63
1.00
279.00
536.00
776.00
1000.00
▆▇▇▇▇
n_changes
net_zero
0
1
272.53
249.35
2.00
46.00
202.00
456.50
934.00
▇▃▂▂▁
n_changes
expanding
0
1
496.53
288.52
1.00
246.00
494.00
746.00
1000.00
▇▇▇▇▇
j_dissim
declining
0
1
0.57
0.29
0.00
0.33
0.61
0.83
1.00
▅▅▅▆▇
j_dissim
net_zero
0
1
0.37
0.28
0.00
0.09
0.33
0.62
0.95
▇▅▃▃▃
j_dissim
expanding
0
1
0.90
0.19
0.00
0.90
0.98
1.00
1.00
▁▁▁▁▇
aoo_1
declining
0
1
785.93
241.96
16.00
512.00
999.00
999.00
999.00
▁▁▆▁▇
aoo_1
net_zero
0
1
486.47
89.11
8.00
512.00
512.00
512.00
512.00
▁▁▁▁▇
aoo_1
expanding
0
1
66.78
103.98
1.00
4.00
16.00
64.00
999.00
▇▁▁▁▁
aoo_2
declining
0
1
497.02
216.44
1.00
442.00
495.00
555.00
998.00
▂▂▇▂▂
aoo_2
net_zero
0
1
486.47
89.11
8.00
512.00
512.00
512.00
512.00
▁▁▁▁▇
aoo_2
expanding
0
1
500.98
257.25
2.00
294.00
510.00
707.00
1000.00
▅▇▇▇▅
a
declining
0
1
379.41
272.47
0.00
154.00
327.00
555.00
998.00
▇▇▅▃▃
a
net_zero
0
1
350.21
129.03
7.00
253.00
377.00
467.00
511.00
▁▂▃▃▇
a
expanding
0
1
35.61
70.73
0.00
1.00
7.00
33.00
999.00
▇▁▁▁▁
b
declining
0
1
117.60
161.80
0.00
0.00
1.00
245.00
488.00
▇▁▁▁▂
b
net_zero
0
1
136.26
124.68
1.00
23.00
101.00
228.25
467.00
▇▃▂▂▁
b
expanding
0
1
465.37
277.38
1.00
225.00
458.00
697.00
999.00
▇▇▇▇▆
c
declining
0
1
406.51
259.11
1.00
203.00
374.00
556.00
999.00
▇▇▆▃▃
c
net_zero
0
1
136.26
124.68
1.00
23.00
101.00
228.25
467.00
▇▃▂▂▁
c
expanding
0
1
31.16
54.59
0.00
1.00
7.00
32.00
470.00
▇▁▁▁▁
d
declining
0
1
96.47
143.41
0.00
0.00
1.00
175.00
984.00
▇▂▁▁▁
d
net_zero
0
1
377.26
174.13
21.00
259.75
387.00
465.00
991.00
▃▇▇▁▁
d
expanding
0
1
467.86
274.44
0.00
234.00
459.00
695.00
998.00
▇▇▇▇▆
log_ratio
declining
0
1
-0.58
0.88
-6.91
-0.81
-0.12
-0.04
0.00
▁▁▁▁▇
log_ratio
net_zero
0
1
0.00
0.00
0.00
0.00
0.00
0.00
0.00
▁▁▇▁▁
log_ratio
expanding
0
1
3.16
1.92
0.00
1.51
3.10
4.73
6.91
▇▇▇▇▅
rel_persist
declining
0
1
0.38
0.27
0.00
0.15
0.33
0.56
1.00
▇▇▅▃▃
rel_persist
net_zero
0
1
0.35
0.13
0.01
0.25
0.38
0.47
0.51
▁▂▃▃▇
rel_persist
expanding
0
1
0.04
0.07
0.00
0.00
0.01
0.03
1.00
▇▁▁▁▁
change_int
declining
0
1
0.76
0.51
0.00
0.35
0.68
1.00
1.95
▇▇▆▂▃
change_int
net_zero
0
1
0.53
0.48
0.00
0.09
0.39
0.89
1.82
▇▃▂▂▁
change_int
expanding
0
1
108.61
191.28
0.00
4.33
22.25
113.50
1000.00
▇▁▁▁▁
8. J by net area trend
for SI (Fig S9)
Code
# Boxplot for j_dissimggplot(simulations_final, aes(x = trend, y = j_dissim, fill = trend)) +geom_hline(yintercept =0.5, col ="darkgrey") +geom_boxplot(outlier.colour ="darkgrey") + ggthemes::theme_par() +labs(title ="Jaccard Dissimilarity by Trend", x ="Trend", y ="j_dissim") +facet_grid(universe_size ~ change_type2) +scale_fill_manual(values =c("#d7191c","#ffffbf", "#1a9641")) +theme(axis.text.x =element_text(angle =45,vjust =0.9,hjust =1 ) )