Code
library(here)
library(dplyr)
library(tidyr)
library(ggplot2)
library(skimr)
library(sf)
source(here::here("Code/00_Configuration.R"))
library(here)
library(dplyr)
library(tidyr)
library(ggplot2)
library(skimr)
library(sf)
source(here::here("Code/00_Configuration.R"))
Get raw species lists with filtering columns:
Raw species data (including columns to filter just to check if other species were lost)
# Raw species list: one row per datasetID x samplingPeriodID x
# verbatimIdentification x species_keep, with the species-level filtering
# flags (introduced, sp_remove_expert, sp_sampling_repeats, species_keep)
# kept alongside so that excluded species can still be inspected later.
dta0 <-
  readRDS(here("Data/output/1_data_sf.rds")) %>%
  # geometry is not needed for the species list
  st_drop_geometry() %>%
  # keep only rows with scalingID == 1 that are flagged cells_keep == 1
  filter(scalingID == 1, cells_keep == 1) %>%
  distinct(
    datasetID, samplingPeriodID, verbatimIdentification, scientificName,
    introduced, sp_remove_expert, sp_sampling_repeats, species_keep
  ) %>%
  filter(!is.na(verbatimIdentification)) %>%
  # if several rows share the same key, the first one encountered is kept;
  # after this step rows are unique, so no further de-duplication is needed
  distinct(
    datasetID, samplingPeriodID, verbatimIdentification, species_keep,
    .keep_all = TRUE
  ) %>%
  # a missing repeat count is recorded as 0
  mutate(sp_sampling_repeats = coalesce(sp_sampling_repeats, 0))
let’s match first those with the same (approx) taxonomy
# Species-level predictor tables, one RDS file per source.
range_size <- readRDS(here("Data/output/A_predictors/RangeSizes.rds"))
avonet <- readRDS(here("Data/output/A_predictors/Avonet.rds"))
iucn <- mutate(
  readRDS(here("Data/output/A_predictors/IUCN_2025_03_25.rds")),
  # manual fix for species that did not match: their code is set to "LC"
  code = case_when(
    scientificName %in%
      c("Morus bassanus", "Agropsar philippensis", "Tadorna tadorna") ~ "LC",
    .default = code
  )
)
# Phylogenetic distinctiveness
phylo_dist <- readRDS(here("Data/output/A_predictors/Phylo_distinct.rds"))

# Full-join the four tables left to right on whatever columns they share
# (natural join; NOTE(review): presumably the species-name columns -- confirm),
# then drop exact duplicate rows.
species_predictors <-
  Reduce(
    function(joined, nxt) full_join(joined, nxt, relationship = "many-to-many"),
    list(range_size, avonet, iucn, phylo_dist)
  ) %>%
  distinct()
Check NAs
colSums(is.na(species_predictors)) scientificName GlobRangeSize_km2 verbatimIdentification
0 3 0
Mass Habitat Migration
12 12 12
Primary.Lifestyle code pd
12 2 12
Merge species with traits
# Attach the species-level predictors to every row of the raw species list.
# The join uses the columns shared by both tables (natural join;
# NOTE(review): presumably the species-name columns -- confirm).
# The IUCN table's `code` column is exposed as `IUCN` from here on.
species_predictors2 <- dta0 %>%
  left_join(species_predictors, relationship = "many-to-many") %>%
  rename(IUCN = code)
now let’s merge those with a similar taxonomy
# Per-dataset / per-species predictor tables.
big_table <- readRDS(here("Data/output/A_predictors/Big_table.rds"))

# Species ranges, Atlas geometry: only the two shape metrics are kept
geometry <- select(
  readRDS(here("Data/output/A_predictors/Range_geometries.rds")),
  datasetID, samplingPeriodID, verbatimIdentification,
  circNorm, minDist_toBorder_centr
)

# Spatial autocorrelation: only the join-count delta is kept
sac_metrics <- select(
  readRDS(here("Data/output/A_predictors/Spatial_auto.rds")),
  datasetID, samplingPeriodID, verbatimIdentification, joincount_delta
)

# Lacunarity table: IDs are converted to numeric via character (safe for
# factor input) and underscores in species names are replaced by spaces.
# The columns are then renamed by position -- assumes exactly these seven
# columns, in this order, remain after dropping `name` (TODO confirm).
lacunarity <-
  readRDS(here("Data/output/A_predictors/Lacunarity.rds")) %>%
  ungroup() %>%
  select(-name) %>%
  mutate(
    samplingPeriodID = as.numeric(as.character(samplingPeriodID)),
    datasetID = as.numeric(as.character(datasetID)),
    verbatimIdentification =
      gsub("_", " ", verbatimIdentification, fixed = TRUE)
  ) %>%
  as.data.frame() %>%
  setNames(
    c(
      "r", "ln(r)", "lac", "ln(lac)",
      "datasetID", "samplingPeriodID", "verbatimIdentification"
    )
  )
Calculate mean across increasing window sizes
# Mean ln(lacunarity) per dataset x sampling period x species, averaged over
# all rows of the lacunarity table for that species (NAs ignored).
mean_lac <- lacunarity %>%
  group_by(datasetID, samplingPeriodID, verbatimIdentification) %>%
  # drop the grouping explicitly so the result is a plain, ungrouped table
  # and no regrouping message is emitted
  summarize(mean_lnLac = mean(`ln(lac)`, na.rm = TRUE), .groups = "drop") %>%
  mutate(
    # two species names arrive mangled in the lacunarity table and are
    # restored here so they match the names used in the other tables
    # NOTE(review): the missing "ntif" looks like an upstream ".tif" pattern
    # strip -- confirm and fix at the source if so
    verbatimIdentification = case_when(
      verbatimIdentification == "Fringilla moringilla" ~
        "Fringilla montifringilla",
      verbatimIdentification == "Moringilla nivalis" ~
        "Montifringilla nivalis",
      .default = verbatimIdentification
    )
  )
quick check on lacunarity data
mean_lac %>%
group_by(datasetID, samplingPeriodID) %>%
summarize(n_sp = n_distinct(verbatimIdentification))# A tibble: 8 × 3
# Groups: datasetID [4]
datasetID samplingPeriodID n_sp
<dbl> <dbl> <int>
1 5 1 209
2 5 2 216
3 6 1 244
4 6 2 251
5 13 1 226
6 13 2 248
7 26 1 432
8 26 2 446
Not matched:
setdiff(mean_lac$verbatimIdentification, dta0$verbatimIdentification) #7[1] "Apalopteron familiare" "Chloris sinica kittlitzi"
[3] "Oceanodroma tristrami" "Phoebastria albatrus"
[5] "Phoebastria nigripes" "Phylloscopus ijimae"
[7] "Turdus celaenops"
setdiff(dta0$verbatimIdentification,mean_lac$verbatimIdentification) #0character(0)
# Merge all predictor tables into one row per
# datasetID x samplingPeriodID x verbatimIdentification, convert categorical
# columns to factors and derive the simplified trait variables
# Habitat_5, Generalism and Threatened.
predictors <-
  species_predictors2 %>%
  # all joins are natural joins on the columns the two tables share
  full_join(big_table, relationship = "many-to-many") %>%
  full_join(sac_metrics, relationship = "many-to-many") %>%
  full_join(geometry, relationship = "many-to-many") %>%
  left_join(mean_lac, relationship = "many-to-many") %>%
  # one row per dataset x species x period; the first row encountered wins.
  # NOTE(review): if a species has rows with different species_keep values,
  # only the first survives here -- confirm this is intended.
  distinct(datasetID, verbatimIdentification, samplingPeriodID,
           .keep_all = TRUE) %>%
  # turn the literal string "NA" into a real missing value so that it does
  # not end up as a factor level of its own
  mutate(Migration = na_if(as.character(Migration), "NA")) %>%
  mutate(
    across(
      where(is.character) &
        !matches("verbatimIdentification") &
        !matches("scientificName"),
      as.factor
    )
  ) %>%
  mutate(
    across(
      all_of(c(
        "datasetID", "samplingPeriodID",
        "Habitat", "IUCN",
        "Migration", "Primary.Lifestyle",
        "introduced", "sp_remove_expert", "sp_sampling_repeats",
        "species_keep"
      )),
      as.factor
    )
  ) %>%
  ungroup() %>%
  mutate(
    # collapse the habitat classes into five broad groups; anything else
    # (including a missing habitat) stays NA
    Habitat_5 = as.factor(case_when(
      Habitat %in% c("Grassland", "Shrubland", "Desert", "Rock") ~ "open",
      Habitat %in% c("Woodland", "Forest") ~ "closed",
      Habitat %in% c("Coastal", "Marine") ~ "marine",
      Habitat %in% c("Wetland", "Riverine") ~ "freshwater",
      Habitat == "Human Modified" ~ "human",
      .default = NA_character_
    )),
    # 1 = generalist lifestyle, 0 = any other known lifestyle; an unknown
    # lifestyle stays NA instead of being counted as "not generalist"
    Generalism = as.factor(case_when(
      is.na(Primary.Lifestyle) ~ NA_real_,
      Primary.Lifestyle == "Generalist" ~ 1,
      .default = 0
    )),
    # 0 = IUCN code "LC", 1 = any other code, NA = no code available
    Threatened = as.factor(case_when(
      IUCN %in% c("LC") ~ 0,
      is.na(IUCN) ~ NA_real_,
      .default = 1
    ))
  ) %>%
  # the raw columns are replaced by the derived ones above
  select(-Habitat, -Primary.Lifestyle, -IUCN)
Check predictors
predictors %>%
glimpse()Rows: 2,264
Columns: 38
$ datasetID <fct> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,…
$ samplingPeriodID <fct> 2, 2, 2, 1, 1, 1, 2, 1, 2, 2, 1, 1, 1, 2, 1, 2,…
$ verbatimIdentification <chr> "Nucifraga caryocatactes", "Anas platyrhynchos"…
$ scientificName <chr> "Nucifraga caryocatactes", "Anas platyrhynchos"…
$ introduced <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ sp_remove_expert <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ sp_sampling_repeats <fct> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
$ species_keep <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ GlobRangeSize_km2 <dbl> 15439515, 39656070, 20386088, 18362593, 1185125…
$ Mass <dbl> 182.51, 843.42, 701.17, 2926.00, 16.00, 21.39, …
$ Migration <fct> 2, 2, 2, 3, 1, 1, 3, 3, 1, 2, 3, 3, 1, 3, 1, 3,…
$ pd <dbl> 8.132575, 1.573267, 3.241734, 25.282865, 8.5072…
$ Total_area_samp <dbl> 78308.81, 78308.81, 78308.81, 78308.81, 78308.8…
$ Total_Ncells <dbl> 671, 671, 671, 671, 671, 671, 671, 671, 671, 67…
$ Total_Ncells_samp <dbl> 628, 628, 628, 628, 628, 628, 628, 628, 628, 62…
$ AOO <dbl> 41707.68, 76499.91, 57486.17, 48681.68, 77328.6…
$ occ_Ncells <dbl> 339, 609, 449, 390, 618, 605, 452, 573, 599, 62…
$ rel_occ_Ncells <dbl> 0.540, 0.970, 0.715, 0.621, 0.984, 0.963, 0.720…
$ rel_AOO <dbl> 0.533, 0.977, 0.734, 0.622, 0.987, 0.971, 0.741…
$ Jaccard_dissim <dbl> 0.391, 0.058, 0.262, 0.369, 0.024, 0.063, 0.256…
$ a <dbl> 248, 584, 378, 342, 613, 579, 378, 534, 554, 62…
$ b <dbl> 91, 25, 71, 152, 10, 13, 74, 48, 45, 5, 97, 65,…
$ c <dbl> 68, 11, 63, 48, 5, 26, 56, 39, 21, 3, 71, 132, …
$ d <dbl> 221, 8, 116, 86, 0, 10, 120, 7, 8, 0, 383, 256,…
$ D_AOO_a <dbl> 1.577, 1.960, 1.830, 1.628, 1.985, 1.973, 1.831…
$ time_span <dbl> 2, 2, 2, 4, 4, 4, 2, 4, 2, 2, 4, 4, 4, 2, 4, 2,…
$ startYear1 <dbl> 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985,…
$ endYear2 <dbl> 2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003,…
$ n_years <dbl> 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,…
$ log_R2_1 <dbl> 0.064, 0.021, 0.010, 0.231, 0.009, -0.017, 0.03…
$ log_R2_1_per_year <dbl> 0.003, 0.001, 0.001, 0.012, 0.000, -0.001, 0.00…
$ joincount_delta <dbl> 0.6766781674, 0.0428289040, 0.3667871962, 0.255…
$ circNorm <dbl> 41.820347, 5.051998, 30.193688, 56.230680, 4.01…
$ minDist_toBorder_centr <dbl> 85776.80, 84088.37, 80739.67, 83213.69, 84680.3…
$ mean_lnLac <dbl> 0.23551088, 0.09352329, 0.16640292, 0.14003097,…
$ Habitat_5 <fct> closed, freshwater, freshwater, closed, closed,…
$ Generalism <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,…
$ Threatened <fct> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,…
str(predictors)'data.frame': 2264 obs. of 38 variables:
$ datasetID : Factor w/ 4 levels "5","6","13","26": 1 1 1 1 1 1 1 1 1 1 ...
$ samplingPeriodID : Factor w/ 2 levels "1","2": 2 2 2 1 1 1 2 1 2 2 ...
$ verbatimIdentification: chr "Nucifraga caryocatactes" "Anas platyrhynchos" "Aythya fuligula" "Ciconia nigra" ...
$ scientificName : chr "Nucifraga caryocatactes" "Anas platyrhynchos" "Aythya fuligula" "Ciconia nigra" ...
$ introduced : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ sp_remove_expert : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
$ sp_sampling_repeats : Factor w/ 3 levels "0","1","2": 3 3 3 3 3 3 3 3 3 3 ...
$ species_keep : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
$ GlobRangeSize_km2 : num 15439515 39656070 20386088 18362593 11851257 ...
$ Mass : num 183 843 701 2926 16 ...
$ Migration : Factor w/ 4 levels "1","2","3","NA": 2 2 2 3 1 1 3 3 1 2 ...
$ pd : num 8.13 1.57 3.24 25.28 8.51 ...
$ Total_area_samp : num 78309 78309 78309 78309 78309 ...
$ Total_Ncells : num 671 671 671 671 671 671 671 671 671 671 ...
$ Total_Ncells_samp : num 628 628 628 628 628 628 628 628 628 628 ...
$ AOO : num 41708 76500 57486 48682 77329 ...
$ occ_Ncells : num 339 609 449 390 618 605 452 573 599 625 ...
$ rel_occ_Ncells : num 0.54 0.97 0.715 0.621 0.984 0.963 0.72 0.912 0.954 0.995 ...
$ rel_AOO : num 0.533 0.977 0.734 0.622 0.987 0.971 0.741 0.916 0.957 0.999 ...
$ Jaccard_dissim : num 0.391 0.058 0.262 0.369 0.024 0.063 0.256 0.14 0.106 0.013 ...
$ a : num 248 584 378 342 613 579 378 534 554 620 ...
$ b : num 91 25 71 152 10 13 74 48 45 5 ...
$ c : num 68 11 63 48 5 26 56 39 21 3 ...
$ d : num 221 8 116 86 0 10 120 7 8 0 ...
$ D_AOO_a : Named num 1.58 1.96 1.83 1.63 1.99 ...
..- attr(*, "names")= chr [1:2264] "log(mean_area)" "log(mean_area)" "log(mean_area)" "log(mean_area)" ...
$ time_span : num 2 2 2 4 4 4 2 4 2 2 ...
$ startYear1 : num 1985 1985 1985 1985 1985 ...
$ endYear2 : num 2003 2003 2003 2003 2003 ...
$ n_years : num 19 19 19 19 19 19 19 19 19 19 ...
$ log_R2_1 : num 0.064 0.021 0.01 0.231 0.009 -0.017 0.039 0.018 0.038 0.005 ...
$ log_R2_1_per_year : num 0.003 0.001 0.001 0.012 0 -0.001 0.002 0.001 0.002 0 ...
$ joincount_delta : num 0.6767 0.0428 0.3668 0.2551 0.0238 ...
$ circNorm : num 41.82 5.05 30.19 56.23 4.01 ...
$ minDist_toBorder_centr: num 85777 84088 80740 83214 84680 ...
$ mean_lnLac : num 0.2355 0.0935 0.1664 0.14 0.0906 ...
$ Habitat_5 : Factor w/ 5 levels "closed","freshwater",..: 1 2 2 1 1 1 2 1 1 1 ...
$ Generalism : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
$ Threatened : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 1 1 ...
predictors %>%
filter(species_keep == 1 & samplingPeriodID == 1) %>%
is.na() %>%
colSums() datasetID samplingPeriodID verbatimIdentification
0 0 0
scientificName introduced sp_remove_expert
0 0 0
sp_sampling_repeats species_keep GlobRangeSize_km2
0 0 3
Mass Migration pd
9 9 9
Total_area_samp Total_Ncells Total_Ncells_samp
0 0 0
AOO occ_Ncells rel_occ_Ncells
0 0 0
rel_AOO Jaccard_dissim a
0 0 0
b c d
0 0 0
D_AOO_a time_span startYear1
0 0 0
endYear2 n_years log_R2_1
0 0 0
log_R2_1_per_year joincount_delta circNorm
0 4 0
minDist_toBorder_centr mean_lnLac Habitat_5
0 0 9
Generalism Threatened
0 2
predictors %>%
filter(samplingPeriodID == 1) %>%
skim() %>%
to_long() %>%
setNames(c("variable_class", "variable", "metric", "value"))# A tibble: 324 × 4
variable_class variable metric value
<chr> <chr> <chr> <chr>
1 character verbatimIdentification n_missing 0
2 character scientificName n_missing 0
3 factor datasetID n_missing 0
4 factor samplingPeriodID n_missing 0
5 factor introduced n_missing 0
6 factor sp_remove_expert n_missing 0
7 factor sp_sampling_repeats n_missing 0
8 factor species_keep n_missing 0
9 factor Migration n_missing 45
10 factor Habitat_5 n_missing 45
# ℹ 314 more rows
predictors %>% filter(is.na(species_keep)) [1] datasetID samplingPeriodID verbatimIdentification
[4] scientificName introduced sp_remove_expert
[7] sp_sampling_repeats species_keep GlobRangeSize_km2
[10] Mass Migration pd
[13] Total_area_samp Total_Ncells Total_Ncells_samp
[16] AOO occ_Ncells rel_occ_Ncells
[19] rel_AOO Jaccard_dissim a
[22] b c d
[25] D_AOO_a time_span startYear1
[28] endYear2 n_years log_R2_1
[31] log_R2_1_per_year joincount_delta circNorm
[34] minDist_toBorder_centr mean_lnLac Habitat_5
[37] Generalism Threatened
<0 rows> (or 0-length row.names)
predictors %>%
filter(samplingPeriodID == 1) %>%
group_by(datasetID) %>%
skim() %>%
as_tibble()# A tibble: 148 × 21
skim_type skim_variable datasetID n_missing complete_rate character.min
<chr> <chr> <fct> <int> <dbl> <int>
1 character verbatimIdentifica… 5 0 1 9
2 character verbatimIdentifica… 6 0 1 9
3 character verbatimIdentifica… 13 0 1 9
4 character verbatimIdentifica… 26 0 1 9
5 character scientificName 5 0 1 9
6 character scientificName 6 0 1 9
7 character scientificName 13 0 1 9
8 character scientificName 26 0 1 9
9 factor samplingPeriodID 5 0 1 NA
10 factor samplingPeriodID 6 0 1 NA
# ℹ 138 more rows
# ℹ 15 more variables: character.max <int>, character.empty <int>,
# character.n_unique <int>, character.whitespace <int>, factor.ordered <lgl>,
# factor.n_unique <int>, factor.top_counts <chr>, numeric.mean <dbl>,
# numeric.sd <dbl>, numeric.p0 <dbl>, numeric.p25 <dbl>, numeric.p50 <dbl>,
# numeric.p75 <dbl>, numeric.p100 <dbl>, numeric.hist <chr>
# Strip the per-element `names` attribute from every column (D_AOO_a, for
# example, arrives as a named numeric vector). Doing this for all columns
# avoids listing columns by hand, where a name that is not present in
# `predictors` would be silently ignored. `[] <-` keeps the data frame's own
# attributes (class, column names, row names) intact.
predictors[] <- lapply(predictors, unname)
str(predictors)'data.frame': 2264 obs. of 38 variables:
$ datasetID : Factor w/ 4 levels "5","6","13","26": 1 1 1 1 1 1 1 1 1 1 ...
$ samplingPeriodID : Factor w/ 2 levels "1","2": 2 2 2 1 1 1 2 1 2 2 ...
$ verbatimIdentification: chr "Nucifraga caryocatactes" "Anas platyrhynchos" "Aythya fuligula" "Ciconia nigra" ...
$ scientificName : chr "Nucifraga caryocatactes" "Anas platyrhynchos" "Aythya fuligula" "Ciconia nigra" ...
$ introduced : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
$ sp_remove_expert : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
$ sp_sampling_repeats : Factor w/ 3 levels "0","1","2": 3 3 3 3 3 3 3 3 3 3 ...
$ species_keep : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
$ GlobRangeSize_km2 : num 15439515 39656070 20386088 18362593 11851257 ...
$ Mass : num 183 843 701 2926 16 ...
$ Migration : Factor w/ 4 levels "1","2","3","NA": 2 2 2 3 1 1 3 3 1 2 ...
$ pd : num 8.13 1.57 3.24 25.28 8.51 ...
$ Total_area_samp : num 78309 78309 78309 78309 78309 ...
$ Total_Ncells : num 671 671 671 671 671 671 671 671 671 671 ...
$ Total_Ncells_samp : num 628 628 628 628 628 628 628 628 628 628 ...
$ AOO : num 41708 76500 57486 48682 77329 ...
$ occ_Ncells : num 339 609 449 390 618 605 452 573 599 625 ...
$ rel_occ_Ncells : num 0.54 0.97 0.715 0.621 0.984 0.963 0.72 0.912 0.954 0.995 ...
$ rel_AOO : num 0.533 0.977 0.734 0.622 0.987 0.971 0.741 0.916 0.957 0.999 ...
$ Jaccard_dissim : num 0.391 0.058 0.262 0.369 0.024 0.063 0.256 0.14 0.106 0.013 ...
$ a : num 248 584 378 342 613 579 378 534 554 620 ...
$ b : num 91 25 71 152 10 13 74 48 45 5 ...
$ c : num 68 11 63 48 5 26 56 39 21 3 ...
$ d : num 221 8 116 86 0 10 120 7 8 0 ...
$ D_AOO_a : num 1.58 1.96 1.83 1.63 1.99 ...
$ time_span : num 2 2 2 4 4 4 2 4 2 2 ...
$ startYear1 : num 1985 1985 1985 1985 1985 ...
$ endYear2 : num 2003 2003 2003 2003 2003 ...
$ n_years : num 19 19 19 19 19 19 19 19 19 19 ...
$ log_R2_1 : num 0.064 0.021 0.01 0.231 0.009 -0.017 0.039 0.018 0.038 0.005 ...
$ log_R2_1_per_year : num 0.003 0.001 0.001 0.012 0 -0.001 0.002 0.001 0.002 0 ...
$ joincount_delta : num 0.6767 0.0428 0.3668 0.2551 0.0238 ...
$ circNorm : num 41.82 5.05 30.19 56.23 4.01 ...
$ minDist_toBorder_centr: num 85777 84088 80740 83214 84680 ...
$ mean_lnLac : num 0.2355 0.0935 0.1664 0.14 0.0906 ...
$ Habitat_5 : Factor w/ 5 levels "closed","freshwater",..: 1 2 2 1 1 1 2 1 1 1 ...
$ Generalism : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
$ Threatened : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 1 1 ...
# Final predictor table: only rows flagged species_keep == 1, without the
# four filtering-flag columns, and with exact duplicate rows removed.
final_predictors <-
  predictors %>%
  filter(species_keep == 1) %>%
  select(
    !c(sp_remove_expert, sp_sampling_repeats, species_keep, introduced)
  ) %>%
  distinct()
final_predictors %>% is.na() %>% colSums() datasetID samplingPeriodID verbatimIdentification
0 0 0
scientificName GlobRangeSize_km2 Mass
0 6 18
Migration pd Total_area_samp
18 18 0
Total_Ncells Total_Ncells_samp AOO
0 0 0
occ_Ncells rel_occ_Ncells rel_AOO
0 0 0
Jaccard_dissim a b
0 0 0
c d D_AOO_a
0 0 0
time_span startYear1 endYear2
0 0 0
n_years log_R2_1 log_R2_1_per_year
0 0 0
joincount_delta circNorm minDist_toBorder_centr
9 0 0
mean_lnLac Habitat_5 Generalism
0 18 0
Threatened
4
skimr::skim(final_predictors)| Name | final_predictors |
| Number of rows | 2108 |
| Number of columns | 34 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| factor | 6 |
| numeric | 26 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| verbatimIdentification | 0 | 1 | 9 | 32 | 0 | 762 | 0 |
| scientificName | 0 | 1 | 9 | 39 | 0 | 726 | 0 |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| datasetID | 0 | 1.00 | FALSE | 4 | 26: 824, 6: 466, 13: 416, 5: 402 |
| samplingPeriodID | 0 | 1.00 | FALSE | 2 | 1: 1054, 2: 1054 |
| Migration | 18 | 0.99 | FALSE | 3 | 3: 1126, 2: 494, 1: 470, NA: 0 |
| Habitat_5 | 18 | 0.99 | FALSE | 5 | clo: 786, fre: 530, ope: 486, mar: 190 |
| Generalism | 0 | 1.00 | FALSE | 2 | 0: 1836, 1: 272 |
| Threatened | 4 | 1.00 | FALSE | 2 | 0: 1804, 1: 300 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| GlobRangeSize_km2 | 6 | 1.00 | 14098593.16 | 14050260.43 | 587.60 | 4856350.33 | 10151559.24 | 18843405.07 | 118014530.88 | ▇▁▁▁▁ |
| Mass | 18 | 0.99 | 453.65 | 1059.12 | 3.09 | 19.90 | 77.50 | 452.10 | 10682.04 | ▇▁▁▁▁ |
| pd | 18 | 0.99 | 8.20 | 6.31 | 1.11 | 4.36 | 6.19 | 9.71 | 56.96 | ▇▁▁▁▁ |
| Total_area_samp | 0 | 1.00 | 2425389.23 | 2793173.45 | 78308.81 | 126878.54 | 367713.86 | 5909157.77 | 5909157.77 | ▇▁▁▁▅ |
| Total_Ncells | 0 | 1.00 | 3551.38 | 2047.71 | 671.00 | 1309.00 | 5080.00 | 5080.00 | 5335.00 | ▅▁▁▁▇ |
| Total_Ncells_samp | 0 | 1.00 | 2631.95 | 1670.62 | 628.00 | 1184.00 | 2821.00 | 2821.00 | 5319.00 | ▇▁▇▁▅ |
| AOO | 0 | 1.00 | 734092.09 | 1398576.24 | 0.69 | 15384.02 | 74679.96 | 504160.98 | 5768894.73 | ▇▁▁▁▁ |
| occ_Ncells | 0 | 1.00 | 763.68 | 1077.14 | 1.00 | 69.00 | 323.50 | 899.25 | 5229.00 | ▇▁▁▁▁ |
| rel_occ_Ncells | 0 | 1.00 | 0.31 | 0.32 | 0.00 | 0.04 | 0.18 | 0.56 | 1.00 | ▇▂▂▂▂ |
| rel_AOO | 0 | 1.00 | 0.32 | 0.33 | 0.00 | 0.03 | 0.18 | 0.60 | 1.00 | ▇▂▂▂▂ |
| Jaccard_dissim | 0 | 1.00 | 0.50 | 0.29 | 0.00 | 0.26 | 0.50 | 0.74 | 1.00 | ▇▇▇▇▇ |
| a | 0 | 1.00 | 612.62 | 957.95 | 0.00 | 30.00 | 199.50 | 666.00 | 5138.00 | ▇▁▁▁▁ |
| b | 0 | 1.00 | 179.74 | 276.65 | 0.00 | 27.00 | 86.00 | 206.00 | 2999.00 | ▇▁▁▁▁ |
| c | 0 | 1.00 | 122.37 | 193.83 | 0.00 | 20.00 | 57.00 | 126.00 | 1502.00 | ▇▁▁▁▁ |
| d | 0 | 1.00 | 1717.23 | 1512.70 | 0.00 | 545.00 | 1167.50 | 2603.00 | 5318.00 | ▇▃▅▁▂ |
| D_AOO_a | 0 | 1.00 | 1.32 | 0.50 | 0.00 | 0.97 | 1.41 | 1.74 | 2.00 | ▁▂▅▆▇ |
| time_span | 0 | 1.00 | 7.84 | 7.52 | 2.00 | 4.00 | 5.00 | 5.00 | 23.00 | ▇▁▁▁▂ |
| startYear1 | 0 | 1.00 | 1976.64 | 5.06 | 1972.00 | 1972.00 | 1974.00 | 1980.00 | 1985.00 | ▇▁▁▃▂ |
| endYear2 | 0 | 1.00 | 2008.72 | 6.71 | 2002.00 | 2003.00 | 2005.00 | 2017.00 | 2017.00 | ▇▁▁▁▅ |
| n_years | 0 | 1.00 | 33.07 | 10.84 | 19.00 | 26.00 | 29.00 | 46.00 | 46.00 | ▃▇▁▁▇ |
| log_R2_1 | 0 | 1.00 | 0.17 | 0.67 | -3.17 | -0.04 | 0.05 | 0.26 | 7.96 | ▁▇▁▁▁ |
| log_R2_1_per_year | 0 | 1.00 | 0.01 | 0.02 | -0.07 | 0.00 | 0.00 | 0.01 | 0.28 | ▃▇▁▁▁ |
| joincount_delta | 9 | 1.00 | 0.93 | 0.72 | -0.07 | 0.35 | 0.78 | 1.39 | 3.17 | ▇▆▃▂▁ |
| circNorm | 0 | 1.00 | 76.43 | 102.39 | 1.27 | 18.82 | 44.80 | 89.20 | 769.50 | ▇▁▁▁▁ |
| minDist_toBorder_centr | 0 | 1.00 | 72826.78 | 67591.24 | 320.38 | 26694.02 | 52508.81 | 84907.04 | 455237.42 | ▇▂▁▁▁ |
| mean_lnLac | 0 | 1.00 | 1.36 | 1.08 | 0.06 | 0.50 | 1.06 | 2.03 | 5.73 | ▇▃▂▁▁ |
# Write the merged predictor table to the project's output folder.
saveRDS(
  object = final_predictors,
  file = here("Data/output/1_all_predictors_merged.rds")
)