A_12_Merge_predictors.R

A - 12: Merge Predictors

Libraries

Code
library(here)
library(dplyr)
library(tidyr)
library(ggplot2)
library(skimr)
library(sf)
source(here::here("Code/00_Configuration.R"))

Get raw species lists with filtering columns:

Raw species data, including the filtering columns, so we can check whether any other species were lost during filtering.

Code
# Species list per dataset and sampling period, together with the filtering
# flags (introduced, sp_remove_expert, sp_sampling_repeats, species_keep).
# Only rows with scalingID == 1 and cells_keep == 1 are used.
dta0 <-
  readRDS(here("Data/output/1_data_sf.rds")) %>%
  st_drop_geometry() %>%
  filter(scalingID == 1 & cells_keep == 1) %>%
  # Reduce to the identification and flag columns, one row per combination.
  distinct(datasetID, samplingPeriodID, verbatimIdentification, scientificName,
    introduced, sp_remove_expert, sp_sampling_repeats, species_keep) %>%
  filter(!is.na(verbatimIdentification)) %>%
  # Keep the first row per dataset x period x species x species_keep.
  distinct(datasetID, samplingPeriodID, verbatimIdentification, species_keep,
    .keep_all = TRUE) %>%
  # Missing repeat counts become 0. coalesce() replaces the former
  # case_when(), whose second branch and .default could never differ.
  # No trailing distinct() is needed: rows are already unique on the key
  # columns above, and this mutate() cannot create duplicates.
  mutate(sp_sampling_repeats = coalesce(sp_sampling_repeats, 0))

Load predictors

Let’s first match those with the same (approximate) taxonomy.

Code
# Species-level predictor tables, one .rds file per source.
range_size <- readRDS(here("Data/output/A_predictors/RangeSizes.rds"))

avonet <- readRDS(here("Data/output/A_predictors/Avonet.rds"))

# IUCN codes; three species that did not match are set to "LC" by hand.
iucn <- mutate(
  readRDS(here("Data/output/A_predictors/IUCN_2025_03_25.rds")),
  code = if_else(
    scientificName %in%
      c("Morus bassanus",  "Agropsar philippensis", "Tadorna tadorna"),
    "LC",
    code
  )
)

# Phylogenetic distinctiveness
phylo_dist <- readRDS(here("Data/output/A_predictors/Phylo_distinct.rds"))

# Fold the four tables together with natural full joins (keys are whatever
# columns the tables share), then drop exact duplicate rows.
species_predictors <- distinct(
  Reduce(
    function(joined, nxt) full_join(joined, nxt, relationship = "many-to-many"),
    list(range_size, avonet, iucn, phylo_dist)
  )
)

Check NAs

Code
# Number of missing values per column of the merged species table.
species_predictors %>% is.na() %>% colSums()
        scientificName      GlobRangeSize_km2 verbatimIdentification 
                     0                      3                      0 
                  Mass                Habitat              Migration 
                    12                     12                     12 
     Primary.Lifestyle                   code                     pd 
                    12                      2                     12 

Merge species with traits

Code
# Attach species-level traits to the per-dataset species list.
# The join keys are spelled out: verbatimIdentification and scientificName
# are the only columns the two tables have in common, so this matches the
# former natural join without relying on implicit key detection.
# (The previous argument-less filter() kept every row and was removed.)
species_predictors2 <- dta0 %>%
  left_join(
    species_predictors,
    by = join_by(verbatimIdentification, scientificName),
    relationship = "many-to-many"
  ) %>%
  rename(IUCN = code)

Now let’s merge those with a similar taxonomy.

Code
# Per-dataset, per-period, per-species predictor tables.
big_table <- readRDS(here("Data/output/A_predictors/Big_table.rds"))

# Species ranges, Atlas geometry: keep the ID columns plus the two shape metrics.
geometry <- select(
  readRDS(here("Data/output/A_predictors/Range_geometries.rds")),
  datasetID, samplingPeriodID, verbatimIdentification,
  circNorm, minDist_toBorder_centr
)

# Spatial autocorrelation: keep the ID columns plus joincount_delta.
sac_metrics <- select(
  readRDS(here("Data/output/A_predictors/Spatial_auto.rds")),
  datasetID, samplingPeriodID, verbatimIdentification, joincount_delta
)

Handle Lacunarity

Code
# Lacunarity table: drop the `name` column, turn both ID columns into plain
# numbers, replace underscores in species names with spaces, and finally
# relabel all seven columns by position.
lacunarity <-
  readRDS(here("Data/output/A_predictors/Lacunarity.rds")) %>%
  ungroup() %>%
  select(-name) %>%
  mutate(
    samplingPeriodID = as.numeric(as.character(samplingPeriodID)),
    datasetID = as.numeric(as.character(datasetID)),
    verbatimIdentification = gsub("_", " ", verbatimIdentification, fixed = TRUE)
  ) %>%
  as.data.frame() %>%
  setNames(c("r", "ln(r)", "lac", "ln(lac)",
             "datasetID", "samplingPeriodID", "verbatimIdentification"))

Calculate mean across increasing window sizes

Code
# Mean ln(lacunarity) per dataset x sampling period x species, averaged over
# all rows of `lacunarity` for that combination (missing values ignored).
mean_lac <- lacunarity %>%
  group_by(datasetID, samplingPeriodID, verbatimIdentification) %>%
  # .groups = "drop" returns an ungrouped table, so no grouping leaks into
  # later steps and summarise() no longer prints its regrouping message.
  summarize(mean_lnLac = mean(`ln(lac)`, na.rm = TRUE), .groups = "drop") %>%
  # Two species names arrive truncated in the lacunarity table; map them
  # back to the full names.
  mutate(
    verbatimIdentification = case_when(
      verbatimIdentification == "Fringilla moringilla" ~ "Fringilla montifringilla",
      verbatimIdentification == "Moringilla nivalis" ~ "Montifringilla nivalis",
      .default = verbatimIdentification
    )
  )

Quick check on the lacunarity data

Code
# Number of distinct species with a lacunarity value, per dataset and period.
mean_lac %>%
  group_by(datasetID, samplingPeriodID) %>%
  summarize(n_sp = length(unique(verbatimIdentification)))
# A tibble: 8 × 3
# Groups:   datasetID [4]
  datasetID samplingPeriodID  n_sp
      <dbl>            <dbl> <int>
1         5                1   209
2         5                2   216
3         6                1   244
4         6                2   251
5        13                1   226
6        13                2   248
7        26                1   432
8        26                2   446

Not matched:

Code
# Species names present in the lacunarity table but not in dta0 (7 names).
unique(
  mean_lac$verbatimIdentification[
    !mean_lac$verbatimIdentification %in% dta0$verbatimIdentification
  ]
)
[1] "Apalopteron familiare"    "Chloris sinica kittlitzi"
[3] "Oceanodroma tristrami"    "Phoebastria albatrus"    
[5] "Phoebastria nigripes"     "Phylloscopus ijimae"     
[7] "Turdus celaenops"        
Code
# Species names present in dta0 but missing from the lacunarity table (0 names).
unique(
  dta0$verbatimIdentification[
    !dta0$verbatimIdentification %in% mean_lac$verbatimIdentification
  ]
)
character(0)

Merge all predictors

Code
# Assemble the full predictor table: one row per dataset x sampling period x
# species, combining species traits, the atlas-level table, spatial
# autocorrelation, range geometry and mean lacunarity.
# All joins are natural joins on the columns the tables share.
predictors <-
  species_predictors2 %>%
  full_join(big_table, relationship = "many-to-many") %>%
  full_join(sac_metrics, relationship = "many-to-many") %>%
  full_join(geometry, relationship = "many-to-many") %>%
  # Lacunarity is only attached to rows that already exist.
  left_join(mean_lac, relationship = "many-to-many") %>%
  # The many-to-many joins can fan out; keep the first row per key.
  distinct(datasetID, verbatimIdentification, samplingPeriodID,
           .keep_all = TRUE) %>%
  # Character columns become factors, except the two species-name columns.
  mutate(
    across(
      where(is.character) & !matches("verbatimIdentification") & !matches("scientificName"),
      as.factor)) %>%
  # IDs, categorical traits and filter flags are treated as factors too.
  mutate(
    across(c("datasetID", "samplingPeriodID",
             "Habitat", "IUCN",
             "Migration", "Primary.Lifestyle",
             "introduced", "sp_remove_expert", "sp_sampling_repeats", "species_keep"),
           as.factor)) %>%
  # Re-assert uniqueness, this time including species_keep in the key.
  distinct(datasetID, samplingPeriodID, verbatimIdentification, species_keep,
           .keep_all = TRUE) %>%
  ungroup() %>%
  mutate(
    # Collapse habitat classes into five broad groups; anything else
    # (including missing) stays NA.
    Habitat_5 = as.factor(case_when(
      Habitat %in% c("Grassland", "Shrubland", "Desert", "Rock") ~ "open",
      Habitat %in% c("Woodland", "Forest") ~ "closed",
      Habitat %in% c("Coastal", "Marine") ~ "marine",
      Habitat %in% c("Wetland", "Riverine") ~ "freshwater",
      Habitat == "Human Modified" ~ "human",
      .default = NA_character_
    )),
    # 1 = generalist lifestyle, 0 = any other lifestyle. A missing lifestyle
    # stays NA rather than falling through to 0, so species without trait
    # data are not silently counted as non-generalists; this mirrors how
    # Threatened treats missing values.
    Generalism = as.factor(case_when(
      is.na(Primary.Lifestyle) ~ NA_real_,
      Primary.Lifestyle == "Generalist" ~ 1,
      .default = 0
    )),
    # 0 = "LC", 1 = every other non-missing code, NA = no code.
    Threatened = as.factor(case_when(
      IUCN %in% c("LC") ~ 0,
      is.na(IUCN) ~ NA_real_,
      .default = 1
    ))
  ) %>%
  # The raw categorical columns are replaced by the derived ones above.
  select(-Habitat, -Primary.Lifestyle, -IUCN)

Check predictors

Code
# Column types and first values of the merged predictor table.
glimpse(predictors)
Rows: 2,264
Columns: 38
$ datasetID              <fct> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,…
$ samplingPeriodID       <fct> 2, 2, 2, 1, 1, 1, 2, 1, 2, 2, 1, 1, 1, 2, 1, 2,…
$ verbatimIdentification <chr> "Nucifraga caryocatactes", "Anas platyrhynchos"…
$ scientificName         <chr> "Nucifraga caryocatactes", "Anas platyrhynchos"…
$ introduced             <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ sp_remove_expert       <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ sp_sampling_repeats    <fct> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,…
$ species_keep           <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ GlobRangeSize_km2      <dbl> 15439515, 39656070, 20386088, 18362593, 1185125…
$ Mass                   <dbl> 182.51, 843.42, 701.17, 2926.00, 16.00, 21.39, …
$ Migration              <fct> 2, 2, 2, 3, 1, 1, 3, 3, 1, 2, 3, 3, 1, 3, 1, 3,…
$ pd                     <dbl> 8.132575, 1.573267, 3.241734, 25.282865, 8.5072…
$ Total_area_samp        <dbl> 78308.81, 78308.81, 78308.81, 78308.81, 78308.8…
$ Total_Ncells           <dbl> 671, 671, 671, 671, 671, 671, 671, 671, 671, 67…
$ Total_Ncells_samp      <dbl> 628, 628, 628, 628, 628, 628, 628, 628, 628, 62…
$ AOO                    <dbl> 41707.68, 76499.91, 57486.17, 48681.68, 77328.6…
$ occ_Ncells             <dbl> 339, 609, 449, 390, 618, 605, 452, 573, 599, 62…
$ rel_occ_Ncells         <dbl> 0.540, 0.970, 0.715, 0.621, 0.984, 0.963, 0.720…
$ rel_AOO                <dbl> 0.533, 0.977, 0.734, 0.622, 0.987, 0.971, 0.741…
$ Jaccard_dissim         <dbl> 0.391, 0.058, 0.262, 0.369, 0.024, 0.063, 0.256…
$ a                      <dbl> 248, 584, 378, 342, 613, 579, 378, 534, 554, 62…
$ b                      <dbl> 91, 25, 71, 152, 10, 13, 74, 48, 45, 5, 97, 65,…
$ c                      <dbl> 68, 11, 63, 48, 5, 26, 56, 39, 21, 3, 71, 132, …
$ d                      <dbl> 221, 8, 116, 86, 0, 10, 120, 7, 8, 0, 383, 256,…
$ D_AOO_a                <dbl> 1.577, 1.960, 1.830, 1.628, 1.985, 1.973, 1.831…
$ time_span              <dbl> 2, 2, 2, 4, 4, 4, 2, 4, 2, 2, 4, 4, 4, 2, 4, 2,…
$ startYear1             <dbl> 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985,…
$ endYear2               <dbl> 2003, 2003, 2003, 2003, 2003, 2003, 2003, 2003,…
$ n_years                <dbl> 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19, 19,…
$ log_R2_1               <dbl> 0.064, 0.021, 0.010, 0.231, 0.009, -0.017, 0.03…
$ log_R2_1_per_year      <dbl> 0.003, 0.001, 0.001, 0.012, 0.000, -0.001, 0.00…
$ joincount_delta        <dbl> 0.6766781674, 0.0428289040, 0.3667871962, 0.255…
$ circNorm               <dbl> 41.820347, 5.051998, 30.193688, 56.230680, 4.01…
$ minDist_toBorder_centr <dbl> 85776.80, 84088.37, 80739.67, 83213.69, 84680.3…
$ mean_lnLac             <dbl> 0.23551088, 0.09352329, 0.16640292, 0.14003097,…
$ Habitat_5              <fct> closed, freshwater, freshwater, closed, closed,…
$ Generalism             <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0,…
$ Threatened             <fct> 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,…
Code
# Full structure, including factor levels and any attributes on columns.
predictors %>% str()
'data.frame':   2264 obs. of  38 variables:
 $ datasetID             : Factor w/ 4 levels "5","6","13","26": 1 1 1 1 1 1 1 1 1 1 ...
 $ samplingPeriodID      : Factor w/ 2 levels "1","2": 2 2 2 1 1 1 2 1 2 2 ...
 $ verbatimIdentification: chr  "Nucifraga caryocatactes" "Anas platyrhynchos" "Aythya fuligula" "Ciconia nigra" ...
 $ scientificName        : chr  "Nucifraga caryocatactes" "Anas platyrhynchos" "Aythya fuligula" "Ciconia nigra" ...
 $ introduced            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ sp_remove_expert      : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
 $ sp_sampling_repeats   : Factor w/ 3 levels "0","1","2": 3 3 3 3 3 3 3 3 3 3 ...
 $ species_keep          : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
 $ GlobRangeSize_km2     : num  15439515 39656070 20386088 18362593 11851257 ...
 $ Mass                  : num  183 843 701 2926 16 ...
 $ Migration             : Factor w/ 4 levels "1","2","3","NA": 2 2 2 3 1 1 3 3 1 2 ...
 $ pd                    : num  8.13 1.57 3.24 25.28 8.51 ...
 $ Total_area_samp       : num  78309 78309 78309 78309 78309 ...
 $ Total_Ncells          : num  671 671 671 671 671 671 671 671 671 671 ...
 $ Total_Ncells_samp     : num  628 628 628 628 628 628 628 628 628 628 ...
 $ AOO                   : num  41708 76500 57486 48682 77329 ...
 $ occ_Ncells            : num  339 609 449 390 618 605 452 573 599 625 ...
 $ rel_occ_Ncells        : num  0.54 0.97 0.715 0.621 0.984 0.963 0.72 0.912 0.954 0.995 ...
 $ rel_AOO               : num  0.533 0.977 0.734 0.622 0.987 0.971 0.741 0.916 0.957 0.999 ...
 $ Jaccard_dissim        : num  0.391 0.058 0.262 0.369 0.024 0.063 0.256 0.14 0.106 0.013 ...
 $ a                     : num  248 584 378 342 613 579 378 534 554 620 ...
 $ b                     : num  91 25 71 152 10 13 74 48 45 5 ...
 $ c                     : num  68 11 63 48 5 26 56 39 21 3 ...
 $ d                     : num  221 8 116 86 0 10 120 7 8 0 ...
 $ D_AOO_a               : Named num  1.58 1.96 1.83 1.63 1.99 ...
  ..- attr(*, "names")= chr [1:2264] "log(mean_area)" "log(mean_area)" "log(mean_area)" "log(mean_area)" ...
 $ time_span             : num  2 2 2 4 4 4 2 4 2 2 ...
 $ startYear1            : num  1985 1985 1985 1985 1985 ...
 $ endYear2              : num  2003 2003 2003 2003 2003 ...
 $ n_years               : num  19 19 19 19 19 19 19 19 19 19 ...
 $ log_R2_1              : num  0.064 0.021 0.01 0.231 0.009 -0.017 0.039 0.018 0.038 0.005 ...
 $ log_R2_1_per_year     : num  0.003 0.001 0.001 0.012 0 -0.001 0.002 0.001 0.002 0 ...
 $ joincount_delta       : num  0.6767 0.0428 0.3668 0.2551 0.0238 ...
 $ circNorm              : num  41.82 5.05 30.19 56.23 4.01 ...
 $ minDist_toBorder_centr: num  85777 84088 80740 83214 84680 ...
 $ mean_lnLac            : num  0.2355 0.0935 0.1664 0.14 0.0906 ...
 $ Habitat_5             : Factor w/ 5 levels "closed","freshwater",..: 1 2 2 1 1 1 2 1 1 1 ...
 $ Generalism            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
 $ Threatened            : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 1 1 ...
Code
# Missing values per column among kept species in sampling period 1.
colSums(
  is.na(
    filter(predictors, species_keep == 1, samplingPeriodID == 1)
  )
)
             datasetID       samplingPeriodID verbatimIdentification 
                     0                      0                      0 
        scientificName             introduced       sp_remove_expert 
                     0                      0                      0 
   sp_sampling_repeats           species_keep      GlobRangeSize_km2 
                     0                      0                      3 
                  Mass              Migration                     pd 
                     9                      9                      9 
       Total_area_samp           Total_Ncells      Total_Ncells_samp 
                     0                      0                      0 
                   AOO             occ_Ncells         rel_occ_Ncells 
                     0                      0                      0 
               rel_AOO         Jaccard_dissim                      a 
                     0                      0                      0 
                     b                      c                      d 
                     0                      0                      0 
               D_AOO_a              time_span             startYear1 
                     0                      0                      0 
              endYear2                n_years               log_R2_1 
                     0                      0                      0 
     log_R2_1_per_year        joincount_delta               circNorm 
                     0                      4                      0 
minDist_toBorder_centr             mean_lnLac              Habitat_5 
                     0                      0                      9 
            Generalism             Threatened 
                     0                      2 
Code
# Long-format skim summary of sampling period 1, with the four columns
# renamed by position.
predictors %>%
  filter(samplingPeriodID == 1) %>%
  skim() %>%
  to_long() %>%
  `names<-`(c("variable_class", "variable", "metric", "value"))
# A tibble: 324 × 4
   variable_class variable               metric    value
   <chr>          <chr>                  <chr>     <chr>
 1 character      verbatimIdentification n_missing 0    
 2 character      scientificName         n_missing 0    
 3 factor         datasetID              n_missing 0    
 4 factor         samplingPeriodID       n_missing 0    
 5 factor         introduced             n_missing 0    
 6 factor         sp_remove_expert       n_missing 0    
 7 factor         sp_sampling_repeats    n_missing 0    
 8 factor         species_keep           n_missing 0    
 9 factor         Migration              n_missing 45   
10 factor         Habitat_5              n_missing 45   
# ℹ 314 more rows
Code
# Rows without a species_keep flag (expected: none).
filter(predictors, is.na(species_keep))
 [1] datasetID              samplingPeriodID       verbatimIdentification
 [4] scientificName         introduced             sp_remove_expert      
 [7] sp_sampling_repeats    species_keep           GlobRangeSize_km2     
[10] Mass                   Migration              pd                    
[13] Total_area_samp        Total_Ncells           Total_Ncells_samp     
[16] AOO                    occ_Ncells             rel_occ_Ncells        
[19] rel_AOO                Jaccard_dissim         a                     
[22] b                      c                      d                     
[25] D_AOO_a                time_span              startYear1            
[28] endYear2               n_years                log_R2_1              
[31] log_R2_1_per_year      joincount_delta        circNorm              
[34] minDist_toBorder_centr mean_lnLac             Habitat_5             
[37] Generalism             Threatened            
<0 rows> (or 0-length row.names)
Code
# Skim summary of sampling period 1, computed separately for each dataset.
predictors %>%
  filter(samplingPeriodID == "1") %>%
  group_by(datasetID) %>%
  skim() %>%
  tibble::as_tibble()
# A tibble: 148 × 21
   skim_type skim_variable       datasetID n_missing complete_rate character.min
   <chr>     <chr>               <fct>         <int>         <dbl>         <int>
 1 character verbatimIdentifica… 5                 0             1             9
 2 character verbatimIdentifica… 6                 0             1             9
 3 character verbatimIdentifica… 13                0             1             9
 4 character verbatimIdentifica… 26                0             1             9
 5 character scientificName      5                 0             1             9
 6 character scientificName      6                 0             1             9
 7 character scientificName      13                0             1             9
 8 character scientificName      26                0             1             9
 9 factor    samplingPeriodID    5                 0             1            NA
10 factor    samplingPeriodID    6                 0             1            NA
# ℹ 138 more rows
# ℹ 15 more variables: character.max <int>, character.empty <int>,
#   character.n_unique <int>, character.whitespace <int>, factor.ordered <lgl>,
#   factor.n_unique <int>, factor.top_counts <chr>, numeric.mean <dbl>,
#   numeric.sd <dbl>, numeric.p0 <dbl>, numeric.p25 <dbl>, numeric.p50 <dbl>,
#   numeric.p75 <dbl>, numeric.p100 <dbl>, numeric.hist <chr>
Code
# Strip element names from every numeric column so they are stored as plain
# vectors (D_AOO_a carries a "log(mean_area)" name on each element).
# This replaces four per-column assignments, three of which targeted columns
# (morans_I, morans_I_p, Lac) that are not part of `predictors` and therefore
# did nothing.
predictors <- predictors %>%
  mutate(across(where(is.numeric), unname))
str(predictors)
'data.frame':   2264 obs. of  38 variables:
 $ datasetID             : Factor w/ 4 levels "5","6","13","26": 1 1 1 1 1 1 1 1 1 1 ...
 $ samplingPeriodID      : Factor w/ 2 levels "1","2": 2 2 2 1 1 1 2 1 2 2 ...
 $ verbatimIdentification: chr  "Nucifraga caryocatactes" "Anas platyrhynchos" "Aythya fuligula" "Ciconia nigra" ...
 $ scientificName        : chr  "Nucifraga caryocatactes" "Anas platyrhynchos" "Aythya fuligula" "Ciconia nigra" ...
 $ introduced            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ sp_remove_expert      : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
 $ sp_sampling_repeats   : Factor w/ 3 levels "0","1","2": 3 3 3 3 3 3 3 3 3 3 ...
 $ species_keep          : Factor w/ 2 levels "0","1": 2 2 2 2 2 2 2 2 2 2 ...
 $ GlobRangeSize_km2     : num  15439515 39656070 20386088 18362593 11851257 ...
 $ Mass                  : num  183 843 701 2926 16 ...
 $ Migration             : Factor w/ 4 levels "1","2","3","NA": 2 2 2 3 1 1 3 3 1 2 ...
 $ pd                    : num  8.13 1.57 3.24 25.28 8.51 ...
 $ Total_area_samp       : num  78309 78309 78309 78309 78309 ...
 $ Total_Ncells          : num  671 671 671 671 671 671 671 671 671 671 ...
 $ Total_Ncells_samp     : num  628 628 628 628 628 628 628 628 628 628 ...
 $ AOO                   : num  41708 76500 57486 48682 77329 ...
 $ occ_Ncells            : num  339 609 449 390 618 605 452 573 599 625 ...
 $ rel_occ_Ncells        : num  0.54 0.97 0.715 0.621 0.984 0.963 0.72 0.912 0.954 0.995 ...
 $ rel_AOO               : num  0.533 0.977 0.734 0.622 0.987 0.971 0.741 0.916 0.957 0.999 ...
 $ Jaccard_dissim        : num  0.391 0.058 0.262 0.369 0.024 0.063 0.256 0.14 0.106 0.013 ...
 $ a                     : num  248 584 378 342 613 579 378 534 554 620 ...
 $ b                     : num  91 25 71 152 10 13 74 48 45 5 ...
 $ c                     : num  68 11 63 48 5 26 56 39 21 3 ...
 $ d                     : num  221 8 116 86 0 10 120 7 8 0 ...
 $ D_AOO_a               : num  1.58 1.96 1.83 1.63 1.99 ...
 $ time_span             : num  2 2 2 4 4 4 2 4 2 2 ...
 $ startYear1            : num  1985 1985 1985 1985 1985 ...
 $ endYear2              : num  2003 2003 2003 2003 2003 ...
 $ n_years               : num  19 19 19 19 19 19 19 19 19 19 ...
 $ log_R2_1              : num  0.064 0.021 0.01 0.231 0.009 -0.017 0.039 0.018 0.038 0.005 ...
 $ log_R2_1_per_year     : num  0.003 0.001 0.001 0.012 0 -0.001 0.002 0.001 0.002 0 ...
 $ joincount_delta       : num  0.6767 0.0428 0.3668 0.2551 0.0238 ...
 $ circNorm              : num  41.82 5.05 30.19 56.23 4.01 ...
 $ minDist_toBorder_centr: num  85777 84088 80740 83214 84680 ...
 $ mean_lnLac            : num  0.2355 0.0935 0.1664 0.14 0.0906 ...
 $ Habitat_5             : Factor w/ 5 levels "closed","freshwater",..: 1 2 2 1 1 1 2 1 1 1 ...
 $ Generalism            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
 $ Threatened            : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 1 1 ...
Code
# Final table: kept species only, with the filter flags removed and exact
# duplicate rows dropped.
final_predictors <- predictors %>%
  filter(species_keep == "1") %>%
  select(-c(sp_remove_expert, sp_sampling_repeats, species_keep, introduced)) %>%
  distinct()

# Remaining missing values per column.
colSums(is.na(final_predictors))
             datasetID       samplingPeriodID verbatimIdentification 
                     0                      0                      0 
        scientificName      GlobRangeSize_km2                   Mass 
                     0                      6                     18 
             Migration                     pd        Total_area_samp 
                    18                     18                      0 
          Total_Ncells      Total_Ncells_samp                    AOO 
                     0                      0                      0 
            occ_Ncells         rel_occ_Ncells                rel_AOO 
                     0                      0                      0 
        Jaccard_dissim                      a                      b 
                     0                      0                      0 
                     c                      d                D_AOO_a 
                     0                      0                      0 
             time_span             startYear1               endYear2 
                     0                      0                      0 
               n_years               log_R2_1      log_R2_1_per_year 
                     0                      0                      0 
       joincount_delta               circNorm minDist_toBorder_centr 
                     9                      0                      0 
            mean_lnLac              Habitat_5             Generalism 
                     0                     18                      0 
            Threatened 
                     4 
Code
# Summary statistics for every column of the final table.
# Called directly (not piped) so the summary header shows the object name.
skimr::skim(final_predictors)
Data summary
Name final_predictors
Number of rows 2108
Number of columns 34
_______________________
Column type frequency:
character 2
factor 6
numeric 26
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
verbatimIdentification 0 1 9 32 0 762 0
scientificName 0 1 9 39 0 726 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
datasetID 0 1.00 FALSE 4 26: 824, 6: 466, 13: 416, 5: 402
samplingPeriodID 0 1.00 FALSE 2 1: 1054, 2: 1054
Migration 18 0.99 FALSE 3 3: 1126, 2: 494, 1: 470, NA: 0
Habitat_5 18 0.99 FALSE 5 clo: 786, fre: 530, ope: 486, mar: 190
Generalism 0 1.00 FALSE 2 0: 1836, 1: 272
Threatened 4 1.00 FALSE 2 0: 1804, 1: 300

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
GlobRangeSize_km2 6 1.00 14098593.16 14050260.43 587.60 4856350.33 10151559.24 18843405.07 118014530.88 ▇▁▁▁▁
Mass 18 0.99 453.65 1059.12 3.09 19.90 77.50 452.10 10682.04 ▇▁▁▁▁
pd 18 0.99 8.20 6.31 1.11 4.36 6.19 9.71 56.96 ▇▁▁▁▁
Total_area_samp 0 1.00 2425389.23 2793173.45 78308.81 126878.54 367713.86 5909157.77 5909157.77 ▇▁▁▁▅
Total_Ncells 0 1.00 3551.38 2047.71 671.00 1309.00 5080.00 5080.00 5335.00 ▅▁▁▁▇
Total_Ncells_samp 0 1.00 2631.95 1670.62 628.00 1184.00 2821.00 2821.00 5319.00 ▇▁▇▁▅
AOO 0 1.00 734092.09 1398576.24 0.69 15384.02 74679.96 504160.98 5768894.73 ▇▁▁▁▁
occ_Ncells 0 1.00 763.68 1077.14 1.00 69.00 323.50 899.25 5229.00 ▇▁▁▁▁
rel_occ_Ncells 0 1.00 0.31 0.32 0.00 0.04 0.18 0.56 1.00 ▇▂▂▂▂
rel_AOO 0 1.00 0.32 0.33 0.00 0.03 0.18 0.60 1.00 ▇▂▂▂▂
Jaccard_dissim 0 1.00 0.50 0.29 0.00 0.26 0.50 0.74 1.00 ▇▇▇▇▇
a 0 1.00 612.62 957.95 0.00 30.00 199.50 666.00 5138.00 ▇▁▁▁▁
b 0 1.00 179.74 276.65 0.00 27.00 86.00 206.00 2999.00 ▇▁▁▁▁
c 0 1.00 122.37 193.83 0.00 20.00 57.00 126.00 1502.00 ▇▁▁▁▁
d 0 1.00 1717.23 1512.70 0.00 545.00 1167.50 2603.00 5318.00 ▇▃▅▁▂
D_AOO_a 0 1.00 1.32 0.50 0.00 0.97 1.41 1.74 2.00 ▁▂▅▆▇
time_span 0 1.00 7.84 7.52 2.00 4.00 5.00 5.00 23.00 ▇▁▁▁▂
startYear1 0 1.00 1976.64 5.06 1972.00 1972.00 1974.00 1980.00 1985.00 ▇▁▁▃▂
endYear2 0 1.00 2008.72 6.71 2002.00 2003.00 2005.00 2017.00 2017.00 ▇▁▁▁▅
n_years 0 1.00 33.07 10.84 19.00 26.00 29.00 46.00 46.00 ▃▇▁▁▇
log_R2_1 0 1.00 0.17 0.67 -3.17 -0.04 0.05 0.26 7.96 ▁▇▁▁▁
log_R2_1_per_year 0 1.00 0.01 0.02 -0.07 0.00 0.00 0.01 0.28 ▃▇▁▁▁
joincount_delta 9 1.00 0.93 0.72 -0.07 0.35 0.78 1.39 3.17 ▇▆▃▂▁
circNorm 0 1.00 76.43 102.39 1.27 18.82 44.80 89.20 769.50 ▇▁▁▁▁
minDist_toBorder_centr 0 1.00 72826.78 67591.24 320.38 26694.02 52508.81 84907.04 455237.42 ▇▂▁▁▁
mean_lnLac 0 1.00 1.36 1.08 0.06 0.50 1.06 2.03 5.73 ▇▃▂▁▁

Save to .rds

Code
# Write the merged predictor table to the project's output folder.
saveRDS(
  final_predictors,
  file = here("Data/output/1_all_predictors_merged.rds")
)