In the following we match external data to our species. 1. Avonet traits [1] 2. Phylogenetic distinctiveness [2,3] 3. IUCN threat status[4] 4. Range geometry 5. Spatial autocorrelation [5] 6. Lacunarity 7. Global range size [6]

Finally, we merge all predictors together.

Code
rm(list=ls())
gc()
          used (Mb) gc trigger (Mb) max used (Mb)
Ncells  595849 31.9    1359218 72.6   686414 36.7
Vcells 1095508  8.4    8388608 64.0  1877491 14.4
Code
suppressPackageStartupMessages({
library(here)
library(dplyr)
library(skimr)
library(readxl)
library(tictoc)

# Phylo distinct
library(ape)
library(phyloregion)

# IUCN
library(taxize)

# Range geometry
library(sf)
library(tidyverse)
library(purrr)
library(terra)
library(broom)
library(geosphere)

# Spatial autocorrelation

library(spdep)
library(rstatix)
# install.packages("spdep", repos = "https://cloud.r-project.org/") # if not installed


})

Paths

Code
data_sf_path <- 
  here("Demo_NewYork/Data/output/1_data_sf_ny.rds")

sp_data_path <- 
  here("Demo_NewYork/Data/output/1_data_filtered_ny.rds")

tax_path <-
  here("Demo_NewYork/Data/input/Tax_lookup_ny.csv")

avonet_path <- 
  here("Demo_NewYork/Data/input/AVONET Supplementary dataset 1.xlsx")

tree_path <- 
   here("Demo_NewYork/Data/input/MRC_consensus_BirdTree.tre")

Avonet traits

  • Download AVONET Supplementary dataset 1.xlsx here ([1] Tobias et al. (2022))

  • We placed it here: Demo_NewYork/Data/input/AVONET Supplementary dataset 1.xlsx (see path above)

Code
#----------------------------------------------------------#
# Load data  -----
#----------------------------------------------------------#

# filtered species list
sci_name <-
  readRDS(sp_data_path) %>%
  distinct(verbatimIdentification, scientificName) # BirdLife 2024 taxonomy

# taxonomic crosswalk with BirdLife 2018 taxonomy as used in Avonet
Tax <- 
  read.csv2(tax_path) %>%
  select(verbatimIdentification, scientificName, ScientificName2018)%>%
  filter(verbatimIdentification %in% sci_name$verbatimIdentification) %>%
  distinct(verbatimIdentification, scientificName, .keep_all=TRUE)

# Extract traits from Avonet
traits <-
  readxl::read_excel(avonet_path, sheet = "AVONET1_BirdLife")  %>%
  dplyr::select(
    Species1, Mass,
    Habitat, Migration, Primary.Lifestyle) %>%
  mutate(
    across(c(Habitat, Migration, Primary.Lifestyle),
           as.factor)) 
Warning: Expecting numeric in A4995 / R4995C1: got 'NA'
Warning: Expecting numeric in A5255 / R5255C1: got 'NA'
Warning: Expecting numeric in A5265 / R5265C1: got 'NA'
Warning: Expecting numeric in A5276 / R5276C1: got 'NA'
Warning: Expecting numeric in A6700 / R6700C1: got 'NA'
Warning: Expecting numeric in A6746 / R6746C1: got 'NA'
Warning: Expecting numeric in A7296 / R7296C1: got 'NA'
Warning: Expecting numeric in A7302 / R7302C1: got 'NA'
Warning: Expecting numeric in A7329 / R7329C1: got 'NA'
Warning: Expecting numeric in A7354 / R7354C1: got 'NA'
Code
# Merge with species names
traits_merged <- traits %>%
  rename("ScientificName2018" = "Species1") %>%
  right_join(Tax) %>%
  select(-ScientificName2018) %>%
  distinct(verbatimIdentification, scientificName, .keep_all = TRUE)
Joining with `by = join_by(ScientificName2018)`
Code
# checks:
traits_merged %>% skimr::skim() # 3 missing
Data summary
Name Piped data
Number of rows 233
Number of columns 6
_______________________
Column type frequency:
character 2
factor 3
numeric 1
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
verbatimIdentification 0 1 9 32 0 233 0
scientificName 0 1 9 39 0 233 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
Habitat 3 0.99 FALSE 9 For: 90, Wet: 51, Shr: 21, Gra: 20
Migration 3 0.99 FALSE 3 3: 143, 2: 47, 1: 40, NA: 0
Primary.Lifestyle 3 0.99 FALSE 5 Ins: 98, Ter: 54, Gen: 31, Aer: 27

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
Mass 3 0.99 324.87 747.25 3.09 14.4 47.18 334.61 5791.37 ▇▁▁▁▁
Code
rm(list = setdiff(ls(),c("data_sf_path","tax_path","sp_data_path","tree_path","sci_name","traits_merged")))

Phylogenetic distinctiveness

This requires the BirdTree [2] tree file, which can be downloaded from here. We use the consensus tree that we created based on recommendations from [3]. We placed the tree file here: Demo_NewYork/Data/input/MRC_consensus_BirdTree.tre (see path above).

Code
Tax <- read.csv2(tax_path)[2:7] %>%
  filter(verbatimIdentification %in% sci_name$verbatimIdentification)
tree <- read.tree(tree_path)


# pd for all species in the tree
pd <-
  phyloregion::evol_distinct(
  tree,
  type = "fair.proportion",
  scale = FALSE,
  use.branch.lengths = TRUE) %>%
  as.data.frame() %>%
  tibble::rownames_to_column(var = "tip.label") %>%
  rename(pd = ".") %>%
  right_join(Tax) %>%
  select(verbatimIdentification, scientificName, pd) %>%
  filter(verbatimIdentification %in% sci_name$verbatimIdentification) %>%
  distinct(scientificName, verbatimIdentification, .keep_all=T)
Joining with `by = join_by(tip.label)`
Code
# checks
colSums(is.na(pd)) # 3 species not matched
verbatimIdentification         scientificName                     pd 
                     0                      0                      3 
Code
pd %>% 
  filter(is.na(pd)) %>% 
  pull(verbatimIdentification)
[1] "Falcipennis canadensis"           "Anas platyrhynchos x A. rubripes"
[3] "Vermivora chrysoptera x V. pinus"
Code
hist(pd$pd, breaks = 30, 
  main = "Phylogenetic Distinctiveness distribution", 
  xlab = "Phylogenetic Distinctiveness")

Code
rm(list = setdiff(ls(),c("data_sf_path","tax_path","sp_data_path","sci_name","traits_merged", "pd")))

IUCN Threat status

For this part we need a personal API key to access the IUCN Red List API. You can get one from here. There you have to sign up with an account and request an API token. Once you have it, you can either set it as an environment variable in an .Renviron file e.g., IUCN_REDLIST_KEY = [key] or paste it directly into the code below.

  • [4] IUCN (2025)
Code
# Download status via Red List API (acces through taxize)
tictoc::tic()

IUCN.list <-
  suppressMessages(iucn_summary(unique(sci_name$scientificName),
    distr.detail = F,
    key = my_key # change to your token key
  ))
══  233 queries  ═════════════
v  Found:  Baeolophus bicolor
v  Found:  Junco hyemalis
v  Found:  Corvus brachyrhynchos
v  Found:  Cathartes aura
v  Found:  Toxostoma rufum
v  Found:  Icterus galbula
v  Found:  Dumetella carolinensis
v  Found:  Piranga olivacea
v  Found:  Setophaga pensylvanica
v  Found:  Tyrannus tyrannus
v  Found:  Troglodytes aedon
v  Found:  Setophaga ruticilla
v  Found:  Hylocichla mustelina
v  Found:  Catharus fuscescens
v  Found:  Vireo gilvus
v  Found:  Haemorhous mexicanus
v  Found:  Buteo jamaicensis
v  Found:  Cyanocitta cristata
v  Found:  Colaptes auratus
v  Found:  Turdus migratorius
v  Found:  Mimus polyglottos
v  Found:  Empidonax alnorum
v  Found:  Pheucticus ludovicianus
v  Found:  Seiurus aurocapilla
v  Found:  Pipilo erythrophthalmus
v  Found:  Melospiza georgiana
v  Found:  Vireo olivaceus
v  Found:  Empidonax minimus
v  Found:  Hirundo rustica
v  Found:  Tachycineta bicolor
v  Found:  Hylatomus pileatus
v  Found:  Vermivora cyanoptera
v  Found:  Quiscalus quiscula
v  Found:  Setophaga petechia
v  Found:  Passerina cyanea
v  Found:  Empidonax traillii
v  Found:  Progne subis
v  Found:  Dolichonyx oryzivorus
v  Found:  Sturnella magna
v  Found:  Columba livia
v  Found:  Megascops asio
v  Found:  Spizella pusilla
v  Found:  Dryobates pubescens
v  Found:  Empidonax virescens
v  Found:  Geothlypis trichas
v  Found:  Setophaga fusca
v  Found:  Passerculus sandwichensis
v  Found:  Contopus virens
v  Found:  Setophaga magnolia
v  Found:  Sphyrapicus varius
v  Found:  Molothrus ater
v  Found:  Sayornis phoebe
v  Found:  Setophaga citrina
v  Found:  Myiarchus crinitus
v  Found:  Haemorhous purpureus
v  Found:  Coccyzus americanus
v  Found:  Parkesia motacilla
v  Found:  Sitta canadensis
v  Found:  Zenaida macroura
v  Found:  Geothlypis philadelphia
v  Found:  Megaceryle alcyon
v  Found:  Chaetura pelagica
v  Found:  Riparia riparia
v  Found:  Spinus tristis
v  Found:  Poecile atricapillus
v  Found:  Melospiza melodia
v  Found:  Setophaga caerulescens
v  Found:  Setophaga coronata
v  Found:  Spizella passerina
v  Found:  Melanerpes carolinus
v  Found:  Sialia sialis
v  Found:  Falco sparverius
v  Found:  Certhia americana
v  Found:  Sitta carolinensis
v  Found:  Charadrius vociferus
v  Found:  Archilochus colubris
v  Found:  Meleagris gallopavo
v  Found:  Setophaga virens
v  Found:  Agelaius phoeniceus
v  Found:  Aix sponsa
v  Found:  Butorides striata
v  Found:  Branta canadensis
x  Not Found:  Ardea herodias
x  Not Found:  Buteo platypterus
x  Not Found:  Cardinalis cardinalis
v  Found:  Bonasa umbellus
v  Found:  Anas platyrhynchos
v  Found:  Bombycilla cedrorum
v  Found:  Leuconotopicus villosus
v  Found:  Vireo solitarius
v  Found:  Icterus spurius
v  Found:  Accipiter cooperii
v  Found:  Parkesia noveboracensis
v  Found:  Troglodytes troglodytes
v  Found:  Catharus guttatus
v  Found:  Coccyzus erythropthalmus
v  Found:  Stelgidopteryx serripennis
v  Found:  Buteo lineatus
v  Found:  Accipiter gentilis
v  Found:  Gallinago delicata
v  Found:  Protonotaria citrea
v  Found:  Scolopax minor
v  Found:  Accipiter striatus
v  Found:  Actitis macularius
v  Found:  Anas rubripes
v  Found:  Fulica americana
v  Found:  Circus cyaneus
v  Found:  Petrochelidon pyrrhonota
v  Found:  Bubo virginianus
v  Found:  Melanerpes erythrocephalus
v  Found:  Zonotrichia albicollis
v  Found:  Thryothorus ludovicianus
v  Found:  Passerculus henslowii
v  Found:  Lophodytes cucullatus
v  Found:  Strix varia
v  Found:  Hesperiphona vespertina
v  Found:  Haliaeetus leucocephalus
v  Found:  Cardellina canadensis
v  Found:  Spinus pinus
v  Found:  Pooecetes gramineus
v  Found:  Bartramia longicauda
v  Found:  Mniotilta varia
v  Found:  Porzana carolina
v  Found:  Colinus virginianus
v  Found:  Polioptila caerulea
v  Found:  Ammodramus savannarum
v  Found:  Eremophila alpestris
v  Found:  Mergus merganser
v  Found:  Spatula discors
v  Found:  Cistothorus palustris
v  Found:  Rallus limicola
v  Found:  Cistothorus platensis
v  Found:  Ixobrychus exilis
v  Found:  Leiothlypis ruficapilla
v  Found:  Vireo griseus
v  Found:  Vireo flavifrons
v  Found:  Regulus satrapa
v  Found:  Corvus corax
v  Found:  Chordeiles minor
v  Found:  Catharus ustulatus
v  Found:  Setophaga americana
v  Found:  Podilymbus podiceps
v  Found:  Pandion haliaetus
v  Found:  Oxyura jamaicensis
v  Found:  Botaurus lentiginosus
x  Not Found:  Aythya collaris
x  Not Found:  Gavia immer
v  Found:  Setophaga pinus
v  Found:  Spizella pallida
v  Found:  Spiza americana
v  Found:  Setophaga cerulea
v  Found:  Gallinula chloropus
v  Found:  Icteria virens
v  Found:  Anas crecca
v  Found:  Larus marinus
v  Found:  Hydroprogne caspia
v  Found:  Vermivora chrysoptera
v  Found:  Larus argentatus
v  Found:  Sterna hirundo
v  Found:  Asio otus
v  Found:  Mareca strepera
v  Found:  Mareca americana
v  Found:  Setophaga discolor
v  Found:  Aegolius acadicus
v  Found:  Tyto alba
v  Found:  Geothlypis formosa
v  Found:  Rallus elegans
v  Found:  Corthylio calendula
v  Found:  Falco peregrinus
v  Found:  Larus delawarensis
x  Not Found:  Nannopterum auritus
v  Found:  Ardea alba
v  Found:  Nycticorax nycticorax
v  Found:  Aquila chrysaetos
v  Found:  Egretta thula
v  Found:  Spatula clypeata
v  Found:  Antrostomus vociferus
v  Found:  Asio flammeus
v  Found:  Setophaga dominica
v  Found:  Helmitheros vermivorum
v  Found:  Chlidonias niger
v  Found:  Loxia curvirostra
v  Found:  Anas acuta
v  Found:  Aythya americana
v  Found:  Aythya affinis
v  Found:  Lanius ludovicianus
v  Found:  Sturnella neglecta
v  Found:  Loxia leucoptera
v  Found:  Corvus ossifragus
v  Found:  Mergus serrator
v  Found:  Bucephala clangula
v  Found:  Aythya valisineria
v  Found:  Bubulcus ibis
v  Found:  Charadrius melodus
v  Found:  Contopus cooperi
v  Found:  Empidonax flaviventris
v  Found:  Vireo philadelphicus
v  Found:  Melospiza lincolnii
v  Found:  Euphagus carolinus
v  Found:  Poecile hudsonicus
v  Found:  Picoides arcticus
v  Found:  Picoides tridactylus
v  Found:  Setophaga castanea
v  Found:  Setophaga striata
v  Found:  Perisoreus canadensis
v  Found:  Leiothlypis peregrina
v  Found:  Canachites canadensis
x  Not Found:  Setophaga palmarum
x  Not Found:  Catharus bicknelli
v  Found:  Setophaga tigrina
v  Found:  Passerina caerulea
v  Found:  Cardellina pusilla
v  Found:  Ammospiza caudacuta
v  Found:  Rallus longirostris
v  Found:  Quiscalus major
v  Found:  Antrostomus carolinensis
v  Found:  Larus atricilla
v  Found:  Haematopus palliatus
v  Found:  Plegadis falcinellus
v  Found:  Tringa semipalmata
v  Found:  Nyctanassa violacea
v  Found:  Egretta caerulea
v  Found:  Ammospiza maritima
v  Found:  Egretta tricolor
v  Found:  Piranga rubra
x  Not Found:  Anas platyrhynchos x Anas rubripes
v  Found:  Sternula antillarum
v  Found:  Rynchops niger
v  Found:  Gelochelidon nilotica
v  Found:  Sterna forsteri
x  Not Found:  Vermivora chrysoptera x Vermivora pinus
v  Found:  Sterna dougallii
v  Found:  Laterallus jamaicensis
══  Results  ═════════════════

* Total: 233 
* Found: 223 
* Not Found: 10
Code
tictoc::toc() # 179.11 sec elapsed
204.73 sec elapsed
Code
# Extract codes and bind to dataframe
IUCN <- 
  lapply(names(IUCN.list), function(name) {
  item <- IUCN.list[[name]]

  # Check if the element is a list and contains `red_list_category`
  if (is.list(item) && 
      !is.null(item$red_list_category) && 
      !is.null(item$red_list_category$code)) {
    
    return(
      data.frame(
        name = name, 
        code = item$red_list_category$code, 
        stringsAsFactors = FALSE)
      )
    
  } else {
    return(
      data.frame(
        name = name, 
        code = NA, 
        stringsAsFactors = FALSE)
      ) # Fill missing with NA
  }
}) %>% 
  do.call(rbind, .) # bind rows to dataframe

# Check NA species: 10 species not found
species_with_NA <-
  IUCN %>% 
  filter(is.na(code))

# try again with those that were not found:
IUCN.list2 <-
  suppressMessages(iucn_summary(unique(species_with_NA$name),
    distr.detail = F,
    key =my_key
  ))
══  10 queries  ══════════════
v  Found:  Ardea herodias
v  Found:  Buteo platypterus
v  Found:  Cardinalis cardinalis
v  Found:  Aythya collaris
v  Found:  Gavia immer
x  Not Found:  Nannopterum auritus
v  Found:  Setophaga palmarum
v  Found:  Catharus bicknelli
x  Not Found:  Anas platyrhynchos x Anas rubripes
x  Not Found:  Vermivora chrysoptera x Vermivora pinus
══  Results  ═════════════════

* Total: 10 
* Found: 7 
* Not Found: 3
Code
# 3 species not found of which 2 are hybrids and Nannopterium auritus which we will look up manually and assign the status by hand

IUCN2 <- lapply(names(IUCN.list2), function(name) {
  item <- IUCN.list2[[name]]

  # Check if the element is a list and contains `red_list_category`
  if (is.list(item) && 
      !is.null(item$red_list_category) && 
      !is.null(item$red_list_category$code)) {
    return(
      data.frame(
        name = name, 
        code = item$red_list_category$code, #
        stringsAsFactors = FALSE)
      )
  } else {
    return(
      data.frame(
        name = name, 
        code = NA, 
        stringsAsFactors = FALSE)
      ) # Fill missing with NA
  }
}) %>% 
  do.call(rbind, .)


# Merge Results 1 and Results 2
IUCN_merged <- 
  rbind(IUCN %>% na.omit(), IUCN2) %>%
  unique() %>%
  # assign code to Nannopterum auritus by hand - because it is on IUCN.
  mutate(code = case_when(name == "Nannopterum auritus" ~ "LC",
    .default = code
  ))

# checks: only the two hybrids left
IUCN_merged %>% 
  filter(is.na(code))
                                        name code
910       Anas platyrhynchos x Anas rubripes <NA>
1010 Vermivora chrysoptera x Vermivora pinus <NA>
Code
# Reshape
IUCN_df <-
  as.data.frame(IUCN_merged,
    row.names = c(IUCN_merged$name)
  ) %>%
  tibble::rownames_to_column(var = "scientificName") %>%
  right_join(sci_name) %>%
  distinct(verbatimIdentification, scientificName, code)

table(IUCN_df$code)

 EN  LC  NT  VU 
  3 207  15   6 
Code
rm(list = setdiff(ls(),c("data_sf_path","tax_path","sp_data_path","sci_name","traits_merged", "pd", "IUCN_df")))

Range geometry

Code
# Functions:
## Normalized circularity function (written by Gabriel Ortega Solís)
circNorm <- function(x){
    perimeter <- perim(x)
    area <- expanse(x)
    res <- (perimeter^2) / (4 * pi * area)
    return(res)
}

## Smallest distance from the range centroid to the atlas border
minDist <- function(centroid, borders){
  res <- min(
    distance(
      crds(centroid), 
      crds(borders), 
      lonlat = F)
    )
  return(res)
}

# s2 settings:
sf_use_s2(FALSE) # we use planar projections (i.e., in metres) so this can be turned off
Spherical geometry (s2) switched off
Code
#----------------------------------------------------------#
# Load data -----
#----------------------------------------------------------#

data_sf <-
  readRDS(data_sf_path) %>%
  filter(scalingID == 1 & 
           cells_keep == 1) 

#----------------------------------------------------------#
# Atlas geometries  -----
#----------------------------------------------------------#

sf_current_atlas <- 
  data_sf %>% 
  filter(samplingPeriodID == 1) %>%
  select(geometry) %>%
  st_as_sf() %>%
  summarise(geometry = st_union(geometry), .groups = "drop") %>%
  st_transform(crs = unique(data_sf$verbatimFootprintSRS)) %>%
  st_make_valid()
although coordinates are longitude/latitude, st_union assumes that they are
planar
Code
plot(sf_current_atlas)

Code
# vectorize atlas
terra_current_atlas <- vect(sf_current_atlas)

plot(terra_current_atlas)

Code
# Store borders as lines
atlas_border_lines <- terra::as.lines(terra_current_atlas)

#----------------------------------------------------------#
# Single species range geometries  -----
#----------------------------------------------------------#

grouped_list <- 
  data_sf %>% 
  group_by(samplingPeriodID, verbatimIdentification) %>%
  filter(species_keep == 1) %>%
  group_split()

tictoc::tic()
range_geometries <- map_dfr(grouped_list, function(dta) {
  
  sp_range <- 
    dta %>%
      dplyr::select(geometry, siteID, datasetID) %>%
      st_transform(crs = st_crs(atlas_border_lines)) %>%
      summarise() %>%
      terra::vect()
  
  sp_range_centroid <- 
    centroids(sp_range)
  
  # checks
  # plot(sp_range); plot(sp_range_centroid, add = T, col = "red")
  
  # Compute range attributes
  tibble(
    datasetID = unique(dta$datasetID),
    samplingPeriodID = unique(dta$samplingPeriodID),
    verbatimIdentification = unique(dta$verbatimIdentification),
    circNorm = circNorm(sp_range), 
    minDist_toBorder_centr = minDist(sp_range_centroid, atlas_border_lines)
    )
  
})
tictoc::toc() # 108.94 sec elapsed
118.28 sec elapsed
Code
# checks:
range_geometries %>% 
  skimr::skim()
Data summary
Name Piped data
Number of rows 466
Number of columns 5
_______________________
Column type frequency:
character 1
numeric 4
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
verbatimIdentification 0 1 9 32 0 233 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
datasetID 0 1 6.00 0.00 6.00 6.00 6.00 6.00 6.00 ▁▁▇▁▁
samplingPeriodID 0 1 1.50 0.50 1.00 1.00 1.50 2.00 2.00 ▇▁▁▁▇
circNorm 0 1 172.32 173.49 1.27 30.19 116.39 269.71 769.29 ▇▂▂▁▁
minDist_toBorder_centr 0 1 34788.29 17516.27 1152.08 24545.67 35029.02 44744.41 93733.18 ▅▇▆▂▁
Code
rm(list = setdiff(ls(),c("data_sf_path","tax_path","sp_data_path","sci_name","traits_merged", "pd", "IUCN_df", "range_geometries")))

Spatial autocorrelation

We calculate the difference in expected and observed join counts for presences across the atlas for each species and sampling period. This is done using the joincount.test() function from the spdep package. The join count test is a measure of spatial autocorrelation that counts the number of pairs of neighboring sites that have the same value (in this case, presence or absence of a species). In the calculation of delat join count, we follow still unpublished work by Soria et al. (2025) [5]

Code
# CRS
ny_crs <-"epsg:32118"  #verbatimFootprintSRS (NAD83 / New York Long Island)

# Read data
data_sf <- 
  readRDS(data_sf_path) %>%
  filter(scalingID == 1 &
           cells_keep == 1 &
           species_keep == 1) %>%
  st_transform(crs = ny_crs)


# calculate join count across samplingPeriods for each species
sampling_periods <- c(1,2)

# Safe versions of spatial autocorrelation functions
safe_joincount_test <- possibly(function(x, nb) joincount.test(as.factor(x), nb2listw(nb, style = "B", zero.policy = TRUE)), otherwise = NA)

tictoc::tic()
joincount_df <- map_dfr(sampling_periods, function(sampling_period){
  tp_subset <- 
    data_sf %>%
    filter(samplingPeriodID == sampling_period)
  
  sp_names <- 
    tp_subset %>% 
    st_drop_geometry() %>% 
    distinct(verbatimIdentification) %>% 
    pull() 
  
  all_sites <- 
    data_sf %>%
    select(siteID, geometry) %>%
    unique()
  
  map_dfr(sp_names, function(sp_name){
    
    sp_data <- 
      tp_subset %>%
      st_drop_geometry() %>%
      filter(verbatimIdentification == sp_name) %>%
      full_join(all_sites, by = "siteID") %>%
      mutate(
        presence = if_else(is.na(verbatimIdentification), 0, 1),
        verbatimIdentification = sp_name) %>%
      st_as_sf()
    
    # checks:
    # plot(sp_data["presence"])
    
    # Count the number of presences
    num_pres <- sum(sp_data$presence, na.rm = TRUE)
    
    # Creating neighbors (queen)
    nb_q <- spdep::poly2nb(sp_data, queen = TRUE)

    # Join count test
    jc_res_q <- safe_joincount_test(sp_data$presence, nb_q)
    
    # Save results in a dataframe
    tibble(
      samplingPeriodID = sampling_period,
      verbatimIdentification = sp_name,
      presence_n = num_pres,
      joincount_statistic = ifelse(is.na(jc_res_q), NA,
                                   jc_res_q[[2]]$estimate[1])[[1]],
      joincount_expectation = ifelse(is.na(jc_res_q), NA,
                                     jc_res_q[[2]]$estimate[2])[[1]],
      joincount_p_val = ifelse(is.na(jc_res_q), NA,
                               jc_res_q[[2]]$p.value)[[1]],
      joincount_delta = ifelse(is.na(jc_res_q), NA,
                               ((jc_res_q[[2]]$estimate[1] /
                                   num_pres) -
                                  (jc_res_q[[2]]$estimate[2] /
                                     num_pres)))[[1]]
      ) %>%
      unique()

    
  })
  
})

tictoc::toc() # 1376.94 sec elapsed
1267.75 sec elapsed
Code
# checks:
joincount_df %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 466
Number of columns 7
_______________________
Column type frequency:
character 1
numeric 6
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
verbatimIdentification 0 1 9 32 0 233 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
samplingPeriodID 0 1.00 1.50 0.50 1.00 1.00 1.50 2.00 2.00 ▇▁▁▁▇
presence_n 0 1.00 1550.62 1729.79 1.00 70.50 700.00 2993.75 5229.00 ▇▂▁▂▂
joincount_statistic 0 1.00 4667.40 6162.95 0.00 59.25 1024.50 8634.75 19930.00 ▇▁▁▁▁
joincount_expectation 0 1.00 3908.73 5845.46 0.00 3.55 354.92 6499.22 19829.46 ▇▁▁▁▁
joincount_p_val 11 0.98 0.02 0.11 0.00 0.00 0.00 0.00 0.62 ▇▁▁▁▁
joincount_delta 0 1.00 0.68 0.48 -0.01 0.30 0.59 1.01 2.16 ▇▆▅▂▁
Code
rm(list = setdiff(ls(),c("data_sf_path","tax_path","sp_data_path","sci_name","traits_merged", "pd", "IUCN_df", "range_geometries", "joincount_df")))

Lacunarity

Code
# Load required packages
package_list <-
  c("here", "sf", "tidyverse", "purrr", "tictoc", "terra")
x <-
  lapply(package_list, require, character.only = TRUE)
rm(x)

# turn spherical geometry off (we are working with planar proj. in meters)
sf_use_s2(FALSE) # we use data in meters

# Original CRS
ny_crs <-"epsg:32118"  #verbatimFootprintSRS (NAD83 / New York Long Island)

# Read in sf data
data_sf <- 
  readRDS(data_sf_path) %>% 
  filter(scalingID == 1) %>%
  st_transform(crs = ny_crs)
Code
# Prepare country template for rasterizing ----
resolution <- c(5000, 5000) # in metres

bbox_ny <- 
  data_sf %>% 
  select(siteID, geometry) %>%
  unique() %>% 
  st_bbox()

country_border <-   
  data_sf %>% 
  select(siteID, geometry) %>%
  unique() %>%
  st_union() %>%
  vect()

# create masked template for rasterizing based on NY outlines
template <- 
  rast(
    ext(bbox_ny$xmin, bbox_ny$xmax, bbox_ny$ymin, bbox_ny$ymax),
    resolution = resolution,
    crs = ny_crs)
values(template) <- 0
masked_template <- mask(template, country_border)

# check:
plot(masked_template)

Code
# Prepare species data ----

unsampled_sites_expanded <- 
  data_sf %>%
  filter(cell_sampling_repeats == 0) %>%
  distinct(siteID, geometry) %>%
  tidyr::expand_grid(samplingPeriodID = c(1,2)) %>%
  mutate(verbatimIdentification = NA)

# combine with sampled cells
data_sf_with_unsampled <- 
  bind_rows(
    data_sf %>%
      filter(cell_sampling_repeats == 2 | !is.na(verbatimIdentification)),
    unsampled_sites_expanded
  )

# get filtered species list
species_list <- 
  sci_name %>% 
  pull(verbatimIdentification) %>% 
  unique()

# rasterize ranges

raster_list <- replicate(2, list())
for (tp in seq_along(c(1,2))){
  
  raster_list[[tp]] <- list()
  
  data_tp_i <- 
    data_sf_with_unsampled %>%
    filter(samplingPeriodID == tp)
  
  for (sp_name in species_list){
    
    this_data <- 
      data_tp_i %>%
      filter(verbatimIdentification == sp_name & samplingPeriodID == tp) %>%
      st_as_sf() %>%
      st_transform(crs = ny_crs) %>%
      mutate(
        presence = 1)
  
  r <- 
    rasterize(this_data, 
              masked_template, 
              field = "presence", 
              update = TRUE)
    
  raster_list[[tp]][[sp_name]] <- r
    
  }
}

# example plot
plot(r)

Code
rm(list = setdiff(ls(),c("data_sf_path","tax_path","sp_data_path","sci_name","traits_merged", "pd", "IUCN_df", "range_geometries", "joincount_df", "raster_list")))
gc()
          used  (Mb) gc trigger   (Mb)  max used   (Mb)
Ncells 5340502 285.3   20765147 1109.0  25956433 1386.3
Vcells 8382310  64.0  111065255  847.4 138831568 1059.3
Code
# Calculate lacunarity across sub-lists
#----------------------------------------------------------#
# Function to run terra::focal() for multiple window-sizes -----
#----------------------------------------------------------#
apply_focal_multiple_w <- function(r, w_vec) {

  results <- lapply(w_vec, function(w) {
    f <- focal(r, w = w, fun = sum, na.policy="omit", na.rm=TRUE)

    # n_S_r = number of filled cells in the box (of size r)
    # Q_S_r = normalized frequency distribution of S for box size r

    box_masses_int <- as.integer(values(f, na.rm = T))
    max_value <- max(box_masses_int, na.rm = TRUE)

    # initialize empty vector (add +1 to enable indexing with 0 values)
    n_S_r <- integer(max_value + 1L)
    for (val in box_masses_int) {
      if (val >= 0 && val <= max_value) {
        n_S_r[val + 1L] <- n_S_r[val + 1L] + 1
      }
    }
    Q_S_r <- n_S_r / length(box_masses_int[!is.na(box_masses_int)])
    S_vals <- seq_along(Q_S_r) - 1
    Z_1 <- sum(S_vals * Q_S_r) #first moment = mean
    Z_2 <- sum((S_vals^2) * Q_S_r) # second moment = variance (mean of squared values)

    if (Z_1 == 0) {
      return(NA_real_)
    }
    lac <- Z_2 / (Z_1 * Z_1)

    return(lac)
  })


  names(results) <- paste0("w=", w_vec)
 r_name <- sp_name
  message(r_name, ": done")
  # lac_res <- data.frame(lac = do.call(rbind, results), w = w_vec, species = r_name)
  # return(lac_res)


  this_out <- dplyr::tibble(name = rep(r_name, length(results)),
                            r = w_vec, "ln(r)" = log(w_vec),
                            Lac = results %>% do.call(rbind,.) %>% .[,1], "ln(Lac)" = log(results %>% do.call(rbind,.) %>% .[,1]))
  return(this_out)
}
Code
#----------------------------------------------------------#
# Run focal function across multiple window-sizes -----
#----------------------------------------------------------#

w_vec <- c(3L, 5L, 9L, 17L, 33L) # window sizes

tictoc::tic()
res_list <- replicate(2, list())

for (tp_i in seq_along(raster_list)){
  res_list[[tp_i]] <- list()
  for(sp_i in seq_along(raster_list[[tp_i]])){
    
    sp_name <- names(raster_list[[tp_i]])[sp_i]
    r <- raster_list[[tp_i]][[sp_i]]
    test_res <- apply_focal_multiple_w(r, w_vec)
    
    # add species info
    test_res$samplingPeriodID <- tp_i
    test_res$verbatimIdentification <- sp_name
    
    
    res_list[[tp_i]][[sp_i]] <- test_res
  }
}
Baeolophus bicolor: done
Junco hyemalis: done
Corvus brachyrhynchos: done
Cathartes aura: done
Toxostoma rufum: done
Icterus galbula: done
Dumetella carolinensis: done
Piranga olivacea: done
Dendroica pensylvanica: done
Tyrannus tyrannus: done
Troglodytes aedon: done
Setophaga ruticilla: done
Hylocichla mustelina: done
Catharus fuscescens: done
Vireo gilvus: done
Carpodacus mexicanus: done
Buteo jamaicensis: done
Cyanocitta cristata: done
Colaptes auratus: done
Turdus migratorius: done
Mimus polyglottos: done
Empidonax alnorum: done
Pheucticus ludovicianus: done
Seiurus aurocapilla: done
Pipilo erythrophthalmus: done
Melospiza georgiana: done
Vireo olivaceus: done
Empidonax minimus: done
Hirundo rustica: done
Tachycineta bicolor: done
Dryocopus pileatus: done
Vermivora pinus: done
Quiscalus quiscula: done
Dendroica petechia: done
Passerina cyanea: done
Empidonax traillii: done
Progne subis: done
Dolichonyx oryzivorus: done
Sturnella magna: done
Columba livia: done
Megascops asio: done
Spizella pusilla: done
Picoides pubescens: done
Empidonax virescens: done
Geothlypis trichas: done
Dendroica fusca: done
Passerculus sandwichensis: done
Contopus virens: done
Dendroica magnolia: done
Sphyrapicus varius: done
Molothrus ater: done
Sayornis phoebe: done
Wilsonia citrina: done
Myiarchus crinitus: done
Carpodacus purpureus: done
Coccyzus americanus: done
Seiurus motacilla: done
Sitta canadensis: done
Zenaida macroura: done
Oporornis philadelphia: done
Megaceryle alcyon: done
Chaetura pelagica: done
Riparia riparia: done
Spinus tristis: done
Poecile atricapillus: done
Melospiza melodia: done
Dendroica caerulescens: done
Dendroica coronata: done
Spizella passerina: done
Melanerpes carolinus: done
Sialia sialis: done
Falco sparverius: done
Certhia americana: done
Sitta carolinensis: done
Charadrius vociferus: done
Archilochus colubris: done
Meleagris gallopavo: done
Dendroica virens: done
Agelaius phoeniceus: done
Aix sponsa: done
Butorides virescens: done
Branta canadensis: done
Ardea herodias: done
Buteo platypterus: done
Cardinalis cardinalis: done
Bonasa umbellus: done
Anas platyrhynchos: done
Bombycilla cedrorum: done
Picoides villosus: done
Vireo solitarius: done
Icterus spurius: done
Accipiter cooperii: done
Seiurus noveboracensis: done
Troglodytes troglodytes: done
Catharus guttatus: done
Coccyzus erythropthalmus: done
Stelgidopteryx serripennis: done
Buteo lineatus: done
Accipiter gentilis: done
Gallinago delicata: done
Protonotaria citrea: done
Scolopax minor: done
Accipiter striatus: done
Actitis macularius: done
Anas rubripes: done
Fulica americana: done
Circus cyaneus: done
Petrochelidon pyrrhonota: done
Bubo virginianus: done
Melanerpes erythrocephalus: done
Zonotrichia albicollis: done
Thryothorus ludovicianus: done
Ammodramus henslowii: done
Lophodytes cucullatus: done
Strix varia: done
Coccothraustes vespertinus: done
Haliaeetus leucocephalus: done
Wilsonia canadensis: done
Spinus pinus: done
Pooecetes gramineus: done
Bartramia longicauda: done
Mniotilta varia: done
Porzana carolina: done
Colinus virginianus: done
Polioptila caerulea: done
Ammodramus savannarum: done
Eremophila alpestris: done
Mergus merganser: done
Anas discors: done
Cistothorus palustris: done
Rallus limicola: done
Cistothorus platensis: done
Ixobrychus exilis: done
Vermivora ruficapilla: done
Vireo griseus: done
Vireo flavifrons: done
Regulus satrapa: done
Corvus corax: done
Chordeiles minor: done
Catharus ustulatus: done
Parula americana: done
Podilymbus podiceps: done
Pandion haliaetus: done
Oxyura jamaicensis: done
Botaurus lentiginosus: done
Aythya collaris: done
Gavia immer: done
Dendroica pinus: done
Spizella pallida: done
Spiza americana: done
Dendroica cerulea: done
Gallinula chloropus: done
Icteria virens: done
Anas crecca: done
Larus marinus: done
Hydropogne caspia: done
Vermivora chrysoptera: done
Larus argentatus: done
Sterna hirundo: done
Asio otus: done
Anas strepera: done
Anas americana: done
Dendroica discolor: done
Aegolius acadicus: done
Tyto alba: done
Oporornis formosus: done
Rallus elegans: done
Regulus calendula: done
Falco peregrinus: done
Larus delawarensis: done
Phalacrocorax auritus: done
Ardea alba: done
Nycticorax nycticorax: done
Aquila chrysaetos: done
Egretta thula: done
Anas clypeata: done
Caprimulgus vociferus: done
Asio flammeus: done
Dendroica dominica: done
Helmitheros vermivorum: done
Chlidonias niger: done
Loxia curvirostra: done
Anas acuta: done
Aythya americana: done
Aythya affinis: done
Lanius ludovicianus: done
Sturnella neglecta: done
Loxia leucoptera: done
Corvus ossifragus: done
Mergus serrator: done
Bucephala clangula: done
Aythya valisineria: done
Bubulcus ibis: done
Charadrius melodus: done
Contopus cooperi: done
Empidonax flaviventris: done
Vireo philadelphicus: done
Melospiza lincolnii: done
Euphagus carolinus: done
Poecile hudsonicus: done
Picoides arcticus: done
Picoides dorsalis: done
Dendroica castanea: done
Dendroica striata: done
Perisoreus canadensis: done
Vermivora peregrina: done
Falcipennis canadensis: done
Dendroica palmarum: done
Catharus bicknelli: done
Dendroica tigrina: done
Passerina caerulea: done
Wilsonia pusilla: done
Ammodramus caudacutus: done
Rallus longirostris: done
Quiscalus major: done
Caprimulgus carolinensis: done
Leucophaeus atricilla: done
Haematopus palliatus: done
Plegadis falcinellus: done
Tringa semipalmata: done
Nyctanassa violacea: done
Egretta caerulea: done
Ammodramus maritimus: done
Egretta tricolor: done
Piranga rubra: done
Anas platyrhynchos x A. rubripes: done
Sternula antillarum: done
Rynchops niger: done
Gelochelidon nilotica: done
Sterna forsteri: done
Vermivora chrysoptera x V. pinus: done
Sterna dougallii: done
Laterallus jamaicensis: done
Baeolophus bicolor: done
Junco hyemalis: done
Corvus brachyrhynchos: done
Cathartes aura: done
Toxostoma rufum: done
Icterus galbula: done
Dumetella carolinensis: done
Piranga olivacea: done
Dendroica pensylvanica: done
Tyrannus tyrannus: done
Troglodytes aedon: done
Setophaga ruticilla: done
Hylocichla mustelina: done
Catharus fuscescens: done
Vireo gilvus: done
Carpodacus mexicanus: done
Buteo jamaicensis: done
Cyanocitta cristata: done
Colaptes auratus: done
Turdus migratorius: done
Mimus polyglottos: done
Empidonax alnorum: done
Pheucticus ludovicianus: done
Seiurus aurocapilla: done
Pipilo erythrophthalmus: done
Melospiza georgiana: done
Vireo olivaceus: done
Empidonax minimus: done
Hirundo rustica: done
Tachycineta bicolor: done
Dryocopus pileatus: done
Vermivora pinus: done
Quiscalus quiscula: done
Dendroica petechia: done
Passerina cyanea: done
Empidonax traillii: done
Progne subis: done
Dolichonyx oryzivorus: done
Sturnella magna: done
Columba livia: done
Megascops asio: done
Spizella pusilla: done
Picoides pubescens: done
Empidonax virescens: done
Geothlypis trichas: done
Dendroica fusca: done
Passerculus sandwichensis: done
Contopus virens: done
Dendroica magnolia: done
Sphyrapicus varius: done
Molothrus ater: done
Sayornis phoebe: done
Wilsonia citrina: done
Myiarchus crinitus: done
Carpodacus purpureus: done
Coccyzus americanus: done
Seiurus motacilla: done
Sitta canadensis: done
Zenaida macroura: done
Oporornis philadelphia: done
Megaceryle alcyon: done
Chaetura pelagica: done
Riparia riparia: done
Spinus tristis: done
Poecile atricapillus: done
Melospiza melodia: done
Dendroica caerulescens: done
Dendroica coronata: done
Spizella passerina: done
Melanerpes carolinus: done
Sialia sialis: done
Falco sparverius: done
Certhia americana: done
Sitta carolinensis: done
Charadrius vociferus: done
Archilochus colubris: done
Meleagris gallopavo: done
Dendroica virens: done
Agelaius phoeniceus: done
Aix sponsa: done
Butorides virescens: done
Branta canadensis: done
Ardea herodias: done
Buteo platypterus: done
Cardinalis cardinalis: done
Bonasa umbellus: done
Anas platyrhynchos: done
Bombycilla cedrorum: done
Picoides villosus: done
Vireo solitarius: done
Icterus spurius: done
Accipiter cooperii: done
Seiurus noveboracensis: done
Troglodytes troglodytes: done
Catharus guttatus: done
Coccyzus erythropthalmus: done
Stelgidopteryx serripennis: done
Buteo lineatus: done
Accipiter gentilis: done
Gallinago delicata: done
Protonotaria citrea: done
Scolopax minor: done
Accipiter striatus: done
Actitis macularius: done
Anas rubripes: done
Fulica americana: done
Circus cyaneus: done
Petrochelidon pyrrhonota: done
Bubo virginianus: done
Melanerpes erythrocephalus: done
Zonotrichia albicollis: done
Thryothorus ludovicianus: done
Ammodramus henslowii: done
Lophodytes cucullatus: done
Strix varia: done
Coccothraustes vespertinus: done
Haliaeetus leucocephalus: done
Wilsonia canadensis: done
Spinus pinus: done
Pooecetes gramineus: done
Bartramia longicauda: done
Mniotilta varia: done
Porzana carolina: done
Colinus virginianus: done
Polioptila caerulea: done
Ammodramus savannarum: done
Eremophila alpestris: done
Mergus merganser: done
Anas discors: done
Cistothorus palustris: done
Rallus limicola: done
Cistothorus platensis: done
Ixobrychus exilis: done
Vermivora ruficapilla: done
Vireo griseus: done
Vireo flavifrons: done
Regulus satrapa: done
Corvus corax: done
Chordeiles minor: done
Catharus ustulatus: done
Parula americana: done
Podilymbus podiceps: done
Pandion haliaetus: done
Oxyura jamaicensis: done
Botaurus lentiginosus: done
Aythya collaris: done
Gavia immer: done
Dendroica pinus: done
Spizella pallida: done
Spiza americana: done
Dendroica cerulea: done
Gallinula chloropus: done
Icteria virens: done
Anas crecca: done
Larus marinus: done
Hydropogne caspia: done
Vermivora chrysoptera: done
Larus argentatus: done
Sterna hirundo: done
Asio otus: done
Anas strepera: done
Anas americana: done
Dendroica discolor: done
Aegolius acadicus: done
Tyto alba: done
Oporornis formosus: done
Rallus elegans: done
Regulus calendula: done
Falco peregrinus: done
Larus delawarensis: done
Phalacrocorax auritus: done
Ardea alba: done
Nycticorax nycticorax: done
Aquila chrysaetos: done
Egretta thula: done
Anas clypeata: done
Caprimulgus vociferus: done
Asio flammeus: done
Dendroica dominica: done
Helmitheros vermivorum: done
Chlidonias niger: done
Loxia curvirostra: done
Anas acuta: done
Aythya americana: done
Aythya affinis: done
Lanius ludovicianus: done
Sturnella neglecta: done
Loxia leucoptera: done
Corvus ossifragus: done
Mergus serrator: done
Bucephala clangula: done
Aythya valisineria: done
Bubulcus ibis: done
Charadrius melodus: done
Contopus cooperi: done
Empidonax flaviventris: done
Vireo philadelphicus: done
Melospiza lincolnii: done
Euphagus carolinus: done
Poecile hudsonicus: done
Picoides arcticus: done
Picoides dorsalis: done
Dendroica castanea: done
Dendroica striata: done
Perisoreus canadensis: done
Vermivora peregrina: done
Falcipennis canadensis: done
Dendroica palmarum: done
Catharus bicknelli: done
Dendroica tigrina: done
Passerina caerulea: done
Wilsonia pusilla: done
Ammodramus caudacutus: done
Rallus longirostris: done
Quiscalus major: done
Caprimulgus carolinensis: done
Leucophaeus atricilla: done
Haematopus palliatus: done
Plegadis falcinellus: done
Tringa semipalmata: done
Nyctanassa violacea: done
Egretta caerulea: done
Ammodramus maritimus: done
Egretta tricolor: done
Piranga rubra: done
Anas platyrhynchos x A. rubripes: done
Sternula antillarum: done
Rynchops niger: done
Gelochelidon nilotica: done
Sterna forsteri: done
Vermivora chrysoptera x V. pinus: done
Sterna dougallii: done
Laterallus jamaicensis: done
Code
tictoc::toc() # 96.32 sec elapsed
37.42 sec elapsed
Code
df_res1 <- do.call(rbind, res_list[[1]])
df_res2 <- do.call(rbind, res_list[[2]])

lac_results <- 
  rbind(df_res1, df_res2) %>% 
  select(-name) %>%
  as.data.frame()

names(lac_results) <- c("r", "ln(r)", "lac", "ln(lac)", "samplingPeriodID", "verbatimIdentification")


# Calculate mean across increasing window sizes
mean_lac <- lac_results %>%
  ungroup() %>%
  group_by(samplingPeriodID, verbatimIdentification) %>%
  summarize(mean_lnLac = mean(`ln(lac)`, na.rm = TRUE), .groups = "keep") 


hist(mean_lac$mean_lnLac, breaks = 30, 
     main = "Lacunarity distribution", 
     xlab = "Mean Lacunarity (ln)")

Code
rm(list = setdiff(ls(),c("data_sf_path","tax_path","sp_data_path","sci_name","traits_merged", "pd", "IUCN_df", "range_geometries", "joincount_df", "mean_lac")))
gc()
          used  (Mb) gc trigger  (Mb)  max used   (Mb)
Ncells 5379794 287.4   16612118 887.2  25956433 1386.3
Vcells 8384431  64.0   88852204 677.9 138831568 1059.3

Global range size

We again use the BirdLife range polygons [6], which can be downloaded from here. We use the Mollweide projection to calculate the global range size in km². The BirdLife shapefile is in WGS84, so we transform it to Mollweide before calculating the area.

Code
# planar CRS
Mollweide_CRS <- 
'PROJCS["ProjWiz_Custom_Mollweide",
 GEOGCS["GCS_WGS_1984",
  DATUM["D_WGS_1984",
   SPHEROID["WGS_1984",6378137.0,298.257223563]],
  PRIMEM["Greenwich",0.0],
  UNIT["Degree",0.0174532925199433]],
 PROJECTION["Mollweide"],
 PARAMETER["False_Easting",0.0],
 PARAMETER["False_Northing",0.0],
 PARAMETER["Central_Meridian",0],
 UNIT["Meter",1.0]]'

sf_use_s2(FALSE)

# Taxonomic match table
Tax <- 
  sci_name # Taxa

# Global ranges
tictoc::tic()
BirdLife_split <- 
  st_read(here("Data/input/shp_global/BirdLife.shp"), quiet = TRUE)%>% 
  filter(sci_name %in% c(Tax$scientificName)) %>%
  st_transform(crs = Mollweide_CRS)%>%
  group_by(sci_name) %>%
  st_make_valid() %>% # takes a while...
  group_by(sci_name) %>% 
  group_split()
  

# loop through list and sum up the combined range areas for each species
range_size_df <- 
  map_dfr(BirdLife_split, function(this_species){
    
    message(paste0(unique(this_species$sci_name), "... done!"))
  
    this_species %>%
      group_by(sci_name) %>%
      summarise(.groups = "keep") %>%
      # Convert to km²
      group_by(sci_name) %>%
      mutate(GlobRangeSize_km2 = as.numeric(st_area(.))/ 1e6) %>%  
      st_drop_geometry()%>%
      rename(scientificName = sci_name)
  
    
    })
Accipiter cooperii... done!
Accipiter gentilis... done!
Accipiter striatus... done!
Actitis macularius... done!
Aegolius acadicus... done!
Agelaius phoeniceus... done!
Aix sponsa... done!
Ammodramus savannarum... done!
Ammospiza caudacuta... done!
Ammospiza maritima... done!
Anas acuta... done!
Anas crecca... done!
Anas platyrhynchos... done!
Anas rubripes... done!
Antrostomus carolinensis... done!
Antrostomus vociferus... done!
Aquila chrysaetos... done!
Archilochus colubris... done!
Ardea alba... done!
Ardea herodias... done!
Asio flammeus... done!
Asio otus... done!
Aythya affinis... done!
Aythya americana... done!
Aythya collaris... done!
Aythya valisineria... done!
Baeolophus bicolor... done!
Bartramia longicauda... done!
Bombycilla cedrorum... done!
Bonasa umbellus... done!
Botaurus lentiginosus... done!
Branta canadensis... done!
Bubo virginianus... done!
Bubulcus ibis... done!
Bucephala clangula... done!
Buteo jamaicensis... done!
Buteo lineatus... done!
Buteo platypterus... done!
Butorides striata... done!
Canachites canadensis... done!
Cardellina canadensis... done!
Cardellina pusilla... done!
Cardinalis cardinalis... done!
Cathartes aura... done!
Catharus bicknelli... done!
Catharus fuscescens... done!
Catharus guttatus... done!
Catharus ustulatus... done!
Certhia americana... done!
Chaetura pelagica... done!
Charadrius melodus... done!
Charadrius vociferus... done!
Chlidonias niger... done!
Chordeiles minor... done!
Circus cyaneus... done!
Cistothorus palustris... done!
Cistothorus platensis... done!
Coccyzus americanus... done!
Coccyzus erythropthalmus... done!
Colaptes auratus... done!
Colinus virginianus... done!
Columba livia... done!
Contopus cooperi... done!
Contopus virens... done!
Corthylio calendula... done!
Corvus brachyrhynchos... done!
Corvus corax... done!
Corvus ossifragus... done!
Cyanocitta cristata... done!
Dolichonyx oryzivorus... done!
Dryobates pubescens... done!
Dumetella carolinensis... done!
Egretta caerulea... done!
Egretta thula... done!
Egretta tricolor... done!
Empidonax alnorum... done!
Empidonax flaviventris... done!
Empidonax minimus... done!
Empidonax traillii... done!
Empidonax virescens... done!
Eremophila alpestris... done!
Euphagus carolinus... done!
Falco peregrinus... done!
Falco sparverius... done!
Fulica americana... done!
Gallinago delicata... done!
Gallinula chloropus... done!
Gavia immer... done!
Gelochelidon nilotica... done!
Geothlypis formosa... done!
Geothlypis philadelphia... done!
Geothlypis trichas... done!
Haematopus palliatus... done!
Haemorhous mexicanus... done!
Haemorhous purpureus... done!
Haliaeetus leucocephalus... done!
Helmitheros vermivorum... done!
Hesperiphona vespertina... done!
Hirundo rustica... done!
Hydroprogne caspia... done!
Hylatomus pileatus... done!
Hylocichla mustelina... done!
Icteria virens... done!
Icterus galbula... done!
Icterus spurius... done!
Ixobrychus exilis... done!
Junco hyemalis... done!
Lanius ludovicianus... done!
Larus argentatus... done!
Larus atricilla... done!
Larus delawarensis... done!
Larus marinus... done!
Laterallus jamaicensis... done!
Leiothlypis peregrina... done!
Leiothlypis ruficapilla... done!
Leuconotopicus villosus... done!
Lophodytes cucullatus... done!
Loxia curvirostra... done!
Loxia leucoptera... done!
Mareca americana... done!
Mareca strepera... done!
Megaceryle alcyon... done!
Megascops asio... done!
Melanerpes carolinus... done!
Melanerpes erythrocephalus... done!
Meleagris gallopavo... done!
Melospiza georgiana... done!
Melospiza lincolnii... done!
Melospiza melodia... done!
Mergus merganser... done!
Mergus serrator... done!
Mimus polyglottos... done!
Mniotilta varia... done!
Molothrus ater... done!
Myiarchus crinitus... done!
Nannopterum auritus... done!
Nyctanassa violacea... done!
Nycticorax nycticorax... done!
Oxyura jamaicensis... done!
Pandion haliaetus... done!
Parkesia motacilla... done!
Parkesia noveboracensis... done!
Passerculus henslowii... done!
Passerculus sandwichensis... done!
Passerina caerulea... done!
Passerina cyanea... done!
Perisoreus canadensis... done!
Petrochelidon pyrrhonota... done!
Pheucticus ludovicianus... done!
Picoides arcticus... done!
Picoides tridactylus... done!
Pipilo erythrophthalmus... done!
Piranga olivacea... done!
Piranga rubra... done!
Plegadis falcinellus... done!
Podilymbus podiceps... done!
Poecile atricapillus... done!
Poecile hudsonicus... done!
Polioptila caerulea... done!
Pooecetes gramineus... done!
Porzana carolina... done!
Progne subis... done!
Protonotaria citrea... done!
Quiscalus major... done!
Quiscalus quiscula... done!
Rallus elegans... done!
Rallus limicola... done!
Rallus longirostris... done!
Regulus satrapa... done!
Riparia riparia... done!
Rynchops niger... done!
Sayornis phoebe... done!
Scolopax minor... done!
Seiurus aurocapilla... done!
Setophaga americana... done!
Setophaga caerulescens... done!
Setophaga castanea... done!
Setophaga cerulea... done!
Setophaga citrina... done!
Setophaga coronata... done!
Setophaga discolor... done!
Setophaga dominica... done!
Setophaga fusca... done!
Setophaga magnolia... done!
Setophaga palmarum... done!
Setophaga pensylvanica... done!
Setophaga petechia... done!
Setophaga pinus... done!
Setophaga ruticilla... done!
Setophaga striata... done!
Setophaga tigrina... done!
Setophaga virens... done!
Sialia sialis... done!
Sitta canadensis... done!
Sitta carolinensis... done!
Spatula clypeata... done!
Spatula discors... done!
Sphyrapicus varius... done!
Spinus pinus... done!
Spinus tristis... done!
Spiza americana... done!
Spizella pallida... done!
Spizella passerina... done!
Spizella pusilla... done!
Stelgidopteryx serripennis... done!
Sterna dougallii... done!
Sterna forsteri... done!
Sterna hirundo... done!
Sternula antillarum... done!
Strix varia... done!
Sturnella magna... done!
Sturnella neglecta... done!
Tachycineta bicolor... done!
Thryothorus ludovicianus... done!
Toxostoma rufum... done!
Tringa semipalmata... done!
Troglodytes aedon... done!
Troglodytes troglodytes... done!
Turdus migratorius... done!
Tyrannus tyrannus... done!
Tyto alba... done!
Vermivora chrysoptera... done!
Vermivora cyanoptera... done!
Vireo flavifrons... done!
Vireo gilvus... done!
Vireo griseus... done!
Vireo olivaceus... done!
Vireo philadelphicus... done!
Vireo solitarius... done!
Zenaida macroura... done!
Zonotrichia albicollis... done!
Code
tictoc::toc()
982.04 sec elapsed
Code
# Merge with verbatimIdentification
range_size_df2 <- 
  range_size_df %>%
  right_join(Tax, by = "scientificName") %>%
  distinct(scientificName, verbatimIdentification, .keep_all = TRUE) 

# checks
colSums(is.na(range_size_df2))
        scientificName      GlobRangeSize_km2 verbatimIdentification 
                     0                      2                      0 
Code
range(range_size_df2$GlobRangeSize_km2, na.rm=TRUE)
[1]    38728.84 75222636.54
Code
hist(log(range_size_df2$GlobRangeSize_km2))

Code
rm(list = setdiff(ls(),c("data_sf_path","tax_path","sp_data_path","sci_name","traits_merged", "pd", "IUCN_df", "range_geometries", "joincount_df", "mean_lac", "range_size_df2")))
gc()
          used  (Mb) gc trigger   (Mb)  max used   (Mb)
Ncells 5280285 282.0   16612118  887.2  25956433 1386.3
Vcells 8359782  63.8  337015003 2571.3 421268753 3214.1

Merge Predictors together

Code
# Set 1: External data
predictors_1 <- 
  full_join(range_size_df2, traits_merged) %>%
  full_join(IUCN_df %>% mutate(code = as.factor(code)) %>% rename("IUCN" = "code")) %>%
  full_join(pd) %>%
  distinct(verbatimIdentification, .keep_all = TRUE)
Joining with `by = join_by(scientificName, verbatimIdentification)`
Joining with `by = join_by(scientificName, verbatimIdentification)`
Joining with `by = join_by(scientificName, verbatimIdentification)`
Code
colSums(is.na(predictors_1))
        scientificName      GlobRangeSize_km2 verbatimIdentification 
                     0                      2                      0 
                  Mass                Habitat              Migration 
                     3                      3                      3 
     Primary.Lifestyle                   IUCN                     pd 
                     3                      2                      3 
Code
glimpse(predictors_1)
Rows: 233
Columns: 9
Groups: scientificName [233]
$ scientificName         <chr> "Accipiter cooperii", "Accipiter gentilis", "Ac…
$ GlobRangeSize_km2      <dbl> 8402847.43, 31714948.46, 16393557.31, 13610945.…
$ verbatimIdentification <chr> "Accipiter cooperii", "Accipiter gentilis", "Ac…
$ Mass                   <dbl> 429.67, 866.04, 130.59, 40.40, 100.69, 50.78, 6…
$ Habitat                <fct> Woodland, Forest, Forest, Wetland, Forest, Wetl…
$ Migration              <fct> 3, 2, 3, 3, 3, 3, 1, 2, 3, 2, 3, 2, 2, 2, 3, 3,…
$ Primary.Lifestyle      <fct> Aerial, Generalist, Aerial, Terrestrial, Insess…
$ IUCN                   <fct> LC, LC, LC, LC, LC, LC, LC, LC, EN, LC, VU, LC,…
$ pd                     <dbl> 7.899108, 6.994202, 5.769648, 27.170491, 12.953…
Code
# Set 2: Calculated from atlas

big_table <- 
  readRDS(here("Demo_NewYork/Data/output/A_Big_table_ny.rds"))

predictors_2 <- 
  full_join(big_table, joincount_df) %>%
  full_join(range_geometries) %>%
  full_join(mean_lac) %>%
  distinct(samplingPeriodID, verbatimIdentification, .keep_all = TRUE)
Joining with `by = join_by(verbatimIdentification, samplingPeriodID)`
Joining with `by = join_by(datasetID, verbatimIdentification,
samplingPeriodID)`
Joining with `by = join_by(verbatimIdentification, samplingPeriodID)`
Code
colSums(is.na(predictors_2))
             datasetID verbatimIdentification       samplingPeriodID 
                     0                      0                      0 
       Total_area_samp           Total_Ncells      Total_Ncells_samp 
                     0                      0                      0 
                   AOO             occ_Ncells         rel_occ_Ncells 
                     0                      0                      0 
               rel_AOO         Jaccard_dissim                      a 
                     0                      0                      0 
                     b                      c                      d 
                     0                      0                      0 
               D_AOO_a              time_span              startYear 
                     0                      0                      0 
               endYear                n_years               log_R2_1 
                     0                      0                      0 
     log_R2_1_per_year             presence_n    joincount_statistic 
                     0                      0                      0 
 joincount_expectation        joincount_p_val        joincount_delta 
                     0                     11                      0 
              circNorm minDist_toBorder_centr             mean_lnLac 
                     0                      0                      0 
Code
glimpse(predictors_2)
Rows: 466
Columns: 30
$ datasetID              <dbl> 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,…
$ verbatimIdentification <chr> "Accipiter cooperii", "Accipiter gentilis", "Ac…
$ samplingPeriodID       <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ Total_area_samp        <dbl> 126878.5, 126878.5, 126878.5, 126878.5, 126878.…
$ Total_Ncells           <dbl> 5335, 5335, 5335, 5335, 5335, 5335, 5335, 5335,…
$ Total_Ncells_samp      <dbl> 5319, 5319, 5319, 5319, 5319, 5319, 5319, 5319,…
$ AOO                    <dbl> 13645.261, 10967.382, 21187.304, 48157.370, 321…
$ occ_Ncells             <dbl> 550, 444, 859, 2021, 129, 5059, 1923, 72, 348, …
$ rel_occ_Ncells         <dbl> 0.103, 0.083, 0.161, 0.380, 0.024, 0.951, 0.362…
$ rel_AOO                <dbl> 0.108, 0.086, 0.167, 0.380, 0.025, 0.954, 0.370…
$ Jaccard_dissim         <dbl> 0.881, 0.904, 0.849, 0.697, 0.950, 0.068, 0.623…
$ a                      <dbl> 202, 70, 301, 881, 13, 4837, 1288, 48, 18, 27, …
$ b                      <dbl> 1152, 285, 1138, 888, 133, 133, 1496, 13, 52, 9…
$ c                      <dbl> 348, 374, 558, 1140, 116, 222, 635, 24, 330, 21…
$ d                      <dbl> 3617, 4590, 3322, 2410, 5057, 127, 1900, 5234, …
$ D_AOO_a                <dbl> 1.538, 1.474, 1.647, 1.638, 1.129, 1.931, 1.630…
$ time_span              <dbl> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,…
$ startYear              <dbl> 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980,…
$ endYear                <dbl> 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985,…
$ n_years                <dbl> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,…
$ log_R2_1               <dbl> 0.880, -0.225, 0.513, -0.124, 0.121, -0.019, 0.…
$ log_R2_1_per_year      <dbl> 0.176, -0.045, 0.103, -0.025, 0.024, -0.004, 0.…
$ presence_n             <dbl> 550, 444, 859, 2021, 129, 5059, 1923, 72, 348, …
$ joincount_statistic    <dbl> 446, 368, 864, 3745, 44, 18808, 3395, 114, 364,…
$ joincount_expectation  <dbl> 2.190240e+02, 1.426735e+02, 5.346101e+02, 2.961…
$ joincount_p_val        <dbl> 1.665900e-64, 7.560269e-93, 9.569905e-63, 6.798…
$ joincount_delta        <dbl> 0.41268361, 0.50749206, 0.38345740, 0.38780539,…
$ circNorm               <dbl> 436.950278, 339.166291, 579.799200, 713.038539,…
$ minDist_toBorder_centr <dbl> 44066.842, 48414.413, 22775.763, 35147.233, 172…
$ mean_lnLac             <dbl> 0.58701976, 0.67089253, 0.37159881, 0.21483881,…

Modify predictors for modeling

Code
# Merge together
data_final <- 
  full_join(predictors_2, predictors_1) %>%
  filter(samplingPeriodID == 1) %>%
  mutate(
    across(
      where(is.character) & 
        !matches("verbatimIdentification") & 
        !matches("scientificName"),
      as.factor)) %>%
  mutate(
    across(
      c("Habitat", "IUCN", "Migration", "Primary.Lifestyle"),
      as.factor)) %>%
  distinct(verbatimIdentification, .keep_all = TRUE) %>%
  ungroup() %>%
  mutate(
    Habitat_5 = 
      as.factor(
        case_when(
          Habitat %in% c("Grassland", "Shrubland", 
                         "Desert", "Rock") ~ "open",
          Habitat %in% c("Woodland", "Forest") ~ "closed",
          Habitat %in% c("Coastal", "Marine") ~ "marine",
          Habitat %in% c("Wetland", "Riverine") ~ "freshwater",
          Habitat == "Human Modified" ~ "human",
          .default = NA_character_)
        )
    ) %>%
  mutate(
    Generalism = 
      as.factor(
        case_when(
          Primary.Lifestyle == "Generalist" ~ 1,
          .default = 0)
        )
    ) %>%
  mutate(
    Threatened = 
      as.factor(
        case_when(
          IUCN %in% c("LC") ~ 0,
          is.na(IUCN) ~ NA,
          .default = 1)
        )
    ) %>%
  select(
    verbatimIdentification, scientificName, 
    log_R2_1, log_R2_1_per_year, Jaccard_dissim, 
    D_AOO_a, AOO, joincount_delta, mean_lnLac,
    circNorm, minDist_toBorder_centr, 
    GlobRangeSize_km2, Mass, Migration, 
    pd, Habitat_5, Generalism, Threatened
  )
Joining with `by = join_by(verbatimIdentification)`
Code
data_final %>% 
  skimr::skim()
Data summary
Name Piped data
Number of rows 233
Number of columns 18
_______________________
Column type frequency:
character 2
factor 4
numeric 12
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
verbatimIdentification 0 1 9 32 0 233 0
scientificName 0 1 9 39 0 233 0

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
Migration 3 0.99 FALSE 3 3: 143, 2: 47, 1: 40, NA: 0
Habitat_5 3 0.99 FALSE 5 clo: 108, fre: 55, ope: 41, mar: 17
Generalism 0 1.00 FALSE 2 0: 202, 1: 31
Threatened 2 0.99 FALSE 2 0: 207, 1: 24

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
log_R2_1 0 1.00 0.06 0.66 -1.79 -0.15 0.01 0.24 3.75 ▁▇▁▁▁
log_R2_1_per_year 0 1.00 0.01 0.13 -0.36 -0.03 0.00 0.05 0.75 ▁▇▁▁▁
Jaccard_dissim 0 1.00 0.61 0.28 0.00 0.43 0.68 0.84 1.00 ▃▃▃▇▇
D_AOO_a 0 1.00 1.31 0.55 0.00 0.90 1.41 1.81 1.98 ▂▂▃▃▇
AOO 0 1.00 36317.49 41373.36 4.98 1117.66 14775.02 67379.24 124025.31 ▇▂▁▂▂
joincount_delta 0 1.00 0.70 0.51 0.00 0.30 0.59 1.08 2.16 ▇▆▅▂▁
mean_lnLac 0 1.00 1.32 1.23 0.06 0.25 0.81 2.23 4.51 ▇▂▂▂▁
circNorm 0 1.00 174.74 173.24 1.27 32.02 116.16 284.30 733.85 ▇▂▂▁▁
minDist_toBorder_centr 0 1.00 35094.08 17486.26 1178.59 24849.69 35580.49 44151.44 93733.18 ▅▇▆▂▁
GlobRangeSize_km2 2 0.99 10454413.81 12006642.63 38728.84 3522641.41 6322724.06 11841957.85 75222636.54 ▇▁▁▁▁
Mass 3 0.99 324.87 747.25 3.09 14.40 47.18 334.61 5791.37 ▇▁▁▁▁
pd 3 0.99 7.55 6.26 1.47 3.84 5.65 8.97 51.14 ▇▁▁▁▁
Code
write.csv(data_final, here::here("Demo_NewYork/Data/output/data_final_ny.csv"))

References

[1] Tobias, J. A., Sheard, C., Pigot, A. L., Devenish, A. J., Yang, J., Sayol, F., Neate-Clegg, M. H., Alioravainen, N., Weeks, T. L., Barber, R. A., & others. (2022). AVONET: morphological, ecological and geographical data for all birds. Ecology Letters, 25(3), 581–597.

[2] Jetz, W., Thomas, G. H., Joy, J. B., Hartmann, K., & Mooers, A. O. (2012). The global diversity of birds in space and time. Nature, 491(7424), 444–448.

[3] Rubolini, D., Liker, A., Garamszegi, L. Z., Møller, A. P., & Saino, N. (2015). Using the BirdTree.org website to obtain robust phylogenies for avian comparative studies: A primer. Current Zoology, 61(6), 959–965. https://doi.org/10.1093/czoolo/61.6.959

[4] IUCN (2025). The IUCN Red List of Threatened Species. Version 2025-1. https://www.iucnredlist.org. Accessed 14 June 2025.

[5] Soria, C. D., Ortega-Solís, G. R., Wölke, F. J. R., Barták, V., Tschernosterová, K., Bejček, V., Herrando, S., Mikuláš, I., Šťastný, K., Ueta, M., Voříšek, P., & Keil, P. (in preparation). Spatial autocorrelation of species diversity and distributions in time and across spatial scales.

[6] BirdLife International & Handbook of the Birds of the World. (2024). Bird species distribution maps of the world (Version 9.0) [Dataset]. http://datazone.birdlife.org/species/requestdis