Calculate atlas variables

Now we will calculate the log ratio of AOO, Jaccard dissimilarity, and the fractal dimension.

Code
# NOTE(review): rm(list = ls()) clears only the global environment — it does
# not detach packages or reset options, and is generally an anti-pattern in
# scripts. Kept byte-identical here since this is a rendered notebook chunk.
rm(list=ls())
# Trigger garbage collection and report memory usage (output shown below).
gc()
          used (Mb) gc trigger (Mb) max used (Mb)
Ncells  592983 31.7    1351252 72.2   685968 36.7
Vcells 1086517  8.3    8388608 64.0  1876884 14.4
Code
suppressPackageStartupMessages({
library(sf)
library(here)
library(dplyr)
library(tidyr)
library(ggplot2)
})
sf_use_s2(TRUE) # we use WGS84 projected data (it's spherical)

Read data

Code
# Load the spatial (sf) atlas data for New York and keep only the columns
# used downstream: identifiers, cell areas, species names, cell centroids,
# sampling-period metadata, filtering flags, CRS info, and the geometry.
data_sf <- 
  readRDS(here::here("Demo_NewYork/Data/output/1_data_sf_ny.rds")) %>%
  select(datasetID, scalingID, samplingPeriodID,
         siteID, croppedArea, area, 
         verbatimIdentification, scientificName,
         centroidDecimalLatitude, centroidDecimalLongitude,
         startYear, endYear, time_span, 
         cells_keep, species_keep,cell_sampling_repeats,
         verbatimFootprintSRS, footprintSRS,
         geometry)
Code
#----------------------------------------------------------#
# Calculate grid information  -----
#----------------------------------------------------------#

# Custom function to get total and sampled area/cells from datasets.
#
# For each datasetID x scalingID it computes:
#   - Total_Ncells:      number of distinct grid cells (all cells)
#   - Total_Ncells_samp: distinct cells sampled in BOTH periods
#                        (cell_sampling_repeats == 2), split by time_span
#   - Total_area_samp:   summed croppedArea of those repeatedly sampled cells
# Returns one row per datasetID x scalingID (x time_span on the sampled side).
calculate_grid_info <- function(data_sf) {
  # De-duplicated cell-level attributes, computed once and reused by both
  # summaries (the original pipeline repeated this block verbatim).
  cells <-
    data_sf %>%
    st_drop_geometry() %>%
    distinct(datasetID, scalingID, siteID,
             cell_sampling_repeats, croppedArea, time_span)

  # All cells per dataset x scale.
  total_cells <-
    cells %>%
    group_by(datasetID, scalingID) %>%
    summarise(
      Total_Ncells = n_distinct(siteID),
      .groups = "drop")

  # Only cells sampled in both sampling periods.
  sampled_cells <-
    cells %>%
    filter(cell_sampling_repeats == 2) %>%
    group_by(datasetID, scalingID, time_span) %>%
    summarise(
      Total_Ncells_samp = n_distinct(siteID),
      Total_area_samp = sum(croppedArea, na.rm = TRUE),
      .groups = "drop")

  # Explicit join keys (the same keys dplyr previously inferred) keep the
  # result stable and silence the join message.
  full_join(total_cells, sampled_cells, by = c("datasetID", "scalingID"))
}

#--------------------------------------------------#

# Apply function to compute grid information
atlas_areas <-
  calculate_grid_info(data_sf)
`summarise()` has grouped output by 'datasetID'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'datasetID', 'scalingID'. You can override
using the `.groups` argument.
Joining with `by = join_by(datasetID, scalingID)`
Code
# Checks
kableExtra::kable(atlas_areas)
datasetID scalingID Total_Ncells time_span Total_Ncells_samp Total_area_samp
6 1 5335 5 5319 126878.5
6 2 1398 5 1396 127118.1
6 4 386 5 384 127118.1
6 8 113 5 112 127121.8
6 16 37 5 37 127146.8
6 32 13 5 13 127146.8
6 64 5 5 5 127146.8
6 128 2 5 2 127146.8
Code
# Ordered factor giving the spatial-grain levels (cell aggregation factors)
# from finest (1) to coarsest (128).
desired_levels <- 
  factor(
    c("1", "2", "4", "8", "16", "32", "64", "128"),
    ordered = TRUE, # use TRUE, not T (T is reassignable)
    levels = c("1", "2", "4", "8", "16", "32", "64", "128")
    )

# Read processed species data & merge with atlas_areas.
# Join key made explicit: scalingID is the only shared column (matches the
# key dplyr inferred, see the join message in the rendered output).
pres_dat_final <- 
  readRDS(here::here("Demo_NewYork/Data/output/1_data_filtered_ny.rds")) %>%
  left_join(atlas_areas, by = "scalingID") %>%
  mutate(scalingID = factor(scalingID, levels = desired_levels)) %>%
  filter(!is.na(verbatimIdentification)) # Remove unsampled cells
Joining with `by = join_by(scalingID)`
Code
glimpse(pres_dat_final)
Rows: 1,120,865
Columns: 18
$ scalingID                <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ siteID                   <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ samplingPeriodID         <dbl> 2, 2, 2, 2, 2, 1, 2, 1, 1, 1, 2, 2, 1, 2, 1, …
$ verbatimIdentification   <chr> "Baeolophus bicolor", "Junco hyemalis", "Corv…
$ scientificName           <chr> "Baeolophus bicolor", "Junco hyemalis", "Corv…
$ verbatimFootprintSRS     <chr> "epsg:26918", "epsg:26918", "epsg:26918", "ep…
$ footprintSRS             <chr> "epsg:4326", "epsg:4326", "epsg:4326", "epsg:…
$ croppedArea              <dbl> 20.62383, 20.62383, 20.62383, 20.62383, 20.62…
$ areaUnit                 <chr> "km2", "km2", "km2", "km2", "km2", "km2", "km…
$ centroidDecimalLongitude <dbl> -79.74312, -79.74312, -79.74312, -79.74312, -…
$ centroidDecimalLatitude  <dbl> 42.06346, 42.06346, 42.06346, 42.06346, 42.06…
$ startYear                <int> 2000, 2000, 2000, 2000, 2000, 1980, 2000, 198…
$ endYear                  <int> 2005, 2005, 2005, 2005, 2005, 1985, 2005, 198…
$ datasetID                <int> 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, …
$ Total_Ncells             <int> 5335, 5335, 5335, 5335, 5335, 5335, 5335, 533…
$ time_span                <int> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, …
$ Total_Ncells_samp        <int> 5319, 5319, 5319, 5319, 5319, 5319, 5319, 531…
$ Total_area_samp          <dbl> 126878.5, 126878.5, 126878.5, 126878.5, 12687…

Calculate Jaccard dissimilarity

Code
#----------------------------------------------------------#
# Calculate jaccard similarity index   -----
#----------------------------------------------------------#

# Custom function to calculate Jaccard similarity between two sets of site
# IDs, returning the index plus the underlying contingency counts:
#   a       = sites shared by both sets (persistences)
#   b       = sites only in set2 (gains)
#   c       = sites only in set1 (losses)
#   n_union = a + b + c (sites occupied in at least one period)
# Fix: when both sets are empty the original returned NaN (0 / 0); we now
# return NA_real_ explicitly. Since |union| == a + b + c for sets, the
# denominator reuses n_union instead of re-deriving it.
jaccard <- function(set1, set2) {
  a <- length(base::intersect(set1, set2))
  b <- length(setdiff(set2, set1))
  c <- length(setdiff(set1, set2))
  n_union <- length(base::union(set1, set2))
  jaccard_index <- if (n_union > 0) a / n_union else NA_real_
  list(jaccard_index = jaccard_index,
       a = a,
       b = b,
       c = c,
       n_union = n_union)
}
#----------------------------------------------------------#

# Calculate jaccard for all datasets.
# Works at the finest grain only (scalingID == 1); for each dataset x species
# it compares the sets of occupied siteIDs between sampling periods 1 and 2.
Jaccard_df <- 
  pres_dat_final %>%
  filter(scalingID == 1) %>%
  group_by(datasetID, verbatimIdentification) %>%
  mutate(
    # pick(everything()) exposes the current group's rows, so each call
    # extracts the siteID vector of one sampling period within the group.
    # The list-column holds one jaccard() result list per group.
    jaccard_results = list(jaccard(
      filter(pick(everything()), samplingPeriodID == 1)$siteID,
      filter(pick(everything()), samplingPeriodID == 2)$siteID
    ))
  ) %>%
  # Spread the list (jaccard_index, a, b, c, n_union) into columns.
  tidyr::unnest_wider(jaccard_results) %>%
  group_by(verbatimIdentification, datasetID) %>%
  # d = repeatedly sampled cells never occupied in either period
  # (the absent-absent cell count of the 2x2 table).
  mutate(d = Total_Ncells_samp - n_union) %>%
  rename(Jaccard_sim = jaccard_index) %>%
  mutate(Jaccard_dissim = 1 - Jaccard_sim) %>%
  ungroup() %>%
  # Collapse cell-level rows to one row per dataset x species.
  distinct(datasetID, verbatimIdentification, Jaccard_dissim, Jaccard_sim, a, b, c, d, Total_Ncells_samp)

hist(Jaccard_df$Jaccard_dissim, breaks = 30, main = "Jaccard dissimilarity distribution", xlab = "Jaccard dissimilarity")

Code
hist(log(Jaccard_df$d), breaks = 30, main = "log Number of cells never occupied", xlab = "log Number of cells never occupied")

Code
# Fix user-facing typo in plot labels: "persistances" -> "persistences".
hist(log(Jaccard_df$a), breaks = 30, main = "log Number of persistences", xlab = "log Number of persistences (cells occupied in both time periods)")

Code
# Merge: attach the species-level Jaccard statistics back onto the
# cell-level data. Keys made explicit (the same keys dplyr inferred, see
# the join message); Total_Ncells_samp is present in both tables, so it
# participates in the join rather than being duplicated with suffixes.
pres_dat_final_v2 <- pres_dat_final %>%
  left_join(Jaccard_df,
            by = c("verbatimIdentification", "datasetID", "Total_Ncells_samp"))
Joining with `by = join_by(verbatimIdentification, datasetID,
Total_Ncells_samp)`
Code
head(pres_dat_final_v2)
  scalingID siteID samplingPeriodID verbatimIdentification
1         1      1                2     Baeolophus bicolor
2         1      1                2         Junco hyemalis
3         1      1                2  Corvus brachyrhynchos
4         1      1                2         Cathartes aura
5         1      1                2        Toxostoma rufum
6         1      1                1        Icterus galbula
         scientificName verbatimFootprintSRS footprintSRS croppedArea areaUnit
1    Baeolophus bicolor           epsg:26918    epsg:4326    20.62383      km2
2        Junco hyemalis           epsg:26918    epsg:4326    20.62383      km2
3 Corvus brachyrhynchos           epsg:26918    epsg:4326    20.62383      km2
4        Cathartes aura           epsg:26918    epsg:4326    20.62383      km2
5       Toxostoma rufum           epsg:26918    epsg:4326    20.62383      km2
6       Icterus galbula           epsg:26918    epsg:4326    20.62383      km2
  centroidDecimalLongitude centroidDecimalLatitude startYear endYear datasetID
1                -79.74312                42.06346      2000    2005         6
2                -79.74312                42.06346      2000    2005         6
3                -79.74312                42.06346      2000    2005         6
4                -79.74312                42.06346      2000    2005         6
5                -79.74312                42.06346      2000    2005         6
6                -79.74312                42.06346      1980    1985         6
  Total_Ncells time_span Total_Ncells_samp Total_area_samp Jaccard_dissim
1         5335         5              5319        126878.5     0.56401597
2         5335         5              5319        126878.5     0.39391086
3         5335         5              5319        126878.5     0.07149777
4         5335         5              5319        126878.5     0.56727452
5         5335         5              5319        126878.5     0.52084419
6         5335         5              5319        126878.5     0.14839259
  Jaccard_sim    a    b    c    d
1   0.4359840 1420 1700  137 2062
2   0.6060891 1931  892  363 2133
3   0.9285022 4792  204  165  158
4   0.4327255 1756 1912  390 1261
5   0.4791558 1839  497 1502 1481
6   0.8516074 4000  271  426  622
Code
#----------------------------------------------------------#

rm(pres_dat_final, Jaccard_df, jaccard, atlas_areas, calculate_grid_info)

Calculate Area of occupancy (AOO)

Code
#----------------------------------------------------------#
# Calculate area of occupancy   -----
#----------------------------------------------------------#

# AOO per dataset x period x scale x species: the summed croppedArea of all
# occupied cells, plus mean cell area and occupied-cell count.
occ_data_final <-
  pres_dat_final_v2 %>%
  ungroup() %>%
  # One row per occupied cell (drop duplicate species-cell records).
  distinct(datasetID, samplingPeriodID, scalingID,
           verbatimIdentification, siteID, time_span,
           .keep_all = TRUE) %>%
  group_by(datasetID, samplingPeriodID, scalingID, verbatimIdentification) %>%
  # Calculate AOO:
  dplyr::summarise(
    mean_area = mean(croppedArea, na.rm = TRUE),
    AOO = sum(croppedArea, na.rm = TRUE),
    occ_Ncells = n_distinct(siteID)) %>%
  # Re-attach the remaining columns from the cell-level table; the join key
  # is inferred from all shared columns, and this fans the summary back out
  # to cell-level rows (collapsed again by distinct() below).
  left_join(pres_dat_final_v2 %>%
              # scalingID recoded so factor levels match for the join.
              mutate(scalingID = as.factor(as.character(scalingID)))) %>%
  # Calculate relative Occupancy:
  dplyr::mutate(
    rel_AOO = AOO / Total_area_samp,
    rel_occ_Ncells = occ_Ncells / Total_Ncells_samp) %>% # Prevalence
  # Remove duplicated rows:
  distinct()
`summarise()` has grouped output by 'datasetID', 'samplingPeriodID',
'scalingID'. You can override using the `.groups` argument.
Joining with `by = join_by(datasetID, samplingPeriodID, scalingID,
verbatimIdentification)`
Code
#----------------------------------------------------------#

rm(pres_dat_final_v2)
hist(occ_data_final$AOO, breaks = 30, main = "AOO distribution", xlab = "AOO")

Code
#----------------------------------------------------------#

# Collapse the cell-level table to one row per
# dataset x period x scale x species, dropping cell-specific columns.
species_data <-
  occ_data_final %>%
  dplyr::select(!c(siteID, croppedArea)) %>%
  distinct(
    datasetID, samplingPeriodID, scalingID, verbatimIdentification,
    .keep_all = TRUE
  )

glimpse(species_data)
Rows: 3,728
Columns: 27
Groups: datasetID, samplingPeriodID, scalingID [16]
$ datasetID                <int> 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, …
$ samplingPeriodID         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ scalingID                <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ verbatimIdentification   <chr> "Accipiter cooperii", "Accipiter gentilis", "…
$ mean_area                <dbl> 24.80957, 24.70131, 24.66508, 23.82849, 24.91…
$ AOO                      <dbl> 13645.26106, 10967.38185, 21187.30398, 48157.…
$ occ_Ncells               <int> 550, 444, 859, 2021, 129, 5059, 1923, 72, 348…
$ scientificName           <chr> "Accipiter cooperii", "Accipiter gentilis", "…
$ verbatimFootprintSRS     <chr> "epsg:26918", "epsg:26918", "epsg:26918", "ep…
$ footprintSRS             <chr> "epsg:4326", "epsg:4326", "epsg:4326", "epsg:…
$ areaUnit                 <chr> "km2", "km2", "km2", "km2", "km2", "km2", "km…
$ centroidDecimalLongitude <dbl> -79.44191, -79.21572, -79.67959, -79.68952, -…
$ centroidDecimalLatitude  <dbl> 42.07554, 42.30919, 42.02105, 42.15570, 42.30…
$ startYear                <int> 1980, 1980, 1980, 1980, 1980, 1980, 1980, 198…
$ endYear                  <int> 1985, 1985, 1985, 1985, 1985, 1985, 1985, 198…
$ Total_Ncells             <int> 5335, 5335, 5335, 5335, 5335, 5335, 5335, 533…
$ time_span                <int> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, …
$ Total_Ncells_samp        <int> 5319, 5319, 5319, 5319, 5319, 5319, 5319, 531…
$ Total_area_samp          <dbl> 126878.5, 126878.5, 126878.5, 126878.5, 12687…
$ Jaccard_dissim           <dbl> 0.88131610, 0.90397805, 0.84927391, 0.6971467…
$ Jaccard_sim              <dbl> 0.11868390, 0.09602195, 0.15072609, 0.3028532…
$ a                        <int> 202, 70, 301, 881, 13, 4837, 1288, 48, 18, 27…
$ b                        <int> 1152, 285, 1138, 888, 133, 133, 1496, 13, 52,…
$ c                        <int> 348, 374, 558, 1140, 116, 222, 635, 24, 330, …
$ d                        <int> 3617, 4590, 3322, 2410, 5057, 127, 1900, 5234…
$ rel_AOO                  <dbl> 0.1075458567, 0.0864400081, 0.1669888724, 0.3…
$ rel_occ_Ncells           <dbl> 0.1034028953, 0.0834743373, 0.1614965219, 0.3…
Code
library(purrr)
#----------------------------------------------------------#
# Calculate occupancy-area-relationship   -----
#----------------------------------------------------------#

# Custom function to calculate OAR and Fractal dimension.
#
# Fits the occupancy-area relationship (OAR) per dataset x period x species
# by regressing log(AOO) on log(mean cell area) across spatial grains, then
# derives the fractal dimension D = 2 - 2 * slope. Species observed at fewer
# than 2 grains, or saturated (rel_occ_Ncells >= 1), are excluded from the
# fit. Returns one row per dataset x period x species at scalingID == 1.
calculate_OAR <- function(species_data) {
  datasets <-
    unique(species_data$datasetID)
  sampling_periods <-
    unique(species_data$samplingPeriodID)

  # Slim working copy with only the columns the OAR fit needs.
  sp_dta <-
    species_data %>%
    select(datasetID, samplingPeriodID, scalingID,
         verbatimIdentification,
         rel_AOO, rel_occ_Ncells, mean_area, AOO)

# Expand the grid for all combinations of datasetID and samplingPeriodID
species_data_new <-
  expand_grid(
  datasetID = datasets,
  samplingPeriodID = sampling_periods) %>%
  inner_join(sp_dta, by = c("datasetID", "samplingPeriodID")) %>%
  # Group by identifiers
  group_by(datasetID, samplingPeriodID, verbatimIdentification) %>%
  # Get available scales where relative occupancy is not saturated (< 1)
  # NOTE(review): n() counts ALL rows of the group; no rel_occ < 1 filter is
  # applied before counting, despite the comment above — confirm intent.
  summarise(
    available_scales = n(),
    mean_relAOO = mean(rel_AOO, na.rm = TRUE),
    # Flag species observed at < 2 grains: a regression needs >= 2 points.
    exclude_sp_OAR = if_else(available_scales < 2, 1, 0),
    .groups = "drop"
  ) %>%
  # remove those where the range is saturated at the smallest resolution
  # NOTE(review): available_scales == 0 looks unreachable here (summarise
  # rows come from groups with n() >= 1), so these branches appear inert.
  mutate(
    exclude_sp_OAR = if_else(available_scales == 0, 1, exclude_sp_OAR),
    mean_relAOO = if_else(available_scales == 0, 1, mean_relAOO)
  ) %>%
  # Re-attach the scale-level rows so each species keeps one row per grain.
  full_join(sp_dta, by = c("datasetID", "samplingPeriodID", "verbatimIdentification")) %>%
  filter(
    exclude_sp_OAR == 0,
    rel_occ_Ncells < 1
  ) %>%
  distinct() %>%
  # Keep rows where at least one of the fit variables is present.
  # NOTE(review): filter_at()/any_vars() are superseded; if_any() is the
  # modern equivalent.
  filter_at(vars(scalingID, AOO, mean_area), any_vars(!is.na(.))) %>%
  ungroup()

# Fit models using purrr::map
species_data_new_v2 <-
  species_data_new %>%
  group_by(datasetID, samplingPeriodID, verbatimIdentification) %>%
  # One nested data frame of (grain, AOO, area) points per species.
  nest(data = c(scalingID, AOO, mean_area, rel_AOO, rel_occ_Ncells)) %>%
  mutate(
    coefficients = map(
      data,
      ~ .x %>%
        # Drop points where the log transform would be undefined.
        filter(
          !is.na(AOO) & AOO > 0,
          !is.na(mean_area) & mean_area > 0
        ) %>%
        # log-log OAR regression; `data = .` receives the filtered tibble.
        lm(log(AOO) ~ log(mean_area), data = .) %>%
        coef() %>%
        {
          # .[1] = intercept, .[2] = slope; D = 2 - 2 * slope.
          tibble(
            m_AOO_a = .[2],
            b_AOO_a = .[1],
            D_AOO_a = -2 * .[2] + 2
          )
        }
    )
  ) %>%
  unnest(coefficients) %>%
  ungroup() %>%
  # Final cleanup of results
  select(datasetID, samplingPeriodID, verbatimIdentification, m_AOO_a, b_AOO_a, D_AOO_a) %>%
  distinct() %>%
  # Merge with the original dataset (join keys inferred from shared columns).
  full_join(species_data) %>%
  # Report at the finest grain only.
  filter(scalingID == 1) %>%
  select(
    datasetID, verbatimIdentification, samplingPeriodID, Total_area_samp,
    Total_Ncells, Total_Ncells_samp, AOO, occ_Ncells, rel_occ_Ncells,
    rel_AOO, Jaccard_dissim, m_AOO_a, b_AOO_a, D_AOO_a
  )

return(species_data_new_v2)
}


#----------------------------------------------------------#

# Apply OAR-function, then fill in defaults and re-attach finest-grain data.
species_data_new <- 
  calculate_OAR(species_data) %>%
  distinct(datasetID, samplingPeriodID, verbatimIdentification, .keep_all = TRUE)   %>%
  mutate(
    # Species without a fitted OAR slope (e.g. saturated ranges excluded
    # from the regression) get the space-filling value D = 2.
    D_AOO_a = case_when(is.na(D_AOO_a) ~ 2,
                        .default = D_AOO_a)) %>%
      # Join keys inferred from all shared columns (see join message in the
      # rendered output); brings back a, b, c, d and time_span.
      left_join(species_data %>% filter(scalingID == 1)) %>%
      # reduce columns
      select(
        datasetID, verbatimIdentification, samplingPeriodID,
        Total_area_samp, Total_Ncells, Total_Ncells_samp,
        AOO, occ_Ncells, rel_occ_Ncells, rel_AOO,
        Jaccard_dissim, a,b,c,d, D_AOO_a, time_span
      ) %>%
      # One row per dataset x period x species.
      distinct(datasetID, samplingPeriodID, verbatimIdentification,
               .keep_all = TRUE)
Joining with `by = join_by(datasetID, samplingPeriodID,
verbatimIdentification)`
Joining with `by = join_by(datasetID, verbatimIdentification, samplingPeriodID,
Total_area_samp, Total_Ncells, Total_Ncells_samp, AOO, occ_Ncells,
rel_occ_Ncells, rel_AOO, Jaccard_dissim)`
Code
glimpse(species_data_new)
Rows: 466
Columns: 17
$ datasetID              <int> 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,…
$ verbatimIdentification <chr> "Accipiter cooperii", "Accipiter gentilis", "Ac…
$ samplingPeriodID       <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ Total_area_samp        <dbl> 126878.5, 126878.5, 126878.5, 126878.5, 126878.…
$ Total_Ncells           <int> 5335, 5335, 5335, 5335, 5335, 5335, 5335, 5335,…
$ Total_Ncells_samp      <int> 5319, 5319, 5319, 5319, 5319, 5319, 5319, 5319,…
$ AOO                    <dbl> 13645.26106, 10967.38185, 21187.30398, 48157.36…
$ occ_Ncells             <int> 550, 444, 859, 2021, 129, 5059, 1923, 72, 348, …
$ rel_occ_Ncells         <dbl> 0.1034028953, 0.0834743373, 0.1614965219, 0.379…
$ rel_AOO                <dbl> 0.1075458567, 0.0864400081, 0.1669888724, 0.379…
$ Jaccard_dissim         <dbl> 0.88131610, 0.90397805, 0.84927391, 0.69714679,…
$ a                      <int> 202, 70, 301, 881, 13, 4837, 1288, 48, 18, 27, …
$ b                      <int> 1152, 285, 1138, 888, 133, 133, 1496, 13, 52, 9…
$ c                      <int> 348, 374, 558, 1140, 116, 222, 635, 24, 330, 21…
$ d                      <int> 3617, 4590, 3322, 2410, 5057, 127, 1900, 5234, …
$ D_AOO_a                <dbl> 1.537572e+00, 1.473556e+00, 1.647100e+00, 1.637…
$ time_span              <int> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,…
Code
hist(species_data_new$D_AOO_a, breaks = 30, main = "Fractal dimension distribution", xlab = "Fractal dimension (D)")

Code
#----------------------------------------------------------#
# Calculate log ratio AOO   -----
#----------------------------------------------------------#

time_periods <- c(1,2)

# Custom function to transform data to wide-format.
#
# Reshapes the per-period AOO values to wide format: one column per sampling
# period (samplingPeriodID1_AOO, samplingPeriodID2_AOO, ...), keyed by
# datasetID x verbatimIdentification. Prints NA counts and a preview as a
# side effect, then returns the wide tibble.
#
# Improvements over the original: drops a no-op group_by() (the grouping was
# never used before filtering), renames only the AOO column directly instead
# of prefixing every column and renaming two of them back, and preallocates
# the result list.
transform_to_wide <- function(species_data_new, time_periods = c(1, 2)) {
  # Preallocate one slot per time period.
  wide_dfs <- vector("list", length(time_periods))

  for (i in seq_along(time_periods)) {
    # One row per dataset x species with that period's AOO; the period index
    # is encoded in the column name so the later join produces side-by-side
    # columns.
    wide_dfs[[i]] <- species_data_new %>%
      distinct(datasetID, samplingPeriodID, verbatimIdentification, AOO) %>%
      filter(samplingPeriodID == time_periods[i]) %>%
      select(-samplingPeriodID) %>%
      rename(!!paste0("samplingPeriodID", i, "_AOO") := AOO)
  }

  # Merge the wide data frames sequentially on the species key.
  sp_dat_wide <- reduce(wide_dfs, full_join, by = c("verbatimIdentification", "datasetID"))

  cat("NA counts in wide data after processing:\n")
  print(colSums(is.na(sp_dat_wide)))

  cat("Preview of wide data:\n")
  print(head(sp_dat_wide))

  return(sp_dat_wide)
}


#----------------------------------------------------------#
# Years elapsed between start and end of each sampling period.
# NOTE(review): with two sampling periods per dataset this yields TWO rows
# per datasetID (one per start/end pair), which fans out the later join;
# downstream unique() only collapses them when n_years is equal across
# periods (true here: both are 5). Verify for other atlases.
time_between_samples <- species_data %>% 
  ungroup() %>% 
  select(datasetID, startYear, endYear) %>% distinct() %>% mutate(n_years = endYear-startYear)

# Apply function:
sp_dat_wide <-
  transform_to_wide(species_data_new, time_periods) %>%
  na.omit() # drop species lost or gained completely
NA counts in wide data after processing:
             datasetID verbatimIdentification  samplingPeriodID1_AOO 
                     0                      0                      0 
 samplingPeriodID2_AOO 
                     0 
Preview of wide data:
# A tibble: 6 × 4
  datasetID verbatimIdentification samplingPeriodID1_AOO samplingPeriodID2_AOO
      <int> <chr>                                  <dbl>                 <dbl>
1         6 Accipiter cooperii                    13645.                32884.
2         6 Accipiter gentilis                    10967.                 8758.
3         6 Accipiter striatus                    21187.                35376.
4         6 Actitis macularius                    48157.                42520.
5         6 Aegolius acadicus                      3214.                 3626.
6         6 Agelaius phoeniceus                  120992.               118744.
Code
#----------------------------------------------------------#
# log[(St2 /St1 )/(t2 − t1 + 1)]
# NOTE(review): the formula above does not match the code — the code
# computes log(AOO2 / AOO1) / n_years with n_years = endYear - startYear
# (no "+ 1", division outside the log). Confirm which form is intended.
# Calculate log ratio of AOO
logRatio <-
  sp_dat_wide %>%
  # Joined by datasetID only; time_between_samples has one row per sampling
  # period, hence the many-to-many warning. Duplicate rows are collapsed by
  # unique() below because n_years is identical across periods here.
  left_join(time_between_samples) %>%
  group_by(datasetID, verbatimIdentification) %>%
  mutate(
    # Log ratio of range size between the two periods (>0 = expansion).
    log_R2_1 = log(samplingPeriodID2_AOO / samplingPeriodID1_AOO)) %>%
  mutate(
    # Annualized rate of change.
    log_R2_1_per_year = log_R2_1/ n_years) %>%
  select(-samplingPeriodID1_AOO, -samplingPeriodID2_AOO) %>% 
  unique()
Joining with `by = join_by(datasetID)`
Warning in left_join(., time_between_samples): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1 of `x` matches multiple rows in `y`.
ℹ Row 1 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.
Code
hist(logRatio$log_R2_1, breaks = 30, main = "Log ratio of AOO distribution", xlab = "Log ratio of AOO = log(AOO2/AOO1)")

Code
hist(logRatio$log_R2_1_per_year, breaks = 30, main = "Log ratio of AOO per year distribution", xlab = "Log ratio of AOO per year = log(AOO2/AOO1)/duration in years")

Write to file

Code
# save final predictor table from grids/atlases.
# logRatio has no samplingPeriodID, so each of its rows matches both period
# rows of a species in species_data_new — that fan-out is expected, hence
# relationship = "many-to-many" (silences the warning without changing the
# result). Keys made explicit (same keys dplyr inferred).
big_table <-
  full_join(species_data_new, logRatio,
            by = c("datasetID", "verbatimIdentification"),
            relationship = "many-to-many") %>%
  distinct(datasetID, samplingPeriodID, verbatimIdentification,
           .keep_all = TRUE) %>%
  # Round every numeric column to 3 decimals; across() supersedes mutate_if.
  mutate(across(where(is.numeric), ~ round(.x, 3)))
Joining with `by = join_by(datasetID, verbatimIdentification)`
Warning in full_join(species_data_new, logRatio): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1 of `x` matches multiple rows in `y`.
ℹ Row 1 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.
Code
saveRDS(big_table, here("Demo_NewYork/Data/output/A_Big_table_ny.rds"))
glimpse(big_table)
Rows: 466
Columns: 22
$ datasetID              <dbl> 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,…
$ verbatimIdentification <chr> "Accipiter cooperii", "Accipiter gentilis", "Ac…
$ samplingPeriodID       <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ Total_area_samp        <dbl> 126878.5, 126878.5, 126878.5, 126878.5, 126878.…
$ Total_Ncells           <dbl> 5335, 5335, 5335, 5335, 5335, 5335, 5335, 5335,…
$ Total_Ncells_samp      <dbl> 5319, 5319, 5319, 5319, 5319, 5319, 5319, 5319,…
$ AOO                    <dbl> 13645.261, 10967.382, 21187.304, 48157.370, 321…
$ occ_Ncells             <dbl> 550, 444, 859, 2021, 129, 5059, 1923, 72, 348, …
$ rel_occ_Ncells         <dbl> 0.103, 0.083, 0.161, 0.380, 0.024, 0.951, 0.362…
$ rel_AOO                <dbl> 0.108, 0.086, 0.167, 0.380, 0.025, 0.954, 0.370…
$ Jaccard_dissim         <dbl> 0.881, 0.904, 0.849, 0.697, 0.950, 0.068, 0.623…
$ a                      <dbl> 202, 70, 301, 881, 13, 4837, 1288, 48, 18, 27, …
$ b                      <dbl> 1152, 285, 1138, 888, 133, 133, 1496, 13, 52, 9…
$ c                      <dbl> 348, 374, 558, 1140, 116, 222, 635, 24, 330, 21…
$ d                      <dbl> 3617, 4590, 3322, 2410, 5057, 127, 1900, 5234, …
$ D_AOO_a                <dbl> 1.538, 1.474, 1.647, 1.638, 1.129, 1.931, 1.630…
$ time_span              <dbl> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,…
$ startYear              <dbl> 1980, 1980, 1980, 1980, 1980, 1980, 1980, 1980,…
$ endYear                <dbl> 1985, 1985, 1985, 1985, 1985, 1985, 1985, 1985,…
$ n_years                <dbl> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,…
$ log_R2_1               <dbl> 0.880, -0.225, 0.513, -0.124, 0.121, -0.019, 0.…
$ log_R2_1_per_year      <dbl> 0.176, -0.045, 0.103, -0.025, 0.024, -0.004, 0.…
Code
skimr::skim(big_table)
Data summary
Name big_table
Number of rows 466
Number of columns 22
_______________________
Column type frequency:
character 1
numeric 21
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
verbatimIdentification 0 1 9 32 0 233 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
datasetID 0 1 6.00 0.00 6.00 6.00 6.00 6.00 6.00 ▁▁▇▁▁
samplingPeriodID 0 1 1.50 0.50 1.00 1.00 1.50 2.00 2.00 ▇▁▁▁▇
Total_area_samp 0 1 126878.54 0.00 126878.54 126878.54 126878.54 126878.54 126878.54 ▁▁▇▁▁
Total_Ncells 0 1 5335.00 0.00 5335.00 5335.00 5335.00 5335.00 5335.00 ▁▁▇▁▁
Total_Ncells_samp 0 1 5319.00 0.00 5319.00 5319.00 5319.00 5319.00 5319.00 ▁▁▇▁▁
AOO 0 1 37447.22 41802.67 4.98 1156.73 16974.24 73050.82 125067.48 ▇▂▁▂▂
occ_Ncells 0 1 1550.62 1729.79 1.00 70.50 700.00 2993.75 5229.00 ▇▂▁▂▂
rel_occ_Ncells 0 1 0.29 0.33 0.00 0.01 0.13 0.56 0.98 ▇▂▁▂▂
rel_AOO 0 1 0.30 0.33 0.00 0.01 0.13 0.58 0.99 ▇▂▁▂▂
Jaccard_dissim 0 1 0.61 0.28 0.00 0.43 0.68 0.84 1.00 ▃▃▃▇▇
a 0 1 1181.49 1573.21 0.00 25.00 248.00 2012.00 5138.00 ▇▂▁▁▁
b 0 1 415.33 473.61 0.00 50.00 231.00 685.00 2999.00 ▇▂▁▁▁
c 0 1 322.92 322.93 0.00 36.00 214.00 530.00 1502.00 ▇▃▂▁▁
d 0 1 3399.26 1910.40 33.00 1474.00 4126.00 5197.00 5318.00 ▃▂▂▂▇
D_AOO_a 0 1 1.32 0.55 0.00 0.90 1.41 1.82 1.99 ▂▂▃▃▇
time_span 0 1 5.00 0.00 5.00 5.00 5.00 5.00 5.00 ▁▁▇▁▁
startYear 0 1 1980.00 0.00 1980.00 1980.00 1980.00 1980.00 1980.00 ▁▁▇▁▁
endYear 0 1 1985.00 0.00 1985.00 1985.00 1985.00 1985.00 1985.00 ▁▁▇▁▁
n_years 0 1 5.00 0.00 5.00 5.00 5.00 5.00 5.00 ▁▁▇▁▁
log_R2_1 0 1 0.06 0.66 -1.79 -0.15 0.01 0.24 3.75 ▁▇▁▁▁
log_R2_1_per_year 0 1 0.01 0.13 -0.36 -0.03 0.00 0.05 0.75 ▁▇▁▁▁