Demonstration: New York

Data preparation (by Gabriel Ortega Solís)

Since most data we used here come with restricted rights of sharing, we set up a workflow for the New York Breeding Bird atlas, which is freely available from the website: https://www.dec.ny.gov/animals/7312.html

We processed the original New York BBA data into a common format with the other atlases that we used. The structure is therefore a bit different from the original data. These steps are not necessary to repeat the analysis but it shows how the data was formated after downloading it. Multiple steps in between were done manually by hand to standardize all BBAs to the same format - these cannot be reproduced with the code chunk below. The code below is just for documentation. We provide .rds files with the prepared (standardized) data used for our study below.

Code
# libraries for preparation only:

pacman::p_load(
  sf, terra, data.table, tidyverse, tidyterra, tidytable, knitr,
  tictoc, RPostgres, DBI, dbplyr, parallel,
  geodata
)


# Variables
seldata <- "data/4processedAtlases/Birds_Atlas_New_York"
datasetID <- 6
licenseID <- 1
verbatimFootprintSRS <- "epsg:26918"

# Data processing
dataset <- readRDS(paste0(seldata, "_beast_data.rds")) %>% as.data.table()

spnamesinfo <- read.csv(paste0(seldata, "_data_INFO_spnames.csv")) %>%
  as.data.table() %>%
  mutate(cellID = str_split(cellID, pattern = ",")) %>%
  unnest(cellID) %>%
  select(cellID, name_in_data, original_verbatim_name) %>%
  mutate(cellID = cellID) %>%
  unique()

grid <- st_layers(paste0(seldata, "_grid.gpkg"))$name %>%
  sapply(., USE.NAMES = T, simplify = F, function(x) {
    st_read(paste0(seldata, "_grid.gpkg"), layer = x)
  })

corrs <- readRDS(paste0(seldata, "_cells_corr.rds")) %>%
  as.data.table() %>%
  pivot_longer(-cellID, names_to = "cell_grouping", values_to = "cell_label") %>%
  mutate(cell_grouping = as.numeric(cell_grouping))

full_data <- left_join(dataset, corrs, by = c("cell_grouping", "cell_label"), relationship = "many-to-many") %>%
  left_join(., filter(spnamesinfo, !is.na(cellID)), by = c("cellID" = "cellID", "verbatim_name" = "name_in_data")) %>%
  mutate(verbatim_name = ifelse(is.na(original_verbatim_name), verbatim_name, original_verbatim_name)) %>%
  select(-original_verbatim_name) %>%
  unique() %>%
  mutate(
    datasetID = datasetID,
    licenseID = licenseID,
    verbatimIdentificationID = dense_rank(verbatim_name),
    samplingPeriod = dense_rank(start_year),
    effort = ifelse(effort == 0, NA, effort),
    samp_effort_type = ifelse(is.na(samp_effort_type), NA, 4)
  ) %>%
  unique()

gc()


data_table <- data.table(
  datasetID = datasetID,
  datasetName = "New York State Breeding Bird Atlas",
  datasetPublisher = "New York State Department of Environmental Conservation, New York Natural Heritage Program",
  datasetPublisherContact = "nybbba3@gmail.com | julie.hart@dec.ny.gov",
  licenseID = licenseID,
  rightsHolder = "New York State Department of Environmental Conservation",
  bibliographicCitation = "BBA1 data: New York State Breeding Bird Atlas [Internet]. 1980 - 1985. Release 1.0. Albany (New York): New York State Department of Environmental Conservation. updated 2007 Jun 6; cited 2024 Apr 09. Available from: https://www.dec.ny.gov/animals/7312.html. | BBA2 data: New York State Breeding Bird Atlas 2000 [Internet]. 2000 - 2005. Release 1.0. Albany (New York): New York State Department of Environmental Conservation. updated 2007 Jun 11; cited 2024 Apr 09. Available from: https://www.dec.ny.gov/animals/7312.html.",
  citationIdentifier = "",
  provider = "New York State Department of Environmental Conservation",
  shareable = "NO",
  coauthorshipRequired = "NO",
  coauthors = "",
  coauthorshipSuggested = "Carmen Soria - carmendianasoria@gmail.com | Kateřina Tschernosterová - tschernosterova@fzp.czu.cz | Friederike Wölke - friederike.woelke@gmail.com | Gabriel Ortega - g.ortega.solis@gmail.com",
  isSamplingEffortReported = "YES",
  isOccurrenceProbabilityAvailable = "NO"
)


data_table <- data.table(
  siteID = full_data$cell_label,
  scalingID = full_data$cell_grouping,
  datasetID = full_data$datasetID,
  area = full_data$area,
  croppedArea = full_data$area_cropped,
  areaUnit = "km2",
  maxLength = NA,
  northSouthLength = NA,
  eastWestLength = NA,
  lengthUnit = "km",
  centroidDecimalLongitude = full_data$cell_long,
  centroidDecimalLatitude = full_data$cell_lat,
  samplingRepetitions = full_data$repeated
) %>% unique()


data_table <- grid %>%
  dplyr::bind_rows() %>%
  ungroup() %>%
  rename(
    siteID = cell_label,
    scalingID = cell_grouping,
    geometry = geom
  ) %>%
  mutate(
    siteID = as.integer(siteID),
    scalingID = as.integer(scalingID),
    datasetID = as.integer(datasetID),
    footprintSRS = "epsg:4326",
    verbatimFootprintSRS = verbatimFootprintSRS
  ) %>%
  select(
    ., siteID, scalingID, datasetID,
    footprintSRS, verbatimFootprintSRS, geometry
  )

data_table$geometry <- st_cast(data_table$geometry, "MULTIPOLYGON")


data_table <- filter(full_data, cell_grouping == 1) %>%
  mutate(
    verbatimIdentificationID = verbatimIdentificationID,
    verbatimSiteID = cellID,
    datasetID = datasetID,
    samplingPeriodID = samplingPeriod,
    recordFilter = NA
  ) %>%
  select(
    verbatimIdentificationID,
    verbatimSiteID,
    datasetID,
    samplingPeriodID,
    recordFilter
  ) %>%
  unique()

Getting started

Code
suppressPackageStartupMessages({
library(dplyr)
library(sf)
library(here)
#install.packages("skimr", repos = "https://cloud.r-project.org/")
library(skimr)
})
Warning: package 'dplyr' was built under R version 4.4.2
Warning: package 'sf' was built under R version 4.4.3
Warning: package 'skimr' was built under R version 4.4.2
Code
dta <- 
  readRDS(here::here("Data/input/data.rds")) %>% 
  filter(datasetID == 6)
grids <- 
  readRDS(here::here("Data/input/grid.rds")) %>% 
  filter(datasetID == 6)
data_sf <- 
  readRDS(here::here("Data/input/data_sf.rds")) %>% 
  filter(datasetID == 6)


if (!dir.exists(here::here("Demo_NewYork/Data/input"))) {
  dir.create(here::here("Demo_NewYork/Data/input"))
  }

saveRDS(dta, here::here("Demo_NewYork/Data/input/raw_data_ny.rds"))


saveRDS(grids, here::here("Demo_NewYork/Data/input/grid_ny.rds"))
saveRDS(data_sf, here::here("Demo_NewYork/Data/input/sf_data_ny.rds"))

Data filtering

Code
# 1. cells sampled twice
cells <-
  data_sf %>%
  st_drop_geometry() %>%
  group_by(datasetID, scalingID, siteID, samplingPeriodID) %>%
  mutate(cell_sampled = if_else(is.na(verbatimIdentification), 0, 1)) %>%
  ungroup() %>%
  distinct(datasetID, scalingID, siteID, samplingPeriodID, cell_sampled, croppedArea, time_span) %>%
  group_by(datasetID, scalingID, siteID, croppedArea) %>%
  dplyr::summarise(
    cell_sampling_repeats = n_distinct(samplingPeriodID, na.rm = TRUE),
    .groups = "keep") %>%
  mutate(cells_keep =
           case_when(
             cell_sampling_repeats == 2 & !is.na(croppedArea) ~ 1,
             cell_sampling_repeats %in% c(0,1) | is.na(croppedArea) ~ 0,
            .default = NA)) %>%
  unique()


# Checks:
colSums(is.na(cells)) # no NAs
            datasetID             scalingID                siteID 
                    0                     0                     0 
          croppedArea cell_sampling_repeats            cells_keep 
                    1                     0                     0 
Code
glimpse(cells) 
Rows: 7,289
Columns: 6
Groups: datasetID, scalingID, siteID, croppedArea [7,289]
$ datasetID             <int> 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, …
$ scalingID             <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ siteID                <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 1…
$ croppedArea           <dbl> 20.623832, 24.927742, 20.822468, 24.824370, 17.9…
$ cell_sampling_repeats <int> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
$ cells_keep            <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
Code
species_df <-
  data_sf %>%
  st_drop_geometry() %>%
  filter(scalingID == 1) %>%
  distinct(datasetID, verbatimIdentification, scientificName, samplingPeriodID, time_span) %>%
  filter(!is.na(verbatimIdentification)) # NAs from cells that were not sampled twice (here: only site 5316)

glimpse(species_df)
Rows: 495
Columns: 5
$ datasetID              <int> 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,…
$ verbatimIdentification <chr> "Picoides pubescens", "Empidonax virescens", "G…
$ scientificName         <chr> "Dryobates pubescens", "Empidonax virescens", "…
$ samplingPeriodID       <dbl> 2, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 2, 2, 2,…
$ time_span              <int> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,…

Introduced species

This part requires access to the BirdLife International range maps available here: https://datazone.birdlife.org/contact-us/request-our-data

Due to licensing restrictions we cannot share this part, but instead we share a list of species that are introduced in New York.

Code
# temp <-  
#   readRDS(here::here("Data", "output", "1_data_sf.rds")) %>% 
#   st_drop_geometry() %>%
#   filter(datasetID == 6)
# 
# # write to file
# temp %>% 
#   distinct(verbatimIdentification, introduced) %>% 
#   write.csv2(here::here("Demo_NewYork/Data/input/introduced_species_ny.csv"))


# read back in
introduced <- 
  read.csv2(here::here("Demo_NewYork/Data/input/introduced_species_ny.csv"))

# cheks: 7 introduced species
table(introduced$introduced)

  0   1 
249   7 
Code
# here is the list of species removed:
introduced %>% 
  filter(introduced == 1) %>% 
  pull(verbatimIdentification)
[1] "Passer domesticus"   "Sturnus vulgaris"    "Phasianus colchicus"
[4] "Cygnus olor"         "Perdix perdix"       "Myiopsitta monachus"
[7] "Nandayus nenday"    
Code
species_df2 <- 
  species_df %>% 
  left_join(introduced[2:3])
Joining with `by = join_by(verbatimIdentification)`
Code
glimpse(species_df2)
Rows: 495
Columns: 6
$ datasetID              <int> 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,…
$ verbatimIdentification <chr> "Picoides pubescens", "Empidonax virescens", "G…
$ scientificName         <chr> "Dryobates pubescens", "Empidonax virescens", "…
$ samplingPeriodID       <dbl> 2, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 2, 2, 2,…
$ time_span              <int> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,…
$ introduced             <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…

Species lost or gained completely

Code
common_sp <- 
  data_sf %>%
  st_drop_geometry() %>%
  filter(scalingID == 1) %>%
  left_join(species_df2) %>%
  left_join(cells %>% filter(scalingID == 1)) %>%
  na.omit() %>%
  filter(cells_keep == 1 & introduced == 0) %>%
  group_by(verbatimIdentification) %>%
  dplyr::summarise(sp_sampling_repeats = n_distinct(samplingPeriodID),
                   .groups = "drop")
Joining with `by = join_by(datasetID, verbatimIdentification, scientificName,
samplingPeriodID, time_span)`
Joining with `by = join_by(datasetID, scalingID, siteID, croppedArea)`
Code
# checks: removes 16 species that were only sampled once
table(common_sp$sp_sampling_repeats)

  1   2 
 16 233 
Code
# here is the list of species removed:
common_sp %>%
  filter(sp_sampling_repeats == 1) %>% 
  pull(verbatimIdentification)
 [1] "Ammodramus nelsoni"               "Aythya marila"                   
 [3] "Bucephala albeola"                "Chlidonias leucopterus"          
 [5] "Coragyps atratus"                 "Cygnus buccinator"               
 [7] "Euphagus cyanocephalus"           "Falco columbarius"               
 [9] "Grus canadensis"                  "Pelecanus erythrorhynchos"       
[11] "Phalaropus tricolor"              "Plegadis chihi"                  
[13] "Somateria mollissima"             "Streptopelia decaocto"           
[15] "Tyrannus verticalis"              "Vermivora pinus x V. chrysoptera"

Decide which species to keep

Code
species_df3 <- 
  species_df2 %>%
  left_join(common_sp) %>%
    mutate(
    species_keep =
      case_when(
        sp_sampling_repeats == 2 & introduced == 0 ~ 1,
        TRUE ~ 0))
Joining with `by = join_by(verbatimIdentification)`
Code
# checks: we remove 29 species and keep 466 total in both sampling periods.
table(species_df3$species_keep)

  0   1 
 29 466 

Apply data filters

Code
data_sf2 <-
  data_sf %>%
  left_join(cells) %>%
  left_join(species_df3)
Joining with `by = join_by(datasetID, scalingID, siteID, croppedArea)`
Joining with `by = join_by(datasetID, verbatimIdentification, scientificName,
samplingPeriodID, time_span)`
Code
# keep only species and cells sampled twice
data_filt <-
  data_sf2 %>%
  st_drop_geometry() %>%
  filter(!is.na(verbatimIdentification) & species_keep == 1 & cells_keep == 1)

# Checks:
data_filt %>%
  group_by(samplingPeriodID) %>%
  skimr::skim()
Data summary
Name Piped data
Number of rows 1120865
Number of columns 31
_______________________
Column type frequency:
character 6
numeric 24
________________________
Group variables samplingPeriodID

Variable type: character

skim_variable samplingPeriodID n_missing complete_rate min max empty n_unique whitespace
footprintSRS 1 0 1 9 9 0 1 0
footprintSRS 2 0 1 9 9 0 1 0
verbatimFootprintSRS 1 0 1 10 10 0 1 0
verbatimFootprintSRS 2 0 1 10 10 0 1 0
areaUnit 1 0 1 3 3 0 1 0
areaUnit 2 0 1 3 3 0 1 0
lengthUnit 1 0 1 2 2 0 1 0
lengthUnit 2 0 1 2 2 0 1 0
verbatimIdentification 1 0 1 9 32 0 233 0
verbatimIdentification 2 0 1 9 32 0 233 0
scientificName 1 0 1 9 39 0 233 0
scientificName 2 0 1 9 39 0 233 0

Variable type: numeric

skim_variable samplingPeriodID n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
datasetID 1 0 1 6.00 0.00 6.00 6.00 6.00 6.00 6.00 ▁▁▇▁▁
datasetID 2 0 1 6.00 0.00 6.00 6.00 6.00 6.00 6.00 ▁▁▇▁▁
scalingID 1 0 1 2.13 4.91 1.00 1.00 1.00 2.00 128.00 ▇▁▁▁▁
scalingID 2 0 1 2.09 4.76 1.00 1.00 1.00 2.00 128.00 ▇▁▁▁▁
siteID 1 0 1 2150.86 1484.84 1.00 872.00 1921.00 3225.00 5335.00 ▇▇▆▃▃
siteID 2 0 1 2128.88 1473.71 1.00 859.00 1905.00 3175.00 5335.00 ▇▇▆▃▃
area 1 0 1 291.59 3264.13 24.93 25.01 25.02 100.05 133087.29 ▇▁▁▁▁
area 2 0 1 282.42 3188.68 24.93 25.01 25.02 100.05 133087.29 ▇▁▁▁▁
croppedArea 1 0 1 278.72 3120.13 0.01 25.01 25.02 100.05 127077.41 ▇▁▁▁▁
croppedArea 2 0 1 270.15 3048.35 0.01 25.01 25.02 100.04 127077.41 ▇▁▁▁▁
croppedAreaPercent 1 0 1 0.96 0.16 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
croppedAreaPercent 2 0 1 0.96 0.15 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
centroidDecimalLongitude 1 0 1 -75.49 1.71 -79.76 -76.63 -75.12 -74.12 -71.87 ▂▃▆▇▁
centroidDecimalLongitude 2 0 1 -75.55 1.72 -79.76 -76.69 -75.21 -74.15 -71.87 ▂▃▆▇▁
centroidDecimalLatitude 1 0 1 42.88 0.99 40.49 42.22 42.80 43.51 45.03 ▂▅▇▅▃
centroidDecimalLatitude 2 0 1 42.89 0.97 40.49 42.24 42.81 43.51 45.03 ▂▅▇▅▃
croppedDecimalLongitude 1 0 1 -75.49 1.71 -79.74 -76.63 -75.12 -74.12 -71.88 ▂▃▅▇▁
croppedDecimalLongitude 2 0 1 -75.55 1.72 -79.74 -76.69 -75.21 -74.15 -71.88 ▂▃▆▇▁
croppedDecimalLatitude 1 0 1 42.88 0.99 40.51 42.23 42.80 43.51 45.01 ▂▅▇▅▃
croppedDecimalLatitude 2 0 1 42.89 0.97 40.51 42.24 42.81 43.51 45.01 ▂▅▇▅▃
northSouthLength 1 0 1 9.78 17.62 5.00 5.05 5.12 10.11 509.30 ▇▁▁▁▁
northSouthLength 2 0 1 9.67 17.28 5.00 5.05 5.12 10.11 509.30 ▇▁▁▁▁
eastWestLength 1 0 1 10.03 20.55 5.01 5.05 5.12 10.12 654.17 ▇▁▁▁▁
eastWestLength 2 0 1 9.90 20.15 5.01 5.05 5.12 10.12 654.17 ▇▁▁▁▁
maxLength 1 0 1 13.77 24.32 7.06 7.07 7.07 14.15 659.64 ▇▁▁▁▁
maxLength 2 0 1 13.60 23.88 7.06 7.07 7.07 14.15 659.64 ▇▁▁▁▁
croppedNorthSouthLength 1 0 1 9.63 17.40 0.18 5.05 5.10 10.10 502.08 ▇▁▁▁▁
croppedNorthSouthLength 2 0 1 9.52 17.07 0.18 5.05 5.10 10.09 502.08 ▇▁▁▁▁
croppedEastWestLength 1 0 1 9.92 20.45 0.06 5.05 5.11 10.11 650.95 ▇▁▁▁▁
croppedEastWestLength 2 0 1 9.78 20.05 0.06 5.05 5.11 10.10 650.95 ▇▁▁▁▁
croppedMaxLength 1 0 1 13.61 24.16 0.32 7.07 7.07 14.15 655.19 ▇▁▁▁▁
croppedMaxLength 2 0 1 13.44 23.72 0.32 7.07 7.07 14.15 655.19 ▇▁▁▁▁
startYear 1 0 1 1980.00 0.00 1980.00 1980.00 1980.00 1980.00 1980.00 ▁▁▇▁▁
startYear 2 0 1 2000.00 0.00 2000.00 2000.00 2000.00 2000.00 2000.00 ▁▁▇▁▁
endYear 1 0 1 1985.00 0.00 1985.00 1985.00 1985.00 1985.00 1985.00 ▁▁▇▁▁
endYear 2 0 1 2005.00 0.00 2005.00 2005.00 2005.00 2005.00 2005.00 ▁▁▇▁▁
time_span 1 0 1 5.00 0.00 5.00 5.00 5.00 5.00 5.00 ▁▁▇▁▁
time_span 2 0 1 5.00 0.00 5.00 5.00 5.00 5.00 5.00 ▁▁▇▁▁
cell_sampling_repeats 1 0 1 2.00 0.00 2.00 2.00 2.00 2.00 2.00 ▁▁▇▁▁
cell_sampling_repeats 2 0 1 2.00 0.00 2.00 2.00 2.00 2.00 2.00 ▁▁▇▁▁
cells_keep 1 0 1 1.00 0.00 1.00 1.00 1.00 1.00 1.00 ▁▁▇▁▁
cells_keep 2 0 1 1.00 0.00 1.00 1.00 1.00 1.00 1.00 ▁▁▇▁▁
introduced 1 0 1 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ▁▁▇▁▁
introduced 2 0 1 0.00 0.00 0.00 0.00 0.00 0.00 0.00 ▁▁▇▁▁
sp_sampling_repeats 1 0 1 2.00 0.00 2.00 2.00 2.00 2.00 2.00 ▁▁▇▁▁
sp_sampling_repeats 2 0 1 2.00 0.00 2.00 2.00 2.00 2.00 2.00 ▁▁▇▁▁
species_keep 1 0 1 1.00 0.00 1.00 1.00 1.00 1.00 1.00 ▁▁▇▁▁
species_keep 2 0 1 1.00 0.00 1.00 1.00 1.00 1.00 1.00 ▁▁▇▁▁
Code
data_filt_reduced <- 
  data_filt %>% 
  select(scalingID, siteID, samplingPeriodID, 
         verbatimIdentification, scientificName, 
         verbatimFootprintSRS, footprintSRS, croppedArea, areaUnit, 
         centroidDecimalLongitude, centroidDecimalLatitude, 
         startYear, endYear)

# checks: looks good.
glimpse(data_filt_reduced)
Rows: 1,120,865
Columns: 13
$ scalingID                <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ siteID                   <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ samplingPeriodID         <dbl> 2, 2, 1, 2, 1, 2, 2, 1, 2, 1, 2, 2, 1, 2, 2, …
$ verbatimIdentification   <chr> "Picoides pubescens", "Empidonax virescens", …
$ scientificName           <chr> "Dryobates pubescens", "Empidonax virescens",…
$ verbatimFootprintSRS     <chr> "epsg:26918", "epsg:26918", "epsg:26918", "ep…
$ footprintSRS             <chr> "epsg:4326", "epsg:4326", "epsg:4326", "epsg:…
$ croppedArea              <dbl> 20.62383, 20.62383, 20.62383, 20.62383, 20.62…
$ areaUnit                 <chr> "km2", "km2", "km2", "km2", "km2", "km2", "km…
$ centroidDecimalLongitude <dbl> -79.74312, -79.74312, -79.74312, -79.74312, -…
$ centroidDecimalLatitude  <dbl> 42.06346, 42.06346, 42.06346, 42.06346, 42.06…
$ startYear                <int> 2000, 2000, 1980, 2000, 1980, 2000, 2000, 198…
$ endYear                  <int> 2005, 2005, 1985, 2005, 1985, 2005, 2005, 198…

Taxon matching

Code
tax_lookup <-
  read.csv(here::here("Data/input/Tax_lookup.csv"))[2:7] %>%
  filter(verbatimIdentification %in% data_filt_reduced$verbatimIdentification)

write.csv2(tax_lookup, 
           here::here("Demo_NewYork/Data/input/Tax_lookup_ny.csv"))

Write to file

Code
if (!dir.exists(here::here("Demo_NewYork/Data/output"))) {
  dir.create(here::here("Demo_NewYork/Data/output"))
}

saveRDS(data_sf2, 
        here::here("Demo_NewYork/Data/output/1_data_sf_ny.rds"))
saveRDS(data_filt_reduced, 
        here::here("Demo_NewYork/Data/output/1_data_filtered_ny.rds"))