A - 01: Get data

Source 00_Configuration.R to load libraries

Code
library(skimr)
library(kableExtra)
source(here::here("Code/00_Configuration.R"))
x <- lapply(package_list, require, character = TRUE)
rm(x)

Connect to the database (requires open ssh tunnel to the server)

Code
con <- dbConnect(Postgres(),
  dbname = "MOBI_atlases_v1",
  host = "localhost",
  port = 5432,
  user = "frieda",
  password = Sys.getenv("PASSWORD_SERVER")
)

Get atlas data:

Code
tic()
data <- tbl(con, sql(vars$sql_query_data)) %>%
  collect() %>%
  mutate(samplingPeriodID = case_when(
    datasetID == 5 & startYear == 1985 ~ 1,
    datasetID == 5 & startYear == 2001 ~ 2,
    datasetID == 6 & startYear == 1980 ~ 1,
    datasetID == 6 & startYear == 2000 ~ 2,
    datasetID == 13 & startYear == 1974 ~ 1,
    datasetID == 13 & startYear == 1997 ~ 2,
    datasetID == 26 & startYear == 1972 ~ 1,
    datasetID == 26 & startYear == 2013 ~ 2,
    TRUE ~ NA_integer_ # Default case: NA if no match
  )) %>%
  mutate(time_span = endYear - startYear) %>%
  distinct(datasetID, scalingID, siteID, samplingPeriodID, verbatimIdentification, .keep_all = TRUE)
toc() # 6.78 seconds
74.41 sec elapsed
Code
data %>%
  group_by(datasetID, startYear) %>%
  distinct(endYear)
# A tibble: 8 × 3
# Groups:   datasetID, startYear [8]
  datasetID startYear endYear
      <int>     <int>   <int>
1        26      2013    2017
2        26      1972    1995
3         6      2000    2005
4        13      1997    2002
5         6      1980    1985
6        13      1974    1978
7         5      2001    2003
8         5      1985    1989
Code
skim(data)
Data summary
Name data
Number of rows 2595355
Number of columns 12
_______________________
Column type frequency:
character 2
numeric 10
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
verbatimIdentification 0 1 9 32 0 847 0
scientificName 0 1 9 39 0 801 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
datasetID 0 1 14.56 9.67 5.00 6.00 6.00 26.00 26.00 ▇▁▁▁▆
scalingID 0 1 2.24 5.46 1.00 1.00 1.00 2.00 128.00 ▇▁▁▁▁
siteID 0 1 1640.72 1311.01 1.00 529.00 1402.00 2444.00 5335.00 ▇▆▃▂▁
startYear 0 1 1991.54 15.29 1972.00 1980.00 1997.00 2001.00 2013.00 ▇▁▁▆▃
endYear 0 1 1999.63 11.89 1978.00 1989.00 2002.00 2005.00 2017.00 ▆▁▅▇▆
croppedArea 0 1 14272.77 245445.34 0.01 25.02 399.63 2501.65 11171485.87 ▇▁▁▁▁
centroidDecimalLongitude 0 1 -17.27 60.66 -79.76 -74.90 -1.81 17.83 145.88 ▇▃▆▁▁
centroidDecimalLatitude 0 1 46.33 7.04 24.25 42.33 43.69 49.65 80.81 ▁▇▃▁▁
samplingPeriodID 0 1 1.52 0.50 1.00 1.00 2.00 2.00 2.00 ▇▁▁▁▇
time_span 0 1 8.09 7.35 2.00 4.00 5.00 5.00 23.00 ▇▁▁▁▂

Get original grids:

Code
grids <- st_read(con, query = vars$sql_query_grid)
head(grid)
                                                                           
1 function (nx = NULL, ny = nx, col = "lightgray", lty = "dotted",         
2     lwd = par("lwd"), equilogs = TRUE)                                   
3 {                                                                        
4     atx <- if (is.null(nx) || (!is.na(nx) && nx >= 1))                   
5         .grid.at(1L, nx, log = par("xlog"), equilogs, axp = par("xaxp"), 
6             usr2 = par("usr")[1:2])                                      

Create sf that includes cells not sampled at all:

Code
data_sf <- grids %>%
  left_join(data)
head(data_sf)
Simple feature collection with 6 features and 26 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: 14.16488 ymin: 50.99961 xmax: 14.49822 ymax: 51.09962
Geodetic CRS:  WGS 84
  datasetID scalingID siteID footprintSRS verbatimFootprintSRS     area
1         5         1      1    epsg:4326            epsg:5514 130.0203
2         5         1      2    epsg:4326            epsg:5514 130.0204
3         5         1      2    epsg:4326            epsg:5514 130.0204
4         5         1      2    epsg:4326            epsg:5514 130.0204
5         5         1      2    epsg:4326            epsg:5514 130.0204
6         5         1      2    epsg:4326            epsg:5514 130.0204
  croppedArea areaUnit croppedAreaPercent centroidDecimalLongitude
1    21.07632      km2          0.1621002                 14.24822
2    41.36332      km2          0.3181295                 14.41488
3    41.36332      km2          0.3181295                 14.41488
4    41.36332      km2          0.3181295                 14.41488
5    41.36332      km2          0.3181295                 14.41488
6    41.36332      km2          0.3181295                 14.41488
  centroidDecimalLatitude croppedDecimalLongitude croppedDecimalLatitude
1                51.04961                14.30475               51.02347
2                51.04962                14.40960               51.01669
3                51.04962                14.40960               51.01669
4                51.04962                14.40960               51.01669
5                51.04962                14.40960               51.01669
6                51.04962                14.40960               51.01669
  northSouthLength eastWestLength maxLength croppedNorthSouthLength
1         11.12540       11.69999  16.13557                6.239639
2         11.12541       11.70000  16.13557                5.178504
3         11.12541       11.70000  16.13557                5.178504
4         11.12541       11.70000  16.13557                5.178504
5         11.12541       11.70000  16.13557                5.178504
6         11.12541       11.70000  16.13557                5.178504
  croppedEastWestLength croppedMaxLength lengthUnit startYear endYear
1              4.989069         7.069381         km        NA      NA
2             11.699831        12.789048         km      2001    2003
3             11.699831        12.789048         km      2001    2003
4             11.699831        12.789048         km      1985    1989
5             11.699831        12.789048         km      1985    1989
6             11.699831        12.789048         km      1985    1989
   verbatimIdentification          scientificName samplingPeriodID time_span
1                    <NA>                    <NA>               NA        NA
2 Acrocephalus scirpaceus Acrocephalus scirpaceus                2         2
3 Nucifraga caryocatactes Nucifraga caryocatactes                2         2
4     Carduelis carduelis     Carduelis carduelis                1         4
5           Ciconia nigra           Ciconia nigra                1         4
6         Passer montanus         Passer montanus                1         4
                        geometry
1 MULTIPOLYGON (((14.16488 51...
2 MULTIPOLYGON (((14.33155 51...
3 MULTIPOLYGON (((14.33155 51...
4 MULTIPOLYGON (((14.33155 51...
5 MULTIPOLYGON (((14.33155 51...
6 MULTIPOLYGON (((14.33155 51...

Get metadata for datasets:

Code
meta <- tbl(con, "MOBI_dataset") %>%
  filter(datasetID %in% c(5, 6, 13, 26)) %>%
  collect()

kableExtra::kable(meta)
datasetID datasetName datasetPublisher datasetPublisherContact licenseID rightsHolder bibliographicCitation citationIdentifier provider shareable coauthorshipRequired coauthors coauthorshipSuggested isSamplingEffortReported isOccurrenceProbabilityAvailable occurrenceModelID recordFilterMeaning taxa cellSize samplingYears
6 New York State Breeding Bird Atlas New York State Department of Environmental Conservation, New York Natural Heritage Program nybbba3@gmail.com | julie.hart@dec.ny.gov 1 New York State Department of Environmental Conservation BBA1 data: New York State Breeding Bird Atlas [Internet]. 1980 - 1985. Release 1.0. Albany (New York): New York State Department of Environmental Conservation. updated 2007 Jun 6; cited 2024 Apr 09. Available from: https://www.dec.ny.gov/animals/7312.html. | BBA2 data: New York State Breeding Bird Atlas 2000 [Internet]. 2000 - 2005. Release 1.0. Albany (New York): New York State Department of Environmental Conservation. updated 2007 Jun 11; cited 2024 Apr 09. Available from: https://www.dec.ny.gov/animals/7312.html. New York State Department of Environmental Conservation NO NO Julie Hart - atlas coordinator, julie.hart@dec.ny.gov Carmen Soria - carmendianasoria@gmail.com | Kateřina Tschernosterová - tschernosterova@fzp.czu.cz | Friederike Wölke - friederike.woelke@gmail.com | Gabriel Ortega - g.ortega.solis@gmail.com YES NO NA NULL = No information Aves 5 1980-1985, 2000-2005
13 Japan Breeding Bird Atlas Japan Bird Research Association mj-ueta@bird-research.jp | info@bird-research.jp 1 Japan Bird Research Association Mutsuyuki Ueta, Shingo Uemura, Hayama Seiji, Teppei Ara, Shinichi Takagawa, Noritomo Kawaji, Manabu Kajita, Noboru Nakamura, Saki Tabata, Yuna Uchiyama, 20km mesh bird breeding distribution data collected in the National Bird Breeding Distribution Survey, Bird Research, 2021, Volume 17, p. R5-R9, Publication date 2021/10/25, Online ISSN 1880-1595, Print ISSN 1880-1587, https://doi.org/10.11211/birdresearch.17.R5, https://www.jstage.jst.go.jp/article/birdresearch/17/0/17_R5/_article/-char/ja | BBA1: Environment Agency (1981) Breeding distribution of Japanese birds. Environment Agency, Tokyo. | BBA2: Ministry of the Environment, Natural Environment Bureau, Biodiversity Center (2004) Bird breeding distribution survey report. Ministry of the Environment, Natural Environment Bureau, Biodiversity Center, Fujiyoshida City. | BBA3: Mutsuyuki Ueda and Shingo Uemura (2021) National Bird Breeding Distribution Survey Report Let’s Describe the Current Status of Japanese Birds 2016-2021. Bird Breeding Distribution Research Committee, Fuchu City. 10.11211/birdresearch.17.R5 Japan Bird Research Association NO YES Mutsuyuki Ueta - mj-ueta@bird-research.jp Carmen Soria - carmendianasoria@gmail.com | Kateřina Tschernosterová - tschernosterova@fzp.czu.cz | Friederike Wölke - friederike.woelke@gmail.com | Gabriel Ortega - g.ortega.solis@gmail.com NO NO NA NULL = No information, 1 = Questionnaires, 2 = Field surveys, Aves 20 1974-1978, 1997-2002, 2016-2021
5 Czech Breeding Bird Atlas Czech Society for Ornithology bejcek@fzp.czu.cz | stastny@fzp.czu.cz 0 Vladimír Bejček & Karel Šťastný BBA1: Šťastný, Karel; Randík, Aladár; Hudec, Karel. 1987. Atlas hnízdního rozšíření ptáků v ČSSR: 1973/77. Praha: Academia. ISBN 21-003-87. | BBA2: Šťastný, Karel; Bejček, Vladimír; Hudec, Karel. Atlas hnízdního rozšíření ptáků v České republice 1985-1989. Ilustroval Pavel Procházka. Jinočany: Nakladatelství a vydavatelství H&H, 1997. ISBN 80-86022-18-8. | BBA3: Šťastný, Karel; Bejček, Vladimír; Hudec, Karel. 2006. Atlas hnízdního rozšíření ptáků v České republice: 2001-2003. Praha: Aventinum. ISBN 80-86858-19-7. | BBA4: Šťastný, Karel; Bejček, Vladimír; Mikuláš, Ivan; Telenský, Tomáš. 2021. Atlas hnízdního rozšíření ptáků v České republice: 2014-2017. Praha: Aventinum. ISBN 978-80-7442-130-3. ISBN 21-003-87 | ISBN 80-86022-18-8 | ISBN 80-86858-19-7 | ISBN 978-80-7442-130-3 Vladimír Bejček | Karel Šťastný NO YES Vladimír Bejček - bejcek@fzp.czu.cz | Karel Šťastný - stastny@fzp.czu.cz | BBA4: Ivan Mikulas - ivan.mikulas@nature.cz Carmen Soria - carmendianasoria@gmail.com | Kateřina Tschernosterová - tschernosterova@fzp.czu.cz | Friederike Wölke - wolke@fzp.czu.cz | Gabriel Ortega - g.ortega.solis@gmail.com | Francois Leroy - francois.libert.leroy@gmail.com YES NO NA NULL = No information Aves 10 1973-1977, 1985-1989, 2001-2003, 2014-2017
26 European Breeding Birds Atlas European Bird Census Council Sergi Herrando (vice-chair, European Bird Census Council) - ornitologia@ornitologia.org, s.herrando@creaf.uab.cat 0 European Bird Census Council EBBA1 book: Hagemeijer, W.J.M. & Blair, M.J. (1997). The EBCC Atlas of European Breeding Birds: Their Distribution and Abundance. T. & A.D. Poyser, London.
EBBA1 dataset/web: Hagemeyer W, Blair M, Loos W (2016). EBCC Atlas of European Breeding Birds. Version 1.3. European Bird Census Council (EBCC). Occurrence dataset https://doi.org/10.15468/adtfvf accessed via GBIF.org on 2024-04-09. | EBBA2 book: Keller, V., Herrando, S., Voříšek, P., Franch, M., Kipson, M., Milanesi, P., Martí, D., Anton, M., Klvaňová, A., Kalyakin, M.V., Bauer, H.-G. & Foppen, R.P.B. (2020). European Breeding Bird Atlas 2: Distribution, Abundance and Change. European Bird Census Council & Lynx Edicions, Barcelona. | EBBA2 web: EBCC (2022). European Breeding Bird Atlas 2 website. European Bird Census Council. Accessed from: http://ebba2.info (Day/Month/Year). European Bird Census Council NO YES Sergi Herrando - ornitologia@ornitologia.org | Petr Voříšek - Vorisek@ebcc.info | Verena Keller - verena.keller@vogelwarte.ch Carmen Soria - carmendianasoria@gmail.com | Kateřina Tschernosterová - tschernosterova@fzp.czu.cz | Friederike Wölke - wolke@fzp.czu.cz | Gabriel Ortega - g.ortega.solis@gmail.com YES NO NA NULL = No information, 1 = Apparent trust 2 = Trusted occurrences Aves 50 1972-1995, 2013-2017

Disconnect from MOBI db:

Code
dbDisconnect(con)

Write to file

save METADATA for atlases to documentation file:

Code
write.csv(
  meta,
  here("Documentation", "Metadata", "METADATA_datasets.csv")
)

write to input data folder

Code
saveRDS(grids, vars$grid)

filtered by sampling period & croppedArea & recordFilter

Code
saveRDS(data, vars$data)

filtered by sampling period & croppedArea & recordFilter but with unsampled cells as NA in verbatimIdentification

Code
saveRDS(data_sf, vars$data_sf)

Get ‘BirdLife 2024’ data

Connect to the database

Code
con <-
  dbConnect(Postgres(),
    dbname = "Birds_of_the_World",
    host = "localhost",
    port = 5432,
    user = "frieda",
    password = Sys.getenv("PASSWORD_SERVER")
  )

Get species names (BirdLife 2024 taxonomy - some not matched)

Code
name_vector_sql <- unique(data$scientificName)

Ensure name_vector_sql is correctly formatted as an SQL-compatible string

Code
name_vector_sql <- paste0("'", paste(name_vector_sql, collapse = "', '"), "'")

Construct the query:

presence = “Extant”, “probably extant”, “possibly extant” (not “possibly extinct” or “extinct”)

Code
bl_query <-
  paste0(
    "SELECT \"sci_name\", \"presence\", \"origin\", \"seasonal\",\"geometry\" ",
    "FROM \"MOBI_botw_multipolygon_2024\" ",
    "WHERE \"sci_name\" IN (", name_vector_sql, ") ",
    "AND \"presence\" IN (1, 2, 3)"
  )
Code
tic()
BirdLife <- st_read(con, query = bl_query)
toc() # 106.88 sec
2383.57 sec elapsed
Code
head(BirdLife)
Simple feature collection with 6 features and 4 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -157.0839 ymin: -8.821678 xmax: 176.9767 ymax: 70.2287
Geodetic CRS:  WGS 84
            sci_name presence origin seasonal                       geometry
1 Accipiter brevipes        1      1        2 MULTIPOLYGON (((59.90518 48...
2 Accipiter brevipes        1      1        4 MULTIPOLYGON (((41.73014 42...
3 Accipiter brevipes        1      1        3 MULTIPOLYGON (((37.27209 19...
4  Aegolius funereus        1      1        1 MULTIPOLYGON (((8.98562 63....
5  Aegolius funereus        1      1        3 MULTIPOLYGON (((9.397705 57...
6         Aix sponsa        1      1        2 MULTIPOLYGON (((-89.57379 4...

disconnect from BirdLife db

Code
dbDisconnect(con)

For global range maps we need: origin = c(1) (native) seasonal = c(1,2) (breeding and resident)

For invasive species we need: origin = 3 (introduced)

Code
BirdLife_global <- BirdLife %>%
  filter(origin == 1 & seasonal %in% c(1, 2))
BirdLife_introduced <- BirdLife %>%
  filter(origin == 3)

Save BirdLife24 to shp

Code
st_write(BirdLife_global, here::here("Data/input/shp_global/BirdLife_global.shp"), append = F)
st_write(BirdLife_introduced, here::here("Data/input/shp_introduced/BirdLife_introduced.shp"), append = F)