Now we will calculate the log ratio, Jaccard, and fractal dimension.
Code
# Remove every object from the current workspace, then trigger garbage
# collection; gc() also prints its memory-usage table.
# NOTE(review): rm(list = ls()) does not detach packages or reset options, and
# the objects used further down (data_sf, species_data, ...) must be created
# again after this point -- confirm they are loaded in code not shown here.
rm(list = ls())
gc()
used (Mb) gc trigger (Mb) max used (Mb)
Ncells 592983 31.7 1351252 72.2 685968 36.7
Vcells 1086517 8.3 8388608 64.0 1876884 14.4
Code
# Attach the packages used in this section without their startup banners.
suppressPackageStartupMessages({
  library(sf)
  library(here)
  library(dplyr)
  library(tidyr)
  library(ggplot2)
})

# Switch sf to s2 spherical geometry. The data are in WGS84; the original note
# calls them "projected", but WGS84 is a geographic (longitude/latitude) CRS,
# which is the case s2 is meant for -- TODO confirm the CRS of the input data.
sf_use_s2(TRUE)
#----------------------------------------------------------#
# Calculate grid information -----
#----------------------------------------------------------#

# Custom function to get total and sampled area/cells from datasets.
#
# Args:
#   data_sf: sf object with the columns datasetID, scalingID, siteID,
#     cell_sampling_repeats, croppedArea and time_span.
#
# Returns:
#   A tibble grouped by datasetID with the columns
#     datasetID, scalingID,
#     Total_Ncells      - number of distinct siteID per datasetID x scalingID,
#     time_span,
#     Total_Ncells_samp - number of distinct siteID among rows whose
#                         cell_sampling_repeats equals 2, per
#                         datasetID x scalingID x time_span,
#     Total_area_samp   - sum of croppedArea over those same rows.
#   Dataset/scale combinations without any such rows get NA in the last three
#   columns.
calculate_grid_info <- function(data_sf) {
  # One row per distinct cell record; computed once and shared by both
  # summaries below.
  cells <- data_sf %>%
    st_drop_geometry() %>%
    distinct(
      datasetID, scalingID, siteID,
      cell_sampling_repeats, croppedArea, time_span
    )

  # `.groups = "drop_last"` is dplyr's default here; spelling it out keeps the
  # returned grouping unchanged while silencing the "grouped output" message.
  total_cells <- cells %>%
    group_by(datasetID, scalingID) %>%
    summarise(Total_Ncells = n_distinct(siteID), .groups = "drop_last")

  sampled_cells <- cells %>%
    filter(cell_sampling_repeats == 2) %>%
    group_by(datasetID, scalingID, time_span) %>%
    summarise(
      Total_Ncells_samp = n_distinct(siteID),
      Total_area_samp = sum(croppedArea, na.rm = TRUE),
      .groups = "drop_last"
    )

  # Explicit keys: a natural join would silently change if the two summaries
  # ever came to share another column.
  full_join(total_cells, sampled_cells, by = c("datasetID", "scalingID"))
}

#--------------------------------------------------#
# Apply function to compute grid information
atlas_areas <- calculate_grid_info(data_sf)
`summarise()` has grouped output by 'datasetID'. You can override using the
`.groups` argument.
`summarise()` has grouped output by 'datasetID', 'scalingID'. You can override
using the `.groups` argument.
Joining with `by = join_by(datasetID, scalingID)`
`summarise()` has grouped output by 'datasetID', 'samplingPeriodID',
'scalingID'. You can override using the `.groups` argument.
Joining with `by = join_by(datasetID, samplingPeriodID, scalingID,
verbatimIdentification)`
Code
#----------------------------------------------------------#
# Remove pres_dat_final_v2 from the workspace, then plot the distribution of
# the AOO column of occ_data_final.
# NOTE(review): in the rendered source the banner and the rm() call share one
# line; confirm rm() is meant to be live code rather than commented out.
rm(pres_dat_final_v2)

hist(
  x = occ_data_final$AOO,
  breaks = 30,
  main = "AOO distribution",
  xlab = "AOO"
)
#----------------------------------------------------------#
# Calculate log ratio AOO -----
#----------------------------------------------------------#

# Sampling periods to compare.
time_periods <- c(1, 2)

# Custom function to transform data to wide-format: one AOO column per
# sampling period instead of one row per sampling period.
#
# Args:
#   species_data_new: data frame with the columns datasetID, samplingPeriodID,
#     verbatimIdentification and AOO.
#   time_periods: samplingPeriodID values to keep; must not be empty.
#
# Returns:
#   A tibble with datasetID, verbatimIdentification and, for every entry of
#   time_periods, a column samplingPeriodID<i>_AOO, where <i> is the position
#   of that period within time_periods (not the period value itself). Rows
#   that exist in only some periods have NA in the other periods' columns.
#
# Side effects:
#   Prints the NA count per column and the first rows of the result.
#
# NOTE(review): if a species has more than one distinct AOO within a dataset
# and period, the joins below pair every such row with every matching row of
# the other periods -- confirm AOO is unique per dataset/period/species.
transform_to_wide <- function(species_data_new, time_periods = c(1, 2)) {
  stopifnot(length(time_periods) >= 1)

  join_keys <- c("verbatimIdentification", "datasetID")

  # One wide-ready table per time period.
  wide_dfs <- lapply(seq_along(time_periods), function(i) {
    period_df <- species_data_new %>%
      distinct(datasetID, samplingPeriodID, verbatimIdentification, AOO) %>%
      ungroup() %>%
      as_tibble() %>%
      filter(samplingPeriodID == time_periods[i]) %>%
      select(-samplingPeriodID)

    # Prefix every non-key column with the period index so the columns of
    # different periods stay distinguishable after joining.
    is_value <- !names(period_df) %in% join_keys
    names(period_df)[is_value] <- paste0(
      "samplingPeriodID", i, "_", names(period_df)[is_value]
    )
    period_df
  })

  # Merge the per-period tables sequentially (left fold). Base Reduce() is
  # used so the function does not depend on purrr being attached.
  sp_dat_wide <- Reduce(
    function(x, y) full_join(x, y, by = join_keys),
    wide_dfs
  )

  cat("NA counts in wide data after processing:\n")
  print(colSums(is.na(sp_dat_wide)))
  cat("Preview of wide data:\n")
  print(head(sp_dat_wide))

  sp_dat_wide
}

#----------------------------------------------------------#
# n_years = endYear - startYear for every distinct
# (datasetID, startYear, endYear) combination in species_data.
# NOTE(review): this table carries no samplingPeriodID, so a dataset with
# several start/end pairs has several rows here and a join on datasetID alone
# will match all of them -- confirm this is the intended duration.
time_between_samples <- species_data %>%
  ungroup() %>%
  select(datasetID, startYear, endYear) %>%
  distinct() %>%
  mutate(n_years = endYear - startYear)

# Apply function:
sp_dat_wide <- transform_to_wide(species_data_new, time_periods) %>%
  na.omit() # drop species lost or gained completely
Warning in left_join(., time_between_samples): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1 of `x` matches multiple rows in `y`.
ℹ Row 1 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.
Code
# Distribution of the log_R2_1 column of logRatio.
hist(
  x = logRatio$log_R2_1,
  breaks = 30,
  main = "Log ratio of AOO distribution",
  xlab = "Log ratio of AOO = log(AOO2/AOO1)"
)
Code
# Distribution of the log_R2_1_per_year column of logRatio.
hist(
  x = logRatio$log_R2_1_per_year,
  breaks = 30,
  main = "Log ratio of AOO per year distribution",
  xlab = "Log ratio of AOO per year = log(AOO2/AOO1)/duration in years"
)
Write to file
Code
# save final predictor table from grids/atlases
#
# Joins the log-ratio columns onto species_data_new by datasetID and
# verbatimIdentification, keeps one row per
# datasetID x samplingPeriodID x verbatimIdentification, and rounds every
# numeric column to 3 decimals.
#
# logRatio is first reduced to its first row per join key. The previous
# unqualified join was many-to-many and relied on the later distinct() to
# throw the extra matches away; keeping the first logRatio row up front
# selects the same rows without the fan-out, and `relationship` makes the
# join fail loudly if the keys stop being unique.
# NOTE(review): duplicate keys in logRatio mean the retained row is simply the
# first one -- confirm upstream that the duplicates carry identical values.
big_table <- species_data_new %>%
  full_join(
    logRatio %>%
      ungroup() %>%
      distinct(datasetID, verbatimIdentification, .keep_all = TRUE),
    by = c("datasetID", "verbatimIdentification"),
    relationship = "many-to-one"
  ) %>%
  distinct(
    datasetID, samplingPeriodID, verbatimIdentification,
    .keep_all = TRUE
  ) %>%
  mutate(across(where(is.numeric), function(x) round(x, 3)))
Joining with `by = join_by(datasetID, verbatimIdentification)`
Warning in full_join(species_data_new, logRatio): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1 of `x` matches multiple rows in `y`.
ℹ Row 1 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.