A - 13: Data visualization

Libraries

Code
# Attach the project configuration and the packages used in this report,
# silencing package start-up messages.
suppressPackageStartupMessages({
  library(here)
  # Presumably defines `package_list`, which is used on the next line --
  # TODO confirm against Code/00_Configuration.R.
  source(here::here("Code/00_Configuration.R"))
  # `character.only` is written out in full instead of relying on partial
  # argument matching of `character`.
  lapply(package_list, require, character.only = TRUE)


  # Packages attached explicitly for this report.
  library(corrr)
  library(fastDummies)
  library(inspectdf)
  library(caret)
  library(summarytools)
})

Read data

Code
# Load the merged predictor table and keep the rows with samplingPeriodID == 1.
dta <- filter(
  readRDS(here::here("Data/output/1_all_predictors_merged.rds")),
  samplingPeriodID == 1
)

Set variable vectors for hypotheses

Code
# Species identifier columns.
sp_id <- c("verbatimIdentification", "scientificName")

# Predictor columns grouped under hypothesis H1.
H1 <- c(
  "Mass",
  "GlobRangeSize_km2",
  "Migration",
  "Habitat_5",
  "Generalism",
  "Threatened",
  "pd"
)

# Predictor columns grouped under hypothesis H2.
H2 <- c(
  "D_AOO_a",
  "mean_lnLac",
  "AOO",
  "joincount_delta",
  "circNorm",
  "minDist_toBorder_centr"
)

# Predictor column grouped under hypothesis H3.
H3 <- "datasetID"

# All predictors, in the order H1, H2, H3.
predictors <- c(H1, H2, H3)

# Response columns.
responses <- c("Jaccard_dissim", "log_R2_1", "log_R2_1_per_year")

Reduce data to model variables

Code
# Keep only the identifier, response and predictor columns (in that order:
# sp_id, responses, H3, H1, H2) and remove any grouping from the result.
dta_new <- ungroup(
  select(dta, all_of(c(sp_id, responses, H3, H1, H2)))
)

Correlations

We dummy-code the categorical variables (Migration, Threatened, Habitat_5 and Generalism) so that they can be included in the correlation matrix.

Code
# Predictor table for the correlation matrix: drop identifiers, responses and
# datasetID, recode the four categorical predictors as factors with explicit
# levels, and keep complete rows only.
dummy_reduced <-
  dta_new %>%
  select(-all_of(c(sp_id, responses, H3))) %>%
  mutate(
    # Any value that is not listed in `levels` -- including the literal string
    # "NA" -- becomes a real NA, so no separate na_if() step is needed.
    Migration = factor(
      as.character(Migration),
      levels = c("1", "2", "3")
    ),
    Threatened = factor(
      as.character(Threatened),
      levels = c("1", "0")
    ),
    Habitat_5 = factor(
      as.character(Habitat_5),
      levels = c("closed", "freshwater", "open", "human", "marine")
    ),
    Generalism = factor(
      as.character(Generalism),
      levels = c("1", "0")
    )
  ) %>%
  na.omit()

# One indicator column per factor level; the original factor columns are
# removed. `remove_first_dummy = FALSE` keeps every level, so the indicators
# of one factor are linearly dependent. That is acceptable for a correlation
# matrix, but this table should not be used as regression input as is.
dta_dummies <-
  fastDummies::dummy_cols(dummy_reduced,
    remove_first_dummy = FALSE,
    remove_selected_columns = TRUE
  )

# Pairwise correlations, reordered with rearrange().
# The former `tidyr::replace_na(list(r = 1))` step was dropped: the wide
# correlate() result has a `term` column plus one column per variable and no
# column named `r`, so that call changed nothing.
cor_matrix <-
  corrr::correlate(dta_dummies,
    use = "pairwise.complete.obs",
    quiet = TRUE
  ) %>%
  rearrange()
Registered S3 methods overwritten by 'registry':
  method               from 
  print.registry_field proxy
  print.registry_entry proxy
Registered S3 method overwritten by 'seriation':
  method         from 
  reorder.hclust vegan

a) tile-chart

Code
# Plot the correlation matrix.
corrr::rplot(cor_matrix)

Inspect data

a) Pairs-plot of all variables

Code
# Pairs plot of all model variables (species identifier columns dropped),
# coloured by datasetID.
GGally::ggpairs(
  data = as.data.frame(
    select(dta_new, -verbatimIdentification, -scientificName)
  ),
  mapping = ggplot2::aes(colour = datasetID),
  progress = FALSE
)

b) Inspect variable distributions

  1. variables & types
Code
# Summarise the column types of dta_new and plot the summary.
inspect_types(dta_new) %>%
  show_plot()

  1. numeric variables (univariate plots)
Code
# Summarise the numeric columns of dta_new and plot the summary.
inspect_num(dta_new) %>%
  show_plot()

  1. categorical variable imbalance (most frequent level)
Code
# Imbalance of the categorical columns; the two species identifier columns
# are dropped before inspection.
inspect_imb(dta_new %>%
  select(
    -verbatimIdentification,
    -scientificName
  )) %>%
  show_plot()

  1. categorical variables: frequency of levels
Code
# Level frequencies of the categorical columns; the two species identifier
# columns are dropped before inspection.
inspect_cat(dta_new %>%
  select(-verbatimIdentification, -scientificName)) %>%
  show_plot()

Code
# Bar chart of Threatened, with bars filled by datasetID.
ggplot(data = dta_new, mapping = aes(x = Threatened, fill = datasetID)) +
  geom_bar()

Code
# Bar chart of Habitat_5, with bars filled by datasetID.
ggplot(data = dta_new, mapping = aes(x = Habitat_5, fill = datasetID)) +
  geom_bar()

  1. strongest correlations between variables
Code
# Correlations between the columns of dta_new, shown as a plot.
# NOTE(review): the rendered output warns "Columns with 0 variance found" for
# Jaccard_dissim, log_R2_1, log_R2_1_per_year and D_AOO_a, although the
# descriptive statistics in this report show non-zero Std.Dev for all four --
# investigate before relying on this plot.
inspect_cor(dta_new) %>%
  show_plot()
Warning: Columns with 0 variance found: Jaccard_dissim, log_R2_1,
log_R2_1_per_year, D_AOO_a

c) Cross-tabulations

Categorical Variables

Code
# Cross-tabulation of Habitat_5 (rows) by Threatened (columns); the rendered
# table reports row proportions.
# summarytools::freq(dta_new)
summarytools::ctable(
  x = dta_new$Habitat_5,
  y = dta_new$Threatened
)
Cross-Tabulation, Row Proportions  
Habitat_5 * Threatened  
Data Frame: dta_new  

------------ ------------ ------------- ------------- ----------- ---------------
               Threatened             0             1        <NA>           Total
   Habitat_5                                                                     
      closed                364 (92.6%)    29 ( 7.4%)   0 ( 0.0%)    393 (100.0%)
  freshwater                215 (81.1%)    50 (18.9%)   0 ( 0.0%)    265 (100.0%)
       human                 40 (81.6%)     9 (18.4%)   0 ( 0.0%)     49 (100.0%)
      marine                 74 (77.9%)    21 (22.1%)   0 ( 0.0%)     95 (100.0%)
        open                202 (83.1%)    41 (16.9%)   0 ( 0.0%)    243 (100.0%)
        <NA>                  7 (77.8%)     0 ( 0.0%)   2 (22.2%)      9 (100.0%)
       Total                902 (85.6%)   150 (14.2%)   2 ( 0.2%)   1054 (100.0%)
------------ ------------ ------------- ------------- ----------- ---------------
Code
# Cross-tabulation of datasetID (rows) by Threatened (columns); the rendered
# table reports row proportions.
summarytools::ctable(
  x = dta_new$datasetID,
  y = dta_new$Threatened
)
Cross-Tabulation, Row Proportions  
datasetID * Threatened  
Data Frame: dta_new  

----------- ------------ ------------- ------------- ---------- ---------------
              Threatened             0             1       <NA>           Total
  datasetID                                                                    
          5                178 (88.6%)    23 (11.4%)   0 (0.0%)    201 (100.0%)
          6                207 (88.8%)    24 (10.3%)   2 (0.9%)    233 (100.0%)
         13                179 (86.1%)    29 (13.9%)   0 (0.0%)    208 (100.0%)
         26                338 (82.0%)    74 (18.0%)   0 (0.0%)    412 (100.0%)
      Total                902 (85.6%)   150 (14.2%)   2 (0.2%)   1054 (100.0%)
----------- ------------ ------------- ------------- ---------- ---------------
Code
# Descriptive statistics for dta_new; the rendered output notes that the
# non-numerical columns are ignored.
summarytools::descr(dta_new)
Non-numerical variable(s) ignored: verbatimIdentification, scientificName, datasetID, Migration, Habitat_5, Generalism, Threatened
Descriptive Statistics  
dta_new  
N: 1054  

                           AOO   circNorm   D_AOO_a   GlobRangeSize_km2   Jaccard_dissim
----------------- ------------ ---------- --------- ------------------- ----------------
             Mean    700709.67      76.29      1.30         14098593.16             0.50
          Std.Dev   1353154.87     102.62      0.51         14053605.33             0.29
              Min         0.69       1.27      0.00              587.60             0.00
               Q1     13566.10      17.40      0.95          4856303.72             0.26
           Median     72260.14      44.09      1.40         10151559.24             0.50
               Q3    474578.44      90.35      1.73         18860060.90             0.74
              Max   5626163.31     733.99      2.00        118014530.88             1.00
              MAD    102734.28      46.11      0.54          9904227.49             0.36
              IQR    456992.66      72.91      0.77         13970352.29             0.49
               CV         1.93       1.35      0.39                1.00             0.57
         Skewness         2.19       3.09     -0.66                2.41             0.02
      SE.Skewness         0.08       0.08      0.08                0.08             0.08
         Kurtosis         3.66      11.25     -0.43                9.47            -1.15
          N.Valid      1054.00    1054.00   1054.00             1051.00          1054.00
                N      1054.00    1054.00   1054.00             1054.00          1054.00
        Pct.Valid       100.00     100.00    100.00               99.72           100.00

Table: Table continues below

 

                    joincount_delta   log_R2_1   log_R2_1_per_year       Mass   mean_lnLac
----------------- ----------------- ---------- ------------------- ---------- ------------
             Mean              0.94       0.17                0.01     453.65         1.40
          Std.Dev              0.72       0.67                0.02    1059.38         1.10
              Min             -0.03      -3.17               -0.07       3.09         0.06
               Q1              0.35      -0.04                0.00      19.90         0.55
           Median              0.80       0.05                0.00      77.50         1.13
               Q3              1.39       0.26                0.01     452.10         2.10
              Max              2.98       7.96                0.28   10682.04         5.73
              MAD              0.73       0.22                0.01      98.30         1.16
              IQR              1.04       0.31                0.01     432.20         1.55
               CV              0.77       4.00                4.18       2.34         0.78
         Skewness              0.71       3.13                3.21       5.34         0.83
      SE.Skewness              0.08       0.08                0.08       0.08         0.08
         Kurtosis             -0.33      27.92               26.24      36.16         0.03
          N.Valid           1050.00    1054.00             1054.00    1045.00      1054.00
                N           1054.00    1054.00             1054.00    1054.00      1054.00
        Pct.Valid             99.62     100.00              100.00      99.15       100.00

Table: Table continues below

 

                    minDist_toBorder_centr        pd
----------------- ------------------------ ---------
             Mean                 72934.72      8.20
          Std.Dev                 67093.11      6.31
              Min                   537.50      1.11
               Q1                 27412.12      4.36
           Median                 53372.12      6.19
               Q3                 84898.34      9.71
              Max                439876.69     56.96
              MAD                 45151.34      3.40
              IQR                 57471.17      5.36
               CV                     0.92      0.77
         Skewness                     1.88      2.72
      SE.Skewness                     0.08      0.08
         Kurtosis                     4.30     11.38
          N.Valid                  1054.00   1045.00
                N                  1054.00   1054.00
        Pct.Valid                   100.00     99.15

Numerical variables:

Code
# Per-column summary of dta_new (values, frequencies, valid/missing counts).
# With plain.ascii = FALSE the rendered output uses markdown markup.
summarytools::dfSummary(dta_new, plain.ascii = FALSE)
### Data Frame Summary  
#### dta_new  
**Dimensions:** 1054 x 19  
**Duplicates:** 0  

--------------------------------------------------------------------------------------------------------------------------------------------------
No   Variable                  Stats / Values                     Freqs (% of Valid)     Graph                                Valid      Missing  
---- ------------------------- ---------------------------------- ---------------------- ------------------------------------ ---------- ---------
1    verbatimIdentification\   1\. Accipiter gentilis\            4 ( 0.4%)\             \                                    1054\      0\       
     [character]               2\. Anas crecca\                   4 ( 0.4%)\             \                                    (100.0%)   (0.0%)   
                               3\. Anas platyrhynchos\            4 ( 0.4%)\             \                                                        
                               4\. Asio otus\                     4 ( 0.4%)\             \                                                        
                               5\. Falco peregrinus\              4 ( 0.4%)\             \                                                        
                               6\. Gallinula chloropus\           4 ( 0.4%)\             \                                                        
                               7\. Hirundo rustica\               4 ( 0.4%)\             \                                                        
                               8\. Loxia curvirostra\             4 ( 0.4%)\             \                                                        
                               9\. Mergus merganser\              4 ( 0.4%)\             \                                                        
                               10\. Nycticorax nycticorax\        4 ( 0.4%)\             \                                                        
                               [ 752 others ]                     1014 (96.2%)           IIIIIIIIIIIIIIIIIII                                      

2    scientificName\           1\. Accipiter gentilis\            4 ( 0.4%)\             \                                    1054\      0\       
     [character]               2\. Anas crecca\                   4 ( 0.4%)\             \                                    (100.0%)   (0.0%)   
                               3\. Anas platyrhynchos\            4 ( 0.4%)\             \                                                        
                               4\. Asio otus\                     4 ( 0.4%)\             \                                                        
                               5\. Corvus corone\                 4 ( 0.4%)\             \                                                        
                               6\. Falco peregrinus\              4 ( 0.4%)\             \                                                        
                               7\. Gallinula chloropus\           4 ( 0.4%)\             \                                                        
                               8\. Hirundo rustica\               4 ( 0.4%)\             \                                                        
                               9\. Loxia curvirostra\             4 ( 0.4%)\             \                                                        
                               10\. Mareca strepera\              4 ( 0.4%)\             \                                                        
                               [ 716 others ]                     1014 (96.2%)           IIIIIIIIIIIIIIIIIII                                      

3    Jaccard_dissim\           Mean (sd) : 0.5 (0.3)\             631 distinct values    \ \ : : \ \ : . : \ \ \ \ .\         1054\      0\       
     [numeric]                 min < med < max:\                                         . : : : : : : : : :\                 (100.0%)   (0.0%)   
                               0 < 0.5 < 1\                                              : : : : : : : : : :\                                     
                               IQR (CV) : 0.5 (0.6)                                      : : : : : : : : : :\                                     
                                                                                         : : : : : : : : : :                                      

4    log_R2_1\                 Mean (sd) : 0.2 (0.7)\             689 distinct values    \ \ \ \ :\                           1054\      0\       
     [numeric]                 min < med < max:\                                         \ \ \ \ :\                           (100.0%)   (0.0%)   
                               -3.2 < 0 < 8\                                             \ \ \ \ :\                                               
                               IQR (CV) : 0.3 (4)                                        \ \ \ \ : :\                                             
                                                                                         \ \ \ \ : : .                                            

5    log_R2_1_per_year\        Mean (sd) : 0 (0)\                 130 distinct values    \ \ \ \ :\                           1054\      0\       
     [numeric]                 min < med < max:\                                         \ \ . :\                             (100.0%)   (0.0%)   
                               -0.1 < 0 < 0.3\                                           \ \ : :\                                                 
                               IQR (CV) : 0 (4.2)                                        \ \ : :\                                                 
                                                                                         \ \ : : .                                                

6    datasetID\                1\. 5\                             201 (19.1%)\           III \                                1054\      0\       
     [factor]                  2\. 6\                             233 (22.1%)\           IIII \                               (100.0%)   (0.0%)   
                               3\. 13\                            208 (19.7%)\           III \                                                    
                               4\. 26                             412 (39.1%)            IIIIIII                                                  

7    Mass\                     Mean (sd) : 453.6 (1059.4)\        657 distinct values    :\                                   1045\      9\       
     [numeric]                 min < med < max:\                                         :\                                   (99.1%)    (0.9%)   
                               3.1 < 77.5 < 10682\                                       :\                                                       
                               IQR (CV) : 432.2 (2.3)                                    :\                                                       
                                                                                         : .                                                      

8    GlobRangeSize_km2\        Mean (sd) : 14098593 (14053605)\   723 distinct values    :\                                   1051\      3\       
     [numeric]                 min < med < max:\                                         :\                                   (99.7%)    (0.3%)   
                               587.6 < 10151559 < 118014531\                             : .\                                                     
                               IQR (CV) : 13970352 (1)                                   : :\                                                     
                                                                                         : : : .                                                  

9    Migration\                1\. 1\                             235 (22.5%)\           IIII \                               1045\      9\       
     [factor]                  2\. 2\                             247 (23.6%)\           IIII \                               (99.1%)    (0.9%)   
                               3\. 3\                             563 (53.9%)\           IIIIIIIIII \                                             
                               4\. NA                             0 ( 0.0%)                                                                       

10   Habitat_5\                1\. closed\                        393 (37.6%)\           IIIIIII \                            1045\      9\       
     [factor]                  2\. freshwater\                    265 (25.4%)\           IIIII \                              (99.1%)    (0.9%)   
                               3\. human\                         49 ( 4.7%)\            \                                                        
                               4\. marine\                        95 ( 9.1%)\            I \                                                      
                               5\. open                           243 (23.3%)            IIII                                                     

11   Generalism\               1\. 0\                             918 (87.1%)\           IIIIIIIIIIIIIIIII \                  1054\      0\       
     [factor]                  2\. 1                              136 (12.9%)            II                                   (100.0%)   (0.0%)   

12   Threatened\               1\. 0\                             902 (85.7%)\           IIIIIIIIIIIIIIIII \                  1052\      2\       
     [factor]                  2\. 1                              150 (14.3%)            II                                   (99.8%)    (0.2%)   

13   pd\                       Mean (sd) : 8.2 (6.3)\             645 distinct values    :\                                   1045\      9\       
     [numeric]                 min < med < max:\                                         :\                                   (99.1%)    (0.9%)   
                               1.1 < 6.2 < 57\                                           : .\                                                     
                               IQR (CV) : 5.4 (0.8)                                      : :\                                                     
                                                                                         : : : .                                                  

14   D_AOO_a\                  Mean (sd) : 1.3 (0.5)\             736 distinct values    \ \ \ \ \ \ \ \ \ \ \ \ \ \ . : .\   1054\      0\       
     [numeric]                 min < med < max:\                                         \ \ \ \ \ \ \ \ \ \ \ \ . : : :\     (100.0%)   (0.0%)   
                               0 < 1.4 < 2\                                              \ \ \ \ \ \ \ \ . : : : : :\                             
                               IQR (CV) : 0.8 (0.4)                                      \ \ \ \ \ \ . : : : : : :\                               
                                                                                         : : : : : : : : : :                                      

15   mean_lnLac\               Mean (sd) : 1.4 (1.1)\             1051 distinct values   : .\                                 1054\      0\       
     [numeric]                 min < med < max:\                                         : :\                                 (100.0%)   (0.0%)   
                               0.1 < 1.1 < 5.7\                                          : : .\                                                   
                               IQR (CV) : 1.6 (0.8)                                      : : : : .\                                               
                                                                                         : : : : : : . .                                          

16   AOO\                      Mean (sd) : 700709.7 (1353155)\    1051 distinct values   :\                                   1054\      0\       
     [numeric]                 min < med < max:\                                         :\                                   (100.0%)   (0.0%)   
                               0.7 < 72260.1 < 5626163\                                  :\                                                       
                               IQR (CV) : 456992.7 (1.9)                                 :\                                                       
                                                                                         : .                                                      

17   joincount_delta\          Mean (sd) : 0.9 (0.7)\             1010 distinct values   . :\                                 1050\      4\       
     [numeric]                 min < med < max:\                                         : : : .\                             (99.6%)    (0.4%)   
                               0 < 0.8 < 3\                                              : : : : .\                                               
                               IQR (CV) : 1 (0.8)                                        : : : : : : . .\                                         
                                                                                         : : : : : : : : . .                                      

18   circNorm\                 Mean (sd) : 76.3 (102.6)\          1051 distinct values   :\                                   1054\      0\       
     [numeric]                 min < med < max:\                                         :\                                   (100.0%)   (0.0%)   
                               1.3 < 44.1 < 734\                                         :\                                                       
                               IQR (CV) : 72.9 (1.3)                                     : .\                                                     
                                                                                         : :                                                      

19   minDist_toBorder_centr\   Mean (sd) : 72934.7 (67093.1)\     1051 distinct values   :\                                   1054\      0\       
     [numeric]                 min < med < max:\                                         : .\                                 (100.0%)   (0.0%)   
                               537.5 < 53372.1 < 439876.7\                               : :\                                                     
                               IQR (CV) : 57471.2 (0.9)                                  : :\                                                     
                                                                                         : : : . .                                                
--------------------------------------------------------------------------------------------------------------------------------------------------
Code
# Descriptive statistics (stats = "common") of dta_new, computed separately
# for each datasetID and printed with variables as rows (transpose = TRUE).
# NOTE(review): the rendered output shows "Error in match.call(f, call)" and
# "metadata extraction terminated unexpectedly" warnings for every group of
# this call -- inspect the resulting tables carefully.
summarytools::stby(
  data = dta_new,
  INDICES = dta_new$datasetID,
  FUN = summarytools::descr,
  stats = "common",
  # TRUE instead of T: T is an ordinary variable and can be reassigned.
  transpose = TRUE
)
Error in match.call(f, call): ... in einer Situation benutzt, in der es nicht existiert
Warning in parse_call(mc = match.call(), var_name = (ncol(xx) == 1), var_label
= (ncol(xx) == : metadata extraction terminated unexpectedly; inspect results
carefully
Error in match.call(f, call): ... in einer Situation benutzt, in der es nicht existiert
Warning in parse_call(mc = match.call(), var_name = (ncol(xx) == 1), var_label
= (ncol(xx) == : metadata extraction terminated unexpectedly; inspect results
carefully
Error in match.call(f, call): ... in einer Situation benutzt, in der es nicht existiert
Warning in parse_call(mc = match.call(), var_name = (ncol(xx) == 1), var_label
= (ncol(xx) == : metadata extraction terminated unexpectedly; inspect results
carefully
Error in match.call(f, call): ... in einer Situation benutzt, in der es nicht existiert
Warning in parse_call(mc = match.call(), var_name = (ncol(xx) == 1), var_label
= (ncol(xx) == : metadata extraction terminated unexpectedly; inspect results
carefully
Non-numerical variable(s) ignored: verbatimIdentification, scientificName, datasetID, Migration, Habitat_5, Generalism, Threatened
Descriptive Statistics  

                                      Mean       Std.Dev         Min        Median           Max
---------------------------- ------------- ------------- ----------- ------------- -------------
                         AOO      40551.97      29464.72      136.03      42194.75      78308.81
                    circNorm         29.09         25.54        1.28         19.89         99.62
                     D_AOO_a          1.43          0.56        0.00          1.61          2.00
           GlobRangeSize_km2   17897011.73   11653330.19   250505.58   15439514.89   63574544.76
              Jaccard_dissim          0.42          0.31        0.00          0.40          1.00
             joincount_delta          0.39          0.37       -0.03          0.32          1.69
                    log_R2_1          0.14          0.43       -1.17          0.03          2.37
           log_R2_1_per_year          0.01          0.02       -0.06          0.00          0.12
                        Mass        431.05       1049.59        5.54         77.14      10682.04
                  mean_lnLac          0.50          0.59        0.09          0.20          2.59
      minDist_toBorder_centr      74163.15      18565.12      773.21      81937.12     102402.45
                          pd          7.81          5.40        1.20          5.92         29.59

Table: Table continues below

 

                               N.Valid        N   Pct.Valid
---------------------------- --------- -------- -----------
                         AOO    201.00   201.00      100.00
                    circNorm    201.00   201.00      100.00
                     D_AOO_a    201.00   201.00      100.00
           GlobRangeSize_km2    201.00   201.00      100.00
              Jaccard_dissim    201.00   201.00      100.00
             joincount_delta    197.00   201.00       98.01
                    log_R2_1    201.00   201.00      100.00
           log_R2_1_per_year    201.00   201.00      100.00
                        Mass    199.00   201.00       99.00
                  mean_lnLac    201.00   201.00      100.00
      minDist_toBorder_centr    201.00   201.00      100.00
                          pd    199.00   201.00       99.00
Non-numerical variable(s) ignored: verbatimIdentification, scientificName, datasetID, Migration, Habitat_5, Generalism, Threatened

N: 233  

                                      Mean       Std.Dev        Min       Median           Max
---------------------------- ------------- ------------- ---------- ------------ -------------
                         AOO      36317.49      41373.36       4.98     14775.02     124025.31
                    circNorm        174.79        173.29       1.27       116.16        733.99
                     D_AOO_a          1.31          0.55       0.00         1.41          1.98
           GlobRangeSize_km2   10454413.81   12006642.63   38728.84   6322724.06   75222636.54
              Jaccard_dissim          0.61          0.28       0.00         0.68          1.00
             joincount_delta          0.70          0.51       0.00         0.59          2.16
                    log_R2_1          0.06          0.66      -1.79         0.01          3.75
           log_R2_1_per_year          0.00          0.03      -0.07         0.00          0.14
                        Mass        324.87        747.25       3.09        47.18       5791.37
                  mean_lnLac          1.32          1.23       0.06         0.81          4.51
      minDist_toBorder_centr      35110.41      17495.36    1179.47     35571.94      93755.94
                          pd          7.55          6.26       1.47         5.65         51.14

Table: Table continues below

 

                               N.Valid        N   Pct.Valid
---------------------------- --------- -------- -----------
                         AOO    233.00   233.00      100.00
                    circNorm    233.00   233.00      100.00
                     D_AOO_a    233.00   233.00      100.00
           GlobRangeSize_km2    231.00   233.00       99.14
              Jaccard_dissim    233.00   233.00      100.00
             joincount_delta    233.00   233.00      100.00
                    log_R2_1    233.00   233.00      100.00
           log_R2_1_per_year    233.00   233.00      100.00
                        Mass    230.00   233.00       98.71
                  mean_lnLac    233.00   233.00      100.00
      minDist_toBorder_centr    233.00   233.00      100.00
                          pd    230.00   233.00       98.71
Non-numerical variable(s) ignored: verbatimIdentification, scientificName, datasetID, Migration, Habitat_5, Generalism, Threatened

N: 208  

                                      Mean       Std.Dev       Min       Median            Max
---------------------------- ------------- ------------- --------- ------------ --------------
                         AOO      66495.56      86418.79      0.69     29119.11      345243.91
                    circNorm         58.98         44.85      1.27        52.49         189.98
                     D_AOO_a          1.05          0.55      0.00         1.08           1.96
           GlobRangeSize_km2   14255068.43   16200414.70    587.60   9621437.80   110453714.25
              Jaccard_dissim          0.69          0.23      0.00         0.74           1.00
             joincount_delta          0.58          0.40     -0.03         0.58           1.87
                    log_R2_1          0.38          1.05     -1.86         0.16           7.96
           log_R2_1_per_year          0.01          0.04     -0.06         0.00           0.28
                        Mass        366.69        824.55      5.54        89.94        8785.99
                  mean_lnLac          1.42          1.00      0.24         1.07           4.30
      minDist_toBorder_centr      29552.26      25679.79   1718.86     21550.57      235785.26
                          pd          7.96          5.84      1.11         6.29          51.14

Table: Table continues below

 

                               N.Valid        N   Pct.Valid
---------------------------- --------- -------- -----------
                         AOO    208.00   208.00      100.00
                    circNorm    208.00   208.00      100.00
                     D_AOO_a    208.00   208.00      100.00
           GlobRangeSize_km2    208.00   208.00      100.00
              Jaccard_dissim    208.00   208.00      100.00
             joincount_delta    208.00   208.00      100.00
                    log_R2_1    208.00   208.00      100.00
           log_R2_1_per_year    208.00   208.00      100.00
                        Mass    206.00   208.00       99.04
                  mean_lnLac    208.00   208.00      100.00
      minDist_toBorder_centr    208.00   208.00      100.00
                          pd    206.00   208.00       99.04
Non-numerical variable(s) ignored: verbatimIdentification, scientificName, datasetID, Migration, Habitat_5, Generalism, Threatened

N: 412  

                                      Mean       Std.Dev       Min        Median            Max
---------------------------- ------------- ------------- --------- ------------- --------------
                         AOO    1718699.01    1726344.06    784.13    1011167.89     5626163.31
                    circNorm         52.35         37.20      1.43         43.35         206.16
                     D_AOO_a          1.35          0.37      0.11          1.43           1.89
           GlobRangeSize_km2   14209971.35   14546523.20   8313.39   10569402.42   118014530.88
              Jaccard_dissim          0.38          0.22      0.00          0.34           1.00
             joincount_delta          1.50          0.68      0.00          1.49           2.98
                    log_R2_1          0.13          0.47     -3.17          0.06           3.11
           log_R2_1_per_year          0.00          0.01     -0.07          0.00           0.07
                        Mass        580.55       1283.92      5.54        104.37        9512.09
                  mean_lnLac          1.87          0.97      0.64          1.66           5.73
      minDist_toBorder_centr     115628.17      85258.05    537.50     103462.17      439876.69
                          pd          8.88          6.90      1.20          6.67          56.96

Table: Table continues below

 

                               N.Valid        N   Pct.Valid
---------------------------- --------- -------- -----------
                         AOO    412.00   412.00      100.00
                    circNorm    412.00   412.00      100.00
                     D_AOO_a    412.00   412.00      100.00
           GlobRangeSize_km2    411.00   412.00       99.76
              Jaccard_dissim    412.00   412.00      100.00
             joincount_delta    412.00   412.00      100.00
                    log_R2_1    412.00   412.00      100.00
           log_R2_1_per_year    412.00   412.00      100.00
                        Mass    410.00   412.00       99.51
                  mean_lnLac    412.00   412.00      100.00
      minDist_toBorder_centr    412.00   412.00      100.00
                          pd    410.00   412.00       99.51

Feature plots

All atlases together

Code
# Use lattice's white-background colour theme for the plots that follow
# (warn = FALSE is passed through to trellis.par.set unchanged).
trellis.par.set(
  warn = FALSE,
  theme = col.whitebg()
)

Jaccard

H1 ~ Jaccard

Code
# Scatterplot matrix of datasetID plus the H1 trait variables, with
# Jaccard_dissim supplied as the response; datasetID is passed as `group`.
featurePlot(
  x = select(dta_new, datasetID, all_of(H1)),
  y = dta_new[["Jaccard_dissim"]],
  plot = "pairs",
  group = dta_new$datasetID,
  xlab = "Scatterplot Matrix of traits (H1) - Jaccard 1",
  auto.key = list(columns = 4),
  # small text so all panel labels fit into the matrix
  par.settings = list(fontsize = list(text = 6)),
  pch = 16,
  cex = 0.5,
  alpha = 0.3
)

H2 ~ Jaccard

Code
# Scatterplot matrix of datasetID plus the H2 range-geometry variables, with
# Jaccard_dissim supplied as the response; datasetID is passed as `group`.
featurePlot(
  x = select(dta_new, datasetID, all_of(H2)),
  y = dta_new[["Jaccard_dissim"]],
  plot = "pairs",
  group = dta_new$datasetID,
  xlab = "Scatterplot Matrix of range geometry (H2) - Jaccard 1",
  auto.key = list(columns = 4),
  # small text so all panel labels fit into the matrix
  par.settings = list(fontsize = list(text = 6)),
  pch = 16,
  cex = 0.5,
  alpha = 0.3
)

log Ratio

H1 ~ log ratio

Code
# Scatterplot matrix of datasetID plus the H1 trait variables, with
# log_R2_1 supplied as the response; datasetID is passed as `group`.
featurePlot(
  x = select(dta_new, datasetID, all_of(H1)),
  y = dta_new[["log_R2_1"]],
  plot = "pairs",
  group = dta_new$datasetID,
  xlab = "Scatterplot Matrix of traits (H1) - Log Ratio",
  auto.key = list(columns = 4),
  # small text so all panel labels fit into the matrix
  par.settings = list(fontsize = list(text = 6)),
  pch = 16,
  cex = 0.5,
  alpha = 0.3
)

H2 ~ log ratio

Code
# Scatterplot matrix of datasetID plus the H2 range-geometry variables, with
# log_R2_1 supplied as the response; datasetID is passed as `group`.
featurePlot(
  x = select(dta_new, datasetID, all_of(H2)),
  y = dta_new[["log_R2_1"]],
  plot = "pairs",
  group = dta_new$datasetID,
  xlab = "Scatterplot Matrix of range geometry (H2) - log Ratio",
  auto.key = list(columns = 4),
  # small text so all panel labels fit into the matrix
  par.settings = list(fontsize = list(text = 6)),
  pch = 16,
  cex = 0.5,
  alpha = 0.3
)

For each atlas separately:

H2 ~ Jaccard

Code
# Scatterplot matrix of the H2 range-geometry variables against
# Jaccard_dissim, restricted to the rows with datasetID == 5 (Czechia).
# The axis label previously read "traits (H1)" although the plotted
# predictors are the H2 variables; it now matches the sibling atlas plots.
featurePlot(
  x = dta_new %>% filter(datasetID == 5) %>%
    select(all_of(c(H2))),
  # y is kept as a one-column data frame, as in the other per-atlas plots
  y = dta_new %>% filter(datasetID == 5) %>%
    select(Jaccard_dissim),
  plot = "pairs",
  pch = 16,
  alpha = 0.3,
  cex = 0.5,
  xlab = "Czechia: Scatterplot Matrix of range geometry (H2) - Jaccard 1",
  auto.key = list(columns = 4),
  par.settings =
    list(fontsize = list(text = 6))
)

Code
# Scatterplot matrix of the H2 range-geometry variables against
# Jaccard_dissim, restricted to the rows with datasetID == 6 (New York).
featurePlot(
  x = select(filter(dta_new, datasetID == 6), all_of(H2)),
  # y stays a one-column data frame, exactly as select() returns it
  y = select(filter(dta_new, datasetID == 6), Jaccard_dissim),
  plot = "pairs",
  xlab = "New York: Scatterplot Matrix of range geometry (H2) - Jaccard 1",
  auto.key = list(columns = 4),
  par.settings = list(fontsize = list(text = 6)),
  pch = 16,
  cex = 0.5,
  alpha = 0.3
)

Code
# Scatterplot matrix of the H2 range-geometry variables against
# Jaccard_dissim, restricted to the rows with datasetID == 13 (Japan).
featurePlot(
  x = select(filter(dta_new, datasetID == 13), all_of(H2)),
  # y stays a one-column data frame, exactly as select() returns it
  y = select(filter(dta_new, datasetID == 13), Jaccard_dissim),
  plot = "pairs",
  xlab = "Japan: Scatterplot Matrix of range geometry (H2) - Jaccard 1",
  auto.key = list(columns = 4),
  par.settings = list(fontsize = list(text = 6)),
  pch = 16,
  cex = 0.5,
  alpha = 0.3
)

Code
# Scatterplot matrix of the H2 range-geometry variables against
# Jaccard_dissim, restricted to the rows with datasetID == 26 (Europe).
featurePlot(
  x = select(filter(dta_new, datasetID == 26), all_of(H2)),
  # y stays a one-column data frame, exactly as select() returns it
  y = select(filter(dta_new, datasetID == 26), Jaccard_dissim),
  plot = "pairs",
  xlab = "Europe: Scatterplot Matrix of range geometry (H2) - Jaccard 1",
  auto.key = list(columns = 4),
  par.settings = list(fontsize = list(text = 6)),
  pch = 16,
  cex = 0.5,
  alpha = 0.3
)

H2 ~ log ratio

Code
# Scatterplot matrix of the H2 range-geometry variables against log_R2_1,
# restricted to the rows with datasetID == 5 (Czechia).
# The axis label previously read "traits (H1) - Jaccard 1", a leftover from
# another chunk; it now names the variables that are actually plotted.
featurePlot(
  x = dta_new %>% filter(datasetID == 5) %>%
    select(all_of(c(H2))),
  # y is kept as a one-column data frame, as in the other per-atlas plots
  y = dta_new %>% filter(datasetID == 5) %>%
    select(log_R2_1),
  plot = "pairs",
  pch = 16,
  alpha = 0.3,
  cex = 0.5,
  xlab = "Czechia: Scatterplot Matrix of range geometry (H2) - log Ratio",
  auto.key = list(columns = 4),
  par.settings =
    list(fontsize = list(text = 6))
)

Code
# Scatterplot matrix of the H2 range-geometry variables against log_R2_1,
# restricted to the rows with datasetID == 6 (New York).
# The axis label previously ended in "Jaccard 1" although the response
# plotted here is log_R2_1; it now says "log Ratio".
featurePlot(
  x = dta_new %>% filter(datasetID == 6) %>%
    select(all_of(c(H2))),
  # y is kept as a one-column data frame, as in the other per-atlas plots
  y = dta_new %>% filter(datasetID == 6) %>%
    select(log_R2_1),
  plot = "pairs",
  pch = 16,
  alpha = 0.3,
  cex = 0.5,
  xlab = "New York: Scatterplot Matrix of range geometry (H2) - log Ratio",
  auto.key = list(columns = 4),
  par.settings =
    list(fontsize = list(text = 6))
)

Code
# Scatterplot matrix of the H2 range-geometry variables against log_R2_1,
# restricted to the rows with datasetID == 13 (Japan).
# The axis label previously ended in "Jaccard 1" although the response
# plotted here is log_R2_1; it now says "log Ratio".
featurePlot(
  x = dta_new %>% filter(datasetID == 13) %>%
    select(all_of(c(H2))),
  # y is kept as a one-column data frame, as in the other per-atlas plots
  y = dta_new %>% filter(datasetID == 13) %>%
    select(log_R2_1),
  plot = "pairs",
  pch = 16,
  alpha = 0.3,
  cex = 0.5,
  xlab = "Japan: Scatterplot Matrix of range geometry (H2) - log Ratio",
  auto.key = list(columns = 4),
  par.settings =
    list(fontsize = list(text = 6))
)

Code
# Scatterplot matrix of the H2 range-geometry variables against log_R2_1,
# restricted to the rows with datasetID == 26 (Europe).
# The axis label previously ended in "Jaccard 1" although the response
# plotted here is log_R2_1; it now says "log Ratio".
featurePlot(
  x = dta_new %>% filter(datasetID == 26) %>%
    select(all_of(c(H2))),
  # y is kept as a one-column data frame, as in the other per-atlas plots
  y = dta_new %>% filter(datasetID == 26) %>%
    select(log_R2_1),
  plot = "pairs",
  pch = 16,
  alpha = 0.3,
  cex = 0.5,
  xlab = "Europe: Scatterplot Matrix of range geometry (H2) - log Ratio",
  auto.key = list(columns = 4),
  par.settings =
    list(fontsize = list(text = 6))
)