Primary analysis
Means
accMeansMain<-accMeans %>%
filter(ValidationData=="GBLUPs", grepl("DirDom",Model))accMeansMain %>%
select(-Model,-ValidationData) %>%
spread(predOf,Accuracy) %>%
mutate(diffAcc=MeanTGV-MeanBV) -> diffAcc
diffAcc %$% summary(diffAcc) Min. 1st Qu. Median Mean 3rd Qu. Max.
-0.0826119 -0.0312055 -0.0171228 0.0018702 -0.0004026 0.2795664
diffAcc %>%
summarise(quantile = scales::percent(c(0.05,0.25, 0.5, 0.75,0.95)),
diffAcc = quantile(diffAcc, c(0.05,0.25, 0.5, 0.75,0.95))) %>%
spread(quantile,diffAcc)# A tibble: 1 x 5
`25%` `5%` `50%` `75%` `95%`
<dbl> <dbl> <dbl> <dbl> <dbl>
1 -0.0312 -0.0656 -0.0171 -0.000403 0.176
diffAcc %>%
group_by(Trait) %>%
summarise(quantile = scales::percent(c(0.05,0.25, 0.5, 0.75,0.95)),
diffAcc = quantile(diffAcc, c(0.05,0.25, 0.5, 0.75,0.95))) %>%
spread(quantile,diffAcc)# A tibble: 6 x 6
# Groups: Trait [6]
Trait `25%` `5%` `50%` `75%` `95%`
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 biofortSI -0.0375 -0.0526 -0.0237 -0.00934 0.0123
2 DM -0.0277 -0.0444 -0.0183 -0.0116 0.00924
3 logFYLD 0.0616 -0.00674 0.130 0.180 0.246
4 MCMDS -0.0580 -0.0791 -0.0391 -0.0157 -0.00193
5 stdSI -0.0323 -0.0708 -0.0173 0.00685 0.0271
6 TCHART -0.0231 -0.0293 -0.0161 -0.0123 -0.00706
(Co)variances
library(tidyverse); library(magrittr);
## Table S11: Accuracies predicting the variances
accVars<-readxl::read_xlsx(here::here("manuscript","SupplementaryTables.xlsx"),sheet = "TableS11")
accVarsMain<-accVars %>%
filter(ValidationData=="GBLUPs", grepl("DirDom",Model),VarMethod=="PMV")Across traits. Proportion of accuracy estimates above zero? Percentiles: 10%, 50% (median), 90%
accVarsMain %>%
select(-Model,-ValidationData,-AccuracyCor) %>%
mutate(Component=ifelse(Trait1==Trait2,"Variance","Covariance"),
TraitType=ifelse(grepl("SI",Trait1),"SI","ComponentTrait")) %>%
group_by(Component) %>%
summarize(propAboveZero=length(which(AccuracyWtCor>=0))/n(),
quantile = scales::percent(c(0.1,0.5,0.9)),
AccuracyWtCor = quantile(AccuracyWtCor, c(0.1,0.5,0.9))) %>%
pivot_wider(names_from = "quantile", values_from = "AccuracyWtCor")# A tibble: 2 x 5
# Groups: Component [2]
Component propAboveZero `10%` `50%` `90%`
<chr> <dbl> <dbl> <dbl> <dbl>
1 Covariance 0.7 -0.0812 0.0720 0.265
2 Variance 0.89 -0.0118 0.137 0.303
By trait variance / trait-trait covariances.
accVarsMain %>%
select(-Model,-ValidationData,-AccuracyCor) %>%
mutate(Component=ifelse(Trait1==Trait2,"Variance","Covariance"),
TraitType=ifelse(grepl("SI",Trait1),"SI","ComponentTrait")) %>%
group_by(Component,TraitType,Trait1,Trait2) %>%
summarize(propAboveZero=length(which(AccuracyWtCor>=0))/n(),
quantile = scales::percent(c(0.1,0.5,0.9)),
AccuracyWtCor = quantile(AccuracyWtCor, c(0.1,0.5,0.9))) %>%
pivot_wider(names_from = "quantile", values_from = "AccuracyWtCor") %>%
arrange(desc(Component),TraitType,desc(`50%`))# A tibble: 12 x 8
# Groups: Component, TraitType, Trait1, Trait2 [12]
Component TraitType Trait1 Trait2 propAboveZero `10%` `50%` `90%`
<chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 Variance ComponentT… MCMDS MCMDS 0.92 0.0507 0.248 0.360
2 Variance ComponentT… TCHART TCHART 0.84 -0.0306 0.165 0.283
3 Variance ComponentT… logFYLD logFYLD 1 0.0490 0.150 0.269
4 Variance ComponentT… DM DM 0.88 -0.0260 0.0651 0.164
5 Variance SI stdSI stdSI 0.92 0.0623 0.172 0.283
6 Variance SI biofor… biofor… 0.78 -0.0501 0.0890 0.194
7 Covariance ComponentT… DM TCHART 0.98 0.115 0.232 0.351
8 Covariance ComponentT… logFYLD MCMDS 0.9 -0.00151 0.157 0.297
9 Covariance ComponentT… MCMDS TCHART 0.68 -0.0879 0.0565 0.156
10 Covariance ComponentT… DM MCMDS 0.76 -0.0988 0.0515 0.174
11 Covariance ComponentT… logFYLD TCHART 0.5 -0.142 -0.00147 0.121
12 Covariance ComponentT… DM logFYLD 0.38 -0.100 -0.0265 0.0850
Difference in accuracy between VarTGV and VarBV?
accVarsMain %>%
select(-Model,-ValidationData,-AccuracyCor) %>%
mutate(Component=ifelse(Trait1==Trait2,"Variance","Covariance"),
TraitType=ifelse(grepl("SI",Trait1),"SI","ComponentTrait")) %>%
spread(predOf,AccuracyWtCor) %>%
mutate(diffAcc=VarTGV-VarBV) -> diffAcc
diffAcc %$% round(summary(diffAcc),3) Min. 1st Qu. Median Mean 3rd Qu. Max.
-0.174 -0.033 -0.003 -0.002 0.028 0.196
diffAcc %>%
summarise(quantile = scales::percent(c(0.1,0.5,0.9)),
diffAcc = quantile(diffAcc, c(0.1,0.5,0.9))) %>%
spread(quantile,diffAcc)# A tibble: 1 x 3
`10%` `50%` `90%`
<dbl> <dbl> <dbl>
1 -0.0634 -0.00303 0.0578
diffAcc %>%
group_by(Component,TraitType,Trait1,Trait2) %>%
summarise(quantile = scales::percent(c(0.1,0.5,0.9)),
diffAcc = quantile(diffAcc, c(0.1,0.5,0.9))) %>%
spread(quantile,diffAcc) %>%
arrange(desc(Component),TraitType,desc(`50%`))# A tibble: 12 x 7
# Groups: Component, TraitType, Trait1, Trait2 [12]
Component TraitType Trait1 Trait2 `10%` `50%` `90%`
<chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
1 Variance ComponentTrait MCMDS MCMDS -0.0111 0.0134 0.0368
2 Variance ComponentTrait TCHART TCHART -0.0439 -0.0275 -0.0107
3 Variance ComponentTrait DM DM -0.0591 -0.0301 -0.000190
4 Variance ComponentTrait logFYLD logFYLD -0.0929 -0.0319 0.0908
5 Variance SI stdSI stdSI -0.0284 0.0184 0.0436
6 Variance SI biofortSI biofortSI -0.0426 -0.00653 0.0262
7 Covariance ComponentTrait logFYLD TCHART -0.0532 0.0533 0.145
8 Covariance ComponentTrait DM MCMDS -0.0137 0.0229 0.0563
9 Covariance ComponentTrait logFYLD MCMDS -0.0372 0.0211 0.0847
10 Covariance ComponentTrait MCMDS TCHART -0.0351 0.0116 0.0384
11 Covariance ComponentTrait DM logFYLD -0.0715 0.00321 0.0626
12 Covariance ComponentTrait DM TCHART -0.0985 -0.0605 -0.0334
Usefulness
The usefulness criteria i.e. \(UC_{parent}\) and \(UC_{variety}\) are predicted by:
\[UC_{parent}=\mu_{BV} + (i_{RS} \times \sigma_{BV})\]
\[UC_{variety}=\mu_{TGV} + (i_{VDP} \times \sigma_{TGV})\]
Selection intensity
In order to combined predicted means and variances into a UC, we first calculated the realized intensity of within-family selection (\(i_{RS}\) and \(i_{VDP}\)). For \(UC_{parent}\) we computed the \(i_{RS}\) based on the proportion of progeny from each family, that themselves later appeared in the pedigree as parents. For \(UC_{clone}\) we compute computed \(i_{VDP}\) based on the proportion of family-members that had at least one plot at the penultimate stage of the VDP, the advanced yield trial (AYT). For completeness and as part of exploratory analysis, we computed the proportion selected at each of the VDP stages: clonal evaluation trial (CET), preliminary yield trial (PYT), advanced yield trial (AYT) and uniform yield trial (UYT).
The table below provides a quick summary of the number of families available with realized selection observed at each stage, plus the corresponding mean selection intensity and proportion selected across families.
## Table S13: Realized within-cross selection metrics
crossmetrics<-readxl::read_xlsx(here::here("manuscript","SupplementaryTables.xlsx"),sheet = "TableS13")
left_join(crossmetrics %>%
select(sireID,damID,contains("realIntensity")) %>% distinct %>%
pivot_longer(cols = contains("realIntensity"),
names_to = "Stage", values_to = "Intensity",names_prefix = "realIntensity") %>%
group_by(Stage) %>%
summarize(meanIntensity=mean(Intensity, na.rm = T),
Nfam=length(which(!is.na(Intensity)))),
crossmetrics %>%
select(sireID,damID,contains("prop")) %>% distinct %>%
rename(propParent=propUsedAsParent,
propCET=propPhenotyped,
propPYT=propPastCET,
propAYT=propPastPYT,
propUYT=propPastAYT) %>%
pivot_longer(cols = contains("prop"),
values_to = "PropPast",names_to = "Stage",names_prefix = "propPast|prop") %>%
group_by(Stage) %>%
summarize(meanPropPast=mean(PropPast, na.rm = T))) %>%
mutate(Stage=factor(Stage,levels=c("Parent","CET","PYT","AYT","UYT"))) %>%
arrange(Stage) %>%
select(Stage,Nfam,meanIntensity,meanPropPast) %>% mutate_if(is.numeric,~round(.,2))# A tibble: 5 x 4
Stage Nfam meanIntensity meanPropPast
<fct> <dbl> <dbl> <dbl>
1 Parent 48 1.59 0.02
2 CET 415 0.26 0.76
3 PYT 288 0.9 0.3
4 AYT 104 1.46 0.05
5 UYT 39 1.4 0.03
There were 48 families with a mean intensity of 1.59 (mean 2% selected) that themselves had members who were parents in the pedigree. As expected, the number of available families and the proportion selected decreased (increasing selection intensity) from CET to UYT. We choose to focus on the AYT stage, which has 104 families, mean intensity 1.46 (mean 5% selected).
library(tidyverse); library(magrittr);
## Table S9: Predicted and observed UC
predVSobsUC<-read.csv(here::here("manuscript","SupplementaryTable09.csv"),stringsAsFactors = F)
uc_cv_summary<-predVSobsUC %>%
filter(VarMethod=="PMV") %>%
group_by(Model,predOf,Stage,Trait,Repeat,Fold) %>%
summarize(Nfam=n(),
meanFamSize=round(mean(FamSize),1)) %>%
ungroup() %>%
select(-Trait,-Model) %>%
distinct
uc_cv_summary %>%
group_by(predOf,Stage) %>%
summarize(minNfam=min(Nfam),
meanNfam=mean(Nfam),
maxNfam=max(Nfam),
minMeanFamSize=min(meanFamSize),
meanMeanFamSize=mean(meanFamSize),
maxMeanFamSize=max(meanFamSize))# A tibble: 7 x 8
# Groups: predOf [2]
predOf Stage minNfam meanNfam maxNfam minMeanFamSize meanMeanFamSize
<chr> <chr> <int> <dbl> <int> <dbl> <dbl>
1 BV ConstIntensity 143 166. 204 5.9 6.85
2 BV Parent 9 17.3 24 8.8 13.8
3 TGV AYT 25 37.3 50 10.9 12.9
4 TGV CET 129 149. 192 6.3 7.48
5 TGV ConstIntensity 143 166. 204 5.9 6.85
6 TGV PYT 69 103. 142 7.3 8.73
7 TGV UYT 8 14.1 20 7.3 11.8
# … with 1 more variable: maxMeanFamSize <dbl>
Accuracy comparisons
library(tidyverse); library(magrittr);
## Table S12: Accuracies predicting the usefulness criteria
accUC<-readxl::read_xlsx(here::here("manuscript","SupplementaryTables.xlsx"),sheet = "TableS12")
accUCmain<-accUC %>%
filter(VarMethod=="PMV",grepl("DirDom",Model),Stage %in% c("Parent","AYT")) %>%
select(-Model,-VarMethod,-AccuracyCor,-Stage)
# accMeansMain %>% count(Trait,predOf)
# accVarsMain %>% count(Trait1,Trait2,predOf)
# accUCmain %>% count(Repeat,Fold)Proportion of UC accuracy estimates greater than zero.
length(which(accUC$AccuracyWtCor>0))/nrow(accUC)[1] 0.952619
Compare Mean, Variance and UC accuracy:
meanVsUCacc<-accUCmain %>%
rename(AccUC=AccuracyWtCor) %>%
left_join(accMeansMain %>%
select(-ValidationData,-Model) %>%
mutate(predOf=gsub("Mean","",predOf)) %>%
rename(AccMean=Accuracy))
meanVsUCacc %<>%
left_join(accVarsMain %>%
filter(Trait1==Trait2) %>%
select(-ValidationData,-Model,-AccuracyCor,-VarMethod,-Trait2) %>%
mutate(predOf=gsub("Var","",predOf)) %>%
rename(AccVar=AccuracyWtCor,
Trait=Trait1))Correlation between mean, Var and UC accuracy, overall
meanVsUCacc %>%
summarize(corMeanUCacc=cor(AccUC,AccMean),
meanDiffMeanUC=mean(AccUC-AccMean),
corMeanVarAcc=cor(AccVar,AccMean),
corVarUCacc=cor(AccUC,AccVar))# A tibble: 1 x 4
corMeanUCacc meanDiffMeanUC corMeanVarAcc corVarUCacc
<dbl> <dbl> <dbl> <dbl>
1 0.745 -0.0929 -0.222 -0.236
Correlation between mean, Var and UC accuracy, by predOf (BV vs. TGV)
meanVsUCacc %>%
group_by(predOf) %>%
summarize(corMeanUCacc=cor(AccUC,AccMean),
meanDiffMeanUC=mean(AccUC-AccMean),
corMeanVarAcc=cor(AccVar,AccMean))# A tibble: 2 x 4
predOf corMeanUCacc meanDiffMeanUC corMeanVarAcc
<chr> <dbl> <dbl> <dbl>
1 BV 0.704 -0.0748 -0.177
2 TGV 0.802 -0.111 -0.275
Correlation between mean, Var and UC accuracy, by Trait
meanVsUCacc %>%
group_by(Trait) %>%
summarize(corMeanUCacc=cor(AccUC,AccMean),
meanDiffMeanUC=mean(AccUC-AccMean),
corMeanVarAcc=cor(AccVar,AccMean)) %>% arrange(desc(corMeanUCacc))# A tibble: 6 x 4
Trait corMeanUCacc meanDiffMeanUC corMeanVarAcc
<chr> <dbl> <dbl> <dbl>
1 TCHART 0.494 -0.00356 -0.0951
2 logFYLD 0.410 -0.0762 -0.0571
3 MCMDS 0.358 -0.280 0.475
4 stdSI 0.315 -0.0736 0.129
5 DM 0.110 -0.0938 0.227
6 biofortSI 0.0189 -0.0297 0.00484
Make an overall comparison of trait accuracies for UC
accUCmain %>%
mutate(TraitType=ifelse(grepl("SI",Trait),"SI","ComponentTrait")) %>%
group_by(Trait) %>%
summarize(propAboveZero=length(which(AccuracyWtCor>0))/n(),
quantile = scales::percent(c(0.1,0.5,0.9)),
AccuracyWtCor = quantile(AccuracyWtCor, c(0.1,0.5,0.9))) %>%
pivot_wider(names_from = "quantile", values_from = "AccuracyWtCor") %>%
mutate(across(is.numeric,~round(.,2))) %>%
arrange(desc(`50%`))# A tibble: 6 x 5
# Groups: Trait [6]
Trait propAboveZero `10%` `50%` `90%`
<chr> <dbl> <dbl> <dbl> <dbl>
1 TCHART 1 0.66 0.83 0.94
2 DM 1 0.36 0.65 0.81
3 biofortSI 1 0.39 0.580 0.74
4 stdSI 0.96 0.16 0.49 0.65
5 logFYLD 0.88 -0.04 0.24 0.49
6 MCMDS 0.7 -0.18 0.1 0.41
Compare UC accuracy of TGV vs. BV, overall
accUCmain %>%
spread(predOf,AccuracyWtCor) %>%
mutate(diffAcc=TGV-BV) %$% round(summary(diffAcc),2) Min. 1st Qu. Median Mean 3rd Qu. Max.
-0.75 -0.17 -0.06 -0.03 0.06 1.01
Proportion of accuracy estimates where BV>TGV.
accUCmain %>%
spread(predOf,AccuracyWtCor) %>%
mutate(diffAcc=TGV-BV) %$% (length(which(diffAcc<=0))/length(diffAcc))[1] 0.62
Compare UC accuracy difference TGV minus BV, by Trait
accUCmain %>%
mutate(TraitType=ifelse(grepl("SI",Trait),"SI","ComponentTrait")) %>%
spread(predOf,AccuracyWtCor) %>%
mutate(diffAcc=TGV-BV) %>%
group_by(Trait,TraitType) %>%
summarize(quantile = scales::percent(c(0.1,0.5,0.9)),
diffAcc = quantile(diffAcc, c(0.1,0.5,0.9))) %>%
pivot_wider(names_from = "quantile", values_from = "diffAcc") %>%
mutate(across(is.numeric,~round(.,2))) %>%
arrange(desc(`50%`))# A tibble: 6 x 5
# Groups: Trait, TraitType [6]
Trait TraitType `10%` `50%` `90%`
<chr> <chr> <dbl> <dbl> <dbl>
1 stdSI SI -0.15 0.1 0.44
2 logFYLD ComponentTrait -0.26 -0.02 0.28
3 DM ComponentTrait -0.24 -0.05 0.12
4 biofortSI SI -0.19 -0.08 0.12
5 TCHART ComponentTrait -0.17 -0.08 0.07
6 MCMDS ComponentTrait -0.49 -0.17 0.12
- Mean: TGV<BV (median -0.017)
- Var: No consistent / small differences
- UC: TGV<BV



