Primary analysis
Means
<-accMeans %>%
accMeansMainfilter(ValidationData=="GBLUPs", grepl("DirDom",Model))
%>%
accMeansMain select(-Model,-ValidationData) %>%
spread(predOf,Accuracy) %>%
mutate(diffAcc=MeanTGV-MeanBV) -> diffAcc
%$% summary(diffAcc) diffAcc
Min. 1st Qu. Median Mean 3rd Qu. Max.
-0.0826119 -0.0312055 -0.0171228 0.0018702 -0.0004026 0.2795664
%>%
diffAcc summarise(quantile = scales::percent(c(0.05,0.25, 0.5, 0.75,0.95)),
diffAcc = quantile(diffAcc, c(0.05,0.25, 0.5, 0.75,0.95))) %>%
spread(quantile,diffAcc)
# A tibble: 1 x 5
`25%` `5%` `50%` `75%` `95%`
<dbl> <dbl> <dbl> <dbl> <dbl>
1 -0.0312 -0.0656 -0.0171 -0.000403 0.176
%>%
diffAcc group_by(Trait) %>%
summarise(quantile = scales::percent(c(0.05,0.25, 0.5, 0.75,0.95)),
diffAcc = quantile(diffAcc, c(0.05,0.25, 0.5, 0.75,0.95))) %>%
spread(quantile,diffAcc)
# A tibble: 6 x 6
# Groups: Trait [6]
Trait `25%` `5%` `50%` `75%` `95%`
<chr> <dbl> <dbl> <dbl> <dbl> <dbl>
1 biofortSI -0.0375 -0.0526 -0.0237 -0.00934 0.0123
2 DM -0.0277 -0.0444 -0.0183 -0.0116 0.00924
3 logFYLD 0.0616 -0.00674 0.130 0.180 0.246
4 MCMDS -0.0580 -0.0791 -0.0391 -0.0157 -0.00193
5 stdSI -0.0323 -0.0708 -0.0173 0.00685 0.0271
6 TCHART -0.0231 -0.0293 -0.0161 -0.0123 -0.00706
(Co)variances
library(tidyverse); library(magrittr);
## Table S11: Accuracies predicting the variances
<-readxl::read_xlsx(here::here("manuscript","SupplementaryTables.xlsx"),sheet = "TableS11")
accVars<-accVars %>%
accVarsMainfilter(ValidationData=="GBLUPs", grepl("DirDom",Model),VarMethod=="PMV")
Across traits. Proportion of accuracy estimates above zero? Percentiles: 10%, 50% (median), 90%
%>%
accVarsMain select(-Model,-ValidationData,-AccuracyCor) %>%
mutate(Component=ifelse(Trait1==Trait2,"Variance","Covariance"),
TraitType=ifelse(grepl("SI",Trait1),"SI","ComponentTrait")) %>%
group_by(Component) %>%
summarize(propAboveZero=length(which(AccuracyWtCor>=0))/n(),
quantile = scales::percent(c(0.1,0.5,0.9)),
AccuracyWtCor = quantile(AccuracyWtCor, c(0.1,0.5,0.9))) %>%
pivot_wider(names_from = "quantile", values_from = "AccuracyWtCor")
# A tibble: 2 x 5
# Groups: Component [2]
Component propAboveZero `10%` `50%` `90%`
<chr> <dbl> <dbl> <dbl> <dbl>
1 Covariance 0.7 -0.0812 0.0720 0.265
2 Variance 0.89 -0.0118 0.137 0.303
By trait variance / trait-trait covariances.
%>%
accVarsMain select(-Model,-ValidationData,-AccuracyCor) %>%
mutate(Component=ifelse(Trait1==Trait2,"Variance","Covariance"),
TraitType=ifelse(grepl("SI",Trait1),"SI","ComponentTrait")) %>%
group_by(Component,TraitType,Trait1,Trait2) %>%
summarize(propAboveZero=length(which(AccuracyWtCor>=0))/n(),
quantile = scales::percent(c(0.1,0.5,0.9)),
AccuracyWtCor = quantile(AccuracyWtCor, c(0.1,0.5,0.9))) %>%
pivot_wider(names_from = "quantile", values_from = "AccuracyWtCor") %>%
arrange(desc(Component),TraitType,desc(`50%`))
# A tibble: 12 x 8
# Groups: Component, TraitType, Trait1, Trait2 [12]
Component TraitType Trait1 Trait2 propAboveZero `10%` `50%` `90%`
<chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 Variance ComponentT… MCMDS MCMDS 0.92 0.0507 0.248 0.360
2 Variance ComponentT… TCHART TCHART 0.84 -0.0306 0.165 0.283
3 Variance ComponentT… logFYLD logFYLD 1 0.0490 0.150 0.269
4 Variance ComponentT… DM DM 0.88 -0.0260 0.0651 0.164
5 Variance SI stdSI stdSI 0.92 0.0623 0.172 0.283
6 Variance SI biofor… biofor… 0.78 -0.0501 0.0890 0.194
7 Covariance ComponentT… DM TCHART 0.98 0.115 0.232 0.351
8 Covariance ComponentT… logFYLD MCMDS 0.9 -0.00151 0.157 0.297
9 Covariance ComponentT… MCMDS TCHART 0.68 -0.0879 0.0565 0.156
10 Covariance ComponentT… DM MCMDS 0.76 -0.0988 0.0515 0.174
11 Covariance ComponentT… logFYLD TCHART 0.5 -0.142 -0.00147 0.121
12 Covariance ComponentT… DM logFYLD 0.38 -0.100 -0.0265 0.0850
Difference in accuracy between VarTGV and VarBV?
%>%
accVarsMain select(-Model,-ValidationData,-AccuracyCor) %>%
mutate(Component=ifelse(Trait1==Trait2,"Variance","Covariance"),
TraitType=ifelse(grepl("SI",Trait1),"SI","ComponentTrait")) %>%
spread(predOf,AccuracyWtCor) %>%
mutate(diffAcc=VarTGV-VarBV) -> diffAcc
%$% round(summary(diffAcc),3) diffAcc
Min. 1st Qu. Median Mean 3rd Qu. Max.
-0.174 -0.033 -0.003 -0.002 0.028 0.196
%>%
diffAcc summarise(quantile = scales::percent(c(0.1,0.5,0.9)),
diffAcc = quantile(diffAcc, c(0.1,0.5,0.9))) %>%
spread(quantile,diffAcc)
# A tibble: 1 x 3
`10%` `50%` `90%`
<dbl> <dbl> <dbl>
1 -0.0634 -0.00303 0.0578
%>%
diffAcc group_by(Component,TraitType,Trait1,Trait2) %>%
summarise(quantile = scales::percent(c(0.1,0.5,0.9)),
diffAcc = quantile(diffAcc, c(0.1,0.5,0.9))) %>%
spread(quantile,diffAcc) %>%
arrange(desc(Component),TraitType,desc(`50%`))
# A tibble: 12 x 7
# Groups: Component, TraitType, Trait1, Trait2 [12]
Component TraitType Trait1 Trait2 `10%` `50%` `90%`
<chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
1 Variance ComponentTrait MCMDS MCMDS -0.0111 0.0134 0.0368
2 Variance ComponentTrait TCHART TCHART -0.0439 -0.0275 -0.0107
3 Variance ComponentTrait DM DM -0.0591 -0.0301 -0.000190
4 Variance ComponentTrait logFYLD logFYLD -0.0929 -0.0319 0.0908
5 Variance SI stdSI stdSI -0.0284 0.0184 0.0436
6 Variance SI biofortSI biofortSI -0.0426 -0.00653 0.0262
7 Covariance ComponentTrait logFYLD TCHART -0.0532 0.0533 0.145
8 Covariance ComponentTrait DM MCMDS -0.0137 0.0229 0.0563
9 Covariance ComponentTrait logFYLD MCMDS -0.0372 0.0211 0.0847
10 Covariance ComponentTrait MCMDS TCHART -0.0351 0.0116 0.0384
11 Covariance ComponentTrait DM logFYLD -0.0715 0.00321 0.0626
12 Covariance ComponentTrait DM TCHART -0.0985 -0.0605 -0.0334
Usefulness
The usefulness criteria i.e. \(UC_{parent}\) and \(UC_{variety}\) are predicted by:
\[UC_{parent}=\mu_{BV} + (i_{RS} \times \sigma_{BV})\]
\[UC_{variety}=\mu_{TGV} + (i_{VDP} \times \sigma_{TGV})\]
Selection intensity
In order to combined predicted means and variances into a UC, we first calculated the realized intensity of within-family selection (\(i_{RS}\) and \(i_{VDP}\)). For \(UC_{parent}\) we computed the \(i_{RS}\) based on the proportion of progeny from each family, that themselves later appeared in the pedigree as parents. For \(UC_{clone}\) we compute computed \(i_{VDP}\) based on the proportion of family-members that had at least one plot at the penultimate stage of the VDP, the advanced yield trial (AYT). For completeness and as part of exploratory analysis, we computed the proportion selected at each of the VDP stages: clonal evaluation trial (CET), preliminary yield trial (PYT), advanced yield trial (AYT) and uniform yield trial (UYT).
The table below provides a quick summary of the number of families available with realized selection observed at each stage, plus the corresponding mean selection intensity and proportion selected across families.
## Table S13: Realized within-cross selection metrics
<-readxl::read_xlsx(here::here("manuscript","SupplementaryTables.xlsx"),sheet = "TableS13")
crossmetricsleft_join(crossmetrics %>%
select(sireID,damID,contains("realIntensity")) %>% distinct %>%
pivot_longer(cols = contains("realIntensity"),
names_to = "Stage", values_to = "Intensity",names_prefix = "realIntensity") %>%
group_by(Stage) %>%
summarize(meanIntensity=mean(Intensity, na.rm = T),
Nfam=length(which(!is.na(Intensity)))),
%>%
crossmetrics select(sireID,damID,contains("prop")) %>% distinct %>%
rename(propParent=propUsedAsParent,
propCET=propPhenotyped,
propPYT=propPastCET,
propAYT=propPastPYT,
propUYT=propPastAYT) %>%
pivot_longer(cols = contains("prop"),
values_to = "PropPast",names_to = "Stage",names_prefix = "propPast|prop") %>%
group_by(Stage) %>%
summarize(meanPropPast=mean(PropPast, na.rm = T))) %>%
mutate(Stage=factor(Stage,levels=c("Parent","CET","PYT","AYT","UYT"))) %>%
arrange(Stage) %>%
select(Stage,Nfam,meanIntensity,meanPropPast) %>% mutate_if(is.numeric,~round(.,2))
# A tibble: 5 x 4
Stage Nfam meanIntensity meanPropPast
<fct> <dbl> <dbl> <dbl>
1 Parent 48 1.59 0.02
2 CET 415 0.26 0.76
3 PYT 288 0.9 0.3
4 AYT 104 1.46 0.05
5 UYT 39 1.4 0.03
There were 48 families with a mean intensity of 1.59 (mean 2% selected) that themselves had members who were parents in the pedigree. As expected, the number of available families and the proportion selected decreased (increasing selection intensity) from CET to UYT. We choose to focus on the AYT stage, which has 104 families, mean intensity 1.46 (mean 5% selected).
library(tidyverse); library(magrittr);
## Table S9: Predicted and observed UC
<-read.csv(here::here("manuscript","SupplementaryTable09.csv"),stringsAsFactors = F)
predVSobsUC
<-predVSobsUC %>%
uc_cv_summaryfilter(VarMethod=="PMV") %>%
group_by(Model,predOf,Stage,Trait,Repeat,Fold) %>%
summarize(Nfam=n(),
meanFamSize=round(mean(FamSize),1)) %>%
ungroup() %>%
select(-Trait,-Model) %>%
distinct%>%
uc_cv_summary group_by(predOf,Stage) %>%
summarize(minNfam=min(Nfam),
meanNfam=mean(Nfam),
maxNfam=max(Nfam),
minMeanFamSize=min(meanFamSize),
meanMeanFamSize=mean(meanFamSize),
maxMeanFamSize=max(meanFamSize))
# A tibble: 7 x 8
# Groups: predOf [2]
predOf Stage minNfam meanNfam maxNfam minMeanFamSize meanMeanFamSize
<chr> <chr> <int> <dbl> <int> <dbl> <dbl>
1 BV ConstIntensity 143 166. 204 5.9 6.85
2 BV Parent 9 17.3 24 8.8 13.8
3 TGV AYT 25 37.3 50 10.9 12.9
4 TGV CET 129 149. 192 6.3 7.48
5 TGV ConstIntensity 143 166. 204 5.9 6.85
6 TGV PYT 69 103. 142 7.3 8.73
7 TGV UYT 8 14.1 20 7.3 11.8
# … with 1 more variable: maxMeanFamSize <dbl>
Accuracy comparisons
library(tidyverse); library(magrittr);
## Table S12: Accuracies predicting the usefulness criteria
<-readxl::read_xlsx(here::here("manuscript","SupplementaryTables.xlsx"),sheet = "TableS12")
accUC<-accUC %>%
accUCmainfilter(VarMethod=="PMV",grepl("DirDom",Model),Stage %in% c("Parent","AYT")) %>%
select(-Model,-VarMethod,-AccuracyCor,-Stage)
# accMeansMain %>% count(Trait,predOf)
# accVarsMain %>% count(Trait1,Trait2,predOf)
# accUCmain %>% count(Repeat,Fold)
Proportion of UC accuracy estimates greater than zero.
length(which(accUC$AccuracyWtCor>0))/nrow(accUC)
[1] 0.952619
Compare Mean, Variance and UC accuracy:
<-accUCmain %>%
meanVsUCaccrename(AccUC=AccuracyWtCor) %>%
left_join(accMeansMain %>%
select(-ValidationData,-Model) %>%
mutate(predOf=gsub("Mean","",predOf)) %>%
rename(AccMean=Accuracy))
%<>%
meanVsUCacc left_join(accVarsMain %>%
filter(Trait1==Trait2) %>%
select(-ValidationData,-Model,-AccuracyCor,-VarMethod,-Trait2) %>%
mutate(predOf=gsub("Var","",predOf)) %>%
rename(AccVar=AccuracyWtCor,
Trait=Trait1))
Correlation between mean, Var and UC accuracy, overall
%>%
meanVsUCacc summarize(corMeanUCacc=cor(AccUC,AccMean),
meanDiffMeanUC=mean(AccUC-AccMean),
corMeanVarAcc=cor(AccVar,AccMean),
corVarUCacc=cor(AccUC,AccVar))
# A tibble: 1 x 4
corMeanUCacc meanDiffMeanUC corMeanVarAcc corVarUCacc
<dbl> <dbl> <dbl> <dbl>
1 0.745 -0.0929 -0.222 -0.236
Correlation between mean, Var and UC accuracy, by predOf (BV vs. TGV)
%>%
meanVsUCacc group_by(predOf) %>%
summarize(corMeanUCacc=cor(AccUC,AccMean),
meanDiffMeanUC=mean(AccUC-AccMean),
corMeanVarAcc=cor(AccVar,AccMean))
# A tibble: 2 x 4
predOf corMeanUCacc meanDiffMeanUC corMeanVarAcc
<chr> <dbl> <dbl> <dbl>
1 BV 0.704 -0.0748 -0.177
2 TGV 0.802 -0.111 -0.275
Correlation between mean, Var and UC accuracy, by Trait
%>%
meanVsUCacc group_by(Trait) %>%
summarize(corMeanUCacc=cor(AccUC,AccMean),
meanDiffMeanUC=mean(AccUC-AccMean),
corMeanVarAcc=cor(AccVar,AccMean)) %>% arrange(desc(corMeanUCacc))
# A tibble: 6 x 4
Trait corMeanUCacc meanDiffMeanUC corMeanVarAcc
<chr> <dbl> <dbl> <dbl>
1 TCHART 0.494 -0.00356 -0.0951
2 logFYLD 0.410 -0.0762 -0.0571
3 MCMDS 0.358 -0.280 0.475
4 stdSI 0.315 -0.0736 0.129
5 DM 0.110 -0.0938 0.227
6 biofortSI 0.0189 -0.0297 0.00484
Make an overall comparison of trait accuracies for UC
%>%
accUCmain mutate(TraitType=ifelse(grepl("SI",Trait),"SI","ComponentTrait")) %>%
group_by(Trait) %>%
summarize(propAboveZero=length(which(AccuracyWtCor>0))/n(),
quantile = scales::percent(c(0.1,0.5,0.9)),
AccuracyWtCor = quantile(AccuracyWtCor, c(0.1,0.5,0.9))) %>%
pivot_wider(names_from = "quantile", values_from = "AccuracyWtCor") %>%
mutate(across(is.numeric,~round(.,2))) %>%
arrange(desc(`50%`))
# A tibble: 6 x 5
# Groups: Trait [6]
Trait propAboveZero `10%` `50%` `90%`
<chr> <dbl> <dbl> <dbl> <dbl>
1 TCHART 1 0.66 0.83 0.94
2 DM 1 0.36 0.65 0.81
3 biofortSI 1 0.39 0.580 0.74
4 stdSI 0.96 0.16 0.49 0.65
5 logFYLD 0.88 -0.04 0.24 0.49
6 MCMDS 0.7 -0.18 0.1 0.41
Compare UC accuracy of TGV vs. BV, overall
%>%
accUCmain spread(predOf,AccuracyWtCor) %>%
mutate(diffAcc=TGV-BV) %$% round(summary(diffAcc),2)
Min. 1st Qu. Median Mean 3rd Qu. Max.
-0.75 -0.17 -0.06 -0.03 0.06 1.01
Proportion of accuracy estimates where BV>TGV.
%>%
accUCmain spread(predOf,AccuracyWtCor) %>%
mutate(diffAcc=TGV-BV) %$% (length(which(diffAcc<=0))/length(diffAcc))
[1] 0.62
Compare UC accuracy difference TGV minus BV, by Trait
%>%
accUCmain mutate(TraitType=ifelse(grepl("SI",Trait),"SI","ComponentTrait")) %>%
spread(predOf,AccuracyWtCor) %>%
mutate(diffAcc=TGV-BV) %>%
group_by(Trait,TraitType) %>%
summarize(quantile = scales::percent(c(0.1,0.5,0.9)),
diffAcc = quantile(diffAcc, c(0.1,0.5,0.9))) %>%
pivot_wider(names_from = "quantile", values_from = "diffAcc") %>%
mutate(across(is.numeric,~round(.,2))) %>%
arrange(desc(`50%`))
# A tibble: 6 x 5
# Groups: Trait, TraitType [6]
Trait TraitType `10%` `50%` `90%`
<chr> <chr> <dbl> <dbl> <dbl>
1 stdSI SI -0.15 0.1 0.44
2 logFYLD ComponentTrait -0.26 -0.02 0.28
3 DM ComponentTrait -0.24 -0.05 0.12
4 biofortSI SI -0.19 -0.08 0.12
5 TCHART ComponentTrait -0.17 -0.08 0.07
6 MCMDS ComponentTrait -0.49 -0.17 0.12
- Mean: TGV<BV (median -0.017)
- Var: No consistent / small differences
- UC: TGV<BV