Last updated: 2018-08-14
library(mashr)
Loading required package: ashr
library(knitr)
library(kableExtra)
source('../code/estimate_cor.R')
source('../code/generateDataV.R')
source('../code/summary.R')
We use EM algorithm to update \rho
.
P(ˆB,B,Z|ρ,π)=n∏i=1K∏k=0[πkN(ˆbi;bi,V)N(bi;0,Uk)]I(zi=k)
EZ,B|ˆBlogP(ˆB,B,Z|ρ,π)=n∑i=1K∑k=0P(zi=k|ˆbi)[logπk+EB|ˆB(logN(ˆbi;bi,V))+EB|ˆB(logN(bi;0,Uk))]
logN(ˆbi;bi,V)+logN(bi;0,Uk)=−p2log2π−12log|V|−12(ˆbi−bi)TV−1(ˆbi−bi)−p2log2π−12log|Uk|−12bTiU−1kbi=−plog2π−12log|Uk|−12log|V|−12ˆbTiV−1ˆbi+ˆbTiV−1bi−12bTiV−1bi−12bTiU−1kbiEbi|ˆbi[logN(ˆbi;bi,V)+logN(bi;0,Uk)]=−plog2π−12log|Uk|−12log|V|−12ˆbTiV−1ˆbi+ˆbTiV−1E(bi|ˆbi)−12tr(V−1E(bibTi|ˆbi))−12tr(U−1kE(bibTi|ˆbi)) V=(1ρρ1) Let μi=E(bi|ˆbi) Ebi|ˆbi[logN(ˆbi;bi,V)+logN(bi;0,Uk)]=−2log2π−12log|Uk|−12log(1−ρ2)−12(1−ρ2)(ˆb2i1+ˆb2i2−2ˆbi1μi1−2ˆbi2μi2+E(b2i1|ˆbi)+E(b2i2|ˆbi)−2ˆbi1ˆbi2ρ+2ˆbi1μi2ρ+2ˆbi2μi1ρ−2ρE(bi1bi2|ˆbi))−12tr(U−1kE(bibTi|ˆbi))
γZi(k)=P(zi=k|Xi)=πkN(xi;0,V+Uk)∑Kk′=0πk′N(xi;0,V+Uk′)
V: f(V−1)=n∑i=1K∑k=0γZi(k)[−plog2π−12log|Uk|−12log|V|−12ˆbTiV−1ˆbi+ˆbTiV−1E(bi|ˆbi)−12tr(V−1E(bibTi|ˆbi))−12tr(U−1kE(bibTi|ˆbi))]
f(V−1)′=n∑i=1K∑k=0γZi(k)[12V−12ˆbiˆbTi+E(bi|ˆbi)ˆbTi−12E(bibTi|ˆbi)]=012Vn=n∑i=1K∑k=0γZi(k)[12ˆbiˆbTi−E(bi|ˆbi)ˆbTi+12E(bibTi|ˆbi)]ˆV=1nn∑i=1[ˆbiˆbTi−2E(bi|ˆbi)ˆbTi+E(bibTi|ˆbi)]
ρ: f(ρ)=n∑i=1K∑k=1γZi(k)[−2log2π−12log|Uk|−12log(1−ρ2)−12(1−ρ2)(ˆb2i1+ˆb2i2−2ˆbi1μi1−2ˆbi2μi2+E(b2i1|ˆbi)+E(b2i2|ˆbi)−2ˆbi1ˆbi2ρ+2ˆbi1μi2ρ+2ˆbi2μi1ρ−2ρE(bi1bi2|ˆbi))−12tr(U−1kE(bibTi|ˆbi))]
f(ρ)′=n∑i=1K∑k=1γZi(k)[ρ1−ρ2−ρ(1−ρ2)2(ˆb2i1+ˆb2i2−2ˆbi1μi1−2ˆbi2μi2+E(b2i1|ˆbi)+E(b2i2|ˆbi))−ρ2+1(1−ρ2)2(−ˆbi1ˆbi2+ˆbi1μi2+ˆbi2μi1−E(bi1bi2|ˆbi))]=0ρ(1−ρ2)n−ρn∑i=1K∑k=1γZi(k)(ˆb2i1+ˆb2i2−2ˆbi1μi1−2ˆbi2μi2+E(b2i1|ˆbi)+E(b2i2|ˆbi))−(ρ2+1)n∑i=1K∑k=1γZi(k)(−ˆbi1ˆbi2+ˆbi1μi2+ˆbi2μi1−E(bi1bi2|ˆbi))=0−nρ3−ρ2n∑i=1(−ˆbi1ˆbi2+ˆbi1μi2+ˆbi2μi1−E(bi1bi2|ˆbi))−ρ(n∑i=1(ˆb2i1+ˆb2i2−2ˆbi1μi1−2ˆbi2μi2+E(b2i1|ˆbi)+E(b2i2|ˆbi))−n)−n∑i=1(−ˆbi1ˆbi2+ˆbi1μi2+ˆbi2μi1−E(bi1bi2|ˆbi))=0
The polynomial has either 1 or 3 real roots in (-1, 1).
Algorithm:
Input: X, Ulist, init_rho
Compute loglikelihood
delta = 1
while delta > tol
Given rho, Estimate pi using convex method (current mash method)
M step: update rho: find all roots of polynomial, if it has three real roots, choose the one with higher loglikelihood.
Compute loglikelihood
Update delta
ˆβ|β∼N2(ˆβ;β,(10.50.51))
β∼14δ0+14N2(0,(1000))+14N2(0,(0001))+14N2(0,(1111))
n = 4000
set.seed(1)
n = 4000; p = 2
Sigma = matrix(c(1,0.5,0.5,1),p,p)
U0 = matrix(0,2,2)
U1 = U0; U1[1,1] = 1
U2 = U0; U2[2,2] = 1
U3 = matrix(1,2,2)
Utrue = list(U0=U0, U1=U1, U2=U2, U3=U3)
data = generate_data(n, p, Sigma, Utrue)
m.data = mash_set_data(data$Bhat, data$Shat)
U.c = cov_canonical(m.data)
grid = mashr:::autoselect_grid(m.data, sqrt(2))
Ulist = mashr:::normalize_Ulist(U.c)
xUlist = mashr:::expand_cov(Ulist,grid,usepointmass = TRUE)
result <- mixture.EM2.times(data$Bhat, xUlist, init_rho = c(-0.7,0,0.7), grid=1)
plot(result$result$log_liks)
The estimated ρ is 0.5576293.
m.data.em = mash_set_data(data$Bhat, data$Shat, V = matrix(c(1,result[[1]]$rho,result[[1]]$rho,1),2,2))
U.c = cov_canonical(m.data.em)
m.em = mash(m.data.em, U.c, verbose= FALSE)
null.ind = which(apply(data$B,1,sum) == 0)
The log likelihood is -1.23021110^{4}. There are 37 significant samples, 1 false positives. The RRMSE is 0.5871084.
The estimated pi
is
barplot(get_estimated_pi(m.em), las=2, cex.names = 0.7, main='OverEst', ylim=c(0,0.8))
The ROC curve:
m.data.correct = mash_set_data(data$Bhat, data$Shat, V=Sigma)
m.correct = mash(m.data.correct, U.c, verbose = FALSE)
m.correct.seq = ROC.table(data$B, m.correct)
m.em.seq = ROC.table(data$B, m.em)
sessionInfo()
R version 3.5.1 (2018-07-02)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.6
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] kableExtra_0.9.0 knitr_1.20 mashr_0.2-11 ashr_2.2-10
loaded via a namespace (and not attached):
[1] Rcpp_0.12.18 compiler_3.5.1 pillar_1.3.0
[4] plyr_1.8.4 iterators_1.0.10 tools_3.5.1
[7] digest_0.6.15 viridisLite_0.3.0 evaluate_0.11
[10] tibble_1.4.2 lattice_0.20-35 pkgconfig_2.0.1
[13] rlang_0.2.1 Matrix_1.2-14 foreach_1.4.4
[16] rstudioapi_0.7 yaml_2.2.0 parallel_3.5.1
[19] mvtnorm_1.0-8 xml2_1.2.0 httr_1.3.1
[22] stringr_1.3.1 REBayes_1.3 hms_0.4.2
[25] rprojroot_1.3-2 grid_3.5.1 R6_2.2.2
[28] rmarkdown_1.10 rmeta_3.0 readr_1.1.1
[31] magrittr_1.5 scales_0.5.0 backports_1.1.2
[34] codetools_0.2-15 htmltools_0.3.6 MASS_7.3-50
[37] rvest_0.3.2 assertthat_0.2.0 colorspace_1.3-2
[40] stringi_1.2.4 Rmosek_8.0.69 munsell_0.5.0
[43] pscl_1.5.2 doParallel_1.0.11 truncnorm_1.0-8
[46] SQUAREM_2017.10-1 crayon_1.3.4
This R Markdown site was created with workflowr