Last updated: 2017-10-27
Code version: 38e9100
Here we read in data and fit the two varbvs models:
d=readRDS("../data/Thyroid.FMO2.1Mb.RDS")
storage.mode(d$X) <- "double"
d$X <- d$X[,3501:4500]
X = d$X
y = d$y
Z = d$Z
n <- nrow(X)
p <- ncol(X)
set.seed(1)
fit <- varbvs::varbvs(X,Z,y,verbose = FALSE)
sd.grid <- c(0,10^seq(-2,1,length.out = 19))
set.seed(1)
fit2 <- varbvs::varbvsmix(X,Z,y,sa =sd.grid,verbose=FALSE)
Now try with Gao’s intialization
autoselect.mixsd = function(betahat,sebetahat,mult = sqrt(2)){
# To avoid exact measure causing (usually by mistake)
sebetahat = sebetahat[sebetahat!=0]
# so that the minimum is small compared with measurement precision
sigmaamin = min(sebetahat)/10
if (all(betahat^2 <= sebetahat^2)) {
# to deal with the occassional odd case where this could happen; 8 is arbitrary
sigmaamax = 8*sigmaamin
} else {
# this computes a rough largest value you'd want to use,
# based on idea that sigmaamax^2 + sebetahat^2 should be at least betahat^2
sigmaamax = 2*sqrt(max(betahat^2-sebetahat^2))
}
if(mult==0){
return(c(0,sigmaamax/2))
} else {
npoint = ceiling(log2(sigmaamax/sigmaamin)/log2(mult))
return(mult^((-npoint):0) * sigmaamax)
}
}
univariate_regression = function(X,y,Z = NULL){
P = dim(X)[2]
if (!is.null(Z)) {
y = lm(y~Z)$residuals
}
output = matrix(0,nrow = P,ncol = 2)
for(i in 1:P){
g = summary(lm(y ~ X[,i]))
output[i,] = g$coefficients[2,1:2]
}
return(list(betahat = output[,1], sebetahat = output[,2],
residuals = y))
}
lasso_reorder = function(X, y) {
# perform lasso regression and reorder regressors by "importance"
fit.glmnet <- glmnet::glmnet(X, y, intercept = F)
beta_path = coef(fit.glmnet)[-1,]
K = dim(beta_path)[2]
path_order = c()
for (k in 1:K) {
crt_path = which(beta_path[,k] != 0)
if (length(crt_path) != 0 & length(path_order) == 0) {
path_order = c(path_order, crt_path)
} else if(length(crt_path) != 0) {
path_order = c(path_order, crt_path[-which(crt_path %in% path_order)] )
}
}
path_order = unname(path_order)
index_order = c(path_order, seq(1,dim(beta_path)[1])[-path_order])
return(index_order)
}
initial = univariate_regression(X, y ,Z)
mixsd = autoselect.mixsd(initial$betahat, initial$sebetahat)
mu_zero = matrix(0, ncol = length(mixsd)+1, nrow = ncol(X))
alpha_zero = matrix(1/ncol(X), ncol = length(mixsd)+1,nrow = ncol(X))
alpha_zero[,1] = 1 - length(mixsd) / ncol(X)
index_order = lasso_reorder(X, initial$residuals)
fit3 = varbvs::varbvsmix(X[, index_order], Z, y, sa = c(0,mixsd^2),
mu = mu_zero,
alpha = alpha_zero,verbose=FALSE)
fit4 = varbvs::varbvsmix(X[, index_order], Z, y, sa = sd.grid,
mu = mu_zero,
alpha = alpha_zero,verbose=FALSE)
fit5 = varbvs::varbvs(X[,index_order],Z,y,verbose=FALSE)
Compare the fit of the fitted values
# compute fitted values from varbvsmix fit
fv = function(X,Z,fit){
bhat = rowSums(fit$alpha*fit$mu)
return(X %*% bhat + fit$mu.cov[1] + Z %*% fit$mu.cov[-1])
}
# fitted values for varbvs fit
fv.b = function(X,Z,fit){
X %*% fit$beta + fit$beta.cov[1] + Z %*% fit$beta.cov[-1]
}
fitted.values2 = fv(X,Z,fit2)
fitted.values3 = fv(X[, index_order],Z,fit3)
fitted.values4 = fv(X[, index_order],Z,fit4)
fitted.values = fv.b(X,Z,fit)
fitted.values5 = fv.b(X[,index_order],Z,fit5)
mean((d$y - fitted.values)^2)
[1] 0.2357036
mean((d$y - fitted.values2)^2)
[1] 0.2515889
mean((d$y - fitted.values3)^2)
[1] 0.2436024
mean((d$y - fitted.values4)^2)
[1] 0.239161
mean((d$y - fitted.values5)^2)
[1] 0.2357043
Compute BFs
#simple bf calculation X an n by p matrix of genoytpes
log10BF = function(X,y,sigmaa){
p = ncol(X)
n = nrow(X)
X = cbind(rep(1,n),X)
invnu = diag(c(0,rep(1/sigmaa^2,p)))
invOmega = invnu + t(X) %*% X
B = solve(invOmega, t(X) %*% cbind(y))
invOmega0 = n
return(-0.5*log10(det(invOmega)) + 0.5*log10(invOmega0) - p*log10(sigmaa) -(n/2) * (log10( t(y- X %*% B) %*% y) - log10(t(y) %*% y - n*mean(y)^2) ))
}
sum(fit$pip>0.95)
[1] 4
sum(fit2$lfsr<0.05)
[1] 3
sum(fit3$lfsr<0.05)
[1] 3
sum(fit4$lfsr<0.05)
[1] 3
sum(fit5$pip>0.95)
[1] 4
log10BF(X[,which(fit$pip>0.95)], initial$residuals,0.5)
[,1]
y 29.522
log10BF(X[,which(fit2$lfsr<0.05)], initial$residuals,0.5)
[,1]
y 20.72821
log10BF(X[,index_order][,which(fit3$lfsr<0.05)], initial$residuals,0.5)
[,1]
y 24.37681
log10BF(X[,index_order][,which(fit4$lfsr<0.05)], initial$residuals,0.5)
[,1]
y 25.95124
log10BF(X[,index_order][,which(fit5$pip>0.95)], initial$residuals,0.5)
[,1]
y 29.522
sessionInfo()
R version 3.3.2 (2016-10-31)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: OS X El Capitan 10.11.6
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
loaded via a namespace (and not attached):
[1] Rcpp_0.12.13 codetools_0.2-15 lattice_0.20-35
[4] foreach_1.4.3 glmnet_2.0-10 digest_0.6.12
[7] rprojroot_1.2 grid_3.3.2 backports_1.1.0
[10] git2r_0.19.0 magrittr_1.5 evaluate_0.10
[13] varbvs_2.4-9 stringi_1.1.5 latticeExtra_0.6-28
[16] Matrix_1.2-10 rmarkdown_1.6 RColorBrewer_1.1-2
[19] iterators_1.0.8 tools_3.3.2 stringr_1.2.0
[22] yaml_2.1.14 htmltools_0.3.6 knitr_1.16
This R Markdown site was created with workflowr