figures2-4-slideseq.knit

Code for reproducing Figures 2-4.

# xchr annotations for filtering out xchr genes in single-cell
xchr <- read.delim('../inst/extdata/MGImarkerQuery_20190611_114216.txt')$Symbol

# Slide-seq data
# puck 4 needs to be rotated and shifted
load('../inst/extdata/pucks.3-4.spatial.RData')
puck3.spatial <- puck3.spatial %>%
  mutate(total = CAST+X129) %>%
  mutate(X = xcoord/1000, Y = ycoord/1000)
puck4.spatial <- puck4.spatial %>%
  mutate(total = CAST+X129) %>%
  mutate(X = xcoord/1000, Y = ycoord/1000)


angle <- 261*pi/180
x.shift <- 0.37
y.shift <- -0.16
x.center <- 3.25
y.center <- 3.025
# rotate and shift puck 4
new.X <- (puck4.spatial$X-x.center)*cos(angle) - (puck4.spatial$Y-y.center)*sin(angle)+x.center + x.shift
new.Y <- (puck4.spatial$X-x.center)*sin(angle) + (puck4.spatial$Y-y.center)*cos(angle)+y.center + y.shift
puck4.spatial$X <- new.X
puck4.spatial$Y <- new.Y

# load in RCTD results
# use lower threshold for calling a singlet
# doublet_uncertain and doublet_certains will be reclassified as singlets
# if they meet threshold
SINGLET_THRESH <- 100
rctd.results.puck3 <- readRDS('../inst/extdata/rctd_puck_3.rds')
test <- rctd.results.puck3$results_df
test %>%
  mutate(diff = singlet_score-min_score) %>%
  filter(spot_class != 'reject') %>%
  ggplot(aes(x = diff)) +
  geom_histogram(aes(fill = spot_class), color='white') +
  theme_bw() +
  geom_vline(xintercept=100, lty='dashed') +
  xlab('singlet score - min score') +
  ylab('num pixels') +
  ggtitle('singlet threshold - default of 25, changing to 100')

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

coords <- puck3.spatial %>%
  dplyr::select(X, Y, bead, total) %>%
  group_by(X, Y, bead) %>%
  summarise(nUMI = sum(total))

## `summarise()` has grouped output by 'X', 'Y'. You can override using the `.groups` argument.

puck3.rctd <- rctd.results.puck3$results_df %>%
  mutate(bead = rownames(rctd.results.puck3$results_df)) %>%
  left_join(coords) %>%
  mutate(spot_class = ifelse((spot_class != 'reject') & (singlet_score-min_score<=SINGLET_THRESH), 'singlet', 'reject or doublet'))

## Joining, by = "bead"

rctd.results.puck4 <- readRDS('../inst/extdata/rctd_puck_4.rds')
coords <- puck4.spatial %>%
  dplyr::select(X, Y, bead, total) %>%
  group_by(X, Y, bead) %>%
  summarise(nUMI = sum(total))

## `summarise()` has grouped output by 'X', 'Y'. You can override using the `.groups` argument.

puck4.rctd <- rctd.results.puck4$results_df %>%
  mutate(bead = rownames(rctd.results.puck4$results_df)) %>%
  left_join(coords)  %>%
  mutate(spot_class = ifelse((spot_class != 'reject') & (singlet_score-min_score<=SINGLET_THRESH), 'singlet', 'reject or doublet'))

## Joining, by = "bead"

# combine RCTD annotations and both pucks
puck3.spatial <- left_join(puck3.spatial, puck3.rctd %>% dplyr::select(bead, first_type, spot_class), by = 'bead')
puck4.spatial <- left_join(puck4.spatial, puck4.rctd %>% dplyr::select(bead, first_type, spot_class), by = 'bead')

joint <- bind_rows(puck3.spatial, puck4.spatial)
joint <- joint %>%
  filter(!grepl('mt-',gene)) %>% 
  filter(bead!='TTTTTTTTTTTTTT') %>%
  filter(bead!='GGGGGGGGGGGGGG') %>%
  filter(bead!='TTTGGAATTTAACT') %>% # duplicated bead
  mutate(gene = as.factor(as.character(gene)), bead = as.factor(as.character(bead))) %>%
  group_by(bead, gene, xcoord, ycoord, X, Y, first_type, spot_class) %>%
  summarise(CAST = sum(CAST), X129 = sum(X129), total = sum(total)) %>% # since the pucks have the same bead barcodes which have the same coordinates
  ungroup()

## `summarise()` has grouped output by 'bead', 'gene', 'xcoord', 'ycoord', 'X', 'Y', 'first_type'. You can override using the `.groups` argument.

# get locations at a grid of points
xcoords <- seq(min(joint$X), max(joint$X), by = 0.1)
ycoords <- seq(min(joint$Y), max(joint$Y), by = 0.1)
predictCoords <- expand.grid(xcoords, ycoords)
colnames(predictCoords) <- c('X', 'Y')
predictCoords <- data.frame(predictCoords) %>%
  filter(((X-3.2)^2+(Y-3.1)^2<=2.3^2) | ((X-x.center-x.shift)^2+(Y-y.center-y.shift)^2<=2.3^2))

singlets <- joint %>% 
  filter(spot_class == 'singlet') %>%
  mutate(gene = as.factor(as.character(gene)), bead = as.factor(as.character(bead)))

# only use the following abundant singlet cell types:
# Astrocyte, CA1, CA3, Dentate, Interneuron, Microglia/Macrophages, Oligo, Endothelial tip
# (the RCTD reference also includes more, but for our purposes we are only
# interested in these)
# RCTD reference also misspelled Dentate as Denate

singlets <- singlets %>%
  filter(first_type %in% c('Astrocyte','CA1','CA3','Denate','Interneuron',
                           'Microglia_Macrophages', 'Oligodendrocyte', 'Endothelial_Tip')) %>%
  mutate(first_type = factor(first_type))

remove.beads <- readRDS('../inst/extdata/CA1-CA3-Dentate-remove-these-beads.rds')
singlets <- singlets %>%
  filter(!(bead %in% remove.beads))

# plot a map of cell types called from RCTD
# make a palette out of Set1 and 2
mypal <- data.frame(first_type=unique(singlets$first_type) %>% sort(),fill=c("#4DAF4A","#FF7F00","#377EB8","#984EA3","#E41A1C","#FFD700","#F781BF","#A65628")) %>%
  arrange(first_type)
mypal$fill <- factor(mypal$fill, levels=unique(mypal$fill))
singlets %>%
  dplyr::select(X,Y,first_type) %>%
  distinct() %>%
  left_join(mypal, by='first_type') %>%
  # arrange(first_type) %>%
  # mutate(first_type = factor(first_type, levels=unique(mypal$first_type))) %>%
  ggplot(aes(x = X, y= Y)) +
  geom_point(aes(color=fill),size=0.2) +
  scale_color_identity(guide='legend', name='', labels=(mypal%>%arrange(fill)%>%
                                                          mutate(first_type = gsub('_',' ',first_type)) %>%
                                                          mutate(first_type = gsub('Denate','Dentate',first_type))%>%mutate(first_type = gsub('Microglia Macrophages','Microglia/Macrophages',first_type))%>%mutate(gsub('Entorihinal','Entorhinal',first_type))%>%pull(first_type))) +
  theme_classic() +
  guides(colour = guide_legend(ncol=1,override.aes = list(size=3))) +
  #theme(legend.position='none') +
  xlab('x1') +
  ylab('x2')

ggsave('figure2a.png', width=4,height=4,units='in')

# singlets, with and without using cell type
mcast <- with(singlets, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=CAST, dimnames=list(levels(gene),levels(bead))))
m129 <- with(singlets, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=X129, dimnames=list(levels(gene),levels(bead))))
covariates <- singlets%>% dplyr::select(bead, X, Y, first_type) %>% distinct() 
res10_singlets_nocelltype<-spase(mcast, m129, covariates %>% dplyr::select(-first_type), cores=1, df=10, min.umi=100)

## 4140 genes pass min threshold of 100 pixels, 100 UMI

## found 3 columns in covariates; going to assume that first column is pixel names, 2nd and 3rd column are 2D coordinates

res10_singlets_celltype<-spase(mcast, m129, covariates, cores=2, df=10, min.umi=100)

## 4140 genes pass min threshold of 100 pixels, 100 UMI

## using first_type as baseline covariates

singlets %>%
  group_by(gene) %>%
  summarise(totalSpots = n()) %>%
  ggplot(aes(x = log2(totalSpots))) +
  geom_histogram(bins=40) +
  geom_vline(xintercept=log2(100), lty='dashed') +
  theme_classic() +
  scale_y_continuous(expand=c(0,0)) +
  xlab('log2(total pixels)') +
  ylab('Number of genes')

ggsave('fig3-totalpixels-singlets.png', height=2, width=2)

# summary of number of pixels for each gene
summary(res10_singlets_nocelltype$result$totalSpots)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   100.0   140.0   210.0   357.8   384.2  7879.0

# summary of number of UMI per pixel for each gene
summary(res10_singlets_nocelltype$result$totalUMI/res10_singlets_nocelltype$result$totalSpots)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.040   1.060   1.088   1.093   5.451

# summary of baseline overdispersion for each gene
summary(res10_singlets_nocelltype$result$phi.baseline)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.1134  0.3380  0.3985  0.6239  1.0000      63

# summary of spatial model overdispersion for each gene
summary(res10_singlets_nocelltype$result$phi.full)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## 0.00000 0.08877 0.31225 0.38003 0.60216 1.00000      63

# summary of number of pixels for each gene
summary(res10_singlets_celltype$result$totalSpots)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   100.0   140.0   210.0   357.8   384.2  7879.0

# summary of number of UMI per pixel for each gene
summary(res10_singlets_celltype$result$totalUMI/res10_singlets_nocelltype$result$totalSpots)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.040   1.060   1.088   1.093   5.451

# summary of baseline overdispersion for each gene
summary(res10_singlets_celltype$result$phi.baseline)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.0917  0.3042  0.3634  0.5652  1.0000     669

# summary of spatial model overdispersion for each gene
summary(res10_singlets_celltype$result$phi.full)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.0690  0.2744  0.3454  0.5424  1.0000     669

res10_singlets_nocelltype$result$xchr <- ifelse(res10_singlets_nocelltype$result$gene %in% xchr, 'xchr', 'autosome')
res10_singlets_nocelltype$result %>%
  ggplot(aes(x = phi.baseline, y = phi.full)) +
  geom_point(aes(color=log2(totalSpots)),alpha=1, size=0.05) +
  scale_color_viridis(name = 'log2(total pixels)') +
  geom_abline(intercept=0, slope=1, lty='dashed', color='red',size=0.5) +
  geom_rug(alpha=0.01) +
  theme_classic() +
  theme(legend.position = 'none') +
  xlab(TeX(r'(Estimated $\phi$ null)')) +
  ylab(TeX(r'(Estimated $\phi$ spatial)'))

## Warning: Removed 63 rows containing missing values (geom_point).

ggsave('overdispersions-singlets.png', height=2, width=2)

## Warning: Removed 63 rows containing missing values (geom_point).

res10_singlets_nocelltype$result$xchr <- ifelse(res10_singlets_nocelltype$result$gene %in% xchr, 'X - no cell type', 'A - no cell type')
res10_singlets_celltype$result$xchr <- ifelse(res10_singlets_celltype$result$gene %in% xchr, 'X - cell type', 'A - cell type')
dd <- res10_singlets_nocelltype$result %>%
  bind_rows(res10_singlets_celltype$result) %>%
  mutate(xchr = factor(xchr, levels=c('A - no cell type', 'A - cell type',
                                      'X - no cell type', 'X - cell type'))) 
dd %>%
  filter(xchr=='A - no cell type') %>%
  pull(chisq.p) %>%
  summary()

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.1906  0.4201  0.4465  0.6897  0.9994      59

dd %>%
  filter(xchr=='A - cell type') %>%
  pull(chisq.p) %>%
  summary()

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##  0.0000  0.1937  0.4356  0.4497  0.6944  0.9999     643

dd %>%
  filter(xchr=='X - no cell type') %>%
  pull(chisq.p) %>%
  summary()

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## 0.00000 0.03598 0.18306 0.29680 0.49113 0.98423       4

dd %>%
  filter(xchr=='X - cell type') %>%
  pull(chisq.p) %>%
  summary()

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## 0.00000 0.06513 0.32665 0.33587 0.54245 0.99814      26

dd %>%
  mutate(p.value.bin = cut(chisq.p, breaks=seq(0,1,0.25))) %>%
  group_by(xchr, p.value.bin) %>%
  summarise(num.genes = n()) %>%
  filter(!is.na(p.value.bin)) %>%
  group_by(xchr) %>%
  mutate(total = sum(num.genes)) %>%
  mutate(prop = num.genes/total) %>%
  ggplot(aes(x = p.value.bin, y = prop)) +
  geom_bar(stat='identity') +
  theme_classic() +
  scale_y_continuous(expand=c(0,0)) +
  facet_wrap(xchr ~ ., nrow=1) +
  xlab('p-value') +
  ylab('Fraction of genes') +
  theme_bw() +
  theme(axis.text.x = element_text(angle=45, hjust=1))

## `summarise()` has grouped output by 'xchr'. You can override using the `.groups` argument.

ggsave('fig2b-pvals-singlets.png', height=3, width=6)

ellipseX <- 3
ellipseY <- 3.2
ellipseWidth <- 1.9
ellipseHeight <- 1.1
ellipseAngle <- pi/2.3
predictCoordsEllipse <- predictCoords %>%
  filter(((X-ellipseX)*cos(ellipseAngle)+(Y-ellipseY)*sin(ellipseAngle))^2/ellipseWidth^2 + ((X-ellipseX)*sin(ellipseAngle)+(Y-ellipseY)*cos(ellipseAngle))^2/ellipseHeight^2 <= 1)

#plotSpase(mcast, m129, covariates, res10_singlets_nocelltype, coords = predictCoords, size.scale = F, point.size=1, point.outline=T, crosshairs=T, cross.x1 = 3, cross.x2 = 3, genes=c('Hpca'), save='hpca')
plotSpase(mcast, m129, covariates, res10_singlets_nocelltype, coords = predictCoords, size.scale = F, point.size=1, point.outline=T, crosshairs=T, cross.x1 = 3, cross.x2 = 3, genes=c('Hpca'))

## using first_type as baseline covariates

## [1] "Hpca"

result.nocelltype <- res10_singlets_nocelltype$result %>%
  filter(qval <= 0.01) %>%
  arrange(qval)
result.nocelltype$xchr <- ifelse(result.nocelltype$gene %in% xchr, T, F)
# total number of significant genes with min pixels 400:
nrow(result.nocelltype)

## [1] 10

# total number of x chromosome genes 
sum(result.nocelltype$xchr)

## [1] 6

xtable(result.nocelltype %>% dplyr::select(gene,totalUMI,chisq.p,qval,xchr),display=c('d', 's', 'd', 'e', 'e', 's')) #supp table 2

result.celltype <- res10_singlets_celltype$result %>%
  filter(qval <= 0.01) %>%
  arrange(qval)
result.celltype$xchr <- ifelse(result.celltype$gene %in% xchr, T, F)
# total number of significant genes with min pixels 400:
nrow(result.celltype)

## [1] 3

# total number of x chromosome genes 
sum(result.celltype$xchr)

## [1] 2

xtable(result.celltype %>% dplyr::select(gene,totalUMI,chisq.p,qval,xchr),display=c('d', 's', 'd', 'e', 'e', 's')) #supp table 2

# repeat above two blocks for k=5, 15, 20 to see if reproducible
res5.nocelltype <- spase(mcast, m129, covariates %>% dplyr::select(-first_type), cores=2, df=5, min.umi=100)

## 4140 genes pass min threshold of 100 pixels, 100 UMI

## found 3 columns in covariates; going to assume that first column is pixel names, 2nd and 3rd column are 2D coordinates

res10.nocelltype <- spase(mcast, m129, covariates %>% dplyr::select(-first_type), cores=2, df=15, min.umi=100)

## 4140 genes pass min threshold of 100 pixels, 100 UMI
## found 3 columns in covariates; going to assume that first column is pixel names, 2nd and 3rd column are 2D coordinates

res20.nocelltype <- spase(mcast, m129, covariates %>% dplyr::select(-first_type), cores=2, df=20, min.umi=100)

## 4140 genes pass min threshold of 100 pixels, 100 UMI
## found 3 columns in covariates; going to assume that first column is pixel names, 2nd and 3rd column are 2D coordinates

result.nocelltype.5 <- res5.nocelltype$result %>%
  filter(qval <= 0.01) %>%
  arrange(qval)
result.nocelltype.5$xchr <- ifelse(result.nocelltype.5$gene %in% xchr, T, F)
# total number of significant genes with min total UMI 1000, min pixels 100:
nrow(result.nocelltype.5)

## [1] 6

# total number of x chromosome genes 
sum(result.nocelltype.5$xchr)

## [1] 1

xtable(result.nocelltype.5 %>% dplyr::select(gene,totalUMI,chisq.p,qval,xchr),display=c('d', 's', 'd', 'e', 'e', 's'))

result.nocelltype.10 <- res10.nocelltype$result %>%
  filter(qval <= 0.01) %>%
  arrange(qval)
result.nocelltype.10$xchr <- ifelse(result.nocelltype.10$gene %in% xchr, T, F)
# total number of significant genes with min total UMI 1000, min pixels 100:
nrow(result.nocelltype.10)

## [1] 8

# total number of x chromosome genes 
sum(result.nocelltype.10$xchr)

## [1] 4

xtable(result.nocelltype.10 %>% dplyr::select(gene,totalUMI,chisq.p,qval,xchr),display=c('d', 's', 'd', 'e', 'e', 's'))

result.nocelltype.20 <- res20.nocelltype$result %>%
  filter(qval <= 0.01) %>%
  arrange(qval)
result.nocelltype.20$xchr <- ifelse(result.nocelltype.20$gene %in% xchr, T, F)
# total number of significant genes with min total UMI 1000, min pixels 100:
nrow(result.nocelltype.20)

## [1] 8

# total number of x chromosome genes 
sum(result.nocelltype.20$xchr)

## [1] 4

xtable(result.nocelltype.20 %>% dplyr::select(gene,totalUMI,chisq.p,qval,xchr),display=c('d', 's', 'd', 'e', 'e', 's'))

# use all pixels, spatial coordinates, to increase resolution for estimated p
mcast <- with(joint, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=CAST, dimnames=list(levels(gene),levels(bead))))
m129 <- with(joint, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=X129, dimnames=list(levels(gene),levels(bead))))
covariates <- joint%>% dplyr::select(bead, X, Y) %>% distinct() 
# filter out beads with more than two locations
dupebeads <- covariates %>% group_by(bead) %>% summarise(n=n()) %>% filter(n>1) %>% pull(bead)
covariates <- covariates %>% filter(!(bead %in% dupebeads))
res15.nocelltype <- spase(mcast, m129, covariates, cores=2, df=15, min.umi=400)

## 2265 genes pass min threshold of 100 pixels, 400 UMI

## found 3 columns in covariates; going to assume that first column is pixel names, 2nd and 3rd column are 2D coordinates

#plotSpase(mcast, m129, covariates, res15.nocelltype, coords=predictCoords, genes=c('Tspan7', 'Plp1', 'Xist', 'Tceal3'), crosshairs=F, save='fig3-')
result.nocelltype <- res15.nocelltype$result
result.nocelltype$xchr <- ifelse(result.nocelltype$gene %in% xchr, T, F)
plotSpase(mcast, m129, covariates, res15.nocelltype, coords=predictCoords, genes=c('Tspan7', 'Plp1', 'Xist', 'Tceal3'), crosshairs=F)

## found 3 columns in covariates; going to assume that first column is pixel names, 2nd and 3rd column are 2D coordinates

## [1] "Tspan7"

## [1] "Plp1"

## Warning: Removed 1 rows containing missing values (geom_point).

## [1] "Xist"

## [1] "Tceal3"

#plotSpase(mcast, m129, covariates, res15.nocelltype, coords=predictCoords, genes=c('Ptgds', 'Nrip3', 'Sst'), save='fig4-autosome')
plotSpase(mcast, m129, covariates, res15.nocelltype, coords=predictCoords, genes=c('Ptgds', 'Nrip3', 'Sst'))

## found 3 columns in covariates; going to assume that first column is pixel names, 2nd and 3rd column are 2D coordinates

## [1] "Ptgds"

## [1] "Nrip3"

## [1] "Sst"

# merge the xchr genes except for xist 
xchr.idx <- which(rownames(mcast) %in% xchr)
xchr.idx <- xchr.idx[-which(xchr.idx == (which(rownames(mcast)=='Xist')))]
mcast.xchr <- colSums(mcast[xchr.idx,])
m129.xchr <- colSums(m129[xchr.idx,])
mcast.xchr <- t(as.matrix(mcast.xchr))
m129.xchr <- t(as.matrix(m129.xchr))
rownames(mcast.xchr) <- 'merged xchr'
rownames(m129.xchr) <- 'merged xchr'
res.xchr <- spase(mcast.xchr, m129.xchr, covariates, cores=1, df=15)

## 1 genes pass min threshold of 100 pixels, 500 UMI

## found 3 columns in covariates; going to assume that first column is pixel names, 2nd and 3rd column are 2D coordinates

#plotSpase(mcast.xchr, m129.xchr, covariates, res.xchr, coords=predictCoords, cross.x1=3, cross.x2=3, save='figure3xchrmerged')
plotSpase(mcast.xchr, m129.xchr, covariates, res.xchr, coords=predictCoords, cross.x1=3, cross.x2=3)

## found 3 columns in covariates; going to assume that first column is pixel names, 2nd and 3rd column are 2D coordinates

## [1] "merged xchr"

## Warning: Removed 3 rows containing missing values (geom_point).

#plotSpase(mcast, m129, covariates, res15.nocelltype, genes=c('Xist', 'Tspan7', 'Plp1', 'Tceal6'), coords=predictCoords,cross.x1=3, cross.x2=3, save='figure3xchr')
plotSpase(mcast, m129, covariates, res15.nocelltype, genes=c('Xist', 'Tspan7', 'Plp1', 'Tceal3'), coords=predictCoords,cross.x1=3, cross.x2=3)

## found 3 columns in covariates; going to assume that first column is pixel names, 2nd and 3rd column are 2D coordinates

## [1] "Xist"

## [1] "Tspan7"

## [1] "Plp1"

## Warning: Removed 1 rows containing missing values (geom_point).

## [1] "Tceal3"

mcast <- with(singlets, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=CAST, dimnames=list(levels(gene),levels(bead))))
m129 <- with(singlets, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=X129, dimnames=list(levels(gene),levels(bead))))
covariates <- singlets%>% dplyr::select(bead, first_type) %>% distinct() 
ss <- scase(mcast, m129, min.cells=300, cores=2)

## 1399 genes pass min threshold of 300 cells

# test for gradient within cell type for gene/cell type combos with high
# enough cells and total UMI
# THIS LOOP TAKES A WHILE WHICH IS WHY I SAVED THE RESULTS
# singlets.celltype <- singlets %>%
#   group_by(gene, first_type) %>%
#   summarise(nbeads=n(), numis = sum(CAST+X129)) %>%
#   arrange(desc(nbeads, numis)) %>%
#   filter(nbeads > 100, numis > 100)
# singlets.celltype$chisq.p <- NA
# for (i in 1:nrow(singlets.celltype)) {
#   print(i)
#   singlets.subset <- singlets %>%
#     filter(gene==singlets.celltype$gene[i], first_type==singlets.celltype$first_type[i]) %>%
#     mutate(gene=factor(gene), first_type=factor(first_type), bead=factor(bead))
#   mcast <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=CAST,dimnames=list(levels(gene),levels(bead))))
#   m129 <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=X129,dimnames=list(levels(gene),levels(bead))))
#   covariates <- singlets.subset%>% dplyr::select(bead, X, Y) %>% distinct()
#   res.subset <- spase(mcast, m129, covariates, cores=1, df=5, min.umi = 100)
#   singlets.celltype$chisq.p[i] <- res.subset$result$chisq.p
# }
# saveRDS(singlets.celltype, file='../inst/extdata/ASE_paper_singlets_celltype_results.rds')
singlets.celltype <- readRDS('../inst/extdata/ASE_paper_singlets_celltype_results.rds')
singlets.celltype$qval <- p.adjust(singlets.celltype$chisq.p, method='BH')
singlets.celltype <- singlets.celltype %>% arrange(qval) 
colnames(singlets.celltype) <- c('Gene', 'Cell type', 'Beads', 'UMIs', 'pval', 'qval')
xtable(singlets.celltype %>% filter(qval<0.01),display=c('d','s', 's', 'd', 'd', 'e', 'e'))

# investigate/plot cell type specific gradients
df<-5
singlets.subset <- singlets %>%
  filter(gene=='Tspan7', first_type=='Astrocyte') %>%
  mutate(gene=factor(gene), first_type=factor(first_type), bead=factor(bead)) 
mcast <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=CAST,dimnames=list(levels(gene),levels(bead))))
m129 <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=X129,dimnames=list(levels(gene),levels(bead))))
covariates <- singlets.subset%>% dplyr::select(bead, X, Y) %>% distinct()
res.subset <- spase(mcast, m129, covariates, cores=1, df=df, min.umi = 100)

## 1 genes pass min threshold of 100 pixels, 100 UMI

## found 3 columns in covariates; going to assume that first column is pixel names, 2nd and 3rd column are 2D coordinates

#plotSpase(mcast, m129, covariates, res.subset, coords=predictCoords, crosshairs=F, save=paste0('tspan7-Astrocyte','-df-',df))
plotSpase(mcast, m129, covariates, res.subset, coords=predictCoords, crosshairs=F)

## found 3 columns in covariates; going to assume that first column is pixel names, 2nd and 3rd column are 2D coordinates

## [1] "Tspan7"

# investigate/plot cell type specific gradients
df<-15
singlets.subset <- singlets %>%
  filter(gene=='Plp1', first_type=='Oligodendrocyte') %>%
  mutate(gene=factor(gene), first_type=factor(first_type), bead=factor(bead)) 
mcast <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=CAST,dimnames=list(levels(gene),levels(bead))))
m129 <- with(singlets.subset, sparseMatrix(i=as.numeric(gene), j=as.numeric(bead), x=X129,dimnames=list(levels(gene),levels(bead))))
covariates <- singlets.subset%>% dplyr::select(bead, X, Y) %>% distinct()
res.subset <- spase(mcast, m129, covariates, cores=1, df=df, min.umi = 100)

## 1 genes pass min threshold of 100 pixels, 100 UMI

## found 3 columns in covariates; going to assume that first column is pixel names, 2nd and 3rd column are 2D coordinates

#plotSpase(mcast, m129, covariates, res.subset, coords=predictCoords, crosshairs=F, save=paste0('plp1-oligodendrocyte-df-',df))
plotSpase(mcast, m129, covariates, res.subset, coords=predictCoords, crosshairs=F)

## found 3 columns in covariates; going to assume that first column is pixel names, 2nd and 3rd column are 2D coordinates

## [1] "Plp1"

# get individual CI results for each cell type using scase
# which ones are significant at qval<0.01
signif0.01 <- res10.nocelltype$result %>% arrange(qval) %>% filter(qval<0.01)
signif0.01$xchr <- ifelse(signif0.01$gene%in%xchr, T, F)
mcast <- with(singlets, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=CAST, dimnames=list(levels(gene),levels(bead))))
m129 <- with(singlets, sparseMatrix(i = as.numeric(gene), j = as.numeric(bead), x=X129, dimnames=list(levels(gene),levels(bead))))
covariates <- singlets%>% dplyr::select(bead, X, Y, first_type) %>% distinct() 
sccov <- covariates[,c(1,4)]
sccov$first_type <- as.factor(sccov$first_type)
res_celltype_sc <- scase(mcast, m129, sccov, cores=2, genes = signif0.01$gene)

## assuming covariates first column is cell, using first_type as baseline covariates

## using user-supplied genes

# for each gene, plot the estimated p, CI, and total UMI
res_celltype_sc <- res_celltype_sc %>%
  mutate(p.low = expit(logit.p-2*logit.p.sd),
         p.high = expit(logit.p+2*logit.p.sd),
         p = expit(logit.p)) %>%
  left_join(mypal, by =c('lvl'='first_type'))

res_celltype_sc %>%
  ggplot(aes(y = totalUMI)) +
  geom_point(aes(x = p, color=fill)) +
  geom_errorbarh(aes(xmin = p.low, xmax=p.high, color=fill)) +
  scale_color_identity() +
  geom_vline(xintercept=0.5, lty='dashed', color='grey') +
  facet_wrap(gene ~ ., scales='free') +
  scale_x_continuous(limits=c(0,1),breaks=c(0,0.25,0.50,0.75,1)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle=45, hjust=1)) +
  xlab('Estimated p maternal') +
  ylab('Total UMI')

#ggsave('figure4-celltype-cis.png', height=8, width=7)

res_celltype_sc %>%
  filter(gene %in% c('Nrip3','Ptgds','Sst')) %>%
  ggplot(aes(y = totalUMI)) +
  geom_point(aes(x = p, color=fill),size=3) +
  geom_errorbarh(aes(xmin = p.low, xmax=p.high, color=fill)) +
  scale_color_identity() +
  geom_vline(xintercept=0.5, lty='dashed', color='grey') +
  facet_wrap(gene ~ ., nrow=1) +
  scale_x_continuous(limits=c(0,1),breaks=c(0,0.25,0.50,0.75,1)) +
  theme_bw() +
  theme(axis.text.x = element_text(angle=45, hjust=1))+
  xlab('Estimated p maternal') +
  ylab('Total UMI')

#ggsave('figure4-celltype-cis-top.png', height=3, width=5)

# plot only oligodendrocytes for Ptgds
both.idx <- which(covariates$first_type%in%c('Oligodendrocyte','Endothelial_Tip'))
#both.idx <- c(both.idx, grep('Endothelial', covariates$first_type))
both.beads <- covariates$bead[both.idx]
y <- mcast['Ptgds',both.beads]
total <- y+m129['Ptgds',both.beads]
idx <- which(total>0)
y<-y[idx]; total<-total[idx]; covari<-covariates[both.idx,]
covari <- covari[idx,]
covari$shape <- ifelse(covari$first_type=='Oligodendrocyte',21,24)
ggplot(cbind(data.frame(y=y, total=total), covari),
       aes(x=X,y=Y)) +
  geom_point(aes(fill=y/total,size=total,shape=shape),  color='black') +
  scale_fill_gradient2(name='CAST/total', low='blue', mid='white', high='red',
                       midpoint=0.5, limits=c(0,1)) +
  scale_shape_identity(labels=c('Oligo', 'Endo')) +
  scale_size_continuous(limits=c(1,43), breaks=c(1,10,20,30,40)) +
  ylim(c(0.5,5.5)) +
  xlim(c(1,6)) +
  xlab('x1') +
  ylab('x2') +
  theme_classic()

## Warning: Removed 1 rows containing missing values (geom_point).

#ggsave('bottom legend.png', height=3,width=7)