library(Seurat)
library(data.table)
library(dplyr)
library(SeuratDisk)
library(ggplot2)
library(patchwork)
library(clusterProfiler)
library(org.Hs.eg.db)
library(enrichplot)
library(GOSemSim)
library(gridExtra)
options(repr.plot.width=16, repr.plot.height=8)
work.dir<-"~/cluster/projects/u19_multiomics"
immune.combined<-readRDS(paste0(work.dir, "/analyses/lungs_and_spleens_merged.rds"))
lungs@meta.data%>%group_by(majority_voting)%>%summarise(mean_RNA=median(nCount_RNA))
majority_voting | mean_RNA |
---|---|
<chr> | <dbl> |
CD16- NK cells | 2018.0 |
CD16+ NK cells | 1939.5 |
Classical monocytes | 1631.5 |
Epithelial cells | 3113.5 |
ILC3 | 2203.0 |
Memory B cells | 739.0 |
Naive B cells | 1705.0 |
Plasma cells | 4437.0 |
Regulatory T cells | 2275.5 |
Tcm/Naive helper T cells | 2030.0 |
Tem/Trm cytotoxic T cells | 1854.0 |
Type 17 helper T cells | 2083.5 |
ct.clusters<-read.csv(paste0(work.dir, "/analyses/merged_scRNA_Immune_low_labels.csv"))
immune.combined@meta.data[,"majority_voting"]<-ct.clusters$majority_voting
# Visualization
p1 <- DimPlot(immune.combined, reduction = "umap",
group.by = "full.ident", label=TRUE, repel = TRUE) +
ggtitle("Tissue of origin")
p2 <- DimPlot(immune.combined, reduction = "umap",
group.by = "majority_voting", label = TRUE, repel = TRUE) +
ggtitle("cell types from majority voting")
p1 + p2
DimPlot(immune.combined, split.by="tissue.ident")
Idents(immune.combined)<-"majority_voting"
levels(immune.combined@active.ident)
write.table(immune.combined@meta.data, "../../output/all_merged_metadata.txt", sep="\t", quote = F)
df<-data.frame(table(immune.combined@meta.data[, c("tissue.ident", "majority_voting")]))
ggplot(df,
aes(x = majority_voting, y=Freq,
fill=tissue.ident, label=Freq,
width=0.5)) +
geom_col(position="dodge2") + geom_label(size=5,position = position_dodge2(width=0.8)) +
theme_light(base_size = 15) +
theme(axis.text.x=element_text(size=16, vjust=0.7, angle = 45))
immune.combined@meta.data[,"merged"]<-immune.combined$majority_voting
immune.combined@meta.data$merged[grep("T cells", immune.combined$merged)]<-"T cells"
df<-data.frame(table(immune.combined@meta.data[, c("tissue.ident", "merged")]))
ggplot(df,
aes(x = merged, y=Freq,
fill=tissue.ident, label=Freq,
width=0.5)) +
geom_col(position="dodge2") + geom_label(size=5,position = position_dodge2(width=0.8)) +
theme_light(base_size = 15) +
theme(axis.text.x=element_text(size=16, vjust=0.7, angle = 45))
Differential expression test was performed on normalized values stored in immune.combine[["RNA"]]@data
. Here are the normalization steps:
Check how different cutoffs for genes that are detected in a minimum fraction of cells in either of the two populations
Idents(immune.combined)<-"merged"
DEG.test<-FindMarkers(immune.combined, ident.1="lungs", group.by="tissue.ident",
subset.ident="T cells", test.use = "MAST")
DEG.test<-read.table("../../output/DE_analysis_mast/merged_T_cells_DE_results.csv", sep=",")
DEG.test%>%filter(p_val_adj<0.01)%>%group_by(avg_log2FC>0)%>%summarise(n=n())
avg_log2FC > 0 | n |
---|---|
<lgl> | <int> |
FALSE | 642 |
TRUE | 580 |
ggplot(DEG.test, aes(x=avg_log2FC, y=-log10(p_val),
colour=avg_log2FC>0)) +
geom_point(size=2) +
geom_point(data=DEG.test%>%filter(p_val_adj>0.05),
aes(x=avg_log2FC, y=-log10(p_val)),
colour=alpha("grey",0.7), size=2) +
theme_light(base_size=15) + theme(legend.position = "none")
DEG.test<-read.table("../../output/DE_analysis_mast/Naive_B_cells_DE_results.csv", sep=",")
for(c in levels(immune.combined@active.ident)){
DEG.test<-FindMarkers(immune.combined, ident.1="lungs", group.by="tissue.ident", min.pct=0.25,
subset.ident=c, test.use = "MAST")
#uniform the format of separator
c<-gsub("/", "_", c)
c<-gsub(" ", "_", c)
write.table(DEG.test, sprintf("../../output/DE_analysis_mast_minpct_0.25/%s_DE_results.csv", c), quote = F, sep=",")
}
null.list<-list()
for(i in 1:10){
null<-readRDS(sprintf("%s/analyses/Tem_Trm_cytotoxic_T_cells.null.%s.rds", work.dir, i))
null.list<-append(null.list, null)
}
genes<-unique(unlist(lapply(null.list, function(i){rownames(i)})))
meta.pvals<-lapply(genes, function(i){
pvals<-c()
for(j in 1:1000){
pvals<-c(pvals, null.list[[j]][rownames(null.list[[j]])==i,]$p_val)
}
combined<--2*sum(log(pvals))
meta.p<-pchisq(combined, df=2*length(pvals[!is.na(pvals)]), lower.tail=FALSE)
return(list(p=pvals, stats=combined, meta.p=meta.p))
})
#FOSB gene
pvals<-meta.pvals[[which(genes=="FOSB")]]$p
test.stats<--2*sum(log(pvals))
print(test.stats)
[1] 2136.512
hist(meta.pvals[[which(genes=="FOSB")]]$p, xlab="p-values under the null", main="FOSB gene",
cex=2, cex.axis=2, cex.lab=2)
hist(unlist(lapply(meta.pvals, function(i){i$stats})), main="",
xlab="test statistics distribution", cex=2, cex.axis=2, cex.lab=2)
hist(unlist(lapply(meta.pvals, function(i){i$meta.p})), main="",
xlab="p-value null distribution", cex=2, cex.axis=2, cex.lab=2)
gene.pvals<-unlist(lapply(meta.pvals, function(i){i$meta.p}))
print(sprintf("%s percent of genes with p-value < 0.05", 100*round(sum(gene.pvals<0.05)/length(gene.pvals),2)))
[1] "34 percent of genes with p-value < 0.05"
DEG.test<-read.csv("../../output/DE_analysis_mast/Tem_Trm_cytotoxic_T_cells_DE_results.csv")
length(genes[gene.pvals>=0.05])
DE.genes<-rownames(DEG.test %>% filter(p_val_adj<0.01))
length(intersect(DE.genes, genes[gene.pvals>=0.05]))/length(DE.genes)
DE.calibrated<-DEG.test[rownames(DEG.test)%in% intersect(DE.genes, genes[gene.pvals>=0.05]),]
eg = bitr(rownames(DE.calibrated), fromType = "SYMBOL", toType="ENTREZID", OrgDb = "org.Hs.eg.db")
all.eg = bitr(rownames(DEG.test), fromType = "SYMBOL", toType="ENTREZID", OrgDb = "org.Hs.eg.db")
go_list <- enrichGO(gene = eg$ENTREZID,
universe = names(all.eg$ENTREZID),
OrgDb = org.Hs.eg.db,
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
'select()' returned 1:1 mapping between keys and columns Warning message in bitr(rownames(DE.calibrated), fromType = "SYMBOL", toType = "ENTREZID", : “2.18% of input gene IDs are fail to map...” 'select()' returned 1:1 mapping between keys and columns Warning message in bitr(rownames(DEG.test), fromType = "SYMBOL", toType = "ENTREZID", : “3.38% of input gene IDs are fail to map...”
library(forcats)
library(enrichplot)
go_list<-mutate(go_list, richFactor = Count / as.numeric(sub("/\\d+", "", BgRatio)))
ggplot(go_list, showCategory = 20,
aes(richFactor, fct_reorder(Description, richFactor))) +
geom_segment(aes(xend=0, yend = Description)) +
geom_point(aes(color=qvalue, size = Count)) +
scale_color_viridis_c(guide=guide_colorbar(reverse=TRUE)) +
scale_size_continuous(range=c(2, 10)) +
theme_minimal() + theme(axis.text.y=element_text(size=12)) +
xlab("rich factor") +
ylab(NULL)
for(c in levels(immune.combined@active.ident)){
DEG.test<-FindMarkers(immune.combined, ident.1="lungs", group.by="tissue.ident",
subset.ident=c, test.use = "wilcox")
#uniform the format of separator
c<-gsub("/", "_", c)
c<-gsub(" ", "_", c)
write.table(DEG.test, sprintf("../../output/DE_analysis_wilcox/%s_DE_results.csv", c), quote = F, sep=",")
}
Color scheme:
Red: negative log2FC
blue: positive log2FC
grey: data points not passing bonferroni corrected cutoff 0.05
input.tbl<-read.csv("../../output/DE_analysis_mast/Memory_B_cells_DE_results.csv", header=T)
Idents(immune.combined)<-"majority_voting"
volcano_plots<-list()
counts.tbl<-c()
gene.set<-c()
for(c in levels(immune.combined@active.ident)){
#uniform the format of separator
c<-gsub("/", "_", c)
c<-gsub(" ", "_", c)
input.tbl<-read.table(sprintf("../../output/DE_analysis_mast/%s_DE_results.csv", c), sep=",")
counts<-input.tbl %>% filter(p_val_adj<=0.05) %>% group_by((avg_log2FC>=0)) %>% summarise(n=n())
colnames(counts)<-c("labels", "n")
gene.set<-unique(c(gene.set, row.names(input.tbl)))
#filter out cell types with no DEGs
if(dim(counts)[1]>0){
counts.tbl<-rbind(counts.tbl, cbind(c, counts))
}
p<-ggplot(input.tbl, aes(x=avg_log2FC, y=-log10(p_val),
colour=avg_log2FC>0)) +
geom_point(size=2) +
geom_point(data=input.tbl%>%filter(p_val_adj>0.05),
aes(x=avg_log2FC, y=-log10(p_val)),
colour=alpha("grey",0.7), size=2) +
theme_light(base_size=15) + theme(legend.position = "none") +
ggtitle(c)
volcano_plots[[c]]<-p
}
ml <- marrangeGrob(volcano_plots, nrow=2, ncol=3)
ml
[[1]] NULL [[2]] NULL
ggplot(counts.tbl,
aes(x = c, y=n,
fill=labels, label=n,
width=0.5)) +
geom_col(position="dodge2") + geom_label(size=5,position = position_dodge2(width=0.8)) +
theme_light(base_size = 15) + labs(y="DEG Counts") +
scale_fill_discrete(labels=c("Up-regulated","Down-regulated")) +
theme(axis.text.x=element_text(size=16, vjust=0.6, angle = 45))
Identify the known pathways that are enriched with the differentially expressed genes in lungs
go_list<-list()
for(c in levels(immune.combined@active.ident)){
#uniform the format of separator
c<-gsub("/", "_", c)
c<-gsub(" ", "_", c)
print(c)
input.tbl<-read.table(sprintf("../../output/DE_analysis_mast/%s_DE_results.csv", c), sep=",")
subset.tbl<-input.tbl %>% filter(p_val_adj<=0.05)
eg = bitr(rownames(subset.tbl), fromType = "SYMBOL", toType="ENTREZID", OrgDb = "org.Hs.eg.db")
all.eg = bitr(rownames(input.tbl), fromType = "SYMBOL", toType="ENTREZID", OrgDb = "org.Hs.eg.db")
go_list[[c]] <- enrichGO(gene = eg$ENTREZID,
universe = names(all.eg$ENTREZID),
OrgDb = org.Hs.eg.db,
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
}
new_list <- lapply(go_list, function(i){
z<-mutate(i, richFactor = Count / as.numeric(sub("/\\d+", "", BgRatio)))
return(z)
})
go_plots<-list()
for(c in names(new_list)){
go_plots[[c]]<-ggplot(new_list[[c]], showCategory = 20,
aes(richFactor, fct_reorder(Description, richFactor))) +
geom_segment(aes(xend=0, yend = Description)) +
geom_point(aes(color=qvalue, size = Count)) +
scale_color_viridis_c(guide=guide_colorbar(reverse=TRUE)) +
scale_size_continuous(range=c(2, 10)) +
theme_minimal() + theme(axis.text.y=element_text(size=12)) +
xlab("rich factor") +
ylab(NULL) +
ggtitle(c)
}
saveRDS(new_list, "~/cluster/projects/u19_multiomics/analyses/go_BP_results.rds")
options(repr.plot.width=18, repr.plot.height=18)
marrangeGrob(go_plots[c(3,4,5,8)], nrow=2, ncol=2)
[[1]] NULL
options(repr.plot.width=18, repr.plot.height=16)
marrangeGrob(go_plots[c(2,6,10)], nrow=2, ncol=2)
[[1]] NULL
marrangeGrob(go_plots[c(1,7,9,11)], nrow=2, ncol=2)
[[1]] NULL