%load_ext rpy2.ipython

/home/jinggu/scratch-midway2/tools/miniconda3/lib/python3.7/site-packages/rpy2/robjects/pandas2ri.py:14: FutureWarning: pandas.core.index is deprecated and will be removed in a future version.  The public classes are available in the top-level namespace.
  from pandas.core.index import Index as PandasIndex


%%capture
%%R
library("ArchR")
library(Matrix)
library("Seurat")
library(repr)
library("dplyr")
#Load ArchR object
proj<-loadArchRProject("~/cluster/projects/lung_subset/")
proj.orig<-loadArchRProject("lung_snATAC/")


%%capture --no-display
%%R -w 800 -h 800 -u px
p1 <- plotEmbedding(ArchRProj = proj.orig, colorBy = "cellColData", name = "Clusters", embedding = "UMAP") + ggtitle("Human lung snATAC-seq(74930 nuclei)")
p1 + theme_ArchR(baseSize = 12)


%%capture --no-display
%%R -w 1000 -h 1000 -u px
load(file="/home/jinggu/cluster/projects/lung_snATAC/Objects/markersGS.Rdata")
markerList <- getMarkers(markersGS, cutOff = "FDR <= 0.01 & Log2FC >= 1.25")

markerGenes  <- c(
    'TEKT4','KRT5','SCGB3A2',
    'ASCL1','SFTPC','WNT7A',
    'PDGFRB','MYOCD','WNT2',
    'PROX1','CA4','VWF',
    'ICOS','NCR1','MS4A1',
    'GATA1','CD68','TCF7',
    'CD8A','CD4','CD74','PRF1', 
    'CCR7', 'ITGAE','CD69'
  )
#make a heatmap labeled with marker genes
heatmapGS <- markerHeatmap(
  seMarker = markersGS, 
  cutOff = "FDR <= 0.01 & Log2FC >= 1.25", 
  labelMarkers = markerGenes,
  transpose = TRUE
)
ComplexHeatmap::draw(heatmapGS, heatmap_legend_side = "bot", annotation_legend_side = "bot")


%%capture --no-display
%%R -w 800 -h 800 -u px
remapClust<-c("C1"="Endothelial cells","C2"="Endothelial cells","C3"="Endothelial cells",
              "C4"="Epithelial cells", "C5"="Epithelial cells", "C6"="Epithelial cells", "C7"="Epithelial cells",
              "C8"="Epithelial cells", "C9"="Epithelial cells",
              "C10"="Mesenchymal cells", "C11"="Mesenchymal cells", "C12"="Mesenchymal cells",
              "C13"="Mesenchymal cells","C14"="Mesenchymal cells",
              "C15"="Epithelial cells", "C16"="Epithelial cells",
              "C17"= "Erythrocytes","C18"= "Erythrocytes",
              "C22"= "B cells", "C19"="T/NK cells", "C20"="T/NK cells",
              "C21"="T/NK cells","C23"="T/NK cells", "C24"="T/NK cells","C25"="T/NK cells")

labelNew2 <- mapLabels(proj.orig$Clusters, oldLabels = names(remapClust), newLabels = remapClust)
proj.orig$Clusters_GS<-labelNew2
p1 <- plotEmbedding(ArchRProj = proj.orig, colorBy = "cellColData", name = "Clusters_GS", size=0.2, baseSize=15,
                    embedding = "UMAP") + ggtitle("Human lung snATAC-seq(74930 nuclei)")


%%capture
%%R 
#predict gene scores for marker genes
markerGenes  <- c(
    'NCR1','CD8A','CD4','CD74', 
    'CCR7', 'ITGAE','CD69',
    'IL7R', 'ICOS'
  )
#proj.orig <- addImputeWeights(proj.orig)
#make plots for marker genes
p <- plotEmbedding(
    ArchRProj = proj.orig, 
    colorBy = "GeneScoreMatrix", 
    name = markerGenes, 
    embedding = "UMAP",
    imputeWeights = getImputeWeights(proj.orig)
)
#Rearrange for grid plotting
p2 <- lapply(p, function(x){
    x + guides(color = FALSE, fill = FALSE) + 
    theme_ArchR(baseSize = 12) +
    theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) +
    theme(
        axis.text.x=element_blank(), 
        axis.ticks.x=element_blank(), 
        axis.text.y=element_blank(), 
        axis.ticks.y=element_blank()
    )
})


%%capture --no-display
%%R -w 800 -h 800 -u px
do.call(cowplot::plot_grid, c(list(ncol = 3),p2))


%%R -w 800 -h 800 -u px
lung.rna<-readRDS("~/cluster/projects/lung_others/lung_scRNA/lung_Wang2020.rds")
DimPlot(lung.rna, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend() + 
ggtitle("Human lung snRNA-Seq (46500 nuclei)")


%%R
tcell.rna<-readRDS("~/cluster/projects/lung_others/lung_scRNA/lung_Tcell_subclustering_Wang2020.rds")
DimPlot(tcell.rna, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend() + ggtitle("Human lung snRNA-Seq T cells (3209 nuclei)")


%%R -w 800 -h 800 -u px
cluster.markers<-read.table("~/cluster/projects//lung_others//lung_scRNA//t_cell_subclusters_DEG.txt", header = T)
cluster.markers %>%
    group_by(cluster) %>%
    top_n(n = 10, wt = avg_log2FC) -> top10
DoHeatmap(tcell.rna, features = top10$gene) + NoLegend()


%%R
cluster1.markers<-FindMarkers(tcell.rna, ident.1 = c(0,1), ident.2 = seq(2,6), min.pct = 0.1)
cluster1.markers[which(rownames(cluster1.markers)=="CD69"),]

           p_val avg_log2FC pct.1 pct.2    p_val_adj
CD69 5.81979e-09 -0.3943621 0.282  0.37 0.0001546784


%%R
new.cluster.ids<-c("0"="unknown", "1"="Non-TR cells", "2"="TR cells", "3"="TR cells",
                   "4"="TR cells", "5"="TR cells", "6"="TR cells")

tcell.rna<-RenameIdents(tcell.rna, new.cluster.ids)
DimPlot(tcell.rna, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()


%%R -w 800 -h 400 -u px
markerGenes  <- c(
    'CCR7', 'ITGAE','CD69')
VlnPlot(tcell.rna, features = markerGenes, slot = "counts", log = TRUE)


%%capture --no-display
%%R -w 800 -h 800 -u px
p2 <- plotEmbedding(ArchRProj = proj, colorBy = "cellColData", name = "Clusters", embedding = "UMAP", title="Sub-clustering of T cell/NK cells")
p2+ theme_ArchR(baseSize = 12)


%%capture
%%R
#predict gene scores for marker genes
markerGenes  <- c(
    'NCR1','CD8A','CD4','CD74', 
    'CCR7', 'ITGAE','CD69',
    'IL7R', 'ICOS'
  )
proj<- addImputeWeights(proj)
#make plots for marker genes
p <- plotEmbedding(
    ArchRProj = proj, 
    colorBy = "GeneScoreMatrix", 
    name = markerGenes, 
    embedding = "UMAP",
    imputeWeights = getImputeWeights(proj)
)
#Rearrange for grid plotting
p2 <- lapply(p, function(x){
    x + guides(color = FALSE, fill = FALSE) + 
    theme_ArchR(baseSize = 6.5) +
    theme(plot.margin = unit(c(0, 0, 0, 0), "cm")) +
    theme(
        axis.text.x=element_blank(), 
        axis.ticks.x=element_blank(), 
        axis.text.y=element_blank(), 
        axis.ticks.y=element_blank()
    )
})


%%capture --no-display
%%R -w 800 -h 800 -u px
do.call(cowplot::plot_grid, c(list(ncol = 3),p2))


%%capture
%%R
tcell.rna$cell_types<-tcell.rna@active.ident
head(tcell.rna)
projInteg <- addGeneIntegrationMatrix(
    ArchRProj = proj, 
    useMatrix = "GeneScoreMatrix",
    matrixName = "GeneIntegrationMatrix",
    reducedDims = "IterativeLSI",
    seRNA = tcell.rna,
    addToArrow = FALSE,
    groupRNA = "cell_types",
    nameCell = "predictedCell_Un",
    nameGroup = "predictedGroup_Un",
    nameScore = "predictedScore_Un"
)


%%R
#Unconstrained integration
cM <- as.matrix(confusionMatrix(projInteg$Clusters, projInteg$predictedGroup_Un))
preClust <- colnames(cM)[apply(cM, 1 , which.max)]
cbind(preClust, rownames(cM)) #Assignments

      preClust            
 [1,] "unknown"      "C10"
 [2,] "unknown"      "C3" 
 [3,] "TR cells"     "C8" 
 [4,] "Non-TR cells" "C7" 
 [5,] "TR cells"     "C9" 
 [6,] "TR cells"     "C11"
 [7,] "Non-TR cells" "C5" 
 [8,] "unknown"      "C4" 
 [9,] "unknown"      "C2" 
[10,] "TR cells"     "C6" 
[11,] "unknown"      "C1"


%%R
cM

    unknown TR cells Non-TR cells
C10     352      249            5
C3      322      320            0
C8      291      803            0
C7       43       99          671
C9      267     1132           20
C11      20      796            2
C5       64      164          322
C4      610      113            1
C2      106       71            1
C6       50      237            0
C1       46        1            0


%%R
#Constrained Integration
clustTNK <- rownames(cM)[grep("TR cells", preClust)]
clustTNK
clustNonTNK <- paste0("C", 1:10)[!(paste0("C", 1:10) %in% clustTNK)]
clustNonTNK
rnaTNK <- rownames(tcell.rna@meta.data[tcell.rna$cell_types=="TR cells",])
rnaNonTNK <- rownames(tcell.rna@meta.data[tcell.rna$cell_types!="TR cells",])

[1] "C1"  "C2"  "C3"  "C4"  "C10"


%%R
groupList <- SimpleList(
    TNK = SimpleList(
        ATAC = projInteg$cellNames[projInteg$Clusters %in% clustTNK],
        RNA = rnaTNK
    ),
    NonTNK = SimpleList(
        ATAC = projInteg$cellNames[projInteg$Clusters %in% clustNonTNK],
        RNA = rnaNonTNK
    )    
)

projInteg <- addGeneIntegrationMatrix(
    ArchRProj = projInteg, 
    useMatrix = "GeneScoreMatrix",
    matrixName = "GeneIntegrationMatrix",
    reducedDims = "IterativeLSI",
    seRNA = tcell.rna,
    addToArrow = FALSE, 
    groupList = groupList,
    groupRNA = "cell_types",
    nameCell = "predictedCell_Co",
    nameGroup = "predictedGroup_Co",
    nameScore = "predictedScore_Co"
)
pal <- paletteDiscrete(values = unique(tcell.rna$cell_types))
p1 <- plotEmbedding(
    projInteg, 
    colorBy = "cellColData", 
    name = "predictedGroup_Un", 
    pal = pal
)
p2 <- plotEmbedding(
    projInteg, 
    colorBy = "cellColData", 
    name = "predictedGroup_Co", 
    pal = pal
)


%%R
p1


%%R
p2


%%R
projPeakcalling<-addGroupCoverages(
    ArchRProj = projInteg, 
    groupBy = "predictedGroup_Un", 
)
projPeakcalling<-addReproduciblePeakSet(
    ArchRProj = projPeakcalling, 
    groupBy = "predictedGroup_Un", 
    pathToMacs2="/scratch/midway2/jinggu/env/sc-atac/bin/macs2"
)

output<-getPeakSet(projPeakcalling)
save(output, file="~/cluster/projects/archR/tissueRes_T_cells_Wang2020.Rdata")
saveArchRProject(ArchRProj = projPeakcalling, outputDirectory = "/home/jinggu/cluster/projects/archR/tissueRes_tcells", load = FALSE)


%%R -w 800 -h 800 -u px
library(ggplot2)
source("~/projects/funcFinemapping/code/make_plots.R")
tbl<-data.frame()
for(trait in c("allergy", "asthma_adult", "asthma_child")){
  df<-read.table(sprintf("~/projects/funcFinemapping/output/AAD/%s/Wang2020_indiv.est", trait), header=F)
  colnames(df)<-c("raw_term", "estimate", "low", "high")
  df["term"]<-unlist(lapply(df$raw_term, function(i){strsplit(i, "[.]")[[1]][1]}))
  tbl<-data.frame(rbind(tbl, cbind(df, trait)))
}

snp_enrichment_plot(tbl, trait="", y.order = c()) + facet_grid(. ~ trait)

Identifying Tissue-resident T cells in lungs¶

Outlines¶

Project Overview¶

Dataset:¶

Methods:¶

Pre-processing snATAC-seq data using ArchR¶

Create an ArchR Project for data processing¶

Dimension reduction and clustering¶

Results¶

Clustering of human lung snATAC-seq data¶

Assign clusters with predicted gene scores¶

Heatmap for predicted gene scores¶

UMAP plots colored by marker genes¶

Assign clusters with scRNA-seq¶

Clustering of scRNA-seq lung data¶

Sub-clustering of the T-cell cluster using scRNA-seq data¶

Heatmap on sub-clusters of T-cells display heterogeneity¶

Perform DEG analysis between cluster 2-6 and cluster 0-1¶

Re-group the T-cell subclusters identified in scRNA-seq data¶

Sub-clustering of the T-cell cluster using scATAC-seq data¶

UMAP plots for marker genes¶

Integration with scRNA-seq data¶

Assign clusters using the identified subclusters from scRNA-seq lung data¶

Matching results with unconstrained integration¶

Counts of cells that match to groups in scRNA-seq data¶

Matching results with constrained integration¶

Calling peaks and Enrichment analysis¶