## Build ArchR objects
# Build one Arrow file per tissue from the aggregated ATAC fragment files.
# QC and filtering happen in this step (see the minFrags / minTSS settings).
ArrowFiles <- createArrowFiles(
  inputFiles = c(
    sprintf("%s/AGG_lungs/%s", work.dir, "atac_fragments.tsv.gz"),
    sprintf("%s/AGG_spleens/%s", work.dir, "atac_fragments.tsv.gz")
  ),
  sampleNames = c("lungs", "spleens"),
  minFrags = 1000,
  minTSS = 4,
  addGeneScoreMat = TRUE,
  addTileMat = TRUE
)
# Detect doublets: compute doublet scores for the cells in the Arrow files.
# The opening parenthesis of the call is required; without it the arguments
# below are a syntax error and `doubScores` would only alias the function.
doubScores <- addDoubletScores(
  input = ArrowFiles,
  k = 10, # Refers to how many cells near a "pseudo-doublet" to count.
  knnMethod = "UMAP", # Refers to the embedding to use for nearest neighbor search.
  LSIMethod = 1
)
# Assemble the ArchR project from the Arrow files created above.
proj <- ArchRProject(
  ArrowFiles = ArrowFiles,
  # Copying is recommended so that an unaltered copy of the Arrow files
  # is kept for later use.
  copyArrows = TRUE,
  outputDirectory = "all_samples"
)

# Remove doublets from the project (filterDoublets() called with defaults).
proj <- filterDoublets(ArchRProj = proj)
## Normalization, dimensional reduction, and UMAP embedding
# Iterative LSI on the tile matrix; the result is stored as "IterativeLSI".
proj <- addIterativeLSI(
  ArchRProj = proj,
  useMatrix = "TileMatrix",
  name = "IterativeLSI"
)

# Clustering and UMAP embedding both use the "IterativeLSI" reduced dimensions.
proj <- addClusters(
  input = proj,
  reducedDims = "IterativeLSI"
)
proj <- addUMAP(
  ArchRProj = proj,
  reducedDims = "IterativeLSI"
)
The correction with Harmony seems to exacerbate the difference between lungs and spleens, so the following analysis was performed on the uncorrected data.
left: colored by clustering results with ATAC-seq data alone
right: colored by cell type labels from RNA-seq data
The unsupervised clustering results on chromatin accessibility data show overall consistency with the cell type labels obtained from scRNA-seq data.
A summary table for the most represented cell type and its percentage in each cluster
C1 | C10 | C11 | C12 | C13 | C14 | C15 | C16 | C17 | C2 | C3 | C4 | C5 | C6 | C7 | C8 | C9 | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
<chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | <chr> | |
cell type | Classical_monocytes | T_cells | T_cells | Memory_B_cells | T_cells | T_cells | T_cells | CD16_pos_NK_cells | CD16_neg_NK_cells | Naive_B_cells | Naive_B_cells | Memory_B_cells | Memory_B_cells | Classical_monocytes | T_cells | T_cells | T_cells |
percentage | 0.94 | 0.92 | 0.92 | 0.69 | 0.86 | 0.73 | 0.89 | 0.84 | 0.44 | 0.65 | 0.84 | 0.93 | 0.99 | 0.39 | 0.95 | 0.88 | 0.92 |
Barplots to visualize cell type composition in each cluster
Note: group names must not contain unusual symbols; otherwise the corresponding HDF5 file cannot be found.
Peak numbers for all cells from our own dataset
Group | sum |
---|---|
<chr> | <dbl> |
CD16_neg_NK_cells(n = 695) | 54171 |
CD16_pos_NK_cells(n = 7267) | 51859 |
Classical_monocytes(n = 974) | 38549 |
Epithelial_cells(n = 103) | 20484 |
ILC3(n = 497) | 30863 |
Memory_B_cells(n = 13556) | 58948 |
Naive_B_cells(n = 2566) | 67457 |
Plasma_cells(n = 56) | 24367 |
T_cells(n = 23521) | 61570 |
UnionPeaks | 120676 |
Peak numbers for T cells alone from our dataset
# Per-group peak totals: sum the PeakCallSummary frequencies within each
# Group and multiply by 1000.
projT@peakSet@metadata$PeakCallSummary %>%
  group_by(Group) %>%
  summarise(sum = sum(Freq) * 1000)
Group | sum |
---|---|
<chr> | <dbl> |
Regulatory T cells(n = 1332) | 27953 |
Tcm/Naive helper T cells(n = 7493) | 64650 |
Tem/Trm cytotoxic T cells(n = 12027) | 51287 |
Type 17 helper T cells(n = 2669) | 33236 |
UnionPeaks | 75099 |
Peak numbers from Wang et al.
cell_type | Wang2020 | u19_dataset | percent_increase |
---|---|---|---|
<chr> | <chr> | <chr> | <chr> |
B_cells | 52100 | 67457 | 29% |
T_cells | 56732 | 61570 | 9% |
natural_killer_cells | 48730 | 54171 | 11% |
Heatmap of differential accessibility (DA) results, including T cell subsets
# Print the enrichMotifs object to inspect it (the output that follows shows a
# SummarizedExperiment summary).
enrichMotifs
class: SummarizedExperiment dim: 870 12 metadata(0): assays(10): mlog10Padj mlog10p ... CompareFrequency feature rownames(870): TFAP2B_1 TFAP2D_2 ... TBX18_869 TBX22_870 rowData names(0): colnames(12): CD16- NK cells CD16+ NK cells ... Tem/Trm cytotoxic T cells Type 17 helper T cells colData names(0):
# Print enrichMotifs again. NOTE(review): the output that follows has a single
# column, so the object was presumably recomputed in a step not shown here;
# confirm against the full notebook.
enrichMotifs
class: SummarizedExperiment dim: 870 1 metadata(0): assays(10): mlog10Padj mlog10p ... CompareFrequency feature rownames(870): TFAP2B_1 TFAP2D_2 ... TBX18_869 TBX22_870 rowData names(0): colnames(1): lungs-Tem/Trm cytotoxic T cells colData names(0):
# Build the enrichment heatmap from enrichMotifs (n = 7, transposed).
heatmapEM <- plotEnrichHeatmap(
  enrichMotifs,
  n = 7,
  transpose = TRUE
)

# Draw the heatmap with both legends on the "bot" side.
ComplexHeatmap::draw(
  heatmapEM,
  heatmap_legend_side = "bot",
  annotation_legend_side = "bot"
)
ArchR logging to : ArchRLogs/ArchR-plotEnrichHeatmap-1452196a32f4-Date-2023-03-24_Time-15-45-53.log If there is an issue, please report to github with logFile! Adding Annotations.. Preparing Main Heatmap.. 'magick' package is suggested to install to give better rasterization. Set `ht_opt$message = FALSE` to turn off this message.