## Build ArchR objects

ArrowFiles <- createArrowFiles(
    # perform QC and filtering
  inputFiles = c(sprintf("%s/AGG_lungs/%s", work.dir, "atac_fragments.tsv.gz"),
                 sprintf("%s/AGG_spleens/%s", work.dir, "atac_fragments.tsv.gz")),
  sampleNames = c("lungs","spleens"),
  minTSS = 4, 
  minFrags = 1000, 
  addTileMat = TRUE,
  addGeneScoreMat = TRUE
)

doubScores <- addDoubletScores
    # detect doublets
  input = ArrowFiles,
  k = 10, #Refers to how many cells near a "pseudo-doublet" to count.
  knnMethod = "UMAP", #Refers to the embedding to use for nearest neighbor search.
  LSIMethod = 1
)

proj <- ArchRProject(
  ArrowFiles = ArrowFiles, 
  outputDirectory = "all_samples",
  copyArrows = TRUE #This is recommened so that you maintain an unaltered copy for later usage.
)
proj <- filterDoublets(ArchRProj = proj)


## Normalization, dimensional reduction, and UMAP embedding

proj <- addIterativeLSI(ArchRProj = proj, useMatrix = "TileMatrix", name = "IterativeLSI")

proj <- addClusters(input = proj, reducedDims = "IterativeLSI")

proj <- addUMAP(ArchRProj = proj, reducedDims = "IterativeLSI")

[1] "total number of peaks: 387033"


projT@peakSet@metadata$PeakCallSummary %>% group_by(Group) %>% summarise(sum=sum(Freq)*1000)


table(peaksDiff$peakType)
length(peaksDiff$peakType) # non-overlapping peaks for T cells

  Distal   Exonic Intronic Promoter 
    5182     2670    14085     3446

Plotting ComplexHeatmap!

Warning message:
“ggrepel: 33 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
Warning message:
“ggrepel: 38 unlabeled data points (too many overlaps). Consider increasing max.overlaps”


ggAlignPlots(plotList=vplots$Memory_B_cells, type = "h")

Warning message:
“ggrepel: 33 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
Warning message:
“ggrepel: 41 unlabeled data points (too many overlaps). Consider increasing max.overlaps”


ggAlignPlots(plotList=vplots$Naive_B_cells, type = "h")

Warning message:
“ggrepel: 39 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
Warning message:
“ggrepel: 50 unlabeled data points (too many overlaps). Consider increasing max.overlaps”


ggAlignPlots(plotList=vplots$CD16_neg_NK_cells, type = "h")

Warning message:
“ggrepel: 28 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
Warning message:
“ggrepel: 43 unlabeled data points (too many overlaps). Consider increasing max.overlaps”


ggAlignPlots(plotList=vplots$CD16_pos_NK_cells, type = "h")

Warning message:
“ggrepel: 43 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
Warning message:
“ggrepel: 39 unlabeled data points (too many overlaps). Consider increasing max.overlaps”


ggAlignPlots(plotList=vplotsT$Regulatory_T_cells, type = "h")

Warning message:
“ggrepel: 50 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
Warning message:
“ggrepel: 50 unlabeled data points (too many overlaps). Consider increasing max.overlaps”


ggAlignPlots(plotList=vplotsT$`Tem-Trm_cytotoxic_T_cells`, type = "h")

Warning message:
“ggrepel: 35 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
Warning message:
“ggrepel: 35 unlabeled data points (too many overlaps). Consider increasing max.overlaps”


ggAlignPlots(plotList=vplotsT$`Tcm-Naive_helper_T_cells`, type = "h")

Warning message:
“ggrepel: 46 unlabeled data points (too many overlaps). Consider increasing max.overlaps”
Warning message:
“ggrepel: 33 unlabeled data points (too many overlaps). Consider increasing max.overlaps”

	C1	C10	C11	C12	C13	C14	C15	C16	C17	C2	C3	C4	C5	C6	C7	C8	C9
	<chr>	<chr>	<chr>	<chr>	<chr>	<chr>	<chr>	<chr>	<chr>	<chr>	<chr>	<chr>	<chr>	<chr>	<chr>	<chr>	<chr>
cell type	Classical_monocytes	T_cells	T_cells	Memory_B_cells	T_cells	T_cells	T_cells	CD16_pos_NK_cells	CD16_neg_NK_cells	Naive_B_cells	Naive_B_cells	Memory_B_cells	Memory_B_cells	Classical_monocytes	T_cells	T_cells	T_cells
percentage	0.94	0.92	0.92	0.69	0.86	0.73	0.89	0.84	0.44	0.65	0.84	0.93	0.99	0.39	0.95	0.88	0.92

Group	sum
<chr>	<dbl>
B_cells(n = 16122)	59120
Classical_monocytes(n = 974)	35566
Epithelial_cells(n = 103)	20879
ILC3(n = 497)	27860
NK_cells(n = 7962)	54463
Plasma_cells(n = 56)	24367
T_cells(n = 23521)	61570
UnionPeaks	103208

Group	sum
<chr>	<dbl>
Regulatory T cells(n = 1332)	27953
Tcm/Naive helper T cells(n = 7493)	64650
Tem/Trm cytotoxic T cells(n = 12027)	51287
Type 17 helper T cells(n = 2669)	33236
UnionPeaks	75099

cell_type	Wang2020	u19_dataset	percent_increase
<chr>	<chr>	<chr>	<chr>
B_cells	52100	59120	13%
T_cells	56732	61570	9%
natural_killer_cells	48730	54463	12%

Identifying differentially accessible peaks with ArchR¶

1 Proprocessing chromatin accessibility data¶

Quality Control¶

2 Dimension reduction and clustering¶

Batch effects not corrected for downstream analysis¶

3 Two-dimensional embedding of chromatin accessibility data¶

Evaluate the consistency between clusters and cell type labels from RNA data¶

Cell compositions across immune cell types¶

4 Peak calling¶

Peak abundance comparison with current snATAC-seq dataset¶

Peak overlapping with current snATAC-seq dataset¶

5 Differential accessibility analysis by cell types¶

Heatmaps for DA z-scores¶

Motif analysis for DA peaks across cell types¶

Motif analysis for DA peaks across T cell subsets¶

6 Differential accessibility analysis between tissues¶

Motif analysis for DA peaks across tissues¶