The single-cell multi-omics data contain single-cell transcriptomic, proteomic and phospho-proteomic measurements of in vitro generated antibody-secreting cells. The code below generates quality-control plots and performs filtering, normalization and scaling of the counts.

Count matrix to Seurat object

Import all count matrices, combine plates and create unfiltered Seurat objects.

myfiles <- list.files(path="output/", pattern = ".rds$")
## only read all raw files and create raw combined table if not done yet. Speeds up generation of html file
if ("seu.RNA.rds" %in%  myfiles) {
  seu_RNA <- readRDS("output/seu.RNA.rds")
  seu.PROT_live <- readRDS("output/seu.PROT_live.rds")
  seu.PROT_fix <- readRDS("output/seu.PROT_fix.rds")
} else {

QC plots and filters

# QC panel: total RNA UMI counts per cell, drawn per plate by the project
# helper plot_QC_paper() (defined outside this chunk).
plot_RNA_nCount <- plot_QC_paper(
  seu_object = seu_RNA,
  feature = "nCount_RNA",
  ytext = "Total UMI counts per cell",
  xtext = "Plate number",
  paneltitle = "Fixed cells (1586 to 1589) show lower counts",
  colorviolin = "dodgerblue2"
) +
  # Red vertical rule at x = 6.5; the label below sits just to its right.
  geom_vline(xintercept = 6.5,
             size = 0.3,
             color = "red") +
  # The text label must be wrapped in annotate(); a bare argument list after
  # `+` does not parse.
  annotate(
    geom = "text",
    x = 6.6,
    y = 20000,
    label = "Fixed cells",
    hjust = 0,
    size = 2.5
  ) +
  theme(axis.title.x = element_blank())

# QC panel: number of detected genes per cell, with the 300-gene cut-off.
plot_RNA_ngenes <- plot_QC_paper(
  seu_object = seu_RNA,
  feature = "nFeature_RNA",
  ytext = "Total genes per cell",
  xtext = "Plate number",
  paneltitle = "Keep cells >300 genes",
  colorviolin = "dodgerblue2"
) +
  geom_hline(yintercept = 300,
             size = 0.3,
             color = "red") +
  theme(axis.title.x = element_blank())

# QC panel: percentage of mitochondrial counts, with the 5 % cut-off.
# NOTE(review): the object name and the feature column ("percent.mt") were
# missing from this chunk; both are restored by analogy with plot_percent.rb /
# "percent.rb" below. Confirm against the original script.
plot_percent.mt <- plot_QC_paper(
  seu_object = seu_RNA,
  feature = "percent.mt",
  ytext = "% Mitochondrial counts",
  xtext = "Plate number",
  paneltitle = "Keep cells < 5 % mitochondrial genecounts",
  colorviolin = "dodgerblue2"
) +
  geom_hline(yintercept = 5,
             color = "red",
             size = 0.3) +
  theme(axis.title.x = element_blank())

# QC panel: percentage of ribosomal counts per plate (no cut-off drawn).
plot_percent.rb <- plot_QC_paper(
  seu_object = seu_RNA,
  feature = "percent.rb",
  ytext = "% Ribosomal counts",
  xtext = "Plate number",
  paneltitle = "comparable % ribosomal counts in all plates",
  colorviolin = "dodgerblue2"
) +
  theme(axis.title.x = element_blank())

# QC panel: total protein UMI counts per live cell, with the 1500 and 9000
# cut-offs drawn as horizontal lines.
# NOTE(review): the object name was missing from this chunk; it is restored by
# analogy with plot_PROT_nCount.fix. Confirm against the original script.
plot_PROT_nCount.live <- plot_QC_paper(
  seu_object = seu.PROT_live,
  feature = "nCount_PROT",
  ytext =  "Total UMI counts per cell",
  xtext = "Plate number",
  paneltitle = "Keep cells > 1500 & < 9000 PROT counts",
  colorviolin = "deeppink3"
) +
  geom_hline(yintercept = 1500, size = 0.3) +
  geom_hline(yintercept = 9000, size = 0.3) +
  theme(axis.title.x = element_blank())

# QC panel: total protein UMI counts per fixed cell, with the 2500 and 20000
# cut-offs drawn as horizontal lines.
plot_PROT_nCount.fix <- plot_QC_paper(
  seu_object = seu.PROT_fix,
  feature = "nCount_PROT",
  ytext =  "Total UMI counts per cell",
  xtext = "Plate number",
  paneltitle = "Keep cells > 2500 & < 20000 PROT counts",
  colorviolin = "deeppink3"
) +
  geom_hline(yintercept = 2500, size = 0.3) +
  geom_hline(yintercept = 20000, size = 0.3) +
  theme(axis.title.x = element_blank())

# QC panel: number of detected proteins per live cell, with the 40-protein
# cut-off.
# NOTE(review): the object name and the plot_QC_paper( opener were missing from
# this chunk; restored by analogy with plot_PROT_nproteins.fix. Confirm against
# the original script.
plot_PROT_nproteins.live <-
  plot_QC_paper(
    seu_object = seu.PROT_live,
    feature = "nFeature_PROT",
    ytext =  "Total proteins per cell",
    xtext = "Plate number",
    paneltitle = "Keep cells >40 proteins",
    colorviolin = "deeppink3"
  ) +
  geom_hline(yintercept = 40,
             size = 0.3,
             color = "red") +
  theme(axis.title.x = element_blank())

# QC panel: number of detected proteins per fixed cell. The red horizontal
# line marks the 65-protein threshold named in the panel title.
plot_PROT_nproteins.fix <-
  plot_QC_paper(
    seu_object = seu.PROT_fix,
    feature = "nFeature_PROT",
    ytext =  "Total proteins per cell",
    xtext = "Plate number",
    paneltitle = "Keep cells >65 proteins",
    colorviolin = "deeppink3"
  ) +
  geom_hline(yintercept = 65, size = 0.3, color = "red") +
  theme(axis.title.x = element_blank())

# Assemble the eight QC panels into a two-column grid labelled a-h.
# NOTE(review): the panel arguments were missing from this chunk. The list and
# its order below are reconstructed from the order in which the panels are
# defined above (8 panels for 8 labels); confirm against the original script.
plot.QC <- plot_grid(
  plot_RNA_nCount,
  plot_RNA_ngenes,
  plot_percent.mt,
  plot_percent.rb,
  plot_PROT_nCount.live,
  plot_PROT_nCount.fix,
  plot_PROT_nproteins.live,
  plot_PROT_nproteins.fix,
  labels = c('a', 'b', 'c', 'd' , 'e', 'f', 'g', 'h'),
  label_size = 10,
  ncol = 2
)

# Write the figure as PDF, EPS and PNG with identical dimensions.
# NOTE(review): the ggsave( wrappers were missing; `plot = plot.QC` is passed
# explicitly because plot.QC is assigned rather than printed, so it is not
# necessarily the "last plot" that ggsave() would otherwise pick up.
ggsave(
  filename = "output/paper_figures/Suppl_QC_filters.pdf",
  plot = plot.QC,
  width = 177,
  height = 200,
  units = "mm",
  dpi = 300
)
ggsave(
  filename = "output/paper_figures/Suppl_QC_filters.eps",
  plot = plot.QC,
  width = 177,
  height = 200,
  units = "mm",
  dpi = 300
)
ggsave(
  filename = "output/paper_figures/Suppl_QC_filters.png",
  plot = plot.QC,
  width = 177,
  height = 200,
  units = "mm",
  dpi = 300
)

Supplementary Figure Thresholds for selection of high-quality samples and cells.

  1. Based on the indicated cut-offs, high-quality cells are filtered for further analysis.
## Filter fixed protein dataset
seu.PROT.fix.subset <- subset(seu.PROT_fix, subset = nCount_PROT >= 2500 & nCount_PROT < 20000)

## Filter live-cell protein dataset
# NOTE(review): the assignment target was missing from this chunk; the name is
# restored by analogy with seu.PROT.fix.subset. Confirm against the original.
seu.PROT.live.subset <- subset(seu.PROT_live, subset = nCount_PROT >= 1500 & nCount_PROT <= 9000)

## RNA quality of fixed dataset is too low (very low gene numbers and counts). Therefore continue only with live-cell dataset.
## Plates 1586-1589 are the fixed-cell plates; all other identities are live cells.
seu.RNA_live <- subset(seu_RNA, idents = c(1586:1589), invert = TRUE)
seu.RNA_fix <- subset(seu_RNA, idents = c(1586:1589))

## Filter RNA live dataset
# NOTE(review): the metadata column in the filter expression was missing;
# "percent.mt" is restored from the "% Mitochondrial counts" QC panel and its
# 5 % threshold. Confirm the column name against the original.
seu.RNA_live.subset <- subset(seu.RNA_live, subset = percent.mt <= 5 & nFeature_RNA >= 300)
seu.RNA_fix.subset <- subset(seu.RNA_fix) #, subset = percent.mt <= 5 & nFeature_RNA >= 300 # No filter because RNA not taken along.
  1. Filter low-detected genes: keep only genes that are detected in at least 1% of cells.
## Gene-level filter: rebuild each RNA object from its raw counts, keeping only
## features detected in at least round(number of cells / 100) cells (~1 %).
seu.RNA_live.subset <- CreateSeuratObject(
  seu.RNA_live.subset[["RNA"]]@counts,
  min.cells = round(ncol(seu.RNA_live.subset)/100)
)
seu.RNA_fix.subset <- CreateSeuratObject(
  seu.RNA_fix.subset[["RNA"]]@counts,
  min.cells = round(ncol(seu.RNA_fix.subset)/100)
)
  1. Merge Seurat objects from Protein & RNA modalities
## Merge Seurat objects live dataset
# NOTE(review): every identifier containing ".live" was missing from this chunk
# and the statements were fused onto one line. The code below is reconstructed
# token-for-token from the parallel fixed-cell merge; confirm the object names
# against the original script.

# Cells present in both the filtered RNA and the filtered protein object,
# listed in the column order of the protein object.
intersect <- colnames(seu.RNA_live.subset)[colnames(seu.RNA_live.subset) %in% colnames(seu.PROT.live.subset)]
intersect <- colnames(seu.PROT.live.subset)[colnames(seu.PROT.live.subset) %in% intersect]

seu.RNA_combined.live <- subset(seu.RNA_live.subset, cells = intersect )
Prot.live.intersect <- seu.PROT.live.subset@assays$PROT@counts[,colnames(seu.PROT.live.subset) %in% intersect]

# Attach the protein counts of the shared cells as a second assay.
seu.RNA_combined.live[["PROT"]] <- CreateAssayObject(counts = Prot.live.intersect)
An object of class Seurat 
10211 features across 1433 samples within 2 assays 
Active assay: RNA (10158 features, 0 variable features)
 1 other assay present: PROT
## Fixed dataset: keep only the cells present in both modalities and attach
## the protein counts to the RNA object as a "PROT" assay.

# Shared cell names, first taken from the RNA object, then re-listed in the
# column order of the protein object.
intersect <- colnames(seu.RNA_fix.subset)[
  colnames(seu.RNA_fix.subset) %in% colnames(seu.PROT.fix.subset)
]
intersect <- colnames(seu.PROT.fix.subset)[
  colnames(seu.PROT.fix.subset) %in% intersect
]

seu.RNA_combined.fix <- subset(seu.RNA_fix.subset, cells = intersect )
Prot.fix.intersect <- seu.PROT.fix.subset@assays$PROT@counts[
  ,
  colnames(seu.PROT.fix.subset) %in% intersect
]

seu.RNA_combined.fix[["PROT"]] <- CreateAssayObject(counts = Prot.fix.intersect)
An object of class Seurat 
5095 features across 1038 samples within 2 assays 
Active assay: RNA (5019 features, 0 variable features)
 1 other assay present: PROT
  1. Antibody quality (Non-detected proteins) filter: remove proteins with median counts < 0.2 (fixed cells), or < 1 (live cells)
## Fixed cells: long table (protein, cell, count, sample) of the protein counts.
# NOTE(review): the as.data.frame(...) source was missing from this line; it is
# restored from the rownames()/ncol() calls below, which both use
# seu.PROT.fix.subset. Confirm against the original script.
PROT_tbl_subset.fix <- as.data.frame(seu.PROT.fix.subset@assays$PROT@counts) %>%
  mutate(protein = rownames(seu.PROT.fix.subset)) %>%
  dplyr::select(protein, everything()) %>%
  gather("cell", "count", 2:c(ncol(seu.PROT.fix.subset)+1)) %>%
  # Sample ID = cell name with its last 5 characters removed.
  mutate(sample = gsub('.{5}$', '', cell) )

# Per-protein summary of the count column (column 3).
# NOTE(review): the variable is called "median" and the accompanying text says
# "median counts", but the statistic computed here is the mean. Left unchanged
# because switching would alter the published protein selection; confirm intent.
prot.median.fix <- aggregate(PROT_tbl_subset.fix[, 3], list(protein =PROT_tbl_subset.fix$protein), mean)

prot.fix.toremove <- prot.median.fix$protein[prot.median.fix$x <=0.2]

filtered.prot.counts <- seu.PROT.fix.subset[["PROT"]]@counts[!c(rownames(seu.PROT.fix.subset[["PROT"]]@counts) %chin% prot.fix.toremove),]

# NOTE(review): this replaces seu.PROT.fix.subset only; the PROT assay already
# stored in seu.RNA_combined.fix is not updated by this statement.
seu.PROT.fix.subset <- CreateSeuratObject(filtered.prot.counts, assay = "PROT")

## Live cells
# NOTE(review): this chunk lost its assignment targets, the as.data.frame(...)
# source and every identifier containing ".live", and its statements were fused
# onto one line. It is reconstructed from the parallel fixed-cell filter:
#   * the count source is taken to be seu.PROT_live, because the surviving
#     rownames()/ncol() calls both use seu.PROT_live;
#   * the filtered assay is written to seu.RNA_combined.live[["PROT"]], as the
#     surviving `[["PROT"]] <- CreateAssayObject(counts =` fragment indicates.
# Confirm all names against the original script.
PROT_tbl_subset.live <- as.data.frame(seu.PROT_live@assays$PROT@counts) %>%
  mutate(protein = rownames(seu.PROT_live[["PROT"]])) %>%
  dplyr::select(protein, everything()) %>%
  gather("cell", "count", 2:c(ncol(seu.PROT_live[["PROT"]])+1)) %>%
  # Sample ID = cell name with its last 9 characters removed.
  mutate(sample = gsub('.{9}$', '', cell) )

# Per-protein summary of the count column (column 3); the statistic is the
# mean, as in the fixed-cell filter.
prot.median.live <- aggregate(PROT_tbl_subset.live[, 3], list(protein = PROT_tbl_subset.live$protein), mean)

prot.live.toremove <- prot.median.live$protein[prot.median.live$x <1]

filtered.prot.counts.live <- seu.RNA_combined.live[["PROT"]]@counts[!c(rownames(seu.RNA_combined.live[["PROT"]]@counts) %chin% prot.live.toremove),]

seu.RNA_combined.live[["PROT"]] <- CreateAssayObject(counts = filtered.prot.counts.live)
  1. Add metadata to object.
## metadata import
# Tab-separated sample sheet; `sample` is converted to a factor before joining.
metadata <- read_delim("data/metadata.txt", "\t", escape_double = FALSE, trim_ws = TRUE)
metadata$sample <- as.factor(metadata$sample)

## add metadata to fix dataset
# NOTE(review): the data.frame(...) sources were missing from this chunk; they
# are restored as the object's @meta.data slot because the result is passed
# straight back to AddMetaData() for the same object. Confirm against the
# original script.
meta.fix <- data.frame(seu.RNA_combined.fix@meta.data) %>%
  mutate(sample = orig.ident ) %>%
  left_join(metadata) %>%
  mutate(group = sample)
# Row names are reassigned from the original metadata so that AddMetaData()
# receives cell names.
rownames(meta.fix) <- rownames(data.frame(seu.RNA_combined.fix@meta.data))
seu.RNA_combined.fix <- AddMetaData(object = seu.RNA_combined.fix, metadata = meta.fix)

#meta.fix <- data.frame( %>%
#  mutate(sample = rownames(

## add metadata to live dataset
# NOTE(review): object names containing ".live", the @meta.data sources and the
# "percent.mt" column name were missing from this chunk, and several statements
# were fused. The code is reconstructed from the parallel fixed-cell metadata
# block; confirm against the original script.
meta.live <- data.frame(seu.RNA_combined.live@meta.data) %>%
  mutate(sample = orig.ident ) %>%
  left_join(metadata) %>%
  mutate(group = sample)
# Row names are reassigned from the original metadata so that AddMetaData()
# receives cell names.
rownames(meta.live) <- rownames(data.frame(seu.RNA_combined.live@meta.data))
seu.RNA_combined.live <- AddMetaData(object = seu.RNA_combined.live, metadata = meta.live)

## Percentage of counts from features whose name starts with "MT".
seu.RNA_combined.live[["percent.mt"]] <- PercentageFeatureSet(seu.RNA_combined.live, pattern = "^MT")

seu.RNA_combined.fix[["percent.mt"]] <- PercentageFeatureSet(seu.RNA_combined.fix, pattern = "^MT")

Normalize and Scale

Finally, the datasets are normalized (SCT for RNA, CLR for (phospho-)protein) and scaled. For RNA, nCount, percentage of mitochondrial counts and plate ID are regressed out; for protein, nCount and plate ID are regressed out.

## fix data normalize RNA
# NOTE(review): the argument names new.assay.name / vars.to.regress / col.name
# and the "percent.mt" strings were missing from this chunk; they are restored
# from the Seurat function signatures and the mitochondrial column used
# elsewhere in this report. Confirm against the original script.
DefaultAssay(seu.RNA_combined.fix) <- 'RNA'
seu.RNA_combined.fix <- SCTransform(seu.RNA_combined.fix, assay = "RNA", new.assay.name = "SCT", vars.to.regress = c("nCount_RNA", "percent.mt", "plate"), return.only.var.genes = FALSE, verbose = FALSE)

# Add some metadata to normalized data (ncounts & percent mt)
# Per-cell column sums of the SCT counts. `sum` is passed directly because
# funs() is deprecated in dplyr.
seu.RNA_combined.fix <- AddMetaData(seu.RNA_combined.fix, as.data.frame(seu.RNA_combined.fix@assays$SCT@counts) %>% summarise_all(sum) %>% unlist(), col.name = "nCount_RNA_SCT")

seu.RNA_combined.fix <- PercentageFeatureSet(seu.RNA_combined.fix, pattern = "^MT\\.|^MTRN", col.name = "percent.mt", assay = "SCT")

## Fixed dataset normalize protein
# All proteins are declared variable features before CLR normalization and scaling.
DefaultAssay(seu.RNA_combined.fix) <- 'PROT'
VariableFeatures(seu.RNA_combined.fix) <- rownames(seu.RNA_combined.fix[["PROT"]])
seu.RNA_combined.fix <- NormalizeData(seu.RNA_combined.fix, normalization.method = 'CLR', margin = 2, assay = "PROT") %>% 
  ScaleData(vars.to.regress = c("nCount_PROT", "plate"))

## live data normalize RNA
# NOTE(review): the object name, the argument names new.assay.name /
# vars.to.regress / col.name and the "percent.mt" strings were missing from
# this chunk, and several statements were fused. The code is reconstructed from
# the parallel fixed-cell normalization; confirm against the original script.
DefaultAssay(seu.RNA_combined.live) <- 'RNA'
seu.RNA_combined.live <- SCTransform(seu.RNA_combined.live, assay = "RNA", new.assay.name = "SCT", vars.to.regress = c("nCount_RNA", "percent.mt", "plate"), return.only.var.genes = FALSE, verbose = FALSE)

# Add some metadata to normalized data (ncounts & percent mt)
# Per-cell column sums of the SCT counts. `sum` is passed directly because
# funs() is deprecated in dplyr.
seu.RNA_combined.live <- AddMetaData(seu.RNA_combined.live, as.data.frame(seu.RNA_combined.live@assays$SCT@counts) %>% summarise_all(sum) %>% unlist(), col.name = "nCount_RNA_SCT")

seu.RNA_combined.live <- PercentageFeatureSet(seu.RNA_combined.live, pattern = "^MT\\.|^MTRN", col.name = "percent.mt", assay = "SCT")

## live normalize & scale protein data 
# All proteins are declared variable features before CLR normalization and scaling.
DefaultAssay(seu.RNA_combined.live) <- 'PROT'
VariableFeatures(seu.RNA_combined.live) <- rownames(seu.RNA_combined.live[["PROT"]])
seu.RNA_combined.live <- NormalizeData(seu.RNA_combined.live, normalization.method = 'CLR', margin = 2, assay = "PROT") %>% 
  ScaleData(vars.to.regress = c("nCount_PROT", "plate"))

Filtered dataset properties

Overview of the number of cells and data properties of all plates.

Live-cells RNA & surface protein dataset
An object of class Seurat 
20366 features across 1433 samples within 3 assays 
Active assay: PROT (50 features, 50 variable features)
 2 other assays present: RNA, SCT

Table Overview of per plate properties after filtering.

# Per-plate summary of the live-cell dataset (cell number and medians).
# NOTE(review): the table source and the mitochondrial column were missing from
# this chunk; restored as the live object's @meta.data and "percent.mt".
# Confirm against the original script.
kable(seu.RNA_combined.live@meta.data %>% 
        group_by(donor,plate) %>% 
        summarise(`Number of cells` = round(n(),0),
                  `Median counts RNA` = round(median(nCount_RNA),0),
                  `Median Number genes` = round(median(nFeature_RNA),0),
                  `Median Mitochondrial counts (Median %)` = round(median(percent.mt),2), 
                  `Median counts PROT` = round(median(nCount_PROT),0),
                  `Number proteins` = round(median(nFeature_PROT),0)
                  )) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
donor plate Number of cells Median counts RNA Median Number genes Median Mitochondrial counts (Median %) Median counts PROT Number proteins
D1 P_1578 216 1624 556 1.14 3842 46
D1 P_1579 293 2333 861 1.21 3601 45
D2 P_1580 274 2888 1040 1.18 3908 47
D2 P_1584 184 3688 1220 1.21 4383 48
D3 P_1582 231 1150 524 1.11 3575 47
D3 P_1585 235 3706 1133 1.24 3831 47

Table Overview of per donor properties after filtering.

# Per-donor summary of the live-cell dataset (cell number and medians).
# NOTE(review): the table source and the mitochondrial column were missing from
# this chunk; restored as the live object's @meta.data and "percent.mt".
# Confirm against the original script.
kable(seu.RNA_combined.live@meta.data %>% 
        group_by(donor) %>% 
        summarise(`Number of cells` = round(n(),0),
                  `Median counts RNA` = round(median(nCount_RNA),0),
                  `Median Number genes` = round(median(nFeature_RNA),0),
                  `Median Mitochondrial counts (Median %)` = round(median(percent.mt),2), 
                  `Median counts PROT` = round(median(nCount_PROT),0),
                  `Number proteins` = round(median(nFeature_PROT),0)
                  )) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
donor Number of cells Median counts RNA Median Number genes Median Mitochondrial counts (Median %) Median counts PROT Number proteins
D1 509 2008 732 1.18 3693 46
D2 458 3168 1122 1.18 4098 47
D3 466 1817 731 1.17 3694 47

Fixed cells intracellular proteins dataset

An object of class Seurat 
10114 features across 1038 samples within 3 assays 
Active assay: PROT (76 features, 76 variable features)
 2 other assays present: RNA, SCT

Table Overview of per plate properties after filtering.

# Per-plate summary of the fixed-cell dataset (cell number and medians).
# NOTE(review): the table source and the mitochondrial column were missing from
# this chunk; restored as the fixed object's @meta.data and "percent.mt".
# Confirm against the original script.
kable(seu.RNA_combined.fix@meta.data %>% 
        group_by(donor,plate) %>% 
        summarise(`Number of cells` = round(n(),0),
                  `Median counts RNA` = round(median(nCount_RNA),0),
                  `Median Number genes` = round(median(nFeature_RNA),0),
                  `Median Mitochondrial counts (Median %)` = round(median(percent.mt),2), 
                  `Median counts PROT` = round(median(nCount_PROT),0),
                  `Number proteins` = round(median(nFeature_PROT),0)
                  )) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
donor plate Number of cells Median counts RNA Median Number genes Median Mitochondrial counts (Median %) Median counts PROT Number proteins
D2 P_1586 290 280 106 0.52 7664 72
D2 P_1587 232 266 120 0.99 8492 72
D3 P_1588 254 322 140 0.57 7250 72
D3 P_1589 262 272 116 0.81 8704 72

Table Overview of per donor properties after filtering.

# Per-donor summary of the fixed-cell dataset (cell number and medians).
# NOTE(review): the table source and the mitochondrial column were missing from
# this chunk; restored as the fixed object's @meta.data and "percent.mt".
# Confirm against the original script.
kable(seu.RNA_combined.fix@meta.data %>% 
        group_by(donor) %>% 
        summarise(`Number of cells` = round(n(),0),
                  `Median counts RNA` = round(median(nCount_RNA),0),
                  `Median Number genes` = round(median(nFeature_RNA),0),
                  `Median Mitochondrial counts (Median %)` = round(median(percent.mt),2), 
                  `Median counts PROT` = round(median(nCount_PROT),0),
                  `Number proteins` = round(median(nFeature_PROT),0)
                  )) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
donor Number of cells Median counts RNA Median Number genes Median Mitochondrial counts (Median %) Median counts PROT Number proteins
D2 522 272 112 0.73 7924 72
D3 516 292 126 0.64 7942 72

Save dataset

The Seurat objects with filtered cells and normalized counts are stored in "output/seu.fix_norm.rds" (intracellular protein modality) and "output/seu.live_norm.rds" (RNA and surface protein modalities).

## Save Seurat objects
saveRDS(seu.RNA_combined.fix, file = "output/seu.fix_norm.rds")
# NOTE(review): the object argument was missing from this call; it is restored
# as the live-cell counterpart of seu.RNA_combined.fix. Confirm the name against
# the original script.
saveRDS(seu.RNA_combined.live, file = "output/seu.live_norm.rds")

