source("code/plot_QC_function.R") #change settings in script to get different layout per subpanel for this figure
# Location where paper figures are stored.
# `recursive = TRUE` also creates a missing parent "output" folder, and
# `showWarnings = FALSE` keeps re-runs quiet when the folder already exists.
dir.create("output/paper_figures", showWarnings = FALSE, recursive = TRUE)


The QuRIE-seq data contains single-cell transcriptomic and proteomic data of BJAB cells with 9 different durations of aIg stimulation, and 3 additional timepoints with ibrutinib drug inhibition. The script below extracts all metadata (the sequencing sample names, plus per-sample information on whether the library is a protein or an RNA library) and reads all data tables into R. The metadata table is saved in the output folder: “output/metadata.csv”

For QC and filtering, cells with matching RNA and protein information are used to create a Seurat object (settings RNA: min.cells = 100, min.features = 100; proteins added as additional modality PROT). Several QC stats are calculated, and the object is saved in: “output/seu_combined_raw.rds”

# List the files in the output folder whose names match the pattern ".rds$".
myfiles <- list.files(path="output/", pattern = ".rds$")
## Only read all raw files and create the raw combined object if that has not been done yet.
## Re-using the cached object speeds up generation of the html file.
# NOTE(review): the body of the else branch is not shown in this excerpt, so
# the brace opened at the end of the next line is not closed here.
if("seu_combined_raw.rds" %in%  myfiles){seu_combined <- readRDS("output/seu_combined_raw.rds")} else { 

Before QC dataset properties

Seurat object:

An object of class Seurat 
8452 features across 7449 samples within 2 assays 
Active assay: RNA (8372 features, 0 variable features)
 1 other assay present: PROT

Table Overview of per sample properties.

# Per-condition QC summary of the unfiltered cells, rendered as a styled table.
# NOTE(review): the data source (`seu_combined@meta.data`) and the
# mitochondrial column (`percent.mt`) were missing from this chunk and are
# reconstructed from the surrounding analysis -- confirm against the notebook.
kable(seu_combined@meta.data %>%
        group_by(condition) %>%
        summarise(`Total number of cells` = round(n(), 0),
                  `Median counts RNA` = round(median(nCount_RNA), 0),
                  `Median Number genes` = round(median(nFeature_RNA), 0),
                  `Median Mitochondrial counts (Median %)` = round(median(percent.mt), 2),
                  `Ribosomal counts (Median %)` = round(median(percent.rb), 2),
                  `Median counts PROT` = round(median(nCount_PROT), 0),
                  `Number proteins` = round(median(nFeature_PROT), 0)
                  )) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
condition Total number of cells Median counts RNA Median Number genes Median Mitochondrial counts (Median %) Ribosomal counts (Median %) Median counts PROT Number proteins
000.aIg.contr 688 1364 947 7.32 6.76 1108 61
002.aIg.contr 958 772 585 7.81 6.83 984 59
004.aIg.contr 545 829 619 8.04 6.06 1035 59
006.aIg.contr 820 1047 762 8.37 6.93 720 54
006.aIg.ibr 1148 459 354 11.14 6.10 756 57
060.aIg.contr 879 636 489 8.84 6.71 891 57
180.aIg.contr 1121 710 548 7.43 6.85 867 57
180.aIg.ibr 1290 735 572 6.37 6.97 808 57

Table Overview of full dataset properties.

# QC summary over all unfiltered cells; t() turns the single summary row into
# one row per statistic before rendering.
# NOTE(review): the data source (`seu_combined@meta.data`) and the
# mitochondrial column (`percent.mt`) were missing from this chunk and are
# reconstructed from the surrounding analysis -- confirm against the notebook.
kable(seu_combined@meta.data %>%
        summarise(`Number of cells` = round(n(), 0),
                  `Median counts RNA` = round(median(nCount_RNA), 0),
                  `Median Number genes` = round(median(nFeature_RNA), 0),
                  `Median Mitochondrial counts (Median %)` = round(median(percent.mt), 2),
                  `Ribosomal counts (Median %)` = round(median(percent.rb), 2),
                  `Median counts PROT` = round(median(nCount_PROT), 0),
                  `Number proteins` = round(median(nFeature_PROT), 0)
                  ) %>%
        t()) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Number of cells 7449.00
Median counts RNA 727.00
Median Number genes 556.00
Median Mitochondrial counts (Median %) 8.05
Ribosomal counts (Median %) 6.70
Median counts PROT 863.00
Number proteins 57.00

Quality Control & filtering

# QC panels for the supplementary figure. Each panel is built with
# plot_QC_paper() (sourced from code/plot_QC_function.R, not shown here); a
# horizontal line marks the cut-off named in the panel title where one applies.
# RNA-derived panels use "dodgerblue2", protein-derived panels "deeppink3".

## RNA: total UMI counts per cell
plot_RNA_nCount <- plot_QC_paper(seu_object = seu_combined,
                                 feature = "nCount_RNA",
                                 ytext = "Total UMI counts per cell",
                                 xtext = "Time aIg stimulation (minutes)",
                                 paneltitle = "Keep cells < 4000 RNA counts",
                                 colorviolin = "dodgerblue2") +
  geom_hline(yintercept = 4000, size = 0.3) +
  theme(axis.title.x = element_blank())

## RNA: number of detected genes per cell
plot_RNA_ngenes <- plot_QC_paper(seu_object = seu_combined,
                                 feature = "nFeature_RNA",
                                 ytext = "Total genes per cell",
                                 xtext = "Time aIg stimulation (minutes)",
                                 paneltitle = "Keep cells >150 genes",
                                 colorviolin = "dodgerblue2") +
  geom_hline(yintercept = 150, size = 0.3) +
  theme(axis.title.x = element_blank())

## RNA: percentage of mitochondrial counts
# NOTE(review): this panel had lost its object name and feature string; both
# are reconstructed as `plot_percent.mt` / "percent.mt" to match the sibling
# `plot_percent.rb` / "percent.rb" panel -- confirm against the notebook.
plot_percent.mt <- plot_QC_paper(seu_object = seu_combined,
                                 feature = "percent.mt",
                                 ytext = "% Mitochondrial counts",
                                 xtext = "Time aIg stimulation (minutes)",
                                 paneltitle = "Keep cells < 15 % mitochondrial genecounts",
                                 colorviolin = "dodgerblue2") +
  geom_hline(yintercept = 15, color = "black", size = 0.3) +
  theme(axis.title.x = element_blank())

## RNA: percentage of ribosomal counts (no cut-off line drawn)
plot_percent.rb <- plot_QC_paper(seu_object = seu_combined,
                                 feature = "percent.rb",
                                 ytext = "% Ribosomal counts",
                                 xtext = "Time aIg stimulation (minutes)",
                                 paneltitle = "Stable % ribosomal counts over time",
                                 colorviolin = "dodgerblue2") +
  theme(axis.title.x = element_blank())

## PROT: total UMI counts per cell
plot_PROT_nCount <- plot_QC_paper(seu_object = seu_combined,
                                  feature = "nCount_PROT",
                                  ytext = "Total UMI counts per cell",
                                  xtext = "Time aIg stimulation (minutes)",
                                  paneltitle = "Keep cells < 3000 PROT counts",
                                  colorviolin = "deeppink3") +
  geom_hline(yintercept = 3000, size = 0.3) +
  theme(axis.title.x = element_blank())

## PROT: number of detected proteins per cell (x-axis title kept)
plot_PROT_nproteins <- plot_QC_paper(seu_object = seu_combined,
                                     feature = "nFeature_PROT",
                                     ytext = "Total proteins per cell",
                                     xtext = "Time aIg stimulation (minutes)",
                                     paneltitle = "Keep cells >45 proteins",
                                     colorviolin = "deeppink3") +
  geom_hline(yintercept = 45, size = 0.3)

## PROT: percentage of Histone H3 counts (x-axis title kept, no cut-off line)
plot_percent.H3 <- plot_QC_paper(seu_object = seu_combined,
                                 feature = "percent.HisH3",
                                 ytext = "% Histone H3 counts",
                                 xtext = "Time aIg stimulation (minutes)",
                                 paneltitle = "Variation in % Histone H3 counts",
                                 colorviolin = "deeppink3")

# Assemble the seven panels in two columns; the seven labels a-g map one-to-one
# onto the seven plots, in order.
plot.QC <- plot_grid(plot_RNA_nCount, plot_RNA_ngenes,
                     plot_percent.mt, plot_percent.rb,
                     plot_PROT_nCount, plot_PROT_nproteins,
                     plot_percent.H3,
                     labels = c('a', 'b', 'c', 'd', 'e', 'f', 'g'),
                     label_size = 10,
                     ncol = 2)

# Write the figure as both PDF and PNG at the same physical size.
ggsave(plot.QC, filename = "output/paper_figures/Suppl_QC_filters.pdf", width = 183, height = 200, units = "mm", dpi = 300, useDingbats = FALSE)
ggsave(plot.QC, filename = "output/paper_figures/Suppl_QC_filters.png", width = 183, height = 200, units = "mm", dpi = 300)

Supplementary Figure Thresholds for selection of high-quality samples and cells from the QuRIE-seq datasets.

Based on the indicated cut-offs, high-quality cells are filtered for further analysis.

# Keep only the cells that pass every QC cut-off shown in the QC figure.
# NOTE(review): the left-hand side of the last condition was missing; it is
# reconstructed as `percent.mt` to match the "< 15 % mitochondrial" panel --
# confirm against the notebook.
seu_combined_filtered <- subset(
  seu_combined,
  subset = nFeature_RNA > 150 &
    nCount_RNA < 4000 &
    nFeature_PROT > 45 &
    nCount_PROT < 3000 &
    percent.mt < 15
)

Normalize and scale

# Run sctransform on the RNA assay and store the result as assay "SCT.RNA".
# NOTE(review): three argument names and the closing parenthesis were missing
# from this call. `new.assay.name`, `vars.to.regress` (with "percent.mt") and
# `do.center` are reconstructed from the argument order of Seurat::SCTransform
# and the trailing comment -- confirm against the notebook.
seu_combined_filtered <- SCTransform(seu_combined_filtered,
                            assay = "RNA",
                            new.assay.name = "SCT.RNA",
                            do.correct.umi = TRUE,
                            ncells = NULL,
                            variable.features.n = 3000,
                            vars.to.regress = c("percent.mt", "nCount_RNA"), # substantial variation between samples & cells in mito and ncount
                            do.scale = FALSE,
                            do.center = TRUE,
                            conserve.memory = FALSE,
                            return.only.var.genes = FALSE,
                            seed.use = 42,
                            verbose = FALSE)

# Add some metadata to normalized data (ncounts & percent mt)
# Per-cell total of the corrected counts. The column sums are taken directly
# on the stored count matrix instead of converting it to a data frame first.
# NOTE(review): the matrix accessor and the `col.name` argument name were
# partly missing and are reconstructed -- confirm against the notebook.
seu_combined_filtered <- AddMetaData(
  seu_combined_filtered,
  metadata = Matrix::colSums(seu_combined_filtered@assays$SCT.RNA@counts),
  col.name = "nCount_RNA_SCT"
)

# NOTE(review): the target column name was missing; "percent.mt" is assumed,
# which overwrites any existing column of that name -- confirm.
seu_combined_filtered <- PercentageFeatureSet(seu_combined_filtered,
                                              pattern = "^MT\\.|^MTRN",
                                              col.name = "percent.mt",
                                              assay = "SCT.RNA")

## cell cycle scoring metadata
# Score the S and G2M gene sets from `cc.genes` on the SCT.RNA assay without
# changing the active identities (set.ident = FALSE).
s.genes <- cc.genes$s.genes
g2m.genes <- cc.genes$g2m.genes
seu_combined_filtered <- CellCycleScoring(seu_combined_filtered,
                                          s.features = s.genes,
                                          g2m.features = g2m.genes,
                                          set.ident = FALSE,
                                          assay = "SCT.RNA")

# Copy the scoring results into the metadata column names used in this project.
# NOTE(review): the object on the right-hand side was missing and is
# reconstructed as `seu_combined_filtered` -- confirm against the notebook.
seu_combined_filtered[["S.score"]] <- seu_combined_filtered$S.Score
seu_combined_filtered[["G2M.score"]] <- seu_combined_filtered$G2M.Score
seu_combined_filtered[["CCphase"]] <- seu_combined_filtered$Phase
# Normalise (CLR) and scale all features of the PROT assay.
all.prot <- rownames(seu_combined_filtered[["PROT"]])

seu_combined_filtered <- NormalizeData(seu_combined_filtered,
                                       assay = "PROT",
                                       normalization.method = "CLR",
                                       verbose = FALSE)

# The argument name was missing here; `vars.to.regress` matches the
# "Regressing out nFeature_PROT, nCount_PROT, percent.HisH3" message this
# call prints.
seu_combined_filtered <- ScaleData(seu_combined_filtered,
                                   assay = "PROT",
                                   features = all.prot,
                                   vars.to.regress = c("nFeature_PROT", "nCount_PROT", "percent.HisH3"))
Regressing out nFeature_PROT, nCount_PROT, percent.HisH3
Centering and scaling data matrix

Seurat object with filtered cells and normalized counts is stored in “output/seu_combined_filtered_normalized.rds”

# Store the filtered and normalised Seurat object for the downstream analyses.
saveRDS(
  object = seu_combined_filtered,
  file = "output/seu_combined_filtered_normalized.rds"
)

Subset of samples

The manuscript describes two analyses of different collections of samples:

* Effect of aIg stimulation over two time-scales (see MOFA aIg page)
* Effect of ibrutinib on the cell-state at these two time-scales (see MOFA ibru page)

# aIg subset: everything except the two ibrutinib identities (invert = TRUE
# drops the listed identities instead of keeping them).
seu_combined_aIg_selected <- subset(
  seu_combined_filtered,
  idents = c("006.aIg.ibr", "180.aIg.ibr"),
  invert = TRUE
)
saveRDS(object = seu_combined_aIg_selected, file = "output/seu_aIG_samples.rds")

# Ibrutinib subset: everything except the three listed control identities.
seu_combined_ibru_selected <- subset(
  seu_combined_filtered,
  idents = c("002.aIg.contr", "004.aIg.contr", "060.aIg.contr"),
  invert = TRUE
)
saveRDS(object = seu_combined_ibru_selected, file = "output/seu_ibru_samples.rds")

Filtered dataset properties

Overview of the number of cells and data properties of all samples, aIg subset of samples, or ibrutinib subset of samples.

Full dataset

An object of class Seurat 
16824 features across 6952 samples within 3 assays 
Active assay: SCT.RNA (8372 features, 3000 variable features)
 2 other assays present: RNA, PROT

Table Overview of per sample properties after filtering

# Per-condition QC summary of the cells kept after filtering.
# NOTE(review): the data source (`seu_combined_filtered@meta.data`) and the
# mitochondrial column (`percent.mt`) were missing from this chunk and are
# reconstructed from the surrounding analysis -- confirm against the notebook.
kable(seu_combined_filtered@meta.data %>%
        group_by(condition) %>%
        summarise(`Number of cells` = round(n(), 0),
                  `Median counts RNA` = round(median(nCount_RNA), 0),
                  `Median Number genes` = round(median(nFeature_RNA), 0),
                  `Median Mitochondrial counts (Median %)` = round(median(percent.mt), 2),
                  `Ribosomal counts (Median %)` = round(median(percent.rb), 2),
                  `Median counts PROT` = round(median(nCount_PROT), 0),
                  `Number proteins` = round(median(nFeature_PROT), 0)
                  )) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
condition Number of cells Median counts RNA Median Number genes Median Mitochondrial counts (Median %) Ribosomal counts (Median %) Median counts PROT Number proteins
000.aIg.contr 648 1393 964 7.18 6.78 1108 60
002.aIg.contr 923 783 599 7.69 6.86 993 59
004.aIg.contr 508 863 640 7.85 6.11 1042 59
006.aIg.contr 713 1087 794 8.13 7.03 749 54
006.aIg.ibr 943 493 386 10.46 6.05 777 57
060.aIg.contr 863 638 492 8.81 6.72 894 57
180.aIg.contr 1099 711 548 7.42 6.85 871 57
180.aIg.ibr 1255 732 571 6.37 6.96 810 57

Table Overview of full filtered dataset properties.

# QC summary over all cells kept after filtering; t() turns the single summary
# row into one row per statistic before rendering.
# NOTE(review): the data source (`seu_combined_filtered@meta.data`) and the
# mitochondrial column (`percent.mt`) were missing from this chunk and are
# reconstructed from the surrounding analysis -- confirm against the notebook.
kable(seu_combined_filtered@meta.data %>%
        summarise(`Number of cells` = round(n(), 0),
                  `Median counts RNA` = round(median(nCount_RNA), 0),
                  `Median Number genes` = round(median(nFeature_RNA), 0),
                  `Median Mitochondrial counts (Median %)` = round(median(percent.mt), 2),
                  `Ribosomal counts (Median %)` = round(median(percent.rb), 2),
                  `Median counts PROT` = round(median(nCount_PROT), 0),
                  `Number proteins` = round(median(nFeature_PROT), 0)
                  ) %>%
        t()) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Number of cells 6952.00
Median counts RNA 741.00
Median Number genes 566.00
Median Mitochondrial counts (Median %) 7.90
Ribosomal counts (Median %) 6.71
Median counts PROT 876.00
Number proteins 58.00

aIg samples

An object of class Seurat 
16824 features across 4754 samples within 3 assays 
Active assay: SCT.RNA (8372 features, 3000 variable features)
 2 other assays present: RNA, PROT

Table Overview of aIg dataset properties per sample

# Per-condition QC summary of the aIg subset of samples.
# NOTE(review): the data source (`seu_combined_aIg_selected@meta.data`) and the
# mitochondrial column (`percent.mt`) were missing from this chunk and are
# reconstructed from the surrounding analysis -- confirm against the notebook.
kable(seu_combined_aIg_selected@meta.data %>%
        group_by(condition) %>%
        summarise(`Number of cells` = round(n(), 0),
                  `Median counts RNA` = round(median(nCount_RNA), 0),
                  `Median Number genes` = round(median(nFeature_RNA), 0),
                  `Median Mitochondrial counts (Median %)` = round(median(percent.mt), 2),
                  `Ribosomal counts (Median %)` = round(median(percent.rb), 2),
                  `Median counts PROT` = round(median(nCount_PROT), 0),
                  `Number proteins` = round(median(nFeature_PROT), 0)
                  )) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
condition Number of cells Median counts RNA Median Number genes Median Mitochondrial counts (Median %) Ribosomal counts (Median %) Median counts PROT Number proteins
000.aIg.contr 648 1393 964 7.18 6.78 1108 60
002.aIg.contr 923 783 599 7.69 6.86 993 59
004.aIg.contr 508 863 640 7.85 6.11 1042 59
006.aIg.contr 713 1087 794 8.13 7.03 749 54
060.aIg.contr 863 638 492 8.81 6.72 894 57
180.aIg.contr 1099 711 548 7.42 6.85 871 57

Table Overview of aIg dataset properties.

# QC summary over all cells of the aIg subset; t() turns the single summary
# row into one row per statistic before rendering.
# NOTE(review): the data source (`seu_combined_aIg_selected@meta.data`) and the
# mitochondrial column (`percent.mt`) were missing from this chunk and are
# reconstructed from the surrounding analysis -- confirm against the notebook.
kable(seu_combined_aIg_selected@meta.data %>%
        summarise(`Number of cells` = round(n(), 0),
                  `Median counts RNA` = round(median(nCount_RNA), 0),
                  `Median Number genes` = round(median(nFeature_RNA), 0),
                  `Median Mitochondrial counts (Median %)` = round(median(percent.mt), 2),
                  `Ribosomal counts (Median %)` = round(median(percent.rb), 2),
                  `Median counts PROT` = round(median(nCount_PROT), 0),
                  `Number proteins` = round(median(nFeature_PROT), 0)
                  ) %>%
        t()) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Number of cells 4754.00
Median counts RNA 808.00
Median Number genes 608.00
Median Mitochondrial counts (Median %) 7.86
Ribosomal counts (Median %) 6.77
Median counts PROT 917.00
Number proteins 58.00

aIg + ibrutinib samples

An object of class Seurat 
16824 features across 4658 samples within 3 assays 
Active assay: SCT.RNA (8372 features, 3000 variable features)
 2 other assays present: RNA, PROT

Table Overview of ibru dataset properties per sample

# Per-condition QC summary of the ibrutinib subset of samples.
# NOTE(review): the data source (`seu_combined_ibru_selected@meta.data`) and
# the mitochondrial column (`percent.mt`) were missing from this chunk and are
# reconstructed from the surrounding analysis -- confirm against the notebook.
kable(seu_combined_ibru_selected@meta.data %>%
        group_by(condition) %>%
        summarise(`Number of cells` = round(n(), 0),
                  `Median counts RNA` = round(median(nCount_RNA), 0),
                  `Median Number genes` = round(median(nFeature_RNA), 0),
                  `Median Mitochondrial counts (Median %)` = round(median(percent.mt), 2),
                  `Ribosomal counts (Median %)` = round(median(percent.rb), 2),
                  `Median counts PROT` = round(median(nCount_PROT), 0),
                  `Number proteins` = round(median(nFeature_PROT), 0)
                  )) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
condition Number of cells Median counts RNA Median Number genes Median Mitochondrial counts (Median %) Ribosomal counts (Median %) Median counts PROT Number proteins
000.aIg.contr 648 1393 964 7.18 6.78 1108 60
006.aIg.contr 713 1087 794 8.13 7.03 749 54
006.aIg.ibr 943 493 386 10.46 6.05 777 57
180.aIg.contr 1099 711 548 7.42 6.85 871 57
180.aIg.ibr 1255 732 571 6.37 6.96 810 57

Table Overview of ibru dataset properties.

# QC summary over all cells of the ibrutinib subset; t() turns the single
# summary row into one row per statistic before rendering.
# NOTE(review): the data source (`seu_combined_ibru_selected@meta.data`) and
# the mitochondrial column (`percent.mt`) were missing from this chunk and are
# reconstructed from the surrounding analysis -- confirm against the notebook.
kable(seu_combined_ibru_selected@meta.data %>%
        summarise(`Number of cells` = round(n(), 0),
                  `Median counts RNA` = round(median(nCount_RNA), 0),
                  `Median Number genes` = round(median(nFeature_RNA), 0),
                  `Median Mitochondrial counts (Median %)` = round(median(percent.mt), 2),
                  `Ribosomal counts (Median %)` = round(median(percent.rb), 2),
                  `Median counts PROT` = round(median(nCount_PROT), 0),
                  `Number proteins` = round(median(nFeature_PROT), 0)
                  ) %>%
        t()) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
Number of cells 4658.00
Median counts RNA 752.00
Median Number genes 574.00
Median Mitochondrial counts (Median %) 7.75
Ribosomal counts (Median %) 6.74
Median counts PROT 840.00
Number proteins 57.00

