Last updated: 2020-07-08

Script to download single-cell data from the pancreas IMC dataset and format the data as a SingleCellExperiment object


Read-in single-cell data

Here, a subset of the single-cell data, corresponding to 100 images from the full dataset, is downloaded.

# Download the zipped single-cell data and extract it
# NOTE(review): the URL string is empty in this copy of the script; fill in the
# dataset address before running.
url.cells <- ("")
# download.file() needs a file path (not a directory) as destfile, and unzip()
# must be pointed at that same archive. mode = "wb" keeps the archive binary-safe.
dir.create("data/PancreasData", recursive = TRUE, showWarnings = FALSE)
zip.cells <- "data/PancreasData/CellSubset.zip"
download.file(url.cells, destfile = zip.cells, mode = "wb")
unzip(zip.cells, exdir = "data/PancreasData")
[1] TRUE
# Load the extracted single-cell table
cells <- read.csv(file = "data/PancreasData/CellSubset.csv",
                  stringsAsFactors = FALSE)

# Sort rows by image first, then by object within each image
cells <- cells[order(cells[["ImageNumber"]], cells[["ObjectNumber"]]), ]

Read-in Image metadata

# Download the zipped image metadata and extract it
# NOTE(review): the URL string is empty in this copy of the script; fill in the
# dataset address before running.
url.image <- ("")
# download.file() needs a file path (not a directory) as destfile, and unzip()
# must be pointed at that same archive. mode = "wb" keeps the archive binary-safe.
zip.image <- "data/PancreasData/Image.zip"
download.file(url.image, destfile = zip.image, mode = "wb")
unzip(zip.image, exdir = "data/PancreasData")
[1] TRUE
# Load the extracted image-level table
image <- read.csv(file = "data/PancreasData/All_Image.csv",
                  stringsAsFactors = FALSE)

Read-in cell type information

# Download the zipped cell type table and extract it
# NOTE(review): the URL string is empty in this copy of the script; fill in the
# dataset address before running.
url.celltypes <- ("")
# download.file() needs a file path (not a directory) as destfile, and unzip()
# must be pointed at that same archive. mode = "wb" keeps the archive binary-safe.
zip.celltypes <- "data/PancreasData/CellTypes.zip"
download.file(url.celltypes, destfile = zip.celltypes, mode = "wb")
unzip(zip.celltypes, exdir = "data/PancreasData")
[1] TRUE
# Load the extracted cell type table
celltypes <- read.csv(file = "data/PancreasData/CellTypes.csv",
                      stringsAsFactors = FALSE)

Read-in donor information

# Download the zipped donor table and extract it
# NOTE(review): the URL string is empty in this copy of the script; fill in the
# dataset address before running.
url.donors <- ("")
# download.file() needs a file path (not a directory) as destfile, and unzip()
# must be pointed at that same archive. mode = "wb" keeps the archive binary-safe.
zip.donors <- "data/PancreasData/Donors.zip"
download.file(url.donors, destfile = zip.donors, mode = "wb")
unzip(zip.donors, exdir = "data/PancreasData")
[1] TRUE
# Load the extracted donor table
donors <- read.csv(file = "data/PancreasData/Donors.csv",
                   stringsAsFactors = FALSE)

Load relevant cell-specific metadata

# Collect the per-cell columns of interest from `cells` under shorter names
cell.metadata <- DataFrame(
  ImageNumber  = cells[["ImageNumber"]],
  CellNumber   = cells[["ObjectNumber"]],
  Pos_X        = cells[["Location_Center_X"]],
  Pos_Y        = cells[["Location_Center_Y"]],
  ParentIslet  = cells[["Parent_Islets"]],
  ClosestIslet = cells[["Parent_ExpandedIslets"]],
  Area         = cells[["AreaShape_Area"]],
  NbNeighbours = cells[["Neighbors_NumberOfNeighbors_3"]]
)

Load relevant image-specific metadata

# Collect the per-image columns of interest from `image` under shorter names
image.metadata <- DataFrame(
  ImageNumber   = image[["ImageNumber"]],
  ImageFullName = image[["FileName_CleanStack"]],
  slide         = image[["Metadata_Slide"]],
  width         = image[["Width_CleanStack"]],
  height        = image[["Height_CleanStack"]]
)

Merge cell and image metadata

# Join the image-level columns onto every cell via the shared image number
cell.metadata <- merge(x = cell.metadata,
                       y = image.metadata,
                       by = "ImageNumber")

Add image names

This information is used by cytomapper to match single-cell data with images and masks

# Derive the short image name by stripping the stack-file suffix.
# fixed = TRUE matches the suffix literally, so its "." is not treated as a
# regular-expression wildcard.
cell.metadata$ImageName <- sub("_a0_full_clean.tiff", "",
                               cell.metadata$ImageFullName,
                               fixed = TRUE)

Import cell types

# Add cell ids to cell metadata (format: "ImageName_CellNumber")
cell.metadata$id <- paste(cell.metadata$ImageName, cell.metadata$CellNumber, sep = "_")

# Merge cell metadata and cell type information.
# Only the id, category and type columns of `celltypes` are kept, and the join
# is made on the "id" column built just above.
cell.metadata <- merge(cell.metadata,
                       celltypes[, c("id", "CellCat", "CellType")],
                       by = "id")

Import donor metadata

# Join the donor table onto every cell via the shared slide column
cell.metadata <- merge(x = cell.metadata,
                       y = donors,
                       by = "slide")

Order the cell metadata dataset and add rownames

# Sort cells by image number, then by cell number within each image
cell.metadata <- cell.metadata[order(cell.metadata[["ImageNumber"]],
                                     cell.metadata[["CellNumber"]]), ]

# Label each row with its cell id
rownames(cell.metadata) <- cell.metadata[["id"]]

Load panel data

The panel contains antibody-related metadata. The channel-mass file is used to match panel information and image stack slices.

# Fetch the antibody panel and read it in
url.panel <- ""
download.file(url = url.panel, destfile = "data/PancreasData/panel.csv")
panel <- read.csv(file = "data/PancreasData/panel.csv")

# Fetch the channel-mass table and read it in (the file is read without a header row)
url.channelmass <- ""
download.file(url = url.channelmass, destfile = "data/PancreasData/ChannelMass.csv")
channel.mass <- read.csv(file = "data/PancreasData/ChannelMass.csv", header = FALSE)

Select relevant channels and match them with image stack slices

# Keep the panel rows flagged with full == 1, then arrange them in the order
# given by the first column of the channel-mass table (matched on MetalTag)
panel <- panel[panel[["full"]] == 1, ]
panel <- panel[match(channel.mass[[1]], panel[["MetalTag"]]), ]

# Label each panel row with its short protein name
rownames(panel) <- panel[["shortname"]]

Load single cell measurements

Here, we import the mean intensity per cell

# Keep only the columns whose name contains the mean-intensity (clean stack) tag
cur_counts <- cells[, grep("Intensity_MeanIntensity_CleanStack", names(cells))]

Reorder the counts channels (based on channel number)

# Parse the channel index: everything up to and including the last "_c" of each
# column name is removed and the remainder is converted to a number
channelNumber <- as.numeric(sub("^.*_c", "", names(cur_counts)))

# Arrange the columns in ascending channel order
cur_counts <- cur_counts[, order(channelNumber)]

Create the SingleCellExperiment (SCE) object

# Transpose the cell-by-channel table so channels become rows and cells become
# columns, and store it as the "counts" assay
sce <- SingleCellExperiment(
  assays = list(counts = t(as.matrix(cur_counts)))
)

Add transformed counts as a new assay

exprs = asinh-transformed counts

# Store the asinh-transformed counts (divisor of 1) as the "exprs" assay
assay(sce, "exprs") <- asinh(assay(sce, "counts") / 1)

Set dimnames

# Columns take the cell ids, rows take the short protein names
colnames(sce) <- rownames(cell.metadata)
rownames(sce) <- rownames(panel)

Store metadata in the SCE object

# Attach the panel as row (channel) metadata and the cell table as column (cell) metadata
rowData(sce) <- panel
colData(sce) <- cell.metadata
class: SingleCellExperiment 
dim: 38 252059 
assays(2): counts exprs
rownames(38): H3 SMA ... Ir191 Ir193
rowData names(15): TubeNb MetalTag ... miCAT2 miCAT
colnames(252059): E02_1 E02_2 ... J34_1149 J34_1150
colData names(26): slide id ... Ethnicity BMI

Save SCE

# Write the finished SCE object to disk as an RDS file
saveRDS(object = sce, file = "data/PancreasData/pancreas_sce.rds")

Delete unneeded CSV files from the extdata directory


R version 4.0.0 (2020-04-24)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Catalina 10.15.5

Matrix products: default
BLAS:   /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib

[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] parallel  stats4    stats     graphics  grDevices utils     datasets 
[8] methods   base     

other attached packages:
 [1] cytomapper_1.1.1            EBImage_4.30.0             
 [3] SingleCellExperiment_1.10.1 SummarizedExperiment_1.18.1
 [5] DelayedArray_0.14.0         matrixStats_0.56.0         
 [7] Biobase_2.48.0              GenomicRanges_1.40.0       
 [9] GenomeInfoDb_1.24.2         IRanges_2.22.2             
[11] S4Vectors_0.26.1            BiocGenerics_0.34.0        
[13] workflowr_1.6.2            

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.4.6           locfit_1.5-9.4         lattice_0.20-41       
 [4] fftwtools_0.9-8        png_0.1-7              rprojroot_1.3-2       
 [7] digest_0.6.25          R6_2.4.1               tiff_0.1-5            
[10] backports_1.1.7        evaluate_0.14          ggplot2_3.3.1         
[13] pillar_1.4.4           zlibbioc_1.34.0        rlang_0.4.6           
[16] whisker_0.4            raster_3.1-5           Matrix_1.2-18         
[19] rmarkdown_2.2          stringr_1.4.0          htmlwidgets_1.5.1     
[22] RCurl_1.98-1.2         munsell_0.5.0          compiler_4.0.0        
[25] httpuv_1.5.4           xfun_0.14              pkgconfig_2.0.3       
[28] htmltools_0.4.0        tidyselect_1.1.0       gridExtra_2.3         
[31] tibble_3.0.1           GenomeInfoDbData_1.2.3 codetools_0.2-16      
[34] viridisLite_0.3.0      crayon_1.3.4           dplyr_1.0.0           
[37] later_1.1.0.1          bitops_1.0-6           grid_4.0.0            
[40] gtable_0.3.0           lifecycle_0.2.0        git2r_0.27.1          
[43] magrittr_1.5           scales_1.1.1           stringi_1.4.6         
[46] XVector_0.28.0         viridis_0.5.1          fs_1.4.1              
[49] promises_1.1.1         sp_1.4-2               generics_0.0.2        
[52] ellipsis_0.3.1         vctrs_0.3.1            RColorBrewer_1.1-2    
[55] tools_4.0.0            glue_1.4.1             purrr_0.3.4           
[58] jpeg_0.1-8.1           abind_1.4-5            yaml_2.2.1            
[61] colorspace_1.4-1       knitr_1.28