Last updated: 2023-07-14

File Version Author Date Message
Rmd 3a5ec3f Dave Tang 2023-07-14 Interactive heatmap
html 7fa8b41 Dave Tang 2023-07-13 Build site.
Rmd 922aa56 Dave Tang 2023-07-13 ARCHS4 heatmap

Prepare data using base R.

  # Read every per-gene CSV in data/archs4/cancer and stack them into one data
  # frame. The gene symbol is taken from the file name (extension stripped) and
  # prepended as a `gene` column.
  # NOTE(review): the original statement was truncated on this page (the
  # lapply()/function()/do.call() wrappers were lost); this is a reconstruction
  # -- confirm against the Rmd source.
  my_df <- do.call(
    "rbind",
    lapply(
      # "\\.csv$" escapes the dot so only a literal ".csv" suffix matches.
      list.files("data/archs4/cancer", pattern = "\\.csv$", full.names = TRUE),
      function(x) {
        cbind(gene = sub("\\.\\w+$", "", basename(x)), read.csv(x))
      }
    )
  )

# Split `id` column.
# Each `id` is split on "." and the pieces are row-bound into one row per
# record. The result is converted to a data frame because later code accesses
# columns with `$`.
# NOTE(review): the original statement was truncated on this page; the
# do.call()/as.data.frame() wrappers are reconstructed -- confirm against the
# Rmd source.
id_split <- as.data.frame(
  do.call("rbind", strsplit(x = my_df$id, split = "\\."))
)

colnames(id_split) <- c('root', 'system', 'organ', 'tissue')

# Rename tissues.
#' Upper-case the first character of each string.
#'
#' @param x A character vector (a single string works too).
#' @return `x` with the first character of every element upper-cased; the
#'   remaining characters are left untouched. Empty strings are returned
#'   unchanged.
cap_first <- function(x) {
  # Replace by position rather than with sub(): the first character is then
  # never interpreted as a regular expression, so inputs starting with "(",
  # "[", "+", ... no longer raise an error.
  substr(x, 1, 1) <- toupper(substr(x, 1, 1))
  x
}

# Normalise tissue labels: lower-case everything, then capitalise the first
# letter of each label.
id_split$tissue <- tolower(id_split$tissue)
# vapply() pins the result to a character vector; sapply() would return an
# empty list for zero-length input.
id_split$tissue <- vapply(id_split$tissue, cap_first, character(1))

# Attach the split id components as extra columns.
my_df <- cbind(my_df, id_split)

# Order `my_df` by system.
my_df <- my_df[order(my_df$gene, my_df$system), ]
# Factor levels follow the order of first appearance after sorting.
my_df$tissue <- factor(my_df$tissue, levels = unique(my_df$tissue))

    gene                                                id      min       q1
12 CCND1          System.Cardiovascular System.Heart.VALVE 10.62560 11.68490
28 CCND1          System.Cardiovascular System.Heart.HEART  5.87724 10.15820
30 CCND1      System.Cardiovascular System.Heart.VENTRICLE  9.54469 10.37180
36 CCND1         System.Cardiovascular System.Heart.ATRIUM  8.44515  9.67321
5  CCND1          System.Connective Tissue.Bone.OSTEOBLAST 11.30840 12.09570
18 CCND1 System.Connective Tissue.Adipose tissue.ADIPOCYTE  8.38312 10.48580
    median      q3     max   root                system          organ
12 12.0648 12.5311 13.7986 System Cardiovascular System          Heart
28 10.9207 11.5210 12.8617 System Cardiovascular System          Heart
30 10.8446 11.2841 11.9118 System Cardiovascular System          Heart
36 10.5234 11.0560 11.4873 System Cardiovascular System          Heart
5  12.6214 13.2789 14.0211 System     Connective Tissue           Bone
18 11.7684 12.7769 14.1867 System     Connective Tissue Adipose tissue
12      Valve
28      Heart
30  Ventricle
36     Atrium
5  Osteoblast
18  Adipocyte

Convert back to wide format.

# One row per gene, one column per tissue, cells holding the median value.
my_df_wide <- tidyr::pivot_wider(
  dplyr::select(my_df, gene, median, tissue),
  names_from = tissue,
  values_from = median
)

Convert to matrix and plot.

# Drop the first column (gene) from the values and use it as row names instead.
my_mat <- as.matrix(my_df_wide[, -1])
rownames(my_mat) <- my_df_wide[["gene"]]


Create sample annotation.

# Column order of the matrix; the annotation rows are aligned to it.
my_order <- colnames(my_mat)

# Unique (system, tissue) pairs, sorted into matrix column order, keeping only
# the system column.
sample_anno <- dplyr::select(
  dplyr::arrange(
    dplyr::distinct(dplyr::select(my_df, system, tissue)),
    match(tissue, my_order)
  ),
  -tissue
)

# Label each annotation row with its tissue name.
rownames(sample_anno) <- my_order
Valve      Cardiovascular System
Heart      Cardiovascular System
Ventricle  Cardiovascular System
Atrium     Cardiovascular System
Osteoblast     Connective Tissue
Adipocyte      Connective Tissue

Heatmap with system annotation.

# Heatmap of the matrix with a per-column annotation track taken from
# `sample_anno`.
pheatmap(
  mat = my_mat,
  annotation_col = sample_anno
)

Interactive heatmap.

  # Interactive heatmap of `my_mat` on a green-to-red colour ramp.
  # NOTE(review): the opening `plot_ly(` and closing parenthesis were lost on
  # this page. The x/y labels are inferred from the tick labels in the rendered
  # output -- confirm against the Rmd source.
  plot_ly(
    x = colnames(my_mat),
    y = rownames(my_mat),
    z = my_mat,
    colors = colorRamp(c("green", "red")),
    type = "heatmap"
  )
ValveVentricleOsteoblastStromal cellChondrocyteHepatic stellate cellEsophagusColonHepatocyteBeta cellPancreatic isletKupffer cellMacrophageBlymphocyteTlymphocyteThymusThymocytePlasma cellHair follicleSkinBasal cellVascular smooth muscleMyoblastMedullaRetinaSpinal cordHypothalamusMotor neuronMidbrainSensory neuronLung epithelial cellLungMammary glandRenal cortexOvaryGranulosaCCND1ERBB2FGFR1GATA3MYCPIK3CAPTENTP53

R version 4.3.0 (2023-04-21)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 22.04.2 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/ 
LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/;  LAPACK version 3.10.0

 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            

time zone: Etc/UTC
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] plotly_4.10.2   ggplot2_3.4.2   pheatmap_1.0.12 workflowr_1.7.0

loaded via a namespace (and not attached):
 [1] tidyr_1.3.0        sass_0.4.5         utf8_1.2.3         generics_0.1.3    
 [5] stringi_1.7.12     digest_0.6.31      magrittr_2.0.3     evaluate_0.20     
 [9] grid_4.3.0         RColorBrewer_1.1-3 fastmap_1.1.1      rprojroot_2.0.3   
[13] jsonlite_1.8.5     processx_3.8.1     whisker_0.4.1      ps_1.7.5          
[17] promises_1.2.0.1   httr_1.4.5         purrr_1.0.1        fansi_1.0.4       
[21] crosstalk_1.2.0    viridisLite_0.4.1  scales_1.2.1       lazyeval_0.2.2    
[25] jquerylib_0.1.4    cli_3.6.1          rlang_1.1.0        ellipsis_0.3.2    
[29] munsell_0.5.0      withr_2.5.0        cachem_1.0.7       yaml_2.3.7        
[33] tools_4.3.0        dplyr_1.1.2        colorspace_2.1-0   httpuv_1.6.9      
[37] vctrs_0.6.2        R6_2.5.1           lifecycle_1.0.3    git2r_0.32.0      
[41] stringr_1.5.0      htmlwidgets_1.6.2  fs_1.6.2           pkgconfig_2.0.3   
[45] callr_3.7.3        pillar_1.9.0       bslib_0.4.2        later_1.3.0       
[49] gtable_0.3.3       data.table_1.14.8  glue_1.6.2         Rcpp_1.0.10       
[53] highr_0.10         xfun_0.39          tibble_3.2.1       tidyselect_1.2.0  
[57] rstudioapi_0.14    knitr_1.42         farver_2.1.1       htmltools_0.5.5   
[61] rmarkdown_2.21     compiler_4.3.0     getPass_0.2-2