Making a heatmap using the pheatmap package.

# Location of the tab-delimited expression table. The table needs a "gene"
# column, which becomes the row names.
# NOTE(review): the path is an empty string, and read.delim("") reads from
# the console/stdin rather than from a file - set it to the real input.
example_file <- ""
data <- read.delim(file = example_file, header = TRUE, row.names = "gene")

# Keep only the rows whose values sum to more than 50000 and convert the
# result to a matrix.
data_subset <- local({
  row_totals <- rowSums(data)
  as.matrix(data[row_totals > 50000, ])
})

Default heatmap using pheatmap.


# Standardise a numeric vector to z-scores: subtract the mean, then divide by
# the sample standard deviation.
#
# x: numeric vector.
# Returns a numeric vector of the same length as x. The result is NaN when
# sd(x) is 0 and NA when x has fewer than two values.
cal_z_score <- function(x) {
  (x - mean(x)) / sd(x)
}

# Z-score every row. apply() returns one column per input row, so the result
# is transposed back to the orientation of data_subset.
data_subset_norm <- t(apply(X = data_subset, MARGIN = 1, FUN = cal_z_score))

Setting `scale = "row"` in pheatmap() produces the same heatmap as applying cal_z_score to each row beforehand.

# Let pheatmap() do the row-wise scaling itself.
pheatmap(mat = data_subset, scale = "row")

Two heatmaps.

# Build both heatmaps without drawing them (silent = TRUE).
one <- pheatmap(mat = data_subset, silent = TRUE)
two <- pheatmap(mat = data_subset, silent = TRUE)

# The fourth component of a pheatmap result is its gtable; hand both gtables
# to grid.arrange() so they are drawn together.
grid.arrange(grobs = list(one$gtable, two$gtable))

Reproduce the gene dendrogram.

# Plot margins in lines of text, ordered bottom, left, top, right.
# NOTE(review): the wider right margin is presumably for the leaf labels of
# the horizontal dendrogram - confirm.
par(mar = c(3.1, 2.1, 1.1, 5.1))

# Complete-linkage hierarchical clustering of the rows of data_subset, based
# on their pairwise distances.
my_hclust_gene <- hclust(
  d = dist(data_subset),
  method = "complete"
)
 [1]  2502.208  3771.244  4252.402  4366.211  4700.444  5069.851  5208.367
 [8]  6439.545  6474.863  6938.482  7983.369  8141.632  9198.185  9849.175
[15] 10818.256 10868.066 11127.621 11168.654 12699.557 12871.187 13511.763
[22] 13549.622 14483.876 14856.478 14860.904 15033.046 16304.877 16574.315
[29] 16935.384 17713.534 18798.131 18904.899 20250.185 22302.634 22512.593
[36] 24345.199 29826.722 30846.374 31530.137 31849.145 40048.202 43714.148
[43] 47029.264 48908.962 56038.953 67891.667 74124.247 95015.400
# Draw the gene clustering as a sideways dendrogram.
plot(as.dendrogram(my_hclust_gene), horiz = TRUE)

Reproduce the sample dendrogram.

# Cluster the samples: transpose data_subset so that its columns become the
# rows that dist() compares, then apply complete linkage.
my_hclust_sample <- hclust(dist(t(data_subset)), method = "complete")

# Draw the sample tree.
# NOTE(review): plotted upright, on the assumption that this should mirror a
# column dendrogram; pass horiz = TRUE for a sideways tree - confirm.
as.dendrogram(my_hclust_sample) %>%
  plot()

Add annotations.

# Row annotation: cut the gene tree into two groups and label them.
my_gene_col <- cutree(tree = as.dendrogram(my_hclust_gene), k = 2)
my_gene_col <- data.frame(
  cluster = ifelse(my_gene_col == 1, "cluster 1", "cluster 2")
)

# Column annotation: the first four columns of data_subset are labelled
# "tumour" and the last two "normal"; rows are keyed by the column names.
my_sample_col <- data.frame(
  sample = rep(c("tumour", "normal"), times = c(4, 2)),
  row.names = colnames(data_subset)
)

# A second row annotation, drawn at random from 1 and 2 (no seed is set, so
# it differs between runs).
my_random <- as.factor(sample(1:2, nrow(my_gene_col), replace = TRUE))
my_gene_col[["random"]] <- my_random

pheatmap(
  mat = data_subset,
  annotation_col = my_sample_col,
  annotation_row = my_gene_col
)

More clusters.

# Row annotation: cut the gene tree into six groups, labelled "cluster 1" to
# "cluster 6", keeping the names of the cutree() result as row names.
my_gene_col <- cutree(tree = as.dendrogram(my_hclust_gene), k = 6)
my_gene_col <- data.frame(
  cluster = paste("cluster", my_gene_col),
  row.names = names(my_gene_col)
)

# Column annotation: the first four columns of data_subset are labelled
# "tumour" and the last two "normal"; rows are keyed by the column names.
my_sample_col <- data.frame(
  sample = rep(c("tumour", "normal"), times = c(4, 2)),
  row.names = colnames(data_subset)
)

# A second row annotation, drawn at random from 1 and 2 (no seed is set, so
# it differs between runs).
my_random <- as.factor(sample(1:2, nrow(my_gene_col), replace = TRUE))
my_gene_col[["random"]] <- my_random

pheatmap(
  mat = data_subset,
  annotation_col = my_sample_col,
  annotation_row = my_gene_col
)

Change annotation colours and ordering.

# Row annotation: two gene clusters, with labels that contain no spaces.
my_gene_col <- cutree(tree = as.dendrogram(my_hclust_gene), k = 2)
my_gene_col <- data.frame(
  cluster = ifelse(my_gene_col == 1, "cluster1", "cluster2")
)

# Column annotation, stored as a factor whose levels are ordered
# "normal" then "tumour" instead of the alphabetical default.
my_sample_col <- data.frame(
  sample = factor(
    rep(c("tumour", "normal"), times = c(4, 2)),
    levels = c("normal", "tumour")
  ),
  row.names = colnames(data_subset)
)

# A second row annotation with random labels (no seed is set, so it differs
# between runs).
my_random <- as.factor(
  sample(c("random1", "random2"), nrow(my_gene_col), replace = TRUE)
)
my_gene_col[["random"]] <- my_random

# Colours for each annotation: one named character vector per annotation
# column, with one hex colour per annotation value.
my_colour <- list(
  sample = c(normal = "#5977ff", tumour = "#f74747"),
  random = c(random1 = "#82ed82", random2 = "#9e82ed"),
  cluster = c(cluster1 = "#e89829", cluster2 = "#cc4ee0")
)

# Annotated heatmap with the custom annotation colours and a fixed cell
# size; the result is kept in p.
p <- pheatmap(
  mat = data_subset,
  annotation_row = my_gene_col,
  annotation_col = my_sample_col,
  annotation_colors = my_colour,
  cellwidth = 18,
  cellheight = 7
)

# Write a heatmap to a PNG file.
#
# x:             list-like object with a `gtable` component to draw, such as
#                the value returned by pheatmap().
# filename:      path of the PNG file to create.
# width, height: size of the image, passed to png().
# res:           resolution, passed to png().
# Returns `filename` invisibly.
save_pheatmap_png <- function(x, filename, width = 1200, height = 1000, res = 150) {
  stopifnot(!is.null(x$gtable))
  png(filename, width = width, height = height, res = res)
  # Close the device even if drawing fails, so the file is always finalised.
  on.exit(dev.off(), add = TRUE)
  grid::grid.newpage()
  grid::grid.draw(x$gtable)
  invisible(filename)
}

# not run
# save_pheatmap_png(p, "heatmap_colour.png")

Introduce breaks by cutting the dendrogram.

         # Annotated heatmap with gaps that split the rows and the columns
         # into two groups each, following the clustering.
         pheatmap(data_subset,
                  annotation_row = my_gene_col,
                  annotation_col = my_sample_col,
                  cutree_rows = 2,
                  cutree_cols = 2)

Dendrogram results from pheatmap().

# Plot margins in lines of text, ordered bottom, left, top, right.
par(mar = c(3.1, 2.1, 1.1, 5.1))

# Build the heatmap without drawing it and keep the returned object, which
# carries the clustering results.
my_heatmap <- pheatmap(mat = data_subset, silent = TRUE)
[1] "tree_row" "tree_col" "kmeans"   "gtable"  
# Draw the row clustering stored in the pheatmap result as a sideways
# dendrogram.
plot(as.dendrogram(my_heatmap$tree_row), horiz = TRUE)

