library(Gviz)
library(GenomicRanges)
library(GenomicFeatures)
library(rtracklayer)
library(dplyr)
source("~/projects/funcFinemapping/code/run_susie.R")
annoDIR <- "~/cluster/data/features/raw/"
build_grtrack <- function(curr.locus.gr,
genome="hg19",
start.pos = start,
end.pos = end
){
## match gene ids in TxDb with gene symbols if available
## Otherwise, return transcript symbol
ga.track <- GenomeAxisTrack()
if(genome == "hg19"){
#txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene
txdb <- loadDb("~/resources/gencode/gencode_v43lift37.txdb")
}
if(genome == "hg38"){
txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene::TxDb.Hsapiens.UCSC.hg38.knownGene
}
grtrack <- GeneRegionTrack(range = txdb,
genome = genome,
chromosome = as.character(seqnames(curr.locus.gr)),
name = "Genes",
collapse = TRUE
)
# subsetting the grtrack to keep all transcripts regardless of whether they have gene annotations
grtrack <- subset(grtrack,
from = start.pos,
to = end.pos)
# match gene IDs with gene symbols
symbolList <- mapIds(org.Hs.eg.db::org.Hs.eg.db,
keys=sub("\\.\\d+$", "",
sub("\\_\\d+$", "",
ranges(grtrack)$gene)),
keytype="ENSEMBL", column="SYMBOL")
# return transcript symbol if no gene symbol available
symbol(grtrack) <- unlist(lapply(1:length(symbolList), function(i){
if(is.na(symbolList[[i]])){
return(symbol(grtrack)[i])
}else{
return(symbolList[[i]])
}
}))
return(grtrack)
}
make_track_plots <- function(dsRNA.locus,
extend.size=5000,
reverse.strand = FALSE,
to.highligh = TRUE,
collapse.type = "meta"
){
# build GRanges for each track
m6a_peaks<-GRanges(seqnames = dsRNA.locus$chr,
IRanges(start = dsRNA.locus$start,
end = dsRNA.locus$end),
strand = dsRNA.locus$strand
)
gene_query <-GRanges(seqnames = dsRNA.locus$chr,
IRanges(start = min(dsRNA.locus$start,
dsRNA.locus$end,
dsRNA.locus$DS_start,
dsRNA.locus$DS_end),
end = max(dsRNA.locus$start,
dsRNA.locus$end,
dsRNA.locus$DS_start,
dsRNA.locus$DS_end)),
strand = dsRNA.locus$strand
)
dsRNA <- GRanges(seqnames = dsRNA.locus$DS_chr,
IRanges(start = dsRNA.locus$DS_start,
end = dsRNA.locus$DS_end))
## annotation tracks
m6a_track <- AnnotationTrack(m6a_peaks, genome = "hg19", name = "m6A")
dsRNA_track <- AnnotationTrack(dsRNA[1,], genome = "hg19", name = "dsRNA")
ed_track <- AnnotationTrack(
ed_gr[seqnames(ed_gr) == seqnames(m6a_track), ],
genome = "hg19",
name = "RNA_editing",
stacking = "dense"
)
RADAR_track <- AnnotationTrack(
ed_gr[seqnames(ed_gr) == seqnames(m6a_track) &
ed_gr$RADAR == 1, ],
genome = "hg19",
name = "RADAR",
stacking = "dense"
)
## genomic coordinates
gtrack <- GenomeAxisTrack()
## gene tracks
grtrack<-build_grtrack(curr.locus.gr = gene_query,
start = start(gene_query),
end = end(gene_query),
genome = "hg19")
# highlight a particular SNP
trackList <-list(
gtrack,grtrack,
m6a_track,
dsRNA_track,
ed_track,
RADAR_track
)
if(to.highligh){
ht1 <- HighlightTrack(trackList,
start = dsRNA.locus$physical.pos, width = 1,
chromosome = as.character(seqnames(gene_query)),
col = 'pink')
dist <- abs(dsRNA.locus$physical.pos - end(gene_query))
if(dist > extend.size){
print(sprintf("Extend the window size to be longer than %d.",
dist))
}
}else{
ht1 <- trackList
}
plotTracks(ht1,
transcriptAnnotation = 'symbol',
from = start(gene_query) - extend.size,
to = end(gene_query) + extend.size,
collapseTranscripts = collapse.type,
just.group = "right",
reverseStrand = reverse.strand
)
}
## GTEx dataset
# ed_sites <- read.csv(paste0(annoDIR, "RNA_editing/Tan2017/GTEx_avg_RNA_editing_levels_by_tissue.csv"))
#
# ed_sites[, c("chr", "pos")]<-t(sapply(ed_sites$sites,
# function(i){strsplit(i, "_")[[1]]}
# ))
## Mai2019 +400 LCLs
ed_sites <- readxl::read_excel(paste0(annoDIR,
"RNA_editing/Mai2019/RNA_editing_sites_LCLs.xlsx"),
skip = 2)
## liftover to hg19
chain.file.path <- "~/resources/genomes/chain_files/hg38ToHg19.over.chain"
The features on the plot from top to bottom are genes, m6a peaks, dsRNA regions, RNA editing sites detected in >400 LCLs and the subset of editing sites that were also found in RADAR database. The red line highlights the position for the top m6A QTL.
rs3204541 - DDX55
DDX55 has the molecular function of RNA Binding (GO:0003723). DDX55 is found in Nuclear Lumen (GO:0031981).
DDX55 is found in Nucleolus (GO:0005730).
DDX55 is upregulated in 721 B lymphoblasts cells.
EIF2B1 is involved in the biological process T Cell Receptor
Signaling Pathway (GO:0050852).
EIF2B1 is involved in the biological process Antigen Receptor-Mediated
Signaling Pathway (GO:0050851).
EIF2B1 has the molecular function of GTPase Regulator Activity (GO:0030695).
EIF2B1 has the molecular function of Guanyl-Nucleotide Exchange Factor
Activity (GO:0005085).
EIF2B1 has the molecular function of Translation Initiation Factor
Activity (GO:0003743).
## 'select()' returned 1:1 mapping between keys and columns
rs1806261 - RABEP1
RABEP1 has the molecular function of Protein Homodimerization
Activity (GO:0042803).
RABEP1 is involved in the biological process Golgi To Plasma Membrane
Transport (GO:0006893).
RABEP1 is involved in the biological process Membrane Fusion (GO:0061025).
RABEP1 is involved in the biological process Membrane Organization (GO:0061024).
RABEP1 is involved in the biological process post-Golgi Vesicle-Mediated
Transport (GO:0006892).
RABEP1 is involved in the biological process Protein Localization To
Cell Periphery (GO:1990778).
## 'select()' returned 1:1 mapping between keys and columns
Zoom-in plots for the dsRNA region
## 'select()' returned 1:1 mapping between keys and columns
rs6120719 - HM13
HM13 is found in Cytoplasmic Side Of Endoplasmic Reticulum Membrane
(GO:0098554).
HM13 is found in Endoplasmic Reticulum Membrane (GO:0005789).
HM13 has the molecular function of Aspartic-Type Endopeptidase Activity
(GO:0004190).
HM13 has the molecular function of Protein Homodimerization Activity (GO:0042803).
HM13 has the molecular function of Ubiquitin Protein Ligase Binding (GO:0031625).
HM13 has the molecular function of Ubiquitin-Like Protein Ligase Binding
(GO:0044389).
## 'select()' returned 1:1 mapping between keys and columns
rs3177647 - MAPKAPK5
MAPKAPK5 has the molecular function of MAP Kinase Kinase Activity (GO:0004708).
MAPKAPK5 has the molecular function of Calcium-Dependent Protein Kinase
Activity (GO:0010857).
MAPKAPK5 has the molecular function of Calcium-Dependent Protein
Serine/Threonine Kinase Activity
## 'select()' returned 1:1 mapping between keys and columns
rs1056879 - PPP2R3C
PPP2R3C is involved in the biological process B Cell Homeostasis (GO:0001782).
PPP2R3C is involved in the biological process T Cell Homeostasis (GO:0043029).
PPP2R3C is involved in the biological process Positive Regulation Of B
Cell Activation (GO:0050871).
PPP2R3C is involved in the biological process Positive Regulation Of B
Cell Differentiation (GO:0045579).
PPP2R3C is involved in the biological process Positive Regulation Of
Lymphocyte Differentiation (GO:0045621).
PPP2R3C is involved in the biological process Regulation Of B Cell
Differentiation (GO:0045577).
PRORP is involved in the biological process Mitochondrial tRNA
Processing (GO:0090646).
PRORP is involved in the biological process tRNA 5’-End Processing (GO:0099116).
PRORP is involved in the biological process tRNA 5’-Leader Removal (GO:0001682).
PRORP is found in Intracellular Organelle Lumen (GO:0070013).
PRORP is found in Mitochondrial Matrix (GO:0005759).
PRORP has the molecular function of tRNA-specific Ribonuclease Activity
(GO:0004549).
## 'select()' returned 1:many mapping between keys and columns