Setting

library(Gviz)
library(GenomicRanges)
library(GenomicFeatures)
library(rtracklayer)
library(dplyr)
source("~/projects/funcFinemapping/code/run_susie.R")
annoDIR <- "~/cluster/data/features/raw/"

Functions

build_grtrack <- function(curr.locus.gr, 
                          genome="hg19",
                          start.pos = start,
                          end.pos = end
                          ){
    ## match gene ids in TxDb with gene symbols if available
    ## Otherwise, return transcript symbol
    ga.track <- GenomeAxisTrack()
    if(genome == "hg19"){
        #txdb <- TxDb.Hsapiens.UCSC.hg19.knownGene::TxDb.Hsapiens.UCSC.hg19.knownGene
        txdb <- loadDb("~/resources/gencode/gencode_v43lift37.txdb")

    }
    if(genome == "hg38"){
        txdb <- TxDb.Hsapiens.UCSC.hg38.knownGene::TxDb.Hsapiens.UCSC.hg38.knownGene
    }
    
    grtrack <- GeneRegionTrack(range = txdb, 
                               genome = genome, 
                               chromosome = as.character(seqnames(curr.locus.gr)),
                               name = "Genes",
                               collapse = TRUE
                 )
    # subsetting the grtrack to keep all transcripts regardless of whether they have gene annotations
    grtrack <- subset(grtrack,
                      from = start.pos,
                      to = end.pos)
    # match gene IDs with gene symbols
    symbolList <- mapIds(org.Hs.eg.db::org.Hs.eg.db, 
                              keys=sub("\\.\\d+$", "", 
                                       sub("\\_\\d+$", "", 
                                           ranges(grtrack)$gene)), 
                              keytype="ENSEMBL", column="SYMBOL")
    
    # return transcript symbol if no gene symbol available
    symbol(grtrack) <- unlist(lapply(1:length(symbolList), function(i){
      if(is.na(symbolList[[i]])){
        return(symbol(grtrack)[i])
      }else{
        return(symbolList[[i]])
      }
    }))
    return(grtrack)
}



make_track_plots <- function(dsRNA.locus, 
                             extend.size=5000,
                             reverse.strand = FALSE,
                             to.highligh = TRUE,
                             collapse.type = "meta"
                             ){
      
  # build GRanges for each track
  m6a_peaks<-GRanges(seqnames = dsRNA.locus$chr,
                   IRanges(start = dsRNA.locus$start,
                           end = dsRNA.locus$end),
                   strand = dsRNA.locus$strand
                )

  gene_query <-GRanges(seqnames = dsRNA.locus$chr,
                     IRanges(start = min(dsRNA.locus$start, 
                                         dsRNA.locus$end,
                                         dsRNA.locus$DS_start,
                                         dsRNA.locus$DS_end),
                             end = max(dsRNA.locus$start, 
                                         dsRNA.locus$end,
                                         dsRNA.locus$DS_start,
                                         dsRNA.locus$DS_end)),
                     strand = dsRNA.locus$strand
                  )
  
  dsRNA <- GRanges(seqnames = dsRNA.locus$DS_chr,
                   IRanges(start = dsRNA.locus$DS_start,
                           end = dsRNA.locus$DS_end))
  
  ## annotation tracks
  m6a_track <- AnnotationTrack(m6a_peaks, genome = "hg19", name = "m6A")
  dsRNA_track <- AnnotationTrack(dsRNA[1,], genome = "hg19", name = "dsRNA")
  ed_track <- AnnotationTrack(
                        ed_gr[seqnames(ed_gr) == seqnames(m6a_track), ], 
                        genome = "hg19", 
                        name = "RNA_editing",
                        stacking = "dense"
                        )
  RADAR_track <- AnnotationTrack(
                        ed_gr[seqnames(ed_gr) == seqnames(m6a_track) &
                                ed_gr$RADAR == 1, ], 
                        genome = "hg19", 
                        name = "RADAR",
                        stacking = "dense"
                        )

  ## genomic coordinates
  gtrack <- GenomeAxisTrack()
  

  ## gene tracks
  grtrack<-build_grtrack(curr.locus.gr = gene_query,
                            start = start(gene_query),
                            end = end(gene_query),
                            genome = "hg19")
  
  # highlight a particular SNP
  trackList <-list(
                   gtrack,grtrack, 
                   m6a_track, 
                   dsRNA_track, 
                   ed_track,
                   RADAR_track
                   )
  if(to.highligh){
    ht1 <- HighlightTrack(trackList,
                          start = dsRNA.locus$physical.pos, width = 1, 
                          chromosome = as.character(seqnames(gene_query)), 
                          col = 'pink')
    dist <- abs(dsRNA.locus$physical.pos - end(gene_query)) 
    
    if(dist > extend.size){
       print(sprintf("Extend the window size to be longer than %d.", 
                     dist))
       }
  }else{
    ht1 <- trackList
  }
  
  plotTracks(ht1,
             transcriptAnnotation = 'symbol',
             from = start(gene_query) - extend.size,
             to = end(gene_query) + extend.size,
             collapseTranscripts = collapse.type,
             just.group = "right",
             reverseStrand = reverse.strand
             )
}

Load data

## GTEx dataset
# ed_sites <- read.csv(paste0(annoDIR, "RNA_editing/Tan2017/GTEx_avg_RNA_editing_levels_by_tissue.csv"))
# 
# ed_sites[, c("chr", "pos")]<-t(sapply(ed_sites$sites, 
#                                       function(i){strsplit(i, "_")[[1]]}
# ))
## Mai2019 +400 LCLs
ed_sites <- readxl::read_excel(paste0(annoDIR, 
                                    "RNA_editing/Mai2019/RNA_editing_sites_LCLs.xlsx"),
                               skip = 2)

## liftover to hg19
chain.file.path <- "~/resources/genomes/chain_files/hg38ToHg19.over.chain"

Results

The features on the plot from top to bottom are genes, m6a peaks, dsRNA regions, RNA editing sites detected in >400 LCLs and the subset of editing sites that were also found in RADAR database. The red line highlights the position for the top m6A QTL.

TWAS genes with PP4 > 0.5 for blood traits

Specific examples for blood traits

rs3204541 - DDX55

DDX55 has the molecular function of RNA Binding (GO:0003723). DDX55 is found in Nuclear Lumen (GO:0031981).
DDX55 is found in Nucleolus (GO:0005730).
DDX55 is upregulated in 721 B lymphoblasts cells.

EIF2B1 is involved in the biological process T Cell Receptor Signaling Pathway (GO:0050852).
EIF2B1 is involved in the biological process Antigen Receptor-Mediated Signaling Pathway (GO:0050851).
EIF2B1 has the molecular function of GTPase Regulator Activity (GO:0030695).
EIF2B1 has the molecular function of Guanyl-Nucleotide Exchange Factor Activity (GO:0005085).
EIF2B1 has the molecular function of Translation Initiation Factor Activity (GO:0003743).

## 'select()' returned 1:1 mapping between keys and columns

rs1806261 - RABEP1

RABEP1 has the molecular function of Protein Homodimerization Activity (GO:0042803).
RABEP1 is involved in the biological process Golgi To Plasma Membrane Transport (GO:0006893).
RABEP1 is involved in the biological process Membrane Fusion (GO:0061025).
RABEP1 is involved in the biological process Membrane Organization (GO:0061024).
RABEP1 is involved in the biological process post-Golgi Vesicle-Mediated Transport (GO:0006892).
RABEP1 is involved in the biological process Protein Localization To Cell Periphery (GO:1990778).

## 'select()' returned 1:1 mapping between keys and columns

Zoom-in plots for the dsRNA region

## 'select()' returned 1:1 mapping between keys and columns

rs6120719 - HM13

HM13 is found in Cytoplasmic Side Of Endoplasmic Reticulum Membrane (GO:0098554).
HM13 is found in Endoplasmic Reticulum Membrane (GO:0005789).
HM13 has the molecular function of Aspartic-Type Endopeptidase Activity (GO:0004190).
HM13 has the molecular function of Protein Homodimerization Activity (GO:0042803).
HM13 has the molecular function of Ubiquitin Protein Ligase Binding (GO:0031625).
HM13 has the molecular function of Ubiquitin-Like Protein Ligase Binding (GO:0044389).

## 'select()' returned 1:1 mapping between keys and columns

rs3177647 - MAPKAPK5

MAPKAPK5 has the molecular function of MAP Kinase Kinase Activity (GO:0004708).
MAPKAPK5 has the molecular function of Calcium-Dependent Protein Kinase Activity (GO:0010857).
MAPKAPK5 has the molecular function of Calcium-Dependent Protein Serine/Threonine Kinase Activity

## 'select()' returned 1:1 mapping between keys and columns

rs1056879 - PPP2R3C

PPP2R3C is involved in the biological process B Cell Homeostasis (GO:0001782).
PPP2R3C is involved in the biological process T Cell Homeostasis (GO:0043029).
PPP2R3C is involved in the biological process Positive Regulation Of B Cell Activation (GO:0050871).
PPP2R3C is involved in the biological process Positive Regulation Of B Cell Differentiation (GO:0045579).
PPP2R3C is involved in the biological process Positive Regulation Of Lymphocyte Differentiation (GO:0045621).
PPP2R3C is involved in the biological process Regulation Of B Cell Differentiation (GO:0045577).

PRORP is involved in the biological process Mitochondrial tRNA Processing (GO:0090646).
PRORP is involved in the biological process tRNA 5’-End Processing (GO:0099116).
PRORP is involved in the biological process tRNA 5’-Leader Removal (GO:0001682).
PRORP is found in Intracellular Organelle Lumen (GO:0070013).
PRORP is found in Mitochondrial Matrix (GO:0005759).
PRORP has the molecular function of tRNA-specific Ribonuclease Activity (GO:0004549).

## 'select()' returned 1:many mapping between keys and columns