library(ggbio)
library(GenomicRanges)
library(tidyverse)
<- function(genename, n = Inf){
gencode_plot <- gencode_gff[ gencode_gff$gene_name == genename]
gff_loc
<- unique(gff_loc$transcript_id)
transcripts <- transcripts[order(transcripts)]
transcripts <- tail(transcripts, n)
transcripts
<- gff_loc[ gff_loc$transcript_id %in% transcripts]
gff_loc
#gff_split <- split(gff_loc, gff_loc$transcript_id)
<- gff_loc[ gff_loc$type %in% c("exon", "UTR")]
gff_loc ggplot() + geom_alignment( gff_loc, aes(group = transcript_id, fill = "GENCODE"), range.geom = "rect", gap.geom = "arrow")+
theme_bw() + scale_fill_manual(values = "black") +
labs(fill = "") #+ theme(legend.text = element_blank())
}
<- function(gene, gwas, qtl, n = Inf, colourby = "PP.H4.abf", legend_pos = "right", min_coloc = 0.5){
junction_plot <- filter(all_coloc, QTL_Gene == gene, QTL == qtl, GWAS == gwas, PP.H4.abf > min_coloc) %>%
junctions_loc ::separate(QTL_junction, into = c("chr", "start", "end"), sep = ":|-", remove = FALSE ) %>%
tidyrmutate(start = as.numeric(start), end = as.numeric(end)) %>%
arrange(desc(PP.H4.abf)) %>%
head(n)
stopifnot(nrow(junctions_loc) > 0)
## Plot a bar
<-
grange_loc GRanges(seqnames = junctions_loc$chr,
ranges = IRanges(start = junctions_loc$start, end = junctions_loc$end),
PP.H4.abf = junctions_loc$PP.H4.abf,
SNP = junctions_loc$QTL_SNP, junction_id = junctions_loc$QTL_junction#,
#QTL_Beta = junctions_loc$QTL_Beta
)
$junction_id <- factor(grange_loc$junction_id, levels = rev(junctions_loc$QTL_junction))
grange_loc
<-
plot ggplot() +
geom_alignment(data = grange_loc, aes_string(group = "junction_id", fill = colourby), colour = NA, range.geom = "rect" ) +
scale_fill_viridis_c(values = c(0,1)) + theme_bw() + theme(legend.position = legend_pos)
return(plot)
}
<- function(Gene, qtl, snp_width = 50, type = "GWAS", n = Inf, min_coloc = 0.5, rs = FALSE){
snp_plot if(type == "GWAS"){
<- filter(all_coloc, QTL_Gene == Gene, QTL == qtl) %>%
snp_loc ::select(SNP = GWAS_SNP, chr = GWAS_chr, end = GWAS_pos, GWAS) %>%
dplyrdistinct() %>%
mutate(label = paste0(GWAS, "\n", SNP) )
#tidyr::separate(QTL_junction, into = c("chr", "start", "end"), sep = ":|-", remove = FALSE ) %>%
#mutate(start = as.numeric(start), end = as.numeric(end)) %>%
#arrange(desc(PP.H4.abf)) %>%
#head(n)
stopifnot(nrow(snp_loc) > 0)
## Plot a bar
<-
grange_loc GRanges(seqnames = snp_loc$chr,
ranges = IRanges(start = snp_loc$end - snp_width, end = snp_loc$end),
SNP = snp_loc$SNP, GWAS = snp_loc$GWAS, label = snp_loc$label )
}if(type == "QTL"){
<- filter(all_coloc, QTL_Gene == Gene, QTL == qtl, PP.H4.abf >= min_coloc) %>%
snp_loc ::select(SNP = QTL_SNP, chr = QTL_chr, end = QTL_pos, QTL_junction, PP.H4.abf) %>%
dplyrdistinct() %>%
arrange(desc(PP.H4.abf)) %>%
mutate(label = paste0(QTL_junction,"\n", SNP) )
#tidyr::separate(QTL_junction, into = c("chr", "start", "end"), sep = ":|-", remove = FALSE ) %>%
#mutate(start = as.numeric(start), end = as.numeric(end)) %>%
#arrange(desc(PP.H4.abf)) %>%
#head(n)
# only keep RS ID SNPs
if(rs == TRUE){
<- snp_loc %>%
snp_loc filter(grepl("rs", SNP ) )
}# if only want top n SNPs
<- snp_loc %>% head(n)
snp_loc
stopifnot(nrow(snp_loc) > 0)
## Plot a bar
<-
grange_loc GRanges(seqnames = snp_loc$chr,
ranges = IRanges(start = snp_loc$end - snp_width, end = snp_loc$end),
SNP = snp_loc$SNP, label = snp_loc$label )
$label <- factor(grange_loc$label, levels = rev(snp_loc$label))
grange_loc
}
#grange_loc$junction_id <- factor(grange_loc$junction_id, levels = rev(junctions_loc$QTL_junction))
<-
plot ggplot() +
geom_alignment(data = grange_loc, aes(group = label, fill = "test"), range.geom = "rect" ) +
theme_bw() + scale_fill_manual(values = "black") +
theme(legend.text = element_blank() ) + labs(fill = "")
#scale_fill_viridis_c() + theme_bw() #+# theme(legend.position = "top")
return(plot)
}
<- read_tsv(here::here("data/QTLs/Nov_2020/results/COLOC/all_COLOC_results_merged_H4_0_no_LD.tsv.gz"))
all_coloc
<- filter(all_coloc, GWAS == "NicolasSuggestive_2018")
all_coloc
#all_coloc <- read_tsv(here::here("data/COLOC/Nicolas_suggestive/all_COLOC_results_merged_H4_0_no_LD.tsv.gz"))
$GWAS <- "Nicolas_2018"
all_coloc
<- filter(all_coloc, PP.H4.abf > 0.5) %>% pull(QTL_Gene)
coloc_hit_genes
<- here::here("ALS_SC/gencode_for_coloc_hits.RData")
gencode_coloc if( !file.exists(gencode_coloc)){
<- rtracklayer::import("~/GENCODE/gencode.v30.annotation.gtf.gz")
gencode_gff <- gencode_gff[ gencode_gff$gene_name %in% coloc_hit_genes ]
gencode_gff save(gencode_gff, file = gencode_coloc )
else{
}load(gencode_coloc)
}
<- function(vcf, ID = NA){
process_vcf if( !is.na(ID) ){
$ID <- ID
vcf
}<- vcf %>%
out select(-CHROM, -POS,-REF, -ALT, -QUAL, -FILTER, -INFO, -FORMAT) %>%
::gather(key = "sample", value = "genotype", -ID) %>%
tidyrmutate(genotype = str_split_fixed(genotype, ":", 2)[,1] ) %>%
mutate(genotype_binary =
case_when(genotype == "0/0" ~ 0,
== "0/1" ~ 1,
genotype == "1/1" ~ 2 ),
genotype sample = gsub("\\.", "-", sample)
)return(out)
}
<- function(junction_bed, gene){
process_junctions <- junction_bed #%>%
sqtl_loc #filter(gid == gene)
<- str_split_fixed(sqtl_loc$ID, ":", 5)[,c(1,2,3)]
junction_ids $junction_id <- apply(junction_ids, MAR = 1, FUN = function(x) { paste0(x[1], ":", x[2], "-", x[3])} )
sqtl_loc
%>%
sqtl_loc select(-Chr, -start, -end, -gid, -strand, -ID ) %>%
::gather(key = "sample", value = "junction_ratio", -junction_id)
tidyr }
<- gencode_plot("ATXN3", 10)
atxn3_gencode
<- junction_plot(gene = "ATXN3", gwas = "Nicolas_2018", qtl = "NYGC_Lumbar_sQTL", n = 3 )
atxn3_junctions
#tracks("GENCODE v30" = atxn3_gencode, "sQTL junctions" = atxn3_junctions, heights = c(4,1))
<- snp_plot(Gene = "ATXN3", qtl = "NYGC_Lumbar_sQTL", type = "QTL", min_coloc = 0.8, n = 1, rs = TRUE)
qtl_snps
<- snp_plot(Gene = "ATXN3", qtl = "NYGC_Lumbar_sQTL", type = "GWAS")
gwas_snps
## POLYQ EXPANSION - hg38 chr14:92071010-92071011
<- GRanges(seqnames = "chr14", ranges = IRanges(start = 92071010, end = 92071011), label = "SCA3 expansion site")
expansion_gr
<-
expansion_plot ggplot() +
geom_alignment(data = expansion_gr, aes(group = label, fill = ""), range.geom = "rect" ) + theme_bw() +
labs(fill = "")
<- tracks("GENCODE v30" = atxn3_gencode, "sQTL junctions" = atxn3_junctions,
atxn3_plot "GWAS" = gwas_snps,
"QTL" = qtl_snps,
"SCA3" = expansion_plot,
heights = c(7,3,1,1,1),
xlim = c(92044496,92106625 )
+
) theme(panel.grid = element_blank(), axis.text = element_text(colour = "black"), axis.ticks = element_line(colour = "black") )
ggsave(atxn3_plot,filename = here::here("ALS_SC/plots/ATXN3_multiplot.pdf"), width = 8, height = 5 )
atxn3_plot
<- gencode_plot("C9orf72", 10)
c9_gencode
<- junction_plot(gene = "C9orf72", gwas = "Nicolas_2018", qtl = "NYGC_Lumbar_sQTL", min_coloc = 0, n = 2 )
c9_junctions
#tracks("GENCODE v30" = c9_gencode, "sQTL junctions" = c9_junctions, heights = c(4,1))
<- snp_plot(Gene = "C9orf72", qtl = "NYGC_Lumbar_sQTL", type = "QTL", min_coloc = 0.7, n = 1, rs = TRUE)
qtl_snps
<- snp_plot(Gene = "C9orf72", qtl = "NYGC_Lumbar_sQTL", type = "GWAS")
gwas_snps
# find C9 HRE region
# chr9:27573528-27573546 (thanks Expansion Hunter!)
<- GRanges(seqnames = "chr9", ranges = IRanges(start = 27573528, end = 27573546), label = "Hexanucleotide repeat")
expansion_gr
<-
expansion_plot ggplot() +
geom_alignment(data = expansion_gr, aes(group = label, fill = ""), range.geom = "rect" ) + theme_bw() +
labs(fill = "")
<- tracks("GENCODE v30" = c9_gencode,
c9_plot "sQTL junctions" = c9_junctions,
"GWAS" = gwas_snps,
"QTL" = qtl_snps,
"SCA3" = expansion_plot,
heights = c(7,2,1,1,1),
xlim = c(27535340,27573866 )
+
) theme(panel.grid = element_blank(), axis.text = element_text(colour = "black"), axis.ticks = element_line(colour = "black") )
c9_plot
ggsave(c9_plot,filename = here::here("ALS_SC/plots/C9orf72_multiplot.pdf"), width = 8, height = 5 )
<- gencode_plot("FNBP1", Inf)
fnbp1_gencode
<- junction_plot(gene = "FNBP1", gwas = "Nicolas_2018", qtl = "NYGC_Lumbar_sQTL", min_coloc = 0.1)
fnbp1_junctions
#tracks("GENCODE v30" = fnbp1_gencode, "sQTL junctions" = fnbp1_junctions, heights = c(4,1))
<- snp_plot(Gene = "FNBP1", qtl = "NYGC_Lumbar_sQTL", type = "QTL", min_coloc = 0.1, rs = TRUE)
qtl_snps
<- snp_plot(Gene = "FNBP1", qtl = "NYGC_Lumbar_sQTL", type = "GWAS")
gwas_snps
<- tracks("GENCODE v30" = fnbp1_gencode,
fnbp1_plot "sQTL junctions" = fnbp1_junctions,
"GWAS" = gwas_snps,
"QTL" = qtl_snps,
heights = c(7,2,1,2)
+
) theme(panel.grid = element_blank(), axis.text = element_text(colour = "black"), axis.ticks = element_line(colour = "black") )
<- filter(all_coloc, QTL_Gene == "FNBP1", GWAS == "Nicolas_2018", QTL == "NYGC_Lumbar_sQTL")
fnbp1_meta
fnbp1_plot
FNBP1 is a sQTL, clearly a cassette exon. GTEx labels it exon 12. This exon sQTL turns up in GTEX (esophagus, colon) but not in brain. A different sQTL which I don’t detect futher upstream. Is inclusion of this exon cell-type specific? Plan: Correlate exon usage with deconvolved proportions. If it’s a cell-type specific effect there should be an association.
<- read_tsv(here::here("data/deconvolution/Mathys_MuSiC_residual_results.tsv"))
mathys_music_resid <- read_tsv( here::here("ALS_SC/FNBP1/LumbarSpinalCord_sample_key.txt"))
sample_key
<- mathys_music_resid %>%
deconv_df select(sample_id = sample, cell, deconv) %>%
pivot_wider(names_from = cell,values_from = deconv ) %>%
left_join(sample_key) %>%
select(sample = participant_id, -sample_id, everything() ) %>%
drop_na()
<- read_tsv( here::here("ALS_SC/FNBP1/CSC_FNBP1_splicing.txt") ) %>%
sqtl_junctions select(-`#Chr`, -start, -end, -ID ) %>%
::gather(key = "sample", value = "junction_ratio")
tidyr
<- process_vcf(vcf = read.table(here::here("ALS_SC/FNBP1/FNBP1_rs11370213.vcf"), header=TRUE), ID = "rs11370213" )
fnbp_genotypes
<- read_tsv(here::here("ALS_SC/FNBP1/LumbarSpinalCord_splicing_peer15.gene.combined_covariates.txt")) %>%
covariates pivot_longer(names_to = "sample", values_to = "value", cols = !ID) %>%
pivot_wider( names_from = "ID",values_from = "value" )
<- inner_join(sqtl_junctions, covariates) %>%
fnbp1_lm column_to_rownames(var = "sample")
#summary( lm(junction_ratio ~ genotype_binary, fnbp1_lm) )
<- resid( lm(junction_ratio ~ ., fnbp1_lm) )
fnbp1_resid
<- fnbp1_lm %>%
fnbp1_all rownames_to_column(var = "sample") %>%
mutate(genotype_binary = fnbp_genotypes$genotype_binary[match(.$sample, fnbp_genotypes$sample)]) %>%
mutate( junction_resid = fnbp1_resid) %>%
left_join(deconv_df, by = "sample")
%>%
fnbp1_all mutate(genotype_binary = factor(genotype_binary)) %>%
ggplot(aes(x = genotype_binary, y = junction_ratio)) + geom_boxplot(aes(group = genotype_binary)) +
geom_jitter(width = 0.2, height = 0) +
::stat_compare_means() ggpubr
%>%
fnbp1_all mutate(genotype_binary = factor(genotype_binary)) %>%
ggplot(aes(x = genotype_binary, y = junction_resid)) + geom_boxplot(aes(group = genotype_binary)) +
geom_jitter(width = 0.2, height = 0) +
::stat_compare_means() ggpubr
%>%
fnbp1_all ggplot(aes(x = Oli, y = junction_resid, colour = factor(genotype_binary) )) + geom_point() +
geom_smooth(method = "lm") + theme_classic()
summary(lm( junction_resid ~ genotype_binary + Oli + Ast + Mic + End + Per + Ex, fnbp1_all ))
##
## Call:
## lm(formula = junction_resid ~ genotype_binary + Oli + Ast + Mic +
## End + Per + Ex, data = fnbp1_all)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.54182 -0.44603 -0.02447 0.45210 1.60061
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1037.1020 687.2382 1.509 0.134
## genotype_binary 0.5874 0.1396 4.206 5.48e-05 ***
## Oli -1037.9273 687.2301 -1.510 0.134
## Ast -1038.2448 687.2575 -1.511 0.134
## Mic -1039.9807 687.4368 -1.513 0.133
## End -1035.1853 686.9735 -1.507 0.135
## Per -1041.3814 687.2181 -1.515 0.133
## Ex -1058.2985 691.3527 -1.531 0.129
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.659 on 105 degrees of freedom
## (66 observations deleted due to missingness)
## Multiple R-squared: 0.1853, Adjusted R-squared: 0.131
## F-statistic: 3.411 on 7 and 105 DF, p-value: 0.002524
FNBP1 sQTL is only significant after regressing covariates
deconvolution only available for 129 out of 179 samples no obvious interaction effect of FNBP1 splicing.
<- gencode_plot("NFASC", 10)
nfasc_gencode
<- junction_plot(gene = "NFASC", gwas = "Nicolas_2018", qtl = "NYGC_Cervical_sQTL", min_coloc = 0.2)
nfasc_junctions
#tracks("GENCODE v30" = nfasc_gencode, "sQTL junctions" = nfasc_junctions, heights = c(4,1))
<- snp_plot(Gene = "NFASC", qtl = "NYGC_Cervical_sQTL", type = "QTL", min_coloc = 0.2, rs = FALSE)
qtl_snps
<- snp_plot(Gene = "NFASC", qtl = "NYGC_Cervical_sQTL", type = "GWAS")
gwas_snps
<- tracks("GENCODE v30" = nfasc_gencode,
nfasc_plot "sQTL junctions" = nfasc_junctions,
"GWAS" = gwas_snps,
"QTL" = qtl_snps,
heights = c(10,2,1,2),
xlim = c(204970000, 205000000)
+
) theme(panel.grid = element_blank(), axis.text = element_text(colour = "black"), axis.ticks = element_line(colour = "black") )
nfasc_plot
NFASC has an orphan intron sQTL - no other introns from its Leafcutter cluster remained for testing. LSC - chr1 204982020-204987418 has high PP4 = 0.77 In CSC - same intron has highest coloc (0.32), but a couple of other introns are kept, showing a complex locus.
An eQTL
<- gencode_plot("GGNBP2", 10)
ggnbp2_gencode
#ggnbp2_junctions <- junction_plot(gene = "GGNBP2", gwas = "Nicolas_2018", qtl = "NYGC_Cervical_eQTL", min_coloc = 0.2)
#tracks("GENCODE v30" = ggnbp2_gencode, "sQTL junctions" = ggnbp2_junctions, heights = c(4,1))
<- snp_plot(Gene = "GGNBP2", qtl = "NYGC_Cervical_eQTL", type = "QTL", min_coloc = 0.2, rs = FALSE)
qtl_snps
<- snp_plot(Gene = "GGNBP2", qtl = "NYGC_Cervical_eQTL", type = "GWAS")
gwas_snps
<- tracks("GENCODE v30" = ggnbp2_gencode,
ggnbp2_plot # "sQTL junctions" = ggnbp2_junctions,
"GWAS" = gwas_snps,
"QTL" = qtl_snps,
heights = c(10,1,2)
+
) theme(panel.grid = element_blank(), axis.text = element_text(colour = "black"), axis.ticks = element_line(colour = "black") )
ggnbp2_plot
<- gencode_plot("ACSL5", 10)
ACSL5_gencode
#ACSL5_junctions <- junction_plot(gene = "ACSL5", gwas = "Nicolas_2018", qtl = "NYGC_Cervical_eQTL", min_coloc = 0.2)
#tracks("GENCODE v30" = ACSL5_gencode, "sQTL junctions" = ACSL5_junctions, heights = c(4,1))
<- snp_plot(Gene = "ACSL5", qtl = "NYGC_Cervical_eQTL", type = "QTL", min_coloc = 0.2, rs = FALSE)
qtl_snps
<- snp_plot(Gene = "ACSL5", qtl = "NYGC_Cervical_eQTL", type = "GWAS")
gwas_snps
<- tracks("GENCODE v30" = ACSL5_gencode,
ACSL5_plot # "sQTL junctions" = ACSL5_junctions,
"GWAS" = gwas_snps,
"QTL" = qtl_snps,
heights = c(10,1,2)
+
) theme(panel.grid = element_blank(), axis.text = element_text(colour = "black"), axis.ticks = element_line(colour = "black") )
ACSL5_plot
ACSL5 is nice - lead GWAS SNP is lead QTL SNP.
Nott data suggests only expressed in microglia - no ChIP or ATAC from any other cell type. However, none of the Microglia QTL data shows colocalisation.