<- list.files(here::here("data/QTLs/Nov_2020/input/"), pattern = "sample_key.txt", full.names = TRUE)
tissue_files <- gsub("_sample_key.txt", "", basename(tissue_files))
tissue_names <- tissue_files %>%
tissue_keys map(read_tsv, col_types = "cc" )
<- map_dbl(tissue_keys, nrow )
tissue_n
<- tibble( tissue = tissue_names, n = tissue_n) tissue_metadata
First, how does altering PEER factors change the number of eGenes?
<- list.files(here::here("data/QTLs/Nov_2020/results/normal_qtls"), pattern = "cis_qtl.txt.gz", full.names = TRUE)
sig_res
<- here::here("data/QTLs/Nov_2020/results/normal_qtls/all_tissues_sig_results.RData")
sig_res_out
if (rerun){
<- map(sig_res, ~{read_tsv(.x) %>% filter(qval < 0.05) })
sig_res_files save(sig_res_files,file = sig_res_out)
else{
}load(sig_res_out)
}
<- map_dbl(sig_res_files, nrow )
sig_res_n
# for sQTL results grouped by cluster
<- map_dbl(sig_res_files, ~{
sig_res_n_gene if("group_id" %in% names(.x) ){
$gene <- str_split_fixed(.x$group_id, ":", 2)[,1]
.xlength(unique(.x$gene) )
else{
}return(nrow(.x))
} })
<- gsub(".cis_qtl.txt.gz", "", basename(sig_res))
sig_res_info
<-
sig_res_df ::str_split_fixed(sig_res_info, pattern = "_", n = 4) %>%
stringras.data.frame( stringsAsFactors = FALSE ) %>%
rename(tissue = V1, type = V2, n_peer = V3, group = V4) %>%
mutate(group = ifelse(group == "", "gene", group)) %>%
::unite( col = "type", c(type, group)) %>%
tidyrmutate(n_peer = as.numeric(gsub("peer", "", n_peer)) ) %>%
mutate(n_QTL = sig_res_n) %>%
mutate(file = basename(sig_res) ) %>%
mutate(n_genes = sig_res_n_gene)
names(sig_res_files) <- sig_res_df$file
<- sig_res_df %>%
to_plot left_join( tissue_metadata, by = "tissue") %>%
filter(type %in% c("expression_gene", "splicing_cluster"))
<- arrange(tissue_metadata, desc(n)) %>% pull(tissue)
size_order
<-
max_n_per_dataset %>%
to_plot split( paste(.$tissue, .$type) ) %>%
map_df( ~{ .x %>% arrange(desc(n_genes)) %>% head(1) })
<-
max_n_per_dataset_plot %>%
max_n_per_dataset mutate(tissue = forcats::fct_relevel(tissue, size_order)) %>%
ggplot(aes( x = n, y =n_genes, colour = tissue )) +
geom_point() +
geom_text_repel(aes(label = tissue)) +
xlab("Number of individuals") +
ylab("Number of genes with associations") +
scale_y_log10()+
theme_jh() +
facet_wrap(~type) +
guides(colour = FALSE)
max_n_per_dataset_plot
%>%
max_n_per_dataset mutate(type = ifelse(type == "expression_gene", "eGenes", "sGenes")) %>%
select(tissue, type, n, n_genes ) %>%
pivot_wider(names_from = type, values_from = n_genes) %>%
mutate(tissue = snakecase::to_any_case(tissue, case = "title", sep_out = " ") ) %>%
gt()
tissue | n | eGenes | sGenes |
---|---|---|---|
Cervical Spinal Cord | 216 | 7890 | 4730 |
Lumbar Spinal Cord | 197 | 6751 | 4302 |
Thoracic Spinal Cord | 68 | 962 | 1387 |
<-
n_per_PEER_plot %>%
to_plot mutate(tissue = forcats::fct_relevel(tissue, size_order)) %>%
mutate(type = ifelse(type == "expression_gene", "eQTL", "sQTL")) %>%
ggplot(aes( y = n_genes, x = n_peer, group = tissue)) +
geom_point(aes(colour = tissue)) +
geom_line(aes(colour = tissue), show.legend=FALSE) +
geom_text(aes(label = n_genes), nudge_y = 0.025, show.legend=FALSE, size = 3 ) +
scale_y_log10()+
theme_jh() +
#geom_text_repel(aes(x = 30, label = tissue)) +
xlab("Number of PEER factors") +
ylab("Number of QTLs") +
facet_wrap(~type) +
theme(legend.position = "bottom") +
scale_colour_brewer(type = "qual")
n_per_PEER_plot
ggsave(plot = n_per_PEER_plot, file = here::here("ALS_SC/plots/QTL_PEER_plot.pdf"), width =7, height = 5)
PEER factors: 30 for Cervical and Lumbar expression 10 for Thoracic expression 15 for Cervical splicing 10 for Lumbar splicing 5 for Thoracic splicing
names(sig_res_files)
## [1] "CervicalSpinalCord_expression_peer0_gene.cis_qtl.txt.gz"
## [2] "CervicalSpinalCord_expression_peer10_gene.cis_qtl.txt.gz"
## [3] "CervicalSpinalCord_expression_peer15_gene.cis_qtl.txt.gz"
## [4] "CervicalSpinalCord_expression_peer20_gene.cis_qtl.txt.gz"
## [5] "CervicalSpinalCord_expression_peer25_gene.cis_qtl.txt.gz"
## [6] "CervicalSpinalCord_expression_peer30_gene.cis_qtl.txt.gz"
## [7] "CervicalSpinalCord_expression_peer5_gene.cis_qtl.txt.gz"
## [8] "CervicalSpinalCord_splicing_peer0_cluster.cis_qtl.txt.gz"
## [9] "CervicalSpinalCord_splicing_peer10_cluster.cis_qtl.txt.gz"
## [10] "CervicalSpinalCord_splicing_peer15_cluster.cis_qtl.txt.gz"
## [11] "CervicalSpinalCord_splicing_peer20_cluster.cis_qtl.txt.gz"
## [12] "CervicalSpinalCord_splicing_peer25_cluster.cis_qtl.txt.gz"
## [13] "CervicalSpinalCord_splicing_peer30_cluster.cis_qtl.txt.gz"
## [14] "CervicalSpinalCord_splicing_peer5_cluster.cis_qtl.txt.gz"
## [15] "LumbarSpinalCord_expression_peer0_gene.cis_qtl.txt.gz"
## [16] "LumbarSpinalCord_expression_peer10_gene.cis_qtl.txt.gz"
## [17] "LumbarSpinalCord_expression_peer15_gene.cis_qtl.txt.gz"
## [18] "LumbarSpinalCord_expression_peer20_gene.cis_qtl.txt.gz"
## [19] "LumbarSpinalCord_expression_peer25_gene.cis_qtl.txt.gz"
## [20] "LumbarSpinalCord_expression_peer30_gene.cis_qtl.txt.gz"
## [21] "LumbarSpinalCord_expression_peer5_gene.cis_qtl.txt.gz"
## [22] "LumbarSpinalCord_splicing_peer0_cluster.cis_qtl.txt.gz"
## [23] "LumbarSpinalCord_splicing_peer10_cluster.cis_qtl.txt.gz"
## [24] "LumbarSpinalCord_splicing_peer15_cluster.cis_qtl.txt.gz"
## [25] "LumbarSpinalCord_splicing_peer20_cluster.cis_qtl.txt.gz"
## [26] "LumbarSpinalCord_splicing_peer25_cluster.cis_qtl.txt.gz"
## [27] "LumbarSpinalCord_splicing_peer30_cluster.cis_qtl.txt.gz"
## [28] "LumbarSpinalCord_splicing_peer5_cluster.cis_qtl.txt.gz"
## [29] "ThoracicSpinalCord_expression_peer0_gene.cis_qtl.txt.gz"
## [30] "ThoracicSpinalCord_expression_peer10_gene.cis_qtl.txt.gz"
## [31] "ThoracicSpinalCord_expression_peer15_gene.cis_qtl.txt.gz"
## [32] "ThoracicSpinalCord_expression_peer20_gene.cis_qtl.txt.gz"
## [33] "ThoracicSpinalCord_expression_peer25_gene.cis_qtl.txt.gz"
## [34] "ThoracicSpinalCord_expression_peer30_gene.cis_qtl.txt.gz"
## [35] "ThoracicSpinalCord_expression_peer5_gene.cis_qtl.txt.gz"
## [36] "ThoracicSpinalCord_splicing_peer0_cluster.cis_qtl.txt.gz"
## [37] "ThoracicSpinalCord_splicing_peer10_cluster.cis_qtl.txt.gz"
## [38] "ThoracicSpinalCord_splicing_peer15_cluster.cis_qtl.txt.gz"
## [39] "ThoracicSpinalCord_splicing_peer20_cluster.cis_qtl.txt.gz"
## [40] "ThoracicSpinalCord_splicing_peer25_cluster.cis_qtl.txt.gz"
## [41] "ThoracicSpinalCord_splicing_peer30_cluster.cis_qtl.txt.gz"
## [42] "ThoracicSpinalCord_splicing_peer5_cluster.cis_qtl.txt.gz"
<- max_n_per_dataset%>% filter(type == "expression_gene") %>% pull(file)
max_eqtl_files <- max_n_per_dataset%>% filter(type == "splicing_cluster") %>% pull(file)
max_sqtl_files
## total number of eGenes
<- sig_res_files[max_eqtl_files] %>% map( "group_id" ) %>% unlist() %>% unique() %>% length()
total_eGenes
## total number of sGenes
<- sig_res_files[max_sqtl_files] %>% map( ~{.x %>% mutate(gene = str_split_fixed(group_id, ":", 2)[,1]) %>% pull(gene) } ) %>% unlist() %>% unique() %>% length()
total_sGenes
print(total_eGenes)
## [1] 9492
print(total_sGenes)
## [1] 5627
Computed with qvalue comparing nominal P-values in target tissue with permutation P-values of source tissue
#qvalues <- read_tsv(here::here("data/QTLs/April_2020_tensorQTL/results/qvalue/NYGC_eQTL_all_pi0.tsv"))
<- read_tsv(here::here("data/QTLs/Nov_2020/results/qvalue/SC_eQTL_all_qvalues_merged.0.05.tsv"))
eqtl_qvalues
$type <- "eQTL"
eqtl_qvalues
<- read_tsv(here::here("data/QTLs/Nov_2020/results/qvalue/SC_sQTL_all_qvalues_merged.0.05.tsv"))
sqtl_qvalues
$type <- "sQTL"
sqtl_qvalues
<- bind_rows(eqtl_qvalues, sqtl_qvalues)
qvalues
<-
conversion_df tibble(
long = unique(c(qvalues$source_name, qvalues$target_name)),
short = c("GTEx\nCSC", "CSC", "LSC", "TSC","GTEx\nCSC", "CSC", "LSC", "TSC" )
)
$source_name <- conversion_df$short[match(qvalues$source_name, conversion_df$long)]
qvalues$target_name <- conversion_df$short[match(qvalues$target_name, conversion_df$long)]
qvalues
<- c("CSC", "TSC", "LSC", "GTEx\nCSC")
tissue_levels
<-
qvalues_to_plot %>%
qvalues mutate(source_name = factor(source_name, levels = rev(tissue_levels) ),
target_name = factor(target_name, levels = tissue_levels) ) %>%
mutate(pi1 = ifelse(is.na(pi1), yes = 1, no = pi1)) %>%
mutate(pi1 = ifelse(source_name == target_name, 1, pi1)) %>%
mutate(pi1 = signif(pi1, digits = 2))
<-
qvalue_plot %>%
qvalues_to_plot ggplot(aes(x= source_name, y = target_name, fill = pi1, label = pi1)) +
geom_tile() +
geom_text(colour = "white",fontface = "bold", size = 3) +
labs(x = "Discovery", y = "Replication") +
scale_fill_gradient(expression(pi[1]),
low = "lightgoldenrod1",
high = "red3",
limits = c(0.5,1),
breaks = c(0.5,1)
+
) #theme_classic() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
coord_flip() +
scale_x_discrete(expand = c(0,0)) + scale_y_discrete( expand = c(0,0)) +
facet_wrap(~type, scales = "free") +
theme_jh() +
theme(axis.line = element_blank() ) +
guides(fill = guide_colourbar(barheight = unit(0.75, "in"), barwidth = unit(0.2, "in"),
nbin = 10,title.theme = element_text(size = 10), ticks.colour = NA) )
qvalue_plot
ggsave(plot = qvalue_plot, file = here::here("ALS_SC/plots/ALS_SC_qvalue_plot.pdf" ), width = 5, height = 2.2)
# qvalues_GTEX %>%
# ggplot(aes(x= source_name, y = target_name, fill = pi1, label = pi1)) + geom_tile() + geom_text() +
# labs(x = "", y = "") +
# scale_fill_distiller(expression(pi[1]), type = "div", palette = "RdYlBu", limits = c(0.5,1) ) +
# #theme_classic() +
# theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
# coord_flip() +
# scale_x_discrete(expand = c(0,0)) + scale_y_discrete( expand = c(0,0), position = "right") +
# geom_hline(yintercept = 3.5, colour = "white", size = 3) +
# theme_jh()