Assess the functions of the differentially expressed genes using the following tools:
Gene Ontology enrichment using clusterProfiler
#load(file = here::here("data/differential_expression/ALS/ALS_Spinal_Cord_de_res.RData"))
#de_res <- de_res_final
#load(file = here::here("data/differential_expression/ALS/Model_8d_limma_res.RData"))
#load(here::here("data/differential_expression/ALS/Model_9e_limma_res.RData"))
#de_res <- de_res_9e
#names(de_res) <- c("LSC", "CSC", "TSC")
#de_res <- model_8_final
<- function(set1, set2, universe = 20000){
setEnrichment = sum(set1 %in% set2)
a = length(set1) - a
c = length(set2) - a
b = universe - length(set2) - c
d = matrix(c(a, c, b, d), nrow = 2)
contingency_table # one-tailed test for enrichment only
= fisher.test(contingency_table, alternative = "greater")
fisher_results # returns data.frame containing the lengths of the two sets, the overlap, the enrichment ratio (odds ratio) and P value
<- tibble::tibble( set1_length = length(set1), set2_length = length(set2), overlap = a, ratio = fisher_results$estimate, p.value = fisher_results$p.value)
df return(df)
}
#de_res <- de_res_final
<- function(file){
load_results load(file)
<- de_res_final
de_res <- names(de_res)
tissues
# add tissue name as a column
<-
de_res ::map2(
purrr.x = de_res,
.y = names(de_res), ~{
$tissue <- .y
.xreturn(.x)
})return(de_res)
}
<- function(de_res){
make_gene_sets
# for each result split by direction and get out lists of genes
# output only significant up and downregulated genes
<-
gene_sets_sig map_df(de_res, ~{.x}) %>%
mutate(direction = ifelse( log_fc > 0, "up", "down") ) %>%
mutate( set = paste(tissue, direction, sep = ":")) %>%
filter(adj_p_val < 0.05) %>%
split(.$set) %>%
map("genename") #%>%
#map( ~{ head(.x, 1000)})
# output top X genes sorted by P-value, even if not sig at FDR < 0.05
<-
gene_sets_top map_df(de_res, ~{.x}) %>%
mutate(direction = ifelse( log_fc > 0, "up", "down") ) %>%
mutate( set = paste(tissue, direction, sep = ":")) %>%
split(.$set) %>%
map("genename") %>%
map( ~{ head(.x, 250)})
<- names(de_res)
tissues
<- expand.grid(tissues, c("up","down"), stringsAsFactors = FALSE)
tissue_sets names(tissue_sets) <- c("tissue", "direction")
$set <- paste(tissue_sets$tissue, tissue_sets$direction, sep = ":")
tissue_sets
<- gene_sets_top[ tissue_sets$set]
gene_sets_top
<-
gene_sets_tstat map(de_res, ~{
<-
.x %>%
.x #filter(adj_p_val < 0.05) %>%
arrange( desc(t) )
<- .x$t
tstat names(tstat) <- .x$genename
return(tstat)
})
<-
gene_sets_lfc map(de_res, ~{
<- .x %>%
.x filter(p_value < 0.05, !duplicated(genename) ) %>% ## OOOH
arrange( desc(log_fc) )
<- .x$log_fc
lfc names(lfc) <- .x$genename
return(lfc)
})
return(
list(sig = gene_sets_sig, top = gene_sets_top, t = gene_sets_tstat, lfc = gene_sets_lfc)
)
}
<- function(de_res, title = NULL, annotate_by = NULL){
ma_plot <-
de_res mutate(de_res,
class = case_when(
>= 0.05 ~ "non_sig",
adj_p_val < 0.05 & abs(log_fc) < 1 ~ "sig",
adj_p_val < 0.05 & abs(log_fc) >= 1 ~ "sig - strong"
adj_p_val
))
<- de_res %>%
plot ggplot(aes(x = ave_expr, y = log_fc)) +
rasterise(geom_point(aes(colour = class ), size = 0.5), dpi = 300) +
scale_colour_manual(values = c("gray", "orange", "red")) +
theme_bw() +
labs(x = "log(average expression)", y = expression(log[2]~"(fold change)"), title = title) +
guides(colour = FALSE) +
#xlim(-2,4.6) +
ylim(-4.5,4.5) +
theme_jh()
if(!is.null(annotate_by)){
<- plot +
plot ::geom_text_repel(
ggrepelfontface = "italic",
data = filter(de_res, genename %in% annotate_by),
aes(x = log_fc, y = -log10(p_value), label = genename),
min.segment.length = unit(0, "lines"),
size = 2.5) +
geom_point(
data = filter(de_res, genename %in% annotate_by), size = 0.8, colour = "black"
+
) geom_point(aes(colour = class ),
data = filter(de_res, genename %in% annotate_by), size = 0.6
)
}return(plot)
}
# threshold p-values at 1e-16
# threshold max LFC at 3
<- function(de_res, title = NULL, subtitle = NULL, annotate_by = NULL, type = "ALS"){
volcano_plot if( type == "ALS"){
<- 0.5
xpos <- 17
ymax <- c(-3,3)
xlim else{
}<- 0.025
xpos <- 9
ymax <- c(-0.042, 0.042)
xlim
}
<-
de_res mutate(de_res,
sig = case_when(
>= 0.05 ~ "non_sig",
adj_p_val < 0.05 & abs(log_fc) < 1 ~ "sig",
adj_p_val < 0.05 & abs(log_fc) >= 1 ~ "sig - strong"
adj_p_val %>%
)) mutate(direction = ifelse(log_fc > 0, "up", "down")) %>%
mutate(log_fc = case_when(
>= 3 ~ Inf,
log_fc < -3 ~ -Inf,
log_fc TRUE ~ log_fc
%>%
)) mutate(p_value = ifelse(p_value < 10^-(ymax-1), 10^-(ymax-1), p_value) ) %>%
mutate(class = paste(sig, direction))
<- group_by(de_res, sig, direction, class) %>% tally() %>%
de_tally filter(sig != "non_sig") %>%
mutate( position = ifelse(sig == "sig", xpos, 2) ) %>%
mutate(position = ifelse( direction == "down", -1 * position, position)) %>%
mutate(n = formatC(n, format="f", big.mark=",", digits=0))
<- de_res %>%
plot mutate( p_value = ifelse( p_value < 1e-16, Inf, p_value)) %>% #threshold at 1e16
ggplot(aes(x = log_fc, y = -log10(p_value))) +
#geom_point(aes(colour = class ), size = 0.5) +
rasterise(geom_point(aes(colour = class ), size = 0.5), dpi = 300) +
scale_colour_manual(values = c("non_sig up" = "gray",
"non_sig down" = "gray",
"sig up" = "#EB7F56",
"sig - strong up" = "#B61927",
"sig down" = "#4F8FC4",
"sig - strong down" = "dodgerblue4"
+
)) theme_bw() +
labs(y = expression(-log[10]~P~value), x = expression(log[2]~"(fold change)"), title = title, subtitle = subtitle) +
guides(colour = FALSE) +
scale_y_continuous(expand = c(0,0), limits = c(0,ymax)) +
theme_jh() +
theme(
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5),
panel.border = element_blank(),
axis.ticks = element_line(colour = "black")
+
) geom_text(fontface = "bold", data = de_tally, aes(x = position, y = ymax - 0.5, label = n, colour = class), size = 2.5 ) +
scale_x_continuous(limits = xlim)
if(!is.null(annotate_by)){
<- plot +
plot ::geom_text_repel(
ggrepelfontface = "italic",
data = filter(de_res, genename %in% annotate_by),
aes(x = log_fc, y = -log10(p_value), label = genename),
min.segment.length = unit(0, "lines"),
size = 2.5) +
geom_point(
data = filter(de_res, genename %in% annotate_by), size = 0.8, colour = "black"
+
) geom_point(aes(colour = class ),
data = filter(de_res, genename %in% annotate_by), size = 0.6
)
}return(plot)
}
## GSEA
<- function(named_list){
named_list_to_df map2_df( named_list, names(named_list), ~{
tibble( term_id = .y, gene = .x)
} )
}
<- function(data){
gsea_plot if("set" %in% names(data)){
$tissue <- data$set
data
} %>%
data #filter(tissue %in% tissues ) %>%
#mutate(tissue = factor(tissue, levels =) %>%
mutate(padj = p.adjust(pvalue, method = "bonferroni")) %>%
mutate(padj_label = case_when(
< 0.0001 ~ "***",
padj < 0.001 ~ "**",
padj < 0.05 ~ "*",
padj TRUE ~ "."
%>%
)) mutate(ID = fct_rev(ID)) %>%
#mutate(sig = ifelse(padj < 0.05, "*", "")) %>%
mutate(NES_label = signif(NES, digits = 2)) %>%
ggplot(aes( x = tissue, y = ID)) +
geom_tile(aes(fill = NES)) +
scale_fill_distiller(type = "div", palette = "RdBu", limits = c(-4.2,4.2)) +
#eom_tile(aes(colour = padj < 0.05), fill = NA, size = 2 ) +
scale_colour_manual(values = c(NA, "white")) +
geom_text(aes(label = padj_label)) +
#geom_text(aes(label = padj_label), nudge_x = 0.25, nudge_y = 0.25, size = 5) +
scale_y_discrete(expand = c(0,0)) +
theme_jh() +
labs(x = "", y = "") +
scale_x_discrete(expand = c(0,0), position = "top") +
theme(axis.line = element_blank() )
}
# plot log2 fold changes of marker genes (nominal P < 0.05)
<- function(res, markers, measure = "log_fc"){
gsea_box_plots if( !is.data.frame(markers)){
<- named_list_to_df(markers)
markers
}if( measure == "log_fc"){
<- expression(log[2]~Fold~Change)
measure_string else{
}<- measure
measure_string
}<- res %>%
d bind_rows() %>%
inner_join(markers, by = c("genename" = "gene")) %>%
filter( p_value < 0.05)
%>%
d mutate( FDR = ifelse(adj_p_val < 0.05, yes = "< 0.05", no = "> 0.05") ) %>%
#mutate(term_id = str_sub(term_id, start = 1, end = 1)) %>%
ggplot(aes_string(x = "term_id", y = measure )) +
geom_jitter(width = 0.25, size = 1, aes(colour = FDR) ) +
geom_boxplot(outlier.color = NA, fill = NA) +
scale_colour_manual( values = c("> 0.05" = "gray", "< 0.05" = "maroon") ) +
theme_jh() +
geom_hline(yintercept = 0, linetype = 2) +
labs( x = "", y = measure_string) +
coord_flip() +
facet_wrap(~tissue)
}
# Neuroexpresso
load(here::here("data/misc/neuroexpresso_spinal_cord_human.RData"))
<- named_list_to_df(nxp)
nxp_gsea
## Kelley
load(here::here("data/markers/kelley_markers.RData"))
<- named_list_to_df(kelley)
kelley_gsea
# Panglaodb
load(here::here("data/markers/PanglaoDB_markers.RData"))
<- named_list_to_df(pg_list) %>%
panglao filter(term_id %in% c("Endothelial cells", "Ependymal cells", "Microglia", "Neurons", "Motor neurons", "Oligodendrocytes", "Pericytes", "Astrocytes", "Cholinergic neurons"))
## activation sets
load(here::here("data/markers/activation_markers.RData"))
<- c( "Disease-associated microglia", "Disease-associated astrocytes", "Reactive astrocytes - MCAO", "Reactive astrocytes - LPS", "Plaque-induced genes")
act_sets
<- named_list_to_df(activation_sets) #%>%
activation_gsea #filter( term_id %in% act_sets)
load(here::here("data/markers/Mathys_single_nucleus.RData"))
<- data.frame(short = c("Ast","End", "Mic", "Ex", "Oli", "Per" ),
mathys_conversion_table long = c("Astrocytes", "Endothelial", "Microglia", "Neurons", "Oligos", "Pericytes"),
mono = c("A","E","M","N","O","P")
)
<- named_list_to_df(mathys) %>%
mathys_gsea left_join(mathys_conversion_table, by = c("term_id" = "short") ) %>%
mutate(term_id = long) %>%
select(term_id, gene) %>%
drop_na()
load(here::here("data/markers/Darmanis_single_cell.RData"))
<- named_list_to_df(darmanis)
darmanis_gsea
load(here::here("data/markers/blum_spinal_cord.RData"))
<- named_list_to_df(blum_markers)
blum_gsea
load( here::here("data/markers/syngo_1.1.RData"))
<- named_list_to_df(syngo) syngo_gsea
MSigDB - hallmark gene sets
<- here::here("data/MSigDB/h.all.v7.2.symbols.gmt")
hallmark_file <- read.gmt(hallmark_file) %>%
hallmarks mutate(term = gsub("HALLMARK|_", " ", term)) %>%
mutate(term = gsub("INTERFERON", "IFN", term)) %>%
mutate(term = gsub("EPITHELIAL MESENCHYMAL", "E-M", term)) %>%
mutate(term = gsub("UNFOLDED PROTEIN RESPONSE", "UPR", term)) %>%
mutate(term = str_trim(term))
<- function(gene_set){
hallmark_enrichr
<-
hallmark_res map_df(gene_set, ~{
as.data.frame(enricher(.x, TERM2GENE=hallmarks))
.id = "set"
},
)%>%
hallmark_res select(set, ID, qvalue) %>%
pivot_wider(names_from = set, values_from = qvalue)
return(hallmark_res)
}
# use t stat or log fc
<- function(gene_set){
hallmark_gsea
<- map_df(gene_set, ~{
hallmark_gsea_res as.data.frame(GSEA(.x, TERM2GENE = hallmarks, minGSSize = 5, pvalueCutoff = 1, eps = 0))
.id = "set")
},
return(hallmark_gsea_res)
}
#als_gsea <- hallmark_gsea(als_genes$lfc)
# plot hallmarks
<- function(df, plot_all = FALSE){
hallmark_gsea_plot $ID <- gsub("^\\s+", "", df$ID)
df
if( plot_all == TRUE){
<- 1
sig_level else{
}<- 0.05
sig_level
}
<-
hallmark_levels %>%
df filter(p.adjust < sig_level) %>%
select(set, ID, NES) %>%
pivot_wider(names_from = set, values_from = NES) %>%
rowwise() %>%
mutate(mean_nes = mean( c(C,L), na.rm = TRUE)) %>%
arrange( mean_nes) %>%
mutate(ID = factor(ID, levels = ID))
if( plot_all == FALSE){
<- hallmark_levels %>% tidyr::drop_na()
hallmark_levels
}
<-
to_plot %>%
df mutate(padj = p.adjust) %>%
mutate(padj_label = case_when(
< 0.0001 ~ "***",
padj < 0.001 ~ "**",
padj < 0.05 ~ "*",
padj TRUE ~ "."
))
%>%
to_plot filter(ID %in% hallmark_levels$ID) %>%
mutate(ID = factor(ID, levels = hallmark_levels$ID)) %>%
mutate(set = factor(set) ) %>% #, levels = c("Cervical", "Thoracic", "Lumbar"))) %>%
ggplot(aes(x = set, y = ID, label = padj_label, fill = NES)) +
geom_tile() +
geom_text() +
scale_fill_distiller(na.value = "lightgray", palette = "RdBu", limits = c(-4,4) ) +
#geom_text(aes(label = signif(value, digits = 2) ) ) +
scale_x_discrete(expand = c(0,0), position = "top") +
scale_y_discrete(expand = c(0,0)) +
theme_jh() +
labs(subtitle = "MSigDB Hallmark gene set enrichment analysis", x = "", y = "", fill = "NES") + theme(plot.subtitle = element_text(hjust = 0))
}
Updated for March 2022 revisions
#load(here::here("data/differential_expression/ALS/ALS_Spinal_Cord_de_res_Mar2022.RData"))
#de_res_final <- de_res_mar2022
#save(de_res_final,file = here::here("data/differential_expression/ALS/ALS_Spinal_Cord_de_res_Mar2022.RData"))
<- load_results(here::here("data/differential_expression/ALS/ALS_Spinal_Cord_de_res_Mar2022.RData"))
als_res
# for main figure plots ignore the thoracic
<- make_gene_sets(als_res)
als_genes <- make_gene_sets(list(C = als_res$CSC, L = als_res$LSC))
pair_genes
# Table 2 - DEG discovery rates
map_df( als_res, ~{
<- filter(.x, adj_p_val < 0.05 ) %>% nrow()
sig_all <- filter(.x, adj_p_val < 0.05 & abs(log_fc) >= 1) %>% nrow()
sig_med <- filter(.x, adj_p_val < 0.05 & abs(log_fc) >= 2) %>% nrow()
sig_strong tibble( sig_all, sig_med, sig_strong)
.id = "tissue") },
## # A tibble: 3 x 4
## tissue sig_all sig_med sig_strong
## <chr> <int> <int> <int>
## 1 CSC 7349 377 29
## 2 TSC 256 65 9
## 3 LSC 4694 233 7
strong genes - LFC > 1
Extreme - Genes with log2 fold change > 2
# read in lFCs from full DE and from shared donor DE - restricted to 130 shared donors
<- read_tsv(here::here("ALS_SC/tables/als_control_deg_combined.tsv") )
deg_combo <- read_tsv( here::here("ALS_SC/tables/shared_donor_als_control_deg_combined.tsv"))
shared_donor_df
<-
strong_genes %>%
deg_combo filter( (cervical.fdr < 0.05 & lumbar.fdr < 0.05) & (cervical.lfc > 1 | lumbar.lfc > 1) ) %>%
pull(genename)
length(strong_genes)
## [1] 238
if( rerun){
<- rtracklayer::import("~/GENCODE/gencode.v30.annotation.gtf.gz")
gencode <- tibble(gene_id = gencode$gene_id, gene = gencode$gene_name, type = gencode$gene_type) %>% distinct()
gene_type_df write_tsv(gene_type_df,file ="~/GENCODE/gencode.v30.gene.type.tsv.gz" )
else{
}<- read_tsv("~/GENCODE/gencode.v30.gene.type.tsv.gz")
gene_type_df
}
<-
strong_down_genes %>%
deg_combo filter( (cervical.fdr < 0.05 & lumbar.fdr < 0.05) & (cervical.lfc < -1 | lumbar.lfc < -1) ) %>%
mutate(mean.lfc = (cervical.lfc + lumbar.lfc)/2) %>%
arrange((mean.lfc)) %>%
left_join(gene_type_df, by = c("genename" = "gene"))
<-
strong_up_genes %>%
deg_combo filter( (cervical.fdr < 0.05 & lumbar.fdr < 0.05) & (cervical.lfc > 1 | lumbar.lfc > 1) ) %>%
mutate(mean.lfc = (cervical.lfc + lumbar.lfc)/2) %>%
arrange((mean.lfc)) %>%
left_join(gene_type_df, by = c("genename" = "gene"))
%>% group_by(type == "protein_coding") %>% tally() strong_down_genes
## # A tibble: 2 x 2
## `type == "protein_coding"` n
## <lgl> <int>
## 1 FALSE 37
## 2 TRUE 30
%>% group_by(type == "protein_coding") %>% tally() strong_up_genes
## # A tibble: 2 x 2
## `type == "protein_coding"` n
## <lgl> <int>
## 1 FALSE 38
## 2 TRUE 208
## scatter plot strongest genes!
<- function(data,
strong_up_gene_plot highlight = c("GPNMB", "CHIT1", "CCL18", "DNAJC5B", "CHRNA1", "AQP9")
){
<- data %>%
to_plot select(!contains("thoracic")) %>%
mutate( gene_set = case_when(
< 0.05 & lumbar.fdr < 0.05) & (cervical.lfc > 1 & lumbar.lfc > 1) ~ "FDR < 0.05 in both; LFC > 1 in both",
(cervical.fdr < 0.05 & lumbar.fdr < 0.05) & (cervical.lfc > 1 & lumbar.lfc <= 1) ~ "FDR < 0.05 in both; LFC > 1 in Cervical",
(cervical.fdr < 0.05 & lumbar.fdr < 0.05) & (cervical.lfc <= 1 & lumbar.lfc > 1) ~ "FDR < 0.05 in both; LFC > 1 in Lumbar",
(cervical.fdr %>%
) ) filter(!is.na(gene_set)) %>%
mutate(genename = ifelse( genename %in% highlight, genename, ""))
<- to_plot %>%
label_df select(gene_set) %>%
drop_na() %>%
group_by(gene_set) %>% tally() %>%
mutate(x = c(2,0.5,2),
y = c(4.5,2,0.5))
# linear model
<- coef(lm(cervical.lfc ~ lumbar.lfc, data = to_plot))
lm_df <- paste0("\u03b2 = ", signif(lm_df[2],2) )
lm_string <- paste0("\u03c1 = ", signif(cor(to_plot$cervical.lfc, to_plot$lumbar.lfc, method ="pearson"),2) )
cor_string
%>%
to_plot ggplot(aes(x = lumbar.lfc, y = cervical.lfc)) +
labs(x = expression(Lumbar~log[2]~fold~change), y =expression(Cervical~log[2]~fold~change), colour = "") +
geom_abline(linetype = 2, colour = "gray") +
theme_jh() +
geom_smooth(method = "lm", se = FALSE, colour = "red") +
theme(legend.position = "bottom") +
geom_point() +
geom_point(aes(colour = gene_set), size = 0.8) +
geom_text_repel( aes(label = genename),
fontface = "italic", size = 2.5, min.segment.length = 0, force = 10, max.overlaps = 100 ) +
scale_colour_manual(values = c("firebrick3","salmon", "peachpuff")) +
geom_text(data = label_df, aes(x = x, y = y, label = n, colour = gene_set), show.legend = FALSE, size = 3 ) +
xlim(0,3.5) +
ylim(0,5) +
annotate(geom = "text", x = 0.5, y = 4.5, label = paste0(cor_string, "\n", lm_string), size = 3.5)
}
## EXTREME GENES
<- map( als_res, ~{
extreme_genes <- filter(.x, adj_p_val < 0.05 & abs(log_fc) > 1) %>% pull(genename)
sig_strong
} )
::upset(UpSetR::fromList(extreme_genes)) UpSetR
#intersect(intersect(strong_genes$CSC, strong_genes$TSC), strong_genes$LSC)
# 2 extreme genes in common
<- map_df(als_res, ~{filter(.x, genename %in% unlist(extreme_genes) ) })
extreme_gene_df
<-
extreme_gene_table %>%
deg_combo filter(genename %in% unlist(extreme_genes) & !duplicated(genename) ) %>%
filter(!is.na(lumbar.lfc) & !is.na(cervical.lfc)) %>%
mutate(mean_lfc = (cervical.lfc + lumbar.lfc) / 2 ) %>%
arrange(desc(mean_lfc)) %>%
mutate(gene = factor(genename, levels = .$genename))
<-
extreme_up_gene_heatmap %>%
extreme_gene_df inner_join(extreme_gene_table, by = "genename") %>%
filter(gene %in% head(extreme_gene_table, 20)$gene ) %>%
#filter(mean_lfc > 0) %>%
mutate(gene = fct_rev(gene)) %>%
mutate(p_label = ifelse(adj_p_val < 0.05, "*", "") ) %>%
left_join(tissue_conversion_table, by = c("tissue" = "tissue_short")) %>%
filter(tissue_mono %in% c("C", "L")) %>%
ggplot(aes(x = tissue_mono, y = gene)) +
geom_tile(aes(fill = log_fc)) +
geom_text(aes(label = p_label), nudge_y = -0.25) +
scale_fill_gradient2(low = "white", mid = "#EB7F56", high = "#B61927", limits = c(0,5), na.value = "gray", midpoint = 2 ) +
scale_x_discrete(expand = c(0,0)) +
scale_y_discrete(expand = c(0,0)) +
theme_jh() +
labs(fill = expression(log[2]~fold~change), x = "", y = "") +
theme(axis.text.y = element_text(face = 'italic'),
legend.key.size = unit(0.5, "cm"),
panel.background=element_rect(fill="gray") )
extreme_up_gene_heatmap
LYZ, CHIT1 and GPNMB are strongly upregulated in all 3 tissues (lfc > 2), but also CCL18, DNAJC5B and CHRNA1, HTRA4 all very strong.
<- function(data, highlight = c("MOBP", "AC009133.6", "LINC00323", "SNORD3C", "ISL1", "MNX1", "MYO1A", "HBD" )
strong_down_gene_plot
){<- data %>%
to_plot select(!contains("thoracic")) %>%
mutate( gene_set = case_when(
< 0.05 & lumbar.fdr < 0.05) & (cervical.lfc < -1 & lumbar.lfc < -1) ~ "FDR < 0.05 in both; LFC < -1 in both",
(cervical.fdr < 0.05 & lumbar.fdr < 0.05) & (cervical.lfc < -1 & lumbar.lfc > -1) ~ "FDR < 0.05 in both; LFC < -1 in Cervical",
(cervical.fdr < 0.05 & lumbar.fdr < 0.05) & (cervical.lfc > -1 & lumbar.lfc < -1) ~ "FDR < 0.05 in both; LFC < -1 in Lumbar",
(cervical.fdr %>%
) ) filter(!is.na(gene_set)) %>%
mutate(gene = ifelse( genename %in% highlight, genename, ""))
<- to_plot %>%
label_df select(gene_set) %>%
drop_na() %>%
group_by(gene_set) %>% tally() %>%
mutate(x = c(-2,-0.75, -1.5),
y = c(-1.75,-1.5,-0.5))
# linear model
<- coef(lm(cervical.lfc ~ lumbar.lfc, data = to_plot))
lm_df <- paste0("\u03b2 = ", signif(lm_df[2],2) )
lm_string <- paste0("\u03c1 = ", signif(cor(to_plot$cervical.lfc, to_plot$lumbar.lfc, method ="pearson"),2) )
cor_string
%>%
to_plot ggplot(aes(x = lumbar.lfc, y = cervical.lfc)) +
labs(x = expression(Lumbar~log[2]~fold~change), y =expression(Cervical~log[2]~fold~change), colour = "") +
geom_abline(linetype = 2, colour = "gray") +
theme_jh() +
geom_smooth(method = "lm", se = FALSE, colour = "red") +
theme(legend.position = "bottom") +
geom_point() +
geom_point(aes(colour = gene_set), size = 0.8) +
geom_text_repel( aes(label = gene),
fontface = "italic", size = 2.5, min.segment.length = 0, force = 10, max.overlaps = 100 ) +
scale_colour_manual(values = c("navy", "dodgerblue", "skyblue" ) ) +
geom_text(data = label_df, aes(x = x, y = y, label = n, colour = gene_set), show.legend = FALSE, size = 3 ) +
xlim(-2.8,-0.45) +
ylim(-2.2,-0.45) +
annotate(geom = "text", x = -2.5, y = -0.6, label = paste0(cor_string, "\n", lm_string), size = 3.5)
}
strong_down_gene_plot(deg_combo)
#strong_down_gene_plot(shared_donor_df)
<-
extreme_down_gene_heatmap %>%
extreme_gene_df inner_join(extreme_gene_table, by = "genename") %>%
filter(gene %in% tail(extreme_gene_table, 20)$gene ) %>%
mutate(p_label = ifelse(adj_p_val < 0.05, "*", "") ) %>%
left_join(tissue_conversion_table, by = c("tissue" = "tissue_short")) %>%
filter(tissue_mono %in% c("C", "L")) %>%
ggplot(aes(x = tissue_mono, y = gene)) +
geom_tile(aes(fill = log_fc) ) +
geom_text(aes(label = p_label), nudge_y = -0.25) +
scale_fill_gradient2(low = "dodgerblue4", mid = "#4F8FC4", high = "white", limits = c(-3, 0), na.value = "gray", midpoint = -1.5 ) +
scale_x_discrete(expand = c(0,0)) +
scale_y_discrete(expand = c(0,0)) +
theme_jh() +
labs(fill = expression(log[2]~fold~change), x = "", y = "") +
theme(axis.text.y = element_text(face = 'italic'),
legend.key.size = unit(0.5, "cm"),
panel.background=element_rect(fill="gray")
)
extreme_down_gene_heatmap
ggsave(plot = extreme_down_gene_heatmap, file = here::here("ALS_SC/plots/downregulated_gene_heatmap.pdf"), width = 2.5, height = 3.5 )
ggsave(plot = extreme_up_gene_heatmap, file = here::here("ALS_SC/plots/upregulated_gene_heatmap.pdf"), width = 2.5, height = 3.5 )
# "sig down" = "#4F8FC4",
# "sig - strong down" = "dodgerblue4"
<-
strong_gene_multiplot
(strong_up_gene_plot(deg_combo) + labs(title = "Full cohorts") +
strong_up_gene_plot(shared_donor_df) + labs(title = "Restricted to shared donors") +
+ labs(title = "Top 20 upregulated genes") +
extreme_up_gene_heatmap plot_layout(guides = "collect")
/ (
) strong_down_gene_plot(deg_combo) + labs(title = "Full cohorts") +
strong_down_gene_plot(shared_donor_df) + labs(title = "Restricted to shared donors") +
+ labs(title = "Top 20 downregulated genes") +
extreme_down_gene_heatmap plot_layout(guides = "collect")
+
) plot_layout(nrow = 2, widths = c(2,2,1)) +
plot_annotation(tag_levels = "a") &
theme(plot.tag = element_text(face = "bold"), legend.position = "bottom", plot.title = element_text(hjust = 0.5))
#strong_gene_multiplot
#strong_gene_multiplot
ggsave(plot = strong_gene_multiplot, filename = here::here("ALS_SC/plots/strong_gene_multiplot.pdf"), width = 9, height = 8 )
# 109 have LFC > 1 in both CSC and LSC and FDR < 0.05 in both
# 238 genes have LFC > 1 in at least 1 of 2 but FDR < 0.05 in both
ggsave(plot = strong_up_gene_plot(deg_combo) + guides(colour = FALSE), file =here::here("ALS_SC/plots/shared_upregulated_genes.pdf"), height = 2.5, width = 2.5)#, device = cairo_pdf )
ggsave(plot = strong_down_gene_plot(deg_combo) + guides(colour = FALSE), file =here::here("ALS_SC/plots/shared_downregulated_genes.pdf"), height = 2.5, width = 2.5)#, device = cairo_pdf )
## combo
<-
both_extremes strong_up_gene_plot(deg_combo) +
strong_down_gene_plot(deg_combo) +
plot_layout(ncol =2) &
guides(colour = FALSE)
ggsave(plot = both_extremes, file =here::here("ALS_SC/plots/shared_genes_up_down.pdf"), height = 2.2, width = 4.5, device = cairo_pdf )
<-
volcano_multiplot volcano_plot(als_res$CSC, "Cervical Spinal Cord",
subtitle = "35 Controls vs 139 ALS",
annotate_by = c("LYZ", "GPNMB", "CHIT1", "C3", "MOBP", "MNX1","CCL18")) +
# volcano_plot(als_res$TSC, "Thoracic Spinal Cord",
# subtitle = "10 Controls vs 42 ALS",
# annotate_by = c("LYZ", "GPNMB", "CHIT1", "C3", "MOBP", "ISL1", "MNX1")) +
volcano_plot(als_res$LSC, "Lumbar Spinal Cord",
subtitle = "32 Controls vs 122 ALS",
annotate_by = c("LYZ", "GPNMB", "CHIT1", "C3", "MOBP", "ISL1", "MNX1","CCL18")) +
plot_layout(nrow = 1)
#volcano_multiplot
ggsave(plot = volcano_multiplot, filename = here::here("ALS_SC/plots/als_volcano_multiplot.pdf"), width = 4.5, height = 2.2)
if(rerun){
<-
ma_multiplot ma_plot(als_res$CSC, title = "Cervical Spinal Cord",annotate_by = c("LYZ", "GPNMB", "CCL18", "C3", "MOBP", "CHIT1")) +
ma_plot(als_res$LSC, title = "Lumbar Spinal Cord",annotate_by = c("LYZ", "GPNMB", "CCL18", "C3", "MOBP", "CHIT1")) +
ma_plot(als_res$TSC, title = "Thoracic Spinal Cord",annotate_by = c("LYZ", "GPNMB", "CCL18", "C3", "MOBP", "CHIT1")) +
plot_layout(ncol = 3)
ma_multiplot
ggsave(plot =ma_multiplot, filename = here::here("ALS_SC/plots/als_ma_multiplot.pdf"), width = 8, height = 3)
}
<- hallmark_gsea(pair_genes$lfc)
als_hallmark
<- hallmark_gsea_plot(als_hallmark) + guides(fill = FALSE) +
h_plot theme(plot.subtitle = element_blank(), plot.background = element_blank() )
h_plot
#als_hallmark <- hallmark_gsea(als_genes$lfc)
#hallmark_gsea_plot(als_hallmark, plot_all = TRUE)
ggsave(plot = h_plot,
filename = here::here("ALS_SC/plots/als_hallmark_selected.pdf"), width = 2.5, height = 3.5)
Supplementary Figure - all hallmark pathway results
#all_hallmark <- hallmark_gsea(als_genes$lfc)
<-
h_plot_all hallmark_gsea_plot(als_hallmark, plot_all = TRUE) + labs(subtitle = "")
h_plot_all
ggsave(plot = h_plot_all,
filename = here::here("ALS_SC/plots/als_hallmark_all.png"), width = 4, height = 10)
<- map_df(pair_genes$lfc, ~{
nxp_gsea_res as.data.frame(GSEA(.x, TERM2GENE = nxp_gsea,pvalueCutoff = 1,eps = 0))
.id = "set")
},
gsea_plot(nxp_gsea_res)
gsea_box_plots(als_res, markers = nxp )
Using Neuroexpresso markers for Cholinergic Spinal Cord - all 3 tissues show modest downregulation using GSEA. This is not observed using the Kelley markers - smaller set but not specific to motor neurons.
NEW - Synaptic Gene Ontologies (SynGO)
<- map_df(als_genes$lfc, ~{
syngo_gsea_res as.data.frame(GSEA(.x, TERM2GENE = syngo_gsea,pvalueCutoff = 1,eps = 0))
.id = "set")
},
gsea_plot(syngo_gsea_res)
gsea_box_plots(als_res, markers = syngo )
Top 100 most specific markers between each cluster and every other
<- map_df(pair_genes$lfc, ~{
mathys_gsea_res as.data.frame(GSEA(.x, TERM2GENE = mathys_gsea,pvalueCutoff = 1,eps = 0))
.id = "set")
},
<- gsea_plot(mathys_gsea_res) mathys_gsea_plot
<- map_df(pair_genes$lfc, ~{
darmanis_gsea_res as.data.frame(GSEA(.x, TERM2GENE = darmanis_gsea,pvalueCutoff = 1,eps = 0))
.id = "set")
},
gsea_plot(darmanis_gsea_res)
<- map_df(pair_genes$lfc, ~{
kelley_gsea_res as.data.frame(GSEA(.x, TERM2GENE = kelley_gsea,pvalueCutoff = 1))
.id = "set")
},
<- gsea_plot(kelley_gsea_res)
kelley_gsea_plot
+ labs(title = "Kelley et al") kelley_gsea_plot
Supplementary plot
gsea_box_plots(als_res, kelley)
$CSC %>%
als_resfilter(genename %in% kelley$Oligos)
## # A tibble: 99 x 9
## geneid log_fc ave_expr t p_value adj_p_val b genename tissue
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 ENSG00000170271 -0.463 6.50 -7.56 3.18e-12 7.48e-10 17.3 FAXDC2 CSC
## 2 ENSG00000137221 -0.498 5.26 -7.33 1.11e-11 2.03e- 9 16.1 TJAP1 CSC
## 3 ENSG00000072864 -0.516 5.47 -7.32 1.20e-11 2.17e- 9 16.0 NDE1 CSC
## 4 ENSG00000244274 -0.606 7.91 -6.92 1.10e-10 1.20e- 8 13.8 DBNDD2 CSC
## 5 ENSG00000092871 -0.391 6.65 -6.72 3.12e-10 2.74e- 8 12.8 RFFL CSC
## 6 ENSG00000150510 -0.577 5.75 -6.70 3.51e-10 2.98e- 8 12.7 FAM124A CSC
## 7 ENSG00000147488 -0.571 7.58 -6.70 3.49e-10 2.98e- 8 12.7 ST18 CSC
## 8 ENSG00000154493 -0.529 5.09 -6.65 4.51e-10 3.68e- 8 12.5 C10orf90 CSC
## 9 ENSG00000169247 -0.588 6.34 -6.62 5.32e-10 4.16e- 8 12.3 SH3TC2 CSC
## 10 ENSG00000170775 -0.655 7.10 -6.59 6.26e-10 4.79e- 8 12.1 GPR37 CSC
## # … with 89 more rows
Neuron upregulation enrichment in CSC is driven by GABA genes.
<- map_df(pair_genes$lfc, ~{
panglao_gsea_res as.data.frame(GSEA(.x, TERM2GENE = panglao,pvalueCutoff = 1))
.id = "set")
},
gsea_plot(panglao_gsea_res)
gsea_box_plots(als_res, panglao )
## Plot NES of all datasets
<- bind_rows(
all_gsea %>% mutate(dataset = "PanglaoDB"),
panglao_gsea_res %>% mutate(dataset = "Neuroexpresso") ,
nxp_gsea_res %>% mutate(dataset = "Kelley"),
kelley_gsea_res %>% mutate(dataset = "Darmanis"),
darmanis_gsea_res %>% mutate(dataset = "Mathys")
mathys_gsea_res
)
# make ID conversion table
<- unique(all_gsea$ID)
all_ids <- all_ids[order(all_ids)]
all_ids
<- tibble(
conversion_df ID = all_ids,
cell = c(
rep("Astrocyte", 3),
rep("Endothelial", 3),
NA,
rep("Microglia", 2),
rep("Neuron", 3),
rep("Oligodendrocyte", 4),
rep("Pericyte", 1)
)
)
<-
marker_nes_plot %>%
all_gsea left_join(conversion_df, by = "ID") %>%
filter(!is.na(cell)) %>%
#mutate(dataset = fct_rev(dataset) ) %>%
mutate(padj = pvalue) %>% ##p.adjust(pvalue, method = "bonferroni")) %>%
mutate(padj_label = case_when(
< 0.0001 ~ "***",
padj < 0.001 ~ "**",
padj < 0.05 ~ "*",
padj TRUE ~ "."
%>%
)) mutate(NES_label = signif(NES, digits = 2)) %>%
ggplot(aes(x = dataset, y = NES)) +
geom_col(aes(fill = NES)) +
facet_wrap(~set + cell, nrow = 2) +
#coord_flip() +
scale_fill_distiller(type = "div", palette = "RdBu", limits = c(-4.2,4.2)) + scale_colour_manual(values = c(NA, "white")) +
theme_classic() +
geom_text(aes(label = padj_label), nudge_y = 0.1 ) +
geom_hline(yintercept = 0) +
labs(y = "Normalised Enrichment Score (NES)", x = "") +
theme(strip.background = element_blank(), axis.text.x = element_text(angle = 60, hjust = 1),
axis.text = element_text(colour = "black"),
axis.ticks = element_line(colour = "black"))
ggsave(plot = marker_nes_plot, filename = here::here("ALS_SC/plots/marker_gene_nes_plot.pdf"), width = 8, height =6)
marker_nes_plot
## UpSet plot for all marker gene sets used
<- bind_rows(
all_gsea_genes %>% mutate(dataset = "PanglaoDB"),
panglao %>% mutate(dataset = "Neuroexpresso") ,
nxp_gsea %>% mutate(dataset = "Kelley"),
kelley_gsea %>% mutate(dataset = "Darmanis"),
darmanis_gsea %>% mutate(dataset = "Mathys"),
mathys_gsea %>% mutate(dataset = "Activation")
activation_gsea
)
<-
all_gsea_gene_list %>%
all_gsea_genes left_join(conversion_df, by = c("term_id" = "ID") ) %>%
filter(!is.na(cell)) %>%
mutate(set = paste(dataset, cell, sep = " ")) %>%
split(.$set) %>%
map(~{.x$gene})
## Jaccard index between each set
<- crossing(Var1 = names(all_gsea_gene_list), Var2 = names(all_gsea_gene_list) )
mark_expand
<- map2_df(mark_expand$Var1,mark_expand$Var2, ~{
mark_jac_res <- length(intersect(all_gsea_gene_list[[.x]], all_gsea_gene_list[[.y]]) )
int <- length(unique(c(all_gsea_gene_list[[.x]], all_gsea_gene_list[[.y]]) ) )
uni <- int / uni
jac
tibble(x = .x, y = .y, int = int, uni = uni, jac = jac)
})
<-
annotation_df tibble(x = names(all_gsea_gene_list) ) %>%
distinct() %>%
::separate(col = x, into = c("Dataset", "Cell-type"), sep = " ", remove = FALSE) %>%
tidyr#select(-Dataset) %>%
column_to_rownames("x")
= list(
ann_colors "Cell-type" = viridis::viridis(n = 6),
"Dataset" = viridis::plasma(n =5)
)names(ann_colors$`Cell-type`) <- unique(annotation_df$`Cell-type`)
names(ann_colors$Dataset) <- unique(annotation_df$Dataset)
<-
marker_jaccard_plot %>%
mark_jac_res select(x,y, jac) %>%
mutate(jac = ifelse(jac == 1, NA, jac)) %>%
pivot_wider(names_from = y, values_from = jac) %>%
column_to_rownames("x") %>%
::pheatmap(annotation_col = annotation_df,
pheatmapshow_colnames = FALSE, show_rownames = FALSE,
color = colorRampPalette(c("white", "navy"))(50)
annotation_colors =ann_colors )
, marker_jaccard_plot
ggsave(plot = marker_jaccard_plot, filename = here::here("ALS_SC/plots/marker_gene_jaccard_plot.pdf"), width = 6, height =4)
#
# activation_gene_list <-
# activation_gsea %>%
# split(.$term_id) %>%
# map(~{.x$gene})
#
# act_expand <- crossing(Var1 = names(activation_gene_list), Var2 = names(activation_gene_list) )
#
# act_jac_res <- map2_df(mark_expand$Var1,mark_expand$Var2, ~{
# int <- length(intersect(activation_gene_list[[.x]], activation_gene_list[[.y]]) )
# uni <- length(unique(c(activation_gene_list[[.x]], activation_gene_list[[.y]]) ) )
# jac <- int / uni
#
# tibble(x = .x, y = .y, int = int, uni = uni, jac = jac)
# })
#
#
# #act_jaccard_plot <-
# act_jac_res %>%
# select(x,y, jac) %>%
# mutate(jac = ifelse(jac == 1, NA, jac)) %>%
# pivot_wider(names_from = y, values_from = jac) %>%
# column_to_rownames("x") %>%
# pheatmap::pheatmap(
# #annotation_col = annotation_df,
# show_colnames = FALSE, show_rownames = TRUE,
# color = colorRampPalette(c("white", "navy"))(50)
# #, annotation_colors =ann_colors
# )
#
# annotation_df <- tibble(Dataset = act_jac_res$x)
#
compiled from multiple papers
<- map_df(pair_genes$lfc, ~{
activation_gsea_res as.data.frame(GSEA(.x, TERM2GENE = activation_gsea,pvalueCutoff = 1))
.id = "set")
},
#activation_gsea_res$ID <- factor(activation_gsea_res$ID, levels = rev(act_sets))
<- gsea_plot(activation_gsea_res)
activation_gsea_plot activation_gsea_plot
gsea_box_plots(als_res, markers = activation_sets)
Combine tissue and activation together
<- mathys_gsea_plot +
als_multiplot +
activation_gsea_plot plot_layout(ncol =2) &
guides(fill = FALSE) &
theme_jh()
als_multiplot
ggsave(plot = als_multiplot, filename = here::here("ALS_SC/plots/als_cell_activation.pdf"), width = 3, height = 2)
load(here::here("other_datasets/Oeckl_Proteomics/Oeckl_Proteomics.RData"))
<- oeckl$csf
csf # csf$fdr <- p.adjust(csf$pval, method = "fdr")
# csf_sig <- filter(csf, fdr < 0.05)
<- oeckl$csf_sig
csf_sig
<- str_trim(unlist(strsplit(csf_sig$gene,split = ",")))
csf_sig_genes # 35 significant genes
<- filter(csf_sig, !grepl(",", csf_sig$gene))
csf_sig # 30 non-overlapping genes
filter(als_res$CSC, genename %in% csf_sig$gene) %>% filter(adj_p_val < 0.05)
## # A tibble: 17 x 9
## geneid log_fc ave_expr t p_value adj_p_val b genename tissue
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 ENSG00000136235 2.76 7.79 10.6 2.85e-20 7.23e-16 35.5 GPNMB CSC
## 2 ENSG00000042493 1.38 5.74 8.52 1.22e-14 7.96e-12 22.8 CAPG CSC
## 3 ENSG00000133063 4.53 3.84 6.97 8.31e-11 9.94e- 9 14.2 CHIT1 CSC
## 4 ENSG00000234906 1.34 3.63 5.92 1.92e- 8 7.53e- 7 8.99 APOC2 CSC
## 5 ENSG00000100979 0.655 6.47 5.92 1.93e- 8 7.57e- 7 8.81 PLTP CSC
## 6 ENSG00000196136 1.62 8.72 5.88 2.43e- 8 9.24e- 7 8.58 SERPINA3 CSC
## 7 ENSG00000064886 1.79 5.92 5.67 6.64e- 8 2.08e- 6 7.67 CHI3L2 CSC
## 8 ENSG00000095970 0.961 4.92 5.39 2.55e- 7 6.23e- 6 6.39 TREM2 CSC
## 9 ENSG00000170458 1.06 5.80 4.99 1.61e- 6 2.83e- 5 4.56 CD14 CSC
## 10 ENSG00000133048 1.52 6.52 4.54 1.11e- 5 1.39e- 4 2.71 CHI3L1 CSC
## 11 ENSG00000184500 0.594 4.20 4.49 1.37e- 5 1.66e- 4 2.61 PROS1 CSC
## 12 ENSG00000131095 0.303 15.0 4.28 3.18e- 5 3.34e- 4 1.84 GFAP CSC
## 13 ENSG00000144810 0.795 4.05 4.14 5.54e- 5 5.29e- 4 1.30 COL8A1 CSC
## 14 ENSG00000154277 -0.419 7.00 -3.69 3.10e- 4 2.23e- 3 -0.476 UCHL1 CSC
## 15 ENSG00000131386 0.352 9.05 3.41 8.24e- 4 5.05e- 3 -1.37 GALNT15 CSC
## 16 ENSG00000073754 0.841 -3.72 2.53 1.26e- 2 4.45e- 2 -2.88 CD5L CSC
## 17 ENSG00000224389 0.401 8.61 2.80 5.75e- 3 2.40e- 2 -3.16 C4B CSC
filter(als_res$LSC, genename %in% csf_sig$gene) %>% filter(adj_p_val < 0.05)
## # A tibble: 11 x 9
## geneid log_fc ave_expr t p_value adj_p_val b genename tissue
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 ENSG00000136235 1.73 7.00 5.61 0.000000104 0.0000103 7.43 GPNMB LSC
## 2 ENSG00000234906 1.32 3.31 5.59 0.000000114 0.0000112 7.39 APOC2 LSC
## 3 ENSG00000042493 0.855 5.34 4.98 0.00000183 0.0000833 4.71 CAPG LSC
## 4 ENSG00000095970 0.845 4.62 4.37 0.0000239 0.000567 2.33 TREM2 LSC
## 5 ENSG00000133063 2.73 2.85 4.02 0.0000939 0.00159 1.22 CHIT1 LSC
## 6 ENSG00000144810 0.603 4.57 3.75 0.000260 0.00354 0.0936 COL8A1 LSC
## 7 ENSG00000184500 0.610 4.30 3.54 0.000548 0.00620 -0.560 PROS1 LSC
## 8 ENSG00000170458 0.796 5.62 3.39 0.000922 0.00915 -1.12 CD14 LSC
## 9 ENSG00000278637 0.663 -1.89 2.89 0.00445 0.0294 -2.09 HIST1H4A LSC
## 10 ENSG00000196136 0.925 8.38 2.82 0.00545 0.0341 -2.73 SERPINA3 LSC
## 11 ENSG00000100979 0.316 6.31 2.66 0.00872 0.0481 -3.16 PLTP LSC
filter(als_res$TSC, genename %in% csf_sig$gene) %>% filter(adj_p_val < 0.05)
## # A tibble: 1 x 9
## geneid log_fc ave_expr t p_value adj_p_val b genename tissue
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 ENSG00000136235 3.17 8.28 5.62 0.00000132 0.0256 5.27 GPNMB TSC
setEnrichment(set1 = filter(als_res$CSC, adj_p_val < 0.05) %>% pull(genename),
set2 = csf_sig$gene,
universe = nrow(als_res$CSC)
)
## # A tibble: 1 x 5
## set1_length set2_length overlap ratio p.value
## <int> <int> <int> <dbl> <dbl>
## 1 7349 30 17 3.21 0.00137
32 proteins (35 genes) differentially expressed in ALS CSF, of which 17 are significant in CSC, 11 in LSC and 1 in TSC. GPNMB is top in all 3.
<- oeckl$sp
sp #sp$fdr <- p.adjust(sp$pval, method = "fdr")
#sp_sig <- filter(sp, fdr < 0.05)
<- oeckl$sp_sig
sp_sig <- unlist(strsplit(sp_sig$gene,split = ";"))
sp_sig_genes # 297 genes
table( !grepl(";", sp_sig$gene) )
##
## FALSE TRUE
## 5 287
# 287 non-overlapping genes
<- filter(sp_sig, !grepl(";", sp_sig$gene))
sp_sig
filter(als_res$CSC, genename %in% sp_sig_genes) %>% filter(adj_p_val < 0.05)
## # A tibble: 158 x 9
## geneid log_fc ave_expr t p_value adj_p_val b genename tissue
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 ENSG00000136235 2.76 7.79 10.6 2.85e-20 7.23e-16 35.5 GPNMB CSC
## 2 ENSG00000130203 1.18 10.3 10.0 1.44e-18 9.14e-15 31.6 APOE CSC
## 3 ENSG00000204287 1.24 7.97 9.05 5.04e-16 7.99e-13 25.9 HLA-DRA CSC
## 4 ENSG00000197746 0.532 10.4 9.00 6.85e-16 9.14e-13 25.6 PSAP CSC
## 5 ENSG00000145703 1.76 4.42 8.80 2.25e-15 2.38e-12 24.4 IQGAP2 CSC
## 6 ENSG00000131981 1.39 5.22 8.76 2.88e-15 2.71e-12 24.2 LGALS3 CSC
## 7 ENSG00000042493 1.38 5.74 8.52 1.22e-14 7.96e-12 22.8 CAPG CSC
## 8 ENSG00000081237 0.977 6.05 8.45 1.89e-14 1.09e-11 22.3 PTPRC CSC
## 9 ENSG00000120594 0.784 7.62 8.11 1.31e-13 5.34e-11 20.4 PLXDC2 CSC
## 10 ENSG00000177119 0.844 6.52 8.00 2.59e-13 9.26e-11 19.8 ANO6 CSC
## # … with 148 more rows
# 158 overlap
#matrix(c(4455, 112, 287, nrow(als_res$CSC) ), nrow = 2 )
setEnrichment(set1 = filter(als_res$CSC, adj_p_val < 0.05) %>% pull(genename),
set2 = sp_sig$gene,
universe = nrow(als_res$CSC)
)
## # A tibble: 1 x 5
## set1_length set2_length overlap ratio p.value
## <int> <int> <int> <dbl> <dbl>
## 1 7349 287 153 2.84 3.27e-18
#filter(als_res$LSC, genename %in% sp_sig_genes) %>% filter(adj_p_val < 0.05)
#filter(als_res$TSC, genename %in% sp_sig_genes) %>% filter(adj_p_val < 0.05)
<-
csc_prot filter(als_res$CSC, genename %in% c(sp_sig_genes, csf_sig_genes) ) %>%
filter(adj_p_val < 0.05) %>%
left_join(sp_sig, by = c("genename" = "gene") ) %>%
rename(sp_lfc = lfc, sp_p = pval) %>%
left_join(csf_sig, by = c("genename" = "gene")) %>%
rename(csf_lfc = lfc, csf_p = pval) %>%
mutate(darmanis = darmanis_gsea$term_id[match(genename, darmanis_gsea$gene)]) %>%
mutate(mathys = mathys_gsea$term_id[match(genename, mathys_gsea$gene)]) %>%
mutate(kelley = kelley_gsea$term_id[match(genename, kelley_gsea$gene)]) %>%
mutate(panglao = panglao$term_id[match(genename, panglao$gene)]) %>%
mutate(nxp = nxp_gsea$term_id[match(genename, nxp_gsea$gene)])
292 peptides (297 unique genes) are differentially expressed between ALS and control in spinal cord tissue, of which 158, 107 and 19 are DEGs in spinal cord RNA-seq
of the 158 CSC overlapping genes, 16 (91%) go in the opposite direction
#library(ggrepel)
#library(patchwork)
<- function(prot, rna, mytitle, highlight = c("CHIT1", "GPNMB", "IQGAP2", "LYZ", "MOBP", "PEX5L", "APOE", "SERPINA3", "UCHL1"), cross_zero = TRUE){
compare_rna_prot_plot <-
to_plot inner_join(prot, rna, by = c("gene"= "genename"), suffix = c(".prot", ".gene") ) %>%
filter(adj_p_val < 0.05)
print(table(rna = to_plot$lfc > 0, prot = to_plot$log_fc > 0))
<- to_plot %>%
p ggplot(aes(y = lfc, x = log_fc, label = gene)) +
geom_point(size = 1, colour = "black") +
geom_point(size = 0.8, colour = "purple") +
geom_text_repel(data = filter(to_plot, gene %in% highlight),max.overlaps = 10, size = 2.5, fontface = "italic", min.segment.length = unit(x = 0, units = "lines")) +
labs(y = expression(~log[2]~"(Protein)"), x = expression(~log[2]~"(mRNA)"), title = mytitle) +
geom_abline(slope = 1, intercept = 0, linetype = 3) +
theme_jh() +
theme(panel.border = element_blank(),
axis.ticks = element_line(colour = "black"),
plot.title = element_text(hjust = 0, size = 8)
+
) scale_x_continuous(n.breaks = 3, breaks = waiver()) +
scale_y_continuous(n.breaks = 3, breaks = waiver())
if(cross_zero == TRUE){
<- p + geom_hline(yintercept = 0, linetype = 3) + geom_vline(xintercept = 0, linetype = 3)
p
}return(p)
}
## main figure!
#library(ggrepel)
<-
cervical_compare_plot compare_rna_prot_plot(prot = sp_sig, rna = als_res$CSC, "Spinal Cord" ) +
compare_rna_prot_plot(prot = csf_sig, rna = als_res$CSC, "Cerebrospinal Fluid", cross_zero = FALSE) +
plot_layout(ncol = 2)
## prot
## rna FALSE TRUE
## FALSE 40 13
## TRUE 3 97
## prot
## rna FALSE TRUE
## TRUE 1 16
cervical_compare_plot
ggsave(cervical_compare_plot, filename = here::here("ALS_SC/plots/cervical_rna_protein_compare.pdf"), width = 4.5, height = 2.2)
## supplement
<-
lumbar_compare_plot compare_rna_prot_plot(prot = sp_sig, rna = als_res$LSC, "Spinal Cord") +
compare_rna_prot_plot(prot = csf_sig, rna = als_res$LSC, "Cerebrospinal Fluid") +
plot_layout(ncol = 2)
## prot
## rna FALSE TRUE
## FALSE 28 8
## TRUE 2 64
## prot
## rna TRUE
## FALSE 1
## TRUE 10
ggsave(lumbar_compare_plot, filename = here::here("ALS_SC/plots/lumbar_rna_protein_compare.pdf"), width = 6, height = 3)
# put Cervical in main figure
# put Lumbar in supplement
# make table with fold changes of protein and RNA
<- bind_rows(
rna_prot_df %>%
csf_sig mutate(cohort = "CSF_protein"),
%>% mutate(cohort = "SC_protein"),
sp_sig $CSC %>% filter(adj_p_val < 0.05) %>%
als_resselect(gene = genename, lfc = log_fc, pval = p_value) %>%
mutate(cohort = "Cervical_mRNA"),
$LSC %>% filter(adj_p_val < 0.05) %>%
als_resselect(gene = genename, lfc = log_fc, pval = p_value) %>%
mutate(cohort = "Lumbar_mRNA")
%>%
) filter( gene %in% .$gene[duplicated(.$gene)] & gene %in% c(csf_sig$gene, sp_sig$gene) ) %>%
filter( gene != "SOD2") # weird duplicates
<- rna_prot_df %>%
rna_prot_df_wide pivot_wider(names_from = cohort,
values_from = c(lfc, pval),
names_glue = "{cohort}.{.value}", names_sort = TRUE,
%>%
) select(gene, starts_with("SC"), starts_with("CSF"), starts_with("Cervical"), starts_with("Lumbar") ) %>%
arrange((SC_protein.pval))
#View(rna_prot_df_wide)
write_tsv(rna_prot_df_wide, file = here::here("ALS_SC/tables/oeckl_protein_comparison.tsv"))
<- function(data, p = 0.05){
rank_oeckl <- data %>%
genes filter( !grepl(";|,", gene), !is.na(lfc), pval < p ) %>%
filter(!duplicated(gene) ) %>%
select(gene,lfc) %>%
distinct() %>%
arrange(desc(lfc))
<- setNames(genes$lfc, genes$gene)
ranked
}
<-
nxp_compare %>%
nxp_gsea left_join( inner_join(sp, csf, by = "gene",suffix = c(".sp", ".csf") ),
by = "gene")
%>%
nxp_compare filter(pval.sp < 0.05) %>%
ggplot(aes(x = term_id, y = lfc.sp)) + geom_jitter(aes(colour = pval.sp < 0.05), width = 0.25) +
geom_text_repel( aes(label = gene))
%>%
nxp_compare filter(pval.csf < 0.05) %>%
ggplot(aes(x = term_id, y = lfc.csf)) + geom_jitter(aes(colour = pval.csf < 0.05), width = 0.25) +
geom_text_repel(data = filter(nxp_compare, pval.csf < 0.05), aes(label = gene))
<- rank_oeckl(sp, p = 1) # 1201 genes
sp_ranked <- rank_oeckl(csf, p = 1) #392 genes
csf_ranked
<- map_df(list("CSF" = csf_ranked, "Spinal Cord" = sp_ranked ), ~{
oeckl_nxp_gsea_res as.data.frame(GSEA(.x, TERM2GENE = nxp_gsea,pvalueCutoff = 1,eps = 0))
.id = "set")
},
gsea_plot(oeckl_nxp_gsea_res)
<- map_df(list("CSF" = csf_ranked, "Spinal Cord" = sp_ranked ), ~{
oeckl_kelley_gsea_res as.data.frame(GSEA(.x, TERM2GENE = kelley_gsea,pvalueCutoff = 1,eps = 0))
.id = "set")
},
gsea_plot(oeckl_kelley_gsea_res)
<- map_df(list("CSF" = csf_ranked, "Spinal Cord" = sp_ranked ), ~{
oeckl_activation_gsea_res as.data.frame(GSEA(.x, TERM2GENE = activation_gsea,pvalueCutoff = 1,eps = 0))
.id = "set")
},
gsea_plot(oeckl_activation_gsea_res)
<- load_results(here::here("data/differential_expression/ALS/Model_8_limma_res_Mar2022.RData"))
dur_res <- make_gene_sets(dur_res)
dur_genes $lfc$TSC <- NULL
dur_genes
<- c("CHIT1", "C3", "GPNMB", "CHRNA1", "PON3", "DEFA1","MNX1")
dur_anno <-
duration_volcano volcano_plot(dur_res$CSC, title = "Cervical Spinal Cord", subtitle = "139 ALS",
annotate_by = dur_anno, type = "duration") +
# volcano_plot(dur_res$TSC, title = "Thoracic Spinal Cord", subtitle = "42 ALS",
# annotate_by = c("CHIT1", "C3", "LYZ", "GPNMB", "MOBP", "GFAP"),type = "duration") +
volcano_plot(dur_res$LSC, title = "Lumbar Spinal Cord", subtitle = "122 ALS",
annotate_by = dur_anno,type = "duration")
#duration_volcano
ggsave(plot = duration_volcano, filename = here::here("ALS_SC/plots/duration_volcano.pdf"), width = 5, height = 2.2)
Cell types and activation
<-
dur_activation_gsea_res map_df(dur_genes$lfc, ~{
as.data.frame(GSEA(.x, TERM2GENE = activation_gsea,pvalueCutoff = 1, minGSSize = 5, eps = 0))
.id = "set")
},
gsea_plot(dur_activation_gsea_res)
gsea_box_plots(dur_res, markers = activation_sets)
<- map_df(dur_genes$lfc, ~{
dur_kelley_gsea_res as.data.frame(GSEA(.x, TERM2GENE = kelley_gsea,pvalueCutoff = 1, minGSSize = 5, eps = 0))
.id = "set")
},
gsea_box_plots(dur_res, kelley)
<- map_df(dur_genes$lfc, ~{
dur_panglao_gsea_res as.data.frame(GSEA(.x, TERM2GENE = panglao,pvalueCutoff = 1, minGSSize = 5, eps = 0))
.id = "set")
},
gsea_plot(dur_panglao_gsea_res)
gsea_box_plots(dur_res, panglao)
<- map_df(dur_genes$lfc, ~{
dur_nxp_gsea_res as.data.frame(GSEA(.x, TERM2GENE = nxp_gsea,pvalueCutoff = 1, minGSSize = 5, eps = 0))
.id = "set")
},
gsea_plot(dur_nxp_gsea_res)
<- map_df(dur_genes$lfc, ~{
dur_mathys_gsea_res as.data.frame(GSEA(.x, TERM2GENE = mathys_gsea,pvalueCutoff = 1, minGSSize = 5, eps = 0))
.id = "set")
},
<- gsea_plot(dur_mathys_gsea_res)
dur_mathys_gsea_plot
<- gsea_plot(dur_activation_gsea_res)
dur_activation_gsea_plot
<-
dur_gsea_multiplot +
dur_mathys_gsea_plot +
dur_activation_gsea_plot plot_layout(ncol = 2, guides = "collect") #&
# guides(fill = "none")
dur_gsea_multiplot
ggsave(plot = dur_gsea_multiplot, filename = here::here("ALS_SC/plots/dur_cell_activation.pdf"), width = 4, height = 2)
<- read_tsv(here::here("data/deconvolution/Mathys_MuSiC_residual_results.tsv"))
deconv_res
<- data.frame(short = c("Ast","End", "Mic", "Ex", "Oli", "Per" ),
conversion_table long = c("Astrocytes", "Endothelial", "Microglia", "Neurons", "Oligos", "Pericytes")
)
<- deconv_res %>%
mathys_music_df inner_join(conversion_table, by = c("cell" = "short") ) %>%
select(-cell) %>%
rename(cell = long) %>%
select(sample, disease, duration, cell, tissue, deconv, resid, p_nom, p_resid)
<-
duration_deconv_cor_df %>%
mathys_music_df filter( tissue == "Cervical_Spinal_Cord", disease == "ALS") %>%
mutate(duration = as.numeric(duration)) %>%
split(.$cell) %>%
map_df( ~{ cor.test(.x$deconv, .x$duration, method = "spearman" ) %>%
::tidy() }, .id = "cell") %>%
broommutate(padj = p.adjust(p.value, method = "bonferroni")) %>%
mutate(cor = paste0("R = ",signif(estimate, digits = 2)) ) %>%
mutate(padj_label = case_when(
< 0.0001 ~ "***",
padj < 0.001 ~ "**",
padj < 0.05 ~ "*",
padj TRUE ~ "."
))
<-
duration_deconv_plot %>%
mathys_music_df filter( tissue == "Cervical_Spinal_Cord", disease == "ALS") %>%
mutate(duration = as.numeric(duration)) %>%
filter(!is.na(duration)) %>%
left_join(duration_deconv_cor_df, by = "cell" ) %>%
mutate(cell = paste0(cell, "\n", padj_label)) %>%
ggplot(aes(x = duration, y = deconv)) +
rasterise(
geom_point(size = 0.3, colour = "blue" ),
dpi = 300
+
)geom_smooth(method = "lm", colour = "black") +
#geom_boxplot(fill = NA,notch = FALSE, na.rm = TRUE, outlier.color = NA) +
facet_wrap( ~ cell, scales = "free_y", nrow = 2 ) +
#scale_colour_manual(values = c("#4F8FC4", "#B61927")) +
labs( x = "", y = "" ) +
guides(fill = FALSE, colour = FALSE) +
theme_jh() +
theme(
plot.title = element_blank(),
strip.background = element_blank(),
#strip.switch.pad.grid = unit(0,units ="pt"),
#panel.spacing.x = unit(0,units = "pt"),
strip.placement = "outside",
panel.border = element_blank(),
strip.text.x = element_text( face = 'plain', hjust = 0.5, vjust = 1, margin = margin(t =5, b = 0, unit = "pt")),
strip.text.y.left = element_text(angle = 0)#,
#plot.margin = margin()
+
) labs(y = "") +
scale_y_continuous(labels = scales::percent_format(accuracy = 1) ) +
::stat_cor(aes(label = ..r.label..), method = "spearman", label.y.npc = 1, label.x.npc = 0.33, size = 3)
ggpubr
duration_deconv_plot
ggsave( plot = duration_deconv_plot,filename = here::here("ALS_SC/plots/dur_deconvolution.pdf"), width = 4.5, height = 3.5 )