Input data
Just keep ALS samples
Protein-coding genes only
Regress out technical metrics and RIN
Perform WGCNA
Correlate MEs with: age of onset age at death tissue sex C9orf72 vs non-C9 Motor Onset
NEW FOR REVISIONS Correlate modules with cell-type proportions!
<- read_tsv(here::here("data/differential_expression/ALS/ALS_all_differential_expression_support.tsv"))
support <- read_tsv(here::here("data/differential_expression/ALS/ALS_all_differential_expression_tech_metrics.tsv"))
tech_df
<- left_join(support,tech_df, by = "sample")
support
# keeps all 303 ALS samples!
<- filter(support, disease == "ALS") %>%
support mutate(duration = as.numeric(duration) ) %>%
mutate(duration = replace_na(duration, median(duration, na.rm = TRUE))) %>%
mutate(motor_onset = ifelse( motor_onset %in% c("Bulbar", "Limb"), motor_onset, "Other" )) %>%
mutate(tissue = case_when(tissue == "Cervical_Spinal_Cord" ~ "CSC",
== "Lumbar_Spinal_Cord" ~ "LSC",
tissue == "Thoracic_Spinal_Cord" ~ "TSC"
tissue %>%
)) mutate(onset = age - (duration / 12))
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
# STMN2 splicing
<- read_tsv("/Users/Jack/google_drive/Work/Mount_Sinai/STMN2-splicing/data/STMN2_NYGC_RNA_data.tsv")
stmn2
$tSTMN2 <- stmn2$tSTMN2_TPM[match(support$sample, stmn2$sample)]
support$tSTMN2[is.na(support$tSTMN2)] <- 0
support
row.names(support) <- support$sample
## Warning: Setting row names on a tibble is deprecated.
# one-hot encode each categorical variable
# model.matrix will one-hot encode any categorical variable
<-
support_onehot as.data.frame(
model.matrix(~ disease_c9 + sex + tissue + motor_onset + prep + site, data = support)
%>%
) rownames_to_column(var = "sample") %>%
left_join(support[, c("sample","rin","age", "onset", "duration", "tSTMN2")], by = "sample") %>%
select( -"(Intercept)") %>%
column_to_rownames(var = "sample")
voom TMM normalisation
if(rerun){
load(here::here("data/feb_2020/gene_counts_no_tags.RData") )
load(here::here("data/feb_2020/gene_tpm_no_tags.RData") )
<- genes_counts[,support$sample]
counts_loc <- genes_tpm[,support$sample]
tpm_loc
rm(genes_counts)
#
#isexpr <- rowSums(cpm(counts_loc)> 1 ) >= 0.5 * ncol(counts_loc)
# new for 2022
<- rowSums(tpm_loc > 0 ) >= 0.5 * ncol(counts_loc)
isexpr
# create data structure with only expressed genes
<- counts_loc[isexpr,]
counts_loc
print( paste0(" * Keeping ", nrow(counts_loc), " expressed genes"))
# keep only protein-coding genes
<- read_tsv("~/GENCODE/gencode.v30.gene.type.tsv.gz") %>%
gencode_gene_types filter(type == "protein_coding") %>%
mutate(gene_id = gsub("\\.[0-9]+", "", gene_id))
#library(biomaRt)
# ensembl <- useMart(biomart="ENSEMBL_MART_ENSEMBL")#, dataset="hsapiens_gene_ensembl")
#
# bm_res <- biomaRt::getBM(attributes=c("ensembl_gene_id","gene_biotype"),
# filters = c("ensembl_gene_id","biotype"),
# values = list(row.names(counts_loc),"protein_coding"),
# mart=ensembl)
#
<- counts_loc[ row.names(counts_loc) %in% gencode_gene_types$gene_id ,]
counts_loc
print( paste0(" * Keeping ", nrow(counts_loc), " protein-coding genes"))
<- DGEList(counts_loc)
gExpr # Perform TMM normalization
<- calcNormFactors(gExpr,method = "TMM")
gExpr # Specify variables to be included in the voom() estimates of
# uncertainty.
# Recommend including variables with a small number of categories
# that explain a substantial amount of variation
<- model.matrix( ~ factor(site) + factor(prep), support)
design
<- voom(gExpr, design = design)$E
counts_voom
#support$disease <- factor(support$disease, levels = c("Control", "ALS"))
# mod0 <- model.matrix( ~ 1, support)
# mod <- model.matrix( ~ age + duration + tissue + motor_onset + disease_c9, support)
#
#nsv <- num.sv(counts_voom, mod, method = "be")
# SVA Network
#print(paste0("SVA Network with ", nsv, " SVs"))
#counts_voom_sva <- t(sva_network(counts_voom, nsv))
# removeBatchEffects
<-
covariate_df ::select(support, rin, pct_mrna_bases, median_3prime_bias, median_5prime_bias, pct_chimeras, gPC1, gPC2, gPC3, gPC4, gPC5, pct_ribosomal_bases, pct_intergenic_bases)
dplyr
# for Mar 2022
<-
covariate_df ::select(support, rin, pct_mrna_bases, prep, gPC1, gPC2, gPC3, gPC4, gPC5) %>%
dplyrmutate(prep = as.numeric(as.factor(prep)) )
#"~ disease + sex + rin + rin_squared + age + age_squared + prep + site + pct_mrna_bases + gPC1 + gPC2 + gPC3 + gPC4 + gPC5 "
row.names(covariate_df) <- support$sample
<- t(removeBatchEffect(counts_voom, batch = support$site, batch2 = support$tissue, covariates = covariate_df ))
counts_voom_rbe
<- t(counts_voom)
counts_voom
# PCA
<- prcomp(counts_voom, center = TRUE, scale. = TRUE)
pca_voom
#pca_sva <- prcomp(counts_voom_sva, center = TRUE, scale. = TRUE)
<- prcomp(counts_voom_rbe, center = TRUE, scale. = TRUE)
pca_rbe
<- autoplot(pca_voom, colour = "site", data =support, shape = "prep") + labs(title = "Voom normalised") + theme_jh()
voom_plot
<- autoplot(pca_rbe, colour = "site", data =support, shape = "prep") + labs(title = "Voom normalised + Site and technical metrics regressed")+ theme_jh()
rbe_plot
# sva_plot <- autoplot(pca_sva, colour = "site",data =support) + labs(title = "Voom + SVA normalised")+ theme_jh()
print(voom_plot)
print(rbe_plot)
#print(sva_plot)
save(counts_voom_rbe, rbe_plot, file = here::here("data/WGCNA/ALS_SC_Voom_RBE_Mar2022.RData"))
#save(counts_voom_sva, sva_plot, file = here::here("data/WGCNA/ALS_SC_Voom_SVA.RData"))
save(counts_voom, voom_plot, file = here::here("data/WGCNA/ALS_SC_Voom_Mar2022.RData"))
else{
}load(here::here("data/WGCNA/ALS_SC_Voom_RBE_Mar2022.RData"))
# load(here::here("data/WGCNA/ALS_SC_Voom_SVA.RData"))
# load(here::here("data/WGCNA/ALS_SC_Voom_Mar2022.RData"))
}
March 2022: 16,922 protein coding genes with median TPM > 0
# one function to rule them all
<- function(datExpr, prefix = "test", deep_split = 4, show_plots = FALSE){
run_WGCNA library(WGCNA)
library("flashClust")
= 8
nthreads allowWGCNAThreads(nthreads)
# power threshold
# Choose a set of soft-thresholding powers
= c(c(1:10), seq(from = 12, to=20, by=2))
powers # Call the network topology analysis function
= pickSoftThreshold(datExpr, powerVector = powers, verbose = 5)
sft # Scale-free topology fit index as a function of the soft-thresholding power
= 0.9
cex1 #png(paste0(out_folder, "power_threshold.png"), width = 4, height = 5, res = 300, units = "in")
if(show_plots){
pdf(paste0(out_folder, prefix, "_soft_power.pdf"))
plot.new()
plot(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2],
xlab="Soft Threshold (power)",ylab="Scale Free Topology Model Fit,signed R^2",type="n",
main = paste("Scale independence"))
text(sft$fitIndices[,1], -sign(sft$fitIndices[,3])*sft$fitIndices[,2], labels=powers,cex=cex1,col="red")
# this line corresponds to using an R^2 cut-off of h
abline(h=0.90,col="red")
#dev.off()
}
if( is.na(sft$powerEstimate) ){
print(" * No power found that raises R^2 above 90%. Picking the lowest power above 80%")
$powerEstimate <- sft$fitIndices[ sft$fitIndices$SFT.R.sq >= 0.8,]$Power[1]
sft
}
print(paste0(" * Using Power Estimate: ", sft$powerEstimate))
if(show_plots){
# Mean connectivity as a function of the soft-thresholding power
pdf(paste0(out_folder, prefix, "_connectivity.pdf"))
plot(sft$fitIndices[,1], sft$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))
text(sft$fitIndices[,1], sft$fitIndices[,5], labels=powers, cex=cex1,col="red")
dev.off()
}# change based on previous plots
= sft$powerEstimate
softPower
<- adjacency(datExpr, power = softPower, type = "signed")
adjacency <- TOMsimilarity(adjacency, TOMType="signed")
TOM <- 1-TOM
dissTOM <- flashClust(as.dist(dissTOM), method="average")
geneTree #png(paste0(out_folder, "TOM_adjacency_signed.png"), width = 20, height = 10, res = 300, units = "in")
if(show_plots){
pdf(paste0(out_folder, prefix, "_gene_tree.pdf"))
plot(geneTree, xlab="", sub="",
main= "Gene Clustering on TOM-based dissimilarity", labels= FALSE, hang=0.04)
dev.off()
}# "We like large modules" - Tutorial, so we set the minimum module size relatively high:
= 50
minModuleSize # Module identification using dynamic tree cut:
= cutreeDynamic(dendro = geneTree, distM = dissTOM,
dynamicMods deepSplit = deep_split, pamRespectsDendro = FALSE,
minClusterSize = minModuleSize);
= data.matrix(table(dynamicMods))
clusters_size = cbind(rownames(clusters_size),clusters_size)
clusters_size colnames(clusters_size) = c("cluster","size")
#write.table(file = paste0( out_folder, "ALS_SC_WGCNA_sva_clusters_sizes.txt"), x = clusters_size, row.names = FALSE)
# Convert numeric lables into colors
= labels2colors(dynamicMods)
dynamicColors = data.matrix(table(dynamicColors))
clusters_colors = cbind(rownames(clusters_colors), clusters_colors)
clusters_colors colnames(clusters_colors) = c("color","size")
#write.table(file = paste0( out_folder, "FTD_WGCNA_sva_clusters_colors1.txt"), x = clusters_colors, row.names = F)
= table(dynamicColors)
mytable <- as.data.frame(t(as.matrix(unclass(mytable))))
colour_table
<- ncol(colour_table)
n_module_pre
print(paste0( " * Pre-clustering: ", n_module_pre," modules!") )
hist(colour_table[1,])
if(show_plots){
pdf(paste0(out_folder, prefix, "_color_dendro_pre_filter.pdf"))
plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram and module colors")
dev.off()
}# Calculate eigengenes
= moduleEigengenes(datExpr, colors = dynamicColors)
MEList = MEList$eigengenes
MEs # Calculate dissimilarity of module eigengenes
= 1-cor(MEs)
MEDiss # Cluster module eigengenes
= hclust(as.dist(MEDiss), method = "average")
METree # Plot the result
#sizeGrWindow(7, 6)
#png(paste0(out_folder, "Tree_eigengenes.png"), width = 12, height = 8, res = 300, units = "in")
#We choose a height cut of 0.25, corresponding to correlation of 0.75, to merge
= 0.25
MEDissThres
if(show_plots){
pdf(paste0(out_folder, prefix, "_clistering.pdf"))
plot(METree, main = "Clustering of module eigengenes",
xlab = "", sub = "")
# Plot the cut line into the dendrogram
abline(h=MEDissThres, col = "red")
dev.off()
}## MERGE MODULES
# Call an automatic merging function
<- mergeCloseModules(datExpr, dynamicColors, cutHeight = MEDissThres, verbose = 3)
merge # The merged module colors
= merge$colors;
mergedColors # Eigengenes of the new merged modules:
= merge$newMEs
mergedMEs #sizeGrWindow(12, 9)
#png(paste0(out_folder, "Merge_modules.png"), width = 16, height = 8, res = 300, units = "in")
if(show_plots){
pdf(paste0(out_folder, prefix, "_color_dendro_post_filter.pdf"))
plotDendroAndColors(geneTree, cbind(dynamicColors, mergedColors),
c("Dynamic Tree Cut", "Merged dynamic"),
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
dev.off()
}<- length(unique(mergedColors))
n_module_post
print(paste0( " * Post-clustering:", n_module_post," modules!") )
<- mergedColors
moduleColors # Construct numerical labels corresponding to the colors
<- c("grey", standardColors(50));
colorOrder <- match(moduleColors, colorOrder)-1;
moduleLabels = mergedMEs
MEs
<- colnames(datExpr)
gene_ids
<- as.data.frame(cbind(gene_ids, moduleColors))
gene_modules <- gene_modules %>% select(gene = gene_ids, module = moduleColors)
gene_modules $genename <- gene_meta$genename[ match(gene_modules$gene, gene_meta$geneid)]
gene_modules
# Save module colors and labels for use in subsequent parts
save(MEs, mergedMEs, moduleColors, geneTree, gene_modules, file = paste0(out_folder, prefix, "_WGCNA_eigengenes.RData"))
save(x = adjacency, file = paste0(out_folder, prefix, "_adjacency.Rdata"))
}
# test_data <- t(counts_voom)[1:10, 1:5000]
# run_WGCNA(test_data, prefix = "test")
#
# prefix <- "test"
# load(paste0(out_folder, prefix, "_WGCNA_eigengenes.RData"))
if(rerun){
# default deepSplit is 4
#run_WGCNA(counts_voom, prefix = "model1")
# run_WGCNA(counts_voom_rbe, prefix = "model2", show_plots = TRUE)
run_WGCNA(counts_voom_rbe, prefix = "model2a", show_plots = TRUE)
#run_WGCNA(counts_voom_sva, prefix = "model3")
# run_WGCNA(counts_voom_sva, prefix = "model4", deep_split = 3)
# run_WGCNA(counts_voom_rbe, prefix = "model5", deep_split = 3)
}
Correlate modules with clinical traits Overlap with cell type markers Overlap with glial activation lists Overlap with DEGs from the ALS vs Control models.
# correlate traits in support_onehot with each module (spearman)
# report correlations and P-values
<- function(model, plot_return = NULL, return_object = FALSE, support_loc = support_onehot){
trait_assoc require(WGCNA)
load(paste0(out_folder, model, "_WGCNA_eigengenes.RData"))
<- MEs %>% rownames_to_column(var = 'sample') %>%
support_mes left_join(support, by = "sample")
message(paste0(" * ", ncol(MEs), " Modules!" ) )
#nGenes = ncol(datExpr)
= nrow(MEs)
nSamples
= cor(MEs, support_loc, use = "p", method = "spearman")
moduleTraitCor = corPvalueStudent(moduleTraitCor, nSamples)
moduleTraitPvalue
<-
module_trait_res left_join(
%>%
moduleTraitCor as.data.frame() %>%
rownames_to_column(var = "module") %>%
::gather( key = "trait", value = "correlation", -module),
tidyr
%>%
moduleTraitPvalue as.data.frame() %>%
rownames_to_column(var = "module") %>%
::gather( key = "trait", value = "p.value", -module),
tidyrby = c("module", "trait"))
<- unique(moduleColors)
all_colours names(all_colours) <- all_colours
<-
module_trait_res %>%
module_trait_res mutate( module = gsub("ME", "", module) )
return(module_trait_res)
}
# plotting module eigengenes
<- function(m, trait, xlabel = "", traits = support){
module_trait_plot stopifnot(trait %in% colnames(traits))
if( ! "sample" %in% names(traits) ){
<- rownames_to_column(traits, var = "sample")
traits
}<- MEs %>%
module_eigengenes rownames_to_column(var = "sample") %>%
::gather( key = "module", value = "ME", -sample) %>%
tidyrleft_join(traits, by = "sample") %>%
rename(module_name = module) %>%
left_join(colour_df, by = "module_name") %>%
filter(module_name == m)
if( is.numeric(module_eigengenes[[trait]] )){
<- "cont"
mode else{
}<- "categ"
mode
}
stopifnot(nrow(module_eigengenes) > 0)
<-
plot %>%
module_eigengenes ggplot( aes_string( x = trait , y = "ME") ) +
theme_classic() +
labs( x = xlabel, y = m) +
theme(axis.text = element_text(colour = "black"),
axis.title.y = element_text(colour = "black", angle = 0, size =12, face = "bold", vjust = 0.5),
axis.ticks = element_line(colour = "black")
#panel.border = element_rect(size = 0.6, colour = "black", fill = NA),
)
if(mode == "cont"){
<- plot +
plot geom_point(colour = "black", size = 1) +
geom_point(aes(colour = module_name), size = 0.8) +
geom_smooth(method = "lm", colour = "black", formula = y ~ x) +
scale_colour_manual(values = module_colour_order ) +
guides(colour = "none") +
stat_cor(aes(label = ..r.label..), method = "spearman", label.x.npc = 0.5, label.y.npc = 0.15)
# if(m == "M18"){
# plot <- plot +
# geom_point(colour = "black", size = 1) +
# geom_point(colour = "white", size = 0.8)
# }
}
if(mode == "categ"){
<- plot +
plot geom_jitter(width = 0.2, size = 0.5) +
geom_boxplot(aes(fill = module), outlier.colour = NA, notch = TRUE, colour = "black") +
scale_fill_manual(values = module_colour_order ) +
guides(fill = "none") +
::stat_compare_means()
ggpubr
}return(plot)
}
<- function(model, rerun = FALSE, term_set = "KEGG"){
module_go load(paste0(out_folder, model, "_WGCNA_eigengenes.RData"))
%>% group_by(module) %>% tally() %>% arrange( desc(n)) %>%
gene_modules ggplot(aes(x = n)) + geom_histogram()
<-
module_go %>%
gene_modules #filter( !module %in% c("blue","turquoise")) %>%
mutate(gene = as.character(gene), module = as.character(module) ) %>%
split(.$module) %>% purrr::map("genename")
<- paste0(out_folder, model, "_WGCNA_module_", term_set, ".tsv")
module_file
if(rerun | !file.exists(module_file)){
<- gprofiler2::gost(module_go, organism = "hsapiens", sources = term_set )$result
module_gprofile $parents <- NULL
module_gprofilewrite_tsv(module_gprofile, path = module_file )
else{
}<- read_tsv(module_file)
module_gprofile
}
<- module_gprofile %>% group_by(query) %>% tally() %>% nrow()
n_modules_w_kegg
<- ncol(MEs)
n_modules
print(paste0(" * N modules: ", n_modules))
print(paste0(" * N modules with ", term_set, " terms: ", n_modules_w_kegg))
%>% select(module = query, term = term_name, p = p_value, query_size, intersection_size, precision) %>% filter( intersection_size >= 10 ) %>%
module_gprofile mutate( p = signif(p, digits = 2), precision = signif(precision, digits = 2) )
}
## use lists of marker genes for fisher exact test overlaps with modules
<- function(model, set = "kelley"){
marker_gene_enrichment load(paste0(out_folder, model, "_WGCNA_eigengenes.RData"))
<- split(gene_modules, gene_modules$module) %>% purrr::map("genename")
module_gene_list stopifnot(set %in% c("kelley", "activation", "nxp", "DEGs", "Panglaodb", "mathys"))
<- c()
markers # marker_df is empty tibble - grow for each set to give annotations and order to data
<- tibble()
marker_df
if( "kelley" %in% set){
load(here::here("data/markers/kelley_markers.RData"))
<- c(markers, kelley)
markers <- tibble(set = "Cell-type markers\n(Kelley)", marker = names(kelley) )
kelley_df <- c(marker_df, list(kelley_df) )
marker_df
}if( "nxp" %in% set){
load(here::here("data/misc/neuroexpresso_spinal_cord_human.RData"))
<- c(nxp, markers)
markers <- tibble(set = "Cell-type markers\n(Neuroexpresso)", marker = names(nxp) )
nxp_df <- c(marker_df, list(nxp_df) )
marker_df
}
if( "activation" %in% set){
load(here::here("data/markers/activation_markers.RData"))
#sets <- c( "Disease-associated microglia", "Disease-associated astrocytes", "Reactive astrocytes - MCAO", "Reactive astrocytes - LPS", "Plaque-induced genes")
<- activation_sets#[ sets]
activation_markers
<- c(activation_markers, markers)
markers <- tibble(set = "Glial activation", marker = names(activation_markers) )
activation_df $marker <- factor(activation_df$marker)#, levels = sets)
activation_df<- c(marker_df, list(activation_df))
marker_df
}if( "mathys" %in% set){
<- data.frame(
mathys_conversion_table short = c("Ast","End", "Mic", "Ex", "Oli", "Per" ),
long = c("Astrocytes", "Endothelial", "Microglia", "Neurons", "Oligos", "Pericytes"),
mono = c("A","E","M","N","O","P")
)load(here::here("data/markers/Mathys_single_nucleus.RData"))
names(mathys) <- mathys_conversion_table$long[ match(names(mathys), mathys_conversion_table$short)]
<- mathys[ !is.na(names(mathys))]
mathys <- c(mathys, markers)
markers <- tibble(set = "Mathys", marker = names(mathys))
mathys_df <- c(marker_df, list(mathys_df) )
marker_df
}if( "Panglaodb" %in% set){
load( here::here("data/Panglaodb/PanglaoDB_markers_list.RData") )
<- pg_list[ c("Endothelial cells", "Ependymal cells", "Microglia", "Neurons", "Oligodendrocytes", "Pericytes", "Astrocytes")]
pg_list <- c(pg_list, markers)
markers
<- tibble(set = "Cell-type markers\n(Panglaodb)", marker = names(pg_list) )
pg_df <- c(marker_df, list(pg_df))
marker_df
}
if( "DEGs" %in% set){
load(file = here::here("data/differential_expression/ALS/ALS_Spinal_Cord_de_res_Mar2022.RData"))
<- de_res_final %>%
deg_markers map_df( ~{.x %>%
filter(adj_p_val < 0.05) %>% # abs(log_fc) > 1) %>%
mutate(direction = ifelse(log_fc > 0, yes = "Upregulated in ALS", no = "Downregulated in ALS" ) ) %>%
select(direction, genename) }, .id = "set"
%>%
) mutate(direction = factor(direction, levels = c("Upregulated in ALS", "Downregulated in ALS"))) %>%
split(.$direction) %>%
map(~{unique(.x$genename) })
<- tibble(set = "DEGs", marker = names(deg_markers))
deg_df
<- c(deg_markers, markers)
markers <- c(marker_df, list(deg_df) )
marker_df
}
<-
marker_enrich_df ::map_df( module_gene_list, ~{
purrr<- .x
mod ::map_df( markers, ~{
purrr<- .x
mark markerEnrichment(module = mod, marker = mark, universe = nrow(gene_modules) )
.id = "marker")
}, .id = "module") %>%
}, left_join( bind_rows(marker_df), by = "marker")
$padj <- p.adjust(p = marker_enrich_df$p.value, method = "bonferroni")
marker_enrich_dfreturn(marker_enrich_df)
}
# plot_marker_enrich <- function(marker_enrich_df){
# marker_enrich_df$module <- paste0(marker_enrich_df$module, " (", marker_enrich_df$module_length, ")")
# marker_enrich_df$marker <- paste0(marker_enrich_df$marker, "\n(", marker_enrich_df$marker_length, ")")
#
# marker_enrich_df %>%
# #mutate( ratio = ifelse(padj < 0.05, ratio, NA)) %>%
# filter( padj < 0.05) %>%
# ggplot( aes(x = marker, y = module)) +
# geom_tile( aes(fill = log(ratio) )) +
# geom_text( aes( label = signif(ratio,digits = 3) )) +
# scale_fill_distiller("log(odds ratio)", type = "seq", palette = 8, direction = 1, na.value = NA) + #, limits = c(1,4)) +
# xlab("") + ylab("") + theme_classic() +
# labs(title = "Marker gene enrichment") +
# scale_x_discrete(expand = c(0,0)) +
# scale_y_discrete(expand = c(0,0)) +
# theme(axis.text.x = element_text(angle = 45, hjust = 1) ) +
# facet_wrap(~set, scales = "free_x", strip.position = "bottom") +
# theme(strip.background = element_blank(), strip.placement = "outside", strip.text = element_text(face = "bold"))
# }
<- function(res, traits = trait_df$trait_label){
trait_plot # traits
%>%
res mutate(padj = p.adjust(p.value, method = "bonferroni")) %>%
mutate(trait_label = factor(trait_label, levels = rev(traits) )) %>%
mutate(label = ifelse(padj < 0.05, "*", "")) %>%
mutate(module = factor(module, levels = module_colour_order )) %>%
ggplot(aes(x = module, y = trait_label, fill = correlation)) +
geom_tile(colour = "black") +
geom_text(aes(label = label), nudge_y = -0.2, colour = "white") +
scale_fill_distiller(palette = "RdBu", na.value = "white") +
scale_x_discrete(expand=c(0,0)) +
scale_y_discrete(expand=c(0,0)) +
theme_classic() +
labs(x = "", y = "") +
facet_wrap(~1) +
theme(
plot.margin = margin(),
axis.line = element_blank(),
strip.text = element_blank(),
panel.border = element_rect(size = 0.6, colour = "black", fill = NA),
axis.text.x = element_blank(), axis.text.y = element_text(colour = "black"),axis.ticks.x = element_blank(),
legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size = 5), legend.text = element_text(size =5)
)
}
<- function(res, marker_levels = NULL){
marker_plot # markers
if( !is.null(marker_levels) ) {
$marker <- fct_rev(factor(res$marker, levels = marker_levels))
reselse{
}$marker <- fct_rev(as.factor(res$marker) )
res
}
%>%
res arrange(desc(marker)) %>%
mutate(label = ifelse(padj < 0.05, "*", "")) %>%
mutate(p.value = ifelse(p.value < 1e-16, 1e-16, p.value)) %>% # threshold P values on 1e-16
mutate(module = factor(module, levels = module_colour_order)) %>%
ggplot(aes(x = module, y = marker, fill = -log10(p.value))) +
geom_tile(colour = "black") +
geom_text(aes(label = label), nudge_y = -0.2, colour = "white") +
#scale_fill_distiller(palette = "RdBu", na.value = "white") +
scale_x_discrete(expand=c(0,0)) +
scale_y_discrete(expand=c(0,0)) +
theme_classic() +
scale_fill_gradient(expression(-log[10]~p-value), low = "white", high = "darkblue") +
labs(x = "", y = "") +
facet_wrap(~1) +
theme(
plot.margin = margin(),
axis.line = element_blank(),
strip.text = element_blank(),
panel.border = element_rect(size = 0.6, colour = "black", fill = NA),
axis.text.x = element_blank(), axis.text.y = element_text(colour = "black"),axis.ticks.x = element_blank(),
legend.key.size = unit(0.25, 'cm'), legend.title = element_text(size = 5), legend.text = element_text(size =5)
)
}
March 2022 - median TPM > 0, regressinng out prep, pct_mrna, rin, site, gPCs - 23 modules
# load model
<- "model2a"
model load(paste0(out_folder, model, "_WGCNA_eigengenes.RData"))
# name modules by size
<- enframe(table(moduleColors), name = "module", value = "n") %>%
module_names arrange(n) %>%
mutate(module_name = paste0("M", 1:nrow(.)))
# cluster MEs by correlation
names(MEs) <- module_names$module_name[ match( gsub("ME", "", names(MEs)), module_names$module )]
<- MEs %>%
module_eigengene_supp_table rownames_to_column(var = "sample")
module_eigengene_supp_table
write_tsv(module_eigengene_supp_table, file = "ALS_SC/tables/networks/networks_eigengenes.tsv")
= 1-cor(MEs)
MEDiss # Cluster module eigengenes
= hclust(as.dist(MEDiss), method = "average")
module_clust
#module_clust <- hclust(dist(1 - cor(MEs)))
<- names(MEs)[module_clust$order]
module_name_order <- module_names$module[ match(module_name_order, module_names$module_name)]
module_colour_order
names(module_colour_order) <- module_name_order
save( module_colour_order, module_names, file = here::here(paste0("data/WGCNA/ALS_signed_deep_split_4/",model, "_WGCNA_module_names.RData")))
# write out gene modules
<- gene_modules %>% left_join( module_names, by = "module") %>%
gene_module_df select(module, module_name, genename, ensemblid = gene) %>%
arrange(module_name)
write_tsv(gene_module_df, here::here("ALS_SC/tables/networks/networks_module_genes.tsv") )
<- MEs %>% rownames_to_column(var = 'sample')
eigengene_df
# use de-identified sample IDs
<- read_tsv(here::here("ALS_SC/tables/all_samples_used_in_study.tsv"))
sample_deid
$sample_deid <- sample_deid$rna_id[match(eigengene_df$sample, sample_deid$external_sample_id)]
eigengene_df
<- dplyr::select(eigengene_df, sample_id = sample_deid, starts_with("M") )
eigengene_df
write_tsv(eigengene_df, here::here("ALS_SC/tables/networks/networks_eigengenes.tsv") )
Module membership
load(here::here("data/WGCNA/ALS_SC_Voom_RBE_Mar2022.RData"))
= as.data.frame(cor(counts_voom_rbe, MEs, use = "p"))
geneModuleMembership = as.data.frame(WGCNA::corPvalueStudent(as.matrix(geneModuleMembership), nrow(counts_voom_rbe)));
MMPvalue
save(geneModuleMembership, MMPvalue, file = here::here("data/WGCNA/module_membership.RData"))
## colours
<-
colour_df tibble( module = module_colour_order, module_name = names(module_colour_order) ) %>%
mutate(module = factor(module, levels = module_colour_order)) %>%
mutate(module_name = factor(module_name, levels = names(module_colour_order))) %>%
mutate(module_order = as.numeric(module))
<-
colour_plot %>%
colour_df ggplot(aes(x = module_name, y = 1)) + geom_tile(aes(fill = module_name) ) +
scale_fill_manual( values = module_colour_order) + #theme_void() +
guides(fill = FALSE) +
scale_y_continuous(expand = c(0,0)) +
scale_x_discrete(expand=c(0,0), position = "top") +
facet_wrap(~1) +
labs(x = "", y = "") +
theme_void() +
theme(axis.ticks = element_blank(),
strip.text = element_blank(),
strip.background = element_blank(),strip.placement = "inside",
panel.border = element_rect(size = 0.6, colour = "black", fill = NA),
axis.text.y = element_blank(),
axis.text.x.top = element_text(angle = 90,hjust = 0.5, vjust = 0.5, colour = "black", size = 8,margin = margin() ),
plot.margin = margin(b = 5, t = 0, unit = "pt") )
# module size plot
<- gene_module_df %>%
size_plot group_by(module_name) %>% tally() %>%
inner_join(colour_df) %>%
mutate(module_name = factor(module_name, levels = names(module_colour_order) )) %>%
ggplot(aes(x = module_name, y = 1)) +
geom_tile(aes(fill = n) ) +
#scale_fill_manual( values = module_colour_order) + #theme_void() +
#guides(fill = FALSE) +
scale_fill_gradient(low = "white", high = "black") +
scale_y_continuous(expand = c(0,0)) +
scale_x_discrete(expand=c(0,0), position = "top") +
facet_wrap(~1) +
labs(x = "", y = "", fill = "Module size") +
theme_void() +
theme(axis.ticks = element_blank(),
strip.text = element_blank(),
axis.text.x.top = element_blank(),
strip.background = element_blank(),strip.placement = "inside",
panel.border = element_rect(size = 0.6, colour = "black", fill = NA),
axis.text.y = element_blank(),
legend.key.size = unit(0.25, 'cm'),
legend.title = element_text(size = 5),
legend.text = element_text(size =5),
#axis.text.x.top = element_text(angle = 90,hjust = 0.5, vjust = 0.5, colour = "black", size = 8,margin = margin() ),
plot.margin = margin(b = 5, t = 0, unit = "pt") )
## dendrogram
<- as.dendrogram(module_clust)
dhc # Rectangular lines
<- dendro_data(dhc, type = "rectangle")
ddata
<-
dendro_plot ggplot(segment(ddata)) +
geom_segment(aes(x = x, y = y, xend = xend, yend = yend)) +
#geom_tile(data = colour_df, aes(x = 1:nrow(colour_df), y = 0, fill = module), colour = "black", size = 0.5 ) +
scale_fill_manual( values = module_colour_order) + theme_void() +
#ylim(-1,5) +
scale_x_continuous(expand = c(0, 0), limits = c(0.5, nrow(colour_df) + 0.5 )) +
guides(fill = FALSE) +
theme(plot.margin = margin() )
#marker_me_nxp <- marker_gene_enrichment("model2", set = c("nxp"))
#marker_me_panglao <- marker_gene_enrichment("model2", set = c("Panglaodb"))
<- marker_gene_enrichment(model, set = c("kelley"))
marker_me_kelley
<- marker_me_kelley %>%
marker_kelley_df left_join(module_names, by = "module") %>%
mutate( set = "Kelley et al. 2018") %>%
select(module, module_name, marker, module_length, marker_length, overlap, ratio, p.value, padj ) %>%
arrange(padj)
write_tsv(marker_kelley_df, here::here("ALS_SC/tables/networks/networks_kelley_markers_enrich.tsv") )
<- marker_gene_enrichment(model, set = c("mathys"))
marker_me_mathys
<- marker_me_mathys %>%
marker_mathys_df left_join(module_names, by = "module") %>%
mutate( set = "Mathys et al. 2020") %>%
select(module, module_name, marker, module_length, marker_length, overlap, ratio, p.value, padj ) %>%
arrange(padj)
write_tsv(marker_mathys_df, here::here("ALS_SC/tables/networks/networks_mathys_markers_enrich.tsv") )
<- tibble(
trait_df trait = c("age", "onset", "duration", "motor_onsetLimb", "sexMale", "disease_c9sALS", "tSTMN2"),
trait_label = c("Age at death", "Age at onset", "Disease duration", "Limb vs Bulbar onset", "Sex", "C9orf72", "tSTMN2")
)
<-
trait_me trait_assoc(model, return_object = TRUE) %>%
left_join(trait_df, by = "trait") %>%
left_join(module_names, by = "module") %>%
filter( !is.na(trait_label) ) %>%
mutate(padj = p.adjust(p.value, method = "bonferroni")) %>%
select(module, module_name, n, trait_label, correlation, p.value, padj) %>%
arrange(padj)
$n <- as.vector(trait_me$n)
trait_me
write_tsv(trait_me, here::here("ALS_SC/tables/networks/networks_trait_cor.tsv") )
<- trait_plot(trait_me) #, traits = c("duration", "motor_onsetLimb", "sexMale", "disease_c9sALS", "tSTMN2"))
m2_trait_plot
<- marker_plot(marker_me_kelley)
m2_marker_plot
#m2_marker_plot_panglao <- marker_plot(marker_me_panglao)
<- marker_gene_enrichment(model, set = c( "DEGs"))
deg_me
<- deg_me %>%
deg_me_df left_join(module_names, by = "module") %>%
#mutate( set = "") %>%
select(module, module_name, marker, module_length, marker_length, overlap, ratio, p.value, padj ) %>%
arrange(padj)
write_tsv(deg_me_df, here::here("ALS_SC/tables/networks/networks_deg_enrich.tsv") )
<- marker_plot(deg_me, marker_levels = c("Upregulated in ALS", "Downregulated in ALS"))
m2_deg_plot
<- marker_gene_enrichment(model, set = c("activation"))
marker_me_activation
<- marker_me_activation %>%
activation_me_df left_join(module_names, by = "module") %>%
#mutate( set = "") %>%
select(module, module_name, marker, module_length, marker_length, overlap, ratio, p.value, padj ) %>%
arrange(padj)
write_tsv(activation_me_df, here::here("ALS_SC/tables/networks/networks_activation_enrich.tsv") )
# for supplement
<- bind_rows(marker_mathys_df, activation_me_df)
all_marker_enrich_df write_tsv(all_marker_enrich_df , here::here("ALS_SC/tables/networks/networks_celltype_activation_enrich.tsv") )
# new for 2022 - marker enrichment using Mathys top 100
<- marker_gene_enrichment(model, set = c("mathys"))
marker_me_mathys <- marker_plot(marker_me_mathys)
m2_mathys_plot
<- marker_plot(marker_me_activation) #marker_levels = c( "Disease-associated microglia", "Disease-associated astrocytes", "Reactive astrocytes - MCAO", "Reactive astrocytes - LPS", "Plaque-induced genes") ) m2_activation_plot
Gene Ontology
curate modules into consensus pathways
# kegg_res <- module_go("model2", term_set = "KEGG", rerun = FALSE) %>% left_join(colour_df) %>%
# arrange(module_order)
<- module_go("model2a", term_set = "GO:BP", rerun = FALSE) %>% left_join(colour_df) %>%
go_res arrange(module_order) %>%
mutate(q = p.adjust(p, method = "bonferroni"))
## [1] " * N modules: 23"
## [1] " * N modules with GO:BP terms: 22"
<-
go_res_df %>%
go_res left_join(module_names, by = c("module", "module_name") )%>%
#mutate( set = "") %>%
select(module, module_name, go_term = term, n = query_size, overlap = intersection_size, precision, p.value = p, padj = q ) %>%
arrange(padj)
write_tsv(go_res_df, here::here("ALS_SC/tables/networks/networks_go_enrich.tsv") )
<- function(res, strings, module_order = module_colour_order){
go_grep <- tibble( module = module_colour_order )
p_df <- tibble( module = module_colour_order )
q_df for( i in 1:length(strings) ){
<- filter(res, grepl(strings[i], res$term ) ) %>% group_by(module) %>%
hit_modules summarise( q = min(q), p = min(p) )
names(strings)[i] ] <- hit_modules$p[match(p_df$module, hit_modules$module)]
p_df[, names(strings)[i] ] <- hit_modules$q[match(q_df$module, hit_modules$module)]
q_df[,
}<- p_df %>% pivot_longer( cols = !module, names_to = "term", values_to = "p.value")
p_df <- q_df %>% pivot_longer( cols = !module, names_to = "term", values_to = "q.value")
q_df
<- inner_join(p_df, q_df, by = c("term", "module"))
df return(df)
}
<- c(
go_strings "Extracellular matrix" = "extracellular matrix|adhesion",
#"Gene expression" = "gene expression|RNA processing|RNA metabol|transcription",
"Immune response" = "immune",
"Mitochondria" = "mitochondr|oxidative phosphorylation|electron transport chain" ,
"Myelination" = "myelination",
#"Cytoskeleton" = "cytoskeleton",
"Synaptic transmission"= "ynaptic",
"Translation" = "ribo|translation",
"Vasculature" = "vascula",
"Chromatin" = "chromatin|histone"
#"RNA splicing" = "RNA splicing",
#"Oxidative phosphorylation" = "xidative phos",
#"Cell cycle" = "cell cycle",
#"Microtubules" = "microtubule", # for M17 - ependymal cells
)
<-
go_plot go_grep( go_res, strings = go_strings) %>%
mutate(module = factor(module, levels = module_colour_order )) %>%
left_join(colour_df) %>%
mutate(p.value = ifelse(p.value < 1e-16, 1e-16, p.value)) %>%
mutate(term = fct_rev(as.factor(term))) %>%
mutate(label = ifelse( q.value < 0.05 & !is.na(q.value), "*", "" )) %>%
ggplot(aes(x = module_name, y = term, fill = -log10(p.value) )) +
geom_tile(colour = "black") +
geom_text( colour = "white", aes(label = label), nudge_y = -0.2) +
#guides(fill = FALSE) +
scale_x_discrete(expand = c(0,0)) +
scale_y_discrete(expand = c(0,0)) +
labs(x = "", y = "") +
facet_wrap(~1) +
scale_fill_gradient(low = "white", high = "darkred", na.value = "white") +
#scale_fill_manual(values = c("white", "red")) +
theme(axis.line = element_blank(),
strip.text = element_blank(),
panel.border = element_rect(size = 0.6, colour = "black", fill = NA),
axis.text.x = element_blank(),
axis.text.y = element_text(colour = "black"),
axis.ticks.x = element_blank(),
legend.key.size = unit(0.25, 'cm'),
legend.title = element_text(size = 5),
legend.text = element_text(size =5),
plot.margin = margin() )
go_plot
Bring it all together
<-
module_plot +
dendro_plot +
colour_plot +
size_plot +
m2_mathys_plot +
m2_activation_plot +
go_plot +
m2_deg_plot +
m2_trait_plot plot_layout(ncol = 1, heights = c(3,1,1, 6,5, 8, 2, 8), guides = "collect") +
plot_annotation(theme = theme(plot.margin = margin() ))
#module_plot
ggsave(module_plot, filename = paste0("ALS_SC/plots/WGCNA_", model, "_module_assoc_plot.pdf"), width = 6, height = 6.6)
Module eigengenes
<-
eigengene_plot module_trait_plot(m = "M8", trait = "duration", xlab = "Disease duration, months") +
module_trait_plot("M17", trait = "duration", "Disease duration, months") +
module_trait_plot("M20", trait = "tSTMN2", xlab = "tSTMN2 expression") +
module_trait_plot("M3", trait = "onset", xlab = "Age at symptom onset") +
#module_trait_plot("M37", "duration", "Disease duration, months") +
#module_trait_plot("M18", "duration", "Disease duration, months") +
#module_trait_plot("M11", "tSTMN2", "Truncated STMN2 expression") +
plot_layout(ncol = 1)
#eigengene_plot
ggsave(plot = eigengene_plot, filename = "ALS_SC/plots/WGCNA_eigengene_plot.pdf", width = 3, height = 8)
Using the modules as sets of genes, perform GSEA with the differentially expressed genes.
<- gene_modules %>%
module_gsea select(termid = module, gene = genename)
load(file = here::here("data/differential_expression/ALS/ALS_Spinal_Cord_de_res_Mar2022.RData"))
<-
gene_sets_tstat map(de_res_final, ~{
<- arrange(.x, desc(t) ) %>% filter(genename %in% module_gsea$gene)
.x <- .x$t
tstat names(tstat) <- .x$genename
return(tstat)
})
<-
gene_sets_lfc map(de_res_final, ~{
<-
.x %>%
.x filter(p_value < 0.05) %>%
arrange(desc(log_fc) ) %>% filter(genename %in% module_gsea$gene)
<- .x$log_fc
lfc names(lfc) <- .x$genename
return(lfc)
})
<- map_df(gene_sets_lfc, ~{
module_gsea_res as.data.frame(GSEA(.x, TERM2GENE = module_gsea,pvalueCutoff = Inf))
.id = "set") },
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.11% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are duplicate gene names, fgsea may produce unexpected results.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.12% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are duplicate gene names, fgsea may produce unexpected results.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are ties in the preranked stats (0.07% of the list).
## The order of those tied genes will be arbitrary, which may produce unexpected results.
## Warning in preparePathwaysAndStats(pathways, stats, minSize, maxSize, gseaParam, : There are duplicate gene names, fgsea may produce unexpected results.
## Warning in fgseaMultilevel(...): For some pathways, in reality P-values are less than 1e-10. You can set the `eps` argument to zero for better estimation.
<-
gsea_plot %>%
module_gsea_res full_join(colour_df, by = c("Description" = "module")) %>%
mutate(label = ifelse(p.adjust < 0.05, "*", "")) %>%
ggplot(aes(x = module_name, y = set, fill = NES)) +
geom_tile(colour = "black") +
geom_text( colour = "white", aes(label = label), nudge_y = -0.2) +
scale_fill_distiller(na.value = "lightgray", palette = "RdBu") +
scale_x_discrete(expand = c(0,0)) +
scale_y_discrete(expand = c(0,0)) +
labs(x = "", y = "") +
facet_wrap(~1) +
#scale_fill_manual(values = c("white", "red")) +
theme(axis.line = element_blank(),
strip.text = element_blank(),
panel.border = element_rect(size = 0.6, colour = "black", fill = NA),
axis.text.x = element_blank(),
axis.text.y = element_text(colour = "black"),
axis.ticks.x = element_blank(),
legend.key.size = unit(0.25, 'cm'),
legend.title = element_text(size = 5),
legend.text = element_text(size =5),
plot.margin = margin() )
##
%>%
module_gsea left_join(colour_df, by = c("termid" = "module")) %>%
#filter(module_name == "M3") %>%
left_join(de_res_final$CSC, by = c("gene" = "genename")) %>%
ggplot(aes(x = module_name, y = log_fc )) + geom_boxplot() +
geom_hline(yintercept = 0)
## Warning: Removed 40 rows containing non-finite values (stat_boxplot).
The module clustering correlates well with whether the genes in those modules were upregulated or downregulated in ALS vs control. Despite there not being any control samples in the analysis.
<- read_tsv(here::here("data/differential_expression/ALS/ALS_all_differential_expression_support.tsv")) %>%
de_support mutate(duration = as.numeric(duration))
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
<- read_tsv(here::here("data/deconvolution/Mathys_MuSiC_residual_results.tsv"))
mathys_music_resid
<- data.frame(short = c("Ast","End", "Mic", "Ex", "Oli", "Per" ),
conversion_table long = c("Astrocytes", "Endothelial", "Microglia", "Neurons", "Oligos", "Pericytes")
)
<- mathys_music_resid %>%
deconv_df select(sample, cell, deconv) %>%
left_join(conversion_table, by = c("cell" = "short")) %>%
select(sample, cell = long, deconv)
<-
support_deconv %>%
deconv_df drop_na() %>%
pivot_wider(names_from = cell, values_from = deconv) %>%
column_to_rownames(var = "sample")
<- support_deconv[ row.names(support_onehot),]
support_deconv
<-
deconv_me trait_assoc(model, return_object = TRUE, support_loc = support_deconv) %>%
#left_join(trait_df, by = "trait") %>%
left_join(module_names, by = "module") %>%
#filter( !is.na(trait_label) ) %>%
mutate(padj = p.adjust(p.value, method = "bonferroni")) %>%
select(module, module_name, n, trait_label = trait, correlation, p.value, padj) %>%
arrange(padj)
<- trait_plot(deconv_me, traits = unique(deconv_df$cell))
m2_deconv_plot
<-
deconv_multiplot /
dendro_plot /
colour_plot /
m2_mathys_plot /
m2_deconv_plot
(module_trait_plot("M8","Astrocytes", traits = support_deconv, "% Astrocytes") |
module_trait_plot("M9", "Endothelial", traits = support_deconv,"% Endothelial")
/
) module_trait_plot("M17", "Microglia", traits = support_deconv,"% Microglia") |
( module_trait_plot(m = "M20", trait = "Neurons", traits = support_deconv, "% Neurons") ) /
module_trait_plot(m = "M12", trait = "Oligos", traits = support_deconv, "% Oligodendrocytes") |
(module_trait_plot("M15", "Pericytes", traits = support_deconv,"% Pericytes")
+
) plot_layout(nrow = 7, ncol = 1, heights = c(2,1,6,6,3,3,3))
ggsave(deconv_multiplot, filename = "ALS_SC/plots/WGCNA_module_deconv_multiplots.png", width = 6, height = 7, dpi = 600)
#&
# scale_x_continuous(labels = scales::percent_format(accuracy = 1,trim = FALSE)) & plot_layout(ncol = 3, nrow = 2))
Model 2 - voom normalised , batch and technical metrics removed
41 modules 28 modules have at least 1 KEGG term (70%), 39 with at least 1 GO BP term duration and tissue have most associations, motor onset and disease_C9 have none that pass multiple testing. One module is associated with age. - cyan and skyblue posiively associated w duration, cyan is enriched in astrocyte markers - orangered4 and green negatively associate w duration, both enriched in microglia - yellow is associated with tissue (higher in CSC) highly enriched with oligo terms - yellow is higher in men - darkslateblue and turquoise both altered between tissues, both enriched with neuron markers
This suggests that patients with longer disease duration have lower microglia and higher astrocyte as proportions It also suggests that the cell composition between ALS Cervical and Lumbar spinal cord is different, with more neurons and fewer microglia and oligodendrocytes in the LSC.
Using neuroexpresso markers instead of Kelley: mostly the same results, ivory and darkslateblue no longer significant.
Activation markers: green and orangered4 (microglia marker enriched) also enriched for microglia activation - no obvious separate activation module for microglia, although Plum1 is very strongly enriched for disease-associated microglia but is not itself associated with any trait. astrocyte activation is enriched in red and orangered4
Model 3 - voom normalized, SVA-network with a bunch of SVs
some modules are correlated with RIN, one strongly with sex, very litle with age, onset. Some modulesa associate with C9orf72.
Microglia modules: pink, navajowhite2, darkolivegreen
Astrocyte modules: darkolivegreen, navajo white, plum1, orange, steelblue, darkorange - astro and micro have overlapping modules!
Neuron modules - lots - darkseagreen, floralwhite, lightsteelblue1
Oligo modules: blue, plum, royalblue, red
Tissue effects: floralwhite and green - neither enriched with cell type, nor with KEGG terms
Sex affects: floralwhite, green and honeydew1 - same as above
–>