ALS deconvolution

work_dir = here::here("data/collate_samples/")

plots_folder = here::here("docs/ALS_deconvolution/plots/")
if(!dir.exists(plots_folder)){dir.create(plots_folder)}
#deconv_github = "/Users/jack/software/CortexCellDeconv/"
deconv_github <- "/Users/Jack/google_drive/Work/Mount_Sinai/Misc/CortexCellDeconv/"

Input our expression data

Don’t apply any normalising or scaling

load(here::here("data/feb_2020/gene_counts_no_tags.RData") )

metadata <- read_tsv( here::here("data/differential_expression/ALS/ALS_all_differential_expression_support.tsv") )

# filter out non-CNS samples
tissues <- c("Lumbar_Spinal_Cord", "Cervical_Spinal_Cord", "Thoracic_Spinal_Cord")

# remove low RIN samples - for good
metadata <- filter(metadata, tissue %in% tissues)

#Gene name for the voom table 
gencode_30 <- read_tsv(here::here("data/misc/gencode.v30.tx2gene.tsv.gz")) %>%
  janitor::clean_names() %>%
  dplyr::select(ensembl = geneid, symbol = genename) %>%
  dplyr::distinct() %>%
  dplyr::mutate(ensembl =  str_split_fixed(ensembl, "\\.", 2)[,1] ) %>%
  dplyr::distinct() 

genes_loc <- as.data.frame(genes_counts[, metadata$sample])
#rm(genes_counts)

# remove lowly expressed genes
keep.exp <- rowSums(cpm(genes_loc) > 1) >= ceiling(0.5*ncol(genes_loc))
genes_loc <- genes_loc[keep.exp,]

# QN is bad!
#genes_loc <-  EDASeq::betweenLaneNormalization(as.matrix(genes_loc),which = "full")

# normalisation is ok..
#norm_loc <- calcNormFactors(genes_loc, method = "TMM") #normalized counts with TMM
#dge <- DGEList(counts=genes_loc, samples=data.frame(row.names = colnames(genes_loc), sample = colnames(genes_loc)), norm.factors = norm_loc) #design_loc <- model.matrix(as.formula(design_formula), data = support_loc)
#genes_counts_voom_loc <- voom(dge)

#genes_counts_voom[1:5,1:8]
geneExpr_our = as.data.frame(genes_loc)
geneExpr_our$ensembl = row.names(geneExpr_our)
geneExpr_our <- merge(geneExpr_our, gencode_30, by = "ensembl")
#Remove duplicates 
geneExpr_our <- geneExpr_our[! duplicated(geneExpr_our$symbol),]
dim(geneExpr_our) # 19730   276

## [1] 16472   382

rownames(geneExpr_our) = geneExpr_our$symbol
geneExpr_our$ensembl = NULL
geneExpr_our$symbol = NULL

#head(geneExpr_our)

rm(genes_loc)
# row scaling is bad!
#geneResid_our = geneExpr_our - rowMeans(geneExpr_our)

Load Darmanis data

Ben Barres and Stephen Quake paper.

Reference dataset from Darmanis et al. “A survey of human brain transcriptome diversity at the single cell level.” https://www.pnas.org/content/112/23/7285.long#sec-7

MuSic requires replicate donors for each cell type to measure variability between donors. Darmanis luckily took single cells from multiple brains.

DarmanisCells from GSE67835 single cell data.

Try to factorise code into functions

Created set of Darmanis marker genes (100 per cell with highest fold change >0) and total genes.

Deconvolution

dtangle with Darmanis reference

Darmanis - dtangle

The expression data comes from our dataset.

Darmanis - MuSiC

Darmanis - DSA

The expression data comes from our dataset. DSA prefers small numbers of markers so take top 10

Mathys et al Human Brain - MuSiC

Create markers

# library(readr)
# library(here)
# library(dplyr)
# 
# load(here::here("data/feb_2020/gene_counts_no_tags.RData") )
# 
# metadata <- read_tsv( here::here("data/differential_expression/ALS/ALS_all_differential_expression_support.tsv") )
# 
# # filter out non-CNS samples
# tissues <- c("Lumbar_Spinal_Cord", "Cervical_Spinal_Cord", "Thoracic_Spinal_Cord")
# 
# # remove low RIN samples - for good
# metadata <- filter(metadata, tissue %in% tissues)
# 
# #Gene name for the voom table 
# gencode_30 <- read_tsv(here::here("data/misc/gencode.v30.tx2gene.tsv.gz")) %>%
#   janitor::clean_names() %>%
#   dplyr::select(ensembl = geneid, symbol = genename) %>%
#   dplyr::distinct() %>%
#   dplyr::mutate(ensembl =  str_split_fixed(ensembl, "\\.", 2)[,1] ) %>%
#   dplyr::distinct() 
# 
# genes_loc <- as.data.frame(genes_counts[, metadata$sample])
# 
# # remove lowly expressed genes
# keep.exp <- rowSums(cpm(genes_loc) > 1) >= ceiling(0.5*ncol(genes_loc))
# genes_loc <- genes_loc[keep.exp,]
# 
# geneExpr_our = as.data.frame(genes_loc)
# geneExpr_our$ensembl = row.names(geneExpr_our)
# geneExpr_our <- merge(geneExpr_our, gencode_30, by = "ensembl")
# #Remove duplicates 
# geneExpr_our <- geneExpr_our[! duplicated(geneExpr_our$symbol),]
# dim(geneExpr_our) # 19730   276
# rownames(geneExpr_our) = geneExpr_our$symbol
# geneExpr_our$ensembl = NULL
# geneExpr_our$symbol = NULL
# 
# 
# 
# # differential expression - compare each cell type to the mean
# compareReferenceCells <- function(bulk_expression, reference_expression, reference_meta){
#   # only use reference genes present in bulk
#   int <- intersect(rownames(bulk_expression),rownames(reference_expression))
#   y <- DGEList(counts= reference_expression[int,])
#   y <- calcNormFactors(y,method = 'TMM', Acutoff =  quantile(log2(reference_expression[,1]/sum(reference_expression[,1])),0.75))
#   
#   reference_expression <- cpm(y, log=FALSE)
#   reference_expressionMean = sapply(unique(reference_meta),function(x)rowMeans(reference_expression[,names(which(reference_meta==x))]))
#   int = intersect(rownames(bulk_expression),rownames(reference_expressionMean))
#   design = model.matrix(~.-1,data.frame(cell= reference_meta[colnames(reference_expression)]))
#   colnames(design) = gsub('cell','',colnames(design))
#   v <- voom(reference_expression[int,rownames(design)], design, plot=FALSE)
#   fit <- lmFit(v, design)
#   x = sapply(colnames(design),function(x)paste(x,'-(',paste(colnames(design)[colnames(design)!=x],collapse = "+"),')/4',sep = ''))
#   con = makeContrasts(contrasts = x,levels = colnames(design))
#   fit = contrasts.fit(fit,con)
#   fit <- eBayes(fit, robust=TRUE)
#   
#   return(list(design = design, fit = fit))
# }
# 
# createMarkers <- function( input, n_markers = 10){
#   markers = NULL
#   
#   for(i in 1:ncol(input$design)){
#     x = input$fit$coef[p.adjust(input$fit$p.v[,i],'fdr')<0.05,i]
#     markers[[colnames(input$design)[i]]] =  names(head(sort(-x[x>0]),n_markers))
#   }
#   unlist(lapply(markers,length))
#   
#   full_genes = NULL
#   for(i in 1:ncol(input$design)){
#     x = input$fit$coef[p.adjust(input$fit$p.v[,i],'fdr')<0.05,i]
#    full_genes[[colnames(input$design)[i]]] =  names(sort(-x[x>0]))
#   }
#   unlist(lapply(full_genes,length))
#   
#   return( list(markers = markers, full = full_genes) )
# }
# 
# 
# ## readMM requires the Matrix package
# mathys_counts <- Matrix::readMM(here::here("data/Mathys_Brain_scRNA/filtered_count_matrix.mtx.gz") )
# rownames(mathys_counts) <- readLines(here::here("data/Mathys_Brain_scRNA/filtered_gene_row_names.txt") )
mathys_meta <- read_tsv(here::here("data/Mathys_Brain_scRNA/filtered_column_metadata.txt") ) %>%
   janitor::clean_names()
#colnames(mathys_counts) <- mathys_meta$tag
# 
 # projid is subject I think
# #length(table(mathys_meta$projid))
# table(mathys_meta$broad_cell_type)
# 
mathys_meta <- dplyr::select(mathys_meta, cellID = tag, cellType = broad_cell_type, sampleID = projid)
# 
# # load in ROSMAP metadata
# # keep only controls
rosmap_meta <- read_csv(here::here("data/Mathys_Brain_scRNA/ROSMAP_Clinical_2019-05_v3.csv")) %>%
   filter(projid %in% mathys_meta$sampleID, cogdx == 1)
# 
# #keep only control individuals
# mathys_controls_metadata <- filter(mathys_meta, sampleID %in% rosmap_meta$projid )
# 
# mathys_control_cells <- mathys_controls_metadata$cellType
# names(mathys_control_cells) <- mathys_controls_metadata$cellID
# # 23,513 cells left
# mathys_controls_counts <- mathys_counts[, colnames(mathys_counts) %in% mathys_controls_metadata$sampleID] 
# 
# write(mathys_controls_metadata, mathys_controls_counts, file = "/data/Mathys_Brain_scRNA/Mathys_controls.RData")
# 
# #createMarkers 
# mathys_fit <- compareReferenceCells(bulk_expression = geneExpr_our, reference_expression = mathys_controls_counts, reference_meta = mathys_control_cells)
# 
# mathys_markers <- createMarkers(darmanis_fit, n_markers = 100)
# 
# write(mathys_fit, mathys_markers, file = "/data/Mathys_Brain_scRNA/Mathys_markers.RData")

## [1] 3040   21

Comparison between methods and reference sets

Regress out technical and other factors and compare deconvolution estimates between ALS and Controls

rna_tech_df <- read_tsv(here::here("data/differential_expression/ALS/ALS_all_differential_expression_tech_metrics.tsv"))

residualiser <- function(data){
  data <- left_join(data, rna_tech_df, by = "sample") 
  if("onset" %in% names(data)){
    data$onset <- NULL
  }
  data <- data[complete.cases(data),]
  #print(table(data$prep))
  mod <- lm( deconv ~ factor(prep) + age + factor(sex) + rin + pct_mrna_bases + pct_ribosomal_bases + pct_chimeras + pct_intergenic_bases + median_3prime_bias + median_5prime_bias, data)
  data$resid <-  residuals(mod) 
  
  data$p_nom <- pairwise.wilcox.test(x = data$deconv, g = data$disease)$p.value[1]
  data$p_resid <- pairwise.wilcox.test(x = data$resid, g = data$disease)$p.value[1]
  
  return(data)
}

#darmanis_dsa_resid <- darmanis_dsa_res %>% filter(sample != "CGND-HRA-00431.1" ) %>% split(paste(.$tissue, .$cell) ) %>% map_df( residualiser)

darmanis_dtangle_resid <- darmanis_dtangle_res %>% filter(sample != "CGND-HRA-00431.1" ) %>% split(paste(.$tissue, .$cell) ) %>% map_df( residualiser)

darmanis_music_resid <- darmanis_music_res %>% filter(sample != "CGND-HRA-00431.1" ) %>% split(paste(.$tissue, .$cell) ) %>% map_df( residualiser)

mathys_music_resid <- mathys_music_res %>% filter(sample != "CGND-HRA-00431.1" ) %>% split(paste(.$tissue, .$cell) ) %>% map_df( residualiser)

#write_tsv(darmanis_dsa_resid, path = here::here("data/deconvolution/Darmanis_DSA_residual_results.tsv") )
write_tsv(darmanis_music_resid, path = here::here("data/deconvolution/Darmanis_MuSiC_residual_results.tsv") )
write_tsv(darmanis_dtangle_resid, path = here::here("data/deconvolution/Darmanis_dtangle_residual_results.tsv") )
write_tsv(mathys_music_resid, path = here::here("data/deconvolution/Mathys_MuSiC_residual_results.tsv") )

darmanis_music_resid <- read_tsv(here::here("data/deconvolution/Darmanis_MuSiC_residual_results.tsv") )
darmanis_dtangle_resid <- read_tsv( here::here("data/deconvolution/Darmanis_dtangle_residual_results.tsv") )
mathys_music_resid <- read_tsv(here::here("data/deconvolution/Mathys_MuSiC_residual_results.tsv"))

## write out MuSiC estimates with Mathys cells for supplementary table
mathys_music_df <-
  mathys_music_resid %>%
  dplyr::select(sample, cell, deconv) %>%
  pivot_wider(names_from = cell, values_from = deconv)

write_csv(mathys_music_df, file = "../../ALS_SC/tables/mathys_music_deconvolution_table.csv")

estimate_plot <- function(results, method, reference = "Darmanis", ycol = "deconv", p = "p_nom", 
                          tissues = c("Cervical_Spinal_Cord", "Lumbar_Spinal_Cord", "Thoracic_Spinal_Cord") 
                          ){
  to_plot <- results %>%
    mutate(tissue = factor(tissue, levels = tissues)) %>%
    filter(tissue %in% tissues) %>%
    filter(disease %in% c("ALS","Control")) %>%
    mutate( disease = forcats::fct_relevel(disease,"Control")) %>%
    mutate(tissue = factor( gsub("_","\n", tissue), levels = gsub("_","\n", tissues) )  ) %>%
    filter( !is.na(cell))
  
  p_df <- to_plot %>% 
    group_by(cell, tissue) %>% 
    summarise(p_nom = min(p_nom), p_resid = min(p_resid)) %>%
    mutate(padj = p.adjust(p_resid, method = "bonferroni")) %>%
    mutate(padj_label = case_when(
    padj < 0.0001 ~ "***",
    padj < 0.001 ~ "**",
    padj < 0.05 ~ "*",
    TRUE ~ "."
    ))
  
  plot <- 
  to_plot %>%
  ggplot(aes(x = disease, deconv)) +
    rasterise(
      geom_point(aes(colour = disease),size = 0.5, position = position_jitter(width = 0.33, height = 0) ),
      dpi = 300
      )+
    geom_boxplot(fill = NA,notch = FALSE, na.rm = TRUE, outlier.color = NA) +
    facet_grid(tissue ~ cell,switch = "y" ) +
    scale_colour_manual(values = c("#4F8FC4", "#B61927")) +
    labs(subtitle = paste0( reference, " - ", method) , x = "", y = "" ) +
    guides(fill = FALSE, colour = FALSE) +
    theme_jh() +
    theme(
      plot.title = element_blank(),
      strip.background = element_blank(),
      strip.switch.pad.grid = unit(0,units ="pt"),
      panel.spacing.x = unit(0,units = "pt"), 
      strip.placement = "outside",
      panel.border = element_blank(),
      strip.text.x = element_text(face = "bold", hjust = 0.5, vjust = 1, margin = margin(t =5, b = 5, unit = "pt")),
      strip.text.y.left = element_text(angle = 0),
      plot.margin = margin()
    ) +
    labs(y = "") +
    scale_y_continuous(labels = scales::percent_format(accuracy = 1), limits = c(0,1)) + 
    geom_text(nudge_x = 0.5, data = p_df, aes(x = 1, y = 0.9, label = padj_label)   )
  
  
  return(plot)
}

conversion_table <- data.frame(short = c("Ast","End", "Mic", "Ex", "Oli", "Per" ),
                               long = c("Astrocytes", "Endothelial", "Microglia", "Neurons", "Oligos", "Pericytes")
  )

mathys_music_df <- mathys_music_resid %>%
  left_join(conversion_table, by = c("cell" = "short") ) %>%
  select(-cell) %>%
  rename(cell = long) %>%
  select(sample, disease, duration, cell, tissue, deconv, resid, p_nom, p_resid)



mathys_music_csc <- estimate_plot(mathys_music_df, method = "MuSiC", reference = "Mathys single nucleus", tissues = c("Cervical_Spinal_Cord"))

mathys_music_csc

## for main text - plot Cervical Spinal Cord with Mathys MuSiC
ggsave(plot = mathys_music_csc, filename = here::here("ALS_SC/plots/mathys_music_deconvolution_plot.pdf"), width = 8, height = 2.5)

estimate_multiplot <- 
  estimate_plot(mathys_music_df, method = "MuSiC", reference = "Mathys single nucleus") +

  estimate_plot(darmanis_music_resid, method = "MuSiC", reference = "Darmanis single cell") +

    plot_layout(ncol = 1) +
  plot_annotation(tag_levels = "A") &
  theme(plot.tag = element_text(face = "bold"))

ggsave(plot = estimate_multiplot, filename =  here::here("ALS_SC/plots/deconvolution_music_plots.pdf"), width = 8, height = 9)

estimate_multiplot

dtangle_plot <- estimate_plot(darmanis_dtangle_resid, method = "dtangle", reference = "Darmanis single cell") +
  theme(plot.margin = margin(t = 5,b=5,l=5,r =5, unit = "pt"))

ggsave(plot = dtangle_plot, filename =  here::here("ALS_SC/plots/deconvolution_dtangle_plot.pdf"), width = 8, height = 6)

dtangle_plot

For pairs of results, calculate correlation between the estimate of neuronal, astrocyte and microglial proportion in each sample

compare_2_results <- function(res1, res2, res1.name, res2.name){
  conversion_table <- data.frame(short = c("astro","endo", "microglia", "neuro", "oligo" ),
                               long = c("astrocytes", "endothelial", "microglia", "neurons", "oligodendrocytes")
  )
  # match ids
  if( all(res1$cell %in% conversion_table$short) ){
    res1$cell <- conversion_table$long[ match(res1$cell, conversion_table$short)]
  }
  if( all(res2$cell %in% conversion_table$short) ){
    res2$cell <- conversion_table$long[ match(res2$cell, conversion_table$short)]
  }
  
  left_join(
  x = dplyr::select(res1, sample, cell, deconv),
  y =dplyr::select(res2, sample, cell, deconv), 
  suffix = c(".res1", ".res2"),
  by = c("sample", "cell")
) %>%
  left_join(metadata, by = "sample") %>%
    mutate(tissue = gsub("_", "\n", tissue)) %>%
    filter(!is.na(tissue)) %>%
    #filter(tissue %in% c("Lumbar_Spinal_Cord", "Cervical_Spinal_Cord")) %>%
  ggplot(aes(x = deconv.res1, y = deconv.res2 ) ) + 
    geom_point( size = 0.5, aes(colour = disease), alpha = 0.5) +
    geom_abline(slope =1 , intercept =0, linetype = 3) +
    labs(x = res1.name, y = res2.name)+
    theme_jh() +
    facet_grid(tissue ~ cell,switch = "y", scales = "free" ) +
    ggpubr::stat_cor(method = "spearman",aes(label = ..r.label..) ) +
    scale_colour_manual(values = c("red", "darkblue")) +
    theme(
      #plot.title = element_blank(),
      strip.background = element_blank(),
      strip.switch.pad.grid = unit(0,units ="pt"),
      panel.spacing.x = unit(0,units = "pt"), 
      strip.placement = "outside",
      panel.border = element_blank(),
      strip.text.x = element_text(face = "bold", hjust = 0.5, vjust = 1, margin = margin(t =5, b = 5, unit = "pt")),
      strip.text.y.left = element_text(angle = 0),
    ) +
    scale_y_continuous(labels = scales::percent_format(accuracy = 1) ) +
    scale_x_continuous(labels = scales::percent_format(accuracy = 1) )
}


#compare_2_results(darmanis_dtangle_res, darmanis_dsa_res, "dtangle", "DSA") + labs(title = "Darmanis - DSA vs dtangle") +
darmanis_compare_plot <- 
  compare_2_results(darmanis_music_res, darmanis_dtangle_res, "MuSiC", "dtangle") + labs(title = "Darmanis - MuSiC vs dtangle", x = "MuSiC estimate", y = "dtangle estimate") #+


ggsave(plot = darmanis_compare_plot, filename =  here::here("ALS_SC/plots/deconvolution_darmanis_compare_plot.pdf"), width = 9, height = 6)
#compare_2_results(darmanis_music_res, darmanis_dsa_res, "MuSiC", "DSA")+ labs(title = "Darmanis - MuSiC vs DSA") +
 # plot_layout(nrow = 3)

darmanis_compare_plot

Endothelial cells have worst agreement between MuSiC and the two marker based methods dtangle and DSA. Should I remove endothelial cells as a category and rerun?

Compare MuSiC results from Darmanis and Mathys single cell data

#table(msc_music_res$cell)
darmanis_mathys_conversion <- data.frame(
  mathys = c("Ast", "End", "Ex", "In", "Mic", "Oli", "Opc", "Per"),
  darmanis = c("astrocytes", "endothelial", "neurons", "neurons", "microglia", "oligodendrocytes", NA, NA)
)

mathys_compare <- mathys_music_res %>% dplyr::select(sample, cell, deconv)
mathys_compare$cell_mathys <- mathys_compare$cell
mathys_compare$cell <- darmanis_mathys_conversion$darmanis[match(mathys_compare$cell, darmanis_mathys_conversion$mathys)]



reference_compare_plot <-
  left_join(mathys_compare, 
            dplyr::select(darmanis_music_res, sample, cell, deconv), by = c("sample", "cell"), 
            suffix = c(".mathys", ".darmanis")) %>%
  left_join(metadata, by = "sample") %>%
  mutate(tissue = gsub("_", "\n", tissue )) %>%
  #filter(tissue %in% c("Cervical_Spinal_Cord", "Lumbar_Spinal_Cord")) %>%
  filter(cell_mathys %in% c("Ast", "End", "Ex", "Mic", "Oli")) %>%
  ggplot(aes(y = deconv.mathys, x = deconv.darmanis)) + 
    geom_point( size = 0.5, aes(colour = disease), alpha = 0.5) +
    geom_abline(slope =1 , intercept =0, linetype = 3) +
    #labs(x = res1.name, y = res2.name)+
    theme_jh() +
    facet_grid(tissue ~ cell,switch = "y", scales = "free" ) +
    ggpubr::stat_cor(method = "spearman",aes(label = ..r.label..) ) +
    scale_colour_manual(values = c("red", "darkblue")) +
    theme(
      strip.background = element_blank(),
      strip.switch.pad.grid = unit(0,units ="pt"),
      panel.spacing.x = unit(0,units = "pt"), 
      strip.placement = "outside",
      panel.border = element_blank(),
      strip.text.x = element_text(face = "bold", hjust = 0.5, vjust = 1, margin = margin(t =5, b = 5, unit = "pt")),
      strip.text.y.left = element_text(angle = 0),
    ) +
    scale_y_continuous(labels = scales::percent_format(accuracy = 1) ) +
    scale_x_continuous(labels = scales::percent_format(accuracy = 1) ) +
  labs(x = "MuSiC estimate with Mathys", y = "MuSiC estimate with Darmanis")

ggsave(plot = reference_compare_plot, filename =  here::here("ALS_SC/plots/deconvolution_reference_compare_plot.pdf"), width = 9, height = 6)

reference_compare_plot

# 
# # intersect our data with Darmanis, calculate marker genes
# int <- intersect(rownames(geneExpr_our),rownames(Darmanis))
# 
# int <- rownames(Darmanis)
# 
# y <- DGEList(counts= Darmanis[int,])
# y <- calcNormFactors(y,method = 'TMM', Acutoff =  quantile(log2(Darmanis[,1]/sum(Darmanis[,1])),0.75))
# Darmanis <- cpm(y, log=FALSE)
# DarmanisMean = sapply(unique(DarmanisCells),function(x)rowMeans(Darmanis[,names(which(DarmanisCells==x))]))
# int <- intersect(rownames(geneExpr_our),rownames(DarmanisMean))
# 
# int <- rownames(DarmanisMean)
# 
# design <- model.matrix(~.-1,data.frame(cell= DarmanisCells[colnames(Darmanis)]))
# colnames(design) <- gsub('cell','',colnames(design))
# v <- voom(Darmanis[int,rownames(design)], design, plot=FALSE)
# fit <- lmFit(v, design)
# x <- sapply(colnames(design),function(x)paste(x,'-(',paste(colnames(design)[colnames(design)!=x],collapse = "+"),')/4',sep = ''))
# con = makeContrasts(contrasts = x,levels = colnames(design))
# fit = contrasts.fit(fit,con)
# fit <- eBayes(fit, robust=TRUE)
# 
# 
# DarmanisMarkers = NULL
# 
# n_markers <- 20
# 
# for(i in 1:ncol(design)){
#   x = fit$coef[p.adjust(fit$p.v[,i],'fdr')<0.05,i]
#   DarmanisMarkers[[colnames(design)[i]]] =  names(head(sort(-x[x>0]),n_markers))
# }
# unlist(lapply(DarmanisMarkers,length))
# DarmanisMarkersFull = NULL
# for(i in 1:ncol(design)){
#   x = fit$coef[p.adjust(fit$p.v[,i],'fdr')<0.05,i]
#   DarmanisMarkersFull[[colnames(design)[i]]] =  names(sort(-x[x>0]))
# }
# unlist(lapply(DarmanisMarkersFull,length))
# 
# # get microglial comparison 1 - vs mean of all
# microglia_vs_all <- topTable(fit, coef = 3, number = Inf)  %>%
#   as.data.frame() %>%
#   rownames_to_column(var = "gene_name") %>%
#   janitor::clean_names()
# 
# microglia_vs_all %>% 
#   filter(adj_p_val < 0.05, log_fc > 2) %>%
#   arrange( desc(log_fc))
#   
# # better comparison - microglia vs neurons
# mg_neur <- filter(darmanis_metadata, cellType %in% c("neurons", "microglia")) %>%
#   mutate(cellType = factor(cellType, levels = c("neurons", "microglia")))
# row.names(mg_neur) <- mg_neur$cellID
# 
# design2 <- model.matrix(~ cellType,  mg_neur) #data.frame(cell= DarmanisCells[mg_neur$cellID]))
# 
# v <- voom(Darmanis[int,rownames(design2)], design2, plot=FALSE)
# fit <- lmFit(v, design2)
# 
# #fit = contrasts.fit(fit,con)
# 
# fit <- eBayes(fit, robust=TRUE)
# 
# microglia_vs_neurons <- 
#   topTable(fit, coef = 2, number = Inf) %>%
#   as.data.frame() %>%
#   rownames_to_column(var = "gene_name") %>%
#   janitor::clean_names()
# 
# microglia_vs_neurons %>% 
#   filter(adj_p_val < 0.05, log_fc > 2) %>%
#   arrange( desc(log_fc)) 
#   
# write_tsv(microglia_vs_all, "../../other_datasets/Darmanis/Darmanis_microglia_vs_all_limma.tsv")
# 
# write_tsv(microglia_vs_neurons, "../../other_datasets/Darmanis/Darmanis_microglia_vs_neurons_limma.tsv")