From 12be62075b7e43d173ad5850ab09512252bb7dfc Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent@modolo.fr>
Date: Thu, 25 Oct 2018 18:12:45 +0200
Subject: [PATCH] add lm version of the computation

---
 R/anova.R              | 52 +++++++++++++++++++++++++++++++++++++++++-
 R/project.R            | 12 +++++++---
 criblejurkat_final.pbs |  2 +-
 criblejurkat_full.pbs  |  2 +-
 4 files changed, 62 insertions(+), 6 deletions(-)

diff --git a/R/anova.R b/R/anova.R
index 7a28459..1917e5c 100644
--- a/R/anova.R
+++ b/R/anova.R
@@ -116,7 +116,36 @@ batch_effect <- function(data) {
   return(data)
 }
 
-#' build rm model between drugs accounting for batch effect
+#' build lm model between drugs accounting for batch effect
+#'
+#' @param data a data.frame
+#' @param formula (default: "ratio ~ drug + batch") the formula of the model
+#' @param lower (default: TRUE) tests if "y" (the ratio) is lower than in controls
+#' @param outdir set the outdir directory to a path (default extracted from fcs)
+#' @return a data.frame
+#' @examples
+#' \dontrun{
+#' data <- anova_lm(data)
+#' }
+#' @importFrom grDevices dev.off pdf
+#' @importFrom stats as.formula quantile
+#' @export anova_lm
+anova_lm <- function(data, formula = "ratio ~ drug + batch", lower = TRUE,
+                      outdir) {
+  variable_name <- gsub("(.*) ~.*", "\\1", formula)
+  model <- lm(stats::as.formula(formula),
+                     data = data)
+  model_anova <- compute_pval(model, lower = lower)
+  if (missing(outdir)) {
+    outdir <- mk_outdir(data, "/test")
+  }
+  data <- export_lm_results(data, model_anova)
+  save(data, model, file = paste0(outdir, "anova_rlm.Rdata"))
+  export_drug_table(data, model_anova, outdir)
+  return(data)
+}
+
+#' build rlm model between drugs accounting for batch effect
 #'
 #' @param data a data.frame
 #' @param formula (default: "ratio ~ drug + batch") the formula of the model
@@ -181,6 +210,27 @@ export_rlm_results <- function(data, model_anova) {
   return(data)
 }
 
+#' @importFrom utils write.csv
+export_lm_results <- function(data, model_anova) {
+  data$coef <- NA
+  data$coef_std <- NA
+  data$pval <- NA
+  data$tval <- NA
+  for (drug in levels(data$drug)) {
+    if (!(drug %in% "None")) {
+      data$coef[data$drug %in% drug] <- model_anova$Estimate[grepl(drug,
+        rownames(model_anova))]
+      data$coef_std[data$drug %in% drug] <- model_anova[grepl(drug,
+        rownames(model_anova)), 2]
+      data$tval[data$drug %in% drug] <- model_anova$t.value[grepl(drug,
+        rownames(model_anova))]
+      data$pval[data$drug %in% drug] <- model_anova$pval[grepl(drug,
+        rownames(model_anova))]
+    }
+  }
+  return(data)
+}
+
 export_drug_table <- function(data, model_anova, outdir,
                               channels = c("Y1.A", "B1.A")) {
   drug_table <- model_anova[grepl("drug", rownames(model_anova)), ]
diff --git a/R/project.R b/R/project.R
index 1955d9a..e88168b 100644
--- a/R/project.R
+++ b/R/project.R
@@ -37,13 +37,14 @@ set_analysis <- function(data_path = "data/", meta = F) {
 #' perform all the analysis for a set of data sets
 #'
 #' @param data_path path to folder containing the data sets
+#' @param rlm_model (default: TRUE) should rlm model be use or lm ?
 #' @return TRUE if everythings ran correclty
 #' @examples
 #' \dontrun{
 #' analysis("data/set_test")
 #' }
 #' @export analysis
-analysis <- function(data_path = "data/") {
+analysis <- function(data_path = "data/", rlm_model = TRUE) {
   if (base::file.info(data_path)$isdir) {
     set_folders <- list.dirs(data_path, full.names = F)[-1]
   } else {
@@ -88,8 +89,13 @@ analysis <- function(data_path = "data/") {
     rm(set_data)
   }
   data$set <- as.factor(data$set)
-  data <- anova_rlm(data, formula = "ratio ~ drug + batch + set",
-            outdir = outdir_rlm)
+  if (rlm_model) {
+    data <- anova_rlm(data, formula = "ratio ~ drug + batch + set",
+              outdir = outdir_rlm)
+  } else {
+    data <- anova_lm(data, formula = "ratio ~ drug + batch + set",
+              outdir = outdir_rlm)
+  }
   print(summary(data))
   for (folder in set_folders) {
     message(paste0("plotting for ", folder))
diff --git a/criblejurkat_final.pbs b/criblejurkat_final.pbs
index 6208ede..6bbf09b 100644
--- a/criblejurkat_final.pbs
+++ b/criblejurkat_final.pbs
@@ -24,6 +24,6 @@ source /usr/share/lmod/lmod/init/bash
 ml R/3.4.3
 export R_LIBS="/Xnfs/lbmcdb/common/R/x86_64-pc-linux-gnu-library/3.4/"
 
-R -e '.libPaths(); library("criblejurkat"); analysis("data/final"); traceback()'
+R -e '.libPaths(); library("criblejurkat"); analysis("data/final", rlm_model = F); traceback()'
 
 
diff --git a/criblejurkat_full.pbs b/criblejurkat_full.pbs
index f8df341..4fcaf3c 100644
--- a/criblejurkat_full.pbs
+++ b/criblejurkat_full.pbs
@@ -24,6 +24,6 @@ source /usr/share/lmod/lmod/init/bash
 ml R/3.4.3
 export R_LIBS="/Xnfs/lbmcdb/common/R/x86_64-pc-linux-gnu-library/3.4/"
 
-R -e '.libPaths(); library("criblejurkat"); analysis("data/full"); traceback()'
+R -e '.libPaths(); library("criblejurkat"); analysis("data/full", rlm_model = F); traceback()'
 
 
-- 
GitLab