Skip to content
Snippets Groups Projects
Select Git revision
  • 89a843958043ac8bdbf82b5a2e2bb5fe81901846
  • master default protected
  • v2.1.1
  • v2.1.0
4 results

wrapper_dds.R

Blame
  • wrapper_dds.R 6.43 KiB
    # WARNING - Generated by {fusen} from /dev/flat_full.Rmd: do not edit by hand
    
    
    #' Wrapper Function for DESeq2 Analysis
    #'
    #' This function performs differential expression analysis using DESeq2 based on the provided
    #' DESeqDataSet (dds) object. It calculates the dispersion values from the dds object and then
    #' performs inference on the log-fold change (LFC) values using the specified parameters.
    #'
    #' @param dds A DESeqDataSet object containing the count data and experimental design.
    #' @param lfcThreshold The threshold for minimum log-fold change (LFC) to consider differentially expressed.
    #' @param altHypothesis The alternative hypothesis for the analysis, indicating the direction of change.
    #'                      Options are "greater", "less", or "two.sided".
    #' @param correction_method The method for p-value correction. Default is "BH" (Benjamini-Hochberg).
    #'
    #' @return A list containing the dispersion values and the results of the differential expression analysis.
    #'         The dispersion values are calculated from the dds object and named according to sample names.
    #'         The inference results include adjusted p-values and log2 fold changes for each gene.
    #'
    #' @examples
    #' N_GENES = 100
    #' MAX_REPLICATES = 5
    #' MIN_REPLICATES = 5
    #' ## --init variable
    #' input_var_list <- init_variable( name = "genotype", mu = 12, sd = 0.1, level = 3) %>%
    #'                    init_variable(name = "environment", mu = c(0,1), NA , level = 2) 
    #'
    #' mock_data <- mock_rnaseq(input_var_list, N_GENES, MIN_REPLICATES, MAX_REPLICATES)
    #' dds <- DESeq2::DESeqDataSetFromMatrix(mock_data$counts , 
    #'                    mock_data$metadata, ~ genotype + environment)
    #' dds <- DESeq2::DESeq(dds, quiet = TRUE)
    #' result <- wrap_dds(dds, lfcThreshold = 1, altHypothesis = "greater")
    #' @export
    wrap_dds <- function(dds, lfcThreshold , altHypothesis, correction_method = "BH") {
      dds_full <- S4Vectors::mcols(dds) %>% as.data.frame()
      
      ## -- dispersion
      message("INFO: The dispersion values from DESeq2 were reparametrized to their reciprocals (1/dispersion).")
      dispersion <- 1/dds_full$dispersion
      names(dispersion) <- rownames(dds_full)
    
      ## -- coeff
      inference_df <- get_inference_dds(dds_full, lfcThreshold, altHypothesis, correction_method)
      res <- list(dispersion = dispersion, fixEff = inference_df)
      return(res)
    }
    
    
    
    #' Calculate Inference for Differential Expression Analysis
    #'
    #' This function calculates inference for differential expression analysis based on the results of DESeq2.
    #'
    #' @param dds_full A data frame containing DESeq2 results, including estimate and standard error information.
    #' @param lfcThreshold Log fold change threshold for determining differentially expressed genes.
    #' @param altHypothesis Alternative hypothesis for testing, one of "greater", "less", or "two.sided".
    #' @param correction_method Method for multiple hypothesis correction, e.g., "BH" (Benjamini-Hochberg).
    #'
    #' @return A data frame containing inference results, including statistics, p-values, and adjusted p-values.
    #'
    #' @examples
    #' \dontrun{
    #' # Example usage of the function
    #' inference_result <- get_inference_dds(dds_full, lfcThreshold = 0.5, 
    #'                                    altHypothesis = "greater", 
    #'                                    correction_method = "BH")
    #' }
    #' @importFrom stats p.adjust
    #' @export
    get_inference_dds <- function(dds_full, lfcThreshold, altHypothesis, correction_method){
    
      ## -- build subdtf
      stdErr_df <- getSE_df(dds_full)
      estim_df <- getEstimate_df(dds_full)
      ## -- join
      df2ret <- join_dtf(estim_df, stdErr_df, k1 = c("ID", "term") , k2 = c("ID", "term"))
    
      ## -- convert to ln
      message("INFO: The log2-fold change estimates and standard errors from DESeq2 were converted to the natural logarithm scale.")
      df2ret$estimate <- df2ret$estimate*log(2)
      df2ret$std.error <- df2ret$std.error*log(2)
    
      ## -- some details reshaped
      df2ret$term <- gsub("_vs_.*","", df2ret$term)
      df2ret$term <- gsub(pattern = "_", df2ret$term, replacement = "")
      df2ret$term <- removeDuplicatedWord(df2ret$term)
      df2ret$term <- gsub(pattern = "[.]", df2ret$term, replacement = ":")
      df2ret$effect <- "fixed"
      idx_intercept <- df2ret$term == "Intercept"
      df2ret$term[idx_intercept] <- "(Intercept)"
    
      ## -- statistical part
      waldRes <- wald_test(df2ret$estimate, df2ret$std.error, lfcThreshold, altHypothesis)
      df2ret$statistic <- waldRes$statistic
      df2ret$p.value <- waldRes$p.value
      df2ret$p.adj <- stats::p.adjust(df2ret$p.value, method = correction_method)
    
      return(df2ret)
    }
    
    
    #' Extract Standard Error Information from DESeq2 Results
    #'
    #' This function extracts the standard error (SE) information from DESeq2 results.
    #'
    #' @param dds_full A data frame containing DESeq2 results, including standard error columns.
    #'
    #' @return A data frame with melted standard error information, including gene IDs and terms.
    #'
    #' @examples
    #' \dontrun{
    #' # Example usage of the function
    #' se_info <- getSE_df(dds_full)
    #' }
    #' @importFrom reshape2 melt
    #' @export
    getSE_df <- function(dds_full){
      columnsInDds_full <- colnames(dds_full)
      SE_columns <- columnsInDds_full [ grepl("SE" , columnsInDds_full) ]
      SE_df <- dds_full[, SE_columns]
      SE_df$ID <- rownames(SE_df)
      SE_df_long <- reshape2::melt(SE_df,
                                           measure.vars = SE_columns,
                                           variable.name  = "term", value.name = "std.error", drop = F)
      SE_df_long$term <- gsub(pattern = "SE_", SE_df_long$term, replacement = "")
      return(SE_df_long)
    
    }
    
    
    #' Extract Inferred Estimate Information from DESeq2 Results
    #'
    #' This function extracts the inferred estimate values from DESeq2 results.
    #'
    #' @param dds_full A data frame containing DESeq2 results, including estimate columns.
    #'
    #' @return A data frame with melted inferred estimate information, including gene IDs and terms.
    #'
    #' @examples
    #' \dontrun{
    #' # Example usage of the function
    #' estimate_info <- getEstimate_df(dds_full)
    #'  }
    #' @importFrom reshape2 melt
    #' @export
    getEstimate_df <- function(dds_full){
      columnsInDds_full <- colnames(dds_full)
      SE_columns <- columnsInDds_full [ grepl("SE" , columnsInDds_full) ]
      inferedVal_columns <- gsub("SE_", "" , x = SE_columns)
    
      estimate_df <- dds_full[, inferedVal_columns]
      estimate_df$ID <- rownames(estimate_df)
      estimate_df_long <- reshape2::melt(estimate_df,
                                     measure.vars = inferedVal_columns,
                                     variable.name  = "term", value.name = "estimate", drop = F)
      return(estimate_df_long)
    
    }