wrapper_dds.R

# WARNING - Generated by {fusen} from /dev/flat_full.Rmd: do not edit by hand


#' Wrapper Function for DESeq2 Analysis
#'
#' This function performs differential expression analysis using DESeq2 based on the provided
#' DESeqDataSet (dds) object. It calculates the dispersion values from the dds object and then
#' performs inference on the log-fold change (LFC) values using the specified parameters.
#'
#' @param dds A DESeqDataSet object containing the count data and experimental design.
#' @param lfcThreshold The threshold for minimum log-fold change (LFC) to consider differentially expressed.
#' @param altHypothesis The alternative hypothesis for the analysis, indicating the direction of change.
#'                      Options are "greater", "less", or "two.sided".
#' @param correction_method The method for p-value correction. Default is "BH" (Benjamini-Hochberg).
#'
#' @return A list containing the dispersion values and the results of the differential expression analysis.
#'         The dispersion values are calculated from the dds object and named according to sample names.
#'         The inference results include adjusted p-values and log2 fold changes for each gene.
#'
#' @examples
#' N_GENES = 100
#' MAX_REPLICATES = 5
#' MIN_REPLICATES = 5
#' ## --init variable
#' input_var_list <- init_variable( name = "genotype", mu = 12, sd = 0.1, level = 3) %>%
#'                    init_variable(name = "environment", mu = c(0,1), NA , level = 2) 
#'
#' mock_data <- mock_rnaseq(input_var_list, N_GENES, MIN_REPLICATES, MAX_REPLICATES)
#' dds <- DESeq2::DESeqDataSetFromMatrix(mock_data$counts , 
#'                    mock_data$metadata, ~ genotype + environment)
#' dds <- DESeq2::DESeq(dds, quiet = TRUE)
#' result <- wrap_dds(dds, lfcThreshold = 1, altHypothesis = "greater")
#' @export
wrap_dds <- function(dds, lfcThreshold , altHypothesis, correction_method = "BH") {
  dds_full <- S4Vectors::mcols(dds) %>% as.data.frame()
  
  ## -- dispersion
  message("INFO: The dispersion values from DESeq2 were reparametrized to their reciprocals (1/dispersion).")
  dispersion <- 1/dds_full$dispersion
  names(dispersion) <- rownames(dds_full)

  ## -- coeff
  inference_df <- get_inference_dds(dds_full, lfcThreshold, altHypothesis, correction_method)
  res <- list(dispersion = dispersion, fixEff = inference_df)
  return(res)
}


#' Calculate Inference for Differential Expression Analysis
#'
#' This function calculates inference for differential expression analysis based on the results of DESeq2.
#'
#' @param dds_full A data frame containing DESeq2 results, including estimate and standard error information.
#' @param lfcThreshold Log fold change threshold for determining differentially expressed genes.
#' @param altHypothesis Alternative hypothesis for testing, one of "greater", "less", or "two.sided".
#' @param correction_method Method for multiple hypothesis correction, e.g., "BH" (Benjamini-Hochberg).
#'
#' @return A data frame containing inference results, including statistics, p-values, and adjusted p-values.
#'
#' @examples
#' \dontrun{
#' # Example usage of the function
#' inference_result <- get_inference_dds(dds_full, lfcThreshold = 0.5, 
#'                                    altHypothesis = "greater", 
#'                                    correction_method = "BH")
#' }
#' @importFrom stats p.adjust
#' @export
get_inference_dds <- function(dds_full, lfcThreshold, altHypothesis, correction_method){

  ## -- build subdtf
  stdErr_df <- getSE_df(dds_full)
  estim_df <- getEstimate_df(dds_full)
  ## -- join
  df2ret <- join_dtf(estim_df, stdErr_df, k1 = c("ID", "term") , k2 = c("ID", "term"))

  ## -- convert to ln
  message("INFO: The log2-fold change estimates and standard errors from DESeq2 were converted to the natural logarithm scale.")
  df2ret$estimate <- df2ret$estimate*log(2)
  df2ret$std.error <- df2ret$std.error*log(2)

  ## -- some details reshaped
  df2ret$term <- gsub("_vs_.*","", df2ret$term)
  df2ret$term <- gsub(pattern = "_", df2ret$term, replacement = "")
  df2ret$term <- removeDuplicatedWord(df2ret$term)
  df2ret$term <- gsub(pattern = "[.]", df2ret$term, replacement = ":")
  df2ret$effect <- "fixed"
  idx_intercept <- df2ret$term == "Intercept"
  df2ret$term[idx_intercept] <- "(Intercept)"

  ## -- statistical part
  waldRes <- wald_test(df2ret$estimate, df2ret$std.error, lfcThreshold, altHypothesis)
  df2ret$statistic <- waldRes$statistic
  df2ret$p.value <- waldRes$p.value
  df2ret$p.adj <- stats::p.adjust(df2ret$p.value, method = correction_method)

  return(df2ret)
}


#' Extract Standard Error Information from DESeq2 Results
#'
#' This function extracts the standard error (SE) information from DESeq2 results.
#'
#' @param dds_full A data frame containing DESeq2 results, including standard error columns.
#'
#' @return A data frame with melted standard error information, including gene IDs and terms.
#'
#' @examples
#' \dontrun{
#' # Example usage of the function
#' se_info <- getSE_df(dds_full)
#' }
#' @importFrom reshape2 melt
#' @export
getSE_df <- function(dds_full){
  columnsInDds_full <- colnames(dds_full)
  SE_columns <- columnsInDds_full [ grepl("SE" , columnsInDds_full) ]
  SE_df <- dds_full[, SE_columns]
  SE_df$ID <- rownames(SE_df)
  SE_df_long <- reshape2::melt(SE_df,
                                       measure.vars = SE_columns,
                                       variable.name  = "term", value.name = "std.error", drop = F)
  SE_df_long$term <- gsub(pattern = "SE_", SE_df_long$term, replacement = "")
  return(SE_df_long)

}


#' Extract Inferred Estimate Information from DESeq2 Results
#'
#' This function extracts the inferred estimate values from DESeq2 results.
#'
#' @param dds_full A data frame containing DESeq2 results, including estimate columns.
#'
#' @return A data frame with melted inferred estimate information, including gene IDs and terms.
#'
#' @examples
#' \dontrun{
#' # Example usage of the function
#' estimate_info <- getEstimate_df(dds_full)
#'  }
#' @importFrom reshape2 melt
#' @export
getEstimate_df <- function(dds_full){
  columnsInDds_full <- colnames(dds_full)
  SE_columns <- columnsInDds_full [ grepl("SE" , columnsInDds_full) ]
  inferedVal_columns <- gsub("SE_", "" , x = SE_columns)

  estimate_df <- dds_full[, inferedVal_columns]
  estimate_df$ID <- rownames(estimate_df)
  estimate_df_long <- reshape2::melt(estimate_df,
                                 measure.vars = inferedVal_columns,
                                 variable.name  = "term", value.name = "estimate", drop = F)
  return(estimate_df_long)

}