Select Git revision
wrapper_dds.R
wrapper_dds.R 6.43 KiB
# WARNING - Generated by {fusen} from /dev/flat_full.Rmd: do not edit by hand
#' Wrapper Function for DESeq2 Analysis
#'
#' This function performs differential expression analysis using DESeq2 based on the provided
#' DESeqDataSet (dds) object. It calculates the dispersion values from the dds object and then
#' performs inference on the log-fold change (LFC) values using the specified parameters.
#'
#' @param dds A DESeqDataSet object containing the count data and experimental design.
#' @param lfcThreshold The threshold for minimum log-fold change (LFC) to consider differentially expressed.
#' @param altHypothesis The alternative hypothesis for the analysis, indicating the direction of change.
#' Options are "greater", "less", or "two.sided".
#' @param correction_method The method for p-value correction. Default is "BH" (Benjamini-Hochberg).
#'
#' @return A list containing the dispersion values and the results of the differential expression analysis.
#' The dispersion values are calculated from the dds object and named according to sample names.
#' The inference results include adjusted p-values and log2 fold changes for each gene.
#'
#' @examples
#' N_GENES = 100
#' MAX_REPLICATES = 5
#' MIN_REPLICATES = 5
#' ## --init variable
#' input_var_list <- init_variable( name = "genotype", mu = 12, sd = 0.1, level = 3) %>%
#' init_variable(name = "environment", mu = c(0,1), NA , level = 2)
#'
#' mock_data <- mock_rnaseq(input_var_list, N_GENES, MIN_REPLICATES, MAX_REPLICATES)
#' dds <- DESeq2::DESeqDataSetFromMatrix(mock_data$counts ,
#' mock_data$metadata, ~ genotype + environment)
#' dds <- DESeq2::DESeq(dds, quiet = TRUE)
#' result <- wrap_dds(dds, lfcThreshold = 1, altHypothesis = "greater")
#' @export
wrap_dds <- function(dds, lfcThreshold , altHypothesis, correction_method = "BH") {
dds_full <- S4Vectors::mcols(dds) %>% as.data.frame()
## -- dispersion
message("INFO: The dispersion values from DESeq2 were reparametrized to their reciprocals (1/dispersion).")
dispersion <- 1/dds_full$dispersion
names(dispersion) <- rownames(dds_full)
## -- coeff
inference_df <- get_inference_dds(dds_full, lfcThreshold, altHypothesis, correction_method)
res <- list(dispersion = dispersion, fixEff = inference_df)
return(res)
}
#' Calculate Inference for Differential Expression Analysis
#'
#' This function calculates inference for differential expression analysis based on the results of DESeq2.
#'
#' @param dds_full A data frame containing DESeq2 results, including estimate and standard error information.
#' @param lfcThreshold Log fold change threshold for determining differentially expressed genes.
#' @param altHypothesis Alternative hypothesis for testing, one of "greater", "less", or "two.sided".
#' @param correction_method Method for multiple hypothesis correction, e.g., "BH" (Benjamini-Hochberg).
#'
#' @return A data frame containing inference results, including statistics, p-values, and adjusted p-values.
#'
#' @examples
#' \dontrun{
#' # Example usage of the function
#' inference_result <- get_inference_dds(dds_full, lfcThreshold = 0.5,
#' altHypothesis = "greater",
#' correction_method = "BH")
#' }
#' @importFrom stats p.adjust
#' @export
get_inference_dds <- function(dds_full, lfcThreshold, altHypothesis, correction_method){
## -- build subdtf
stdErr_df <- getSE_df(dds_full)
estim_df <- getEstimate_df(dds_full)
## -- join
df2ret <- join_dtf(estim_df, stdErr_df, k1 = c("ID", "term") , k2 = c("ID", "term"))
## -- convert to ln
message("INFO: The log2-fold change estimates and standard errors from DESeq2 were converted to the natural logarithm scale.")
df2ret$estimate <- df2ret$estimate*log(2)
df2ret$std.error <- df2ret$std.error*log(2)
## -- some details reshaped
df2ret$term <- gsub("_vs_.*","", df2ret$term)
df2ret$term <- gsub(pattern = "_", df2ret$term, replacement = "")
df2ret$term <- removeDuplicatedWord(df2ret$term)
df2ret$term <- gsub(pattern = "[.]", df2ret$term, replacement = ":")
df2ret$effect <- "fixed"
idx_intercept <- df2ret$term == "Intercept"
df2ret$term[idx_intercept] <- "(Intercept)"
## -- statistical part
waldRes <- wald_test(df2ret$estimate, df2ret$std.error, lfcThreshold, altHypothesis)
df2ret$statistic <- waldRes$statistic
df2ret$p.value <- waldRes$p.value
df2ret$p.adj <- stats::p.adjust(df2ret$p.value, method = correction_method)
return(df2ret)
}
#' Extract Standard Error Information from DESeq2 Results
#'
#' This function extracts the standard error (SE) information from DESeq2 results.
#'
#' @param dds_full A data frame containing DESeq2 results, including standard error columns.
#'
#' @return A data frame with melted standard error information, including gene IDs and terms.
#'
#' @examples
#' \dontrun{
#' # Example usage of the function
#' se_info <- getSE_df(dds_full)
#' }
#' @importFrom reshape2 melt
#' @export
getSE_df <- function(dds_full){
columnsInDds_full <- colnames(dds_full)
SE_columns <- columnsInDds_full [ grepl("SE" , columnsInDds_full) ]
SE_df <- dds_full[, SE_columns]
SE_df$ID <- rownames(SE_df)
SE_df_long <- reshape2::melt(SE_df,
measure.vars = SE_columns,
variable.name = "term", value.name = "std.error", drop = F)
SE_df_long$term <- gsub(pattern = "SE_", SE_df_long$term, replacement = "")
return(SE_df_long)
}
#' Extract Inferred Estimate Information from DESeq2 Results
#'
#' This function extracts the inferred estimate values from DESeq2 results.
#'
#' @param dds_full A data frame containing DESeq2 results, including estimate columns.
#'
#' @return A data frame with melted inferred estimate information, including gene IDs and terms.
#'
#' @examples
#' \dontrun{
#' # Example usage of the function
#' estimate_info <- getEstimate_df(dds_full)
#' }
#' @importFrom reshape2 melt
#' @export
getEstimate_df <- function(dds_full){
columnsInDds_full <- colnames(dds_full)
SE_columns <- columnsInDds_full [ grepl("SE" , columnsInDds_full) ]
inferedVal_columns <- gsub("SE_", "" , x = SE_columns)
estimate_df <- dds_full[, inferedVal_columns]
estimate_df$ID <- rownames(estimate_df)
estimate_df_long <- reshape2::melt(estimate_df,
measure.vars = inferedVal_columns,
variable.name = "term", value.name = "estimate", drop = F)
return(estimate_df_long)
}