# WARNING - Generated by {fusen} from /dev/flat_full.Rmd: do not edit by hand #' Get input for simulation based on coefficients #' #' This function generates input data for simulation based on the coefficients provided in the \code{list_var} argument. #' #' @param list_var A list of variables (already initialized) #' @param n_genes Number of genes to simulate (default: 1) #' @param input2mvrnorm Input to the \code{mvrnorm} function for simulating data from multivariate normal distribution (default: NULL) #' @return A data frame with input coefficients for simulation #' @export #' @examples #' # Example usage #' list_var <- init_variable() #' getInput2simulation(list_var, n_genes = 10) getInput2simulation <- function(list_var, n_genes = 1, input2mvrnorm = NULL) { # Use default input to mvrnorm if not provided by the user if (is.null(input2mvrnorm)) input2mvrnorm = getInput2mvrnorm(list_var) l_dataFromMvrnorm = getDataFromMvrnorm(list_var, input2mvrnorm, n_genes) l_dataFromUser = getDataFromUser(list_var) df_input2simu <- getCoefficients(list_var, l_dataFromMvrnorm, l_dataFromUser, n_genes) return(df_input2simu) } #' getCoefficients #' #' Get the coefficients. #' #' @param list_var A list of variables (already initialized) #' @param l_dataFromMvrnorm Data from the `getGeneMetadata` function (optional). #' @param l_dataFromUser Data from the `getDataFromUser` function (optional). #' @param n_genes The number of genes. #' @export #' @return A dataframe containing the coefficients. #' @examples #' # Example usage #' list_var <- init_variable() #' input2mvrnorm = getInput2mvrnorm(list_var) #' l_dataFromMvrnorm = getDataFromMvrnorm(list_var, input2mvrnorm, n_genes) #' l_dataFromUser = getDataFromUser(list_var) #' getCoefficients(list_var, l_dataFromMvrnorm, l_dataFromUser, n_genes = 3) getCoefficients <- function(list_var, l_dataFromMvrnorm, l_dataFromUser, n_genes) { if (length(l_dataFromMvrnorm) == 0) { metaData <- getGeneMetadata(list_var, n_genes) l_dataFromMvrnorm <- list(metaData) } l_df2join <- c(l_dataFromMvrnorm, l_dataFromUser) df_coef <- Reduce(function(d1, d2){ column_names = colnames(d2) idx_key = grepl(pattern = "label", column_names ) keys = column_names[idx_key] join_dtf(d1, d2, k1 = keys , k2 = keys) } , l_df2join ) %>% as.data.frame() column_names <- colnames(df_coef) idx_column2factor <- grep(pattern = "label_", column_names) if (length(idx_column2factor) > 1) { df_coef[, idx_column2factor] <- lapply(df_coef[, idx_column2factor], as.factor) } else { df_coef[, idx_column2factor] <- as.factor(df_coef[, idx_column2factor]) } return(df_coef) } #' Get the log_qij values from the coefficient data frame. #' #' @param dtf_coef The coefficient data frame. #' @return The coefficient data frame with log_qij column added. #' @export getLog_qij <- function(dtf_coef) { dtf_beta_numeric <- dtf_coef[sapply(dtf_coef, is.numeric)] dtf_coef$log_qij <- rowSums(dtf_beta_numeric, na.rm = TRUE) return(dtf_coef) } #' Calculate mu_ij values based on coefficient data frame and scaling factor #' #' This function calculates mu_ij values by raising 2 to the power of the log_qij values #' from the coefficient data frame and multiplying it by the provided scaling factor. #' #' @param dtf_coef Coefficient data frame containing the log_qij values #' #' @return Coefficient data frame with an additional mu_ij column #' #' @examples #' list_var <- init_variable() #' dtf_coef <- getInput2simulation(list_var, 10) #' dtf_coef <- getLog_qij(dtf_coef) #' dtf_coef <- addBasalExpression(dtf_coef, 10, c(10, 20, 0)) #' getMu_ij(dtf_coef) #' @export getMu_ij <- function(dtf_coef) { log_qij_scaled <- dtf_coef$log_qij + dtf_coef$basalExpr dtf_coef$log_qij_scaled <- log_qij_scaled mu_ij <- exp(log_qij_scaled) dtf_coef$mu_ij <- mu_ij return(dtf_coef) } #' getMu_ij_matrix #' #' Get the Mu_ij matrix. #' #' @param dtf_coef A dataframe containing the coefficients. #' @importFrom reshape2 dcast #' @importFrom stats as.formula #' @export #' @return A Mu_ij matrix. getMu_ij_matrix <- function(dtf_coef) { column_names <- colnames(dtf_coef) idx_var <- grepl(pattern = "label", column_names) l_var <- column_names[idx_var] str_formula_rigth <- paste(l_var, collapse = " + ") if (str_formula_rigth == "") stop("no variable label detected") str_formula <- paste(c("geneID", str_formula_rigth), collapse = " ~ ") formula <- stats::as.formula(str_formula) dtf_Muij <- dtf_coef %>% reshape2::dcast(formula = formula, value.var = "mu_ij", drop = F) dtf_Muij[is.na(dtf_Muij)] <- 0 mtx_Muij <- data.frame(dtf_Muij[, -1], row.names = dtf_Muij[, 1]) %>% as.matrix() mtx_Muij <- mtx_Muij[, order(colnames(mtx_Muij)), drop = F] return(mtx_Muij) } #' getSubCountsTable #' #' Get the subcounts table. #' #' @param matx_Muij The Mu_ij matrix. #' @param matx_dispersion The dispersion matrix. #' @param replicateID The replication identifier. #' @param l_bool_replication A boolean vector indicating the replicates. #' @importFrom stats rnbinom #' #' @return A subcounts table. getSubCountsTable <- function(matx_Muij, matx_dispersion, replicateID, l_bool_replication) { getKijMatrix <- function(matx_Muij, matx_dispersion, n_genes, n_samples) { k_ij <- stats::rnbinom(n_genes * n_samples, size = matx_dispersion, mu = matx_Muij) %>% matrix(nrow = n_genes, ncol = n_samples) k_ij[is.na(k_ij)] <- 0 return(k_ij) } if (!any(l_bool_replication)) return(NULL) matx_Muij <- matx_Muij[, l_bool_replication, drop = FALSE] matx_dispersion <- matx_dispersion[, l_bool_replication, drop = FALSE] l_sampleID <- colnames(matx_Muij) l_geneID <- rownames(matx_Muij) dimension_mtx <- dim(matx_Muij) n_genes <- dimension_mtx[1] n_samples <- dimension_mtx[2] matx_kij <- getKijMatrix(matx_Muij, matx_dispersion, n_genes, n_samples) colnames(matx_kij) <- paste(l_sampleID, replicateID, sep = "_") rownames(matx_kij) <- l_geneID return(matx_kij) }