Select Git revision
mock_rnaseq.R
Arnaud Duvermy authored
mock_rnaseq.R 7.67 KiB
# WARNING - Generated by {fusen} from /dev/flat_full.Rmd: do not edit by hand
#' Check the validity of the dispersion matrix
#'
#' Checks if the dispersion matrix has the correct dimensions.
#'
#' @param matx_dispersion Replication matrix
#' @param matx_bool_replication Replication matrix
#' @return TRUE if the dimensions are valid, FALSE otherwise
#' @export
#' @examples
#' matx_dispersion <- matrix(1:12, nrow = 3, ncol = 4)
#' matx_bool_replication <- matrix(TRUE, nrow = 3, ncol = 4)
#' is_dispersionMatrixValid(matx_dispersion, matx_bool_replication)
is_dispersionMatrixValid <- function(matx_dispersion, matx_bool_replication) {
expected_nb_column <- dim(matx_bool_replication)[2]
if (expected_nb_column != dim(matx_dispersion)[2]) {
return(FALSE)
}
return(TRUE)
}
#' Generate count table
#'
#' Generates the count table based on the mu_ij matrix, dispersion matrix, and replication matrix.
#'
#' @param mu_ij_matx_rep Replicated mu_ij matrix
#' @param matx_dispersion_rep Replicated dispersion matrix
#' @return Count table
#' @export
#' @examples
#' mu_ij_matx_rep <- matrix(1:12, nrow = 3, ncol = 4)
#' matx_dispersion_rep <- matrix(1:12, nrow = 3, ncol = 4)
#' generateCountTable(mu_ij_matx_rep, matx_dispersion_rep)
generateCountTable <- function(mu_ij_matx_rep, matx_dispersion_rep) {
message("k_ij ~ Nbinom(mu_ij, dispersion)")
n_genes <- dim(mu_ij_matx_rep)[1]
n_samples <- dim(mu_ij_matx_rep)[2]
n_samplings <- prod(n_genes * n_samples)
mat_countsTable <- rnbinom(n_samplings,
size = matx_dispersion_rep,
mu = mu_ij_matx_rep) %>%
matrix(nrow = n_genes, ncol = n_samples)
colnames(mat_countsTable) <- colnames(mu_ij_matx_rep)
rownames(mat_countsTable) <- rownames(mu_ij_matx_rep)
mat_countsTable[is.na(mat_countsTable)] <- 0
return(mat_countsTable)
}
#' Perform RNA-seq simulation
#'
#' Simulates RNA-seq data based on the input variables.
#'
#' @param list_var List of input variables
#' @param n_genes Number of genes
#' @param min_replicates Minimum replication count
#' @param max_replicates Maximum replication count
#' @param sequencing_depth Sequencing depth
#' @param basal_expression base expression gene
#' @param dispersion User-provided dispersion vector (optional)
#' @return List containing the ground truth, counts, and metadata
#' @export
#' @examples
#' mock_rnaseq(list_var = init_variable(),
#' n_genes = 1000, min_replicates = 2,
#' max_replicates = 4)
mock_rnaseq <- function(list_var, n_genes, min_replicates, max_replicates, sequencing_depth = NULL,
basal_expression = 0 , dispersion = stats::runif(n_genes, min = 0, max = 1000) ) {
## -- get my effect
df_inputSimulation <- getInput2simulation(list_var, n_genes)
## -- add column logQij
df_inputSimulation <- getLog_qij(df_inputSimulation)
df_inputSimulation <- addBasalExpression(df_inputSimulation, n_genes, basal_expression)
df_inputSimulation <- getMu_ij(df_inputSimulation )
message("Building mu_ij matrix")
## -- matrix
matx_Muij <- getMu_ij_matrix(df_inputSimulation)
l_sampleID <- getSampleID(list_var)
matx_bool_replication <- generateReplicationMatrix(list_var, min_replicates, max_replicates)
mu_ij_matx_rep <- replicateMatrix(matx_Muij, matx_bool_replication)
dispersion <- getValidDispersion(dispersion)
genes_dispersion <- sample(dispersion , size = n_genes, replace = T)
matx_dispersion <- getDispersionMatrix(list_var, n_genes, genes_dispersion)
l_geneID = base::paste("gene", 1:n_genes, sep = "")
names(genes_dispersion) <- l_geneID
## same order as mu_ij_matx_rep
matx_dispersion <- matx_dispersion[ order(row.names(matx_dispersion)), ]
matx_dispersion_rep <- replicateMatrix(matx_dispersion, matx_bool_replication)
matx_countsTable <- generateCountTable(mu_ij_matx_rep, matx_dispersion_rep)
message("Counts simulation: Done")
dtf_countsTable <- matx_countsTable %>% as.data.frame()
if (!is.null(sequencing_depth)) {
message("Scaling count table according to sequencing depth.")
dtf_countsTable <- scaleCountsTable(dtf_countsTable, sequencing_depth)
}
metaData <- getSampleMetadata(list_var, n_genes, matx_bool_replication)
libSize <- sum(colSums(dtf_countsTable))
settings_df <- getSettingsTable(n_genes, min_replicates, max_replicates, libSize)
list2ret <- list(
settings = settings_df,
init = list_var,
groundTruth = list(effects = df_inputSimulation, gene_dispersion = genes_dispersion),
counts = dtf_countsTable,
metadata = metaData)
return(list2ret)
}
#' Validate and Filter Dispersion Values
#'
#' This function takes an input vector and validates it to ensure that it meets certain criteria.
#'
#' @param input_vector A vector to be validated.
#' @return A validated and filtered numeric vector.
#' @details The function checks whether the input is a vector, suppresses warnings while converting to numeric,
#' and filters out non-numeric elements. It also checks for values greater than zero and removes negative values.
#' If the resulting vector has a length of zero, an error is thrown.
#' @examples
#' getValidDispersion(c(0.5, 1.2, -0.3, "invalid", 0.8))
#' @export
getValidDispersion <- function(input_vector) {
# Verify if it's a vector
if (!is.vector(input_vector)) {
stop("dispersion param is not a vector.")
}
input_vector <- suppressWarnings(as.numeric(input_vector))
# Filter numeric elements
numeric_elements <- !is.na(input_vector)
if (sum(!numeric_elements) > 0) {
message("Non-numeric elements were removed from the dispersion vector")
input_vector <- input_vector[numeric_elements]
}
# Check and filter values > 0
numeric_positive_elements <- input_vector > 0
if (sum(!numeric_positive_elements) > 0) {
message("Negative numeric values were removed from the dispersion vector")
input_vector <- input_vector[numeric_positive_elements]
}
if (length(input_vector) == 0) stop("Invalid dispersion values provided.")
return(input_vector)
}
#' Generate replication matrix
#'
#' Generates the replication matrix based on the minimum and maximum replication counts.
#'
#' @param list_var Number of samples
#' @param min_replicates Minimum replication count
#' @param max_replicates Maximum replication count
#' @return Replication matrix
#' @export
#' @examples
#' list_var = init_variable()
#' generateReplicationMatrix(list_var, min_replicates = 2, max_replicates = 4)
generateReplicationMatrix <- function(list_var, min_replicates, max_replicates) {
if (min_replicates > max_replicates) {
message("min_replicates > max_replicates have been supplied")
message("Automatic reversing")
tmp_min_replicates <- min_replicates
min_replicates <- max_replicates
max_replicates <- tmp_min_replicates
}
l_sampleIDs <- getSampleID(list_var)
n_samples <- length(l_sampleIDs)
return(getReplicationMatrix(min_replicates, max_replicates, n_samples = n_samples))
}
#' Replicate matrix
#'
#' Replicates a matrix based on a replication matrix.
#'
#' @param matrix Matrix to replicate
#' @param replication_matrix Replication matrix
#' @return Replicated matrix
#' @export
#' @examples
#' matrix <- matrix(1:9, nrow = 3, ncol = 3)
#' replication_matrix <- matrix(TRUE, nrow = 3, ncol = 3)
#' replicateMatrix(matrix, replication_matrix)
replicateMatrix <- function(matrix, replication_matrix) {
n_genes <- dim(matrix)[1]
rep_list <- colSums(replication_matrix)
replicated_indices <- rep(seq_len(ncol(matrix)), times = rep_list)
replicated_matrix <- matrix[, replicated_indices, drop = FALSE]
suffix_sampleID <- sequence(rep_list)
colnames(replicated_matrix) <- paste(colnames(replicated_matrix), suffix_sampleID, sep = "_")
return(replicated_matrix)
}