Skip to content
Snippets Groups Projects
Select Git revision
  • 0e72a3915144c6a35b2a8f6297b895fa025aa445
  • master default protected
  • v2.1.1
  • v2.1.0
4 results

mock_rnaseq.R

Blame
  • mock_rnaseq.R 7.67 KiB
    # WARNING - Generated by {fusen} from /dev/flat_full.Rmd: do not edit by hand
    
    
    #' Check the validity of the dispersion matrix
    #'
    #' Checks if the dispersion matrix has the correct dimensions.
    #'
    #' @param matx_dispersion Replication matrix
    #' @param matx_bool_replication Replication matrix
    #' @return TRUE if the dimensions are valid, FALSE otherwise
    #' @export
    #' @examples
    #' matx_dispersion <- matrix(1:12, nrow = 3, ncol = 4)
    #' matx_bool_replication <- matrix(TRUE, nrow = 3, ncol = 4)
    #' is_dispersionMatrixValid(matx_dispersion, matx_bool_replication)
    is_dispersionMatrixValid <- function(matx_dispersion, matx_bool_replication) {
      expected_nb_column <- dim(matx_bool_replication)[2]
      if (expected_nb_column != dim(matx_dispersion)[2]) {
        return(FALSE)
      }
      return(TRUE)
    }
    
    #' Generate count table
    #'
    #' Generates the count table based on the mu_ij matrix, dispersion matrix, and replication matrix.
    #'
    #' @param mu_ij_matx_rep Replicated mu_ij matrix
    #' @param matx_dispersion_rep Replicated dispersion matrix
    #' @return Count table
    #' @export
    #' @examples
    #' mu_ij_matx_rep <- matrix(1:12, nrow = 3, ncol = 4)
    #' matx_dispersion_rep <- matrix(1:12, nrow = 3, ncol = 4)
    #' generateCountTable(mu_ij_matx_rep, matx_dispersion_rep)
    generateCountTable <- function(mu_ij_matx_rep, matx_dispersion_rep) {
      message("k_ij ~ Nbinom(mu_ij, dispersion)")
      n_genes <- dim(mu_ij_matx_rep)[1]
      n_samples <- dim(mu_ij_matx_rep)[2]
      n_samplings <- prod(n_genes * n_samples)
      mat_countsTable <- rnbinom(n_samplings, 
                                 size = matx_dispersion_rep, 
                                 mu = mu_ij_matx_rep) %>%
                          matrix(nrow = n_genes, ncol = n_samples)
      colnames(mat_countsTable) <- colnames(mu_ij_matx_rep)
      rownames(mat_countsTable) <- rownames(mu_ij_matx_rep)
      mat_countsTable[is.na(mat_countsTable)] <- 0
      return(mat_countsTable)
    }
    
    
    #' Perform RNA-seq simulation
    #'
    #' Simulates RNA-seq data based on the input variables.
    #'
    #' @param list_var List of input variables
    #' @param n_genes Number of genes
    #' @param min_replicates Minimum replication count
    #' @param max_replicates Maximum replication count
    #' @param sequencing_depth Sequencing depth
    #' @param basal_expression base expression gene
    #' @param dispersion User-provided dispersion vector (optional)
    #' @return List containing the ground truth, counts, and metadata
    #' @export
    #' @examples
    #' mock_rnaseq(list_var = init_variable(), 
    #'              n_genes = 1000, min_replicates = 2,   
    #'               max_replicates = 4)
    mock_rnaseq <- function(list_var, n_genes, min_replicates, max_replicates, sequencing_depth = NULL,  
                            basal_expression = 0 , dispersion = stats::runif(n_genes, min = 0, max = 1000) ) {
      
      ## -- get my effect
      df_inputSimulation <- getInput2simulation(list_var, n_genes)
      ## -- add column logQij
      df_inputSimulation <- getLog_qij(df_inputSimulation)
      df_inputSimulation <- addBasalExpression(df_inputSimulation, n_genes, basal_expression)
      df_inputSimulation <- getMu_ij(df_inputSimulation )
      
      message("Building mu_ij matrix")
      ## -- matrix
      matx_Muij <- getMu_ij_matrix(df_inputSimulation)
      l_sampleID <- getSampleID(list_var)
      matx_bool_replication <- generateReplicationMatrix(list_var, min_replicates, max_replicates)
      mu_ij_matx_rep <- replicateMatrix(matx_Muij, matx_bool_replication)
      
      
      dispersion <- getValidDispersion(dispersion)
      genes_dispersion <- sample(dispersion , size = n_genes, replace = T)
      matx_dispersion <- getDispersionMatrix(list_var, n_genes, genes_dispersion)
      l_geneID = base::paste("gene", 1:n_genes, sep = "")
      names(genes_dispersion) <- l_geneID
      
      ## same order as mu_ij_matx_rep
      matx_dispersion <- matx_dispersion[ order(row.names(matx_dispersion)), ]
      matx_dispersion_rep <- replicateMatrix(matx_dispersion, matx_bool_replication)
      matx_countsTable <- generateCountTable(mu_ij_matx_rep, matx_dispersion_rep)
    
      message("Counts simulation: Done")
      
      
      dtf_countsTable <- matx_countsTable %>% as.data.frame()
      if (!is.null(sequencing_depth)) {
        message("Scaling count table according to sequencing depth.")
        dtf_countsTable <- scaleCountsTable(dtf_countsTable, sequencing_depth)
      }
      
      metaData <- getSampleMetadata(list_var, n_genes, matx_bool_replication)
      libSize <- sum(colSums(dtf_countsTable))
      settings_df <- getSettingsTable(n_genes, min_replicates, max_replicates, libSize)
        
      list2ret <- list(
        settings = settings_df,
        init = list_var, 
        groundTruth = list(effects = df_inputSimulation, gene_dispersion = genes_dispersion),
        counts = dtf_countsTable,
        metadata = metaData)
      return(list2ret)
    }
    
    
    
    
    #' Validate and Filter Dispersion Values
    #'
    #' This function takes an input vector and validates it to ensure that it meets certain criteria.
    #'
    #' @param input_vector A vector to be validated.
    #' @return A validated and filtered numeric vector.
    #' @details The function checks whether the input is a vector, suppresses warnings while converting to numeric,
    #' and filters out non-numeric elements. It also checks for values greater than zero and removes negative values.
    #' If the resulting vector has a length of zero, an error is thrown.
    #' @examples
    #' getValidDispersion(c(0.5, 1.2, -0.3, "invalid", 0.8))
    #' @export
    getValidDispersion <- function(input_vector) {
      # Verify if it's a vector
      if (!is.vector(input_vector)) {
        stop("dispersion param is not a vector.")
      }
    
      input_vector <- suppressWarnings(as.numeric(input_vector))
    
      # Filter numeric elements
      numeric_elements <- !is.na(input_vector)
      if (sum(!numeric_elements) > 0) {
        message("Non-numeric elements were removed from the dispersion vector")
        input_vector <- input_vector[numeric_elements]
      }
    
      # Check and filter values > 0
      numeric_positive_elements <- input_vector > 0
      if (sum(!numeric_positive_elements) > 0) {
        message("Negative numeric values were removed from the dispersion vector")
        input_vector <- input_vector[numeric_positive_elements]
      }
    
      if (length(input_vector) == 0) stop("Invalid dispersion values provided.")
    
      return(input_vector)
    }
    
    
    #' Generate replication matrix
    #'
    #' Generates the replication matrix based on the minimum and maximum replication counts.
    #'
    #' @param list_var Number of samples
    #' @param min_replicates Minimum replication count
    #' @param max_replicates Maximum replication count
    #' @return Replication matrix
    #' @export
    #' @examples
    #' list_var = init_variable()
    #' generateReplicationMatrix(list_var, min_replicates = 2, max_replicates = 4)
    generateReplicationMatrix <- function(list_var, min_replicates, max_replicates) {
      if (min_replicates > max_replicates) {
        message("min_replicates > max_replicates have been supplied")
        message("Automatic reversing")
        tmp_min_replicates <- min_replicates
        min_replicates <- max_replicates
        max_replicates <- tmp_min_replicates
      }
      l_sampleIDs <- getSampleID(list_var)
      n_samples <-  length(l_sampleIDs)
      return(getReplicationMatrix(min_replicates, max_replicates, n_samples = n_samples))
    }
    
    #' Replicate matrix
    #'
    #' Replicates a matrix based on a replication matrix.
    #'
    #' @param matrix Matrix to replicate
    #' @param replication_matrix Replication matrix
    #' @return Replicated matrix
    #' @export
    #' @examples
    #' matrix <- matrix(1:9, nrow = 3, ncol = 3)
    #' replication_matrix <- matrix(TRUE, nrow = 3, ncol = 3)
    #' replicateMatrix(matrix, replication_matrix)
    replicateMatrix <- function(matrix, replication_matrix) {
      n_genes <- dim(matrix)[1]
      rep_list <- colSums(replication_matrix)
      replicated_indices <- rep(seq_len(ncol(matrix)), times = rep_list)
      replicated_matrix <- matrix[, replicated_indices, drop = FALSE]
      suffix_sampleID <- sequence(rep_list)
      colnames(replicated_matrix) <- paste(colnames(replicated_matrix), suffix_sampleID, sep = "_")
      return(replicated_matrix)
    }