From 56f398bdcf1509aefe9979427faecc31325ba555 Mon Sep 17 00:00:00 2001 From: aduvermy <arnaud.duvermy@ens-lyon.fr> Date: Tue, 26 Mar 2024 10:44:24 +0100 Subject: [PATCH] add fix_reference_effect parameter and utils Former-commit-id: 9d73431e71bc99d4ad7e6ad1ce8d0877db320447 Former-commit-id: 52edbac846370096cf86ff60c723a1937956e284 Former-commit-id: 9502ca085592283dd8aa5d2b3ed2267fb97561bb --- NAMESPACE | 2 + R/mock_rnaseq.R | 7 +- R/simulation.R | 71 ++++++++++++++-- dev/flat_full.Rmd | 135 +++++++++++++++++++++++++++++-- man/getDataFromRnorm.Rd | 6 +- man/getInput2simulation.Rd | 5 ++ man/getRefLevel.Rd | 18 +++++ man/mock_rnaseq.Rd | 7 +- man/replaceReferenceEffectBy0.Rd | 21 +++++ tests/testthat/test-simulation.R | 57 +++++++++++++ 10 files changed, 313 insertions(+), 16 deletions(-) create mode 100644 man/getRefLevel.Rd create mode 100644 man/replaceReferenceEffectBy0.Rd diff --git a/NAMESPACE b/NAMESPACE index fb7c5ff..bd18a90 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -90,6 +90,7 @@ export(getLog_qij) export(getMu_ij) export(getMu_ij_matrix) export(getNumberOfCombinationsInInteraction) +export(getRefLevel) export(getReplicationMatrix) export(getSE_df) export(getSampleID) @@ -158,6 +159,7 @@ export(removeDigitsAtEnd) export(removeDuplicatedWord) export(renameColumns) export(reorderColumns) +export(replaceReferenceEffectBy0) export(replicateByGroup) export(replicateMatrix) export(replicateRows) diff --git a/R/mock_rnaseq.R b/R/mock_rnaseq.R index ce43744..e880f9b 100644 --- a/R/mock_rnaseq.R +++ b/R/mock_rnaseq.R @@ -105,6 +105,9 @@ warning_too_low_mu_ij_row <- function(mu_ij_matrix, threshold = 1 ){ #' @param normal_distr Specifies the distribution type for generating effects. Choose between 'univariate' or 'multivariate' (default). #' - 'univariate': Effects are drawn independently from univariate normal distributions. #' - 'multivariate': Effects are drawn jointly from a multivariate normal distribution. +#' @param fix_reference_effect A logical value indicating whether the effect of the reference label should be fixed to zero. If set to TRUE, the effect of the +#' reference label is constrained to zero, ensuring that it does not contribute to the model. If set to FALSE, the effect from the reference label is picked +#' randomly from the distribution specified by the user. This option works only when `normal_distr` is set to 'univariate'. Default is FALSE. #' @return List containing the ground truth, counts, and metadata #' @export #' @examples @@ -113,10 +116,10 @@ warning_too_low_mu_ij_row <- function(mu_ij_matrix, threshold = 1 ){ #' max_replicates = 4) mock_rnaseq <- function(list_var, n_genes, min_replicates, max_replicates, sequencing_depth = NULL, basal_expression = 0 , dispersion = stats::runif(n_genes, min = 0, max = 1000), - normal_distr = "multivariate") { + normal_distr = "multivariate", fix_reference_effect = FALSE) { ## -- get my effect - df_inputSimulation <- getInput2simulation(list_var, n_genes, normal_distr) + df_inputSimulation <- getInput2simulation(list_var, n_genes, normal_distr, fix_reference_effect ) ## -- add column logQij df_inputSimulation <- getLog_qij(df_inputSimulation) df_inputSimulation <- addBasalExpression(df_inputSimulation, n_genes, basal_expression) diff --git a/R/simulation.R b/R/simulation.R index d507012..0fb1d3e 100644 --- a/R/simulation.R +++ b/R/simulation.R @@ -10,6 +10,9 @@ #' @param normal_distr Specifies the distribution type for generating effects. Choose between 'univariate' or 'multivariate' (default). #' - 'univariate': Effects are drawn independently from univariate normal distributions. #' - 'multivariate': Effects are drawn jointly from a multivariate normal distribution. +#' @param fix_reference_effect A logical value indicating whether the effect of the reference label should be fixed to zero. If set to TRUE, the effect of the +#' reference label is constrained to zero, ensuring that it does not contribute to the model. If set to FALSE, the effect from the reference label is picked +#' randomly from the distribution specified by the user. This option works only when `normal_distr` is set to 'univariate'. Default is FALSE. #' @param input2mvrnorm Input to the \code{mvrnorm} function for simulating data from multivariate normal distribution (default: NULL) #' @return A data frame with input coefficients for simulation #' @export @@ -17,7 +20,7 @@ #' # Example usage #' list_var <- init_variable() #' getInput2simulation(list_var, n_genes = 10) -getInput2simulation <- function(list_var, n_genes = 1, normal_distr = "multivariate", input2mvrnorm = NULL) { +getInput2simulation <- function(list_var, n_genes = 1, normal_distr = "multivariate", fix_reference_effect = FALSE, input2mvrnorm = NULL) { stopifnot( normal_distr %in% c("multivariate", "univariate") ) @@ -26,7 +29,7 @@ getInput2simulation <- function(list_var, n_genes = 1, normal_distr = "multivari l_dataFrom_normdistr <- getDataFromMvrnorm(list_var, input2mvrnorm, n_genes) } if (normal_distr == "univariate"){ - l_dataFrom_normdistr <- getDataFromRnorm(list_var, n_genes) + l_dataFrom_normdistr <- getDataFromRnorm(list_var, n_genes, fix_reference_effect) } l_dataFromUser = getDataFromUser(list_var) @@ -38,24 +41,82 @@ getInput2simulation <- function(list_var, n_genes = 1, normal_distr = "multivari +#' Get the reference level for categorical variables in the data +#' +#' This function extracts the reference level for each categorical variable in the data. +#' The reference level is the first level encountered for each categorical variable. +#' +#' @param data The data frame containing the categorical variables. +#' @return A list containing the reference level for each categorical variable. +#' @export +getRefLevel <- function(data){ + col_names <- colnames(data) + categorical_vars <- col_names[grepl(col_names, pattern = "label_")] + if (length(categorical_vars) == 1){ + l_labels <- list() + l_labels[[categorical_vars]] <- levels(data[, categorical_vars]) + + } else l_labels <- lapply(data[, categorical_vars], levels) + l_labels_ref <- sapply(l_labels, function(vec) vec[1]) + return(l_labels_ref) +} + +#' Replace the reference effect by 0 in the data +#' +#' This function replaces the effect corresponding to the reference level with 0 in the data. +#' +#' @param list_var The list of variables containing the effects to modify. +#' @param l_labels_ref A list containing the reference level for each categorical variable. +#' @param data The data frame containing the effects to modify. +#' @return The modified data frame with reference effects replaced by 0. +#' @export +replaceReferenceEffectBy0 <- function(list_var, l_labels_ref , data){ + varInitialized <- c(getListVar(list_var), getListVar(list_var$interactions)) + varInitialized <- varInitialized[varInitialized != 'interactions'] + df_effects_with0 <- sapply(varInitialized, function(var){ + categorical_var <- paste("label", unlist(strsplit(var, ":")), sep = "_") + if (length(categorical_var) == 1) + idx_0 <- data[[categorical_var]] == l_labels_ref[[categorical_var]] + else { + bool_matrix <- sapply(categorical_var, function(uniq_cat_var) data[uniq_cat_var] == l_labels_ref[uniq_cat_var]) + idx_0 <- rowSums(bool_matrix) == length(categorical_var) + } + return(replace(data[[var]], idx_0, 0)) + } + ) + col_names <- colnames(data) + categorical_vars <- col_names[grepl(col_names, pattern = "label_")] + data <- cbind(data[c("geneID", categorical_vars)], df_effects_with0) + return(data) +} + + + #' Prepare data using effects from a normal distribution #' #' Prepares the data by generating effects from a normal distribution for each gene. #' #' @param list_var A list of variables (already initialized) #' @param n_genes Number of genes to generate data for. +#' @param fix_reference_effect A logical value indicating whether the effect of the reference label should be fixed to zero. If set to TRUE, the effect of the +#' reference label is constrained to zero, ensuring that it does not contribute to the model. If set to FALSE, the effect from the reference label is picked +#' randomly from the distribution specified by the user. This option works only when `normal_distr` is set to 'univariate'. Default is FALSE. #' @return A dataframe containing gene metadata and effects generated from a normal distribution. #' @export -getDataFromRnorm <- function(list_var, n_genes){ +getDataFromRnorm <- function(list_var, n_genes, fix_reference_effect = FALSE){ ## -- check if all data have been provided by user if (is.null(getInput2mvrnorm(list_var)$covMatrix)) return(list()) - metadata <- getGeneMetadata(list_var , n_genes) df_effects <- get_effects_from_rnorm(list_var, metadata) data <- cbind(metadata, df_effects) + + if (isTRUE(fix_reference_effect)){ + l_labels_ref <- getRefLevel(data) + data <- replaceReferenceEffectBy0(input_var_list, l_labels_ref, data) + } + return(list(data)) - } #' Generate effects from a normal distribution diff --git a/dev/flat_full.Rmd b/dev/flat_full.Rmd index 70b35a8..f9e1c8e 100644 --- a/dev/flat_full.Rmd +++ b/dev/flat_full.Rmd @@ -1527,6 +1527,9 @@ test_that("set_correlation sets the correlation between variables correctly", { #' @param normal_distr Specifies the distribution type for generating effects. Choose between 'univariate' or 'multivariate' (default). #' - 'univariate': Effects are drawn independently from univariate normal distributions. #' - 'multivariate': Effects are drawn jointly from a multivariate normal distribution. +#' @param fix_reference_effect A logical value indicating whether the effect of the reference label should be fixed to zero. If set to TRUE, the effect of the +#' reference label is constrained to zero, ensuring that it does not contribute to the model. If set to FALSE, the effect from the reference label is picked +#' randomly from the distribution specified by the user. This option works only when `normal_distr` is set to 'univariate'. Default is FALSE. #' @param input2mvrnorm Input to the \code{mvrnorm} function for simulating data from multivariate normal distribution (default: NULL) #' @return A data frame with input coefficients for simulation #' @export @@ -1534,7 +1537,7 @@ test_that("set_correlation sets the correlation between variables correctly", { #' # Example usage #' list_var <- init_variable() #' getInput2simulation(list_var, n_genes = 10) -getInput2simulation <- function(list_var, n_genes = 1, normal_distr = "multivariate", input2mvrnorm = NULL) { +getInput2simulation <- function(list_var, n_genes = 1, normal_distr = "multivariate", fix_reference_effect = FALSE, input2mvrnorm = NULL) { stopifnot( normal_distr %in% c("multivariate", "univariate") ) @@ -1543,7 +1546,7 @@ getInput2simulation <- function(list_var, n_genes = 1, normal_distr = "multivari l_dataFrom_normdistr <- getDataFromMvrnorm(list_var, input2mvrnorm, n_genes) } if (normal_distr == "univariate"){ - l_dataFrom_normdistr <- getDataFromRnorm(list_var, n_genes) + l_dataFrom_normdistr <- getDataFromRnorm(list_var, n_genes, fix_reference_effect) } l_dataFromUser = getDataFromUser(list_var) @@ -1555,24 +1558,82 @@ getInput2simulation <- function(list_var, n_genes = 1, normal_distr = "multivari +#' Get the reference level for categorical variables in the data +#' +#' This function extracts the reference level for each categorical variable in the data. +#' The reference level is the first level encountered for each categorical variable. +#' +#' @param data The data frame containing the categorical variables. +#' @return A list containing the reference level for each categorical variable. +#' @export +getRefLevel <- function(data){ + col_names <- colnames(data) + categorical_vars <- col_names[grepl(col_names, pattern = "label_")] + if (length(categorical_vars) == 1){ + l_labels <- list() + l_labels[[categorical_vars]] <- levels(data[, categorical_vars]) + + } else l_labels <- lapply(data[, categorical_vars], levels) + l_labels_ref <- sapply(l_labels, function(vec) vec[1]) + return(l_labels_ref) +} + +#' Replace the reference effect by 0 in the data +#' +#' This function replaces the effect corresponding to the reference level with 0 in the data. +#' +#' @param list_var The list of variables containing the effects to modify. +#' @param l_labels_ref A list containing the reference level for each categorical variable. +#' @param data The data frame containing the effects to modify. +#' @return The modified data frame with reference effects replaced by 0. +#' @export +replaceReferenceEffectBy0 <- function(list_var, l_labels_ref , data){ + varInitialized <- c(getListVar(list_var), getListVar(list_var$interactions)) + varInitialized <- varInitialized[varInitialized != 'interactions'] + df_effects_with0 <- sapply(varInitialized, function(var){ + categorical_var <- paste("label", unlist(strsplit(var, ":")), sep = "_") + if (length(categorical_var) == 1) + idx_0 <- data[[categorical_var]] == l_labels_ref[[categorical_var]] + else { + bool_matrix <- sapply(categorical_var, function(uniq_cat_var) data[uniq_cat_var] == l_labels_ref[uniq_cat_var]) + idx_0 <- rowSums(bool_matrix) == length(categorical_var) + } + return(replace(data[[var]], idx_0, 0)) + } + ) + col_names <- colnames(data) + categorical_vars <- col_names[grepl(col_names, pattern = "label_")] + data <- cbind(data[c("geneID", categorical_vars)], df_effects_with0) + return(data) +} + + + #' Prepare data using effects from a normal distribution #' #' Prepares the data by generating effects from a normal distribution for each gene. #' #' @param list_var A list of variables (already initialized) #' @param n_genes Number of genes to generate data for. +#' @param fix_reference_effect A logical value indicating whether the effect of the reference label should be fixed to zero. If set to TRUE, the effect of the +#' reference label is constrained to zero, ensuring that it does not contribute to the model. If set to FALSE, the effect from the reference label is picked +#' randomly from the distribution specified by the user. This option works only when `normal_distr` is set to 'univariate'. Default is FALSE. #' @return A dataframe containing gene metadata and effects generated from a normal distribution. #' @export -getDataFromRnorm <- function(list_var, n_genes){ +getDataFromRnorm <- function(list_var, n_genes, fix_reference_effect = FALSE){ ## -- check if all data have been provided by user if (is.null(getInput2mvrnorm(list_var)$covMatrix)) return(list()) - metadata <- getGeneMetadata(list_var , n_genes) df_effects <- get_effects_from_rnorm(list_var, metadata) data <- cbind(metadata, df_effects) + + if (isTRUE(fix_reference_effect)){ + l_labels_ref <- getRefLevel(data) + data <- replaceReferenceEffectBy0(input_var_list, l_labels_ref, data) + } + return(list(data)) - } #' Generate effects from a normal distribution @@ -2186,6 +2247,63 @@ test_that("All negative values", { + +# Test for getRefLevel function +test_that("getRefLevel returns correct reference levels", { + # Create a sample data frame + data <- data.frame( + label_genotype = factor(c("A", "B", "A", "B")), + label_environment = factor(c("X", "Y", "X", "Y")) + ) + + # Expected reference levels + expected_ref_levels <- list( + label_genotype = "A", + label_environment = "X" + ) %>% unlist() + + # Test getRefLevel function + ref_levels <- getRefLevel(data) + + # Check if reference levels match the expected ones + expect_identical(ref_levels, expected_ref_levels) +}) + +# Test for replaceReferenceEffectBy0 function +test_that("replaceReferenceEffectBy0 replaces reference effects correctly", { + + input_var_list <- init_variable( name = "genotype", mu = 0, sd = 2.18, level = 2) %>% + init_variable( name = "env", mu = 0, sd = 0.57, level = 4 ) %>% + init_variable(name = "T", mu = 0, sd = 0.39, level = 3) %>% + add_interaction(between_var = c("genotype", "env"), mu = 0 , sd = 1.018) %>% + add_interaction(between_var = c("genotype", "env", "T"), mu = 0 , sd = 1.018) + + + N_GENES <- 10 + l_dataFrom_normdistr <- getDataFromRnorm(input_var_list, N_GENES) + metadata <- getGeneMetadata(input_var_list , N_GENES) + df_effects <- get_effects_from_rnorm(input_var_list, metadata) + data <- cbind(metadata, df_effects) + + l_labels_ref <- getRefLevel(data) + data <- replaceReferenceEffectBy0(input_var_list, l_labels_ref, data) + + # Check if modified data matches the expected data + expect_identical(colnames(data), c("geneID","label_genotype","label_env", "label_T", "genotype", "env" , "T", "genotype:env" ,"genotype:env:T")) + expected_data <- data.frame(geneID = "gene1", label_genotype = "genotype1", label_env = "env1", label_T = "T1", genotype = 0, + env = 0, T = 0, "genotype:env" = 0, "genotype:env:T" = 0 ) + colnames(expected_data) <- c("geneID", "label_genotype", "label_env", "label_T", "genotype", + "env", "T" , "genotype:env", "genotype:env:T" ) + expected_data$label_genotype <- factor(expected_data$label_genotype , levels = c("genotype1", "genotype2")) + expected_data$label_env <- factor(expected_data$label_env , levels = c("env1", "env2", "env3", "env4")) + expected_data$label_T <- factor(expected_data$label_T , levels = c("T1", "T2", "T3")) + + expect_identical(data[1,], expected_data ) + +}) + + + ``` @@ -2295,6 +2413,9 @@ warning_too_low_mu_ij_row <- function(mu_ij_matrix, threshold = 1 ){ #' @param normal_distr Specifies the distribution type for generating effects. Choose between 'univariate' or 'multivariate' (default). #' - 'univariate': Effects are drawn independently from univariate normal distributions. #' - 'multivariate': Effects are drawn jointly from a multivariate normal distribution. +#' @param fix_reference_effect A logical value indicating whether the effect of the reference label should be fixed to zero. If set to TRUE, the effect of the +#' reference label is constrained to zero, ensuring that it does not contribute to the model. If set to FALSE, the effect from the reference label is picked +#' randomly from the distribution specified by the user. This option works only when `normal_distr` is set to 'univariate'. Default is FALSE. #' @return List containing the ground truth, counts, and metadata #' @export #' @examples @@ -2303,10 +2424,10 @@ warning_too_low_mu_ij_row <- function(mu_ij_matrix, threshold = 1 ){ #' max_replicates = 4) mock_rnaseq <- function(list_var, n_genes, min_replicates, max_replicates, sequencing_depth = NULL, basal_expression = 0 , dispersion = stats::runif(n_genes, min = 0, max = 1000), - normal_distr = "multivariate") { + normal_distr = "multivariate", fix_reference_effect = FALSE) { ## -- get my effect - df_inputSimulation <- getInput2simulation(list_var, n_genes, normal_distr) + df_inputSimulation <- getInput2simulation(list_var, n_genes, normal_distr, fix_reference_effect ) ## -- add column logQij df_inputSimulation <- getLog_qij(df_inputSimulation) df_inputSimulation <- addBasalExpression(df_inputSimulation, n_genes, basal_expression) diff --git a/man/getDataFromRnorm.Rd b/man/getDataFromRnorm.Rd index a9354eb..afcdf4a 100644 --- a/man/getDataFromRnorm.Rd +++ b/man/getDataFromRnorm.Rd @@ -4,12 +4,16 @@ \alias{getDataFromRnorm} \title{Prepare data using effects from a normal distribution} \usage{ -getDataFromRnorm(list_var, n_genes) +getDataFromRnorm(list_var, n_genes, fix_reference_effect = FALSE) } \arguments{ \item{list_var}{A list of variables (already initialized)} \item{n_genes}{Number of genes to generate data for.} + +\item{fix_reference_effect}{A logical value indicating whether the effect of the reference label should be fixed to zero. If set to TRUE, the effect of the +reference label is constrained to zero, ensuring that it does not contribute to the model. If set to FALSE, the effect from the reference label is picked +randomly from the distribution specified by the user. This option works only when \code{normal_distr} is set to 'univariate'. Default is FALSE.} } \value{ A dataframe containing gene metadata and effects generated from a normal distribution. diff --git a/man/getInput2simulation.Rd b/man/getInput2simulation.Rd index f50ce20..a4cca47 100644 --- a/man/getInput2simulation.Rd +++ b/man/getInput2simulation.Rd @@ -8,6 +8,7 @@ getInput2simulation( list_var, n_genes = 1, normal_distr = "multivariate", + fix_reference_effect = FALSE, input2mvrnorm = NULL ) } @@ -22,6 +23,10 @@ getInput2simulation( \item 'multivariate': Effects are drawn jointly from a multivariate normal distribution. }} +\item{fix_reference_effect}{A logical value indicating whether the effect of the reference label should be fixed to zero. If set to TRUE, the effect of the +reference label is constrained to zero, ensuring that it does not contribute to the model. If set to FALSE, the effect from the reference label is picked +randomly from the distribution specified by the user. This option works only when \code{normal_distr} is set to 'univariate'. Default is FALSE.} + \item{input2mvrnorm}{Input to the \code{mvrnorm} function for simulating data from multivariate normal distribution (default: NULL)} } \value{ diff --git a/man/getRefLevel.Rd b/man/getRefLevel.Rd new file mode 100644 index 0000000..6a29450 --- /dev/null +++ b/man/getRefLevel.Rd @@ -0,0 +1,18 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simulation.R +\name{getRefLevel} +\alias{getRefLevel} +\title{Get the reference level for categorical variables in the data} +\usage{ +getRefLevel(data) +} +\arguments{ +\item{data}{The data frame containing the categorical variables.} +} +\value{ +A list containing the reference level for each categorical variable. +} +\description{ +This function extracts the reference level for each categorical variable in the data. +The reference level is the first level encountered for each categorical variable. +} diff --git a/man/mock_rnaseq.Rd b/man/mock_rnaseq.Rd index c68b463..0d86249 100644 --- a/man/mock_rnaseq.Rd +++ b/man/mock_rnaseq.Rd @@ -12,7 +12,8 @@ mock_rnaseq( sequencing_depth = NULL, basal_expression = 0, dispersion = stats::runif(n_genes, min = 0, max = 1000), - normal_distr = "multivariate" + normal_distr = "multivariate", + fix_reference_effect = FALSE ) } \arguments{ @@ -35,6 +36,10 @@ mock_rnaseq( \item 'univariate': Effects are drawn independently from univariate normal distributions. \item 'multivariate': Effects are drawn jointly from a multivariate normal distribution. }} + +\item{fix_reference_effect}{A logical value indicating whether the effect of the reference label should be fixed to zero. If set to TRUE, the effect of the +reference label is constrained to zero, ensuring that it does not contribute to the model. If set to FALSE, the effect from the reference label is picked +randomly from the distribution specified by the user. This option works only when \code{normal_distr} is set to 'univariate'. Default is FALSE.} } \value{ List containing the ground truth, counts, and metadata diff --git a/man/replaceReferenceEffectBy0.Rd b/man/replaceReferenceEffectBy0.Rd new file mode 100644 index 0000000..2a8380e --- /dev/null +++ b/man/replaceReferenceEffectBy0.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/simulation.R +\name{replaceReferenceEffectBy0} +\alias{replaceReferenceEffectBy0} +\title{Replace the reference effect by 0 in the data} +\usage{ +replaceReferenceEffectBy0(list_var, l_labels_ref, data) +} +\arguments{ +\item{list_var}{The list of variables containing the effects to modify.} + +\item{l_labels_ref}{A list containing the reference level for each categorical variable.} + +\item{data}{The data frame containing the effects to modify.} +} +\value{ +The modified data frame with reference effects replaced by 0. +} +\description{ +This function replaces the effect corresponding to the reference level with 0 in the data. +} diff --git a/tests/testthat/test-simulation.R b/tests/testthat/test-simulation.R index d43b959..28e804d 100644 --- a/tests/testthat/test-simulation.R +++ b/tests/testthat/test-simulation.R @@ -248,3 +248,60 @@ test_that("All negative values", { + +# Test for getRefLevel function +test_that("getRefLevel returns correct reference levels", { + # Create a sample data frame + data <- data.frame( + label_genotype = factor(c("A", "B", "A", "B")), + label_environment = factor(c("X", "Y", "X", "Y")) + ) + + # Expected reference levels + expected_ref_levels <- list( + label_genotype = "A", + label_environment = "X" + ) %>% unlist() + + # Test getRefLevel function + ref_levels <- getRefLevel(data) + + # Check if reference levels match the expected ones + expect_identical(ref_levels, expected_ref_levels) +}) + +# Test for replaceReferenceEffectBy0 function +test_that("replaceReferenceEffectBy0 replaces reference effects correctly", { + + input_var_list <- init_variable( name = "genotype", mu = 0, sd = 2.18, level = 2) %>% + init_variable( name = "env", mu = 0, sd = 0.57, level = 4 ) %>% + init_variable(name = "T", mu = 0, sd = 0.39, level = 3) %>% + add_interaction(between_var = c("genotype", "env"), mu = 0 , sd = 1.018) %>% + add_interaction(between_var = c("genotype", "env", "T"), mu = 0 , sd = 1.018) + + + N_GENES <- 10 + l_dataFrom_normdistr <- getDataFromRnorm(input_var_list, N_GENES) + metadata <- getGeneMetadata(input_var_list , N_GENES) + df_effects <- get_effects_from_rnorm(input_var_list, metadata) + data <- cbind(metadata, df_effects) + + l_labels_ref <- getRefLevel(data) + data <- replaceReferenceEffectBy0(input_var_list, l_labels_ref, data) + + # Check if modified data matches the expected data + expect_identical(colnames(data), c("geneID","label_genotype","label_env", "label_T", "genotype", "env" , "T", "genotype:env" ,"genotype:env:T")) + expected_data <- data.frame(geneID = "gene1", label_genotype = "genotype1", label_env = "env1", label_T = "T1", genotype = 0, + env = 0, T = 0, "genotype:env" = 0, "genotype:env:T" = 0 ) + colnames(expected_data) <- c("geneID", "label_genotype", "label_env", "label_T", "genotype", + "env", "T" , "genotype:env", "genotype:env:T" ) + expected_data$label_genotype <- factor(expected_data$label_genotype , levels = c("genotype1", "genotype2")) + expected_data$label_env <- factor(expected_data$label_env , levels = c("env1", "env2", "env3", "env4")) + expected_data$label_T <- factor(expected_data$label_T , levels = c("T1", "T2", "T3")) + + expect_identical(data[1,], expected_data ) + +}) + + + -- GitLab