flat_full.Rmd

---
title: "flat_full.Rmd for working package"
output: html_document
editor_options: 
chunk_output_type: console
---

<!-- Run this 'development' chunk -->
<!-- Store every call to library() that you need to explore your functions -->

```{r development, include=FALSE}
library(testthat)
```

<!--
 You need to run the 'description' chunk in the '0-dev_history.Rmd' file before continuing your code there.

If it is the first time you use {fusen}, after 'description', you can directly run the last chunk of the present file with inflate() inside.
--> 

```{r development-load}
# Load already included functions if relevant
pkgload::load_all(export_all = FALSE)
```


```{r function-utils, filename = "utils"}
#' Join two data frames using data.table
#'
#' @param d1 Data frame 1
#' @param d2 Data frame 2
#' @param k1 Key columns for data frame 1
#' @param k2 Key columns for data frame 2
#' @importFrom data.table data.table
#' @return Joined data frame
#' @export
#'
#' @examples
#'
#' # Example usage:
#' df1 <- data.frame(id = 1:5, value = letters[1:5])
#' df2 <- data.frame(id = 1:5, category = LETTERS[1:5])
#' join_dtf(df1, df2, "id", "id")
join_dtf <- function(d1, d2, k1, k2) {
  d1.dt_table <- data.table::data.table(d1, key = k1)
  d2.dt_table <- data.table::data.table(d2, key = k2)
  dt_joined <- d1.dt_table[d2.dt_table, allow.cartesian = TRUE]
  return(dt_joined %>% as.data.frame())
}

#' Finds the index of the first non-null element in a list.
#'
#' This function searches a list and returns the index of the first non-null element.
#'
#' @param lst The list to search.
#' @return The index of the first non-null element, or NULL if no non-null element is found.
#' @export
#' 
#' @examples
#' my_list <- list(NULL, NULL, 3, 5, NULL)
#' first_non_null_index(my_list)  # Returns 3
first_non_null_index <- function(lst) {
  for (i in seq_along(lst)) {
    if (!is.null(lst[[i]])) {
      return(i)
    }
  }
  return(NULL)
}


#' Detect rows in a matrix with all values below a given threshold
#'
#' This function detects rows in a matrix where all values are below a specified threshold.
#'
#' @param matrix The input matrix
#' @param threshold The threshold value
#' @return A logical vector indicating rows below the threshold
#' @export
detect_row_matx_bellow_threshold <- function(matrix, threshold) {
    apply(matrix, 1, function(row) all(row < threshold))
}


#' Clean Variable Name
#'
#' This function removes digits, spaces, and special characters from a variable name.
#' If any of these are present, they will be replaced with an underscore '_'.
#'
#' @param name The input variable name to be cleaned.
#' @return The cleaned variable name without digits, spaces, or special characters.
#'
#' @details
#' This function will check the input variable name for the presence of digits,
#' spaces, and special characters. If any of these are found, they will be removed
#' from the variable name and replaced with an underscore '_'. Additionally, it will
#' check if the cleaned name is not one of the reserved names "interactions" or
#' "correlations" which are not allowed as variable names.
#' @export
#' @examples
#' clean_variable_name("my_var_12349309spe ")
clean_variable_name <- function(name){
      message("Variable name should not contain digits, spaces, or special characters.\nIf any of these are present, they will be removed from the variable name.")
      # avoid space in variable name
      name <- gsub(" ", "_", name, fixed = TRUE)
      # avoid digit in variable name
      name <-  gsub("[0-9]", "", name)
      # avoid special character in variable name
      name <-  gsub("[[:punct:]]", "", name)
  
      forbidden_names <- c("interactions", "correlations")
      if (name %in% forbidden_names) {
        forbidden_str <- paste(forbidden_names, collapse = " and ")
        stop(forbidden_str, "cannot be used as variable name")
      }
      return(name)
    
}

#' Convert specified columns to factor
#'
#' @param data The input data frame
#' @param columns The column names to be converted to factors
#' @return The modified data frame with specified columns converted to factors
#' @export
#' @examples
#' data <- data.frame( Category1 = c("A", "B", "A", "B"),
#'                      Category2 = c("X", "Y", "X", "Z"),
#'                      Value = 1:4,
#'                      stringsAsFactors = FALSE )
#' ## -- Convert columns to factors
#' convert2Factor(data, columns = c("Category1", "Category2"))
convert2Factor <- function(data, columns) {
  if (is.character(columns)) {
    columns <- match(columns, colnames(data))
  }

  if (length(columns) > 1) data[, columns] <- lapply(data[, columns], as.factor )
  else data[, columns] <- as.factor(data[, columns])
  return(data)
}

#' Get Setting Table
#'
#' Create a table of experimental settings.
#'
#' This function takes various experimental parameters and returns a data frame
#' that represents the experimental settings.
#'
#' @param n_genes Number of genes in the experiment.
#' @param max_replicates Maximum number of replicates for each gene.
#' @param min_replicates Minimum number of replicates for each gene.
#' @param lib_size  total number of reads
#'
#' @return A data frame containing the experimental settings with their corresponding values.
#' @export
getSettingsTable <- function(n_genes, max_replicates, min_replicates, lib_size ){
  
  settings_df <- data.frame(parameters = c("# genes", "Max # replicates", "Min # replicates", "Library size" ),
                            values = c(n_genes, max_replicates, min_replicates, lib_size))
  rownames(settings_df) <- NULL
  
  return(settings_df)
}


#' Check if a matrix is positive definite
#' This function checks whether a given matrix is positive definite, i.e., all of its eigenvalues are positive.
#' @param mat The matrix to be checked.
#' @return A logical value indicating whether the matrix is positive definite.
#' @export
#' @examples
#' # Create a positive definite matrix
#' mat1 <- matrix(c(4, 2, 2, 3), nrow = 2)
#' is_positive_definite(mat1)
#' # Expected output: TRUE
#'
#' # Create a non-positive definite matrix
#' mat2 <- matrix(c(4, 2, 2, -3), nrow = 2)
#' is_positive_definite(mat2)
#' # Expected output: FALSE
#'
#' # Check an empty matrix
#' mat3 <- matrix(nrow = 0, ncol = 0)
#' is_positive_definite(mat3)
#' # Expected output: TRUE
#'
is_positive_definite <- function(mat) {
  if (nrow(mat) == 0 && ncol(mat) == 0) return(TRUE)
  eigenvalues <- eigen(mat)$values
  all(eigenvalues > 0)
}


#' Get the list of variable names
#'
#' @param list_var R list, e.g., output of init_variable
#'
#' @return
#' A character vector with the names of variables
#' @examples
#' getListVar(init_variable())
#' @export
getListVar <- function(list_var) attributes(list_var)$names

#' Get a given attribute from a list of variables
#'
#' @param list_var A list of variables (already initialized with init_variable)
#' @param attribute A string specifying the attribute to retrieve in all occurrences of the list
#' @export
#' @return
#' A list without NULL values
#' @examples
#' getGivenAttribute(init_variable(), "level")
getGivenAttribute <- function(list_var, attribute) {
  l <- lapply(list_var, FUN = function(var) var[[attribute]]) 
  l_withoutNull <- l[!vapply(l, is.null, logical(1))]
  return(l_withoutNull)
}


#' Get labels for variables
#'
#' @param l_variables2labelized A list of variables
#' @param l_nb_label A list of numeric values representing the number of levels per variable
#' @export
#' @return
#' A list of labels per variable
#' 
#' @examples
#' labels <- getLabels(c("varA", "varB"), c(2, 3))
getLabels <- function(l_variables2labelized, l_nb_label) {
  getVarNameLabel <- function(name, level) {
    list_label <- paste(name, 1:level, sep = "")
    return(list_label)
  }
  
  listLabels <- Map(getVarNameLabel, l_variables2labelized, l_nb_label)
  return(listLabels)
}


#' getGridCombination
#'
#' Generates all possible combinations of labels.
#'
#' @param l_labels List of label vectors
#'
#' @return A data frame with all possible combinations of labels
#' @export
#'
#' @examples
#' l_labels <- list(
#'   c("A", "B", "C"),
#'   c("X", "Y")
#' )
#' getGridCombination(l_labels)
getGridCombination <- function(l_labels) {
  grid <- expand.grid(l_labels)
  colnames(grid) <- paste("label", seq_along(l_labels), sep = "_")
  return(grid)
}


#' Get grid combination from list_var
#'
#' @param list_var A list of variables (already initialized)
#'
#' @return
#' The grid combination between variable in list_var
#' @export
#' @examples
#' generateGridCombination_fromListVar(init_variable())
generateGridCombination_fromListVar <- function (list_var){
  l_levels <- getGivenAttribute(list_var, "level") %>% unlist()
  vars <- names(l_levels)
  l_levels <- l_levels[vars]
  l_labels <- getLabels(l_variables2labelized = vars, l_nb_label = l_levels)
  gridComb <- getGridCombination(l_labels)
  colnames(gridComb) <- paste("label", vars, sep = "_")
  return(gridComb)
}

#' Remove Duplicated Words from Strings
#'
#' This function takes a vector of strings and removes duplicated words within each string.
#'
#' @param strings A character vector containing strings with potential duplicated words.
#' @return A character vector with duplicated words removed from each string.
#' @export
#' @examples
#' words <- c("hellohello", "worldworld", "programmingprogramming", "R isis great", "duplicateeee1333")
#' cleaned_words <- removeDuplicatedWord(words)
removeDuplicatedWord <- function(strings){
  gsub("([A-Za-z]{1,})(\\1{1,})", "\\1", strings, perl = TRUE)
  #gsub("(.*)\\1+", "\\1", strings, perl = TRUE)
}


#' Reorder the columns of a dataframe
#'
#' This function reorders the columns of a dataframe according to the specified column order.
#'
#' @param df The input dataframe.
#' @param columnOrder A vector specifying the desired order of columns.
#'
#' @return A dataframe with columns reordered according to the specified column order.
#' @export
#' @examples
#' # Example dataframe
#' df <- data.frame(A = 1:3, B = 4:6, C = 7:9)
#'
#' # Define the desired column order
#' columnOrder <- c("B", "C", "A")
#'
#' # Reorder the columns of the dataframe
#' df <- reorderColumns(df, columnOrder)
reorderColumns <- function(df, columnOrder) {
  df <- df[, columnOrder, drop = FALSE]
  return(df)
}


#' Check if a list of glmmTMB objects is valid
#'
#' This function checks if a list of glmmTMB objects is valid. It ensures that the input 
#' list contains glmmTMB objects generated by the `fitModelParallel` function.
#'
#' @param list_tmb A list of glmmTMB objects.
#' @return TRUE if the list is valid, otherwise an error is thrown.
#' @export
isValidList_tmb <- function(list_tmb) {
  stopifnot(is.list(list_tmb))
  
  if (all(sapply(list_tmb, is.null))) {
    stop("All elements in 'list_tmb' are NULL")
  }
  
  invisible(lapply(names(list_tmb), function(i) isValidGlmmTmb(i, list_tmb[[i]])))
  return(TRUE)
}

#' Check if a glmmTMB object is valid
#'
#' This function checks if a glmmTMB object is valid. It ensures that the input object 
#' is a glmmTMB object generated by the `fitModelParallel` function.
#'
#' @param i The name of the object being checked.
#' @param obj The glmmTMB object being checked.
#' @return TRUE if the object is valid, otherwise an error is thrown.
#' @export
isValidGlmmTmb <- function(i, obj) {
  if (is.null(obj)) {
    return(TRUE)
  }
  
  if (!inherits(obj, "glmmTMB")) {
    stop(paste("Element", i, "is not a glmmTMB object. 'list_tmb' should be generated by fitModelParallel"))
  }
  return(TRUE)
}


#' Checks if an object corresponds to a mock object generated by `mock_rnaseq()`.
#'
#' This function verifies if the provided object matches the structure of a mock object generated
#' by `mock_rnaseq()`. A mock object should contain specific named elements: "settings", "init",
#' "groundTruth", "counts", and "metadata".
#'
#' @param obj Object to be checked.
#' @return TRUE or error message
#' @export
isValidMock_obj <- function(obj) {
  message_err <- "'mock_obj' does not correspond to HTRfit mock_obj. 'mock_obj' can be generated using mock_rnaseq()."
  
  if (!is.list(obj)) {
    stop(message_err)
  }
  
  expected_names <- c("settings", "init", "groundTruth", "counts", "metadata", "scaling_factors")
  
  if (!all(expected_names %in% names(obj))) {
    stop(message_err)
  }
  
  if (!all(names(obj) %in% expected_names)){
    warning("Unexpected list element in 'mock_obj'")
  }
    
  return(TRUE)
}


clear_memory <- function(except_obj){
  rm(list = setdiff(ls(), except_obj)) ; invisible(gc( reset = TRUE, verbose = FALSE ))
}


#' Custom Theme for htrfit Plots
#'
#' This function defines a custom theme for plots created with htrfit package.
#' It sets various visual parameters such as background color, grid lines, axis lines, etc.,
#' to provide a consistent and aesthetically pleasing appearance to the plots.
#'
#' @return A ggplot2 theme object with customized visual parameters.
#'
#' @importFrom ggplot2 theme element_rect element_line element_blank element_text
#' @export
theme_htrfit <- function() {
  ggplot2::theme(
    panel.background = ggplot2::element_rect("#ffffff", "#ffffff"),
    panel.grid.major = ggplot2::element_line(color = "#826A50", linetype = "dashed", size = 0.02),
    panel.grid.minor = ggplot2::element_blank(),
    panel.border = ggplot2::element_blank(),
    axis.line.x = ggplot2::element_line(size = 0.5, linetype = "solid", colour = "#362C21"),
    axis.line.y = ggplot2::element_line(size = 0.5, linetype = "solid", colour = "#362C21"),
    strip.background = ggplot2::element_rect(
      color = "white", fill = "#ECEFF1", linewidth = 0.5, linetype = "solid"
    ),
    axis.title = ggplot2::element_text(
      size = 10, color = "black",
      face = "italic"
    ),
    legend.title = ggplot2::element_text(
      size = 10, color = "black",
      face = "italic"
    )
  )
}


```


```{r tests-utils}

# Test for first_non_null_index function
test_that("first_non_null_index returns the correct index", {
  lst <- list(NULL, NULL, 3, 5, NULL)
  expect_equal(first_non_null_index(lst), 3)
})


# Test unitaires pour la fonction join_dtf
test_that("join_dtf réalise la jointure correctement", {
  # Création de données de test
  df1 <- data.frame(id = 1:5, value = letters[1:5])
  df2 <- data.frame(id = 1:5, category = LETTERS[1:5])
  
  # Exécution de la fonction
  result <- join_dtf(df1, df2, "id", "id")
  
  # Vérification des résultats
  expect_true(is.data.frame(result))
  expect_equal(nrow(result), 5)
  expect_equal(ncol(result), 3)
  expect_equal(names(result), c("id", "value", "category"))
  expect_true(all.equal(result$id, df1$id))
  expect_true(all.equal(result$id, df2$id))
})


test_that("clean_variable_name correctly removes digits, spaces, and special characters", {
  expect_equal(clean_variable_name("my variable name"), "myvariablename")
  expect_equal(clean_variable_name("variable_1"), "variable")
  expect_equal(clean_variable_name("^spec(ial#chars! "), "specialchars")
})

test_that("clean_variable_name handles reserved names properly", {
  expect_error(clean_variable_name("interactions"))
  expect_error(clean_variable_name("correlations"))
})


test_that("getLabels generates labels for variables", {
  labels <- getLabels(c("varA", "varB"), c(2, 3))
  expect_equal(length(labels), 2)
  expect_equal(length(labels[[1]]), 2)
  expect_equal(length(labels[[2]]), 3)
})

test_that("getGridCombination generates a grid of combinations", {
  labels <- list(A = c("A1", "A2"), B = c("B1", "B2", "B3"))
  grid_combination <- getGridCombination(labels)
  expect_equal(dim(grid_combination), c(6, 2))
})


test_that("generateGridCombination_fromListVar returns expected output", {
  result <- generateGridCombination_fromListVar(init_variable())
  expect <- data.frame(label_myVariable = c("myVariable1", "myVariable2"))
  expect_equal(nrow(result), nrow(expect))
  expect_equal(ncol(result), ncol(expect))
})

# Tests for convert2Factor
test_that("convert2Factor converts specified columns to factors", {
  # Create a sample data frame
  data <- data.frame(
    Category1 = c("A", "B", "A", "B"),
    Category2 = c("X", "Y", "X", "Z"),
    Value = 1:4,
    stringsAsFactors = FALSE
  )
  
  # Convert columns to factors
  result <- convert2Factor(data, columns = c("Category1", "Category2"))
  
  # Check the output
  expect_is(result$Category1, "factor")  # Category1 column converted to factor
  expect_is(result$Category2, "factor")  # Category2 column converted to factor
})

test_that("removeDuplicatedWord returns expected output", {
  words <- c("hellohello", "worldworld", "programmingprogramming", "R isis great")
  cleaned_words <- removeDuplicatedWord(words)
  expect_equal(cleaned_words, c("hello", "world", "programming", "R is great"))
})


# Test for detect_row_matx_bellow_threshold function
test_that("detect_row_matx_bellow_threshold detects rows below threshold", {
  # Create a sample matrix
  matrix <- matrix(c(0.5, 0.7, 1.2, 0.2, 0.9, 0.9), nrow = 2)
  # Test with threshold 1
  expect_equal(detect_row_matx_bellow_threshold(matrix, 1), c(FALSE, TRUE))
  # Test with threshold 0.5
  expect_equal(detect_row_matx_bellow_threshold(matrix, 0.5), c(FALSE, FALSE))
  expect_equal(detect_row_matx_bellow_threshold(matrix, 2), c(TRUE, TRUE))
})


test_that("reorderColumns returns expected output",{
    df <- data.frame(A = 1:3, B = 4:6, C = 7:9)
    # Define the desired column order
    columnOrder <- c("B", "C", "A")
    # Reorder the columns of the dataframe
    df_reorder <- reorderColumns(df, columnOrder)
    expect_equal(colnames(df_reorder), columnOrder)
    expect_equal(dim(df_reorder), dim(df))

})


test_that( "generateGridCombination_fromListVar return expected output", {
    ## case 1
    gridcom <- generateGridCombination_fromListVar(init_variable())
    expect_s3_class(gridcom, "data.frame")
    expect_equal(gridcom$label_myVariable, factor(c("myVariable1", "myVariable2")))

    ## case 2
    init_variables <- init_variable() %>% init_variable(name = "var" , mu = 2, sd = 1, level = 3) 
    gridcom <- generateGridCombination_fromListVar(init_variables)
    expect_s3_class(gridcom, "data.frame")
    expect_equal(unique(gridcom$label_myVariable), factor(c("myVariable1", "myVariable2")))
    expect_equal(unique(gridcom$label_var), factor(c("var1", "var2", "var3")))

  })

test_that( "getGivenAttribute return expected output", {
  ## -- case 1
  level_attr <- getGivenAttribute(init_variable(), "level")
  expect_equal(level_attr$myVariable, 2)

  ## -- case 2
  init_variables <- init_variable() %>% init_variable(name = "var" , mu = 2, sd = 1, level = 3) 
  mu_attr <- getGivenAttribute(init_variables, "mu")
  expect_equal(mu_attr$var, 2)
} )


test_that("isValidList_tmb function", {
  # Test with a valid list of glmmTMB objects
  l_tmb <- list("model1" = glmmTMB::glmmTMB(mpg ~ hp + vs + am + (1|cyl), data = mtcars),
                 "model2" = glmmTMB::glmmTMB(mpg ~ hp + vs + am + (1|cyl), data = mtcars))
  expect_true(isValidList_tmb(l_tmb))
  
  # Test with a list containing NULL elements
  expect_error(isValidList_tmb(list(NULL, NULL)), "All elements in 'list_tmb' are NULL")
  
  # Test with an empty list
  expect_error(isValidList_tmb(list()), "All elements in 'list_tmb' are NULL")
})

test_that("isValidGlmmTmb function", {
  # Test with a valid glmmTMB object
  valid_model <- glmmTMB::glmmTMB(mpg ~ hp + vs + am + (1|cyl), data = mtcars)
  expect_true(isValidGlmmTmb("model", valid_model))
  
  # Test with an invalid object (not a glmmTMB object)
  invalid_object <- list(a = 1, b = 2)
  expect_error(isValidGlmmTmb("object", invalid_object), "Element object is not a glmmTMB object.")
  
  # Test with NULL object
  expect_true(isValidGlmmTmb("model", NULL))
})


# Mock object

test_that("isValidMock_obj checks if the provided object is a valid mock object", {
  mock_obj <- mock_rnaseq(init_variable(), n_genes = 100, 4, 4)

  # Test with a valid mock object
  expect_true(isValidMock_obj(mock_obj))
  
  # Test with an object missing an element
  missing_element_obj <- list(settings = list(), init = list(), groundTruth = list(), counts = list())
  expect_error(isValidMock_obj(missing_element_obj))
  
  # Test with an object containing additional elements
  additional_element_obj <- mock_obj
  additional_element_obj$error_name <- list()
  expect_warning(isValidMock_obj(additional_element_obj))
})

```


```{r function-init_variable, filename = "simulation_initialization"}
#' Initialize variable
#'
#' @param list_var Either c() or output of init_variable
#' @param name Variable name
#' @param sd Either numeric value or NA. Use to specify range of effect sizes.
#' @param level Numeric value to specify the number of levels to simulate. Default = 2.
#' @param mu Either a numeric value or a numeric vector (of length = level). Default : 0. Not recommended to modify.
#'
#' @return
#' A list with initialized variables
#' @export
#'
#' @examples
#' init_variable(name = "my_varA", sd = 0.50, level = 200)
init_variable <- function(list_var = c(), name = "myVariable", sd = 0.2, level = 2, mu = 0) {
  
  name <- clean_variable_name(name)
  
  # Only mu specified by user => set level param
  if (is.na(sd)) {
    level <- length(mu)
  }
  
  # Validate inputs
  inputs_checking(list_var, name, mu, sd, level)
  
  if (endsWithDigit(name)) {
    warning("Names ending with digits are not allowed. They will be removed from the variable name.")
    name <- removeDigitsAtEnd(name)
  }
  
  # Initialize new variable
  list_var[[name]] <- fillInVariable(name, mu, sd, level)
  
  return(list_var)
}


#' Check if a string ends with a digit
#'
#' This function checks whether a given string ends with a digit.
#'
#' @param string The input string to be checked
#' @return \code{TRUE} if the string ends with a digit, \code{FALSE} otherwise
#' @export
#' @examples
#' endsWithDigit("abc123")  # Output: TRUE
#' endsWithDigit("xyz")     # Output: FALSE
endsWithDigit <- function(string) {
  lastChar <- substring(string, nchar(string))
  return(grepl("[0-9]", lastChar))
}

#' Remove digits at the end of a string
#'
#' This function removes any digits occurring at the end of a given string.
#'
#' @param string The input string from which digits are to be removed
#' @return The modified string with digits removed from the end
#' @export
#' @examples
#' removeDigitsAtEnd("abc123")  # Output: "abc"
#' removeDigitsAtEnd("xyz")     # Output: "xyz"
removeDigitsAtEnd <- function(string) {
  return(gsub("\\d+$", "", string))
}


#' Check Input Parameters
#'
#' This function checks the validity of the input parameters for initializing a variable.
#' It ensures that the necessary conditions are met for the input parameters.
#'
#' @param list_var List containing the variables to be initialized.
#' @param name Name of the variable.
#' @param mu Mean of the variable.
#' @param sd Standard deviation of the variable (optional).
#' @param level Number of levels for categorical variables.
#' 
#' @return NULL
#' @export
#'
#' @examples
#' inputs_checking(list_var = c(), name = "var1", mu = 0, sd = 1, level = 2)
inputs_checking <- function(list_var, name, mu, sd, level) {
  stopifnot(name != "")
  stopifnot(is.character(name))
  stopifnot(is.numeric(mu))
  stopifnot(is.numeric(sd) | is.na(sd))
  stopifnot(is.numeric(level))
  stopifnot(length(level) == 1)
  stopifnot(level >= 2)

  if (!is.null(list_var)) {
    error_msg <- "Non conformable list_var parameter.\nlist_var must be set as an init_var output or initialized as c()"
    if (!is.list(list_var)) {
      stop(error_msg)
    }
  }

  if (length(mu) > 1) {
    stopifnot(length(mu) == level)
  }

  if (is.na(sd)) {
    if (level != length(mu)) {
      stop("sd was specified as NA. mu should have the same length as the number of levels\n")
    }
  }

  # Check if variable is already initialized
  name_not_in_list_var <- identical(which(already_init_variable(list_var, name)), integer(0))
  if (!name_not_in_list_var) {
    message(paste(name, "is already initialized in list_var.\nIt will be updated", sep = " "))
  }

  return(NULL)
}


#' Check if Variable is Already Initialized
#'
#' This function checks if a variable is already initialized in the variable list.
#'
#' @param list_var A list object representing the variable list.
#' @param new_var_name A character string specifying the name of the new variable.
#'
#' @return TRUE if the variable is already initialized, FALSE otherwise.
#' @export
#'
#' @examples
#' my_list <- list(var1 = 1, var2 = 2, var3 = 3)
#' already_initialized <- already_init_variable(list_var = my_list, new_var_name = "myVariable")
already_init_variable <- function(list_var, new_var_name) {
  if (is.null(list_var)) {
    return(FALSE)
  }
  
  var_names <- names(list_var)
  return(new_var_name %in% var_names)
}

#' Fill in Variable
#'
#' This function fills in a variable with simulated data based on the provided parameters.
#'
#' @param name The name of the variable.
#' @param mu A numeric value or a numeric vector (of length = level) representing the mean.
#' @param sd A numeric value representing the standard deviation, or NA if not applicable.
#' @param level A numeric value specifying the number of levels to simulate.
#'
#' @return A data frame or a list containing the simulated data for the variable.
#' @export
#'
#' @examples
#' variable_data <- fillInVariable(name = "myVariable", mu = c(2, 3), sd = NA, level = 2)
fillInVariable <- function(name, mu, sd, level) {
  
  if (length(mu) > 1 | is.na(sd)) {  # Effects given by user
    level <- length(mu)
    l_labels <- paste(name, 1:level, sep = '')
    l_betaEffects <- mu
    column_names <- c(paste("label", name, sep = "_"), name)
    sub_obj <- build_sub_obj_return_to_user(level, metaData = l_labels,
                                       effectsGivenByUser = l_betaEffects,
                                       column_names)
  } else {
    sub_obj <- as.data.frame(list(mu = mu, sd = sd, level = level))
  }
  
  return(sub_obj)  
}

#' Build Sub Object to Return to User
#'
#' This function builds the sub-object to be returned to the user.
#'
#' @param level A numeric value specifying the number of levels.
#' @param metaData A list of labels.
#' @param effectsGivenByUser A list of effects given by the user.
#' @param col_names A character vector specifying the column names to use.
#' @importFrom utils tail
#'
#' @return A list with the sub-object details.
build_sub_obj_return_to_user <- function(level, metaData, effectsGivenByUser, col_names) {
  sub_obj <- list(level = level)
  data <- cbind(metaData, effectsGivenByUser) %>% as.data.frame()
  colnames(data) <- col_names
  var_name <- utils::tail(col_names, n = 1)
  data[, var_name] <- as.numeric(data[, var_name])
  sub_obj$data <- data
  return(sub_obj)
}


#' Add interaction
#'
#' @param list_var A list of variables (already initialized)
#' @param between_var A vector of variable names to include in the interaction
#' @param sd Either numeric value or NA. Use to specify range of effect sizes. Default 0 for no interaction effects.
#' @param mu Either a numeric value or a numeric vector (of length = level). Default : 0. Not recommended to modify.

#'
#' @return
#' A list with initialized interaction
#' @export
#'
#' @examples
#' init_variable(name = "myvarA", sd = 3, level = 200) %>%
#' init_variable(name = "myvarB", sd = 0.2, level = 2 ) %>%
#' add_interaction(between_var = c("myvarA", "myvarB"), sd = 2)
add_interaction <- function(list_var, between_var, sd = 0, mu = 0) {
  name_interaction <- paste(between_var, collapse = ":")
  check_input2interaction(name_interaction, list_var, between_var, mu, sd)
  
  # Check the number of variables in the interaction
  if (length(between_var) > 3) {
    stop("Cannot initialize an interaction with more than 3 variables.")
  }
  
  interactionCombinations <- getNumberOfCombinationsInInteraction(list_var, between_var)
  list_var$interactions[[name_interaction]] <- fillInInteraction(list_var, between_var, mu, sd, interactionCombinations)
  return(list_var)
}

#' Check input for interaction
#'
#' @param name_interaction String specifying the name of the interaction (example: "varA:varB")
#' @param list_var A list of variables (already initialized)
#' @param between_var A vector of variable names to include in the interaction
#' @param mu Either a numeric value or a numeric vector (of length = level)
#' @param sd Either numeric value or NA
#'
#' @return
#' NULL (throws an error if the input is invalid)
#' @export
check_input2interaction <- function(name_interaction, list_var, between_var, mu, sd) {
  # Check if variables in between_var are declared and initialized
  bool_checkInteractionValidity <- function(between_var, list_var) {
    nb_varInInteraction <- length(unique(between_var))
    stopifnot(nb_varInInteraction > 1)
    existingVar_nb <- getListVar(list_var) %in% between_var %>% sum()
    if (existingVar_nb != nb_varInInteraction) {
      return(FALSE)
    } else {
      return(TRUE)
    }
  }
  
  bool_valid_interaction <- bool_checkInteractionValidity(between_var, list_var)
  if (!bool_valid_interaction) {
    stop("At least one variable in between_var is not declared. Variable not initialized cannot be used in an interaction.")
  }
  
  requestedNumberOfValues <- getNumberOfCombinationsInInteraction(list_var, between_var)
  if (is.na(sd) && requestedNumberOfValues != length(mu)) {
    msg_e <- "sd was specified as NA. mu should have the same length as the possible number of interactions:\n"
    msg_e2 <- paste(requestedNumberOfValues, "interaction values are requested.")
    stop(paste(msg_e, msg_e2))
  }
  
  level <- requestedNumberOfValues
  inputs_checking(list_var$interactions, name_interaction, mu, sd, level)
}

#' Get the number of combinations in an interaction
#'
#' @param list_var A list of variables (already initialized)
#' @param between A vector of variable names to include in the interaction
#'
#' @return
#' The number of combinations in the interaction
#' @export
getNumberOfCombinationsInInteraction <- function(list_var, between) {
  levelInlistVar <- getGivenAttribute(list_var, "level") %>% unlist()
  n_combinations <- prod(levelInlistVar[between]) 
  return(n_combinations)
}

#' Fill in interaction
#'
#' @param list_var A list of variables (already initialized)
#' @param between A vector of variable names to include in the interaction
#' @param mu Either a numeric value or a numeric vector (of length = level)
#' @param sd Either numeric value or NA
#' @param level Number of interactions
#'
#' @return
#' A data frame with the filled-in interaction values
#' @export
fillInInteraction <- function(list_var, between, mu, sd, level) {
  if (length(mu) > 1 || is.na(sd)) {
    l_levels <- getGivenAttribute(list_var, "level") %>% unlist()
    l_levelsOfInterest <- l_levels[between]
    l_labels_varOfInterest <- getLabels(l_variables2labelized = between, l_nb_label = l_levelsOfInterest ) 
    
    grid_combination <- getGridCombination(l_labels_varOfInterest)
    n_combinations <- dim(grid_combination)[1]
    column_names <- c(paste("label", between, sep = "_"), paste(between, collapse = ":"))
    sub_dtf <- build_sub_obj_return_to_user(level = n_combinations,
                                            metaData = grid_combination,
                                            effectsGivenByUser = mu, 
                                            col_names = column_names)
  } else {
    sub_dtf <- list(mu = mu, sd = sd, level = level) %>% as.data.frame()
  }
  
  return(sub_dtf)
}

```


```{r tests-init_variable}

test_that("endsWithDigit returns the correct result", {
  expect_true(endsWithDigit("abc123"))
  expect_false(endsWithDigit("xyz"))
})

test_that("removeDigitsAtEnd removes digits at the end of a string", {
  expect_equal(removeDigitsAtEnd("abc123"), "abc")
  expect_equal(removeDigitsAtEnd("xyz"), "xyz")
})


test_that("init_variable initializes a variable correctly", {
  # Test case 1: Initialize a variable with default parameters
  list_var <- init_variable()
  expect_true("myVariable" %in% names(list_var))

  # Test case 2: Initialize a variable with custom parameters
  list_var <- init_variable(name = "custom_variable", mu = c(1, 2, 3), sd = 0.5, level = 3)
  expect_true("customvariable" %in% names(list_var))
  expect_equal(nrow(list_var$customvariable$data), 3)
})

test_that("inputs_checking performs input validation", {
  
  # Test case 1: Invalid inputs - sd is NA but mu has unique values
  expect_error(inputs_checking(list_var = c(), name = "myVariable", mu = 2, sd = NA, level = 2))
  
  # Test case 2: Invalid inputs - empty name
  expect_error(inputs_checking(list_var = c(), name = "", mu = 2, sd = NA, level = 2))
  
  # Test case 3: Invalid inputs - non-numeric mu
  expect_error(inputs_checking(list_var = c(), name = "myVariable", mu = "invalid", sd = NA, level = 2))
  
  # Test case 4: Invalid inputs - non-numeric sd
  expect_error(inputs_checking(list_var = c(), name = "myVariable", mu = 2, sd = "invalid", level = 2))
  
  # Test case 5: Invalid inputs - level less than 2
  expect_error(inputs_checking(list_var = c(), name = "myVariable", mu = 2, sd = NA, level = 1))
  
  # Test case 6: Invalid inputs - mu and level have different lengths
  expect_error(inputs_checking(list_var = c(), name = "myVariable", mu = c(1, 2, 3), sd = NA, level = 2))
  
  # Test case 7: Valid inputs
  expect_silent(inputs_checking(list_var = c(), name = "myVariable", mu = c(1, 2, 3), sd = NA, level = 3))
})


test_that("already_init_variable checks if a variable is already initialized", {
  list_var <- init_variable()
  
  # Test case 1: Variable not initialized
  list_var <- init_variable(name = "custom_variable", mu = c(2, 3), sd = NA, level = 2)
  expect_true(already_init_variable(list_var, "customvariable"))
  
  # Test case 2: Variable already initialized 
  expect_false(already_init_variable(list_var, "myVariable"))
  
})

test_that("fillInVariable fills in variable correctly", {
  # Test case 1: Effects given by user
  sub_obj <- fillInVariable("myVariable", c(1, 2, 3), NA, NA)
  expect_equal(sub_obj$level, 3)
  expect_equal(ncol(sub_obj$data), 2)
  
  # Test case 2: Effects simulated using mvrnorm
  sub_obj <- fillInVariable("myVariable", 2, 0.5, 3)
  expect_equal(sub_obj$level, 3)
  expect_equal(sub_obj$sd, 0.5)
  expect_equal(sub_obj$mu, 2)
})

test_that("build_sub_obj_return_to_user returns the expected output", {
  level <- 3
  metaData <- paste("label", 1:level, sep = "_")
  effectsGivenByUser <- c(2, 3, 4)
  col_names <- c("metadata", "effects")
  
  result <- build_sub_obj_return_to_user(level, metaData, effectsGivenByUser, col_names)
  
  expect_equal(result$level, level)
  expect_identical(result$data$metadata, metaData)
  expect_identical(result$data$effects, effectsGivenByUser)
  
  
})

test_that("add_interaction adds an interaction between variables", {
  list_var <- init_variable(name = "varA", mu = 1, sd = 1, level = 2)
  list_var <- init_variable(list_var, name = "varB", mu = 2, sd = 1, level = 3)
  list_var <- add_interaction(list_var, between_var = c("varA", "varB"), mu = 0.5, sd = 3)
  expect_true("varA:varB" %in% names(list_var$interactions))
})

test_that("add_interaction throws an error for invalid variables", {
  list_var <- init_variable(name = "varA", mu = 1, sd = 1, level = 2)
  expect_error(add_interaction(list_var, between_var = c("varA", "varB"), mu = 0.5, sd = NA))
})


test_that("getNumberOfCombinationsInInteraction calculates the number of combinations", {
  list_var <- init_variable(name = "varA", mu = 1, sd = 1, level = 2)
  list_var <- init_variable(list_var, name = "varB", mu = 2, sd = 1, level = 3)
  expect_equal(getNumberOfCombinationsInInteraction(list_var, c("varA", "varB")), 6)
})
```

```{r function-mvrnorm, filename = "datafrommvrnorm_manipulations" }
#' getInput2mvrnorm
#'
#' @inheritParams init_variable
#'
#' @return
#' a list that can be used as input for MASS::mvrnorm
#' @export
#'
#' @examples
#' list_var <- init_variable(name = "my_var", mu = 0, sd = 2, level = 3)
#' getInput2mvrnorm(list_var)
getInput2mvrnorm <- function(list_var){
  # -- pick up sd provided by user
  variable_standard_dev <- getGivenAttribute(list_var, attribute = "sd") %>% unlist()
  interaction_standard_dev <- getGivenAttribute(list_var$interactions, attribute = "sd") %>% unlist()
  list_stdev_2covmatx <- c(variable_standard_dev, interaction_standard_dev)
  if (is.null(list_stdev_2covmatx)) ## NO SD provided
    return(list(mu = NULL, covMatrix = NULL))

  # - COV matrix
  covar_userProvided = getGivenAttribute(list_var$correlations, "covar")
  covMatrix <- getCovarianceMatrix(list_stdev_2covmatx, covar_userProvided)

  # -- MU
  variable_mu <- getGivenAttribute(list_var, attribute = "mu") %>% unlist()
  interaction_mu <- getGivenAttribute(list_var$interactions, attribute = "mu") %>% unlist()
  list_mu <- c(variable_mu, interaction_mu)

  return(list(mu = list_mu, covMatrix = covMatrix))

}


#' getCovarianceMatrix 
#' @param list_stdev standard deviation list
#' @param list_covar covariance list
#' 
#' @return
#' covariance matrix
#' @export
#'
#' @examples
#' vector_sd <- c(1,2, 3)
#' names(vector_sd) <- c("varA", "varB", "varC")
#' vector_covar <- c(8, 12, 24)
#' names(vector_covar) <- c("varA.varB", "varA.varC", "varB.varC")
#' covMatrix <- getCovarianceMatrix(vector_sd, vector_covar)
getCovarianceMatrix <- function(list_stdev, list_covar){
  # -- cov(A, A) = sd(A)^2
  diag_cov <- list_stdev^2
  dimension <- length(diag_cov)
  covariance_matrix <- matrix(0,nrow = dimension, ncol = dimension)
  diag(covariance_matrix) <- diag_cov
  colnames(covariance_matrix) <- paste("label", names(diag_cov), sep = "_")
  rownames(covariance_matrix) <- paste("label", names(diag_cov), sep = "_")
  names_covaration <- names(list_covar)

  ###### -- utils -- #####
  convertDF <- function(name, value){
    ret <- data.frame(value)
    colnames(ret) <- name
    ret
  }

  ## -- needed to use reduce after ;)
  l_covarUserDf <- lapply(names_covaration, function(n_cov) convertDF(n_cov, list_covar[n_cov] ))
  covariance_matrix2ret <- Reduce(fillInCovarMatrice, x = l_covarUserDf, init =  covariance_matrix)
  covariance_matrix2ret
}


#' Fill in Covariance Matrix
#'
#' This function updates the covariance matrix with the specified covariance value between two variables.
#'
#' @param covarMatrice The input covariance matrix.
#' @param covar A data frame containing the covariance value between two variables.
#' @return The updated covariance matrix with the specified covariance value filled in.
#' @export
#' @examples
#' covarMat <- matrix(0, nrow = 3, ncol = 3)
#' colnames(covarMat) <- c("label_varA", "label_varB", "label_varC")
#' rownames(covarMat) <- c("label_varA", "label_varB", "label_varC")
#' covarValue <- data.frame("varA.varB" = 0.5)
#' fillInCovarMatrice(covarMatrice = covarMat, covar = covarValue)
fillInCovarMatrice <- function(covarMatrice, covar){
  varsInCovar <- strsplit(colnames(covar), split = "[.]") %>% unlist()
  index_matrix <- paste("label",varsInCovar, sep  = "_")
  covar_value <- covar[1,1]
  covarMatrice[index_matrix[1], index_matrix[2]] <- covar_value
  covarMatrice[index_matrix[2], index_matrix[1]] <- covar_value
  return(covarMatrice)
}

#' getGeneMetadata
#'
#' @inheritParams init_variable
#' @param n_genes Number of genes to simulate
#'
#' @return
#' metadata matrix
#' 
#' @export
#'
#' @examples
#' list_var <- init_variable()
#' metadata <- getGeneMetadata(list_var, n_genes = 10)
getGeneMetadata <- function(list_var, n_genes) {
  metaData <- generateGridCombination_fromListVar(list_var)
  n_combinations <- dim(metaData)[1]
  genes_vec <- paste("gene", 1:n_genes, sep = "")
  geneID <- rep(genes_vec, each = n_combinations)
  metaData <- cbind(geneID, metaData)
  
  return(metaData)
}


#' getDataFromMvrnorm
#'
#' @inheritParams init_variable 
#' @param input2mvrnorm list with mu and covariance matrix, output of getInput2mvrnorm
#' @param n_genes Number of genes to simulate
#' 
#' @return
#' data simulated from multivariate normal distribution
#' 
#' @export
#'
#' @examples
#' list_var <- init_variable()
#' input <- getInput2mvrnorm(list_var)
#' simulated_data <- getDataFromMvrnorm(list_var, input, n_genes = 10)
getDataFromMvrnorm <- function(list_var, input2mvrnorm, n_genes = 1) {
  if (is.null(input2mvrnorm$covMatrix))
    return(list())
  
  metaData <- getGeneMetadata(list_var, n_genes)
  n_tirages <- dim(metaData)[1]
  
  mtx_mvrnormSamplings <- samplingFromMvrnorm(n_samplings = n_tirages, 
                                             l_mu = input2mvrnorm$mu, matx_cov = input2mvrnorm$covMatrix)
  
  dataFromMvrnorm <- cbind(metaData, mtx_mvrnormSamplings)
  
  return(list(dataFromMvrnorm))
}


#' getDataFromMvrnorm
#'
#' @param n_samplings number of samplings using mvrnorm
#' @param l_mu vector of mu
#' @param matx_cov covariance matrix
#'
#' @return
#' samples generated from multivariate normal distribution
#' 
#' @export
#' @importFrom MASS mvrnorm
#' @examples
#' n <- 100
#' mu <- c(0, 0)
#' covMatrix <- matrix(c(1, 0.5, 0.5, 1), ncol = 2)
#' samples <- samplingFromMvrnorm(n_samplings = n, l_mu = mu, matx_cov = covMatrix)
samplingFromMvrnorm <- function(n_samplings, l_mu, matx_cov) {
  mvrnormSamp <-  MASS::mvrnorm(n = n_samplings, mu = l_mu, Sigma = matx_cov, empirical = TRUE)
  return(mvrnormSamp)
}

```

```{r  tests-mvrnorm}
test_that("getInput2mvrnorm returns the correct list", {
  list_var <- init_variable()
  input <- getInput2mvrnorm(list_var)
  expect_is(input, "list")
  expect_true("mu" %in% names(input))
  expect_true("covMatrix" %in% names(input))
})


test_that("fillInCovarMatrice returns the correct matrix", {
  covarMat <- matrix(0, nrow = 3, ncol = 3)
  colnames(covarMat) <- c("label_varA", "label_varB", "label_varC")
  rownames(covarMat) <- c("label_varA", "label_varB", "label_varC")
  covarValue <- data.frame("varA.varB" = 18)
  matrice <- fillInCovarMatrice(covarMatrice = covarMat, covar = covarValue)
  
  expected_matrice <- matrix(0, nrow = 3, ncol = 3)
  colnames(expected_matrice) <- c("label_varA", "label_varB", "label_varC")
  rownames(expected_matrice) <- c("label_varA", "label_varB", "label_varC")
  expected_matrice["label_varA", "label_varB"] <- 18
  expected_matrice["label_varB", "label_varA"] <- 18
  expect_identical(matrice, expected_matrice)
})

test_that("getCovarianceMatrix returns the correct covariance matrix", {
  vector_sd <- c(1,2, 3)
  names(vector_sd) <- c("varA", "varB", "varC")
  vector_covar <- c(8, 12, 24)
  names(vector_covar) <- c("varA.varB", "varA.varC", "varB.varC")
  covMatrix <- getCovarianceMatrix(vector_sd, vector_covar)
  
  expect_is(covMatrix, "matrix")
  expect_equal(dim(covMatrix), c(3, 3))
  expected_matrix <- matrix(c(1,8,12,8,4,24, 12,24,9), nrow = 3,  byrow = T)
  rownames(expected_matrix) <- c("label_varA", "label_varB", "label_varC")
  colnames(expected_matrix) <- c("label_varA", "label_varB", "label_varC")
  expect_equal(expected_matrix, covMatrix)
})

test_that("getGeneMetadata returns the correct metadata", {
  list_var <- init_variable()
  n_genes <- 10
  metadata <- getGeneMetadata(list_var, n_genes)
  expect_is(metadata, "data.frame")
  expect_equal(colnames(metadata), c("geneID", paste("label", (attributes(list_var)$names), sep ="_")))
  expect_equal(nrow(metadata), n_genes * list_var$myVariable$level)
})

test_that("getDataFromMvrnorm returns the correct data", {
  list_var <- init_variable(name = "varA", mu = 1, sd = 4, level = 3) %>% init_variable("varB", mu = 2, sd = 1, level = 2)
  input <- getInput2mvrnorm(list_var)
  n_genes <- 10
  n_samplings <- n_genes * (list_var$varA$level ) * (list_var$varB$level )
  data <- getDataFromMvrnorm(list_var, input, n_genes)
  expect_is(data, "list")
  expect_equal(length(data), 1)
  expect_is(data[[1]], "data.frame")
  expect_equal(nrow(data[[1]]), n_samplings)
  
})

test_that("getDataFromMvrnomr returns empty list",{
  list_var <- init_variable()
  input <- getInput2mvrnorm(list_var)
  n_genes <- 10
  n_samplings <- n_genes * (list_var$varA$level ) * (list_var$varB$level )
  data <- getDataFromMvrnorm(list_var, input, n_genes)
  expect_is(data, "list")
  expect_equal(colnames(data[[1]]), c("geneID","label_myVariable" ,"myVariable"))
})

test_that("samplingFromMvrnorm returns the correct sampling", {
  n_samplings <- 100
  l_mu <- c(1, 2)
  matx_cov <- matrix(c(1, 0.5, 0.5, 1), ncol = 2)
  sampling <- samplingFromMvrnorm(n_samplings, l_mu, matx_cov)
  
  expect_is(sampling, "matrix")
  expect_equal(dim(sampling), c(n_samplings, length(l_mu)))
})


```

```{r function-dataFromUser, filename = "datafromUser_manipulations"}

#' Get data from user
#'
#'
#' @param list_var A list of variables (already initialized)
#' @return A list of data to join
#' @export
#'
#' @examples
#' getDataFromUser(init_variable())
getDataFromUser <- function(list_var) {
  variable_data2join <- getGivenAttribute(list_var, "data")
  id_var2join <- names(variable_data2join)
  interaction_data2join <- getGivenAttribute(list_var$interactions, "data")
  id_interaction2join <- names(interaction_data2join)
  
  data2join <- list(variable_data2join, interaction_data2join) %>%
    unlist(recursive = FALSE)
  id2join <- c(id_var2join, id_interaction2join)
  l_data2join <- lapply(id2join, function(id) data2join[[id]])
  
  return(l_data2join)
}

```

```{r test-dataFromUser}
# Test unitaires pour la fonction join_dtf
test_that("join_dtf réalise la jointure correctement", {
  # Création de données de test
  df1 <- data.frame(id = 1:5, value = letters[1:5])
  df2 <- data.frame(id = 1:5, category = LETTERS[1:5])
  
  # Exécution de la fonction
  result <- join_dtf(df1, df2, "id", "id")
  
  # Vérification des résultats
  expect_true(is.data.frame(result))
  expect_equal(nrow(result), 5)
  expect_equal(ncol(result), 3)
  expect_equal(names(result), c("id", "value", "category"))
  expect_true(all.equal(result$id, df1$id))
  expect_true(all.equal(result$id, df2$id))
})


# Test unitaires pour la fonction getDataFromUser
test_that("getDataFromUser renvoie les données appropriées", {
  # Exécution de la fonction
  list_var <- init_variable()
  list_var <- init_variable(list_var, "second_var")
  result <- getDataFromUser(list_var)
  
  # Vérification des résultats
  expect_true(is.list(result))
  expect_equal(length(result), 0)
  
  
  list_var <- init_variable(mu = c(1,2,3), sd = NA)
  list_var <- init_variable(list_var, "second_var")
  result <- getDataFromUser(list_var)
  expect_true(all(sapply(result, is.data.frame)))
  expect_equal(names(result[[1]]), c("label_myVariable", "myVariable"))
})
```

```{r function-setcorrelation, filename =  "setcorrelation"}

#' Compute Covariation from Correlation and Standard Deviations
#'
#' This function computes the covariation between two variables (A and B) given their correlation and standard deviations.
#'
#' @param corr_AB The correlation coefficient between variables A and B.
#' @param sd_A The standard deviation of variable A.
#' @param sd_B The standard deviation of variable B.
#'
#' @return The covariation between variables A and B.
#' @export
#' @examples
#' corr <- 0.7
#' sd_A <- 3
#' sd_B <- 4
#' compute_covariation(corr, sd_A, sd_B)
compute_covariation <- function(corr_AB, sd_A, sd_B) {
  cov_AB <- corr_AB * sd_A * sd_B
  return(cov_AB)
}


#' Get Standard Deviations for Variables in Correlation
#'
#' This function extracts the standard deviations for the variables involved in the correlation.
#'
#' @param list_var A list containing the variables and their attributes.
#' @param between_var A character vector containing the names of the variables involved in the correlation.
#'
#' @return A numeric vector containing the standard deviations for the variables in the correlation.
#' @export
#' @examples
#' list_var <- init_variable(name = "varA", mu = 0, sd = 5, level = 3) %>%
#'          init_variable(name = "varB", mu = 0, sd = 25, level = 3)
#' between_var <- c("varA", "varB")
#' getStandardDeviationInCorrelation(list_var, between_var)
getStandardDeviationInCorrelation <- function(list_var, between_var){
  for (var in between_var) sd_List <- getGivenAttribute(list_var, "sd")
  for (var in between_var) sd_ListFromInteraction <- getGivenAttribute(list_var$interactions, "sd")
  sd_List <- c(sd_List, sd_ListFromInteraction)
  return(unname(unlist(sd_List[between_var])))
}


#' Set Correlation between Variables
#'
#' Set the correlation between two or more variables in a simulation.
#'
#' @param list_var A list containing the variables used in the simulation, initialized using \code{\link{init_variable}}.
#' @param between_var Character vector specifying the names of the variables to set the correlation between.
#' @param corr Numeric value specifying the desired correlation between the variables.
#'
#' @return Updated \code{list_var} with the specified correlation set between the variables.
#'
#' @details The function checks if the variables specified in \code{between_var} are declared and initialized in the \code{list_var}. It also ensures that at least two variables with provided standard deviation are required to set a correlation in the simulation.
#' The specified correlation value must be within the range (-1, 1). The function computes the corresponding covariance between the variables based on the specified correlation and standard deviations.
#' The correlation information is then added to the \code{list_var} in the form of a data frame containing the correlation value and the corresponding covariance value.
#' @export
#' @examples
#' list_var <- init_variable(name = "varA", mu = 0, sd = 5, level = 3) %>%
#'             init_variable(name = "varB", mu = 0, sd = 25, level = 3)
#' list_var <- set_correlation(list_var, between_var = c("varA", "varB"), corr = 0.7)
set_correlation <- function(list_var, between_var, corr) {

  # Check if variables in between_var are declared and initialized
  bool_checkBetweenVarValidity <- function(between_var, list_var) {
    nb_varInCorrelation <- length(unique(between_var))
    stopifnot(nb_varInCorrelation > 1)
    # -- check also for interaction
    varInitialized <- c(getListVar(list_var), getListVar(list_var$interactions))
    existingVar_nb <- varInitialized  %in% between_var %>% sum()
    if (existingVar_nb != nb_varInCorrelation) {
      return(FALSE)
    } else {
      return(TRUE)
    }
  }
  
  name_correlation <- paste(between_var, collapse = ".")
  bool_valid_corr <- bool_checkBetweenVarValidity(between_var, list_var)
  if (!bool_valid_corr) {
    stop("At least one variable in between_var is not declared. Variable not initialized cannot be used in a correlation.")
  }
  
  vec_standardDev <- getStandardDeviationInCorrelation(list_var, between_var)
  if (length(vec_standardDev) < 2) {
    stop("Exactly two variables with provided standard deviation are required to set a correlation in simulation.")
  }
  # Validate the specified correlation value to be within the range [-1, 1]
  if (corr < -1 || corr > 1) {
    stop("Invalid correlation value. Correlation must be in the range [-1, 1].")
  }
  
  name_interaction <- paste(between_var, collapse = ":")
  corr <- data.frame(cor = corr, covar = compute_covariation(corr, vec_standardDev[1], vec_standardDev[2] ))
  list_var$correlations[[name_correlation]] <- corr
  return(list_var)
}


```

```{r  test-setcorrelation}

test_that("compute_covariation returns the correct covariation", {
  # Test case 1: Positive correlation
  corr <- 0.7
  sd_A <- 3
  sd_B <- 4
  expected_cov <- corr * sd_A * sd_B
  actual_cov <- compute_covariation(corr, sd_A, sd_B)
  expect_equal(actual_cov, expected_cov)

  # Test case 2: Negative correlation
  corr <- -0.5
  sd_A <- 2.5
  sd_B <- 3.5
  expected_cov <- corr * sd_A * sd_B
  actual_cov <- compute_covariation(corr, sd_A, sd_B)
  expect_equal(actual_cov, expected_cov)

  # Test case 3: Zero correlation
  corr <- 0
  sd_A <- 1
  sd_B <- 2
  expected_cov <- corr * sd_A * sd_B
  actual_cov <- compute_covariation(corr, sd_A, sd_B)
  expect_equal(actual_cov, expected_cov)
})


# Unit tests for getStandardDeviationInCorrelation
test_that("getStandardDeviationInCorrelation returns correct standard deviations", {
  
  # Initialize list_var
  list_var <- init_variable(name = "varA", mu = 0, sd = 5, level = 3) %>%
              init_variable(name = "varB", mu = 0, sd = 25, level = 3)
  
  # Test case 1: Two variables correlation
  between_var_1 <- c("varA", "varB")
  sd_expected_1 <- c(5, 25)
  sd_result_1 <- getStandardDeviationInCorrelation(list_var, between_var_1)
  expect_equal(sd_result_1, sd_expected_1)
  
})


test_that("set_correlation sets the correlation between variables correctly", {
  # Initialize variables in the list_var
  list_var <- init_variable(name = "varA", mu = 0, sd = 5, level = 3) %>%
              init_variable(name = "varB", mu = 0, sd = 25, level = 3)

  # Test setting correlation between varA and varB
  list_var <- set_correlation(list_var, between_var = c("varA", "varB"), corr = 0.7)
  
  corr_result <- list_var$correlations$varA.varB$cor
  covar_result <- list_var$correlations$varA.varB$covar
  expect_equal(corr_result, 0.7)
  expect_equal(covar_result, 87.5)

  # Test setting correlation between varA and varC (should raise an error)
  expect_error(set_correlation(list_var, between_var = c("varA", "varC"), corr = 0.8),
               "At least one variable in between_var is not declared. Variable not initialized cannot be used in a correlation.")

  # Test setting correlation with invalid correlation value
  expect_error(set_correlation(list_var, between_var = c("varA", "varB"), corr = 1.5))

  # Test setting correlation with less than 2 variables with provided standard deviation
  expect_error(set_correlation(list_var, between_var = c("varA"), corr = 0.7))
})


```

```{r function-simulation , filename = "simulation"}

#' Get input for simulation based on coefficients
#'
#' This function generates input data for simulation based on the coefficients provided in the \code{list_var} argument.
#'
#' @param list_var A list of variables (already initialized)
#' @param n_genes Number of genes to simulate (default: 1)
#' @param normal_distr Specifies the distribution type for generating effects. Choose between 'univariate' (default) or 'multivariate' .
#' - 'univariate': Effects are drawn independently from univariate normal distributions. 
#' - 'multivariate': Effects are drawn jointly from a multivariate normal distribution. (not recommended)
#' @param input2mvrnorm Input to the \code{mvrnorm} function for simulating data from multivariate normal distribution (default: NULL)
#' @return A data frame with input coefficients for simulation
#' @export
#' @examples
#' # Example usage
#' list_var <- init_variable()
#' getInput2simulation(list_var, n_genes = 10)
getInput2simulation <- function(list_var, n_genes = 1, normal_distr = "univariate",  input2mvrnorm = NULL) {
  
  stopifnot( normal_distr %in% c("multivariate", "univariate") )

  if (normal_distr == "multivariate"){
      if (is.null(input2mvrnorm)) input2mvrnorm = getInput2mvrnorm(list_var)    
      l_dataFrom_normdistr <- getDataFromMvrnorm(list_var, input2mvrnorm, n_genes)
    } 
  if (normal_distr == "univariate"){
      l_dataFrom_normdistr <- getDataFromRnorm(list_var, n_genes)
    }
  
  l_dataFromUser = getDataFromUser(list_var)
  
  df_input2simu <- getCoefficients(list_var, l_dataFrom_normdistr, l_dataFromUser, n_genes)
  
  return(df_input2simu)
}


#' Get the reference level for categorical variables in the data
#'
#' This function extracts the reference level for each categorical variable in the data.
#' The reference level is the first level encountered for each categorical variable.
#'
#' @param data The data frame containing the categorical variables.
#' @return A list containing the reference level for each categorical variable.
#' @export
getRefLevel <- function(data){
  col_names <- colnames(data)
  categorical_vars <- col_names[grepl(col_names, pattern = "label_")]
  if (length(categorical_vars) == 1){
    l_labels <- list()
    l_labels[[categorical_vars]] <- levels(data[, categorical_vars])
    
  } else l_labels <- lapply(data[, categorical_vars], levels)
  l_labels_ref <- sapply(l_labels, function(vec) vec[1])
  return(l_labels_ref)
}

#' Replace the effect by 0 in the data
#'
#' This function replaces the effect in interactions columns by 0, when needed.
#'
#' @param list_var The list of variables containing the effects to modify.
#' @param l_labels_ref A list containing the reference level for each categorical variable.
#' @param data The data frame containing the effects to modify.
#' @return The modified data frame 
#' @export
replaceUnexpectedInteractionValuesBy0 <- function(list_var, l_labels_ref , data){
  varInteraction <- getListVar(list_var$interactions)
  df_interaction_with0 <- sapply(varInteraction, function(var){
    categorical_var <- paste("label", unlist(strsplit(var, ":")), sep = "_")
    bool_matrix <- sapply(categorical_var, function(uniq_cat_var) data[uniq_cat_var] ==  l_labels_ref[uniq_cat_var])
    idx_0 <- rowSums(bool_matrix) > 0 ## line without interactions effects
    return(replace(data[[var]], idx_0, 0)) 
  })
  col_names <- colnames(data)
  categorical_vars <- col_names[grepl(col_names, pattern = "label_")]
  data[, varInteraction] <- df_interaction_with0
  return(data)
}


#' Prepare data using effects from a normal distribution
#'
#' Prepares the data by generating effects from a normal distribution for each gene.
#'
#' @param list_var A list of variables (already initialized)
#' @param n_genes Number of genes to generate data for.
#' @return A dataframe containing gene metadata and effects generated from a normal distribution.
#' @export
getDataFromRnorm <- function(list_var, n_genes){
    ## -- check if all data have been provided by user
    if (is.null(getInput2mvrnorm(list_var)$covMatrix))
        return(list())
    metadata <- getGeneMetadata(list_var , n_genes)
    df_effects <- get_effects_from_rnorm(list_var, metadata)
    data <- cbind(metadata, df_effects)  
    
    if(!is.null(getListVar(list_var$interactions))){
      l_labels_ref <- getRefLevel(data)
      data <- replaceUnexpectedInteractionValuesBy0(list_var, l_labels_ref, data)
    }
    
    return(list(data))
}

#' Generate effects from a normal distribution
#'
#' Generates effects from a normal distribution for each gene.
#'
#' @param list_var A list of variables (already initialized)
#' @param metadata Gene metadata.
#' @return A dataframe containing effects generated from a normal distribution.
#' @export
get_effects_from_rnorm <- function(list_var, metadata){
  
  variable_standard_dev <- getGivenAttribute(list_var, attribute = "sd") %>% unlist()
  interaction_standard_dev <- getGivenAttribute(list_var$interactions, attribute = "sd") %>% unlist()
  list_stdev <- c(variable_standard_dev, interaction_standard_dev)
  
  # -- mu
  variable_mu <- getGivenAttribute(list_var, attribute = "mu") %>% unlist()
  interaction_mu <- getGivenAttribute(list_var$interactions, attribute = "mu") %>% unlist()
  list_mu <- c(variable_mu, interaction_mu)
  
  variable_2rnorm <- names(list_stdev)
  l_effects <- lapply(stats::setNames(variable_2rnorm, variable_2rnorm) , function(var){
    col_labels <- paste("label", unlist(strsplit(var, ":")), sep = "_")
    cols2paste <- c("geneID", col_labels)
    list_combinations <- apply( metadata[ , cols2paste ] , 1 , paste , collapse = "-" )
    list_effects <- unique(list_combinations)
    list_beta <- rnorm(length(list_effects), mean = list_mu[var], sd = list_stdev[var])
    names(list_beta) <- list_effects
    unname(list_beta[list_combinations])
  })
       
  
  df_effects <- do.call("cbind", l_effects)
  return(df_effects)
}


#' getCoefficients
#'
#' Get the coefficients.
#'
#' @param list_var A list of variables (already initialized)
#' @param l_dataFromMvrnorm Data from the `getGeneMetadata` function (optional).
#' @param l_dataFromUser Data from the `getDataFromUser` function (optional).
#' @param n_genes The number of genes.
#' @export
#' @return A dataframe containing the coefficients.
#' @examples
#' # Example usage
#' list_var <- init_variable()
#' input2mvrnorm = getInput2mvrnorm(list_var)
#' l_dataFromMvrnorm = getDataFromMvrnorm(list_var, input2mvrnorm, n_genes=3)
#' l_dataFromUser = getDataFromUser(list_var)
#' getCoefficients(list_var, l_dataFromMvrnorm, l_dataFromUser, n_genes = 3)
getCoefficients <- function(list_var, l_dataFromMvrnorm, l_dataFromUser, n_genes) {
  if (length(l_dataFromMvrnorm) == 0) {
    metaData <- getGeneMetadata(list_var, n_genes)
    l_dataFromMvrnorm <- list(metaData)
  }
  l_df2join <- c(l_dataFromMvrnorm, l_dataFromUser)
  
  
  df_coef <- Reduce(function(d1, d2){ column_names = colnames(d2)
                                      idx_key = grepl(pattern = "label", column_names )
                                      keys = column_names[idx_key]
                                      join_dtf(d1, d2, k1 = keys , k2 = keys)
                                    } 
                    , l_df2join ) %>% as.data.frame()
  column_names <- colnames(df_coef)
  idx_column2factor <- grep(pattern = "label_", column_names)
  
  if (length(idx_column2factor) > 1) {
    df_coef[, idx_column2factor] <- lapply(df_coef[, idx_column2factor], as.factor)
  } else {
    df_coef[, idx_column2factor] <- as.factor(df_coef[, idx_column2factor])
  }
  
  return(df_coef)
}


#' Get the log_qij values from the coefficient data frame.
#'
#' @param dtf_coef The coefficient data frame.
#' @return The coefficient data frame with log_qij column added.
#' @export
#' @examples
#' list_var <- init_variable()
#' dtf_coef <- getInput2simulation(list_var, 10)
#' dtf_coef <- getLog_qij(dtf_coef)
getLog_qij <- function(dtf_coef) {
  dtf_beta_numeric <- dtf_coef[sapply(dtf_coef, is.numeric)]
  dtf_coef$log_qij <- rowSums(dtf_beta_numeric, na.rm = TRUE)
  return(dtf_coef)
}


#' Calculate mu_ij values based on coefficient data frame and scaling factor
#'
#' This function calculates mu_ij values by raising 2 to the power of the log_qij values
#' from the coefficient data frame and multiplying it by the provided scaling factor.
#'
#' @param dtf_coef Coefficient data frame containing the log_qij values
#'
#' @return Coefficient data frame with an additional mu_ij column
#'
#' @examples
#' list_var <- init_variable()
#' dtf_coef <- getInput2simulation(list_var, 10)
#' dtf_coef <- getLog_qij(dtf_coef)
#' dtf_coef <- addBasalExpression(dtf_coef, 10, c(10, 20, 0))
#' getMu_ij(dtf_coef)
#' @export
getMu_ij <- function(dtf_coef) {
  log_qij_scaled <- dtf_coef$log_qij + dtf_coef$basalExpr
  dtf_coef$log_qij_scaled <- log_qij_scaled
  mu_ij <- exp(log_qij_scaled)  
  dtf_coef$mu_ij <- mu_ij
  return(dtf_coef)
}

#' getMu_ij_matrix
#'
#' Get the Mu_ij matrix.
#'
#' @param dtf_coef A dataframe containing the coefficients.
#' @importFrom reshape2 dcast
#' @importFrom stats as.formula

#' @export
#' @return A Mu_ij matrix.
#' @examples
#' list_var <- init_variable()
#' dtf_coef <- getInput2simulation(list_var, 10)
#' dtf_coef <- getLog_qij(dtf_coef)
#' dtf_coef <- addBasalExpression(dtf_coef, 10, c(10, 20, 0))
#' dtf_coef<- getMu_ij(dtf_coef)
#' getMu_ij_matrix(dtf_coef)
getMu_ij_matrix <- function(dtf_coef) {
  column_names <- colnames(dtf_coef)
  idx_var <- grepl(pattern = "label", column_names)
  l_var <- column_names[idx_var]
  str_formula_rigth <- paste(l_var, collapse = " + ")
  if (str_formula_rigth == "") stop("no variable label detected")
  str_formula <- paste(c("geneID", str_formula_rigth), collapse = " ~ ")
  formula <- stats::as.formula(str_formula)
  dtf_Muij <- dtf_coef %>% reshape2::dcast(formula = formula, value.var = "mu_ij", drop = F)
  dtf_Muij[is.na(dtf_Muij)] <- 0
  mtx_Muij <- data.frame(dtf_Muij[, -1], row.names = dtf_Muij[, 1]) %>% as.matrix()
  mtx_Muij <- mtx_Muij[, order(colnames(mtx_Muij)), drop = F]
  return(mtx_Muij)
}

#' getSubCountsTable
#'
#' Get the subcounts table.
#'
#' @param matx_Muij The Mu_ij matrix.
#' @param matx_dispersion The dispersion matrix.
#' @param replicateID The replication identifier.
#' @param l_bool_replication A boolean vector indicating the replicates.
#' @importFrom stats rnbinom
#' 
#' @return A subcounts table.
getSubCountsTable <- function(matx_Muij, matx_dispersion, replicateID, l_bool_replication) {
  getKijMatrix <- function(matx_Muij, matx_dispersion, n_genes, n_samples) {
    k_ij <- stats::rnbinom(n_genes * n_samples,
                           size = matx_dispersion,
                           mu = matx_Muij) %>%
              matrix(nrow = n_genes, ncol = n_samples)
    
    k_ij[is.na(k_ij)] <- 0
    return(k_ij)
  }
  
  if (!any(l_bool_replication))
    return(NULL) 
  
  matx_Muij <- matx_Muij[, l_bool_replication, drop = FALSE]
  matx_dispersion <- matx_dispersion[, l_bool_replication, drop = FALSE] 
  l_sampleID <- colnames(matx_Muij)
  l_geneID <- rownames(matx_Muij)
  dimension_mtx <- dim(matx_Muij)
  n_genes <- dimension_mtx[1]
  n_samples <- dimension_mtx[2]
  matx_kij <- getKijMatrix(matx_Muij, matx_dispersion, n_genes, n_samples)
  colnames(matx_kij) <- paste(l_sampleID, replicateID, sep = "_")
  rownames(matx_kij) <- l_geneID
  return(matx_kij)
}

#' getReplicationMatrix
#'
#' @param minN Minimum number of replicates for each sample
#' @param maxN Maximum number of replicates for each sample
#' @param n_samples Number of samples
#' @export
#' @return A replication matrix indicating which samples are replicated
getReplicationMatrix <- function(minN, maxN, n_samples) {
  
  # Create a list of logical vectors representing the minimum number of replicates
  l_replication_minimum = lapply(1:n_samples, 
                                 FUN = function(i) rep(TRUE, times = minN) )
  
  # Create a list of random logical vectors representing additional replicates
  l_replication_random = lapply(1:n_samples, 
                                FUN = function(i) sample(x = c(TRUE, FALSE), size = maxN-minN, replace = T) )
  
  # Combine the replication vectors into matrices
  matx_replication_minimum <- do.call(cbind, l_replication_minimum)
  matx_replication_random <- do.call(cbind, l_replication_random)
  
  # Combine the minimum replicates and random replicates into a single matrix
  matx_replication <- rbind(matx_replication_minimum, matx_replication_random)
  
  # Sort the columns of the replication matrix in descending order
  matx_replication = apply(matx_replication, 2, sort, decreasing = TRUE ) %>% matrix(nrow = maxN)
  
  return(matx_replication)
}

#' getCountsTable
#'
#' @param matx_Muij Matrix of mean expression values for each gene and sample
#' @param matx_dispersion Matrix of dispersion values for each gene and sample
#' @param matx_bool_replication Replication matrix indicating which samples are replicated
#'
#' @return A counts table containing simulated read counts for each gene and sample
getCountsTable <- function(matx_Muij ,  matx_dispersion, matx_bool_replication ){
  max_replicates <-  dim(matx_bool_replication)[1]
  
  # Apply the getSubCountsTable function to each row of the replication matrix
  l_countsTable = lapply(1:max_replicates, function(i) getSubCountsTable(matx_Muij , matx_dispersion, i, matx_bool_replication[i,]  ))
  
  # Combine the counts tables into a single matrix
  countsTable = do.call(cbind, l_countsTable)
  
  return(countsTable %>% as.data.frame())
}

#' getDispersionMatrix
#'
#' @param list_var A list of variables (already initialized)
#' @param n_genes Number of genes
#' @param dispersion Vector of dispersion values for each gene
#' @export
#'
#' @return A matrix of dispersion values for each gene and sample
getDispersionMatrix <- function(list_var, n_genes, dispersion = stats::runif(n_genes, min = 0, max = 1000)){
  l_geneID = paste("gene", 1:n_genes, sep = "")
  l_sampleID = getSampleID(list_var) 
  n_samples = length(l_sampleID) 
  l_dispersion <- dispersion
  
  # Create a data frame for the dispersion values
  dtf_dispersion = list(dispersion =  l_dispersion) %>% as.data.frame()
  dtf_dispersion <- dtf_dispersion[, rep("dispersion", n_samples)]
  rownames(dtf_dispersion) = l_geneID
  colnames(dtf_dispersion) = l_sampleID
  
  matx_dispersion = dtf_dispersion %>% as.matrix()
  
  return(matx_dispersion)
}


#' Replicate rows of a data frame by group
#'
#' Replicates the rows of a data frame based on a grouping variable and replication counts for each group.
#'
#' @param df Data frame to replicate
#' @param group_var Name of the grouping variable in the data frame
#' @param rep_list Vector of replication counts for each group
#' @return Data frame with replicated rows
#' @examples
#' df <- data.frame(group = c("A", "B"), value = c(1, 2))
#' replicateByGroup(df, "group", c(2, 3))
#'
#' @export
replicateByGroup <- function(df, group_var, rep_list) {
  l_group_var <- df[[group_var]]
  group_levels <- unique(l_group_var)
  names(rep_list) <- group_levels
  group_indices <- rep_list[l_group_var]
  replicated_indices <- rep(seq_len(nrow(df)), times = group_indices)
  replicated_df <- df[replicated_indices, ]
  suffix_sampleID <- sequence(group_indices)
  replicated_df[["sampleID"]] <- paste(replicated_df[["sampleID"]], suffix_sampleID, sep = "_")
  rownames(replicated_df) <- NULL
  return(replicated_df)
}


#' Replicate rows of a data frame
#'
#' Replicates the rows of a data frame by a specified factor.
#'
#' @param df Data frame to replicate
#' @param n Replication factor for each row
#' @return Data frame with replicated rows
#' @export
#' @examples
#' df <- data.frame(a = 1:3, b = letters[1:3])
#' replicateRows(df, 2)
#'
replicateRows <- function(df, n) {
  indices <- rep(seq_len(nrow(df)), each = n)
  replicated_df <- df[indices, , drop = FALSE]
  rownames(replicated_df) <- NULL
  return(replicated_df)
}

#' Get sample metadata
#'
#' Generates sample metadata based on the input variables, replication matrix, and number of genes.
#'
#' @param list_var A list of variables (already initialized)
#' @param replicationMatrix Replication matrix
#' @return Data frame of sample metadata
#' @importFrom data.table setorderv
#' @export
#' @examples
#' list_var <- init_variable()
#' replicationMatrix <- generateReplicationMatrix(list_var, 3, 3)
#' getSampleMetadata(list_var,  replicationMatrix)
getSampleMetadata <- function(list_var, replicationMatrix) {
  l_sampleIDs = getSampleID(list_var)
  metaData <- generateGridCombination_fromListVar(list_var)
  metaData[] <- lapply(metaData, as.character) ## before reordering
  data.table::setorderv(metaData, cols = colnames(metaData))
  metaData[] <- lapply(metaData, as.factor)
  metaData$sampleID <- l_sampleIDs
  rep_list <- colSums(replicationMatrix)
  metaData$sampleID <- as.character(metaData$sampleID) ## before replicating
  sampleMetadata <- replicateByGroup(metaData, "sampleID", rep_list)
  colnames(sampleMetadata) <- gsub("label_", "", colnames(sampleMetadata))
  return(sampleMetadata)
}


#' getSampleID
#'
#' @param list_var A list of variables (already initialized)
#' @export
#' @return A sorted vector of sample IDs
#' @examples
#' getSampleID(init_variable())
getSampleID <- function(list_var){
  gridCombination <- generateGridCombination_fromListVar(list_var)
  l_sampleID <- apply( gridCombination , 1 , paste , collapse = "_" ) %>% unname()
  return(sort(l_sampleID))
}

```

```{r test-simulation}

test_that("getDataFromRnorm generates correct data frame", {
  input_var_list <- init_variable(name = "varA", mu = 10, sd = 0.1, level = 3) %>%
                    init_variable(name = "varB", mu = 1, sd = 2, level = 2)
  metadata <- getGeneMetadata(input_var_list , n_genes = 5)
  df <- getDataFromRnorm(input_var_list, n_genes = 5)
  expect_is(df[[1]], "data.frame")
  expect_equal(nrow(df[[1]]), 30)
  expect_equal(colnames(df[[1]]), c("geneID", "label_varA", "label_varB", "varA", "varB"))  
})

test_that("get_effects_from_rnorm generates correct effects", {
  input_var_list <- init_variable(name = "varA", mu = 10, sd = 0.1, level = 3) %>%
                    init_variable(name = "varB", mu = 1, sd = 2, level = 2)
  metadata <- getGeneMetadata(input_var_list , n_genes = 5)
  df_effects <- get_effects_from_rnorm(input_var_list, metadata)
  
  expect_is(df_effects, "matrix")
  expect_equal(nrow(df_effects), nrow(metadata))
  expect_equal(colnames(df_effects), c("varA", "varB"))
})


# Test case 1: Check if the function returns a data frame
test_that("getInput2simulation returns a data frame", {
  list_var <- init_variable()
  set.seed(101)
  result <- getInput2simulation(list_var, normal_distr = 'multivariate')
  expect_is(result, "data.frame")
  expected <- data.frame(geneID = c("gene1", "gene1"), label_myVariable = as.factor(c("myVariable1", "myVariable2")), 
                         myVariable = c(-0.1414214,0.1414214))
  expect_equal(result, expected, tolerance = 1e-3)
  })

# Test for getCoefficients function
test_that("getCoefficients returns the correct output", {
  # Create dummy data
  n_genes <- 3
  list_var = init_variable()
  # Call the function
  coefficients <- getCoefficients(list_var, list(), list(), n_genes)
  
  # Check the output
  expect_equal(nrow(coefficients), n_genes*list_var$myVariable$level)
  expect_equal(colnames(coefficients), c("geneID", "label_myVariable")) 
})

# Test for getMu_ij_matrix function
test_that("getMu_ij_matrix returns the correct output", {
  # Create a dummy coefficients dataframe
  dtf_coef <- data.frame(geneID = c("Gene1", "Gene1", "Gene1"),
                         label_varA = c("A1", "A2", "A3"),
                         label_varB = c("B1", "B2", "B3"),
                         mu_ij = c(1, 2, 3))
  
  # Call the function
  mu_matrix <- getMu_ij_matrix(dtf_coef)
  # Check the output
  expect_equal(dim(mu_matrix), c(1, 9)) 
  
})

# Test for getSubCountsTable function
test_that("getSubCountsTable returns the correct output", {
  # Create dummy data
  l_genes <- c("gene1", "gene2", "gene3")
  matx_Muij = data.frame(sple1 = c(1,3,4), sple2 = c(2, 0, 9), sple3 = c(1, 69, 2)) %>% as.matrix()
  rownames(matx_Muij) <- l_genes
  matx_dispersion <- matrix(0.5, nrow = 3, ncol = 3)
  replicateID <- 1
  l_bool_replication <- c(TRUE, FALSE, TRUE)
  
  # Call the function
  subcounts_table <- getSubCountsTable(matx_Muij, matx_dispersion, 1, l_bool_replication)
  
  # Check the output
  expect_equal(dim(subcounts_table), c(3, 2))
  expect_equal(rownames(subcounts_table), l_genes)
})


test_that("getReplicationMatrix returns the correct replication matrix", {
  minN <- 2
  maxN <- 4
  n_samples <- 3
  expected <- matrix(c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE), nrow = maxN)
  
  set.seed(123)
  result <- getReplicationMatrix(minN, maxN, n_samples)
  
  expect_equal(result, expected)
})

test_that("getSampleID return the correct list of sampleID",{
   expect_equal(getSampleID(init_variable()), c("myVariable1", "myVariable2"))
})

# Create a test case for getMu_ij
test_that("getMu_ij returns the correct output", {
  # Create a sample coefficient data frame
  dtf_coef <- data.frame(
    log_qij = c(1, 9, 0.1),
    basalExpr = c(2, 3, 4)
  )

    # Call the getMu_ij function
  result <- getMu_ij(dtf_coef)

  # Check if the mu_ij column is added
  expect_true("mu_ij" %in% colnames(result))

  # Check the values of mu_ij
  #expected_mu_ij <- c(20.08554, 162754.79142 , 60.34029)
  #expect_equal(result$mu_ij, expected_mu_ij, tolerance = 0.000001)
})


# Create a test case for getLog_qij
test_that("getLog_qij returns the correct output", {
  # Create a sample coefficient data frame
  dtf_coef <- data.frame(
    beta1 = c(1.2, 2.3, 3.4),
    beta2 = c(0.5, 1.0, 1.5),
    non_numeric = c("a", "b", "c")
  )

  # Call the getLog_qij function
  result <- getLog_qij(dtf_coef)

  # Check if the log_qij column is added
  expect_true("log_qij" %in% colnames(result))

  # Check the values of log_qij
  expected_log_qij <- c(1.7, 3.3, 4.9)
  expect_equal(result$log_qij, expected_log_qij)
})

test_that("getCountsTable returns the correct counts table", {
  mat_mu_ij <- matrix(c(1,2,3,4,5,6), ncol = 3, byrow = T)
  rownames(mat_mu_ij) <- c("gene1", "gene2")
  colnames(mat_mu_ij) <- c("sample1", "sample2", "sample3")
  mat_disp <- matrix(c(0.3,0.3,0.3, 0.5,0.5,0.5), ncol = 3, byrow = T)
  rownames(mat_disp) <- c("gene1", "gene2")
  colnames(mat_disp) <- c("sample1", "sample2", "sample3")
  mat_repl <- matrix(c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE), ncol = 3, byrow = T)
  
  expected_df <- matrix(c(0,0,1,0,0,0,0,1,0,2,34,18,0,0,3,10,7,2), nrow = 2, byrow = T) %>% as.data.frame()
  rownames(expected_df) <- c("gene1", "gene2")
  colnames(expected_df) <- c("sample1_1", "sample2_1", "sample3_1", "sample1_2", 
                             "sample2_2","sample3_2","sample1_3", "sample2_3" ,"sample3_3")
  
  set.seed(123)
  result <- getCountsTable(mat_mu_ij, mat_disp, mat_repl)

  expect_true(is.data.frame(result))
  expect_equal(colnames(result), colnames(expected_df))
  expect_equal(rownames(result), rownames(expected_df))

})


test_that("getSampleMetadata returns expected output", {
  # Set up input variables
  list_var <- init_variable()
  replicationMatrix <- matrix(TRUE, nrow = 2, ncol = 2)

  # Run the function
  result <- getSampleMetadata(list_var,  replicationMatrix)
  
  # Define expected output
  expected_colnames <- c("myVariable", "sampleID")
  expect_equal(colnames(result), expected_colnames)