evaluation_withmixedeffect.R

# WARNING - Generated by {fusen} from /dev/flat_full.Rmd: do not edit by hand


#' Check if the formula contains a mixed effect structure.
#'
#' This function checks if the formula contains a mixed effect structure indicated by the presence of "|".
#'
#' @param formula A formula object.
#'
#' @return \code{TRUE} if the formula contains a mixed effect structure, \code{FALSE} otherwise.
#'
#' @examples
#' is_mixedEffect_inFormula(y ~ x + (1|group))
#'
#' @export
is_mixedEffect_inFormula <- function(formula) {
  return("|" %in% all.names(formula))
}

#' Check if the formula follows a specific type I mixed effect structure.
#'
#' This function checks if the formula follows a specific type I mixed effect structure, which consists of a fixed effect and a random effect indicated by the presence of "|".
#'
#' @param formula A formula object.
# 
#' @return \code{TRUE} if the formula follows the specified type I mixed effect structure, \code{FALSE} otherwise.
# 
#' @examples
#' is_formula_mixedTypeI(formula = y ~ x + (1|group))
# 
#' @export
is_formula_mixedTypeI <- function(formula) {
  if (length(all.vars(formula)) != 3) return(FALSE)
  if (sum(all.names(formula) == "+") > 1) return(FALSE)
  if (sum(all.names(formula) == "/") > 0) return(FALSE)
  return(TRUE)
}


#' Get the categorical variable associated with the fixed effect in a type I formula.
#'
#' This function extracts the categorical variable associated with the fixed effect in a type I formula from a tidy tibble.
# The categorical variable is constructed by taking the label of the second main fixed effect term (ignoring any numeric suffix) and prefixing it with "label_".
#
#' @param tidy_tmb A tidy tibble containing model terms.
# 
#' @return The categorical variable associated with the fixed effect in the type I formula.
# 
#' @examples
#' \dontrun{
#' getCategoricalVar_inFixedEffect(tidy_tmb)
#' } 
#' @export
getCategoricalVar_inFixedEffect <- function(tidy_tmb) {
  main_fixEffs <- unique(subset(tidy_tmb, effect == "fixed")$term)
  categorical_var_inFixEff <- paste("label", gsub("\\d+$", "", main_fixEffs[2]), sep = "_")
  return(categorical_var_inFixEff)
}


#' Group log_qij values per genes and labels.
#'
#' This function groups log_qij values in a ground truth tibble per genes and labels using a specified categorical variable.
#
#' @param ground_truth A tibble containing ground truth data.
#' @param categorical_var The categorical variable to use for grouping.
# 
#' @return A list of log_qij values grouped by genes and labels.
#' @importFrom stats as.formula
#' @importFrom reshape2 dcast
#' 
# 
#' @examples
#' \dontrun{
#' group_logQij_per_genes_and_labels(ground_truth, categorical_var)
#' }
#' @export
group_logQij_per_genes_and_labels <- function(ground_truth, categorical_var) {
  str_formula <- paste(c(categorical_var, "geneID"), collapse = " ~ ")
  formula <- stats::as.formula(str_formula)
  list_logqij <- ground_truth %>%
    reshape2::dcast(
      formula,
      value.var = "log_qij_scaled",
      fun.aggregate = list
    )
  list_logqij[categorical_var] <- NULL
  return(list_logqij)
}

#' Calculate actual mixed effect values for each gene.
#'
#' This function calculates actual mixed effect values for each gene using the provided data, reference labels, and other labels in a categorical variable.
#
#' @param list_logqij A list of log_qij values grouped by genes and labels.
#' @param genes_iter_list A list of genes for which to calculate the actual mixed effect values.
#' @param categoricalVar_infos Information about the categorical variable, including reference labels and other labels.
# 
#' @return A data frame containing the actual mixed effect values for each gene.
# 
#' @examples
#' \dontrun{
#' getActualMixed_typeI(list_logqij, genes_iter_list, categoricalVar_infos)
#' }
#' @export
getActualMixed_typeI <- function(list_logqij, genes_iter_list, categoricalVar_infos) {
  labelRef_InCategoricalVar <- categoricalVar_infos$ref
  labels_InCategoricalVar <- categoricalVar_infos$labels
  labelOther_inCategoricalVar <- categoricalVar_infos$labelsOther

  data_per_gene <- lapply(genes_iter_list, function(g) {
    data_gene <- data.frame(list_logqij[[g]])
    colnames(data_gene) <- labels_InCategoricalVar
    return(data_gene)
  })
  
  l_actual_per_gene <- lapply(genes_iter_list, function(g) {
    data_gene <- data_per_gene[[g]]
    res <- calculate_actualMixed(data_gene, labelRef_InCategoricalVar, labelOther_inCategoricalVar)
    res$geneID <- g
    return(res)
  })
  
  actual_mixedEff <- do.call("rbind", l_actual_per_gene)
  rownames(actual_mixedEff) <- NULL
  return(actual_mixedEff)
}


#' Compare the mixed-effects inference to expected values.
#'
#' This function compares the mixed-effects inference obtained from a mixed-effects model to expected values derived from a ground truth dataset. The function assumes a specific type I mixed-effect structure in the input model.
# 
#' @param tidy_tmb  tidy model results obtained from fitting a mixed-effects model.
#' @param ground_truth_eff A data frame containing ground truth effects.
# 
#' @return A data frame with the comparison of estimated mixed effects to expected values.
#' @importFrom stats setNames
#' @examples
#' \dontrun{
#' inferenceToExpected_withMixedEff(tidy_tmb(l_tmb), ground_truth_eff)
#' } 
#' @export
inferenceToExpected_withMixedEff <- function(tidy_tmb, ground_truth_eff){

  # -- CategoricalVar involve in fixEff
  categorical_var <- getCategoricalVar_inFixedEffect(tidy_tmb)
  labels_InCategoricalVar <- levels(ground_truth_eff[, categorical_var])
  labelRef_InCategoricalVar <- labels_InCategoricalVar[1]
  labelOther_inCategoricalVar <- labels_InCategoricalVar[2:length(labels_InCategoricalVar)]
  categoricalVar_infos <- list(ref = labelRef_InCategoricalVar,
                               labels = labels_InCategoricalVar,
                               labelsOther = labelOther_inCategoricalVar )

  ## -- prepare data 2 get actual
  l_logqij <- group_logQij_per_genes_and_labels(ground_truth_eff, categorical_var)
  l_genes <- unique(ground_truth_eff$geneID)
  genes_iter_list <- stats::setNames(l_genes,l_genes)
  actual_mixedEff <- getActualMixed_typeI(l_logqij, genes_iter_list, categoricalVar_infos)

  res <- join_dtf(actual_mixedEff, tidy_tmb  ,c("geneID", "term"), c("ID", "term"))

  ## -- reorder for convenience
  actual <- res$actual
  res <- res[, -1]
  res$actual <- actual
  return(res)
}


#' Calculate actual mixed effects.
#'
#' This function calculates actual mixed effects based on the given data for a specific type I mixed-effect structure.
# It calculates the expected values, standard deviations, and correlations between the fixed and random effects.
# The function is designed to work with specific input data for type I mixed-effect calculations.
# 
#' @param data_gene Data for a specific gene.
#' @param labelRef_InCategoricalVar The reference label for the categorical variable.
#' @param labelOther_inCategoricalVar Labels for the categorical variable other than the reference label.
#' @importFrom stats sd cor
# 
#' @return A data frame containing the calculated actual mixed effects.
# 
#' @examples
#' \dontrun{
#'  calculate_actualMixed(data_gene, labelRef_InCategoricalVar, labelOther_inCategoricalVar)
#' }
#' @export
calculate_actualMixed <- function(data_gene, labelRef_InCategoricalVar, labelOther_inCategoricalVar ){
   log_qij_scaled_intercept <- data_gene[labelRef_InCategoricalVar]
  colnames(log_qij_scaled_intercept) <- '(Intercept)'

  if (length(labelOther_inCategoricalVar == 1 )) {
    log_qij_scaled_other <- data_gene[labelOther_inCategoricalVar]
  } else log_qij_scaled_other <- data_gene[,labelOther_inCategoricalVar]
  log_qij_scaled_transf <- log_qij_scaled_other - log_qij_scaled_intercept[,"(Intercept)"]

  log_qij_scaled_transf <- cbind(log_qij_scaled_intercept, log_qij_scaled_transf)
  ## -- fix eff
  actual_fixedValues <- colMeans(log_qij_scaled_transf)

  ## -- stdev values
  std_values <- sapply(log_qij_scaled_transf, function(x) stats::sd(x))
  names(std_values) <- paste("sd", names(std_values), sep = '_')

  ## -- correlation
  corr_mat <- stats::cor(log_qij_scaled_transf)
  indx <- which(upper.tri(corr_mat, diag = FALSE), arr.ind = TRUE)
  corr2keep = corr_mat[indx]
  name_corr <- paste(rownames(corr_mat)[indx[, "row"]], colnames(corr_mat)[indx[, "col"]], sep = ".")
  names(corr2keep) <- paste("cor", name_corr, sep = "__")

  ## -- output 
  actual <- c(actual_fixedValues, std_values, corr2keep)
  res <- as.data.frame(actual)
  res$term <- rownames(res)
  rownames(res) <- NULL
  res$description <- sub("_.*", "", gsub("\\d+$", "" , res$term))
  return(res)
  
  
}


#' Compare inference results to expected values for a given model.
#'
#' This function compares the inference results from a model to the expected values based on a ground truth dataset with the simulated effects. The function handles models with mixed effects and fixed effects separately, ensuring that the comparison is appropriate for the specific model type.
#'
#' If a model includes mixed effects, the function checks for support for the specific mixed effect structure and provides an informative error message if the structure is not supported.
#'
#' @param tidy_tmb A fitted model object convert to tidy dataframe.
#' @param ground_truth_eff A ground truth dataset with the simulated effects.
#' @param formula_used formula used in model 
#'
#' @return A data frame containing the comparison results, including the term names, inference values, and expected values.
#'
#' @examples
#' \dontrun{
#' evalData <- compareInferenceToExpected(l_tmb, ground_truth_eff)
#' }
#' @export
compareInferenceToExpected <- function(tidy_tmb, ground_truth_eff, formula_used) {
  ## -- parsing formula & check mixed effect
  involvMixedEffect <- is_mixedEffect_inFormula(formula_used)

  msg_e_formula_type <- "This simulation evaluation supports certain types of formulas with mixed effects, but not all.
    Please refer to the package documentation for information on supported formula structures.
    You are welcome to implement additional functions to handle specific formula types with mixed effects that are not currently supported."

  ## -- if mixed effect
  if (involvMixedEffect){
    message("Mixed effect detected in the formula structure.")

    if(!is_formula_mixedTypeI(formula_used)){
      stop(msg_e_formula_type)
    }
    evalData <- inferenceToExpected_withMixedEff(tidy_tmb, ground_truth_eff)

  ## -- only fixed effect
  } else {
    
    message("Only fixed effects are detected in the formula structure.")
    evalData <- inferenceToExpected_withFixedEff(tidy_tmb, ground_truth_eff)
  }

  return(evalData)
}