Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# WARNING - Generated by {fusen} from /dev/flat_full.Rmd: do not edit by hand
#' Get input for simulation based on coefficients
#'
#' This function generates input data for simulation based on the coefficients provided in the \code{list_var} argument.
#'
#' @param list_var A list of variables (already initialized)
#' @param n_genes Number of genes to simulate (default: 1)
#' @param input2mvrnorm Input to the \code{mvrnorm} function for simulating data from multivariate normal distribution (default: NULL)
#' @return A data frame with input coefficients for simulation
#' @export
#' @examples
#' # Example usage
#' list_var <- init_variable()
#' getInput2simulation(list_var, n_genes = 10)
getInput2simulation <- function(list_var, n_genes = 1, input2mvrnorm = NULL) {
# Use default input to mvrnorm if not provided by the user
if (is.null(input2mvrnorm)) input2mvrnorm = getInput2mvrnorm(list_var)
l_dataFromMvrnorm = getDataFromMvrnorm(list_var, input2mvrnorm, n_genes)
l_dataFromUser = getDataFromUser(list_var)
df_input2simu <- getCoefficients(list_var, l_dataFromMvrnorm, l_dataFromUser, n_genes)
return(df_input2simu)
}
#' getCoefficients
#'
#' Get the coefficients.
#'
#' @param list_var A list of variables (already initialized)
#' @param l_dataFromMvrnorm Data from the `getGeneMetadata` function (optional).
#' @param l_dataFromUser Data from the `getDataFromUser` function (optional).
#' @param n_genes The number of genes.
#' @export
#' @return A dataframe containing the coefficients.
#' @examples
#' # Example usage
#' list_var <- init_variable()
#' input2mvrnorm = getInput2mvrnorm(list_var)
#' l_dataFromMvrnorm = getDataFromMvrnorm(list_var, input2mvrnorm, n_genes)
#' l_dataFromUser = getDataFromUser(list_var)
#' getCoefficients(list_var, l_dataFromMvrnorm, l_dataFromUser, n_genes = 3)
getCoefficients <- function(list_var, l_dataFromMvrnorm, l_dataFromUser, n_genes) {
if (length(l_dataFromMvrnorm) == 0) {
metaData <- getGeneMetadata(list_var, n_genes)
l_dataFromMvrnorm <- list(metaData)
}
l_df2join <- c(l_dataFromMvrnorm, l_dataFromUser)
df_coef <- Reduce(function(d1, d2){ column_names = colnames(d2)
idx_key = grepl(pattern = "label", column_names )
keys = column_names[idx_key]
join_dtf(d1, d2, k1 = keys , k2 = keys)
}
, l_df2join ) %>% as.data.frame()
column_names <- colnames(df_coef)
idx_column2factor <- grep(pattern = "label_", column_names)
if (length(idx_column2factor) > 1) {
df_coef[, idx_column2factor] <- lapply(df_coef[, idx_column2factor], as.factor)
} else {
df_coef[, idx_column2factor] <- as.factor(df_coef[, idx_column2factor])
}
return(df_coef)
}
#' Get the log_qij values from the coefficient data frame.
#'
#' @param dtf_coef The coefficient data frame.
#' @return The coefficient data frame with log_qij column added.
#' @export
getLog_qij <- function(dtf_coef) {
dtf_beta_numeric <- dtf_coef[sapply(dtf_coef, is.numeric)]
dtf_coef$log_qij <- rowSums(dtf_beta_numeric, na.rm = TRUE)
return(dtf_coef)
}
#' Calculate mu_ij values based on coefficient data frame and scaling factor
#'
#' This function calculates mu_ij values by raising 2 to the power of the log_qij values
#' from the coefficient data frame and multiplying it by the provided scaling factor.
#'
#' @param dtf_coef Coefficient data frame containing the log_qij values
#'
#' @return Coefficient data frame with an additional mu_ij column
#'
#' @examples
#' list_var <- init_variable()
#' dtf_coef <- getInput2simulation(list_var, 10)
#' dtf_coef <- getLog_qij(dtf_coef)
#' dtf_coef <- addBasalExpression(dtf_coef, 10, c(10, 20, 0))
#' getMu_ij(dtf_coef)
#' @export
getMu_ij <- function(dtf_coef) {
log_qij_scaled <- dtf_coef$log_qij + dtf_coef$basalExpr
dtf_coef$log_qij_scaled <- log_qij_scaled
mu_ij <- exp(log_qij_scaled)
dtf_coef$mu_ij <- mu_ij
return(dtf_coef)
}
#' getMu_ij_matrix
#'
#' Get the Mu_ij matrix.
#'
#' @param dtf_coef A dataframe containing the coefficients.
#' @importFrom reshape2 dcast
#' @importFrom stats as.formula
#' @export
#' @return A Mu_ij matrix.
getMu_ij_matrix <- function(dtf_coef) {
column_names <- colnames(dtf_coef)
idx_var <- grepl(pattern = "label", column_names)
l_var <- column_names[idx_var]
str_formula_rigth <- paste(l_var, collapse = " + ")
if (str_formula_rigth == "") stop("no variable label detected")
str_formula <- paste(c("geneID", str_formula_rigth), collapse = " ~ ")
formula <- stats::as.formula(str_formula)
dtf_Muij <- dtf_coef %>% reshape2::dcast(formula = formula, value.var = "mu_ij", drop = F)
dtf_Muij[is.na(dtf_Muij)] <- 0
mtx_Muij <- data.frame(dtf_Muij[, -1], row.names = dtf_Muij[, 1]) %>% as.matrix()
mtx_Muij <- mtx_Muij[, order(colnames(mtx_Muij)), drop = F]
return(mtx_Muij)
}
#' getSubCountsTable
#'
#' Get the subcounts table.
#'
#' @param matx_Muij The Mu_ij matrix.
#' @param matx_dispersion The dispersion matrix.
#' @param replicateID The replication identifier.
#' @param l_bool_replication A boolean vector indicating the replicates.
#' @importFrom stats rnbinom
#'
#' @return A subcounts table.
getSubCountsTable <- function(matx_Muij, matx_dispersion, replicateID, l_bool_replication) {
getKijMatrix <- function(matx_Muij, matx_dispersion, n_genes, n_samples) {
k_ij <- stats::rnbinom(n_genes * n_samples,
size = matx_dispersion,
mu = matx_Muij) %>%
matrix(nrow = n_genes, ncol = n_samples)
k_ij[is.na(k_ij)] <- 0
return(k_ij)
}
if (!any(l_bool_replication))
return(NULL)
matx_Muij <- matx_Muij[, l_bool_replication, drop = FALSE]
matx_dispersion <- matx_dispersion[, l_bool_replication, drop = FALSE]
l_sampleID <- colnames(matx_Muij)
l_geneID <- rownames(matx_Muij)
dimension_mtx <- dim(matx_Muij)
n_genes <- dimension_mtx[1]
n_samples <- dimension_mtx[2]
matx_kij <- getKijMatrix(matx_Muij, matx_dispersion, n_genes, n_samples)
colnames(matx_kij) <- paste(l_sampleID, replicateID, sep = "_")
rownames(matx_kij) <- l_geneID
return(matx_kij)
}