diff --git a/dev/flat_full.Rmd b/dev/flat_full.Rmd index b3f61718e98c11165a41c2c8abb66bdce0896f99..14d833bef7e5babd225f6f93a040230d6d8decc2 100644 --- a/dev/flat_full.Rmd +++ b/dev/flat_full.Rmd @@ -55,13 +55,13 @@ sim_kmer <- function(n_kmer, mean_coef, sex = "XY", count = F) { count = mvtnorm::rmvnorm(n_kmer * .1, mean = c(1, 2)*mean_coef, sigma = matrix(c(1.05, 2, 2, 4.05) * mean_coef^1.5, ncol = 2), checkSymmetry = F, method = "svd") %>% as_tibble(.name_repair = "universal") ) %>% - unnest(count) %>% + unnest(c(count)) %>% bind_rows( tibble( sex = "A", count = mvtnorm::rmvnorm(n_kmer * 0.85, mean = c(2, 2)*mean_coef, sigma = matrix(c(1, .95, .95, 1) * mean_coef^1.5 * 4, ncol = 2), method = "svd") %>% as_tibble(.name_repair = "universal") - ) %>% unnest(count) + ) %>% unnest(c(count)) ) if (sex == "XY") { data <- data %>% @@ -71,7 +71,7 @@ sim_kmer <- function(n_kmer, mean_coef, sex = "XY", count = F) { count = mvtnorm::rmvnorm(n_kmer * .05, mean = c(1, 0)*mean_coef, sigma = matrix(c(.9, .05, .05, .05) * mean_coef^1.5, ncol = 2), method = "svd") %>% as_tibble(.name_repair = "universal") ) %>% - unnest(count) + unnest(c(count)) ) } data <- data %>% @@ -85,7 +85,7 @@ sim_kmer <- function(n_kmer, mean_coef, sex = "XY", count = F) { count_m = map(count_m, function(x){rpois(1, x)}), count_f = map(count_f, function(x){rpois(1, x)}) ) %>% - unnest(count_m, count_f) + unnest(c(count_m, count_f)) ) } return(data) @@ -236,24 +236,15 @@ E_N_clust <- function(proba) { #' @param proba a matrix of autosomal female and male chromosome probability #' @param mu, the base mean parameter #' @param N_clust the size of the clusters +#' @param cluster_coef, the base mean coefficients for the cluster #' @param sex = c("XY", "XO") the sex of the specie #' @return a mean mu #' @examples -M_mean <- function(x, proba, N_clust, sex) { +M_mean <- function(x, proba, N_clust, cluster_coef, sex) { mu <- 0 for (cluster in 1:ncol(proba)) { - if (cluster == 1) { - mu <- mu + - mean(colSums(x * c(0.5, 0.5) * proba[, cluster]) / N_clust[cluster]) - } - if (cluster == 2) { - mu <- mu + - mean(colSums(x * c(1, 0.5) * proba[, cluster]) / N_clust[cluster]) - } - if (cluster == 3) { - mu <- mu + - (colSums(x * c(1, 0) * proba[, cluster]) / N_clust[cluster])[1] - } + mu <- mu + + mean(colSums(x * cluster_coef[[cluster]] * proba[, cluster]) / N_clust[cluster]) } if (sex == "XY") { return(mu / 3) @@ -343,8 +334,8 @@ sample %>% #' @return a list of parameters init_param <- function(x, sex) { cluster_coef <- list( - "a" = c(2, 2), - "f" = c(1, 2) + "a" = c(1, 1), + "f" = c(1, 0.5) ) theta <- list( "pi" = c(.85, .1, .05), @@ -404,7 +395,7 @@ EM_clust <- function(x, threshold = 1, sex = "XY") { old_loglik <- loglik(x, param$theta, param$cluster_coef, sex) proba <- E_proba(x, param$theta, param$cluster_coef, sex) param$theta$pi <- E_N_clust(proba) - param$theta$mu <- M_mean(x, proba, param$theta$pi, sex) + param$theta$mu <- M_mean(x, proba, param$theta$pi, params$cluster_coef, sex) param$theta$sigma <- M_cov(x, proba, param$theta$mu, param$theta$pi, param$cluster_coef, sex) param$theta$pi <- param$theta$pi / nrow(x) new_loglik <- loglik(x, param$theta, param$cluster_coef, sex) @@ -490,7 +481,7 @@ compare_models <- function(x, threshold = 1, nboot = 100, bootsize = 1000, core model_XO = boostrap_EM(x, sex = "X0", threshold = threshold, nboot = nboot, bootsize = bootsize, core = core) ) %>% pivot_longer(cols = c(model_XO, model_XY)) %>% - unnest(value) + unnest(c(value)) } ``` @@ -514,7 +505,7 @@ BSS_WSS <- function(x, cluster) { # clustering XY ```{r clustering_XY} -data <- sim_kmer(1e4, 1000, "XY", count =T) +data <- sim_kmer(1e6, 1000, "XY", count =T) model_XY <- data %>% dplyr::select(count_m, count_f) %>% as.matrix() %>% @@ -642,8 +633,8 @@ annotate_counts <- function(annotation, count, name) { group_by(specie) %>% nest(.key = "sex") %>% mutate(count = lapply(sex, function(files, data){ - files_m <- files %>% filter(sex == "male") %>% unnest(files) %>% pull(file) %>% as.vector() - files_f <- files %>% filter(sex == "female") %>% unnest(files) %>% pull(file) %>% as.vector() + files_m <- files %>% filter(sex == "male") %>% unnest(c(files)) %>% pull(file) %>% as.vector() + files_f <- files %>% filter(sex == "female") %>% unnest(c(files)) %>% pull(file) %>% as.vector() data %>% select(kmer) %>% mutate( @@ -655,8 +646,8 @@ annotate_counts <- function(annotation, count, name) { count_f = log1p(count_f) ) }, data = count)) %>% - unnest(sex) %>% - unnest(count) %>% + unnest(c(sex)) %>% + unnest(c(count)) %>% ungroup() } ```