Patch summary (free text before the first "diff --git" header):
  * "For XY" / "For XO" chunks: the first argument of sim_kmer() goes from
    1e3 to 1e2; the BIC printouts, the bare -2 * loglik differences and the
    second pchisq() call are removed, leaving one pchisq() call per chunk.
  * New "Get Y k-mer" chunk: scatter plot of count_m against count_f,
    coloured by column 3 of model_XY$proba.

diff --git a/src/clustering.Rmd b/src/clustering.Rmd
index 72a1e80a75e525d7462740bf6a49dfad76a6f982..d6a2af52690eee6e441e6e416f2fd4e3d5587150 100644
--- a/src/clustering.Rmd
+++ b/src/clustering.Rmd
@@ -300,7 +300,7 @@ data %>%
 ## For XY
 
 ```{r}
-data <- sim_kmer(1e3, 1000, "XY")
+data <- sim_kmer(1e2, 1000, "XY")
 model_XY <- data %>%
   dplyr::select(count_m, count_f) %>%
   as.matrix() %>%
@@ -309,18 +309,13 @@ model_XO <- data %>%
   dplyr::select(count_m, count_f) %>%
   as.matrix() %>%
   EM_clust(sex = "XO")
-model_XY$BIC
-model_XO$BIC
--2 * (model_XY$loglik - model_XO$loglik)
--2 * (model_XO$loglik - model_XY$loglik)
 pchisq(-2 * (model_XY$loglik - model_XO$loglik), 4)
-pchisq(-2 * (model_XO$loglik - model_XY$loglik), 4)
 ```
 
 ## For XO
 
 ```{r}
-data <- sim_kmer(1e3, 1000, "XO")
+data <- sim_kmer(1e2, 1000, "XO")
 model_XY <- data %>%
   dplyr::select(count_m, count_f) %>%
   as.matrix() %>%
@@ -329,12 +324,18 @@ model_XO <- data %>%
   dplyr::select(count_m, count_f) %>%
   as.matrix() %>%
   EM_clust(sex = "XO")
-model_XY$BIC
-model_XO$BIC
-- 2 * (model_XY$loglik - model_XO$loglik)
-- 2 * (model_XO$loglik - model_XY$loglik)
 pchisq(-2 * (model_XY$loglik - model_XO$loglik), 4)
-pchisq(-2 * (model_XO$loglik - model_XY$loglik), 4)
+```
+
+## Get Y k-mer
+
+```{r}
+# NOTE(review): column 3 of model_XY$proba is presumably the Y cluster - confirm against EM_clust
+data %>%
+  mutate(y_proba = model_XY$proba[, 3]) %>%
+  ggplot(aes(x = count_m, y = count_f, color = y_proba)) +
+  geom_point() +
+  theme_bw()
 ```
 
 ## With real data