From f9cc40717b0173ab33eb65b1e9c0689714ff1633 Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent.modolo@ens-lyon.fr>
Date: Mon, 5 Jun 2023 11:52:27 +0200
Subject: [PATCH] clustering: add LRT

Replace the exploratory BIC / log-likelihood printouts of the "For XY" and
"For XO" chunks with a single likelihood-ratio test (LRT) p-value, lower
the first argument of sim_kmer() from 1e3 to 1e2, and add a "Get Y k-mer"
chunk that colours each k-mer by the third column of model_XY$proba.

---
Review notes (free text in this section is ignored by `git am`):

- pchisq(q, df) returns the lower tail P(X <= q); an LRT p-value is the
  upper tail, hence `lower.tail = FALSE`.
- The statistic is written as -2 * (loglik_XO - loglik_XY), i.e. XO is the
  null model. This assumes `$loglik` holds the maximised log-likelihood and
  that the XO model is nested in the XY model -- TODO confirm against
  EM_clust(), which is not part of this patch.
- df = 4 is kept unchanged from the original call -- TODO confirm it equals
  the difference in free parameters between the two models.
- The "Get Y k-mer" chunk comes after the "For XO" chunk, so the `data` and
  `model_XY` it uses are the ones built from the "XO" simulation -- confirm
  this is intended.
- The post-image blob id on the `index` line was not regenerated after the
  hunks were edited by hand.

 src/clustering.Rmd | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/src/clustering.Rmd b/src/clustering.Rmd
index 72a1e80..d6a2af5 100644
--- a/src/clustering.Rmd
+++ b/src/clustering.Rmd
@@ -300,7 +300,7 @@ data %>%
 ## For XY
 
 ```{r}
-data <- sim_kmer(1e3, 1000, "XY")
+data <- sim_kmer(1e2, 1000, "XY")
 model_XY <- data %>%
   dplyr::select(count_m, count_f) %>%
   as.matrix() %>%
@@ -309,18 +309,15 @@ model_XO <- data %>%
   dplyr::select(count_m, count_f) %>%
   as.matrix() %>%
   EM_clust(sex = "XO")
-model_XY$BIC
-model_XO$BIC
--2 * (model_XY$loglik - model_XO$loglik)
--2 * (model_XO$loglik - model_XY$loglik)
-pchisq(-2 * (model_XY$loglik - model_XO$loglik), 4)
-pchisq(-2 * (model_XO$loglik - model_XY$loglik), 4)
+# LRT of XO (null) against XY: upper-tail chi-squared p-value
+lrt_stat <- -2 * (model_XO$loglik - model_XY$loglik)
+pchisq(lrt_stat, df = 4, lower.tail = FALSE)
 ```
 
 ## For XO
 
 ```{r}
-data <- sim_kmer(1e3, 1000, "XO")
+data <- sim_kmer(1e2, 1000, "XO")
 model_XY <- data %>%
   dplyr::select(count_m, count_f) %>%
   as.matrix() %>%
@@ -329,12 +326,19 @@ model_XO <- data %>%
   dplyr::select(count_m, count_f) %>%
   as.matrix() %>%
   EM_clust(sex = "XO")
-model_XY$BIC
-model_XO$BIC
-- 2 * (model_XY$loglik - model_XO$loglik)
-- 2 * (model_XO$loglik - model_XY$loglik)
-pchisq(-2 * (model_XY$loglik - model_XO$loglik), 4)
-pchisq(-2 * (model_XO$loglik - model_XY$loglik), 4)
+# LRT of XO (null) against XY: upper-tail chi-squared p-value
+lrt_stat <- -2 * (model_XO$loglik - model_XY$loglik)
+pchisq(lrt_stat, df = 4, lower.tail = FALSE)
+```
+
+## Get Y k-mer
+
+```{r}
+data %>%
+  mutate(y_proba = model_XY$proba[, 3]) %>%
+  ggplot(aes(x = count_m, y = count_f, color = y_proba)) +
+  geom_point() +
+  theme_bw()
 ```
 
 ## With real data
-- 
GitLab