From f9cc40717b0173ab33eb65b1e9c0689714ff1633 Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent.modolo@ens-lyon.fr>
Date: Mon, 5 Jun 2023 11:52:27 +0200
Subject: [PATCH] clustering: add LRT

---
 src/clustering.Rmd | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/src/clustering.Rmd b/src/clustering.Rmd
index 72a1e80..d6a2af5 100644
--- a/src/clustering.Rmd
+++ b/src/clustering.Rmd
@@ -300,7 +300,7 @@ data %>%
 ## For XY
 
 ```{r}
-data <- sim_kmer(1e3, 1000, "XY")
+data <- sim_kmer(1e2, 1000, "XY")
 model_XY <- data %>%
     dplyr::select(count_m, count_f) %>%
     as.matrix() %>% 
@@ -309,18 +309,13 @@ model_XO <- data %>%
     dplyr::select(count_m, count_f) %>%
     as.matrix() %>% 
     EM_clust(sex = "XO")
-model_XY$BIC
-model_XO$BIC
--2 * (model_XY$loglik - model_XO$loglik)
--2 * (model_XO$loglik - model_XY$loglik)
 pchisq(-2 * (model_XY$loglik - model_XO$loglik), 4)
-pchisq(-2 * (model_XO$loglik - model_XY$loglik), 4)
 ```
 
 ## For XO
 
 ```{r}
-data <- sim_kmer(1e3, 1000, "XO")
+data <- sim_kmer(1e2, 1000, "XO")
 model_XY <- data %>%
     dplyr::select(count_m, count_f) %>%
     as.matrix() %>% 
@@ -329,12 +324,17 @@ model_XO <- data %>%
     dplyr::select(count_m, count_f) %>%
     as.matrix() %>% 
     EM_clust(sex = "XO")
-model_XY$BIC
-model_XO$BIC
-- 2 * (model_XY$loglik - model_XO$loglik)
-- 2 * (model_XO$loglik - model_XY$loglik)
 pchisq(-2 * (model_XY$loglik - model_XO$loglik), 4)
-pchisq(-2 * (model_XO$loglik - model_XY$loglik), 4)
+```
+
+## Get Y k-mer
+
+```{r}
+data %>% # NOTE(review): data and model_XY come from the XO chunk; confirm
+    mutate(y_proba = model_XY$proba[, 3]) %>%
+    ggplot(aes(x = count_m, y = count_f, color = y_proba)) +
+    geom_point() +
+    theme_bw()
 ```
 
 ## With real data
-- 
GitLab