From 6664472527dbd4b72d6013f397e710b8724087b3 Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent.modolo@ens-lyon.fr>
Date: Sat, 5 Aug 2023 18:30:06 +0200
Subject: [PATCH] add clustering analysis in addition to bootstrap analysis

---
 src/bin/kmerclust.R      |  6 ------
 src/bin/kmerclust_boot.R | 13 ++++++++++++
 src/bin/kmerclust_plot.R | 11 +++++-----
 src/modules/kmerclust.nf | 44 ++++++++++++++++++++++++++++++++--------
 4 files changed, 53 insertions(+), 21 deletions(-)
 delete mode 100644 src/bin/kmerclust.R
 create mode 100644 src/bin/kmerclust_boot.R

diff --git a/src/bin/kmerclust.R b/src/bin/kmerclust.R
deleted file mode 100644
index 1f87616..0000000
--- a/src/bin/kmerclust.R
+++ /dev/null
@@ -1,6 +0,0 @@
-library(kmerclust)
-args <- commandArgs(trailingOnly = TRUE)
-print(args)
-load(file = paste0(args[1], ".Rdata"))
-res <- compare_models(count, nboot = as.numeric(args[2]) * 2, bootsize = nrow(count), core = as.numeric(args[2]))
-save(res, file = paste0(args[1], "_boot_", args[3], ".Rdata"))
diff --git a/src/bin/kmerclust_boot.R b/src/bin/kmerclust_boot.R
new file mode 100644
index 0000000..89a7007
--- /dev/null
+++ b/src/bin/kmerclust_boot.R
@@ -0,0 +1,13 @@
+library(kmerclust) # bootstrap model-comparison script: loads <args[1]>.Rdata, runs compare_models_constraint(), saves the result
+args <- commandArgs(trailingOnly = TRUE) # positional: [1] file prefix, [2] number used for nboot and core, [3] tag used in the output file name
+print(args) # echo the received arguments
+load(file = paste0(args[1], ".Rdata")) # presumably defines `count`, which is used below - verify against the script that saves it
+res <- count %>% # NOTE(review): only kmerclust is attached in this script; confirm `%>%` is in scope
+  dplyr::select(count_m, count_f) %>%
+  as.matrix() %>%
+  compute_tpm() %>%
+  log() %>%
+  compare_models_constraint(count, nboot = as.numeric(args[2]) * 2, bootsize = nrow(count), core = as.numeric(args[2])) # NOTE(review): the pipe already supplies the first argument, so `count` is passed second - confirm this is intended
+save(res, file = paste0(args[1], "_boot_", args[3], ".Rdata")) # writes <args[1]>_boot_<args[3]>.Rdata
+
+
diff --git a/src/bin/kmerclust_plot.R b/src/bin/kmerclust_plot.R
index 79969ee..5f50f46 100644
--- a/src/bin/kmerclust_plot.R
+++ b/src/bin/kmerclust_plot.R
@@ -12,16 +12,15 @@ res %>%
   geom_violin() +
   theme_bw()
 ggsave(paste0(args[1], "_BIC.pdf"))
-res %>%
-  ggplot(aes(x = name, y = WSS_f / BSS)) +
-  geom_violin() +
-  theme_bw()
-ggsave(paste0(args[1], "_WSS_BSS.pdf"))
-rm(res)
 
 load(file = paste0(args[1], ".Rdata"))
 count %>%
   sample_frac(0.1) %>%
+  mutate( # log1p() keeps zero counts finite on the log scale
+    count_m = log1p(count_m),
+    count_f = log1p(count_f),
+  ) %>% # feed the transformed rows to ggplot(); without this pipe the plot receives no data
   ggplot(aes(x = count_m, y = count_f)) +
+  geom_point() +
   theme_bw()
 ggsave(paste0(args[1], "_scatter.pdf"))
diff --git a/src/modules/kmerclust.nf b/src/modules/kmerclust.nf
index b8426b3..61feec6 100644
--- a/src/modules/kmerclust.nf
+++ b/src/modules/kmerclust.nf
@@ -2,7 +2,7 @@ process KMERCLUST_LOAD {
     tag "$specie"
     label 'big_mem_mono_cpus'
 
-    container "lbmc/kmerclust:0.0.2"
+    container "lbmc/kmerclust:0.0.3"
 
     input:
     tuple val(specie), path(csv)
@@ -19,7 +19,33 @@ process KMERCLUST_LOAD {
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        Rkmerclust: 0.0.1 
+        Rkmerclust: 0.0.3 
+    END_VERSIONS
+    """
+}
+
+process KMERCLUST_BOOT { // runs bin/kmerclust_boot.R once per input tuple
+    tag "$specie"
+    label 'big_mem_multi_cpus'
+
+    container "lbmc/kmerclust:0.0.3"
+    publishDir "results/${params.kmer_size}/${specie}/", mode: 'copy' // published under results/<kmer_size>/<specie>/
+
+    input:
+    tuple val(id), val(specie), path(rdata) // id is forwarded to the script as its third argument
+
+    output:
+    tuple val(specie), path("*.Rdata"), emit: rdata 
+    path "versions.yml"           , emit: versions
+
+    script:
+    def args = task.ext.args ?: '' // NOTE(review): not referenced in the command below
+    """
+    Rscript ${projectDir}/bin/kmerclust_boot.R ${specie} ${task.cpus} ${id}
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        Rkmerclust: 0.0.3 
     END_VERSIONS
     """
 }
@@ -28,7 +54,7 @@ process KMERCLUST {
     tag "$specie"
     label 'big_mem_multi_cpus'
 
-    container "lbmc/kmerclust:0.0.2"
+    container "lbmc/kmerclust:0.0.3"
     publishDir "results/${params.kmer_size}/${specie}/", mode: 'copy'
 
     input:
@@ -41,11 +67,11 @@ process KMERCLUST {
     script:
     def args = task.ext.args ?: ''
     """
-    Rscript ${projectDir}/bin/kmerclust.R ${specie} ${task.cpus} ${id}
+    Rscript ${projectDir}/bin/kmerclust.R ${specie} ${task.cpus}
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        Rkmerclust: 0.0.1 
+        Rkmerclust: 0.0.3 
     END_VERSIONS
     """
 }
@@ -54,7 +80,7 @@ process KMERCLUST_MERGE {
     tag "$specie"
     label 'big_mem_mono_cpus'
 
-    container "lbmc/kmerclust:0.0.2"
+    container "lbmc/kmerclust:0.0.3"
     publishDir "results/${params.kmer_size}/${specie}/", mode: 'copy'
 
     input:
@@ -71,7 +97,7 @@ process KMERCLUST_MERGE {
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        Rkmerclust: 0.0.1 
+        Rkmerclust: 0.0.3 
     END_VERSIONS
     """
 }
@@ -80,7 +106,7 @@ process KMERCLUST_PLOT {
     tag "$specie"
     label 'big_mem_mono_cpus'
 
-    container "lbmc/kmerclust:0.0.2"
+    container "lbmc/kmerclust:0.0.3"
     publishDir "results/${params.kmer_size}/${specie}/", mode: 'copy'
 
     input:
@@ -97,7 +123,7 @@ process KMERCLUST_PLOT {
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
-        Rkmerclust: 0.0.1 
+        Rkmerclust: 0.0.3 
     END_VERSIONS
     """
 }
-- 
GitLab