From 6664472527dbd4b72d6013f397e710b8724087b3 Mon Sep 17 00:00:00 2001 From: Laurent Modolo <laurent.modolo@ens-lyon.fr> Date: Sat, 5 Aug 2023 18:30:06 +0200 Subject: [PATCH] add clustering analysis in addition to bootstrap analysis --- src/bin/kmerclust.R | 6 ------ src/bin/kmerclust_boot.R | 13 ++++++++++++ src/bin/kmerclust_plot.R | 11 +++++----- src/modules/kmerclust.nf | 44 ++++++++++++++++++++++++++++++++-------- 4 files changed, 53 insertions(+), 21 deletions(-) delete mode 100644 src/bin/kmerclust.R create mode 100644 src/bin/kmerclust_boot.R diff --git a/src/bin/kmerclust.R b/src/bin/kmerclust.R deleted file mode 100644 index 1f87616..0000000 --- a/src/bin/kmerclust.R +++ /dev/null @@ -1,6 +0,0 @@ -library(kmerclust) -args <- commandArgs(trailingOnly = TRUE) -print(args) -load(file = paste0(args[1], ".Rdata")) -res <- compare_models(count, nboot = as.numeric(args[2]) * 2, bootsize = nrow(count), core = as.numeric(args[2])) -save(res, file = paste0(args[1], "_boot_", args[3], ".Rdata")) diff --git a/src/bin/kmerclust_boot.R b/src/bin/kmerclust_boot.R new file mode 100644 index 0000000..89a7007 --- /dev/null +++ b/src/bin/kmerclust_boot.R @@ -0,0 +1,13 @@ +library(kmerclust) +args <- commandArgs(trailingOnly = TRUE) +print(args) +load(file = paste0(args[1], ".Rdata")) +res <- count %>% + dplyr::select(count_m, count_f) %>% + as.matrix() %>% + compute_tpm() %>% + log() %>% + compare_models_constraint(count, nboot = as.numeric(args[2]) * 2, bootsize = nrow(count), core = as.numeric(args[2])) +save(res, file = paste0(args[1], "_boot_", args[3], ".Rdata")) + + diff --git a/src/bin/kmerclust_plot.R b/src/bin/kmerclust_plot.R index 79969ee..5f50f46 100644 --- a/src/bin/kmerclust_plot.R +++ b/src/bin/kmerclust_plot.R @@ -12,16 +12,15 @@ res %>% geom_violin() + theme_bw() ggsave(paste0(args[1], "_BIC.pdf")) -res %>% - ggplot(aes(x = name, y = WSS_f / BSS)) + - geom_violin() + - theme_bw() -ggsave(paste0(args[1], "_WSS_BSS.pdf")) -rm(res) load(file = paste0(args[1], 
".Rdata")) count %>% sample_frac(0.1) %>% + mutate( + count_m = log1p(count_m), + count_f = log1p(count_f), + ) ggplot(aes(x = count_m, y = count_f)) + + geom_point() + theme_bw() ggsave(paste0(args[1], "_scatter.pdf")) diff --git a/src/modules/kmerclust.nf b/src/modules/kmerclust.nf index b8426b3..61feec6 100644 --- a/src/modules/kmerclust.nf +++ b/src/modules/kmerclust.nf @@ -2,7 +2,7 @@ process KMERCLUST_LOAD { tag "$specie" label 'big_mem_mono_cpus' - container "lbmc/kmerclust:0.0.2" + container "lbmc/kmerclust:0.0.3" input: tuple val(specie), path(csv) @@ -19,7 +19,33 @@ process KMERCLUST_LOAD { cat <<-END_VERSIONS > versions.yml "${task.process}": - Rkmerclust: 0.0.1 + Rkmerclust: 0.0.3 + END_VERSIONS + """ +} + +process KMERCLUST_BOOT { + tag "$specie" + label 'big_mem_multi_cpus' + + container "lbmc/kmerclust:0.0.3" + publishDir "results/${params.kmer_size}/${specie}/", mode: 'copy' + + input: + tuple val(id), val(specie), path(rdata) + + output: + tuple val(specie), path("*.Rdata"), emit: rdata + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + """ + Rscript ${projectDir}/bin/kmerclust_boot.R ${specie} ${task.cpus} ${id} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + Rkmerclust: 0.0.3 END_VERSIONS """ } @@ -28,7 +54,7 @@ process KMERCLUST { tag "$specie" label 'big_mem_multi_cpus' - container "lbmc/kmerclust:0.0.2" + container "lbmc/kmerclust:0.0.3" publishDir "results/${params.kmer_size}/${specie}/", mode: 'copy' input: @@ -41,11 +67,11 @@ process KMERCLUST { script: def args = task.ext.args ?: '' """ - Rscript ${projectDir}/bin/kmerclust.R ${specie} ${task.cpus} ${id} + Rscript ${projectDir}/bin/kmerclust.R ${specie} ${task.cpus} cat <<-END_VERSIONS > versions.yml "${task.process}": - Rkmerclust: 0.0.1 + Rkmerclust: 0.0.3 END_VERSIONS """ } @@ -54,7 +80,7 @@ process KMERCLUST_MERGE { tag "$specie" label 'big_mem_mono_cpus' - container "lbmc/kmerclust:0.0.2" + container "lbmc/kmerclust:0.0.3" publishDir 
"results/${params.kmer_size}/${specie}/", mode: 'copy' input: @@ -71,7 +97,7 @@ process KMERCLUST_MERGE { cat <<-END_VERSIONS > versions.yml "${task.process}": - Rkmerclust: 0.0.1 + Rkmerclust: 0.0.3 END_VERSIONS """ } @@ -80,7 +106,7 @@ process KMERCLUST_PLOT { tag "$specie" label 'big_mem_mono_cpus' - container "lbmc/kmerclust:0.0.2" + container "lbmc/kmerclust:0.0.3" publishDir "results/${params.kmer_size}/${specie}/", mode: 'copy' input: @@ -97,7 +123,7 @@ process KMERCLUST_PLOT { cat <<-END_VERSIONS > versions.yml "${task.process}": - Rkmerclust: 0.0.1 + Rkmerclust: 0.0.3 END_VERSIONS """ } -- GitLab