# Patch summary and review notes. This text sits ahead of the first
# `diff --git` header and is not part of any hunk.
#
# Purpose: split the single kmerclust step into load -> bootstrap -> merge.
#
#   src/bin/kmerclust_load.R (new file)
#     Reads the TSV named by args[2], parses the annotation named by args[1],
#     builds `count` with annotate_counts(), ungroups it, and saves it to
#     "<args[3]>.Rdata".
#   src/bin/kmerclust.R
#     No longer builds `count`; it loads "<args[1]>.Rdata", calls
#     compare_models(), and saves `res` to "<args[1]>_boot_<args[3]>.Rdata".
#     The KMERCLUST process invokes it as: <specie> <task.cpus> <id>.
#   src/bin/kmerclust_merge.R (new file)
#     Loads every file in the working directory whose name ends in "Rdata",
#     rbind()s the `res` object each one defines, and saves the combined
#     `res` to "<args[1]>_boot.Rdata".
#   src/bin/kmerclust_plot.R
#     The file-name prefix used by load() and ggsave() moves from args[3]
#     to args[1].
#   src/main.nf
#     Adds `nextflow.preview.recursion=true` and `params.bootstrap = 10`,
#     folds the kmerclust includes into one statement, and replaces the
#     single KMERCLUST call with KMERCLUST_LOAD, then KMERCLUST on
#     Channel.of(1..params.bootstrap) combined with KMERCLUST_LOAD.out.rdata,
#     then KMERCLUST_MERGE.
#   src/modules/kmerclust.nf
#     Renames the existing process to KMERCLUST_LOAD (its Rscript call now
#     runs kmerclust_load.R and no longer passes task.cpus) and adds the
#     KMERCLUST and KMERCLUST_MERGE processes.
#
# NOTE(review): line breaks inside this copy of the patch appear to have been
#   collapsed, so the hunk line counts cannot be checked from here. Compare
#   against the original commit before applying.
# NOTE(review): kmerclust.R -- the hunk removes
#   `args <- commandArgs(trailingOnly = TRUE)` and none of the added lines
#   assigns `args`, yet the added lines read args[1], args[2] and args[3].
#   kmerclust_merge.R also reads args[1] without assigning it. Looks like
#   both scripts need the commandArgs() line -- confirm.
# NOTE(review): kmerclust.R -- `nboot = args[2] * 2` does arithmetic on a
#   command-line value with no as.numeric(), unlike `core` on the same line.
#   commandArgs() values are character strings -- TODO confirm and convert.
# NOTE(review): main.nf -- KMERCLUST declares its `rdata` output as
#   `tuple val(specie), path("*.Rdata")` (two elements), but the workflow maps
#   it with `[it[1], it[2]]` before groupTuple(). Presumably `[it[0], it[1]]`
#   (or no map at all) was intended -- verify.
# NOTE(review): main.nf -- KMERCLUST_PLOT is still fed KMERCLUST.out.rdata,
#   while kmerclust_plot.R loads "<args[1]>_boot.Rdata", a name that only
#   kmerclust_merge.R writes. The KMERCLUST_PLOT process is not shown in this
#   patch; check which channel it should consume.
# NOTE(review): kmerclust_plot.R -- untouched context lines join ggplot layers
#   to theme_bw() with `%>%`; presumably `+` was intended -- confirm.
#
diff --git a/src/bin/kmerclust.R b/src/bin/kmerclust.R index 582d80d72658960a7d1739f622f23d98efa8876f..29dba78da2b55b05b2cf562f88f8581c5fcad2b3 100644 --- a/src/bin/kmerclust.R +++ b/src/bin/kmerclust.R @@ -1,15 +1,6 @@ library(kmerclust) -library(ggplot2) +load(file = paste0(args[1], ".Rdata")) -args <- commandArgs(trailingOnly = TRUE) -print(args) +res <- compare_models(count, nboot = args[2] * 2, bootsize = nrow(count), core = as.numeric(args[2])) -data <- readr::read_tsv(args[2], show_col_types = FALSE) -format(object.size(data), units = "Mb") -annotation <- parse_annotation(args[1]) -count <- annotate_counts(annotation, data, args[3]) - -res <- compare_models(count %>% dplyr::ungroup(), nboot = 100, bootsize = 0.1, core = as.numeric(args[4])) - -save(count, file = paste0(args[3], ".Rdata")) -save(res, file = paste0(args[3], "_boot.Rdata")) +save(res, file = paste0(args[1], "_boot_", args[3], ".Rdata")) diff --git a/src/bin/kmerclust_load.R b/src/bin/kmerclust_load.R new file mode 100644 index 0000000000000000000000000000000000000000..f6c4b1de243fa7c759e9f5902ad5ce4bd7c68b13 --- /dev/null +++ b/src/bin/kmerclust_load.R @@ -0,0 +1,10 @@ +library(kmerclust) + +args <- commandArgs(trailingOnly = TRUE) +print(args) + +data <- readr::read_tsv(args[2], show_col_types = FALSE) +format(object.size(data), units = "Mb") +annotation <- parse_annotation(args[1]) +count <- annotate_counts(annotation, data, args[3]) %>% dplyr::ungroup() +save(count, file = paste0(args[3], ".Rdata")) diff --git a/src/bin/kmerclust_merge.R b/src/bin/kmerclust_merge.R new file mode 100644 index 0000000000000000000000000000000000000000..c5e30f80969887edc512b4ba668771d81ffbfffc --- /dev/null +++ b/src/bin/kmerclust_merge.R @@ -0,0 +1,10 @@ +library(kmerclust) + +res_files <- list.files(pattern = "Rdata$") +res_total <- NULL +for (res_file in res_files) { + load(res_file) + res_total <- rbind(res_total, res) +} +res <- res_total +save(res, file = paste0(args[1], "_boot", ".Rdata")) diff --git 
a/src/bin/kmerclust_plot.R b/src/bin/kmerclust_plot.R index 57d6408ce00ba8b603fb19faf58fedf28f6fc497..42d1aab5720639d0b9d557aea20b267cdef1b6ad 100644 --- a/src/bin/kmerclust_plot.R +++ b/src/bin/kmerclust_plot.R @@ -3,20 +3,20 @@ library(ggplot2) args <- commandArgs(trailingOnly = TRUE) print(args) -load(file = paste0(args[3], ".Rdata")) -load(file = paste0(args[3], "_boot.Rdata")) +load(file = paste0(args[1], ".Rdata")) +load(file = paste0(args[1], "_boot.Rdata")) res %>% ggplot(aes(x = name, y = BIC)) + geom_violin() %>% theme_bw() -ggsave(paste0(args[3], "_BIC.pdf")) +ggsave(paste0(args[1], "_BIC.pdf")) res %>% ggplot(aes(x = name, y = WSS_f / BSS)) + geom_violin() %>% theme_bw() -ggsave(paste0(args[3], "_WSS_BSS.pdf")) +ggsave(paste0(args[1], "_WSS_BSS.pdf")) count %>% ggplot(aes(x = count_m, y = count_f)) %>% theme_bw() -ggsave(paste0(args[3], "_scatter.pdf")) +ggsave(paste0(args[1], "_scatter.pdf")) diff --git a/src/main.nf b/src/main.nf index d747c7a750d8231be39fda38bbe167789b7fe26a..638a2e2c9aa58de239c5cdebeffa3dc383081f4e 100644 --- a/src/main.nf +++ b/src/main.nf @@ -1,16 +1,17 @@ nextflow.enable.dsl=2 +nextflow.preview.recursion=true /* Testing pipeline for marseq scRNASeq analysis */ params.kmer_size = 12 +params.bootstrap = 10 include { SPLIT } from "./modules/split" include { FASTKMERS } from "./modules/fastkmers" include { MERGEKMER } from "./modules/mergekmer" include { COLLATEKMER } from "./modules/mergekmer" -include { KMERCLUST } from "./modules/kmerclust" -include { KMERCLUST_PLOT } from "./modules/kmerclust" +include { KMERCLUST_LOAD, KMERCLUST, KMERCLUST_PLOT, KMERCLUST_MERGE } from "./modules/kmerclust" Channel.fromPath( file(params.csv) ) @@ -30,6 +31,8 @@ workflow { FASTKMERS(SPLIT.out.fastq.transpose()) MERGEKMER(FASTKMERS.out.csv.groupTuple()) COLLATEKMER(MERGEKMER.out.csv.map{it -> [it[0].specie, it[1]] }.groupTuple()) - KMERCLUST(COLLATEKMER.out.csv, params_csv.collect()) + KMERCLUST_LOAD(COLLATEKMER.out.csv, params_csv.collect()) + 
KMERCLUST(Channel.of(1..params.bootstrap).combine(KMERCLUST_LOAD.out.rdata)) + KMERCLUST_MERGE(KMERCLUST.out.rdata.map{it -> [it[1], it[2]]}.groupTuple()) KMERCLUST_PLOT(KMERCLUST.out.rdata) } diff --git a/src/modules/kmerclust.nf b/src/modules/kmerclust.nf index 7a7090fde5a1ee8a70bca0d8e1cb96c1e992c6dc..12485e16c24102170f7dd6d90f5746454de2e6f6 100644 --- a/src/modules/kmerclust.nf +++ b/src/modules/kmerclust.nf @@ -1,4 +1,4 @@ -process KMERCLUST { +process KMERCLUST_LOAD { tag "$specie" label 'big_mem_multi_cpus' @@ -16,7 +16,59 @@ process KMERCLUST { script: def args = task.ext.args ?: '' """ - Rscript ${projectDir}/bin/kmerclust.R ${params_csv} ${csv} ${specie} ${task.cpus} + Rscript ${projectDir}/bin/kmerclust_load.R ${params_csv} ${csv} ${specie} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + Rkmerclust: 0.0.1 + END_VERSIONS + """ +} + +process KMERCLUST { + tag "$specie" + label 'big_mem_multi_cpus' + + container "lbmc/kmerclust:0.0.2" + publishDir "results/${params.kmer_size}/${specie}/", mode: 'copy' + + input: + tuple val(id), val(specie), path(rdata) + + output: + tuple val(specie), path("*.Rdata"), emit: rdata + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + """ + Rscript ${projectDir}/bin/kmerclust.R ${specie} ${task.cpus} ${id} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + Rkmerclust: 0.0.1 + END_VERSIONS + """ +} + +process KMERCLUST_MERGE { + tag "$specie" + label 'big_mem_multi_cpus' + + container "lbmc/kmerclust:0.0.2" + publishDir "results/${params.kmer_size}/${specie}/", mode: 'copy' + + input: + tuple val(specie), path(rdata) + + output: + tuple val(specie), path("*.Rdata"), emit: rdata + path "versions.yml" , emit: versions + + script: + def args = task.ext.args ?: '' + """ + Rscript ${projectDir}/bin/kmerclust_merge.R ${specie} cat <<-END_VERSIONS > versions.yml "${task.process}":