Skip to content
Snippets Groups Projects
Verified Commit 765b7e36 authored by Laurent Modolo
Browse files

split kmerclust into multiple processes

parent 4f33f1e3
Branches
No related tags found
No related merge requests found
# Bootstrap model-comparison script.
# NOTE(review): this span appears to hold two revisions of the script side by
# side: one indexing args[1..4] (annotation, table, specie, cores) and one
# indexing args[1..3] (specie, cores, bootstrap id) — confirm which statements
# are meant to stay.
library(kmerclust)
library(ggplot2)
# Command-line arguments must be read before anything indexes `args`.
args <- commandArgs(trailingOnly = TRUE)
print(args)
# Restores the objects stored in "<args[1]>.Rdata" (`count` is used below).
load(file = paste0(args[1], ".Rdata"))
# commandArgs() returns character strings, so args[2] is converted before the
# multiplication; multiplying the raw string would raise an error.
res <- compare_models(count, nboot = as.numeric(args[2]) * 2, bootsize = nrow(count), core = as.numeric(args[2]))
data <- readr::read_tsv(args[2], show_col_types = FALSE)
format(object.size(data), units = "Mb")
annotation <- parse_annotation(args[1])
count <- annotate_counts(annotation, data, args[3])
res <- compare_models(count %>% dplyr::ungroup(), nboot = 100, bootsize = 0.1, core = as.numeric(args[4]))
save(count, file = paste0(args[3], ".Rdata"))
save(res, file = paste0(args[3], "_boot.Rdata"))
# Per-run result file: "<args[1]>_boot_<args[3]>.Rdata".
save(res, file = paste0(args[1], "_boot_", args[3], ".Rdata"))
# Build the annotated count table and store it as "<args[3]>.Rdata".
# Positional arguments, by how they are used below:
#   args[1] -> parse_annotation(), args[2] -> tab-separated table to read,
#   args[3] -> third argument of annotate_counts() and output file prefix.
library(kmerclust)
args <- commandArgs(trailingOnly = TRUE)
print(args)
kmer_table <- readr::read_tsv(args[2], show_col_types = FALSE)
# Report the in-memory size of the table that was just read.
format(object.size(kmer_table), units = "Mb")
annot <- parse_annotation(args[1])
# Grouping is dropped before saving; the object keeps the name `count`
# because that is the name stored in the .Rdata file.
count <- dplyr::ungroup(annotate_counts(annot, kmer_table, args[3]))
save(count, file = paste0(args[3], ".Rdata"))
# Merge the `res` objects stored in every *.Rdata file of the working
# directory into one table and save it as "<args[1]>_boot.Rdata".
library(kmerclust)

# The output prefix comes from the command line; `args` has to be read here
# because nothing else in this script defines it.
args <- commandArgs(trailingOnly = TRUE)
if (length(args) < 1) {
  stop("usage: kmerclust_merge.R <output prefix>", call. = FALSE)
}

res_files <- list.files(pattern = "Rdata$")

# Load one file into a private environment and return its `res` object
# (NULL when the file has none), so a file without `res` cannot silently
# re-append the object left over from the previous file.
load_res <- function(path) {
  env <- new.env()
  load(path, envir = env)
  env[["res"]]
}

# Bind all pieces in one call instead of growing the table inside a loop.
# With no input files this yields NULL, as the loop-based version did.
res <- do.call(rbind, lapply(res_files, load_res))
save(res, file = paste0(args[1], "_boot", ".Rdata"))
......@@ -3,20 +3,20 @@ library(ggplot2)
# Plotting script: loads the saved `count` and `res` objects and writes three
# PDF figures.
# NOTE(review): the load()/ggsave() calls appear twice, once keyed on args[3]
# and once on args[1]; this looks like two revisions shown side by side —
# confirm which prefix is the live one.
args <- commandArgs(trailingOnly = TRUE)
print(args)
load(file = paste0(args[3], ".Rdata"))
load(file = paste0(args[3], "_boot.Rdata"))
load(file = paste0(args[1], ".Rdata"))
load(file = paste0(args[1], "_boot.Rdata"))
# ggplot2 layers and themes are combined with `+`. Piping a geom into
# theme_bw() would pass the layer as the theme's first argument instead of
# adding it to the plot.
res %>%
ggplot(aes(x = name, y = BIC)) +
geom_violin() +
theme_bw()
ggsave(paste0(args[3], "_BIC.pdf"))
ggsave(paste0(args[1], "_BIC.pdf"))
res %>%
ggplot(aes(x = name, y = WSS_f / BSS)) +
geom_violin() +
theme_bw()
ggsave(paste0(args[3], "_WSS_BSS.pdf"))
ggsave(paste0(args[1], "_WSS_BSS.pdf"))
# NOTE(review): geom_point() is assumed from the "_scatter.pdf" output name;
# the plot previously had no geom layer at all — confirm the intended geom.
count %>%
ggplot(aes(x = count_m, y = count_f)) +
geom_point() +
theme_bw()
ggsave(paste0(args[3], "_scatter.pdf"))
ggsave(paste0(args[1], "_scatter.pdf"))
nextflow.enable.dsl=2
nextflow.preview.recursion=true
/*
Testing pipeline for marseq scRNASeq analysis
*/
// Default k-mer size; it is part of the publishDir path of the KMERCLUST processes.
params.kmer_size = 12
// Upper bound of the 1..N range used to fan out KMERCLUST tasks in the workflow.
params.bootstrap = 10
include { SPLIT } from "./modules/split"
include { FASTKMERS } from "./modules/fastkmers"
include { MERGEKMER } from "./modules/mergekmer"
include { COLLATEKMER } from "./modules/mergekmer"
// NOTE(review): KMERCLUST and KMERCLUST_PLOT are included one by one here and
// again in the combined include below; this looks like old and new revisions
// shown together — confirm only one form remains.
include { KMERCLUST } from "./modules/kmerclust"
include { KMERCLUST_PLOT } from "./modules/kmerclust"
include { KMERCLUST_LOAD, KMERCLUST, KMERCLUST_PLOT, KMERCLUST_MERGE } from "./modules/kmerclust"
Channel.fromPath( file(params.csv) )
......@@ -30,6 +31,8 @@ workflow {
FASTKMERS(SPLIT.out.fastq.transpose())
MERGEKMER(FASTKMERS.out.csv.groupTuple())
// Re-key each merged table by it[0].specie, then group the tables per specie.
COLLATEKMER(MERGEKMER.out.csv.map{it -> [it[0].specie, it[1]] }.groupTuple())
// NOTE(review): KMERCLUST is invoked twice (two inputs here, one input below);
// presumably old and new revisions shown together — confirm.
KMERCLUST(COLLATEKMER.out.csv, params_csv.collect())
KMERCLUST_LOAD(COLLATEKMER.out.csv, params_csv.collect())
// One KMERCLUST task per value in 1..params.bootstrap, each combined with the
// rdata output of KMERCLUST_LOAD.
KMERCLUST(Channel.of(1..params.bootstrap).combine(KMERCLUST_LOAD.out.rdata))
// NOTE(review): the KMERCLUST process declares its rdata output as
// tuple val(specie), path("*.Rdata"), i.e. two elements, so it[2] looks out of
// range here; [it[0], it[1]] may be what is intended — confirm.
KMERCLUST_MERGE(KMERCLUST.out.rdata.map{it -> [it[1], it[2]]}.groupTuple())
// NOTE(review): this passes the per-bootstrap KMERCLUST output; the merged
// "<specie>_boot.Rdata" file is written by the KMERCLUST_MERGE step — confirm
// which channel KMERCLUST_PLOT should receive.
KMERCLUST_PLOT(KMERCLUST.out.rdata)
}
process KMERCLUST {
process KMERCLUST_LOAD {
tag "$specie"
label 'big_mem_multi_cpus'
......@@ -16,7 +16,59 @@ process KMERCLUST {
script:
def args = task.ext.args ?: ''
"""
Rscript ${projectDir}/bin/kmerclust.R ${params_csv} ${csv} ${specie} ${task.cpus}
Rscript ${projectDir}/bin/kmerclust_load.R ${params_csv} ${csv} ${specie}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Rkmerclust: 0.0.1
END_VERSIONS
"""
}
// Runs bin/kmerclust.R once per (id, specie) pair and publishes the resulting
// .Rdata files under results/<kmer_size>/<specie>/.
process KMERCLUST {
tag "$specie"
label 'big_mem_multi_cpus'
container "lbmc/kmerclust:0.0.2"
publishDir "results/${params.kmer_size}/${specie}/", mode: 'copy'
// id: run identifier passed to the script as its third argument;
// specie: name passed as the first argument; rdata: staged input file.
input:
tuple val(id), val(specie), path(rdata)
// rdata: (specie, every *.Rdata file matched in the task directory);
// versions: the versions.yml written at the end of the script.
output:
tuple val(specie), path("*.Rdata"), emit: rdata
path "versions.yml" , emit: versions
script:
// NOTE(review): `args` is read from task.ext.args but is not referenced in the
// command below.
def args = task.ext.args ?: ''
"""
Rscript ${projectDir}/bin/kmerclust.R ${specie} ${task.cpus} ${id}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Rkmerclust: 0.0.1
END_VERSIONS
"""
}
process KMERCLUST_MERGE {
tag "$specie"
label 'big_mem_multi_cpus'
container "lbmc/kmerclust:0.0.2"
publishDir "results/${params.kmer_size}/${specie}/", mode: 'copy'
input:
tuple val(specie), path(rdata)
output:
tuple val(specie), path("*.Rdata"), emit: rdata
path "versions.yml" , emit: versions
script:
def args = task.ext.args ?: ''
"""
Rscript ${projectDir}/bin/kmerclust_merge.R ${specie}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment