From 8afe54bb8ef84bc16cbb4a43bec4e0fde92ccc52 Mon Sep 17 00:00:00 2001 From: Laurent Modolo <laurent.modolo@ens-lyon.fr> Date: Wed, 14 Apr 2021 18:16:18 +0200 Subject: [PATCH] nf_modules: update module from g2gtools to multiqc to match the CONTRIBUTING.md --- src/nf_modules/fastqc/main.nf | 67 ++++------------- src/nf_modules/g2gtools/main.nf | 71 ++++++++++++++++-- src/nf_modules/gatk3/main.nf | 126 +++++++++++++++++++++++++++++--- src/nf_modules/gatk4/main.nf | 116 +++++++++++++++++++++++++++-- src/nf_modules/macs2/main.nf | 21 +++--- src/nf_modules/macs3/main.nf | 16 +++- src/nf_modules/minimap2/main.nf | 47 +++++++----- src/nf_modules/multiqc/main.nf | 7 +- src/solution_RNASeq.nf | 6 +- 9 files changed, 367 insertions(+), 110 deletions(-) diff --git a/src/nf_modules/fastqc/main.nf b/src/nf_modules/fastqc/main.nf index 40cef922..4501b026 100644 --- a/src/nf_modules/fastqc/main.nf +++ b/src/nf_modules/fastqc/main.nf @@ -2,65 +2,30 @@ version = "0.11.5" container_url = "lbmc/fastqc:${version}" params.fastqc_fastq = "" +params.fastqc_fastq_out = "" process fastqc_fastq { - container = "${container_url}" - label "big_mem_mono_cpus" - tag "$pair_id" - - input: - tuple val(pair_id), path(reads) - - output: - path "*.{zip,html}", emit: report - - script: -if (reads instanceof List) -""" -fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \ - ${params.fastqc_fastq} \ - ${reads[0]} ${reads[1]} -""" -else -""" - fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads} -""" -} - -params.fastqc_fastq_pairedend = "" -process fastqc_fastq_pairedend { - container = "${container_url}" - label "big_mem_mono_cpus" - tag "$pair_id" - - input: - tuple val(pair_id), path(reads) - - output: - path "*.{zip,html}", emit: report - - script: -""" -fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \ - ${params.fastqc_fastq_pairedend} \ - ${reads[0]} ${reads[1]} -""" -} - -params.fastqc_fastq_singleend = "" -process fastqc_fastq_singleend { 
container = "${container_url}" label "big_mem_mono_cpus" tag "$file_id" + if (params.fastqc_fastq_out != "") { + publishDir "results/${params.fastqc_fastq_out}", mode: 'copy' + } input: tuple val(file_id), path(reads) output: - path "*.{zip,html}", emit: report + tuple val(file_id), path("*.{zip,html}"), emit: report script: -""" - fastqc --quiet --threads ${task.cpus} ${params.fastqc_fastq_singleend} --format fastq --outdir ./ ${reads} -""" -} - + if (reads.size() == 2) + """ + fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \ + ${params.fastqc_fastq} \ + ${reads[0]} ${reads[1]} + """ + else + """ + fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${params.fastqc_fastq} ${reads} + """ +} \ No newline at end of file diff --git a/src/nf_modules/g2gtools/main.nf b/src/nf_modules/g2gtools/main.nf index c00dc500..15af5885 100644 --- a/src/nf_modules/g2gtools/main.nf +++ b/src/nf_modules/g2gtools/main.nf @@ -2,10 +2,14 @@ version = "0.2.8" container_url = "lbmc/g2gtools:${version}" params.vci_build = "" +params.vci_build_out = "" process vci_build { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" + if (params.vci_build_out != "") { + publishDir "results/${params.vci_build_out}", mode: 'copy' + } input: tuple val(file_id), path(vcf) @@ -14,6 +18,12 @@ process vci_build { tuple val(file_id), path("*.vci.gz"), path("*.vci.gz.tbi"), emit: vci tuple val(file_id), path("*_report.txt"), emit: report script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } + input_vcf = "" for (vcf_file in vcf) { input_vcf += " -i ${vcf_file}" @@ -24,83 +34,114 @@ g2gtools vcf2vci \ -p ${task.cpus} \ -f ${fasta} \ ${input_vcf} \ - -s ${file_id} \ - -o ${file_id}.vci 2> ${file_id}_g2gtools_vcf2vci_report.txt + -s ${file_prefix} \ + -o ${file_prefix}.vci 2> ${file_prefix}_g2gtools_vcf2vci_report.txt """ } params.incorporate_snp = "" +params.incorporate_snp_out = "" 
process incorporate_snp { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" + if (params.incorporate_snp_out != "") { + publishDir "results/${params.incorporate_snp_out}", mode: 'copy' + } input: tuple val(file_id), path(vci), path(tbi) tuple val(ref_id), path(fasta) output: - tuple val(file_id), path("${file_id}_snp.fa"), path("${vci}"), path("${tbi}"), emit: fasta + tuple val(file_id), path("${file_prefix}_snp.fa"), path("${vci}"), path("${tbi}"), emit: fasta tuple val(file_id), path("*_report.txt"), emit: report script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ g2gtools patch \ ${params.incorporate_snp} \ -p ${task.cpus} \ -i ${fasta} \ -c ${vci} \ - -o ${file_id}_snp.fa 2> ${file_id}_g2gtools_path_report.txt + -o ${file_prefix}_snp.fa 2> ${file_prefix}_g2gtools_path_report.txt """ } params.incorporate_indel = "" +params.incorporate_indel_out = "" process incorporate_indel { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" + if (params.incorporate_indel_out != "") { + publishDir "results/${params.incorporate_indel_out}", mode: 'copy' + } input: tuple val(file_id), path(fasta), path(vci), path(tbi) output: - tuple val(file_id), path("${file_id}_snp_indel.fa"), path("${vci}"), path("${tbi}"), emit: fasta + tuple val(file_id), path("${file_prefix}_snp_indel.fa"), path("${vci}"), path("${tbi}"), emit: fasta tuple val(file_id), path("*_report.txt"), emit: report script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ g2gtools transform \ ${params.incorporate_indel} \ -p ${task.cpus} \ -i ${fasta} \ -c ${vci} \ - -o ${file_id}_snp_indel.fa 2> ${file_id}_g2gtools_transform_report.txt + -o ${file_prefix}_snp_indel.fa 2> ${file_prefix}_g2gtools_transform_report.txt """ } params.convert_gtf = "" +params.convert_gtf_out = "" process convert_gtf { container = "${container_url}" label "big_mem_mono_cpus" tag 
"$file_id" + if (params.convert_gtf_out != "") { + publishDir "results/${params.convert_gtf_out}", mode: 'copy' + } input: tuple val(file_id), path(vci), path(tbi) tuple val(annot_id), path(gtf) output: - tuple val(file_id), path("${file_id}.gtf"), emit: gtf + tuple val(file_id), path("${file_prefix}.gtf"), emit: gtf tuple val(file_id), path("*_report.txt"), emit: report script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ g2gtools convert \ ${params.convert_gtf} \ -i ${gtf} \ -c ${vci} \ - -o ${file_id}.gtf 2> ${file_id}_g2gtools_convert_report.txt + -o ${file_prefix}.gtf 2> ${file_prefix}_g2gtools_convert_report.txt """ } params.convert_bed = "" +params.convert_bed_out = "" process convert_bed { container = "${container_url}" label "big_mem_mono_cpus" tag "$file_id" + if (params.convert_bed_out != "") { + publishDir "results/${params.convert_bed_out}", mode: 'copy' + } input: tuple val(file_id), path(vci), path(tbi) @@ -109,6 +150,11 @@ process convert_bed { tuple val(file_id), path("${file_id}.bed"), emit: bed tuple val(file_id), path("*_report.txt"), emit: report script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ g2gtools convert \ ${params.convert_bed} \ @@ -119,10 +165,14 @@ g2gtools convert \ } params.convert_bam = "" +params.convert_bam_out = "" process convert_bam { container = "${container_url}" label "big_mem_mono_cpus" tag "${bam_id} ${file_id}" + if (params.convert_bam_out != "") { + publishDir "results/${params.convert_bam_out}", mode: 'copy' + } input: tuple val(file_id), path(vci), path(tbi) @@ -131,6 +181,11 @@ process convert_bam { tuple val(file_id), path("${file_id}_${bam_id.baseName}.bam"), emit: bam tuple val(file_id), path("*_report.txt"), emit: report script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ g2gtools convert \ ${params.convert_bam} \ diff --git 
a/src/nf_modules/gatk3/main.nf b/src/nf_modules/gatk3/main.nf index 2348afe2..cedf9446 100644 --- a/src/nf_modules/gatk3/main.nf +++ b/src/nf_modules/gatk3/main.nf @@ -2,10 +2,14 @@ version = "3.8.0" container_url = "lbmc/gatk:${version}" params.variant_calling = "" +params.variant_calling_out = "" process variant_calling { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" + if (params.variant_calling_out != "") { + publishDir "results/${params.variant_calling_out}", mode: 'copy' + } input: tuple val(file_id), path(bam), path(bai) @@ -14,21 +18,30 @@ process variant_calling { tuple val(file_id), path("*.vcf"), emit: vcf script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ gatk3 -T HaplotypeCaller \ -nct ${task.cpus} \ ${params.variant_calling} \ -R ${fasta} \ -I ${bam} \ - -o ${file_id}.vcf + -o ${file_prefix}.vcf """ } params.filter_snp = "" +params.filter_snp_out = "" process filter_snp { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" + if (params.filter_snp_out != "") { + publishDir "results/${params.filter_snp_out}", mode: 'copy' + } input: tuple val(file_id), path(vcf) @@ -36,6 +49,11 @@ process filter_snp { output: tuple val(file_id), path("*_snp.vcf"), emit: vcf script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ gatk3 -T SelectVariants \ -nct ${task.cpus} \ @@ -43,15 +61,19 @@ gatk3 -T SelectVariants \ -R ${fasta} \ -V ${vcf} \ -selectType SNP \ - -o ${file_id}_snp.vcf + -o ${file_prefix}_snp.vcf """ } params.filter_indels = "" +params.filter_indels_out = "" process filter_indels { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" + if (params.filter_indels_out != "") { + publishDir "results/${params.filter_indels_out}", mode: 'copy' + } input: tuple val(file_id), path(vcf) @@ -59,6 +81,11 @@ process filter_indels { output: tuple val(file_id), path("*_indel.vcf"), emit: vcf 
script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ gatk3 -T SelectVariants \ -nct ${task.cpus} \ @@ -66,16 +93,20 @@ gatk3 -T SelectVariants \ -R ${fasta} \ -V ${vcf} \ -selectType INDEL \ - -o ${file_id}_indel.vcf + -o ${file_prefix}_indel.vcf """ } high_confidence_snp_filter = "(QD < 2.0) || (FS > 60.0) || (MQ < 40.0) || (MQRankSum < -12.5) || (ReadPosRankSum < -8.0) || (SOR > 4.0)" params.high_confidence_snp = "--filterExpression \"${high_confidence_snp_filter}\" --filterName \"basic_snp_filter\"" +params.high_confidence_snp_out = "" process high_confidence_snp { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" + if (params.high_confidence_snp_out != "") { + publishDir "results/${params.high_confidence_snp_out}", mode: 'copy' + } input: tuple val(file_id), path(vcf) @@ -83,22 +114,31 @@ process high_confidence_snp { output: tuple val(file_id), path("*_snp.vcf"), emit: vcf script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ gatk3 -T VariantFiltration \ -nct ${task.cpus} \ -R ${fasta} \ -V ${vcf} \ ${params.high_confidence_snp} \ - -o ${file_id}_filtered_snp.vcf + -o ${file_prefix}_filtered_snp.vcf """ } high_confidence_indel_filter = "QD < 3.0 || FS > 200.0 || ReadPosRankSum < -20.0 || SOR > 10.0" params.high_confidence_indels = "--filterExpression \"${high_confidence_indel_filter}\" --filterName \"basic_indel_filter\"" +params.high_confidence_indels_out = "" process high_confidence_indels { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" + if (params.high_confidence_indels_out != "") { + publishDir "results/${params.high_confidence_indels_out}", mode: 'copy' + } input: tuple val(file_id), path(vcf) @@ -106,21 +146,30 @@ process high_confidence_indels { output: tuple val(file_id), path("*_indel.vcf"), emit: vcf script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + 
file_prefix = file_id + } """ gatk3 -T VariantFiltration \ -nct ${task.cpus} \ -R ${fasta} \ -V ${vcf} \ ${params.high_confidence_indels} \ - -o ${file_id}_filtered_indel.vcf + -o ${file_prefix}_filtered_indel.vcf """ } params.recalibrate_snp_table = "" +params.recalibrate_snp_table_out = "" process recalibrate_snp_table { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" + if (params.recalibrate_snp_table_out != "") { + publishDir "results/${params.recalibrate_snp_table_out}", mode: 'copy' + } input: tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx) @@ -141,10 +190,14 @@ gatk3 -T BaseRecalibrator \ } params.recalibrate_snp = "" +params.recalibrate_snp_out = "" process recalibrate_snp { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" + if (params.recalibrate_snp_out != "") { + publishDir "results/${params.recalibrate_snp_out}", mode: 'copy' + } input: tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx) @@ -153,6 +206,11 @@ process recalibrate_snp { output: tuple val(file_id), path("*.bam"), emit: bam script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ gatk3 -T PrintReads \ --use_jdk_deflater \ @@ -162,15 +220,19 @@ gatk3 -T PrintReads \ -R ${fasta} \ -I ${bam} \ -BQSR recal_data_table \ - -o ${file_id}_recal.bam + -o ${file_prefix}_recal.bam """ } params.haplotype_caller = "" +params.haplotype_caller_out = "" process haplotype_caller { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" + if (params.haplotype_caller_out != "") { + publishDir "results/${params.haplotype_caller_out}", mode: 'copy' + } input: tuple val(file_id), path(bam) @@ -178,6 +240,11 @@ process haplotype_caller { output: tuple val(file_id), path("*.gvcf"), emit: gvcf script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ gatk3 -T HaplotypeCaller \ -nct 
${task.cpus} \ @@ -186,15 +253,19 @@ gatk3 -T HaplotypeCaller \ -I ${bam} \ -ERC GVCF \ -variant_index_type LINEAR -variant_index_parameter 128000 \ - -o ${file_id}.gvcf + -o ${file_prefix}.gvcf """ } params.gvcf_genotyping = "" +params.gvcf_genotyping_out = "" process gvcf_genotyping { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" + if (params.gvcf_genotyping_out != "") { + publishDir "results/${params.gvcf_genotyping_out}", mode: 'copy' + } input: tuple val(file_id), path(gvcf) @@ -202,21 +273,30 @@ process gvcf_genotyping { output: tuple val(file_id), path("*.vcf"), emit: vcf script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ gatk3 -T GenotypeGVCFs \ -nct ${task.cpus} \ ${params.gvcf_genotyping} \ -R ${fasta} \ -V ${gvcf} \ - -o ${file_id}_joint.vcf + -o ${file_prefix}_joint.vcf """ } params.select_variants_snp = "" +params.select_variants_snp_out = "" process select_variants_snp { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" + if (params.select_variants_snp_out != "") { + publishDir "results/${params.select_variants_snp_out}", mode: 'copy' + } input: tuple val(file_id), path(vcf) @@ -224,6 +304,11 @@ process select_variants_snp { output: tuple val(file_id), path("*_joint_snp.vcf"), emit: vcf script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ gatk3 -T SelectVariants \ -nct ${task.cpus} \ @@ -231,15 +316,19 @@ gatk3 -T SelectVariants \ -R ${fasta} \ -V ${vcf} \ -selectType SNP \ - -o ${file_id}_joint_snp.vcf + -o ${file_prefix}_joint_snp.vcf """ } params.select_variants_indels = "" +params.select_variants_indels_out = "" process select_variants_indels { container = "${container_url}" label "big_mem_multi_cpus" tag "$file_id" + if (params.select_variants_indels_out != "") { + publishDir "results/${params.select_variants_indels_out}", mode: 'copy' + } input: tuple val(file_id), path(vcf) @@ 
-247,6 +336,11 @@ process select_variants_indels { output: tuple val(file_id), path("*_joint_indel.vcf"), emit: vcf script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ gatk3 -T SelectVariants \ -nct ${task.cpus} \ @@ -254,15 +348,19 @@ gatk3 -T SelectVariants \ -R ${fasta} \ -V ${vcf} \ -selectType INDEL \ - -o ${file_id}_joint_indel.vcf + -o ${file_prefix}_joint_indel.vcf """ } params.personalized_genome = "" +params.personalized_genome_out = "" process personalized_genome { container = "${container_url}" label "big_mem_mono_cpus" tag "$file_id" + if (params.personalized_genome_out != "") { + publishDir "results/${params.personalized_genome_out}", mode: 'copy' + } input: tuple val(file_id), path(vcf) @@ -271,13 +369,17 @@ process personalized_genome { tuple val(file_id), path("*_genome.fasta"), emit: fasta script: - library = pick_library(file_id, library_list) + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ gatk3 -T FastaAlternateReferenceMaker\ ${params.personalized_genome} \ -R ${reference} \ -V ${vcf} \ - -o ${library}_genome.fasta + -o ${file_prefix}_genome.fasta """ } diff --git a/src/nf_modules/gatk4/main.nf b/src/nf_modules/gatk4/main.nf index 053151dc..bf7c0f24 100644 --- a/src/nf_modules/gatk4/main.nf +++ b/src/nf_modules/gatk4/main.nf @@ -2,11 +2,14 @@ version = "4.2.0.0" container_url = "broadinstitute/gatk:${version}" params.variant_calling = "" - +params.variant_calling_out = "" process variant_calling { container = "${container_url}" label "big_mem_mono_cpus" tag "$file_id" + if (params.variant_calling_out != "") { + publishDir "results/${params.variant_calling_out}", mode: 'copy' + } input: tuple val(file_id), path(bam), path(bai) @@ -16,6 +19,11 @@ process variant_calling { script: xmx_memory = "${task.memory}" - ~/\s*GB/ + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ gatk 
--java-options "-Xmx${xmx_memory}G" HaplotypeCaller \ ${params.variant_calling} \ @@ -26,10 +34,14 @@ gatk --java-options "-Xmx${xmx_memory}G" HaplotypeCaller \ } params.filter_snp = "" +params.filter_snp_out = "" process filter_snp { container = "${container_url}" label "big_mem_mono_cpus" tag "$file_id" + if (params.filter_snp_out != "") { + publishDir "results/${params.filter_snp_out}", mode: 'copy' + } input: tuple val(file_id), path(vcf) @@ -38,6 +50,11 @@ process filter_snp { tuple val(file_id), path("*_snp.vcf"), emit: vcf script: xmx_memory = "${task.memory}" - ~/\s*GB/ + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \ ${params.filter_snp} \ @@ -49,10 +66,14 @@ gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \ } params.filter_indels = "" +params.filter_indels_out = "" process filter_indels { container = "${container_url}" label "big_mem_mono_cpus" tag "$file_id" + if (params.filter_indels_out != "") { + publishDir "results/${params.filter_indels_out}", mode: 'copy' + } input: tuple val(file_id), path(vcf) @@ -61,6 +82,11 @@ process filter_indels { tuple val(file_id), path("*_indel.vcf"), emit: vcf script: xmx_memory = "${task.memory}" - ~/\s*GB/ + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \ ${params.filter_indels} \ @@ -71,13 +97,16 @@ gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \ """ } -high_confidence_snp_filter = "(QD < 2.0) || (FS > 60.0) || (MQ < 40.0) || (MQRankSum < -12.5) || (ReadPosRankSum < -8.0) || (SOR > 4.0)" - -params.high_confidence_snp_filter = "" +params.high_confidence_snp_filter = "(QD < 2.0) || (FS > 60.0) || (MQ < 40.0) || (MQRankSum < -12.5) || (ReadPosRankSum < -8.0) || (SOR > 4.0)" +params.high_confidence_snp = "" +params.high_confidence_snp_out = "" process high_confidence_snp { container = 
"${container_url}" label "big_mem_mono_cpus" tag "$file_id" + if (params.high_confidence_snp_out != "") { + publishDir "results/${params.high_confidence_snp_out}", mode: 'copy' + } input: tuple val(file_id), path(vcf) @@ -86,6 +115,11 @@ process high_confidence_snp { tuple val(file_id), path("*_snp.vcf"), emit: vcf script: xmx_memory = "${task.memory}" - ~/\s*GB/ + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ gatk --java-options "-Xmx${xmx_memory}G" VariantFiltration \ -R ${fasta} \ @@ -100,10 +134,14 @@ gatk --java-options "-Xmx${xmx_memory}G" VariantFiltration \ high_confidence_indel_filter = "QD < 3.0 || FS > 200.0 || ReadPosRankSum < -20.0 || SOR > 10.0" params.high_confidence_indels = "" +params.high_confidence_indels_out = "" process high_confidence_indels { container = "${container_url}" label "big_mem_mono_cpus" tag "$file_id" + if (params.high_confidence_indels_out != "") { + publishDir "results/${params.high_confidence_indels_out}", mode: 'copy' + } input: tuple val(file_id), path(vcf) @@ -112,6 +150,11 @@ process high_confidence_indels { tuple val(file_id), path("*_indel.vcf"), emit: vcf script: xmx_memory = "${task.memory}" - ~/\s*GB/ + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ gatk --java-options "-Xmx${xmx_memory}G" VariantFiltration \ -R ${fasta} \ @@ -124,10 +167,14 @@ gatk --java-options "-Xmx${xmx_memory}G" VariantFiltration \ } params.recalibrate_snp_table = "" +params.recalibrate_snp_table_out = "" process recalibrate_snp_table { container = "${container_url}" label "big_mem_mono_cpus" tag "$file_id" + if (params.recalibrate_snp_table_out != "") { + publishDir "results/${params.recalibrate_snp_table_out}", mode: 'copy' + } input: tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx) @@ -136,6 +183,11 @@ process recalibrate_snp_table { tuple val(file_id), path("recal_data_table"), emit: recal_table script: 
xmx_memory = "${task.memory}" - ~/\s*GB/ + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ gatk --java-options "-Xmx${xmx_memory}G" IndexFeatureFile \ -I ${snp_file} @@ -152,10 +204,14 @@ gatk --java-options "-Xmx${xmx_memory}G" BaseRecalibrator \ } params.recalibrate_snp = "" +params.recalibrate_snp_out = "" process recalibrate_snp { container = "${container_url}" label "big_mem_mono_cpus" tag "$file_id" + if (params.recalibrate_snp_out != "") { + publishDir "results/${params.recalibrate_snp_out}", mode: 'copy' + } input: tuple val(file_id), path(snp_file), path(indel_file), path(bam), path(bam_idx), path(recal_table) @@ -164,6 +220,11 @@ process recalibrate_snp { tuple val(file_id), path("*.bam"), emit: bam script: xmx_memory = "${task.memory}" - ~/\s*GB/ + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ gatk --java-options "-Xmx${xmx_memory}G" ApplyBQSR \ ${params.recalibrate_snp} \ @@ -175,10 +236,14 @@ gatk --java-options "-Xmx${xmx_memory}G" ApplyBQSR \ } params.haplotype_caller = "" +params.haplotype_caller_out = "" process haplotype_caller { container = "${container_url}" label "big_mem_mono_cpus" tag "$file_id" + if (params.haplotype_caller_out != "") { + publishDir "results/${params.haplotype_caller_out}", mode: 'copy' + } input: tuple val(file_id), path(bam) @@ -187,6 +252,11 @@ process haplotype_caller { tuple val(file_id), path("*.gvcf"), emit: gvcf script: xmx_memory = "${task.memory}" - ~/\s*GB/ + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ gatk --java-options "-Xmx${xmx_memory}G" HaplotypeCaller \ ${params.haplotype_caller} \ @@ -198,10 +268,14 @@ gatk --java-options "-Xmx${xmx_memory}G" HaplotypeCaller \ } params.gvcf_genotyping = "" +params.gvcf_genotyping_out = "" process gvcf_genotyping { container = "${container_url}" label "big_mem_mono_cpus" tag "$file_id" + if 
(params.gvcf_genotyping_out != "") { + publishDir "results/${params.gvcf_genotyping_out}", mode: 'copy' + } input: tuple val(file_id), path(gvcf) @@ -210,6 +284,11 @@ process gvcf_genotyping { tuple val(file_id), path("*.vcf.gz"), emit: vcf script: xmx_memory = "${task.memory}" - ~/\s*GB/ + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ gatk --java-options "-Xmx${xmx_memory}G" GenotypeGVCFs \ ${params.gvcf_genotyping} \ @@ -220,10 +299,14 @@ gatk --java-options "-Xmx${xmx_memory}G" GenotypeGVCFs \ } params.select_variants_snp = "" +params.select_variants_snp_out = "" process select_variants_snp { container = "${container_url}" label "big_mem_mono_cpus" tag "$file_id" + if (params.select_variants_snp_out != "") { + publishDir "results/${params.select_variants_snp_out}", mode: 'copy' + } input: tuple val(file_id), path(vcf) @@ -232,6 +315,11 @@ process select_variants_snp { tuple val(file_id), path("*_joint_snp.vcf"), emit: vcf script: xmx_memory = "${task.memory}" - ~/\s*GB/ + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ -gatk --java-options "-Xmx${xmx_memory}GG" SelectVariants \ +gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \ ${params.select_variants_snp} \ @@ -243,10 +331,14 @@ gatk --java-options "-Xmx${xmx_memory}GG" SelectVariants \ } params.select_variants_indels = "" +params.select_variants_indels_out = "" process select_variants_indels { container = "${container_url}" label "big_mem_mono_cpus" tag "$file_id" + if (params.select_variants_indels_out != "") { + publishDir "results/${params.select_variants_indels_out}", mode: 'copy' + } input: tuple val(file_id), path(vcf) @@ -255,21 +347,30 @@ process select_variants_indels { tuple val(file_id), path("*_joint_indel.vcf"), emit: vcf script: xmx_memory = "${task.memory}" - ~/\s*GB/ + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \ 
${params.select_variants_indels} \ -R ${fasta} \ -V ${vcf} \ -select-type INDEL \ - -O ${file_id}_joint_indel.vcf + -O ${file_prefix}_joint_indel.vcf """ } params.personalized_genome = "" +params.personalized_genome_out = "" process personalized_genome { container = "${container_url}" label "big_mem_mono_cpus" tag "$file_id" + if (params.personalized_genome_out != "") { + publishDir "results/${params.personalized_genome_out}", mode: 'copy' + } input: tuple val(file_id), path(vcf) @@ -279,6 +380,11 @@ process personalized_genome { script: xmx_memory = "${task.memory}" - ~/\s*GB/ + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } """ gatk --java-options "-Xmx${xmx_memory}G" FastaAlternateReferenceMaker\ ${params.personalized_genome} \ diff --git a/src/nf_modules/macs2/main.nf b/src/nf_modules/macs2/main.nf index b7e96442..7742fcf4 100644 --- a/src/nf_modules/macs2/main.nf +++ b/src/nf_modules/macs2/main.nf @@ -3,12 +3,15 @@ container_url = "lbmc/macs2:${version}" params.macs_gsize=3e9 params.macs_mfold="5 50" - -params.peak_calling = "" +params.peak_calling = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}" +params.peak_calling_out = "" process peak_calling { container = "${container_url}" label "big_mem_mono_cpus" tag "${file_id}" + if (params.peak_calling_out != "") { + publishDir "results/${params.peak_calling_out}", mode: 'copy' + } input: tuple val(file_id), path(bam_ip), path(bam_control) @@ -27,9 +30,7 @@ macs2 callpeak \ --call-summits \ --control ${bam_control} \ --keep-dup all \ - --name ${bam_ip.simpleName} \ - --mfold ${params.macs_mfold} \ - --gsize ${params.macs_gsize} 2> \ + --name ${bam_ip.simpleName} 2> \ ${bam_ip.simpleName}_macs2_report.txt if grep -q "ERROR" ${bam_ip.simpleName}_macs2_report.txt; then @@ -39,11 +40,15 @@ fi """ } -params.peak_calling_bg = "" +params.peak_calling_bg = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}" +params.peak_calling_bg_out = "" process 
peak_calling_bg { container = "${container_url}" label "big_mem_mono_cpus" tag "${file_id}" + if (params.peak_calling_bg_out != "") { + publishDir "results/${params.peak_calling_bg_out}", mode: 'copy' + } input: tuple val(file_id), path(bg_ip), path(bg_control) @@ -66,9 +71,7 @@ macs2 callpeak \ --call-summits \ --control ${bg_control.simpleName}.bed \ --keep-dup all \ - --name ${bg_ip.simpleName} \ - --mfold ${params.macs_mfold} \ - --gsize ${params.macs_gsize} 2> \ + --name ${bg_ip.simpleName} 2> \ ${bg_ip.simpleName}_macs2_report.txt if grep -q "ERROR" ${bg_ip.simpleName}_macs2_report.txt; then diff --git a/src/nf_modules/macs3/main.nf b/src/nf_modules/macs3/main.nf index c36140aa..b8c2dbce 100644 --- a/src/nf_modules/macs3/main.nf +++ b/src/nf_modules/macs3/main.nf @@ -2,12 +2,16 @@ version = "3.0.0a6" container_url = "lbmc/macs3:${version}" params.macs_gsize=3e9 -params.macs_mfold=[5, 50] - +params.macs_mfold="5 50" +params.peak_calling = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}" +params.peak_calling_out = "" process peak_calling { container = "${container_url}" label "big_mem_mono_cpus" tag "${file_id}" + if (params.peak_calling_out != "") { + publishDir "results/${params.peak_calling_out}", mode: 'copy' + } input: tuple val(file_id), path(bam_ip), path(bam_control) @@ -24,7 +28,7 @@ macs3 callpeak \ --call-summits \ --control ${bam_control} \ --keep-dup all \ - --mfold params.macs_mfold[0] params.macs_mfold[1] + ${params.peak_calling} \ --name ${bam_ip.simpleName} \ --gsize ${params.macs_gsize} 2> \ ${bam_ip.simpleName}_macs3_report.txt @@ -36,10 +40,15 @@ fi """ } +params.peak_calling_bg = "--mfold ${params.macs_mfold} --gsize ${params.macs_gsize}" +params.peak_calling_bg_out = "" process peak_calling_bg { container = "${container_url}" label "big_mem_mono_cpus" tag "${file_id}" + if (params.peak_calling_bg_out != "") { + publishDir "results/${params.peak_calling_bg_out}", mode: 'copy' + } input: tuple val(file_id), path(bg_ip), 
path(bg_control) @@ -56,6 +65,7 @@ awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_ip} > \ awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_control} > \ ${bg_control.simpleName}.bed macs3 callpeak \ + ${params.peak_calling_bg} \ --treatment ${bg_ip.simpleName}.bed \ --call-summits \ --control ${bg_control.simpleName}.bed \ diff --git a/src/nf_modules/minimap2/main.nf b/src/nf_modules/minimap2/main.nf index 700856d2..d404ac79 100644 --- a/src/nf_modules/minimap2/main.nf +++ b/src/nf_modules/minimap2/main.nf @@ -2,16 +2,20 @@ version = "2.17" container_url = "lbmc/minimap2:${version}" params.index_fasta = "" +params.index_fasta_out = "" process index_fasta { container = "${container_url}" label "big_mem_multi_cpus" - tag "$fasta.baseName" + tag "$file_id" + if (params.index_fasta_out != "") { + publishDir "results/${params.index_fasta_out}", mode: 'copy' + } input: - path fasta + tuple val(file_id), path(fasta) output: - tuple path("${fasta}"), path("*.mmi*"), emit: index + tuple val(file_id), path("${fasta}"), path("*.mmi*"), emit: index path "*_report.txt", emit: report script: @@ -22,30 +26,39 @@ minimap2 ${params.index_fasta} -t ${task.cpus} -I ${memory}G -d ${fasta.baseName } params.mapping_fastq = "-ax sr" +params.mapping_fastq_out = "" process mapping_fastq { container = "${container_url}" label "big_mem_multi_cpus" - tag "$pair_id" + tag "$file_id" + if (params.mapping_fastq_out != "") { + publishDir "results/${params.mapping_fastq_out}", mode: 'copy' + } input: - tuple path(fasta), path(index) - tuple val(pair_id), path(reads) + tuple val(fasta_id), path(fasta), path(index) + tuple val(file_id), path(reads) output: - tuple val(pair_id), path("*.bam"), emit: bam + tuple val(file_id), path("*.bam"), emit: bam path "*_report.txt", emit: report script: + if (file_id instanceof List){ + file_prefix = file_id[0] + } else { + file_prefix = file_id + } memory = "${task.memory}" - ~/\s*GB/ memory = memory / (task.cpus + 1.0) -if (reads instanceof List) -""" 
-minimap2 ${params.mapping_fastq} -t ${task.cpus} -K ${memory} ${fasta} ${reads[0]} ${reads[1]} | - samtools view -Sb - > ${pair_id}.bam -""" -else -""" -minimap2 ${params.mapping_fastq} -t ${task.cpus} -K ${memory} ${fasta} ${reads} | - samtools view -Sb - > ${reads.baseName}.bam -""" + if (reads.size() == 2) + """ + minimap2 ${params.mapping_fastq} -t ${task.cpus} -K ${memory} ${fasta} ${reads[0]} ${reads[1]} | + samtools view -Sb - > ${file_prefix}.bam + """ + else + """ + minimap2 ${params.mapping_fastq} -t ${task.cpus} -K ${memory} ${fasta} ${reads} | + samtools view -Sb - > ${file_prefix}.bam + """ } \ No newline at end of file diff --git a/src/nf_modules/multiqc/main.nf b/src/nf_modules/multiqc/main.nf index eaa0ced3..0ee0dc2e 100644 --- a/src/nf_modules/multiqc/main.nf +++ b/src/nf_modules/multiqc/main.nf @@ -2,13 +2,16 @@ version = "1.9" container_url = "lbmc/multiqc:${version}" params.multiqc = "" +params.multiqc_out = "" process multiqc { container = "${container_url}" label "big_mem_mono_cpus" - publishDir "results/QC/", mode: 'copy' + if (params.multiqc_out != "") { + publishDir "results/${params.multiqc_out}", mode: 'copy' + } input: - path report + path report output: path "*multiqc_*", emit: report diff --git a/src/solution_RNASeq.nf b/src/solution_RNASeq.nf index a16cdc10..0ef21235 100644 --- a/src/solution_RNASeq.nf +++ b/src/solution_RNASeq.nf @@ -8,17 +8,17 @@ log.info "fastq files : ${params.fastq}" log.info "fasta file : ${params.fasta}" log.info "bed file : ${params.bed}" -Channel +channel .fromPath( params.fasta ) .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" } .map { it -> [it.simpleName, it]} .set { fasta_files } -Channel +channel .fromPath( params.bed ) .ifEmpty { error "Cannot find any bed files matching: ${params.bed}" } .map { it -> [it.simpleName, it]} .set { bed_files } -Channel +channel .fromFilePairs( params.fastq, size: -1) .ifEmpty { error "Cannot find any fastq files matching: 
${params.fastq}" } .set { fastq_files } -- GitLab