From 4a7355a22204e9c387da350c65a6a511d1aded8b Mon Sep 17 00:00:00 2001 From: Laurent Modolo <laurent.modolo@ens-lyon.fr> Date: Thu, 8 Apr 2021 18:26:50 +0200 Subject: [PATCH] nf_modules: add params.<process_names> for each process --- src/nf_modules/bedtools/main.nf | 16 +++++++++---- src/nf_modules/bowtie/main.nf | 10 +++++++- src/nf_modules/bowtie2/main.nf | 14 ++++++----- src/nf_modules/cutadapt/main.nf | 12 ++++++---- src/nf_modules/emase/main.nf | 27 ++++++++++++++++++++++ src/nf_modules/fastqc/main.nf | 7 +++++- src/nf_modules/g2gtools/main.nf | 12 ++++++++++ src/nf_modules/gatk3/main.nf | 30 +++++++++++++++++++----- src/nf_modules/gatk4/main.nf | 25 ++++++++++++++++++++ src/nf_modules/kallisto/main.nf | 41 ++++++++++++++++++++++++++++++--- src/nf_modules/macs2/main.nf | 4 ++++ src/nf_modules/minimap2/main.nf | 9 ++++---- src/nf_modules/multiqc/main.nf | 3 ++- src/nf_modules/picard/main.nf | 14 +++++++---- src/nf_modules/sambamba/main.nf | 12 ++++++---- src/nf_modules/ucsc/main.nf | 3 ++- 16 files changed, 197 insertions(+), 42 deletions(-) create mode 100644 src/nf_modules/emase/main.nf diff --git a/src/nf_modules/bedtools/main.nf b/src/nf_modules/bedtools/main.nf index 50a848e7..c8533656 100644 --- a/src/nf_modules/bedtools/main.nf +++ b/src/nf_modules/bedtools/main.nf @@ -1,6 +1,7 @@ version = "2.25.0" container_url = "lbmc/bedtools:${version}" +params.fasta_from_bed = "-name" process fasta_from_bed { container = "${container_url}" label "big_mem_mono_cpus" @@ -15,11 +16,12 @@ process fasta_from_bed { script: """ -bedtools getfasta -name \ +bedtools getfasta ${params.fasta_from_bed} \ -fi ${fasta} -bed ${bed} -fo ${bed.baseName}_extracted.fasta """ } +params.merge_bed = "" process merge_bed { container = "${container_url}" label "big_mem_mono_cpus" @@ -33,10 +35,11 @@ process merge_bed { script: """ -bedtools merge -i ${bed} > ${bed[0].simpleName}_merged.bed +bedtools merge ${params.merge_bed} -i ${bed} > ${bed[0].simpleName}_merged.bed """ } 
+params.bam_to_fastq_singleend = "" process bam_to_fastq_singleend { container = "${container_url}" label "big_mem_mono_cpus" @@ -51,10 +54,12 @@ process bam_to_fastq_singleend { script: """ bedtools bamtofastq \ --i ${bam} -fq ${bam.baseName}.fastq + ${params.bam_to_fastq_singleend} \ + -i ${bam} -fq ${bam.baseName}.fastq """ } +params.bam_to_fastq_pairedend = "" process bam_to_fastq_pairedend { container = "${container_url}" label "big_mem_mono_cpus" @@ -69,10 +74,12 @@ process bam_to_fastq_pairedend { script: """ bedtools bamtofastq \ --i ${bam} -fq ${bam.baseName}_R1.fastq -fq2 ${bam.baseName}_R2.fastq + ${params.bam_to_fastq_pairedend} \ + -i ${bam} -fq ${bam.baseName}_R1.fastq -fq2 ${bam.baseName}_R2.fastq """ } +params.bam_to_bedgraph = "" process bam_to_bedgraph { container = "${container_url}" label "big_mem_mono_cpus" @@ -87,6 +94,7 @@ process bam_to_bedgraph { script: """ bedtools genomecov \ + ${params.bam_to_bedgraph} \ -ibam ${bam} \ -bg > ${bam.simpleName}.bg """ diff --git a/src/nf_modules/bowtie/main.nf b/src/nf_modules/bowtie/main.nf index d250e21f..4d465f5d 100644 --- a/src/nf_modules/bowtie/main.nf +++ b/src/nf_modules/bowtie/main.nf @@ -1,6 +1,7 @@ version = "1.2.2" container_url = "lbmc/bowtie:${version}" +params.index_fasta = "" process index_fasta { container = "${container_url}" label "big_mem_multi_cpus" @@ -16,6 +17,7 @@ process index_fasta { script: """ bowtie-build --threads ${task.cpus} \ + ${params.index_fasta} \ -f ${fasta} ${fasta.baseName}.index &> \ ${fasta.baseName}_bowtie_index_report.txt @@ -25,6 +27,7 @@ fi """ } +params.mapping_fastq = "" process mapping_fastq { container = "${container_url}" label "big_mem_multi_cpus" @@ -50,6 +53,7 @@ if (reads instanceof List) # -v specify the max number of missmatch, -k the number of match reported per # reads bowtie --best -v 3 -k 1 --sam -p ${task.cpus} ${index_id} \ + ${params.mapping_fastq} \ -1 ${reads[0]} -2 ${reads[1]} 2> \ ${pair_id}_bowtie_report_tmp.txt | \ samtools view -Sb - > 
${pair_id}.bam @@ -63,6 +67,7 @@ tail -n 19 ${pair_id}_bowtie_report_tmp.txt > \ else """ bowtie --best -v 3 -k 1 --sam -p ${task.cpus} ${index_id} \ + ${params.mapping_fastq} \ -q ${reads} 2> \ ${file_id}_bowtie_report_tmp.txt | \ samtools view -Sb - > ${file_id}.bam @@ -75,6 +80,7 @@ tail -n 19 ${file_id}_bowtie_report_tmp.txt > \ """ } +params.mapping_fastq_pairedend = "" process mapping_fastq_pairedend { container = "${container_url}" label "big_mem_multi_cpus" @@ -99,6 +105,7 @@ process mapping_fastq_pairedend { # -v specify the max number of missmatch, -k the number of match reported per # reads bowtie --best -v 3 -k 1 --sam -p ${task.cpus} ${index_id} \ + ${params.mapping_fastq_pairedend} \ -1 ${reads[0]} -2 ${reads[1]} 2> \ ${pair_id}_bowtie_report_tmp.txt | \ samtools view -Sb - > ${pair_id}.bam @@ -111,7 +118,7 @@ tail -n 19 ${pair_id}_bowtie_report_tmp.txt > \ """ } - +params.mapping_fastq_singleend = "" process mapping_fastq_singleend { container = "${container_url}" label "big_mem_multi_cpus" @@ -134,6 +141,7 @@ process mapping_fastq_singleend { } """ bowtie --best -v 3 -k 1 --sam -p ${task.cpus} ${index_id} \ + ${params.mapping_fastq_singleend} \ -q ${reads} 2> \ ${file_id}_bowtie_report_tmp.txt | \ samtools view -Sb - > ${file_id}.bam diff --git a/src/nf_modules/bowtie2/main.nf b/src/nf_modules/bowtie2/main.nf index 02d26635..a626d4f3 100644 --- a/src/nf_modules/bowtie2/main.nf +++ b/src/nf_modules/bowtie2/main.nf @@ -1,6 +1,7 @@ version = "2.3.4.1" container_url = "lbmc/bowtie2:${version}" +params.index_fasta = "" process index_fasta { container = "${container_url}" label "big_mem_multi_cpus" @@ -26,7 +27,7 @@ fi """ } - +params.mapping_fastq = "--very-sensitive" process mapping_fastq { container = "${container_url}" label "big_mem_multi_cpus" @@ -49,7 +50,7 @@ process mapping_fastq { } if (reads instanceof List) """ -bowtie2 --very-sensitive \ +bowtie2 ${params.mapping_fastq} \ -p ${task.cpus} \ -x ${index_id} \ -1 ${reads[0]} \ @@ -65,7 +66,7 @@ 
tail -n 19 ${pair_id}_bowtie2_mapping_report_tmp.txt > \ """ else """ -bowtie2 --very-sensitive \ +bowtie2 ${params.mapping_fastq} \ -p ${task.cpus} \ -x ${index_id} \ -U ${reads} 2> \ @@ -80,6 +81,7 @@ tail -n 19 ${reads.baseName}_bowtie2_mapping_report_tmp.txt > \ """ } +params.mapping_fastq_pairedend = "--very-sensitive" process mapping_fastq_pairedend { container = "${container_url}" label "big_mem_multi_cpus" @@ -101,7 +103,7 @@ process mapping_fastq_pairedend { } } """ -bowtie2 --very-sensitive \ +bowtie2 ${params.mapping_fastq_pairedend} \ -p ${task.cpus} \ -x ${index_id} \ -1 ${reads[0]} \ @@ -117,7 +119,7 @@ tail -n 19 ${pair_id}_bowtie2_mapping_report_tmp.txt > \ """ } - +params.mapping_fastq_singleend = "--very-sensitive" process mapping_fastq_singleend { container = "${container_url}" label "big_mem_multi_cpus" @@ -139,7 +141,7 @@ process mapping_fastq_singleend { } } """ -bowtie2 --very-sensitive \ +bowtie2 ${params.mapping_fastq_singleend} \ -p ${task.cpus} \ -x ${index_id} \ -U ${reads} 2> \ diff --git a/src/nf_modules/cutadapt/main.nf b/src/nf_modules/cutadapt/main.nf index 64649351..7b459f81 100644 --- a/src/nf_modules/cutadapt/main.nf +++ b/src/nf_modules/cutadapt/main.nf @@ -5,7 +5,7 @@ adapter_3_prim = "AGATCGGAAGAG" adapter_5_prim = "CTCTTCCGATCT" trim_quality = "20" - +params.adaptor_removal = "-a ${adapter_3_prim} -g ${adapter_5_prim} -A ${adapter_3_prim} -G ${adapter_5_prim}" process adaptor_removal { container = "${container_url}" label "big_mem_mono_cpus" @@ -21,18 +21,19 @@ process adaptor_removal { script: if (reads instanceof List) """ - cutadapt -a ${adapter_3_prim} -g ${adapter_5_prim} -A ${adapter_3_prim} -G ${adapter_5_prim} \ + cutadapt ${params.adaptor_removal} \ -o ${pair_id}_cut_R1.fastq.gz -p ${pair_id}_cut_R2.fastq.gz \ ${reads[0]} ${reads[1]} > ${pair_id}_report.txt """ else: """ - cutadapt -a ${adapter_3_prim} -g ${adapter_5_prim} \ + cutadapt ${params.adaptor_removal_singleend} \ -o ${file_id}_cut.fastq.gz \ ${reads} > 
${file_id}_report.txt """ } +params.adaptor_removal_pairedend = "-a ${adapter_3_prim} -g ${adapter_5_prim} -A ${adapter_3_prim} -G ${adapter_5_prim}" process adaptor_removal_pairedend { container = "${container_url}" label "big_mem_mono_cpus" @@ -47,12 +48,13 @@ process adaptor_removal_pairedend { script: """ - cutadapt -a ${adapter_3_prim} -g ${adapter_5_prim} -A ${adapter_3_prim} -G ${adapter_5_prim} \ + cutadapt ${params.adaptor_removal_pairedend} \ -o ${pair_id}_cut_R1.fastq.gz -p ${pair_id}_cut_R2.fastq.gz \ ${reads[0]} ${reads[1]} > ${pair_id}_report.txt """ } +params.adaptor_removal_singleend = "-a ${adapter_3_prim} -g ${adapter_5_prim}" process adaptor_removal_singleend { container = "${container_url}" label "big_mem_mono_cpus" @@ -67,7 +69,7 @@ process adaptor_removal_singleend { script: """ - cutadapt -a ${adapter_3_prim} -g ${adapter_5_prim} \ + cutadapt ${params.adaptor_removal_singleend} \ -o ${file_id}_cut.fastq.gz \ ${reads} > ${file_id}_report.txt """ diff --git a/src/nf_modules/emase/main.nf b/src/nf_modules/emase/main.nf new file mode 100644 index 00000000..73ace1e8 --- /dev/null +++ b/src/nf_modules/emase/main.nf @@ -0,0 +1,27 @@ +version = "0.10.16" +container_url = "lbmc/emase:${version}" + +params.personalised_transcriptome = "" + +process personalised_transcriptome { + container = "${container_url}" + label "big_mem_mono_cpus" + tag "$file_id" + + input: + tuple val(file_id), path(fasta) + tuple val(gtf_id), path(gtf) + + output: + tuple val(file_id), path("${fasta.simpleName}.*"), emit: index + tuple val(file_id), path("*_bwa_report.txt"), emit: report + + script: +""" +prepare-emase ${params.personalised_transcriptome} -G ${REF_FASTA} -g ${REF_GTF} -o ${REF_DIR} -m --no-bowtie-index +# ${REF_DIR}/emase.transcriptome.fa +# ${REF_DIR}/emase.transcriptome.info +# ${REF_DIR}/emase.gene2transcripts.tsv +prepare-emase -G ${SAMPLE_DIR}/L.fa,${SAMPLE_DIR}/R.fa -s L,R -o ${SAMPLE_DIR} +""" +} \ No newline at end of file diff --git 
a/src/nf_modules/fastqc/main.nf b/src/nf_modules/fastqc/main.nf index 5e770297..40cef922 100644 --- a/src/nf_modules/fastqc/main.nf +++ b/src/nf_modules/fastqc/main.nf @@ -1,6 +1,7 @@ version = "0.11.5" container_url = "lbmc/fastqc:${version}" +params.fastqc_fastq = "" process fastqc_fastq { container = "${container_url}" label "big_mem_mono_cpus" @@ -16,6 +17,7 @@ process fastqc_fastq { if (reads instanceof List) """ fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \ + ${params.fastqc_fastq} \ ${reads[0]} ${reads[1]} """ else @@ -24,6 +26,7 @@ else """ } +params.fastqc_fastq_pairedend = "" process fastqc_fastq_pairedend { container = "${container_url}" label "big_mem_mono_cpus" @@ -38,10 +41,12 @@ process fastqc_fastq_pairedend { script: """ fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ \ + ${params.fastqc_fastq_pairedend} \ ${reads[0]} ${reads[1]} """ } +params.fastqc_fastq_singleend = "" process fastqc_fastq_singleend { container = "${container_url}" label "big_mem_mono_cpus" @@ -55,7 +60,7 @@ process fastqc_fastq_singleend { script: """ - fastqc --quiet --threads ${task.cpus} --format fastq --outdir ./ ${reads} + fastqc --quiet --threads ${task.cpus} ${params.fastqc_fastq_singleend} --format fastq --outdir ./ ${reads} """ } diff --git a/src/nf_modules/g2gtools/main.nf b/src/nf_modules/g2gtools/main.nf index 18a05b64..c00dc500 100644 --- a/src/nf_modules/g2gtools/main.nf +++ b/src/nf_modules/g2gtools/main.nf @@ -1,6 +1,7 @@ version = "0.2.8" container_url = "lbmc/g2gtools:${version}" +params.vci_build = "" process vci_build { container = "${container_url}" label "big_mem_multi_cpus" @@ -19,6 +20,7 @@ process vci_build { } """ g2gtools vcf2vci \ + ${params.vci_build} \ -p ${task.cpus} \ -f ${fasta} \ ${input_vcf} \ @@ -27,6 +29,7 @@ g2gtools vcf2vci \ """ } +params.incorporate_snp = "" process incorporate_snp { container = "${container_url}" label "big_mem_multi_cpus" @@ -41,6 +44,7 @@ process incorporate_snp { script: """ 
g2gtools patch \ + ${params.incorporate_snp} \ -p ${task.cpus} \ -i ${fasta} \ -c ${vci} \ @@ -48,6 +52,7 @@ g2gtools patch \ """ } +params.incorporate_indel = "" process incorporate_indel { container = "${container_url}" label "big_mem_multi_cpus" @@ -61,6 +66,7 @@ process incorporate_indel { script: """ g2gtools transform \ + ${params.incorporate_indel} \ -p ${task.cpus} \ -i ${fasta} \ -c ${vci} \ @@ -68,6 +74,7 @@ g2gtools transform \ """ } +params.convert_gtf = "" process convert_gtf { container = "${container_url}" label "big_mem_mono_cpus" @@ -82,12 +89,14 @@ process convert_gtf { script: """ g2gtools convert \ + ${params.convert_gtf} \ -i ${gtf} \ -c ${vci} \ -o ${file_id}.gtf 2> ${file_id}_g2gtools_convert_report.txt """ } +params.convert_bed = "" process convert_bed { container = "${container_url}" label "big_mem_mono_cpus" @@ -102,12 +111,14 @@ process convert_bed { script: """ g2gtools convert \ + ${params.convert_bed} \ -i ${bed} \ -c ${vci} \ -o ${file_id}.bed 2> ${file_id}_g2gtools_convert_report.txt """ } +params.convert_bam = "" process convert_bam { container = "${container_url}" label "big_mem_mono_cpus" @@ -122,6 +133,7 @@ process convert_bam { script: """ g2gtools convert \ + ${params.convert_bam} \ -i ${bam} \ -c ${vci} \ -o ${file_id}_${bam.baseName}.bam 2> ${file_id}_g2gtools_convert_report.txt diff --git a/src/nf_modules/gatk3/main.nf b/src/nf_modules/gatk3/main.nf index cb3656f4..2348afe2 100644 --- a/src/nf_modules/gatk3/main.nf +++ b/src/nf_modules/gatk3/main.nf @@ -1,6 +1,7 @@ version = "3.8.0" container_url = "lbmc/gatk:${version}" +params.variant_calling = "" process variant_calling { container = "${container_url}" label "big_mem_multi_cpus" @@ -16,12 +17,14 @@ process variant_calling { """ gatk3 -T HaplotypeCaller \ -nct ${task.cpus} \ + ${params.variant_calling} \ -R ${fasta} \ -I ${bam} \ -o ${file_id}.vcf """ } +params.filter_snp = "" process filter_snp { container = "${container_url}" label "big_mem_multi_cpus" @@ -36,6 +39,7 @@ 
process filter_snp { """ gatk3 -T SelectVariants \ -nct ${task.cpus} \ + ${params.filter_snp} \ -R ${fasta} \ -V ${vcf} \ -selectType SNP \ @@ -43,6 +47,7 @@ gatk3 -T SelectVariants \ """ } +params.filter_indels = "" process filter_indels { container = "${container_url}" label "big_mem_multi_cpus" @@ -57,6 +62,7 @@ process filter_indels { """ gatk3 -T SelectVariants \ -nct ${task.cpus} \ + ${params.filter_indels} \ -R ${fasta} \ -V ${vcf} \ -selectType INDEL \ @@ -65,7 +71,7 @@ gatk3 -T SelectVariants \ } high_confidence_snp_filter = "(QD < 2.0) || (FS > 60.0) || (MQ < 40.0) || (MQRankSum < -12.5) || (ReadPosRankSum < -8.0) || (SOR > 4.0)" - +params.high_confidence_snp = "--filterExpression \"${high_confidence_snp_filter}\" --filterName \"basic_snp_filter\"" process high_confidence_snp { container = "${container_url}" label "big_mem_multi_cpus" @@ -82,14 +88,13 @@ gatk3 -T VariantFiltration \ -nct ${task.cpus} \ -R ${fasta} \ -V ${vcf} \ - --filterExpression "${high_confidence_snp_filter}" \ - --filterName "basic_snp_filter" \ + ${params.high_confidence_snp} \ -o ${file_id}_filtered_snp.vcf """ } high_confidence_indel_filter = "QD < 3.0 || FS > 200.0 || ReadPosRankSum < -20.0 || SOR > 10.0" - +params.high_confidence_indels = "--filterExpression \"${high_confidence_indel_filter}\" --filterName \"basic_indel_filter\"" process high_confidence_indels { container = "${container_url}" label "big_mem_multi_cpus" @@ -106,12 +111,12 @@ gatk3 -T VariantFiltration \ -nct ${task.cpus} \ -R ${fasta} \ -V ${vcf} \ - --filterExpression "${high_confidence_indel_filter}" \ - --filterName "basic_indel_filter" \ + ${params.high_confidence_indels} \ -o ${file_id}_filtered_indel.vcf """ } +params.recalibrate_snp_table = "" process recalibrate_snp_table { container = "${container_url}" label "big_mem_multi_cpus" @@ -126,6 +131,7 @@ process recalibrate_snp_table { """ gatk3 -T BaseRecalibrator \ -nct ${task.cpus} \ + ${params.recalibrate_snp_table} \ -R ${fasta} \ -I ${bam} \ -knownSites 
${snp_file} \ @@ -134,6 +140,7 @@ gatk3 -T BaseRecalibrator \ """ } +params.recalibrate_snp = "" process recalibrate_snp { container = "${container_url}" label "big_mem_multi_cpus" @@ -150,6 +157,7 @@ process recalibrate_snp { gatk3 -T PrintReads \ --use_jdk_deflater \ --use_jdk_inflater \ + ${params.recalibrate_snp} \ -nct ${task.cpus} \ -R ${fasta} \ -I ${bam} \ @@ -158,6 +166,7 @@ gatk3 -T PrintReads \ """ } +params.haplotype_caller = "" process haplotype_caller { container = "${container_url}" label "big_mem_multi_cpus" @@ -172,6 +181,7 @@ process haplotype_caller { """ gatk3 -T HaplotypeCaller \ -nct ${task.cpus} \ + ${params.haplotype_caller} \ -R ${fasta} \ -I ${bam} \ -ERC GVCF \ @@ -180,6 +190,7 @@ gatk3 -T HaplotypeCaller \ """ } +params.gvcf_genotyping = "" process gvcf_genotyping { container = "${container_url}" label "big_mem_multi_cpus" @@ -194,12 +205,14 @@ process gvcf_genotyping { """ gatk3 -T GenotypeGVCFs \ -nct ${task.cpus} \ + ${params.gvcf_genotyping} \ -R ${fasta} \ -V ${gvcf} \ -o ${file_id}_joint.vcf """ } +params.select_variants_snp = "" process select_variants_snp { container = "${container_url}" label "big_mem_multi_cpus" @@ -214,6 +227,7 @@ process select_variants_snp { """ gatk3 -T SelectVariants \ -nct ${task.cpus} \ + ${params.select_variants_snp} \ -R ${fasta} \ -V ${vcf} \ -selectType SNP \ @@ -221,6 +235,7 @@ gatk3 -T SelectVariants \ """ } +params.select_variants_indels = "" process select_variants_indels { container = "${container_url}" label "big_mem_multi_cpus" @@ -235,6 +250,7 @@ process select_variants_indels { """ gatk3 -T SelectVariants \ -nct ${task.cpus} \ + ${params.select_variants_indels} \ -R ${fasta} \ -V ${vcf} \ -selectType INDEL \ @@ -242,6 +258,7 @@ gatk3 -T SelectVariants \ """ } +params.personalized_genome = "" process personalized_genome { container = "${container_url}" label "big_mem_mono_cpus" @@ -257,6 +274,7 @@ process personalized_genome { library = pick_library(file_id, library_list) """ gatk3 -T 
FastaAlternateReferenceMaker\ + ${params.personalized_genome} \ -R ${reference} \ -V ${vcf} \ -o ${library}_genome.fasta diff --git a/src/nf_modules/gatk4/main.nf b/src/nf_modules/gatk4/main.nf index 22efa0e0..053151dc 100644 --- a/src/nf_modules/gatk4/main.nf +++ b/src/nf_modules/gatk4/main.nf @@ -1,6 +1,8 @@ version = "4.2.0.0" container_url = "broadinstitute/gatk:${version}" +params.variant_calling = "" + process variant_calling { container = "${container_url}" label "big_mem_mono_cpus" @@ -16,12 +18,14 @@ process variant_calling { xmx_memory = "${task.memory}" - ~/\s*GB/ """ gatk --java-options "-Xmx${xmx_memory}G" HaplotypeCaller \ + ${params.variant_calling} \ -R ${fasta} \ -I ${bam} \ -O ${bam.simpleName}.vcf """ } +params.filter_snp = "" process filter_snp { container = "${container_url}" label "big_mem_mono_cpus" @@ -36,6 +40,7 @@ process filter_snp { xmx_memory = "${task.memory}" - ~/\s*GB/ """ gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \ + ${params.filter_snp} \ -R ${fasta} \ -V ${vcf} \ -select-type SNP \ @@ -43,6 +48,7 @@ gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \ """ } +params.filter_indels = "" process filter_indels { container = "${container_url}" label "big_mem_mono_cpus" @@ -57,6 +63,7 @@ process filter_indels { xmx_memory = "${task.memory}" - ~/\s*GB/ """ gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \ + ${params.filter_indels} \ -R ${fasta} \ -V ${vcf} \ -select-type INDEL \ @@ -66,6 +73,7 @@ gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \ high_confidence_snp_filter = "(QD < 2.0) || (FS > 60.0) || (MQ < 40.0) || (MQRankSum < -12.5) || (ReadPosRankSum < -8.0) || (SOR > 4.0)" +params.high_confidence_snp = "" process high_confidence_snp { container = "${container_url}" label "big_mem_mono_cpus" @@ -82,6 +90,7 @@ process high_confidence_snp { gatk --java-options "-Xmx${xmx_memory}G" VariantFiltration \ -R ${fasta} \ -V ${vcf} \ + ${params.high_confidence_snp} \ --filter-expression 
"${high_confidence_snp_filter}" \ --filter-name "basic_snp_filter" \ -O ${vcf.simpleName}_filtered_snp.vcf @@ -90,6 +99,7 @@ gatk --java-options "-Xmx${xmx_memory}G" VariantFiltration \ high_confidence_indel_filter = "QD < 3.0 || FS > 200.0 || ReadPosRankSum < -20.0 || SOR > 10.0" +params.high_confidence_indels = "" process high_confidence_indels { container = "${container_url}" label "big_mem_mono_cpus" @@ -106,12 +116,14 @@ process high_confidence_indels { gatk --java-options "-Xmx${xmx_memory}G" VariantFiltration \ -R ${fasta} \ -V ${vcf} \ + ${params.high_confidence_indels} \ --filter-expression "${high_confidence_indel_filter}" \ --filter-name "basic_indel_filter" \ -O ${vcf.simpleName}_filtered_indel.vcf """ } +params.recalibrate_snp_table = "" process recalibrate_snp_table { container = "${container_url}" label "big_mem_mono_cpus" @@ -130,6 +142,7 @@ gatk --java-options "-Xmx${xmx_memory}G" IndexFeatureFile \ gatk --java-options "-Xmx${xmx_memory}G" IndexFeatureFile \ -I ${indel_file} gatk --java-options "-Xmx${xmx_memory}G" BaseRecalibrator \ + ${params.recalibrate_snp_table} \ -R ${fasta} \ -I ${bam} \ -known-sites ${snp_file} \ @@ -138,6 +151,7 @@ gatk --java-options "-Xmx${xmx_memory}G" BaseRecalibrator \ """ } +params.recalibrate_snp = "" process recalibrate_snp { container = "${container_url}" label "big_mem_mono_cpus" @@ -152,6 +166,7 @@ process recalibrate_snp { xmx_memory = "${task.memory}" - ~/\s*GB/ """ gatk --java-options "-Xmx${xmx_memory}G" ApplyBQSR \ + ${params.recalibrate_snp} \ -R ${fasta} \ -I ${bam} \ --bqsr-recal-file recal_data_table \ @@ -159,6 +174,7 @@ gatk --java-options "-Xmx${xmx_memory}G" ApplyBQSR \ """ } +params.haplotype_caller = "" process haplotype_caller { container = "${container_url}" label "big_mem_mono_cpus" @@ -173,6 +189,7 @@ process haplotype_caller { xmx_memory = "${task.memory}" - ~/\s*GB/ """ gatk --java-options "-Xmx${xmx_memory}G" HaplotypeCaller \ + ${params.haplotype_caller} \ -R ${fasta} \ -I ${bam} \ -ERC 
GVCF \ @@ -180,6 +197,7 @@ gatk --java-options "-Xmx${xmx_memory}G" HaplotypeCaller \ """ } +params.gvcf_genotyping = "" process gvcf_genotyping { container = "${container_url}" label "big_mem_mono_cpus" @@ -194,12 +212,14 @@ process gvcf_genotyping { xmx_memory = "${task.memory}" - ~/\s*GB/ """ gatk --java-options "-Xmx${xmx_memory}G" GenotypeGVCFs \ + ${params.gvcf_genotyping} \ -R ${fasta} \ -V ${gvcf} \ -O ${gvcf.simpleName}_joint.vcf.gz """ } +params.select_variants_snp = "" process select_variants_snp { container = "${container_url}" label "big_mem_mono_cpus" @@ -214,6 +234,7 @@ process select_variants_snp { xmx_memory = "${task.memory}" - ~/\s*GB/ """ gatk --java-options "-Xmx${xmx_memory}GG" SelectVariants \ + ${params.select_variants_snp} \ -R ${fasta} \ -V ${vcf} \ -select-type SNP \ @@ -221,6 +242,7 @@ gatk --java-options "-Xmx${xmx_memory}GG" SelectVariants \ """ } +params.select_variants_indels = "" process select_variants_indels { container = "${container_url}" label "big_mem_mono_cpus" @@ -235,6 +257,7 @@ process select_variants_indels { xmx_memory = "${task.memory}" - ~/\s*GB/ """ gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \ + ${params.select_variants_indels} \ -R ${fasta} \ -V ${vcf} \ -select-type INDEL \ @@ -242,6 +265,7 @@ gatk --java-options "-Xmx${xmx_memory}G" SelectVariants \ """ } +params.personalized_genome = "" process personalized_genome { container = "${container_url}" label "big_mem_mono_cpus" @@ -257,6 +281,7 @@ process personalized_genome { xmx_memory = "${task.memory}" - ~/\s*GB/ """ gatk --java-options "-Xmx${xmx_memory}G" FastaAlternateReferenceMaker\ + ${params.personalized_genome} \ -R ${reference} \ -V ${vcf} \ -O ${vcf.simpleName}_genome.fasta diff --git a/src/nf_modules/kallisto/main.nf b/src/nf_modules/kallisto/main.nf index bb80e4b3..0546d790 100644 --- a/src/nf_modules/kallisto/main.nf +++ b/src/nf_modules/kallisto/main.nf @@ -1,6 +1,7 @@ version = "0.44.0" container_url = "lbmc/kallisto:${version}" 
+params.index_fasta = "-k 31 --make-unique" process index_fasta { container = "${container_url}" label "big_mem_multi_cpus" @@ -15,12 +16,45 @@ process index_fasta { script: """ -kallisto index -k 31 --make-unique -i ${fasta.baseName}.index ${fasta} \ +kallisto index ${params.index_fasta} -i ${fasta.baseName}.index ${fasta} \ 2> ${fasta.baseName}_kallisto_index_report.txt """ } +params.mapping_fastq = "--bias --bootstrap-samples 100" +process mapping_fastq { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "$pair_id" + + input: + path index + tuple val(pair_id), path(reads) + + output: + path "${pair_id}", emit: counts + path "*_report.txt", emit: report + + script: + +if (reads instanceof List) +""" +mkdir ${pair_id} +kallisto quant -i ${index} -t ${task.cpus} \ +${params.mapping_fastq} -o ${pair_id} \ +${reads[0]} ${reads[1]} &> ${pair_id}_kallisto_mapping_report.txt +""" +else +""" +mkdir ${pair_id} +kallisto quant -i ${index} -t ${task.cpus} --single \ +${params.mapping_fastq} -o ${pair_id} \ +-l ${params.mean} -s ${params.sd} \ +${reads} &> ${reads.simpleName}_kallisto_mapping_report.txt +""" +} +params.mapping_fastq_pairedend = "--bias --bootstrap-samples 100" process mapping_fastq_pairedend { container = "${container_url}" label "big_mem_multi_cpus" @@ -38,12 +72,13 @@ process mapping_fastq_pairedend { """ mkdir ${pair_id} kallisto quant -i ${index} -t ${task.cpus} \ ---bias --bootstrap-samples 100 -o ${pair_id} \ +${params.mapping_fastq_pairedend} -o ${pair_id} \ ${reads[0]} ${reads[1]} &> ${pair_id}_kallisto_mapping_report.txt """ } +params.mapping_fastq_singleend = "--bias --bootstrap-samples 100" process mapping_fastq_singleend { container = "${container_url}" label "big_mem_multi_cpus" @@ -61,7 +96,7 @@ process mapping_fastq_singleend { """ mkdir ${file_id} kallisto quant -i ${index} -t ${task.cpus} --single \ ---bias --bootstrap-samples 100 -o ${file_id} \ +${params.mapping_fastq_singleend} -o ${file_id} \ -l ${params.mean} -s 
${params.sd} \ ${reads} &> ${reads.simpleName}_kallisto_mapping_report.txt """ diff --git a/src/nf_modules/macs2/main.nf b/src/nf_modules/macs2/main.nf index a350fb43..b7e96442 100644 --- a/src/nf_modules/macs2/main.nf +++ b/src/nf_modules/macs2/main.nf @@ -4,6 +4,7 @@ container_url = "lbmc/macs2:${version}" params.macs_gsize=3e9 params.macs_mfold="5 50" +params.peak_calling = "" process peak_calling { container = "${container_url}" label "big_mem_mono_cpus" @@ -21,6 +22,7 @@ process peak_calling { /* remove --nomodel option for real dataset */ """ macs2 callpeak \ + ${params.peak_calling} \ --treatment ${bam_ip} \ --call-summits \ --control ${bam_control} \ @@ -37,6 +39,7 @@ fi """ } +params.peak_calling_bg = "" process peak_calling_bg { container = "${container_url}" label "big_mem_mono_cpus" @@ -58,6 +61,7 @@ awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_ip} > \ awk '{print \$1"\t"\$2"\t"\$3"\t.\t+\t"\$4}' ${bg_control} > \ ${bg_control.simpleName}.bed macs2 callpeak \ + ${params.peak_calling_bg} \ --treatment ${bg_ip.simpleName}.bed \ --call-summits \ --control ${bg_control.simpleName}.bed \ diff --git a/src/nf_modules/minimap2/main.nf b/src/nf_modules/minimap2/main.nf index 73dc0e34..700856d2 100644 --- a/src/nf_modules/minimap2/main.nf +++ b/src/nf_modules/minimap2/main.nf @@ -1,6 +1,7 @@ version = "2.17" container_url = "lbmc/minimap2:${version}" +params.index_fasta = "" process index_fasta { container = "${container_url}" label "big_mem_multi_cpus" @@ -16,11 +17,11 @@ process index_fasta { script: memory = "${task.memory}" - ~/\s*GB/ """ -minimap2 -t ${task.cpus} -I ${memory}G -d ${fasta.baseName}.mmi ${fasta} +minimap2 ${params.index_fasta} -t ${task.cpus} -I ${memory}G -d ${fasta.baseName}.mmi ${fasta} """ } - +params.mapping_fastq = "-ax sr" process mapping_fastq { container = "${container_url}" label "big_mem_multi_cpus" @@ -39,12 +40,12 @@ process mapping_fastq { memory = memory / (task.cpus + 1.0) if (reads instanceof List) """ -minimap2 -ax sr -t 
${task.cpus} -K ${memory} ${fasta} ${reads[0]} ${reads[1]} | +minimap2 ${params.mapping_fastq} -t ${task.cpus} -K ${memory} ${fasta} ${reads[0]} ${reads[1]} | samtools view -Sb - > ${pair_id}.bam """ else """ -minimap2 -ax sr -t ${task.cpus} -K ${memory} ${fasta} ${reads} | +minimap2 ${params.mapping_fastq} -t ${task.cpus} -K ${memory} ${fasta} ${reads} | samtools view -Sb - > ${reads.baseName}.bam """ } \ No newline at end of file diff --git a/src/nf_modules/multiqc/main.nf b/src/nf_modules/multiqc/main.nf index 64cecaac..eaa0ced3 100644 --- a/src/nf_modules/multiqc/main.nf +++ b/src/nf_modules/multiqc/main.nf @@ -1,6 +1,7 @@ version = "1.9" container_url = "lbmc/multiqc:${version}" +params.multiqc = "" process multiqc { container = "${container_url}" label "big_mem_mono_cpus" @@ -14,6 +15,6 @@ process multiqc { script: """ -multiqc -f . +multiqc ${params.multiqc} -f . """ } diff --git a/src/nf_modules/picard/main.nf b/src/nf_modules/picard/main.nf index aa24096c..ec8572ac 100644 --- a/src/nf_modules/picard/main.nf +++ b/src/nf_modules/picard/main.nf @@ -1,6 +1,7 @@ version = "2.18.11" container_url = "lbmc/picard:${version}" +params.mark_duplicate = "VALIDATION_STRINGENCY=LENIENT REMOVE_DUPLICATES=true" process mark_duplicate { container = "${container_url}" label "big_mem_mono_cpus" @@ -16,8 +17,7 @@ process mark_duplicate { script: """ PicardCommandLine MarkDuplicates \ - VALIDATION_STRINGENCY=LENIENT \ - REMOVE_DUPLICATES=true \ + ${params.mark_duplicate} \ INPUT=${bam} \ OUTPUT=${bam.baseName}_dedup.bam \ METRICS_FILE=${bam.baseName}_picard_dedup_report.txt &> \ @@ -25,6 +25,7 @@ PicardCommandLine MarkDuplicates \ """ } +params.index_fasta = "" process index_fasta { container = "${container_url}" label "big_mem_mono_cpus" @@ -38,11 +39,13 @@ process index_fasta { script: """ PicardCommandLine CreateSequenceDictionary \ -REFERENCE=${fasta} \ -OUTPUT=${fasta.baseName}.dict + ${params.index_fasta} \ + REFERENCE=${fasta} \ + OUTPUT=${fasta.baseName}.dict """ } 
+params.index_bam = "" process index_bam { container = "${container_url}" label "big_mem_mono_cpus" @@ -56,6 +59,7 @@ process index_bam { script: """ PicardCommandLine BuildBamIndex \ -INPUT=${bam} + ${params.index_bam} \ + INPUT=${bam} """ } diff --git a/src/nf_modules/sambamba/main.nf b/src/nf_modules/sambamba/main.nf index e07210bb..ea6c6e97 100644 --- a/src/nf_modules/sambamba/main.nf +++ b/src/nf_modules/sambamba/main.nf @@ -1,6 +1,7 @@ version = "0.6.7" container_url = "lbmc/sambamba:${version}" +params.index_bam = "" process index_bam { container = "${container_url}" label "big_mem_multi_cpus" @@ -14,10 +15,11 @@ process index_bam { script: """ -sambamba index -t ${task.cpus} ${bam} +sambamba index ${params.index_bam} -t ${task.cpus} ${bam} """ } +params.sort_bam = "" process sort_bam { container = "${container_url}" label "big_mem_multi_cpus" @@ -31,11 +33,11 @@ process sort_bam { script: """ -sambamba sort -t ${task.cpus} -o ${bam.baseName}_sorted.bam ${bam} +sambamba sort -t ${task.cpus} ${params.sort_bam} -o ${bam.baseName}_sorted.bam ${bam} """ } - +params.split_bam = "" process split_bam { container = "${container_url}" label "big_mem_multi_cpus" @@ -49,9 +51,9 @@ process split_bam { tuple val(file_id), path("*_reverse.bam*"), emit: bam_reverse script: """ -sambamba view -t ${task.cpus} -h -F "strand == '+'" ${bam} > \ +sambamba view -t ${task.cpus} ${params.split_bam} -h -F "strand == '+'" ${bam} > \ ${bam.baseName}_forward.bam -sambamba view -t ${task.cpus} -h -F "strand == '-'" ${bam} > \ +sambamba view -t ${task.cpus} ${params.split_bam} -h -F "strand == '-'" ${bam} > \ ${bam.baseName}_reverse.bam """ } diff --git a/src/nf_modules/ucsc/main.nf b/src/nf_modules/ucsc/main.nf index 1e288e83..1e9debe3 100644 --- a/src/nf_modules/ucsc/main.nf +++ b/src/nf_modules/ucsc/main.nf @@ -1,6 +1,7 @@ version = "407" container_url = "lbmc/ucsc:${version}" +params.bedgraph_to_bigwig = "" process bedgraph_to_bigwig { container = "${container_url}" label 
"big_mem_mono_cpus" @@ -20,7 +21,7 @@ LC_COLLATE=C awk -v OFS="\\t" '{print \$1, \$3}' ${bed} > chromsize.txt sort -T ./ -k1,1 -k2,2n ${bg} > \ - bedGraphToBigWig - \ + bedGraphToBigWig ${params.bedgraph_to_bigwig} - \ chromsize.txt \ ${bg.simpleName}_norm.bw """ -- GitLab