From 44a7178ecb91494d47406bad16cc49920b4f5e8f Mon Sep 17 00:00:00 2001 From: nservant <nicolas.servant@curie.fr> Date: Mon, 30 Nov 2020 18:20:47 +0100 Subject: [PATCH] [MODIF] fix input extension errors --- CHANGELOG.md | 1 + main.nf | 110 +++++++++++++++++++++++++++++---------------------- 2 files changed, 63 insertions(+), 48 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3063cbd..a0ef487 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### `Fixed` +* Fix recurrent bug in input file extension (#86) * Fix bug in `--bin_size` parameter (#85) * `min_mapq` is ignored if `--keep_multi` is used diff --git a/main.nf b/main.nf index fe719af..1014e37 100644 --- a/main.nf +++ b/main.nf @@ -157,26 +157,36 @@ if (params.input_paths){ .from( params.input_paths ) .map { row -> [ row[0], [file(row[1][0]), file(row[1][1])]] } .separate( raw_reads, raw_reads_2 ) { a -> [tuple(a[0], a[1][0]), tuple(a[0], a[1][1])] } - }else{ +}else{ raw_reads = Channel.create() raw_reads_2 = Channel.create() - Channel - .fromFilePairs( params.input ) - .separate( raw_reads, raw_reads_2 ) { a -> [tuple(a[0], a[1][0]), tuple(a[0], a[1][1])] } + if ( params.split_fastq ){ + Channel + .fromFilePairs( params.input, flat:true ) + .splitFastq( by: params.fastq_chunks_size, pe:true, file: true, compress:true) + .separate( raw_reads, raw_reads_2 ) { a -> [tuple(a[0] + "_R1", a[1]), tuple(a[0] + "_R2", a[2])] } + }else{ + Channel + .fromFilePairs( params.input ) + .separate( raw_reads, raw_reads_2 ) { a -> [tuple(a[0] + "_R1", a[1][0]), tuple(a[0] + "_R2", a[1][1])] } + } } -// SPlit fastq files -// https://www.nextflow.io/docs/latest/operator.html#splitfastq - -if ( params.split_fastq ){ - raw_reads_full = raw_reads.concat( raw_reads_2 ) - raw_reads = raw_reads_full.splitFastq( by: params.fastq_chunks_size, file: true) - }else{ - raw_reads = raw_reads.concat( raw_reads_2 ).dump(tag: "data") +// Update sample name if splitFastq is used +def updateSampleName(x) { + if ((matcher = x[1] =~ /\s*(\.[\d]+).fastq.gz/)) { + res = matcher[0][1] + } + return [x[0] + res, x[1]] } +if (params.split_fastq ){ + raw_reads = raw_reads.concat( raw_reads_2 ).map{it -> updateSampleName(it)}.dump(tag:'input') +}else{ + raw_reads = raw_reads.concat( raw_reads_2 ).dump(tag:'input') +} /* * Other input channels @@ -230,7 +240,7 @@ else if ( params.fasta && params.restriction_site ){ .ifEmpty { exit 1, "Restriction fragments: Fasta file not found: ${params.fasta}" } .set { fasta_for_resfrag } } -else { +else if (! params.dnase) { exit 1, "No restriction fragments file specified!" } @@ -255,17 +265,17 @@ if (params.restriction_site){ summary['Digestion'] = params.digestion summary['Restriction Motif']= params.restriction_site summary['Ligation Motif'] = params.ligation_site - summary['Min Fragment Size']= ("$params.min_restriction_fragment_size".isInteger() ? params.min_restriction_fragment_size : 'None') - summary['Max Fragment Size']= ("$params.max_restriction_fragment_size".isInteger() ? params.max_restriction_fragment_size : 'None') - summary['Min Insert Size'] = ("$params.min_insert_size".isInteger() ? params.min_insert_size : 'None') - summary['Max Insert Size'] = ("$params.max_insert_size".isInteger() ? params.max_insert_size : 'None') + summary['Min Fragment Size']= params.min_restriction_fragment_size + summary['Max Fragment Size']= params.max_restriction_fragment_size + summary['Min Insert Size'] = params.min_insert_size + summary['Max Insert Size'] = params.max_insert_size }else{ summary['DNase Mode'] = params.dnase - summary['Min CIS dist'] = ("$params.min_cis_dist".isInteger() ? params.min_cis_dist : 'None') + summary['Min CIS dist'] = params.min_cis_dist } summary['Min MAPQ'] = params.min_mapq -summary['Keep Duplicates'] = params.keep_dups -summary['Keep Multihits'] = params.keep_multi +summary['Keep Duplicates'] = params.keep_dups ? 'Yes' : 'No' +summary['Keep Multihits'] = params.keep_multi ? 'Yes' : 'No' summary['Maps resolution'] = params.bin_size summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job" if (workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container" @@ -435,7 +445,7 @@ if(!params.restriction_fragments && params.fasta && !params.dnase){ */ process bowtie2_end_to_end { - tag "$prefix" + tag "$sample" label 'process_medium' publishDir path: { params.save_aligned_intermediates ? "${params.outdir}/mapping" : params.outdir }, saveAs: { params.save_aligned_intermediates ? it : null }, mode: params.publish_dir_mode @@ -445,13 +455,12 @@ process bowtie2_end_to_end { file index from bwt2_index_end2end.collect() output: - set val(prefix), file("${prefix}_unmap.fastq") into unmapped_end_to_end - set val(prefix), file("${prefix}.bam") into end_to_end_bam + set val(sample), file("${prefix}_unmap.fastq") into unmapped_end_to_end + set val(sample), file("${prefix}.bam") into end_to_end_bam script: prefix = reads.toString() - ~/(\.fq)?(\.fastq)?(\.gz)?$/ def bwt2_opts = params.bwt2_opts_end2end - if (!params.dnase){ """ bowtie2 --rg-id BMG --rg SM:${prefix} \\ @@ -474,7 +483,7 @@ process bowtie2_end_to_end { } process trim_reads { - tag "$prefix" + tag "$sample" label 'process_low' publishDir path: { params.save_aligned_intermediates ? "${params.outdir}/mapping" : params.outdir }, saveAs: { params.save_aligned_intermediates ? it : null }, mode: params.publish_dir_mode @@ -483,12 +492,13 @@ process trim_reads { !params.dnase input: - set val(prefix), file(reads) from unmapped_end_to_end + set val(sample), file(reads) from unmapped_end_to_end output: - set val(prefix), file("${prefix}_trimmed.fastq") into trimmed_reads + set val(sample), file("${prefix}_trimmed.fastq") into trimmed_reads script: + prefix = reads.toString() - ~/(\.fq)?(\.fastq)?(\.gz)?$/ """ cutsite_trimming --fastq $reads \\ --cutsite ${params.ligation_site} \\ @@ -497,7 +507,7 @@ process trim_reads { } process bowtie2_on_trimmed_reads { - tag "$prefix" + tag "$sample" label 'process_medium' publishDir path: { params.save_aligned_intermediates ? "${params.outdir}/mapping" : params.outdir }, saveAs: { params.save_aligned_intermediates ? it : null }, mode: params.publish_dir_mode @@ -506,11 +516,11 @@ process bowtie2_on_trimmed_reads { !params.dnase input: - set val(prefix), file(reads) from trimmed_reads + set val(sample), file(reads) from trimmed_reads file index from bwt2_index_trim.collect() output: - set val(prefix), file("${prefix}_trimmed.bam") into trimmed_bam + set val(sample), file("${prefix}_trimmed.bam") into trimmed_bam script: prefix = reads.toString() - ~/(_trimmed)?(\.fq)?(\.fastq)?(\.gz)?$/ @@ -524,22 +534,24 @@ process bowtie2_on_trimmed_reads { } if (!params.dnase){ - process merge_mapping_steps{ - tag "$sample = $bam1 + $bam2" + process bowtie2_merge_mapping_steps{ + tag "$prefix = $bam1 + $bam2" label 'process_medium' publishDir path: { params.save_aligned_intermediates ? "${params.outdir}/mapping" : params.outdir }, saveAs: { params.save_aligned_intermediates ? it : null }, mode: params.publish_dir_mode input: - set val(prefix), file(bam1), file(bam2) from end_to_end_bam.join( trimmed_bam ) + set val(prefix), file(bam1), file(bam2) from end_to_end_bam.join( trimmed_bam ).dump(tag:'merge') output: set val(sample), file("${prefix}_bwt2merged.bam") into bwt2_merged_bam set val(oname), file("${prefix}.mapstat") into all_mapstat script: - sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2|_1|_2)/ - tag = prefix.toString() =~/_R1|_val_1|_1/ ? "R1" : "R2" + //sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2|_1|_2)/ + sample = prefix.toString() - ~/(_R1|_R2)/ + //tag = prefix.toString() =~/_R1|_val_1|_1/ ? "R1" : "R2" + tag = prefix.toString() =~/_R1/ ? "R1" : "R2" oname = prefix.toString() - ~/(\.[0-9]+)$/ """ samtools merge -@ ${task.cpus} \\ @@ -566,46 +578,48 @@ if (!params.dnase){ } }else{ process dnase_mapping_stats{ - tag "$sample = $bam1" + tag "$sample = $bam" label 'process_medium' publishDir path: { params.save_aligned_intermediates ? "${params.outdir}/mapping" : params.outdir }, saveAs: { params.save_aligned_intermediates ? it : null }, mode: params.publish_dir_mode input: - set val(prefix), file(bam1) from end_to_end_bam + set val(prefix), file(bam) from end_to_end_bam output: - set val(sample), file(bam1) into bwt2_merged_bam + set val(sample), file(bam) into bwt2_merged_bam set val(oname), file("${prefix}.mapstat") into all_mapstat script: - sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2|_1|_2)/ - tag = prefix.toString() =~/_R1|_val_1|_1/ ? "R1" : "R2" + //sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2|_1|_2)/ + sample = prefix.toString() - ~/(_R1|_R2)/ + //tag = prefix.toString() =~/_R1|_val_1|_1/ ? "R1" : "R2" + tag = prefix.toString() =~/_R1/ ? "R1" : "R2" oname = prefix.toString() - ~/(\.[0-9]+)$/ """ echo "## ${prefix}" > ${prefix}.mapstat echo -n "total_${tag}\t" >> ${prefix}.mapstat - samtools view -c ${bam1} >> ${prefix}.mapstat + samtools view -c ${bam} >> ${prefix}.mapstat echo -n "mapped_${tag}\t" >> ${prefix}.mapstat - samtools view -c -F 4 ${bam1} >> ${prefix}.mapstat + samtools view -c -F 4 ${bam} >> ${prefix}.mapstat echo -n "global_${tag}\t" >> ${prefix}.mapstat - samtools view -c -F 4 ${bam1} >> ${prefix}.mapstat + samtools view -c -F 4 ${bam} >> ${prefix}.mapstat echo -n "local_${tag}\t0" >> ${prefix}.mapstat """ } } -process combine_mapped_files{ +process combine_mates{ tag "$sample = $r1_prefix + $r2_prefix" label 'process_low' publishDir "${params.outdir}/mapping", mode: params.publish_dir_mode, saveAs: {filename -> filename.indexOf(".pairstat") > 0 ? "stats/$filename" : "$filename"} input: - set val(sample), file(aligned_bam) from bwt2_merged_bam.groupTuple().dump(tag:'bams') + set val(sample), file(aligned_bam) from bwt2_merged_bam.groupTuple().dump(tag:'mates') output: - set val(sample), file("${sample}_bwt2pairs.bam") into paired_bam + set val(oname), file("${sample}_bwt2pairs.bam") into paired_bam set val(oname), file("*.pairstat") into all_pairstat script: @@ -629,7 +643,7 @@ process combine_mapped_files{ /* * STEP2 - DETECT VALID PAIRS -*/ + */ if (!params.dnase){ process get_valid_interaction{ @@ -711,7 +725,7 @@ process remove_duplicates { saveAs: {filename -> filename.indexOf("*stat") > 0 ? "stats/$sample/$filename" : "$filename"} input: - set val(sample), file(vpairs) from valid_pairs.groupTuple() + set val(sample), file(vpairs) from valid_pairs.groupTuple().dump(tag:'final') output: set val(sample), file("*.allValidPairs") into all_valid_pairs -- GitLab