Commit 44a7178e authored by nservant

[MODIF] fix input extension errors

parent e7852f98
CHANGELOG.md
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### `Fixed`
+* Fix recurrent bug in input file extension (#86)
 * Fix bug in `--bin_size` parameter (#85)
 * `min_mapq` is ignored if `--keep_multi` is used
...
main.nf
@@ -157,26 +157,36 @@ if (params.input_paths){
       .from( params.input_paths )
       .map { row -> [ row[0], [file(row[1][0]), file(row[1][1])]] }
       .separate( raw_reads, raw_reads_2 ) { a -> [tuple(a[0], a[1][0]), tuple(a[0], a[1][1])] }
 }else{
    raw_reads = Channel.create()
    raw_reads_2 = Channel.create()
-   Channel
-      .fromFilePairs( params.input )
-      .separate( raw_reads, raw_reads_2 ) { a -> [tuple(a[0], a[1][0]), tuple(a[0], a[1][1])] }
+   if ( params.split_fastq ){
+      Channel
+         .fromFilePairs( params.input, flat:true )
+         .splitFastq( by: params.fastq_chunks_size, pe:true, file: true, compress:true)
+         .separate( raw_reads, raw_reads_2 ) { a -> [tuple(a[0] + "_R1", a[1]), tuple(a[0] + "_R2", a[2])] }
+   }else{
+      Channel
+         .fromFilePairs( params.input )
+         .separate( raw_reads, raw_reads_2 ) { a -> [tuple(a[0] + "_R1", a[1][0]), tuple(a[0] + "_R2", a[1][1])] }
+   }
 }
-// SPlit fastq files
-// https://www.nextflow.io/docs/latest/operator.html#splitfastq
-if ( params.split_fastq ){
-   raw_reads_full = raw_reads.concat( raw_reads_2 )
-   raw_reads = raw_reads_full.splitFastq( by: params.fastq_chunks_size, file: true)
-}else{
-   raw_reads = raw_reads.concat( raw_reads_2 ).dump(tag: "data")
-}
+// Update sample name if splitFastq is used
+def updateSampleName(x) {
+   if ((matcher = x[1] =~ /\s*(\.[\d]+).fastq.gz/)) {
+      res = matcher[0][1]
+   }
+   return [x[0] + res, x[1]]
+}
+if (params.split_fastq ){
+   raw_reads = raw_reads.concat( raw_reads_2 ).map{it -> updateSampleName(it)}.dump(tag:'input')
+}else{
+   raw_reads = raw_reads.concat( raw_reads_2 ).dump(tag:'input')
+}
 /*
  * Other input channels
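Note on the new input logic: with `--split_fastq`, `splitFastq()` writes chunk files whose names end in `.<n>.fastq.gz`, and `updateSampleName()` appends that chunk index to the channel key so every chunk travels through the mapping steps as its own sample. A minimal Groovy sketch of that renaming, runnable outside Nextflow; the file names in the asserts are hypothetical:

    // Same pattern as updateSampleName() in the diff above; `res` is given a
    // default here so the sketch also handles un-chunked names.
    def updateSampleName(x) {
        def res = ''
        def matcher = (x[1] =~ /\s*(\.[\d]+).fastq.gz/)
        if (matcher) {
            res = matcher[0][1]          // e.g. ".1" for the first chunk
        }
        return [x[0] + res, x[1]]
    }

    // hypothetical chunked pair produced by fromFilePairs + splitFastq
    assert updateSampleName(['SRR400264_R1', 'SRR400264_R1.1.fastq.gz']) ==
           ['SRR400264_R1.1', 'SRR400264_R1.1.fastq.gz']
    // a non-chunked file keeps its original key
    assert updateSampleName(['SRR400264_R2', 'SRR400264_R2.fastq.gz']) ==
           ['SRR400264_R2', 'SRR400264_R2.fastq.gz']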
@@ -230,7 +240,7 @@ else if ( params.fasta && params.restriction_site ){
       .ifEmpty { exit 1, "Restriction fragments: Fasta file not found: ${params.fasta}" }
       .set { fasta_for_resfrag }
 }
-else {
+else if (! params.dnase) {
    exit 1, "No restriction fragments file specified!"
 }
@@ -255,17 +265,17 @@ if (params.restriction_site){
    summary['Digestion'] = params.digestion
    summary['Restriction Motif']= params.restriction_site
    summary['Ligation Motif'] = params.ligation_site
-   summary['Min Fragment Size']= ("$params.min_restriction_fragment_size".isInteger() ? params.min_restriction_fragment_size : 'None')
-   summary['Max Fragment Size']= ("$params.max_restriction_fragment_size".isInteger() ? params.max_restriction_fragment_size : 'None')
-   summary['Min Insert Size'] = ("$params.min_insert_size".isInteger() ? params.min_insert_size : 'None')
-   summary['Max Insert Size'] = ("$params.max_insert_size".isInteger() ? params.max_insert_size : 'None')
+   summary['Min Fragment Size']= params.min_restriction_fragment_size
+   summary['Max Fragment Size']= params.max_restriction_fragment_size
+   summary['Min Insert Size'] = params.min_insert_size
+   summary['Max Insert Size'] = params.max_insert_size
 }else{
    summary['DNase Mode'] = params.dnase
-   summary['Min CIS dist'] = ("$params.min_cis_dist".isInteger() ? params.min_cis_dist : 'None')
+   summary['Min CIS dist'] = params.min_cis_dist
 }
 summary['Min MAPQ'] = params.min_mapq
-summary['Keep Duplicates'] = params.keep_dups
-summary['Keep Multihits'] = params.keep_multi
+summary['Keep Duplicates'] = params.keep_dups ? 'Yes' : 'No'
+summary['Keep Multihits'] = params.keep_multi ? 'Yes' : 'No'
 summary['Maps resolution'] = params.bin_size
 summary['Max Resources'] = "$params.max_memory memory, $params.max_cpus cpus, $params.max_time time per job"
 if (workflow.containerEngine) summary['Container'] = "$workflow.containerEngine - $workflow.container"
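The summary block now reports the numeric thresholds directly instead of routing them through an `.isInteger()` check, and renders the boolean flags as Yes/No. A small Groovy sketch of the new rendering, with made-up parameter values:

    // hypothetical params map; the real values come from nextflow.config / CLI
    def params = [min_insert_size: 100, keep_dups: false, keep_multi: true]
    def summary = [:]
    summary['Min Insert Size']  = params.min_insert_size              // 100
    summary['Keep Duplicates']  = params.keep_dups ? 'Yes' : 'No'     // No
    summary['Keep Multihits']   = params.keep_multi ? 'Yes' : 'No'    // Yes
    summary.each { k, v -> println "${k.padRight(16)}: ${v}" }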
@@ -435,7 +445,7 @@ if(!params.restriction_fragments && params.fasta && !params.dnase){
 */
 process bowtie2_end_to_end {
-   tag "$prefix"
+   tag "$sample"
    label 'process_medium'
    publishDir path: { params.save_aligned_intermediates ? "${params.outdir}/mapping" : params.outdir },
               saveAs: { params.save_aligned_intermediates ? it : null }, mode: params.publish_dir_mode
@@ -445,13 +455,12 @@ process bowtie2_end_to_end {
    file index from bwt2_index_end2end.collect()
    output:
-   set val(prefix), file("${prefix}_unmap.fastq") into unmapped_end_to_end
-   set val(prefix), file("${prefix}.bam") into end_to_end_bam
+   set val(sample), file("${prefix}_unmap.fastq") into unmapped_end_to_end
+   set val(sample), file("${prefix}.bam") into end_to_end_bam
    script:
    prefix = reads.toString() - ~/(\.fq)?(\.fastq)?(\.gz)?$/
    def bwt2_opts = params.bwt2_opts_end2end
    if (!params.dnase){
    """
    bowtie2 --rg-id BMG --rg SM:${prefix} \\
@@ -474,7 +483,7 @@ process bowtie2_end_to_end {
 }
 process trim_reads {
-   tag "$prefix"
+   tag "$sample"
    label 'process_low'
    publishDir path: { params.save_aligned_intermediates ? "${params.outdir}/mapping" : params.outdir },
               saveAs: { params.save_aligned_intermediates ? it : null }, mode: params.publish_dir_mode
@@ -483,12 +492,13 @@ process trim_reads {
    !params.dnase
    input:
-   set val(prefix), file(reads) from unmapped_end_to_end
+   set val(sample), file(reads) from unmapped_end_to_end
    output:
-   set val(prefix), file("${prefix}_trimmed.fastq") into trimmed_reads
+   set val(sample), file("${prefix}_trimmed.fastq") into trimmed_reads
    script:
+   prefix = reads.toString() - ~/(\.fq)?(\.fastq)?(\.gz)?$/
    """
    cutsite_trimming --fastq $reads \\
                     --cutsite ${params.ligation_site} \\
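Since the channel key is now `sample`, each process re-derives `prefix` from the staged read file itself; the line added above uses Groovy's string-minus-pattern idiom, which removes the first match of the regex. A small sketch with hypothetical file names:

    // strips a trailing .fq / .fastq / .fastq.gz extension, as in the diff above
    def stripExt = { f -> f.toString() - ~/(\.fq)?(\.fastq)?(\.gz)?$/ }

    assert stripExt('SRR400264_R1.1_unmap.fastq') == 'SRR400264_R1.1_unmap'
    assert stripExt('SRR400264_R2.fastq.gz')      == 'SRR400264_R2'
    assert stripExt('sample_R1.fq')               == 'sample_R1'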
@@ -497,7 +507,7 @@ process trim_reads {
 }
 process bowtie2_on_trimmed_reads {
-   tag "$prefix"
+   tag "$sample"
    label 'process_medium'
    publishDir path: { params.save_aligned_intermediates ? "${params.outdir}/mapping" : params.outdir },
               saveAs: { params.save_aligned_intermediates ? it : null }, mode: params.publish_dir_mode
@@ -506,11 +516,11 @@ process bowtie2_on_trimmed_reads {
    !params.dnase
    input:
-   set val(prefix), file(reads) from trimmed_reads
+   set val(sample), file(reads) from trimmed_reads
    file index from bwt2_index_trim.collect()
    output:
-   set val(prefix), file("${prefix}_trimmed.bam") into trimmed_bam
+   set val(sample), file("${prefix}_trimmed.bam") into trimmed_bam
    script:
    prefix = reads.toString() - ~/(_trimmed)?(\.fq)?(\.fastq)?(\.gz)?$/
@@ -524,22 +534,24 @@ process bowtie2_on_trimmed_reads {
 }
 if (!params.dnase){
-   process merge_mapping_steps{
-      tag "$sample = $bam1 + $bam2"
+   process bowtie2_merge_mapping_steps{
+      tag "$prefix = $bam1 + $bam2"
       label 'process_medium'
       publishDir path: { params.save_aligned_intermediates ? "${params.outdir}/mapping" : params.outdir },
                  saveAs: { params.save_aligned_intermediates ? it : null }, mode: params.publish_dir_mode
      input:
-     set val(prefix), file(bam1), file(bam2) from end_to_end_bam.join( trimmed_bam )
+     set val(prefix), file(bam1), file(bam2) from end_to_end_bam.join( trimmed_bam ).dump(tag:'merge')
      output:
      set val(sample), file("${prefix}_bwt2merged.bam") into bwt2_merged_bam
      set val(oname), file("${prefix}.mapstat") into all_mapstat
      script:
-     sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2|_1|_2)/
-     tag = prefix.toString() =~/_R1|_val_1|_1/ ? "R1" : "R2"
+     //sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2|_1|_2)/
+     sample = prefix.toString() - ~/(_R1|_R2)/
+     //tag = prefix.toString() =~/_R1|_val_1|_1/ ? "R1" : "R2"
+     tag = prefix.toString() =~/_R1/ ? "R1" : "R2"
      oname = prefix.toString() - ~/(\.[0-9]+)$/
      """
      samtools merge -@ ${task.cpus} \\
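Because the chunk index is now part of the key (e.g. `SRR400264_R1.1`), the merge step recomputes three names from `prefix`: `sample` (the key used to pair mates later), the mate `tag`, and `oname`, which strips the chunk index so the per-chunk mapping statistics share one key per mate. A Groovy sketch with a hypothetical chunked prefix:

    def prefix = 'SRR400264_R1.1'                     // <sample>_<mate>.<chunk>

    def sample = prefix.toString() - ~/(_R1|_R2)/     // key used by groupTuple()
    def tag    = prefix.toString() =~/_R1/ ? 'R1' : 'R2'
    def oname  = prefix.toString() - ~/(\.[0-9]+)$/   // drops the chunk index

    assert sample == 'SRR400264.1'
    assert tag    == 'R1'
    assert oname  == 'SRR400264_R1'

Keeping the chunk index inside `sample` is what lets the R1 and R2 BAMs of the same chunk meet again in `combine_mates` below.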
@@ -566,46 +578,48 @@ if (!params.dnase){
    }
 }else{
    process dnase_mapping_stats{
-      tag "$sample = $bam1"
+      tag "$sample = $bam"
       label 'process_medium'
       publishDir path: { params.save_aligned_intermediates ? "${params.outdir}/mapping" : params.outdir },
                  saveAs: { params.save_aligned_intermediates ? it : null }, mode: params.publish_dir_mode
      input:
-     set val(prefix), file(bam1) from end_to_end_bam
+     set val(prefix), file(bam) from end_to_end_bam
      output:
-     set val(sample), file(bam1) into bwt2_merged_bam
+     set val(sample), file(bam) into bwt2_merged_bam
      set val(oname), file("${prefix}.mapstat") into all_mapstat
      script:
-     sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2|_1|_2)/
-     tag = prefix.toString() =~/_R1|_val_1|_1/ ? "R1" : "R2"
+     //sample = prefix.toString() - ~/(_R1|_R2|_val_1|_val_2|_1|_2)/
+     sample = prefix.toString() - ~/(_R1|_R2)/
+     //tag = prefix.toString() =~/_R1|_val_1|_1/ ? "R1" : "R2"
+     tag = prefix.toString() =~/_R1/ ? "R1" : "R2"
      oname = prefix.toString() - ~/(\.[0-9]+)$/
      """
      echo "## ${prefix}" > ${prefix}.mapstat
      echo -n "total_${tag}\t" >> ${prefix}.mapstat
-     samtools view -c ${bam1} >> ${prefix}.mapstat
+     samtools view -c ${bam} >> ${prefix}.mapstat
      echo -n "mapped_${tag}\t" >> ${prefix}.mapstat
-     samtools view -c -F 4 ${bam1} >> ${prefix}.mapstat
+     samtools view -c -F 4 ${bam} >> ${prefix}.mapstat
      echo -n "global_${tag}\t" >> ${prefix}.mapstat
-     samtools view -c -F 4 ${bam1} >> ${prefix}.mapstat
+     samtools view -c -F 4 ${bam} >> ${prefix}.mapstat
      echo -n "local_${tag}\t0" >> ${prefix}.mapstat
      """
    }
 }
-process combine_mapped_files{
+process combine_mates{
    tag "$sample = $r1_prefix + $r2_prefix"
    label 'process_low'
    publishDir "${params.outdir}/mapping", mode: params.publish_dir_mode,
              saveAs: {filename -> filename.indexOf(".pairstat") > 0 ? "stats/$filename" : "$filename"}
   input:
-  set val(sample), file(aligned_bam) from bwt2_merged_bam.groupTuple().dump(tag:'bams')
+  set val(sample), file(aligned_bam) from bwt2_merged_bam.groupTuple().dump(tag:'mates')
   output:
-  set val(sample), file("${sample}_bwt2pairs.bam") into paired_bam
+  set val(oname), file("${sample}_bwt2pairs.bam") into paired_bam
   set val(oname), file("*.pairstat") into all_pairstat
   script:
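In `combine_mates`, `bwt2_merged_bam` carries one BAM per mate (and per chunk) under the same `sample` key, so `groupTuple()` is what brings the R1/R2 BAMs of a chunk back together before pairing. A minimal Nextflow sketch of that grouping, with hypothetical keys and file names:

    Channel
        .from( ['SRR400264.1', 'SRR400264_R1.1_bwt2merged.bam'],
               ['SRR400264.1', 'SRR400264_R2.1_bwt2merged.bam'] )
        .groupTuple()
        .view()
    // emits one item, roughly:
    // [SRR400264.1, [SRR400264_R1.1_bwt2merged.bam, SRR400264_R2.1_bwt2merged.bam]]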
@@ -629,7 +643,7 @@ process combine_mapped_files{
 /*
  * STEP2 - DETECT VALID PAIRS
 */
 if (!params.dnase){
    process get_valid_interaction{
@@ -711,7 +725,7 @@ process remove_duplicates {
              saveAs: {filename -> filename.indexOf("*stat") > 0 ? "stats/$sample/$filename" : "$filename"}
   input:
-  set val(sample), file(vpairs) from valid_pairs.groupTuple()
+  set val(sample), file(vpairs) from valid_pairs.groupTuple().dump(tag:'final')
   output:
   set val(sample), file("*.allValidPairs") into all_valid_pairs
...
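The `dump()` calls added throughout this commit (`input`, `merge`, `mates`, `final`) are inert during normal runs; they only print the tagged channel content when the pipeline is launched with Nextflow's `-dump-channels` option, which can also take a comma-separated list of tag names to restrict the output. A hypothetical invocation:

    nextflow run main.nf -profile test -dump-channels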