# Patch summary (free-form preamble placed before the first `diff --git` header).
#
# Purpose: add a coordinate-sort step and an index step after BAM filtering,
# for both the paired-end and the single-end branches of the training-dataset
# pipeline, and register the four new processes in the config profiles.
#
# src/training_dataset.config
#   * First hunk: adds `$sort_bam_paired`, `$index_bam_paired`,
#     `$sort_bam_single` and `$index_bam_single` selectors, each with
#     container = "samtools:1.7", next to the existing `$bam_2_fastq_*` entries.
#     The enclosing profile name is not visible in this hunk.
#   * Second hunk: adds the same four selectors with
#     beforeScript = "module purge; module load SAMtools/1.7".
#     NOTE(review): this hunk appears to sit inside the `sge` profile that
#     opens at the end of the first hunk -- confirm against the full file.
#
# src/training_dataset.nf
#   * Branch guarded by `params.fastq_paired != ""`: the filtering process now
#     emits into `filtered_bam_files_paired` (was `filtered_bam_files`).
#     New `sort_bam_paired` (cpus 4) runs
#       samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam}
#     and emits `*_sorted.bam` into `sorted_bam_files_paired`.
#     New `index_bam_paired` runs `samtools index ${bam}` and emits `*.bam*`
#     into `indexed_bam_file_paired`.
#   * Branch guarded by `params.fastq_single != ""`: `filter_bam_single` loses
#     its publishDir directive and now emits into `filtered_bam_files_single`
#     (was `filtered_bam_files`). New `sort_bam_single` / `index_bam_single`
#     mirror the paired versions and emit into `sorted_bam_files_single` /
#     `indexed_bam_file_single`.
#   * All four new processes publish to "results/training/bams/" with
#     mode: 'copy'.
#
# NOTE(review): the index processes declare the output glob "*.bam*". Whether
# that captures the staged input BAM together with the newly written index, or
# only the index, depends on how Nextflow treats input files in output globs
# -- confirm that downstream consumers of `indexed_bam_file_*` receive what
# they expect.
# NOTE(review): the old channel name `filtered_bam_files` is renamed in both
# branches; any consumer outside the hunks shown here must be updated -- verify.
diff --git a/src/training_dataset.config b/src/training_dataset.config index 5dc812709740b1db8041946eb25162ae02378734..00a471c6c4a356b9fd843e7185b6c440ea37bfcb 100644 --- a/src/training_dataset.config +++ b/src/training_dataset.config @@ -18,12 +18,24 @@ profiles { $bam_2_fastq_paired { container = "samtools:1.7" } + $sort_bam_paired { + container = "samtools:1.7" + } + $index_bam_paired { + container = "samtools:1.7" + } $mapping_fastq_single { container = "bowtie2:2.3.4.1" } $bam_2_fastq_single { container = "samtools:1.7" } + $sort_bam_single { + container = "samtools:1.7" + } + $index_bam_single { + container = "samtools:1.7" + } } } sge { @@ -59,12 +71,24 @@ profiles { $bam_2_fastq_paired { beforeScript = "module purge; module load SAMtools/1.7" } + $sort_bam_paired { + beforeScript = "module purge; module load SAMtools/1.7" + } + $index_bam_paired { + beforeScript = "module purge; module load SAMtools/1.7" + } $mapping_fastq_single { beforeScript = "module purge; module load SAMtools/1.7; module load Bowtie2/2.3.4.1" } $bam_2_fastq_single { beforeScript = "module purge; module load SAMtools/1.7" } + $sort_bam_single { + beforeScript = "module purge; module load SAMtools/1.7" + } + $index_bam_single { + beforeScript = "module purge; module load SAMtools/1.7" + } } } } diff --git a/src/training_dataset.nf b/src/training_dataset.nf index 58c9bbb504ad0dfc5c25cd2ba055b1a063e76ddb..205004607f0f48b8c434a823e7fc362bac899848 100644 --- a/src/training_dataset.nf +++ b/src/training_dataset.nf @@ -160,12 +160,45 @@ if ( params.fastq_paired != "" ) { file bed from bed_files output: - set file_id, "*.bam" into filtered_bam_files + set file_id, "*.bam" into filtered_bam_files_paired script: """ samtools view -@ ${task.cpus} -hb ${bam} -f 0x2 > ${file_id}_S.bam """ } + + process sort_bam_paired { + tag "$file_id" + publishDir "results/training/bams/", mode: 'copy' + cpus 4 + + input: + set file_id, file(bam) from filtered_bam_files_paired + + output: + set file_id, 
"*_sorted.bam" into sorted_bam_files_paired + + script: + """ + samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam} + """ + } + + process index_bam_paired { + tag "$file_id" + publishDir "results/training/bams/", mode: 'copy' + + input: + set file_id, file(bam) from sorted_bam_files_paired + + output: + set file_id, "*.bam*" into indexed_bam_file_paired + + script: + """ + samtools index ${bam} + """ + } } @@ -226,7 +259,6 @@ if ( params.fastq_single != "" ) { process filter_bam_single { tag "$file_id" - publishDir "results/training/bams/", mode: 'copy' cpus 4 input: @@ -234,10 +266,44 @@ if ( params.fastq_single != "" ) { file bed from bed_files output: - set file_id, "*_S.bam" into filtered_bam_files + set file_id, "*_S.bam" into filtered_bam_files_single script: """ samtools view -@ ${task.cpus} -hb ${bam} -F 0x4 > ${file_id}_S.bam """ } + + process sort_bam_single { + tag "$file_id" + publishDir "results/training/bams/", mode: 'copy' + cpus 4 + + input: + set file_id, file(bam) from filtered_bam_files_single + + output: + set file_id, "*_sorted.bam" into sorted_bam_files_single + + script: + """ + samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam} + """ + } + + process index_bam_single { + tag "$file_id" + publishDir "results/training/bams/", mode: 'copy' + + input: + set file_id, file(bam) from sorted_bam_files_single + + output: + set file_id, "*.bam*" into indexed_bam_file_single + + script: + """ + samtools index ${bam} + """ + } } +