From 3bc22f3a50455a9f95105d34a5eb2c410db35a8b Mon Sep 17 00:00:00 2001 From: Laurent Modolo <laurent@modolo.fr> Date: Fri, 24 Aug 2018 16:24:03 +0200 Subject: [PATCH] training_dataset: add bam indexing step --- src/training_dataset.config | 24 +++++++++++++ src/training_dataset.nf | 72 +++++++++++++++++++++++++++++++++++-- 2 files changed, 93 insertions(+), 3 deletions(-) diff --git a/src/training_dataset.config b/src/training_dataset.config index 5dc8127..00a471c 100644 --- a/src/training_dataset.config +++ b/src/training_dataset.config @@ -18,12 +18,24 @@ profiles { $bam_2_fastq_paired { container = "samtools:1.7" } + $sort_bam_paired { + container = "samtools:1.7" + } + $index_bam_paired { + container = "samtools:1.7" + } $mapping_fastq_single { container = "bowtie2:2.3.4.1" } $bam_2_fastq_single { container = "samtools:1.7" } + $sort_bam_single { + container = "samtools:1.7" + } + $index_bam_single { + container = "samtools:1.7" + } } } sge { @@ -59,12 +71,24 @@ profiles { $bam_2_fastq_paired { beforeScript = "module purge; module load SAMtools/1.7" } + $sort_bam_paired { + beforeScript = "module purge; module load SAMtools/1.7" + } + $index_bam_paired { + beforeScript = "module purge; module load SAMtools/1.7" + } $mapping_fastq_single { beforeScript = "module purge; module load SAMtools/1.7; module load Bowtie2/2.3.4.1" } $bam_2_fastq_single { beforeScript = "module purge; module load SAMtools/1.7" } + $sort_bam_single { + beforeScript = "module purge; module load SAMtools/1.7" + } + $index_bam_single { + beforeScript = "module purge; module load SAMtools/1.7" + } } } } diff --git a/src/training_dataset.nf b/src/training_dataset.nf index 58c9bbb..2050046 100644 --- a/src/training_dataset.nf +++ b/src/training_dataset.nf @@ -160,12 +160,45 @@ if ( params.fastq_paired != "" ) { file bed from bed_files output: - set file_id, "*.bam" into filtered_bam_files + set file_id, "*.bam" into filtered_bam_files_paired script: """ samtools view -@ ${task.cpus} -hb ${bam} -f 0x2 > ${file_id}_S.bam """ } + + process sort_bam_paired { + tag "$file_id" + publishDir "results/training/bams/", mode: 'copy' + cpus 4 + + input: + set file_id, file(bam) from filtered_bam_files_paired + + output: + set file_id, "*_sorted.bam" into sorted_bam_files_paired + + script: + """ + samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam} + """ + } + + process index_bam_paired { + tag "$file_id" + publishDir "results/training/bams/", mode: 'copy' + + input: + set file_id, file(bam) from sorted_bam_files_paired + + output: + set file_id, "*.bam*" into indexed_bam_file_paired + + script: + """ + samtools index ${bam} + """ + } } @@ -226,7 +259,6 @@ if ( params.fastq_single != "" ) { process filter_bam_single { tag "$file_id" - publishDir "results/training/bams/", mode: 'copy' cpus 4 input: @@ -234,10 +266,44 @@ if ( params.fastq_single != "" ) { file bed from bed_files output: - set file_id, "*_S.bam" into filtered_bam_files + set file_id, "*_S.bam" into filtered_bam_files_single script: """ samtools view -@ ${task.cpus} -hb ${bam} -F 0x4 > ${file_id}_S.bam """ } + + process sort_bam_single { + tag "$file_id" + publishDir "results/training/bams/", mode: 'copy' + cpus 4 + + input: + set file_id, file(bam) from filtered_bam_files_single + + output: + set file_id, "*_sorted.bam" into sorted_bam_files_single + + script: + """ + samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam} + """ + } + + process index_bam_single { + tag "$file_id" + publishDir "results/training/bams/", mode: 'copy' + + input: + set file_id, file(bam) from sorted_bam_files_single + + output: + set file_id, "*.bam*" into indexed_bam_file_single + + script: + """ + samtools index ${bam} + """ + } } + -- GitLab