From 3bc22f3a50455a9f95105d34a5eb2c410db35a8b Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent@modolo.fr>
Date: Fri, 24 Aug 2018 16:24:03 +0200
Subject: [PATCH] training_dataset: add bam indexing step

---
 src/training_dataset.config | 24 +++++++++++++
 src/training_dataset.nf     | 72 +++++++++++++++++++++++++++++++++++--
 2 files changed, 93 insertions(+), 3 deletions(-)

diff --git a/src/training_dataset.config b/src/training_dataset.config
index 5dc8127..00a471c 100644
--- a/src/training_dataset.config
+++ b/src/training_dataset.config
@@ -18,12 +18,24 @@ profiles {
       $bam_2_fastq_paired {
         container = "samtools:1.7"
       }
+      $sort_bam_paired {
+        container = "samtools:1.7" // same samtools image as $bam_2_fastq_paired
+      }
+      $index_bam_paired {
+        container = "samtools:1.7" // same samtools image as $bam_2_fastq_paired
+      }
       $mapping_fastq_single {
         container = "bowtie2:2.3.4.1"
       }
       $bam_2_fastq_single {
         container = "samtools:1.7"
       }
+      $sort_bam_single {
+        container = "samtools:1.7" // same samtools image as $bam_2_fastq_single
+      }
+      $index_bam_single {
+        container = "samtools:1.7" // same samtools image as $bam_2_fastq_single
+      }
     }
   }
   sge {
@@ -59,12 +71,24 @@ profiles {
       $bam_2_fastq_paired {
         beforeScript = "module purge; module load SAMtools/1.7"
       }
+      $sort_bam_paired {
+        beforeScript = "module purge; module load SAMtools/1.7" // same module setup as $bam_2_fastq_paired
+      }
+      $index_bam_paired {
+        beforeScript = "module purge; module load SAMtools/1.7" // same module setup as $bam_2_fastq_paired
+      }
       $mapping_fastq_single {
         beforeScript = "module purge; module load SAMtools/1.7; module load Bowtie2/2.3.4.1"
       }
       $bam_2_fastq_single {
         beforeScript = "module purge; module load SAMtools/1.7"
       }
+      $sort_bam_single {
+        beforeScript = "module purge; module load SAMtools/1.7" // same module setup as $bam_2_fastq_single
+      }
+      $index_bam_single {
+        beforeScript = "module purge; module load SAMtools/1.7" // same module setup as $bam_2_fastq_single
+      }
     }
   }
 }
diff --git a/src/training_dataset.nf b/src/training_dataset.nf
index 58c9bbb..2050046 100644
--- a/src/training_dataset.nf
+++ b/src/training_dataset.nf
@@ -160,12 +160,45 @@ if ( params.fastq_paired != "" ) {
       file bed from bed_files
 
     output:
-      set file_id, "*.bam" into filtered_bam_files
+      set file_id, "*.bam" into filtered_bam_files_paired
     script:
   """
   samtools view -@ ${task.cpus} -hb ${bam} -f 0x2 > ${file_id}_S.bam
   """
   }
+
+  process sort_bam_paired {
+    // Sort each filtered paired-end BAM with samtools and publish a copy.
+    publishDir "results/training/bams/", mode: 'copy'
+    tag "${file_id}"
+    cpus 4 // forwarded to `samtools sort -@` through task.cpus
+
+    input:
+      set val(file_id), file(bam) from filtered_bam_files_paired
+    output:
+      set val(file_id), file("*_sorted.bam") into sorted_bam_files_paired
+
+    script:
+  """
+  samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam}
+  """
+  }
+
+  process index_bam_paired {
+    // Run `samtools index` on each sorted paired-end BAM.
+    // NOTE(review): "*.bam*" also matches the input BAM's name; confirm what is emitted.
+    publishDir "results/training/bams/", mode: 'copy'
+    tag "${file_id}"
+
+    input:
+      set val(file_id), file(bam) from sorted_bam_files_paired
+    output:
+      set val(file_id), file("*.bam*") into indexed_bam_file_paired
+    script:
+  """
+  samtools index ${bam}
+  """
+  }
 }
 
 
@@ -226,7 +259,6 @@ if ( params.fastq_single != "" ) {
 
   process filter_bam_single {
     tag "$file_id"
-    publishDir "results/training/bams/", mode: 'copy'
     cpus 4
 
     input:
@@ -234,10 +266,44 @@ if ( params.fastq_single != "" ) {
       file bed from bed_files
 
     output:
-      set file_id, "*_S.bam" into filtered_bam_files
+      set file_id, "*_S.bam" into filtered_bam_files_single
     script:
   """
   samtools view -@ ${task.cpus} -hb ${bam} -F 0x4 > ${file_id}_S.bam
   """
   }
+
+  process sort_bam_single {
+    // Sort each filtered single-end BAM with samtools and publish a copy.
+    publishDir "results/training/bams/", mode: 'copy'
+    tag "${file_id}"
+    cpus 4 // forwarded to `samtools sort -@` through task.cpus
+
+    input:
+      set val(file_id), file(bam) from filtered_bam_files_single
+    output:
+      set val(file_id), file("*_sorted.bam") into sorted_bam_files_single
+
+    script:
+  """
+  samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam}
+  """
+  }
+
+  process index_bam_single {
+    // Run `samtools index` on each sorted single-end BAM.
+    // NOTE(review): "*.bam*" also matches the input BAM's name; confirm what is emitted.
+    publishDir "results/training/bams/", mode: 'copy'
+    tag "${file_id}"
+
+    input:
+      set val(file_id), file(bam) from sorted_bam_files_single
+    output:
+      set val(file_id), file("*.bam*") into indexed_bam_file_single
+    script:
+  """
+  samtools index ${bam}
+  """
+  }
 }
+
-- 
GitLab