From 59b0f72d0c712a7bbbb041b3d4dc695065072521 Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent.modolo@ens-lyon.fr>
Date: Fri, 29 Mar 2019 13:27:06 +0100
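Subject: [PATCH] hisat2: merge samtools and hisat2 in the mapping processes

The mapping_fastq processes in mapping_paired.nf and mapping_single.nf now
pipe the hisat2 output directly into `samtools view -Sb`, writing a BAM file
and a hisat2 report instead of a SAM file. The index prefix is now detected
from the `.ht2` files instead of `.bt2`.

Also fix the `${task.cpus}` interpolation in indexing.nf, rename the `sge`
profile to `psmn` in indexing.config, remove bam_converter.nf, hisat2.nf and
hisat2.config, and update tests.sh to use the per-module configs, `-resume`
and optional singularity runs.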

---
 src/nf_modules/hisat2/bam_converter.nf  |  32 ------
 src/nf_modules/hisat2/hisat2.config     |  54 ----------
 src/nf_modules/hisat2/hisat2.nf         | 138 ------------------------
 src/nf_modules/hisat2/indexing.config   |   4 +-
 src/nf_modules/hisat2/indexing.nf       |   2 +-
 src/nf_modules/hisat2/mapping_paired.nf |  17 ++-
 src/nf_modules/hisat2/mapping_single.nf |  17 ++-
 src/nf_modules/hisat2/tests.sh          |  44 +++++---
 8 files changed, 62 insertions(+), 246 deletions(-)
 delete mode 100644 src/nf_modules/hisat2/bam_converter.nf
 delete mode 100644 src/nf_modules/hisat2/hisat2.config
 delete mode 100644 src/nf_modules/hisat2/hisat2.nf

diff --git a/src/nf_modules/hisat2/bam_converter.nf b/src/nf_modules/hisat2/bam_converter.nf
deleted file mode 100644
index f9627eb0..00000000
--- a/src/nf_modules/hisat2/bam_converter.nf
+++ /dev/null
@@ -1,32 +0,0 @@
-/*
-* SAMtools :
-* Imputs : sam files
-* Output : bam files
-*/
-
-/*                      sam to bam                                    */
-params.sam = "$baseDir/data/bam/*.sam"
-
-log.info "sam files : ${params.sam}"
-
-Channel
-  .fromPath( params.sam )
-  .ifEmpty { error "Cannot find any sam files matching: ${params.sam}" }
-  .set { sam_files }
-
-process bam_converter {
-  tag "$sam"
-  cpus 4
-  publishDir "results/mapping/bam/", mode: 'copy'
-
-  input:
-    file sam from sam_files
-
-  output:
-    file "*.bam" into bam_files
-
-  script:
-"""
-samtools view -@ ${task.cpus} -bS ${sam} > ${sam.baseName}.bam
-"""
-}
diff --git a/src/nf_modules/hisat2/hisat2.config b/src/nf_modules/hisat2/hisat2.config
deleted file mode 100644
index c2988690..00000000
--- a/src/nf_modules/hisat2/hisat2.config
+++ /dev/null
@@ -1,54 +0,0 @@
-profiles {
-  docker {
-    docker.temp = 'auto'
-    docker.enabled = true
-    process {
-      $index_fasta {
-        container = "hisat2:2.0.0"
-      }
-      $mapping_fastq {
-        container = "hisat2:2.0.0"
-      }
-      $bam_converter {
-        container = "samtools:1.7"
-      }
-    }
-  }
-  sge {
-    process{
-      $index_fasta {
-        beforeScript = "module purge; module load Hisat2/2.0.0"
-        executor = "sge"
-        cpus = 1
-        memory = "5GB"
-        time = "6h"
-        queueSize = 1000
-        pollInterval = '60sec'
-        queue = 'h6-E5-2667v4deb128'
-        penv = 'openmp8'
-      }
-      $mapping_fastq {
-        beforeScript = "module purge; module load Hisat2/2.0.0"
-        executor = "sge"
-        cpus = 4
-        memory = "5GB"
-        time = "6h"
-        queueSize = 1000
-        pollInterval = '60sec'
-        queue = 'h6-E5-2667v4deb128'
-        penv = 'openmp8'
-      }
-      $bam_converter {
-        beforeScript = "module purge; module load SAMtools/1.5"
-        executor = "sge"
-        cpus = 4
-        memory = "5GB"
-        time = "6h"
-        queueSize = 1000
-        pollInterval = '60sec'
-        queue = 'h6-E5-2667v4deb128'
-        penv = 'openmp8'
-      }
-    }
-  }
-}
diff --git a/src/nf_modules/hisat2/hisat2.nf b/src/nf_modules/hisat2/hisat2.nf
deleted file mode 100644
index e8d5eb63..00000000
--- a/src/nf_modules/hisat2/hisat2.nf
+++ /dev/null
@@ -1,138 +0,0 @@
-/*
-* Hisat2 :
-* Imputs : fastq files
-* Imputs : fasta files
-* Output : bam files
-*/
-
-/*                      fasta indexing                                     */
-params.fasta = "$baseDir/data/bam/*.fasta"
-
-log.info "fasta files : ${params.fasta}"
-
-Channel
-  .fromPath( params.fasta )
-  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
-  .set { fasta_file }
-
-process index_fasta {
-  tag "$fasta.baseName"
-  publishDir "results/mapping/index/", mode: 'copy'
-
-  input:
-    file fasta from fasta_file
-
-  output:
-    file "*.index*" into index_files
-
-  script:
-"""
-hisat2-build ${fasta} ${fasta.baseName}.index
-"""
-}
-
-/*
-* for single-end data
-*/
-
-params.fastq = "$baseDir/data/fastq/*.fastq"
-params.index = "$baseDir/data/index/*.index*"
-
-log.info "fastq files : ${params.fastq}"
-log.info "index files : ${params.index}"
-
-Channel
-  .fromPath( params.fastq )
-  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
-  .set { fastq_files }
-Channel
-  .fromPath( params.index )
-  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
-  .set { index_files }
-
-process mapping_fastq {
-  tag "$reads.baseName"
-  cpus 4
-  publishDir "results/mapping/", mode: 'copy'
-
-  input:
-  file reads from fastq_files
-  file index from index_files.toList()
-
-  output:
-  file "*" into count_files
-
-  script:
-"""
-hisat2 -x ${file(file(index[0]).baseName).baseName} -U ${reads} -S ${reads.baseName}.sam -p ${task.cpus}
-"""
-}
-
-/*
-* for paired-end data
-*/
-
-params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq"
-params.index = "$baseDir/data/index/*.index.*"
-
-log.info "fastq files : ${params.fastq}"
-log.info "index files : ${params.index}"
-
-Channel
-  .fromFilePairs( params.fastq )
-  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
-  .set { fastq_files }
-Channel
-  .fromPath( params.index )
-  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
-  .set { index_files }
-
-process mapping_fastq {
-  tag "$reads"
-  //tag "$index.baseName"
-  cpus 4
-  publishDir "results/mapping/", mode: 'copy'
-
-  input:
-  set pair_id, file(reads) from fastq_files
-  file index from index_files.toList()
-
-  output:
-  file "*" into counts_files
-
-  script:
-"""
-hisat2 -x ${file(file(index[0]).baseName).baseName} -1 ${reads[0]} -2 ${reads[1]} -S ${pair_id}.sam -p ${task.cpus}
-"""
-}
-
-/*
-* converting sam into bam
-*/
-
-/*                      sam to bam                                    */
-params.sam = "$baseDir/data/bam/*.sam"
-
-log.info "sam files : ${params.sam}"
-
-Channel
-  .fromPath( params.sam )
-  .ifEmpty { error "Cannot find any sam files matching: ${params.sam}" }
-  .set { sam_files }
-
-process bam_converter {
-  tag "$sam"
-  cpus 4
-  publishDir "results/mapping/bam/", mode: 'copy'
-
-  input:
-    file sam from sam_files
-
-  output:
-    file "*.bam" into bam_files
-
-  script:
-"""
-samtools view -@ ${task.cpus} -bS ${sam} > ${sam.baseName}.bam
-"""
-}
diff --git a/src/nf_modules/hisat2/indexing.config b/src/nf_modules/hisat2/indexing.config
index dbbb4a2e..47c14a57 100644
--- a/src/nf_modules/hisat2/indexing.config
+++ b/src/nf_modules/hisat2/indexing.config
@@ -4,8 +4,8 @@ profiles {
     docker.enabled = true
     process {
       withName: index_fasta {
-        cpus = 4
         container = "hisat2:2.0.0"
+        cpus = 4
       }
     }
   }
@@ -18,7 +18,7 @@ profiles {
       }
     }
   }
-  sge {
+  psmn {
     process{
       withName: index_fasta {
         beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
diff --git a/src/nf_modules/hisat2/indexing.nf b/src/nf_modules/hisat2/indexing.nf
index 563a13c2..1b11b3ef 100644
--- a/src/nf_modules/hisat2/indexing.nf
+++ b/src/nf_modules/hisat2/indexing.nf
@@ -27,6 +27,6 @@ process index_fasta {
 
   script:
 """
-hisat2-build -p {task.cpus} ${fasta} ${fasta.baseName}.index
+hisat2-build -p ${task.cpus} ${fasta} ${fasta.baseName}.index
 """
 }
diff --git a/src/nf_modules/hisat2/mapping_paired.nf b/src/nf_modules/hisat2/mapping_paired.nf
index d7bf9696..28b37e00 100644
--- a/src/nf_modules/hisat2/mapping_paired.nf
+++ b/src/nf_modules/hisat2/mapping_paired.nf
@@ -23,15 +23,26 @@ process mapping_fastq {
 
   output:
   file "*" into counts_files
+  set pair_id, "*.bam" into bam_files
+  file "*_report.txt" into mapping_report
 
   script:
   index_id = index[0]
   for (index_file in index) {
-    if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
-        index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
+    if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) {
+        index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1]
     }
   }
 """
-hisat2 -x ${index_id} -1 ${reads[0]} -2 ${reads[1]} -S ${pair_id}.sam -p ${task.cpus}
+hisat2 -p ${task.cpus} \
+  -x ${index_id} \
+  -1 ${reads[0]} \
+  -2 ${reads[1]} 2> \
+${pair_id}_hisat2_report.txt | \
+samtools view -Sb - > ${pair_id}.bam
+
+if grep -q "Error" ${pair_id}_hisat2_report.txt; then
+  exit 1
+fi
 """
 }
diff --git a/src/nf_modules/hisat2/mapping_single.nf b/src/nf_modules/hisat2/mapping_single.nf
index 93506140..0fdb729e 100644
--- a/src/nf_modules/hisat2/mapping_single.nf
+++ b/src/nf_modules/hisat2/mapping_single.nf
@@ -28,15 +28,26 @@ process mapping_fastq {
 
   output:
   file "*" into count_files
+  set file_id, "*.bam" into bam_files
+  file "*_report.txt" into mapping_report
 
   script:
   index_id = index[0]
   for (index_file in index) {
-    if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) {
-        index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1]
+    if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) {
+        index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1]
     }
   }
 """
-hisat2 -x ${index_id} -U ${reads} -S ${file_id}.sam -p ${task.cpus}
+hisat2 -p ${task.cpus} \
+ -x ${index_id} \
+ -U ${reads} 2> \
+${file_id}_hisat2_report.txt | \
+samtools view -Sb - > ${file_id}.bam
+
+if grep -q "Error" ${file_id}_hisat2_report.txt; then
+  exit 1
+fi
+
 """
 }
diff --git a/src/nf_modules/hisat2/tests.sh b/src/nf_modules/hisat2/tests.sh
index 261e9f2f..50e43966 100755
--- a/src/nf_modules/hisat2/tests.sh
+++ b/src/nf_modules/hisat2/tests.sh
@@ -1,21 +1,39 @@
-nextflow src/nf_modules/Hisat2/test/index.nf \
-  -c src/nf_modules/Hisat2/hisat2.config \
+./nextflow src/nf_modules/hisat2/indexing.nf \
+  -c src/nf_modules/hisat2/indexing.config \
   -profile docker \
-  --fasta "data/tiny_dataset/fasta/tiny_v2.fasta"
+  --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" \
+  -resume
 
-nextflow src/nf_modules/Hisat2/test/mapping_paired.nf \
-  -c src/nf_modules/Hisat2/hisat2.config \
+./nextflow src/nf_modules/hisat2/mapping_paired.nf \
+  -c src/nf_modules/hisat2/mapping_paired.config \
   -profile docker \
   --index "results/mapping/index/tiny_v2.index*" \
-  --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq"
+  --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq" \
+  -resume
 
-nextflow src/nf_modules/Hisat2/test/mapping_single.nf \
-  -c src/nf_modules/Hisat2/hisat2.config \
+./nextflow src/nf_modules/hisat2/mapping_single.nf \
+  -c src/nf_modules/hisat2/mapping_single.config \
   -profile docker \
   --index "results/mapping/index/tiny_v2.index*" \
-  --fastq "data/tiny_dataset/fastq/tiny*_S.fastq"
+  --fastq "data/tiny_dataset/fastq/tiny*_S.fastq" \
+  -resume
 
-nextflow src/nf_modules/Hisat2/test/bam_converter.nf \
-  -c src/nf_modules/Hisat2/hisat2.config \
-  -profile docker \
-  --sam "results/mapping/*.sam" \
+if [ -x "$(command -v singularity)" ]; then
+./nextflow src/nf_modules/hisat2/indexing.nf \
+  -c src/nf_modules/hisat2/indexing.config \
+  -profile singularity \
+  --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" \
+  -resume
+
+./nextflow src/nf_modules/hisat2/mapping_paired.nf \
+  -c src/nf_modules/hisat2/mapping_paired.config \
+  -profile singularity \
+  --index "results/mapping/index/tiny_v2.index*" \
+  --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq"
+
+./nextflow src/nf_modules/hisat2/mapping_single.nf \
+  -c src/nf_modules/hisat2/mapping_single.config \
+  -profile singularity \
+  --index "results/mapping/index/tiny_v2.index*" \
+  --fastq "data/tiny_dataset/fastq/tiny*_S.fastq"
+fi
-- 
GitLab