From 59b0f72d0c712a7bbbb041b3d4dc695065072521 Mon Sep 17 00:00:00 2001 From: Laurent Modolo <laurent.modolo@ens-lyon.fr> Date: Fri, 29 Mar 2019 13:27:06 +0100 Subject: [PATCH] hisat2: merge samtools and hisat2 in the process --- src/nf_modules/hisat2/bam_converter.nf | 32 ------ src/nf_modules/hisat2/hisat2.config | 54 ---------- src/nf_modules/hisat2/hisat2.nf | 138 ------------------------ src/nf_modules/hisat2/indexing.config | 4 +- src/nf_modules/hisat2/indexing.nf | 2 +- src/nf_modules/hisat2/mapping_paired.nf | 17 ++- src/nf_modules/hisat2/mapping_single.nf | 17 ++- src/nf_modules/hisat2/tests.sh | 44 +++++--- 8 files changed, 62 insertions(+), 246 deletions(-) delete mode 100644 src/nf_modules/hisat2/bam_converter.nf delete mode 100644 src/nf_modules/hisat2/hisat2.config delete mode 100644 src/nf_modules/hisat2/hisat2.nf diff --git a/src/nf_modules/hisat2/bam_converter.nf b/src/nf_modules/hisat2/bam_converter.nf deleted file mode 100644 index f9627eb0..00000000 --- a/src/nf_modules/hisat2/bam_converter.nf +++ /dev/null @@ -1,32 +0,0 @@ -/* -* SAMtools : -* Imputs : sam files -* Output : bam files -*/ - -/* sam to bam */ -params.sam = "$baseDir/data/bam/*.sam" - -log.info "sam files : ${params.sam}" - -Channel - .fromPath( params.sam ) - .ifEmpty { error "Cannot find any sam files matching: ${params.sam}" } - .set { sam_files } - -process bam_converter { - tag "$sam" - cpus 4 - publishDir "results/mapping/bam/", mode: 'copy' - - input: - file sam from sam_files - - output: - file "*.bam" into bam_files - - script: -""" -samtools view -@ ${task.cpus} -bS ${sam} > ${sam.baseName}.bam -""" -} diff --git a/src/nf_modules/hisat2/hisat2.config b/src/nf_modules/hisat2/hisat2.config deleted file mode 100644 index c2988690..00000000 --- a/src/nf_modules/hisat2/hisat2.config +++ /dev/null @@ -1,54 +0,0 @@ -profiles { - docker { - docker.temp = 'auto' - docker.enabled = true - process { - $index_fasta { - container = "hisat2:2.0.0" - } - $mapping_fastq { - container = "hisat2:2.0.0" - } - $bam_converter { - container = "samtools:1.7" - } - } - } - sge { - process{ - $index_fasta { - beforeScript = "module purge; module load Hisat2/2.0.0" - executor = "sge" - cpus = 1 - memory = "5GB" - time = "6h" - queueSize = 1000 - pollInterval = '60sec' - queue = 'h6-E5-2667v4deb128' - penv = 'openmp8' - } - $mapping_fastq { - beforeScript = "module purge; module load Hisat2/2.0.0" - executor = "sge" - cpus = 4 - memory = "5GB" - time = "6h" - queueSize = 1000 - pollInterval = '60sec' - queue = 'h6-E5-2667v4deb128' - penv = 'openmp8' - } - $bam_converter { - beforeScript = "module purge; module load SAMtools/1.5" - executor = "sge" - cpus = 4 - memory = "5GB" - time = "6h" - queueSize = 1000 - pollInterval = '60sec' - queue = 'h6-E5-2667v4deb128' - penv = 'openmp8' - } - } - } -} diff --git a/src/nf_modules/hisat2/hisat2.nf b/src/nf_modules/hisat2/hisat2.nf deleted file mode 100644 index e8d5eb63..00000000 --- a/src/nf_modules/hisat2/hisat2.nf +++ /dev/null @@ -1,138 +0,0 @@ -/* -* Hisat2 : -* Imputs : fastq files -* Imputs : fasta files -* Output : bam files -*/ - -/* fasta indexing */ -params.fasta = "$baseDir/data/bam/*.fasta" - -log.info "fasta files : ${params.fasta}" - -Channel - .fromPath( params.fasta ) - .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" } - .set { fasta_file } - -process index_fasta { - tag "$fasta.baseName" - publishDir "results/mapping/index/", mode: 'copy' - - input: - file fasta from fasta_file - - output: - file "*.index*" into index_files - - script: -""" -hisat2-build ${fasta} ${fasta.baseName}.index -""" -} - -/* -* for single-end data -*/ - -params.fastq = "$baseDir/data/fastq/*.fastq" -params.index = "$baseDir/data/index/*.index*" - -log.info "fastq files : ${params.fastq}" -log.info "index files : ${params.index}" - -Channel - .fromPath( params.fastq ) - .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } - .set { fastq_files } -Channel - .fromPath( params.index ) - .ifEmpty { error "Cannot find any index files matching: ${params.index}" } - .set { index_files } - -process mapping_fastq { - tag "$reads.baseName" - cpus 4 - publishDir "results/mapping/", mode: 'copy' - - input: - file reads from fastq_files - file index from index_files.toList() - - output: - file "*" into count_files - - script: -""" -hisat2 -x ${file(file(index[0]).baseName).baseName} -U ${reads} -S ${reads.baseName}.sam -p ${task.cpus} -""" -} - -/* -* for paired-end data -*/ - -params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq" -params.index = "$baseDir/data/index/*.index.*" - -log.info "fastq files : ${params.fastq}" -log.info "index files : ${params.index}" - -Channel - .fromFilePairs( params.fastq ) - .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } - .set { fastq_files } -Channel - .fromPath( params.index ) - .ifEmpty { error "Cannot find any index files matching: ${params.index}" } - .set { index_files } - -process mapping_fastq { - tag "$reads" - //tag "$index.baseName" - cpus 4 - publishDir "results/mapping/", mode: 'copy' - - input: - set pair_id, file(reads) from fastq_files - file index from index_files.toList() - - output: - file "*" into counts_files - - script: -""" -hisat2 -x ${file(file(index[0]).baseName).baseName} -1 ${reads[0]} -2 ${reads[1]} -S ${pair_id}.sam -p ${task.cpus} -""" -} - -/* -* converting sam into bam -*/ - -/* sam to bam */ -params.sam = "$baseDir/data/bam/*.sam" - -log.info "sam files : ${params.sam}" - -Channel - .fromPath( params.sam ) - .ifEmpty { error "Cannot find any sam files matching: ${params.sam}" } - .set { sam_files } - -process bam_converter { - tag "$sam" - cpus 4 - publishDir "results/mapping/bam/", mode: 'copy' - - input: - file sam from sam_files - - output: - file "*.bam" into bam_files - - script: -""" -samtools view -@ ${task.cpus} -bS ${sam} > ${sam.baseName}.bam -""" -} diff --git a/src/nf_modules/hisat2/indexing.config b/src/nf_modules/hisat2/indexing.config index dbbb4a2e..47c14a57 100644 --- a/src/nf_modules/hisat2/indexing.config +++ b/src/nf_modules/hisat2/indexing.config @@ -4,8 +4,8 @@ profiles { docker.enabled = true process { withName: index_fasta { - cpus = 4 container = "hisat2:2.0.0" + cpus = 4 } } } @@ -18,7 +18,7 @@ profiles { } } } - sge { + psmn { process{ withName: index_fasta { beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules" diff --git a/src/nf_modules/hisat2/indexing.nf b/src/nf_modules/hisat2/indexing.nf index 563a13c2..1b11b3ef 100644 --- a/src/nf_modules/hisat2/indexing.nf +++ b/src/nf_modules/hisat2/indexing.nf @@ -27,6 +27,6 @@ process index_fasta { script: """ -hisat2-build -p {task.cpus} ${fasta} ${fasta.baseName}.index +hisat2-build -p ${task.cpus} ${fasta} ${fasta.baseName}.index """ } diff --git a/src/nf_modules/hisat2/mapping_paired.nf b/src/nf_modules/hisat2/mapping_paired.nf index d7bf9696..28b37e00 100644 --- a/src/nf_modules/hisat2/mapping_paired.nf +++ b/src/nf_modules/hisat2/mapping_paired.nf @@ -23,15 +23,26 @@ process mapping_fastq { output: file "*" into counts_files + set pair_id, "*.bam" into bam_files + file "*_report.txt" into mapping_report script: index_id = index[0] for (index_file in index) { - if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) { - index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1] + if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) { + index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1] } } """ -hisat2 -x ${index_id} -1 ${reads[0]} -2 ${reads[1]} -S ${pair_id}.sam -p ${task.cpus} +hisat2 -p ${task.cpus} \ + -x ${index_id} \ + -1 ${reads[0]} \ + -2 ${reads[1]} 2> \ +${pair_id}_hisat2_report.txt | \ +samtools view -Sb - > ${pair_id}.bam + +if grep -q "Error" ${pair_id}_hisat2_report.txt; then + exit 1 +fi """ } diff --git a/src/nf_modules/hisat2/mapping_single.nf b/src/nf_modules/hisat2/mapping_single.nf index 93506140..0fdb729e 100644 --- a/src/nf_modules/hisat2/mapping_single.nf +++ b/src/nf_modules/hisat2/mapping_single.nf @@ -28,15 +28,26 @@ process mapping_fastq { output: file "*" into count_files + set file_id, "*.bam" into bam_files + file "*_report.txt" into mapping_report script: index_id = index[0] for (index_file in index) { - if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) { - index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1] + if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) { + index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1] } } """ -hisat2 -x ${index_id} -U ${reads} -S ${file_id}.sam -p ${task.cpus} +hisat2 -p ${task.cpus} \ + -x ${index_id} \ + -U ${reads} 2> \ +${file_id}_hisat2_report.txt | \ +samtools view -Sb - > ${file_id}.bam + +if grep -q "Error" ${file_id}_hisat2_report.txt; then + exit 1 +fi + """ } diff --git a/src/nf_modules/hisat2/tests.sh b/src/nf_modules/hisat2/tests.sh index 261e9f2f..50e43966 100755 --- a/src/nf_modules/hisat2/tests.sh +++ b/src/nf_modules/hisat2/tests.sh @@ -1,21 +1,39 @@ -nextflow src/nf_modules/Hisat2/test/index.nf \ - -c src/nf_modules/Hisat2/hisat2.config \ +./nextflow src/nf_modules/hisat2/indexing.nf \ + -c src/nf_modules/hisat2/indexing.config \ -profile docker \ - --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" + --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" \ + -resume -nextflow src/nf_modules/Hisat2/test/mapping_paired.nf \ - -c src/nf_modules/Hisat2/hisat2.config \ +./nextflow src/nf_modules/hisat2/mapping_paired.nf \ + -c src/nf_modules/hisat2/mapping_paired.config \ -profile docker \ --index "results/mapping/index/tiny_v2.index*" \ - --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq" + --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq" \ + -resume -nextflow src/nf_modules/Hisat2/test/mapping_single.nf \ - -c src/nf_modules/Hisat2/hisat2.config \ +./nextflow src/nf_modules/hisat2/mapping_single.nf \ + -c src/nf_modules/hisat2/mapping_single.config \ -profile docker \ --index "results/mapping/index/tiny_v2.index*" \ - --fastq "data/tiny_dataset/fastq/tiny*_S.fastq" + --fastq "data/tiny_dataset/fastq/tiny*_S.fastq" \ + -resume -nextflow src/nf_modules/Hisat2/test/bam_converter.nf \ - -c src/nf_modules/Hisat2/hisat2.config \ - -profile docker \ - --sam "results/mapping/*.sam" \ +if [ -x "$(command -v singularity)" ]; then +./nextflow src/nf_modules/hisat2/indexing.nf \ + -c src/nf_modules/hisat2/indexing.config \ + -profile singularity \ + --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" \ + -resume + +./nextflow src/nf_modules/hisat2/mapping_paired.nf \ + -c src/nf_modules/hisat2/mapping_paired.config \ + -profile singularity \ + --index "results/mapping/index/tiny_v2.index*" \ + --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq" + +./nextflow src/nf_modules/hisat2/mapping_single.nf \ + -c src/nf_modules/hisat2/mapping_single.config \ + -profile singularity \ + --index "results/mapping/index/tiny_v2.index*" \ + --fastq "data/tiny_dataset/fastq/tiny*_S.fastq" +fi -- GitLab