Skip to content
Snippets Groups Projects
Verified Commit 59b0f72d authored by Laurent Modolo's avatar Laurent Modolo
Browse files

hisat2: merge samtools and hisat2 in the process

parent 58d2e60a
No related branches found
No related tags found
No related merge requests found
/*
* SAMtools :
* Inputs : sam files
* Output : bam files
*/
/* sam to bam */
// Default glob for the SAM files to convert (overridable with --sam).
params.sam = "$baseDir/data/bam/*.sam"
log.info("sam files : ${params.sam}")

// Emit every file matching the glob on `sam_files`; abort when nothing matches.
Channel.fromPath(params.sam)
  .ifEmpty { error("Cannot find any sam files matching: ${params.sam}") }
  .set { sam_files }
/*
 * bam_converter: runs `samtools view` on each file received from `sam_files`
 * and emits the resulting *.bam file on `bam_files`.
 */
process bam_converter {
  // Four cpus are requested per task; tasks are labelled with the input file.
  cpus 4
  tag "$sam"
  // Produced files are copied (not linked) into the results directory.
  publishDir "results/mapping/bam/", mode: 'copy'

  input:
    file sam from sam_files

  output:
    file "*.bam" into bam_files

  script:
    // The thread count given to samtools follows the task's cpu allocation,
    // and the output file reuses the base name of the input file.
    """
    samtools view -@ ${task.cpus} -bS ${sam} > ${sam.baseName}.bam
    """
}
/*
 * Execution profiles for the index_fasta, mapping_fastq and bam_converter
 * processes.
 *   docker : enables Docker and assigns a container image to each process.
 *   sge    : runs each process with the "sge" executor after loading the
 *            matching environment module in `beforeScript`.
 */
profiles {
  docker {
    docker.temp = 'auto'
    docker.enabled = true
    process {
      $index_fasta {
        container = "hisat2:2.0.0"
      }
      $mapping_fastq {
        container = "hisat2:2.0.0"
      }
      $bam_converter {
        container = "samtools:1.7"
      }
    }
  }
  sge {
    // queueSize and pollInterval are settings of the `executor` scope, not
    // process directives: declared inside a process selector they are not
    // applied. They are set once here, restricted to the SGE executor.
    executor {
      $sge {
        queueSize = 1000
        pollInterval = '60sec'
      }
    }
    process {
      $index_fasta {
        beforeScript = "module purge; module load Hisat2/2.0.0"
        executor = "sge"
        cpus = 1
        memory = "5GB"
        time = "6h"
        queue = 'h6-E5-2667v4deb128'
        penv = 'openmp8'
      }
      $mapping_fastq {
        beforeScript = "module purge; module load Hisat2/2.0.0"
        executor = "sge"
        cpus = 4
        memory = "5GB"
        time = "6h"
        queue = 'h6-E5-2667v4deb128'
        penv = 'openmp8'
      }
      $bam_converter {
        beforeScript = "module purge; module load SAMtools/1.5"
        executor = "sge"
        cpus = 4
        memory = "5GB"
        time = "6h"
        queue = 'h6-E5-2667v4deb128'
        penv = 'openmp8'
      }
    }
  }
}
/*
* Hisat2 :
* Inputs : fastq files
* Inputs : fasta files
* Output : bam files
*/
/* fasta indexing */
// Default glob for the fasta files to index (overridable with --fasta).
params.fasta = "$baseDir/data/bam/*.fasta"
log.info("fasta files : ${params.fasta}")

// Emit every matching file on `fasta_file`; abort when nothing matches.
Channel.fromPath(params.fasta)
  .ifEmpty { error("Cannot find any fasta files matching: ${params.fasta}") }
  .set { fasta_file }
/*
 * index_fasta: runs `hisat2-build` on each file received from `fasta_file`
 * and emits every produced "*.index*" file on `index_files`.
 */
process index_fasta {
  // Produced index files are copied into the results directory.
  publishDir "results/mapping/index/", mode: 'copy'
  // Tasks are labelled with the fasta file name minus its extension.
  tag "$fasta.baseName"

  input:
    file fasta from fasta_file

  output:
    file "*.index*" into index_files

  script:
    // The index prefix is the fasta base name followed by ".index", which is
    // what the "*.index*" output glob above picks up.
    """
    hisat2-build ${fasta} ${fasta.baseName}.index
    """
}
/*
* for single-end data
*/
// Default globs for the single-end reads and the index files
// (overridable with --fastq and --index).
params.fastq = "$baseDir/data/fastq/*.fastq"
params.index = "$baseDir/data/index/*.index*"
log.info("fastq files : ${params.fastq}")
log.info("index files : ${params.index}")

// One channel item per fastq file; abort when nothing matches.
Channel.fromPath(params.fastq)
  .ifEmpty { error("Cannot find any fastq files matching: ${params.fastq}") }
  .set { fastq_files }

// One channel item per index file; abort when nothing matches.
Channel.fromPath(params.index)
  .ifEmpty { error("Cannot find any index files matching: ${params.index}") }
  .set { index_files }
/*
 * mapping_fastq (single-end): runs `hisat2` on each fastq file received from
 * `fastq_files` against the index files, writing <reads base name>.sam.
 * Every file matched by "*" is emitted on `count_files`.
 */
process mapping_fastq {
  cpus 4
  // Tasks are labelled with the reads file name minus its extension.
  tag "$reads.baseName"
  publishDir "results/mapping/", mode: 'copy'

  input:
    file reads from fastq_files
    // toList() hands the index files to the process as a single list.
    file index from index_files.toList()

  output:
    file "*" into count_files

  script:
    // The index prefix passed to -x is the name of the first index file with
    // its last two extensions removed (two successive baseName calls).
    def index_prefix = file(file(index[0]).baseName).baseName
    """
    hisat2 -x ${index_prefix} -U ${reads} -S ${reads.baseName}.sam -p ${task.cpus}
    """
}
/*
* for paired-end data
*/
// Default globs for the paired-end reads and the index files
// (overridable with --fastq and --index).
params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq"
params.index = "$baseDir/data/index/*.index.*"
log.info("fastq files : ${params.fastq}")
log.info("index files : ${params.index}")

// Read files are grouped with fromFilePairs; abort when nothing matches.
Channel.fromFilePairs(params.fastq)
  .ifEmpty { error("Cannot find any fastq files matching: ${params.fastq}") }
  .set { fastq_files }

// One channel item per index file; abort when nothing matches.
Channel.fromPath(params.index)
  .ifEmpty { error("Cannot find any index files matching: ${params.index}") }
  .set { index_files }
/*
 * mapping_fastq (paired-end): runs `hisat2` on each (pair_id, reads) item
 * received from `fastq_files` against the index files, writing <pair_id>.sam.
 * Every file matched by "*" is emitted on `counts_files`.
 */
process mapping_fastq {
  // Label tasks with the pair identifier: `reads` is the list of both read
  // files, so using it as the tag printed the whole list in every log line.
  tag "$pair_id"
  cpus 4
  publishDir "results/mapping/", mode: 'copy'

  input:
    // reads[0] and reads[1] are passed to hisat2 as -1 and -2 respectively.
    set pair_id, file(reads) from fastq_files
    // toList() hands the index files to the process as a single list.
    file index from index_files.toList()

  output:
    file "*" into counts_files

  script:
    // The index prefix passed to -x is the name of the first index file with
    // its last two extensions removed (two successive baseName calls).
    """
    hisat2 -x ${file(file(index[0]).baseName).baseName} -1 ${reads[0]} -2 ${reads[1]} -S ${pair_id}.sam -p ${task.cpus}
    """
}
/*
* converting sam into bam
*/
/* sam to bam */
// Default location of the SAM files (overridable with --sam).
params.sam = "$baseDir/data/bam/*.sam"
log.info("sam files : ${params.sam}")

// Feed `sam_files` with each file matching the glob; fail if there is none.
Channel.fromPath(params.sam)
  .ifEmpty { error("Cannot find any sam files matching: ${params.sam}") }
  .set { sam_files }
/*
 * bam_converter: one task per file from `sam_files`; each task runs
 * `samtools view` and emits the *.bam file it wrote on `bam_files`.
 */
process bam_converter {
  // Results are copied into results/mapping/bam/.
  publishDir "results/mapping/bam/", mode: 'copy'
  cpus 4
  tag "$sam"

  input:
    file sam from sam_files

  output:
    file "*.bam" into bam_files

  script:
    // samtools receives the task's cpu count through -@; the output file is
    // named after the input file's base name.
    """
    samtools view -@ ${task.cpus} -bS ${sam} > ${sam.baseName}.bam
    """
}
...@@ -4,8 +4,8 @@ profiles { ...@@ -4,8 +4,8 @@ profiles {
docker.enabled = true docker.enabled = true
process { process {
withName: index_fasta { withName: index_fasta {
cpus = 4
container = "hisat2:2.0.0" container = "hisat2:2.0.0"
cpus = 4
} }
} }
} }
...@@ -18,7 +18,7 @@ profiles { ...@@ -18,7 +18,7 @@ profiles {
} }
} }
} }
sge { psmn {
process{ process{
withName: index_fasta { withName: index_fasta {
beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules" beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
......
...@@ -27,6 +27,6 @@ process index_fasta { ...@@ -27,6 +27,6 @@ process index_fasta {
script: script:
""" """
hisat2-build -p {task.cpus} ${fasta} ${fasta.baseName}.index hisat2-build -p ${task.cpus} ${fasta} ${fasta.baseName}.index
""" """
} }
...@@ -23,15 +23,26 @@ process mapping_fastq { ...@@ -23,15 +23,26 @@ process mapping_fastq {
output: output:
file "*" into counts_files file "*" into counts_files
set pair_id, "*.bam" into bam_files
file "*_report.txt" into mapping_report
script: script:
index_id = index[0] index_id = index[0]
for (index_file in index) { for (index_file in index) {
if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) { if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) {
index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1] index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1]
} }
} }
""" """
hisat2 -x ${index_id} -1 ${reads[0]} -2 ${reads[1]} -S ${pair_id}.sam -p ${task.cpus} hisat2 -p ${task.cpus} \
-x ${index_id} \
-1 ${reads[0]} \
-2 ${reads[1]} 2> \
${pair_id}_hisat2_report.txt | \
samtools view -Sb - > ${pair_id}.bam
if grep -q "Error" ${pair_id}_hisat2_report.txt; then
exit 1
fi
""" """
} }
...@@ -28,15 +28,26 @@ process mapping_fastq { ...@@ -28,15 +28,26 @@ process mapping_fastq {
output: output:
file "*" into count_files file "*" into count_files
set file_id, "*.bam" into bam_files
file "*_report.txt" into mapping_report
script: script:
index_id = index[0] index_id = index[0]
for (index_file in index) { for (index_file in index) {
if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) { if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) {
index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1] index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1]
} }
} }
""" """
hisat2 -x ${index_id} -U ${reads} -S ${file_id}.sam -p ${task.cpus} hisat2 -p ${task.cpus} \
-x ${index_id} \
-U ${reads} 2> \
${file_id}_hisat2_report.txt | \
samtools view -Sb - > ${file_id}.bam
if grep -q "Error" ${file_id}_hisat2_report.txt; then
exit 1
fi
""" """
} }
nextflow src/nf_modules/Hisat2/test/index.nf \ ./nextflow src/nf_modules/hisat2/indexing.nf \
-c src/nf_modules/Hisat2/hisat2.config \ -c src/nf_modules/hisat2/indexing.config \
-profile docker \ -profile docker \
--fasta "data/tiny_dataset/fasta/tiny_v2.fasta" --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" \
-resume
nextflow src/nf_modules/Hisat2/test/mapping_paired.nf \ ./nextflow src/nf_modules/hisat2/mapping_paired.nf \
-c src/nf_modules/Hisat2/hisat2.config \ -c src/nf_modules/hisat2/mapping_paired.config \
-profile docker \ -profile docker \
--index "results/mapping/index/tiny_v2.index*" \ --index "results/mapping/index/tiny_v2.index*" \
--fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq" --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq" \
-resume
nextflow src/nf_modules/Hisat2/test/mapping_single.nf \ ./nextflow src/nf_modules/hisat2/mapping_single.nf \
-c src/nf_modules/Hisat2/hisat2.config \ -c src/nf_modules/hisat2/mapping_single.config \
-profile docker \ -profile docker \
--index "results/mapping/index/tiny_v2.index*" \ --index "results/mapping/index/tiny_v2.index*" \
--fastq "data/tiny_dataset/fastq/tiny*_S.fastq" --fastq "data/tiny_dataset/fastq/tiny*_S.fastq" \
-resume
nextflow src/nf_modules/Hisat2/test/bam_converter.nf \ if [ -x "$(command -v singularity)" ]; then
-c src/nf_modules/Hisat2/hisat2.config \ ./nextflow src/nf_modules/hisat2/indexing.nf \
-profile docker \ -c src/nf_modules/hisat2/indexing.config \
--sam "results/mapping/*.sam" \ -profile singularity \
--fasta "data/tiny_dataset/fasta/tiny_v2.fasta" \
-resume
./nextflow src/nf_modules/hisat2/mapping_paired.nf \
-c src/nf_modules/hisat2/mapping_paired.config \
-profile singularity \
--index "results/mapping/index/tiny_v2.index*" \
--fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq"
./nextflow src/nf_modules/hisat2/mapping_single.nf \
-c src/nf_modules/hisat2/mapping_single.config \
-profile singularity \
--index "results/mapping/index/tiny_v2.index*" \
--fastq "data/tiny_dataset/fastq/tiny*_S.fastq"
fi
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment