Skip to content
Snippets Groups Projects
Commit f2ed7699 authored by elabaron's avatar elabaron
Browse files

add dual_mapping nf scripts

parent dedb0cba
No related branches found
No related tags found
No related merge requests found
profiles {
// SGE cluster profile: per-process executor, queue, resource and software
// environment settings, applied with `-profile sge`.
sge {
    process {
        // HISAT2 index construction (environment modules, 16-slot OpenMP queue).
        withName: index_fasta_hisat {
            beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
            module = "hisat2/2.1.0:samtools/1.7"
            executor = "sge"
            clusterOptions = "-cwd -V"
            memory = "20GB"
            cpus = 16
            time = "12h"
            queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
            penv = 'openmp16'
        }
        // Bowtie 1 index construction (conda environment).
        withName: index_fasta_bowtie {
            beforeScript = "source $baseDir/.conda_psmn.sh"
            conda = "$baseDir/.conda_envs/bowtie_1.2.2"
            executor = "sge"
            clusterOptions = "-cwd -V"
            memory = "20GB"
            cpus = 16
            time = "12h"
            queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
            penv = 'openmp16'
        }
        // HISAT2 mapping; samtools is loaded too because the process pipes
        // the alignments into `samtools view`.
        withName: hisat2 {
            beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
            module = "hisat2/2.1.0:samtools/1.7"
            executor = "sge"
            clusterOptions = "-cwd -V"
            memory = "20GB"
            cpus = 16
            time = "12h"
            queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
            penv = 'openmp16'
        }
        // Bowtie 1 mapping. This selector was missing, which left the
        // `bowtie` process without any SGE settings under this profile.
        // Settings mirror `index_fasta_bowtie`.
        // NOTE(review): the process pipes into `samtools view`; this assumes
        // the bowtie_1.2.2 conda environment also provides samtools -- confirm.
        withName: bowtie {
            beforeScript = "source $baseDir/.conda_psmn.sh"
            conda = "$baseDir/.conda_envs/bowtie_1.2.2"
            executor = "sge"
            clusterOptions = "-cwd -V"
            memory = "20GB"
            cpus = 16
            time = "12h"
            queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
            penv = 'openmp16'
        }
        // NOTE(review): no process named `sort_bam` is visible in the
        // accompanying script; kept unchanged in case another pipeline
        // relies on this profile.
        withName: sort_bam {
            beforeScript = "source $baseDir/.conda_psmn.sh"
            conda = "$baseDir/.conda_envs/samtools_1.7"
            executor = "sge"
            clusterOptions = "-cwd -V"
            cpus = 1
            memory = "20GB"
            time = "12h"
            queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128'
        }
        // Sorting, merging and indexing of the HISAT2 and Bowtie BAM files.
        withName: merge_bam {
            beforeScript = "source $baseDir/.conda_psmn.sh"
            conda = "$baseDir/.conda_envs/samtools_1.7"
            executor = "sge"
            clusterOptions = "-cwd -V"
            cpus = 16
            memory = "30GB"
            time = "24h"
            queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
            penv = 'openmp16'
        }
    }
}
// Docker profile: every process runs in the matching lbmc container
// with 4 CPUs.
docker {
    docker.enabled = true
    docker.temp = 'auto'

    process {
        // Index construction
        withName: index_fasta_hisat {
            container = 'lbmc/hisat2:2.1.0'
            cpus = 4
        }
        withName: index_fasta_bowtie {
            container = 'lbmc/bowtie:1.2.2'
            cpus = 4
        }

        // Read mapping
        withName: hisat2 {
            container = 'lbmc/hisat2:2.1.0'
            cpus = 4
        }
        withName: bowtie {
            container = 'lbmc/bowtie:1.2.2'
            cpus = 4
        }

        // BAM sorting / merging / indexing
        withName: merge_bam {
            container = 'lbmc/samtools:1.7'
            cpus = 4
        }
    }
}
}
///////////////////////////////////////////////////////////////////////////////
////////////////////////////// INDEXING GENOMES /////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Reference genome(s) to index; override with `--fasta <path or glob>`.
params.fasta = "data/genome/NC001802.1.fa"

log.info "fasta files : ${params.fasta}"

// Emit every fasta file matching params.fasta; abort when nothing matches.
Channel
    .fromPath(params.fasta)
    .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
    .set { fasta_file }

// Duplicate the channel: one copy for each indexing process.
fasta_file.into { fasta_hisat; fasta_bowtie }
/* HISAT */
// Build a HISAT2 index from each fasta file. The resulting *ht2 files are
// copied to data/indexes/<fasta base name>_hisat/ and emitted on
// index_files_hisat.
process index_fasta_hisat {
    tag "${fasta.baseName}"
    publishDir "data/indexes/${fasta.baseName}_hisat/", mode: "copy"

    input:
    file fasta from fasta_hisat

    output:
    file "*ht2" into index_files_hisat

    script:
"""
hisat2-build -p ${task.cpus} ${fasta} ${fasta.baseName}
"""
}
/* BOWTIE 1 */
// Build a Bowtie 1 index from each fasta file. bowtie-build's output is
// captured in a report file, and the task fails when that report contains
// "Error". Index files (*ebwt) and the report are copied to
// results/<fasta base name>_bowtie/.
process index_fasta_bowtie {
    tag "${fasta.baseName}"
    publishDir "results/${fasta.baseName}_bowtie/", mode: "copy"

    input:
    file fasta from fasta_bowtie

    output:
    file "*ebwt" into index_files_bowtie
    file "*_report.txt" into indexing_report

    script:
"""
bowtie-build --threads ${task.cpus} -f ${fasta} ${fasta.baseName} \
&> ${fasta.baseName}_bowtie_report.txt
if grep -q "Error" ${fasta.baseName}_bowtie_report.txt; then
exit 1
fi
"""
}
///////////////////////////////////////////////////////////////////////////////
////////////////////////////// MAPPING READS ///////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Reads to map; override with `--fastq <glob>`.
params.fastq = "${baseDir}/data/fastq/*.fastq"
// params.index = "$baseDir/data/index/*.index*"

log.info "fastq files : ${params.fastq}"
// log.info "index files : ${params.index}"

// Emit [file_id, fastq] pairs, where file_id is the part of the file's
// base name that precedes the first dot.
Channel
    .fromPath(params.fastq)
    .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
    .map { fq -> [ (fq.baseName =~ /([^\.]*)/)[0][1], fq ] }
    .set { fastq_files }

/*Channel
.fromPath( params.index )
.ifEmpty { error "Cannot find any index files matching: ${params.index}" }
.set { index_files }*/
/* HISAT2 */
// Map each fastq file with HISAT2 against the index built above.
//
// Inputs : [file_id, fastq] pairs and the collected *ht2 index files.
// Outputs: [file_id, bam]        -> bam_hisat (filtered by samtools view
//                                   with -F 4 -F 16)
//          HISAT2 stderr report  -> hisat_report
//          [file_id, fastq.gz]   -> reads_non_aligned (written by --un-gz)
process hisat2 {
    tag "$file_id"

    input:
    set file_id, file(fastq_filtred) from fastq_files
    file index from index_files_hisat.collect()

    output:
    set file_id, "*.bam" into bam_hisat
    file "*.txt" into hisat_report
    set file_id, "*.fastq.gz" into reads_non_aligned

    script:
    // Derive the index prefix expected by `hisat2 -x` from the "<prefix>.1.ht2"
    // file; fall back to the first index file when no such file is found.
    index_id = index[0]
    for (index_file in index) {
        if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) {
            index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1]
        }
    }
    // hisat2's stderr goes to the report file and its stdout is piped into
    // samtools, so the report is inspected afterwards to make the task fail
    // when hisat2 reported an error (same convention as the bowtie process).
"""
hisat2 -x ${index_id} -p ${task.cpus} \
-U ${fastq_filtred} --un-gz ${file_id}_notaligned_hisat.fastq.gz \
--end-to-end --rna-strandness 'F' \
2> ${file_id}_hisat2_NY5.txt | samtools view -F 4 -F 16 -Sb - > ${file_id}.bam
if grep -q "Error" ${file_id}_hisat2_NY5.txt; then
exit 1
fi
"""
}
/* BOWTIE */
// Second mapping pass: align the reads emitted on reads_non_aligned with
// Bowtie 1.
//
// Outputs: [file_id, index_id, bam] -> bam_bowtie
//          last 19 lines of bowtie's stderr report -> mapping_report
// The task fails when the raw report contains "Error".
process bowtie {
    tag "${file_id}"

    input:
    set file_id, file(reads) from reads_non_aligned
    file index from index_files_bowtie.collect()

    output:
    set file_id, index_id, "*.bam" into bam_bowtie
    file "*_report.txt" into mapping_report

    script:
    // Index prefix = name of the "<prefix>.1.ebwt" file without its suffix
    // (the ".rev.1.ebwt" file is skipped); defaults to the first index file.
    // index_id is left undeclared on purpose: the output block reads it.
    index_id = index[0]
    for (candidate in index) {
        if (candidate =~ /.*\.1\.ebwt/ && !(candidate =~ /.*\.rev\.1\.ebwt/)) {
            index_id = ( candidate =~ /(.*)\.1\.ebwt/)[0][1]
        }
    }
"""
bowtie --best --fr -v 3 -k 1 --sam -p ${task.cpus} ${index_id} \
-q ${reads} 2> \
${file_id}_bowtie_report_tmp.txt | \
samtools view -F 4 -F 16 -Sb - > ${file_id}_bowtie.bam
if grep -q "Error" ${file_id}_bowtie_report_tmp.txt; then
exit 1
fi
tail -n 19 ${file_id}_bowtie_report_tmp.txt > ${file_id}_bowtie_report.txt
"""
}
///////////////////////////////////////////////////////////////////////////////
////////////////////////////// MERGE BAM /////////////////////////////
///////////////////////////////////////////////////////////////////////////////
// Pair the Bowtie and HISAT2 alignments of each sample on file_id:
// [file_id, index_id, bowtie_bam] joined with [file_id, hisat_bam]
// gives [file_id, index_id, bowtie_bam, hisat_bam].
bam_bowtie.join(bam_hisat)
    .set { merged_bam }
//merged_bam.println()

// Sort both BAM files of a sample, merge them into <file_id>_merged.bam and
// index the result. The merged BAM and its .bai are copied to
// results/05_<index_id>_mergedBAM/.
process merge_bam {
    tag "$file_id"
    publishDir "results/05_${index_id}_mergedBAM/", mode: 'copy'

    input:
    set file_id, index_id, file(bam_bowtie), file(bam_hisat) from merged_bam

    output:
    file "*merged.{bam,bam.bai}" into final_flow

    script:
    // The two sorted files are passed to `samtools merge` by name instead of
    // through a `*sort.bam` glob, which would also match a staged input BAM
    // whenever file_id itself ends in "sort".
"""
samtools sort -@ ${task.cpus} -o ${file_id}_bowtie_sort.bam ${bam_bowtie} && \
samtools sort -@ ${task.cpus} -o ${file_id}_hisat_sort.bam ${bam_hisat} && \
samtools merge ${file_id}_merged.bam \
${file_id}_bowtie_sort.bam ${file_id}_hisat_sort.bam && \
samtools index ${file_id}_merged.bam
"""
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment