Skip to content
Snippets Groups Projects
Commit afb02683 authored by elabaron's avatar elabaron
Browse files

add src/build_index_mapping_single_end nf and config scripts

parent abe319d1
Branches
No related tags found
No related merge requests found
// Execution profiles. Each profile configures the three processes selected
// by name below: index_fasta, mapping_fastq and sort_bam.
profiles {

  // Docker containers.
  docker {
    docker.enabled = true
    docker.temp = 'auto'
    process {
      withName: index_fasta {
        cpus = 4
        container = "lbmc/hisat2:2.1.0"
      }
      withName: mapping_fastq {
        cpus = 4
        container = "lbmc/hisat2:2.1.0"
      }
      withName: sort_bam {
        cpus = 4
        container = "lbmc/samtools:1.7"
      }
    }
  }

  // Singularity containers, cached under ./bin/.
  singularity {
    singularity.enabled = true
    singularity.cacheDir = "./bin/"
    process {
      withName: index_fasta {
        cpus = 4
        container = "lbmc/hisat2:2.1.0"
      }
      withName: mapping_fastq {
        cpus = 4
        container = "lbmc/hisat2:2.1.0"
      }
      withName: sort_bam {
        cpus = 4
        container = "lbmc/samtools:1.7"
      }
    }
  }

  // SGE executor; tools come from conda environments (index_fasta, sort_bam)
  // or environment modules (mapping_fastq).
  psmn {
    process {
      withName: index_fasta {
        // software environment
        beforeScript = "source $baseDir/.conda_psmn.sh"
        conda = "$baseDir/.conda_envs/hisat2_2.1.0"
        // scheduler
        executor = "sge"
        clusterOptions = "-cwd -V"
        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
        penv = 'openmp16'
        // resources
        cpus = 16
        memory = "20GB"
        time = "12h"
      }
      withName: mapping_fastq {
        // software environment
        beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
        module = "hisat2/2.1.0:samtools/1.7"
        // scheduler
        executor = "sge"
        clusterOptions = "-cwd -V"
        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
        penv = 'openmp16'
        // resources
        cpus = 16
        memory = "20GB"
        time = "12h"
      }
      withName: sort_bam {
        // software environment
        beforeScript = "source $baseDir/.conda_psmn.sh"
        conda = "$baseDir/.conda_envs/samtools_1.7"
        // scheduler
        executor = "sge"
        clusterOptions = "-cwd -V"
        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
        penv = 'openmp16'
        // resources
        cpus = 16
        memory = "30GB"
        time = "24h"
      }
    }
  }

  // SGE executor with Singularity containers; tasks run in a scratch
  // directory with inputs copied in and outputs rsync'ed back.
  ccin2p3 {
    singularity.enabled = true
    singularity.cacheDir = "$baseDir/.singularity_in2p3/"
    singularity.runOptions = "--bind /pbs,/sps,/scratch"
    process {
      withName: index_fasta {
        container = "lbmc/hisat2:2.1.0"
        // staging
        scratch = true
        stageInMode = "copy"
        stageOutMode = "rsync"
        // scheduler
        executor = "sge"
        queue = 'huge'
        cpus = 1
        // the string literal is kept exactly as written (escaped line break)
        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n\
"
      }
      withName: mapping_fastq {
        container = "lbmc/hisat2:2.1.0"
        // staging
        scratch = true
        stageInMode = "copy"
        stageOutMode = "rsync"
        // scheduler
        executor = "sge"
        queue = 'huge'
        cpus = 1
        // the string literal is kept exactly as written (escaped line break)
        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n\
"
      }
      withName: sort_bam {
        container = "lbmc/samtools:1.7"
        // staging
        scratch = true
        stageInMode = "copy"
        stageOutMode = "rsync"
        // scheduler
        executor = "sge"
        queue = 'huge'
        cpus = 1
        // the string literal is kept exactly as written (escaped line break)
        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n\
"
      }
    }
  }
}
/*
 * RibosomeProfiling mapping
 */

// Command-line parameters; each defaults to an empty string.
params.input = ""
params.genome = ""
params.output = ""

// Echo the effective parameters at start-up.
log.info "input files (rRNA depleted fastq) : ${params.input}"
log.info "genome : ${params.genome}"
log.info "output folder : ${params.output}"

// Channel of genome fasta file(s); abort when the pattern matches nothing.
fasta_file = Channel
  .fromPath( params.genome )
  .ifEmpty { error "Cannot find any fasta files matching: ${params.genome}" }

// Channel of [file_id, file] pairs, where file_id is the part of the
// file's base name that precedes the first dot.
input_file = Channel
  .fromPath( params.input )
  .ifEmpty { error "Cannot find any input files for mapping : ${params.input}" }
  .map { fastq ->
    def file_id = (fastq.baseName =~ /([^\.]*)/)[0][1]
    [file_id, fastq]
  }
/* fasta indexing */
// Builds a HISAT2 index from each fasta file, using the fasta base name as
// the index prefix, and copies the resulting *.ht2 files to
// <params.output>/index/.
process index_fasta {
  publishDir "${params.output}/index/", mode: 'copy'
  tag "$fasta.baseName"

  input:
  file fasta from fasta_file

  output:
  file "*.ht2" into index_files

  script:
  def index_prefix = fasta.baseName
  """
  hisat2-build -p ${task.cpus} ${fasta} ${index_prefix}
  """
}
/* mapping for single-end data */
// Aligns each single-end read file against the HISAT2 index and writes a BAM
// holding only the mapped reads (samtools -F 4 drops unmapped ones).
//
// Inputs : [file_id, reads] pairs from input_file, plus all index files.
// Outputs: [file_id, <file_id>.bam] into bam_files and
//          <file_id>_hisat2_report.txt (hisat2's stderr) into mapping_report.
// The task exits with status 1 when the report contains "Error".
process mapping_fastq {
  tag "$file_id"

  input:
  set file_id, file(reads) from input_file
  file index from index_files.collect()

  output:
  set file_id, "*.bam" into bam_files
  file "*_report.txt" into mapping_report

  script:
  // Derive the index prefix from the "<prefix>.1.ht2" file; fall back to the
  // first staged index file when no such file is found.
  def index_id = index[0]
  for (index_file in index) {
    if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) {
      index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1]
    }
  }
  // --norc must precede the stderr redirection: previously "2>" was directly
  // followed by "--norc", so stderr went to a file named "--norc", the option
  // was never passed to hisat2, and the report file was never created.
  """
  hisat2 -p ${task.cpus} \
    -x ${index_id} \
    -U ${reads} \
    --norc \
    2> ${file_id}_hisat2_report.txt | \
    samtools view -Sb -F 4 - > ${file_id}.bam

  if grep -q "Error" ${file_id}_hisat2_report.txt; then
    exit 1
  fi
  """
}
/* sorting and indexing bams */
// Sorts each BAM with samtools, indexes the sorted file, and copies both the
// sorted BAM and its .bai index to <params.output>/mapping/.
process sort_bam {
  publishDir "${params.output}/mapping/", mode: 'copy'
  tag "$file_id"

  input:
  set file_id, file(bam) from bam_files

  output:
  set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files

  script:
  def sorted_bam = "${file_id}_sorted.bam"
  """
  samtools sort -@ ${task.cpus} -O BAM -o ${sorted_bam} ${bam}
  samtools index ${sorted_bam}
  """
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment