diff --git a/src/building_index_and_mapping_single_end.config b/src/building_index_and_mapping_single_end.config
new file mode 100644
index 0000000000000000000000000000000000000000..b727b621742f8139303c89f68553fa92094d6d42
--- /dev/null
+++ b/src/building_index_and_mapping_single_end.config
@@ -0,0 +1,121 @@
+// Execution profiles for building_index_and_mapping_single_end.nf.
+// Each profile sets, per process name, where and how that process runs.
+profiles {
+  // docker: Docker is enabled; every process runs in its container with 4 CPUs.
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      withName: index_fasta {
+        container = "lbmc/hisat2:2.1.0"
+        cpus = 4
+      }
+      withName: mapping_fastq {
+        cpus = 4
+        container = "lbmc/hisat2:2.1.0"
+      }
+      withName: sort_bam {
+        container = "lbmc/samtools:1.7"
+        cpus = 4
+      }
+    }
+  }
+  // singularity: same images and CPU counts as docker; image cache in ./bin/.
+  singularity {
+    singularity.enabled = true
+    singularity.cacheDir = "./bin/"
+    process {
+      withName: index_fasta {
+        cpus = 4
+        container = "lbmc/hisat2:2.1.0"
+      }
+      withName: mapping_fastq {
+        cpus = 4
+        container = "lbmc/hisat2:2.1.0"
+      }
+      withName: sort_bam {
+        container = "lbmc/samtools:1.7"
+        cpus = 4
+      }
+    }
+  }
+  // psmn: SGE executor, 16 CPUs with the 'openmp16' penv; tools come from conda envs or modules.
+  psmn{
+    process{
+      withName: index_fasta {
+        beforeScript = "source $baseDir/.conda_psmn.sh"
+        conda = "$baseDir/.conda_envs/hisat2_2.1.0"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        memory = "20GB"
+        cpus = 16
+        time = "12h"
+        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
+        penv = 'openmp16'
+      }
+      withName: mapping_fastq {
+        beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
+        module = "hisat2/2.1.0:samtools/1.7"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        memory = "20GB"
+        cpus = 16
+        time = "12h"
+        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
+        penv = 'openmp16'
+      }
+      withName: sort_bam {
+        beforeScript = "source $baseDir/.conda_psmn.sh"
+        conda = "$baseDir/.conda_envs/samtools_1.7"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 16
+        memory = "30GB"
+        time = "24h"
+        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
+        penv = 'openmp16'
+      }
+    }
+  }
+  // ccin2p3: SGE executor with Singularity containers, 1 CPU on the 'huge' queue, scratch enabled.
+  ccin2p3 {
+    singularity.enabled = true
+    singularity.cacheDir = "$baseDir/.singularity_in2p3/"
+    singularity.runOptions = "--bind /pbs,/sps,/scratch"
+    process{
+      withName: index_fasta {
+        container = "lbmc/hisat2:2.1.0"
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "sge"
+        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n\
+        "
+        cpus = 1
+        queue = 'huge'
+      }
+      withName: mapping_fastq {
+        container = "lbmc/hisat2:2.1.0"
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "sge"
+        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n\
+        "
+        cpus = 1
+        queue = 'huge'
+      }
+      withName: sort_bam {
+        container = "lbmc/samtools:1.7"
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "sge"
+        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n\
+        "
+        cpus = 1
+        queue = 'huge'
+      }
+    }
+  }
+}
diff --git a/src/building_index_and_mapping_single_end.nf b/src/building_index_and_mapping_single_end.nf
new file mode 100644
index 0000000000000000000000000000000000000000..919e06c0a62ed0b62c43bd2f02ae5314e293cbb7
--- /dev/null
+++ b/src/building_index_and_mapping_single_end.nf
@@ -0,0 +1,98 @@
+/*
+* RibosomeProfiling mapping
+*/
+
+params.input = ""
+params.genome = ""
+params.output = ""
+
+log.info "input files (rRNA depleted fastq) : ${params.input}"
+log.info "genome : ${params.genome}"
+log.info "output folder : ${params.output}"
+
+Channel
+  .fromPath( params.genome )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.genome}" }
+  .set { fasta_file }
+
+// Pair each input file with an id: the part of its base name before the first dot.
+Channel
+  .fromPath( params.input )
+  .ifEmpty { error "Cannot find any input files for mapping : ${params.input}" }
+  .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
+  .set { input_file }
+
+/* fasta indexing */
+
+process index_fasta {
+  tag "$fasta.baseName"
+  publishDir "${params.output}/index/", mode: 'copy'
+
+  input:
+    file fasta from fasta_file
+
+  output:
+    file "*.ht2" into index_files
+
+  script:
+"""
+hisat2-build -p ${task.cpus} ${fasta} ${fasta.baseName}
+"""
+}
+
+/* mapping for single-end data */
+
+process mapping_fastq {
+  tag "$file_id"
+
+  input:
+  set file_id, file(reads) from input_file
+  file index from index_files.collect()
+
+  output:
+  set file_id, "*.bam" into bam_files
+  file "*_report.txt" into mapping_report
+
+  script:
+  // Derive the hisat2 index prefix from the "<prefix>.1.ht2" file (falls back to index[0]).
+  index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) {
+        index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1]
+    }
+  }
+  // --norc must come before the redirection: the word right after "2>" is taken as the
+  // redirect target, so the report filename has to follow "2>" directly.
+"""
+hisat2 -p ${task.cpus} \
+  -x ${index_id} \
+  -U ${reads} \
+  --norc \
+  2> ${file_id}_hisat2_report.txt | \
+samtools view -Sb -F 4 - > ${file_id}.bam
+
+if grep -q "Error" ${file_id}_hisat2_report.txt; then
+  exit 1
+fi
+
+"""
+}
+
+/* sorting and indexing bams */
+
+process sort_bam {
+  tag "$file_id"
+  publishDir "${params.output}/mapping/", mode: 'copy'
+
+  input:
+    set file_id, file(bam) from bam_files
+
+  output:
+    set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files
+
+  script:
+"""
+samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam}
+samtools index ${file_id}_sorted.bam
+"""
+}