diff --git a/src/dual_mapping.config b/src/dual_mapping.config
new file mode 100644
index 0000000000000000000000000000000000000000..fdf3c362c7b45fb23ffe5bf44ddcdf15bbb0a964
--- /dev/null
+++ b/src/dual_mapping.config
@@ -0,0 +1,107 @@
+// Per-process execution settings for src/dual_mapping.nf.
+// Pick one profile at launch time with "-profile sge" or "-profile docker".
+profiles {
+  // Jobs are submitted to an SGE scheduler.
+  sge {
+    process{
+      withName: index_fasta_hisat {
+        beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
+        module = "hisat2/2.1.0:samtools/1.7"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        memory = "20GB"
+        cpus = 16
+        time = "12h"
+        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
+        penv = 'openmp16'
+      }
+      withName: index_fasta_bowtie {
+        beforeScript = "source $baseDir/.conda_psmn.sh"
+        conda = "$baseDir/.conda_envs/bowtie_1.2.2"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        memory = "20GB"
+        cpus = 16
+        time = "12h"
+        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
+        penv = 'openmp16'
+      }
+      withName: hisat2 {
+        beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
+        module = "hisat2/2.1.0:samtools/1.7"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        memory = "20GB"
+        cpus = 16
+        time = "12h"
+        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
+        penv = 'openmp16'
+      }
+      // Mirrors index_fasta_bowtie. Without this selector the bowtie process
+      // would get none of the SGE settings when run with this profile.
+      // NOTE(review): the process also pipes into samtools; confirm that the
+      // bowtie_1.2.2 conda environment provides it.
+      withName: bowtie {
+        beforeScript = "source $baseDir/.conda_psmn.sh"
+        conda = "$baseDir/.conda_envs/bowtie_1.2.2"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        memory = "20GB"
+        cpus = 16
+        time = "12h"
+        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
+        penv = 'openmp16'
+      }
+      // NOTE(review): dual_mapping.nf declares no sort_bam process (sorting
+      // is done in merge_bam); selector kept as-is.
+      withName: sort_bam {
+        beforeScript = "source $baseDir/.conda_psmn.sh"
+        conda = "$baseDir/.conda_envs/samtools_1.7"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 1
+        memory = "20GB"
+        time = "12h"
+        queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128'
+      }
+      withName: merge_bam {
+        beforeScript = "source $baseDir/.conda_psmn.sh"
+        conda = "$baseDir/.conda_envs/samtools_1.7"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 16
+        memory = "30GB"
+        time = "24h"
+        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
+        penv = 'openmp16'
+      }
+    }
+  }
+  // Every process runs in a Docker container.
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      withName: index_fasta_hisat {
+        cpus = 4
+        container = "lbmc/hisat2:2.1.0"
+      }
+      withName: index_fasta_bowtie {
+        cpus = 4
+        container = "lbmc/bowtie:1.2.2"
+      }
+      withName: hisat2 {
+        cpus = 4
+        container = "lbmc/hisat2:2.1.0"
+      }
+      withName: bowtie {
+        cpus = 4
+        container = "lbmc/bowtie:1.2.2"
+      }
+      withName: merge_bam {
+        container = "lbmc/samtools:1.7"
+        cpus = 4
+      }
+    }
+  }
+}
diff --git a/src/dual_mapping.nf b/src/dual_mapping.nf
new file mode 100644
index 0000000000000000000000000000000000000000..c8509b9a7b698ebd6a35c0507cf042a887ab4ccc
--- /dev/null
+++ b/src/dual_mapping.nf
@@ -0,0 +1,183 @@
+///////////////////////////////////////////////////////////////////////////////
+////////////////////////////// INDEXING GENOMES /////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+
+// Reference fasta file(s) to index.
+params.fasta = "data/genome/NC001802.1.fa"
+log.info "fasta files : ${params.fasta}"
+
+Channel
+  .fromPath( params.fasta )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
+  .set { fasta_file }
+
+// Every fasta file is indexed twice: once by HISAT2 and once by Bowtie.
+fasta_file.into{fasta_hisat ; fasta_bowtie}
+
+/* HISAT */
+// Builds the HISAT2 index (*ht2 files) of each fasta file.
+process index_fasta_hisat {
+  tag "$fasta.baseName"
+  publishDir "data/indexes/${fasta.baseName}_hisat/", mode: 'copy'
+
+  input:
+    file fasta from fasta_hisat
+
+  output:
+    file "*ht2" into index_files_hisat
+
+  script:
+"""
+hisat2-build -p ${task.cpus} ${fasta} ${fasta.baseName}
+"""
+}
+
+/* BOWTIE 1 */
+
+// Builds the Bowtie index (*ebwt files) of each fasta file. The build log is
+// kept as a report and the task fails when that log contains "Error".
+process index_fasta_bowtie {
+  tag "$fasta.baseName"
+  publishDir "results/${fasta.baseName}_bowtie/", mode: 'copy'
+
+  input:
+    file fasta from fasta_bowtie
+
+  output:
+    file "*ebwt" into index_files_bowtie
+    file "*_report.txt" into indexing_report
+
+  script:
+"""
+bowtie-build --threads ${task.cpus} -f ${fasta} ${fasta.baseName} \
+&> ${fasta.baseName}_bowtie_report.txt
+
+if grep -q "Error" ${fasta.baseName}_bowtie_report.txt; then
+  exit 1
+fi
+"""
+}
+
+///////////////////////////////////////////////////////////////////////////////
+////////////////////////////// MAPPING GENOMES /////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+
+params.fastq = "$baseDir/data/fastq/*.fastq"
+// params.index = "$baseDir/data/index/*.index*"
+
+log.info "fastq files : ${params.fastq}"
+// log.info "index files : ${params.index}"
+
+// Emits [file_id, fastq] pairs; file_id is the file name up to its first dot.
+Channel
+  .fromPath( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+  .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
+  .set { fastq_files }
+/*Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
+  .set { index_files }*/
+
+/* HISAT2 */
+
+// First pass: maps each fastq file with HISAT2. Alignments are written to a
+// BAM file and the reads that did not align to a gzipped fastq file, which
+// feeds the bowtie process.
+process hisat2 {
+  tag "$file_id"
+
+  input:
+    set file_id, file(fastq_filtred) from fastq_files
+    file index from index_files_hisat.collect()
+
+  output:
+    set file_id, "*.bam" into bam_hisat
+    file "*.txt" into hisat_report
+    set file_id, "*.fastq.gz" into reads_non_aligned
+
+  script:
+  // The index prefix is the name of the ".1.ht2" file without that suffix.
+  index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) {
+      index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1]
+    }
+  }
+  // -F 20 drops unmapped (4) and reverse-strand (16) records in one mask.
+"""
+hisat2 -x ${index_id} -p ${task.cpus} \
+-U ${fastq_filtred} --un-gz ${file_id}_notaligned_hisat.fastq.gz \
+--end-to-end --rna-strandness 'F' \
+2> ${file_id}_hisat2_NY5.txt | samtools view -F 20 -Sb - > ${file_id}.bam
+"""
+}
+
+/* BOWTIE */
+
+// Second pass: maps the reads left unaligned by HISAT2 with Bowtie. Only the
+// last 19 lines of Bowtie's stderr are kept as the mapping report.
+process bowtie {
+  tag "$file_id"
+
+  input:
+    set file_id, file(reads) from reads_non_aligned
+    file index from index_files_bowtie.collect()
+
+  output:
+    set file_id, index_id, "*.bam" into bam_bowtie
+    file "*_report.txt" into mapping_report
+
+  script:
+  // The index prefix is the name of the ".1.ebwt" file without that suffix.
+  index_id = index[0]
+  for (index_file in index) {
+    if (index_file =~ /.*\.1\.ebwt/ && !(index_file =~ /.*\.rev\.1\.ebwt/)) {
+      index_id = ( index_file =~ /(.*)\.1\.ebwt/)[0][1]
+    }
+  }
+  // -F 20 drops unmapped (4) and reverse-strand (16) records in one mask.
+"""
+bowtie --best --fr -v 3 -k 1 --sam -p ${task.cpus} ${index_id} \
+-q ${reads} 2> \
+${file_id}_bowtie_report_tmp.txt | \
+samtools view -F 20 -Sb - > ${file_id}_bowtie.bam
+
+if grep -q "Error" ${file_id}_bowtie_report_tmp.txt; then
+  exit 1
+fi
+tail -n 19 ${file_id}_bowtie_report_tmp.txt > ${file_id}_bowtie_report.txt
+"""
+}
+
+///////////////////////////////////////////////////////////////////////////////
+////////////////////////////// MERGE BAM /////////////////////////////
+///////////////////////////////////////////////////////////////////////////////
+
+// Pairs the two BAM files of a sample on file_id:
+// [file_id, index_id, bowtie bam, hisat bam].
+bam_bowtie.join(bam_hisat)
+  .set{merged_bam}
+
+//merged_bam.println()
+
+// Sorts both BAM files of a sample, merges them and indexes the result.
+process merge_bam{
+  publishDir "results/05_${index_id}_mergedBAM/", mode: 'copy'
+
+  input:
+    set file_id, index_id, file(bam_bowtie), file(bam_hisat) from merged_bam
+
+  output:
+    file "*merged.{bam,bam.bai}" into final_flow
+
+  // Sorted files are named explicitly: a "*sort.bam" glob would also match
+  // the staged "${file_id}.bam" input when file_id ends with "sort".
+  """
+  samtools sort -@ ${task.cpus} -o ${file_id}_bowtie_sort.bam ${bam_bowtie} && \
+  samtools sort -@ ${task.cpus} -o ${file_id}_hisat_sort.bam ${bam_hisat} && \
+  samtools merge -@ ${task.cpus} ${file_id}_merged.bam ${file_id}_bowtie_sort.bam ${file_id}_hisat_sort.bam && \
+  samtools index ${file_id}_merged.bam
+  """
+
+}