# Patch summary (commentary only; everything from the first "diff --git" header onward is unchanged).
#
# Files touched: src/PASseq.config and src/PASseq.nf.
#
# src/PASseq.config, hunk @@ -33,45 +33,16 @@:
#   * Drops the `withName:` settings for sort_bam, index_bam and dedup.
#   * Turns the `withName: counting` entry into `withName: bam_to_bigwig`:
#     module htseq/0.11.2 -> deeptools/3.0.2, cpus 1 -> 16, memory 20GB -> 30GB,
#     time 12h -> 24h, queue switched to the E5-2670deb128A..F list, and
#     `penv = 'openmp16'` added.
#
# src/PASseq.config, hunk @@ -91,17 +62,13 @@:
#   * Drops the container entries for sort_bam and dedup.
#   * index_bam: cpus 1 -> 4.
#   * Adds bam_to_bigwig with container "lbmc/deeptools:3.0.2" and cpus = 4.
#   * The `withName: counting` container entry is kept as context.
#
# src/PASseq.nf:
#   * rRNA_removal: computes `index_id` from the index file matching `.1.bt2`
#     (excluding `.rev.1.bt2`) and passes it to `bowtie2 -x` in place of the
#     hard-coded `human_rRNA_tRNA`.
#   * hisat2_human: same `index_id` lookup for `.1.ht2`; `hisat2 -x` uses it in
#     place of the hard-coded `genome_tran`; the `_hg38` suffix is removed from
#     the unaligned-reads and report file names.
#   * index_bam: publishDir moves from 03_hisat2_hg38/ to 03_mapping/; it now
#     takes `report` from the `hisat_report` channel, copies it to
#     `${file_id}.log`, and emits `*.log` into `hisat_report_bis`.
#   * Removes `params.dedup_options` and the dedup and sort_bam processes;
#     `sorted_bam_files` is now forwarded to `for_htseq` only.
#   * counting: removes the CDS htseq-count run, switches the remaining exon
#     run from `--mode=intersection-nonempty` to `--mode=union`, and drops
#     `-r pos`.
#
# NOTE(review): the line breaks of this patch appear to have been collapsed
#   (hunk lines are joined by spaces); presumably it will not apply with
#   `git apply`/`patch` until they are restored -- confirm against the
#   original commit.
# NOTE(review): bam_to_bigwig settings are added in both config hunks, but no
#   bam_to_bigwig process is visible in the PASseq.nf hunks shown here --
#   confirm it is defined elsewhere in the file.
# NOTE(review): the first config hunk removes the index_bam and counting
#   settings while both processes still appear in PASseq.nf -- confirm those
#   processes are configured elsewhere in that profile.
# NOTE(review): with `-r pos` removed, htseq-count uses its default ordering
#   assumption -- verify that this is intended for the position-sorted BAM
#   produced by index_bam.
diff --git a/src/PASseq.config b/src/PASseq.config index 6837f9cc1217343f7067a3a7a0eff8019c7391c0..6ad8fbf7f8ff99c425f7d5fef17f80535c09c66f 100644 --- a/src/PASseq.config +++ b/src/PASseq.config @@ -33,45 +33,16 @@ profiles { queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F' penv = 'openmp16' } - withName: sort_bam { - beforeScript = "source $baseDir/.conda_psmn.sh" - conda = "$baseDir/.conda_envs/samtools_1.7" - executor = "sge" - clusterOptions = "-cwd -V" - cpus = 1 - memory = "20GB" - time = "12h" - queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128' - } - withName: index_bam { - beforeScript = "source $baseDir/.conda_psmn.sh" - conda = "$baseDir/.conda_envs/samtools_1.7" - executor = "sge" - clusterOptions = "-cwd -V" - cpus = 1 - memory = "20GB" - time = "12h" - queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128' - } - withName: dedup { - beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules" - module = "umi_tools/1.0.0" - executor = "sge" - clusterOptions = "-cwd -V" - cpus = 1 - memory = "20GB" - time = "12h" - queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128' - } - withName: counting { + withName: bam_to_bigwig { beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules" - module = "htseq/0.11.2" + module = "deeptools/3.0.2" executor = "sge" clusterOptions = "-cwd -V" - cpus = 1 - memory = "20GB" - time = "12h" - queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128' + cpus = 16 + memory = "30GB" + time = "24h" + queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F' + penv = 'openmp16' } } } @@ -91,17 +62,13 @@ profiles { cpus = 4 container = "lbmc/hisat2:2.1.0" } - withName: sort_bam { - container = "lbmc/samtools:1.7" - cpus = 1 - } withName: index_bam { container = "lbmc/samtools:1.7" - cpus = 1 + cpus = 
4 } - withName: dedup { - container = "lbmc/umi_tools:1.0.0" - cpus = 1 + withName: bam_to_bigwig { + container = "lbmc/deeptools:3.0.2" + cpus = 4 } withName: counting { container = "lbmc/htseq:0.11.2" diff --git a/src/PASseq.nf b/src/PASseq.nf index 29f4e81d78a7100e8a01b625e160a6c9ed7a1ea4..bb0b7197ae49aaddb7935ad30fb53899fdb7c527 100644 --- a/src/PASseq.nf +++ b/src/PASseq.nf @@ -55,8 +55,14 @@ process rRNA_removal { file "*.txt" into bowtie_report script: + index_id = index[0] + for (index_file in index) { + if (index_file =~ /.*\.1\.bt2/ && !(index_file =~ /.*\.rev\.1\.bt2/)) { + index_id = ( index_file =~ /(.*)\.1\.bt2/)[0][1] + } + } """ -zcat ${reads} | bowtie2 --sensitive -p ${task.cpus} -x human_rRNA_tRNA \ +zcat ${reads} | bowtie2 --sensitive -p ${task.cpus} -x ${index_id} \ -U - --un-gz ${file_id}_mRNA.fastq.gz 2> \ ${file_id}_bowtie2_report.txt > /dev/null @@ -92,11 +98,17 @@ process hisat2_human { file "*.txt" into hisat_report script: + index_id = index[0] + for (index_file in index) { + if (index_file =~ /.*\.1\.ht2/ && !(index_file =~ /.*\.rev\.1\.ht2/)) { + index_id = ( index_file =~ /(.*)\.1\.ht2/)[0][1] + } + } """ -hisat2 -x genome_tran -p ${task.cpus} \ --U ${fastq_filtred} --un-gz ${file_id}_notaligned_hg38.fastq.gz \ +hisat2 -x ${index_id} -p ${task.cpus} \ +-U ${fastq_filtred} --un-gz ${file_id}_notaligned.fastq.gz \ --end-to-end --rna-strandness 'F' \ -2> ${file_id}_hisat2_hg38.txt | samtools view -bS -F 4 -o ${file_id}.bam +2> ${file_id}_hisat2.txt | samtools view -bS -F 4 -o ${file_id}.bam """ } @@ -105,64 +117,25 @@ hisat2 -x genome_tran -p ${task.cpus} \ process index_bam { tag "$file_id" - publishDir "${params.output}/03_hisat2_hg38/", mode: 'copy' + publishDir "${params.output}/03_mapping/", mode: 'copy' input: set file_id, file(bam) from reads_aligned_hg38 + file report from hisat_report output: set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files + file "*.log" into hisat_report_bis script: """ samtools sort -@ ${task.cpus} 
-O BAM -o ${file_id}_sorted.bam ${bam} samtools index ${file_id}_sorted.bam +cat ${report} > ${file_id}.log """ } -sorted_bam_files.into{for_dedup;for_htseq} - -/* deduplicating reads */ - -params.dedup_options = "" - -process dedup { - tag "$file_id" - - input: - set file_id, file(bam) from for_dedup - - output: - set file_id, "*dedup.bam" into dedup_bam - file "*.txt" into dedup_report - - script: -""" -umi_tools dedup -I ${bam[0]} \ - ${params.dedup_options} \ - -S ${file_id}_dedup.bam > report.txt -""" -} - -process sort_bam { - tag "$file_id" - publishDir "${params.output}/03_hisat2_hg38_dedup/", mode: 'copy' - - input: - set file_id, file(bam) from dedup_bam - file dedup from dedup_report - - output: - set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files_2 - file "*.txt" into report_dedup - - script: -""" -samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam} -samtools index ${file_id}_sorted.bam -cat ${dedup} > ${file_id}_dedup_report.txt -""" -} +sorted_bam_files.into{for_htseq} /* HTseq */ @@ -188,22 +161,11 @@ process counting { script: """ htseq-count ${bam[0]} ${gtf} \ - --mode=intersection-nonempty \ - -a 10 \ - -s yes \ - -t CDS \ - -i gene_id \ - -r pos \ - -f bam \ -> ${file_id}_CDS.count - -htseq-count ${bam[0]} ${gtf} \ - --mode=intersection-nonempty \ + --mode=union \ -a 10 \ -s yes \ -t exon \ -i gene_id \ - -r pos \ -f bam \ > ${file_id}_exon.count