# Patch summary (free-text preamble; everything from the "diff --git" header onward is unchanged).
#
# src/RNAseq.config
#   * Every `queue` list shown in these hunks is replaced by the same CLG6242/CLG5218/SLG5118/SLG6142 list.
#   * rRNA_removal: the config-level cpus/memory/time entries are removed; penv goes from 'openmp16' to 'openmp32'.
#   * hisat2_human: the `module` key becomes `conda` (same env path); penv goes from 'openmp16' to 'openmp32'.
#   * sort_bam and index_bam: the conda env path changes from samtools_1.7 to hisat2_2.1.0.
#
# src/RNAseq.nf
#   * Hunk headed "process trimming {": adds `--minimum-length 50` to the cutadapt call.
#   * rRNA_removal, index_bam, counting: publishDir becomes a literal "results/RNAseq/..." path
#     (for index_bam and counting it previously used ${params.output}).
#   * reads_aligned_hg38 is split into for_mapping and for_htseq.
#   * index_bam now reads from for_mapping, takes an extra `report` file from hisat_report, writes
#     ${file_id}_hisat_hg38.txt and emits "*.txt" into report_hisat.
#   * The statement `sorted_bam_files.into{for_dedup;for_htseq}` is removed.
#   * New process sort_bam runs `samtools sort -n` on for_htseq and emits sorted_bam_files_2,
#     which counting now consumes instead of for_htseq.
#   * `-r pos` is dropped from both htseq-count invocations.
#
# NOTE(review): the hunk records below are run together on three physical lines, so the "@@" headers
#   and the leading ' ', '+', '-' markers no longer start their own lines. Indentation and blank context
#   lines cannot be recovered from this text; regenerate the patch from the repository instead of
#   re-splitting it by hand.
# NOTE(review): hisat2_human keeps `cpus = 16` while penv becomes 'openmp32', and rRNA_removal loses its
#   config-level cpus while the .nf hunk shows `cpus 8` -- confirm these slot counts are accepted by the
#   'openmp32' parallel environment on the target cluster.
# NOTE(review): sort_bam and index_bam invoke samtools (see the .nf hunks) but their conda env now points
#   at hisat2_2.1.0 -- confirm that env actually provides samtools.
# NOTE(review): the split that defined for_dedup is removed and no consumer of for_dedup or
#   sorted_bam_files is visible in these hunks -- confirm the dedup step still has an input channel.
# NOTE(review): dropping `-r pos` presumably relies on htseq-count's default read order matching the
#   name sort (`-n`) done by the new sort_bam process -- TODO confirm against the htseq-count docs.
# NOTE(review): with the literal publishDir paths, params.output no longer controls where index_bam and
#   counting publish -- confirm this is intended.
#
diff --git a/src/RNAseq.config b/src/RNAseq.config index db5a6c95a86453dd128de5a39816e4bb38a4a550..b51fac49c67c1087e9b055384ac3b4eba894abd0 100644 --- a/src/RNAseq.config +++ b/src/RNAseq.config @@ -9,49 +9,46 @@ profiles { cpus = 1 memory = "20GB" time = "12h" - queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128' + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' } withName: rRNA_removal { beforeScript = "source $baseDir/.conda_psmn.sh" conda = "$baseDir/.conda_envs/bowtie2_2.3.4.1" executor = "sge" clusterOptions = "-cwd -V" - cpus = 16 - memory = "30GB" - time = "24h" - queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F' - penv = 'openmp16' + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' + penv = 'openmp32' } withName: hisat2_human { beforeScript = "source $baseDir/.conda_psmn.sh" - module = "$baseDir/.conda_envs/hisat2_2.1.0" + conda = "$baseDir/.conda_envs/hisat2_2.1.0" executor = "sge" clusterOptions = "-cwd -V" memory = "20GB" cpus = 16 time = "12h" - queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F' - penv = 'openmp16' + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' + penv = 'openmp32' } withName: sort_bam { beforeScript = "source $baseDir/.conda_psmn.sh" - conda = "$baseDir/.conda_envs/samtools_1.7" + conda = "$baseDir/.conda_envs/hisat2_2.1.0" executor = "sge" clusterOptions = "-cwd -V" cpus = 1 memory = "20GB" time = "12h" - queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128' + queue = 
'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' } withName: index_bam { beforeScript = "source $baseDir/.conda_psmn.sh" - conda = "$baseDir/.conda_envs/samtools_1.7" + conda = "$baseDir/.conda_envs/hisat2_2.1.0" executor = "sge" clusterOptions = "-cwd -V" cpus = 1 memory = "20GB" time = "12h" - queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128' + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' } withName: dedup { beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules" @@ -61,7 +58,7 @@ profiles { cpus = 1 memory = "20GB" time = "12h" - queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128' + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' } withName: counting { beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules" @@ -71,7 +68,7 @@ profiles { cpus = 1 memory = "20GB" time = "12h" - queue = 'monointeldeb128,monointeldeb48,h48-E5-2670deb128,h6-E5-2667v4deb128' + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' } } } diff --git a/src/RNAseq.nf b/src/RNAseq.nf index 911314d5c9a98b94e0e12aafa558186e2dcd4ece..73aaf7b2482d05ee04b9428606378b2575e7c46a 100644 --- a/src/RNAseq.nf +++ b/src/RNAseq.nf @@ -27,6 +27,7 @@ process trimming { script: """ cutadapt -a AGATCGGAAGAGC -A AGATCGGAAGAGC \ + --minimum-length 50 \ -o ${file_id}_cut_R1.fastq.gz -p ${file_id}_tmp_R2.fastq.gz \ ${reads[0]} ${reads[1]} > ${file_id}_report.txt @@ -50,7 +51,7 @@ Channel process rRNA_removal { tag "$file_id" 
cpus 8 - publishDir "results/RNAseq/U937/02_rRNA_depletion/", mode: 'copy' + publishDir "results/RNAseq/02_rRNA_depletion/", mode: 'copy' input: set file_id, file(reads) from fastq_files_cut @@ -107,26 +108,31 @@ hisat2 -x genome_tran -p ${task.cpus} \ """ } +reads_aligned_hg38.into{for_mapping;for_htseq} + + /* sorting */ process index_bam { tag "$file_id" - publishDir "${params.output}/03_hisat2_hg38/", mode: 'copy' + publishDir "results/RNAseq/03_hisat2_hg38/", mode: 'copy' input: - set file_id, file(bam) from reads_aligned_hg38 + set file_id, file(bam) from for_mapping + file report from hisat_report output: set file_id, "*_sorted.{bam,bam.bai}" into sorted_bam_files + file "*.txt" into report_hisat script: """ samtools sort -@ ${task.cpus} -O BAM -o ${file_id}_sorted.bam ${bam} samtools index ${file_id}_sorted.bam +cat ${report} > ${file_id}_hisat_hg38.txt """ } -sorted_bam_files.into{for_dedup;for_htseq} /* deduplicating reads @@ -172,6 +178,21 @@ cat ${dedup} > ${file_id}_dedup_report.txt /* HTseq */ +process sort_bam { + tag "$file_id" + + input: + set file_id, file(bam) from for_htseq + + output: + set file_id, "*_sorted.bam" into sorted_bam_files_2 + + script: +""" +samtools sort -@ ${task.cpus} -n -O BAM -o ${file_id}_sorted.bam ${bam} +""" +} + params.gtf = "$baseDir/data/annotation/*.gtf" log.info "gtf files : ${params.gtf}" @@ -182,10 +203,10 @@ Channel process counting { tag "$file_id" - publishDir "${params.output}/04_HTseq/", mode: 'copy' + publishDir "results/RNAseq/04_HTseq/", mode: 'copy' input: - set file_id, file(bam) from for_htseq + set file_id, file(bam) from sorted_bam_files_2 file gtf from gtf_file.toList() output: @@ -199,7 +220,6 @@ htseq-count ${bam[0]} ${gtf} \ -s yes \ -t CDS \ -i gene_id \ - -r pos \ -f bam \ > ${file_id}_CDS.count @@ -209,7 +229,6 @@ htseq-count ${bam[0]} ${gtf} \ -s yes \ -t exon \ -i gene_id \ - -r pos \ -f bam \ > ${file_id}_exon.count