From 36bb93c1215928479a8edef0bd3ca93b8e12f417 Mon Sep 17 00:00:00 2001 From: elabaron <emmanuel.labaronne@ens-lyon.fr> Date: Thu, 16 Jul 2020 17:19:54 +0200 Subject: [PATCH] fix beug for psmn --- src/RNAseq.config | 15 +++++++++++++-- src/RNAseq.nf | 17 +++++++++++++---- src/RNAseq_illumina.nf | 32 +++++++++++++++++++++++++++++++- src/norm_coverage.sh | 29 +++++++++++++++++++---------- 4 files changed, 76 insertions(+), 17 deletions(-) diff --git a/src/RNAseq.config b/src/RNAseq.config index 33593dc0..cc96d3ab 100644 --- a/src/RNAseq.config +++ b/src/RNAseq.config @@ -27,6 +27,7 @@ profiles { clusterOptions = "-cwd -V" memory = "20GB" cpus = 16 + penv = 'openmp16' time = "12h" queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' penv = 'openmp16' @@ -42,8 +43,7 @@ profiles { queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' } withName: counting { - beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules" - module = "htseq/0.11.2" + beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules; module purge; module load htseq/0.11.2" executor = "sge" clusterOptions = "-cwd -V" cpus = 1 @@ -51,6 +51,17 @@ profiles { time = "12h" queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' } + withName: coverage{ + beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules" + module = "deeptools/3.0.2" + executor = "sge" + clusterOptions = "-cwd -V" + cpus = 16 + memory = "30GB" + time = "24h" + penv = 'openmp16' + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' + } } } docker { diff --git a/src/RNAseq.nf b/src/RNAseq.nf index 791d4b0a..bb673adc 100644 --- a/src/RNAseq.nf +++ b/src/RNAseq.nf @@ -4,6 +4,10 @@ params.fastq_raw = "data/demultiplexed/*{_R1,_R2}.fastq.gz" params.output = "results" +params.script_cov = "src/norm_coverage.sh" + +log.info "script for coverage : ${script_cov}" + Channel .fromFilePairs(params.fastq_raw) @@ -27,8 +31,7 @@ process trimming { script: """ - cutadapt -a AGATCGGAAGAGC -A AGATCGGAAGAGC \ - --minimum-length 50 \ + cutadapt -a AGATCGGAAGAGCACACGTCTGAACTCCAGTCA -A AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT \ -o ${file_id}_cut_R1.fastq.gz -p ${file_id}_tmp_R2.fastq.gz \ ${reads[0]} ${reads[1]} > ${file_id}_report.txt @@ -129,7 +132,7 @@ samtools index ${file_id}_sorted.bam """ } -sorted_bam_files.into{sorted_bam_htseq, sorted_bam_coverage} +sorted_bam_files.into{sorted_bam_htseq; sorted_bam_coverage} /* HTseq */ @@ -190,19 +193,25 @@ htseq-count ${bam[0]} ${gtf} \ """ } +Channel + .fromFilePairs(params.script_cov) + .ifEmpty { error "Cannot find any file matching: ${params.script_cov}" } + .set {script_channel} + process coverage { tag "$file_id" publishDir "${params.output}/05_coverage/", mode: 'copy' input: set file_id, file(bam) from sorted_bam_coverage + set script from script_channel.collect() output: file "*.bw" into coverage_files script: """ -bash src/norm_coverage.sh -b ${bam} \ +bash ${script} -b ${bam} \ -o {file_id}.bw \ --binSize 1 \ -p ${cpus} 8 diff --git a/src/RNAseq_illumina.nf b/src/RNAseq_illumina.nf index b1e93381..8c70bd89 100644 --- a/src/RNAseq_illumina.nf +++ b/src/RNAseq_illumina.nf @@ -4,6 +4,10 @@ params.fastq_raw = "data/demultiplexed/*{_R1,_R2}.fastq.gz" params.output = "results" +params.script_cov = "src/norm_coverage.sh" + +log.info "script for coverage : ${params.script_cov}" + Channel .fromFilePairs(params.fastq_raw) @@ -127,6 +131,7 @@ samtools index ${file_id}_sorted.bam """ } +sorted_bam_files.into{sorted_bam_htseq; sorted_bam_coverage} /* HTseq */ @@ -134,7 +139,7 @@ process sort_bam { tag "$file_id" input: - set file_id, file(bam) from sorted_bam_files + set file_id, file(bam) from sorted_bam_htseq output: set file_id, "*_htseq.bam" into sorted_bam_files_2 @@ -186,3 +191,28 @@ htseq-count ${bam[0]} ${gtf} \ """ } + +Channel + .fromPath(params.script_cov) + .ifEmpty { error "Cannot find any file matching: ${params.script_cov}" } + .set {script_channel} + +process coverage { + tag "$file_id" + publishDir "${params.output}/05_coverage/", mode: 'copy' + + input: + set file_id, file(bam) from sorted_bam_coverage + set script from script_channel.collect() + + output: + file "*.bw" into coverage_files + + script: +""" +bash ${script} -b ${bam[0]} \ + -o ${file_id}.bw \ + --binSize 1 \ + -p ${task.cpus} +""" +} diff --git a/src/norm_coverage.sh b/src/norm_coverage.sh index 8dcf4c83..b5485369 100644 --- a/src/norm_coverage.sh +++ b/src/norm_coverage.sh @@ -2,33 +2,42 @@ set -e -usage() { echo "Usage: $0 -b <bamfile.bam> -o <outputName> --binSize <int> -p <CPUs>" 1>&2; exit 1; } +usage() { echo "Usage: $0 -b <bamfile.bam> -o <outputName> -s <binSize> -p <CPUs>" 1>&2; exit 1; } cpus=4 binSize=1 -while getopts "b:o:binSize:p:" arg; do +while getopts "hb:o:s:p:" arg; do case $arg in - -h) - echo "usage" + h) + usage ;; - -b) + b) bam=$OPTARG ;; - -o) + o) output=$OPTARG ;; - --binSize) + s) binSize=$OPTARG ;; - -p) + p) cpus=$OPTARG ;; + \?) + echo "$OPTARG : invalid option" + usage + ;; + :) + echo "$OPTARG requiert an argument" + usage + ;; + esac done hg38=$(samtools view ${bam} | awk '{print $1}' | sort | uniq | wc -l) factor=$(echo "1000000/($hg38)" | bc -l) echo "hg38 counts : $hg38" -echo "scaling factor : $factor\n" -bamCoverage -p ${cpus} --scaleFactor ${factor} --binSize ${binSize} -b ${bam} -o ${output} +echo "scaling factor : $factor" +echo "bamCoverage -p ${cpus} --scaleFactor ${factor} --binSize ${binSize} -b ${bam} -o ${output}" -- GitLab