diff --git a/src/RNAseq.config b/src/RNAseq.config index 33593dc0246d408ec17ebac93fbac84ca7414f5f..cc96d3ab627b41e6983a96e3a6765d754b9d9ae3 100644 --- a/src/RNAseq.config +++ b/src/RNAseq.config @@ -27,6 +27,7 @@ profiles { clusterOptions = "-cwd -V" memory = "20GB" cpus = 16 + penv = 'openmp16' time = "12h" queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' penv = 'openmp16' @@ -42,8 +43,7 @@ profiles { queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' } withName: counting { - beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules" - module = "htseq/0.11.2" + beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules; module purge; module load htseq/0.11.2" executor = "sge" clusterOptions = "-cwd -V" cpus = 1 @@ -51,6 +51,17 @@ profiles { time = "12h" queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' } + withName: coverage{ + beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules" + module = "deeptools/3.0.2" + executor = "sge" + clusterOptions = "-cwd -V" + cpus = 16 + memory = "30GB" + time = "24h" + penv = 'openmp16' + queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' + } } } docker { diff --git a/src/RNAseq.nf b/src/RNAseq.nf index 791d4b0a675c1576156a847349115a0e6301cf4b..bb673adc9c1551d8154586cd34e0e3594c7d04a7 100644 --- a/src/RNAseq.nf +++ b/src/RNAseq.nf @@ -4,6 +4,10 @@ params.fastq_raw = "data/demultiplexed/*{_R1,_R2}.fastq.gz" params.output = "results" +params.script_cov = "src/norm_coverage.sh" + +log.info "script for coverage : ${script_cov}" + Channel .fromFilePairs(params.fastq_raw) @@ -27,8 +31,7 @@ process trimming { script: """ - cutadapt -a AGATCGGAAGAGC -A AGATCGGAAGAGC \ - --minimum-length 50 \ + cutadapt -a AGATCGGAAGAGCACACGTCTGAACTCCAGTCA -A AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT \ -o ${file_id}_cut_R1.fastq.gz -p ${file_id}_tmp_R2.fastq.gz \ ${reads[0]} ${reads[1]} > ${file_id}_report.txt @@ -129,7 +132,7 @@ samtools index ${file_id}_sorted.bam """ } -sorted_bam_files.into{sorted_bam_htseq, sorted_bam_coverage} +sorted_bam_files.into{sorted_bam_htseq; sorted_bam_coverage} /* HTseq */ @@ -190,19 +193,25 @@ htseq-count ${bam[0]} ${gtf} \ """ } +Channel + .fromFilePairs(params.script_cov) + .ifEmpty { error "Cannot find any file matching: ${params.script_cov}" } + .set {script_channel} + process coverage { tag "$file_id" publishDir "${params.output}/05_coverage/", mode: 'copy' input: set file_id, file(bam) from sorted_bam_coverage + set script from script_channel.collect() output: file "*.bw" into coverage_files script: """ -bash src/norm_coverage.sh -b ${bam} \ +bash ${script} -b ${bam} \ -o {file_id}.bw \ --binSize 1 \ -p ${cpus} 8 diff --git a/src/RNAseq_illumina.nf b/src/RNAseq_illumina.nf index b1e9338131a42e14ff91c15ca55ca9c26300ec88..8c70bd8968aff39c48ecd9c3aa05a1bc481d2694 100644 --- a/src/RNAseq_illumina.nf +++ b/src/RNAseq_illumina.nf @@ -4,6 +4,10 @@ params.fastq_raw = "data/demultiplexed/*{_R1,_R2}.fastq.gz" params.output = "results" +params.script_cov = "src/norm_coverage.sh" + +log.info "script for coverage : ${params.script_cov}" + Channel .fromFilePairs(params.fastq_raw) @@ -127,6 +131,7 @@ samtools index ${file_id}_sorted.bam """ } +sorted_bam_files.into{sorted_bam_htseq; sorted_bam_coverage} /* HTseq */ @@ -134,7 +139,7 @@ process sort_bam { tag "$file_id" input: - set file_id, file(bam) from sorted_bam_files + set file_id, file(bam) from sorted_bam_htseq output: set file_id, "*_htseq.bam" into sorted_bam_files_2 @@ -186,3 +191,28 @@ htseq-count ${bam[0]} ${gtf} \ """ } + +Channel + .fromPath(params.script_cov) + .ifEmpty { error "Cannot find any file matching: ${params.script_cov}" } + .set {script_channel} + +process coverage { + tag "$file_id" + publishDir "${params.output}/05_coverage/", mode: 'copy' + + input: + set file_id, file(bam) from sorted_bam_coverage + set script from script_channel.collect() + + output: + file "*.bw" into coverage_files + + script: +""" +bash ${script} -b ${bam[0]} \ + -o ${file_id}.bw \ + --binSize 1 \ + -p ${task.cpus} +""" +} diff --git a/src/norm_coverage.sh b/src/norm_coverage.sh index 8dcf4c835e1639bbec5a84f10aadd1a2190f4e3c..b5485369626b715bfa02ba86e6cb36b1696af1b3 100644 --- a/src/norm_coverage.sh +++ b/src/norm_coverage.sh @@ -2,33 +2,42 @@ set -e -usage() { echo "Usage: $0 -b <bamfile.bam> -o <outputName> --binSize <int> -p <CPUs>" 1>&2; exit 1; } +usage() { echo "Usage: $0 -b <bamfile.bam> -o <outputName> -s <binSize> -p <CPUs>" 1>&2; exit 1; } cpus=4 binSize=1 -while getopts "b:o:binSize:p:" arg; do +while getopts "hb:o:s:p:" arg; do case $arg in - -h) - echo "usage" + h) + usage ;; - -b) + b) bam=$OPTARG ;; - -o) + o) output=$OPTARG ;; - --binSize) + s) binSize=$OPTARG ;; - -p) + p) cpus=$OPTARG ;; + \?) + echo "$OPTARG : invalid option" + usage + ;; + :) + echo "$OPTARG requiert an argument" + usage + ;; + esac done hg38=$(samtools view ${bam} | awk '{print $1}' | sort | uniq | wc -l) factor=$(echo "1000000/($hg38)" | bc -l) echo "hg38 counts : $hg38" -echo "scaling factor : $factor\n" -bamCoverage -p ${cpus} --scaleFactor ${factor} --binSize ${binSize} -b ${bam} -o ${output} +echo "scaling factor : $factor" +echo "bamCoverage -p ${cpus} --scaleFactor ${factor} --binSize ${binSize} -b ${bam} -o ${output}"