Skip to content
Snippets Groups Projects
Commit 36bb93c1 authored by elabaron's avatar elabaron
Browse files

fix bug for psmn

parent ca139d0e
No related branches found
No related tags found
No related merge requests found
...@@ -27,6 +27,7 @@ profiles { ...@@ -27,6 +27,7 @@ profiles {
clusterOptions = "-cwd -V" clusterOptions = "-cwd -V"
memory = "20GB" memory = "20GB"
cpus = 16 cpus = 16
penv = 'openmp16'
time = "12h" time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
penv = 'openmp16' penv = 'openmp16'
...@@ -42,8 +43,7 @@ profiles { ...@@ -42,8 +43,7 @@ profiles {
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
} }
withName: counting { withName: counting {
beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules" beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules; module purge; module load htseq/0.11.2"
module = "htseq/0.11.2"
executor = "sge" executor = "sge"
clusterOptions = "-cwd -V" clusterOptions = "-cwd -V"
cpus = 1 cpus = 1
...@@ -51,6 +51,17 @@ profiles { ...@@ -51,6 +51,17 @@ profiles {
time = "12h" time = "12h"
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D' queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
} }
// Per-process settings applied to the `coverage` process when it is submitted to the SGE cluster.
withName: coverage{
// Runs before the task script: initialises Lmod and adds the user's private module directory to the module path.
beforeScript = "source /usr/share/lmod/lmod/init/bash; module use ~/privatemodules"
// Environment module requested for the task.
// NOTE(review): presumably provides the bamCoverage tool used by the coverage script — confirm.
module = "deeptools/3.0.2"
executor = "sge"
// Extra options forwarded verbatim to the cluster submission command.
clusterOptions = "-cwd -V"
cpus = 16
memory = "30GB"
time = "24h"
// SGE parallel environment; NOTE(review): presumably sized to match cpus = 16 — confirm against the cluster setup.
penv = 'openmp16'
// Comma-separated list of queues the job may be scheduled on.
queue = 'CLG6242deb384A,CLG6242deb384C,CLG5218deb192A,CLG5218deb192B,CLG5218deb192C,CLG5218deb192D,SLG5118deb96,SLG6142deb384A,SLG6142deb384B,SLG6142deb384C,SLG6142deb384D'
}
} }
} }
docker { docker {
......
...@@ -4,6 +4,10 @@ ...@@ -4,6 +4,10 @@
params.fastq_raw = "data/demultiplexed/*{_R1,_R2}.fastq.gz" params.fastq_raw = "data/demultiplexed/*{_R1,_R2}.fastq.gz"
params.output = "results" params.output = "results"
// Path to the coverage normalisation script; default value of the `script_cov` pipeline parameter.
params.script_cov = "src/norm_coverage.sh"
// The parameter lives in the `params` map: a bare `script_cov` is not defined in the
// script scope and would make the pipeline fail at start-up.
log.info "script for coverage : ${params.script_cov}"
Channel Channel
.fromFilePairs(params.fastq_raw) .fromFilePairs(params.fastq_raw)
...@@ -27,8 +31,7 @@ process trimming { ...@@ -27,8 +31,7 @@ process trimming {
script: script:
""" """
cutadapt -a AGATCGGAAGAGC -A AGATCGGAAGAGC \ cutadapt -a AGATCGGAAGAGCACACGTCTGAACTCCAGTCA -A AGATCGGAAGAGCGTCGTGTAGGGAAAGAGTGT \
--minimum-length 50 \
-o ${file_id}_cut_R1.fastq.gz -p ${file_id}_tmp_R2.fastq.gz \ -o ${file_id}_cut_R1.fastq.gz -p ${file_id}_tmp_R2.fastq.gz \
${reads[0]} ${reads[1]} > ${file_id}_report.txt ${reads[0]} ${reads[1]} > ${file_id}_report.txt
...@@ -129,7 +132,7 @@ samtools index ${file_id}_sorted.bam ...@@ -129,7 +132,7 @@ samtools index ${file_id}_sorted.bam
""" """
} }
sorted_bam_files.into{sorted_bam_htseq, sorted_bam_coverage} sorted_bam_files.into{sorted_bam_htseq; sorted_bam_coverage}
/* HTseq */ /* HTseq */
...@@ -190,19 +193,25 @@ htseq-count ${bam[0]} ${gtf} \ ...@@ -190,19 +193,25 @@ htseq-count ${bam[0]} ${gtf} \
""" """
} }
// Channel emitting the coverage normalisation script referenced by params.script_cov.
// fromPath is used because the parameter designates a single file: fromFilePairs only
// emits once a complete pair of files is matched, so a lone script would leave the
// channel empty and always trigger the error below.
Channel
.fromPath(params.script_cov)
.ifEmpty { error "Cannot find any file matching: ${params.script_cov}" }
.set {script_channel}
process coverage { process coverage {
tag "$file_id" tag "$file_id"
publishDir "${params.output}/05_coverage/", mode: 'copy' publishDir "${params.output}/05_coverage/", mode: 'copy'
input: input:
set file_id, file(bam) from sorted_bam_coverage set file_id, file(bam) from sorted_bam_coverage
set script from script_channel.collect()
output: output:
file "*.bw" into coverage_files file "*.bw" into coverage_files
script: script:
""" """
bash src/norm_coverage.sh -b ${bam} \ bash ${script} -b ${bam} \
-o {file_id}.bw \ -o {file_id}.bw \
--binSize 1 \ --binSize 1 \
-p ${cpus} 8 -p ${cpus} 8
......
...@@ -4,6 +4,10 @@ ...@@ -4,6 +4,10 @@
params.fastq_raw = "data/demultiplexed/*{_R1,_R2}.fastq.gz" params.fastq_raw = "data/demultiplexed/*{_R1,_R2}.fastq.gz"
params.output = "results" params.output = "results"
// Path to the coverage normalisation script; default value of the `script_cov` pipeline parameter.
params.script_cov = "src/norm_coverage.sh"
// Report which script will be used for the coverage step.
log.info "script for coverage : ${params.script_cov}"
Channel Channel
.fromFilePairs(params.fastq_raw) .fromFilePairs(params.fastq_raw)
...@@ -127,6 +131,7 @@ samtools index ${file_id}_sorted.bam ...@@ -127,6 +131,7 @@ samtools index ${file_id}_sorted.bam
""" """
} }
sorted_bam_files.into{sorted_bam_htseq; sorted_bam_coverage}
/* HTseq */ /* HTseq */
...@@ -134,7 +139,7 @@ process sort_bam { ...@@ -134,7 +139,7 @@ process sort_bam {
tag "$file_id" tag "$file_id"
input: input:
set file_id, file(bam) from sorted_bam_files set file_id, file(bam) from sorted_bam_htseq
output: output:
set file_id, "*_htseq.bam" into sorted_bam_files_2 set file_id, "*_htseq.bam" into sorted_bam_files_2
...@@ -186,3 +191,28 @@ htseq-count ${bam[0]} ${gtf} \ ...@@ -186,3 +191,28 @@ htseq-count ${bam[0]} ${gtf} \
""" """
} }
// Channel emitting the file(s) matching params.script_cov, stored as `script_channel`.
// Raises an error when nothing matches the path.
Channel
.fromPath(params.script_cov)
.ifEmpty { error "Cannot find any file matching: ${params.script_cov}" }
.set {script_channel}
// Runs the coverage script on each item received from sorted_bam_coverage and
// emits the resulting *.bw files on coverage_files.
process coverage {
// Label each task with the sample identifier.
tag "$file_id"
// Copy the task outputs into <params.output>/05_coverage/.
publishDir "${params.output}/05_coverage/", mode: 'copy'

input:
set file_id, file(bam) from sorted_bam_coverage
// collect() gathers everything emitted by script_channel into a single item.
// NOTE(review): presumably done so the same script is available to every task — confirm.
set script from script_channel.collect()

output:
file "*.bw" into coverage_files

script:
"""
bash ${script} -b ${bam[0]} \
-o ${file_id}.bw \
--binSize 1 \
-p ${task.cpus}
"""
}
...@@ -2,33 +2,42 @@ ...@@ -2,33 +2,42 @@
set -e set -e
usage() { echo "Usage: $0 -b <bamfile.bam> -o <outputName> --binSize <int> -p <CPUs>" 1>&2; exit 1; } usage() { echo "Usage: $0 -b <bamfile.bam> -o <outputName> -s <binSize> -p <CPUs>" 1>&2; exit 1; }
cpus=4 cpus=4
binSize=1 binSize=1
while getopts "b:o:binSize:p:" arg; do while getopts "hb:o:s:p:" arg; do
case $arg in case $arg in
-h) h)
echo "usage" usage
;; ;;
-b) b)
bam=$OPTARG bam=$OPTARG
;; ;;
-o) o)
output=$OPTARG output=$OPTARG
;; ;;
--binSize) s)
binSize=$OPTARG binSize=$OPTARG
;; ;;
-p) p)
cpus=$OPTARG cpus=$OPTARG
;; ;;
\?)
echo "$OPTARG : invalid option"
usage
;;
:)
echo "$OPTARG requiert an argument"
usage
;;
esac esac
done done
hg38=$(samtools view ${bam} | awk '{print $1}' | sort | uniq | wc -l) hg38=$(samtools view ${bam} | awk '{print $1}' | sort | uniq | wc -l)
factor=$(echo "1000000/($hg38)" | bc -l) factor=$(echo "1000000/($hg38)" | bc -l)
echo "hg38 counts : $hg38" echo "hg38 counts : $hg38"
echo "scaling factor : $factor\n" echo "scaling factor : $factor"
bamCoverage -p ${cpus} --scaleFactor ${factor} --binSize ${binSize} -b ${bam} -o ${output} echo "bamCoverage -p ${cpus} --scaleFactor ${factor} --binSize ${binSize} -b ${bam} -o ${output}"
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment