From 52b510e48daa1fb7edfd0211f37aebf0d83cab20 Mon Sep 17 00:00:00 2001
From: jganofsk <jeremy.ganofsky@ens-lyon.fr>
Date: Wed, 27 Jun 2018 14:10:26 +0200
Subject: [PATCH] add process fastqdump

---
 src/nf_modules/sra-tools/sra-tools.config   | 17 ++++++++
 src/nf_modules/sra-tools/sra-tools.nf       | 43 +++++++++++++++++++++
 src/nf_modules/sra-tools/tests/fastqdump.nf | 43 +++++++++++++++++++++
 src/nf_modules/sra-tools/tests/list-srr.txt |  7 ++++
 src/nf_modules/sra-tools/tests/tests.sh     |  4 ++
 5 files changed, 114 insertions(+)
 create mode 100644 src/nf_modules/sra-tools/sra-tools.config
 create mode 100644 src/nf_modules/sra-tools/sra-tools.nf
 create mode 100644 src/nf_modules/sra-tools/tests/fastqdump.nf
 create mode 100644 src/nf_modules/sra-tools/tests/list-srr.txt
 create mode 100755 src/nf_modules/sra-tools/tests/tests.sh

diff --git a/src/nf_modules/sra-tools/sra-tools.config b/src/nf_modules/sra-tools/sra-tools.config
new file mode 100644
index 00000000..c49af219
--- /dev/null
+++ b/src/nf_modules/sra-tools/sra-tools.config
@@ -0,0 +1,17 @@
+profiles {
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      $fastqdump {
+        container = "sratoolkit:2.8.2"
+      } }
+  }
+  sge {
+    process{
+      $fastqdump {
+        beforeScript = "module purge; module load SRAtoolkit/2.8.2"
+      }
+    }
+  }
+}
diff --git a/src/nf_modules/sra-tools/sra-tools.nf b/src/nf_modules/sra-tools/sra-tools.nf
new file mode 100644
index 00000000..0ffa4f35
--- /dev/null
+++ b/src/nf_modules/sra-tools/sra-tools.nf
@@ -0,0 +1,43 @@
+/*
+* sra-tools :
+
+*/
+
+/* fastq-dump
+* Inputs : csv file(s) with a 'run' header column listing the SRR ids
+* Outputs : fastq files
+*/
+
+params.list_srr = "$baseDir/data/SRR/*.txt"
+
+log.info "downloading list srr : ${params.list_srr}"
+
+Channel
+  .fromPath( params.list_srr )
+  .ifEmpty { error "Cannot find any SRR list files matching: ${params.list_srr}" }
+  .splitCsv(header: true)
+  .set { SRR }
+
+// 'run' is the name of the column containing the SRR ids
+
+ process fastqdump {
+  tag {"${x.run}"}
+  publishDir "results/download/fastq/${x.run}/", mode: 'copy'
+  input:
+  val x from SRR
+  output:
+  file("*") into fastq
+
+  script:
+
+  """
+
+  fastq-dump --split-files --defline-seq '@\$ac_\$si/\$ri' --defline-qual "+" ${x.run}
+  if [ -f ${x.run}_1.fastq ]
+  then
+    true
+  else
+    touch ${x.run}.fastq
+  fi
+"""
+ }
diff --git a/src/nf_modules/sra-tools/tests/fastqdump.nf b/src/nf_modules/sra-tools/tests/fastqdump.nf
new file mode 100644
index 00000000..8fdd50e4
--- /dev/null
+++ b/src/nf_modules/sra-tools/tests/fastqdump.nf
@@ -0,0 +1,43 @@
+/*
+* sra-tools :
+
+*/
+
+/* fastq-dump
+* Inputs : csv file(s) with a 'run' header column listing the SRR ids
+* Outputs : fastq files
+*/
+
+params.list_srr = "$baseDir/data/SRR/*.txt"
+
+log.info "downloading list srr : ${params.list_srr}"
+
+Channel
+  .fromPath( params.list_srr )
+  .ifEmpty { error "Cannot find any SRR list files matching: ${params.list_srr}" }
+  .splitCsv(header: true)
+  .set { SRR }
+
+// 'run' is the name of the column containing the SRR ids
+
+ process fastqdump {
+  tag {"${x.run}"}
+  publishDir "results/download/fastq/${x.run}/", mode: 'copy'
+  input:
+  val x from SRR
+  output:
+  file("*") into fastq
+
+  script:
+
+  """
+  #for test only 10000 reads are downloading with the option -N 10000 -X 20000
+  fastq-dump --split-files --defline-seq '@\$ac_\$si/\$ri' --defline-qual "+" -N 10000 -X 20000 ${x.run}
+  if [ -f ${x.run}_1.fastq ]
+  then
+    true
+  else
+    touch ${x.run}.fastq
+  fi
+"""
+ }
diff --git a/src/nf_modules/sra-tools/tests/list-srr.txt b/src/nf_modules/sra-tools/tests/list-srr.txt
new file mode 100644
index 00000000..a9cd1d0e
--- /dev/null
+++ b/src/nf_modules/sra-tools/tests/list-srr.txt
@@ -0,0 +1,7 @@
+run
+ERR572281
+ERR572146
+ERR572201
+ERR638114
+ERR638115
+ERR638116
diff --git a/src/nf_modules/sra-tools/tests/tests.sh b/src/nf_modules/sra-tools/tests/tests.sh
new file mode 100755
index 00000000..59e6c554
--- /dev/null
+++ b/src/nf_modules/sra-tools/tests/tests.sh
@@ -0,0 +1,4 @@
+nextflow run src/nf_modules/sra-tools/tests/fastqdump.nf \
+  -c src/nf_modules/sra-tools/sra-tools.config \
+  -profile docker \
+  --list_srr "src/nf_modules/sra-tools/tests/list-srr.txt"
-- 
GitLab