From f0d5b150e4a8aecf515b0cfb757f7ad14f7e93ab Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent.modolo@ens-lyon.fr>
Date: Mon, 4 Jun 2018 18:21:34 +0200
Subject: [PATCH] Kallisto: add nf and tests files

---
Notes:
    Review fixes folded into this patch (added-line counts per file unchanged):
    - kallisto.nf, single-end mapping_fastq: restore the missing line
      continuation after --single, drop the duplicated "-o ./" and pass the
      index file itself to -i, as tests/mapping_single.nf already does.
    - kallisto.nf, tests/index.nf: the empty-channel error for params.fasta
      now says "fasta files" instead of "bam files".
    - kallisto.nf: header comment typo (Imputs) and output description.

 src/nf_modules/Kallisto/kallisto.config       |  24 ++++
 src/nf_modules/Kallisto/kallisto.nf           | 122 ++++++++++++++++++
 src/nf_modules/Kallisto/tests/index.nf        |  27 ++++
 .../Kallisto/tests/mapping_paired.nf          |  36 ++++++
 .../Kallisto/tests/mapping_single.nf          |  41 ++++++
 src/nf_modules/Kallisto/tests/tests.sh        |  17 +++
 6 files changed, 267 insertions(+)
 create mode 100644 src/nf_modules/Kallisto/kallisto.config
 create mode 100644 src/nf_modules/Kallisto/kallisto.nf
 create mode 100644 src/nf_modules/Kallisto/tests/index.nf
 create mode 100644 src/nf_modules/Kallisto/tests/mapping_paired.nf
 create mode 100644 src/nf_modules/Kallisto/tests/mapping_single.nf
 create mode 100644 src/nf_modules/Kallisto/tests/tests.sh

diff --git a/src/nf_modules/Kallisto/kallisto.config b/src/nf_modules/Kallisto/kallisto.config
new file mode 100644
index 0000000..85eab6e
--- /dev/null
+++ b/src/nf_modules/Kallisto/kallisto.config
@@ -0,0 +1,24 @@
+profiles {
+  docker {
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      $index_fasta {
+        container = "kallisto:0.43.1"
+      }
+      $mapping_fastq {
+        container = "kallisto:0.43.1"
+      }
+    }
+  }
+  sge {
+    process{
+      $index_fasta {
+        beforeScript = "module purge; module load kallisto/0.43.1"
+      }
+      $mapping_fastq {
+        beforeScript = "module purge; module load kallisto/0.43.1"
+      }
+    }
+  }
+}
diff --git a/src/nf_modules/Kallisto/kallisto.nf b/src/nf_modules/Kallisto/kallisto.nf
new file mode 100644
index 0000000..922d975
--- /dev/null
+++ b/src/nf_modules/Kallisto/kallisto.nf
@@ -0,0 +1,122 @@
+/*
+* Kallisto :
+* Inputs : fastq files
+* Inputs : fasta files
+* Output : counts files
+*/
+
+/* fasta indexing */
+params.fasta = "$baseDir/data/bam/*.fasta"
+
+log.info "fasta files : ${params.fasta}"
+
+Channel
+  .fromPath( params.fasta )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
+  .set { fasta_file }
+
+process index_fasta {
+  tag "$fasta.baseName"
+  cpus 4
+  publishDir "results/mapping/index/", mode: 'copy'
+
+  input:
+    file fasta from fasta_file
+
+  output:
+    file "*.index*" into index_files
+
+  script:
+"""
+kallisto index -k 31 --make-unique -i ${fasta.baseName}.index ${fasta} \
+> ${fasta.baseName}_kallisto_report.txt
+"""
+}
+
+
+/*
+* for paired-end data
+*/
+
+params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq"
+params.index = "$baseDir/data/index/*.index.*"
+
+log.info "fastq files : ${params.fastq}"
+log.info "index files : ${params.index}"
+
+Channel
+  .fromFilePairs( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+  .set { fastq_files }
+Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
+  .set { index_files }
+
+process mapping_fastq {
+  tag "$pair_id"
+  cpus 4
+  publishDir "results/mapping/quantification/", mode: 'copy'
+
+  input:
+    set pair_id, file(reads) from fastq_files
+    file index from index_files.collect()
+
+  output:
+    file "*" into counts_files
+
+  script:
+"""
+mkdir ${pair_id}
+kallisto quant -i ${index} -t ${task.cpus} \
+--bias --bootstrap-samples 100 -o ${pair_id} \
+${reads[0]} ${reads[1]} &> ${pair_id}_kallisto_report.txt
+"""
+}
+
+
+/*
+* for single-end data
+*/
+
+params.fastq = "$baseDir/data/fastq/*.fastq"
+params.index = "$baseDir/data/index/*.index*"
+params.mean = 200
+params.sd = 100
+
+log.info "fastq files : ${params.fastq}"
+log.info "index files : ${params.index}"
+log.info "mean read size: ${params.mean}"
+log.info "sd read size: ${params.sd}"
+
+Channel
+  .fromPath( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+  .set { fastq_files }
+Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
+  .set { index_files }
+
+process mapping_fastq {
+  tag "$reads.baseName"
+  cpus 4
+  publishDir "results/mapping/quantification/", mode: 'copy'
+
+  input:
+    file reads from fastq_files
+    file index from index_files.collect()
+
+  output:
+    file "*" into count_files
+
+  script:
+"""
+mkdir ${reads.baseName}
+kallisto quant -i ${index} -t ${task.cpus} --single \
+--bias --bootstrap-samples 100 -o ${reads.baseName} \
+-l ${params.mean} -s ${params.sd} \
+${reads} > ${reads.baseName}_kallisto_report.txt
+"""
+}
+
diff --git a/src/nf_modules/Kallisto/tests/index.nf b/src/nf_modules/Kallisto/tests/index.nf
new file mode 100644
index 0000000..cae4bb0
--- /dev/null
+++ b/src/nf_modules/Kallisto/tests/index.nf
@@ -0,0 +1,27 @@
+params.fasta = "$baseDir/data/bam/*.fasta"
+
+log.info "fasta files : ${params.fasta}"
+
+Channel
+  .fromPath( params.fasta )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" }
+  .set { fasta_file }
+
+process index_fasta {
+  tag "$fasta.baseName"
+  cpus 4
+  publishDir "results/mapping/index/", mode: 'copy'
+
+  input:
+    file fasta from fasta_file
+
+  output:
+    file "*.index*" into index_files
+
+  script:
+"""
+kallisto index -k 31 --make-unique -i ${fasta.baseName}.index ${fasta} \
+> ${fasta.baseName}_kallisto_report.txt
+"""
+}
+
diff --git a/src/nf_modules/Kallisto/tests/mapping_paired.nf b/src/nf_modules/Kallisto/tests/mapping_paired.nf
new file mode 100644
index 0000000..447577d
--- /dev/null
+++ b/src/nf_modules/Kallisto/tests/mapping_paired.nf
@@ -0,0 +1,36 @@
+params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq"
+params.index = "$baseDir/data/index/*.index.*"
+
+log.info "fastq files : ${params.fastq}"
+log.info "index files : ${params.index}"
+
+Channel
+  .fromFilePairs( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+  .set { fastq_files }
+Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
+  .set { index_files }
+
+process mapping_fastq {
+  tag "$pair_id"
+  cpus 4
+  publishDir "results/mapping/quantification/", mode: 'copy'
+
+  input:
+    set pair_id, file(reads) from fastq_files
+    file index from index_files.collect()
+
+  output:
+    file "*" into counts_files
+
+  script:
+"""
+mkdir ${pair_id}
+kallisto quant -i ${index} -t ${task.cpus} \
+--bias --bootstrap-samples 100 -o ${pair_id} \
+${reads[0]} ${reads[1]} &> ${pair_id}_kallisto_report.txt
+"""
+}
+
diff --git a/src/nf_modules/Kallisto/tests/mapping_single.nf b/src/nf_modules/Kallisto/tests/mapping_single.nf
new file mode 100644
index 0000000..69e3a6f
--- /dev/null
+++ b/src/nf_modules/Kallisto/tests/mapping_single.nf
@@ -0,0 +1,41 @@
+params.fastq = "$baseDir/data/fastq/*.fastq"
+params.index = "$baseDir/data/index/*.index*"
+params.mean = 200
+params.sd = 100
+
+log.info "fastq files : ${params.fastq}"
+log.info "index files : ${params.index}"
+log.info "mean read size: ${params.mean}"
+log.info "sd read size: ${params.sd}"
+
+Channel
+  .fromPath( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+  .set { fastq_files }
+Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
+  .set { index_files }
+
+process mapping_fastq {
+  tag "$reads.baseName"
+  cpus 4
+  publishDir "results/mapping/quantification/", mode: 'copy'
+
+  input:
+    file reads from fastq_files
+    file index from index_files.collect()
+
+  output:
+    file "*" into count_files
+
+  script:
+"""
+mkdir ${reads.baseName}
+kallisto quant -i ${index} -t ${task.cpus} --single \
+--bias --bootstrap-samples 100 -o ${reads.baseName} \
+-l ${params.mean} -s ${params.sd} \
+${reads} > ${reads.baseName}_kallisto_report.txt
+"""
+}
+
diff --git a/src/nf_modules/Kallisto/tests/tests.sh b/src/nf_modules/Kallisto/tests/tests.sh
new file mode 100644
index 0000000..3d49f1e
--- /dev/null
+++ b/src/nf_modules/Kallisto/tests/tests.sh
@@ -0,0 +1,17 @@
+nextflow src/nf_modules/Kallisto/tests/index.nf \
+  -c src/nf_modules/Kallisto/kallisto.config \
+  -profile docker \
+  --fasta "data/tiny_dataset/fasta/tiny_v2.fasta"
+
+nextflow src/nf_modules/Kallisto/tests/mapping_single.nf \
+  -c src/nf_modules/Kallisto/kallisto.config \
+  -profile docker \
+  --index "results/mapping/index/tiny_v2.index" \
+  --fastq "data/tiny_dataset/fastq/tiny_S.fastq"
+
+nextflow src/nf_modules/Kallisto/tests/mapping_paired.nf \
+  -c src/nf_modules/Kallisto/kallisto.config \
+  -profile docker \
+  --index "results/mapping/index/tiny_v2.index" \
+  --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq"
+
-- 
GitLab