From f0d5b150e4a8aecf515b0cfb757f7ad14f7e93ab Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent.modolo@ens-lyon.fr>
Date: Mon, 4 Jun 2018 18:21:34 +0200
Subject: [PATCH] Kallisto: add nf and tests files

---
 src/nf_modules/Kallisto/kallisto.config       |  24 ++++
 src/nf_modules/Kallisto/kallisto.nf           | 122 ++++++++++++++++++
 src/nf_modules/Kallisto/tests/index.nf        |  27 ++++
 .../Kallisto/tests/mapping_paired.nf          |  36 ++++++
 .../Kallisto/tests/mapping_single.nf          |  41 ++++++
 src/nf_modules/Kallisto/tests/tests.sh        |  17 +++
 6 files changed, 267 insertions(+)
 create mode 100644 src/nf_modules/Kallisto/kallisto.config
 create mode 100644 src/nf_modules/Kallisto/kallisto.nf
 create mode 100644 src/nf_modules/Kallisto/tests/index.nf
 create mode 100644 src/nf_modules/Kallisto/tests/mapping_paired.nf
 create mode 100644 src/nf_modules/Kallisto/tests/mapping_single.nf
 create mode 100644 src/nf_modules/Kallisto/tests/tests.sh

diff --git a/src/nf_modules/Kallisto/kallisto.config b/src/nf_modules/Kallisto/kallisto.config
new file mode 100644
index 0000000..85eab6e
--- /dev/null
+++ b/src/nf_modules/Kallisto/kallisto.config
@@ -0,0 +1,24 @@
+profiles { // execution profiles, selected on the command line with -profile
+  docker { // run both processes inside the kallisto:0.43.1 container image
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      $index_fasta { // settings scoped to the index_fasta process
+        container = "kallisto:0.43.1"
+      }
+      $mapping_fastq { // settings scoped to the mapping_fastq process
+        container = "kallisto:0.43.1"
+      }
+    }
+  }
+  sge { // NOTE(review): no executor is set in this profile — confirm it is configured elsewhere
+    process{
+      $index_fasta { // load the kallisto 0.43.1 environment module before the process script
+        beforeScript = "module purge; module load kallisto/0.43.1"
+      }
+      $mapping_fastq { // same module setup for the quantification step
+        beforeScript = "module purge; module load kallisto/0.43.1"
+      }
+    }
+  }
+}
diff --git a/src/nf_modules/Kallisto/kallisto.nf b/src/nf_modules/Kallisto/kallisto.nf
new file mode 100644
index 0000000..922d975
--- /dev/null
+++ b/src/nf_modules/Kallisto/kallisto.nf
@@ -0,0 +1,122 @@
+/*
+* Kallisto :
+* Inputs : fastq files
+* Inputs : fasta files
+* Output : index files and quantification results
+*/
+
+/*                      fasta indexing                                     */
+params.fasta = "$baseDir/data/bam/*.fasta" // default glob of fasta files to index; override with --fasta
+
+log.info "fasta files : ${params.fasta}"
+
+Channel
+  .fromPath( params.fasta )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" } // abort when the glob matches nothing
+  .set { fasta_file }
+
+process index_fasta { // builds one kallisto index per fasta file received from fasta_file
+  tag "$fasta.baseName"
+  cpus 4 // NOTE(review): 4 cpus are requested but the command below passes no thread option — confirm
+  publishDir "results/mapping/index/", mode: 'copy' // declared outputs are copied into this directory
+
+  input:
+    file fasta from fasta_file
+
+  output:
+    file "*.index*" into index_files // only files matching *.index* are sent downstream
+
+  script: // NOTE(review): only stdout is redirected to the report file (stderr is not) — confirm intent
+"""
+kallisto index -k 31 --make-unique -i ${fasta.baseName}.index ${fasta} \
+> ${fasta.baseName}_kallisto_report.txt
+"""
+}
+
+
+/*
+* for paired-end data
+*/
+
+params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq" // default glob for paired fastq files (_1 / _2 suffixes)
+params.index = "$baseDir/data/index/*.index.*" // default glob for the kallisto index file(s)
+
+log.info "fastq files : ${params.fastq}"
+log.info "index files : ${params.index}"
+
+Channel
+  .fromFilePairs( params.fastq ) // consumed below as (pair_id, reads) by mapping_fastq
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+  .set { fastq_files }
+Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
+  .set { index_files } // NOTE(review): index_files is also the output channel of index_fasta — confirm sections are used separately
+
+process mapping_fastq { // paired-end quantification: one kallisto quant run per read pair
+  tag "$pair_id"
+  cpus 4 // forwarded to kallisto through -t ${task.cpus}
+  publishDir "results/mapping/quantification/", mode: 'copy'
+
+  input:
+  set pair_id, file(reads) from fastq_files
+  file index from index_files.collect() // all index files are gathered into a single input item
+
+  output:
+  file "*" into counts_files // wildcard output declaration
+
+  script: // results go to a directory named after the pair id; stdout and stderr go to the report file
+"""
+mkdir ${pair_id}
+kallisto quant -i ${index} -t ${task.cpus} \
+--bias --bootstrap-samples 100 -o ${pair_id} \
+${reads[0]} ${reads[1]} &> ${pair_id}_kallisto_report.txt
+"""
+}
+
+
+/*
+* for single-end data
+*/
+
+params.fastq = "$baseDir/data/fastq/*.fastq" // default glob for single-end fastq files
+params.index = "$baseDir/data/index/*.index*" // default glob for the kallisto index file(s)
+params.mean = 200 // passed to kallisto quant as -l
+params.sd = 100 // passed to kallisto quant as -s
+
+log.info "fastq files : ${params.fastq}"
+log.info "index files : ${params.index}"
+log.info "mean read size: ${params.mean}"
+log.info "sd read size: ${params.sd}"
+
+Channel
+  .fromPath( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+  .set { fastq_files } // NOTE(review): same channel name as the paired-end section — confirm only one section is used at a time
+Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" }
+  .set { index_files }
+
+process mapping_fastq { // single-end quantification: one kallisto quant run per fastq file
+  tag "$reads.baseName"
+  cpus 4 // forwarded to kallisto through -t ${task.cpus}
+  publishDir "results/mapping/quantification/", mode: 'copy'
+
+  input:
+  file reads from fastq_files
+  file index from index_files.collect() // all index files are gathered into a single input item
+
+  output:
+  file "*" into count_files // wildcard output declaration
+
+  script: // -l / -s come from params.mean / params.sd; results go to a directory named after the reads file
+"""
+mkdir ${reads.baseName}
+kallisto quant -i ${index} -t ${task.cpus} --single \
+--bias --bootstrap-samples 100 -o ${reads.baseName} \
+-l ${params.mean} -s ${params.sd} \
+${reads} > ${reads.baseName}_kallisto_report.txt
+"""
+}
+
diff --git a/src/nf_modules/Kallisto/tests/index.nf b/src/nf_modules/Kallisto/tests/index.nf
new file mode 100644
index 0000000..cae4bb0
--- /dev/null
+++ b/src/nf_modules/Kallisto/tests/index.nf
@@ -0,0 +1,27 @@
+params.fasta = "$baseDir/data/bam/*.fasta" // default glob of fasta files to index; override with --fasta
+
+log.info "fasta files : ${params.fasta}"
+
+Channel
+  .fromPath( params.fasta )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" } // abort when the glob matches nothing
+  .set { fasta_file }
+
+process index_fasta { // builds one kallisto index per fasta file received from fasta_file
+  tag "$fasta.baseName"
+  cpus 4 // NOTE(review): 4 cpus are requested but the command below passes no thread option — confirm
+  publishDir "results/mapping/index/", mode: 'copy' // declared outputs are copied into this directory
+
+  input:
+    file fasta from fasta_file
+
+  output:
+    file "*.index*" into index_files // only files matching *.index* are sent downstream
+
+  script: // NOTE(review): only stdout is redirected to the report file (stderr is not) — confirm intent
+"""
+kallisto index -k 31 --make-unique -i ${fasta.baseName}.index ${fasta} \
+> ${fasta.baseName}_kallisto_report.txt
+"""
+}
+
diff --git a/src/nf_modules/Kallisto/tests/mapping_paired.nf b/src/nf_modules/Kallisto/tests/mapping_paired.nf
new file mode 100644
index 0000000..447577d
--- /dev/null
+++ b/src/nf_modules/Kallisto/tests/mapping_paired.nf
@@ -0,0 +1,36 @@
+params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq" // default glob for paired fastq files (_1 / _2 suffixes)
+params.index = "$baseDir/data/index/*.index.*" // default glob for the kallisto index file(s)
+
+log.info "fastq files : ${params.fastq}"
+log.info "index files : ${params.index}"
+
+Channel
+  .fromFilePairs( params.fastq ) // consumed below as (pair_id, reads) by mapping_fastq
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" }
+  .set { fastq_files }
+Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" } // abort when no index is found
+  .set { index_files }
+
+process mapping_fastq { // paired-end quantification: one kallisto quant run per read pair
+  tag "$pair_id"
+  cpus 4 // forwarded to kallisto through -t ${task.cpus}
+  publishDir "results/mapping/quantification/", mode: 'copy'
+
+  input:
+  set pair_id, file(reads) from fastq_files
+  file index from index_files.collect() // all index files are gathered into a single input item
+
+  output:
+  file "*" into counts_files // wildcard output declaration
+
+  script: // results go to a directory named after the pair id; stdout and stderr go to the report file
+"""
+mkdir ${pair_id}
+kallisto quant -i ${index} -t ${task.cpus} \
+--bias --bootstrap-samples 100 -o ${pair_id} \
+${reads[0]} ${reads[1]} &> ${pair_id}_kallisto_report.txt
+"""
+}
+
diff --git a/src/nf_modules/Kallisto/tests/mapping_single.nf b/src/nf_modules/Kallisto/tests/mapping_single.nf
new file mode 100644
index 0000000..69e3a6f
--- /dev/null
+++ b/src/nf_modules/Kallisto/tests/mapping_single.nf
@@ -0,0 +1,41 @@
+params.fastq = "$baseDir/data/fastq/*.fastq" // default glob for single-end fastq files
+params.index = "$baseDir/data/index/*.index*" // default glob for the kallisto index file(s)
+params.mean = 200 // passed to kallisto quant as -l
+params.sd = 100 // passed to kallisto quant as -s
+
+log.info "fastq files : ${params.fastq}"
+log.info "index files : ${params.index}"
+log.info "mean read size: ${params.mean}"
+log.info "sd read size: ${params.sd}"
+
+Channel
+  .fromPath( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } // abort when no fastq is found
+  .set { fastq_files }
+Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" } // abort when no index is found
+  .set { index_files }
+
+process mapping_fastq { // single-end quantification: one kallisto quant run per fastq file
+  tag "$reads.baseName"
+  cpus 4 // forwarded to kallisto through -t ${task.cpus}
+  publishDir "results/mapping/quantification/", mode: 'copy'
+
+  input:
+  file reads from fastq_files
+  file index from index_files.collect() // all index files are gathered into a single input item
+
+  output:
+  file "*" into count_files // wildcard output declaration
+
+  script: // NOTE(review): only stdout is redirected to the report, whereas mapping_paired.nf uses &> — confirm intent
+"""
+mkdir ${reads.baseName}
+kallisto quant -i ${index} -t ${task.cpus} --single \
+--bias --bootstrap-samples 100 -o ${reads.baseName} \
+-l ${params.mean} -s ${params.sd} \
+${reads} > ${reads.baseName}_kallisto_report.txt
+"""
+}
+
diff --git a/src/nf_modules/Kallisto/tests/tests.sh b/src/nf_modules/Kallisto/tests/tests.sh
new file mode 100644
index 0000000..3d49f1e
--- /dev/null
+++ b/src/nf_modules/Kallisto/tests/tests.sh
@@ -0,0 +1,17 @@
+nextflow src/nf_modules/Kallisto/tests/index.nf \
+  -c src/nf_modules/Kallisto/kallisto.config \
+  -profile docker \
+  --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" # step 1: build the index; paths are relative (presumably to the repository root)
+
+nextflow src/nf_modules/Kallisto/tests/mapping_single.nf \
+  -c src/nf_modules/Kallisto/kallisto.config \
+  -profile docker \
+  --index "results/mapping/index/tiny_v2.index" \
+  --fastq "data/tiny_dataset/fastq/tiny_S.fastq" # step 2: single-end quantification using the index published by step 1
+
+nextflow src/nf_modules/Kallisto/tests/mapping_paired.nf \
+  -c src/nf_modules/Kallisto/kallisto.config \
+  -profile docker \
+  --index "results/mapping/index/tiny_v2.index" \
+  --fastq "data/tiny_dataset/fastq/tiny_R{1,2}.fastq" # step 3: paired-end quantification using the same index
+
-- 
GitLab