From 4ff7d5659f7ec1d955ec88b69bcbbdcea3f5d7d6 Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent@modolo.fr>
Date: Wed, 20 Nov 2019 14:02:33 +0100
Subject: [PATCH] nf_modules: add STAR

---
 doc/available_tools.md                    |  1 +
 src/nf_modules/star/indexing.config       | 54 +++++++++++++++++++++++
 src/nf_modules/star/indexing.nf           | 36 +++++++++++++++
 src/nf_modules/star/mapping_paired.config | 54 +++++++++++++++++++++++
 src/nf_modules/star/mapping_paired.nf     | 40 +++++++++++++++++
 src/nf_modules/star/mapping_single.config | 54 +++++++++++++++++++++++
 src/nf_modules/star/mapping_single.nf     | 39 ++++++++++++++++
 src/nf_modules/star/tests.sh              | 43 ++++++++++++++++++
 8 files changed, 321 insertions(+)
 create mode 100644 src/nf_modules/star/indexing.config
 create mode 100644 src/nf_modules/star/indexing.nf
 create mode 100644 src/nf_modules/star/mapping_paired.config
 create mode 100644 src/nf_modules/star/mapping_paired.nf
 create mode 100644 src/nf_modules/star/mapping_single.config
 create mode 100644 src/nf_modules/star/mapping_single.nf
 create mode 100755 src/nf_modules/star/tests.sh

diff --git a/doc/available_tools.md b/doc/available_tools.md
index 10b97ea3..297cc27e 100644
--- a/doc/available_tools.md
+++ b/doc/available_tools.md
@@ -34,6 +34,7 @@ sambamba | ok | ok | ok | ok
 samblaster | ok | ok | ok | ok
 SAMtools | ok | ok | ok | ok
 SRAtoolkit | ok | ok | ok | ok
+STAR | ok | ok | ok | ok
 subread | **no** | ok | ok | ok
 TopHat | **no** | ok | ok | ok
 Trimmomatic | **no** | ok | ok | ok
diff --git a/src/nf_modules/star/indexing.config b/src/nf_modules/star/indexing.config
new file mode 100644
index 00000000..3e4017b9
--- /dev/null
+++ b/src/nf_modules/star/indexing.config
@@ -0,0 +1,54 @@
+profiles { // execution profiles for the STAR index_fasta process, selected with -profile
+  docker { // runs index_fasta in the lbmc/star:2.7.3a container with docker enabled
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      withName: index_fasta { // settings scoped to the process named index_fasta
+        container = "lbmc/star:2.7.3a"
+        cpus = 4
+      }
+    }
+  }
+  singularity { // same container image and cpus as the docker profile, with singularity enabled
+    singularity.enabled = true
+    singularity.cacheDir = "./bin/" // NOTE(review): relative path, presumably resolved from the launch directory - confirm
+    process {
+      withName: index_fasta {
+        container = "lbmc/star:2.7.3a"
+        cpus = 4
+      }
+    }
+  }
+  psmn{ // SGE profile that relies on a conda environment instead of a container
+    process{
+      withName: index_fasta {
+        beforeScript = "source $baseDir/.conda_psmn.sh" // sourced before the task script
+        conda = "$baseDir/.conda_envs/star_2.7.3a"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 16
+        memory = "20GB"
+        time = "12h"
+        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
+        penv = 'openmp16' // NOTE(review): presumably the parallel environment matching cpus = 16 - confirm
+      }
+    }
+  }
+  ccin2p3 { // SGE profile that runs the container image through singularity
+    singularity.enabled = true
+    singularity.cacheDir = "/sps/lbmc/common/singularity/"
+    singularity.runOptions = "--bind /pbs,/sps,/scratch"
+    process{
+      withName: index_fasta {
+        container = "lbmc/star:2.7.3a"
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "sge"
+        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
+        cpus = 1 // NOTE(review): a single cpu, while the other profiles request 4 or 16 - confirm intended
+        queue = 'huge'
+      }
+    }
+  }
+}
diff --git a/src/nf_modules/star/indexing.nf b/src/nf_modules/star/indexing.nf
new file mode 100644
index 00000000..0f340b2d
--- /dev/null
+++ b/src/nf_modules/star/indexing.nf
@@ -0,0 +1,36 @@
+params.fasta = "$baseDir/data/bam/*.fasta" // glob of the genome fasta file(s) to index
+params.annotation = "$baseDir/data/bam/*.gtf" // glob of the GTF annotation file(s)
+params.genomeSAindexNbases = 3 // STAR rule: min(14, log2(GenomeLength)/2 - 1); 3 is the historical default, raise it for larger genomes
+
+log.info "fasta files : ${params.fasta}"
+log.info "annotation files : ${params.annotation}"
+
+Channel
+  .fromPath( params.fasta )
+  .ifEmpty { error "Cannot find any fasta files matching: ${params.fasta}" } // fail early when the glob matches nothing
+  .set { fasta_file }
+Channel
+  .fromPath( params.annotation )
+  .ifEmpty { error "Cannot find any annotation files matching: ${params.annotation}" } // fail early when the glob matches nothing
+  .set { annotation_file }
+
+process index_fasta { // builds a STAR genome index from one fasta and one GTF file
+  tag "$fasta.baseName"
+  publishDir "results/mapping/index/", mode: 'copy' // outputs are copied under results/mapping/index/
+
+  input:
+    file fasta from fasta_file
+    file annotation from annotation_file
+
+  output:
+    file "*" into index_files // everything matching * in the task directory
+
+  script:
+"""
+STAR --runThreadN ${task.cpus} --runMode genomeGenerate \
+--genomeDir ./ \
+--genomeFastaFiles ${fasta} \
+--sjdbGTFfile ${annotation} \
+--genomeSAindexNbases ${params.genomeSAindexNbases}
+"""
+}
diff --git a/src/nf_modules/star/mapping_paired.config b/src/nf_modules/star/mapping_paired.config
new file mode 100644
index 00000000..c2de5c50
--- /dev/null
+++ b/src/nf_modules/star/mapping_paired.config
@@ -0,0 +1,54 @@
+profiles { // execution profiles for the STAR mapping_fastq process, selected with -profile
+  docker { // runs mapping_fastq in the lbmc/star:2.7.3a container with docker enabled
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      withName: mapping_fastq { // settings scoped to the process named mapping_fastq
+        container = "lbmc/star:2.7.3a"
+        cpus = 4
+      }
+    }
+  }
+  singularity { // same container image and cpus as the docker profile, with singularity enabled
+    singularity.enabled = true
+    singularity.cacheDir = "./bin/" // NOTE(review): relative path, presumably resolved from the launch directory - confirm
+    process {
+      withName: mapping_fastq {
+        container = "lbmc/star:2.7.3a"
+        cpus = 4
+      }
+    }
+  }
+  psmn{ // SGE profile that relies on a conda environment instead of a container
+    process{
+      withName: mapping_fastq {
+        beforeScript = "source $baseDir/.conda_psmn.sh" // sourced before the task script
+        conda = "$baseDir/.conda_envs/star_2.7.3a"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 16
+        memory = "30GB"
+        time = "24h"
+        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
+        penv = 'openmp16' // NOTE(review): presumably the parallel environment matching cpus = 16 - confirm
+      }
+    }
+  }
+  ccin2p3 { // SGE profile that runs the container image through singularity
+    singularity.enabled = true
+    singularity.cacheDir = "/sps/lbmc/common/singularity/"
+    singularity.runOptions = "--bind /pbs,/sps,/scratch"
+    process{
+      withName: mapping_fastq {
+        container = "lbmc/star:2.7.3a"
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "sge"
+        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
+        cpus = 1 // NOTE(review): a single cpu, while the other profiles request 4 or 16 - confirm intended
+        queue = 'huge'
+      }
+    }
+  }
+}
diff --git a/src/nf_modules/star/mapping_paired.nf b/src/nf_modules/star/mapping_paired.nf
new file mode 100644
index 00000000..9ea90175
--- /dev/null
+++ b/src/nf_modules/star/mapping_paired.nf
@@ -0,0 +1,40 @@
+params.fastq = "$baseDir/data/fastq/*_{1,2}.fastq" // glob matching both mates (_1 / _2) of each read pair
+params.index = "$baseDir/data/index/*.index.*" // glob of genome index files; NOTE(review): confirm it matches the files published by indexing.nf
+
+log.info "fastq files : ${params.fastq}"
+log.info "index files : ${params.index}"
+
+Channel
+  .fromFilePairs( params.fastq ) // items are consumed below as (pair_id, reads)
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } // fail early when the glob matches nothing
+  .set { fastq_files }
+Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" } // fail early when the glob matches nothing
+  .set { index_files }
+
+process mapping_fastq { // maps one read pair with STAR and emits a coordinate-sorted BAM
+  tag "$pair_id"
+  publishDir "results/mapping/bams/", mode: 'copy' // outputs are copied under results/mapping/bams/
+
+  input:
+  set pair_id, file(reads) from fastq_files
+  file index from index_files.collect() // collect() gathers all index files so they are staged together
+
+  output:
+  set pair_id, "*.bam" into bam_files
+  file "*.out" into mapping_report // files ending in .out - presumably STAR's Log*.out reports; confirm
+
+  script:
+"""
+mkdir -p index
+mv ${index} index/
+STAR --runThreadN ${task.cpus} \
+--genomeDir index/ \
+--readFilesIn ${reads[0]} ${reads[1]} \
+--outFileNamePrefix ${pair_id} \
+--outSAMmapqUnique 0 \
+--outSAMtype BAM SortedByCoordinate
+"""
+} // NOTE(review): --outSAMmapqUnique 0 sets the MAPQ of unique mappers to 0 (STAR default is 255) - confirm intended
+
diff --git a/src/nf_modules/star/mapping_single.config b/src/nf_modules/star/mapping_single.config
new file mode 100644
index 00000000..c2de5c50
--- /dev/null
+++ b/src/nf_modules/star/mapping_single.config
@@ -0,0 +1,54 @@
+profiles { // execution profiles for the STAR mapping_fastq process, selected with -profile
+  docker { // runs mapping_fastq in the lbmc/star:2.7.3a container with docker enabled
+    docker.temp = 'auto'
+    docker.enabled = true
+    process {
+      withName: mapping_fastq { // settings scoped to the process named mapping_fastq
+        container = "lbmc/star:2.7.3a"
+        cpus = 4
+      }
+    }
+  }
+  singularity { // same container image and cpus as the docker profile, with singularity enabled
+    singularity.enabled = true
+    singularity.cacheDir = "./bin/" // NOTE(review): relative path, presumably resolved from the launch directory - confirm
+    process {
+      withName: mapping_fastq {
+        container = "lbmc/star:2.7.3a"
+        cpus = 4
+      }
+    }
+  }
+  psmn{ // SGE profile that relies on a conda environment instead of a container
+    process{
+      withName: mapping_fastq {
+        beforeScript = "source $baseDir/.conda_psmn.sh" // sourced before the task script
+        conda = "$baseDir/.conda_envs/star_2.7.3a"
+        executor = "sge"
+        clusterOptions = "-cwd -V"
+        cpus = 16
+        memory = "30GB"
+        time = "24h"
+        queue = 'E5-2670deb128A,E5-2670deb128B,E5-2670deb128C,E5-2670deb128D,E5-2670deb128E,E5-2670deb128F'
+        penv = 'openmp16' // NOTE(review): presumably the parallel environment matching cpus = 16 - confirm
+      }
+    }
+  }
+  ccin2p3 { // SGE profile that runs the container image through singularity
+    singularity.enabled = true
+    singularity.cacheDir = "/sps/lbmc/common/singularity/"
+    singularity.runOptions = "--bind /pbs,/sps,/scratch"
+    process{
+      withName: mapping_fastq {
+        container = "lbmc/star:2.7.3a"
+        scratch = true
+        stageInMode = "copy"
+        stageOutMode = "rsync"
+        executor = "sge"
+        clusterOptions = "-P P_lbmc -l os=cl7 -l sps=1 -r n"
+        cpus = 1 // NOTE(review): a single cpu, while the other profiles request 4 or 16 - confirm intended
+        queue = 'huge'
+      }
+    }
+  }
+}
diff --git a/src/nf_modules/star/mapping_single.nf b/src/nf_modules/star/mapping_single.nf
new file mode 100644
index 00000000..9d3d51b3
--- /dev/null
+++ b/src/nf_modules/star/mapping_single.nf
@@ -0,0 +1,39 @@
+params.fastq = "$baseDir/data/fastq/*.fastq" // glob of single-end fastq files
+params.index = "$baseDir/data/index/*.index.*" // same default as mapping_paired.nf so params.index is never null; NOTE(review): confirm it matches STAR index files
+log.info "fastq files : ${params.fastq}"
+log.info "index files : ${params.index}"
+
+Channel
+  .fromPath( params.fastq )
+  .ifEmpty { error "Cannot find any fastq files matching: ${params.fastq}" } // fail early when the glob matches nothing
+  .map { it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]} // key each file by its base name up to the first dot
+  .set { fastq_files }
+Channel
+  .fromPath( params.index )
+  .ifEmpty { error "Cannot find any index files matching: ${params.index}" } // fail early when the glob matches nothing
+  .set { index_files }
+
+process mapping_fastq { // maps one single-end fastq with STAR and emits a coordinate-sorted BAM
+  tag "$file_id"
+  publishDir "results/mapping/bams/", mode: 'copy' // outputs are copied under results/mapping/bams/
+
+  input:
+  set file_id, file(reads) from fastq_files
+  file index from index_files.collect() // collect() gathers all index files so they are staged together
+
+  output:
+  set file_id, "*.bam" into bam_files
+  file "*.out" into mapping_report // files ending in .out - presumably STAR's Log*.out reports; confirm
+
+  script:
+"""
+mkdir -p index
+mv ${index} index/
+STAR --runThreadN ${task.cpus} \
+--genomeDir index/ \
+--readFilesIn ${reads} \
+--outFileNamePrefix ${file_id} \
+--outSAMmapqUnique 0 \
+--outSAMtype BAM SortedByCoordinate
+"""
+}
diff --git a/src/nf_modules/star/tests.sh b/src/nf_modules/star/tests.sh
new file mode 100755
index 00000000..046ffe4f
--- /dev/null
+++ b/src/nf_modules/star/tests.sh
@@ -0,0 +1,43 @@
+#!/bin/sh
+# Smoke tests for the STAR nextflow modules, run on the tiny dataset.
+# Paths are relative: launch this script from the repository root.
+# It expects the ./nextflow launcher to be present there.
+#
+# Stop at the first failing command: without this, the exit status of the
+# script would only reflect its last command and earlier failures were lost.
+set -e
+
+# Index the genome, then map single-end and paired-end reads against that
+# index, using the nextflow profile given as first argument.
+run_star_tests() {
+  profile="$1"
+
+  ./nextflow src/nf_modules/star/indexing.nf \
+    -c src/nf_modules/star/indexing.config \
+    -profile "${profile}" \
+    --fasta "data/tiny_dataset/fasta/tiny_v2.fasta" \
+    --annotation "data/tiny_dataset/annot/tiny.gtf" \
+    -resume
+
+  ./nextflow src/nf_modules/star/mapping_single.nf \
+    -c src/nf_modules/star/mapping_single.config \
+    -profile "${profile}" \
+    --index "results/mapping/index/*" \
+    --fastq "data/tiny_dataset/fastq/tiny*_S.fastq" \
+    -resume
+
+  ./nextflow src/nf_modules/star/mapping_paired.nf \
+    -c src/nf_modules/star/mapping_paired.config \
+    -profile "${profile}" \
+    --index "results/mapping/index/*" \
+    --fastq "data/tiny_dataset/fastq/tiny*_R{1,2}.fastq" \
+    -resume
+}
+
+# The docker profile is always tested.
+run_star_tests docker
+
+# The singularity profile is tested only when singularity is installed.
+if [ -x "$(command -v singularity)" ]; then
+  run_star_tests singularity
+fi
-- 
GitLab