From ff033f30f46bd1e1e6d9cf170e832fae5282b6df Mon Sep 17 00:00:00 2001
From: Xavier Grand <157-xgrand@users.noreply.gitbio.ens-lyon.fr>
Date: Mon, 25 Jul 2022 13:48:11 +0200
Subject: [PATCH] add STAR and HTseq to RNAseq_XGR.nf

---
 src/RNAseq_XGR.nf                  |  1 +
 src/nf_modules/star/main_2.7.8a.nf | 53 ++++++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/src/RNAseq_XGR.nf b/src/RNAseq_XGR.nf
index 2992340..45b2ace 100644
--- a/src/RNAseq_XGR.nf
+++ b/src/RNAseq_XGR.nf
@@ -151,6 +151,7 @@ workflow {
     Channel
       .fromPath( idx_genome )
       .ifEmpty { error "Cannot find idexed genome reference files" }
+      .map{it -> [(it.baseName =~ /([^\.]*)/)[0][1], it]}
       .set { genome_indexed_input }
     mapping_withindex(genome_indexed_input, fastp.out.fastq)
   }
diff --git a/src/nf_modules/star/main_2.7.8a.nf b/src/nf_modules/star/main_2.7.8a.nf
index 44e6693..13d0af1 100644
--- a/src/nf_modules/star/main_2.7.8a.nf
+++ b/src/nf_modules/star/main_2.7.8a.nf
@@ -181,6 +181,59 @@ STAR --runThreadN ${task.cpus} \
 --outSAMstrandField intronMotif \
 --chimOutType WithinBAM
 
+mv ${reads_id}.Aligned.sortedByCoord.out.bam ${reads_id}.bam
+"""
+}
+
+
+process mapping_withindex {
+  // Map reads with STAR against an index supplied as input; emits STAR's
+  // final log (report) and the coordinate-sorted BAM renamed <reads_id>.bam.
+  container = "${container_url}"
+  label "big_mem_multi_cpus"
+  if (params.star_mapping_fastq_out != "") {
+    publishDir "results/${params.star_mapping_fastq_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(index_id), path(index)
+    tuple val(reads_id), path(reads)
+
+  output:
+    path "*.Log.final.out", emit: report
+    tuple val(reads_id), path("*.bam"), emit: bam
+
+  script:
+  // STAR reads its genome from index/ in both branches, so each one first moves
+  // the staged index there. NOTE(review): reads_id is assumed a plain string.
+if (reads.size() == 2)
+"""
+mkdir -p index
+mv ${index} index/
+STAR --runThreadN ${task.cpus} \
+--genomeDir index/ \
+--readFilesCommand zcat \
+--readFilesIn ${reads[0]} ${reads[1]} \
+--outFileNamePrefix ${reads_id}. \
+--alignIntronMax 10000 \
+--outSAMtype BAM SortedByCoordinate \
+--outSAMstrandField intronMotif
+
+mv ${reads_id}.Aligned.sortedByCoord.out.bam ${reads_id}.bam
+"""
+else
+"""
+mkdir -p index
+mv ${index} index/
+STAR --runThreadN ${task.cpus} \
+--genomeDir index/ \
+--readFilesCommand zcat \
+--readFilesIn ${reads} \
+--outFileNamePrefix ${reads_id}. \
+--alignIntronMax 10000 \
+--outSAMtype BAM SortedByCoordinate \
+--outSAMstrandField intronMotif
+
 mv ${reads_id}.Aligned.sortedByCoord.out.bam ${reads_id}.bam
 """
 }
\ No newline at end of file
-- 
GitLab