From 54b8027a92acacfe26ab1784b1a254b4cd4fe845 Mon Sep 17 00:00:00 2001
From: aliarifki <aliarifki@outlook.fr>
Date: Fri, 26 May 2023 16:17:32 +0200
Subject: [PATCH] Ajout des derniers process

---
 src/bolero.nf                                 | 50 ++++++++++++++-----
 src/nf_modules/junction_nanosplicer/main.nf   | 11 ++--
 src/nf_modules/ont-guppy/{main.fr => main.nf} |  0
 src/nf_modules/rna_count/main.nf              |  5 +-
 src/nf_modules/start_positions/main.nf        |  8 +--
 5 files changed, 51 insertions(+), 23 deletions(-)
 rename src/nf_modules/ont-guppy/{main.fr => main.nf} (100%)

diff --git a/src/bolero.nf b/src/bolero.nf
index f0e8cc3..41e7bda 100755
--- a/src/bolero.nf
+++ b/src/bolero.nf
@@ -1,6 +1,7 @@
 #!/usr/bin/env nextflow
 
 nextflow.enable.dsl=2
+// Enable the Nextflow DSL2 syntax
 
 /*
 ========================================================================================================================
@@ -73,7 +74,7 @@ if (params.help || params.h) {
  ****************************************************************
 */
 
-/* params in */
+/* Params in */
 
 params.skipBC = true
 params.gpu_mode = false
@@ -100,6 +101,8 @@ params.seqkit_grep_out = "03_fastq/"
 params.cutadapt_out = "04_cutadapt/"
 params.minimap2_genome_out = "05_minimap2/"
 params.start_position_counts_out = "06_start_positions/"
+params.nanosplicer_out = "07_nanosplicer/"
+params.rna_count_out = "08_RNA_count/"
 params.pycoQC_out = "pycoQC/"
 
 /*
@@ -108,6 +111,7 @@ params.pycoQC_out = "pycoQC/"
  ****************************************************************
 */
 
+// Log the run parameters, one per line
 log.info "fast5/q folder : ${params.input}"
 log.info "5'RACE adapter sequence : ${params.adapt}"
 if(!params.skipBC) log.info "Guppy basecalling calculation using GPU mode : ${params.gpu_mode}."
@@ -161,13 +165,21 @@ if(!params.skipBC) {
 // Replace concatenate by seqkit fct to parallelization:
 // include { concatenate } from "./nf_modules/seqkit/main.nf"
 include { concatenate } from "./nf_modules/concatenate/main.nf"
-
 include { cut_5pRACE } from "./nf_modules/cutadapt/main.nf"
 include { hbv_genome } from "./nf_modules/minimap2/main.nf"
 include { seqkit_grep } from "./nf_modules/seqkit/main.nf"
-include { sort_bam as sort_bam_genome } from './nf_modules/samtools/main.nf' addParams(sort_bam_out: params.minimap2_genome_out)
-include { index_bam as index_bam_genome } from './nf_modules/samtools/main.nf' addParams(index_bam_out: params.minimap2_genome_out)
+include { sort_bam } from './nf_modules/samtools/main.nf' addParams(sort_bam_out: params.minimap2_genome_out)
+include { index_bam } from './nf_modules/samtools/main.nf' addParams(index_bam_out: params.minimap2_genome_out)
+include { sort_index_bam } from './nf_modules/samtools/main.nf' addParams(indexed_bam_out: params.minimap2_genome_out)
 include { start_position_counts } from "./nf_modules/samtools/main.nf"
+include { start_position_individuals } from "./nf_modules/start_positions/main.nf"
+include { jwr_checker } from "./nf_modules/nanosplicer/main.nf"
+include { junctions_nanosplicer } from "./nf_modules/junction_nanosplicer/main.nf"
+include { rna_count } from "./nf_modules/rna_count/main.nf"
+
+/////////////////////////////////////////////////////////
+// R scripts: classification of reads by RNA type and associated plots
+
 
 // creation des fonctions NanoSplicer:
 // include { jwr_check } from "./nf_modules/nanosplicer/main.nf"
@@ -200,19 +212,33 @@ workflow {
   }
   //####################### PREPROCESSING #######################
 
-  /*
+  
+  
+  // Filtering: seqkit_grep searches the reads for the 5'RACE and gsp patterns to keep only mature RNAs
   seqkit_grep(concatenate.out.merged_fastq, params.adapt, params.gsp)
+  
+  // Trim the 5'RACE sequence from the filtered reads
   cut_5pRACE(seqkit_grep.out.filtered_fastq, params.adapt)
 
   //########################## MAPPING ##########################
-
+  
+  
   hbv_genome(cut_5pRACE.out.fastq_cutadapt, genome)
-  sort_bam_genome(hbv_genome.out.bam)
-  index_bam_genome(sort_bam_genome.out.sorted_bam.collect())
+  sort_index_bam(hbv_genome.out.bam)
+  // index_bam(sort_bam_genome.out.sorted_bam.collect())
 
-  //###################### QUANTIFICATION #######################
+  //###################### START POSITIONS #######################
 
-  start_position_counts(sort_bam_genome.out.sorted_bam)
-  */
+  start_position_counts(sort_index_bam.out.indexed_bam)
+  start_position_individuals(start_position_counts.out.count)
 
-}
\ No newline at end of file
+  //###################### SPLICE VARIANTS ######################
+
+  jwr_checker(sort_index_bam.out.indexed_bam)
+  junctions_nanosplicer(start_position_individuals.out.classification_of_reads, jwr_checker.out.nanosplicer_jwr)
+  
+  //######################### RNA COUNT #########################
+
+  rna_count(junctions_nanosplicer.out.identified_SPvariants, start_position_individuals.out.classification_of_reads)
+
+}
diff --git a/src/nf_modules/junction_nanosplicer/main.nf b/src/nf_modules/junction_nanosplicer/main.nf
index 4af81e2..9000bc8 100644
--- a/src/nf_modules/junction_nanosplicer/main.nf
+++ b/src/nf_modules/junction_nanosplicer/main.nf
@@ -1,13 +1,13 @@
 version = "1.0"
-container_url = "xgrand/r-scripts:${version}"
+container_url = "xgrand/r-bolero:${version}"
 
-params.junctions_out = ""
+params.nanosplicer_out = ""
 process junctions_nanosplicer{
   container = "${container_url}"
   label "small_mem_mono_cpus"
   tag "identification de variants d'épissage"
-  if (params.junctions_out != "") {
-    publishDir "results/${params.junctions_out}", mode: 'copy'
+  if (params.nanosplicer_out != "") {
+    publishDir "results/${params.nanosplicer_out}", mode: 'copy'
   }
 
   input:
@@ -17,10 +17,11 @@ process junctions_nanosplicer{
   output:
     path("Rplots.pdf")
     path("JWR_check_parsed.csv")
+    path("*.jpg")
     path("identified_SPvariants.csv"), emit: identified_SPvariants
 
   script:
     """
-    Rscript Junctions_NanoSplicer.R -c txt -j csv
+    Rscript /Junctions_NanoSplicer.R -c ${txt} -j ${csv}
     """
 }
\ No newline at end of file
diff --git a/src/nf_modules/ont-guppy/main.fr b/src/nf_modules/ont-guppy/main.nf
similarity index 100%
rename from src/nf_modules/ont-guppy/main.fr
rename to src/nf_modules/ont-guppy/main.nf
diff --git a/src/nf_modules/rna_count/main.nf b/src/nf_modules/rna_count/main.nf
index 5899f2e..4d9b6f4 100644
--- a/src/nf_modules/rna_count/main.nf
+++ b/src/nf_modules/rna_count/main.nf
@@ -1,5 +1,5 @@
 version = "1.0"
-container_url = "xgrand/r-scripts:${version}"
+container_url = "xgrand/r-bolero:${version}"
 
 params.rna_count_out = ""
 process rna_count{
@@ -17,9 +17,10 @@ process rna_count{
   output:
     path("*.csv")
     path("*.pdf")
+    path("*.jpg")
 
   script:
     """
-    Rscript HBV_RNAs_count.R -s spvariants -c classification
+    Rscript /HBV_RNAs_count.R -s ${spvariants} -c ${classification}
     """
 }
diff --git a/src/nf_modules/start_positions/main.nf b/src/nf_modules/start_positions/main.nf
index 4a29e9f..4b97e0c 100644
--- a/src/nf_modules/start_positions/main.nf
+++ b/src/nf_modules/start_positions/main.nf
@@ -1,7 +1,7 @@
 version = "1.0"
-container_url = "xgrand/r-scripts:${version}"
+container_url = "xgrand/r-bolero:${version}"
 
-params.start_position_counts_out = ""
+params.start_position_counts_out =""
 process start_position_individuals{
   container = "${container_url}"
   label "small_mem_mono_cpus"
@@ -9,17 +9,17 @@ process start_position_individuals{
   if (params.start_position_counts_out != "") {
     publishDir "results/${params.start_position_counts_out}", mode: 'copy'
   }
-
   input:
     path(start_position_counts)
 
   output:
     path("Rplots.pdf")
+    path("*.jpg")
     path("Count_reads_per_promoter.tsv")
     path("classification_of_reads_per_RNA.txt"), emit: classification_of_reads
 
   script:
     """
-    Rscript start_positions.R -i start_position_counts
+    Rscript /Start_positions.R -i ${start_position_counts}
     """
 }
\ No newline at end of file
-- 
GitLab