From 54b8027a92acacfe26ab1784b1a254b4cd4fe845 Mon Sep 17 00:00:00 2001 From: aliarifki <aliarifki@outlook.fr> Date: Fri, 26 May 2023 16:17:32 +0200 Subject: [PATCH] Ajout des derniers process --- src/bolero.nf | 50 ++++++++++++++----- src/nf_modules/junction_nanosplicer/main.nf | 11 ++-- src/nf_modules/ont-guppy/{main.fr => main.nf} | 0 src/nf_modules/rna_count/main.nf | 5 +- src/nf_modules/start_positions/main.nf | 8 +-- 5 files changed, 51 insertions(+), 23 deletions(-) rename src/nf_modules/ont-guppy/{main.fr => main.nf} (100%) diff --git a/src/bolero.nf b/src/bolero.nf index f0e8cc3..41e7bda 100755 --- a/src/bolero.nf +++ b/src/bolero.nf @@ -1,6 +1,7 @@ #!/usr/bin/env nextflow nextflow.enable.dsl=2 +//syntax extension DSL2 /* ======================================================================================================================== @@ -73,7 +74,7 @@ if (params.help || params.h) { **************************************************************** */ -/* params in */ +/* Params in */ params.skipBC = true params.gpu_mode = false @@ -100,6 +101,8 @@ params.seqkit_grep_out = "03_fastq/" params.cutadapt_out = "04_cutadapt/" params.minimap2_genome_out = "05_minimap2/" params.start_position_counts_out = "06_start_positions/" +params.nanosplicer_out = "07_nanosplicer/" +params.rna_count_out = "08_RNA_count/" params.pycoQC_out = "pycoQC/" /* @@ -108,6 +111,7 @@ params.pycoQC_out = "pycoQC/" **************************************************************** */ +//to print multiline informations log.info "fast5/q folder : ${params.input}" log.info "5'RACE adapter sequence : ${params.adapt}" if(!params.skipBC) log.info "Guppy basecalling calculation using GPU mode : ${params.gpu_mode}." 
@@ -161,13 +165,21 @@ if(!params.skipBC) { // Replace concatenate by seqkit fct to parallelization: // include { concatenate } from "./nf_modules/seqkit/main.nf" include { concatenate } from "./nf_modules/concatenate/main.nf" - include { cut_5pRACE } from "./nf_modules/cutadapt/main.nf" include { hbv_genome } from "./nf_modules/minimap2/main.nf" include { seqkit_grep } from "./nf_modules/seqkit/main.nf" -include { sort_bam as sort_bam_genome } from './nf_modules/samtools/main.nf' addParams(sort_bam_out: params.minimap2_genome_out) -include { index_bam as index_bam_genome } from './nf_modules/samtools/main.nf' addParams(index_bam_out: params.minimap2_genome_out) +include { sort_bam } from './nf_modules/samtools/main.nf' addParams(sort_bam_out: params.minimap2_genome_out) +include { index_bam } from './nf_modules/samtools/main.nf' addParams(index_bam_out: params.minimap2_genome_out) +include { sort_index_bam } from './nf_modules/samtools/main.nf' addParams(indexed_bam_out: params.minimap2_genome_out) include { start_position_counts } from "./nf_modules/samtools/main.nf" +include { start_position_individuals } from "./nf_modules/start_positions/main.nf" +include { jwr_checker } from "./nf_modules/nanosplicer/main.nf" +include { junctions_nanosplicer } from "./nf_modules/junction_nanosplicer/main.nf" +include { rna_count } from "./nf_modules/rna_count/main.nf" + +///////////////////////////////////////////////////////// +// script R avec classification des reads par type d'ARN et graphiques associés + // creation des fonctions NanoSplicer: // include { jwr_check } from "./nf_modules/nanosplicer/main.nf" @@ -200,19 +212,33 @@ workflow { } //####################### PREPROCESSING ####################### - /* + + + //Filtration (seqkit_grep looks for the 5'RACE and the gsp patterns in the reads to keep only mature ARNs) seqkit_grep(concatenate.out.merged_fastq, params.adapt, params.gsp) + + //Cut of the 5'RACE sequence cut_5pRACE(seqkit_grep.out.filtered_fastq, 
params.adapt) //########################## MAPPING ########################## - + + hbv_genome(cut_5pRACE.out.fastq_cutadapt, genome) - sort_bam_genome(hbv_genome.out.bam) - index_bam_genome(sort_bam_genome.out.sorted_bam.collect()) + sort_index_bam(hbv_genome.out.bam) + // index_bam(sort_bam_genome.out.sorted_bam.collect()) - //###################### QUANTIFICATION ####################### + //###################### START POSITIONS ####################### - start_position_counts(sort_bam_genome.out.sorted_bam) - */ + start_position_counts(sort_index_bam.out.indexed_bam) + start_position_individuals(start_position_counts.out.count) -} \ No newline at end of file + //#################### VARIANTS D'EPISSAGE #################### + + jwr_checker(sort_index_bam.out.indexed_bam) + junctions_nanosplicer(start_position_individuals.out.classification_of_reads, jwr_checker.out.nanosplicer_jwr) + + //######################### RNA COUNT ######################### + + rna_count(junctions_nanosplicer.out.identified_SPvariants, start_position_individuals.out.classification_of_reads) + +} diff --git a/src/nf_modules/junction_nanosplicer/main.nf b/src/nf_modules/junction_nanosplicer/main.nf index 4af81e2..9000bc8 100644 --- a/src/nf_modules/junction_nanosplicer/main.nf +++ b/src/nf_modules/junction_nanosplicer/main.nf @@ -1,13 +1,13 @@ version = "1.0" -container_url = "xgrand/r-scripts:${version}" +container_url = "xgrand/r-bolero:${version}" -params.junctions_out = "" +params.nanosplicer_out = "" process junctions_nanosplicer{ container = "${container_url}" label "small_mem_mono_cpus" tag "identification de variants d'épissage" - if (params.junctions_out != "") { - publishDir "results/${params.junctions_out}", mode: 'copy' + if (params.nanosplicer_out != "") { + publishDir "results/${params.nanosplicer_out}", mode: 'copy' } input: @@ -17,10 +17,11 @@ process junctions_nanosplicer{ output: path("Rplots.pdf") path("JWR_check_parsed.csv") + path("*.jpg") 
path("identified_SPvariants.csv"), emit: identified_SPvariants script: """ - Rscript Junctions_NanoSplicer.R -c txt -j csv + Rscript /Junctions_NanoSplicer.R -c ${txt} -j ${csv} """ } \ No newline at end of file diff --git a/src/nf_modules/ont-guppy/main.fr b/src/nf_modules/ont-guppy/main.nf similarity index 100% rename from src/nf_modules/ont-guppy/main.fr rename to src/nf_modules/ont-guppy/main.nf diff --git a/src/nf_modules/rna_count/main.nf b/src/nf_modules/rna_count/main.nf index 5899f2e..4d9b6f4 100644 --- a/src/nf_modules/rna_count/main.nf +++ b/src/nf_modules/rna_count/main.nf @@ -1,5 +1,5 @@ version = "1.0" -container_url = "xgrand/r-scripts:${version}" +container_url = "xgrand/r-bolero:${version}" params.rna_count_out = "" process rna_count{ @@ -17,9 +17,10 @@ process rna_count{ output: path("*.csv") path("*.pdf") + path("*.jpg") script: """ - Rscript HBV_RNAs_count.R -s spvariants -c classification + Rscript /HBV_RNAs_count.R -s ${spvariants} -c ${classification} """ } diff --git a/src/nf_modules/start_positions/main.nf b/src/nf_modules/start_positions/main.nf index 4a29e9f..4b97e0c 100644 --- a/src/nf_modules/start_positions/main.nf +++ b/src/nf_modules/start_positions/main.nf @@ -1,7 +1,7 @@ version = "1.0" -container_url = "xgrand/r-scripts:${version}" +container_url = "xgrand/r-bolero:${version}" -params.start_position_counts_out = "" +params.start_position_counts_out ="" process start_position_individuals{ container = "${container_url}" label "small_mem_mono_cpus" @@ -9,17 +9,17 @@ process start_position_individuals{ if (params.start_position_counts_out != "") { publishDir "results/${params.start_position_counts_out}", mode: 'copy' } - input: path(start_position_counts) output: path("Rplots.pdf") + path("*.jpg") path("Count_reads_per_promoter.tsv") path("classification_of_reads_per_RNA.txt"), emit: classification_of_reads script: """ - Rscript start_positions.R -i start_position_counts + Rscript /Start_positions.R -i ${start_position_counts} """ } \ 
No newline at end of file -- GitLab