diff --git a/.gitignore b/.gitignore
old mode 100644
new mode 100755
diff --git a/src/.docker_modules/guppy-gpu/6.4.6/Dockerfile b/src/.docker_modules/guppy-gpu/6.4.6/Dockerfile
old mode 100644
new mode 100755
diff --git a/src/.docker_modules/minionqc/1.4.2/Dockerfile b/src/.docker_modules/minionqc/1.4.2/Dockerfile
old mode 100644
new mode 100755
diff --git a/src/.docker_modules/nanosplicer/1.0/Dockerfile b/src/.docker_modules/nanosplicer/1.0/Dockerfile
old mode 100644
new mode 100755
diff --git a/src/.docker_modules/nanosplicer/1.0/requirements.txt b/src/.docker_modules/nanosplicer/1.0/requirements.txt
old mode 100644
new mode 100755
diff --git a/src/.docker_modules/nanosplicer/1.1/Dockerfile b/src/.docker_modules/nanosplicer/1.1/Dockerfile
old mode 100644
new mode 100755
diff --git a/src/.docker_modules/ont-guppy/6.4.6/Dockerfile b/src/.docker_modules/ont-guppy/6.4.6/Dockerfile
old mode 100644
new mode 100755
diff --git a/src/.docker_modules/pychopper/2.5.0/Dockerfile b/src/.docker_modules/pychopper/2.5.0/Dockerfile
old mode 100644
new mode 100755
diff --git a/src/.docker_modules/pycoqc/2.5.2/Dockerfile b/src/.docker_modules/pycoqc/2.5.2/Dockerfile
old mode 100644
new mode 100755
diff --git a/src/.docker_modules/pycoqc/2.5.2/requirements.txt b/src/.docker_modules/pycoqc/2.5.2/requirements.txt
old mode 100644
new mode 100755
diff --git a/src/.docker_modules/r-bolero/1.0/Dockerfile b/src/.docker_modules/r-bolero/1.0/Dockerfile
old mode 100644
new mode 100755
diff --git a/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R b/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R
old mode 100644
new mode 100755
diff --git a/src/.docker_modules/r-bolero/1.0/Install_packages.R b/src/.docker_modules/r-bolero/1.0/Install_packages.R
old mode 100644
new mode 100755
diff --git a/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R b/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R
old mode 100644
new mode 100755
diff --git a/src/.docker_modules/r-bolero/1.0/Start_positions.R b/src/.docker_modules/r-bolero/1.0/Start_positions.R
old mode 100644
new mode 100755
diff --git a/src/.docker_modules/r-bolero/1.1/Dockerfile b/src/.docker_modules/r-bolero/1.1/Dockerfile
old mode 100644
new mode 100755
diff --git a/src/.docker_modules/r-bolero/1.1/HBV_RNAs_count.R b/src/.docker_modules/r-bolero/1.1/HBV_RNAs_count.R
old mode 100644
new mode 100755
diff --git a/src/.docker_modules/r-bolero/1.1/Install_packages.R b/src/.docker_modules/r-bolero/1.1/Install_packages.R
old mode 100644
new mode 100755
diff --git a/src/.docker_modules/r-bolero/1.1/Junctions_NanoSplicer.R b/src/.docker_modules/r-bolero/1.1/Junctions_NanoSplicer.R
old mode 100644
new mode 100755
diff --git a/src/.docker_modules/r-bolero/1.1/Start_positions.R b/src/.docker_modules/r-bolero/1.1/Start_positions.R
old mode 100644
new mode 100755
diff --git a/src/.docker_modules/r-bolero/1.1/ggplot_theme_Publication-2.R b/src/.docker_modules/r-bolero/1.1/ggplot_theme_Publication-2.R
old mode 100644
new mode 100755
diff --git a/src/.docker_modules/samtools/1.17/Dockerfile b/src/.docker_modules/samtools/1.17/Dockerfile
old mode 100644
new mode 100755
diff --git a/src/.docker_modules/seqkit/2.4.0/Dockerfile b/src/.docker_modules/seqkit/2.4.0/Dockerfile
old mode 100644
new mode 100755
diff --git a/src/nf_modules/junction_nanosplicer/main.nf b/src/nf_modules/junction_nanosplicer/main.nf
old mode 100644
new mode 100755
diff --git a/src/nf_modules/nanosplicer/main.nf b/src/nf_modules/nanosplicer/main.nf
old mode 100644
new mode 100755
diff --git a/src/nf_modules/ont-guppy/main.nf b/src/nf_modules/ont-guppy/main.nf
old mode 100644
new mode 100755
diff --git a/src/nf_modules/pychopper/main.nf b/src/nf_modules/pychopper/main.nf
old mode 100644
new mode 100755
diff --git a/src/nf_modules/pycoqc/main.nf b/src/nf_modules/pycoqc/main.nf
old mode 100644
new mode 100755
diff --git a/src/nf_modules/rna_count/main.nf b/src/nf_modules/rna_count/main.nf
old mode 100644
new mode 100755
diff --git a/src/nf_modules/start_positions/main.nf b/src/nf_modules/start_positions/main.nf
old mode 100644
new mode 100755
diff --git a/src/orelob.nf b/src/orelob.nf
new file mode 100644
index 0000000000000000000000000000000000000000..cf3cafc1813c56806454a1f75979b52f2d8ff4f6
--- /dev/null
+++ b/src/orelob.nf
@@ -0,0 +1,332 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl=2
+//syntax extension DSL2
+
+/*
+========================================================================================================================
+                                                        Orelob
+========================================================================================================================
+
+bolero pipeline :
+ * Pipeline dedicated to transcription termination analysis of Hepatitis B Virus from nanopore seq
+ * Preprocessing, filtration, alignment, quantification.
+
+ ****************************************************************
+                      Help Message Definition
+ ****************************************************************
+*/
+
+// Print the command-line usage of the pipeline through the Nextflow logger.
+def helpMessage() {
+    log.info"""
+    Usage:
+    The typical command for running the pipeline is as follows:
+
+      nextflow ./src/orelob.nf -c ./src/nextflow.config -profile singularity
+
+    Nextflow parameters:
+      -profile [str]                  Configuration profile to use.
+                                      Available: docker, singularity, podman, psmn, ccin2p3
+
+    Mandatory arguments:
+      --input [path]                  Path to the folder containing fast5 files.
+                                      If skip basecalling option enabled, path to fastq files folder.
+      --adapt [file]                  Sequence of 3'RACE adapter.
+      --gsp [file]                    Sequence of gene-specific primer used in 3'RACE amplification step.
+
+    References:
+      --genome [file]                 Path to the fasta file containing the genome.
+      --gtf [file]                    Path to the gtf file containing the genome annotation.
+
+    Nanopore basecalling:
+      --skipBC [boolean]              Skip basecalling step. If true, give fastq folder as input. Default: true.
+      --flowcell [str]                Nanopore flowcell. Default = FLO-MIN106.
+      --kit [str]                     Nanopore kit. Default = SQK-PBK004.
+      --gpu_mode [boolean]            Guppy basecaller configuration. Default: false.
+                                      "gpu" mode is dedicated to NVIDIA Cuda compatible system according to Guppy specifications.
+
+    Nanopore barcoding:
+      --kit_barcoding                 Nanopore barcoding kit.
+      --config_file                   Nanopore configuration file.
+
+    GPU basecalling parameters:
+      --min_qscore [float]            Minimum quality score threshold, default = 7.0.
+      --gpu_runners_per_device [int]  Number of runner per device, default = 32 (refer to guppy manual).
+      --num_callers [int]             Number of callers, default = 16 (refer to guppy manual).
+      --chunks_per_runner [int]       Number of chunks per runner, default = 512 (refer to guppy manual).
+      --chunk_size [int]              Chunck size, default = 1900 (refer to guppy manual).
+
+    Help:
+      --help | --h                    Display this help message.
+
+    """.stripIndent()
+}
+
+// Show help message
+
+params.help = ""
+params.h = ""
+
+if (params.help || params.h) {
+    helpMessage()
+    exit 0
+}
+
+/*
+ ****************************************************************
+                      Default Parameters
+ ****************************************************************
+*/
+
+/* Params in */
+
+// Empty by default: --input is mandatory and is checked right after the defaults.
+params.input = ""
+params.skipBC = true
+params.gpu_mode = false
+params.adapt = ""
+params.gsp = ""
+// NOTE(review): the two reference defaults are absolute paths on one machine;
+// pass --genome and --gtf explicitly when running anywhere else.
+params.genome = "/home/xavier/Data/Genome/202201_Full-length_HBV_GTFv3/20230516_HBV_FL_preCore_reference.fasta"
+params.gtf = "/home/xavier/Data/Genome/202201_Full-length_HBV_GTFv3/20230516_GTF_preCore_FL_HBV_XGR.gtf"
+
+params.flowcell = "FLO-MIN106"
+params.kit = "SQK-PBK004"
+params.min_qscore = 7.0
+params.gpu_runners_per_device = 32
+params.num_callers = 16
+params.chunks_per_runner = 512
+params.chunk_size = 1900
+params.config_file = ""
+params.kit_barcoding = ""
+
+// --input is mandatory: stop here with a clear message rather than building
+// channels from an empty path further down.
+if (!params.input) {
+    error "No fast5/q folder defined."
+}
+
+/* Params out */
+
+params.basecalling_out = "01_basecalling/"
+params.barcoding_out = "02_barcoding/"
+params.fastq_out = "03_fastq/"
+params.seqkit_grep_out = "03_fastq/"
+params.porechop_out = "03_fastq/"
+params.cutadapt_out = "04_cutadapt/"
+params.minimap2_genome_out = "05_minimap2/"
+params.filtered_bam_out = "05_minimap2/"
+params.start_position_counts_out = "06_start_positions/"
+params.nanosplicer_out = "07_nanosplicer/"
+params.rna_count_out = "08_RNA_count/"
+params.rna_qc_out = "09_quality_control/"
+
+/*
+ ****************************************************************
+                              Logs
+ ****************************************************************
+*/
+
+log.info "fast5/q folder : ${params.input}"
+log.info "3'RACE adapter sequence : ${params.adapt}"
+log.info "Gene specific primer : ${params.gsp}"
+if(!params.skipBC) log.info "Guppy basecalling calculation using GPU mode : ${params.gpu_mode}."
+log.info "Genome file : ${params.genome}"
+log.info "Genome annotation file : ${params.gtf}"
+
+/*
+ ****************************************************************
+                      Channel definitions
+ ****************************************************************
+*/
+
+Channel
+    .of( params.input )
+    .ifEmpty { error "No fast5/q folder defined." }
+    .set { input }
+
+Channel
+    .fromPath( params.genome )
+    .ifEmpty { error "No genome defined, a fasta file containing the full length preC RNA from HBV genome." }
+    .set { genome }
+
+Channel
+    .fromPath( params.gtf )
+    .ifEmpty { error "No annotation defined, a gtf file describing transcripts and splice variants." }
+    .set { gtf }
+
+// One [folder name, folder path] tuple per sub-folder of the input folder.
+// Trailing slashes are stripped first so "--input dir" and "--input dir/" glob alike.
+Channel
+    .fromPath( params.input.toString().replaceAll('/+$', '') + '/*/', type: 'dir' )
+    .map { it -> [it.baseName, it] }
+    .set { barcodes }
+
+/*
+ ****************************************************************
+                            Imports
+ ****************************************************************
+*/
+
+// GPU and CPU flavours of the guppy processes live in the same module file, so
+// all four are included once at top level; the workflow below only calls the
+// pair selected by params.gpu_mode, and only when basecalling is not skipped.
+include { basecall_fast5_gpu } from "./nf_modules/ont-guppy/main.nf"
+include { barcoding_gpu } from "./nf_modules/ont-guppy/main.nf"
+include { basecall_fast5_cpu } from "./nf_modules/ont-guppy/main.nf"
+include { barcoding_cpu } from "./nf_modules/ont-guppy/main.nf"
+
+include { control_basecalling } from "./nf_modules/pycoqc/main.nf"
+include { control_bam } from "./nf_modules/pycoqc/main.nf"
+include { concatenate } from "./nf_modules/seqkit/main.nf"
+include { cut_5pRACE } from "./nf_modules/cutadapt/main.nf"
+include { hbv_genome } from "./nf_modules/minimap2/main.nf"
+include { seqkit_grep } from "./nf_modules/seqkit/main.nf"
+include { sort_bam } from './nf_modules/samtools/main.nf' addParams(sort_bam_out: params.minimap2_genome_out)
+include { index_bam } from './nf_modules/samtools/main.nf' addParams(index_bam_out: params.minimap2_genome_out)
+include { sort_index_bam } from './nf_modules/samtools/main.nf' addParams(indexed_bam_out: params.minimap2_genome_out)
+include { filter_as } from './nf_modules/samtools/main.nf'
+include { start_position_counts } from "./nf_modules/samtools/main.nf"
+include { start_position_individuals } from "./nf_modules/start_positions/main.nf"
+include { jwr_checker } from "./nf_modules/nanosplicer/main.nf"
+include { junctions_nanosplicer } from "./nf_modules/junction_nanosplicer/main.nf"
+include { rna_count } from "./nf_modules/rna_count/main.nf"
+
+include { porechop } from "./nf_modules/porechop/main.nf"
+include { trimmming_pychopper } from "./nf_modules/pychopper/main.nf"
+
+/*
+ ****************************************************************
+                            Workflow
+ ****************************************************************
+*/
+
+workflow {
+
+    if(params.skipBC) { // we take fastq files as input and skip basecalling
+        concatenate(barcodes)
+    }
+    else { // we take fast5 files as input and proceed to basecalling with guppy
+        if(params.gpu_mode) {
+            basecall_fast5_gpu(input)
+            if(params.kit_barcoding != ""){
+                barcoding_gpu(basecall_fast5_gpu.out.pass)
+                barcoding_gpu.out.barcodes
+                    .flatten()
+                    .map{it -> [it.name, it]}
+                    .set{tuples_barcode}
+                concatenate(tuples_barcode)
+            }
+            else{
+                basecall_fast5_gpu.out.pass
+                    .map{it -> ["Sample", it]}
+                    .set{tuple_sample}
+                concatenate(tuple_sample)
+            }
+        }
+        else {
+            basecall_fast5_cpu(input)
+            if(params.kit_barcoding != ""){
+                barcoding_cpu(basecall_fast5_cpu.out.pass)
+                barcoding_cpu.out.barcodes
+                    .flatten()
+                    .map{it -> [it.name, it]}
+                    .set{tuples_barcode}
+                concatenate(tuples_barcode)
+            }
+            else{
+                basecall_fast5_cpu.out.pass
+                    .map{it -> ["Sample", it]}
+                    .set{tuple_sample}
+                concatenate(tuple_sample)
+            }
+        }
+    }
+
+    //####################### PREPROCESSING #######################
+
+    //Filtration (seqkit_grep looks for the 5'RACE and the gsp patterns in the reads to keep only mature RNAs)
+    seqkit_grep(concatenate.out.merged_fastq, params.adapt, params.gsp)
+
+    //Trimming with porechop
+    porechop(seqkit_grep.out.filtered_fastq)
+
+    //Trimming with pychopper
+    //trimmming_pychopper(seqkit_grep.out.filtered_fastq)
+
+    //Cut of the 5'RACE sequence
+    cut_5pRACE(porechop.out.porechoped_fastq, params.adapt)
+    //cut_5pRACE(trimmming_pychopper.out.pychoped_fastq, params.adapt)
+    //cut_5pRACE(seqkit_grep.out.filtered_fastq, params.adapt)
+
+    //########################## MAPPING ##########################
+
+    hbv_genome(cut_5pRACE.out.fastq_cutadapt, genome.collect())
+
+    //Filter
+    filter_as(hbv_genome.out.bam)
+
+    //Index
+    sort_index_bam(filter_as.out.filtered_bam)
+
+    //Quality control (uses the basecaller sequencing summary, so only when basecalling ran)
+    if(!params.skipBC) {
+        if(params.gpu_mode) {
+            control_bam(basecall_fast5_gpu.out.sequencing_summary.collect(), sort_index_bam.out.indexed_bam)
+        }
+        else {
+            control_bam(basecall_fast5_cpu.out.sequencing_summary.collect(), sort_index_bam.out.indexed_bam)
+        }
+    }
+
+    //###################### START POSITIONS #######################
+
+    //Identification of start positions
+    start_position_counts(sort_index_bam.out.indexed_bam)
+
+    //Identification of RNA
+    start_position_individuals(start_position_counts.out.count)
+
+    //###################### SPLICE VARIANTS ######################
+
+    //Identification of splicing junction sites
+    jwr_checker(sort_index_bam.out.indexed_bam)
+
+    start_position_individuals.out.classification_of_reads
+        .combine(jwr_checker.out.nanosplicer_jwr, by: 0)
+        .set{files_for_nanosplicer}
+
+    //Identification of variants
+    junctions_nanosplicer(files_for_nanosplicer)
+
+    //######################### RNA COUNT #########################
+
+    junctions_nanosplicer.out.identified_SPvariants
+        .combine(start_position_individuals.out.classification_of_reads, by: 0)
+        .set{files_for_rna_count}
+
+    //Variants count
+    rna_count(files_for_rna_count)
+
+}
+
+// End message:
+
+workflow.onComplete {
+
+    println ( workflow.success ? """
+        Orelob pipeline execution summary
+        ---------------------------
+        Completed at : ${workflow.complete}
+        Duration : ${workflow.duration}
+        Success : ${workflow.success}
+        workDir : ${workflow.workDir}
+        exit status : ${workflow.exitStatus}
+        """ : """
+        Failed: ${workflow.errorReport}
+        exit status : ${workflow.exitStatus}
+        """
+    )
+}