diff --git a/src/bolero.nf b/src/bolero.nf index cf3605d0115d8daad8fe5370839c745723a23d33..f6462faaf17ed7629b86be920a990d396a2cfe6e 100755 --- a/src/bolero.nf +++ b/src/bolero.nf @@ -160,9 +160,11 @@ Channel // Test pour barcoding process Channel - .fromPath(params.pass) + .fromPath('/home/alia/pipelines/bolero/results/01_Basecalling/pass/', type: 'dir') .set{pass} - +Channel + .fromPath('/home/alia/pipelines/bolero/results/01_Basecalling/sequencing_summary.txt') + .set{ss} /* **************************************************************** Imports @@ -182,6 +184,7 @@ if(!params.skipBC) { } include { control_basecalling } from "./nf_modules/pycoqc/main.nf" +include { control_bam } from "./nf_modules/pycoqc/main.nf" include { concatenate } from "./nf_modules/seqkit/main.nf" include { cut_5pRACE } from "./nf_modules/cutadapt/main.nf" include { hbv_genome } from "./nf_modules/minimap2/main.nf" @@ -217,23 +220,33 @@ workflow { //il reste à adapter ça else { // we take fast5 files as input and proceed to basecalling with guppy if(params.gpu_mode) { - //basecall_fast5_gpu(input) - barcoding_gpu(pass) - barcoding_gpu.out.barcodes - .flatten() - .map{it -> [it.name, it]} - .set{tuples_barcode} - concatenate(tuples_barcode) + basecall_fast5_gpu(input) + if(params.kit_barcoding != ""){ + barcoding_gpu(pass) + barcoding_gpu.out.barcodes + .flatten() + .map{it -> [it.name, it]} + .set{tuples_barcode} + concatenate(tuples_barcode) + } + else{ + concatenate(pass) + } //control_basecalling(basecall_fast5_gpu.out.sequencing_summary) } else { - //basecall_fast5_cpu(input) - barcoding_cpu(pass) - barcoding_cpu.out.barcodes - .flatten() - .map{it -> [it.name, it]} - .set{tuples_barcode} - concatenate(tuples_barcode) + basecall_fast5_cpu(input) + if(params.kit_barcoding != ""){ + barcoding_cpu(pass) + barcoding_cpu.out.barcodes + .flatten() + .map{it -> [it.name, it]} + .set{tuples_barcode} + concatenate(tuples_barcode) + } + else{ + concatenate(pass) + } //control_basecalling(basecall_fast5_cpu.out.sequencing_summary) } } @@ -252,9 +265,16 @@ workflow { //########################## MAPPING ########################## hbv_genome(cut_5pRACE.out.fastq_cutadapt, genome.collect()) - sort_index_bam(hbv_genome.out.bam) + sort_index_bam.out.indexed_bam + .flatten() + .filter(~/.*bam$/) + .collect() + .set{bam_path} + + //control_bam(ss, bam_path) + //###################### START POSITIONS ####################### start_position_counts(sort_index_bam.out.indexed_bam) diff --git a/src/nf_modules/junction_nanosplicer/main.nf b/src/nf_modules/junction_nanosplicer/main.nf index a48c713e70903f8f604bfa59e5b3ffabbca6737f..6d24d6104c89e392f5d52b2774e891529dbdc35d 100644 --- a/src/nf_modules/junction_nanosplicer/main.nf +++ b/src/nf_modules/junction_nanosplicer/main.nf @@ -11,8 +11,7 @@ process junctions_nanosplicer{ } input: - tuple val(barcode), path(txt) - tuple val(barcode), path(csv) + tuple val(barcode), path(txt), path(csv) output: path("${barcode}/${barcode}_JWR_check_parsed.csv") diff --git a/src/nf_modules/ont-guppy/main.nf b/src/nf_modules/ont-guppy/main.nf index e1282c5b8b4b21d7ae5a4c1c6795dc55a6f441d1..5704bb7596e7c1820247724341fb15316704b42f 100644 --- a/src/nf_modules/ont-guppy/main.nf +++ b/src/nf_modules/ont-guppy/main.nf @@ -27,7 +27,7 @@ process basecall_fast5_gpu { errorKit "WARNING ! No kit type given..." errorKit.view() } -/* + if (params.config_file != "") { options = "-c /opt/ont/guppy/data/${params.config_file}" } @@ -35,7 +35,7 @@ process basecall_fast5_gpu { options = "--flowcell ${params.flowcell} \ --kit ${params.kit} " } -*/ + input: path(fast5_folder) @@ -60,8 +60,7 @@ guppy_basecaller --compress_fastq \ --gpu_runners_per_device ${params.gpu_runners_per_device} \ --num_callers ${params.num_callers} \ --chunks_per_runner ${params.chunks_per_runner} \ - --flowcell ${params.flowcell} \ - --kit ${params.kit} + ${options} """ } @@ -84,6 +83,14 @@ process basecall_fast5_cpu { errorKit.view() } + if (params.config_file != "") { + options = "-c /opt/ont/guppy/data/${params.config_file}" + } + else { + options = "--flowcell ${params.flowcell} \ +--kit ${params.kit} " + } + input: val(fast5_folder) @@ -95,16 +102,20 @@ process basecall_fast5_cpu { script: """ -echo "Start basecalling using CPUs." -find ${fast5_folder} -type f -name "*.fast5" > allfast5files.txt +echo "Start basecalling using G=CPUs." +# guppy_basecaller --print_workflows +path=\$(readlink -f ${fast5_folder}) +find \${path} -type f -name "*.fast5" > allfast5files.txt guppy_basecaller --compress_fastq \ -i / \ --input_file_list allfast5files.txt \ -s . \ - --cpu_threads_per_caller ${params.cpu_threads_per_caller} \ + -x "cuda:all" \ + --min_qscore ${params.min_qscore} \ + --cpu_runners_per_device ${params.cpu_runners_per_device} \ --num_callers ${params.num_callers} \ - --flowcell ${params.flowcell} \ - --kit ${params.kit} + --chunks_per_runner ${params.chunks_per_runner} \ + ${options} """ } @@ -112,7 +123,7 @@ params.kit_barcoding = "EXP-PBC001" process barcoding_gpu { container = "${container_url}" label "gpus" - tag "$fast5_folder" + tag "$pass_path" if (params.barcoding_out != "") { publishDir "results/${params.barcoding_out}", mode: 'copy' } @@ -140,7 +151,7 @@ guppy_barcoder \ process barcoding_cpu { container = "${container_url}" label "big_mem_multi_cpus" - tag "$fast5_folder" + tag "$pass_path" if (params.barcoding_out != "") { publishDir "results/${params.barcoding_out}", mode: 'copy' } diff --git a/src/nf_modules/pycoqc/main.nf b/src/nf_modules/pycoqc/main.nf index 1503479e5b4cd10a973d2c512a5f7831cddfe871..de9db1f18c24137e9ed39103b2f11a27405c7757 100644 --- a/src/nf_modules/pycoqc/main.nf +++ b/src/nf_modules/pycoqc/main.nf @@ -36,7 +36,8 @@ process control_bam { path("*.txt") """ - find results/${params.minimap2_genome_out} -type f -name "*sorted.bam" > allbamfiles.txt - #pycoQC -f ${txt} -a ${path_bam} -o Control_mapping.html + mkdir bam/ + mv ${path_bam} bam/ + pycoQC -f ${txt} -a bam/ -o Control_mapping.html """ } \ No newline at end of file diff --git a/src/nf_modules/rna_count/main.nf b/src/nf_modules/rna_count/main.nf index 448f5986a596a4a5f0d8a6a0d80ff538daf208d0..bc858b2fcc26bb603757f9eaa8c85ada9e65f79d 100644 --- a/src/nf_modules/rna_count/main.nf +++ b/src/nf_modules/rna_count/main.nf @@ -11,8 +11,7 @@ process rna_count{ } input: - tuple val(barcode), path(spvariants) - tuple val(barcode), path(classification) + tuple val(barcode), path(spvariants), path(classification) output: path("${barcode}/*.csv") diff --git a/src/nf_modules/seqkit/main.nf b/src/nf_modules/seqkit/main.nf index 683c924fe37a357baa16860c4520bcb800e16144..87792e5cb181e76c25495747cad2517fa2aab8d8 100755 --- a/src/nf_modules/seqkit/main.nf +++ b/src/nf_modules/seqkit/main.nf @@ -87,4 +87,24 @@ process concatenate { seqkit scat -j ${task.cpus} -f \${path} --gz-only > ${barcode}_merged.fastq gzip ${barcode}_merged.fastq """ +} + +process concatenate_BC { + container = "${container_url}" + label "big_mem_multi_cpus" + tag "${barcode}" + if (params.fastq_out != "") { + publishDir "results/${params.fastq_out}", mode: 'copy' + } + + input: + path(path) + + output: + path("test.txt") + + script: + """ + echo ${path} \$(readlink -f ${path}) > test.txt + """ } \ No newline at end of file