From 5bbf9ae40b10a84bbfd633fa8ee3e10ce6869827 Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent.modolo@ens-lyon.fr>
Date: Mon, 15 Jul 2024 15:24:39 +0200
Subject: [PATCH] add option to use parasplit instead of cutsite

---
 conf/modules.config        | 15 +++++++++++++++
 modules/local/parasplit.nf | 28 ++++++++++++++++++++++++++++
 nextflow.config            |  5 +++++
 nextflow_schema.json       | 13 +++++++++++++
 workflows/hic.nf           | 19 +++++++++++++++++++
 5 files changed, 80 insertions(+)
 create mode 100644 modules/local/parasplit.nf

diff --git a/conf/modules.config b/conf/modules.config
index 08075e7..1d85bb1 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -470,6 +470,21 @@ process {
         ]
     }
 
+    withName: 'PARASPLIT' {
+        ext.output_for = { "${meta1.id}_${meta1.chunk}_${meta1.mates}_digested.fastq" }
+        ext.output_rev = { "${meta2.id}_${meta2.chunk}_${meta2.mates}_digested.fastq" }
+        ext.args = { [
+            " --mode ${params.parasplit_mode}",
+            " --seed_size ${params.parasplit_seed}"
+        ].join('').trim()
+        }
+        publishDir = [
+            path: { "${params.outdir}/cutsite/digested"},
+            mode: 'copy',
+            enabled: params.save_digested
+        ]
+    }
+
     withName: 'ITERALIGN' {
         ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" }
         ext.args = { [
diff --git a/modules/local/parasplit.nf b/modules/local/parasplit.nf
new file mode 100644
index 0000000..4bfb734
--- /dev/null
+++ b/modules/local/parasplit.nf
@@ -0,0 +1,28 @@
+process PARASPLIT {
+    tag "$meta1.id"
+    label 'process_high'
+
+    container = "docker.io/lbmc/parasplit:0.3.1"
+
+    input:
+    tuple val(meta1), path(reads1), val(meta2), path(reads2)
+    val(digestion)
+
+    output:
+    tuple val(meta1), path ("*.fastq"), emit: fastq
+    path "versions.yml", emit: versions
+
+    script:
+    def args = task.ext.args ?: ''
+    def output_for = task.ext.output_for ?: "${meta1.id}_${meta1.chunk}_${meta1.mates}.fastq"
+    def output_rev = task.ext.output_rev ?: "${meta2.id}_${meta2.chunk}_${meta2.mates}.fastq"
+
+    """
+    parasplit -sf ${reads1} -sr ${reads2} -le ${digestion} -of ${output_for} -or ${output_rev} -nt ${task.cpus} -sz 20 -m all ${args}
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        parasplit: v0.3.1
+    END_VERSIONS
+    """
+}
+
diff --git a/nextflow.config b/nextflow.config
index 26dee50..eebf741 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -176,6 +176,11 @@ params {
     save_digested = false
     cutsite = false
 
+    //Parasplit
+    parasplit = false
+    parasplit_mode = 'all'
+    parasplit_seed = 20
+
     //Iterative alignement
     iteralign = false
 }
diff --git a/nextflow_schema.json b/nextflow_schema.json
index d26c093..bcc53ce 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -720,6 +720,19 @@
                     "description": "Mode to use to make the digestion.",
                     "enum": ["all", "for_vs_rev", "pile"]
                 },
+                "parasplit": {
+                    "type": "boolean",
+                    "description": "If true, do parasplit process to pre digest reads"
+                },
+                "parasplit_seed": {
+                    "type": "integer",
+                    "description": "Minimum size of a fragment (i.e. seed size used in mapping as reads smaller won't be mapped.)"
+                },
+                "parasplit_mode": {
+                    "type": "string",
+                    "description": "Mode to use to make the digestion.",
+                    "enum": ["all", "fr"]
+                },
                 "iteralign": {
                     "type": "boolean",
                     "description": "If true, do iterative alignment"
diff --git a/workflows/hic.nf b/workflows/hic.nf
index 6c3487e..c2adb4b 100644
--- a/workflows/hic.nf
+++ b/workflows/hic.nf
@@ -127,6 +127,7 @@ include { TADS } from '../subworkflows/local/tads'
 include { FASTQC } from '../modules/nf-core/fastqc/main'
 include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
 include { CUTSITE } from '../modules/local/hicstuff/cutsite'
+include { PARASPLIT } from '../modules/local/parasplit'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -208,6 +209,24 @@ workflow HIC {
         ch_reads = CUTSITE.out.fastq
         ch_versions = ch_versions.mix(CUTSITE.out.versions)
     }
+    if (params.parasplit){
+        // Split each pair into single mates (mate stored in [meta]), then keep only the R1/R2 tuples of the same sample and chunk
+        ch_reads_r1 = ch_reads.map{ it -> pairToSingle(it,"R1") }
+        ch_reads_r2 = ch_reads.map{ it -> pairToSingle(it,"R2") }
+        ch_reads = ch_reads_r1.concat(ch_reads_r2)
+
+        ch_reads.combine(ch_reads)
+        .filter {
+            meta1, reads1, meta2, reads2 ->
+                meta1.id == meta2.id && meta1.chunk == meta2.chunk && meta1.mates == "R1" && meta2.mates == "R2"
+        }.set{ new_ch_reads }
+        PARASPLIT(
+            new_ch_reads,
+            params.digestion
+        )
+        ch_reads = PARASPLIT.out.fastq
+        ch_versions = ch_versions.mix(PARASPLIT.out.versions)
+    }
 
     //
     // SUB-WORFLOW: HiC-Pro
-- 
GitLab