From 5bbf9ae40b10a84bbfd633fa8ee3e10ce6869827 Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent.modolo@ens-lyon.fr>
Date: Mon, 15 Jul 2024 15:24:39 +0200
Subject: [PATCH] add option to use parasplit instead of cutsite

---
 conf/modules.config        | 15 +++++++++++++++
 modules/local/parasplit.nf | 28 ++++++++++++++++++++++++++++
 nextflow.config            |  4 ++++
 nextflow_schema.json       | 13 +++++++++++++
 workflows/hic.nf           | 19 +++++++++++++++++++
 5 files changed, 79 insertions(+)
 create mode 100644 modules/local/parasplit.nf

diff --git a/conf/modules.config b/conf/modules.config
index 08075e7..1d85bb1 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -470,6 +470,21 @@ process {
         ]
     }
 
+    withName: 'PARASPLIT' { // per-process settings for the PARASPLIT module
+        ext.output_for = { "${meta1.id}_${meta1.chunk}_${meta1.mates}_digested.fastq" } // forward output name; the module passes it to `-of`
+        ext.output_rev = { "${meta2.id}_${meta2.chunk}_${meta2.mates}_digested.fastq" } // reverse output name; the module passes it to `-or`
+        ext.args = { [ // extra flags the module appends at the end of the parasplit command line
+            " --mode ${params.parasplit_mode}",
+            " --seed_size ${params.parasplit_seed}"
+            ].join('').trim()
+        }
+        publishDir = [
+            path: { "${params.outdir}/cutsite/digested"}, // NOTE(review): 'cutsite' directory reused for PARASPLIT output — confirm this is intended
+            mode: 'copy',
+            enabled: params.save_digested // published only when params.save_digested is set
+        ]
+    }
+
     withName: 'ITERALIGN' {
         ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" }
         ext.args = { [
diff --git a/modules/local/parasplit.nf b/modules/local/parasplit.nf
new file mode 100644
index 0000000..4bfb734
--- /dev/null
+++ b/modules/local/parasplit.nf
@@ -0,0 +1,28 @@
+// Run parasplit on one forward/reverse read pair and emit the resulting *.fastq files.
+process PARASPLIT {
+    tag "$meta1.id"
+    label 'process_high'
+
+    container "docker.io/lbmc/parasplit:0.3.1"
+
+    input:
+    tuple val(meta1), path(reads1), val(meta2), path(reads2) // reads1 -> `-sf`, reads2 -> `-sr`
+    val(digestion)                                           // passed to `-le`
+
+    output:
+    tuple val(meta1), path ("*.fastq"), emit: fastq
+    path "versions.yml", emit: versions
+
+    script:
+    def args = task.ext.args ?: ''
+    def output_for = task.ext.output_for ?: "${meta1.id}_${meta1.chunk}_${meta1.mates}.fastq"
+    def output_rev = task.ext.output_rev ?: "${meta2.id}_${meta2.chunk}_${meta2.mates}.fastq"
+    // `args` comes after the built-in `-sz 20 -m all` — NOTE(review): confirm later flags take precedence.
+    """
+    parasplit -sf ${reads1} -sr ${reads2} -le ${digestion} -of ${output_for} -or ${output_rev} -nt ${task.cpus} -sz 20 -m all ${args}
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        parasplit: 0.3.1
+    END_VERSIONS
+    """
+}
diff --git a/nextflow.config b/nextflow.config
index 26dee50..eebf741 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -176,6 +176,10 @@ params {
     save_digested = false
     cutsite = false
 
+    parasplit = false // Parasplit pre-digestion of reads; off unless enabled (read as params.parasplit)
+    parasplit_mode = 'all'
+    parasplit_seed = 20
+
     //Iterative alignement
     iteralign = false
 }
diff --git a/nextflow_schema.json b/nextflow_schema.json
index d26c093..bcc53ce 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -720,6 +720,19 @@
                     "description": "Mode to use to make the digestion.",
                     "enum": ["all", "for_vs_rev", "pile"]
                 },
+                "parasplit": {
+                    "type": "boolean",
+                    "description": "If true, do parasplit process to pre digest reads"
+                },
+                "parasplit_seed": {
+                    "type": "integer",
+                    "description": "Minimum size of a fragment (i.e. seed size used in mapping as reads smaller won't be mapped.)"
+                },
+                "parasplit_mode": {
+                    "type": "string",
+                    "description": "Mode to use to make the digestion.",
+                    "enum": ["all", "fr"]
+                },
                 "iteralign": {
                     "type": "boolean",
                     "description": "If true, do iterative alignment"
diff --git a/workflows/hic.nf b/workflows/hic.nf
index 6c3487e..c2adb4b 100644
--- a/workflows/hic.nf
+++ b/workflows/hic.nf
@@ -127,6 +127,7 @@ include { TADS } from '../subworkflows/local/tads'
 include { FASTQC                      } from '../modules/nf-core/fastqc/main'
 include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
 include { CUTSITE }                     from '../modules/local/hicstuff/cutsite'
+include { PARASPLIT }                   from '../modules/local/parasplit'
 
 /*
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -208,6 +209,24 @@ workflow HIC {
         ch_reads = CUTSITE.out.fastq
         ch_versions = ch_versions.mix(CUTSITE.out.versions)
     }
+    if (params.parasplit){
+        // Split every paired item into its R1 and R2 mates (mates information added
+        // in [meta]) and hand both to PARASPLIT as [ meta1, reads1, meta2, reads2 ].
+        // Built per item: no all-vs-all combine() and no null results to discard.
+        new_ch_reads = ch_reads.map { it ->
+            def (meta1, reads1) = pairToSingle(it, "R1")
+            def (meta2, reads2) = pairToSingle(it, "R2")
+            [ meta1, reads1, meta2, reads2 ]
+        }
+
+        PARASPLIT(
+            new_ch_reads,
+            params.digestion
+        )
+        // NOTE(review): the emitted tuples carry meta1 only — confirm downstream steps expect that.
+        ch_reads = PARASPLIT.out.fastq
+        ch_versions = ch_versions.mix(PARASPLIT.out.versions)
+    }
 
   //
   // SUB-WORFLOW: HiC-Pro
-- 
GitLab