From ce06ca062bd2b101eb604f53a63767676ece7c82 Mon Sep 17 00:00:00 2001
From: Mia Croiset <mia.croiset@ens-lyon.fr>
Date: Wed, 20 Sep 2023 10:36:47 +0200
Subject: [PATCH] correct picard pcr filter for hicpro

---
 modules/local/hicpro/hicpro2pairs.nf |  6 ++++--
 subworkflows/local/filter_pcr_dup.nf |  9 +++------
 subworkflows/local/hicpro.nf         | 15 --------------
 subworkflows/local/hicpro_mapping.nf | 30 +++++++++++++++++++++++++++-
 4 files changed, 36 insertions(+), 24 deletions(-)

diff --git a/modules/local/hicpro/hicpro2pairs.nf b/modules/local/hicpro/hicpro2pairs.nf
index eb9b86b..bdb111b 100644
--- a/modules/local/hicpro/hicpro2pairs.nf
+++ b/modules/local/hicpro/hicpro2pairs.nf
@@ -19,8 +19,10 @@ process HICPRO2PAIRS {
     prefix = "${meta.id}"
     """
     ##columns: readID chr1 pos1 chr2 pos2 strand1 strand2
-    awk '{OFS="\t";print \$1,\$2,\$3,\$5,\$6,\$4,\$7}' $vpairs | bgzip -c > ${prefix}_contacts.pairs.gz
-    ##sort -k2,2 -k4,4 -k3,3n -k5,5n ${prefix}_contacts.pairs | bgzip -c > ${prefix}_contacts.pairs.gz
+    ## Reorder the columns, sort by chr1, chr2, pos1, pos2, then bgzip; pairix indexes the result below
+    awk '{OFS="\t";print \$1,\$2,\$3,\$5,\$6,\$4,\$7}' $vpairs \\
+        | sort -k2,2 -k4,4 -k3,3n -k5,5n \\
+        | bgzip -c > ${prefix}_contacts.pairs.gz
     pairix -f ${prefix}_contacts.pairs.gz
 
     cat <<-END_VERSIONS > versions.yml
diff --git a/subworkflows/local/filter_pcr_dup.nf b/subworkflows/local/filter_pcr_dup.nf
index 344c1c9..cd5eece 100644
--- a/subworkflows/local/filter_pcr_dup.nf
+++ b/subworkflows/local/filter_pcr_dup.nf
@@ -32,18 +32,15 @@ workflow FILTER_PCR_DUP {
     )
     SAMTOOLS_SORT_N.out.bam.set{ ch_bam }
 
-    if (params.workflow == "hicstuff"){
-        FILTER_PAIR(
+    // Keep only R1/R2 BAM combinations that share id and chunk; others map to null
+    FILTER_PAIR(
         ch_bam.combine(ch_bam)
         .map {
             meta1, bam1, meta2, bam2 ->
                 meta1.id == meta2.id && meta1.chunk == meta2.chunk && meta1.mates == "R1" && meta2.mates == "R2" ? [ meta1,  bam1,  meta2, bam2 ] : null
     })
     FILTER_PAIR.out.bam.set{ new_ch_bam }
-    }
-    else{
-        ch_bam.set{ new_ch_bam }
-    }
+
 
 
     emit:
diff --git a/subworkflows/local/hicpro.nf b/subworkflows/local/hicpro.nf
index cf1915c..2553e22 100644
--- a/subworkflows/local/hicpro.nf
+++ b/subworkflows/local/hicpro.nf
@@ -46,22 +46,7 @@ workflow HICPRO {
 
     HICPRO_MAPPING.out.bam.set{ ch_bam }
 
-    //***************************************
-    // FILTER PCR DUPLICATES
 
-    if (params.filter_pcr_picard && !params.keep_dups){
-        error "Error: cannot filter PCR duplicates with both methods! If filter_pcr_picard is true, keep_dups should be true too"
-    }
-    else if (params.filter_pcr_picard){
-        FILTER_PCR_DUP(
-            HICPRO_MAPPING.out.bam,
-            fasta,
-            index
-        )
-        FILTER_PCR_DUP.out.bam
-            .set {ch_bam}
-    }
-    //TODO add versions
 
     //***************************************
     // DIGESTION PROTOCOLS
diff --git a/subworkflows/local/hicpro_mapping.nf b/subworkflows/local/hicpro_mapping.nf
index 61a95a8..e544ebd 100644
--- a/subworkflows/local/hicpro_mapping.nf
+++ b/subworkflows/local/hicpro_mapping.nf
@@ -9,6 +9,7 @@ include { BOWTIE2_ALIGN as BOWTIE2_ALIGN_TRIMMED } from '../../modules/nf-core/b
 include { MERGE_BOWTIE2 } from '../../modules/local/hicpro/bowtie2_merge'
 include { COMBINE_MATES} from '../../modules/local/hicpro/combine_mates'
 include { MAPPING_STATS_DNASE } from '../../modules/local/hicpro/dnase_mapping_stats'
+include { FILTER_PCR_DUP } from './filter_pcr_dup'
 
 // Paired-end to Single-end
 def pairToSingle(row, mates) {
@@ -88,15 +89,42 @@ workflow HICPRO_MAPPING {
     MERGE_BOWTIE2(
       ch_bowtie2_align
     )
+    // Name the merged BAM channel so the optional PCR-duplicate filtering
+    // step below can swap in its own output before the mates are combined.
+    MERGE_BOWTIE2.out.bam
+        .set{ ch_bowtie2_merged }
+
     ch_versions = ch_versions.mix(MERGE_BOWTIE2.out.versions)
     ch_mapping_stats = MERGE_BOWTIE2.out.stats
 
+    //***************************************
+    // FILTER PCR DUPLICATES
+    // Picard filtering is only accepted together with keep_dups (see error below).
+    if (params.filter_pcr_picard && !params.keep_dups){
+        error "Error: cannot filter PCR duplicates with both methods! If filter_pcr_picard is true, keep_dups should be true too"
+    }
+    else if (params.filter_pcr_picard){
+        FILTER_PCR_DUP(
+            ch_bowtie2_merged,
+            fasta,
+            index
+        )
+        // Flatten, then regroup into 2-element items (presumably [meta, bam] -- TODO confirm)
+        FILTER_PCR_DUP.out.bam
+            .flatten()
+            .buffer( size: 2 )
+            .set {ch_bowtie2_merged}
+    }
+    // TODO: add versions. NOTE(review): confirm fasta and index are defined in this workflow.
+
     // Combine mates
-    MERGE_BOWTIE2.out.bam
+    ch_bowtie2_merged
       .map { singleToPair(it) }
       .groupTuple()
       .set {ch_bams}
 
+    // ch_bams: singleToPair() results grouped by key with groupTuple()
+
   }else{
 
     MAPPING_STATS_DNASE(
-- 
GitLab