From e288490077795590445c2200d4ec50ba0ded65fe Mon Sep 17 00:00:00 2001
From: Mia Croiset <mia.croiset@ens-lyon.fr>
Date: Fri, 3 Mar 2023 17:51:36 +0100
Subject: [PATCH] create build_matrix_cool module + finish args def

---
 bin/hicstuff_bam2pairs.py                   |  4 +++-
 bin/hicstuff_build_matrix.py                |  1 +
 conf/hicstuff.config                        | 12 +++++++++-
 modules/local/hicstuff/bam2pairs.nf         |  2 +-
 modules/local/hicstuff/build_matrix.nf      | 11 +++++----
 modules/local/hicstuff/build_matrix_cool.nf | 25 +++++++++++++++++++++
 6 files changed, 48 insertions(+), 7 deletions(-)
 create mode 100644 modules/local/hicstuff/build_matrix_cool.nf

diff --git a/bin/hicstuff_bam2pairs.py b/bin/hicstuff_bam2pairs.py
index ea456aa..6e3cfeb 100755
--- a/bin/hicstuff_bam2pairs.py
+++ b/bin/hicstuff_bam2pairs.py
@@ -146,6 +146,7 @@ if __name__ == "__main__":
     parser.add_argument("-q", "--min_qual")
     parser.add_argument("-e", "--enzyme")
     parser.add_argument("-f", "--fasta")
+    parser.add_argument("-c", "--circular")
     args = parser.parse_args()
 
     bam1 = args.bam1
@@ -156,6 +157,7 @@ if __name__ == "__main__":
     min_qual = int(args.min_qual)
     enzyme = args.enzyme
     fasta = args.fasta
+    circular = str(args.circular).strip().lower() in ("true", "1", "yes")  # argparse yields a string; a bare "false" would be truthy
 
     #hicstuff case sensitive enzymes adaptation
     if enzyme == "hindiii":
@@ -175,7 +177,7 @@ if __name__ == "__main__":
     for record in SeqIO.parse(hio.read_compressed(fasta), "fasta"):
         # Get chromosome restriction table
         restrict_table[record.id] = hcd.get_restriction_table(
-            record.seq, enzyme, circular=False
+            record.seq, enzyme, circular=circular
         )
 
     hcd.attribute_fragments(out_pairs, out_idx, restrict_table)
diff --git a/bin/hicstuff_build_matrix.py b/bin/hicstuff_build_matrix.py
index 451849e..bd68ee0 100755
--- a/bin/hicstuff_build_matrix.py
+++ b/bin/hicstuff_build_matrix.py
@@ -5,6 +5,7 @@ import argparse
 import pysam as ps
 import pandas as pd
 import shutil as st
+import subprocess as sp
 import itertools
 from hicstuff_log import logger
 import hicstuff_io as hio
diff --git a/conf/hicstuff.config b/conf/hicstuff.config
index 6f53691..e670077 100644
--- a/conf/hicstuff.config
+++ b/conf/hicstuff.config
@@ -129,6 +129,7 @@ params {
     hicstuff_valid_pairs = 'valid.pairs'
     hicstuff_valid_idx = 'valid_idx.pairs'
     hicstuff_min_qual = 30
+    hicstuff_matrix = 'abs_fragments_contacts_weighted.txt'
 }
 
 process {
@@ -167,7 +168,8 @@ process {
         ext.args = { [
             " -o ${params.hicstuff_valid_pairs}",
             " -x ${params.hicstuff_valid_idx}",
-            " -q ${params.hicstuff_min_qual}"
+            " -q ${params.hicstuff_min_qual}",
+            " -c ${params.hicstuff_circular}"
         ].join('').trim() }
         publishDir = [
             path: { "${params.outdir}/hicstuff/pairs" },
@@ -175,6 +177,14 @@ process {
         ]
     }
     withName: 'BUILD_MATRIX' {
+        ext.args = params.hicstuff_matrix
+        publishDir = [
+            path: { "${params.outdir}/hicstuff/matrix" },
+            mode: 'copy'
+        ]
+    }
+    withName: 'BUILD_MATRIX_COOL' {
+        ext.args = params.hicstuff_matrix
         publishDir = [
             path: { "${params.outdir}/hicstuff/matrix" },
             mode: 'copy'
diff --git a/modules/local/hicstuff/bam2pairs.nf b/modules/local/hicstuff/bam2pairs.nf
index 68f7915..c8a5a14 100644
--- a/modules/local/hicstuff/bam2pairs.nf
+++ b/modules/local/hicstuff/bam2pairs.nf
@@ -1,5 +1,5 @@
 process BAM2PAIRS {
-    tag "$info_contigs"
+    tag "$meta1"
     label 'process_high'
 
     conda "conda-forge::python=3.9 conda-forge::biopython=1.80 conda-forge::numpy=1.22.3 conda-forge::matplotlib=3.6.3 conda-forge::pandas=1.5.3"
diff --git a/modules/local/hicstuff/build_matrix.nf b/modules/local/hicstuff/build_matrix.nf
index c81df96..d8ff6a6 100644
--- a/modules/local/hicstuff/build_matrix.nf
+++ b/modules/local/hicstuff/build_matrix.nf
@@ -1,5 +1,5 @@
 process BUILD_MATRIX {
-    tag "$idx_pairs"
+    tag "$meta1.id"
     label 'process_single'
 
     conda "conda-forge::python=3.9 conda-forge::biopython=1.80 conda-forge::numpy=1.22.3 conda-forge::matplotlib=3.6.3 conda-forge::pandas=1.5.3"
@@ -10,12 +10,15 @@ process BUILD_MATRIX {
     tuple val(meta), path(fragments_list)
 
     output:
-    tuple val(meta), path("${meta1.id}_fragments_contacts_weighted.txt"), emit: matrix
+    tuple val(meta), path("${meta1.id}_*"), emit: matrix
 
     script:
+
+    def args = task.ext.args ?: ''
+
     """
-    hicstuff_build_matrix.py -p ${idx_pairs} -f ${fragments_list} -t graal -o abs_fragments_contacts_weighted.txt
+    hicstuff_build_matrix.py -p ${idx_pairs} -f ${fragments_list} -t graal -o ${args}
 
-    mv abs_fragments_contacts_weighted.txt ${meta1.id}_fragments_contacts_weighted.txt
+    mv ${args} ${meta1.id}_${args}
     """
 }
diff --git a/modules/local/hicstuff/build_matrix_cool.nf b/modules/local/hicstuff/build_matrix_cool.nf
new file mode 100644
index 0000000..3611061
--- /dev/null
+++ b/modules/local/hicstuff/build_matrix_cool.nf
@@ -0,0 +1,25 @@
+// Runs hicstuff_build_matrix.py in `cool` mode and renames the resulting .cool file with the meta1.id prefix.
+process BUILD_MATRIX_COOL {
+    tag "$meta1.id"
+    label 'process_single'
+
+    conda "conda-forge::python=3.9 conda-forge::biopython=1.80 conda-forge::numpy=1.22.3 conda-forge::matplotlib=3.6.3 conda-forge::pandas=1.5.3"
+    container "lbmc/hicstuff:3.1.3"
+
+    input:
+    tuple val(meta1), path(idx_pairs)
+    tuple val(meta), path(fragments_list)
+
+    output:
+    tuple val(meta), path("${meta1.id}_*.cool"), emit: matrix
+
+    script:
+    // ext.args is the matrix file name (never left empty for -o); base drops only a trailing literal ".txt"
+    def args = task.ext.args ?: 'abs_fragments_contacts_weighted.txt'
+    def base = args.replaceFirst(/\.txt$/, "")
+    """
+    hicstuff_build_matrix.py -p ${idx_pairs} -f ${fragments_list} -t cool -o ${args}
+
+    mv ${base}.cool ${meta1.id}_${base}.cool
+    """
+}
-- 
GitLab