From f1fe040e026ec002573a16339b80848d38aeb173 Mon Sep 17 00:00:00 2001 From: Mia Croiset <mia.croiset@ens-lyon.fr> Date: Fri, 10 Feb 2023 09:54:32 +0100 Subject: [PATCH] TEST fragment enzyme modules --- bin/__init__.py | 1 + bin/hicstuff_digest.py | 489 ++++++++ bin/hicstuff_fragments.py | 40 + bin/hicstuff_io.py | 1379 +++++++++++++++++++++ bin/hicstuff_log.py | 89 ++ conf/hicstuff.config | 31 + modules/local/hicstuff/fragment_enzyme.nf | 21 + subworkflows/local/hicstuff_sub.nf | 12 + workflows/hicstuff.nf | 8 +- 9 files changed, 2067 insertions(+), 3 deletions(-) create mode 100644 bin/__init__.py create mode 100644 bin/hicstuff_digest.py create mode 100755 bin/hicstuff_fragments.py create mode 100644 bin/hicstuff_io.py create mode 100644 bin/hicstuff_log.py create mode 100644 modules/local/hicstuff/fragment_enzyme.nf diff --git a/bin/__init__.py b/bin/__init__.py new file mode 100644 index 0000000..813219d --- /dev/null +++ b/bin/__init__.py @@ -0,0 +1 @@ +from .bin import * diff --git a/bin/hicstuff_digest.py b/bin/hicstuff_digest.py new file mode 100644 index 0000000..3fba3da --- /dev/null +++ b/bin/hicstuff_digest.py @@ -0,0 +1,489 @@ +#!/usr/bin/env python3 +# coding: utf-8 + +"""Genome digestion +Functions used to write auxiliary instagraal compatible +sparse matrices. +""" + +from Bio import SeqIO, SeqUtils +from Bio.Seq import Seq +from Bio.Restriction import RestrictionBatch, Analysis +import os, sys, csv +import re +import collections +import copy +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from hicstuff_log import logger +import hicstuff_io as hio + +DEFAULT_FRAGMENTS_LIST_FILE_NAME = "fragments_list.txt" +DEFAULT_INFO_CONTIGS_FILE_NAME = "info_contigs.txt" +DEFAULT_SPARSE_MATRIX_FILE_NAME = "abs_fragments_contacts_weighted.txt" +DEFAULT_KB_BINNING = 1 +DEFAULT_THRESHOLD_SIZE = 0 +# Most used enzyme for eukaryotes +DEFAULT_ENZYME = "DpnII" +# If using evenly-sized chunks instead of restriction +# enzymes, they shouldn't be too short +DEFAULT_MIN_CHUNK_SIZE = 50 + + +def write_frag_info( + fasta, + enzyme, + min_size=DEFAULT_THRESHOLD_SIZE, + circular=False, + output_contigs=DEFAULT_INFO_CONTIGS_FILE_NAME, + output_frags=DEFAULT_FRAGMENTS_LIST_FILE_NAME, + output_dir=None, +): + """Digest and write fragment information + Write the fragments_list.txt and info_contigs.txt that are necessary for + instagraal to run. + Parameters + ---------- + fasta : pathlib.Path or str + The path to the reference genome + enzyme : str, int or list of str + If a string, must be the name of an enzyme (e.g. DpnII) and the genome + will be cut at the enzyme's restriction sites. If a number, the genome + will be cut uniformly into chunks with length equal to that number. A + list of enzymes can also be specified if using multiple enzymes. + min_size : float, optional + Size below which shorter contigs are discarded. Default is 0, i.e. all + contigs are retained. + circular : bool, optional + Whether the genome is circular. Default is False. + output_contigs : str, optional + The name of the file with contig info. Default is info_contigs.txt + output_frags : str, optional + The name of the file with fragment info. Default is fragments_list.txt + output_dir : [type], optional + The path to the output directory, which will be created if not already + existing. Default is the current directory. 
+ """ + + records = SeqIO.parse(hio.read_compressed(fasta), "fasta") + + try: + info_contigs_path = os.path.join(output_dir, output_contigs) + frag_list_path = os.path.join(output_dir, output_frags) + except TypeError: + info_contigs_path = output_contigs + frag_list_path = output_frags + + with open(info_contigs_path, "w") as info_contigs: + + info_contigs.write("contig\tlength\tn_frags\tcumul_length\n") + + with open(frag_list_path, "w") as fragments_list: + + fragments_list.write( + "id\tchrom\tstart_pos" "\tend_pos\tsize\tgc_content\n" + ) + + total_frags = 0 + + for record in records: + contig_seq = record.seq + contig_name = record.id + contig_length = len(contig_seq) + if contig_length < int(min_size): + continue + + sites = get_restriction_table( + contig_seq, enzyme, circular=circular + ) + fragments = ( + contig_seq[sites[i] : sites[i + 1]] + for i in range(len(sites) - 1) + ) + n_frags = 0 + + current_id = 1 + start_pos = 0 + for frag in fragments: + frag_length = len(frag) + if frag_length > 0: + end_pos = start_pos + frag_length + gc_content = SeqUtils.GC(frag) / 100.0 + + current_fragment_line = "%s\t%s\t%s\t%s\t%s\t%s\n" % ( + current_id, + contig_name, + start_pos, + end_pos, + frag_length, + gc_content, + ) + + fragments_list.write(current_fragment_line) + + try: + assert (current_id == 1 and start_pos == 0) or ( + current_id > 1 and start_pos > 0 + ) + except AssertionError: + logger.error((current_id, start_pos)) + raise + start_pos = end_pos + current_id += 1 + n_frags += 1 + + current_contig_line = "%s\t%s\t%s\t%s\n" % ( + contig_name, + contig_length, + n_frags, + total_frags, + ) + total_frags += n_frags + info_contigs.write(current_contig_line) + + +def attribute_fragments(pairs_file, idx_pairs_file, restriction_table): + """ + Writes the indexed pairs file, which has two more columns than the input + pairs file corresponding to the restriction fragment index of each read. + Note that pairs files have 1bp point positions whereas restriction table + has 0bp point poisitions. + Parameters + ---------- + pairs_file: str + Path the the input pairs file. Consists of 7 tab-separated + columns: readID, chr1, pos1, chr2, pos2, strand1, strand2 + idx_pairs_file: str + Path to the output indexed pairs file. Consists of 9 white space + separated columns: readID, chr1, pos1, chr2, pos2, strand1, strand2, + frag1, frag2. frag1 and frag2 are 0-based restriction fragments based + on whole genome. + restriction_table: dict + Dictionary with chromosome identifiers (str) as keys and list of + positions (int) of restriction sites as values. + """ + + # NOTE: Bottlenecks here are 1. binary search in find_frag and 2. writerow + # 1. could be reduced by searching groups of N frags in parallel and 2. by + # writing N frags simultaneously using a single call of writerows. 
+ + # Parse and update header section + pairs_header = hio.get_pairs_header(pairs_file) + header_size = len(pairs_header) + chrom_order = [] + with open(idx_pairs_file, "w") as idx_pairs: + for line in pairs_header: + # Add new column names to header + if line.startswith("#columns"): + line = line.rstrip() + " frag1 frag2" + if line.startswith("#chromsize"): + chrom_order.append(line.split()[1]) + idx_pairs.write(line + "\n") + + # Get number of fragments per chrom to allow genome-based indices + shift_frags = {} + prev_frags = 0 + for rank, chrom in enumerate(chrom_order): + if rank > 0: + # Note the "-1" because there are nfrags + 1 sites in rest table + prev_frags += len(restriction_table[chrom_order[rank - 1]]) - 1 + # Idx of each chrom's frags will be shifted by n frags in previous chroms + shift_frags[chrom] = prev_frags + + missing_contigs = set() + # Attribute pairs to fragments and append them to output file (after header) + with open(pairs_file, "r") as pairs, open( + idx_pairs_file, "a" + ) as idx_pairs: + # Skip header lines + for _ in range(header_size): + next(pairs) + + # Define input and output fields + pairs_cols = [ + "readID", + "chr1", + "pos1", + "chr2", + "pos2", + "strand1", + "strand2", + ] + idx_cols = pairs_cols + ["frag1", "frag2"] + + # Use csv reader / writer to automatically parse columns into a dict + pairs_reader = csv.DictReader( + pairs, fieldnames=pairs_cols, delimiter="\t" + ) + pairs_writer = csv.DictWriter( + idx_pairs, fieldnames=idx_cols, delimiter="\t" + ) + + for pair in pairs_reader: + # Get the 0-based indices of corresponding restriction fragments + # Deducing 1 from pair position to get it into 0bp point + pair["frag1"] = find_frag( + int(pair["pos1"]) - 1, restriction_table[pair["chr1"]] + ) + pair["frag2"] = find_frag( + int(pair["pos2"]) - 1, restriction_table[pair["chr2"]] + ) + # Shift fragment indices to make them genome-based instead of + # chromosome-based + try: + pair["frag1"] += shift_frags[pair["chr1"]] + except KeyError: + missing_contigs.add(pair["chr1"]) + try: + pair["frag2"] += shift_frags[pair["chr2"]] + except KeyError: + missing_contigs.add(pair["chr2"]) + + # Write indexed pairs in the new file + pairs_writer.writerow(pair) + + if missing_contigs: + logger.warning( + "Pairs on the following contigs were discarded as " + "those contigs are not listed in the paris file header. " + "This is normal if you filtered out small contigs: %s" + % " ".join(list(missing_contigs)) + ) + + +def get_restriction_table(seq, enzyme, circular=False): + """ + Get the restriction table for a single genomic sequence. + Parameters + ---------- + seq : Seq object + A biopython Seq object representing a chromosomes or contig. + enzyme : int, str or list of str + The name of the restriction enzyme used, or a list of restriction + enzyme names. Can also be an integer, to digest by fixed chunk size. + circular : bool + Wether the genome is circular. + Returns + ------- + numpy.array: + List of restriction fragment boundary positions for the input sequence. + >>> from Bio.Seq import Seq + >>> get_restriction_table(Seq("AAGCCGGATCGG"),"HpaII") + array([ 0, 4, 12]) + >>> get_restriction_table(Seq("AA"),["HpaII", "MluCI"]) + array([0, 2]) + >>> get_restriction_table(Seq("AA"),"aeiou1") + Traceback (most recent call last): + ... + ValueError: aeiou1 is not a valid restriction enzyme. + >>> get_restriction_table("AA","HpaII") + Traceback (most recent call last): + ... 
+ TypeError: Expected Seq or MutableSeq instance, got <class 'str'> instead + """ + chrom_len = len(seq) + wrong_enzyme = "{} is not a valid restriction enzyme.".format(enzyme) + # Restriction batch containing the restriction enzyme + try: + enz = [enzyme] if isinstance(enzyme, str) else enzyme + cutter = RestrictionBatch(enz) + except (TypeError, ValueError): + try: + cutter = max(int(enzyme), DEFAULT_MIN_CHUNK_SIZE) + except ValueError: + raise ValueError(wrong_enzyme) + + # Conversion from string type to restriction type + if isinstance(cutter, int): + sites = [i for i in range(0, chrom_len, cutter)] + if sites[-1] < chrom_len: + sites.append(chrom_len) + else: + # Find sites of all restriction enzymes given + ana = Analysis(cutter, seq, linear=not circular) + sites = ana.full() + # Gets all sites into a single flat list with 0-based index + sites = [site - 1 for enz in sites.values() for site in enz] + # Sort by position and allow first add start and end of seq + sites.sort() + sites.insert(0, 0) + sites.append(chrom_len) + + return np.array(sites) + + +def find_frag(pos, r_sites): + """ + Use binary search to find the index of a chromosome restriction fragment + corresponding to an input genomic position. + Parameters + ---------- + pos : int + Genomic position, in base pairs. + r_sites : list + List of genomic positions corresponding to restriction sites. + Returns + ------- + int + The 0-based index of the restriction fragment to which the position belongs. + >>> find_frag(15, [0, 20, 30]) + 0 + >>> find_frag(15, [10, 20, 30]) + Traceback (most recent call last): + ... + ValueError: The first position in the restriction table is not 0. + >>> find_frag(31, [0, 20, 30]) + Traceback (most recent call last): + ... + ValueError: Read position is larger than last entry in restriction table. + """ + if r_sites[0] != 0: + raise ValueError( + "The first position in the restriction table is not 0." + ) + if pos > r_sites[-1]: + raise ValueError( + "Read position is larger than last entry in restriction table." + ) + # binary search for the index of the read + index = max(np.searchsorted(r_sites, pos, side="right") - 1, 0) + # Last site = end of the chrom, index of last fragment is last site - 1 + index = min(len(r_sites) - 2, index) + + return index + + +def frag_len( + frags_file_name=DEFAULT_FRAGMENTS_LIST_FILE_NAME, + output_dir=None, + plot=False, + fig_path=None, +): + """ + logs summary statistics of fragment length distribution based on an + input fragment file. Can optionally show a histogram instead + of text summary. + Parameters + ---------- + frags_file_name : str + Path to the output list of fragments. + output_dir : str + Directory where the list should be saved. + plot : bool + Wether a histogram of fragment length should be shown. + fig_path : str + If a path is given, the figure will be saved instead of shown. 
+ """ + + try: + frag_list_path = os.path.join(output_dir, frags_file_name) + except TypeError: + frag_list_path = frags_file_name + frags = pd.read_csv(frag_list_path, sep="\t") + nfrags = frags.shape[0] + med_len = frags["size"].median() + nbins = 40 + if plot: + fig, ax = plt.subplots() + _, _, _ = ax.hist(frags["size"], bins=nbins) + + ax.set_xlabel("Fragment length [bp]") + ax.set_ylabel("Log10 number of fragments") + ax.set_title("Distribution of restriction fragment length") + ax.set_yscale("log", base=10) + ax.annotate( + "Total fragments: {}".format(nfrags), + xy=(0.95, 0.95), + xycoords="axes fraction", + fontsize=12, + horizontalalignment="right", + verticalalignment="top", + ) + ax.annotate( + "Median length: {}".format(med_len), + xy=(0.95, 0.90), + xycoords="axes fraction", + fontsize=12, + horizontalalignment="right", + verticalalignment="top", + ) + # Tweak spacing to prevent clipping of ylabel + fig.tight_layout() + if fig_path: + plt.savefig(fig_path) + else: + plt.show() + plt.clf() + else: + logger.info( + "Genome digested into {0} fragments with a median " + "length of {1}".format(nfrags, med_len) + ) + + +def gen_enzyme_religation_regex(enzyme): + """Return a regex which corresponds to all possible religation sites given a + set of enzyme. + Parameters: + ----------- + enzyme : str + String that contains the names of the enzyme separated by a comma. + Returns: + -------- + re.Pattern : + Regex that corresponds to all possible ligation sites given a set of + enzyme. + Examples: + --------- + >>> gen_enzyme_religation_regex('HpaII') + re.compile('CCGCGG') + >>> gen_enzyme_religation_regex('HpaII,MluCI') + re.compile('AATTAATT|AATTCGG|CCGAATT|CCGCGG') + """ + + # Split the str on the comma to separate the different enzymes. + enzyme = enzyme.split(",") + + # Check on Biopython dictionnary the enzyme. + rb = RestrictionBatch(enzyme) + + # Initiation: + give_list = [] + accept_list = [] + ligation_list = [] + + # Iterates on the enzymes. + for enz in rb: + + # Extract restriction sites and look for cut sites. + site = enz.elucidate() + fw_cut = site.find("^") + rev_cut = site.find("_") + + # Process "give" site. Remove N on the left (useless). + give_site = site[:rev_cut].replace("^", "") + while give_site[0] == "N": + give_site = give_site[1:] + give_list.append(give_site) + + # Process "accept" site. Remove N on the rigth (useless). + accept_site = site[fw_cut + 1 :].replace("_", "") + while accept_site[-1] == "N": + accept_site = accept_site[:-1] + accept_list.append(accept_site) + + # Iterates on the two list to build all the possible HiC ligation sites. + for give_site in give_list: + for accept_site in accept_list: + # Replace "N" by "." for regex searching of the sites + ligation_list.append((give_site + accept_site).replace("N", ".")) + ligation_list.append( + str(Seq(give_site + accept_site).reverse_complement()).replace( + "N", "." + ) + ) + + # Build the regex for any ligation sites. 
+    pattern = "|".join(sorted(list(set(ligation_list))))
+    return re.compile(pattern)
diff --git a/bin/hicstuff_fragments.py b/bin/hicstuff_fragments.py
new file mode 100755
index 0000000..c49378e
--- /dev/null
+++ b/bin/hicstuff_fragments.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+
+import hicstuff_digest as hcd
+import argparse
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-e", "--enzyme")
+    parser.add_argument("-i", "--index")
+    args = parser.parse_args()
+
+    enzyme = args.enzyme
+    index = args.index
+
+
+    # hicstuff expects case-sensitive enzyme names; map the lowercase CLI values
+    if enzyme == "hindiii":
+        enzyme = "HindIII"
+    elif enzyme == "dpnii":
+        enzyme = "DpnII"
+    elif enzyme == "bglii":
+        enzyme = "BglII"
+    elif enzyme == "mboi":
+        enzyme = "MboI"
+    elif enzyme == "arima":
+        enzyme = "Arima"
+
+
+    # Generate info_contigs and fragments_list output files
+    hcd.write_frag_info(
+        index,
+        enzyme,
+        min_size=0,
+        circular=False,
+        output_contigs="info_contigs.txt",
+        output_frags="fragments_list.txt",
+    )
+
+    # Log fragment size distribution
+    hcd.frag_len(frags_file_name="fragments_list.txt", plot=False, fig_path="frags_hist.pdf")
diff --git a/bin/hicstuff_io.py b/bin/hicstuff_io.py
new file mode 100644
index 0000000..c8c4dde
--- /dev/null
+++ b/bin/hicstuff_io.py
@@ -0,0 +1,1379 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import gzip
+import zipfile
+import bz2
+import io
+import os
+import functools
+import sys
+import numpy as np
+import pandas as pd
+import collections
+import subprocess as sp
+from scipy.sparse import tril, triu
+import pathlib
+import re
+from os.path import join, exists
+from random import getrandbits
+from scipy.sparse import coo_matrix
+from Bio import SeqIO, SeqUtils
+#import hicstuff.hicstuff as hcs  # only needed when load_sparse_matrix() is called with binning != 1
+from hicstuff_log import logger
+#from hicstuff.version import __version__  # only needed by flexible_hic_saver() for cool metadata
+import scipy.stats as ss
+
+DEFAULT_MAX_MATRIX_SHAPE = 10000
+
+DEFAULT_FRAGMENTS_LIST_FILE_NAME = "fragments_list.txt"
+DEFAULT_INFO_CONTIGS_FILE_NAME = "info_contigs.txt"
+DEFAULT_SPARSE_MATRIX_FILE_NAME = "abs_fragments_contacts_weighted.txt"
+
+
+def _cols_to_sparse(sparse_array, shape=None, dtype=np.float64):
+    """
+    Make a coordinate based sparse matrix from columns.
+    Convert (n, 3) shaped arrays to a sparse matrix. The first
+    column acts as the row coordinates, the second one as the
+    column coordinates, the third one as the data points
+    for each pair. If duplicates are found, the data points are
+    added.
+    Parameters
+    ----------
+    sparse_array : array_like
+        An array with exactly three columns representing the sparse
+        matrix data in coordinate format.
+    shape : tuple of int
+        The total number of rows and columns in the matrix. Will be estimated
+        from nonzero values if omitted.
+    dtype : type, optional
+        The type of data being loaded. Default is numpy.float64
+    Example
+    -------
+    >>> import numpy as np
+    >>> row, col = np.array([1, 2, 3]), np.array([3, 2, 1])
+    >>> data = np.array([4, 5, 6])
+    >>> M = np.array([row, col, data]).T
+    >>> S = _cols_to_sparse(M)
+    >>> print(S.todense())
+    [[0. 0. 0. 0.]
+     [0. 0. 0. 4.]
+     [0. 0. 5. 0.]
+     [0. 6. 0. 0.]]
+    """
+
+    row = sparse_array[:, 0]
+    col = sparse_array[:, 1]
+    data = sparse_array[:, 2]
+    if shape is None:
+        n = int(max(np.amax(row), np.amax(col))) + 1
+        shape = (n, n)
+
+    S = coo_matrix((data, (row, col)), shape=shape, dtype=dtype)
+    return S
+
+
+def load_sparse_matrix(mat_path, binning=1, dtype=np.float64):
+    """Load sparse matrix
+    Load a text file matrix into a sparse matrix object.
The expected format is + a 3 column file where columns are row_number, col_number, value. The first + line consists of 3 values representing the total number of rows, columns + and nonzero values. + Parameters + ---------- + mat_path : file, str or pathlib.Path + The input matrix file in instagraal format. + binning : int or "auto" + The binning to perform. If "auto", binning will + be automatically inferred so that the matrix size + will not go beyond (10000, 10000) in shape. That + can be changed by modifying the DEFAULT_MAX_MATRIX_SHAPE + value. Default is 1, i.e. no binning is performed + dtype : type, optional + The type of data being loaded. Default is numpy.float64 + Returns + ------- + sparse_mat : scipy.sparse.coo_matrix + The output (sparse) matrix in COOrdinate format. + Examples + -------- + >>> S = load_sparse_matrix('test_data/mat_5kb.tsv', binning=1) + >>> S.data[:10] + array([84., 2., 3., 2., 1., 1., 50., 1., 66., 1.]) + >>> S.shape + (16, 16) + """ + try: + cols_arr = np.loadtxt(mat_path, delimiter="\t", dtype=dtype) + shape = (int(cols_arr[0, 0]), int(cols_arr[0, 1])) + except ValueError: + cols_arr = np.loadtxt( + mat_path, delimiter="\t", dtype=dtype, skiprows=1 + ) + shape = None + + # Get values into an array without the header. Use the header to give size. + sparse_mat = _cols_to_sparse(cols_arr[1:, :], shape=shape, dtype=dtype) + + if binning == "auto": + num_bins = max(sparse_mat.shape) + 1 + subsampling_factor = num_bins // DEFAULT_MAX_MATRIX_SHAPE + else: + subsampling_factor = binning + sparse_mat = hcs.bin_sparse( + sparse_mat, subsampling_factor=subsampling_factor + ) + return sparse_mat + + +def save_sparse_matrix(s_mat, path): + """Save a sparse matrix + Saves a sparse matrix object into tsv format. + Parameters + ---------- + s_mat : scipy.sparse.coo_matrix + The sparse matrix to save on disk + path : str + File path where the matrix will be stored + """ + if s_mat.format != "coo": + ValueError("Sparse matrix must be in coo format") + dtype = s_mat.dtype + fmt = "%i" if dtype == int else "%.10e" + sparse_arr = np.vstack([s_mat.row, s_mat.col, s_mat.data]).T + + np.savetxt( + path, + sparse_arr, + header="{nrows}\t{ncols}\t{nonzero}".format( + nrows=s_mat.shape[0], ncols=s_mat.shape[1], nonzero=s_mat.nnz + ), + comments="", + fmt=fmt, + delimiter="\t", + ) + + +def load_pos_col(path, colnum, header=1, dtype=np.int64): + """ + Loads a single column of a TSV file with header into a numpy array. + + Parameters + ---------- + path : str + The path of the TSV file to load. + colnum : int + The 0-based index of the column to load. + header : int + Number of line to skip. By default the header is a single line. + Returns + ------- + numpy.array : + A 1D numpy array with the + Examples + -------- + >>> load_pos_col('test_data/mat_5kb.tsv', 0)[:10] + array([0, 0, 0, 0, 0, 0, 1, 1, 2, 2]) + """ + pos_arr = np.genfromtxt( + path, + delimiter="\t", + usecols=(colnum,), + skip_header=header, + dtype=dtype, + ) + return pos_arr + + +def generate_temp_dir(path): + """Temporary directory generation + Generates a temporary file with a random name at the input path. + + Parameters + ---------- + path : str + The path at which the temporary directory will be created. + + Returns + ------- + str + The path of the newly created temporary directory. 
+ """ + exist = True + while exist: + # Keep trying random directory names if they already exist + directory = str(hex(getrandbits(32)))[2:] + full_path = join(path, directory) + if not exists(full_path): + exist = False + try: + os.makedirs(full_path) + except PermissionError: + raise PermissionError( + "The temporary directory cannot be created in {}. " + "Make sure you have write permission.".format(path) + ) + return full_path + + +def _check_cooler(fun): + """Decorates function `fun` to check if cooler is available..""" + + @functools.wraps(fun) + def wrapped(*args, **kwargs): + try: + import cooler + + fun.__globals__["cooler"] = cooler + except ImportError: + logger.error( + "The cooler package is required to use {0}, please install it first".format( + fun.__name__ + ) + ) + raise ImportError("The cooler package is required.") + return fun(*args, **kwargs) + + return wrapped + + +@_check_cooler +def add_cool_column( + clr, column, column_name, table_name="bins", metadata={}, dtype=None +): + """ + Adds a new column to a loaded Cooler store. If the column exists, + it is replaced. This will affect the .cool file. + + Parameters + ---------- + clr : Cooler object + A Cooler store. + column : pandas Series + The column to add to the cooler. + column_name : str + The name of the column to add. + table_name : str + The name of the table to which the column should be added. + Defaults to the "bins" table. + metadata : dict + A dictionary of metadata to associate with the new column. + """ + with clr.open("r+") as c: + if column_name in c[table_name]: + del c[table_name][column_name] + h5opts = dict(compression="gzip", compression_opts=6) + c[table_name].create_dataset( + column_name, data=column, dtype=dtype, **h5opts + ) + c[table_name][column_name].attrs.update(metadata) + + +@_check_cooler +def load_cool(cool): + """ + Reads a cool file into memory and parses it into graal style tables. + + Parameters + ---------- + cool : str + Path to the input .cool file. + Returns + ------- + mat : scipy coo_matrix + Hi-C contact map in COO format. + frags : pandas DataFrame + Table off bins matching the matrix. Corresponds to the content of the fragments_list.txt file. + chroms : pandas DataFrame + Table of chromosome informations. + """ + c = cooler.Cooler(cool) # pylint: disable=undefined-variable + frags = c.bins()[:] + chroms = c.chroms()[:] + mat = c.pixels()[:] + frags.rename( + columns={"start": "start_pos", "end": "end_pos"}, inplace=True + ) + frags["id"] = frags.groupby("chrom", sort=False).cumcount() + 1 + # Try loading hicstuff-specific columns + try: + frags = frags[ + ["id", "chrom", "start_pos", "end_pos", "size", "gc_content"] + ] + # If absent, only load standard columns + except KeyError: + frags = frags[["id", "chrom", "start_pos", "end_pos"]] + + chroms["cumul_length"] = ( + chroms.length.shift(1).fillna(0).cumsum().astype(int) + ) + n_frags = c.bins()[:].groupby("chrom", sort=False).count().start + chroms["n_frags"] = chroms.merge( + n_frags, right_index=True, left_on="name", how="left" + ).start + chroms.rename(columns={"name": "contig"}, inplace=True) + n = int(max(np.amax(mat.bin1_id), np.amax(mat.bin2_id))) + 1 + shape = (n, n) + mat = coo_matrix((mat["count"], (mat.bin1_id, mat.bin2_id)), shape=shape) + + return mat, frags, chroms + + +@_check_cooler +def save_cool(cool_out, mat, frags, metadata={}): + """ + Writes a .cool file from graal style tables. + + Parameters + ---------- + cool_out : str + Path to the output cool file. 
+ mat : scipy coo_matrix + The Hi-C contact matrix in sparse COO format. + frags : pandas DataFrame + The graal style 'fragments_list' table. + metadata : dict + Potential metadata to associate with the cool file. + """ + up_tri = False + # Check if symmetric matrix is symmetric + # (i.e. only upper triangle or full mat) + if (abs(mat - mat.T) > 1e-10).nnz != 0: + up_tri = True + # Drop useless column + try: + bins = frags.drop("id", axis=1) + except KeyError: + bins = frags + # Get column names right + bins.rename( + columns={"seq": "chrom", "start_pos": "start", "end_pos": "end"}, + inplace=True, + ) + mat_dict = {"bin1_id": mat.row, "bin2_id": mat.col, "count": mat.data} + pixels = pd.DataFrame(mat_dict) + cooler.create_cooler( # pylint: disable=undefined-variable + cool_out, + bins, + pixels, + metadata=metadata, + symmetric_upper=up_tri, + triucheck=False, + ) + + +def read_compressed(filename, mode='r'): + """Read compressed file + Opens the file in read mode with appropriate decompression algorithm. + Parameters + ---------- + filename : str + The path to the input file + Returns + ------- + file-like object + The handle to access the input file's content + """ + + # Standard header bytes for diff compression formats + comp_bytes = { + b"\x1f\x8b\x08": "gz", + b"\x42\x5a\x68": "bz2", + b"\x50\x4b\x03\x04": "zip", + } + + max_len = max(len(x) for x in comp_bytes) + + def file_type(filename): + """Guess file type + Compare header bytes with those in the file and return type. + """ + with open(filename, "rb") as f: + file_start = f.read(max_len) + for magic, filetype in comp_bytes.items(): + if file_start.startswith(magic): + return filetype + return "uncompressed" + + # Open file with appropriate function + mode_map = {'r': 'rt', 'rb': 'rb'} + comp = file_type(filename) + if comp == "gz": + return gzip.open(filename, mode_map[mode]) + elif comp == "bz2": + return bz2.open(filename, mode_map[mode]) + elif comp == "zip": + zip_arch = zipfile.ZipFile(filename, mode) + if len(zip_arch.namelist()) > 1: + raise IOError( + "Only a single fastq file must be in the zip archive." + ) + else: + # ZipFile opens as bytes by default, using io to read as text + zip_content = zip_arch.open(zip_arch.namelist()[0], mode) + return io.TextIOWrapper(zip_content, encoding="utf-8") + else: + return open(filename, mode) + + +def is_compressed(filename): + """Check compression status + Check if the input file is compressed from the first bytes. + Parameters + ---------- + filename : str + The path to the input file + Returns + ------- + bool + True if the file is compressed, False otherwise. + """ + + # Standard header bytes for diff compression formats + comp_bytes = { + b"\x1f\x8b\x08": "gz", + b"\x42\x5a\x68": "bz2", + b"\x50\x4b\x03\x04": "zip", + } + max_len = max(len(x) for x in comp_bytes) + with open(filename, "rb") as f: + file_start = f.read(max_len) + for magic, _ in comp_bytes.items(): + if file_start.startswith(magic): + return True + return False + + +def from_dade_matrix(filename, header=False): + """Load a DADE matrix + Load a numpy array from a DADE matrix file, optionally + returning bin information from the header. Header data + processing is delegated downstream. + Parameters + ---------- + filename : str, file or pathlib.Path + The name of the file containing the DADE matrix. + header : bool + Whether to return as well information contained + in the header. Default is False. 
+ Example + ------- + >>> import numpy as np + >>> import tempfile + >>> lines = [['RST', 'chr1~0', 'chr1~10', 'chr2~0', 'chr2~30'], + ... ['chr1~0', '5'], + ... ['chr1~10', '8', '3'], + ... ['chr2~0', '3', '5', '5'], + ... ['chr2~30', '5', '10', '11', '2'] + ... ] + >>> formatted = ["\\t".join(l) + "\\n" for l in lines ] + >>> dade = tempfile.NamedTemporaryFile(mode='w') + >>> for fm in formatted: + ... dade.write(fm) + 34 + 9 + 12 + 13 + 18 + >>> dade.flush() + >>> M, h = from_dade_matrix(dade.name, header=True) + >>> dade.close() + >>> print(M) + [[ 5. 8. 3. 5.] + [ 8. 3. 5. 10.] + [ 3. 5. 5. 11.] + [ 5. 10. 11. 2.]] + >>> print(h) + ['chr1~0', 'chr1~10', 'chr2~0', 'chr2~30'] + See https://github.com/scovit/DADE for more details about Dade. + """ + + A = pd.read_csv(filename, sep="\t", header=None) + A.fillna("0", inplace=True) + M, headers = np.array(A.iloc[1:, 1:], dtype=np.float64), A.iloc[0, :] + matrix = M + M.T - np.diag(np.diag(M)) + if header: + return matrix, headers.tolist()[1:] + else: + return matrix + + +def to_dade_matrix(M, annotations="", filename=None): + """Returns a Dade matrix from input numpy matrix. Any annotations are added + as header. If filename is provided and valid, said matrix is also saved + as text. + """ + + n, m = M.shape + A = np.zeros((n + 1, m + 1)) + A[1:, 1:] = M + if not annotations: + annotations = np.array(["" for _ in n], dtype=str) + A[0, :] = annotations + A[:, 0] = annotations.T + if filename: + try: + np.savetxt(filename, A, fmt="%i") + logger.info( + "I saved input matrix in dade format as {0}".format( + str(filename) + ) + ) + except ValueError as e: + logger.warning("I couldn't save input matrix.") + logger.warning(str(e)) + + return A + + +def load_into_redis(filename): + """Load a file into redis + Load a matrix file and sotre it in memory with redis. + Useful to pass around huge datasets from scripts to + scripts and load them only once. + Inspired from https://gist.github.com/alexland/ce02d6ae5c8b63413843 + Parameters + ---------- + filename : str, file or pathlib.Path + The file of the matrix to load. + Returns + ------- + key : str + The key of the dataset needed to retrieve it from redis. + """ + try: + from redis import StrictRedis as redis + import time + except ImportError: + print( + "Error! Redis does not appear to be installed in your system.", + file=sys.stderr, + ) + exit(1) + + M = np.genfromtxt(filename, dtype=None) + array_dtype = str(M.dtype) + m, n = M.shape + M = M.ravel().tostring() + database = redis(host="localhost", port=6379, db=0) + key = "{0}|{1}#{2}#{3}".format(int(time.time()), array_dtype, m, n) + + database.set(key, M) + + return key + + +def load_from_redis(key): + """Retrieve a dataset from redis + Retrieve a cached dataset that was stored in redis + with the input key. + Parameters + ---------- + key : str + The key of the dataset that was stored in redis. + Returns + ------- + M : numpy.ndarray + The retrieved dataset in array format. + """ + + try: + from redis import StrictRedis as redis + except ImportError: + print( + "Error! Redis does not appear to be installed in your system.", + file=sys.stderr, + ) + exit(1) + + database = redis(host="localhost", port=6379, db=0) + + try: + M = database.get(key) + except KeyError: + print( + "Error! 
No dataset was found with the supplied key.", + file=sys.stderr, + ) + exit(1) + + array_dtype, n, m = key.split("|")[1].split("#") + + M = np.fromstring(M, dtype=array_dtype).reshape(int(n), int(m)) + return M + + +def dade_to_graal( + filename, + output_matrix=DEFAULT_SPARSE_MATRIX_FILE_NAME, + output_contigs=DEFAULT_INFO_CONTIGS_FILE_NAME, + output_frags=DEFAULT_SPARSE_MATRIX_FILE_NAME, + output_dir=None, +): + """Convert a matrix from DADE format (https://github.com/scovit/dade) + to a graal-compatible format. Since DADE matrices contain both fragment + and contact information all files are generated at the same time. + """ + + with open(output_matrix, "w") as sparse_file: + sparse_file.write("id_frag_a\tid_frag_b\tn_contact") + with open(filename) as file_handle: + first_line = file_handle.readline() + for row_index, line in enumerate(file_handle): + dense_row = np.array(line.split("\t")[1:], dtype=np.int32) + for col_index in np.nonzero(dense_row)[0]: + line_to_write = "{}\t{}\t{}\n".format( + row_index, col_index, dense_row[col_index] + ) + sparse_file.write(line_to_write) + + header = first_line.split("\t") + bin_type = header[0] + if bin_type == '"RST"': + logger.info("I detected fragment-wise binning") + elif bin_type == '"BIN"': + logger.info("I detected fixed size binning") + else: + logger.warning( + ( + "Sorry, I don't understand this matrix's " + "binning: I read {}".format(str(bin_type)) + ) + ) + + header_data = [ + header_elt.replace("'", "") + .replace('"', "") + .replace("\n", "") + .split("~") + for header_elt in header[1:] + ] + + ( + global_frag_ids, + contig_names, + local_frag_ids, + frag_starts, + frag_ends, + ) = np.array(list(zip(*header_data))) + + frag_starts = frag_starts.astype(np.int32) - 1 + frag_ends = frag_ends.astype(np.int32) - 1 + frag_lengths = frag_ends - frag_starts + + total_length = len(global_frag_ids) + + with open(output_contigs, "w") as info_contigs: + + info_contigs.write("contig\tlength\tn_frags\tcumul_length\n") + + cumul_length = 0 + + for contig in collections.OrderedDict.fromkeys(contig_names): + + length_tig = np.sum(frag_lengths[contig_names == contig]) + n_frags = collections.Counter(contig_names)[contig] + line_to_write = "%s\t%s\t%s\t%s\n" % ( + contig, + length_tig, + n_frags, + cumul_length, + ) + info_contigs.write(line_to_write) + cumul_length += n_frags + + with open(output_frags, "w") as fragments_list: + + fragments_list.write( + "id\tchrom\tstart_pos\tend_pos" "\tsize\tgc_content\n" + ) + bogus_gc = 0.5 + + for i in range(total_length): + line_to_write = "%s\t%s\t%s\t%s\t%s\t%s\n" % ( + int(local_frag_ids[i]) + 1, + contig_names[i], + frag_starts[i], + frag_ends[i], + frag_lengths[i], + bogus_gc, + ) + fragments_list.write(line_to_write) + + +def load_bedgraph2d(filename, bin_size=None, fragments_file=None): + """ + Loads matrix and fragment information from a 2D bedgraph file. Note this + function assumes chromosomes are ordered in alphabetical. order + + Parameters + ---------- + filename : str + Path to the bedgraph2D file. + bin_size : int + The size of bins in the case of fixed bin size. + fragments_file : str + Path to a fragments file to explicitely provide fragments positions. + If the matrix does not have fixed bin size, this prevents errors. + + Returns + ------- + mat : scipy.sparse.coo_matrix + The Hi-C contact map as the upper triangle of a symetric matrix, in + sparse format. + frags : pandas.DataFrame + The list of fragments/bin present in the matrix with their genomic + positions. 
+ """ + bed2d = pd.read_csv(filename, sep="\t", header=None) + chrom_sizes = {} + if bin_size is not None: + # If bin size if provided, retrieve chromosome lengths, this will be + # used when regenerating bin coordinates + chroms_left = bed2d[[3, 5]] + chroms_left.columns = [0, 2] + chroms = ( + pd.concat([bed2d[[0, 2]], chroms_left]) + .groupby([0], sort=False) + .max() + ) + for chrom, size in zip(chroms.index, np.array(chroms)): + chrom_sizes[chrom] = size[0] + elif fragments_file is None: + logger.warning( + "Please be aware that not all information can be restored from a " + "bg2 file without fixed bin size; fragments without any contact " + "will be lost" + ) + # Get all possible fragment chrom-positions into an array + frag_pos = np.vstack( + [np.array(bed2d[[0, 1, 2]]), np.array(bed2d[[3, 4, 5]])] + ) + # Sort by position (least important, col 1) + frag_pos = frag_pos[frag_pos[:, 1].argsort(kind="mergesort")] + # Then by chrom (most important, col 0) + frag_pos = frag_pos[frag_pos[:, 0].argsort(kind="mergesort")] + # Get unique names for fragments (chrom+pos) + ordered_frag_pos = ( + pd.DataFrame(frag_pos).drop_duplicates().reset_index(drop=True) + ) + frag_pos_a = bed2d[[0, 1]].apply(lambda x: tuple(x), axis=1) + frag_pos_b = bed2d[[3, 4]].apply(lambda x: tuple(x), axis=1) + # If fragments file is provided, use fragments positions to indices mapping + if fragments_file is not None: + frags = pd.read_csv(fragments_file, delimiter="\t") + frag_map = frags.apply(lambda x: (str(x.chrom), x.start_pos), axis=1) + frag_map = {f_name: f_idx for f_idx, f_name in enumerate(frag_map)} + # If fixed fragment size available, use it to reconstruct original + # fragments ID (even if they are absent from the bedgraph file). + elif bin_size is not None: + frag_map = {} + chrom_frags = [] + for chrom, size in chrom_sizes.items(): + prev_frags = len(frag_map) + for bin_id, bin_pos in enumerate(range(0, size, bin_size)): + frag_map[(chrom, bin_pos)] = bin_id + prev_frags + n_bins = size // bin_size + chrom_frags.append( + pd.DataFrame( + { + "id": range(1, n_bins + 1), + "chrom": np.repeat(chrom, n_bins), + "start_pos": range(0, size, bin_size), + "end_pos": range(bin_size, size + bin_size, bin_size), + } + ) + ) + frags = pd.concat(chrom_frags, axis=0).reset_index(drop=True) + frags.insert( + loc=3, column="size", value=frags.end_pos - frags.start_pos + ) + # If None available, guess fragments indices from bedgraph (potentially wrong) + else: + frag_map = { + (v[0], v[1]): i + for i, v in ordered_frag_pos.iloc[:, [0, 1]].iterrows() + } + frags = ordered_frag_pos.copy() + frags[3] = frags.iloc[:, 2] - frags.iloc[:, 1] + frags.insert(loc=0, column="id", value=0) + frags.id = frags.groupby([0], sort=False).cumcount() + 1 + frags.columns = ["id", "chrom", "start_pos", "end_pos", "size"] + # Match bin indices to their names + frag_id_a = np.array(list(map(lambda x: frag_map[x], frag_pos_a))) + frag_id_b = np.array(list(map(lambda x: frag_map[x], frag_pos_b))) + contacts = np.array(bed2d.iloc[:, 6].tolist()) + # Use index to build matrix + n_frags = len(frag_map.keys()) + mat = coo_matrix( + (contacts, (frag_id_a, frag_id_b)), shape=(n_frags, n_frags) + ) + + # Get size of each chromosome in basepairs + chromsizes = frags.groupby("chrom", sort=False).apply( + lambda x: np.int64(max(x.end_pos)) + ) + chrom_bins = frags.groupby("chrom", sort=False).size() + # Shift chromsizes by one to get starting bin, first one is zero + # Make chromsize cumulative to get start bin of each chrom + # Get chroms into a 1D 
array of bin starts + chrom_start = chrom_bins.shift(1, fill_value=0).cumsum() + chroms = pd.DataFrame( + { + "contig": chromsizes.index, + "length": chromsizes.values, + "n_frags": chrom_bins, + "cumul_length": chrom_start, + } + ) + return mat, frags, chroms + + +def flexible_hic_loader( + mat, fragments_file=None, chroms_file=None, quiet=False +): + """ + Wrapper function to load COO, bg2 or cool input and return the same output. + COO formats requires fragments_file and chroms_file options. bg2 format can + infer bin_size if fixed. When providing a bg2 matrix with uneven fragments + length, one should provide fragments_file as well or empty bins will be + truncated from the output. + + Parameters + ---------- + mat : str + Path to the matrix in graal, bedgraph2 or cool format. + fragments_file : str or None + Path to the file with fragments information (fragments_list.txt). + Only required if the matrix is in graal format. + chroms_file : str or None + Path to the file with chromosome information (info_contigs.txt). Only required + if the matrix is in graal format. + quiet : bool + If True, will silence warnings for empty outputs. + Returns + ------- + mat : scipy.sparse.coo_matrix + Sparse upper triangle Hi-C matrix. + frags : pandas.DataFrame or None + Table of fragment informations. None if information was not provided. + chroms : pandas.DataFrame or None + Table of chromosomes/contig information. None if information was not provided. + """ + hic_format = get_hic_format(mat) + # Load cool based on file extension + if hic_format == "cool": + mat, frags, chroms = load_cool(mat) + # Use the first line to determine COO / bg2 format + if hic_format == "bg2": + # Use the frags file to define bins if available + if fragments_file is not None: + mat, frags, chroms = load_bedgraph2d( + mat, fragments_file=fragments_file + ) + else: + # Guess if bin size is fixed based on MAD + bg2 = pd.read_csv(mat, sep="\t") + sizes = np.array(bg2.iloc[:, 2] - bg2.iloc[:, 1]) + size_mad = ss.median_abs_deviation(sizes, scale='normal') + # Use only the bg2 + if size_mad > 0: + mat, frags, chroms = load_bedgraph2d(mat) + logger.warning( + "Input is a bedgraph2d file with uneven bin size, " + "but no fragments_file was provided. Empty bins will " + "be missing from the output. To avoid this, provide a " + "fragments file." + ) + # Use fixed bin size + else: + mat, frags, chroms = load_bedgraph2d( + mat, bin_size=int(np.median(sizes)) + ) + + elif hic_format == "graal": + mat = load_sparse_matrix(mat) + try: + frags = pd.read_csv(fragments_file, sep="\t") + except ValueError: + if not quiet: + logger.warning( + "fragments_file was not provided when " + "loading a matrix in COO/graal format. frags will be None." + ) + frags = None + try: + chroms = pd.read_csv(chroms_file, sep="\t") + except ValueError: + if not quiet: + logger.warning( + "chroms_file was not provided when " + "loading a matrix in COO/graal format. chroms will be None." + ) + + chroms = None + + # Ensure the matrix is upper triangle symmetric + if mat.shape[0] == mat.shape[1]: + if (abs(mat - mat.T) > 1e-10).nnz > 0: + mat = mat + tril(mat, k=-1).T + mat = triu(mat, format="coo") + + return mat, frags, chroms + + +def get_hic_format(mat): + """Returns the format of the input Hi-C matrix + + Parameters + ---------- + mat : str + Path to the input Hi-C matrix + Returns + ------- + str : + Hi-C format string. 
One of graal, bg2, cool + """ + if ( + mat.endswith(".cool") + or mat.count(".mcool::/") == 1 + or mat.count(".cool::/") == 1 + ): + hic_format = "cool" + else: + # Use the first line to determine COO / bg2 format + ncols = len(open(mat).readline().split("\t")) + if ncols == 7: + hic_format = "bg2" + elif ncols == 3: + hic_format = "graal" + else: + raise ValueError("Unkown file format") + return hic_format + + +def flexible_hic_saver( + mat, out_prefix, frags=None, chroms=None, hic_fmt="graal", quiet=False, +): + """ + Saves objects to the desired Hi-C file format. + Parameters + ---------- + mat : scipy.sparse.coo_matrix + Hi-C contact map. + out_prefix : str + Output path without extension (the extension is added based on hic_fmt). + frags : pandas.DataFrame or None + Table of fragments informations. + chroms : pandas.DataFrame or None + Table of chromosomes / contigs informations. + hic_fmt : str + Output format. Can be one of graal for graal-compatible COO format, bg2 for + 2D bedgraph format, or cool for cooler compatible format. + """ + if hic_fmt == "graal": + save_sparse_matrix(mat, out_prefix + ".mat.tsv") + try: + frags.to_csv(out_prefix + ".frags.tsv", sep="\t", index=False) + except AttributeError: + if not quiet: + logger.warning( + "Could not create fragments_list.txt from input files" + ) + try: + chroms.to_csv(out_prefix + ".chr.tsv", sep="\t", index=False) + except AttributeError: + if not quiet: + logger.warning( + "Could not create info_contigs.txt from input files" + ) + elif hic_fmt == "cool": + frag_sizes = frags.end_pos - frags.start_pos + size_mad = np.median(frag_sizes - np.median(frag_sizes)) + bin_type = "variable" if size_mad else "fixed" + try: + save_cool( + out_prefix + ".cool", + mat, + frags, + metadata={"hicstuff": __version__, "bin-type": bin_type}, + ) + except NameError: + NameError("frags is required to save a cool file") + elif hic_fmt == "bg2": + try: + save_bedgraph2d(mat, frags, out_prefix + ".bg2") + except NameError: + NameError("frags is required to save a bg2 file") + else: + raise ValueError("Unknown output format: {0}".format(hic_fmt)) + + +def save_bedgraph2d(mat, frags, out_path): + """ + Given a sparse matrix and a corresponding list of fragments, save a file + in 2D bedgraph format. + Parameters + ---------- + mat : scipy.sparse.coo_matrix + The sparse contact map. + frags : pandas.DataFrame + A structure containing the annotations for each matrix bin. Should + correspond to the content of the fragments_list.txt file. + """ + + mat_df = pd.DataFrame( + {"row": mat.row, "col": mat.col, "data": mat.data.astype(int)} + ) + # Merge fragments with matrix based on row indices to annotate rows + merge_mat = mat_df.merge( + frags, left_on="row", right_index=True, how="left", suffixes=("", "") + ) + # Rename annotations to assign to frag1 + merge_mat.rename( + columns={"chrom": "chr1", "start_pos": "start1", "end_pos": "end1"}, + inplace=True, + ) + # Do the same operation for cols (frag2) + merge_mat = merge_mat.merge( + frags, + left_on="col", + right_index=True, + how="left", + suffixes=("_0", "_2"), + ) + merge_mat.rename( + columns={"chrom": "chr2", "start_pos": "start2", "end_pos": "end2"}, + inplace=True, + ) + # Select only relevant columns in correct order + bg2 = merge_mat.loc[ + :, ["chr1", "start1", "end1", "chr2", "start2", "end2", "data"] + ] + bg2.to_csv(out_path, header=None, index=False, sep="\t") + + +def get_pos_cols(df): + """Get column names representing chromosome, start and end column + from a dataframe. 
Allows flexible names. + Parameters + ---------- + df : pd.DataFrame + Returns + ------- + tuple of str: + Tuple containing the names of the chromosome, start and end + columns in the input dataframe. + Examples + -------- + >>> import pandas as pd + >>> d = [1, 2, 3] + >>> df = pd.DataFrame( + ... {'Chromosome': d, 'Start': d, 'End': d, 'species': d} + ... ) + >>> get_pos_cols(df) + ('Chromosome', 'Start', 'End') + >>> df = pd.DataFrame( + ... {'id': d, 'chr': d, 'start_bp': d, 'end_bp': d} + ... ) + >>> get_pos_cols(df) + ('chr', 'start_bp', 'end_bp') + """ + + # Get case insensitive column names + cnames = df.columns + inames = cnames.str.lower() + + def _col_getter(cols, pat): + """Helper to get column index from the start of its name""" + mask = cols.str.startswith(pat) + idx = np.flatnonzero(mask)[0] + return idx + + chrom_col = cnames[_col_getter(inames, "chr")] + start_col = cnames[_col_getter(inames, "start")] + end_col = cnames[_col_getter(inames, "end")] + return chrom_col, start_col, end_col + + +def gc_bins(genome_path, frags): + """Generate GC content annotation for bins using input genome. + Parameters + ---------- + genome_path : str + Path the the genome file in FASTA format. + frags : pandas.DataFrame + Table containing bin segmentation of the genome. + Required columns: chrom, start, end. + Returns + ------- + numpy.ndarray of floats: + GC content per bin, in the range [0.0, 1.0]. + """ + # Grab columns by name (catch start, Start, star_pos, etc) + chrom_col, start_col, end_col = get_pos_cols(frags) + # Fill the gc array by chromosome + gc_bins = np.zeros(frags.shape[0], dtype=float) + for rec in SeqIO.parse(genome_path, "fasta"): + mask = frags[chrom_col] == rec.id + # Define coordinates of each bin + starts = frags.loc[mask, start_col] + ends = frags.loc[mask, end_col] + # Slice chromosome sequence based on bins + seqs = [str(rec.seq)[s:e] for s, e in zip(starts, ends)] + # Fill GC values for bins in the chromosome + idx = np.flatnonzero(mask) + gc_bins[idx] = np.array(list(map(SeqUtils.GC, seqs))) / 100.0 + + return gc_bins + + +def sort_pairs(in_file, out_file, keys, tmp_dir=None, threads=1, buffer="2G"): + """ + Sort a pairs file in batches using UNIX sort. + Parameters + ---------- + in_file : str + Path to the unsorted input file + out_file : str + Path to the sorted output file. + keys : list of str + list of columns to use as sort keys. Each column can be one of readID, + chr1, pos1, chr2, pos2, frag1, frag2. Key priorities are according to + the order in the list. + tmp_dir : str + Path to the directory where temporary files will be created. Defaults + to current directory. + threads : int + Number of parallel sorting threads. + buffer : str + Buffer size used for sorting. Consists of a number and a unit. + """ + # TODO: Write a pure python implementation to drop GNU coreutils depencency, + # could be inspired from: https://stackoverflow.com/q/14465154/8440675 + + # Check if UNIX sort version supports parallelism + parallel_ok = True + sort_ver = sp.Popen(["sort", "--version"], stdout=sp.PIPE) + sort_ver = ( + sort_ver.communicate()[0] + .decode() + .split("\n")[0] + .split(" ")[-1] + .split(".") + ) + # If so, specify threads, otherwise don't mention it in the command line + try: + sort_ver = list(map(int, sort_ver)) + if sort_ver[0] < 8 or (sort_ver[0] == 8 and sort_ver[1] < 23): + logger.warning( + "GNU sort version is {0} but >8.23 is required for parallel " + "sort. 
Sorting on a single thread.".format( + ".".join(map(str, sort_ver)) + ) + ) + parallel_ok = False + # BSD sort has a different format and will throw error upon parsing. It does + # not support parallel processes anyway. + except ValueError: + logger.warning( + "Using BSD sort instead of GNU sort, sorting on a single thread." + ) + parallel_ok = False + + key_map = { + "readID": "-k1,1d", + "chr1": "-k2,2V", + "pos1": "-k3,3n", + "chr2": "-k4,4V", + "pos2": "-k5,5n", + "strand1": "-k6,6d", + "strand2": "-k7,7d", + "frag1": "-k8,8n", + "frag2": "-k9,9n", + } + + # transform column names to corresponding sort keys + try: + sort_keys = map(lambda k: key_map[k], keys) + except KeyError: + print("Unkown column name.") + raise + # Rewrite header with new sorting order + header = get_pairs_header(in_file) + with open(out_file, "w") as output: + for line in header: + if line.startswith("#sorted"): + output.write("#sorted: {0}\n".format("-".join(keys))) + else: + output.write(line + "\n") + + # Sort pairs and append to file. + with open(out_file, "a") as output: + grep_proc = sp.Popen(["grep", "-v", "^#", in_file], stdout=sp.PIPE) + sort_cmd = ["sort", "-S %s" % buffer] + list(sort_keys) + if tmp_dir is not None: + sort_cmd.append("--temporary-directory={0}".format(tmp_dir)) + if parallel_ok: + sort_cmd.append("--parallel={0}".format(threads)) + sort_proc = sp.Popen(sort_cmd, stdin=grep_proc.stdout, stdout=output) + sort_proc.communicate() + + +def get_pairs_header(pairs): + r"""Retrieves the header of a .pairs file and stores lines into a list. + Parameters + ---------- + pairs : str or file object + Path to the pairs file. + Returns + ------- + header : list of str + A list of header lines found, in the same order they appear in pairs. + Examples + -------- + >>> import os + >>> from tempfile import NamedTemporaryFile + >>> p = NamedTemporaryFile('w', delete=False) + >>> p.writelines(["## pairs format v1.0\n", "#sorted: chr1-chr2\n", "abcd\n"]) + >>> p.close() + >>> h = get_pairs_header(p.name) + >>> for line in h: + ... print([line]) + ['## pairs format v1.0'] + ['#sorted: chr1-chr2'] + >>> os.unlink(p.name) + """ + # Open file if needed + with open(pairs, "r") as pairs: + # Store header lines into a list + header = [] + line = pairs.readline() + while line.startswith("#"): + header.append(line.rstrip()) + line = pairs.readline() + + return header + + +def reorder_fasta(genome, output=None, threshold=100000): + """Reorder and trim a fasta file + Sort a fasta file by record lengths, optionally trimming the smallest ones. + Parameters + ---------- + genome : str, file or pathlib.Path + The genome scaffold file (or file handle) + output : str, file or pathlib.Path + The output file to write to + threshold : int, optional + The size below which scaffolds are discarded, by default 100000 + """ + + if output is None: + output = "{}_renamed.fa".format(genome.split(".")[0]) + + handle = SeqIO.parse(genome, "fasta") + handle_to_write = sorted( + (len(u) for u in handle if len(u) > threshold), reverse=True + ) + SeqIO.write(handle_to_write, output, "fasta") + + +def rename_genome(genome, output=None, ambiguous=True): + """Rename genome and slugify headers + Rename genomes according to a simple naming scheme; this + is mainly done to avoid special character weirdness. + Parameters + ---------- + genome : file, str or pathlib.Path + The input genome to be renamed and slugify. + output : file, str or pathlib.Path + The output genome to be written into. 
Default is <base>_renamed.fa, + where <base> is genome_in without its extension. + ambiguous : bool + Whether to retain ambiguous non-N bases, otherwise rename them to Ns. + Default is True. + """ + + if output is None: + output = "{}_renamed.fa".format(genome.split(".")[0]) + + with open(output, "w") as output_handle: + for record in SeqIO.parse(genome, "fasta"): + + # Replace hyphens, tabs and whitespace with underscores + new_record_id = record.id.replace(" ", "_") + new_record_id = new_record_id.replace("-", "_") + new_record_id = new_record_id.replace("\t", "_") + + # Remove anything that's weird, i.e. not alphanumeric + # or an underscore + new_record_id = re.sub("[^_A-Za-z0-9]+", "", new_record_id) + header = ">{}\n".format(new_record_id) + + new_seq = re.sub("[^ATGCatgcNn]", "N", str(record.seq)) + + output_handle.write(header) + output_handle.write("{}\n".format(new_seq)) + + +def check_fasta_index(ref, mode="bowtie2"): + """ + Checks for the existence of a bowtie2 or bwa index based on the reference + file name. + Parameters + ---------- + ref : str + Path to the reference genome. + mode : str + The alignment software used to build the index. bowtie2 or bwa. If any + other value is given, the function returns the reference path. + Returns + ------- + index : str + The bowtie2 or bwa index basename. None if no index was found + """ + ref = pathlib.Path(ref) + if mode == "bowtie2": + # Bowtie2 should have 6 index files + bt2_idx_files = list(ref.parent.glob("{}*bt2*".format(ref.name))) + index = None if len(bt2_idx_files) < 6 else bt2_idx_files + elif mode == "bwa": + refdir = str(ref.parent) + refdir_files = os.listdir(refdir) + bwa_idx_files = [ + join(refdir, f) + for f in refdir_files + if re.search(r".*\.(sa|pac|bwt|ann|amb)$", f) + ] + index = None if len(bwa_idx_files) < 5 else bwa_idx_files + else: + index = [ref] + if index is not None: + # Convert the PosixPath objects to strings and get the longest common prefix to obtain + # index basename (without the dot) + index = os.path.commonprefix(list(map(str, index))).strip(".") + return index + + +def check_is_fasta(in_file): + """ + Checks whether input file is in fasta format. + + Parameters + ---------- + in_file : str + Path to the input file. + Returns + ------- + bool : + True if the input is in fasta format, False otherwise + """ + try: + with read_compressed(in_file) as handle: + fasta = any(SeqIO.parse(handle, "fasta")) + except FileNotFoundError: + fasta = False + + return fasta diff --git a/bin/hicstuff_log.py b/bin/hicstuff_log.py new file mode 100644 index 0000000..171a566 --- /dev/null +++ b/bin/hicstuff_log.py @@ -0,0 +1,89 @@ +#!/usr/bin/env python3 +# coding: utf-8 + +"""Basic logging +Basic logging setup with three main handlers (stdout, log file and optionally +texting with the right API). By default the log file is disabled, but can be +enabled or changed using set_file_handler. 
+""" + + +import logging +import logging.handlers +import requests +import json + +TEXT_CREDENTIALS_DEFAULT_PATH = "text_credentials.json" + +CURRENT_LOG_LEVEL = logging.INFO + + +logging.captureWarnings(True) + +logger = logging.getLogger() +logger.setLevel(CURRENT_LOG_LEVEL) + +logfile_formatter = logging.Formatter( + "%(asctime)s :: %(levelname)s :: %(message)s", datefmt="%Y-%m-%d,%H:%M:%S" +) +stdout_formatter = logging.Formatter("%(levelname)s :: %(message)s") +text_formatter = logging.Formatter("%(message)s") + +# file_handler = logging.FileHandler("hicstuff.log", "a") + +# file_handler.setLevel(logging.INFO) +# file_handler.setFormatter(logfile_formatter) +# logger.addHandler(file_handler) + +stream_handler = logging.StreamHandler() +stream_handler.setLevel(logging.DEBUG) +stream_handler.setFormatter(stdout_formatter) +logger.addHandler(stream_handler) + + +def set_file_handler(log_path, formatter=logfile_formatter): + """Change the file handler for custom log file location""" + + filehandler = logging.FileHandler(log_path, "a") + filehandler.setLevel(logging.INFO) + filehandler.setFormatter(formatter) + for hdlr in logger.handlers[:]: # remove the existing file handlers + if isinstance(hdlr, logging.FileHandler): + logger.removeHandler(hdlr) + logger.addHandler(filehandler) # set the new handler + + +def setup_text_logging(credentials=TEXT_CREDENTIALS_DEFAULT_PATH): + """Setup text logging + Setup text notifications on errors with a basic API. In order for it to + work, a file named 'text_credentials.json' must be in the directory where + a command is run. The logger performs a GET request to a text notification + API. + Parameters + ---------- + credentials : str or pathlib.Path + A JSON file with necessary credentials for the GET request + """ + + with open(credentials) as cred_handle: + cred_dict = json.load(cred_handle) + + api_service = cred_dict.pop("api_service") + + class TextHandler(logging.Handler): + def emit(self, record): + log_entry = self.format(record) + cred_dict["msg"] = log_entry + return requests.get(api_service, cred_dict) + + sms_handler = TextHandler() + sms_handler.setLevel(logging.ERROR) + sms_handler.setFormatter(text_formatter) + + logger.addHandler(sms_handler) + + +try: + setup_text_logging(TEXT_CREDENTIALS_DEFAULT_PATH) +except FileNotFoundError: + pass diff --git a/conf/hicstuff.config b/conf/hicstuff.config index 6e20d06..f26ae40 100644 --- a/conf/hicstuff.config +++ b/conf/hicstuff.config @@ -9,6 +9,37 @@ process { withName: 'BOWTIE2_ALIGNMENT' { ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" } } + + withName: 'FRAGMENT_ENZYME' { + publishDir = [ + path: { "${params.outdir}/hicstuff/fragment_enzyme" }, + mode: 'copy' + ] + } + + //********************************************** + // PREPARE_GENOME + withName: 'BOWTIE2_BUILD' { + publishDir = [ + path: { "${params.outdir}/genome/bowtie2" }, + mode: 'copy' + ] + } + + withName: 'CUSTOM_GETCHROMSIZES' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: 'copy' + ] + } + + withName: 'GET_RESTRICTION_FRAGMENTS' { + publishDir = [ + path: { "${params.outdir}/genome" }, + mode: 'copy' + ] + } + } params { diff --git a/modules/local/hicstuff/fragment_enzyme.nf b/modules/local/hicstuff/fragment_enzyme.nf new file mode 100644 index 0000000..dafa3cf --- /dev/null +++ b/modules/local/hicstuff/fragment_enzyme.nf @@ -0,0 +1,21 @@ +process FRAGMENT_ENZYME { + tag "$digestion" + label "process_single" + + conda "conda-forge::python=3.9 conda-forge::biopython=1.80 conda-forge::numpy=1.22.3 
conda-forge::matplotlib=3.6.3 conda-forge::pandas=1.5.3"
+    container = "lbmc/hicstuff:3.1.3"
+
+    input:
+    val(digestion)
+    tuple val(meta), path(fasta)
+
+    output:
+    tuple val(meta), path("info_contigs.txt") , emit: info_contigs
+    tuple val(meta), path("fragments_list.txt"), emit: fragments_list
+
+    script:
+    """
+    hicstuff_fragments.py -e ${digestion} -i ${fasta}
+    """
+
+}
diff --git a/subworkflows/local/hicstuff_sub.nf b/subworkflows/local/hicstuff_sub.nf
index dfb160b..99516b3 100644
--- a/subworkflows/local/hicstuff_sub.nf
+++ b/subworkflows/local/hicstuff_sub.nf
@@ -1,5 +1,6 @@
 
 include { BOWTIE2_ALIGNMENT } from '../../modules/local/hicstuff/align_bowtie2'
+include { FRAGMENT_ENZYME } from '../../modules/local/hicstuff/fragment_enzyme'
 
 // Paired-end to Single-end
 def pairToSingle(row, mates) {
@@ -27,6 +28,8 @@ workflow HICSTUFF_SUB {
     reads
     index
     ligation_site
+    digestion
+    fasta
 
     main:
 
@@ -39,4 +42,13 @@
         ch_reads,
         index.collect()
     )
+
+    FRAGMENT_ENZYME(
+        digestion,
+        fasta
+    )
+
+    emit:
+    info_contigs = FRAGMENT_ENZYME.out.info_contigs
+    fragments_list = FRAGMENT_ENZYME.out.fragments_list
 }
diff --git a/workflows/hicstuff.nf b/workflows/hicstuff.nf
index 0a760cf..4a69b68 100644
--- a/workflows/hicstuff.nf
+++ b/workflows/hicstuff.nf
@@ -6,9 +6,9 @@ if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input sample
 // Digestion parameters
 if (params.digestion){
     restriction_site = params.digestion ? params.digest[ params.digestion ].restriction_site ?: false : false
-    ch_restriction_site = Channel.value(restriction_site)
+    ch_restriction_site = Channel.value(restriction_site) // restriction site string, with '^' marking the cut position
     ligation_site = params.digestion ? params.digest[ params.digestion ].ligation_site ?: false : false
-    ch_ligation_site = Channel.value(ligation_site)
+    ch_ligation_site = Channel.value(ligation_site) // ligation site sequence string
 }else if (params.dnase){
     ch_restriction_site = Channel.empty()
     ch_ligation_site = Channel.empty()
@@ -37,6 +37,8 @@ workflow HICSTUFF {
     HICSTUFF_SUB(
         INPUT_CHECK.out.reads,
         PREPARE_GENOME.out.index,
-        ch_ligation_site
+        ch_ligation_site,
+        params.digestion, // enzyme name string
+        ch_fasta
     )
 }
-- 
GitLab
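
Note on the params.digest[ params.digestion ] lookup in workflows/hicstuff.nf above: the digest map itself is not part of this patch, so the snippet below is only a hypothetical sketch of what such entries could look like (the enzyme names and site strings follow the usual nf-core/hic conventions; the actual keys and values in this pipeline's config may differ):

    // Hypothetical params.digest entries consumed by workflows/hicstuff.nf;
    // '^' marks the enzyme cut position in the restriction site.
    params {
        digest {
            'dpnii' {
                restriction_site = '^GATC'
                ligation_site    = 'GATCGATC'
            }
            'hindiii' {
                restriction_site = 'A^AGCTT'
                ligation_site    = 'AAGCTAGCTT'
            }
        }
    }

With such an entry, running the pipeline with --digestion dpnii makes FRAGMENT_ENZYME call hicstuff_fragments.py -e dpnii -i <fasta>, which maps the name to DpnII before digesting the genome into info_contigs.txt and fragments_list.txt.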