diff --git a/bin/__init__.py b/bin/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..813219d380e222f851c6ca9a74da54c87290acff
--- /dev/null
+++ b/bin/__init__.py
@@ -0,0 +1 @@
+from .bin import *
diff --git a/bin/hicstuff_bam2pairs.py b/bin/hicstuff_bam2pairs.py
new file mode 100755
index 0000000000000000000000000000000000000000..6e3cfeb38e2ed81be2f71a14d7da87a08f4f19cd
--- /dev/null
+++ b/bin/hicstuff_bam2pairs.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python
+
+import os, time, csv, sys, re
+import argparse
+import pysam as ps
+import pandas as pd
+import itertools
+from hicstuff_log import logger
+import hicstuff_io as hio
+import hicstuff_digest as hcd
+from Bio import SeqIO
+
+
+
+def bam2pairs(bam1, bam2, out_pairs, info_contigs, min_qual=30):
+    """
+    Make a .pairs file from two Hi-C bam files sorted by read names.
+    The Hi-C mates are matched by read identifier. Pairs where at least one
+    read maps with MAPQ below the min_qual threshold are discarded. Pairs are
+    sorted by readID and stored in upper triangle (first mate has the lower coordinate).
+    Parameters
+    ----------
+    bam1 : str
+        Path to the name-sorted BAM file with aligned Hi-C forward reads.
+    bam2 : str
+        Path to the name-sorted BAM file with aligned Hi-C reverse reads.
+    out_pairs : str
+        Path to the output tab-separated .pairs file with columns
+        readID, chr1, pos1, chr2, pos2, strand1, strand2.
+    info_contigs : str
+        Path to the info contigs file, to get info on chromosome sizes and order.
+    min_qual : int
+        Minimum mapping quality required to keep a Hi-C pair.
+    """
+    forward = ps.AlignmentFile(bam1, "rb")
+    reverse = ps.AlignmentFile(bam2, "rb")
+
+    # Generate header lines
+    format_version = "## pairs format v1.0\n"
+    sorting = "#sorted: readID\n"
+    cols = "#columns: readID chr1 pos1 chr2 pos2 strand1 strand2\n"
+    # Chromosome order will be identical in info_contigs and pair files
+    chroms = pd.read_csv(info_contigs, sep="\t").apply(
+        lambda x: "#chromsize: %s %d\n" % (x.contig, x.length), axis=1
+    )
+    with open(out_pairs, "w") as pairs:
+        pairs.writelines([format_version, sorting, cols] + chroms.tolist())
+        pairs_writer = csv.writer(pairs, delimiter="\t")
+        n_reads = {"total": 0, "mapped": 0}
+        # Remember if some read IDs were missing from either file
+        unmatched_reads = 0
+        # Remember if all reads in one bam file have been read
+        exhausted = [False, False]
+        # Iterate on both BAM simultaneously
+        end_regex = re.compile(r'/[12]$')
+        for end1, end2 in itertools.zip_longest(forward, reverse):
+            # Remove end-specific suffix if any (guard against an exhausted
+            # file, where zip_longest yields None)
+            if end1 is not None:
+                end1.query_name = re.sub(end_regex, '', end1.query_name)
+            if end2 is not None:
+                end2.query_name = re.sub(end_regex, '', end2.query_name)
+            # Both files still have reads
+            # Check if reads pass filter
+            try:
+                end1_passed = end1.mapping_quality >= min_qual
+            # Happens if end1 bam file has been exhausted
+            except AttributeError:
+                exhausted[0] = True
+                end1_passed = False
+            try:
+                end2_passed = end2.mapping_quality >= min_qual
+            # Happens if end2 bam file has been exhausted
+            except AttributeError:
+                exhausted[1] = True
+                end2_passed = False
+            # Skip reads whose mate is absent until the names match or the
+            # reads have been exhausted
+            while sum(exhausted) == 0 and end1.query_name != end2.query_name:
+                # Get next read and check filters again
+                # Count single-read iteration
+                unmatched_reads += 1
+                n_reads["total"] += 1
+                if end1.query_name < end2.query_name:
+                    try:
+                        end1 = next(forward)
+                        end1_passed = end1.mapping_quality >= min_qual
+                    # If EOF is reached in BAM 1
+                    except (StopIteration, AttributeError):
+                        exhausted[0] = True
+                        end1_passed = False
+                    n_reads["mapped"] += end1_passed
+                elif end1.query_name > end2.query_name:
+                    try:
+                        end2 = next(reverse)
+                        end2_passed = end2.mapping_quality >= min_qual
+                    # If EOF is reached in BAM 2
+                    except (StopIteration, AttributeError):
+                        exhausted[1] = True
+                        end2_passed = False
+                    n_reads["mapped"] += end2_passed
+
+            # 2 reads processed per iteration, unless one file is exhausted
+            n_reads["total"] += 2 - sum(exhausted)
+            n_reads["mapped"] += sum([end1_passed, end2_passed])
+            # Keep only pairs where both reads have good quality
+            if end1_passed and end2_passed:
+
+                # Flipping to get upper triangle
+                if (
+                    end1.reference_id == end2.reference_id
+                    and end1.reference_start > end2.reference_start
+                ) or end1.reference_id > end2.reference_id:
+                    end1, end2 = end2, end1
+                pairs_writer.writerow(
+                    [
+                        end1.query_name,
+                        end1.reference_name,
+                        end1.reference_start + 1,
+                        end2.reference_name,
+                        end2.reference_start + 1,
+                        "-" if end1.is_reverse else "+",
+                        "-" if end2.is_reverse else "+",
+                    ]
+                )
+    pairs.close()
+    if unmatched_reads > 0:
+        logger.warning(
+            "%d reads were only present in one BAM file. Make sure you sorted reads by name before running the pipeline.",
+            unmatched_reads,
+        )
+    logger.info(
+        "{perc_map}% reads (single ends) mapped with Q >= {qual} ({mapped}/{total})".format(
+            total=n_reads["total"],
+            mapped=n_reads["mapped"],
+            perc_map=round(100 * n_reads["mapped"] / n_reads["total"]),
+            qual=min_qual,
+        )
+    )
+
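+# Illustrative sketch of the .pairs file written by bam2pairs (not executed
+# here; chromosome names, sizes and coordinates are made up, and fields are
+# tab-separated in the real file):
+#
+#   ## pairs format v1.0
+#   #sorted: readID
+#   #columns: readID chr1 pos1 chr2 pos2 strand1 strand2
+#   #chromsize: chr1 230218
+#   read_001  chr1  1021  chr1  54520  +  -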
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-b1", "--bam1")
+    parser.add_argument("-b2", "--bam2")
+    parser.add_argument("-o", "--out_pairs")
+    parser.add_argument("-x", "--out_idx")
+    parser.add_argument("-i", "--info_contigs")
+    parser.add_argument("-q", "--min_qual")
+    parser.add_argument("-e", "--enzyme")
+    parser.add_argument("-f", "--fasta")
+    parser.add_argument("-c", "--circular")
+    args = parser.parse_args()
+
+    bam1 = args.bam1
+    bam2 = args.bam2
+    out_pairs = args.out_pairs
+    out_idx = args.out_idx
+    info_contigs = args.info_contigs
+    min_qual = int(args.min_qual)
+    enzyme = args.enzyme
+    fasta = args.fasta
+    circular = args.circular
+
+    # hicstuff enzyme names are case-sensitive: map lowercase values to their canonical names
+    if enzyme == "hindiii":
+        enzyme = "HindIII"
+    elif enzyme == "dpnii":
+        enzyme = "DpnII"
+    elif enzyme == "bglii":
+        enzyme = "BglII"
+    elif enzyme == "mboi":
+        enzyme = "MboI"
+    elif enzyme == "arima":
+        enzyme = "Arima"
+
+    bam2pairs(bam1, bam2, out_pairs, info_contigs, min_qual)
+
+    restrict_table = {}
+    for record in SeqIO.parse(hio.read_compressed(fasta), "fasta"):
+        # Get chromosome restriction table
+        restrict_table[record.id] = hcd.get_restriction_table(
+            record.seq, enzyme, circular=circular
+        )
+
+    hcd.attribute_fragments(out_pairs, out_idx, restrict_table)
+
+    hio.sort_pairs(
+        out_idx,
+        out_idx + ".sorted",
+        keys=["chr1", "pos1", "chr2", "pos2"],
+        threads=1,
+        tmp_dir=None,
+    )
+    os.rename(out_idx + ".sorted", out_idx)
diff --git a/bin/hicstuff_build_matrix.py b/bin/hicstuff_build_matrix.py
new file mode 100755
index 0000000000000000000000000000000000000000..bd68ee0f5c6a7b839ee4537240019e0147cbb080
--- /dev/null
+++ b/bin/hicstuff_build_matrix.py
@@ -0,0 +1,176 @@
+#!/usr/bin/env python
+
+import os, time, csv, sys, re
+import argparse
+import pysam as ps
+import pandas as pd
+import shutil as st
+import subprocess as sp
+import itertools
+from hicstuff_log import logger
+import hicstuff_io as hio
+import hicstuff_digest as hcd
+from Bio import SeqIO
+
+
+def pairs2matrix(
+    pairs_file, mat_file, fragments_file, mat_fmt="graal", threads=1, tmp_dir=None
+):
+    """Generate the matrix by counting the number of occurences of each
+    combination of restriction fragments in a pairs file.
+    Parameters
+    ----------
+    pairs_file : str
+        Path to a Hi-C pairs file, with frag1 and frag2 columns added.
+    mat_file : str
+        Path where the matrix will be written.
+    fragments_file : str
+        Path to the fragments_list.txt file. Used to know the total
+        matrix size in case some fragments at the end have no observed contacts.
+    mat_fmt : str
+        The format to use when writing the matrix. Can be graal or bg2 format.
+    threads : int
+        Number of threads to use in parallel.
+    tmp_dir : str
+        Temporary directory for sorting files. If None given, will use the system default.
+    """
+    # Number of fragments is N lines in frag list - 1 for the header
+    n_frags = sum(1 for line in open(fragments_file, "r")) - 1
+    frags = pd.read_csv(fragments_file, delimiter="\t")
+
+    def write_mat_entry(frag1, frag2, contacts):
+        """Write a single sparse matrix entry in either graal or bg2 format"""
+        if mat_fmt == "graal":
+            mat.write("\t".join(map(str, [frag1, frag2, contacts])) + "\n")
+        elif mat_fmt == "bg2":
+            frag1, frag2 = int(frag1), int(frag2)
+            mat.write(
+                "\t".join(
+                    map(
+                        str,
+                        [
+                            frags.chrom[frag1],
+                            frags.start_pos[frag1],
+                            frags.end_pos[frag1],
+                            frags.chrom[frag2],
+                            frags.start_pos[frag2],
+                            frags.end_pos[frag2],
+                            contacts,
+                        ],
+                    )
+                )
+                + "\n"
+            )
+
+    pre_mat_file = mat_file + ".pre.pairs"
+    hio.sort_pairs(
+        pairs_file,
+        pre_mat_file,
+        keys=["frag1", "frag2"],
+        threads=threads,
+        tmp_dir=tmp_dir,
+    )
+    header_size = len(hio.get_pairs_header(pre_mat_file))
+    with open(pre_mat_file, "r") as pairs, open(mat_file, "w") as mat:
+        # Skip header lines
+        for _ in range(header_size):
+            next(pairs)
+        prev_pair = ["0", "0"]  # Pairs identified by [frag1, frag2]
+        n_occ = 0  # Number of occurrences of each frag combination
+        n_nonzero = 0  # Total number of nonzero matrix entries
+        n_pairs = 0  # Total number of pairs entered in the matrix
+        pairs_reader = csv.reader(pairs, delimiter="\t")
+        # First line contains nrows, ncols and number of nonzero entries.
+        # Number of nonzero entries is unknown for now
+        if mat_fmt == "graal":
+            mat.write("\t".join(map(str, [n_frags, n_frags, "-"])) + "\n")
+        for pair in pairs_reader:
+            # Fragment ids are field 8 and 9
+            curr_pair = [pair[7], pair[8]]
+            # Increment number of occurrences for fragment pair
+            if prev_pair == curr_pair:
+                n_occ += 1
+            # Write previous pair and start a new one
+            else:
+                if n_occ > 0:
+                    write_mat_entry(prev_pair[0], prev_pair[1], n_occ)
+                prev_pair = curr_pair
+                n_pairs += n_occ
+                n_occ = 1
+                n_nonzero += 1
+        # Write the last value
+        write_mat_entry(curr_pair[0], curr_pair[1], n_occ)
+        n_nonzero += 1
+        n_pairs += 1
+
+    # Edit header line to fill number of nonzero entries inplace in graal header
+    if mat_fmt == "graal":
+        with open(mat_file) as mat, open(pre_mat_file, "w") as tmp_mat:
+            header = mat.readline()
+            header = header.replace("-", str(n_nonzero))
+            tmp_mat.write(header)
+            st.copyfileobj(mat, tmp_mat)
+        # Replace the matrix file with the one with corrected header
+        os.rename(pre_mat_file, mat_file)
+    else:
+        os.remove(pre_mat_file)
+
+    logger.info(
+        "%d pairs used to build a contact map of %d bins with %d nonzero entries.",
+        n_pairs,
+        n_frags,
+        n_nonzero,
+    )
+
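+# Illustrative sketch of the two sparse matrix formats written by pairs2matrix,
+# with made-up values. graal starts with a "nrows ncols nnz" header followed by
+# "frag1 frag2 n_occ" triplets; bg2 has one
+# "chrom1 start1 end1 chrom2 start2 end2 contacts" line per nonzero entry:
+#
+#   graal:  675  675  10456        bg2:  chr1  0  1520  chr1  1520  2430  12
+#           0    0    12                 chr1  0  1520  chr1  2430  3800  3
+#           0    1    3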
+
+def pairs2cool(pairs_file, cool_file, bins_file):
+    """
+    Make a cooler file from the pairs file. See https://github.com/mirnylab/cooler/ for more information.
+
+    Parameters
+    ----------
+    pairs_file : str
+        Path to the pairs file containing input contact data.
+    cool_file : str
+        Path to the output cool file name to generate.
+    bins_file : str
+        Path to the file containing genomic segmentation information (fragments_list.txt).
+    """
+
+    # Make bins file compatible with cooler cload
+    bins_tmp = bins_file + ".cooler"
+    bins = pd.read_csv(bins_file, sep="\t", usecols=[1, 2, 3], skiprows=1, header=None)
+    bins.to_csv(bins_tmp, sep="\t", header=False, index=False)
+
+    cooler_cmd = "cooler cload pairs -c1 2 -p1 3 -p2 4 -c2 5 {bins} {pairs} {cool}"
+    cool_args = {"bins": bins_tmp, "pairs": pairs_file, "cool": cool_file}
+    sp.call(cooler_cmd.format(**cool_args), shell=True)
+    os.remove(bins_tmp)
+
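+# For reference, with hypothetical file names the template above expands to a
+# standard cooler command such as:
+#   cooler cload pairs -c1 2 -p1 3 -p2 4 -c2 5 \
+#       fragments_list.txt.cooler valid_idx.pairs contacts.cool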
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-p", "--pairs")
+    parser.add_argument("-f", "--fragments")
+    parser.add_argument("-t", "--type")
+    parser.add_argument("-o", "--output")
+    args = parser.parse_args()
+
+    pairs = args.pairs
+    fragments_list = args.fragments
+    mat_format = args.type
+    mat_out = args.output
+
+    if mat_format == "cool":
+        # Name matrix file in .cool
+        cool_file = os.path.splitext(mat_out)[0] + ".cool"
+        pairs2cool(pairs, cool_file, fragments_list)
+    else:
+        pairs2matrix(
+            pairs,
+            mat_out,
+            fragments_list,
+            mat_fmt=mat_format,
+            threads=1,
+            tmp_dir=None,
+        )
diff --git a/bin/hicstuff_digest.py b/bin/hicstuff_digest.py
new file mode 100644
index 0000000000000000000000000000000000000000..3fba3da20ddfd4d500c00cc5b01141b8f355f410
--- /dev/null
+++ b/bin/hicstuff_digest.py
@@ -0,0 +1,489 @@
+#!/usr/bin/env python3
+# coding: utf-8
+
+"""Genome digestion
+Functions used to write auxiliary instagraal compatible
+sparse matrices.
+"""
+
+from Bio import SeqIO, SeqUtils
+from Bio.Seq import Seq
+from Bio.Restriction import RestrictionBatch, Analysis
+import os, sys, csv
+import re
+import collections
+import copy
+import matplotlib.pyplot as plt
+import numpy as np
+import pandas as pd
+from hicstuff_log import logger
+import hicstuff_io as hio
+
+DEFAULT_FRAGMENTS_LIST_FILE_NAME = "fragments_list.txt"
+DEFAULT_INFO_CONTIGS_FILE_NAME = "info_contigs.txt"
+DEFAULT_SPARSE_MATRIX_FILE_NAME = "abs_fragments_contacts_weighted.txt"
+DEFAULT_KB_BINNING = 1
+DEFAULT_THRESHOLD_SIZE = 0
+# Most used enzyme for eukaryotes
+DEFAULT_ENZYME = "DpnII"
+# If using evenly-sized chunks instead of restriction
+# enzymes, they shouldn't be too short
+DEFAULT_MIN_CHUNK_SIZE = 50
+
+
+def write_frag_info(
+    fasta,
+    enzyme,
+    min_size=DEFAULT_THRESHOLD_SIZE,
+    circular=False,
+    output_contigs=DEFAULT_INFO_CONTIGS_FILE_NAME,
+    output_frags=DEFAULT_FRAGMENTS_LIST_FILE_NAME,
+    output_dir=None,
+):
+    """Digest and write fragment information
+    Write the fragments_list.txt and info_contigs.txt that are necessary for
+    instagraal to run.
+    Parameters
+    ----------
+    fasta : pathlib.Path or str
+        The path to the reference genome
+    enzyme : str, int or list of str
+        If a string, must be the name of an enzyme (e.g. DpnII) and the genome
+        will be cut at the enzyme's restriction sites. If a number, the genome
+        will be cut uniformly into chunks with length equal to that number. A
+        list of enzymes can also be specified if using multiple enzymes.
+    min_size : float, optional
+        Size below which shorter contigs are discarded. Default is 0, i.e. all
+        contigs are retained.
+    circular : bool, optional
+        Whether the genome is circular. Default is False.
+    output_contigs : str, optional
+        The name of the file with contig info. Default is info_contigs.txt
+    output_frags : str, optional
+        The name of the file with fragment info. Default is fragments_list.txt
+    output_dir : str, optional
+        The path to the output directory, which will be created if not already
+        existing. Default is the current directory.
+    """
+
+    records = SeqIO.parse(hio.read_compressed(fasta), "fasta")
+
+    try:
+        info_contigs_path = os.path.join(output_dir, output_contigs)
+        frag_list_path = os.path.join(output_dir, output_frags)
+    except TypeError:
+        info_contigs_path = output_contigs
+        frag_list_path = output_frags
+
+    with open(info_contigs_path, "w") as info_contigs:
+
+        info_contigs.write("contig\tlength\tn_frags\tcumul_length\n")
+
+        with open(frag_list_path, "w") as fragments_list:
+
+            fragments_list.write(
+                "id\tchrom\tstart_pos" "\tend_pos\tsize\tgc_content\n"
+            )
+
+            total_frags = 0
+
+            for record in records:
+                contig_seq = record.seq
+                contig_name = record.id
+                contig_length = len(contig_seq)
+                if contig_length < int(min_size):
+                    continue
+
+                sites = get_restriction_table(
+                    contig_seq, enzyme, circular=circular
+                )
+                fragments = (
+                    contig_seq[sites[i] : sites[i + 1]]
+                    for i in range(len(sites) - 1)
+                )
+                n_frags = 0
+
+                current_id = 1
+                start_pos = 0
+                for frag in fragments:
+                    frag_length = len(frag)
+                    if frag_length > 0:
+                        end_pos = start_pos + frag_length
+                        gc_content = SeqUtils.GC(frag) / 100.0
+
+                        current_fragment_line = "%s\t%s\t%s\t%s\t%s\t%s\n" % (
+                            current_id,
+                            contig_name,
+                            start_pos,
+                            end_pos,
+                            frag_length,
+                            gc_content,
+                        )
+
+                        fragments_list.write(current_fragment_line)
+
+                        try:
+                            assert (current_id == 1 and start_pos == 0) or (
+                                current_id > 1 and start_pos > 0
+                            )
+                        except AssertionError:
+                            logger.error((current_id, start_pos))
+                            raise
+                        start_pos = end_pos
+                        current_id += 1
+                        n_frags += 1
+
+                current_contig_line = "%s\t%s\t%s\t%s\n" % (
+                    contig_name,
+                    contig_length,
+                    n_frags,
+                    total_frags,
+                )
+                total_frags += n_frags
+                info_contigs.write(current_contig_line)
+
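+# Illustrative sketch of the two files written by write_frag_info (values made
+# up). In info_contigs.txt the cumul_length column holds the cumulative number
+# of fragments in the preceding contigs, as computed above.
+#
+#   fragments_list.txt:
+#     id  chrom  start_pos  end_pos  size  gc_content
+#     1   chr1   0          1520     1520  0.42
+#     2   chr1   1520       2430     910   0.38
+#
+#   info_contigs.txt:
+#     contig  length  n_frags  cumul_length
+#     chr1    230218  675      0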
+
+def attribute_fragments(pairs_file, idx_pairs_file, restriction_table):
+    """
+    Writes the indexed pairs file, which has two more columns than the input
+    pairs file corresponding to the restriction fragment index of each read.
+    Note that pairs files have 1bp point positions whereas restriction table
+    has 0bp point positions.
+    Parameters
+    ----------
+    pairs_file: str
+        Path to the input pairs file. Consists of 7 tab-separated
+        columns: readID, chr1, pos1, chr2, pos2, strand1, strand2
+    idx_pairs_file: str
+        Path to the output indexed pairs file. Consists of 9 tab-separated
+        columns: readID, chr1, pos1, chr2, pos2, strand1, strand2,
+        frag1, frag2. frag1 and frag2 are 0-based restriction fragments based
+        on whole genome.
+    restriction_table: dict
+        Dictionary with chromosome identifiers (str) as keys and list of
+        positions (int) of restriction sites as values.
+    """
+
+    # NOTE: Bottlenecks here are 1. binary search in find_frag and 2. writerow
+    # 1. could be reduced by searching groups of N frags in parallel and 2. by
+    # writing N frags simultaneously using a single call of writerows.
+
+    # Parse and update header section
+    pairs_header = hio.get_pairs_header(pairs_file)
+    header_size = len(pairs_header)
+    chrom_order = []
+    with open(idx_pairs_file, "w") as idx_pairs:
+        for line in pairs_header:
+            # Add new column names to header
+            if line.startswith("#columns"):
+                line = line.rstrip() + " frag1 frag2"
+            if line.startswith("#chromsize"):
+                chrom_order.append(line.split()[1])
+            idx_pairs.write(line + "\n")
+
+    # Get number of fragments per chrom to allow genome-based indices
+    shift_frags = {}
+    prev_frags = 0
+    for rank, chrom in enumerate(chrom_order):
+        if rank > 0:
+            # Note the "-1" because there are nfrags + 1 sites in rest table
+            prev_frags += len(restriction_table[chrom_order[rank - 1]]) - 1
+        # Idx of each chrom's frags will be shifted by n frags in previous chroms
+        shift_frags[chrom] = prev_frags
+
+    missing_contigs = set()
+    # Attribute pairs to fragments and append them to output file (after header)
+    with open(pairs_file, "r") as pairs, open(
+        idx_pairs_file, "a"
+    ) as idx_pairs:
+        # Skip header lines
+        for _ in range(header_size):
+            next(pairs)
+
+        # Define input and output fields
+        pairs_cols = [
+            "readID",
+            "chr1",
+            "pos1",
+            "chr2",
+            "pos2",
+            "strand1",
+            "strand2",
+        ]
+        idx_cols = pairs_cols + ["frag1", "frag2"]
+
+        # Use csv reader / writer to automatically parse columns into a dict
+        pairs_reader = csv.DictReader(
+            pairs, fieldnames=pairs_cols, delimiter="\t"
+        )
+        pairs_writer = csv.DictWriter(
+            idx_pairs, fieldnames=idx_cols, delimiter="\t"
+        )
+
+        for pair in pairs_reader:
+            # Get the 0-based indices of corresponding restriction fragments
+            # Deducing 1 from pair position to get it into 0bp point
+            pair["frag1"] = find_frag(
+                int(pair["pos1"]) - 1, restriction_table[pair["chr1"]]
+            )
+            pair["frag2"] = find_frag(
+                int(pair["pos2"]) - 1, restriction_table[pair["chr2"]]
+            )
+            # Shift fragment indices to make them genome-based instead of
+            # chromosome-based
+            try:
+                pair["frag1"] += shift_frags[pair["chr1"]]
+            except KeyError:
+                missing_contigs.add(pair["chr1"])
+            try:
+                pair["frag2"] += shift_frags[pair["chr2"]]
+            except KeyError:
+                missing_contigs.add(pair["chr2"])
+
+            # Write indexed pairs in the new file
+            pairs_writer.writerow(pair)
+
+        if missing_contigs:
+            logger.warning(
+                "Pairs on the following contigs were discarded as "
+                "those contigs are not listed in the paris file header. "
+                "This is normal if you filtered out small contigs: %s"
+                % " ".join(list(missing_contigs))
+            )
+
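+# Illustrative sketch of an indexed pairs record written by attribute_fragments
+# (values made up): the two extra fields are genome-wide, 0-based restriction
+# fragment indices appended to the original seven columns.
+#
+#   read_001  chr1  1021  chr1  54520  +  -  3  27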
+
+def get_restriction_table(seq, enzyme, circular=False):
+    """
+    Get the restriction table for a single genomic sequence.
+    Parameters
+    ----------
+    seq : Seq object
+        A biopython Seq object representing a chromosome or contig.
+    enzyme : int, str or list of str
+        The name of the restriction enzyme used, or a list of restriction
+        enzyme names. Can also be an integer, to digest by fixed chunk size.
+    circular : bool
+        Whether the genome is circular.
+    Returns
+    -------
+    numpy.array:
+        List of restriction fragment boundary positions for the input sequence.
+    >>> from Bio.Seq import Seq
+    >>> get_restriction_table(Seq("AAGCCGGATCGG"),"HpaII")
+    array([ 0,  4, 12])
+    >>> get_restriction_table(Seq("AA"),["HpaII", "MluCI"])
+    array([0, 2])
+    >>> get_restriction_table(Seq("AA"),"aeiou1")
+    Traceback (most recent call last):
+        ...
+    ValueError: aeiou1 is not a valid restriction enzyme.
+    >>> get_restriction_table("AA","HpaII")
+    Traceback (most recent call last):
+        ...
+    TypeError: Expected Seq or MutableSeq instance, got <class 'str'> instead
+    """
+    chrom_len = len(seq)
+    wrong_enzyme = "{} is not a valid restriction enzyme.".format(enzyme)
+    # Restriction batch containing the restriction enzyme
+    try:
+        enz = [enzyme] if isinstance(enzyme, str) else enzyme
+        cutter = RestrictionBatch(enz)
+    except (TypeError, ValueError):
+        try:
+            cutter = max(int(enzyme), DEFAULT_MIN_CHUNK_SIZE)
+        except ValueError:
+            raise ValueError(wrong_enzyme)
+
+    # Conversion from string type to restriction type
+    if isinstance(cutter, int):
+        sites = [i for i in range(0, chrom_len, cutter)]
+        if sites[-1] < chrom_len:
+            sites.append(chrom_len)
+    else:
+        # Find sites of all restriction enzymes given
+        ana = Analysis(cutter, seq, linear=not circular)
+        sites = ana.full()
+        # Gets all sites into a single flat list with 0-based index
+        sites = [site - 1 for enz in sites.values() for site in enz]
+        # Sort sites by position and add the start and end of the sequence
+        sites.sort()
+        sites.insert(0, 0)
+        sites.append(chrom_len)
+
+    return np.array(sites)
+
+
+def find_frag(pos, r_sites):
+    """
+    Use binary search to find the index of a chromosome restriction fragment
+    corresponding to an input genomic position.
+    Parameters
+    ----------
+    pos : int
+        Genomic position, in base pairs.
+    r_sites : list
+        List of genomic positions corresponding to restriction sites.
+    Returns
+    -------
+    int
+        The 0-based index of the restriction fragment to which the position belongs.
+    >>> find_frag(15, [0, 20, 30])
+    0
+    >>> find_frag(15, [10, 20, 30])
+    Traceback (most recent call last):
+        ...
+    ValueError: The first position in the restriction table is not 0.
+    >>> find_frag(31, [0, 20, 30])
+    Traceback (most recent call last):
+        ...
+    ValueError: Read position is larger than last entry in restriction table.
+    """
+    if r_sites[0] != 0:
+        raise ValueError(
+            "The first position in the restriction table is not 0."
+        )
+    if pos > r_sites[-1]:
+        raise ValueError(
+            "Read position is larger than last entry in restriction table."
+        )
+    # binary search for the index of the read
+    index = max(np.searchsorted(r_sites, pos, side="right") - 1, 0)
+    # Last site = end of the chrom, index of last fragment is last site - 1
+    index = min(len(r_sites) - 2, index)
+
+    return index
+
+
+def frag_len(
+    frags_file_name=DEFAULT_FRAGMENTS_LIST_FILE_NAME,
+    output_dir=None,
+    plot=False,
+    fig_path=None,
+):
+    """
+    Log summary statistics of the fragment length distribution based on an
+    input fragment file. Can optionally show a histogram instead
+    of a text summary.
+    Parameters
+    ----------
+    frags_file_name : str
+        Path to the output list of fragments.
+    output_dir : str
+        Directory where the list should be saved.
+    plot : bool
+        Whether a histogram of fragment length should be shown.
+    fig_path : str
+        If a path is given, the figure will be saved instead of shown.
+    """
+
+    try:
+        frag_list_path = os.path.join(output_dir, frags_file_name)
+    except TypeError:
+        frag_list_path = frags_file_name
+    frags = pd.read_csv(frag_list_path, sep="\t")
+    nfrags = frags.shape[0]
+    med_len = frags["size"].median()
+    nbins = 40
+    if plot:
+        fig, ax = plt.subplots()
+        _, _, _ = ax.hist(frags["size"], bins=nbins)
+
+        ax.set_xlabel("Fragment length [bp]")
+        ax.set_ylabel("Log10 number of fragments")
+        ax.set_title("Distribution of restriction fragment length")
+        ax.set_yscale("log", base=10)
+        ax.annotate(
+            "Total fragments: {}".format(nfrags),
+            xy=(0.95, 0.95),
+            xycoords="axes fraction",
+            fontsize=12,
+            horizontalalignment="right",
+            verticalalignment="top",
+        )
+        ax.annotate(
+            "Median length: {}".format(med_len),
+            xy=(0.95, 0.90),
+            xycoords="axes fraction",
+            fontsize=12,
+            horizontalalignment="right",
+            verticalalignment="top",
+        )
+        # Tweak spacing to prevent clipping of ylabel
+        fig.tight_layout()
+        if fig_path:
+            plt.savefig(fig_path)
+        else:
+            plt.show()
+        plt.clf()
+    else:
+        logger.info(
+            "Genome digested into {0} fragments with a median "
+            "length of {1}".format(nfrags, med_len)
+        )
+
+
+def gen_enzyme_religation_regex(enzyme):
+    """Return a regex which corresponds to all possible religation sites given a
+    set of enzymes.
+    Parameters:
+    -----------
+    enzyme : str
+        String that contains the names of the enzymes separated by commas.
+    Returns:
+    --------
+    re.Pattern :
+        Regex that corresponds to all possible ligation sites given a set of
+        enzymes.
+    Examples:
+    ---------
+    >>> gen_enzyme_religation_regex('HpaII')
+    re.compile('CCGCGG')
+    >>> gen_enzyme_religation_regex('HpaII,MluCI')
+    re.compile('AATTAATT|AATTCGG|CCGAATT|CCGCGG')
+    """
+
+    # Split the str on the comma to separate the different enzymes.
+    enzyme = enzyme.split(",")
+
+    # Check the enzyme names against Biopython's restriction enzyme list.
+    rb = RestrictionBatch(enzyme)
+
+    # Initialization:
+    give_list = []
+    accept_list = []
+    ligation_list = []
+
+    # Iterates on the enzymes.
+    for enz in rb:
+
+        # Extract restriction sites and look for cut sites.
+        site = enz.elucidate()
+        fw_cut = site.find("^")
+        rev_cut = site.find("_")
+
+        # Process "give" site. Remove N on the left (useless).
+        give_site = site[:rev_cut].replace("^", "")
+        while give_site[0] == "N":
+            give_site = give_site[1:]
+        give_list.append(give_site)
+
+        # Process "accept" site. Remove N on the rigth (useless).
+        accept_site = site[fw_cut + 1 :].replace("_", "")
+        while accept_site[-1] == "N":
+            accept_site = accept_site[:-1]
+        accept_list.append(accept_site)
+
+    # Iterate over the two lists to build all the possible Hi-C ligation sites.
+    for give_site in give_list:
+        for accept_site in accept_list:
+            # Replace "N" by "." for regex searching of the sites
+            ligation_list.append((give_site + accept_site).replace("N", "."))
+            ligation_list.append(
+                str(Seq(give_site + accept_site).reverse_complement()).replace(
+                    "N", "."
+                )
+            )
+
+    # Build the regex for any ligation sites.
+    pattern = "|".join(sorted(list(set(ligation_list))))
+    return re.compile(pattern)
diff --git a/bin/hicstuff_distance_law.py b/bin/hicstuff_distance_law.py
new file mode 100755
index 0000000000000000000000000000000000000000..545c3f7e1e0929d070a850ed239aab90d585f8dc
--- /dev/null
+++ b/bin/hicstuff_distance_law.py
@@ -0,0 +1,839 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import numpy as np
+import sys
+import matplotlib.pyplot as plt
+import warnings
+from scipy import ndimage
+from matplotlib import cm
+import pandas as pd
+import os as os
+import csv as csv
+import hicstuff_io as hio
+from hicstuff_log import logger
+import argparse
+
+def export_distance_law(xs, ps, names, out_file=None):
+    """ Export the x(s) and p(s) from two list of numpy.ndarrays to a table
+    in txt file with three columns separated by a tabulation. The first column
+    contains the x(s), the second the p(s) and the third the name of the arm or
+    chromosome. The file is created in the directory given by outdir or the
+    current file if no file is given.
+    Parameters
+    ----------
+    xs : list of numpy.ndarray
+        The list of the start position of logbins of each p(s) in base pairs.
+    ps : list of numpy.ndarray
+        The list of p(s).
+    names : list of string
+        List containing the names of the chromosomes/arms/conditions of the p(s)
+        values given.
+    out_file : str or None
+        Path where output file should be written. ./distance_law.txt by default.
+    Return
+    ------
+    txt file:
+        File with three columns separated by a tabulation. The first column
+        contains the x(s), the second the p(s) and the third the name of the arm
+        or chromosome. The file is created at the given output path, or at the
+        default location if none is given.
+    """
+    # ./distance_law.txt as out_file if no out_file is given.
+    if out_file is None:
+        out_file = os.getcwd() + "/distance_law.txt"
+    # Sanity check: as many chromosomes/arms as ps
+    if len(xs) != len(names):
+        logger.error("Number of chromosomes/arms and number of p(s) list differ.")
+        sys.exit(1)
+    # Create the file and write it
+    f = open(out_file, "w")
+    for i in range(len(xs)):
+        for j in range(len(xs[i])):
+            line = (
+                str(format(xs[i][j], "g"))
+                + "\t"
+                + str(format(ps[i][j], "g"))
+                + "\t"
+                + names[i]
+                + "\n"
+            )
+            f.write(line)
+    f.close()
+
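+# Illustrative sketch of the exported table (tab-separated, values made up):
+# one line per logbin, with the bin start, the p(s) value and the arm or
+# chromosome name.
+#
+#   1     0.00213   chr1
+#   2     0.00198   chr1
+#   3     0.00171   chr1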
+
+def import_distance_law(distance_law_file):
+    """ Import the table created by export_distance_law and return the list of
+    x(s) and p(s) in the order of the chromosomes.
+    Parameters
+    ----------
+    distance_law_file : string
+        Path to the file containing three columns : the x(s), the p(s), and the
+        chromosome/arm name.
+    Returns
+    -------
+    list of numpy.ndarray :
+        The start coordinate of each bin, one array per chromosome or arm.
+    list of numpy.ndarray :
+        The distance law probabilities corresponding of the bins of the
+        previous list.
+    list of numpy.ndarray :
+        The names of the arms/chromosomes corresponding to the previous
+        list.
+    """
+    file = pd.read_csv(
+        distance_law_file,
+        sep="\t",
+        header=None,
+        dtype={"a": np.int32, "b": np.float32, "c": str},
+    )
+    names_idx = np.unique(file.iloc[:, 2], return_index=True)[1]
+    names = [file.iloc[:, 2][index] for index in sorted(names_idx)]
+    xs = [None] * len(names)
+    ps = [None] * len(names)
+    labels = [None] * len(names)
+    for i in range(len(names)):
+        subfile = file[file.iloc[:, 2] == names[i]]
+        xs[i] = np.array(subfile.iloc[:, 0])
+        ps[i] = np.array(subfile.iloc[:, 1])
+        labels[i] = np.array(subfile.iloc[:, 2])
+    return xs, ps, labels
+
+
+def get_chr_segment_bins_index(fragments, centro_file=None, rm_centro=0):
+    """Get the index positions of the start and end bins of different
+    chromosomes, or arms if the centromere positions have been given, from the
+    fragments file made by hicstuff.
+
+    Parameters
+    ----------
+    fragments : pandas.DataFrame
+        Table containing the ID of the fragment in the first column, the name
+        of the chromosome in the second, and the start and end positions of the
+        fragment in the third and fourth columns. The table has no header.
+        (File like the 'fragments_list.txt' from hicstuff)
+    centro_file : None or str
+        None or path to a file with the genomic positions of the centromeres,
+        ordered like the chromosomes and separated by spaces. The file has only
+        one line.
+    rm_centro : int
+        If a value is given, the contacts close to the centromeres will be
+        removed; it removes as many kb as the value given. Default is zero.
+
+    Returns
+    -------
+    list of floats :
+        The start and end indices of chromosomes/arms to compute the distance
+        law on each chromosome/arm separately.
+    """
+    # Get bins where chromosomes start
+    chr_start_bins = np.where(fragments == 0)[0]
+    # Create a list of same length for the end of the bins
+    chr_end_bins = np.zeros(len(chr_start_bins))
+    # Get bins where chromosomes end
+    for i in range(len(chr_start_bins) - 1):
+        chr_end_bins[i] = chr_start_bins[i + 1]
+    chr_end_bins[-1] = len(fragments.iloc[:, 0])
+    # Combine start and end of bins in a single array. Values are the id of the
+    # bins
+    chr_segment_bins = np.sort(np.concatenate((chr_start_bins, chr_end_bins)))
+    if centro_file is not None:
+        # Read the file of the centromers
+        with open(centro_file, "r", newline="") as centro:
+            centro = csv.reader(centro, delimiter=" ")
+            centro_pos = next(centro)
+        # Sanity check: as many chroms as centromeres
+        if len(chr_start_bins) != len(centro_pos):
+            logger.warning(
+                "Number of chromosomes and centromeres differ, centromeres position are not taking into account."
+            )
+            centro_file = None
+    if centro_file is not None:
+        # Get bins of centromeres
+        centro_bins = np.zeros(2 * len(centro_pos))
+        for i in range(len(chr_start_bins)):
+            if (i + 1) < len(chr_start_bins):
+                subfrags = fragments[chr_start_bins[i] : chr_start_bins[i + 1]]
+            else:
+                subfrags = fragments[chr_start_bins[i] :]
+            # index of last fragment starting before centro in same chrom
+            centro_bins[2 * i] = chr_start_bins[i] + max(
+                np.where(
+                    subfrags["start_pos"][:] // (int(centro_pos[i]) - rm_centro) == 0
+                )[0]
+            )
+            centro_bins[2 * i + 1] = chr_start_bins[i] + max(
+                np.where(
+                    subfrags["start_pos"][:] // (int(centro_pos[i]) + rm_centro) == 0
+                )[0]
+            )
+        # Combine centro and chrom bins into a single array. Values are the id
+        # of the bins started and ending the arms.
+        chr_segment_bins = np.sort(
+            np.concatenate((chr_start_bins, chr_end_bins, centro_bins))
+        )
+    return list(chr_segment_bins)
+
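+# Worked example of the index list built above, assuming no centromere file:
+# for two chromosomes of 100 and 80 fragments, chr_start_bins is [0, 100],
+# chr_end_bins is [100, 180], and the function returns [0, 100, 100, 180].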
+
+def get_chr_segment_length(fragments, chr_segment_bins):
+    """Compute a list of the length of the different objects (arm or
+    chromosome) given by chr_segment_bins.
+
+    Parameters
+    ----------
+    fragments : pandas.DataFrame
+        Table containing the ID of the fragment in the first column, the name
+        of the chromosome in the second, and the start and end positions of the
+        fragment in the third and fourth columns. The table has no header.
+        (File like the 'fragments_list.txt' from hicstuff)
+    chr_segment_bins : list of floats
+        The start and end indices of chromosomes/arms to compute the distance
+        law on each chromosome/arm separately.
+
+    Returns
+    -------
+    list of numpy.ndarray:
+        The length in base pairs of each chromosome or arm.
+    """
+    chr_segment_length = [None] * int(len(chr_segment_bins) / 2)
+    # Iterate in chr_segment_bins in order to obtain the size of each chromosome/arm
+    for i in range(len(chr_segment_length)):
+        # Obtain the size of the chromosome/arm. The condition handles arms,
+        # where the end position of the last fragment is not the arm size:
+        # for a right arm, the size is counted from the start of the arm.
+        if fragments["start_pos"].iloc[int(chr_segment_bins[2 * i])] == 0:
+            n = fragments["end_pos"].iloc[int(chr_segment_bins[2 * i + 1]) - 1]
+        else:
+            n = (
+                fragments["end_pos"].iloc[int(chr_segment_bins[2 * i + 1]) - 1]
+                - fragments["start_pos"].iloc[int(chr_segment_bins[2 * i])]
+            )
+        chr_segment_length[i] = n
+    return chr_segment_length
+
+
+def logbins_xs(fragments, chr_segment_length, base=1.1, circular=False):
+    """Compute the logbins of each chromosome/arm in order to have theme to
+    compute distance law. At the end you will have bins of increasing with a
+    logspace with the base of the value given in base.
+
+    Parameters
+    ----------
+    fragments : pandas.DataFrame
+        Table containing the ID of the fragment in the first column, the name
+        of the chromosome in the second, and the start and end positions of the
+        fragment in the third and fourth columns. The table has no header.
+        (File like the 'fragments_list.txt' from hicstuff)
+    chr_segment_length: list of floats
+        List of the size in base pairs of the different arms or chromosomes.
+    base : float
+        Base use to construct the logspace of the bins, 1.1 by default.
+    circular : bool
+        If True, calculate the distance as if the chromosome were circular. Default
+        value is False.
+
+    Returns
+    -------
+    list of numpy.ndarray :
+        The start coordinate of each bin, one array per chromosome or arm.
+    """
+    # Create the xs array and a list of the length of the chromosomes/arms
+    xs = [None] * len(chr_segment_length)
+    # Iterate in chr_segment_bins in order to make the logspace
+    for i in range(len(chr_segment_length)):
+        n = chr_segment_length[i]
+        # If the chromosome is circular, the maximum distance between two reads
+        # is divided by two
+        if circular:
+            n /= 2
+        n_bins = int(np.log(n) / np.log(base))
+        # For each chromosome/arm compute a logspace to have the logbin
+        # equivalent to the size of the arms and increasing size of bins
+        xs[i] = np.unique(
+            np.logspace(0, n_bins, num=n_bins + 1, base=base, dtype=int)
+        )
+    return xs
+
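+# Worked example of the binning above: for a 10 kb segment and base=1.1,
+# n_bins = int(log(10000) / log(1.1)) = 96, so the logspace (after np.unique)
+# yields bin starts 1, 2, 3, ... growing by roughly 10% per bin up to ~9400 bp.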
+
+def circular_distance_law(distance, chr_segment_length, chr_bin):
+    """Recalculate the distance to return the distance in a circular chromosome
+    and not the distance between the two genomic positions.
+    Parameters
+    ----------
+    distance : int
+        Distance between two fragments with a contact.
+    chr_segment_length : list of floats
+        List of the size in base pairs of the different arms or chromosomes.
+    chr_bin : int
+        Index of the chromosome/arm containing the two fragments.
+    Returns
+    -------
+    int :
+        The real distance in the circular chromosome, rather than the distance
+        between the two genomic positions.
+    Examples
+    --------
+    >>> circular_distance_law(7500, [2800, 9000], 1)
+    1500
+    >>> circular_distance_law(1300, [2800, 9000], 0)
+    1300
+    >>> circular_distance_law(1400, [2800, 9000], 0)
+    1400
+    """
+    chr_len = chr_segment_length[chr_bin]
+    if distance > chr_len / 2:
+        distance = chr_len - distance
+    return distance
+
+
+def get_pairs_distance(
+    line, fragments, chr_segment_bins, chr_segment_length, xs, ps, circular=False
+):
+    """From a line of a pair reads file, filter -/+ or +/- reads, keep only the
+    reads in the same chromosome/arm and compute the distance of the the two
+    fragments. It modify the input ps in order to count or not the line given.
+    It will add one in the logbin corresponding to the distance.
+    Parameters
+    ----------
+    line : OrderedDict
+        Line of a pairs file with these keys: readID, chr1, pos1, chr2,
+        pos2, strand1, strand2, frag1, frag2. The values are in a dictionary.
+    fragments : pandas.DataFrame
+        Table containing the ID of the fragment in the first column, the name
+        of the chromosome in the second, and the start and end positions of the
+        fragment in the third and fourth columns. The table has no header.
+        (File like the 'fragments_list.txt' from hicstuff)
+    chr_segment_bins : list of floats
+        The start and end indices of chromosomes/arms to compute the distance
+        law on each chromosome/arm separately.
+    chr_segment_length: list of floats
+        List of the size in base pairs of the different arms or chromosomes.
+    xs : list of lists
+        The start coordinate of each bin, one list per chromosome or arm.
+    ps : list of lists
+        The sum of contacts already counted. xs and ps should have the same
+        dimensions.
+    circular : bool
+        If True, calculate the distance as if the chromosome were circular. Default
+        value is False.
+    """
+    # Check this is a pairs_idx file and not simple pairs
+    if line['frag1'] is None:
+        logger.error(
+            'Input pairs file must have frag1 and frag2 columns. In hicstuff '
+            'pipeline, this is the "valid_idx.pairs" file.'
+        )
+    # We only keep +/+ or -/- events. This is done to avoid counting uncut
+    # (religation) events, which cannot occur with these orientations. We can
+    # discard the good +/- and -/+ events because we do not need many reads to
+    # compute the distance law, and removing them introduces no additional bias
+    # since they follow the same distribution.
+    if line["strand1"] == line["strand2"]:
+        # Find in which chromosome/arm are the fragment 1 and 2.
+        chr_bin1 = (
+            np.searchsorted(chr_segment_bins, int(line["frag1"]), side="right") - 1
+        )
+        chr_bin2 = (
+            np.searchsorted(chr_segment_bins, int(line["frag2"]), side="right") - 1
+        )
+        # We only keep the reads with the two fragments in the same chromosome
+        # or arm.
+        if chr_bin1 == chr_bin2:
+            # Even bin indices correspond to arms/chromosomes; odd indices are
+            # the (removed) centromere segments.
+            if chr_bin1 % 2 == 0:
+                chr_bin1 = int(chr_bin1 / 2)
+                # For the reads -/-, the fragments should be religated with both
+                # their start position (position in the left on the genomic
+                # sequence, 5'). For the reads +/+ it's the contrary. We compute
+                # the distance as the distance between the two extremities which
+                # are religated.
+                if line["strand1"] == "-":
+                    distance = abs(
+                        np.array(fragments["start_pos"][int(line["frag1"])])
+                        - np.array(fragments["start_pos"][int(line["frag2"])])
+                    )
+                if line["strand1"] == "+":
+                    distance = abs(
+                        np.array(fragments["end_pos"][int(line["frag1"])])
+                        - np.array(fragments["end_pos"][int(line["frag2"])])
+                    )
+                if circular:
+                    distance = circular_distance_law(
+                        distance, chr_segment_length, chr_bin1
+                    )
+                xs_temp = xs[chr_bin1][:]
+                # Find the logbins in which the distance is and add one to the sum
+                # of contact.
+                ps_indice = np.searchsorted(xs_temp, distance, side="right") - 1
+                ps[chr_bin1][ps_indice] += 1
+
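+# Worked example of the distance computed above (made-up values): for a +/+
+# pair whose fragments end at 12000 bp and 47500 bp on the same arm, the
+# distance is |47500 - 12000| = 35500, and one count is added to the logbin
+# whose start coordinate is the largest value <= 35500.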
+
+def get_names(fragments, chr_segment_bins):
+    """Make a list of the names of the arms or the chromosomes.
+    Parameters
+    ----------
+    fragments : pandas.DataFrame
+        Table containing the ID of the fragment in the first column, the name
+        of the chromosome in the second, and the start and end positions of the
+        fragment in the third and fourth columns. The table has no header.
+        (File like the 'fragments_list.txt' from hicstuff)
+    chr_segment_bins : list of floats
+        The start position of chromosomes/arms to compute the distance law on
+        each chromosome/arm separately.
+    Returns
+    -------
+    list of strings :
+        List of the labels given to the curves, i.e. the names of the arms
+        or chromosomes.
+    """
+    # Get the name of the chromosomes.
+    chr_names_idx = np.unique(fragments.iloc[:, 1], return_index=True)[1]
+    chr_names = [fragments.iloc[:, 1][index] for index in sorted(chr_names_idx)]
+    # Case where chromosomes are split into arms
+    if len(chr_segment_bins) / 2 != len(chr_names):
+        names = []
+        for chr in chr_names:
+            names.append(str(chr) + "_left")
+            names.append(str(chr) + "_rigth")
+        chr_names = names
+    return chr_names
+
+
+def get_distance_law(
+    pairs_reads_file,
+    fragments_file,
+    centro_file=None,
+    base=1.1,
+    out_file=None,
+    circular=False,
+    rm_centro=0,
+):
+    """Compute distance law as a function of the genomic coordinate aka P(s).
+    Bin length increases exponentially with distance. Works on pairs file
+    format from 4D Nucleome Omics Data Standards Working Group. If the genome
+    is composed of several chromosomes and you want to compute the arms
+    separately, provide a file with the positions of centromeres. Creates a file
+    with three columns separated by a tabulation. The first column contains
+    the xs, the second the ps and the third the name of the arm or chromosome.
+    The file is created at the path given by out_file or in the current
+    directory if no path is given.
+    Parameters
+    ----------
+    pairs_reads_file : string
+        Path of a pairs file format from 4D Nucleome Omics Data Standards
+        Working Group, where the 8th and 9th columns are the IDs of the
+        fragments of reads 1 and 2.
+    fragments_file : path
+        Path of a table containing the ID of the fragment in the first column,
+        the name of the chromosome in the second, and the start and end
+        positions of the fragment in the third and fourth columns. The file has
+        no header. (File like the 'fragments_list.txt' from hicstuff)
+    centro_file : None or str
+        None or path to a file with the genomic positions of the centromeres,
+        ordered like the chromosomes and separated by spaces. The file has only
+        one line.
+    base : float
+        Base used to construct the logspace of the bins - 1.1 by default.
+    out_file : None or str
+        Path of the output file. If no path given, the output is returned.
+    circular : bool
+        If True, calculate the distance as if the chromosome were circular. Default
+        value is False. Cannot be True if centro_file is not None
+    rm_centro : int
+        If a value is given, the contacts close to the centromeres will be
+        removed; it removes as many kb as the value given. Default is 0.
+    Returns
+    -------
+    xs : list of numpy.ndarray
+        Basepair coordinates of log bins used to compute distance law.
+    ps : list of numpy.ndarray
+        Contacts value, in arbitrary units, at increasingly long genomic ranges
+        given by xs.
+    names : list of strings
+        Names of chromosomes that are plotted
+    """
+    # Sanity check: centro_file should be None if chromosomes are
+    # circular (circular chromosomes have no centromeres).
+    if circular and centro_file != None:
+        logger.error("Chromosomes cannot have a centromere and be circular")
+        sys.exit(1)
+    # Import the first four columns of the fragments file
+    fragments = pd.read_csv(fragments_file, sep="\t", header=0, usecols=[0, 1, 2, 3])
+    # Calculate the indices of the bins to separate into chromosomes/arms
+    chr_segment_bins = get_chr_segment_bins_index(fragments, centro_file, rm_centro)
+    # Calculate the length of each chromosome/arm
+    chr_segment_length = get_chr_segment_length(fragments, chr_segment_bins)
+    xs = logbins_xs(fragments, chr_segment_length, base, circular)
+    # Create the list of p(s) with one array for each chromosome/arm and each
+    # array contain as many values as in the logbin
+    ps = [None] * len(chr_segment_length)
+    for i in range(len(xs)):
+        ps[i] = [0] * len(xs[i])
+    # Read the pair reads file
+    with open(pairs_reads_file, "r", newline="") as reads:
+        # Skip the header lines
+        header_length = len(hio.get_pairs_header(pairs_reads_file))
+        for i in range(header_length):
+            next(reads)
+        # Read all the other lines and put the values in a dictionary with
+        # the keys: 'readID', 'chr1', 'pos1', 'chr2', 'pos2', 'strand1',
+        # 'strand2', 'frag1', 'frag2'
+        reader = csv.DictReader(
+            reads,
+            fieldnames=[
+                "readID",
+                "chr1",
+                "pos1",
+                "chr2",
+                "pos2",
+                "strand1",
+                "strand2",
+                "frag1",
+                "frag2",
+            ],
+            delimiter="\t",
+        )
+        for line in reader:
+            # Iterate in each line of the file after the header
+            get_pairs_distance(
+                line, fragments, chr_segment_bins, chr_segment_length, xs, ps, circular
+            )
+    # Divide the number of contacts by the area of the logbin
+    for i in range(len(xs)):
+        n = chr_segment_length[i]
+        for j in range(len(xs[i]) - 1):
+            # Use the area of a trapezium to know the area of the logbin with n
+            # the size of the matrix.
+            ps[i][j] /= ((2 * n - xs[i][j + 1] - xs[i][j]) / 2) * (
+                (1 / np.sqrt(2)) * (xs[i][j + 1] - xs[i][j])
+            )
+        # Case of the last logbin which is an isosceles rectangle triangle
+        ps[i][-1] /= ((n - xs[i][-1]) ** 2) / 2
+    names = get_names(fragments, chr_segment_bins)
+    if out_file:
+        export_distance_law(xs, ps, names, out_file)
+    return xs, ps, names
+
+
+def normalize_distance_law(xs, ps, inf=3000, sup=None):
+    """Normalize the distance in order to have the sum of the ps values between
+    'inf' (default value is 3kb) until the end of the array equal to one and
+    limit the effect of coverage between two conditions/chromosomes/arms when
+    you compare them together. If we have a list of ps, it will normalize until
+    the length of the shorter object or the value of sup, whichever is smaller.
+    Parameters
+    ----------
+    xs : list of numpy.ndarray
+        list of logbins corresponding to the ps.
+    ps : list of numpy.ndarray
+        Average ps or list of ps of the chromosomes/arms. xs and ps have to
+        have the same shape.
+    inf : integer
+        Lower bound of the interval on which the normalization is applied.
+    sup : integer
+        Upper bound of the interval on which the normalization is applied.
+    Returns
+    -------
+    list of numpy.ndarray :
+        List of ps each normalized separately.
+    """
+    # Sanity check: xs and ps have the same dimension
+    if np.shape(xs) != np.shape(ps):
+        logger.error("xs and ps should have the same dimension.")
+        sys.exit(1)
+    # Use the length of the shortest chromosome as an upper cap on the sup boundary
+    min_xs = len(min(xs, key=len))
+    normed_ps = [None] * len(ps)
+    if sup is None:
+        sup = np.inf
+    for chrom_id, chrom_ps in enumerate(ps):
+        # Iterate on the different ps to normalize each of them separately
+        chrom_sum = 0
+        # Replace the last value by the previous one to keep the curve
+        # continuous: the last bin is much bigger (computed on the matrix
+        # corner, a triangle instead of a trapezium).
+        chrom_ps[-1] = chrom_ps[-2]
+        for bin_id, bin_value in enumerate(chrom_ps):
+            # Compute normalization factor based on values between inf and sup
+            # Sup will be whatever is smaller between user-provided sup and length of
+            # the shortest chromosome
+            if (xs[chrom_id][bin_id] > inf) and (xs[chrom_id][bin_id] < sup) and (bin_id < min_xs):
+                chrom_sum += bin_value
+        if chrom_sum == 0:
+            chrom_sum += 1
+            logger.warning("No values of p(s) in one segment")
+        # Apply the normalization
+        normed_ps[chrom_id] = np.array(ps[chrom_id]) / chrom_sum
+    return normed_ps
+
+
+def average_distance_law(xs, ps, sup, big_arm_only=False):
+    """Compute the average distance law between the file the different distance
+    law of the chromosomes/arms.
+    Parameters
+    ----------
+    xs : list of numpy.ndarray
+        The list of logbins.
+    ps : list of numpy.ndarray
+        The list of p(s), one array per chromosome/arm.
+    sup : int
+        Minimum size of the chromosomes/arms required to include them in the
+        average (only used when big_arm_only is True).
+    big_arm_only : bool
+        By default False. If True, only arms/chromosomes longer than sup are
+        taken into account. sup is mandatory if set.
+    Returns
+    -------
+    numpy.ndarray :
+        The xs of the longest chromosome/arm.
+    numpy.ndarray :
+        The averaged ps values.
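+    Examples
+    --------
+    A minimal example with two toy segments of unequal length:
+
+    >>> xs = [np.array([0, 100, 200]), np.array([0, 100, 200, 400])]
+    >>> ps = [np.array([1.0, 2.0, 4.0]), np.array([2.0, 4.0, 8.0, 16.0])]
+    >>> x_avg, p_avg = average_distance_law(xs, ps, 0)
+    >>> x_avg.tolist()
+    [0, 100, 200, 400]
+    >>> p_avg.tolist()
+    [1.5, 3.0, 5.0, 8.0]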
+    """
+    # Find the longest chromosome/arm and make two arrays of this length to
+    # accumulate the sums and the occurrence counts of the distance law values
+    xs = max(xs, key=len)
+    max_length = len(xs)
+    ps_values = np.zeros(max_length)
+    ps_occur = np.zeros(max_length)
+    for chrom_ps in ps:
+        # Iterate on ps to accumulate the number of occurrences (not all
+        # chromosomes/arms are as long as the longest one) and the sum of
+        # the distance law values.
+        # Replace the last value by the previous one to keep the curve
+        # continuous, because the last bin is much bigger.
+        chrom_ps[-1] = chrom_ps[-2]
+        # Sanity check: sup must be strictly smaller than the longest arm/chromosome.
+        if big_arm_only:
+            if sup >= xs[-1]:
+                logger.error(
+                    "sup have to be inferior to the max length of arms/chromsomes if big arm only set"
+                )
+                sys.exit(1)
+            if sup <= xs[len(chrom_ps) - 1]:
+                ps_occur[: len(chrom_ps)] += 1
+                ps_values[: len(chrom_ps)] += chrom_ps
+        else:
+            ps_occur[: len(chrom_ps)] += 1
+            ps_values[: len(chrom_ps)] += chrom_ps
+    # Make the mean
+    averaged_ps = ps_values / ps_occur
+    return xs, averaged_ps
+
+
+def slope_distance_law(xs, ps):
+    """Compute the slope of the loglog curve of the ps as the
+    [log(ps(n+1)) - log(ps(n))] / [log(n+1) - log(n)].
+    Compute only list of ps, not list of array.
+    Parameters
+    ----------
+    xs : list of numpy.ndarray
+        The list of logbins.
+    ps : list of numpy.ndarray
+        The list of ps.
+    Returns
+    -------
+    list of numpy.ndarray :
+        The slope of the distance law. Each array is one value shorter than
+        the corresponding ps given initially.
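+    Examples
+    --------
+    A minimal example with a toy power law p(s) ~ 1/s, whose log-log slope is -1:
+
+    >>> xs = [np.array([1.0, 2.0, 4.0, 8.0, 16.0])]
+    >>> ps = [np.array([1.0, 0.5, 0.25, 0.125, 0.0625])]
+    >>> np.round(slope_distance_law(xs, ps)[0], 2).tolist()
+    [-1.0, -1.0, -1.0, -1.0]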
+    """
+    slope = [None] * len(ps)
+    for i in range(len(ps)):
+        # Avoid taking the log of zero by replacing null values with a small pseudocount
+        ps[i][ps[i] == 0] = 10 ** (-9)
+        # Compute the slope
+        slope_temp = np.log(np.array(ps[i][1:]) / np.array(ps[i][:-1])) / np.log(
+            np.array(xs[i][1:]) / np.array(xs[i][:-1])
+        )
+        # Replace any NaN values before smoothing
+        slope_temp[np.isnan(slope_temp)] = 10 ** (-15)
+        # Smooth the slope; the sigma of 1.8 sets the smoothing intensity and could be adapted
+        slope[i] = ndimage.gaussian_filter1d(slope_temp, 1.8)
+    return slope
+
+
+def get_ylim(xs, curve, inf, sup):
+    """Compute the max and the min of the list of list between the inferior
+    (inf) and superior (sup) bounds.
+    Parameters
+    ----------
+    xs : list of numpy.ndarray
+        The list of the logbins starting positions in base pairs.
+    curve : list of numpy.ndarray
+        A list of numpy.ndarray from which you want to extract minimum and
+        maximum values in a given interval.
+    inf : int
+        Lower limit of the interval in base pairs.
+    sup : int
+        Upper limit of the interval in base pairs.
+    Returns
+    -------
+    min_tot : float
+        Minimum value of the list of lists in this interval.
+    max_tot : float
+        Maximum value of the list of lists in this interval.
+    Examples
+    --------
+    >>> get_ylim([np.array([1, 4, 15]), np.array([1, 4, 15, 40])],
+    ... [np.array([5.5, 3.2, 17.10]), np.array([24, 32, 1.111, 18.5])],
+    ... 2,
+    ... 15
+    ... )
+    (1.111, 32.0)
+    """
+    # Create a list in order to add all the interesting values
+    flatten_list = []
+    for i, logbins in enumerate(xs):
+        # Iterate on xs.
+        # Search for the minimum index corresponding to the smallest bin
+        # greater than or equal to inf (in base pairs).
+        min_value = min(logbins[logbins >= inf], default=0)
+        min_index = np.where(logbins == min_value)[0]
+        # Skip chromosome if total size smaller than inf
+        if len(min_index) == 0:
+            continue
+        # Search for the maximum index corresponding to the biggest bin
+        # smaller than or equal to sup (in base pairs).
+        max_value = max(logbins[logbins <= sup])
+        max_index = np.where(logbins == max_value)[0]
+        # Add the values in the interval in the flattened list.
+        if int(max_index) != len(logbins) - 1:
+            max_index += 1
+        for j in range(int(min_index), int(max_index)):
+            flatten_list.append(curve[i][j])
+    # Calculate the min and the max of this list.
+    min_tot = min(flatten_list)
+    max_tot = max(flatten_list)
+    return min_tot, max_tot
+
+
+def plot_ps_slope(xs, ps, labels, fig_path=None, inf=3000, sup=None):
+    """Compute two plots, one with the different distance law of each
+    arm/chromosome in loglog scale and one with the slope (derivative) of these
+    curves. Generate a svg file with savefig.
+    Parameters
+    ----------
+    xs : list of numpy.ndarray
+        The list of the logbins of each ps.
+    ps : list of numpy.ndarray
+        The list of ps.
+    labels : list of strings
+        Names of the different curves in the order in which they are given.
+    fig_path : str
+        Path where the figure will be created. If None (default), the plot is
+        shown in an interactive window.
+    inf : int
+        Minimum x value of the plot window. Has to be strictly positive.
+        By default 3000.
+    sup : int
+        Maximum x value of the plot window. By default None, i.e. the largest distance in xs.
+
+    """
+    # Use the largest distance in xs as sup if no value has been provided
+    if sup is None:
+        sup = max(max(xs, key=len))
+
+    # Compute slopes from the curves
+    slope = slope_distance_law(xs, ps)
+    # Make the plot of distance law
+    # Give a range of color
+    cols = iter(cm.rainbow(np.linspace(0, 1, len(ps))))
+    fig, (ax1, ax2) = plt.subplots(1, 2, sharex=True, figsize=(18, 10))
+    plt.subplots_adjust(left=0.05, right=0.85, top=0.93, bottom=0.07)
+    ax1.set_xlabel("Distance (pb)", fontsize="x-large")
+    ax1.set_ylabel("P(s)", fontsize="x-large")
+    ax1.set_title("Distance law", fontsize="xx-large")
+    ylim = get_ylim(xs, ps, inf, sup)
+    ax1.set_ylim(0.9 * ylim[0], 1.1 * ylim[1])
+    for i in range(len(ps)):
+        # Iterate on the distance law arrays, assigning each curve a color
+        # from the rainbow colormap
+        col = next(cols)
+        ax1.loglog(xs[i], ps[i], label=labels[i], color=col)
+    # Make the same plot with the slope
+    cols = iter(cm.rainbow(np.linspace(0, 1, len(slope))))
+    ax2.set_xlabel("Distance (bp)", fontsize="x-large")
+    ax2.set_ylabel("Slope", fontsize="x-large")
+    ax2.set_title("Slope of the distance law", fontsize="xx-large")
+    ax2.set_xlim([inf, sup])
+    ylim = get_ylim(xs, slope, inf, sup)
+    ax2.set_ylim(1.1 * ylim[0], 0.9 * ylim[1])
+    xs2 = [None] * len(xs)
+    for i in range(len(slope)):
+        xs2[i] = xs[i][:-1]
+        col = next(cols)
+        ax2.semilogx(xs2[i], slope[i], label=labels[i], color=col, subs=[2, 3, 4, 5, 6, 7, 8, 9])
+    ax2.legend(loc="upper left", bbox_to_anchor=(1.02, 1.00), ncol=1, fontsize="large")
+    # Save the figure in svg
+    if fig_path is not None:
+        plt.savefig(fig_path)
+    else:
+        plt.show()
+
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-i", "--input")
+    parser.add_argument("-f", "--fragment")
+    parser.add_argument("-c", "--centro_file")
+    parser.add_argument("-b", "--base")
+    parser.add_argument("-p", "--prefix")
+    parser.add_argument("-o", "--out_file")
+    parser.add_argument("-u", "--circular")
+    parser.add_argument("-r", "--rm_centro")
+    parser.add_argument("-l", "--plot")
+    parser.add_argument("-d", '--dist_plot')
+    args = parser.parse_args()
+
+    input = args.input
+    fragment = args.fragment
+    prefix = args.prefix
+    centro_file = args.centro_file
+    if centro_file == "None":
+        centro_file = None
+    else:
+        centro_file = prefix+"_"+centro_file
+    base = float(args.base)
+    out_file = prefix+"_"+args.out_file
+    circular = args.circular
+    if circular.lower() == "false":
+        circular = False
+    elif circular.lower() == "true":
+        circular = True
+    rm_centro = args.rm_centro
+    if rm_centro == "None":
+        rm_centro = 0
+    rm_centro = int(rm_centro)
+    plot = args.plot
+    if plot.lower() == "false":
+        plot = False
+    elif plot.lower() == "true":
+        plot = True
+    distance_law_plot = args.dist_plot
+    if distance_law_plot == "None":
+        distance_law_plot = None
+    else:
+        distance_law_plot = prefix+"_"+distance_law_plot
+
+    x_s, p_s, _ = get_distance_law(
+        input,
+        fragment,
+        centro_file,
+        base,
+        out_file,
+        circular,
+        rm_centro
+    )
+
+    if plot:
+        # Retrieve chrom labels from distance law file
+        _, _, chr_labels = import_distance_law(out_file)
+        chr_labels = [lab[0] for lab in chr_labels]
+        chr_labels_idx = np.unique(chr_labels, return_index=True)[1]
+        chr_labels = [chr_labels[index] for index in sorted(chr_labels_idx)]
+        p_s = normalize_distance_law(x_s, p_s)
+        plot_ps_slope(x_s, p_s, labels=chr_labels, fig_path=distance_law_plot)
diff --git a/bin/hicstuff_filter.py b/bin/hicstuff_filter.py
new file mode 100755
index 0000000000000000000000000000000000000000..23b74245b8440ab3bbca398618f405d629f3d610
--- /dev/null
+++ b/bin/hicstuff_filter.py
@@ -0,0 +1,569 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+"""Hi-C event filtering
+Analyse the contents of a 3C library and filter out spurious events such as
+loops and uncuts to improve the overall signal. Filtering consists of
+excluding +- and -+ Hi-C pairs whose reads are closer than a threshold
+expressed as a minimum number of restriction fragments. This threshold
+represents the distance at which the abundance of these events deviates
+significantly from the rest of the library. By default it is estimated
+automatically using the median absolute deviation of pairs at longer
+distances, but it can also be entered manually.
+The program takes a 2D BED file as input with the following fields:
+chromA startA endA indiceA strandA chromB startB endB indiceB strandB
+Each line of the file represents a Hi-C pair with reads A and B. The indices
+are 0-based and represent the restriction fragment to which reads are
+attributed.
+@author: cmdoret (reimplementation of Axel KournaK's code)
+"""
+
+import sys
+import numpy as np
+from collections import OrderedDict
+import matplotlib.pyplot as plt
+from hicstuff_log import logger
+import argparse
+
+
+def process_read_pair(line):
+    r"""Process and order read pairs in a .pairs record.
+    Takes a read pair (line) from a .pairs file as input, reorders the pair
+    so that read 1 in intrachromosomal pairs always has the smallest genomic
+    coordinate.
+    Parameters
+    ----------
+    line : str
+        Record from a pairs file with columns: readID, chr1, pos1, chr2,
+        pos2, strand1, strand2, frag1, frag2
+    Returns
+    -------
+    dict
+        Dictionary with the reordered pair where each column from the line is an
+        entry. The number of restriction sites separating reads in the pair and
+        the type of event are also stored in the dictionary, under keys
+        "nsites" and "type".
+    Examples
+    --------
+        >>> d = process_read_pair("readX\ta\t1\tb\t20\t-\t-\t1\t3")
+        >>> for u in sorted(d.items()):
+        ...     print(u)
+        ('chr1', 'a')
+        ('chr2', 'b')
+        ('frag1', 1)
+        ('frag2', 3)
+        ('nsites', 2)
+        ('pos1', 1)
+        ('pos2', 20)
+        ('readID', 'readX')
+        ('strand1', '-')
+        ('strand2', '-')
+        ('type', 'inter')
+        >>> d = process_read_pair('readY\ta\t20\ta\t10\t-\t+\t2\t1')
+        >>> [d[x] for x in sorted(d.keys())]
+        ['a', 'a', 1, 2, 1, 10, 20, 'readY', '+', '-', '+-']
+    """
+    # Split line by whitespace
+    p = line.split("\t")
+    if len(p) != 9:
+        raise ValueError(
+            "Your input file does not have 9 columns. Make sure "
+            "the file has the readID and twice the following 4 fields "
+            "(once for each read in the pair): chr pos strand frag."
+        )
+    # Saving each column as a dictionary entry with meaningful key
+    cols = [
+        "readID",
+        "chr1",
+        "pos1",
+        "chr2",
+        "pos2",
+        "strand1",
+        "strand2",
+        "frag1",
+        "frag2",
+    ]
+    p = {cols[i]: p[i] for i in range(len(cols))}
+    # Transforming numeric columns to int
+    for col in ["pos1", "pos2", "frag1", "frag2"]:
+        p[col] = int(p[col])
+
+    # invert records for intrachromosomal pairs where rec2 comes before
+    # rec1 in genomic coordinates
+    if p["chr1"] == p["chr2"] and p["pos2"] < p["pos1"]:
+        p["strand1"], p["strand2"] = p["strand2"], p["strand1"]
+        p["pos1"], p["pos2"] = p["pos2"], p["pos1"]
+        p["frag1"], p["frag2"] = p["frag2"], p["frag1"]
+
+    # Number of restriction sites separating reads in the pair
+    p["nsites"] = abs(p["frag2"] - p["frag1"])
+    # Get event type
+    if p["chr1"] == p["chr2"]:
+        p["type"] = "".join([p["strand1"], p["strand2"]])
+    else:
+        p["type"] = "inter"
+
+    return p
+
+
+def get_thresholds(
+    in_dat, interactive=False, plot_events=False, fig_path=None, prefix=None
+):
+    """Guess distance threshold for event filtering
+    Analyse the events in the first million Hi-C pairs of the library, plot
+    the occurrences of each event type according to number of restriction
+    fragments, and ask user interactively for the minimum threshold for uncuts
+    and loops.
+    Parameters
+    ----------
+    in_dat: str
+        Path to the .pairs file containing Hi-C pairs.
+    interactive: bool
+        If True, plots are displayed and thresholds are requested interactively.
+    plot_events : bool
+        Whether to show the plot
+    fig_path : str
+        Path where the figure will be saved. If None, the figure will be
+        displayed interactively.
+    prefix : str
+        If the library has a name, it will be shown on plots.
+
+    Returns
+    -------
+    tuple of int
+        The thresholds for uncuts and loops, either entered interactively by
+        the user or estimated from the data.
+    """
+    thr_uncut = None
+    thr_loop = None
+    max_sites = 50
+    # Map of event -> legend name of event for intrachromosomal pairs.
+    legend = {
+        "++": "++ (weird)",
+        "--": "-- (weird)",
+        "+-": "+- (uncuts)",
+        "-+": "-+ (loops)",
+    }
+    colors = {"++": "#222222", "+-": "r", "--": "#666666", "-+": "tab:orange"}
+    n_events = {event: np.zeros(max_sites) for event in legend}
+    i = 0
+    # open the file for reading (just the first 1 000 000 lines)
+    with open(in_dat, "r") as pairs:
+        for line in pairs:
+            # Skip header lines
+            if line.startswith("#"):
+                continue
+            i += 1
+            # Only use the first million pairs to estimate thresholds
+            if i == 1000000:
+                break
+            # Process Hi-C pair into a dictionary
+            p = process_read_pair(line)
+            # Type of event and number of restriction site between reads
+            etype = p["type"]
+            nsites = p["nsites"]
+            # Count number of events for intrachrom pairs
+            if etype != "inter" and nsites < max_sites:
+                n_events[etype][nsites] += 1
+
+    def plot_event(n_events, legend, name):
+        """Plot the frequency of a given event types over distance."""
+        plt.xlim([-0.5, 15])
+        plt.plot(
+            range(n_events[name].shape[0]),
+            n_events[name],
+            "o-",
+            label=legend[name],
+            linewidth=2.0,
+            c=colors[name],
+        )
+
+    if interactive:
+        # Plot:
+        try:
+            plt.figure(0)
+            for event in legend:
+                plot_event(n_events, legend, event)
+            plt.grid()
+            plt.xlabel("Number of restriction fragment(s)")
+            plt.ylabel("Number of events")
+            plt.yscale("log")
+            plt.legend()
+            plt.show(block=False)
+
+        except Exception:
+            logger.error(
+                "Unable to show plots, skipping figure generation. Perhaps "
+                "there is no Xserver running ? (might be due to windows "
+                "environment). Try running without the interactive option."
+            )
+
+        # Ask the user for appropriate thresholds
+        print(
+            "Please enter the number of restriction fragments separating "
+            "reads in a Hi-C pair below or at which loops and "
+            "uncuts events will be excluded\n",
+            file=sys.stderr,
+        )
+        thr_uncut = int(input("Enter threshold for the uncuts events (+-):"))
+        thr_loop = int(input("Enter threshold for the loops events (-+):"))
+        try:
+            plt.clf()
+        except Exception:
+            pass
+    else:
+        # Estimate thresholds from data
+        for event in n_events:
+            fixed = n_events[event]
+            fixed[fixed == 0] = 1
+            n_events[event] = fixed
+
+        all_events = np.log(np.array(list(n_events.values())))
+        # Compute the median occurrences at each number of restriction sites
+        event_med = np.median(all_events, axis=0)
+        # Compute MAD, to have a robust estimator of the expected deviation
+        # from median at long distances
+        mad = np.median(abs(all_events - event_med))
+        exp_stdev = mad / 0.67449
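+        # For normally distributed values, MAD = 0.67449 * standard deviation,
+        # so dividing by 0.67449 converts the MAD into a robust stdev estimate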
+        # Iterate over sites, from furthest to frag+2
+        for site in range(max_sites)[:1:-1]:
+            # For uncuts and loops, keep the last (closest) site where the
+            # deviation from other events <= expected_stdev
+            if (
+                abs(np.log(n_events["+-"][site]) - event_med[site])
+                <= exp_stdev
+            ):
+                thr_uncut = site
+            if (
+                abs(np.log(n_events["-+"][site]) - event_med[site])
+                <= exp_stdev
+            ):
+                thr_loop = site
+        if thr_uncut is None or thr_loop is None:
+            raise ValueError(
+                "The threshold for loops or uncut could not be estimated. "
+                "Please try running with -i to investigate the problem."
+            )
+        logger.info(
+            "Filtering with thresholds: uncuts={0} loops={1}".format(
+                thr_uncut, thr_loop
+            )
+        )
+        if plot_events:
+            try:
+                plt.figure(1)
+                plt.xlim([-0.5, 15])
+                # Draw colored lines for events to discard
+                plt.plot(
+                    range(0, thr_uncut + 1),
+                    n_events["+-"][: thr_uncut + 1],
+                    "o-",
+                    c=colors["+-"],
+                    label=legend["+-"],
+                )
+                plt.plot(
+                    range(0, thr_loop + 1),
+                    n_events["-+"][: thr_loop + 1],
+                    "o-",
+                    c=colors["-+"],
+                    label=legend["-+"],
+                )
+                plt.plot(
+                    range(0, 2),
+                    n_events["--"][:2],
+                    "o-",
+                    c=colors["--"],
+                    label=legend["--"],
+                )
+                plt.plot(
+                    range(0, 2),
+                    n_events["++"][:2],
+                    "o-",
+                    c=colors["++"],
+                    label=legend["++"],
+                )
+                # Draw black lines for events to keep
+                plt.plot(
+                    range(thr_uncut, n_events["+-"].shape[0]),
+                    n_events["+-"][thr_uncut:],
+                    "o-",
+                    range(thr_loop, n_events["-+"].shape[0]),
+                    n_events["-+"][thr_loop:],
+                    "o-",
+                    range(1, n_events["--"].shape[0]),
+                    n_events["--"][1:],
+                    "o-",
+                    range(1, n_events["++"].shape[0]),
+                    n_events["++"][1:],
+                    "o-",
+                    label="kept",
+                    linewidth=2.0,
+                    c="g",
+                )
+                plt.grid()
+                plt.xlabel("Number of restriction site(s)")
+                plt.ylabel("Number of events")
+                plt.yscale("log")
+                # Remove duplicate "kept" entries in legend
+                handles, labels = plt.gca().get_legend_handles_labels()
+                by_label = OrderedDict(zip(labels, handles))
+                plt.legend(by_label.values(), by_label.keys())
+                # Show uncut and loop threshold as vertical lines
+                plt.axvline(x=thr_loop, color=colors["-+"])
+                plt.axvline(x=thr_uncut, color=colors["+-"])
+
+                if prefix:
+                    plt.title(
+                        "Library events by distance in {}".format(prefix)
+                    )
+                plt.tight_layout()
+                if fig_path:
+                    plt.savefig(fig_path)
+                else:
+                    plt.show(block=False)
+                # plt.clf()
+
+            except Exception:
+                logger.error(
+                    "Unable to show plots, skipping figure generation. Is "
+                    "an X server running? (might be due to windows "
+                    "environment). Try running without the plot option."
+                )
+    return thr_uncut, thr_loop
+
+
+def filter_events(
+    in_dat,
+    out_filtered,
+    thr_uncut,
+    thr_loop,
+    plot_events=False,
+    fig_path=None,
+    prefix=None,
+):
+    """Filter events (loops, uncuts and weirds)
+    Filter out spurious intrachromosomal Hi-C pairs from input file. +- pairs
+    with reads closer or at the uncut threshold and -+ pairs with reads closer
+    or at the loop thresholds are excluded from the ouput file. -- and ++ pairs
+    with both mates on the same fragments are also discarded. All others are
+    written.
+    Parameters
+    ----------
+    in_dat : str
+        Path to the input pairs file containing the Hi-C pairs.
+    out_filtered : str
+        Path to the output filtered pairs file.
+    thr_uncut : int
+        Minimum number of restriction sites between reads to keep an
+        intrachromosomal +- pair.
+    thr_loop : int
+        Minimum number of restriction sites between reads to keep an
+        intrachromosomal -+ pair.
+    plot_events : bool
+        If True, a plot showing the proportion of each type of event will be
+        shown after filtering.
+    fig_path : str
+        Path where the figure will be saved. If None, figure is displayed
+        interactively.
+    prefix : str
+        If the library has a name, it will be shown on plots.
+    """
+    n_uncuts = 0
+    n_loops = 0
+    n_weirds = 0
+    lrange_intra = 0
+    lrange_inter = 0
+
+    # open the files for reading and writing
+    with open(in_dat, "r") as pairs, open(out_filtered, "w") as filtered:
+        for line in pairs:  # iterate over each line
+            # Copy header lines to output
+            if line.startswith("#"):
+                filtered.write(line)
+                continue
+
+            p = process_read_pair(line)
+            line_to_write = (
+                "\t".join(
+                    map(
+                        str,
+                        (
+                            p["readID"],
+                            p["chr1"],
+                            p["pos1"],
+                            p["chr2"],
+                            p["pos2"],
+                            p["strand1"],
+                            p["strand2"],
+                            p["frag1"],
+                            p["frag2"],
+                        ),
+                    )
+                )
+                + "\n"
+            )
+            if p["chr1"] == p["chr2"]:
+                # Do not report ++ and -- pairs on the same fragment (impossible)
+                if p["frag1"] == p["frag2"] and p["strand1"] == p["strand2"]:
+                    n_weirds += 1
+                elif p["nsites"] <= thr_loop and p["type"] == "-+":
+                    n_loops += 1
+                elif p["nsites"] <= thr_uncut and p["type"] == "+-":
+                    n_uncuts += 1
+                else:
+                    lrange_intra += 1
+                    filtered.write(line_to_write)
+
+            if p["chr1"] != p["chr2"]:
+                lrange_inter += 1
+                filtered.write(line_to_write)
+
+    if lrange_inter > 0:
+        ratio_inter = round(
+            100 * lrange_inter / float(lrange_intra + lrange_inter), 2
+        )
+    else:
+        ratio_inter = 0
+
+    # Log quick summary of operation results
+    kept = lrange_intra + lrange_inter
+    discarded = n_loops + n_uncuts + n_weirds
+    total = kept + discarded
+    logger.info(
+        "Proportion of inter contacts: {0}% (intra: {1}, "
+        "inter: {2})".format(ratio_inter, lrange_intra, lrange_inter)
+    )
+    logger.info(
+        "{0} pairs discarded: Loops: {1}, Uncuts: {2}, Weirds: {3}".format(
+            discarded, n_loops, n_uncuts, n_weirds
+        )
+    )
+    logger.info(
+        "{0} pairs kept ({1}%)".format(
+            kept, round(100 * kept / (kept + discarded), 2)
+        )
+    )
+
+    # Visualize summary if requested by user
+    if plot_events:
+        try:
+            # Plot: make a square figure and axes to plot a pie chart
+            plt.figure(2, figsize=(6, 6))
+            # The slices will be ordered and plotted counter-clockwise.
+            fracs = [n_uncuts, n_loops, n_weirds, lrange_intra, lrange_inter]
+            # Format labels to include event names and proportion
+            labels = list(
+                map(
+                    lambda x: (x[0] + ": %.2f%%") % (100 * x[1] / total),
+                    [
+                        ("Uncuts", n_uncuts),
+                        ("Loops", n_loops),
+                        ("Weirds", n_weirds),
+                        ("3D intra", lrange_intra),
+                        ("3D inter", lrange_inter),
+                    ],
+                )
+            )
+            colors = ["salmon", "lightskyblue", "yellow", "palegreen", "plum"]
+            patches, _ = plt.pie(fracs, colors=colors, startangle=90)
+            plt.legend(
+                patches,
+                labels,
+                loc='upper left',
+                bbox_to_anchor=(-0.1, 1.),
+            )
+            if prefix:
+                plt.title(
+                    "Distribution of library events in {}".format(prefix),
+                    bbox={"facecolor": "1.0", "pad": 5},
+                )
+            plt.text(
+                0.3,
+                1.15,
+                "Threshold Uncuts = " + str(thr_uncut),
+                fontdict=None,
+            )
+            plt.text(
+                0.3,
+                1.05,
+                "Threshold Loops = " + str(thr_loop),
+                fontdict=None,
+            )
+
+            plt.text(
+                -1.5,
+                -1.2,
+                "Total number of reads = " + str(total),
+                fontdict=None,
+            )
+            plt.text(
+                -1.5,
+                -1.3,
+                "Ratio inter/(intra+inter) = " + str(ratio_inter) + "%",
+                fontdict=None,
+            )
+            percentage = round(
+                100
+                * float(lrange_inter + lrange_intra)
+                / (n_loops + n_uncuts + n_weirds + lrange_inter + lrange_intra)
+            )
+            plt.text(
+                -1.5,
+                -1.4,
+                "Selected reads = {0}%".format(percentage),
+                fontdict=None,
+            )
+            if fig_path:
+                plt.savefig(fig_path)
+            else:
+                plt.show()
+            plt.clf()
+        except Exception:
+            logger.error(
+                "Unable to show plots. Perhaps there is no Xserver running ?"
+                "(might be due to windows environment) skipping figure "
+                "generation."
+            )
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-i", "--input")
+    parser.add_argument("-o", "--output")
+    parser.add_argument("-p", "--prefix")
+    parser.add_argument("-l", "--plot")
+    parser.add_argument("-d", '--dist_plot')
+    parser.add_argument("-q", "--pie_plot")
+    args = parser.parse_args()
+
+    pairs_idx = args.input
+    pairs_out = args.output
+    prefix = args.prefix
+    pairs_out = prefix+"_"+pairs_out
+    plot = args.plot
+    if plot.lower() == "false":
+        plot = False
+    elif plot.lower() == "true":
+        plot = True
+    dist_plot = args.dist_plot
+    if dist_plot == "None":
+        dist_plot = None
+    else:
+        dist_plot = prefix+"_"+dist_plot
+    pie_plot = args.pie_plot
+    if pie_plot == "None":
+        pie_plot = None
+    else:
+        pie_plot = prefix+"_"+pie_plot
+
+    uncut_thr, loop_thr = get_thresholds(
+        pairs_idx, plot_events=plot, fig_path=dist_plot, prefix=prefix
+        )
+
+    filter_events(
+        pairs_idx,
+        pairs_out,
+        uncut_thr,
+        loop_thr,
+        plot_events=plot,
+        fig_path=pie_plot,
+        prefix=prefix
+        )
+
diff --git a/bin/hicstuff_filter_pcr.py b/bin/hicstuff_filter_pcr.py
new file mode 100755
index 0000000000000000000000000000000000000000..40c45687d85de9e2e055776761df9152e2361971
--- /dev/null
+++ b/bin/hicstuff_filter_pcr.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import hicstuff_io as hio
+from hicstuff_log import logger
+import argparse
+import os, time, csv, sys, re
+
+def filter_pcr_dup(pairs_idx_file, filtered_file):
+    """
+    Filter out PCR duplicates from a coordinate-sorted pairs file using
+    overrepresented exact coordinates. If multiple pairs have reads with the
+    exact same coordinates, only one of those pairs is kept.
+    Parameters
+    ----------
+    pairs_idx_file : str
+        Path to an indexed pairs file containing the Hi-C reads.
+    filtered_file : str
+        Path to the output pairs file after removing duplicates.
+    """
+    # Keep count of how many reads are filtered
+    filter_count = 0
+    reads_count = 0
+    # Store header lines
+    header = hio.get_pairs_header(pairs_idx_file)
+    with open(pairs_idx_file, "r") as pairs, open(filtered_file, "w") as filtered:
+        # Copy header lines to filtered file
+        for head_line in header:
+            filtered.write(head_line + "\n")
+            next(pairs)
+
+        # Use csv methods to easily access columns
+        paircols = [
+            "readID",
+            "chr1",
+            "pos1",
+            "chr2",
+            "pos2",
+            "strand1",
+            "strand2",
+            "frag1",
+            "frag2",
+        ]
+        # Columns used for comparison of coordinates
+        coord_cols = [col for col in paircols if col != "readID"]
+        pair_reader = csv.DictReader(pairs, delimiter="\t", fieldnames=paircols)
+        filt_writer = csv.DictWriter(filtered, delimiter="\t", fieldnames=paircols)
+
+        # Initialise a variable to store coordinates of reads in previous pair
+        prev = {k: 0 for k in paircols}
+        for pair in pair_reader:
+            reads_count += 1
+            # If coordinates are the same as before, skip pair
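+            # (This relies on the input being coordinate-sorted, so duplicated
+            # pairs always sit on consecutive lines.)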
+            if all(pair[pair_var] == prev[pair_var] for pair_var in coord_cols):
+                filter_count += 1
+                continue
+            # Else write pair and store new coordinates as previous
+            else:
+                filt_writer.writerow(pair)
+                prev = pair
+        logger.info(
+            "%d%% PCR duplicates have been filtered out (%d / %d pairs) "
+            % (100 * round(filter_count / reads_count, 3), filter_count, reads_count)
+        )
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-i", "--input")
+    parser.add_argument("-p", "--prefix")
+    parser.add_argument("-o" ,"--output_file")
+    args = parser.parse_args()
+
+    input = args.input
+    prefix = args.prefix
+    output_file = prefix+"_"+args.output_file
+
+    filter_pcr_dup(input, output_file)
diff --git a/bin/hicstuff_fragments.py b/bin/hicstuff_fragments.py
new file mode 100755
index 0000000000000000000000000000000000000000..5594097f4a8177d32ab879b75d9c18c6f814ee2d
--- /dev/null
+++ b/bin/hicstuff_fragments.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+
+import hicstuff_digest as hcd
+import argparse
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-e", "--enzyme")
+    parser.add_argument("-i", "--index")
+    parser.add_argument("-m", "--min_size")
+    parser.add_argument("-c", "--circular")
+    parser.add_argument("-o", "--output_contigs")
+    parser.add_argument("-f", "--output_frags")
+    parser.add_argument("-p", "--plot")
+    parser.add_argument("-g", "--fig_path")
+    args = parser.parse_args()
+
+    enzyme = args.enzyme
+    index = args.index
+    min_size = args.min_size
+    circular = args.circular
+    if circular.lower() == "false":
+        circular = False
+    elif circular.lower() == "true":
+        circular = True
+    output_contigs = args.output_contigs
+    output_frags = args.output_frags
+    plot = args.plot
+    if plot.lower() == "false":
+        plot = False
+    elif plot.lower() == "true":
+        plot = True
+    fig_path = args.fig_path
+
+
+    # Convert enzyme names to the case-sensitive spelling expected by hicstuff
+    if enzyme == "hindiii":
+        enzyme = "HindIII"
+    elif enzyme == "dpnii":
+        enzyme = "DpnII"
+    elif enzyme == "bglii":
+        enzyme = "BglII"
+    elif enzyme == "mboi":
+        enzyme = "MboI"
+    elif enzyme == "arima":
+        enzyme = "Arima"
+
+
+    # Generate info_contigs and fragments_list output files
+    hcd.write_frag_info(
+        index,
+        enzyme,
+        min_size,
+        circular,
+        output_contigs,
+        output_frags
+    )
+
+    # Log fragment size distribution
+    hcd.frag_len(output_frags, plot=plot, fig_path=fig_path)
diff --git a/bin/hicstuff_io.py b/bin/hicstuff_io.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8c4dde924c52d5e44661a3e528acf1cae53388e
--- /dev/null
+++ b/bin/hicstuff_io.py
@@ -0,0 +1,1379 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import gzip
+import zipfile
+import bz2
+import io
+import os
+import functools
+import sys
+import numpy as np
+import pandas as pd
+import collections
+import subprocess as sp
+from scipy.sparse import tril, triu
+import pathlib
+import re
+from os.path import join, exists
+from random import getrandbits
+from scipy.sparse import coo_matrix
+from Bio import SeqIO, SeqUtils
+from hicstuff_log import logger
+# NOTE: load_sparse_matrix() uses hcs.bin_sparse(); restore the import below
+# (or point it to a local copy of hicstuff) before calling that function.
+#import hicstuff.hicstuff as hcs
+import scipy.stats as ss
+
+DEFAULT_MAX_MATRIX_SHAPE = 10000
+
+DEFAULT_FRAGMENTS_LIST_FILE_NAME = "fragments_list.txt"
+DEFAULT_INFO_CONTIGS_FILE_NAME = "info_contigs.txt"
+DEFAULT_SPARSE_MATRIX_FILE_NAME = "abs_fragments_contacts_weighted.txt"
+
+
+def _cols_to_sparse(sparse_array, shape=None, dtype=np.float64):
+    """
+    Make a coordinate based sparse matrix from columns.
+    Convert (3, n) shaped arrays to a sparse matrix. The first
+    one acts as the row coordinates, the second one as the
+    column coordinates, the third one as the data points
+    for each pair. If duplicates are found, the data points are
+    added.
+    Parameters
+    ----------
+    sparse_array : array_like
+        An array with exactly three columns representing the sparse
+        matrix data in coordinate format.
+    shape : tuple of int
+        The total number of rows and columns in the matrix. Will be estimated
+        from nonzero values if omitted.
+    dtype : type, optional
+        The type of data being loaded. Default is numpy.float64
+    Example
+    -------
+        >>> import numpy as np
+        >>> row, col = np.array([1, 2, 3]), np.array([3, 2, 1])
+        >>> data = np.array([4, 5, 6])
+        >>> M = np.array([row, col, data]).T
+        >>> S = _cols_to_sparse(M)
+        >>> print(S.todense())
+        [[0. 0. 0. 0.]
+         [0. 0. 0. 4.]
+         [0. 0. 5. 0.]
+         [0. 6. 0. 0.]]
+    """
+
+    row = sparse_array[:, 0]
+    col = sparse_array[:, 1]
+    data = sparse_array[:, 2]
+    if shape is None:
+        n = int(max(np.amax(row), np.amax(col))) + 1
+        shape = (n, n)
+
+    S = coo_matrix((data, (row, col)), shape=shape, dtype=dtype)
+    return S
+
+
+def load_sparse_matrix(mat_path, binning=1, dtype=np.float64):
+    """Load sparse matrix
+    Load a text file matrix into a sparse matrix object. The expected format is
+    a 3 column file where columns are row_number, col_number, value. The first
+    line consists of 3 values representing the total number of rows, columns
+    and nonzero values.
+    Parameters
+    ----------
+    mat_path : file, str or pathlib.Path
+        The input matrix file in instagraal format.
+    binning : int or "auto"
+        The binning to perform. If "auto", binning will
+        be automatically inferred so that the matrix size
+        will not go beyond (10000, 10000) in shape. That
+        can be changed by modifying the DEFAULT_MAX_MATRIX_SHAPE
+        value. Default is 1, i.e. no binning is performed
+    dtype : type, optional
+        The type of data being loaded. Default is numpy.float64
+    Returns
+    -------
+    sparse_mat : scipy.sparse.coo_matrix
+        The output (sparse) matrix in COOrdinate format.
+    Examples
+    --------
+    >>> S = load_sparse_matrix('test_data/mat_5kb.tsv', binning=1)
+    >>> S.data[:10]
+    array([84.,  2.,  3.,  2.,  1.,  1., 50.,  1., 66.,  1.])
+    >>> S.shape
+    (16, 16)
+    """
+    try:
+        cols_arr = np.loadtxt(mat_path, delimiter="\t", dtype=dtype)
+        shape = (int(cols_arr[0, 0]), int(cols_arr[0, 1]))
+    except ValueError:
+        cols_arr = np.loadtxt(
+            mat_path, delimiter="\t", dtype=dtype, skiprows=1
+        )
+        shape = None
+
+    # Get values into an array without the header. Use the header to give size.
+    sparse_mat = _cols_to_sparse(cols_arr[1:, :], shape=shape, dtype=dtype)
+
+    if binning == "auto":
+        num_bins = max(sparse_mat.shape) + 1
+        subsampling_factor = num_bins // DEFAULT_MAX_MATRIX_SHAPE
+    else:
+        subsampling_factor = binning
+    sparse_mat = hcs.bin_sparse(
+        sparse_mat, subsampling_factor=subsampling_factor
+    )
+    return sparse_mat
+
+
+def save_sparse_matrix(s_mat, path):
+    """Save a sparse matrix
+    Saves a sparse matrix object into tsv format.
+    Parameters
+    ----------
+    s_mat : scipy.sparse.coo_matrix
+        The sparse matrix to save on disk
+    path : str
+        File path where the matrix will be stored
+    """
+    if s_mat.format != "coo":
+        ValueError("Sparse matrix must be in coo format")
+    dtype = s_mat.dtype
+    fmt = "%i" if dtype == int else "%.10e"
+    sparse_arr = np.vstack([s_mat.row, s_mat.col, s_mat.data]).T
+
+    np.savetxt(
+        path,
+        sparse_arr,
+        header="{nrows}\t{ncols}\t{nonzero}".format(
+            nrows=s_mat.shape[0], ncols=s_mat.shape[1], nonzero=s_mat.nnz
+        ),
+        comments="",
+        fmt=fmt,
+        delimiter="\t",
+    )
+
+
+def load_pos_col(path, colnum, header=1, dtype=np.int64):
+    """
+    Loads a single column of a TSV file with header into a numpy array.
+
+    Parameters
+    ----------
+    path : str
+        The path of the TSV file to load.
+    colnum : int
+        The 0-based index of the column to load.
+    header : int
+        Number of lines to skip. By default the header is a single line.
+    Returns
+    -------
+    numpy.array :
+        A 1D numpy array with the values of the requested column.
+    Examples
+    --------
+    >>> load_pos_col('test_data/mat_5kb.tsv', 0)[:10]
+    array([0, 0, 0, 0, 0, 0, 1, 1, 2, 2])
+    """
+    pos_arr = np.genfromtxt(
+        path,
+        delimiter="\t",
+        usecols=(colnum,),
+        skip_header=header,
+        dtype=dtype,
+    )
+    return pos_arr
+
+
+def generate_temp_dir(path):
+    """Temporary directory generation
+    Generates a temporary directory with a random name at the input path.
+
+    Parameters
+    ----------
+    path : str
+        The path at which the temporary directory will be created.
+
+    Returns
+    -------
+    str
+        The path of the newly created temporary directory.
+    """
+    exist = True
+    while exist:
+        # Keep trying random directory names if they already exist
+        directory = str(hex(getrandbits(32)))[2:]
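+        # hex() returns a "0x"-prefixed string; the slice keeps only the hex digits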
+        full_path = join(path, directory)
+        if not exists(full_path):
+            exist = False
+    try:
+        os.makedirs(full_path)
+    except PermissionError:
+        raise PermissionError(
+            "The temporary directory cannot be created in {}. "
+            "Make sure you have write permission.".format(path)
+        )
+    return full_path
+
+
+def _check_cooler(fun):
+    """Decorates function `fun` to check if cooler is available.."""
+
+    @functools.wraps(fun)
+    def wrapped(*args, **kwargs):
+        try:
+            import cooler
+
+            fun.__globals__["cooler"] = cooler
+        except ImportError:
+            logger.error(
+                "The cooler package is required to use {0}, please install it first".format(
+                    fun.__name__
+                )
+            )
+            raise ImportError("The cooler package is required.")
+        return fun(*args, **kwargs)
+
+    return wrapped
+
+
+@_check_cooler
+def add_cool_column(
+    clr, column, column_name, table_name="bins", metadata={}, dtype=None
+):
+    """
+    Adds a new column to a loaded Cooler store. If the column exists,
+    it is replaced. This will affect the .cool file.
+
+    Parameters
+    ----------
+    clr : Cooler object
+        A Cooler store.
+    column : pandas Series
+        The column to add to the cooler.
+    column_name : str
+        The name of the column to add.
+    table_name : str
+        The name of the table to which the column should be added.
+        Defaults to the "bins" table.
+    metadata : dict
+        A dictionary of metadata to associate with the new column.
+    """
+    with clr.open("r+") as c:
+        if column_name in c[table_name]:
+            del c[table_name][column_name]
+        h5opts = dict(compression="gzip", compression_opts=6)
+        c[table_name].create_dataset(
+            column_name, data=column, dtype=dtype, **h5opts
+        )
+        c[table_name][column_name].attrs.update(metadata)
+
+
+@_check_cooler
+def load_cool(cool):
+    """
+    Reads a cool file into memory and parses it into graal style tables.
+
+    Parameters
+    ----------
+    cool : str
+        Path to the input .cool file.
+    Returns
+    -------
+    mat : scipy coo_matrix
+        Hi-C contact map in COO format.
+    frags : pandas DataFrame
+        Table of bins matching the matrix. Corresponds to the content of the fragments_list.txt file.
+    chroms : pandas DataFrame
+        Table of chromosome information.
+    """
+    c = cooler.Cooler(cool)  # pylint: disable=undefined-variable
+    frags = c.bins()[:]
+    chroms = c.chroms()[:]
+    mat = c.pixels()[:]
+    frags.rename(
+        columns={"start": "start_pos", "end": "end_pos"}, inplace=True
+    )
+    frags["id"] = frags.groupby("chrom", sort=False).cumcount() + 1
+    # Try loading hicstuff-specific columns
+    try:
+        frags = frags[
+            ["id", "chrom", "start_pos", "end_pos", "size", "gc_content"]
+        ]
+    # If absent, only load standard columns
+    except KeyError:
+        frags = frags[["id", "chrom", "start_pos", "end_pos"]]
+
+    chroms["cumul_length"] = (
+        chroms.length.shift(1).fillna(0).cumsum().astype(int)
+    )
+    n_frags = c.bins()[:].groupby("chrom", sort=False).count().start
+    chroms["n_frags"] = chroms.merge(
+        n_frags, right_index=True, left_on="name", how="left"
+    ).start
+    chroms.rename(columns={"name": "contig"}, inplace=True)
+    n = int(max(np.amax(mat.bin1_id), np.amax(mat.bin2_id))) + 1
+    shape = (n, n)
+    mat = coo_matrix((mat["count"], (mat.bin1_id, mat.bin2_id)), shape=shape)
+
+    return mat, frags, chroms
+
+
+@_check_cooler
+def save_cool(cool_out, mat, frags, metadata={}):
+    """
+    Writes a .cool file from graal style tables.
+
+    Parameters
+    ----------
+    cool_out : str
+        Path to the output cool file.
+    mat : scipy coo_matrix
+        The Hi-C contact matrix in sparse COO format.
+    frags : pandas DataFrame
+        The graal style 'fragments_list' table.
+    metadata : dict
+        Potential metadata to associate with the cool file.
+    """
+    up_tri = False
+    # Check whether the matrix is stored as upper triangle only or full:
+    # if it differs from its transpose, assume upper-triangle storage
+    if (abs(mat - mat.T) > 1e-10).nnz != 0:
+        up_tri = True
+    # Drop useless column
+    try:
+        bins = frags.drop("id", axis=1)
+    except KeyError:
+        bins = frags
+    # Get column names right
+    bins.rename(
+        columns={"seq": "chrom", "start_pos": "start", "end_pos": "end"},
+        inplace=True,
+    )
+    mat_dict = {"bin1_id": mat.row, "bin2_id": mat.col, "count": mat.data}
+    pixels = pd.DataFrame(mat_dict)
+    cooler.create_cooler(  # pylint: disable=undefined-variable
+        cool_out,
+        bins,
+        pixels,
+        metadata=metadata,
+        symmetric_upper=up_tri,
+        triucheck=False,
+    )
+
+
+def read_compressed(filename, mode='r'):
+    """Read compressed file
+    Opens the file in read mode with appropriate decompression algorithm.
+    Parameters
+    ----------
+    filename : str
+        The path to the input file
+    mode : str
+        Mode in which to open the file: 'r' for text (default) or 'rb' for binary.
+    Returns
+    -------
+    file-like object
+        The handle to access the input file's content
+    """
+
+    # Standard header bytes for diff compression formats
+    comp_bytes = {
+        b"\x1f\x8b\x08": "gz",
+        b"\x42\x5a\x68": "bz2",
+        b"\x50\x4b\x03\x04": "zip",
+    }
+
+    max_len = max(len(x) for x in comp_bytes)
+
+    def file_type(filename):
+        """Guess file type
+        Compare header bytes with those in the file and return type.
+        """
+        with open(filename, "rb") as f:
+            file_start = f.read(max_len)
+        for magic, filetype in comp_bytes.items():
+            if file_start.startswith(magic):
+                return filetype
+        return "uncompressed"
+
+    # Open file with appropriate function
+    mode_map = {'r': 'rt', 'rb': 'rb'}
+    comp = file_type(filename)
+    if comp == "gz":
+        return gzip.open(filename, mode_map[mode])
+    elif comp == "bz2":
+        return bz2.open(filename, mode_map[mode])
+    elif comp == "zip":
+        # zipfile only accepts archive mode "r"; read the single member and
+        # wrap it for text access unless binary mode was requested
+        zip_arch = zipfile.ZipFile(filename, "r")
+        if len(zip_arch.namelist()) > 1:
+            raise IOError(
+                "Only a single fastq file must be in the zip archive."
+            )
+        else:
+            zip_content = zip_arch.open(zip_arch.namelist()[0], "r")
+            if mode == "rb":
+                return zip_content
+            return io.TextIOWrapper(zip_content, encoding="utf-8")
+    else:
+        return open(filename, mode)
+
+
+def is_compressed(filename):
+    """Check compression status
+    Check if the input file is compressed from the first bytes.
+    Parameters
+    ----------
+    filename : str
+        The path to the input file
+    Returns
+    -------
+    bool
+        True if the file is compressed, False otherwise.
+    """
+
+    # Standard header bytes for diff compression formats
+    comp_bytes = {
+        b"\x1f\x8b\x08": "gz",
+        b"\x42\x5a\x68": "bz2",
+        b"\x50\x4b\x03\x04": "zip",
+    }
+    max_len = max(len(x) for x in comp_bytes)
+    with open(filename, "rb") as f:
+        file_start = f.read(max_len)
+    for magic, _ in comp_bytes.items():
+        if file_start.startswith(magic):
+            return True
+    return False
+
+
+def from_dade_matrix(filename, header=False):
+    """Load a DADE matrix
+    Load a numpy array from a DADE matrix file, optionally
+    returning bin information from the header. Header data
+    processing is delegated downstream.
+    Parameters
+    ----------
+    filename : str, file or pathlib.Path
+        The name of the file containing the DADE matrix.
+    header : bool
+        Whether to return as well information contained
+        in the header. Default is False.
+    Example
+    -------
+        >>> import numpy as np
+        >>> import tempfile
+        >>> lines = [['RST', 'chr1~0', 'chr1~10', 'chr2~0', 'chr2~30'],
+        ...          ['chr1~0', '5'],
+        ...          ['chr1~10', '8', '3'],
+        ...          ['chr2~0', '3', '5', '5'],
+        ...          ['chr2~30', '5', '10', '11', '2']
+        ...          ]
+        >>> formatted = ["\\t".join(l) + "\\n" for l in lines ]
+        >>> dade = tempfile.NamedTemporaryFile(mode='w')
+        >>> for fm in formatted:
+        ...     dade.write(fm)
+        34
+        9
+        12
+        13
+        18
+        >>> dade.flush()
+        >>> M, h = from_dade_matrix(dade.name, header=True)
+        >>> dade.close()
+        >>> print(M)
+        [[ 5.  8.  3.  5.]
+         [ 8.  3.  5. 10.]
+         [ 3.  5.  5. 11.]
+         [ 5. 10. 11.  2.]]
+        >>> print(h)
+        ['chr1~0', 'chr1~10', 'chr2~0', 'chr2~30']
+    See https://github.com/scovit/DADE for more details about Dade.
+    """
+
+    A = pd.read_csv(filename, sep="\t", header=None)
+    A.fillna("0", inplace=True)
+    M, headers = np.array(A.iloc[1:, 1:], dtype=np.float64), A.iloc[0, :]
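+    # The DADE file stores a single triangle of the matrix; rebuild the full
+    # symmetric matrix, subtracting the diagonal to avoid counting it twice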
+    matrix = M + M.T - np.diag(np.diag(M))
+    if header:
+        return matrix, headers.tolist()[1:]
+    else:
+        return matrix
+
+
+def to_dade_matrix(M, annotations="", filename=None):
+    """Returns a Dade matrix from input numpy matrix. Any annotations are added
+    as header. If filename is provided and valid, said matrix is also saved
+    as text.
+    """
+
+    n, m = M.shape
+    A = np.zeros((n + 1, m + 1))
+    A[1:, 1:] = M
+    if not annotations:
+        annotations = np.array(["" for _ in range(n)], dtype=str)
+    A[0, :] = annotations
+    A[:, 0] = annotations.T
+    if filename:
+        try:
+            np.savetxt(filename, A, fmt="%i")
+            logger.info(
+                "I saved input matrix in dade format as {0}".format(
+                    str(filename)
+                )
+            )
+        except ValueError as e:
+            logger.warning("I couldn't save input matrix.")
+            logger.warning(str(e))
+
+    return A
+
+
+def load_into_redis(filename):
+    """Load a file into redis
+    Load a matrix file and store it in memory with redis.
+    Useful to pass around huge datasets from scripts to
+    scripts and load them only once.
+    Inspired from https://gist.github.com/alexland/ce02d6ae5c8b63413843
+    Parameters
+    ----------
+    filename : str, file or pathlib.Path
+        The file of the matrix to load.
+    Returns
+    -------
+    key : str
+        The key of the dataset needed to retrieve it from redis.
+    """
+    try:
+        from redis import StrictRedis as redis
+        import time
+    except ImportError:
+        print(
+            "Error! Redis does not appear to be installed in your system.",
+            file=sys.stderr,
+        )
+        exit(1)
+
+    M = np.genfromtxt(filename, dtype=None)
+    array_dtype = str(M.dtype)
+    m, n = M.shape
+    M = M.ravel().tobytes()
+    database = redis(host="localhost", port=6379, db=0)
+    key = "{0}|{1}#{2}#{3}".format(int(time.time()), array_dtype, m, n)
+
+    database.set(key, M)
+
+    return key
+
+
+def load_from_redis(key):
+    """Retrieve a dataset from redis
+    Retrieve a cached dataset that was stored in redis
+    with the input key.
+    Parameters
+    ----------
+    key : str
+        The key of the dataset that was stored in redis.
+    Returns
+    -------
+    M : numpy.ndarray
+        The retrieved dataset in array format.
+    """
+
+    try:
+        from redis import StrictRedis as redis
+    except ImportError:
+        print(
+            "Error! Redis does not appear to be installed in your system.",
+            file=sys.stderr,
+        )
+        exit(1)
+
+    database = redis(host="localhost", port=6379, db=0)
+
+    try:
+        M = database.get(key)
+    except KeyError:
+        print(
+            "Error! No dataset was found with the supplied key.",
+            file=sys.stderr,
+        )
+        exit(1)
+
+    array_dtype, n, m = key.split("|")[1].split("#")
+
+    M = np.frombuffer(M, dtype=array_dtype).reshape(int(n), int(m))
+    return M
+
+
+def dade_to_graal(
+    filename,
+    output_matrix=DEFAULT_SPARSE_MATRIX_FILE_NAME,
+    output_contigs=DEFAULT_INFO_CONTIGS_FILE_NAME,
+    output_frags=DEFAULT_FRAGMENTS_LIST_FILE_NAME,
+    output_dir=None,
+):
+    """Convert a matrix from DADE format (https://github.com/scovit/dade)
+    to a graal-compatible format. Since DADE matrices contain both fragment
+    and contact information all files are generated at the same time.
+    """
+
+    with open(output_matrix, "w") as sparse_file:
+        sparse_file.write("id_frag_a\tid_frag_b\tn_contact")
+        with open(filename) as file_handle:
+            first_line = file_handle.readline()
+            for row_index, line in enumerate(file_handle):
+                dense_row = np.array(line.split("\t")[1:], dtype=np.int32)
+                for col_index in np.nonzero(dense_row)[0]:
+                    line_to_write = "{}\t{}\t{}\n".format(
+                        row_index, col_index, dense_row[col_index]
+                    )
+                    sparse_file.write(line_to_write)
+
+    header = first_line.split("\t")
+    bin_type = header[0]
+    if bin_type == '"RST"':
+        logger.info("I detected fragment-wise binning")
+    elif bin_type == '"BIN"':
+        logger.info("I detected fixed size binning")
+    else:
+        logger.warning(
+            (
+                "Sorry, I don't understand this matrix's "
+                "binning: I read {}".format(str(bin_type))
+            )
+        )
+
+    header_data = [
+        header_elt.replace("'", "")
+        .replace('"', "")
+        .replace("\n", "")
+        .split("~")
+        for header_elt in header[1:]
+    ]
+
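+    # Each header element is expected to unpack into five "~"-separated fields:
+    # global fragment id, contig name, local fragment id, start, end
+    # (illustrative; the order matches the unpacking below).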
+    (
+        global_frag_ids,
+        contig_names,
+        local_frag_ids,
+        frag_starts,
+        frag_ends,
+    ) = np.array(list(zip(*header_data)))
+
+    frag_starts = frag_starts.astype(np.int32) - 1
+    frag_ends = frag_ends.astype(np.int32) - 1
+    frag_lengths = frag_ends - frag_starts
+
+    total_length = len(global_frag_ids)
+
+    with open(output_contigs, "w") as info_contigs:
+
+        info_contigs.write("contig\tlength\tn_frags\tcumul_length\n")
+
+        cumul_length = 0
+
+        for contig in collections.OrderedDict.fromkeys(contig_names):
+
+            length_tig = np.sum(frag_lengths[contig_names == contig])
+            n_frags = collections.Counter(contig_names)[contig]
+            line_to_write = "%s\t%s\t%s\t%s\n" % (
+                contig,
+                length_tig,
+                n_frags,
+                cumul_length,
+            )
+            info_contigs.write(line_to_write)
+            cumul_length += n_frags
+
+    with open(output_frags, "w") as fragments_list:
+
+        fragments_list.write(
+            "id\tchrom\tstart_pos\tend_pos" "\tsize\tgc_content\n"
+        )
+        bogus_gc = 0.5
+
+        for i in range(total_length):
+            line_to_write = "%s\t%s\t%s\t%s\t%s\t%s\n" % (
+                int(local_frag_ids[i]) + 1,
+                contig_names[i],
+                frag_starts[i],
+                frag_ends[i],
+                frag_lengths[i],
+                bogus_gc,
+            )
+            fragments_list.write(line_to_write)
+
+
+def load_bedgraph2d(filename, bin_size=None, fragments_file=None):
+    """
+    Loads matrix and fragment information from a 2D bedgraph file. Note that this
+    function assumes chromosomes are ordered alphabetically.
+
+    Parameters
+    ----------
+    filename : str
+        Path to the bedgraph2D file.
+    bin_size : int
+        The size of bins in the case of fixed bin size.
+    fragments_file : str
+        Path to a fragments file to explicitly provide fragment positions.
+        If the matrix does not have fixed bin size, this prevents errors.
+
+    Returns
+    -------
+    mat : scipy.sparse.coo_matrix
+        The Hi-C contact map as the upper triangle of a symmetric matrix, in
+        sparse format.
+    frags : pandas.DataFrame
+        The list of fragments/bin present in the matrix with their genomic
+        positions.
+    """
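+    # Illustrative bg2 (2D bedgraph) record, 7 tab-separated columns:
+    #   chr1  0  1000  chr1  1000  2000  12
+    # i.e. chrom1, start1, end1, chrom2, start2, end2, n_contacts.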
+    bed2d = pd.read_csv(filename, sep="\t", header=None)
+    chrom_sizes = {}
+    if bin_size is not None:
+        # If bin size is provided, retrieve chromosome lengths; this will be
+        # used when regenerating bin coordinates
+        chroms_left = bed2d[[3, 5]]
+        chroms_left.columns = [0, 2]
+        chroms = (
+            pd.concat([bed2d[[0, 2]], chroms_left])
+            .groupby([0], sort=False)
+            .max()
+        )
+        for chrom, size in zip(chroms.index, np.array(chroms)):
+            chrom_sizes[chrom] = size[0]
+    elif fragments_file is None:
+        logger.warning(
+            "Please be aware that not all information can be restored from a "
+            "bg2 file without fixed bin size; fragments without any contact "
+            "will be lost"
+        )
+    # Get all possible fragment chrom-positions into an array
+    frag_pos = np.vstack(
+        [np.array(bed2d[[0, 1, 2]]), np.array(bed2d[[3, 4, 5]])]
+    )
+    # Sort by position (least important, col 1)
+    frag_pos = frag_pos[frag_pos[:, 1].argsort(kind="mergesort")]
+    # Then by chrom (most important, col 0)
+    frag_pos = frag_pos[frag_pos[:, 0].argsort(kind="mergesort")]
+    # Get unique names for fragments (chrom+pos)
+    ordered_frag_pos = (
+        pd.DataFrame(frag_pos).drop_duplicates().reset_index(drop=True)
+    )
+    frag_pos_a = bed2d[[0, 1]].apply(lambda x: tuple(x), axis=1)
+    frag_pos_b = bed2d[[3, 4]].apply(lambda x: tuple(x), axis=1)
+    # If fragments file is provided, use fragments positions to indices mapping
+    if fragments_file is not None:
+        frags = pd.read_csv(fragments_file, delimiter="\t")
+        frag_map = frags.apply(lambda x: (str(x.chrom), x.start_pos), axis=1)
+        frag_map = {f_name: f_idx for f_idx, f_name in enumerate(frag_map)}
+    # If fixed fragment size available, use it to reconstruct original
+    # fragments ID (even if they are absent from the bedgraph file).
+    elif bin_size is not None:
+        frag_map = {}
+        chrom_frags = []
+        for chrom, size in chrom_sizes.items():
+            prev_frags = len(frag_map)
+            for bin_id, bin_pos in enumerate(range(0, size, bin_size)):
+                frag_map[(chrom, bin_pos)] = bin_id + prev_frags
+            n_bins = size // bin_size
+            chrom_frags.append(
+                pd.DataFrame(
+                    {
+                        "id": range(1, n_bins + 1),
+                        "chrom": np.repeat(chrom, n_bins),
+                        "start_pos": range(0, size, bin_size),
+                        "end_pos": range(bin_size, size + bin_size, bin_size),
+                    }
+                )
+            )
+        frags = pd.concat(chrom_frags, axis=0).reset_index(drop=True)
+        frags.insert(
+            loc=3, column="size", value=frags.end_pos - frags.start_pos
+        )
+    # If neither is available, guess fragment indices from the bedgraph (potentially wrong)
+    else:
+        frag_map = {
+            (v[0], v[1]): i
+            for i, v in ordered_frag_pos.iloc[:, [0, 1]].iterrows()
+        }
+        frags = ordered_frag_pos.copy()
+        frags[3] = frags.iloc[:, 2] - frags.iloc[:, 1]
+        frags.insert(loc=0, column="id", value=0)
+        frags.id = frags.groupby([0], sort=False).cumcount() + 1
+        frags.columns = ["id", "chrom", "start_pos", "end_pos", "size"]
+    # Match bin indices to their names
+    frag_id_a = np.array(list(map(lambda x: frag_map[x], frag_pos_a)))
+    frag_id_b = np.array(list(map(lambda x: frag_map[x], frag_pos_b)))
+    contacts = np.array(bed2d.iloc[:, 6].tolist())
+    # Use index to build matrix
+    n_frags = len(frag_map.keys())
+    mat = coo_matrix(
+        (contacts, (frag_id_a, frag_id_b)), shape=(n_frags, n_frags)
+    )
+
+    # Get size of each chromosome in basepairs
+    chromsizes = frags.groupby("chrom", sort=False).apply(
+        lambda x: np.int64(max(x.end_pos))
+    )
+    chrom_bins = frags.groupby("chrom", sort=False).size()
+    # Shift the per-chromosome bin counts by one and cumulate them to get the
+    # start bin of each chromosome (the first one is zero)
+    chrom_start = chrom_bins.shift(1, fill_value=0).cumsum()
+    chroms = pd.DataFrame(
+        {
+            "contig": chromsizes.index,
+            "length": chromsizes.values,
+            "n_frags": chrom_bins,
+            "cumul_length": chrom_start,
+        }
+    )
+    return mat, frags, chroms
+
+
+def flexible_hic_loader(
+    mat, fragments_file=None, chroms_file=None, quiet=False
+):
+    """
+    Wrapper function to load COO, bg2 or cool input and return the same output.
+    The COO format requires the fragments_file and chroms_file options. The bg2
+    format can infer bin_size if it is fixed. When providing a bg2 matrix with
+    uneven fragment lengths, one should also provide fragments_file, otherwise
+    empty bins will be truncated from the output.
+
+    Parameters
+    ----------
+    mat : str
+        Path to the matrix in graal, bedgraph2 or cool format.
+    fragments_file : str or None
+        Path to the file with fragments information (fragments_list.txt).
+        Only required if the matrix is in graal format.
+    chroms_file : str or None
+        Path to the file with chromosome information (info_contigs.txt). Only required
+        if the matrix is in graal format.
+    quiet : bool
+        If True, will silence warnings for empty outputs.
+    Returns
+    -------
+    mat : scipy.sparse.coo_matrix
+        Sparse upper triangle Hi-C matrix.
+    frags : pandas.DataFrame or None
+        Table of fragment information. None if information was not provided.
+    chroms : pandas.DataFrame or None
+        Table of chromosomes/contig information. None if information was not provided.
+    """
+    hic_format = get_hic_format(mat)
+    # Load cool based on file extension
+    if hic_format == "cool":
+        mat, frags, chroms = load_cool(mat)
+    # Use the first line to determine COO / bg2 format
+    if hic_format == "bg2":
+        # Use the frags file to define bins if available
+        if fragments_file is not None:
+            mat, frags, chroms = load_bedgraph2d(
+                mat, fragments_file=fragments_file
+            )
+        else:
+            # Guess if bin size is fixed based on MAD
+            bg2 = pd.read_csv(mat, sep="\t")
+            sizes = np.array(bg2.iloc[:, 2] - bg2.iloc[:, 1])
+            size_mad = ss.median_abs_deviation(sizes, scale='normal')
+            # Uneven bin sizes: reconstruct bins from the bg2 content only
+            if size_mad > 0:
+                mat, frags, chroms = load_bedgraph2d(mat)
+                logger.warning(
+                    "Input is a bedgraph2d file with uneven bin size, "
+                    "but no fragments_file was provided. Empty bins will "
+                    "be missing from the output. To avoid this, provide a "
+                    "fragments file."
+                )
+            # Use fixed bin size
+            else:
+                mat, frags, chroms = load_bedgraph2d(
+                    mat, bin_size=int(np.median(sizes))
+                )
+
+    elif hic_format == "graal":
+        mat = load_sparse_matrix(mat)
+        try:
+            frags = pd.read_csv(fragments_file, sep="\t")
+        except ValueError:
+            if not quiet:
+                logger.warning(
+                    "fragments_file was not provided when "
+                    "loading a matrix in COO/graal format. frags will be None."
+                )
+            frags = None
+        try:
+            chroms = pd.read_csv(chroms_file, sep="\t")
+        except ValueError:
+            if not quiet:
+                logger.warning(
+                    "chroms_file was not provided when "
+                    "loading a matrix in COO/graal format. chroms will be None."
+                )
+
+            chroms = None
+
+    # Ensure the matrix is upper triangle symmetric
+    if mat.shape[0] == mat.shape[1]:
+        if (abs(mat - mat.T) > 1e-10).nnz > 0:
+            mat = mat + tril(mat, k=-1).T
+        mat = triu(mat, format="coo")
+
+    return mat, frags, chroms
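+
+# Hedged usage sketch (file names are hypothetical graal-style inputs):
+#   mat, frags, chroms = flexible_hic_loader(
+#       "abs_fragments_contacts_weighted.txt",
+#       fragments_file="fragments_list.txt",
+#       chroms_file="info_contigs.txt",
+#   )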
+
+
+def get_hic_format(mat):
+    """Returns the format of the input Hi-C matrix
+
+    Parameters
+    ----------
+    mat : str
+        Path to the input Hi-C matrix
+    Returns
+    -------
+    str :
+        Hi-C format string. One of graal, bg2, cool
+    """
+    if (
+        mat.endswith(".cool")
+        or mat.count(".mcool::/") == 1
+        or mat.count(".cool::/") == 1
+    ):
+        hic_format = "cool"
+    else:
+        # Use the first line to determine COO / bg2 format
+        with open(mat) as mat_handle:
+            ncols = len(mat_handle.readline().split("\t"))
+        if ncols == 7:
+            hic_format = "bg2"
+        elif ncols == 3:
+            hic_format = "graal"
+        else:
+            raise ValueError("Unknown file format")
+    return hic_format
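+
+# Illustrative examples of the detection logic above (file names are hypothetical):
+#   get_hic_format("sample.cool")                       -> "cool"
+#   get_hic_format("sample.mcool::/resolutions/1000")   -> "cool"
+#   get_hic_format("sample.bg2")                        -> "bg2"   (7 tab-separated columns)
+#   get_hic_format("sample.mat.tsv")                    -> "graal" (3 tab-separated columns)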
+
+
+def flexible_hic_saver(
+    mat, out_prefix, frags=None, chroms=None, hic_fmt="graal", quiet=False,
+):
+    """
+    Saves objects to the desired Hi-C file format.
+    Parameters
+    ----------
+    mat : scipy.sparse.coo_matrix
+        Hi-C contact map.
+    out_prefix : str
+        Output path without extension (the extension is added based on hic_fmt).
+    frags : pandas.DataFrame or None
+        Table of fragment information.
+    chroms : pandas.DataFrame or None
+        Table of chromosome / contig information.
+    hic_fmt : str
+        Output format. Can be one of graal for graal-compatible COO format, bg2 for
+        2D bedgraph format, or cool for cooler compatible format.
+    quiet : bool
+        If True, silence warnings raised when frags or chroms are missing.
+    """
+    if hic_fmt == "graal":
+        save_sparse_matrix(mat, out_prefix + ".mat.tsv")
+        try:
+            frags.to_csv(out_prefix + ".frags.tsv", sep="\t", index=False)
+        except AttributeError:
+            if not quiet:
+                logger.warning(
+                    "Could not create fragments_list.txt from input files"
+                )
+        try:
+            chroms.to_csv(out_prefix + ".chr.tsv", sep="\t", index=False)
+        except AttributeError:
+            if not quiet:
+                logger.warning(
+                    "Could not create info_contigs.txt from input files"
+                )
+    elif hic_fmt == "cool":
+        frag_sizes = frags.end_pos - frags.start_pos
+        # Median absolute deviation of fragment sizes: 0 means fixed bin size
+        size_mad = np.median(np.abs(frag_sizes - np.median(frag_sizes)))
+        bin_type = "variable" if size_mad else "fixed"
+        try:
+            save_cool(
+                out_prefix + ".cool",
+                mat,
+                frags,
+                metadata={"hicstuff": __version__, "bin-type": bin_type},
+            )
+        except NameError:
+            raise NameError("frags is required to save a cool file")
+    elif hic_fmt == "bg2":
+        try:
+            save_bedgraph2d(mat, frags, out_prefix + ".bg2")
+        except NameError:
+            raise NameError("frags is required to save a bg2 file")
+    else:
+        raise ValueError("Unknown output format: {0}".format(hic_fmt))
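+
+# Hedged usage sketch (prefix is hypothetical): write the same objects out as a
+# .cool file instead of the default graal trio.
+#   flexible_hic_saver(mat, "sample_out", frags=frags, chroms=chroms, hic_fmt="cool")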
+
+
+def save_bedgraph2d(mat, frags, out_path):
+    """
+    Given a sparse matrix and a corresponding list of fragments, save a file
+    in 2D bedgraph format.
+    Parameters
+    ----------
+    mat : scipy.sparse.coo_matrix
+        The sparse contact map.
+    frags : pandas.DataFrame
+        A structure containing the annotations for each matrix bin. Should
+        correspond to the content of the fragments_list.txt file.
+    out_path : str
+        Path to the output file where the 2D bedgraph is written.
+    """
+
+    mat_df = pd.DataFrame(
+        {"row": mat.row, "col": mat.col, "data": mat.data.astype(int)}
+    )
+    # Merge fragments with matrix based on row indices to annotate rows
+    merge_mat = mat_df.merge(
+        frags, left_on="row", right_index=True, how="left", suffixes=("", "")
+    )
+    # Rename annotations to assign to frag1
+    merge_mat.rename(
+        columns={"chrom": "chr1", "start_pos": "start1", "end_pos": "end1"},
+        inplace=True,
+    )
+    # Do the same operation for cols (frag2)
+    merge_mat = merge_mat.merge(
+        frags,
+        left_on="col",
+        right_index=True,
+        how="left",
+        suffixes=("_0", "_2"),
+    )
+    merge_mat.rename(
+        columns={"chrom": "chr2", "start_pos": "start2", "end_pos": "end2"},
+        inplace=True,
+    )
+    # Select only relevant columns in correct order
+    bg2 = merge_mat.loc[
+        :, ["chr1", "start1", "end1", "chr2", "start2", "end2", "data"]
+    ]
+    bg2.to_csv(out_path, header=None, index=False, sep="\t")
+
+
+def get_pos_cols(df):
+    """Get column names representing chromosome, start and end column
+    from a dataframe. Allows flexible names.
+    Parameters
+    ----------
+    df : pd.DataFrame
+    Returns
+    -------
+    tuple of str:
+        Tuple containing the names of the chromosome, start and end
+        columns in the input dataframe.
+    Examples
+    --------
+    >>> import pandas as pd
+    >>> d = [1, 2, 3]
+    >>> df = pd.DataFrame(
+    ...     {'Chromosome': d, 'Start': d, 'End': d, 'species': d}
+    ... )
+    >>> get_pos_cols(df)
+    ('Chromosome', 'Start', 'End')
+    >>> df = pd.DataFrame(
+    ...     {'id': d, 'chr': d, 'start_bp': d, 'end_bp': d}
+    ... )
+    >>> get_pos_cols(df)
+    ('chr', 'start_bp', 'end_bp')
+    """
+
+    # Get case insensitive column names
+    cnames = df.columns
+    inames = cnames.str.lower()
+
+    def _col_getter(cols, pat):
+        """Helper to get column index from the start of its name"""
+        mask = cols.str.startswith(pat)
+        idx = np.flatnonzero(mask)[0]
+        return idx
+
+    chrom_col = cnames[_col_getter(inames, "chr")]
+    start_col = cnames[_col_getter(inames, "start")]
+    end_col = cnames[_col_getter(inames, "end")]
+    return chrom_col, start_col, end_col
+
+
+def gc_bins(genome_path, frags):
+    """Generate GC content annotation for bins using input genome.
+    Parameters
+    ----------
+    genome_path : str
+        Path to the genome file in FASTA format.
+    frags : pandas.DataFrame
+        Table containing bin segmentation of the genome.
+        Required columns: chrom, start, end.
+    Returns
+    -------
+    numpy.ndarray of floats:
+        GC content per bin, in the range [0.0, 1.0].
+    """
+    # Grab columns by name (catches start, Start, start_pos, etc.)
+    chrom_col, start_col, end_col = get_pos_cols(frags)
+    # Fill the gc array by chromosome
+    gc_bins = np.zeros(frags.shape[0], dtype=float)
+    for rec in SeqIO.parse(genome_path, "fasta"):
+        mask = frags[chrom_col] == rec.id
+        # Define coordinates of each bin
+        starts = frags.loc[mask, start_col]
+        ends = frags.loc[mask, end_col]
+        # Slice chromosome sequence based on bins
+        seqs = [str(rec.seq)[s:e] for s, e in zip(starts, ends)]
+        # Fill GC values for bins in the chromosome
+        idx = np.flatnonzero(mask)
+        gc_bins[idx] = np.array(list(map(SeqUtils.GC, seqs))) / 100.0
+
+    return gc_bins
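+
+# Sanity check for the scaling above: Bio.SeqUtils.GC("ATGC") returns 50.0 (a
+# percentage), so the division by 100 stores a GC fraction of 0.5 for that bin.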
+
+
+def sort_pairs(in_file, out_file, keys, tmp_dir=None, threads=1, buffer="2G"):
+    """
+    Sort a pairs file in batches using UNIX sort.
+    Parameters
+    ----------
+    in_file : str
+        Path to the unsorted input file
+    out_file : str
+        Path to the sorted output file.
+    keys : list of str
+        list of columns to use as sort keys. Each column can be one of readID,
+        chr1, pos1, chr2, pos2, frag1, frag2. Key priorities are according to
+        the order in the list.
+    tmp_dir : str
+        Path to the directory where temporary files will be created. Defaults
+        to current directory.
+    threads : int
+        Number of parallel sorting threads.
+    buffer : str
+        Buffer size used for sorting. Consists of a number and a unit.
+    """
+    # TODO: Write a pure python implementation to drop the GNU coreutils dependency,
+    # could be inspired from: https://stackoverflow.com/q/14465154/8440675
+
+    # Check if UNIX sort version supports parallelism
+    parallel_ok = True
+    sort_ver = sp.Popen(["sort", "--version"], stdout=sp.PIPE)
+    sort_ver = (
+        sort_ver.communicate()[0]
+        .decode()
+        .split("\n")[0]
+        .split(" ")[-1]
+        .split(".")
+    )
+    # If so, specify threads, otherwise don't mention it in the command line
+    try:
+        sort_ver = list(map(int, sort_ver))
+        if sort_ver[0] < 8 or (sort_ver[0] == 8 and sort_ver[1] < 23):
+            logger.warning(
+                "GNU sort version is {0} but >8.23 is required for parallel "
+                "sort. Sorting on a single thread.".format(
+                    ".".join(map(str, sort_ver))
+                )
+            )
+            parallel_ok = False
+    # BSD sort has a different format and will throw error upon parsing. It does
+    # not support parallel processes anyway.
+    except ValueError:
+        logger.warning(
+            "Using BSD sort instead of GNU sort, sorting on a single thread."
+        )
+        parallel_ok = False
+
+    key_map = {
+        "readID": "-k1,1d",
+        "chr1": "-k2,2V",
+        "pos1": "-k3,3n",
+        "chr2": "-k4,4V",
+        "pos2": "-k5,5n",
+        "strand1": "-k6,6d",
+        "strand2": "-k7,7d",
+        "frag1": "-k8,8n",
+        "frag2": "-k9,9n",
+    }
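+
+    # For example, keys=["chr1", "pos1", "chr2", "pos2"] would expand to
+    # "sort -S 2G -k2,2V -k3,3n -k4,4V -k5,5n" (with --parallel and
+    # --temporary-directory appended when available/requested).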
+
+    # transform column names to corresponding sort keys
+    try:
+        sort_keys = map(lambda k: key_map[k], keys)
+    except KeyError:
+        print("Unknown column name.")
+        raise
+    # Rewrite header with new sorting order
+    header = get_pairs_header(in_file)
+    with open(out_file, "w") as output:
+        for line in header:
+            if line.startswith("#sorted"):
+                output.write("#sorted: {0}\n".format("-".join(keys)))
+            else:
+                output.write(line + "\n")
+
+    # Sort pairs and append to file.
+    with open(out_file, "a") as output:
+        grep_proc = sp.Popen(["grep", "-v", "^#", in_file], stdout=sp.PIPE)
+        sort_cmd = ["sort", "-S %s" % buffer] + list(sort_keys)
+        if tmp_dir is not None:
+            sort_cmd.append("--temporary-directory={0}".format(tmp_dir))
+        if parallel_ok:
+            sort_cmd.append("--parallel={0}".format(threads))
+        sort_proc = sp.Popen(sort_cmd, stdin=grep_proc.stdout, stdout=output)
+        sort_proc.communicate()
+
+
+def get_pairs_header(pairs):
+    r"""Retrieves the header of a .pairs file and stores lines into a list.
+    Parameters
+    ----------
+    pairs : str or file object
+        Path to the pairs file.
+    Returns
+    -------
+    header : list of str
+        A list of header lines found, in the same order they appear in pairs.
+    Examples
+    --------
+        >>> import os
+        >>> from tempfile import NamedTemporaryFile
+        >>> p = NamedTemporaryFile('w', delete=False)
+        >>> p.writelines(["## pairs format v1.0\n", "#sorted: chr1-chr2\n", "abcd\n"])
+        >>> p.close()
+        >>> h = get_pairs_header(p.name)
+        >>> for line in h:
+        ...     print([line])
+        ['## pairs format v1.0']
+        ['#sorted: chr1-chr2']
+        >>> os.unlink(p.name)
+    """
+    # Open file if needed
+    with open(pairs, "r") as pairs:
+        # Store header lines into a list
+        header = []
+        line = pairs.readline()
+        while line.startswith("#"):
+            header.append(line.rstrip())
+            line = pairs.readline()
+
+    return header
+
+
+def reorder_fasta(genome, output=None, threshold=100000):
+    """Reorder and trim a fasta file
+    Sort a fasta file by record lengths, optionally trimming the smallest ones.
+    Parameters
+    ----------
+    genome : str, file or pathlib.Path
+        The genome scaffold file (or file handle)
+    output : str, file or pathlib.Path
+        The output file to write to
+    threshold : int, optional
+        The size below which scaffolds are discarded, by default 100000
+    """
+
+    if output is None:
+        output = "{}_renamed.fa".format(genome.split(".")[0])
+
+    handle = SeqIO.parse(genome, "fasta")
+    handle_to_write = sorted(
+        (u for u in handle if len(u) > threshold), key=len, reverse=True
+    )
+    SeqIO.write(handle_to_write, output, "fasta")
+
+
+def rename_genome(genome, output=None, ambiguous=True):
+    """Rename genome and slugify headers
+    Rename genomes according to a simple naming scheme; this
+    is mainly done to avoid special character weirdness.
+    Parameters
+    ----------
+    genome : file, str or pathlib.Path
+        The input genome to be renamed and slugified.
+    output : file, str or pathlib.Path
+        The output genome to be written into. Default is <base>_renamed.fa,
+        where <base> is genome_in without its extension.
+    ambiguous : bool
+        Whether to retain ambiguous non-N bases, otherwise rename them to Ns.
+        Default is True.
+    """
+
+    if output is None:
+        output = "{}_renamed.fa".format(genome.split(".")[0])
+
+    with open(output, "w") as output_handle:
+        for record in SeqIO.parse(genome, "fasta"):
+
+            # Replace hyphens, tabs and whitespace with underscores
+            new_record_id = record.id.replace(" ", "_")
+            new_record_id = new_record_id.replace("-", "_")
+            new_record_id = new_record_id.replace("\t", "_")
+
+            # Remove anything that's weird, i.e. not alphanumeric
+            # or an underscore
+            new_record_id = re.sub("[^_A-Za-z0-9]+", "", new_record_id)
+            header = ">{}\n".format(new_record_id)
+
+            new_seq = re.sub("[^ATGCatgcNn]", "N", str(record.seq))
+
+            output_handle.write(header)
+            output_handle.write("{}\n".format(new_seq))
+
+
+def check_fasta_index(ref, mode="bowtie2"):
+    """
+    Checks for the existence of a bowtie2 or bwa index based on the reference
+    file name.
+    Parameters
+    ----------
+    ref : str
+        Path to the reference genome.
+    mode : str
+        The alignment software used to build the index. bowtie2 or bwa. If any
+        other value is given, the function returns the reference path.
+    Returns
+    -------
+    index : str
+        The bowtie2 or bwa index basename. None if no index was found
+    """
+    ref = pathlib.Path(ref)
+    if mode == "bowtie2":
+        # Bowtie2 should have 6 index files
+        bt2_idx_files = list(ref.parent.glob("{}*bt2*".format(ref.name)))
+        index = None if len(bt2_idx_files) < 6 else bt2_idx_files
+    elif mode == "bwa":
+        refdir = str(ref.parent)
+        refdir_files = os.listdir(refdir)
+        bwa_idx_files = [
+            join(refdir, f)
+            for f in refdir_files
+            if re.search(r".*\.(sa|pac|bwt|ann|amb)$", f)
+        ]
+        index = None if len(bwa_idx_files) < 5 else bwa_idx_files
+    else:
+        index = [ref]
+    if index is not None:
+        # Convert the PosixPath objects to strings and get the longest common prefix to obtain
+        # index basename (without the dot)
+        index = os.path.commonprefix(list(map(str, index))).strip(".")
+    return index
+
+
+def check_is_fasta(in_file):
+    """
+    Checks whether input file is in fasta format.
+
+    Parameters
+    ----------
+    in_file : str
+        Path to the input file.
+    Returns
+    -------
+    bool :
+        True if the input is in fasta format, False otherwise
+    """
+    try:
+        with read_compressed(in_file) as handle:
+            fasta = any(SeqIO.parse(handle, "fasta"))
+    except FileNotFoundError:
+        fasta = False
+
+    return fasta
diff --git a/bin/hicstuff_log.py b/bin/hicstuff_log.py
new file mode 100644
index 0000000000000000000000000000000000000000..171a5665ed1ea81371dcd39b28cc701f6cc7cb86
--- /dev/null
+++ b/bin/hicstuff_log.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+# coding: utf-8
+
+"""Basic logging
+Basic logging setup with three main handlers (stdout, log file and optionally
+texting with the right API). By default the log file is disabled, but can be
+enabled or changed using set_file_handler.
+"""
+
+
+import logging
+import logging.handlers
+import requests
+import json
+
+TEXT_CREDENTIALS_DEFAULT_PATH = "text_credentials.json"
+
+CURRENT_LOG_LEVEL = logging.INFO
+
+
+logging.captureWarnings(True)
+
+logger = logging.getLogger()
+logger.setLevel(CURRENT_LOG_LEVEL)
+
+logfile_formatter = logging.Formatter(
+    "%(asctime)s :: %(levelname)s :: %(message)s", datefmt="%Y-%m-%d,%H:%M:%S"
+)
+stdout_formatter = logging.Formatter("%(levelname)s :: %(message)s")
+text_formatter = logging.Formatter("%(message)s")
+
+# file_handler = logging.FileHandler("hicstuff.log", "a")
+
+# file_handler.setLevel(logging.INFO)
+# file_handler.setFormatter(logfile_formatter)
+# logger.addHandler(file_handler)
+
+stream_handler = logging.StreamHandler()
+stream_handler.setLevel(logging.DEBUG)
+stream_handler.setFormatter(stdout_formatter)
+logger.addHandler(stream_handler)
+
+
+def set_file_handler(log_path, formatter=logfile_formatter):
+    """Change the file handler for custom log file location"""
+
+    filehandler = logging.FileHandler(log_path, "a")
+    filehandler.setLevel(logging.INFO)
+    filehandler.setFormatter(formatter)
+    for hdlr in logger.handlers[:]:  # remove the existing file handlers
+        if isinstance(hdlr, logging.FileHandler):
+            logger.removeHandler(hdlr)
+    logger.addHandler(filehandler)  # set the new handler
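+
+# Hedged usage sketch (path is hypothetical): send subsequent log records to a
+# run-specific file in addition to stdout.
+#   set_file_handler("results/hicstuff_run.log")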
+
+
+def setup_text_logging(credentials=TEXT_CREDENTIALS_DEFAULT_PATH):
+    """Setup text logging
+    Setup text notifications on errors with a basic API. In order for it to
+    work, a file named 'text_credentials.json' must be in the directory where
+    a command is run. The logger performs a GET request to a text notification
+    API.
+    Parameters
+    ----------
+    credentials : str or pathlib.Path
+        A JSON file with necessary credentials for the GET request
+    """
+
+    with open(credentials) as cred_handle:
+        cred_dict = json.load(cred_handle)
+
+    api_service = cred_dict.pop("api_service")
+
+    class TextHandler(logging.Handler):
+        def emit(self, record):
+            log_entry = self.format(record)
+            cred_dict["msg"] = log_entry
+            return requests.get(api_service, cred_dict)
+
+    sms_handler = TextHandler()
+    sms_handler.setLevel(logging.ERROR)
+    sms_handler.setFormatter(text_formatter)
+
+    logger.addHandler(sms_handler)
+
+
+try:
+    setup_text_logging(TEXT_CREDENTIALS_DEFAULT_PATH)
+except FileNotFoundError:
+    pass
diff --git a/conf/base.config b/conf/base.config
index 2558cb1be3b55fa76fac6e98671f945093579d44..eff5eba6b3e73735ef10d9263640a1437ad32058 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -40,8 +40,8 @@ process {
         time   = { check_max( 8.h   * task.attempt, 'time'    ) }
     }
     withLabel:process_high {
-        cpus   = { check_max( 12    * task.attempt, 'cpus'    ) }
-        memory = { check_max( 64.GB * task.attempt, 'memory'  ) }
+        cpus   = { check_max( 8    * task.attempt, 'cpus'    ) } //TODO go back to 16 when not local
+        memory = { check_max( 31.GB * task.attempt, 'memory'  ) }//TODO go back to 64 when not local
         time   = { check_max( 16.h  * task.attempt, 'time'    ) }
     }
     withLabel:process_long {
@@ -61,3 +61,35 @@ process {
         cache = false
     }
 }
+// Function to ensure that resource requirements don't go beyond
+// a maximum limit
+def check_max(obj, type) {
+    if (type == 'memory') {
+        try {
+            if (obj.compareTo(params.max_memory as nextflow.util.MemoryUnit) == 1)
+                return params.max_memory as nextflow.util.MemoryUnit
+            else
+                return obj
+        } catch (all) {
+            println "   ### ERROR ###   Max memory '${params.max_memory}' is not valid! Using default value: $obj"
+            return obj
+        }
+    } else if (type == 'time') {
+        try {
+            if (obj.compareTo(params.max_time as nextflow.util.Duration) == 1)
+                return params.max_time as nextflow.util.Duration
+            else
+                return obj
+        } catch (all) {
+            println "   ### ERROR ###   Max time '${params.max_time}' is not valid! Using default value: $obj"
+            return obj
+        }
+    } else if (type == 'cpus') {
+        try {
+            return Math.min( obj, params.max_cpus as int )
+        } catch (all) {
+            println "   ### ERROR ###   Max cpus '${params.max_cpus}' is not valid! Using default value: $obj"
+            return obj
+        }
+    }
+}
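+// For example, with params.max_memory = '32.GB', check_max( 64.GB * task.attempt, 'memory' )
+// returns 32.GB on the first attempt instead of 64.GB.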
diff --git a/conf/hicstuff.config b/conf/hicstuff.config
new file mode 100644
index 0000000000000000000000000000000000000000..3f5cdf60468e3f2ce4fca93cfdc5340f7be71a4d
--- /dev/null
+++ b/conf/hicstuff.config
@@ -0,0 +1,399 @@
+params {
+
+    // Input options
+    input = null
+
+
+    // References
+    genome = null
+    igenomes_base = 's3://ngi-igenomes/igenomes'
+    igenomes_ignore = false
+    chromosome_size = null
+    restriction_fragments = null
+    save_reference = false
+
+    // Mapping
+    split_fastq = false
+    fastq_chunks_size = 20000000
+    save_interaction_bam = false
+    save_aligned_intermediates = false
+    bwt2_opts_end2end = '--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder'
+    bwt2_opts_trimmed = '--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder'
+    keep_dups = false
+    keep_multi = false
+    min_mapq = 10
+
+    // Digestion Hi-C
+    digestion = null
+    ligation_site = null
+    restriction_site = null
+    digest {
+      'hindiii'{
+         restriction_site='A^AGCTT'
+         ligation_site='AAGCTAGCTT'
+      }
+      'mboi' {
+         restriction_site='^GATC'
+         ligation_site='GATCGATC'
+      }
+      'dpnii' {
+         restriction_site='^GATC'
+         ligation_site='GATCGATC'
+      }
+      'arima' {
+         restriction_site='^GATC,G^ANTC'
+         ligation_site='GATCGATC,GATCANTC,GANTGATC,GANTANTC'
+      }
+    }
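+    // Additional enzymes could be declared with the same pattern, e.g. (hypothetical entry):
+    // 'ncoi' {
+    //    restriction_site='C^CATGG'
+    //    ligation_site='CCATGCATGG'
+    // }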
+
+    min_restriction_fragment_size = 0
+    max_restriction_fragment_size = 0
+    min_insert_size = 0
+    max_insert_size = 0
+    save_pairs_intermediates = false
+
+    // Dnase Hi-C
+    dnase = false
+    min_cis_dist = 0
+
+    // Contact maps
+    save_raw_maps = false
+    bin_size = '1000000'
+    res_zoomify = null
+    hicpro_maps = false
+    ice_max_iter = 100
+    ice_filter_low_count_perc = 0.02
+    ice_filter_high_count_perc =  0
+    ice_eps = 0.1
+
+    // Downstream Analysis
+    res_dist_decay = '250000'
+    tads_caller = 'insulation'
+    res_tads = '40000'
+    res_compartments = '250000'
+
+    // Workflow
+    skip_maps = false
+    skip_balancing = false
+    skip_mcool = false
+    skip_dist_decay = false
+    skip_compartments = false
+    skip_tads = false
+    skip_multiqc = false
+
+    // MultiQC options
+    multiqc_config             = null
+    multiqc_title              = null
+    multiqc_logo               = null
+    max_multiqc_email_size     = '25.MB'
+    multiqc_methods_description = null
+
+    // Boilerplate options
+    outdir                     = '${params.outdir}'
+    tracedir                   = "${params.outdir}/pipeline_info"
+    publish_dir_mode           = 'copy'
+    email                      = null
+    email_on_fail              = null
+    plaintext_email            = false
+    monochrome_logs            = false
+    hook_url                   = null
+    help                       = false
+    version                    = false
+    validate_params            = true
+    show_hidden_params         = false
+    schema_ignore_params       = 'genomes,digest'
+
+    // Config options
+    custom_config_version      = 'master'
+    custom_config_base         = "https://raw.githubusercontent.com/nf-core/configs/${params.custom_config_version}"
+    config_profile_description = null
+    config_profile_contact     = null
+    config_profile_url         = null
+    config_profile_name        = null
+
+
+    // Max resource options
+    // Defaults only, expecting to be overwritten
+    max_memory                 = '128.GB'
+    max_cpus                   = 16
+    max_time                   = '240.h'
+
+    // Hicstuff
+    hicstuff_bwt2_align_opts = '--very-sensitive-local'
+    hicstuff_min_size = 0
+    hicstuff_circular = 'false'
+    hicstuff_output_contigs = 'info_contigs.txt'
+    hicstuff_output_frags = 'fragments_list.txt'
+    hicstuff_frags_plot = 'false'
+    hicstuff_frags_plot_path = 'frags_hist.pdf'
+    hicstuff_valid_pairs = 'valid.pairs'
+    hicstuff_valid_idx = 'valid_idx.pairs'
+    hicstuff_min_qual = 30
+    hicstuff_matrix = 'abs_fragments_contacts_weighted.txt'
+    hicstuff_bin = 10000
+    hicstuff_valid_idx_filtered = 'valid_idx_filtered.pairs'
+    hicstuff_plot_events = 'false'
+    hicstuff_pie_plot = 'distrib'
+    hicstuff_dist_plot = 'dist'
+    hicstuff_centro_file = 'None'       //give absolute path or 'None'
+    hicstuff_base = 1.1
+    hicstuff_distance_out_file = 'distance_law.txt'
+    hicstuff_rm_centro = 'None'         //int or 'None'
+    hicstuff_distance_plot = 'false'
+    hicstuff_distance_out_plot = 'plot_distance_law.pdf'
+    hicstuff_filter_pcr_out_file = 'valid_idx_pcrfree.pairs'
+
+    //Hicstuff optional modules
+    filter_event = false
+    distance_law = false
+    filter_pcr = false
+    filter_pcr_picard = true
+}
+
+process {
+    //Default
+    publishDir = [
+        path: { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" },
+        mode: 'copy',
+        saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
+    ]
+
+    withName: 'BOWTIE2_ALIGNMENT' {
+        ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" }
+        ext.args = params.hicstuff_bwt2_align_opts
+        publishDir = [
+            path: { "${params.outdir}/hicstuff/reads"},
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'FRAGMENT_ENZYME' {
+        ext.args = { [
+            " -m ${params.hicstuff_min_size}",
+            " -c ${params.hicstuff_circular}",
+            " -o ${params.hicstuff_output_contigs}",
+            " -f ${params.hicstuff_output_frags}",
+            " -p ${params.hicstuff_frags_plot}",
+            " -g ${params.hicstuff_frags_plot_path}"
+        ].join('').trim() }
+        publishDir = [
+            path: { "${params.outdir}/hicstuff/fragment_enzyme" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'BAM2PAIRS' {
+        ext.pairs = params.hicstuff_valid_pairs
+        ext.index = params.hicstuff_valid_idx
+        ext.args = { [
+            " -q ${params.hicstuff_min_qual}",
+            " -c ${params.hicstuff_circular}"
+        ].join('').trim() }
+        publishDir = [
+            path: { "${params.outdir}/hicstuff/pairs" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'FILTER_EVENT' {
+        ext.args = { [
+           " -o ${params.hicstuff_valid_idx_filtered}",
+           " --plot ${params.hicstuff_plot_events}",
+           " -d ${params.hicstuff_dist_plot}",
+           " -q ${params.hicstuff_pie_plot}"
+        ].join('').trim() }
+        publishDir = [
+            path: { "${params.outdir}/hicstuff/pairs" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'DISTANCE_LAW' {
+        ext.args = { [
+            " -c ${params.hicstuff_centro_file}",
+            " -b ${params.hicstuff_base}",
+            " -o ${params.hicstuff_distance_out_file}",
+            " -u ${params.hicstuff_circular}",
+            " -r ${params.hicstuff_rm_centro}",
+            " -l ${params.hicstuff_distance_plot}",
+            " -d ${params.hicstuff_distance_out_plot}"
+        ].join('').trim() }
+        publishDir = [
+            path: { "${params.outdir}/hicstuff/pairs" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'FILTER_PCR' {
+        ext.args = { [
+            "-o ${params.hicstuff_filter_pcr_out_file}"
+        ].join('').trim() }
+        publishDir = [
+            path: { "${params.outdir}/hicstuff/pairs" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'GATK4_MARKDUPLICATES' {
+        ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}.bam" }
+        ext.args = { [
+            "--REMOVE_DUPLICATES true"
+        ].join('').trim() }
+        publishDir = [
+            path: { "${params.outdir}/gatk4/bam" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'PICARD_MARKDUPLICATES' {
+        ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" }
+        ext.args = { [
+            "--REMOVE_DUPLICATES true"
+        ].join('').trim() }
+        publishDir = [
+            path: { "${params.outdir}/picard/bam" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'SAMTOOLS_SORT' {
+        ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}_sorted" }
+        ext.args = { [
+            ""
+        ].join('').trim() }
+    }
+
+    withName: 'SAMTOOLS_SORT_N' {
+        ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}_nsorted" }
+        ext.args = { [
+            "-n"
+        ].join('').trim() }
+        publishDir = [
+            path: { "${params.outdir}/gatk4/bam" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'SAMTOOLS_INDEX' {
+        ext.args = { [
+            ""
+        ].join('').trim() }
+    }
+
+    withName: 'BUILD_MATRIX' {
+        ext.args = params.hicstuff_matrix
+        publishDir = [
+            path: { "${params.outdir}/hicstuff/matrix" },
+            mode: 'copy'
+        ]
+    }
+    withName: 'BUILD_MATRIX_COOL' {
+        ext.args = params.hicstuff_matrix
+        publishDir = [
+            path: { "${params.outdir}/hicstuff/matrix" },
+            mode: 'copy'
+        ]
+    }
+        withName: 'BUILD_MATRIX_COOL_ALT' {
+        ext.outname = params.hicstuff_matrix
+        ext.bin = params.hicstuff_bin
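+        // cooler cload pairs: -c1/-p1 and -c2/-p2 set the 1-based chrom/pos column numbers in the pairs file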
+        ext.args = {"-c1 2 -p1 3 -p2 4 -c2 5"}
+        publishDir = [
+            path: { "${params.outdir}/hicstuff/matrix" },
+            mode: 'copy'
+        ]
+    }
+
+    //**********************************************
+    // PREPARE_GENOME
+    withName: 'BOWTIE2_BUILD' {
+        publishDir = [
+            path: { "${params.outdir}/genome/bowtie2_index" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'CUSTOM_GETCHROMSIZES' {
+        publishDir = [
+            path: { "${params.outdir}/genome" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'GET_RESTRICTION_FRAGMENTS' {
+        publishDir = [
+            path: { "${params.outdir}/genome" },
+            mode: 'copy'
+        ]
+    }
+
+}
+
+profiles {
+    debug { process.beforeScript = 'echo $HOSTNAME' }
+    conda {
+        conda.enabled          = true
+        docker.enabled         = false
+        singularity.enabled    = false
+        podman.enabled         = false
+        shifter.enabled        = false
+        charliecloud.enabled   = false
+    }
+    mamba {
+        conda.enabled          = true
+        conda.useMamba         = true
+        docker.enabled         = false
+        singularity.enabled    = false
+        podman.enabled         = false
+        shifter.enabled        = false
+        charliecloud.enabled   = false
+    }
+    docker {
+        docker.enabled         = true
+        docker.userEmulation   = true
+        singularity.enabled    = false
+        podman.enabled         = false
+        shifter.enabled        = false
+        charliecloud.enabled   = false
+    }
+    arm {
+        docker.runOptions = '-u $(id -u):$(id -g) --platform=linux/amd64'
+    }
+    singularity {
+        singularity.enabled    = true
+        singularity.autoMounts = true
+        docker.enabled         = false
+        podman.enabled         = false
+        shifter.enabled        = false
+        charliecloud.enabled   = false
+    }
+    podman {
+        podman.enabled         = true
+        docker.enabled         = false
+        singularity.enabled    = false
+        shifter.enabled        = false
+        charliecloud.enabled   = false
+    }
+    shifter {
+        shifter.enabled        = true
+        docker.enabled         = false
+        singularity.enabled    = false
+        podman.enabled         = false
+        charliecloud.enabled   = false
+    }
+    charliecloud {
+        charliecloud.enabled   = true
+        docker.enabled         = false
+        singularity.enabled    = false
+        podman.enabled         = false
+        shifter.enabled        = false
+    }
+    gitpod {
+        executor.name          = 'local'
+        executor.cpus          = 16
+        executor.memory        = 60.GB
+    }
+    test      { includeConfig 'conf/test.config'      }
+    test_full { includeConfig 'conf/test_full.config' }
+}
+includeConfig 'base.config'
diff --git a/conf/modules.config b/conf/modules.config
index 096a86006168e216e1f68863f65e4dd2d5d96c7d..c289ffbf9f52ebf1230baa4f7fb87b25e81eb9bd 100644
--- a/conf/modules.config
+++ b/conf/modules.config
@@ -286,4 +286,45 @@ process {
         ext.args = '--correctForMultipleTesting fdr'
         ext.prefix = { "${cool.baseName}" }
     }
+
+//  withName: 'HIC_PLOT_MATRIX' {
+//      publishDir = [
+//          path: { "${params.outdir}/contact_maps/cool/" },
+//          mode: 'copy'
+//      ]
+//      ext.args = '--log --perChromosome'
+//      ext.prefix = { "${cool.baseName}" }
+//  }
+
+    //********************************
+    // FILTER_PCR_DUP
+
+    withName: 'PICARD_MARKDUPLICATES' {
+        ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}" }
+        ext.args = { [
+            "--REMOVE_DUPLICATES true"
+        ].join('').trim() }
+        publishDir = [
+            path: { "${params.outdir}/picard/bam" },
+            mode: 'copy'
+        ]
+    }
+
+    withName: 'SAMTOOLS_SORT' {
+        ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}_sorted" }
+        ext.args = { [
+            ""
+        ].join('').trim() }
+    }
+
+    withName: 'SAMTOOLS_SORT_N' {
+        ext.prefix = { "${meta.id}_${meta.chunk}_${meta.mates}_nsorted" }
+        ext.args = { [
+            "-n"
+        ].join('').trim() }
+        publishDir = [
+            path: { "${params.outdir}/picard/bam" },
+            mode: 'copy'
+        ]
+    }
 }
diff --git a/modules/local/filterbam/main.nf b/modules/local/filterbam/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..0f63be60330d928711dff04923ea7e3d562e5b76
--- /dev/null
+++ b/modules/local/filterbam/main.nf
@@ -0,0 +1,25 @@
+process FILTER_PAIR {
+    tag "$meta1.id"
+    label 'process_high' //medium
+
+    conda "bioconda::samtools=1.17"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
+        'quay.io/biocontainers/samtools:1.17--h00cdaf9_0' }"
+
+    input:
+    tuple val(meta1), path(bam1), val(meta2), path(bam2)
+
+    output:
+    tuple val(meta1), path("*_1paired.bam"), val(meta2), path("*_2paired.bam"), emit: bam
+
+    script:
+    """
+    # Collect the unique read names present in each BAM
+    samtools view ${bam1} | cut -f1 | sort | uniq > names1.txt
+    samtools view ${bam2} | cut -f1 | sort | uniq > names2.txt
+    # Keep only read names found in both files (count == 2 after concatenation)
+    cat names1.txt names2.txt | sort | uniq -c | awk '\$1 == 2 {print \$2}' > common_reads.txt
+    # Subset each BAM to the shared read names
+    samtools view -h -N common_reads.txt ${bam1} -O BAM > ${meta1.id}_1paired.bam
+    samtools view -h -N common_reads.txt ${bam2} -O BAM > ${meta2.id}_2paired.bam
+
+    """
+}
\ No newline at end of file
diff --git a/modules/local/hicexplorer/hicPlotMatrix.nf b/modules/local/hicexplorer/hicPlotMatrix.nf
new file mode 100644
index 0000000000000000000000000000000000000000..8041aa1e3afe88acb2cc3ac26c9ce25b10d45d0a
--- /dev/null
+++ b/modules/local/hicexplorer/hicPlotMatrix.nf
@@ -0,0 +1,24 @@
+process HIC_PLOT_MATRIX {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "bioconda::hicexplorer=3.7.2"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/hicexplorer:3.7.2--pyhdfd78af_1' :
+        'quay.io/biocontainers/hicexplorer:3.7.2--pyhdfd78af_1' }"
+
+
+    input:
+    tuple val(meta), path(cool)
+
+    output:
+    tuple val(meta), path("*.pdf"), emit: pdf
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+
+    """
+    hicPlotMatrix -m ${cool} -o ${prefix}_cool_log.pdf ${args}
+    """
+}
diff --git a/modules/local/hicstuff/align_bowtie2.nf b/modules/local/hicstuff/align_bowtie2.nf
new file mode 100644
index 0000000000000000000000000000000000000000..188836d9ff6cf3affda23287355eb390896c22ec
--- /dev/null
+++ b/modules/local/hicstuff/align_bowtie2.nf
@@ -0,0 +1,31 @@
+process BOWTIE2_ALIGNMENT {
+    tag "$meta.id"
+    label "process_high"
+
+    conda "bioconda::bowtie2=2.4.4 bioconda::samtools=1.16.1 conda-forge::pigz=2.6"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' :
+        'quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' }"
+
+    input:
+    tuple val(meta), path(reads)
+    tuple val(meta2), path(index)
+
+    output:
+    tuple val(meta), path ("*.bam"), emit: bam
+
+    script:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def args = task.ext.args ?: ''
+
+    """
+    INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"`
+    [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/\\.rev.1.bt2l\$//"`
+    [ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1
+
+    bowtie2 $args -p $task.cpus -x \$INDEX -U $reads > ${prefix}.tmp
+
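+    # Drop supplementary alignments (SAM flag 2048) and name-sort so mates can be matched downstream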
+    samtools view -F 2048 -h -@ $task.cpus ${prefix}.tmp | samtools sort -n -@ $task.cpus -o ${prefix}.bam -
+    """
+
+}
diff --git a/modules/local/hicstuff/bam2pairs.nf b/modules/local/hicstuff/bam2pairs.nf
new file mode 100644
index 0000000000000000000000000000000000000000..4b2b26ecccceba423ef20270116a433650803ac5
--- /dev/null
+++ b/modules/local/hicstuff/bam2pairs.nf
@@ -0,0 +1,28 @@
+process BAM2PAIRS {
+    tag "$meta1.id"
+    label 'process_high'
+
+    conda "conda-forge::python=3.9 conda-forge::biopython=1.80 conda-forge::numpy=1.22.3 conda-forge::matplotlib=3.6.3 conda-forge::pandas=1.5.3"
+    container = "lbmc/hicstuff:3.1.3"
+
+    input:
+    tuple val(meta1), path(bam1), val(meta2), path(bam2)
+    tuple val(meta), path(info_contigs)
+    val(digestion)
+    tuple val(meta), path(fasta)
+
+    output:
+    tuple val(meta1), path("*_valid.pairs"), emit: valid_pairs
+    tuple val(meta1), path("*_valid_idx.pairs"), emit: idx_pairs
+
+    script:
+
+    def args = task.ext.args ?: ''
+    def pairs = task.ext.pairs ?: ''
+    def index = task.ext.index ?: ''
+
+    """
+    hicstuff_bam2pairs.py -b1 ${bam1} -b2 ${bam2} -i ${info_contigs} -e ${digestion} -f ${fasta} -o ${meta1.id}_${pairs} -x ${meta1.id}_${index} ${args}
+    """
+
+}
diff --git a/modules/local/hicstuff/build_matrix.nf b/modules/local/hicstuff/build_matrix.nf
new file mode 100644
index 0000000000000000000000000000000000000000..d8ff6a6a21726028d4d1ff35fb282382f0e7864e
--- /dev/null
+++ b/modules/local/hicstuff/build_matrix.nf
@@ -0,0 +1,24 @@
+process BUILD_MATRIX {
+    tag "$meta1.id"
+    label 'process_single'
+
+    conda "conda-forge::python=3.9 conda-forge::biopython=1.80 conda-forge::numpy=1.22.3 conda-forge::matplotlib=3.6.3 conda-forge::pandas=1.5.3"
+    container = "lbmc/hicstuff:3.1.3"
+
+    input:
+    tuple val(meta1), path(idx_pairs)
+    tuple val(meta), path(fragments_list)
+
+    output:
+    tuple val(meta), path("${meta1.id}_*"), emit: matrix
+
+    script:
+
+    def args = task.ext.args ?: ''
+
+    """
+    hicstuff_build_matrix.py -p ${idx_pairs} -f ${fragments_list} -t graal -o ${args}
+
+    mv ${args} ${meta1.id}_${args}
+    """
+}
diff --git a/modules/local/hicstuff/build_matrix_cool.nf b/modules/local/hicstuff/build_matrix_cool.nf
new file mode 100644
index 0000000000000000000000000000000000000000..36110614ecb9f0e1014cdf19904fe56d5f83ad0e
--- /dev/null
+++ b/modules/local/hicstuff/build_matrix_cool.nf
@@ -0,0 +1,25 @@
+process BUILD_MATRIX_COOL {
+    tag "$meta1.id"
+    label 'process_single'
+
+    conda "conda-forge::python=3.9 conda-forge::biopython=1.80 conda-forge::numpy=1.22.3 conda-forge::matplotlib=3.6.3 conda-forge::pandas=1.5.3"
+    container = "lbmc/hicstuff:3.1.3"
+
+    input:
+    tuple val(meta1), path(idx_pairs)
+    tuple val(meta), path(fragments_list)
+
+    output:
+    tuple val(meta), path("${meta1.id}_*.cool"), emit: matrix
+
+    script:
+
+    def args = task.ext.args ?: ''
+    def base = args.replaceFirst(/.txt/,"")
+
+    """
+    hicstuff_build_matrix.py -p ${idx_pairs} -f ${fragments_list} -t cool -o ${args}
+
+    mv ${base}.cool ${meta1.id}_${base}.cool
+    """
+}
diff --git a/modules/local/hicstuff/build_matrix_cool_alt.nf b/modules/local/hicstuff/build_matrix_cool_alt.nf
new file mode 100644
index 0000000000000000000000000000000000000000..a52465dcbc9f76e6d4249f74d9da44ea2d810de1
--- /dev/null
+++ b/modules/local/hicstuff/build_matrix_cool_alt.nf
@@ -0,0 +1,28 @@
+process BUILD_MATRIX_COOL_ALT {
+    tag "$meta1.id"
+    label 'process_single'
+
+    conda "conda-forge::python=3.9 conda-forge::biopython=1.80 conda-forge::numpy=1.22.3 conda-forge::matplotlib=3.6.3 conda-forge::pandas=1.5.3"
+    container = "lbmc/hicstuff:3.1.3"
+
+    input:
+    tuple val(meta), path(chromosome_size)
+    tuple val(meta1), path(idx_pairs)
+
+
+    output:
+    tuple val(meta), path("${meta1.id}_*.cool"), emit: matrix
+
+    script:
+
+    def args = task.ext.args ?: ''
+    def bin = task.ext.bin.toInteger() ?: ''
+    def outname = task.ext.outname ?: ''
+    def base = outname.replaceFirst(/.txt/,"")
+
+    """
+    cooler cload pairs ${args} ${chromosome_size}:${bin} ${idx_pairs} ${base}.cool
+
+    mv ${base}.cool ${meta1.id}_${base}_${bin}_alt.cool
+    """
+}
diff --git a/modules/local/hicstuff/distance_law.nf b/modules/local/hicstuff/distance_law.nf
new file mode 100644
index 0000000000000000000000000000000000000000..c87112ce6d3f016e319be21660186b451a184110
--- /dev/null
+++ b/modules/local/hicstuff/distance_law.nf
@@ -0,0 +1,23 @@
+process DISTANCE_LAW {
+    tag "$meta1.id"
+    label 'process_high'
+
+    conda "conda-forge::python=3.9 conda-forge::biopython=1.80 conda-forge::numpy=1.22.3 conda-forge::matplotlib=3.6.3 conda-forge::pandas=1.5.3"
+    container = "lbmc/hicstuff:3.1.3"
+
+    input:
+    tuple val(meta1), path(idx_pairs)
+    tuple val(meta), path(fragments_list)
+
+    output:
+    path("*distance_law.txt")
+    path("*.pdf"), optional: true
+
+    script:
+
+    def args = task.ext.args ?: ''
+
+    """
+    hicstuff_distance_law.py -i ${idx_pairs} -f ${fragments_list} -p ${meta1.id} ${args}
+    """
+}
diff --git a/modules/local/hicstuff/filter_event.nf b/modules/local/hicstuff/filter_event.nf
new file mode 100644
index 0000000000000000000000000000000000000000..2fa70512ed1a815d50d61a4b8e3ba35d6eb748b6
--- /dev/null
+++ b/modules/local/hicstuff/filter_event.nf
@@ -0,0 +1,22 @@
+process FILTER_EVENT {
+    tag "$meta1.id"
+    label 'process_high'
+
+    conda "conda-forge::python=3.9 conda-forge::biopython=1.80 conda-forge::numpy=1.22.3 conda-forge::matplotlib=3.6.3 conda-forge::pandas=1.5.3"
+    container = "lbmc/hicstuff:3.1.3"
+
+    input:
+    tuple val(meta1), path(idx_pairs)
+
+    output:
+    tuple val(meta1), path("*_valid_idx_filtered.pairs"), emit: idx_pairs_filtered
+    path("*.png"), optional: true
+
+    script:
+
+    def args = task.ext.args ?: ''
+
+    """
+    hicstuff_filter.py -i ${idx_pairs} -p ${meta1.id} ${args}
+    """
+}
diff --git a/modules/local/hicstuff/filter_pcr.nf b/modules/local/hicstuff/filter_pcr.nf
new file mode 100644
index 0000000000000000000000000000000000000000..57ab0f4312846356f2a5f7ce971ffa8c7511d3c9
--- /dev/null
+++ b/modules/local/hicstuff/filter_pcr.nf
@@ -0,0 +1,21 @@
+process FILTER_PCR {
+    tag "$meta1.id"
+    label 'process_high'
+
+    conda "conda-forge::python=3.9 conda-forge::biopython=1.80 conda-forge::numpy=1.22.3 conda-forge::matplotlib=3.6.3 conda-forge::pandas=1.5.3"
+    container = "lbmc/hicstuff:3.1.3"
+
+    input:
+    tuple val(meta1), path(idx_pairs)
+
+    output:
+    tuple val(meta1), path("*_valid_idx_pcrfree.pairs"), emit: idx_pairs_pcrfree
+
+    script:
+
+    def args = task.ext.args ?: ''
+
+    """
+    hicstuff_filter_pcr.py -i ${idx_pairs} -p ${meta1.id} ${args}
+    """
+}
diff --git a/modules/local/hicstuff/fragment_enzyme.nf b/modules/local/hicstuff/fragment_enzyme.nf
new file mode 100644
index 0000000000000000000000000000000000000000..b770ee4b8af0b3914b7e8541ff5002bf260fcd84
--- /dev/null
+++ b/modules/local/hicstuff/fragment_enzyme.nf
@@ -0,0 +1,25 @@
+process FRAGMENT_ENZYME {
+    tag "$digestion"
+    label "process_single"
+
+    conda "conda-forge::python=3.9 conda-forge::biopython=1.80 conda-forge::numpy=1.22.3 conda-forge::matplotlib=3.6.3 conda-forge::pandas=1.5.3"
+    container = "lbmc/hicstuff:3.1.3"
+
+    input:
+    val(digestion)
+    tuple val(meta), path(fasta)
+
+    output:
+    tuple val(meta), path(params.hicstuff_output_contigs) , emit: info_contigs
+    tuple val(meta), path(params.hicstuff_output_frags), emit: fragments_list
+    path("*.pdf"), optional: true
+
+    script:
+
+    def args = task.ext.args ?: ''
+
+    """
+    hicstuff_fragments.py -e ${digestion} -i ${fasta} ${args}
+    """
+
+}
diff --git a/modules/nf-core/custom/gatk4/markduplicates/main.nf b/modules/nf-core/custom/gatk4/markduplicates/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..223fa7c1a568cbcb5dfb6063acb928a9d6971ce2
--- /dev/null
+++ b/modules/nf-core/custom/gatk4/markduplicates/main.nf
@@ -0,0 +1,54 @@
+process GATK4_MARKDUPLICATES {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "bioconda::gatk4=4.4.0.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gatk4:4.4.0.0--py36hdfd78af_0':
+        'quay.io/biocontainers/gatk4:4.4.0.0--py36hdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(bam)
+    path  fasta
+    path  fasta_fai
+
+    output:
+    tuple val(meta), path("*cram"),     emit: cram,  optional: true
+    tuple val(meta), path("*bam"),      emit: bam,   optional: true
+    tuple val(meta), path("*.crai"),    emit: crai,  optional: true
+    tuple val(meta), path("*.bai"),     emit: bai,   optional: true
+    tuple val(meta), path("*.metrics"), emit: metrics
+    path "versions.yml",                emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    prefix = task.ext.prefix ?: "${meta.id}"
+    def input_list = bam.collect{"--INPUT $it"}.join(' ')
+    def reference = fasta ? "--REFERENCE_SEQUENCE ${fasta}" : ""
+
+    def avail_mem = 3072
+    if (!task.memory) {
+        log.info '[GATK MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+    } else {
+        avail_mem = (task.memory.mega*0.8).intValue()
+    }
+    """
+    gatk --java-options "-Xmx${avail_mem}M" MarkDuplicates \\
+        $input_list \\
+        --OUTPUT ${prefix} \\
+        --METRICS_FILE ${prefix}.metrics \\
+        --TMP_DIR . \\
+        ${reference} \\
+        $args
+    if [[ ${prefix} == *.cram ]] && [[ -f ${prefix}.bai ]]; then
+        mv ${prefix}.bai ${prefix}.crai
+    fi
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gatk4: \$(echo \$(gatk --version 2>&1) | sed 's/^.*(GATK) v//; s/ .*\$//')
+    END_VERSIONS
+    """
+}
\ No newline at end of file
diff --git a/modules/nf-core/custom/gatk4/markduplicates/meta.yml b/modules/nf-core/custom/gatk4/markduplicates/meta.yml
new file mode 100644
index 0000000000000000000000000000000000000000..ae7443dab6e3682b7cfeb7a03b1f3f0252c8f59d
--- /dev/null
+++ b/modules/nf-core/custom/gatk4/markduplicates/meta.yml
@@ -0,0 +1,72 @@
+name: gatk4_markduplicates
+description: This tool locates and tags duplicate reads in a BAM or SAM file, where duplicate reads are defined as originating from a single fragment of DNA.
+keywords:
+  - markduplicates
+  - bam
+  - sort
+tools:
+  - gatk4:
+      description:
+        Developed in the Data Sciences Platform at the Broad Institute, the toolkit offers a wide variety of tools
+        with a primary focus on variant discovery and genotyping. Its powerful processing engine
+        and high-performance computing features make it capable of taking on projects of any size.
+      homepage: https://gatk.broadinstitute.org/hc/en-us
+      documentation: https://gatk.broadinstitute.org/hc/en-us/articles/360037052812-MarkDuplicates-Picard-
+      tool_dev_url: https://github.com/broadinstitute/gatk
+      doi: 10.1158/1538-7445.AM2017-3590
+      licence: ["MIT"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: Sorted BAM file
+      pattern: "*.{bam}"
+  - fasta:
+      type: file
+      description: Fasta file
+      pattern: "*.{fasta}"
+  - fasta_fai:
+      type: file
+      description: Fasta index file
+      pattern: "*.{fai}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - bam:
+      type: file
+      description: Marked duplicates BAM file
+      pattern: "*.{bam}"
+  - cram:
+      type: file
+      description: Marked duplicates CRAM file
+      pattern: "*.{cram}"
+  - bai:
+      type: file
+      description: BAM index file
+      pattern: "*.{bam.bai}"
+  - crai:
+      type: file
+      description: CRAM index file
+      pattern: "*.{cram.crai}"
+  - metrics:
+      type: file
+      description: Duplicate metrics file generated by GATK
+      pattern: "*.{metrics.txt}"
+
+authors:
+  - "@ajodeh-juma"
+  - "@FriederikeHanssen"
+  - "@maxulysse"
\ No newline at end of file
diff --git a/modules/nf-core/custom/picard/markduplicates/main.nf b/modules/nf-core/custom/picard/markduplicates/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..eef8c81601a2414f3b1b8a3d5cc7b5303f3e0b76
--- /dev/null
+++ b/modules/nf-core/custom/picard/markduplicates/main.nf
@@ -0,0 +1,61 @@
+process PICARD_MARKDUPLICATES {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "bioconda::picard=3.0.0"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/picard:3.0.0--hdfd78af_1' :
+        'quay.io/biocontainers/picard:3.0.0--hdfd78af_1' }"
+
+    input:
+    tuple val(meta), path(bam)
+    tuple val(meta2), path(fasta)
+    tuple val(meta3), path(fai)
+
+    output:
+    tuple val(meta), path("*.bam")        , emit: bam
+    tuple val(meta), path("*.bai")        , optional:true, emit: bai
+    tuple val(meta), path("*.metrics.txt"), emit: metrics
+    path  "versions.yml"                  , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def avail_mem = 3072
+    if (!task.memory) {
+        log.info '[Picard MarkDuplicates] Available memory not known - defaulting to 3GB. Specify process memory requirements to change this.'
+    } else {
+        avail_mem = (task.memory.mega*0.8).intValue()
+    }
+    """
+    mv ${fasta} ${fasta.simpleName}.fasta
+
+    picard \\
+        -Xmx${avail_mem}M \\
+        MarkDuplicates \\
+        $args \\
+        --INPUT $bam \\
+        --OUTPUT ${prefix}.bam \\
+        --REFERENCE_SEQUENCE ${fasta.simpleName}.fasta \\
+        --METRICS_FILE ${prefix}.MarkDuplicates.metrics.txt
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        picard: \$(echo \$(picard MarkDuplicates --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:)
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.bam
+    touch ${prefix}.bam.bai
+    touch ${prefix}.MarkDuplicates.metrics.txt
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        picard: \$(echo \$(picard MarkDuplicates --version 2>&1) | grep -o 'Version:.*' | cut -f2- -d:)
+    END_VERSIONS
+    """
+}
\ No newline at end of file
diff --git a/modules/nf-core/custom/picard/markduplicates/meta.yml b/modules/nf-core/custom/picard/markduplicates/meta.yml
new file mode 100644
index 0000000000000000000000000000000000000000..e463328d9c6e50abcd466a65b9b022b881f7f1e2
--- /dev/null
+++ b/modules/nf-core/custom/picard/markduplicates/meta.yml
@@ -0,0 +1,71 @@
+name: picard_markduplicates
+description: Locate and tag duplicate reads in a BAM file
+keywords:
+  - markduplicates
+  - pcr
+  - duplicates
+  - bam
+  - sam
+  - cram
+tools:
+  - picard:
+      description: |
+        A set of command line tools (in Java) for manipulating high-throughput sequencing (HTS)
+        data and formats such as SAM/BAM/CRAM and VCF.
+      homepage: https://broadinstitute.github.io/picard/
+      documentation: https://broadinstitute.github.io/picard/
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM file
+      pattern: "*.{bam,cram,sam}"
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'genome' ]
+  - fasta:
+      type: file
+      description: Reference genome fasta file
+      pattern: "*.{fasta,fa}"
+  - meta3:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'genome' ]
+  - fai:
+      type: file
+      description: Reference genome fasta index
+      pattern: "*.{fai}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM file with duplicate reads marked/removed
+      pattern: "*.{bam}"
+  - bai:
+      type: file
+      description: An optional BAM index file. If desired, --CREATE_INDEX must be passed as a flag
+      pattern: "*.{bai}"
+  - metrics:
+      type: file
+      description: Duplicate metrics file generated by picard
+      pattern: "*.{metrics.txt}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@drpatelh"
+  - "@projectoriented"
+  - "@ramprasadn"
\ No newline at end of file
diff --git a/modules/nf-core/custom/samtools/index/main.nf b/modules/nf-core/custom/samtools/index/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..05fa9758e6b2f9ce5ab3e7fa2404e9b234947169
--- /dev/null
+++ b/modules/nf-core/custom/samtools/index/main.nf
@@ -0,0 +1,46 @@
+process SAMTOOLS_INDEX {
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "bioconda::samtools=1.17"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
+        'quay.io/biocontainers/samtools:1.17--h00cdaf9_0' }"
+
+    input:
+    tuple val(meta), path(input)
+
+    output:
+    tuple val(meta), path("*.bai") , optional:true, emit: bai
+    tuple val(meta), path("*.csi") , optional:true, emit: csi
+    tuple val(meta), path("*.crai"), optional:true, emit: crai
+    path  "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    """
+    samtools \\
+        index \\
+        -@ ${task.cpus-1} \\
+        $args \\
+        $input
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    """
+    touch ${input}.bai
+    touch ${input}.crai
+    touch ${input}.csi
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
\ No newline at end of file
diff --git a/modules/nf-core/custom/samtools/index/meta.yml b/modules/nf-core/custom/samtools/index/meta.yml
new file mode 100644
index 0000000000000000000000000000000000000000..6037b9e658387998214f987017d0bc742156b553
--- /dev/null
+++ b/modules/nf-core/custom/samtools/index/meta.yml
@@ -0,0 +1,53 @@
+name: samtools_index
+description: Index SAM/BAM/CRAM file
+keywords:
+  - index
+  - bam
+  - sam
+  - cram
+tools:
+  - samtools:
+      description: |
+        SAMtools is a set of utilities for interacting with and post-processing
+        short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+        These files are generated as output by short read aligners like BWA.
+      homepage: http://www.htslib.org/
+      documentation: http://www.htslib.org/doc/samtools.html
+      doi: 10.1093/bioinformatics/btp352
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bai:
+      type: file
+      description: BAM/CRAM/SAM index file
+      pattern: "*.{bai,crai,sai}"
+  - crai:
+      type: file
+      description: BAM/CRAM/SAM index file
+      pattern: "*.{bai,crai,sai}"
+  - csi:
+      type: file
+      description: CSI index file
+      pattern: "*.{csi}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@drpatelh"
+  - "@ewels"
+  - "@maxulysse"
\ No newline at end of file
diff --git a/modules/nf-core/custom/samtools/sort/main.nf b/modules/nf-core/custom/samtools/sort/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..f5692575cb24910127b33e243b7cb29f7535c1c7
--- /dev/null
+++ b/modules/nf-core/custom/samtools/sort/main.nf
@@ -0,0 +1,49 @@
+process SAMTOOLS_SORT {
+    tag "$meta.id"
+    label 'process_high' //medium
+
+    conda "bioconda::samtools=1.17"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
+        'quay.io/biocontainers/samtools:1.17--h00cdaf9_0' }"
+
+    input:
+    tuple val(meta), path(bam)
+
+    output:
+    tuple val(meta), path("*.bam"), emit: bam
+    tuple val(meta), path("*.csi"), emit: csi, optional: true
+    path  "versions.yml"          , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def sort_memory = (task.memory.mega/task.cpus).intValue()
+    if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+    """
+    samtools sort \\
+        $args \\
+        -@ $task.cpus \\
+        -m ${sort_memory}M \\
+        -o ${prefix}.bam \\
+        -T $prefix \\
+        $bam
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.bam
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
\ No newline at end of file
diff --git a/modules/nf-core/custom/samtools/sort/meta.yml b/modules/nf-core/custom/samtools/sort/meta.yml
new file mode 100644
index 0000000000000000000000000000000000000000..158785726d6436b8e5672bfbcc7ff2d948e06e97
--- /dev/null
+++ b/modules/nf-core/custom/samtools/sort/meta.yml
@@ -0,0 +1,48 @@
+name: samtools_sort
+description: Sort SAM/BAM/CRAM file
+keywords:
+  - sort
+  - bam
+  - sam
+  - cram
+tools:
+  - samtools:
+      description: |
+        SAMtools is a set of utilities for interacting with and post-processing
+        short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+        These files are generated as output by short read aligners like BWA.
+      homepage: http://www.htslib.org/
+      documentation: http://www.htslib.org/doc/samtools.html
+      doi: 10.1093/bioinformatics/btp352
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: Sorted BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - csi:
+      type: file
+      description: BAM index file (optional)
+      pattern: "*.csi"
+authors:
+  - "@drpatelh"
+  - "@ewels"
\ No newline at end of file
diff --git a/modules/nf-core/custom/samtools_n/sort/main.nf b/modules/nf-core/custom/samtools_n/sort/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..59a6e2a4e0827332cf3742d2d6e696d299fd4282
--- /dev/null
+++ b/modules/nf-core/custom/samtools_n/sort/main.nf
@@ -0,0 +1,49 @@
+process SAMTOOLS_SORT_N {
+    tag "$meta.id"
+    label 'process_high' //medium
+
+    conda "bioconda::samtools=1.17"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/samtools:1.17--h00cdaf9_0' :
+        'quay.io/biocontainers/samtools:1.17--h00cdaf9_0' }"
+
+    input:
+    tuple val(meta), path(bam)
+
+    output:
+    tuple val(meta), path("*.bam"), emit: bam
+    tuple val(meta), path("*.csi"), emit: csi, optional: true
+    path  "versions.yml"          , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    def sort_memory = (task.memory.mega/task.cpus).intValue()
+    if ("$bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+    """
+    samtools sort \\
+        $args \\
+        -@ $task.cpus \\
+        -m ${sort_memory}M \\
+        -o ${prefix}.bam \\
+        -T $prefix \\
+        $bam
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.bam
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+    END_VERSIONS
+    """
+}
\ No newline at end of file
diff --git a/modules/nf-core/custom/samtools_n/sort/meta.yml b/modules/nf-core/custom/samtools_n/sort/meta.yml
new file mode 100644
index 0000000000000000000000000000000000000000..158785726d6436b8e5672bfbcc7ff2d948e06e97
--- /dev/null
+++ b/modules/nf-core/custom/samtools_n/sort/meta.yml
@@ -0,0 +1,48 @@
+name: samtools_sort_n
+description: Sort SAM/BAM/CRAM file (name-sorting copy of the samtools sort module)
+keywords:
+  - sort
+  - bam
+  - sam
+  - cram
+tools:
+  - samtools:
+      description: |
+        SAMtools is a set of utilities for interacting with and post-processing
+        short DNA sequence read alignments in the SAM, BAM and CRAM formats, written by Heng Li.
+        These files are generated as output by short read aligners like BWA.
+      homepage: http://www.htslib.org/
+      documentation: http://www.htslib.org/doc/samtools.html
+      doi: 10.1093/bioinformatics/btp352
+      licence: ["MIT"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - bam:
+      type: file
+      description: Sorted BAM/CRAM/SAM file
+      pattern: "*.{bam,cram,sam}"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+  - csi:
+      type: file
+      description: BAM index file (optional)
+      pattern: "*.csi"
+authors:
+  - "@drpatelh"
+  - "@ewels"
\ No newline at end of file
diff --git a/nextflow.config b/nextflow.config
index 24d1bc0e535fa7c8bc48fb93ce22e9875a12c716..8a5267e1091e54798010dcab7b6565e9e46464c8 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -28,8 +28,9 @@ params {
     save_aligned_intermediates = false
     bwt2_opts_end2end = '--very-sensitive -L 30 --score-min L,-0.6,-0.2 --end-to-end --reorder'
     bwt2_opts_trimmed = '--very-sensitive -L 20 --score-min L,-0.6,-0.2 --end-to-end --reorder'
-    keep_dups = false
+    keep_dups = true
     keep_multi = false
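+    // Remove PCR duplicates at the BAM level with Picard MarkDuplicates (requires keep_dups = true)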
+    filter_pcr_picard = true
     min_mapq = 10
 
     // Digestion Hi-C
diff --git a/subworkflows/local/filter_pcr_dup.nf b/subworkflows/local/filter_pcr_dup.nf
new file mode 100644
index 0000000000000000000000000000000000000000..7fb3fa55f498e66c9ebc6cbc386946ce6e3000e9
--- /dev/null
+++ b/subworkflows/local/filter_pcr_dup.nf
@@ -0,0 +1,46 @@
+/*
+ * FILTER_PCR_DUP
+ * Remove PCR duplicate reads with Picard MarkDuplicates
+ */
+
+include { SAMTOOLS_SORT } from '../../modules/nf-core/custom/samtools/sort/main'
+include { SAMTOOLS_SORT_N } from '../../modules/nf-core/custom/samtools_n/sort/main'
+include { FILTER_PAIR } from '../../modules/local/filterbam/main'
+include { SAMTOOLS_INDEX } from '../../modules/nf-core/custom/samtools/index/main'
+include { PICARD_MARKDUPLICATES } from '../../modules/nf-core/custom/picard/markduplicates/main'
+
+workflow FILTER_PCR_DUP {
+
+    take:
+    bam
+    fasta
+    index
+
+    main:
+
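+    // Coordinate-sort each single-end BAM so that Picard MarkDuplicates can process it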
+    SAMTOOLS_SORT(
+        bam
+    )
+
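+    // Mark PCR duplicates on the sorted BAMs; actual removal is expected to be enabled
+    // through the module's ext.args (e.g. --REMOVE_DUPLICATES)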
+    PICARD_MARKDUPLICATES(
+        SAMTOOLS_SORT.out.bam,
+        fasta.collect(),
+        index.collect()
+    )
+
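+    // Re-sort the deduplicated BAMs by read name (SAMTOOLS_SORT_N) before re-pairing the mates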
+    SAMTOOLS_SORT_N(
+        PICARD_MARKDUPLICATES.out.bam
+    )
+    SAMTOOLS_SORT_N.out.bam.set{ ch_bam }
+
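+    // Re-associate the R1 and R2 BAMs of the same sample and chunk into a single channel element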
+    FILTER_PAIR(
+        ch_bam.combine(ch_bam)
+            .map { meta1, bam1, meta2, bam2 ->
+                meta1.id == meta2.id && meta1.chunk == meta2.chunk && meta1.mates == "R1" && meta2.mates == "R2" ? [ meta1, bam1, meta2, bam2 ] : null
+            }
+    )
+    FILTER_PAIR.out.bam.set{ new_ch_bam }
+
+    emit:
+    bam = new_ch_bam
+}
diff --git a/subworkflows/local/hicpro.nf b/subworkflows/local/hicpro.nf
index 8b106a0820cf808501add6e5ad886cc726626107..0cea0ff0cf49d7aa84a49d9b042e349ca7610d6f 100644
--- a/subworkflows/local/hicpro.nf
+++ b/subworkflows/local/hicpro.nf
@@ -3,7 +3,7 @@
  * MAIN WORKFLOW
  * From the raw sequencing reads to the list of valid interactions
  */
-  
+
 include { HICPRO_MAPPING } from './hicpro_mapping'
 include { GET_VALID_INTERACTION } from '../../modules/local/hicpro/get_valid_interaction'
 include { GET_VALID_INTERACTION_DNASE } from '../../modules/local/hicpro/get_valid_interaction_dnase'
@@ -12,121 +12,142 @@ include { MERGE_STATS } from '../../modules/local/hicpro/merge_stats'
 include { HICPRO2PAIRS } from '../../modules/local/hicpro/hicpro2pairs'
 include { BUILD_CONTACT_MAPS } from '../../modules/local/hicpro/build_contact_maps'
 include { ICE_NORMALIZATION } from '../../modules/local/hicpro/run_ice'
+include { FILTER_PCR_DUP } from './filter_pcr_dup'
 
 // Remove meta.chunks
 def removeChunks(row){
-  meta = row[0].clone()
-  meta.remove('chunk')
-  return [meta, row[1]]
+    meta = row[0].clone()
+    meta.remove('chunk')
+    return [meta, row[1]]
 }
 
 workflow HICPRO {
 
-  take:
-  reads // [meta, read1, read2]
-  index // path
-  fragments // path
-  chrsize // path
-  ligation_site // value
-  map_res // values
-
-  main:
-  ch_versions = Channel.empty()
-
-  // Fastq to paired-end bam
-  HICPRO_MAPPING(
-    reads,
-    index,
-    ligation_site
-  )
-  ch_versions = ch_versions.mix(HICPRO_MAPPING.out.versions)
-
-  //***************************************
-  // DIGESTION PROTOCOLS
-
-  if (!params.dnase){
-    GET_VALID_INTERACTION (
-      HICPRO_MAPPING.out.bam,
-      fragments.collect()
+    take:
+    reads // [meta, read1, read2]
+    index // path
+    fragments // path
+    chrsize // path
+    ligation_site // value
+    map_res // values
+    fasta
+
+    main:
+    ch_versions = Channel.empty()
+
+    // Fastq to paired-end bam
+    HICPRO_MAPPING(
+        reads,
+        index,
+        ligation_site
     )
-    ch_versions = ch_versions.mix(GET_VALID_INTERACTION.out.versions)
-    ch_valid_pairs = GET_VALID_INTERACTION.out.valid_pairs
-    ch_valid_stats = GET_VALID_INTERACTION.out.stats
-
-  }else{
-
-  //****************************************
-  // DNASE-LIKE PROTOCOLS
-
-    GET_VALID_INTERACTION_DNASE (
-      HICPRO_MAPPING.out.bam
+    ch_versions = ch_versions.mix(HICPRO_MAPPING.out.versions)
+
+    //***************************************
+    // FILTER PCR DUPLICATES
+
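+    // With filter_pcr_picard, duplicates are removed at the BAM level before valid interactions
+    // are called; keep_dups must therefore stay true so that duplicates are not removed twice.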
+    if (params.filter_pcr_picard && !params.keep_dups){
+        error "Error: cannot filter PCR duplicates with both methods! If filter_pcr_picard is true, keep_dups should be true too"
+    }
+    else if (params.filter_pcr_picard){
+        FILTER_PCR_DUP(
+            HICPRO_MAPPING.out.bam,
+            fasta,
+            index
+        )
+        FILTER_PCR_DUP.out.bam.set{ ch_bam }
+    }
+    else {
+        HICPRO_MAPPING.out.bam.set{ ch_bam }
+    }
+
+
+    //***************************************
+    // DIGESTION PROTOCOLS
+
+    if (!params.dnase){
+        GET_VALID_INTERACTION (
+            ch_bam,
+            fragments.collect()
+        )
+        ch_versions = ch_versions.mix(GET_VALID_INTERACTION.out.versions)
+        ch_valid_pairs = GET_VALID_INTERACTION.out.valid_pairs
+        ch_valid_stats = GET_VALID_INTERACTION.out.stats
+
+    }else{
+
+    //****************************************
+    // DNASE-LIKE PROTOCOLS
+
+        GET_VALID_INTERACTION_DNASE (
+            ch_bam
+        )
+        ch_versions = ch_versions.mix(GET_VALID_INTERACTION_DNASE.out.versions)
+        ch_valid_pairs = GET_VALID_INTERACTION_DNASE.out.valid_pairs
+        ch_valid_stats = GET_VALID_INTERACTION_DNASE.out.stats
+    }
+
+
+    //**************************************
+    // MERGE AND REMOVE DUPLICATES
+
+    //if (params.split_fastq){
+    ch_valid_pairs = ch_valid_pairs.map{ it -> removeChunks(it)}.groupTuple()
+    ch_hicpro_stats = HICPRO_MAPPING.out.mapstats.map{it->removeChunks(it)}.groupTuple()
+                            .concat(HICPRO_MAPPING.out.pairstats.map{it->removeChunks(it)}.groupTuple(),
+                                    ch_valid_stats.map{it->removeChunks(it)}.groupTuple())
+    //}else{
+    //    ch_hicpro_stats = HICPRO_MAPPING.out.mapstats.groupTuple()
+    //                                            .concat(HICPRO_MAPPING.out.pairstats.groupTuple(),
+    //                                                            ch_valid_stats.groupTuple())
+    //}
+
+    MERGE_VALID_INTERACTION (
+        ch_valid_pairs
     )
-    ch_versions = ch_versions.mix(GET_VALID_INTERACTION_DNASE.out.versions)
-    ch_valid_pairs = GET_VALID_INTERACTION_DNASE.out.valid_pairs
-    ch_valid_stats = GET_VALID_INTERACTION_DNASE.out.stats
-  }
-  
-
-  //**************************************
-  // MERGE AND REMOVE DUPLICATES
-  
-  //if (params.split_fastq){
-  ch_valid_pairs = ch_valid_pairs.map{ it -> removeChunks(it)}.groupTuple()
-  ch_hicpro_stats = HICPRO_MAPPING.out.mapstats.map{it->removeChunks(it)}.groupTuple()
-                      .concat(HICPRO_MAPPING.out.pairstats.map{it->removeChunks(it)}.groupTuple(),
-		        ch_valid_stats.map{it->removeChunks(it)}.groupTuple())
-  //}else{
-  //  ch_hicpro_stats = HICPRO_MAPPING.out.mapstats.groupTuple()
-  //                      .concat(HICPRO_MAPPING.out.pairstats.groupTuple(),
-  //                              ch_valid_stats.groupTuple())
-  //}
-
-  MERGE_VALID_INTERACTION (
-    ch_valid_pairs
-  )
-  ch_versions = ch_versions.mix(MERGE_VALID_INTERACTION.out.versions)
-
-  MERGE_STATS(
-    ch_hicpro_stats
-  )
-  ch_versions = ch_versions.mix(MERGE_STATS.out.versions)
-
-  //***************************************
-  // CONVERTS TO PAIRS
-  HICPRO2PAIRS (
-    MERGE_VALID_INTERACTION.out.valid_pairs,
-    chrsize.collect()
-  )
-  ch_versions = ch_versions.mix(HICPRO2PAIRS.out.versions)
-
-  //***************************************
-  // CONTACT MAPS
-  
-  if (params.hicpro_maps){    
-
-    //build_contact_maps
-    BUILD_CONTACT_MAPS(
-      MERGE_VALID_INTERACTION.out.valid_pairs.combine(map_res),
-      chrsize.collect()
+    ch_versions = ch_versions.mix(MERGE_VALID_INTERACTION.out.versions)
+
+    MERGE_STATS(
+        ch_hicpro_stats
     )
-    ch_hicpro_raw_maps = BUILD_CONTACT_MAPS.out.maps
- 
-    // run_ice
-    ICE_NORMALIZATION(
-      BUILD_CONTACT_MAPS.out.maps
+    ch_versions = ch_versions.mix(MERGE_STATS.out.versions)
+
+    //***************************************
+    // CONVERTS TO PAIRS
+    HICPRO2PAIRS (
+        MERGE_VALID_INTERACTION.out.valid_pairs,
+        chrsize.collect()
     )
-    ch_hicpro_iced_maps = ICE_NORMALIZATION.out.maps
-    ch_versions = ch_versions.mix(ICE_NORMALIZATION.out.versions)
-
-  }else{
-    ch_hicpro_raw_maps = Channel.empty()
-    ch_hicpro_iced_maps = Channel.empty()
-  }
-
-  emit:
-  versions = ch_versions
-  pairs = HICPRO2PAIRS.out.pairs
-  mqc = MERGE_VALID_INTERACTION.out.mqc.concat(MERGE_STATS.out.mqc)
-  raw_maps = ch_hicpro_raw_maps
-  iced_maps = ch_hicpro_iced_maps
+    ch_versions = ch_versions.mix(HICPRO2PAIRS.out.versions)
+
+    //***************************************
+    // CONTACT MAPS
+
+    if (params.hicpro_maps){
+
+        //build_contact_maps
+        BUILD_CONTACT_MAPS(
+            MERGE_VALID_INTERACTION.out.valid_pairs.combine(map_res),
+            chrsize.collect()
+        )
+        ch_hicpro_raw_maps = BUILD_CONTACT_MAPS.out.maps
+
+        // run_ice
+        ICE_NORMALIZATION(
+            BUILD_CONTACT_MAPS.out.maps
+        )
+        ch_hicpro_iced_maps = ICE_NORMALIZATION.out.maps
+        ch_versions = ch_versions.mix(ICE_NORMALIZATION.out.versions)
+
+    }else{
+        ch_hicpro_raw_maps = Channel.empty()
+        ch_hicpro_iced_maps = Channel.empty()
+    }
+
+    emit:
+    versions = ch_versions
+    pairs = HICPRO2PAIRS.out.pairs
+    mqc = MERGE_VALID_INTERACTION.out.mqc.concat(MERGE_STATS.out.mqc)
+    raw_maps = ch_hicpro_raw_maps
+    iced_maps = ch_hicpro_iced_maps
 }
diff --git a/subworkflows/local/hicstuff_sub.nf b/subworkflows/local/hicstuff_sub.nf
new file mode 100644
index 0000000000000000000000000000000000000000..9bb3406bbc21a298d1919402dd4d6bdcbdd0690d
--- /dev/null
+++ b/subworkflows/local/hicstuff_sub.nf
@@ -0,0 +1,157 @@
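+// HICSTUFF_SUB: hicstuff-based processing, from single-mate alignment and in-silico
+// digestion to pairing, optional event/PCR filtering, distance law and matrix building.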
+
+include { BOWTIE2_ALIGNMENT } from '../../modules/local/hicstuff/align_bowtie2'
+include { FRAGMENT_ENZYME } from '../../modules/local/hicstuff/fragment_enzyme'
+include { BAM2PAIRS } from '../../modules/local/hicstuff/bam2pairs'
+include { BUILD_MATRIX } from '../../modules/local/hicstuff/build_matrix'
+include { BUILD_MATRIX_COOL } from '../../modules/local/hicstuff/build_matrix_cool'
+include { BUILD_MATRIX_COOL_ALT } from '../../modules/local/hicstuff/build_matrix_cool_alt'
+include { FILTER_EVENT } from '../../modules/local/hicstuff/filter_event'
+include { DISTANCE_LAW } from '../../modules/local/hicstuff/distance_law'
+include { FILTER_PCR } from '../../modules/local/hicstuff/filter_pcr'
+include { GATK4_MARKDUPLICATES } from '../../modules/nf-core/custom/gatk4/markduplicates/main'
+include { SAMTOOLS_SORT } from '../../modules/nf-core/custom/samtools/sort/main'
+include { SAMTOOLS_SORT_N } from '../../modules/nf-core/custom/samtools_n/sort/main'
+include { FILTER_PAIR } from '../../modules/local/filterbam/main'
+include { SAMTOOLS_INDEX } from '../../modules/nf-core/custom/samtools/index/main'
+include { PICARD_MARKDUPLICATES } from '../../modules/nf-core/custom/picard/markduplicates/main'
+
+// Paired-end to Single-end
+def pairToSingle(row, mates) {
+    def meta = row[0].clone()
+    meta.single_end = true
+    meta.mates = mates
+    if (mates == "R1") {
+        return [meta, [ row[1][0]] ]
+    }else if (mates == "R2"){
+        return [meta, [ row[1][1]] ]
+    }
+}
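+// e.g. pairToSingle([ [id:'s1', single_end:false], [r1.fq, r2.fq] ], "R1")
+//      returns [ [id:'s1', single_end:true, mates:'R1'], [r1.fq] ]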
+
+// Single-end to Paired-end
+def singleToPair(row){
+    def meta = row[0].clone()
+    meta.remove('mates')
+    meta.single_end = false
+    return [ meta, row[1] ]
+}
+
+workflow HICSTUFF_SUB {
+
+    take:
+    reads
+    index
+    ligation_site
+    digestion
+    fasta
+    chromosome_size
+
+    main:
+
+    // Align each mate separately and record the mate information in [meta]
+    ch_reads_r1 = reads.map{ it -> pairToSingle(it,"R1") }
+    ch_reads_r2 = reads.map{ it -> pairToSingle(it,"R2") }
+    ch_reads = ch_reads_r1.concat(ch_reads_r2)
+
+    BOWTIE2_ALIGNMENT(
+        ch_reads,
+        index.collect()
+    )
+
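+    // In-silico digestion of the reference genome with the selected enzyme(s)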
+    FRAGMENT_ENZYME(
+        digestion,
+        fasta
+    )
+
+    if (params.filter_pcr && params.filter_pcr_picard ){
+        error "Error: filter_pcr and filter_pcr_picard can't both be true at the same time! Set one of them false in the config file"
+    }
+    else if (params.filter_pcr_picard){
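+        // Picard route: coordinate-sort, mark/remove duplicates, then restore read-name order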
+        SAMTOOLS_SORT(
+            BOWTIE2_ALIGNMENT.out.bam
+        )
+
+
+        PICARD_MARKDUPLICATES(
+            SAMTOOLS_SORT.out.bam,
+            fasta.collect(),
+            index.collect()
+        )
+
+        SAMTOOLS_SORT_N(
+            PICARD_MARKDUPLICATES.out.bam
+        )
+
+        SAMTOOLS_SORT_N.out.bam.set{ ch_bam }
+
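+        // Re-pair the deduplicated R1/R2 BAMs of the same sample and chunk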
+        FILTER_PAIR(
+            ch_bam.combine(ch_bam)
+                .map { meta1, bam1, meta2, bam2 ->
+                    meta1.id == meta2.id && meta1.chunk == meta2.chunk && meta1.mates == "R1" && meta2.mates == "R2" ? [ meta1, bam1, meta2, bam2 ] : null
+                }
+        )
+
+        FILTER_PAIR.out.bam.set{ new_ch_bam }
+
+    }
+    else {
+
+        BOWTIE2_ALIGNMENT.out.bam.set{ ch_bam }
+
+        ch_bam.combine(ch_bam)
+        .map {
+            meta1, bam1, meta2, bam2 ->
+                meta1.id == meta2.id && meta1.chunk == meta2.chunk && meta1.mates == "R1" && meta2.mates == "R2" ? [ meta1,  bam1,  meta2, bam2 ] : null
+        }.set{ new_ch_bam }
+
+    }
+
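+    // Match forward and reverse alignments into a fragment-indexed .pairs file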
+    BAM2PAIRS(
+        new_ch_bam,
+        FRAGMENT_ENZYME.out.info_contigs.collect(),
+        digestion,
+        fasta.collect()
+    )
+    BAM2PAIRS.out.idx_pairs.set{ ch_idx_pairs }
+
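+    // Optional filtering of spurious 3C events (loops, uncuts) from the pairs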
+    if( params.filter_event ){
+        FILTER_EVENT(
+            ch_idx_pairs
+        )
+        FILTER_EVENT.out.idx_pairs_filtered.set{ ch_idx_pairs }
+    }
+
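+    // Optional distance law computation from the (possibly filtered) pairs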
+    if ( params.distance_law ){
+        DISTANCE_LAW(
+            ch_idx_pairs,
+            FRAGMENT_ENZYME.out.fragments_list.collect()
+        )
+    }
+
+    if (params.filter_pcr && params.filter_pcr_picard ){
+        error "Error: filter_pcr and filter_pcr_picard cannot both be true at the same time! Set one of them to false in the config file"
+    }
+    else if ( params.filter_pcr ){
+        FILTER_PCR(
+            ch_idx_pairs
+        )
+        FILTER_PCR.out.idx_pairs_pcrfree.set{ ch_idx_pairs }
+    }
+
+    //TODO: add the filters, distance law and PCR filters as options
+    // for the PCR filter, use either hicstuff or Picard
+
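+    // Build contact matrices from the final pairs (hicstuff text format and cool formats)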
+    BUILD_MATRIX(
+        ch_idx_pairs,
+        FRAGMENT_ENZYME.out.fragments_list.collect()
+    )
+
+    BUILD_MATRIX_COOL(
+        ch_idx_pairs,
+        FRAGMENT_ENZYME.out.fragments_list.collect()
+    )
+
+    BUILD_MATRIX_COOL_ALT(
+        chromosome_size.collect(),
+        ch_idx_pairs
+    )
+}
diff --git a/test.nf b/test.nf
new file mode 100644
index 0000000000000000000000000000000000000000..b960e23168f81ba91048847de68419281d891640
--- /dev/null
+++ b/test.nf
@@ -0,0 +1,55 @@
+#!/usr/bin/env nextflow
+
+nextflow.enable.dsl = 2
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    GENOME PARAMETER VALUES
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+params.fasta = WorkflowMain.getGenomeAttribute(params, 'fasta')
+params.bwt2_index = WorkflowMain.getGenomeAttribute(params, 'bowtie2')
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    VALIDATE & PRINT PARAMETER SUMMARY
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+WorkflowMain.initialise(workflow, params, log)
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    NAMED WORKFLOW FOR PIPELINE
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+include { HICSTUFF } from './workflows/hicstuff'
+
+//
+// WORKFLOW: Run the hicstuff analysis workflow
+//
+workflow TEST_HICSTUFF {
+    HICSTUFF ()
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    RUN ALL WORKFLOWS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+//
+// WORKFLOW: Execute a single named workflow for the pipeline
+// See: https://github.com/nf-core/rnaseq/issues/619
+//
+workflow {
+    TEST_HICSTUFF ()
+}
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+    THE END
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
diff --git a/workflows/hic.nf b/workflows/hic.nf
index 2ffa5b4dc8bee033473cab5b755befe9460ca0a9..1b6b81e1b63dc902004d2bc3f6ed80905360ff89 100644
--- a/workflows/hic.nf
+++ b/workflows/hic.nf
@@ -54,7 +54,7 @@ if (params.res_tads && !params.skip_tads){
 }else{
   ch_tads_res=Channel.empty()
   if (!params.skip_tads){
-    log.warn "[nf-core/hic] Hi-C resolution for TADs calling not specified. See --res_tads" 
+    log.warn "[nf-core/hic] Hi-C resolution for TADs calling not specified. See --res_tads"
   }
 }
 
@@ -64,7 +64,7 @@ if (params.res_dist_decay && !params.skip_dist_decay){
 }else{
   ch_ddecay_res = Channel.empty()
   if (!params.skip_dist_decay){
-    log.warn "[nf-core/hic] Hi-C resolution for distance decay not specified. See --res_dist_decay" 
+    log.warn "[nf-core/hic] Hi-C resolution for distance decay not specified. See --res_dist_decay"
   }
 }
 
@@ -74,7 +74,7 @@ if (params.res_compartments && !params.skip_compartments){
 }else{
   ch_comp_res = Channel.empty()
   if (!params.skip_compartments){
-    log.warn "[nf-core/hic] Hi-C resolution for compartment calling not specified. See --res_compartments" 
+    log.warn "[nf-core/hic] Hi-C resolution for compartment calling not specified. See --res_compartments"
   }
 }
 
@@ -99,7 +99,7 @@ ch_multiqc_custom_methods_description = params.multiqc_methods_description ? fil
 //
 // MODULE: Local to the pipeline
 //
-include { HIC_PLOT_DIST_VS_COUNTS } from '../modules/local/hicexplorer/hicPlotDistVsCounts' 
+include { HIC_PLOT_DIST_VS_COUNTS } from '../modules/local/hicexplorer/hicPlotDistVsCounts'
 include { MULTIQC } from '../modules/local/multiqc'
 
 //
@@ -182,7 +182,8 @@ workflow HIC {
     PREPARE_GENOME.out.res_frag,
     PREPARE_GENOME.out.chromosome_size,
     ch_ligation_site,
-    ch_map_res
+    ch_map_res,
+    ch_fasta
   )
   ch_versions = ch_versions.mix(HICPRO.out.versions)
 
@@ -239,7 +240,7 @@ workflow HIC {
       .filter{ it[0].resolution == it[2] }
       .map { it -> [it[0], it[1]]}
       .set{ ch_cool_tads }
-                                                                                                                                                                                                            
+
     TADS(
       ch_cool_tads
     )
diff --git a/workflows/hicstuff.nf b/workflows/hicstuff.nf
new file mode 100644
index 0000000000000000000000000000000000000000..8263bd3db82361774f74c84884e373cdc8d51a7c
--- /dev/null
+++ b/workflows/hicstuff.nf
@@ -0,0 +1,45 @@
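+// HICSTUFF workflow: check the samplesheet, prepare the genome, then run the
+// hicstuff subworkflow defined in subworkflows/local/hicstuff_sub.nf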
+def summary_params = NfcoreSchema.paramsSummaryMap(workflow, params)
+// Validate input parameters
+WorkflowHic.initialise(params, log)
+// Check mandatory parameters
+if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet not specified!' }
+// Digestion parameters
+if (params.digestion){
+    restriction_site = params.digestion ? params.digest[ params.digestion ].restriction_site ?: false : false
+    ch_restriction_site = Channel.value(restriction_site) // restriction site sequence, with '^' marking the cut position
+    ligation_site = params.digestion ? params.digest[ params.digestion ].ligation_site ?: false : false
+    ch_ligation_site = Channel.value(ligation_site) // ligation site sequence
+}else if (params.dnase){
+    ch_restriction_site = Channel.empty()
+    ch_ligation_site = Channel.empty()
+}else{
+    exit 1, "Ligation motif not found. Please either use the `--digestion` parameters or specify the `--restriction_site` and `--ligation_site`. For DNase Hi-C, please use '--dnase' option"
+}
+
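+// Wrap the genome fasta with an empty meta map ([meta, fasta]), as expected by the downstream modules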
+Channel.fromPath( params.fasta )
+    .ifEmpty { exit 1, "Genome index: Fasta file not found: ${params.fasta}" }
+    .map{it->[[:],it]}
+    .set { ch_fasta }
+
+
+include { INPUT_CHECK } from '../subworkflows/local/input_check'
+include { PREPARE_GENOME } from '../subworkflows/local/prepare_genome'
+include { HICSTUFF_SUB } from '../subworkflows/local/hicstuff_sub'
+
+workflow HICSTUFF {
+    INPUT_CHECK (
+        ch_input
+    )
+    PREPARE_GENOME(
+        ch_fasta,
+        ch_restriction_site
+    )
+    HICSTUFF_SUB(
+        INPUT_CHECK.out.reads,
+        PREPARE_GENOME.out.index,
+        ch_ligation_site,
+        params.digestion, //str enzyme
+        ch_fasta,
+        PREPARE_GENOME.out.chromosome_size
+    )
+}