diff --git a/bin/hicstuff b/bin/hicstuff
new file mode 120000
index 0000000000000000000000000000000000000000..156df240f8aab8ee35206570c438a0461cbd76c8
--- /dev/null
+++ b/bin/hicstuff
@@ -0,0 +1 @@
+hicstuff_repo/hicstuff
\ No newline at end of file
diff --git a/bin/hicstuff_bam2pairs.py b/bin/hicstuff_bam2pairs.py
index 744b5875d390a681cfc2bce8417ab7d0198ea608..83d5c9702fe04601322aaf772031cfe98c1227d2 100755
--- a/bin/hicstuff_bam2pairs.py
+++ b/bin/hicstuff_bam2pairs.py
@@ -5,14 +5,14 @@ import argparse
 import pysam as ps
 import pandas as pd
 import itertools
-from hicstuff_log import logger
-import hicstuff_log as hcl
-import hicstuff_io as hio
-import hicstuff_digest as hcd
-import hicstuff_stats as hcs
+from hicstuff.log import logger
+import hicstuff.log as hcl
+import hicstuff.io as hio
+import hicstuff.digest as hcd
+import hicstuff.stats as hcs
 from Bio import SeqIO
 import logging
-from hicstuff_version import __version__
+from hicstuff.version import __version__
 
 
 
diff --git a/bin/hicstuff_build_matrix.py b/bin/hicstuff_build_matrix.py
index f99700bbc648ced284c5e64a499d4eecd74e8cba..fef5f0a4a54cc20ebb7e6fed0216482697ca9f5e 100755
--- a/bin/hicstuff_build_matrix.py
+++ b/bin/hicstuff_build_matrix.py
@@ -7,11 +7,11 @@ import pandas as pd
 import shutil as st
 import subprocess as sp
 import itertools
-from hicstuff_log import logger
-import hicstuff_io as hio
-import hicstuff_digest as hcd
+from hicstuff.log import logger
+import hicstuff.io as hio
+import hicstuff.digest as hcd
 from Bio import SeqIO
-import hicstuff_log as hcl
+import hicstuff.log as hcl
 
 
 def pairs2matrix(
diff --git a/bin/hicstuff_cutsite.py b/bin/hicstuff_cutsite.py
index 058c907d82041b4d67c752841d25d1e422d40dc0..fa3faa2a74ccd718773ebdcc6b10c2a94c574910 100755
--- a/bin/hicstuff_cutsite.py
+++ b/bin/hicstuff_cutsite.py
@@ -28,359 +28,9 @@ import pyfastx
 import re
 import sys
 import argparse
-import hicstuff_digest as hcd
-from hicstuff_log import logger
-
-
-def cut_ligation_sites(
-    fq_for, fq_rev, digest_for, digest_rev, enzyme, mode, seed_size, n_cpu
-):
-    """Create new reads to manage pairs with a digestion and create multiple
-    pairs to take into account all the contact present.
-
-    The function write two files for both the forward and reverse fastq with the
-    new reads. The new reads have at the end of the ID ":[0-9]" added to
-    differentiate the different pairs created from one read.
-
-    The function will look for all the sites present and create new pairs of
-    reads according to the mode given to retreive as much as possible of the HiC
-    signal.
-
-    Parameters
-    ----------
-    fq_for : str
-        Path to the forward fastq file to digest.
-    fq_rev : str
-        Path to the reverse fatsq file to digest.
-    digest_for : str
-        Path to the output digested forward fatsq file to write.
-    digest_rev : str
-        Path to the output digested reverse fatsq file to write.
-    enzyme : str
-        The list of restriction enzyme used to digest the genome separated by a
-        comma. Example: HpaII,MluCI.
-    mode : str
-        Mode to use to make the digestion. Three values possible: "all",
-        "for_vs_rev", "pile".
-    seed_size : int
-        Minimum size of a fragment (i.e. seed size used in mapping as reads
-        smaller won't be mapped.)
-    n_cpu : int
-        Number of CPUs.
- """ - # Process the ligation sites given - ligation_sites = hcd.gen_enzyme_religation_regex(enzyme) - - # Defined stop_token which is used to mark the end of input file - stop_token = "STOP" - # A stack is a string cointaining multiple read pairs - max_stack_size = 1000 - - # Create count to have an idea of the digested pairs repartition. - original_number_of_pairs = 0 - final_number_of_pairs = 0 - new_reads_for = "" - new_reads_rev = "" - current_stack = 0 - - # Start parallel threading to compute the - # ctx = multiprocessing.get_context("spawn") - queue = multiprocessing.Queue(max(1, n_cpu - 1)) - writer_process = multiprocessing.Process( - target=_writer, args=(digest_for, digest_rev, queue, stop_token) - ) - writer_process.start() - - # Iterate on all pairs - for read_for, read_rev in zip( - pyfastx.Fastq(fq_for, build_index=False), - pyfastx.Fastq(fq_rev, build_index=False), - ): - - # Count the numbers of original reads processed. - original_number_of_pairs += 1 - - # Count for stack size. - current_stack += 1 - - # Extract components of the reads. - for_name, for_seq, for_qual = read_for - rev_name, rev_seq, rev_qual = read_rev - - # Sanity check to be sure all reads are with their mate. - if for_name != rev_name: - logger.error( - "The fastq files contains reads not sorted :\n{0}\n{1}".format( - for_name, rev_name - ) - ) - sys.exit(1) - - # Cut the forward and reverse reads at the ligation sites. - for_seq_list, for_qual_list = cutsite_read( - ligation_sites, - for_seq, - for_qual, - seed_size, - ) - rev_seq_list, rev_qual_list = cutsite_read( - ligation_sites, - rev_seq, - rev_qual, - seed_size, - ) - - # Write the new combinations of fragments. - new_reads_for, new_reads_rev, final_number_of_pairs = write_pair( - new_reads_for, - new_reads_rev, - for_name, - for_seq_list, - for_qual_list, - rev_seq_list, - rev_qual_list, - mode, - final_number_of_pairs, - ) - - # If stack full, add it in the queue. - if current_stack == max_stack_size: - - # Add the pair in the queue. - pairs = (new_reads_for.encode(), new_reads_rev.encode()) - queue.put(pairs) - - # Empty the stack - current_stack = 0 - new_reads_for = "" - new_reads_rev = "" - - # End the parallel processing. - pairs = (new_reads_for.encode(), new_reads_rev.encode()) - queue.put(pairs) - queue.put(stop_token) - writer_process.join() - - # Return information on the different pairs created - logger.info(f"Library used: {fq_for} - {fq_rev}") - logger.info(f"Number of pairs before digestion: {original_number_of_pairs}") - logger.info(f"Number of pairs after digestion: {final_number_of_pairs}") - - -def cutsite_read(ligation_sites, seq, qual, seed_size=0): - """Find ligation sites in a given sequence and return list of the fragmented - sequence and quality. - - Parameters: - ----------- - ligation_sites : re.Pattern - Regex of all possible ligations according to the given enzymes. - seq : str - Sequence where to search for ligation_sites. - qual : str - Quality values of the sequence given. - seed_size : int - Minimum size of a fragment (i.e. seed size used in mapping as reads - smaller won't be mapped.) - - Returns: - -------- - list of str - List of cut sequences. The split is made 4 bases after the start of - the ligation site. - list of str - List of string of the qualities. - - Examples: - --------- - >>> cutsite_read(re.compile(r'GA.TA.TC'), "AAGAGTATTC", "FFF--FAFAF") - (['AAGAGT', 'ATTC'], ['FFF--F', 'AFAF']) - """ - - # Find the ligation sites. 
- ligation_sites_list = [] - if ligation_sites.search(seq): - ligation_sites_list = [ - site.start() for site in ligation_sites.finditer(seq) - ] - ligation_sites_list.append(len(seq)) - - # Split the sequences on the ligation sites. - seq_list = [] - qual_list = [] - left_site = 0 - for right_site in ligation_sites_list: - if right_site + 4 - left_site > seed_size: - seq_list.append(seq[left_site : right_site + 4]) - qual_list.append(qual[left_site : right_site + 4]) - left_site = right_site + 4 - - return seq_list, qual_list - - -def write_pair( - new_reads_for, - new_reads_rev, - name, - for_seq_list, - for_qual_list, - rev_seq_list, - rev_qual_list, - mode, - final_number_of_pairs, -): - """Function to write one pair with the combinations of fragment depending on - the chosen mode. - - Parameters: - ----------- - new_reads_for : str - Stack of the new forward reads ready to be written. - new_reads_rev : str - Stack of the new reverse reads ready to be written. - name : str - Name of the fastq read. - for_seq_list : list - List of forward sequences of the fastq read. - for_qual_list : list - List of forward qualities of the fastq read. - rev_seq_list : list - List of reverse sequencs of the fastq read. - rev_qual_list : list - List of reverse qualities of the fastq read. - mode : str - Mode to use to make the digestion. Three values possible: "all", - "for_vs_rev", "pile". - final_numbers_of_pairs : int - Count of pairs after cutting. - - Returns: - -------- - str - Stack of forward reads ready to be written with the last pairs added. - str - Stack of reverse reads ready to be written with the last pairs added. - int - Count of pairs after cutting. - """ - - # Mode "for_vs_rev": Make contacts only between fragments from different - # reads (one fragment from forward and one from reverse). - if mode == "for_vs_rev": - for i in range(len(for_seq_list)): - for j in range(len(rev_seq_list)): - final_number_of_pairs += 1 - new_reads_for += "@%s\n%s\n+\n%s\n" % ( - name + ":" + str(i) + str(j), - for_seq_list[i], - for_qual_list[i], - ) - new_reads_rev += "@%s\n%s\n+\n%s\n" % ( - name + ":" + str(i) + str(j), - rev_seq_list[j], - rev_qual_list[j], - ) - - # Mode "all": Make all the possible contacts between the fragments. - elif mode == "all": - seq_list = for_seq_list + rev_seq_list - qual_list = for_qual_list + rev_qual_list - for i in range(len(seq_list)): - for j in range(i + 1, len(seq_list)): - final_number_of_pairs += 1 - if i < len(for_seq_list): - new_reads_for += "@%s\n%s\n+\n%s\n" % ( - name + ":" + str(i) + str(j), - seq_list[i], - qual_list[i], - ) - # Reverse the forward read if comes from reverse. - else: - new_reads_for += "@%s\n%s\n+\n%s\n" % ( - name + ":" + str(i) + str(j), - seq_list[i][::-1], - qual_list[i][::-1], - ) - # Reverse the reverse read if comes from forward. - if j < len(for_seq_list): - new_reads_rev += "@%s\n%s\n+\n%s\n" % ( - name + ":" + str(i) + str(j), - seq_list[j][::-1], - qual_list[j][::-1], - ) - else: - new_reads_rev += "@%s\n%s\n+\n%s\n" % ( - name + ":" + str(i) + str(j), - seq_list[j], - qual_list[j], - ) - - # Mode "pile": Only make contacts bewteen two adjacent fragments. 
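To make the three modes documented in write_pair concrete, here is a small hypothetical helper (not part of hicstuff) that lists which fragment index pairs each mode would emit for a read pair split into two forward and two reverse fragments.

from itertools import combinations, product

def mode_pairs(n_for, n_rev, mode):
    """Index pairs (i, j) emitted for n_for forward and n_rev reverse fragments."""
    if mode == "for_vs_rev":          # one fragment from each mate
        return list(product(range(n_for), range(n_for, n_for + n_rev)))
    if mode == "all":                 # every combination of two fragments
        return list(combinations(range(n_for + n_rev), 2))
    if mode == "pile":                # only adjacent fragments
        return [(i, i + 1) for i in range(n_for + n_rev - 1)]
    raise ValueError(mode)

# Two forward and two reverse fragments (indices 0-1 and 2-3):
print(mode_pairs(2, 2, "for_vs_rev"))  # [(0, 2), (0, 3), (1, 2), (1, 3)]
print(mode_pairs(2, 2, "all"))         # all 6 combinations of two fragments
print(mode_pairs(2, 2, "pile"))        # [(0, 1), (1, 2), (2, 3)]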
- elif mode == "pile": - seq_list = for_seq_list + rev_seq_list - qual_list = for_qual_list + rev_qual_list - for i in range(len(seq_list) - 1): - final_number_of_pairs += 1 - if i < len(for_seq_list) - 1: - new_reads_for += "@%s\n%s\n+\n%s\n" % ( - name + ":" + str(i) + str(i + 1), - seq_list[i], - qual_list[i], - ) - new_reads_rev += "@%s\n%s\n+\n%s\n" % ( - name + ":" + str(i) + str(i + 1), - seq_list[i + 1][::-1], - qual_list[i + 1][::-1], - ) - elif i == len(for_seq_list) - 1: - new_reads_for += "@%s\n%s\n+\n%s\n" % ( - name + ":" + str(i) + str(i + 1), - seq_list[i], - qual_list[i], - ) - new_reads_rev += "@%s\n%s\n+\n%s\n" % ( - name + ":" + str(i) + str(i + 1), - seq_list[i + 1], - qual_list[i + 1], - ) - else: - new_reads_for += "@%s\n%s\n+\n%s\n" % ( - name + ":" + str(i) + str(i + 1), - seq_list[i][::-1], - qual_list[i][::-1], - ) - new_reads_rev += "@%s\n%s\n+\n%s\n" % ( - name + ":" + str(i) + str(i + 1), - seq_list[i + 1], - qual_list[i + 1], - ) - - return new_reads_for, new_reads_rev, final_number_of_pairs - - -def _writer(output_for, output_rev, queue, stop_token): - """Function to write the pair throw parallel processing. - - Parameters: - ----------- - output_for : str - Path to the output forward compressed fastq file. - output_rev : str - Path to the output reverse compressed fastq file. - queue : multiprocessing.queues.Queue - Queue for the multiprocesing of the whole file. - stop_token : str - Token to signal that the end of the file have been reached. - """ - with gzip.open(output_for, "wb") as for_fq, gzip.open( - output_rev, "wb" - ) as rev_fq: - while True: - line = queue.get() - if line == stop_token: - return 0 - for_fq.write(line[0]) - rev_fq.write(line[1]) +import hicstuff.digest as hcd +from hicstuff.log import logger +import hicstuff.cutsite as hc if __name__ == "__main__": parser = argparse.ArgumentParser() @@ -416,4 +66,4 @@ if __name__ == "__main__": elif enzyme == "arima": enzyme = "DpnII,HinfI" - cut_ligation_sites(reads1, reads2, out1, out2, enzyme, mode, seed_size, n_cpu) + hc.cut_ligation_sites(reads1, reads2, out1, out2, enzyme, mode, seed_size, n_cpu) diff --git a/bin/hicstuff_digest.py b/bin/hicstuff_digest.py deleted file mode 100644 index 3fba3da20ddfd4d500c00cc5b01141b8f355f410..0000000000000000000000000000000000000000 --- a/bin/hicstuff_digest.py +++ /dev/null @@ -1,489 +0,0 @@ -#!/usr/bin/env python3 -# coding: utf-8 - -"""Genome digestion -Functions used to write auxiliary instagraal compatible -sparse matrices. 
-""" - -from Bio import SeqIO, SeqUtils -from Bio.Seq import Seq -from Bio.Restriction import RestrictionBatch, Analysis -import os, sys, csv -import re -import collections -import copy -import matplotlib.pyplot as plt -import numpy as np -import pandas as pd -from hicstuff_log import logger -import hicstuff_io as hio - -DEFAULT_FRAGMENTS_LIST_FILE_NAME = "fragments_list.txt" -DEFAULT_INFO_CONTIGS_FILE_NAME = "info_contigs.txt" -DEFAULT_SPARSE_MATRIX_FILE_NAME = "abs_fragments_contacts_weighted.txt" -DEFAULT_KB_BINNING = 1 -DEFAULT_THRESHOLD_SIZE = 0 -# Most used enzyme for eukaryotes -DEFAULT_ENZYME = "DpnII" -# If using evenly-sized chunks instead of restriction -# enzymes, they shouldn't be too short -DEFAULT_MIN_CHUNK_SIZE = 50 - - -def write_frag_info( - fasta, - enzyme, - min_size=DEFAULT_THRESHOLD_SIZE, - circular=False, - output_contigs=DEFAULT_INFO_CONTIGS_FILE_NAME, - output_frags=DEFAULT_FRAGMENTS_LIST_FILE_NAME, - output_dir=None, -): - """Digest and write fragment information - Write the fragments_list.txt and info_contigs.txt that are necessary for - instagraal to run. - Parameters - ---------- - fasta : pathlib.Path or str - The path to the reference genome - enzyme : str, int or list of str - If a string, must be the name of an enzyme (e.g. DpnII) and the genome - will be cut at the enzyme's restriction sites. If a number, the genome - will be cut uniformly into chunks with length equal to that number. A - list of enzymes can also be specified if using multiple enzymes. - min_size : float, optional - Size below which shorter contigs are discarded. Default is 0, i.e. all - contigs are retained. - circular : bool, optional - Whether the genome is circular. Default is False. - output_contigs : str, optional - The name of the file with contig info. Default is info_contigs.txt - output_frags : str, optional - The name of the file with fragment info. Default is fragments_list.txt - output_dir : [type], optional - The path to the output directory, which will be created if not already - existing. Default is the current directory. 
- """ - - records = SeqIO.parse(hio.read_compressed(fasta), "fasta") - - try: - info_contigs_path = os.path.join(output_dir, output_contigs) - frag_list_path = os.path.join(output_dir, output_frags) - except TypeError: - info_contigs_path = output_contigs - frag_list_path = output_frags - - with open(info_contigs_path, "w") as info_contigs: - - info_contigs.write("contig\tlength\tn_frags\tcumul_length\n") - - with open(frag_list_path, "w") as fragments_list: - - fragments_list.write( - "id\tchrom\tstart_pos" "\tend_pos\tsize\tgc_content\n" - ) - - total_frags = 0 - - for record in records: - contig_seq = record.seq - contig_name = record.id - contig_length = len(contig_seq) - if contig_length < int(min_size): - continue - - sites = get_restriction_table( - contig_seq, enzyme, circular=circular - ) - fragments = ( - contig_seq[sites[i] : sites[i + 1]] - for i in range(len(sites) - 1) - ) - n_frags = 0 - - current_id = 1 - start_pos = 0 - for frag in fragments: - frag_length = len(frag) - if frag_length > 0: - end_pos = start_pos + frag_length - gc_content = SeqUtils.GC(frag) / 100.0 - - current_fragment_line = "%s\t%s\t%s\t%s\t%s\t%s\n" % ( - current_id, - contig_name, - start_pos, - end_pos, - frag_length, - gc_content, - ) - - fragments_list.write(current_fragment_line) - - try: - assert (current_id == 1 and start_pos == 0) or ( - current_id > 1 and start_pos > 0 - ) - except AssertionError: - logger.error((current_id, start_pos)) - raise - start_pos = end_pos - current_id += 1 - n_frags += 1 - - current_contig_line = "%s\t%s\t%s\t%s\n" % ( - contig_name, - contig_length, - n_frags, - total_frags, - ) - total_frags += n_frags - info_contigs.write(current_contig_line) - - -def attribute_fragments(pairs_file, idx_pairs_file, restriction_table): - """ - Writes the indexed pairs file, which has two more columns than the input - pairs file corresponding to the restriction fragment index of each read. - Note that pairs files have 1bp point positions whereas restriction table - has 0bp point poisitions. - Parameters - ---------- - pairs_file: str - Path the the input pairs file. Consists of 7 tab-separated - columns: readID, chr1, pos1, chr2, pos2, strand1, strand2 - idx_pairs_file: str - Path to the output indexed pairs file. Consists of 9 white space - separated columns: readID, chr1, pos1, chr2, pos2, strand1, strand2, - frag1, frag2. frag1 and frag2 are 0-based restriction fragments based - on whole genome. - restriction_table: dict - Dictionary with chromosome identifiers (str) as keys and list of - positions (int) of restriction sites as values. - """ - - # NOTE: Bottlenecks here are 1. binary search in find_frag and 2. writerow - # 1. could be reduced by searching groups of N frags in parallel and 2. by - # writing N frags simultaneously using a single call of writerows. 
- - # Parse and update header section - pairs_header = hio.get_pairs_header(pairs_file) - header_size = len(pairs_header) - chrom_order = [] - with open(idx_pairs_file, "w") as idx_pairs: - for line in pairs_header: - # Add new column names to header - if line.startswith("#columns"): - line = line.rstrip() + " frag1 frag2" - if line.startswith("#chromsize"): - chrom_order.append(line.split()[1]) - idx_pairs.write(line + "\n") - - # Get number of fragments per chrom to allow genome-based indices - shift_frags = {} - prev_frags = 0 - for rank, chrom in enumerate(chrom_order): - if rank > 0: - # Note the "-1" because there are nfrags + 1 sites in rest table - prev_frags += len(restriction_table[chrom_order[rank - 1]]) - 1 - # Idx of each chrom's frags will be shifted by n frags in previous chroms - shift_frags[chrom] = prev_frags - - missing_contigs = set() - # Attribute pairs to fragments and append them to output file (after header) - with open(pairs_file, "r") as pairs, open( - idx_pairs_file, "a" - ) as idx_pairs: - # Skip header lines - for _ in range(header_size): - next(pairs) - - # Define input and output fields - pairs_cols = [ - "readID", - "chr1", - "pos1", - "chr2", - "pos2", - "strand1", - "strand2", - ] - idx_cols = pairs_cols + ["frag1", "frag2"] - - # Use csv reader / writer to automatically parse columns into a dict - pairs_reader = csv.DictReader( - pairs, fieldnames=pairs_cols, delimiter="\t" - ) - pairs_writer = csv.DictWriter( - idx_pairs, fieldnames=idx_cols, delimiter="\t" - ) - - for pair in pairs_reader: - # Get the 0-based indices of corresponding restriction fragments - # Deducing 1 from pair position to get it into 0bp point - pair["frag1"] = find_frag( - int(pair["pos1"]) - 1, restriction_table[pair["chr1"]] - ) - pair["frag2"] = find_frag( - int(pair["pos2"]) - 1, restriction_table[pair["chr2"]] - ) - # Shift fragment indices to make them genome-based instead of - # chromosome-based - try: - pair["frag1"] += shift_frags[pair["chr1"]] - except KeyError: - missing_contigs.add(pair["chr1"]) - try: - pair["frag2"] += shift_frags[pair["chr2"]] - except KeyError: - missing_contigs.add(pair["chr2"]) - - # Write indexed pairs in the new file - pairs_writer.writerow(pair) - - if missing_contigs: - logger.warning( - "Pairs on the following contigs were discarded as " - "those contigs are not listed in the paris file header. " - "This is normal if you filtered out small contigs: %s" - % " ".join(list(missing_contigs)) - ) - - -def get_restriction_table(seq, enzyme, circular=False): - """ - Get the restriction table for a single genomic sequence. - Parameters - ---------- - seq : Seq object - A biopython Seq object representing a chromosomes or contig. - enzyme : int, str or list of str - The name of the restriction enzyme used, or a list of restriction - enzyme names. Can also be an integer, to digest by fixed chunk size. - circular : bool - Wether the genome is circular. - Returns - ------- - numpy.array: - List of restriction fragment boundary positions for the input sequence. - >>> from Bio.Seq import Seq - >>> get_restriction_table(Seq("AAGCCGGATCGG"),"HpaII") - array([ 0, 4, 12]) - >>> get_restriction_table(Seq("AA"),["HpaII", "MluCI"]) - array([0, 2]) - >>> get_restriction_table(Seq("AA"),"aeiou1") - Traceback (most recent call last): - ... - ValueError: aeiou1 is not a valid restriction enzyme. - >>> get_restriction_table("AA","HpaII") - Traceback (most recent call last): - ... 
- TypeError: Expected Seq or MutableSeq instance, got <class 'str'> instead - """ - chrom_len = len(seq) - wrong_enzyme = "{} is not a valid restriction enzyme.".format(enzyme) - # Restriction batch containing the restriction enzyme - try: - enz = [enzyme] if isinstance(enzyme, str) else enzyme - cutter = RestrictionBatch(enz) - except (TypeError, ValueError): - try: - cutter = max(int(enzyme), DEFAULT_MIN_CHUNK_SIZE) - except ValueError: - raise ValueError(wrong_enzyme) - - # Conversion from string type to restriction type - if isinstance(cutter, int): - sites = [i for i in range(0, chrom_len, cutter)] - if sites[-1] < chrom_len: - sites.append(chrom_len) - else: - # Find sites of all restriction enzymes given - ana = Analysis(cutter, seq, linear=not circular) - sites = ana.full() - # Gets all sites into a single flat list with 0-based index - sites = [site - 1 for enz in sites.values() for site in enz] - # Sort by position and allow first add start and end of seq - sites.sort() - sites.insert(0, 0) - sites.append(chrom_len) - - return np.array(sites) - - -def find_frag(pos, r_sites): - """ - Use binary search to find the index of a chromosome restriction fragment - corresponding to an input genomic position. - Parameters - ---------- - pos : int - Genomic position, in base pairs. - r_sites : list - List of genomic positions corresponding to restriction sites. - Returns - ------- - int - The 0-based index of the restriction fragment to which the position belongs. - >>> find_frag(15, [0, 20, 30]) - 0 - >>> find_frag(15, [10, 20, 30]) - Traceback (most recent call last): - ... - ValueError: The first position in the restriction table is not 0. - >>> find_frag(31, [0, 20, 30]) - Traceback (most recent call last): - ... - ValueError: Read position is larger than last entry in restriction table. - """ - if r_sites[0] != 0: - raise ValueError( - "The first position in the restriction table is not 0." - ) - if pos > r_sites[-1]: - raise ValueError( - "Read position is larger than last entry in restriction table." - ) - # binary search for the index of the read - index = max(np.searchsorted(r_sites, pos, side="right") - 1, 0) - # Last site = end of the chrom, index of last fragment is last site - 1 - index = min(len(r_sites) - 2, index) - - return index - - -def frag_len( - frags_file_name=DEFAULT_FRAGMENTS_LIST_FILE_NAME, - output_dir=None, - plot=False, - fig_path=None, -): - """ - logs summary statistics of fragment length distribution based on an - input fragment file. Can optionally show a histogram instead - of text summary. - Parameters - ---------- - frags_file_name : str - Path to the output list of fragments. - output_dir : str - Directory where the list should be saved. - plot : bool - Wether a histogram of fragment length should be shown. - fig_path : str - If a path is given, the figure will be saved instead of shown. 
- """ - - try: - frag_list_path = os.path.join(output_dir, frags_file_name) - except TypeError: - frag_list_path = frags_file_name - frags = pd.read_csv(frag_list_path, sep="\t") - nfrags = frags.shape[0] - med_len = frags["size"].median() - nbins = 40 - if plot: - fig, ax = plt.subplots() - _, _, _ = ax.hist(frags["size"], bins=nbins) - - ax.set_xlabel("Fragment length [bp]") - ax.set_ylabel("Log10 number of fragments") - ax.set_title("Distribution of restriction fragment length") - ax.set_yscale("log", base=10) - ax.annotate( - "Total fragments: {}".format(nfrags), - xy=(0.95, 0.95), - xycoords="axes fraction", - fontsize=12, - horizontalalignment="right", - verticalalignment="top", - ) - ax.annotate( - "Median length: {}".format(med_len), - xy=(0.95, 0.90), - xycoords="axes fraction", - fontsize=12, - horizontalalignment="right", - verticalalignment="top", - ) - # Tweak spacing to prevent clipping of ylabel - fig.tight_layout() - if fig_path: - plt.savefig(fig_path) - else: - plt.show() - plt.clf() - else: - logger.info( - "Genome digested into {0} fragments with a median " - "length of {1}".format(nfrags, med_len) - ) - - -def gen_enzyme_religation_regex(enzyme): - """Return a regex which corresponds to all possible religation sites given a - set of enzyme. - Parameters: - ----------- - enzyme : str - String that contains the names of the enzyme separated by a comma. - Returns: - -------- - re.Pattern : - Regex that corresponds to all possible ligation sites given a set of - enzyme. - Examples: - --------- - >>> gen_enzyme_religation_regex('HpaII') - re.compile('CCGCGG') - >>> gen_enzyme_religation_regex('HpaII,MluCI') - re.compile('AATTAATT|AATTCGG|CCGAATT|CCGCGG') - """ - - # Split the str on the comma to separate the different enzymes. - enzyme = enzyme.split(",") - - # Check on Biopython dictionnary the enzyme. - rb = RestrictionBatch(enzyme) - - # Initiation: - give_list = [] - accept_list = [] - ligation_list = [] - - # Iterates on the enzymes. - for enz in rb: - - # Extract restriction sites and look for cut sites. - site = enz.elucidate() - fw_cut = site.find("^") - rev_cut = site.find("_") - - # Process "give" site. Remove N on the left (useless). - give_site = site[:rev_cut].replace("^", "") - while give_site[0] == "N": - give_site = give_site[1:] - give_list.append(give_site) - - # Process "accept" site. Remove N on the rigth (useless). - accept_site = site[fw_cut + 1 :].replace("_", "") - while accept_site[-1] == "N": - accept_site = accept_site[:-1] - accept_list.append(accept_site) - - # Iterates on the two list to build all the possible HiC ligation sites. - for give_site in give_list: - for accept_site in accept_list: - # Replace "N" by "." for regex searching of the sites - ligation_list.append((give_site + accept_site).replace("N", ".")) - ligation_list.append( - str(Seq(give_site + accept_site).reverse_complement()).replace( - "N", "." - ) - ) - - # Build the regex for any ligation sites. 
- pattern = "|".join(sorted(list(set(ligation_list)))) - return re.compile(pattern) diff --git a/bin/hicstuff_distance_law.py b/bin/hicstuff_distance_law.py index 545c3f7e1e0929d070a850ed239aab90d585f8dc..8b49d6c61921013abcffb8bf6d2384cfc4e0ce8d 100755 --- a/bin/hicstuff_distance_law.py +++ b/bin/hicstuff_distance_law.py @@ -10,770 +10,10 @@ from matplotlib import cm import pandas as pd import os as os import csv as csv -import hicstuff_io as hio -from hicstuff_log import logger +import hicstuff.io as hio +from hicstuff.log import logger import argparse - -def export_distance_law(xs, ps, names, out_file=None): - """ Export the x(s) and p(s) from two list of numpy.ndarrays to a table - in txt file with three columns separated by a tabulation. The first column - contains the x(s), the second the p(s) and the third the name of the arm or - chromosome. The file is created in the directory given by outdir or the - current file if no file is given. - Parameters - ---------- - xs : list of numpy.ndarray - The list of the start position of logbins of each p(s) in base pairs. - ps : list of numpy.ndarray - The list of p(s). - names : list of string - List containing the names of the chromosomes/arms/conditions of the p(s) - values given. - out_file : str or None - Path where output file should be written. ./distance_law.txt by default. - Return - ------ - txt file: - File with three columns separated by a tabulation. The first column - contains the x(s), the second the p(s) and the third the name of the arm - or chromosome. The file is creates in the output file given or the - default one if none given. - """ - # ./distance_law.txt as out_file if no out_file is given. - if out_file is None: - out_file = os.getcwd() + "/distance_law.txt" - # Sanity check: as many chromosomes/arms as ps - if len(xs) != len(names): - logger.error("Number of chromosomes/arms and number of p(s) list differ.") - sys.exit(1) - # Create the file and write it - f = open(out_file, "w") - for i in range(len(xs)): - for j in range(len(xs[i])): - line = ( - str(format(xs[i][j], "g")) - + "\t" - + str(format(ps[i][j], "g")) - + "\t" - + names[i] - + "\n" - ) - f.write(line) - f.close() - - -def import_distance_law(distance_law_file): - """ Import the table created by export_distance_law and return the list of - x(s) and p(s) in the order of the chromosomes. - Parameters - ---------- - distance_law_file : string - Path to the file containing three columns : the x(s), the p(s), and the - chromosome/arm name. - Returns - ------- - list of numpy.ndarray : - The start coordinate of each bin one array per chromosome or arm. - list of numpy.ndarray : - The distance law probabilities corresponding of the bins of the - previous list. - list of numpy.ndarray : - The names of the arms/chromosomes corresponding to the previous - list. 
- """ - file = pd.read_csv( - distance_law_file, - sep="\t", - header=None, - dtype={"a": np.int32, "b": np.float32, "c": str}, - ) - names_idx = np.unique(file.iloc[:, 2], return_index=True)[1] - names = [file.iloc[:, 2][index] for index in sorted(names_idx)] - xs = [None] * len(names) - ps = [None] * len(names) - labels = [None] * len(names) - for i in range(len(names)): - subfile = file[file.iloc[:, 2] == names[i]] - xs[i] = np.array(subfile.iloc[:, 0]) - ps[i] = np.array(subfile.iloc[:, 1]) - labels[i] = np.array(subfile.iloc[:, 2]) - return xs, ps, labels - - -def get_chr_segment_bins_index(fragments, centro_file=None, rm_centro=0): - """Get the index positions of the start and end bins of different - chromosomes, or arms if the centromers position have been given from the - fragments file made by hicstuff. - - Parameters - ---------- - fragments : pandas.DataFrame - Table containing in the first coulum the ID of the fragment, in the - second the names of the chromosome in the third and fourth the start - position and the end position of the fragment. The file have no header. - (File like the 'fragments_list.txt' from hicstuff) - centro_file : None or str - None or path to a file with the genomic positions of the centromers - sorted as the chromosomes separated by a space. The file have only one - line. - rm_centro : int - If a value is given, will remove the contacts close the centromeres. - It will remove as many kb as the argument given. Default is zero. - - Returns - ------- - list of floats : - The start and end indices of chromosomes/arms to compute the distance - law on each chromosome/arm separately. - """ - # Get bins where chromosomes start - chr_start_bins = np.where(fragments == 0)[0] - # Create a list of same length for the end of the bins - chr_end_bins = np.zeros(len(chr_start_bins)) - # Get bins where chromsomes end - for i in range(len(chr_start_bins) - 1): - chr_end_bins[i] = chr_start_bins[i + 1] - chr_end_bins[-1] = len(fragments.iloc[:, 0]) - # Combine start and end of bins in a single array. Values are the id of the - # bins - chr_segment_bins = np.sort(np.concatenate((chr_start_bins, chr_end_bins))) - if centro_file is not None: - # Read the file of the centromers - with open(centro_file, "r", newline="") as centro: - centro = csv.reader(centro, delimiter=" ") - centro_pos = next(centro) - # Sanity check: as many chroms as centromeres - if len(chr_start_bins) != len(centro_pos): - logger.warning( - "Number of chromosomes and centromeres differ, centromeres position are not taking into account." - ) - centro_file = None - if centro_file is not None: - # Get bins of centromeres - centro_bins = np.zeros(2 * len(centro_pos)) - for i in range(len(chr_start_bins)): - if (i + 1) < len(chr_start_bins): - subfrags = fragments[chr_start_bins[i] : chr_start_bins[i + 1]] - else: - subfrags = fragments[chr_start_bins[i] :] - # index of last fragment starting before centro in same chrom - centro_bins[2 * i] = chr_start_bins[i] + max( - np.where( - subfrags["start_pos"][:] // (int(centro_pos[i]) - rm_centro) == 0 - )[0] - ) - centro_bins[2 * i + 1] = chr_start_bins[i] + max( - np.where( - subfrags["start_pos"][:] // (int(centro_pos[i]) + rm_centro) == 0 - )[0] - ) - # Combine centro and chrom bins into a single array. Values are the id - # of the bins started and ending the arms. 
- chr_segment_bins = np.sort( - np.concatenate((chr_start_bins, chr_end_bins, centro_bins)) - ) - return list(chr_segment_bins) - - -def get_chr_segment_length(fragments, chr_segment_bins): - """Compute a list of the length of the different objects (arm or - chromosome) given by chr_segment_bins. - - Parameters - ---------- - fragments : pandas.DataFrame - Table containing in the first coulum the ID of the fragment, in the - second the names of the chromosome in the third and fourth the start - position and the end position of the fragment. The file have no header. - (File like the 'fragments_list.txt' from hicstuff) - chr_segment_bins : list of floats - The start and end indices of chromosomes/arms to compute the distance - law on each chromosome/arm separately. - - Returns - ------- - list of numpy.ndarray: - The length in base pairs of each chromosome or arm. - """ - chr_segment_length = [None] * int(len(chr_segment_bins) / 2) - # Iterate in chr_segment_bins in order to obtain the size of each chromosome/arm - for i in range(len(chr_segment_length)): - # Obtain the size of the chromosome/arm, the if loop is to avoid the - # case of arms where the end position of the last fragments doesn't - # mean the size of arm. If it's the right arm we have to start to count the - # size from the beginning of the arm. - if fragments["start_pos"].iloc[int(chr_segment_bins[2 * i])] == 0: - n = fragments["end_pos"].iloc[int(chr_segment_bins[2 * i + 1]) - 1] - else: - n = ( - fragments["end_pos"].iloc[int(chr_segment_bins[2 * i + 1]) - 1] - - fragments["start_pos"].iloc[int(chr_segment_bins[2 * i])] - ) - chr_segment_length[i] = n - return chr_segment_length - - -def logbins_xs(fragments, chr_segment_length, base=1.1, circular=False): - """Compute the logbins of each chromosome/arm in order to have theme to - compute distance law. At the end you will have bins of increasing with a - logspace with the base of the value given in base. - - Parameters - ---------- - fragments : pandas.DataFrame - Table containing in the first coulum the ID of the fragment, in the - second the names of the chromosome in the third and fourth the start - position and the end position of the fragment. The file have no header. - (File like the 'fragments_list.txt' from hicstuff) - chr_segment_length: list of floats - List of the size in base pairs of the different arms or chromosomes. - base : float - Base use to construct the logspace of the bins, 1.1 by default. - circular : bool - If True, calculate the distance as the chromosome is circular. Default - value is False. - - Returns - ------- - list of numpy.ndarray : - The start coordinate of each bin one array per chromosome or arm. - """ - # Create the xs array and a list of the length of the chromosomes/arms - xs = [None] * len(chr_segment_length) - # Iterate in chr_segment_bins in order to make the logspace - for i in range(len(chr_segment_length)): - n = chr_segment_length[i] - # if the chromosome is circular the mawimum distance between two reads - # are divided by two - if circular: - n /= 2 - n_bins = int(np.log(n) / np.log(base)) - # For each chromosome/arm compute a logspace to have the logbin - # equivalent to the size of the arms and increasing size of bins - xs[i] = np.unique( - np.logspace(0, n_bins, num=n_bins + 1, base=base, dtype=int) - ) - return xs - - -def circular_distance_law(distance, chr_segment_length, chr_bin): - """Recalculate the distance to return the distance in a circular chromosome - and not the distance between the two genomic positions. 
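The geometric binning performed by logbins_xs above can be reduced to a few lines; logbins below is a hypothetical condensation with the same base-1.1 default, not the hicstuff function itself.

import numpy as np

def logbins(segment_length_bp, base=1.1, circular=False):
    # Bin edges grow geometrically; a circular segment only needs half its length.
    n = segment_length_bp / 2 if circular else segment_length_bp
    n_bins = int(np.log(n) / np.log(base))
    return np.unique(np.logspace(0, n_bins, num=n_bins + 1, base=base, dtype=int))

print(logbins(10000)[:8])   # [1 2 3 4 5 6 7 8]: early bins are 1 bp wide
print(logbins(10000)[-3:])  # the last bins approach the segment length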
- Parameters - ---------- - chr_segment_bins : list of floats - The start and end indices of chromosomes/arms to compute the distance - law on each chromosome/arm separately. - chr_segment_length: list of floats - List of the size in base pairs of the different arms or chromosomes. - distance : int - Distance between two fragments with a contact. - Returns - ------- - int : - The real distance in the chromosome circular and not the distance - between two genomic positions - Examples - -------- - >>> circular_distance_law(7500, [2800, 9000], 1) - 1500 - >>> circular_distance_law(1300, [2800, 9000], 0) - 1300 - >>> circular_distance_law(1400, [2800, 9000], 0) - 1400 - """ - chr_len = chr_segment_length[chr_bin] - if distance > chr_len / 2: - distance = chr_len - distance - return distance - - -def get_pairs_distance( - line, fragments, chr_segment_bins, chr_segment_length, xs, ps, circular=False -): - """From a line of a pair reads file, filter -/+ or +/- reads, keep only the - reads in the same chromosome/arm and compute the distance of the the two - fragments. It modify the input ps in order to count or not the line given. - It will add one in the logbin corresponding to the distance. - Parameters - ---------- - line : OrderedDict - Line of a pair reads file with the these keys readID, chr1, pos1, chr2, - pos2, strand1, strand2, frag1, frag2. The values are in a dictionnary. - fragments : pandas.DataFrame - Table containing in the first coulum the ID of the fragment, in the - second the names of the chromosome in the third and fourth the start - position and the end position of the fragment. The file have no header. - (File like the 'fragments_list.txt' from hicstuff) - chr_segment_bins : list of floats - The start and end indices of chromosomes/arms to compute the distance - law on each chromosome/arm separately. - chr_segment_length: list of floats - List of the size in base pairs of the different arms or chromosomes. - xs : list of lists - The start coordinate of each bin one array per chromosome or arm. - ps : list of lists - The sum of contact already count. xs and ps should have the same - dimensions. - circular : bool - If True, calculate the distance as the chromosome is circular. Default - value is False. - """ - # Check this is a pairs_idx file and not simple pairs - if line['frag1'] is None: - logger.error( - 'Input pairs file must have frag1 and frag2 columns. In hicstuff ' - 'pipeline, this is the "valid_idx.pairs" file.' - ) - # We only keep the event +/+ or -/-. This is done to avoid to have any - # event of uncut which are not possible in these events. We can remove the - # good events of +/- or -/+ because we don't need a lot of reads to compute - # the distance law and if we eliminate these reads we do not create others - # biases as they should have the same distribution. - if line["strand1"] == line["strand2"]: - # Find in which chromosome/arm are the fragment 1 and 2. - chr_bin1 = ( - np.searchsorted(chr_segment_bins, int(line["frag1"]), side="right") - 1 - ) - chr_bin2 = ( - np.searchsorted(chr_segment_bins, int(line["frag2"]), side="right") - 1 - ) - # We only keep the reads with the two fragments in the same chromosome - # or arm. - if chr_bin1 == chr_bin2: - # Remove the contacts in the centromeres if centro_remove - if chr_bin1 % 2 == 0: - chr_bin1 = int(chr_bin1 / 2) - # For the reads -/-, the fragments should be religated with both - # their start position (position in the left on the genomic - # sequence, 5'). For the reads +/+ it's the contrary. 
We compute - # the distance as the distance between the two extremities which - # are religated. - if line["strand1"] == "-": - distance = abs( - np.array(fragments["start_pos"][int(line["frag1"])]) - - np.array(fragments["start_pos"][int(line["frag2"])]) - ) - if line["strand1"] == "+": - distance = abs( - np.array(fragments["end_pos"][int(line["frag1"])]) - - np.array(fragments["end_pos"][int(line["frag2"])]) - ) - if circular: - distance = circular_distance_law( - distance, chr_segment_length, chr_bin1 - ) - xs_temp = xs[chr_bin1][:] - # Find the logbins in which the distance is and add one to the sum - # of contact. - ps_indice = np.searchsorted(xs_temp, distance, side="right") - 1 - ps[chr_bin1][ps_indice] += 1 - - -def get_names(fragments, chr_segment_bins): - """Make a list of the names of the arms or the chromosomes. - Parameters - ---------- - fragments : pandas.DataFrame - Table containing in the first coulum the ID of the fragment, in the - second the names of the chromosome in the third and fourth the start - position and the end position of the fragment. The file have no header. - (File like the 'fragments_list.txt' from hicstuff) - chr_segment_bins : list of floats - The start position of chromosomes/arms to compute the distance law on - each chromosome/arm separately. - Returns - ------- - list of floats : - List of the labels given to the curves. It will be the name of the arms - or chromosomes. - """ - # Get the name of the chromosomes. - chr_names_idx = np.unique(fragments.iloc[:, 1], return_index=True)[1] - chr_names = [fragments.iloc[:, 1][index] for index in sorted(chr_names_idx)] - # Case where they are separate in chromosomes - if len(chr_segment_bins) / 2 != len(chr_names): - names = [] - for chr in chr_names: - names.append(str(chr) + "_left") - names.append(str(chr) + "_rigth") - chr_names = names - return chr_names - - -def get_distance_law( - pairs_reads_file, - fragments_file, - centro_file=None, - base=1.1, - out_file=None, - circular=False, - rm_centro=0, -): - """Compute distance law as a function of the genomic coordinate aka P(s). - Bin length increases exponentially with distance. Works on pairs file - format from 4D Nucleome Omics Data Standards Working Group. If the genome - is composed of several chromosomes and you want to compute the arms - separately, provide a file with the positions of centromers. Create a file - with three coulumns separated by a tabulation. The first column contains - the xs, the second the ps and the third the name of the arm or chromosome. - The file is create in the directory given in outdir or in the current - directory if no directory given. - Parameters - ---------- - pairs_reads_file : string - Path of a pairs file format from 4D Nucleome Omics Data Standards - Working Group with the 8th and 9th coulumns are the ID of the fragments - of the reads 1 and 2. - fragments_file : path - Path of a table containing in the first column the ID of the fragment, - in the second the names of the chromosome in the third and fourth - the start position and the end position of the fragment. The file have - no header. (File like the 'fragments_list.txt' from hicstuff) - centro_file : None or str - None or path to a file with the genomic positions of the centromers - sorted as the chromosomes separated by a space. The file have only one - line. - base : float - Base use to construct the logspace of the bins - 1.1 by default. - out_file : None or str - Path of the output file. If no path given, the output is returned. 
- circular : bool - If True, calculate the distance as the chromosome is circular. Default - value is False. Cannot be True if centro_file is not None - rm_centro : int - If a value is given, will remove the contacts close the centromeres. - It will remove as many kb as the argument given. Default is None. - Returns - ------- - xs : list of numpy.ndarray - Basepair coordinates of log bins used to compute distance law. - ps : list of numpy.ndarray - Contacts value, in arbitrary units, at increasingly long genomic ranges - given by xs. - names : list of strings - Names of chromosomes that are plotted - """ - # Sanity check : centro_fileition should be None if chromosomes are - # circulars (no centromeres is circular chromosomes). - if circular and centro_file != None: - logger.error("Chromosomes cannot have a centromere and be circular") - sys.exit(1) - # Import third columns of fragments file - fragments = pd.read_csv(fragments_file, sep="\t", header=0, usecols=[0, 1, 2, 3]) - # Calculate the indice of the bins to separate into chromosomes/arms - chr_segment_bins = get_chr_segment_bins_index(fragments, centro_file, rm_centro) - # Calculate the length of each chromosoms/arms - chr_segment_length = get_chr_segment_length(fragments, chr_segment_bins) - xs = logbins_xs(fragments, chr_segment_length, base, circular) - # Create the list of p(s) with one array for each chromosome/arm and each - # array contain as many values as in the logbin - ps = [None] * len(chr_segment_length) - for i in range(len(xs)): - ps[i] = [0] * len(xs[i]) - # Read the pair reads file - with open(pairs_reads_file, "r", newline="") as reads: - # Remove the line of the header - header_length = len(hio.get_pairs_header(pairs_reads_file)) - for i in range(header_length): - next(reads) - # Reads all the others lines and put the values in a dictionnary with - # the keys : 'readID', 'chr1', 'pos1', 'chr2', 'pos2', 'strand1', - # 'strand2', 'frag1', 'frag2' - reader = csv.DictReader( - reads, - fieldnames=[ - "readID", - "chr1", - "pos1", - "chr2", - "pos2", - "strand1", - "strand2", - "frag1", - "frag2", - ], - delimiter="\t", - ) - for line in reader: - # Iterate in each line of the file after the header - get_pairs_distance( - line, fragments, chr_segment_bins, chr_segment_length, xs, ps, circular - ) - # Divide the number of contacts by the area of the logbin - for i in range(len(xs)): - n = chr_segment_length[i] - for j in range(len(xs[i]) - 1): - # Use the area of a trapezium to know the area of the logbin with n - # the size of the matrix. - ps[i][j] /= ((2 * n - xs[i][j + 1] - xs[i][j]) / 2) * ( - (1 / np.sqrt(2)) * (xs[i][j + 1] - xs[i][j]) - ) - # print( - # ((2 * n - xs[i][j + 1] - xs[i][j]) / 2) - # * ((1 / np.sqrt(2)) * (xs[i][j + 1] - xs[i][j])) - # ) - # Case of the last logbin which is an isosceles rectangle triangle - # print(ps[i][-5:-1], ((n - xs[i][-1]) ** 2) / 2) - ps[i][-1] /= ((n - xs[i][-1]) ** 2) / 2 - names = get_names(fragments, chr_segment_bins) - if out_file: - export_distance_law(xs, ps, names, out_file) - return xs, ps, names - - -def normalize_distance_law(xs, ps, inf=3000, sup=None): - """Normalize the distance in order to have the sum of the ps values between - 'inf' (default value is 3kb) until the end of the array equal to one and - limit the effect of coverage between two conditions/chromosomes/arms when - you compare them together. If we have a list of ps, it will normalize until - the length of the shorter object or the value of sup, whichever is smaller. 
- Parameters - ---------- - xs : list of numpy.ndarray - list of logbins corresponding to the ps. - ps : list of numpy.ndarray - Average ps or list of ps of the chromosomes/arms. xs and ps have to - have the same shape. - inf : integer - Inferior value of the interval on which, the normalization is applied. - sup : integer - Superior value of the interval on which, the normalization is applied. - Returns - ------- - list of numpy.ndarray : - List of ps each normalized separately. - """ - # Sanity check: xs and ps have the same dimension - if np.shape(xs) != np.shape(ps): - logger.error("xs and ps should have the same dimension.") - sys.exit(1) - # Define the length of shortest chromosomes as a lower bound for the sup boundary - min_xs = len(min(xs, key=len)) - normed_ps = [None] * len(ps) - if sup is None: - sup = np.inf - for chrom_id, chrom_ps in enumerate(ps): - # Iterate on the different ps to normalize each of theme separately - chrom_sum = 0 - # Change the last value to have something continuous because the last - # one is much bigger (computed on matrix corner = triangle instead of trapezoid). - chrom_ps[-1] = chrom_ps[-2] - for bin_id, bin_value in enumerate(chrom_ps): - # Compute normalization factor based on values between inf and sup - # Sup will be whatever is smaller between user-provided sup and length of - # the shortest chromosome - if (xs[chrom_id][bin_id] > inf) and (xs[chrom_id][bin_id] < sup) and (bin_id < min_xs): - chrom_sum += bin_value - if chrom_sum == 0: - chrom_sum += 1 - logger.warning("No values of p(s) in one segment") - # Make the normalisation - normed_ps[chrom_id] = np.array(ps[chrom_id]) / chrom_sum - return normed_ps - - -def average_distance_law(xs, ps, sup, big_arm_only=False): - """Compute the average distance law between the file the different distance - law of the chromosomes/arms. - Parameters - ---------- - xs : list of numpy.ndarray - The list of logbins. - ps : list of lists of floats - The list of numpy.ndarray. - sup : int - Value given to set the minimum size of the chromosomes/arms to make the - average. - big_arm_only : bool - By default False. If True, will only take into account the arms/chromosomes - longer than the value of sup. Sup mandatory if set. - Returns - ------- - numpy.ndarray : - List of the xs with the max length. - numpy.ndarray : - List of the average_ps. - """ - # Find longest chromosome / arm and make two arrays of this length for the - # average distance law and remove the last value. - xs = max(xs, key=len) - max_length = len(xs) - ps_values = np.zeros(max_length) - ps_occur = np.zeros(max_length) - for chrom_ps in ps: - # Iterate on ps in order to calculate the number of occurences (all the - # chromossomes/arms are not as long as the longest one) and the sum of - # the values of distance law. - # Change the last value to have something continuous because the last - # one is much bigger. - chrom_ps[-1] = chrom_ps[-2] - # Sanity check : sup strictly inferior to maw length arms. 
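The normalization described in normalize_distance_law above amounts to rescaling each curve by its summed signal inside the [inf, sup] window; normalize_ps below is a simplified, hypothetical version that skips the shortest-chromosome bound and the last-bin correction.

import numpy as np

def normalize_ps(xs, ps, inf=3000, sup=np.inf):
    # Scale each curve so that the summed signal between inf and sup is 1.
    normed = []
    for x, p in zip(xs, ps):
        p = np.asarray(p, dtype=float)
        mask = (x > inf) & (x < sup)
        total = p[mask].sum() or 1.0
        normed.append(p / total)
    return normed

xs = [np.array([1000, 2000, 4000, 8000])]
ps = [np.array([8.0, 4.0, 2.0, 1.0])]
print(normalize_ps(xs, ps))  # [array([2.667, 1.333, 0.667, 0.333])] (approximately)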
- if big_arm_only: - if sup >= xs[-1]: - logger.error( - "sup have to be inferior to the max length of arms/chromsomes if big arm only set" - ) - sys.exit(1) - if sup <= xs[len(chrom_ps) - 1]: - ps_occur[: len(chrom_ps)] += 1 - ps_values[: len(chrom_ps)] += chrom_ps - else: - ps_occur[: len(chrom_ps)] += 1 - ps_values[: len(chrom_ps)] += chrom_ps - # Make the mean - averaged_ps = ps_values / ps_occur - return xs, averaged_ps - - -def slope_distance_law(xs, ps): - """Compute the slope of the loglog curve of the ps as the - [log(ps(n+1)) - log(ps(n))] / [log(n+1) - log(n)]. - Compute only list of ps, not list of array. - Parameters - ---------- - xs : list of numpy.ndarray - The list of logbins. - ps : list of numpy.ndarray - The list of ps. - Returns - ------- - list of numpy.ndarray : - The slope of the distance law. It will be shorter of one value than the - ps given initially. - """ - slope = [None] * len(ps) - for i in range(len(ps)): - ps[i][ps[i] == 0] = 10 ** (-9) - # Compute the slope - slope_temp = np.log(np.array(ps[i][1:]) / np.array(ps[i][:-1])) / np.log( - np.array(xs[i][1:]) / np.array(xs[i][:-1]) - ) - # The 1.8 is the intensity of the normalisation, it could be adapted. - slope_temp[slope_temp == np.nan] = 10 ** (-15) - slope[i] = ndimage.filters.gaussian_filter1d(slope_temp, 1.8) - return slope - - -def get_ylim(xs, curve, inf, sup): - """Compute the max and the min of the list of list between the inferior - (inf) and superior (sup) bounds. - Parameters - ---------- - xs : list of numpy.ndarray - The list of the logbins starting position in basepair. - curve : list of numpy.ndarray - A list of numpy.ndarray from which you want to extract minimum and - maximum values in a given interval. - inf : int - Inferior limit of the interval in basepair. - sup : int - Superior limit of the interval in basepair. - Returns - ------- - min_tot : float - Minimum value of the list of list in this interval. - max_tot : float - Maximum value of the list of list in this interval. - Examples - -------- - >>> get_ylim([np.array([1, 4, 15]), np.array([1, 4, 15, 40])], - ... [np.array([5.5, 3.2, 17.10]), np.array([24, 32, 1.111, 18.5])], - ... 2, - ... 15 - ... ) - (1.111, 32.0) - """ - # Create a list in order to add all the interesting values - flatten_list = [] - for i, logbins in enumerate(xs): - # Iterate on xs. - # Search for the minimum index corresponding to the smallest bin - # superior or equal to inf (in pair base). - min_value = min(logbins[logbins >= inf], default=0) - min_index = np.where(logbins == min_value)[0] - # Skip chromosome if total size smaller than inf - if len(min_index) == 0: - continue - # Search for the maximum index corresponding to the biggest bin - # inferior or equal to sup (in pair base). - max_value = max(logbins[logbins <= sup]) - max_index = np.where(logbins == max_value)[0] - # Add the values in the interval in the flattened list. - if int(max_index) != len(logbins) - 1: - max_index += 1 - for j in range(int(min_index), int(max_index)): - flatten_list.append(curve[i][j]) - # Caluclate the min and the max of this list. - min_tot = min(flatten_list) - max_tot = max(flatten_list) - return min_tot, max_tot - - -def plot_ps_slope(xs, ps, labels, fig_path=None, inf=3000, sup=None): - """Compute two plots, one with the different distance law of each - arm/chromosome in loglog scale and one with the slope (derivative) of these - curves. Generate a svg file with savefig. - Parameters - ---------- - xs : list of numpy.ndarray - The list of the logbins of each ps. 
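The slope computed by slope_distance_law above is the discrete derivative of log P(s) against log s, lightly smoothed. A minimal sketch with a synthetic power-law curve of exponent -1, so the expected output is a slope close to -1 everywhere:

import numpy as np
from scipy import ndimage

def ps_slope(x, p, sigma=1.8):
    """Discrete log-log derivative of one P(s) curve, lightly smoothed."""
    p = np.asarray(p, dtype=float)
    p[p == 0] = 1e-9                       # avoid log(0)
    raw = np.log(p[1:] / p[:-1]) / np.log(np.asarray(x[1:]) / np.asarray(x[:-1]))
    return ndimage.gaussian_filter1d(raw, sigma)

x = np.array([1e3, 1e4, 1e5, 1e6])
p = np.array([1e-1, 1e-2, 1e-3, 1e-4])    # a perfect power law with exponent -1
print(ps_slope(x, p))                      # values close to -1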
- ps : list of numpy.ndarray - The list of ps. - labels_file : list of strings - Names of the different curves in the order in which they are given. - fig_path : str - Path where the figure will be created. If None (default), the plot is - shown in an interactive window. - inf : int - Value of the mimimum x of the window of the plot. Have to be strictly - positive. By default 3000. - sup : int - Value of the maximum x of the window of the plot. By default None. - - """ - # Give the max value for sup if no value have been attributed - if sup is None: - sup = max(max(xs, key=len)) - - # Compute slopes from the curves - slope = slope_distance_law(xs, ps) - # Make the plot of distance law - # Give a range of color - cols = iter(cm.rainbow(np.linspace(0, 1, len(ps)))) - fig, (ax1, ax2) = plt.subplots(1, 2, sharex=True, figsize=(18, 10)) - plt.subplots_adjust(left=0.05, right=0.85, top=0.93, bottom=0.07) - ax1.set_xlabel("Distance (pb)", fontsize="x-large") - ax1.set_ylabel("P(s)", fontsize="x-large") - ax1.set_title("Distance law", fontsize="xx-large") - ylim = get_ylim(xs, ps, inf, sup) - ax1.set_ylim(0.9 * ylim[0], 1.1 * ylim[1]) - for i in range(len(ps)): - # Iterate on the different distance law array and take them by order of - # size in order to have the color scale equivalent to the size scale - col = next(cols) - ax1.loglog(xs[i], ps[i], label=labels[i]) - # Make the same plot with the slope - cols = iter(cm.rainbow(np.linspace(0, 1, len(slope)))) - ax2.set_xlabel("Distance (pb)", fontsize="x-large") - ax2.set_ylabel("Slope", fontsize="x-large") - ax2.set_title("Slope of the distance law", fontsize="xx-large") - ax2.set_xlim([inf, sup]) - ylim = get_ylim(xs, slope, inf, sup) - ax2.set_ylim(1.1 * ylim[0], 0.9 * ylim[1]) - xs2 = [None] * len(xs) - for i in range(len(slope)): - xs2[i] = xs[i][:-1] - col = next(cols) - ax2.semilogx(xs2[i], slope[i], label=labels[i], subs=[2, 3, 4, 5, 6, 7, 8, 9]) - ax2.legend(loc="upper left", bbox_to_anchor=(1.02, 1.00), ncol=1, fontsize="large") - # Save the figure in svg - if fig_path is not None: - plt.savefig(fig_path) - else: - plt.show() - - +import hicstuff.distance_law as hdl if __name__ == "__main__": parser = argparse.ArgumentParser() @@ -819,7 +59,7 @@ if __name__ == "__main__": else: distance_law_plot = prefix+"_"+distance_law_plot - x_s, p_s, _ = get_distance_law( + x_s, p_s, _ = hdl.get_distance_law( input, fragment, centro_file, @@ -831,9 +71,9 @@ if __name__ == "__main__": if plot: # Retrieve chrom labels from distance law file - _, _, chr_labels = import_distance_law(out_file) + _, _, chr_labels = hdl.import_distance_law(out_file) chr_labels = [lab[0] for lab in chr_labels] chr_labels_idx = np.unique(chr_labels, return_index=True)[1] chr_labels = [chr_labels[index] for index in sorted(chr_labels_idx)] - p_s = normalize_distance_law(x_s, p_s) + p_s = hdl.normalize_distance_law(x_s, p_s) plot_ps_slope(x_s, p_s, labels=chr_labels, fig_path=distance_law_plot) diff --git a/bin/hicstuff_filter.py b/bin/hicstuff_filter.py index 62642ac6a605697dfeafea77f99dba0f99a0560b..3b0bedde4d48d5aa3a1aabb06b66416f98eaf3fb 100755 --- a/bin/hicstuff_filter.py +++ b/bin/hicstuff_filter.py @@ -20,511 +20,11 @@ import sys import numpy as np from collections import OrderedDict import matplotlib.pyplot as plt -from hicstuff_log import logger import argparse import logging +import hicstuff.filter as hf import hicstuff_log as hcl - -def process_read_pair(line): - r"""Process and order read pairs in a .pairs record. 
- Takes a read pair (line) from a .pairs file as input, reorders the pair - so that read 1 in intrachromosomal pairs always has the smallest genomic - coordinate. - Parameters - ---------- - line : str - Record from a pairs file with columns: readID, chr1, pos1, chr2, - pos2, strand1, strand2, frag1, frag2 - Returns - ------- - dict - Dictionary with reordered pair where each column from the line as an - entry. The number of restriction sites separating reads in the pair and - the type of event are also stored in the dictionary, under keys - "nsites" and "type". - Examples - -------- - >>> d = process_read_pair("readX\ta\t1\tb\t20\t-\t-\t1\t3") - >>> for u in sorted(d.items()): - ... print(u) - ('chr1', 'a') - ('chr2', 'b') - ('frag1', 1) - ('frag2', 3) - ('nsites', 2) - ('pos1', 1) - ('pos2', 20) - ('readID', 'readX') - ('strand1', '-') - ('strand2', '-') - ('type', 'inter') - >>> d = process_read_pair('readY\ta\t20\ta\t10\t-\t+\t2\t1') - >>> [d[x] for x in sorted(d.keys())] - ['a', 'a', 1, 2, 1, 10, 20, 'readY', '+', '-', '+-'] - """ - # Split line by whitespace - p = line.split("\t") - if len(p) != 9: - raise ValueError( - "Your input file does not have 9 columns. Make sure " - "the file has the readID and twice the following 4 fields " - "(once for each read in the pair): chr pos strand frag." - ) - # Saving each column as a dictionary entry with meaningful key - cols = [ - "readID", - "chr1", - "pos1", - "chr2", - "pos2", - "strand1", - "strand2", - "frag1", - "frag2", - ] - p = {cols[i]: p[i] for i in range(len(cols))} - # Transforming numeric columns to int - for col in ["pos1", "pos2", "frag1", "frag2"]: - p[col] = int(p[col]) - - # invert records for intrachromosomal pairs where rec2 comes before - # rec1 in genomic coordinates - if p["chr1"] == p["chr2"] and p["pos2"] < p["pos1"]: - p["strand1"], p["strand2"] = p["strand2"], p["strand1"] - p["pos1"], p["pos2"] = p["pos2"], p["pos1"] - p["frag1"], p["frag2"] = p["frag2"], p["frag1"] - - # Number of restriction sites separating reads in the pair - p["nsites"] = abs(p["frag2"] - p["frag1"]) - # Get event type - if p["chr1"] == p["chr2"]: - p["type"] = "".join([p["strand1"], p["strand2"]]) - else: - p["type"] = "inter" - - return p - - -def get_thresholds( - in_dat, interactive=False, plot_events=False, fig_path=None, prefix=None -): - """Guess distance threshold for event filtering - Analyse the events in the first million of Hi-C pairs in the library, plot - the occurrences of each event type according to number of restriction - fragments, and ask user interactively for the minimum threshold for uncuts - and loops. - Parameters - ---------- - in_dat: str - Path to the .pairs file containing Hi-C pairs. - interactive: bool - If True, plots are diplayed and thresholds are required interactively. - plot_events : bool - Whether to show the plot - fig_path : str - Path where the figure will be saved. If None, the figure will be - diplayed interactively. - prefix : str - If the library has a name, it will be shown on plots. - - Returns - ------- - dictionary - dictionary with keys "uncuts" and "loops" where the values are the - corresponding thresholds entered by the user. - """ - thr_uncut = None - thr_loop = None - max_sites = 50 - # Map of event -> legend name of event for intrachromosomal pairs. 
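For readers new to the event vocabulary used below, a minimal restatement of how `process_read_pair` labels a pair (illustrative only; the real function also reorders intrachromosomal pairs by coordinate and counts restriction sites):

```python
def pair_type(chr1, chr2, strand1, strand2):
    """Label a Hi-C pair the same way process_read_pair() does."""
    # Interchromosomal pairs are all lumped together as "inter";
    # intrachromosomal pairs keep their strand combination (++, --, +-, -+).
    return "inter" if chr1 != chr2 else strand1 + strand2

print(pair_type("a", "b", "-", "-"))  # inter
print(pair_type("a", "a", "+", "-"))  # +- (uncut-like)
print(pair_type("a", "a", "-", "+"))  # -+ (loop-like)
```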
- legend = { - "++": "++ (weird)", - "--": "-- (weird)", - "+-": "+- (uncuts)", - "-+": "-+ (loops)", - } - colors = {"++": "#222222", "+-": "r", "--": "#666666", "-+": "tab:orange"} - n_events = {event: np.zeros(max_sites) for event in legend} - i = 0 - # open the file for reading (just the first 1 000 000 lines) - with open(in_dat, "r") as pairs: - for line in pairs: - # Skip header lines - if line.startswith("#"): - continue - i += 1 - # Only use the first million pairs to estimate thresholds - if i == 1000000: - break - # Process Hi-C pair into a dictionary - p = process_read_pair(line) - # Type of event and number of restriction site between reads - etype = p["type"] - nsites = p["nsites"] - # Count number of events for intrachrom pairs - if etype != "inter" and nsites < max_sites: - n_events[etype][nsites] += 1 - - def plot_event(n_events, legend, name): - """Plot the frequency of a given event types over distance.""" - plt.xlim([-0.5, 15]) - plt.plot( - range(n_events[name].shape[0]), - n_events[name], - "o-", - label=legend[name], - linewidth=2.0, - c=colors[name], - ) - - if interactive: - # PLot: - try: - plt.figure(0) - for event in legend: - plot_event(n_events, legend, event) - plt.grid() - plt.xlabel("Number of restriction fragment(s)") - plt.ylabel("Number of events") - plt.yscale("log") - plt.legend() - plt.show(block=False) - - except Exception: - logger.error( - "Unable to show plots, skipping figure generation. Perhaps " - "there is no Xserver running ? (might be due to windows " - "environment). Try running without the interactive option." - ) - - # Asks the user for appropriate thresholds - print( - "Please enter the number of restriction fragments separating " - "reads in a Hi-C pair below or at which loops and " - "uncuts events will be excluded\n", - file=sys.stderr, - ) - thr_uncut = int(input("Enter threshold for the uncuts events (+-):")) - thr_loop = int(input("Enter threshold for the loops events (-+):")) - try: - plt.clf() - except Exception: - pass - else: - # Estimate thresholds from data - for event in n_events: - fixed = n_events[event] - fixed[fixed == 0] = 1 - n_events[event] = fixed - - all_events = np.log(np.array(list(n_events.values()))) - # Compute median occurences at each restriction sites - event_med = np.median(all_events, axis=0) - # Compute MAD, to have a robust estimator of the expected deviation - # from median at long distances - mad = np.median(abs(all_events - event_med)) - exp_stdev = mad / 0.67449 - # Iterate over sites, from furthest to frag+2 - for site in range(max_sites)[:1:-1]: - # For uncuts and loops, keep the last (closest) site where the - # deviation from other events <= expected_stdev - if ( - abs(np.log(n_events["+-"][site]) - event_med[site]) - <= exp_stdev - ): - thr_uncut = site - if ( - abs(np.log(n_events["-+"][site]) - event_med[site]) - <= exp_stdev - ): - thr_loop = site - if thr_uncut is None or thr_loop is None: - raise ValueError( - "The threshold for loops or uncut could not be estimated. " - "Please try running with -i to investigate the problem." 
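One constant in the estimation branch above deserves a note: `mad / 0.67449`. A short standalone check of why that rescaling turns a median absolute deviation into a robust standard-deviation estimate (illustrative only; random toy data, not Hi-C counts):

```python
# For a normal distribution, median(|x - median(x)|) ≈ 0.67449 * sigma,
# so MAD / 0.67449 recovers sigma while staying insensitive to the
# short-distance uncut/loop excess the heuristic is trying to detect.
import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(loc=5.0, scale=2.0, size=100_000)
mad = np.median(np.abs(x - np.median(x)))
print(round(mad / 0.67449, 2))   # close to 2.0, the true sigma
```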
- ) - logger.info( - "Filtering with thresholds: uncuts={0} loops={1}".format( - thr_uncut, thr_loop - ) - ) - if plot_events: - try: - plt.figure(1) - plt.xlim([-0.5, 15]) - # Draw colored lines for events to discard - plt.plot( - range(0, thr_uncut + 1), - n_events["+-"][: thr_uncut + 1], - "o-", - c=colors["+-"], - label=legend["+-"], - ) - plt.plot( - range(0, thr_loop + 1), - n_events["-+"][: thr_loop + 1], - "o-", - c=colors["-+"], - label=legend["-+"], - ) - plt.plot( - range(0, 2), - n_events["--"][:2], - "o-", - c=colors["--"], - label=legend["--"], - ) - plt.plot( - range(0, 2), - n_events["++"][:2], - "o-", - c=colors["++"], - label=legend["++"], - ) - # Draw black lines for events to keep - plt.plot( - range(thr_uncut, n_events["+-"].shape[0]), - n_events["+-"][thr_uncut:], - "o-", - range(thr_loop, n_events["-+"].shape[0]), - n_events["-+"][thr_loop:], - "o-", - range(1, n_events["--"].shape[0]), - n_events["--"][1:], - "o-", - range(1, n_events["++"].shape[0]), - n_events["++"][1:], - "o-", - label="kept", - linewidth=2.0, - c="g", - ) - plt.grid() - plt.xlabel("Number of restriction site(s)") - plt.ylabel("Number of events") - plt.yscale("log") - # Remove duplicate "kept" entries in legend - handles, labels = plt.gca().get_legend_handles_labels() - by_label = OrderedDict(zip(labels, handles)) - plt.legend(by_label.values(), by_label.keys()) - # Show uncut and loop threshold as vertical lines - plt.axvline(x=thr_loop, color=colors["-+"]) - plt.axvline(x=thr_uncut, color=colors["+-"]) - - if prefix: - plt.title( - "Library events by distance in {}".format(prefix) - ) - plt.tight_layout() - if fig_path: - plt.savefig(fig_path) - else: - plt.show(block=False) - # plt.clf() - - except Exception: - logger.error( - "Unable to show plots, skipping figure generation. Is " - "an X server running? (might be due to windows " - "environment). Try running without the plot option." - ) - return thr_uncut, thr_loop - - -def filter_events( - in_dat, - out_filtered, - thr_uncut, - thr_loop, - plot_events=False, - fig_path=None, - prefix=None, -): - """Filter events (loops, uncuts and weirds) - Filter out spurious intrachromosomal Hi-C pairs from input file. +- pairs - with reads closer or at the uncut threshold and -+ pairs with reads closer - or at the loop thresholds are excluded from the ouput file. -- and ++ pairs - with both mates on the same fragments are also discarded. All others are - written. - Parameters - ---------- - in_dat : file object - File handle in read mode to the 2D BED file containing Hi-C pairs. - out_filtered : file object - File handle in write mode the output filtered 2D BED file. - thr_uncut : int - Minimum number of restriction sites between reads to keep an - intrachromosomal +- pair. - thr_loop : int - Minimum number of restriction sites between reads to keep an - intrachromosomal -+ pair. - plot_events : bool - If True, a plot showing the proportion of each type of event will be - shown after filtering. - fig_path : str - Path where the figure will be saved. If None, figure is displayed - interactively. - prefix : str - If the library has a name, it will be shown on plots. 
- """ - n_uncuts = 0 - n_loops = 0 - n_weirds = 0 - lrange_intra = 0 - lrange_inter = 0 - - # open the files for reading and writing - with open(in_dat, "r") as pairs, open(out_filtered, "w") as filtered: - for line in pairs: # iterate over each line - # Copy header lines to output - if line.startswith("#"): - filtered.write(line) - continue - - p = process_read_pair(line) - line_to_write = ( - "\t".join( - map( - str, - ( - p["readID"], - p["chr1"], - p["pos1"], - p["chr2"], - p["pos2"], - p["strand1"], - p["strand2"], - p["frag1"], - p["frag2"], - ), - ) - ) - + "\n" - ) - if p["chr1"] == p["chr2"]: - # Do not report ++ and -- pairs on the same fragment (impossible) - if p["frag1"] == p["frag2"] and p["strand1"] == p["strand2"]: - n_weirds += 1 - elif p["nsites"] <= thr_loop and p["type"] == "-+": - n_loops += 1 - elif p["nsites"] <= thr_uncut and p["type"] == "+-": - n_uncuts += 1 - else: - lrange_intra += 1 - filtered.write(line_to_write) - - if p["chr1"] != p["chr2"]: - lrange_inter += 1 - filtered.write(line_to_write) - - if lrange_inter > 0: - ratio_inter = round( - 100 * lrange_inter / float(lrange_intra + lrange_inter), 2 - ) - else: - ratio_inter = 0 - - # Log quick summary of operation results - kept = lrange_intra + lrange_inter - discarded = n_loops + n_uncuts + n_weirds - total = kept + discarded - logger.info( - "Proportion of inter contacts: {0}% (intra: {1}, " - "inter: {2})".format(ratio_inter, lrange_intra, lrange_inter) - ) - logger.info( - "{0} pairs discarded: Loops: {1}, Uncuts: {2}, Weirds: {3}".format( - discarded, n_loops, n_uncuts, n_weirds - ) - ) - logger.info( - "{0} pairs kept ({1}%)".format( - kept, round(100 * kept / (kept + discarded), 2) - ) - ) - - # Visualize summary if requested by user - if plot_events: - try: - # Plot: make a square figure and axes to plot a pieChart: - plt.figure(2, figsize=(6, 6)) - # The slices will be ordered and plotted counter-clockwise. - fracs = [n_uncuts, n_loops, n_weirds, lrange_intra, lrange_inter] - # Format labels to include event names and proportion - labels = list( - map( - lambda x: (x[0] + ": %.2f%%") % (100 * x[1] / total), - [ - ("Uncuts", n_uncuts), - ("Loops", n_loops), - ("Weirds", n_weirds), - ("3D intra", lrange_intra), - ("3D inter", lrange_inter), - ], - ) - ) - colors = ["salmon", "lightskyblue", "yellow", "palegreen", "plum"] - patches, _ = plt.pie(fracs, colors=colors, startangle=90) - plt.legend( - patches, - labels, - loc='upper left', - bbox_to_anchor=(-0.1, 1.), - ) - if prefix: - plt.title( - "Distribution of library events in {}".format(prefix), - bbox={"facecolor": "1.0", "pad": 5}, - ) - plt.text( - 0.3, - 1.15, - "Threshold Uncuts = " + str(thr_uncut), - fontdict=None, - ) - plt.text( - 0.3, - 1.05, - "Threshold Loops = " + str(thr_loop), - fontdict=None, - ) - - plt.text( - -1.5, - -1.2, - "Total number of reads = " + str(total), - fontdict=None, - ) - plt.text( - -1.5, - -1.3, - "Ratio inter/(intra+inter) = " + str(ratio_inter) + "%", - fontdict=None, - ) - percentage = round( - 100 - * float(lrange_inter + lrange_intra) - / (n_loops + n_uncuts + n_weirds + lrange_inter + lrange_intra) - ) - plt.text( - -1.5, - -1.4, - "Selected reads = {0}%".format(percentage), - fontdict=None, - ) - if fig_path: - plt.savefig(fig_path) - else: - plt.show() - plt.clf() - except Exception: - logger.error( - "Unable to show plots. Perhaps there is no Xserver running ?" - "(might be due to windows environment) skipping figure " - "generation." 
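To summarise the decision tree in `filter_events` above, a minimal predicate applying the same rules to one processed pair (illustrative only; the pair dict and thresholds below are made-up toy values):

```python
def keep_pair(p, thr_uncut, thr_loop):
    """Return (keep?, label) for one pair dict, mirroring filter_events()."""
    if p["chr1"] != p["chr2"]:
        return True, "inter"
    if p["frag1"] == p["frag2"] and p["strand1"] == p["strand2"]:
        return False, "weird"          # ++ or -- on the same fragment
    if p["type"] == "-+" and p["nsites"] <= thr_loop:
        return False, "loop"
    if p["type"] == "+-" and p["nsites"] <= thr_uncut:
        return False, "uncut"
    return True, "intra"

pair = {"chr1": "a", "chr2": "a", "frag1": 1, "frag2": 3,
        "strand1": "+", "strand2": "-", "type": "+-", "nsites": 2}
print(keep_pair(pair, thr_uncut=5, thr_loop=5))  # (False, 'uncut')
```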
- ) - if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("-i", "--input") @@ -559,11 +59,11 @@ if __name__ == "__main__": sys.stdout = open ("hicstuff_filter.log", "wt") hcl.set_file_handler(log_file) - uncut_thr, loop_thr = get_thresholds( + uncut_thr, loop_thr = hf.get_thresholds( pairs_idx, plot_events=plot, fig_path=dist_plot, prefix=prefix ) - filter_events( + hf.filter_events( pairs_idx, pairs_out, uncut_thr, diff --git a/bin/hicstuff_filter_pcr.py b/bin/hicstuff_filter_pcr.py index 40c45687d85de9e2e055776761df9152e2361971..30a7904d9ca516a374fdad8a7189a175343ea63c 100755 --- a/bin/hicstuff_filter_pcr.py +++ b/bin/hicstuff_filter_pcr.py @@ -1,8 +1,8 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -import hicstuff_io as hio -from hicstuff_log import logger +import hicstuff.io as hio +from hicstuff.log import logger import argparse import os, time, csv, sys, re diff --git a/bin/hicstuff_fragments.py b/bin/hicstuff_fragments.py index 95fb0dcc02fe99b6ab03bb7eca37a9abd951c502..72182f3b347e96051eaddd6d02185f92937459a7 100755 --- a/bin/hicstuff_fragments.py +++ b/bin/hicstuff_fragments.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -import hicstuff_digest as hcd +import hicstuff.digest as hcd import argparse if __name__ == "__main__": diff --git a/bin/hicstuff_io.py b/bin/hicstuff_io.py deleted file mode 100644 index 2aac719b31fbab18dee08538a3e1a322bb778f30..0000000000000000000000000000000000000000 --- a/bin/hicstuff_io.py +++ /dev/null @@ -1,1381 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import gzip -import zipfile -import bz2 -import io -import os -import functools -import sys -import numpy as np -import pandas as pd -import collections -import subprocess as sp -from scipy.sparse import tril, triu -import pathlib -import re -from os.path import join, exists -from random import getrandbits -from scipy.sparse import coo_matrix -from Bio import SeqIO, SeqUtils -#import hicstuff.hicstuff as hcs -#from hicstuff.log import logger -#from hicstuff.version import __version__ -import scipy.stats as ss - -DEFAULT_MAX_MATRIX_SHAPE = 10000 - -DEFAULT_FRAGMENTS_LIST_FILE_NAME = "fragments_list.txt" -DEFAULT_INFO_CONTIGS_FILE_NAME = "info_contigs.txt" -DEFAULT_SPARSE_MATRIX_FILE_NAME = "abs_fragments_contacts_weighted.txt" - - -def _cols_to_sparse(sparse_array, shape=None, dtype=np.float64): - """ - Make a coordinate based sparse matrix from columns. - Convert (3, n) shaped arrays to a sparse matrix. The first - one acts as the row coordinates, the second one as the - column coordinates, the third one as the data points - for each pair. If duplicates are found, the data points are - added. - Parameters - ---------- - sparse_array : array_like - An array with exactly three columns representing the sparse - matrix data in coordinate format. - shape : tuple of int - The total number of rows and columns in the matrix. Will be estimated - from nonzero values if omitted. - dtype : type, optional - The type of data being loaded. Default is numpy.float64 - Example - ------- - >>> import numpy as np - >>> row, col = np.array([1, 2, 3]), np.array([3, 2, 1]) - >>> data = np.array([4, 5, 6]) - >>> M = np.array([row, col, data]).T - >>> S = _cols_to_sparse(M) - >>> print(S.todense()) - [[0. 0. 0. 0.] - [0. 0. 0. 4.] - [0. 0. 5. 0.] - [0. 6. 0. 
0.]] - """ - - row = sparse_array[:, 0] - col = sparse_array[:, 1] - data = sparse_array[:, 2] - if shape is None: - n = int(max(np.amax(row), np.amax(col))) + 1 - shape = (n, n) - - S = coo_matrix((data, (row, col)), shape=shape, dtype=dtype) - return S - - -def load_sparse_matrix(mat_path, binning=1, dtype=np.float64): - """Load sparse matrix - Load a text file matrix into a sparse matrix object. The expected format is - a 3 column file where columns are row_number, col_number, value. The first - line consists of 3 values representing the total number of rows, columns - and nonzero values. - Parameters - ---------- - mat_path : file, str or pathlib.Path - The input matrix file in instagraal format. - binning : int or "auto" - The binning to perform. If "auto", binning will - be automatically inferred so that the matrix size - will not go beyond (10000, 10000) in shape. That - can be changed by modifying the DEFAULT_MAX_MATRIX_SHAPE - value. Default is 1, i.e. no binning is performed - dtype : type, optional - The type of data being loaded. Default is numpy.float64 - Returns - ------- - sparse_mat : scipy.sparse.coo_matrix - The output (sparse) matrix in COOrdinate format. - Examples - -------- - >>> S = load_sparse_matrix('test_data/mat_5kb.tsv', binning=1) - >>> S.data[:10] - array([84., 2., 3., 2., 1., 1., 50., 1., 66., 1.]) - >>> S.shape - (16, 16) - """ - try: - cols_arr = np.loadtxt(mat_path, delimiter="\t", dtype=dtype) - shape = (int(cols_arr[0, 0]), int(cols_arr[0, 1])) - except ValueError: - cols_arr = np.loadtxt( - mat_path, delimiter="\t", dtype=dtype, skiprows=1 - ) - shape = None - - # Get values into an array without the header. Use the header to give size. - sparse_mat = _cols_to_sparse(cols_arr[1:, :], shape=shape, dtype=dtype) - - if binning == "auto": - num_bins = max(sparse_mat.shape) + 1 - subsampling_factor = num_bins // DEFAULT_MAX_MATRIX_SHAPE - else: - subsampling_factor = binning - sparse_mat = hcs.bin_sparse( - sparse_mat, subsampling_factor=subsampling_factor - ) - return sparse_mat - - -def save_sparse_matrix(s_mat, path): - """Save a sparse matrix - Saves a sparse matrix object into tsv format. - Parameters - ---------- - s_mat : scipy.sparse.coo_matrix - The sparse matrix to save on disk - path : str - File path where the matrix will be stored - """ - if s_mat.format != "coo": - ValueError("Sparse matrix must be in coo format") - dtype = s_mat.dtype - fmt = "%i" if dtype == int else "%.10e" - sparse_arr = np.vstack([s_mat.row, s_mat.col, s_mat.data]).T - - np.savetxt( - path, - sparse_arr, - header="{nrows}\t{ncols}\t{nonzero}".format( - nrows=s_mat.shape[0], ncols=s_mat.shape[1], nonzero=s_mat.nnz - ), - comments="", - fmt=fmt, - delimiter="\t", - ) - - -def load_pos_col(path, colnum, header=1, dtype=np.int64): - """ - Loads a single column of a TSV file with header into a numpy array. - - Parameters - ---------- - path : str - The path of the TSV file to load. - colnum : int - The 0-based index of the column to load. - header : int - Number of line to skip. By default the header is a single line. - Returns - ------- - numpy.array : - A 1D numpy array with the - Examples - -------- - >>> load_pos_col('test_data/mat_5kb.tsv', 0)[:10] - array([0, 0, 0, 0, 0, 0, 1, 1, 2, 2]) - """ - pos_arr = np.genfromtxt( - path, - delimiter="\t", - usecols=(colnum,), - skip_header=header, - dtype=dtype, - ) - return pos_arr - - -def generate_temp_dir(path): - """Temporary directory generation - Generates a temporary file with a random name at the input path. 
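For context on the matrix I/O being removed here, a minimal round-trip through the graal text format handled by `save_sparse_matrix`/`load_sparse_matrix`: a "nrows ncols nnz" header line followed by row, col, value triplets. Illustrative only; `toy_mat.tsv` is a hypothetical throwaway file written to the working directory.

```python
import numpy as np
from scipy.sparse import coo_matrix

S = coo_matrix(([4.0, 5.0], ([1, 2], [3, 2])), shape=(4, 4))
arr = np.vstack([S.row, S.col, S.data]).T
np.savetxt("toy_mat.tsv", arr, delimiter="\t", comments="",
           header=f"{S.shape[0]}\t{S.shape[1]}\t{S.nnz}", fmt="%.10e")

cols = np.loadtxt("toy_mat.tsv", delimiter="\t")    # first row is the header
shape = (int(cols[0, 0]), int(cols[0, 1]))
S2 = coo_matrix(
    (cols[1:, 2], (cols[1:, 0].astype(int), cols[1:, 1].astype(int))),
    shape=shape,
)
print(np.array_equal(S.toarray(), S2.toarray()))    # True: round trip preserved
```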
- - Parameters - ---------- - path : str - The path at which the temporary directory will be created. - - Returns - ------- - str - The path of the newly created temporary directory. - """ - exist = True - while exist: - # Keep trying random directory names if they already exist - directory = str(hex(getrandbits(32)))[2:] - full_path = join(path, directory) - if not exists(full_path): - exist = False - try: - os.makedirs(full_path) - except PermissionError: - raise PermissionError( - "The temporary directory cannot be created in {}. " - "Make sure you have write permission.".format(path) - ) - return full_path - - -def _check_cooler(fun): - """Decorates function `fun` to check if cooler is available..""" - - @functools.wraps(fun) - def wrapped(*args, **kwargs): - try: - import cooler - - fun.__globals__["cooler"] = cooler - except ImportError: - logger.error( - "The cooler package is required to use {0}, please install it first".format( - fun.__name__ - ) - ) - raise ImportError("The cooler package is required.") - return fun(*args, **kwargs) - - return wrapped - - -@_check_cooler -def add_cool_column( - clr, column, column_name, table_name="bins", metadata={}, dtype=None -): - """ - Adds a new column to a loaded Cooler store. If the column exists, - it is replaced. This will affect the .cool file. - - Parameters - ---------- - clr : Cooler object - A Cooler store. - column : pandas Series - The column to add to the cooler. - column_name : str - The name of the column to add. - table_name : str - The name of the table to which the column should be added. - Defaults to the "bins" table. - metadata : dict - A dictionary of metadata to associate with the new column. - """ - with clr.open("r+") as c: - if column_name in c[table_name]: - del c[table_name][column_name] - h5opts = dict(compression="gzip", compression_opts=6) - c[table_name].create_dataset( - column_name, data=column, dtype=dtype, **h5opts - ) - c[table_name][column_name].attrs.update(metadata) - - -@_check_cooler -def load_cool(cool): - """ - Reads a cool file into memory and parses it into graal style tables. - - Parameters - ---------- - cool : str - Path to the input .cool file. - Returns - ------- - mat : scipy coo_matrix - Hi-C contact map in COO format. - frags : pandas DataFrame - Table off bins matching the matrix. Corresponds to the content of the fragments_list.txt file. - chroms : pandas DataFrame - Table of chromosome informations. 
- """ - c = cooler.Cooler(cool) # pylint: disable=undefined-variable - frags = c.bins()[:] - chroms = c.chroms()[:] - mat = c.pixels()[:] - frags.rename( - columns={"start": "start_pos", "end": "end_pos"}, inplace=True - ) - frags["id"] = frags.groupby("chrom", sort=False).cumcount() + 1 - # Try loading hicstuff-specific columns - try: - frags = frags[ - ["id", "chrom", "start_pos", "end_pos", "size", "gc_content"] - ] - # If absent, only load standard columns - except KeyError: - frags = frags[["id", "chrom", "start_pos", "end_pos"]] - - chroms["cumul_length"] = ( - chroms.length.shift(1).fillna(0).cumsum().astype(int) - ) - n_frags = c.bins()[:].groupby("chrom", sort=False).count().start - chroms["n_frags"] = chroms.merge( - n_frags, right_index=True, left_on="name", how="left" - ).start - chroms.rename(columns={"name": "contig"}, inplace=True) - n = int(max(np.amax(mat.bin1_id), np.amax(mat.bin2_id))) + 1 - shape = (n, n) - mat = coo_matrix((mat["count"], (mat.bin1_id, mat.bin2_id)), shape=shape) - - return mat, frags, chroms - - -@_check_cooler -def save_cool(cool_out, mat, frags, metadata={}): - """ - Writes a .cool file from graal style tables. - - Parameters - ---------- - cool_out : str - Path to the output cool file. - mat : scipy coo_matrix - The Hi-C contact matrix in sparse COO format. - frags : pandas DataFrame - The graal style 'fragments_list' table. - metadata : dict - Potential metadata to associate with the cool file. - """ - up_tri = False - # Check if symmetric matrix is symmetric - # (i.e. only upper triangle or full mat) - if (abs(mat - mat.T) > 1e-10).nnz != 0: - up_tri = True - # Drop useless column - try: - bins = frags.drop("id", axis=1) - except KeyError: - bins = frags - # Get column names right - bins.rename( - columns={"seq": "chrom", "start_pos": "start", "end_pos": "end"}, - inplace=True, - ) - mat_dict = {"bin1_id": mat.row, "bin2_id": mat.col, "count": mat.data} - pixels = pd.DataFrame(mat_dict) - cooler.create_cooler( # pylint: disable=undefined-variable - cool_out, - bins, - pixels, - metadata=metadata, - symmetric_upper=up_tri, - triucheck=False, - ) - - -def read_compressed(filename, mode='r'): - """Read compressed file - Opens the file in read mode with appropriate decompression algorithm. - Parameters - ---------- - filename : str - The path to the input file - Returns - ------- - file-like object - The handle to access the input file's content - """ - - # Standard header bytes for diff compression formats - comp_bytes = { - b"\x1f\x8b\x08": "gz", - b"\x42\x5a\x68": "bz2", - b"\x50\x4b\x03\x04": "zip", - } - - max_len = max(len(x) for x in comp_bytes) - - def file_type(filename): - """Guess file type - Compare header bytes with those in the file and return type. - """ - with open(filename, "rb") as f: - file_start = f.read(max_len) - for magic, filetype in comp_bytes.items(): - if file_start.startswith(magic): - return filetype - return "uncompressed" - - # Open file with appropriate function - mode_map = {'r': 'rt', 'rb': 'rb'} - comp = file_type(filename) - if comp == "gz": - return gzip.open(filename, mode_map[mode]) - elif comp == "bz2": - return bz2.open(filename, mode_map[mode]) - elif comp == "zip": - zip_arch = zipfile.ZipFile(filename, mode) - if len(zip_arch.namelist()) > 1: - raise IOError( - "Only a single fastq file must be in the zip archive." 
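A compact sketch of the magic-byte sniffing used by `read_compressed`/`is_compressed` above: the first bytes of the file identify gzip, bzip2 or zip content. Illustrative only; `reads.fq.gz` is a hypothetical throwaway file created for the demonstration.

```python
import gzip

COMP_BYTES = {b"\x1f\x8b\x08": "gz", b"\x42\x5a\x68": "bz2", b"\x50\x4b\x03\x04": "zip"}

def sniff(path):
    """Guess the compression format from the file header bytes."""
    with open(path, "rb") as f:
        start = f.read(max(len(m) for m in COMP_BYTES))
    for magic, fmt in COMP_BYTES.items():
        if start.startswith(magic):
            return fmt
    return "uncompressed"

with gzip.open("reads.fq.gz", "wt") as f:   # hypothetical throwaway file
    f.write("@r1\nACGT\n+\nFFFF\n")
print(sniff("reads.fq.gz"))                 # gz
```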
- ) - else: - # ZipFile opens as bytes by default, using io to read as text - zip_content = zip_arch.open(zip_arch.namelist()[0], mode) - return io.TextIOWrapper(zip_content, encoding="utf-8") - else: - return open(filename, mode) - - -def is_compressed(filename): - """Check compression status - Check if the input file is compressed from the first bytes. - Parameters - ---------- - filename : str - The path to the input file - Returns - ------- - bool - True if the file is compressed, False otherwise. - """ - - # Standard header bytes for diff compression formats - comp_bytes = { - b"\x1f\x8b\x08": "gz", - b"\x42\x5a\x68": "bz2", - b"\x50\x4b\x03\x04": "zip", - } - max_len = max(len(x) for x in comp_bytes) - with open(filename, "rb") as f: - file_start = f.read(max_len) - for magic, _ in comp_bytes.items(): - if file_start.startswith(magic): - return True - return False - - -def from_dade_matrix(filename, header=False): - """Load a DADE matrix - Load a numpy array from a DADE matrix file, optionally - returning bin information from the header. Header data - processing is delegated downstream. - Parameters - ---------- - filename : str, file or pathlib.Path - The name of the file containing the DADE matrix. - header : bool - Whether to return as well information contained - in the header. Default is False. - Example - ------- - >>> import numpy as np - >>> import tempfile - >>> lines = [['RST', 'chr1~0', 'chr1~10', 'chr2~0', 'chr2~30'], - ... ['chr1~0', '5'], - ... ['chr1~10', '8', '3'], - ... ['chr2~0', '3', '5', '5'], - ... ['chr2~30', '5', '10', '11', '2'] - ... ] - >>> formatted = ["\\t".join(l) + "\\n" for l in lines ] - >>> dade = tempfile.NamedTemporaryFile(mode='w') - >>> for fm in formatted: - ... dade.write(fm) - 34 - 9 - 12 - 13 - 18 - >>> dade.flush() - >>> M, h = from_dade_matrix(dade.name, header=True) - >>> dade.close() - >>> print(M) - [[ 5. 8. 3. 5.] - [ 8. 3. 5. 10.] - [ 3. 5. 5. 11.] - [ 5. 10. 11. 2.]] - >>> print(h) - ['chr1~0', 'chr1~10', 'chr2~0', 'chr2~30'] - See https://github.com/scovit/DADE for more details about Dade. - """ - - A = pd.read_csv(filename, sep="\t", header=None) - A.fillna("0", inplace=True) - M, headers = np.array(A.iloc[1:, 1:], dtype=np.float64), A.iloc[0, :] - matrix = M + M.T - np.diag(np.diag(M)) - if header: - return matrix, headers.tolist()[1:] - else: - return matrix - - -def to_dade_matrix(M, annotations="", filename=None): - """Returns a Dade matrix from input numpy matrix. Any annotations are added - as header. If filename is provided and valid, said matrix is also saved - as text. - """ - - n, m = M.shape - A = np.zeros((n + 1, m + 1)) - A[1:, 1:] = M - if not annotations: - annotations = np.array(["" for _ in n], dtype=str) - A[0, :] = annotations - A[:, 0] = annotations.T - if filename: - try: - np.savetxt(filename, A, fmt="%i") - logger.info( - "I saved input matrix in dade format as {0}".format( - str(filename) - ) - ) - except ValueError as e: - logger.warning("I couldn't save input matrix.") - logger.warning(str(e)) - - return A - - -def load_into_redis(filename): - """Load a file into redis - Load a matrix file and sotre it in memory with redis. - Useful to pass around huge datasets from scripts to - scripts and load them only once. - Inspired from https://gist.github.com/alexland/ce02d6ae5c8b63413843 - Parameters - ---------- - filename : str, file or pathlib.Path - The file of the matrix to load. - Returns - ------- - key : str - The key of the dataset needed to retrieve it from redis. 
- """ - try: - from redis import StrictRedis as redis - import time - except ImportError: - print( - "Error! Redis does not appear to be installed in your system.", - file=sys.stderr, - ) - exit(1) - - M = np.genfromtxt(filename, dtype=None) - array_dtype = str(M.dtype) - m, n = M.shape - M = M.ravel().tostring() - database = redis(host="localhost", port=6379, db=0) - key = "{0}|{1}#{2}#{3}".format(int(time.time()), array_dtype, m, n) - - database.set(key, M) - - return key - - -def load_from_redis(key): - """Retrieve a dataset from redis - Retrieve a cached dataset that was stored in redis - with the input key. - Parameters - ---------- - key : str - The key of the dataset that was stored in redis. - Returns - ------- - M : numpy.ndarray - The retrieved dataset in array format. - """ - - try: - from redis import StrictRedis as redis - except ImportError: - print( - "Error! Redis does not appear to be installed in your system.", - file=sys.stderr, - ) - exit(1) - - database = redis(host="localhost", port=6379, db=0) - - try: - M = database.get(key) - except KeyError: - print( - "Error! No dataset was found with the supplied key.", - file=sys.stderr, - ) - exit(1) - - array_dtype, n, m = key.split("|")[1].split("#") - - M = np.fromstring(M, dtype=array_dtype).reshape(int(n), int(m)) - return M - - -def dade_to_graal( - filename, - output_matrix=DEFAULT_SPARSE_MATRIX_FILE_NAME, - output_contigs=DEFAULT_INFO_CONTIGS_FILE_NAME, - output_frags=DEFAULT_SPARSE_MATRIX_FILE_NAME, - output_dir=None, -): - """Convert a matrix from DADE format (https://github.com/scovit/dade) - to a graal-compatible format. Since DADE matrices contain both fragment - and contact information all files are generated at the same time. - """ - - with open(output_matrix, "w") as sparse_file: - sparse_file.write("id_frag_a\tid_frag_b\tn_contact") - with open(filename) as file_handle: - first_line = file_handle.readline() - for row_index, line in enumerate(file_handle): - dense_row = np.array(line.split("\t")[1:], dtype=np.int32) - for col_index in np.nonzero(dense_row)[0]: - line_to_write = "{}\t{}\t{}\n".format( - row_index, col_index, dense_row[col_index] - ) - sparse_file.write(line_to_write) - - header = first_line.split("\t") - bin_type = header[0] - if bin_type == '"RST"': - logger.info("I detected fragment-wise binning") - elif bin_type == '"BIN"': - logger.info("I detected fixed size binning") - else: - logger.warning( - ( - "Sorry, I don't understand this matrix's " - "binning: I read {}".format(str(bin_type)) - ) - ) - - header_data = [ - header_elt.replace("'", "") - .replace('"', "") - .replace("\n", "") - .split("~") - for header_elt in header[1:] - ] - - ( - global_frag_ids, - contig_names, - local_frag_ids, - frag_starts, - frag_ends, - ) = np.array(list(zip(*header_data))) - - frag_starts = frag_starts.astype(np.int32) - 1 - frag_ends = frag_ends.astype(np.int32) - 1 - frag_lengths = frag_ends - frag_starts - - total_length = len(global_frag_ids) - - with open(output_contigs, "w") as info_contigs: - - info_contigs.write("contig\tlength\tn_frags\tcumul_length\n") - - cumul_length = 0 - - for contig in collections.OrderedDict.fromkeys(contig_names): - - length_tig = np.sum(frag_lengths[contig_names == contig]) - n_frags = collections.Counter(contig_names)[contig] - line_to_write = "%s\t%s\t%s\t%s\n" % ( - contig, - length_tig, - n_frags, - cumul_length, - ) - info_contigs.write(line_to_write) - cumul_length += n_frags - - with open(output_frags, "w") as fragments_list: - - fragments_list.write( - 
"id\tchrom\tstart_pos\tend_pos" "\tsize\tgc_content\n" - ) - bogus_gc = 0.5 - - for i in range(total_length): - line_to_write = "%s\t%s\t%s\t%s\t%s\t%s\n" % ( - int(local_frag_ids[i]) + 1, - contig_names[i], - frag_starts[i], - frag_ends[i], - frag_lengths[i], - bogus_gc, - ) - fragments_list.write(line_to_write) - - -def load_bedgraph2d(filename, bin_size=None, fragments_file=None): - """ - Loads matrix and fragment information from a 2D bedgraph file. Note this - function assumes chromosomes are ordered in alphabetical. order - - Parameters - ---------- - filename : str - Path to the bedgraph2D file. - bin_size : int - The size of bins in the case of fixed bin size. - fragments_file : str - Path to a fragments file to explicitely provide fragments positions. - If the matrix does not have fixed bin size, this prevents errors. - - Returns - ------- - mat : scipy.sparse.coo_matrix - The Hi-C contact map as the upper triangle of a symetric matrix, in - sparse format. - frags : pandas.DataFrame - The list of fragments/bin present in the matrix with their genomic - positions. - """ - bed2d = pd.read_csv(filename, sep="\t", header=None) - chrom_sizes = {} - if bin_size is not None: - # If bin size if provided, retrieve chromosome lengths, this will be - # used when regenerating bin coordinates - chroms_left = bed2d[[3, 5]] - chroms_left.columns = [0, 2] - chroms = ( - pd.concat([bed2d[[0, 2]], chroms_left]) - .groupby([0], sort=False) - .max() - ) - for chrom, size in zip(chroms.index, np.array(chroms)): - chrom_sizes[chrom] = size[0] - elif fragments_file is None: - logger.warning( - "Please be aware that not all information can be restored from a " - "bg2 file without fixed bin size; fragments without any contact " - "will be lost" - ) - # Get all possible fragment chrom-positions into an array - frag_pos = np.vstack( - [np.array(bed2d[[0, 1, 2]]), np.array(bed2d[[3, 4, 5]])] - ) - # Sort by position (least important, col 1) - frag_pos = frag_pos[frag_pos[:, 1].argsort(kind="mergesort")] - # Then by chrom (most important, col 0) - frag_pos = frag_pos[frag_pos[:, 0].argsort(kind="mergesort")] - # Get unique names for fragments (chrom+pos) - ordered_frag_pos = ( - pd.DataFrame(frag_pos).drop_duplicates().reset_index(drop=True) - ) - frag_pos_a = bed2d[[0, 1]].apply(lambda x: tuple(x), axis=1) - frag_pos_b = bed2d[[3, 4]].apply(lambda x: tuple(x), axis=1) - # If fragments file is provided, use fragments positions to indices mapping - if fragments_file is not None: - frags = pd.read_csv(fragments_file, delimiter="\t") - frag_map = frags.apply(lambda x: (str(x.chrom), x.start_pos), axis=1) - frag_map = {f_name: f_idx for f_idx, f_name in enumerate(frag_map)} - # If fixed fragment size available, use it to reconstruct original - # fragments ID (even if they are absent from the bedgraph file). 
- elif bin_size is not None: - frag_map = {} - chrom_frags = [] - for chrom, size in chrom_sizes.items(): - prev_frags = len(frag_map) - for bin_id, bin_pos in enumerate(range(0, size, bin_size)): - frag_map[(chrom, bin_pos)] = bin_id + prev_frags - n_bins = size // bin_size - chrom_frags.append( - pd.DataFrame( - { - "id": range(1, n_bins + 1), - "chrom": np.repeat(chrom, n_bins), - "start_pos": range(0, size, bin_size), - "end_pos": range(bin_size, size + bin_size, bin_size), - } - ) - ) - frags = pd.concat(chrom_frags, axis=0).reset_index(drop=True) - frags.insert( - loc=3, column="size", value=frags.end_pos - frags.start_pos - ) - # If None available, guess fragments indices from bedgraph (potentially wrong) - else: - frag_map = { - (v[0], v[1]): i - for i, v in ordered_frag_pos.iloc[:, [0, 1]].iterrows() - } - frags = ordered_frag_pos.copy() - frags[3] = frags.iloc[:, 2] - frags.iloc[:, 1] - frags.insert(loc=0, column="id", value=0) - frags.id = frags.groupby([0], sort=False).cumcount() + 1 - frags.columns = ["id", "chrom", "start_pos", "end_pos", "size"] - # Match bin indices to their names - frag_id_a = np.array(list(map(lambda x: frag_map[x], frag_pos_a))) - frag_id_b = np.array(list(map(lambda x: frag_map[x], frag_pos_b))) - contacts = np.array(bed2d.iloc[:, 6].tolist()) - # Use index to build matrix - n_frags = len(frag_map.keys()) - mat = coo_matrix( - (contacts, (frag_id_a, frag_id_b)), shape=(n_frags, n_frags) - ) - - # Get size of each chromosome in basepairs - chromsizes = frags.groupby("chrom", sort=False).apply( - lambda x: np.int64(max(x.end_pos)) - ) - chrom_bins = frags.groupby("chrom", sort=False).size() - # Shift chromsizes by one to get starting bin, first one is zero - # Make chromsize cumulative to get start bin of each chrom - # Get chroms into a 1D array of bin starts - chrom_start = chrom_bins.shift(1, fill_value=0).cumsum() - chroms = pd.DataFrame( - { - "contig": chromsizes.index, - "length": chromsizes.values, - "n_frags": chrom_bins, - "cumul_length": chrom_start, - } - ) - return mat, frags, chroms - - -def flexible_hic_loader( - mat, fragments_file=None, chroms_file=None, quiet=False -): - """ - Wrapper function to load COO, bg2 or cool input and return the same output. - COO formats requires fragments_file and chroms_file options. bg2 format can - infer bin_size if fixed. When providing a bg2 matrix with uneven fragments - length, one should provide fragments_file as well or empty bins will be - truncated from the output. - - Parameters - ---------- - mat : str - Path to the matrix in graal, bedgraph2 or cool format. - fragments_file : str or None - Path to the file with fragments information (fragments_list.txt). - Only required if the matrix is in graal format. - chroms_file : str or None - Path to the file with chromosome information (info_contigs.txt). Only required - if the matrix is in graal format. - quiet : bool - If True, will silence warnings for empty outputs. - Returns - ------- - mat : scipy.sparse.coo_matrix - Sparse upper triangle Hi-C matrix. - frags : pandas.DataFrame or None - Table of fragment informations. None if information was not provided. - chroms : pandas.DataFrame or None - Table of chromosomes/contig information. None if information was not provided. 
- """ - hic_format = get_hic_format(mat) - # Load cool based on file extension - if hic_format == "cool": - mat, frags, chroms = load_cool(mat) - # Use the first line to determine COO / bg2 format - if hic_format == "bg2": - # Use the frags file to define bins if available - if fragments_file is not None: - mat, frags, chroms = load_bedgraph2d( - mat, fragments_file=fragments_file - ) - else: - # Guess if bin size is fixed based on MAD - bg2 = pd.read_csv(mat, sep="\t") - sizes = np.array(bg2.iloc[:, 2] - bg2.iloc[:, 1]) - size_mad = ss.median_abs_deviation(sizes, scale='normal') - # Use only the bg2 - if size_mad > 0: - mat, frags, chroms = load_bedgraph2d(mat) - logger.warning( - "Input is a bedgraph2d file with uneven bin size, " - "but no fragments_file was provided. Empty bins will " - "be missing from the output. To avoid this, provide a " - "fragments file." - ) - # Use fixed bin size - else: - mat, frags, chroms = load_bedgraph2d( - mat, bin_size=int(np.median(sizes)) - ) - - elif hic_format == "graal": - mat = load_sparse_matrix(mat) - try: - frags = pd.read_csv(fragments_file, sep="\t") - except ValueError: - if not quiet: - logger.warning( - "fragments_file was not provided when " - "loading a matrix in COO/graal format. frags will be None." - ) - frags = None - try: - chroms = pd.read_csv(chroms_file, sep="\t") - except ValueError: - if not quiet: - logger.warning( - "chroms_file was not provided when " - "loading a matrix in COO/graal format. chroms will be None." - ) - - chroms = None - - # Ensure the matrix is upper triangle symmetric - if mat.shape[0] == mat.shape[1]: - if (abs(mat - mat.T) > 1e-10).nnz > 0: - mat = mat + tril(mat, k=-1).T - mat = triu(mat, format="coo") - - return mat, frags, chroms - - -def get_hic_format(mat): - """Returns the format of the input Hi-C matrix - - Parameters - ---------- - mat : str - Path to the input Hi-C matrix - Returns - ------- - str : - Hi-C format string. One of graal, bg2, cool - """ - if ( - mat.endswith(".cool") - or mat.count(".mcool::/") == 1 - or mat.count(".cool::/") == 1 - ): - hic_format = "cool" - else: - # Use the first line to determine COO / bg2 format - ncols = len(open(mat).readline().split("\t")) - if ncols == 7: - hic_format = "bg2" - elif ncols == 3: - hic_format = "graal" - else: - raise ValueError("Unkown file format") - return hic_format - - -def flexible_hic_saver( - mat, out_prefix, frags=None, chroms=None, hic_fmt="graal", quiet=False, -): - """ - Saves objects to the desired Hi-C file format. - Parameters - ---------- - mat : scipy.sparse.coo_matrix - Hi-C contact map. - out_prefix : str - Output path without extension (the extension is added based on hic_fmt). - frags : pandas.DataFrame or None - Table of fragments informations. - chroms : pandas.DataFrame or None - Table of chromosomes / contigs informations. - hic_fmt : str - Output format. Can be one of graal for graal-compatible COO format, bg2 for - 2D bedgraph format, or cool for cooler compatible format. 
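A minimal sketch of the upper-triangle normalisation at the end of `flexible_hic_loader` above: any counts stored below the diagonal are folded onto the upper triangle. Illustrative only, on a 3x3 toy matrix.

```python
import numpy as np
from scipy.sparse import coo_matrix, tril, triu

full = coo_matrix(np.array([[0, 2, 0],
                            [5, 0, 1],
                            [0, 0, 0]]))
up = full + tril(full, k=-1).T        # fold the strict lower triangle
up = triu(up, format="coo")           # keep only the upper triangle
print(up.toarray())
# [[0 7 0]
#  [0 0 1]
#  [0 0 0]]
```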
- """ - if hic_fmt == "graal": - save_sparse_matrix(mat, out_prefix + ".mat.tsv") - try: - frags.to_csv(out_prefix + ".frags.tsv", sep="\t", index=False) - except AttributeError: - if not quiet: - logger.warning( - "Could not create fragments_list.txt from input files" - ) - try: - chroms.to_csv(out_prefix + ".chr.tsv", sep="\t", index=False) - except AttributeError: - if not quiet: - logger.warning( - "Could not create info_contigs.txt from input files" - ) - elif hic_fmt == "cool": - frag_sizes = frags.end_pos - frags.start_pos - size_mad = np.median(frag_sizes - np.median(frag_sizes)) - bin_type = "variable" if size_mad else "fixed" - try: - save_cool( - out_prefix + ".cool", - mat, - frags, - metadata={"hicstuff": __version__, "bin-type": bin_type}, - ) - except NameError: - NameError("frags is required to save a cool file") - elif hic_fmt == "bg2": - try: - save_bedgraph2d(mat, frags, out_prefix + ".bg2") - except NameError: - NameError("frags is required to save a bg2 file") - else: - raise ValueError("Unknown output format: {0}".format(hic_fmt)) - - -def save_bedgraph2d(mat, frags, out_path): - """ - Given a sparse matrix and a corresponding list of fragments, save a file - in 2D bedgraph format. - Parameters - ---------- - mat : scipy.sparse.coo_matrix - The sparse contact map. - frags : pandas.DataFrame - A structure containing the annotations for each matrix bin. Should - correspond to the content of the fragments_list.txt file. - """ - - mat_df = pd.DataFrame( - {"row": mat.row, "col": mat.col, "data": mat.data.astype(int)} - ) - # Merge fragments with matrix based on row indices to annotate rows - merge_mat = mat_df.merge( - frags, left_on="row", right_index=True, how="left", suffixes=("", "") - ) - # Rename annotations to assign to frag1 - merge_mat.rename( - columns={"chrom": "chr1", "start_pos": "start1", "end_pos": "end1"}, - inplace=True, - ) - # Do the same operation for cols (frag2) - merge_mat = merge_mat.merge( - frags, - left_on="col", - right_index=True, - how="left", - suffixes=("_0", "_2"), - ) - merge_mat.rename( - columns={"chrom": "chr2", "start_pos": "start2", "end_pos": "end2"}, - inplace=True, - ) - # Select only relevant columns in correct order - bg2 = merge_mat.loc[ - :, ["chr1", "start1", "end1", "chr2", "start2", "end2", "data"] - ] - bg2.to_csv(out_path, header=None, index=False, sep="\t") - - -def get_pos_cols(df): - """Get column names representing chromosome, start and end column - from a dataframe. Allows flexible names. - Parameters - ---------- - df : pd.DataFrame - Returns - ------- - tuple of str: - Tuple containing the names of the chromosome, start and end - columns in the input dataframe. - Examples - -------- - >>> import pandas as pd - >>> d = [1, 2, 3] - >>> df = pd.DataFrame( - ... {'Chromosome': d, 'Start': d, 'End': d, 'species': d} - ... ) - >>> get_pos_cols(df) - ('Chromosome', 'Start', 'End') - >>> df = pd.DataFrame( - ... {'id': d, 'chr': d, 'start_bp': d, 'end_bp': d} - ... 
) - >>> get_pos_cols(df) - ('chr', 'start_bp', 'end_bp') - """ - - # Get case insensitive column names - cnames = df.columns - inames = cnames.str.lower() - - def _col_getter(cols, pat): - """Helper to get column index from the start of its name""" - mask = cols.str.startswith(pat) - idx = np.flatnonzero(mask)[0] - return idx - - chrom_col = cnames[_col_getter(inames, "chr")] - start_col = cnames[_col_getter(inames, "start")] - end_col = cnames[_col_getter(inames, "end")] - return chrom_col, start_col, end_col - - -def gc_bins(genome_path, frags): - """Generate GC content annotation for bins using input genome. - Parameters - ---------- - genome_path : str - Path the the genome file in FASTA format. - frags : pandas.DataFrame - Table containing bin segmentation of the genome. - Required columns: chrom, start, end. - Returns - ------- - numpy.ndarray of floats: - GC content per bin, in the range [0.0, 1.0]. - """ - # Grab columns by name (catch start, Start, star_pos, etc) - chrom_col, start_col, end_col = get_pos_cols(frags) - # Fill the gc array by chromosome - gc_bins = np.zeros(frags.shape[0], dtype=float) - for rec in SeqIO.parse(genome_path, "fasta"): - mask = frags[chrom_col] == rec.id - # Define coordinates of each bin - starts = frags.loc[mask, start_col] - ends = frags.loc[mask, end_col] - # Slice chromosome sequence based on bins - seqs = [str(rec.seq)[s:e] for s, e in zip(starts, ends)] - # Fill GC values for bins in the chromosome - idx = np.flatnonzero(mask) - gc_bins[idx] = np.array(list(map(SeqUtils.GC, seqs))) / 100.0 - - return gc_bins - - -def sort_pairs(in_file, out_file, keys, tmp_dir=None, threads=1, buffer="2G"): - """ - Sort a pairs file in batches using UNIX sort. - Parameters - ---------- - in_file : str - Path to the unsorted input file - out_file : str - Path to the sorted output file. - keys : list of str - list of columns to use as sort keys. Each column can be one of readID, - chr1, pos1, chr2, pos2, frag1, frag2. Key priorities are according to - the order in the list. - tmp_dir : str - Path to the directory where temporary files will be created. Defaults - to current directory. - threads : int - Number of parallel sorting threads. - buffer : str - Buffer size used for sorting. Consists of a number and a unit. - """ - # TODO: Write a pure python implementation to drop GNU coreutils depencency, - # could be inspired from: https://stackoverflow.com/q/14465154/8440675 - - # Check if UNIX sort version supports parallelism - parallel_ok = True - sort_ver = sp.Popen(["sort", "--version"], stdout=sp.PIPE) - sort_ver = ( - sort_ver.communicate()[0] - .decode() - .split("\n")[0] - .split(" ")[-1] - .split(".") - ) - # If so, specify threads, otherwise don't mention it in the command line - try: - sort_ver = list(map(int, sort_ver)) - if sort_ver[0] < 8 or (sort_ver[0] == 8 and sort_ver[1] < 23): - logger.warning( - "GNU sort version is {0} but >8.23 is required for parallel " - "sort. Sorting on a single thread.".format( - ".".join(map(str, sort_ver)) - ) - ) - parallel_ok = False - # BSD sort has a different format and will throw error upon parsing. It does - # not support parallel processes anyway. - except ValueError: - logger.warning( - "Using BSD sort instead of GNU sort, sorting on a single thread." 
- ) - parallel_ok = False - - key_map = { - "readID": "-k1,1d", - "chr1": "-k2,2V", - "pos1": "-k3,3n", - "chr2": "-k4,4V", - "pos2": "-k5,5n", - "strand1": "-k6,6d", - "strand2": "-k7,7d", - "frag1": "-k8,8n", - "frag2": "-k9,9n", - } - - # transform column names to corresponding sort keys - try: - sort_keys = map(lambda k: key_map[k], keys) - except KeyError: - print("Unkown column name.") - raise - # Rewrite header with new sorting order - header = get_pairs_header(in_file) - with open(out_file, "w") as output: - for line in header: - if line.startswith("#sorted"): - output.write("#sorted: {0}\n".format("-".join(keys))) - else: - output.write(line + "\n") - - # Sort pairs and append to file. - with open(out_file, "a") as output: - grep_proc = sp.Popen(["grep", "-v", "^#", in_file], stdout=sp.PIPE) - sort_cmd = ["sort", "-S %s" % buffer] + list(sort_keys) - if tmp_dir is not None: - sort_cmd.append("--temporary-directory={0}".format(tmp_dir)) - if parallel_ok: - sort_cmd.append("--parallel={0}".format(threads)) - print(sort_cmd) - print(grep_proc) - sort_proc = sp.Popen(sort_cmd, stdin=grep_proc.stdout, stdout=output) - sort_proc.communicate() - - -def get_pairs_header(pairs): - r"""Retrieves the header of a .pairs file and stores lines into a list. - Parameters - ---------- - pairs : str or file object - Path to the pairs file. - Returns - ------- - header : list of str - A list of header lines found, in the same order they appear in pairs. - Examples - -------- - >>> import os - >>> from tempfile import NamedTemporaryFile - >>> p = NamedTemporaryFile('w', delete=False) - >>> p.writelines(["## pairs format v1.0\n", "#sorted: chr1-chr2\n", "abcd\n"]) - >>> p.close() - >>> h = get_pairs_header(p.name) - >>> for line in h: - ... print([line]) - ['## pairs format v1.0'] - ['#sorted: chr1-chr2'] - >>> os.unlink(p.name) - """ - # Open file if needed - with open(pairs, "r") as pairs: - # Store header lines into a list - header = [] - line = pairs.readline() - while line.startswith("#"): - header.append(line.rstrip()) - line = pairs.readline() - - return header - - -def reorder_fasta(genome, output=None, threshold=100000): - """Reorder and trim a fasta file - Sort a fasta file by record lengths, optionally trimming the smallest ones. - Parameters - ---------- - genome : str, file or pathlib.Path - The genome scaffold file (or file handle) - output : str, file or pathlib.Path - The output file to write to - threshold : int, optional - The size below which scaffolds are discarded, by default 100000 - """ - - if output is None: - output = "{}_renamed.fa".format(genome.split(".")[0]) - - handle = SeqIO.parse(genome, "fasta") - handle_to_write = sorted( - (len(u) for u in handle if len(u) > threshold), reverse=True - ) - SeqIO.write(handle_to_write, output, "fasta") - - -def rename_genome(genome, output=None, ambiguous=True): - """Rename genome and slugify headers - Rename genomes according to a simple naming scheme; this - is mainly done to avoid special character weirdness. - Parameters - ---------- - genome : file, str or pathlib.Path - The input genome to be renamed and slugify. - output : file, str or pathlib.Path - The output genome to be written into. Default is <base>_renamed.fa, - where <base> is genome_in without its extension. - ambiguous : bool - Whether to retain ambiguous non-N bases, otherwise rename them to Ns. - Default is True. 
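To make the key mapping in `sort_pairs` above easier to follow, a small sketch that assembles (but does not run) the GNU sort command for a chr1/pos1/chr2/pos2 sort. Illustrative only; strand keys are omitted for brevity and the buffer size is a toy value.

```python
# Map .pairs column names to GNU sort key flags, as in sort_pairs() above.
key_map = {
    "readID": "-k1,1d", "chr1": "-k2,2V", "pos1": "-k3,3n",
    "chr2": "-k4,4V",   "pos2": "-k5,5n",
    "frag1": "-k8,8n",  "frag2": "-k9,9n",
}
keys = ["chr1", "pos1", "chr2", "pos2"]
sort_cmd = ["sort", "-S 2G"] + [key_map[k] for k in keys]
print(" ".join(sort_cmd))
# sort -S 2G -k2,2V -k3,3n -k4,4V -k5,5n
```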
- """ - - if output is None: - output = "{}_renamed.fa".format(genome.split(".")[0]) - - with open(output, "w") as output_handle: - for record in SeqIO.parse(genome, "fasta"): - - # Replace hyphens, tabs and whitespace with underscores - new_record_id = record.id.replace(" ", "_") - new_record_id = new_record_id.replace("-", "_") - new_record_id = new_record_id.replace("\t", "_") - - # Remove anything that's weird, i.e. not alphanumeric - # or an underscore - new_record_id = re.sub("[^_A-Za-z0-9]+", "", new_record_id) - header = ">{}\n".format(new_record_id) - - new_seq = re.sub("[^ATGCatgcNn]", "N", str(record.seq)) - - output_handle.write(header) - output_handle.write("{}\n".format(new_seq)) - - -def check_fasta_index(ref, mode="bowtie2"): - """ - Checks for the existence of a bowtie2 or bwa index based on the reference - file name. - Parameters - ---------- - ref : str - Path to the reference genome. - mode : str - The alignment software used to build the index. bowtie2 or bwa. If any - other value is given, the function returns the reference path. - Returns - ------- - index : str - The bowtie2 or bwa index basename. None if no index was found - """ - ref = pathlib.Path(ref) - if mode == "bowtie2": - # Bowtie2 should have 6 index files - bt2_idx_files = list(ref.parent.glob("{}*bt2*".format(ref.name))) - index = None if len(bt2_idx_files) < 6 else bt2_idx_files - elif mode == "bwa": - refdir = str(ref.parent) - refdir_files = os.listdir(refdir) - bwa_idx_files = [ - join(refdir, f) - for f in refdir_files - if re.search(r".*\.(sa|pac|bwt|ann|amb)$", f) - ] - index = None if len(bwa_idx_files) < 5 else bwa_idx_files - else: - index = [ref] - if index is not None: - # Convert the PosixPath objects to strings and get the longest common prefix to obtain - # index basename (without the dot) - index = os.path.commonprefix(list(map(str, index))).strip(".") - return index - - -def check_is_fasta(in_file): - """ - Checks whether input file is in fasta format. - - Parameters - ---------- - in_file : str - Path to the input file. - Returns - ------- - bool : - True if the input is in fasta format, False otherwise - """ - try: - with read_compressed(in_file) as handle: - fasta = any(SeqIO.parse(handle, "fasta")) - except FileNotFoundError: - fasta = False - - return fasta diff --git a/bin/hicstuff_iteralign.py b/bin/hicstuff_iteralign.py index a16b3c84872fedd8d4bcfee5e6e66e99e47174b2..ad19ed696b95acbe966e8b5304663b2f3413d918 100755 --- a/bin/hicstuff_iteralign.py +++ b/bin/hicstuff_iteralign.py @@ -18,310 +18,12 @@ import re import subprocess as sp import pysam as ps import shutil as st -import hicstuff_io as hio +import hicstuff.io as hio import contextlib import argparse -from hicstuff_log import logger +from hicstuff.log import logger from os.path import join - - -def iterative_align( - fq_in, - tmp_dir, - ref, - n_cpu, - bam_out, - aligner="bowtie2", - min_len=20, - min_qual=30, - read_len=None, -): - """Iterative alignment - - Aligns reads iteratively reads of fq_in with bowtie2, minimap2 or bwa. Reads are - truncated to the 20 first nucleotides and unmapped reads are extended by 20 - nucleotides and realigned on each iteration. - - Parameters - ---------- - - fq_in : str - Path to input fastq file to align iteratively. - tmp_dir : str - Path where temporary files should be written. - ref : str - Path to the reference genome if Minimap2 is used for alignment. - Path to the index genome if Bowtie2/bwa is used for alignment. - n_cpu : int - The number of CPUs to use for the iterative alignment. 
- bam_out : str - Path where the final alignment should be written in BAM format. - aligner : str - Choose between minimap2, bwa or bowtie2 for the alignment. - min_len : int - The initial length of the fragments to align. - min_qual : int - Minimum mapping quality required to keep Hi-C pairs. - read_len : int - Read length in the fasta file. If set to None, the length of the first read - is used. Set this value to the longest read length in the file if you have - different read lengths. - - Examples - -------- - - iterative_align(fq_in='example_for.fastq', ref='example_bt2_index', bam_out='example_for.bam', aligner="bowtie2") - iterative_align(fq_in='example_for.fastq', ref='example_genome.fa', bam_out='example_for.bam', aligner="minimap2") - """ - # set with the name of the unaligned reads : - remaining_reads = set() - total_reads = 0 - # Store path of SAM containing aligned reads at each iteration. - iter_out = [] - - # If there is already a file with the same name as the output file, - # remove it. Otherwise, ignore. - with contextlib.suppress(FileNotFoundError): - try: - os.remove(bam_out) - except IsADirectoryError: - logger.error("You need to give the BAM output file, not a folder.") - raise - - # Bowtie only accepts uncompressed fastq: uncompress it into a temp file - if aligner == "bowtie2" and hio.is_compressed(fq_in): - uncomp_path = join(tmp_dir, os.path.basename(fq_in) + ".tmp") - with hio.read_compressed(fq_in) as inf: - with open(uncomp_path, "w") as uncomp: - st.copyfileobj(inf, uncomp) - else: - uncomp_path = fq_in - - # throw error if index does not exist - index = hio.check_fasta_index(ref, mode=aligner) - if index is None: - logger.error( - f"Reference index is missing, please build the {aligner} index first." - ) - sys.exit(1) - # Counting reads - with hio.read_compressed(uncomp_path) as inf: - for _ in inf: - total_reads += 1 - total_reads /= 4 - - # Use first read to guess read length if not provided. - if read_len is None: - with hio.read_compressed(uncomp_path) as inf: - # Skip first line (read header) - size = inf.readline() - # Stripping newline from sequence line. - read_len = len(inf.readline().rstrip()) - - # initial length of the fragments to align - # In case reads are shorter than provided min_len - if read_len > min_len: - n = min_len - else: - logger.warning( - "min_len is longer than the reads. Iterative mapping will have no effect." - ) - n = read_len - logger.info("{0} reads to parse".format(int(total_reads))) - - first_round = True - # iterative alignment per se - while n <= read_len: - logger.info( - "Truncating unaligned reads to {size}bp and mapping{again}.".format( - size=int(n), again="" if first_round else " again" - ) - ) - iter_out += [join(tmp_dir, "trunc_{0}.bam".format(str(n)))] - # Generate a temporary input fastq file with the n first nucleotids - # of the reads. 
- truncated_reads = truncate_reads( - tmp_dir, uncomp_path, remaining_reads, n, first_round - ) - - # Align the truncated reads on reference genome - temp_alignment = join(tmp_dir, "temp_alignment.bam") - map_args = { - "fa": ref, - "cpus": n_cpu, - "fq": truncated_reads, - "idx": index, - "bam": temp_alignment, - } - if re.match(r"^(minimap[2]?|mm[2]?)$", aligner, flags=re.IGNORECASE): - cmd = "minimap2 -x sr -a -t {cpus} {fa} {fq}".format(**map_args) - elif re.match(r"^(bwa)$", aligner, flags=re.IGNORECASE): - cmd = "bwa mem -t {cpus} -v 1 {idx} {fq}".format(**map_args) - elif re.match(r"^(bowtie[2]?|bt[2]?)$", aligner, flags=re.IGNORECASE): - cmd = ( - "bowtie2 -x {idx} -p {cpus} --quiet --very-sensitive-local -U {fq}" - ).format(**map_args) - else: - raise ValueError( - "Unknown aligner. Select bowtie2, minimap2 or bwa." - ) - - map_process = sp.Popen(cmd, shell=True, stdout=sp.PIPE) - sort_process = sp.Popen( - "samtools sort -n -@ {cpus} -O BAM -o {bam}".format(**map_args), - shell=True, - stdin=map_process.stdout, - ) - out, err = sort_process.communicate() - - # filter the reads: the reads whose truncated end was aligned are written - # to the output file. - # The reads whose truncated end was not aligned are kept for the next round. - remaining_reads = filter_bamfile(temp_alignment, iter_out[-1], min_qual) - - n += 20 - first_round = False - - # one last round without trimming - logger.info( - "Trying to map unaligned reads at full length ({0}bp).".format( - int(read_len) - ) - ) - - truncated_reads = truncate_reads( - tmp_dir, - infile=uncomp_path, - unaligned_set=remaining_reads, - trunc_len=n, - first_round=first_round, - ) - if aligner == "minimap2" or aligner == "Minimap2": - cmd = "minimap2 -x sr -a -t {cpus} {fa} {fq}".format( - fa=ref, cpus=n_cpu, fq=truncated_reads - ) - elif aligner == "bwa" or aligner == "Bwa" or aligner == "BWA": - cmd = "bwa mem -v 1 -t {cpus} {idx} {fq}".format( - idx=index, cpus=n_cpu, fq=truncated_reads - ) - else: - cmd = ( - "bowtie2 -x {idx} -p {cpus} --quiet " "--very-sensitive {fq}" - ).format(idx=index, cpus=n_cpu, fq=truncated_reads) - map_process = sp.Popen(cmd, shell=True, stdout=sp.PIPE) - # Keep reads sorted by name - sort_process = sp.Popen( - "samtools sort -n -@ {cpus} -O BAM -o {bam}".format( - cpus=n_cpu, bam=temp_alignment - ), - shell=True, - stdin=map_process.stdout, - ) - out, err = sort_process.communicate() - iter_out += [join(tmp_dir, "trunc_{0}.bam".format(str(n)))] - remaining_reads = filter_bamfile(temp_alignment, iter_out[-1], min_qual) - - # Report unaligned reads as well - iter_out += [join(tmp_dir, "unaligned.bam")] - temp_bam = ps.AlignmentFile(temp_alignment, "rb", check_sq=False) - unmapped = ps.AlignmentFile(iter_out[-1], "wb", template=temp_bam) - for r in temp_bam: - # Do not write supplementary alignments (keeping 1 alignment/read) - if r.query_name in remaining_reads and not r.is_supplementary: - unmapped.write(r) - unmapped.close() - temp_bam.close() - - # Merge all aligned reads and unmapped reads into a single bam - ps.merge("-n", "-O", "BAM", "-@", str(n_cpu), bam_out, *iter_out) - logger.info( - "{0} reads aligned / {1} total reads.".format( - int(total_reads - len(remaining_reads)), int(total_reads) - ) - ) - - return 0 - - -def truncate_reads(tmp_dir, infile, unaligned_set, trunc_len, first_round): - """Trim read ends - - Writes the n first nucleotids of each sequence in infile to an auxiliary. - file in the temporary folder. - - Parameters - ---------- - - tmp_dir : str - Path to the temporary folder. 
- infile : str - Path to the fastq file to truncate. - unaligned_set : set - Contains the names of all reads that did not map unambiguously in - previous rounds. - trunc_len : int - The number of basepairs to keep in each truncated sequence. - first_round : bool - If this is the first round, truncate all reads without checking mapping. - - Returns - ------- - - str : - Path to the output fastq file containing truncated reads. - """ - - outfile = "{0}/truncated.fastq".format(tmp_dir) - with ps.FastxFile(infile, "r") as inf, open(outfile, "w") as outf: - for entry in inf: - # If the read did not align in previous round or this is the first round - if (entry.name in unaligned_set) or first_round: - entry.sequence = entry.sequence[:trunc_len] - entry.quality = entry.quality[:trunc_len] - outf.write(str(entry) + "\n") - return outfile - - -def filter_bamfile(temp_alignment, filtered_out, min_qual=30): - """Filter alignment BAM files - - Reads all the reads in the input BAM alignment file. - Write reads to the output file if they are aligned with a good - quality, otherwise add their name in a set to stage them for the next round - of alignment. - - Parameters - ---------- - temp_alignment : str - Path to the input temporary alignment. - outfile : str - Path to the output filtered temporary alignment. - min_qual : int - Minimum mapping quality required to keep a Hi-C pair. - - Returns - ------- - set: - Contains the names reads that did not align. - """ - # Check the quality and status of each aligned fragment. - # Write the ones with good quality in the final output file. - # Keep those that do not map unambiguously for the next round. - - unaligned = set() - temp_bam = ps.AlignmentFile(temp_alignment, "rb", check_sq=False) - outf = ps.AlignmentFile(filtered_out, "wb", template=temp_bam) - for r in temp_bam: - if r.flag in [0, 16] and r.mapping_quality >= min_qual: - outf.write(r) - else: - unaligned.add(r.query_name) - - logger.info("{0} reads left to map.".format(len(unaligned))) - temp_bam.close() - outf.close() - - return unaligned +import hicstuff.iteralign as hi if __name__ == "__main__": parser = argparse.ArgumentParser() @@ -349,4 +51,4 @@ if __name__ == "__main__": else: read_len = int(read_len) - iterative_align(fq_in=reads, tmp_dir=tmp_dir, ref=ref, n_cpu=n_cpu, bam_out=bam_out, min_qual=min_qual, read_len=read_len) + hi.iterative_align(fq_in=reads, tmp_dir=tmp_dir, ref=ref, n_cpu=n_cpu, bam_out=bam_out, min_qual=min_qual, read_len=read_len) diff --git a/bin/hicstuff_log.py b/bin/hicstuff_log.py deleted file mode 100644 index 171a5665ed1ea81371dcd39b28cc701f6cc7cb86..0000000000000000000000000000000000000000 --- a/bin/hicstuff_log.py +++ /dev/null @@ -1,89 +0,0 @@ -#!/usr/bin/env python3 -# coding: utf-8 - -"""Basic logging -Basic logging setup with three main handlers (stdout, log file and optionally -texting with the right API). By default the log file is disabled, but can be -enabled or changed using set_file_handler. 
-""" - - -import logging -import logging.handlers -import requests -import json - -TEXT_CREDENTIALS_DEFAULT_PATH = "text_credentials.json" - -CURRENT_LOG_LEVEL = logging.INFO - - -logging.captureWarnings(True) - -logger = logging.getLogger() -logger.setLevel(CURRENT_LOG_LEVEL) - -logfile_formatter = logging.Formatter( - "%(asctime)s :: %(levelname)s :: %(message)s", datefmt="%Y-%m-%d,%H:%M:%S" -) -stdout_formatter = logging.Formatter("%(levelname)s :: %(message)s") -text_formatter = logging.Formatter("%(message)s") - -# file_handler = logging.FileHandler("hicstuff.log", "a") - -# file_handler.setLevel(logging.INFO) -# file_handler.setFormatter(logfile_formatter) -# logger.addHandler(file_handler) - -stream_handler = logging.StreamHandler() -stream_handler.setLevel(logging.DEBUG) -stream_handler.setFormatter(stdout_formatter) -logger.addHandler(stream_handler) - - -def set_file_handler(log_path, formatter=logfile_formatter): - """Change the file handler for custom log file location""" - - filehandler = logging.FileHandler(log_path, "a") - filehandler.setLevel(logging.INFO) - filehandler.setFormatter(formatter) - for hdlr in logger.handlers[:]: # remove the existing file handlers - if isinstance(hdlr, logging.FileHandler): - logger.removeHandler(hdlr) - logger.addHandler(filehandler) # set the new handler - - -def setup_text_logging(credentials=TEXT_CREDENTIALS_DEFAULT_PATH): - """Setup text logging - Setup text notifications on errors with a basic API. In order for it to - work, a file named 'text_credentials.json' must be in the directory where - a command is run. The logger performs a GET request to a text notification - API. - Parameters - ---------- - credentials : str or pathlib.Path - A JSON file with necessary credentials for the GET request - """ - - with open(credentials) as cred_handle: - cred_dict = json.load(cred_handle) - - api_service = cred_dict.pop("api_service") - - class TextHandler(logging.Handler): - def emit(self, record): - log_entry = self.format(record) - cred_dict["msg"] = log_entry - return requests.get(api_service, cred_dict) - - sms_handler = TextHandler() - sms_handler.setLevel(logging.ERROR) - sms_handler.setFormatter(text_formatter) - - logger.addHandler(sms_handler) - - -try: - setup_text_logging(TEXT_CREDENTIALS_DEFAULT_PATH) -except FileNotFoundError: - pass diff --git a/bin/hicstuff_stats.py b/bin/hicstuff_stats.py deleted file mode 100644 index 7fd1976162025e42db6c14c013473a6890716e24..0000000000000000000000000000000000000000 --- a/bin/hicstuff_stats.py +++ /dev/null @@ -1,184 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import re -import os -from os.path import basename -from os.path import splitext -import json - -def get_pipeline_stats(log_file): - """Get stats after pipeline execution. - - Parameters - ---------- - log_file : str - Path to hicstuff log file. 
- - Returns - ------- - list: - A single list containing stats about hicstuff pipeline execution: - - Number of sequenced pairs from fastqs - - Number (% of total reads) of unmapped reads - - Number (% of total reads) of mapped reads - - Number (% of total reads) of pairs - - Number (% of total pairs) of filtered pairs - - Number (% of total pairs) of loops - - Number (% of total pairs) of uncuts - - Number (% of total pairs) of abnormal (-- and ++) - - Number (% of total pairs) of deduplicated pairs [Number (% of total pairs) of PCR duplicates] - - From all pairs used in contact matrix: - - Number (% of matrix) of cis pairs - - Number (% of matrix) of trans pairs - - Trans ratio - """ - - with open(log_file) as file: - log_lines = [line.rstrip() for line in file] - - # 1. Number of sequenced pairs from fastqs - fastq_pairs = [s for s in log_lines if re.search("reads found in each", s)][0] - fastq_pairs = re.sub(".*INFO :: ", "", fastq_pairs) - fastq_pairs = re.sub(" reads found in each.*", "", fastq_pairs) - fastq_pairs = int(float(fastq_pairs)) - - # 2. Number (% of total) of (un)mapped reads - tot_mapped = [s for s in log_lines if re.search("mapped with Q ", s)][0] - tot_mapped = re.sub(".*Q >= 30 \(", "", tot_mapped) - tot_mapped = re.sub("/.*", "", tot_mapped) - tot_mapped = int(float(tot_mapped)) - tot_unmapped = fastq_pairs*2 - tot_mapped - - # 3. Number (% of total) of pairs - tot_pairs = [s for s in log_lines if re.search("pairs successfully mapped", s)][0] - tot_pairs = re.sub(".*INFO :: ", "", tot_pairs) - tot_pairs = re.sub(" pairs successfully.*", "", tot_pairs) - tot_pairs = int(float(tot_pairs)) - - # 4. Number (% of total) of filtered pairs - filtered_pairs = [s for s in log_lines if re.search("pairs discarded:", s)] - if (len(filtered_pairs) > 0): - filtered_pairs = filtered_pairs[0] - loops_pairs = re.sub(".*Loops: ", "", filtered_pairs) - loops_pairs = re.sub(", Uncuts:.*", "", loops_pairs) - loops_pairs = int(float(loops_pairs)) - uncuts_pairs = re.sub(".*Uncuts: ", "", filtered_pairs) - uncuts_pairs = re.sub(", Weirds:.*", "", uncuts_pairs) - uncuts_pairs = int(float(uncuts_pairs)) - abnormal_pairs = re.sub(".*Weirds: ", "", filtered_pairs) - abnormal_pairs = int(float(abnormal_pairs)) - filtered_pairs = re.sub(".*INFO :: ", "", filtered_pairs) - filtered_pairs = re.sub(" pairs discarded.*", "", filtered_pairs) - filtered_pairs = int(float(filtered_pairs)) - else: - loops_pairs=0 - uncuts_pairs=0 - abnormal_pairs=0 - filtered_pairs=0 - - # 5. Number (% of total) of PCR duplicates pairs - pcr_pairs = [s for s in log_lines if re.search("PCR duplicates have been filtered", s)] - if (len(pcr_pairs) > 0): - pcr_pairs = pcr_pairs[0] - pcr_pairs = re.sub(".*have been filtered out \(", "", pcr_pairs) - pcr_pairs = re.sub(" / .*", "", pcr_pairs) - pcr_pairs = int(float(pcr_pairs)) - else: - pcr_pairs = 0 - - # 6. Number (%) of final pairs - removed_pairs=filtered_pairs+pcr_pairs - final_pairs=tot_pairs-removed_pairs - - # 7. 
Final stats - stats = { - 'Sample': re.sub(".hicstuff.*", "", basename(log_file)), - 'Total read pairs': fastq_pairs, - 'Mapped reads': tot_mapped, - 'Unmapped reads': tot_unmapped, - 'Recovered contacts': tot_pairs, - 'Final contacts': final_pairs, - 'Removed contacts': removed_pairs, - 'Filtered out': filtered_pairs, - 'Loops': loops_pairs, - 'Uncuts': uncuts_pairs, - 'Weirds': abnormal_pairs, - 'PCR duplicates': pcr_pairs - } - - return(stats) - -def write_pipeline_stats(stats, out_file): - prefix = stats['Sample'] - fastq_pairs=stats['Total read pairs'] - tot_mapped=stats['Mapped reads'] - tot_unmapped=stats['Unmapped reads'] - tot_pairs=stats['Recovered contacts'] - final_pairs=stats['Final contacts'] - removed_pairs=stats['Removed contacts'] - filtered_pairs=stats['Filtered out'] - loops_pairs=stats['Loops'] - uncuts_pairs=stats['Uncuts'] - abnormal_pairs=stats['Weirds'] - pcr_pairs=stats['PCR duplicates'] - pct_pairs = round(tot_pairs/fastq_pairs*100, 2) - pct_mapped = round(tot_mapped/(fastq_pairs*2)*100, 2) - pct_unmapped = round(tot_unmapped/(fastq_pairs*2)*100, 2) - pct_filtered = round(filtered_pairs/tot_pairs*100, 2) - pct_loops_pairs = round(loops_pairs/tot_pairs*100, 2) - pct_uncuts_pairs = round(uncuts_pairs/tot_pairs*100, 2) - pct_abnormal_pairs = round(abnormal_pairs/tot_pairs*100, 2) - pct_pcr = round(pcr_pairs/tot_pairs*100, 2) - pct_removed=round(removed_pairs/tot_pairs*100, 2) - pct_final = round(final_pairs/tot_pairs*100, 2) - - # Open the log file and read its contents - _, file_extension = splitext(out_file) - if (file_extension == '.json'): - with open(out_file, 'w') as json_file: - json.dump(stats, json_file, indent=4) - else: - with open(out_file, 'w') as stats_file: - stats_file.write("Sample: {prefix}\n".format(prefix = prefix)) - stats_file.write("Total read pairs: {reads}\n".format(reads = "{:,}".format(fastq_pairs))) - stats_file.write("Mapped reads: {tot_mapped} ({pct_mapped}% of all reads)\n".format( - tot_mapped = "{:,}".format(tot_mapped), - pct_mapped = pct_mapped - ) - ) - stats_file.write("Unmapped reads: {tot_unmapped} ({pct_unmapped}% of all reads)\n".format( - tot_unmapped = "{:,}".format(tot_unmapped), pct_unmapped = pct_unmapped - )) - stats_file.write("Recovered contacts: {tot_pairs} ({pct_pairs}% of all read pairs)\n".format( - tot_pairs = "{:,}".format(tot_pairs), pct_pairs = pct_pairs - )) - stats_file.write("Removed contacts: {removed_pairs} ({pct_removed}% of all contacts)\n".format( - removed_pairs = "{:,}".format(removed_pairs), pct_removed = pct_removed - )) - stats_file.write(" Filtered out: {filtered_pairs} ({pct_filtered}% of all contacts)\n".format( - filtered_pairs = "{:,}".format(filtered_pairs), pct_filtered = pct_filtered - )) - stats_file.write(" Loops: {loops_pairs} ({pct_loops_pairs}% of all contacts)\n".format( - loops_pairs = "{:,}".format(loops_pairs), pct_loops_pairs = pct_loops_pairs - )) - stats_file.write(" Uncuts: {uncuts_pairs} ({pct_uncuts_pairs}% of all contacts)\n".format( - uncuts_pairs = "{:,}".format(uncuts_pairs), pct_uncuts_pairs = pct_uncuts_pairs - )) - stats_file.write(" Weirds: {abnormal_pairs} ({pct_abnormal_pairs}% of all contacts)\n".format( - abnormal_pairs = "{:,}".format(abnormal_pairs), pct_abnormal_pairs = pct_abnormal_pairs - )) - stats_file.write(" PCR duplicates: {pcr_pairs} ({pct_pcr}% of all contacts)\n".format( - pcr_pairs = "{:,}".format(pcr_pairs), pct_pcr = pct_pcr - )) - stats_file.write("Final contacts: {final_pairs} ({pct_final}% of all contacts)\n".format( - final_pairs = 
"{:,}".format(final_pairs), pct_final = pct_final - )) - -def print_pipeline_stats(stats): - tmp = '.'+stats['Sample']+'.txt' - write_pipeline_stats(stats, tmp) - with open(tmp, 'r') as file: - lines = [line for line in file] - print(''.join(lines)) - os.unlink(tmp) diff --git a/bin/hicstuff_version.py b/bin/hicstuff_version.py deleted file mode 100644 index 29e4a9413d67347053bffff8555e4e60ec644686..0000000000000000000000000000000000000000 --- a/bin/hicstuff_version.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = '3.2.2' diff --git a/modules/local/hicstuff/bam2pairs.nf b/modules/local/hicstuff/bam2pairs.nf index d4f279d6030db15ee66b3cb9d57e048794dac3c0..a12346f0da90086bd43a2caafd634e2292ea52f2 100644 --- a/modules/local/hicstuff/bam2pairs.nf +++ b/modules/local/hicstuff/bam2pairs.nf @@ -2,8 +2,7 @@ process BAM2PAIRS { tag "$meta1.id" label 'process_high' - conda "conda-forge::python=3.9 conda-forge::biopython=1.80 conda-forge::numpy=1.22.3 conda-forge::matplotlib=3.6.3 conda-forge::pandas=1.5.3" - container = "docker.io/lbmc/hicstuff:3.1.3" + container = "docker.io/lbmc/hicstuff:3.2.3" input: tuple val(meta1), path(bam1), val(meta2), path(bam2) diff --git a/modules/local/hicstuff/build_matrix.nf b/modules/local/hicstuff/build_matrix.nf index 5351c83d983fd9241b8185dd62d746bce273f453..0f9384dcb2f7a80cdade94c0cad6742c4990d10d 100644 --- a/modules/local/hicstuff/build_matrix.nf +++ b/modules/local/hicstuff/build_matrix.nf @@ -2,8 +2,7 @@ process BUILD_MATRIX { tag "$meta1.id" label 'process_high' - conda "conda-forge::python=3.9 conda-forge::biopython=1.80 conda-forge::numpy=1.22.3 conda-forge::matplotlib=3.6.3 conda-forge::pandas=1.5.3" - container = "docker.io/lbmc/hicstuff:3.1.3" + container = "docker.io/lbmc/hicstuff:3.2.3" input: tuple val(meta1), path(idx_pairs) diff --git a/modules/local/hicstuff/build_matrix_cool.nf b/modules/local/hicstuff/build_matrix_cool.nf index b63e931e23cef0bde43f8132df7ecc5de6fe6470..2b8ced046660aa44615d507272426d6b808d985c 100644 --- a/modules/local/hicstuff/build_matrix_cool.nf +++ b/modules/local/hicstuff/build_matrix_cool.nf @@ -2,8 +2,7 @@ process BUILD_MATRIX_COOL { tag "$meta1.id" label 'process_single' - conda "conda-forge::python=3.9 conda-forge::biopython=1.80 conda-forge::numpy=1.22.3 conda-forge::matplotlib=3.6.3 conda-forge::pandas=1.5.3" - container = "docker.io/lbmc/hicstuff:3.1.3" + container = "docker.io/lbmc/hicstuff:3.2.3" input: tuple val(meta1), path(idx_pairs) diff --git a/modules/local/hicstuff/cutsite.nf b/modules/local/hicstuff/cutsite.nf index 161eed1a8e7039ed2e416f5d80277125a6feefb9..c17dff7c009b2ac5b9ca6fad8794122551123d28 100644 --- a/modules/local/hicstuff/cutsite.nf +++ b/modules/local/hicstuff/cutsite.nf @@ -2,8 +2,7 @@ process CUTSITE { tag "$meta1.id" label 'process_high' - conda "conda-forge::python=3.9 conda-forge::biopython=1.80 conda-forge::numpy=1.22.3 conda-forge::matplotlib=3.6.3 conda-forge::pandas=1.5.3" - container = "docker.io/lbmc/hicstuff:3.1.3" + container = "docker.io/lbmc/hicstuff:3.2.3" input: tuple val(meta1), path(reads1), val(meta2), path(reads2) diff --git a/modules/local/hicstuff/distance_law.nf b/modules/local/hicstuff/distance_law.nf index 9a6ad8e85297314df593eec10f888e3ff1f89e2c..0035be64aedeea1d0b498b25ffcb50532cd86b92 100644 --- a/modules/local/hicstuff/distance_law.nf +++ b/modules/local/hicstuff/distance_law.nf @@ -2,8 +2,7 @@ process DISTANCE_LAW { tag "$meta1.id" label 'process_high' - conda "conda-forge::python=3.9 conda-forge::biopython=1.80 conda-forge::numpy=1.22.3 conda-forge::matplotlib=3.6.3 
conda-forge::pandas=1.5.3" - container = "docker.io/lbmc/hicstuff:3.1.3" + container = "docker.io/lbmc/hicstuff:3.2.3" input: tuple val(meta1), path(idx_pairs) diff --git a/modules/local/hicstuff/filter_event.nf b/modules/local/hicstuff/filter_event.nf index f6b0cde27bcf0ffdc1df0f567c18b44675b56c31..1e9ea836eeffb4218c9cbd36877dd381ef027350 100644 --- a/modules/local/hicstuff/filter_event.nf +++ b/modules/local/hicstuff/filter_event.nf @@ -2,8 +2,7 @@ process FILTER_EVENT { tag "$meta1.id" label 'process_high' - conda "conda-forge::python=3.9 conda-forge::biopython=1.80 conda-forge::numpy=1.22.3 conda-forge::matplotlib=3.6.3 conda-forge::pandas=1.5.3" - container = "docker.io/lbmc/hicstuff:3.1.3" + container = "docker.io/lbmc/hicstuff:3.2.3" input: tuple val(meta1), path(idx_pairs) diff --git a/modules/local/hicstuff/filter_pcr.nf b/modules/local/hicstuff/filter_pcr.nf index 0b6800ffa0f28f3b5c9a532f10e1ab23129749d3..0824c79be6f17f78a515bda74d3f5122e87c3f70 100644 --- a/modules/local/hicstuff/filter_pcr.nf +++ b/modules/local/hicstuff/filter_pcr.nf @@ -2,8 +2,7 @@ process FILTER_PCR { tag "$meta1.id" label 'process_high' - conda "conda-forge::python=3.9 conda-forge::biopython=1.80 conda-forge::numpy=1.22.3 conda-forge::matplotlib=3.6.3 conda-forge::pandas=1.5.3" - container = "docker.io/lbmc/hicstuff:3.1.3" + container = "docker.io/lbmc/hicstuff:3.2.3" input: tuple val(meta1), path(idx_pairs) diff --git a/modules/local/hicstuff/fragment_enzyme.nf b/modules/local/hicstuff/fragment_enzyme.nf index b6d044a2fda16e73b433e2512f09ef69070b8db8..d8c5a09618405d44ebd81897818320d64b9e74b7 100644 --- a/modules/local/hicstuff/fragment_enzyme.nf +++ b/modules/local/hicstuff/fragment_enzyme.nf @@ -2,8 +2,7 @@ process FRAGMENT_ENZYME { tag "$digestion" label "process_single" - conda "conda-forge::python=3.9 conda-forge::biopython=1.80 conda-forge::numpy=1.22.3 conda-forge::matplotlib=3.6.3 conda-forge::pandas=1.5.3" - container = "docker.io/lbmc/hicstuff:3.1.3" + container = "docker.io/lbmc/hicstuff:3.2.3" input: val(digestion) diff --git a/modules/local/hicstuff/iteralign.nf b/modules/local/hicstuff/iteralign.nf index d455a7c1bc940dd0ba20be384e9e54c93a0d45c4..91f1d2930e9ff01fb89257d82e1bb79f0ae229cf 100644 --- a/modules/local/hicstuff/iteralign.nf +++ b/modules/local/hicstuff/iteralign.nf @@ -2,8 +2,7 @@ process ITERALIGN { tag "$meta.id" label "process_high" - conda "conda-forge::python=3.9 conda-forge::biopython=1.80 conda-forge::numpy=1.22.3 conda-forge::matplotlib=3.6.3 conda-forge::pandas=1.5.3" - container = "docker.io/lbmc/hicstuff:3.1.3" + container = "docker.io/lbmc/hicstuff:3.2.3" input: tuple val(meta), path(reads)
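# [Illustrative aside] The module updates above drop the per-process conda
# environment and pin every hicstuff step to the docker.io/lbmc/hicstuff:3.2.3
# image, which is expected to ship hicstuff as an installed Python package. The
# bin/ scripts can therefore remain thin argparse shims that delegate to the
# library, as hicstuff_iteralign.py now does via hicstuff.iteralign. A minimal
# sketch of that wrapper pattern (the option names below are assumed for
# illustration, not copied from the actual script):
import argparse

import hicstuff.iteralign as hi

if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Thin wrapper delegating to hicstuff.iteralign.iterative_align"
    )
    parser.add_argument("--reads", required=True, help="FASTQ file to align iteratively")
    parser.add_argument("--ref", required=True, help="Reference genome or index basename")
    parser.add_argument("--tmp_dir", default=".", help="Directory for temporary files")
    parser.add_argument("--bam_out", required=True, help="Path of the output BAM file")
    parser.add_argument("--n_cpu", type=int, default=1, help="Number of CPUs")
    parser.add_argument("--min_qual", type=int, default=30, help="Minimum mapping quality")
    args = parser.parse_args()

    # All alignment logic lives in the hicstuff package inside the container;
    # the wrapper only forwards command-line arguments to the library call.
    hi.iterative_align(
        fq_in=args.reads,
        tmp_dir=args.tmp_dir,
        ref=args.ref,
        n_cpu=args.n_cpu,
        bam_out=args.bam_out,
        min_qual=args.min_qual,
    )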