From 6fbdb2eb5aa1af6583dbe68bc343bfd866f60577 Mon Sep 17 00:00:00 2001
From: Edmund Miller <edmund.a.miller@protonmail.com>
Date: Wed, 4 Jan 2023 08:17:08 -0600
Subject: [PATCH] [LINT] Run black

---
 bin/digest_genome.py         | 101 +++++-----
 bin/mapped_2hic_dnase.py     | 222 ++++++++++++---------
 bin/mapped_2hic_fragments.py | 366 +++++++++++++++++++++--------------
 bin/mergeSAM.py              | 181 +++++++++++------
 bin/merge_statfiles.py       |  34 ++--
 5 files changed, 543 insertions(+), 361 deletions(-)

diff --git a/bin/digest_genome.py b/bin/digest_genome.py
index 2c29a49..9f05b45 100755
--- a/bin/digest_genome.py
+++ b/bin/digest_genome.py
@@ -18,15 +18,11 @@ import os
 import sys
 import numpy as np
 
-RE_cutsite = {
-    "mboi": ["^GATC"],
-    "dpnii": ["^GATC"],
-    "bglii": ["A^GATCT"],
-    "hindiii": ["A^AGCTT"]}
+RE_cutsite = {"mboi": ["^GATC"], "dpnii": ["^GATC"], "bglii": ["A^GATCT"], "hindiii": ["A^AGCTT"]}
 
 
 def find_re_sites(filename, sequences, offset):
-    with open(filename, 'r') as infile:
+    with open(filename, "r") as infile:
         chr_id = None
         big_str = ""
         indices = []
@@ -40,13 +36,12 @@ def find_re_sites(filename, sequences, offset):
                 # If this is not the first chromosome, find the indices and append
                 # them to the list
                 if chr_id is not None:
-                     for rs in range(len(sequences)):
-                         pattern = "(?={})".format(sequences[rs].lower())
-                         indices += [m.start() + offset[rs]\
-                         for m in re.finditer(pattern, big_str)]
-                     indices.sort()
-                     all_indices.append(indices)
-                     indices = []
+                    for rs in range(len(sequences)):
+                        pattern = "(?={})".format(sequences[rs].lower())
+                        indices += [m.start() + offset[rs] for m in re.finditer(pattern, big_str)]
+                    indices.sort()
+                    all_indices.append(indices)
+                    indices = []
 
                 # This is a new chromosome. Empty the sequence string, and add the
                 # correct chrom id
@@ -63,11 +58,10 @@ def find_re_sites(filename, sequences, offset):
         # Add the indices for the last chromosome
         for rs in range(len(sequences)):
             pattern = "(?={})".format(sequences[rs].lower())
-            indices += [m.start() + offset[rs]
-                        for m in re.finditer(pattern, big_str)]
+            indices += [m.start() + offset[rs] for m in re.finditer(pattern, big_str)]
         indices.sort()
         all_indices.append(indices)
-    
+
     return contig_names, all_indices
 
 
@@ -75,7 +69,7 @@ def find_chromsomose_lengths(reference_filename):
     chromosome_lengths = []
     chromosome_names = []
     length = None
-    with open(reference_filename, 'r') as infile:
+    with open(reference_filename, "r") as infile:
         for line in infile:
             if line.startswith(">"):
                 chromosome_names.append(line[1:].strip())
@@ -89,11 +83,11 @@ def find_chromsomose_lengths(reference_filename):
 
 
 def replaceN(cs):
-    npos = int(cs.find('N'))
+    npos = int(cs.find("N"))
     cseql = []
     if npos != -1:
-        for nuc in ["A","C","G","T"]:
-            tmp = cs.replace('N', nuc, 1)
+        for nuc in ["A", "C", "G", "T"]:
+            tmp = cs.replace("N", nuc, 1)
             tmpl = replaceN(tmp)
             if type(tmpl) == list:
                 cseql = cseql + tmpl
@@ -106,50 +100,59 @@ def replaceN(cs):
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument('fastafile')
-    parser.add_argument('-r', '--restriction_sites',
-                        dest='res_sites',
-                        nargs='+',
-                        help=("The cutting position has to be specified using "
-                              "'^'. For instance, -r A^AGCTT for HindIII "
-                              "digestion. Several restriction enzyme can be "
-                              "specified."))
-    parser.add_argument('-o', '--out', default=None)
+    parser.add_argument("fastafile")
+    parser.add_argument(
+        "-r",
+        "--restriction_sites",
+        dest="res_sites",
+        nargs="+",
+        help=(
+            "The cutting position has to be specified using "
+            "'^'. For instance, -r A^AGCTT for HindIII "
+            "digestion. Several restriction enzyme can be "
+            "specified."
+        ),
+    )
+    parser.add_argument("-o", "--out", default=None)
     args = parser.parse_args()
 
     filename = args.fastafile
     out = args.out
-    
+
     # Split restriction sites if comma-separated
-    cutsites=[]
+    cutsites = []
     for s in args.res_sites:
-        for m in s.split(','):
+        for m in s.split(","):
             cutsites.append(m)
-                
+
     # process args and get restriction enzyme sequences
     sequences = []
     offset = []
     for cs in cutsites:
         if cs.lower() in RE_cutsite:
-            cseq = ''.join(RE_cutsite[cs.lower()])
+            cseq = "".join(RE_cutsite[cs.lower()])
         else:
             cseq = cs
 
-        offpos = int(cseq.find('^'))
+        offpos = int(cseq.find("^"))
         if offpos == -1:
-            print("Unable to detect offset for {}. Please, use '^' to specify the cutting position,\
-                   i.e A^GATCT for HindIII digestion.".format(cseq))
+            print(
+                "Unable to detect offset for {}. Please, use '^' to specify the cutting position,\
+                   i.e A^GATCT for HindIII digestion.".format(
+                    cseq
+                )
+            )
             sys.exit(-1)
 
         for nuc in list(set(cs)):
-            if nuc not in ['A','T','G','C','N','^']:
+            if nuc not in ["A", "T", "G", "C", "N", "^"]:
                 print("Find unexpected character ['{}']in restriction motif".format(nuc))
                 print("Note that multiple motifs should be separated by a space (not a comma !)")
 
                 sys.exit(-1)
 
         offset.append(offpos)
-        sequences.append(re.sub('\^', '', cseq))
+        sequences.append(re.sub("\^", "", cseq))
 
     # replace all N in restriction motif
     sequences_without_N = []
@@ -158,32 +161,32 @@ if __name__ == "__main__":
         nrs = replaceN(sequences[rs])
         sequences_without_N = sequences_without_N + nrs
         offset_without_N = offset_without_N + [offset[rs]] * len(nrs)
-          
+
     sequences = sequences_without_N
     offset = offset_without_N
-    
+
     if out is None:
         out = os.path.splitext(filename)[0] + "_fragments.bed"
 
     print("Analyzing", filename)
     print("Restriction site(s)", ",".join(sequences))
-    print("Offset(s)",  ','.join(str(x) for x in offset))
+    print("Offset(s)", ",".join(str(x) for x in offset))
 
     # Read fasta file and look for rs per chromosome
-    contig_names, all_indices = find_re_sites(filename, sequences,  offset=offset)
+    contig_names, all_indices = find_re_sites(filename, sequences, offset=offset)
     _, lengths = find_chromsomose_lengths(filename)
 
     valid_fragments = []
     for i, indices in enumerate(all_indices):
         valid_fragments_chr = np.concatenate(
-            [np.concatenate([[0], indices])[:, np.newaxis],
-             np.concatenate([indices, [lengths[i]]])[:, np.newaxis]],
-            axis=1)
+            [np.concatenate([[0], indices])[:, np.newaxis], np.concatenate([indices, [lengths[i]]])[:, np.newaxis]],
+            axis=1,
+        )
         valid_fragments.append(valid_fragments_chr)
 
     # Write results
     print("Writing to {} ...".format(out))
-    with open(out, 'w') as outfile:
+    with open(out, "w") as outfile:
         for chrom_name, indices in zip(contig_names, valid_fragments):
             frag_id = 0
             for begin, end in indices:
@@ -192,4 +195,6 @@ if __name__ == "__main__":
                 if end > begin:
                     frag_id += 1
                     frag_name = "HIC_{}_{}".format(str(chrom_name), int(frag_id))
-                    outfile.write("{}\t{}\t{}\t{}\t0\t+\n".format(str(chrom_name), int(begin), int(end), str(frag_name)))
+                    outfile.write(
+                        "{}\t{}\t{}\t{}\t0\t+\n".format(str(chrom_name), int(begin), int(end), str(frag_name))
+                    )
diff --git a/bin/mapped_2hic_dnase.py b/bin/mapped_2hic_dnase.py
index dd023b0..ff59366 100755
--- a/bin/mapped_2hic_dnase.py
+++ b/bin/mapped_2hic_dnase.py
@@ -25,8 +25,12 @@ def usage():
     print("-r/--mappedReadsFile <BAM/SAM file of mapped reads>")
     print("[-o/--outputDir] <Output directory. Default is current directory>")
     print("[-d/--minCisDist] <Minimum distance between intrachromosomal contact to consider>")
-    print("[-g/--gtag] <Genotype tag. If specified, this tag will be reported in the valid pairs output for allele specific classification>")
-    print("[-a/--all] <Write all additional output files, with information about the discarded reads (self-circle, dangling end, etc.)>")
+    print(
+        "[-g/--gtag] <Genotype tag. If specified, this tag will be reported in the valid pairs output for allele specific classification>"
+    )
+    print(
+        "[-a/--all] <Write all additional output files, with information about the discarded reads (self-circle, dangling end, etc.)>"
+    )
     print("[-v/--verbose] <Verbose>")
     print("[-h/--help] <Help>")
     return
@@ -38,8 +42,8 @@ def get_args():
         opts, args = getopt.getopt(
             sys.argv[1:],
             "r:o:d:g:avh",
-            ["mappedReadsFile=",
-             "outputDir=", "minDist=", "gatg", "all", "verbose", "help"])
+            ["mappedReadsFile=", "outputDir=", "minDist=", "gatg", "all", "verbose", "help"],
+        )
     except getopt.GetoptError:
         usage()
         sys.exit(-1)
@@ -78,8 +82,8 @@ def get_read_pos(read, st="start"):
         list of aligned reads
     """
     if st == "middle":
-        pos = read.reference_start + int(read.alen/2)
-    elif st =="start":
+        pos = read.reference_start + int(read.alen / 2)
+    elif st == "start":
         pos = get_read_start(read)
     elif st == "left":
         pos = read.reference_start
@@ -88,11 +92,11 @@ def get_read_pos(read, st="start"):
 
 
 def get_read_start(read):
-    """                                                                                                                                                                                                        
-    Return the 5' end of the read                                                                                                                                                                              
+    """
+    Return the 5' end of the read
     """
     if read.is_reverse:
-        pos = read.reference_start + read.alen -1
+        pos = read.reference_start + read.alen - 1
     else:
         pos = read.reference_start
     return pos
@@ -125,7 +129,7 @@ def get_ordered_reads(read1, read2):
 def isIntraChrom(read1, read2):
     """
     Return true is the reads pair is intrachromosomal
-    
+
     read1 : [AlignedRead]
     read2 : [AlignedRead]
 
@@ -163,23 +167,23 @@ def get_valid_orientation(read1, read2):
 
 
 def get_cis_dist(read1, read2):
-     """
-     Calculte the size of the DNA fragment library
+    """
+    Calculate the contact distance between two intrachromosomal reads
 
-     read1 : [AlignedRead]
-     read2 : [AlignedRead]
+    read1 : [AlignedRead]
+    read2 : [AlignedRead]
 
-     """
-     # Get oriented reads
-     ##r1, r2 = get_ordered_reads(read1, read2)
-     dist = None
-     if not r1.is_unmapped and not r2.is_unmapped:         
-         ## Contact distances can be calculated for intrachromosomal reads only
-         if isIntraChrom(read1, read2):
-             r1pos = get_read_pos(read1)
-             r2pos = get_read_pos(read2)
-             dist = abs(r1pos - r2pos)
-     return dist
+    """
+    # Both mates must be mapped; otherwise no distance is defined and None is returned.
+    # Use this function's own arguments, not any r1/r2 that may exist at module level.
+    dist = None
+    if not read1.is_unmapped and not read2.is_unmapped:
+        ## Contact distances can be calculated for intrachromosomal reads only
+        if isIntraChrom(read1, read2):
+            r1pos = get_read_pos(read1)
+            r2pos = get_read_pos(read2)
+            dist = abs(r1pos - r2pos)
+    return dist
 
 
 def get_read_tag(read, tag):
@@ -255,15 +259,15 @@ if __name__ == "__main__":
     CF_ascounter = 0
 
     baseReadsFile = os.path.basename(mappedReadsFile)
-    baseReadsFile = re.sub(r'\.bam$|\.sam$', '', baseReadsFile)
+    baseReadsFile = re.sub(r"\.bam$|\.sam$", "", baseReadsFile)
 
     # Open handlers for output files
-    handle_valid = open(outputDir + '/' + baseReadsFile + '.validPairs', 'w')
+    handle_valid = open(outputDir + "/" + baseReadsFile + ".validPairs", "w")
 
     if allOutput:
-        handle_dump = open(outputDir + '/' + baseReadsFile + '.DumpPairs', 'w')
-        handle_single = open(outputDir + '/' + baseReadsFile + '.SinglePairs','w')
-        handle_filt = open(outputDir + '/' + baseReadsFile + '.FiltPairs','w')
+        handle_dump = open(outputDir + "/" + baseReadsFile + ".DumpPairs", "w")
+        handle_single = open(outputDir + "/" + baseReadsFile + ".SinglePairs", "w")
+        handle_filt = open(outputDir + "/" + baseReadsFile + ".FiltPairs", "w")
 
     # Read the SAM/BAM file
     if verbose:
@@ -306,7 +310,7 @@ if __name__ == "__main__":
                 cur_handler = handle_single if allOutput else None
 
             # Check Distance criteria - Filter
-            if (minDist is not None and dist is not None and dist < int(minDist)):
+            if minDist is not None and dist is not None and dist < int(minDist):
                 interactionType = "FILT"
                 filt_counter += 1
                 cur_handler = handle_filt if allOutput else None
@@ -330,13 +334,11 @@ if __name__ == "__main__":
                     dump_counter += 1
                     cur_handler = handle_dump if allOutput else None
 
-
-
             # Split valid pairs based on XA tag
             if gtag is not None:
                 r1as = get_read_tag(r1, gtag)
                 r2as = get_read_tag(r2, gtag)
-                        
+
                 if r1as == 1 and r2as == 1:
                     G1G1_ascounter += 1
                 elif r1as == 2 and r2as == 2:
@@ -357,11 +359,10 @@ if __name__ == "__main__":
                     CF_ascounter += 1
                 else:
                     UU_ascounter += 1
-                        
-       
+
             if cur_handler is not None:
                 if not r1.is_unmapped and not r2.is_unmapped:
-                    
+
                     ##reorient reads to ease duplicates removal
                     or1, or2 = get_ordered_reads(r1, r2)
                     or1_chrom = samfile.get_reference_name(or1.reference_id)
@@ -371,53 +372,93 @@ if __name__ == "__main__":
                     r1as = get_read_tag(or1, gtag)
                     r2as = get_read_tag(or2, gtag)
                     if gtag is not None:
-                        htag = str(r1as)+"-"+str(r2as)
-                        
+                        htag = str(r1as) + "-" + str(r2as)
+
                     cur_handler.write(
-                        or1.query_name + "\t" +
-                        or1_chrom + "\t" +
-                        str(get_read_pos(or1)+1) + "\t" +
-                        str(get_read_strand(or1)) + "\t" +
-                        or2_chrom + "\t" +
-                        str(get_read_pos(or2)+1) + "\t" +
-                        str(get_read_strand(or2)) + "\t" +
-                        "NA" + "\t" + ##dist 
-                        "NA" + "\t" + ##resfrag1
-                        "NA" + "\t" + ##resfrag2
-                        str(or1.mapping_quality) + "\t" + 
-                        str(or2.mapping_quality) + "\t" + 
-                        str(htag) + "\n")
-                
+                        or1.query_name
+                        + "\t"
+                        + or1_chrom
+                        + "\t"
+                        + str(get_read_pos(or1) + 1)
+                        + "\t"
+                        + str(get_read_strand(or1))
+                        + "\t"
+                        + or2_chrom
+                        + "\t"
+                        + str(get_read_pos(or2) + 1)
+                        + "\t"
+                        + str(get_read_strand(or2))
+                        + "\t"
+                        + "NA"  ##dist
+                        + "\t"
+                        + "NA"  ##resfrag1
+                        + "\t"
+                        + "NA"  ##resfrag2
+                        + "\t"
+                        + str(or1.mapping_quality)
+                        + "\t"
+                        + str(or2.mapping_quality)
+                        + "\t"
+                        + str(htag)
+                        + "\n"
+                    )
+
                 elif r2.is_unmapped and not r1.is_unmapped:
                     cur_handler.write(
-                        r1.query_name + "\t" +
-                        r1_chrom + "\t" +
-                        str(get_read_pos(r1)+1) + "\t" +
-                        str(get_read_strand(r1)) + "\t" +
-                        "*" + "\t" +
-                        "*" + "\t" +
-                        "*" + "\t" +
-                        "*" + "\t" + 
-                        "*" + "\t" +
-                        "*" + "\t" +
-                        str(r1.mapping_quality) + "\t" + 
-                        "*" + "\n")
+                        r1.query_name
+                        + "\t"
+                        + r1_chrom
+                        + "\t"
+                        + str(get_read_pos(r1) + 1)
+                        + "\t"
+                        + str(get_read_strand(r1))
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + str(r1.mapping_quality)
+                        + "\t"
+                        + "*"
+                        + "\n"
+                    )
                 elif r1.is_unmapped and not r2.is_unmapped:
                     cur_handler.write(
-                        r2.query_name + "\t" +
-                        "*" + "\t" +
-                        "*" + "\t" +
-                        "*" + "\t" +
-                        r2_chrom + "\t" +
-                        str(get_read_pos(r2)+1) + "\t" +
-                        str(get_read_strand(r2)) + "\t" +
-                        "*" + "\t" +
-                        "*" + "\t" +
-                        "*" + "\t" +
-                        "*" + "\t" + 
-                        str(r2.mapping_quality) + "\n")
-
-            if (reads_counter % 100000 == 0 and verbose):
+                        r2.query_name
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + r2_chrom
+                        + "\t"
+                        + str(get_read_pos(r2) + 1)
+                        + "\t"
+                        + str(get_read_strand(r2))
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + str(r2.mapping_quality)
+                        + "\n"
+                    )
+
+            if reads_counter % 100000 == 0 and verbose:
                 print("##", reads_counter)
 
     # Close handler
@@ -428,7 +469,7 @@ if __name__ == "__main__":
         handle_filt.close()
 
     # Write stats file
-    with open(outputDir + '/' + baseReadsFile + '.RSstat', 'w') as handle_stat:
+    with open(outputDir + "/" + baseReadsFile + ".RSstat", "w") as handle_stat:
         handle_stat.write("## Hi-C processing - no restriction fragments\n")
         handle_stat.write("Valid_interaction_pairs\t" + str(valid_counter) + "\n")
         handle_stat.write("Valid_interaction_pairs_FF\t" + str(valid_counter_FF) + "\n")
@@ -439,17 +480,24 @@ if __name__ == "__main__":
         handle_stat.write("Filtered_pairs\t" + str(filt_counter) + "\n")
         handle_stat.write("Dumped_pairs\t" + str(dump_counter) + "\n")
 
-    ## Write AS report
+        ## Write AS report
         if gtag is not None:
             handle_stat.write("## ======================================\n")
             handle_stat.write("## Allele specific information\n")
             handle_stat.write("Valid_pairs_from_ref_genome_(1-1)\t" + str(G1G1_ascounter) + "\n")
-            handle_stat.write("Valid_pairs_from_ref_genome_with_one_unassigned_mate_(0-1/1-0)\t" + str(UG1_ascounter+G1U_ascounter) + "\n")
+            handle_stat.write(
+                "Valid_pairs_from_ref_genome_with_one_unassigned_mate_(0-1/1-0)\t"
+                + str(UG1_ascounter + G1U_ascounter)
+                + "\n"
+            )
             handle_stat.write("Valid_pairs_from_alt_genome_(2-2)\t" + str(G2G2_ascounter) + "\n")
-            handle_stat.write("Valid_pairs_from_alt_genome_with_one_unassigned_mate_(0-2/2-0)\t" + str(UG2_ascounter+G2U_ascounter) + "\n")
-            handle_stat.write("Valid_pairs_from_alt_and_ref_genome_(1-2/2-1)\t" + str(G1G2_ascounter+G2G1_ascounter) + "\n")
+            handle_stat.write(
+                "Valid_pairs_from_alt_genome_with_one_unassigned_mate_(0-2/2-0)\t"
+                + str(UG2_ascounter + G2U_ascounter)
+                + "\n"
+            )
+            handle_stat.write(
+                "Valid_pairs_from_alt_and_ref_genome_(1-2/2-1)\t" + str(G1G2_ascounter + G2G1_ascounter) + "\n"
+            )
             handle_stat.write("Valid_pairs_with_both_unassigned_mated_(0-0)\t" + str(UU_ascounter) + "\n")
             handle_stat.write("Valid_pairs_with_at_least_one_conflicting_mate_(3-)\t" + str(CF_ascounter) + "\n")
-
-
-
diff --git a/bin/mapped_2hic_fragments.py b/bin/mapped_2hic_fragments.py
index e823ee0..cc0e40b 100755
--- a/bin/mapped_2hic_fragments.py
+++ b/bin/mapped_2hic_fragments.py
@@ -32,8 +32,12 @@ def usage():
     print("[-t/--shortestFragmentLength] <Shortest restriction fragment length to consider>")
     print("[-m/--longestFragmentLength] <Longest restriction fragment length to consider>")
     print("[-d/--minCisDist] <Minimum distance between intrachromosomal contact to consider>")
-    print("[-g/--gtag] <Genotype tag. If specified, this tag will be reported in the valid pairs output for allele specific classification>")
-    print("[-a/--all] <Write all additional output files, with information about the discarded reads (self-circle, dangling end, etc.)>")
+    print(
+        "[-g/--gtag] <Genotype tag. If specified, this tag will be reported in the valid pairs output for allele specific classification>"
+    )
+    print(
+        "[-a/--all] <Write all additional output files, with information about the discarded reads (self-circle, dangling end, etc.)>"
+    )
     print("[-S/--sam] <Output an additional SAM file with flag 'CT' for pairs classification>")
     print("[-v/--verbose] <Verbose>")
     print("[-h/--help] <Help>")
@@ -46,13 +50,22 @@ def get_args():
         opts, args = getopt.getopt(
             sys.argv[1:],
             "f:r:o:s:l:t:m:d:g:Svah",
-            ["fragmentFile=",
-             "mappedReadsFile=",
-             "outputDir=", 
-             "minInsertSize=", "maxInsertSize", 
-             "minFragSize", "maxFragSize", 
-             "minDist",
-             "gatg", "sam", "verbose", "all", "help"])
+            [
+                "fragmentFile=",
+                "mappedReadsFile=",
+                "outputDir=",
+                "minInsertSize=",
+                "maxInsertSize",
+                "minFragSize",
+                "maxFragSize",
+                "minDist",
+                "gatg",
+                "sam",
+                "verbose",
+                "all",
+                "help",
+            ],
+        )
     except getopt.GetoptError:
         usage()
         sys.exit(-1)
@@ -66,7 +79,7 @@ def timing(function, *args):
     """
     startTime = time.time()
     result = function(*args)
-    print('{} function took {:.3f}ms'.format(function.__name__, (time.time() - startTime) * 1000))
+    print("{} function took {:.3f}ms".format(function.__name__, (time.time() - startTime) * 1000))
     return result
 
 
@@ -88,7 +101,7 @@ def get_read_strand(read):
 def isIntraChrom(read1, read2):
     """
     Return true is the reads pair is intrachromosomal
-    
+
     read1 : [AlignedRead]
     read2 : [AlignedRead]
 
@@ -99,22 +112,22 @@ def isIntraChrom(read1, read2):
 
 
 def get_cis_dist(read1, read2):
-     """
-     Calculte the contact distance between two intrachromosomal reads
+    """
+    Calculate the contact distance between two intrachromosomal reads
 
-     read1 : [AlignedRead]
-     read2 : [AlignedRead]
+    read1 : [AlignedRead]
+    read2 : [AlignedRead]
 
-     """
-     # Get oriented reads
-     ##r1, r2 = get_ordered_reads(read1, read2)
-     dist = None
-     if not read1.is_unmapped and not read2.is_unmapped:         
-         ## Contact distances can be calculated for intrachromosomal reads only
-         if isIntraChrom(read1, read2):
-             r1pos, r2pos = get_read_pos(read1), get_read_pos(read2)
-             dist = abs(r1pos - r2pos)
-     return dist
+    """
+    # Get oriented reads
+    ##r1, r2 = get_ordered_reads(read1, read2)
+    dist = None
+    if not read1.is_unmapped and not read2.is_unmapped:
+        ## Contact distances can be calculated for intrachromosomal reads only
+        if isIntraChrom(read1, read2):
+            r1pos, r2pos = get_read_pos(read1), get_read_pos(read2)
+            dist = abs(r1pos - r2pos)
+    return dist
 
 
 def get_read_pos(read, st="start"):
@@ -135,12 +148,12 @@ def get_read_pos(read, st="start"):
     """
 
     if st == "middle":
-        pos = read.reference_start + int(read.alen/2)
-    elif st =="start":
+        pos = read.reference_start + int(read.alen / 2)
+    elif st == "start":
         pos = get_read_start(read)
     elif st == "left":
         pos = read.reference_start
-    
+
     return pos
 
 
@@ -149,11 +162,12 @@ def get_read_start(read):
     Return the 5' end of the read
     """
     if read.is_reverse:
-        pos = read.reference_start + read.alen -1
+        pos = read.reference_start + read.alen - 1
     else:
         pos = read.reference_start
     return pos
 
+
 def get_ordered_reads(read1, read2):
     """
     Reorient reads
@@ -183,9 +197,10 @@ def get_ordered_reads(read1, read2):
             r1, r2 = read1, read2
         else:
             r1, r2 = read2, read1
-                
+
     return r1, r2
 
+
 def load_restriction_fragment(in_file, minfragsize=None, maxfragsize=None, verbose=False):
     """
     Read a BED file and store the intervals in a tree
@@ -204,37 +219,37 @@ def load_restriction_fragment(in_file, minfragsize=None, maxfragsize=None, verbo
     nline = 0
     nfilt = 0
     for line in bed_handle:
-         nline += 1
-         bedtab = line.split("\t")
-         try:
-              chromosome, start, end, name = bedtab[:4]
-         except ValueError:
-              print("Warning : wrong input format in line {}. Not a BED file ?!".format(nline))
-              continue
+        nline += 1
+        bedtab = line.split("\t")
+        try:
+            chromosome, start, end, name = bedtab[:4]
+        except ValueError:
+            print("Warning : wrong input format in line {}. Not a BED file ?!".format(nline))
+            continue
 
         # BED files are zero-based as Intervals objects
-         start = int(start)  # + 1
-         end = int(end)
-         fragl = abs(end - start)
-         name = name.strip()
-
-         ## Discard fragments outside the size range
-         filt = False
-         if minfragsize != None and int(fragl) < int(minfragsize):
-             nfilt += 1
-             filt = True
-         elif maxfragsize != None and int(fragl) > int(maxfragsize):
-             nfilt += 1
-             filt = True
-       
-         if chromosome in resFrag:
-             tree = resFrag[chromosome]
-             tree.add_interval(Interval(start, end, value={'name': name, 'filter': filt}))
-         else:
-             tree = Intersecter()
-             tree.add_interval(Interval(start, end, value={'name': name, 'filter': filt}))
-             resFrag[chromosome] = tree
-    
+        start = int(start)  # + 1
+        end = int(end)
+        fragl = abs(end - start)
+        name = name.strip()
+
+        ## Discard fragments outside the size range
+        filt = False
+        if minfragsize != None and int(fragl) < int(minfragsize):
+            nfilt += 1
+            filt = True
+        elif maxfragsize != None and int(fragl) > int(maxfragsize):
+            nfilt += 1
+            filt = True
+
+        if chromosome in resFrag:
+            tree = resFrag[chromosome]
+            tree.add_interval(Interval(start, end, value={"name": name, "filter": filt}))
+        else:
+            tree = Intersecter()
+            tree.add_interval(Interval(start, end, value={"name": name, "filter": filt}))
+            resFrag[chromosome] = tree
+
     if nfilt > 0:
         print("Warning : {} fragment(s) outside of range and discarded. {} remaining.".format(nfilt, nline - nfilt))
     bed_handle.close()
@@ -253,10 +268,10 @@ def get_overlapping_restriction_fragment(resFrag, chrom, read):
     """
     # Get read position (middle or start)
     pos = get_read_pos(read, st="middle")
-    
+
     if chrom in resFrag:
         # Overlap with the position of the read (zero-based)
-        resfrag = resFrag[chrom].find(pos, pos+1)
+        resfrag = resFrag[chrom].find(pos, pos + 1)
         if len(resfrag) > 1:
             print("Warning : {} restictions fragments found for {} -skipped".format(len(resfrag), read.query_name))
             return None
@@ -271,21 +286,22 @@ def get_overlapping_restriction_fragment(resFrag, chrom, read):
 
 
 def are_contiguous_fragments(frag1, frag2, chr1, chr2):
-    '''
+    """
     Compare fragment positions to check if they are contiguous
-    '''
+    """
     ret = False
     if chr1 == chr2:
         if int(frag1.start) < int(frag2.start):
             d = int(frag2.start) - int(frag1.end)
         else:
             d = int(frag1.start) - int(frag2.end)
-            
+
         if d == 0:
             ret = True
-    
+
     return ret
 
+
 def is_religation(read1, read2, frag1, frag2):
     """
     Reads are expected to map adjacent fragments
@@ -294,8 +310,8 @@ def is_religation(read1, read2, frag1, frag2):
     """
     ret = False
     if are_contiguous_fragments(frag1, frag2, read1.tid, read2.tid):
-        #r1, r2 = get_ordered_reads(read1, read2)
-        #if get_read_strand(r1) == "+" and get_read_strand(r2) == "-":
+        # r1, r2 = get_ordered_reads(read1, read2)
+        # if get_read_strand(r1) == "+" and get_read_strand(r2) == "-":
         ret = True
     return ret
 
@@ -405,8 +421,7 @@ def get_PE_fragment_size(read1, read2, resFrag1, resFrag2, interactionType):
     return fragmentsize
 
 
-def get_interaction_type(read1, read1_chrom, resfrag1, read2,
-                         read2_chrom, resfrag2, verbose):
+def get_interaction_type(read1, read1_chrom, resfrag1, read2, read2_chrom, resfrag2, verbose):
     """
     Returns the interaction type
 
@@ -433,7 +448,7 @@ def get_interaction_type(read1, read1_chrom, resfrag1, read2,
     # If returned InteractionType=None -> Same restriction fragment
     # and same strand = Dump
     interactionType = None
-      
+
     if not read1.is_unmapped and not read2.is_unmapped and resfrag1 is not None and resfrag2 is not None:
         # same restriction fragment
         if resfrag1 == resfrag2:
@@ -549,29 +564,29 @@ if __name__ == "__main__":
     CF_ascounter = 0
 
     baseReadsFile = os.path.basename(mappedReadsFile)
-    baseReadsFile = re.sub(r'\.bam$|\.sam$', '', baseReadsFile)
+    baseReadsFile = re.sub(r"\.bam$|\.sam$", "", baseReadsFile)
 
     # Open handlers for output files
-    handle_valid = open(outputDir + '/' + baseReadsFile + '.validPairs', 'w')
+    handle_valid = open(outputDir + "/" + baseReadsFile + ".validPairs", "w")
 
     if allOutput:
-        handle_de = open(outputDir + '/' + baseReadsFile + '.DEPairs', 'w')
-        handle_re = open(outputDir + '/' + baseReadsFile + '.REPairs', 'w')
-        handle_sc = open(outputDir + '/' + baseReadsFile + '.SCPairs', 'w')
-        handle_dump = open(outputDir + '/' + baseReadsFile + '.DumpPairs', 'w')
-        handle_single = open(outputDir + '/' + baseReadsFile + '.SinglePairs', 'w')
-        handle_filt = open(outputDir + '/' + baseReadsFile + '.FiltPairs', 'w')
+        handle_de = open(outputDir + "/" + baseReadsFile + ".DEPairs", "w")
+        handle_re = open(outputDir + "/" + baseReadsFile + ".REPairs", "w")
+        handle_sc = open(outputDir + "/" + baseReadsFile + ".SCPairs", "w")
+        handle_dump = open(outputDir + "/" + baseReadsFile + ".DumpPairs", "w")
+        handle_single = open(outputDir + "/" + baseReadsFile + ".SinglePairs", "w")
+        handle_filt = open(outputDir + "/" + baseReadsFile + ".FiltPairs", "w")
 
     # Read the BED file
     resFrag = timing(load_restriction_fragment, fragmentFile, minFragSize, maxFragSize, verbose)
-     
+
     # Read the SAM/BAM file
     if verbose:
         print("## Opening SAM/BAM file {} ...".format(mappedReadsFile))
     samfile = pysam.Samfile(mappedReadsFile, "rb")
 
     if samOut:
-        handle_sam = pysam.AlignmentFile(outputDir + '/' + baseReadsFile + '_interaction.bam', "wb", template=samfile)
+        handle_sam = pysam.AlignmentFile(outputDir + "/" + baseReadsFile + "_interaction.bam", "wb", template=samfile)
 
     # Reads are 0-based too (for both SAM and BAM format)
     # Loop on all reads
@@ -608,22 +623,24 @@ if __name__ == "__main__":
                 interactionType = get_interaction_type(r1, r1_chrom, r1_resfrag, r2, r2_chrom, r2_resfrag, verbose)
                 dist = get_PE_fragment_size(r1, r2, r1_resfrag, r2_resfrag, interactionType)
                 cdist = get_cis_dist(r1, r2)
-                
+
                 ## Filter based on restriction fragments
-                if (r1_resfrag is not None and r1_resfrag.value['filter'] == True) or (r2_resfrag is not None and r2_resfrag.value['filter']) == True:
+                if (r1_resfrag is not None and r1_resfrag.value["filter"] == True) or (
+                    r2_resfrag is not None and r2_resfrag.value["filter"]
+                ) == True:
                     interactionType = "FILT"
-   
+
                 # Check Insert size criteria - FILT
-                if (minInsertSize is not None and dist is not None and
-                    dist < int(minInsertSize)) or \
-                    (maxInsertSize is not None and dist is not None and dist > int(maxInsertSize)):
+                if (minInsertSize is not None and dist is not None and dist < int(minInsertSize)) or (
+                    maxInsertSize is not None and dist is not None and dist > int(maxInsertSize)
+                ):
                     interactionType = "FILT"
 
                 # Check Distance criteria - FILT
                 # Done for VI otherwise this criteria will overwrite all other invalid classification
-                if (interactionType == "VI" and minDist is not None and cdist is not None and cdist < int(minDist)):
+                if interactionType == "VI" and minDist is not None and cdist is not None and cdist < int(minDist):
                     interactionType = "FILT"
-        
+
                 if interactionType == "VI":
                     valid_counter += 1
                     cur_handler = handle_valid
@@ -677,11 +694,11 @@ if __name__ == "__main__":
                 elif interactionType == "SI":
                     single_counter += 1
                     cur_handler = handle_single if allOutput else None
-                
+
                 elif interactionType == "FILT":
                     filt_counter += 1
                     cur_handler = handle_filt if allOutput else None
-                
+
                 else:
                     interactionType = "DUMP"
                     dump_counter += 1
@@ -694,17 +711,17 @@ if __name__ == "__main__":
 
             ## Write results in right handler
             if cur_handler is not None:
-                if not r1.is_unmapped and not r2.is_unmapped:                 
+                if not r1.is_unmapped and not r2.is_unmapped:
                     ##reorient reads to ease duplicates removal
                     or1, or2 = get_ordered_reads(r1, r2)
                     or1_chrom = samfile.get_reference_name(or1.tid)
                     or2_chrom = samfile.get_reference_name(or2.tid)
-                    
+
                     ##reset as tag now that the reads are oriented
                     r1as = get_read_tag(or1, gtag)
                     r2as = get_read_tag(or2, gtag)
                     if gtag is not None:
-                        htag = str(r1as)+"-"+str(r2as)
+                        htag = str(r1as) + "-" + str(r2as)
 
                     ##get fragment name and reorient if necessary
                     if or1 == r1 and or2 == r2:
@@ -715,73 +732,113 @@ if __name__ == "__main__":
                         or2_resfrag = r1_resfrag
 
                     if or1_resfrag is not None:
-                        or1_fragname = or1_resfrag.value['name']
+                        or1_fragname = or1_resfrag.value["name"]
                     else:
-                        or1_fragname = 'None'
-                        
+                        or1_fragname = "None"
+
                     if or2_resfrag is not None:
-                        or2_fragname = or2_resfrag.value['name']
+                        or2_fragname = or2_resfrag.value["name"]
                     else:
-                        or2_fragname = 'None'
-                        
+                        or2_fragname = "None"
+
                     cur_handler.write(
-                        or1.query_name + "\t" +
-                        or1_chrom + "\t" +
-                        str(get_read_pos(or1)+1) + "\t" +
-                        str(get_read_strand(or1)) + "\t" +
-                        or2_chrom + "\t" +
-                        str(get_read_pos(or2)+1) + "\t" +
-                        str(get_read_strand(or2)) + "\t" +
-                        str(dist) + "\t" + 
-                        or1_fragname + "\t" +
-                        or2_fragname + "\t" +
-                        str(or1.mapping_quality) + "\t" + 
-                        str(or2.mapping_quality) + "\t" + 
-                        str(htag) + "\n")
+                        or1.query_name
+                        + "\t"
+                        + or1_chrom
+                        + "\t"
+                        + str(get_read_pos(or1) + 1)
+                        + "\t"
+                        + str(get_read_strand(or1))
+                        + "\t"
+                        + or2_chrom
+                        + "\t"
+                        + str(get_read_pos(or2) + 1)
+                        + "\t"
+                        + str(get_read_strand(or2))
+                        + "\t"
+                        + str(dist)
+                        + "\t"
+                        + or1_fragname
+                        + "\t"
+                        + or2_fragname
+                        + "\t"
+                        + str(or1.mapping_quality)
+                        + "\t"
+                        + str(or2.mapping_quality)
+                        + "\t"
+                        + str(htag)
+                        + "\n"
+                    )
 
                 elif r2.is_unmapped and not r1.is_unmapped:
                     if r1_resfrag is not None:
-                        r1_fragname = r1_resfrag.value['name']
-                          
+                        r1_fragname = r1_resfrag.value["name"]
+
                     cur_handler.write(
-                        r1.query_name + "\t" +
-                        r1_chrom + "\t" +
-                        str(get_read_pos(r1)+1) + "\t" +
-                        str(get_read_strand(r1)) + "\t" +
-                        "*" + "\t" +
-                        "*" + "\t" +
-                        "*" + "\t" +
-                        "*" + "\t" + 
-                        r1_fragname + "\t" +
-                        "*" + "\t" +
-                        str(r1.mapping_quality) + "\t" + 
-                        "*" + "\n")
+                        r1.query_name
+                        + "\t"
+                        + r1_chrom
+                        + "\t"
+                        + str(get_read_pos(r1) + 1)
+                        + "\t"
+                        + str(get_read_strand(r1))
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + r1_fragname
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + str(r1.mapping_quality)
+                        + "\t"
+                        + "*"
+                        + "\n"
+                    )
                 elif r1.is_unmapped and not r2.is_unmapped:
                     if r2_resfrag is not None:
-                        r2_fragname = r2_resfrag.value['name']
-                    
+                        r2_fragname = r2_resfrag.value["name"]
+
                     cur_handler.write(
-                        r2.query_name + "\t" +
-                        "*" + "\t" +
-                        "*" + "\t" +
-                        "*" + "\t" +
-                        r2_chrom + "\t" +
-                        str(get_read_pos(r2)+1) + "\t" +
-                        str(get_read_strand(r2)) + "\t" +
-                        "*" + "\t" +
-                        "*" + "\t" +
-                        r2_fragname + "\t" +
-                        "*" + "\t" +
-                        str(r2.mapping_quality) + "\n")
-
-                ## Keep initial order    
+                        r2.query_name
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + r2_chrom
+                        + "\t"
+                        + str(get_read_pos(r2) + 1)
+                        + "\t"
+                        + str(get_read_strand(r2))
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + r2_fragname
+                        + "\t"
+                        + "*"
+                        + "\t"
+                        + str(r2.mapping_quality)
+                        + "\n"
+                    )
+
+                ## Keep initial order
                 if samOut:
-                    r1.tags = r1.tags + [('CT', str(interactionType))]
-                    r2.tags = r2.tags + [('CT', str(interactionType))]
+                    r1.tags = r1.tags + [("CT", str(interactionType))]
+                    r2.tags = r2.tags + [("CT", str(interactionType))]
                     handle_sam.write(r1)
                     handle_sam.write(r2)
 
-            if (reads_counter % 100000 == 0 and verbose):
+            if reads_counter % 100000 == 0 and verbose:
                 print("##", reads_counter)
 
     # Close handler
@@ -794,9 +851,8 @@ if __name__ == "__main__":
         handle_single.close()
         handle_filt.close()
 
-
     # Write stats file
-    handle_stat = open(outputDir + '/' + baseReadsFile + '.RSstat', 'w')
+    handle_stat = open(outputDir + "/" + baseReadsFile + ".RSstat", "w")
     handle_stat.write("## Hi-C processing\n")
     handle_stat.write("Valid_interaction_pairs\t" + str(valid_counter) + "\n")
     handle_stat.write("Valid_interaction_pairs_FF\t" + str(valid_counter_FF) + "\n")
@@ -815,10 +871,20 @@ if __name__ == "__main__":
         handle_stat.write("## ======================================\n")
         handle_stat.write("## Allele specific information\n")
         handle_stat.write("Valid_pairs_from_ref_genome_(1-1)\t" + str(G1G1_ascounter) + "\n")
-        handle_stat.write("Valid_pairs_from_ref_genome_with_one_unassigned_mate_(0-1/1-0)\t" + str(UG1_ascounter+G1U_ascounter) + "\n")
+        handle_stat.write(
+            "Valid_pairs_from_ref_genome_with_one_unassigned_mate_(0-1/1-0)\t"
+            + str(UG1_ascounter + G1U_ascounter)
+            + "\n"
+        )
         handle_stat.write("Valid_pairs_from_alt_genome_(2-2)\t" + str(G2G2_ascounter) + "\n")
-        handle_stat.write("Valid_pairs_from_alt_genome_with_one_unassigned_mate_(0-2/2-0)\t" + str(UG2_ascounter+G2U_ascounter) + "\n")
-        handle_stat.write("Valid_pairs_from_alt_and_ref_genome_(1-2/2-1)\t" + str(G1G2_ascounter+G2G1_ascounter) + "\n")
+        handle_stat.write(
+            "Valid_pairs_from_alt_genome_with_one_unassigned_mate_(0-2/2-0)\t"
+            + str(UG2_ascounter + G2U_ascounter)
+            + "\n"
+        )
+        handle_stat.write(
+            "Valid_pairs_from_alt_and_ref_genome_(1-2/2-1)\t" + str(G1G2_ascounter + G2G1_ascounter) + "\n"
+        )
         handle_stat.write("Valid_pairs_with_both_unassigned_mated_(0-0)\t" + str(UU_ascounter) + "\n")
         handle_stat.write("Valid_pairs_with_at_least_one_conflicting_mate_(3-)\t" + str(CF_ascounter) + "\n")
 
diff --git a/bin/mergeSAM.py b/bin/mergeSAM.py
index a907fd7..82ab8c3 100755
--- a/bin/mergeSAM.py
+++ b/bin/mergeSAM.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 ## HiC-Pro
-## Copyright (c) 2015 Institut Curie                               
+## Copyright (c) 2015 Institut Curie
 ## Author(s): Nicolas Servant, Eric Viara
 ## Contact: nicolas.servant@curie.fr
 ## This software is distributed without any guarantee under the terms of the BSD-3 licence.
@@ -20,6 +20,7 @@ import os
 import re
 import pysam
 
+
 def usage():
     """Usage function"""
     print("Usage : python mergeSAM.py")
@@ -41,10 +42,8 @@ def get_args():
         opts, args = getopt.getopt(
             sys.argv[1:],
             "f:r:o:q:smtvh",
-            ["forward=",
-             "reverse=",
-             "output=", "qual=", 
-             "single", "multi", "stat", "verbose", "help"])
+            ["forward=", "reverse=", "output=", "qual=", "single", "multi", "stat", "verbose", "help"],
+        )
     except getopt.GetoptError:
         usage()
         sys.exit(-1)
@@ -53,24 +52,26 @@ def get_args():
 
 def is_unique_bowtie2(read):
     ret = False
-    if not read.is_unmapped and read.has_tag('AS'):
-        if read.has_tag('XS'):
-            primary =  read.get_tag('AS')
-            secondary = read.get_tag('XS')
-            if (primary > secondary):
+    if not read.is_unmapped and read.has_tag("AS"):
+        if read.has_tag("XS"):
+            primary = read.get_tag("AS")
+            secondary = read.get_tag("XS")
+            if primary > secondary:
                 ret = True
         else:
             ret = True
     return ret
 
+
 ## Remove everything after "/" or " " in read's name
 def get_read_name(read):
     name = read.query_name
-    #return name.split("/",1)[0]
-    return re.split('/| ', name)[0]
+    # return name.split("/",1)[0]
+    return re.split("/| ", name)[0]
+
 
 def sam_flag(read1, read2, hr1, hr2):
-	
+
     f1 = read1.flag
     f2 = read2.flag
 
@@ -81,7 +82,7 @@ def sam_flag(read1, read2, hr1, hr2):
     if r2.is_unmapped == False:
         r2_chrom = hr2.get_reference_name(r2.reference_id)
     else:
-        r2_chrom="*"
+        r2_chrom = "*"
 
     ##Relevant bitwise flags (flag in an 11-bit binary number)
     ##1 The read is one of a pair
@@ -92,54 +93,53 @@ def sam_flag(read1, read2, hr1, hr2):
     ##32 The other mate in the paired-end alignment is aligned to the reverse reference strand
     ##64 The read is the first (#1) mate in a pair
     ##128 The read is the second (#2) mate in a pair
-  
-    ##The reads were mapped as single-end data, so should expect flags of 
+
+    ##The reads were mapped as single-end data, so should expect flags of
     ##0 (map to the '+' strand) or 16 (map to the '-' strand)
-    ##Output example: a paired-end read that aligns to the reverse strand 
+    ##Output example: a paired-end read that aligns to the reverse strand
     ##and is the first mate in the pair will have flag 83 (= 64 + 16 + 2 + 1)
-  
+
     if f1 & 0x4:
         f1 = f1 | 0x8
 
     if f2 & 0x4:
         f2 = f2 | 0x8
-    
-    if (not (f1 & 0x4) and not (f2 & 0x4)):
+
+    if not (f1 & 0x4) and not (f2 & 0x4):
         ##The flag should now indicate this is paired-end data
         f1 = f1 | 0x1
         f1 = f1 | 0x2
         f2 = f2 | 0x1
-        f2 = f2 | 0x2  
-    
+        f2 = f2 | 0x2
+
     ##Indicate if the pair is on the reverse strand
     if f1 & 0x10:
         f2 = f2 | 0x20
-  
+
     if f2 & 0x10:
         f1 = f1 | 0x20
-  
+
     ##Is this first or the second pair?
     f1 = f1 | 0x40
     f2 = f2 | 0x80
-  
+
     ##Insert the modified bitwise flags into the reads
     read1.flag = f1
     read2.flag = f2
-	
+
     ##Determine the RNEXT and PNEXT values (i.e. the positional values of a read's pair)
-    #RNEXT
+    # RNEXT
     if r1_chrom == r2_chrom:
         read1.next_reference_id = r1.reference_id
         read2.next_reference_id = r1.reference_id
     else:
         read1.next_reference_id = r2.reference_id
         read2.next_reference_id = r1.reference_id
-    #PNEXT
+    # PNEXT
     read1.next_reference_start = read2.reference_start
     read2.next_reference_start = read1.reference_start
 
-    return(read1, read2)
-
+    return (read1, read2)
 
 
 if __name__ == "__main__":
@@ -196,13 +196,13 @@ if __name__ == "__main__":
     tot_pairs_counter = 0
     multi_pairs_counter = 0
     uniq_pairs_counter = 0
-    unmapped_pairs_counter = 0 
+    unmapped_pairs_counter = 0
     lowq_pairs_counter = 0
     multi_singles_counter = 0
     uniq_singles_counter = 0
     lowq_singles_counter = 0
 
-    #local_counter = 0
+    # local_counter = 0
     paired_reads_counter = 0
     singleton_counter = 0
     reads_counter = 0
@@ -213,31 +213,31 @@ if __name__ == "__main__":
     ## Loop on all reads
     if verbose:
         print("## Merging forward and reverse tags ...")
-    
-    with pysam.Samfile(R1file, "rb") as hr1, pysam.Samfile(R2file, "rb") as hr2: 
+
+    with pysam.Samfile(R1file, "rb") as hr1, pysam.Samfile(R2file, "rb") as hr2:
         if output == "-":
             outfile = pysam.AlignmentFile(output, "w", template=hr1)
         else:
             outfile = pysam.AlignmentFile(output, "wb", template=hr1)
-	
+
         for r1, r2 in zip(hr1.fetch(until_eof=True), hr2.fetch(until_eof=True)):
-            reads_counter +=1
-            if (reads_counter % 1000000 == 0 and verbose):
+            reads_counter += 1
+            if reads_counter % 1000000 == 0 and verbose:
                 print("##", reads_counter)
-                
+
             if get_read_name(r1) == get_read_name(r2):
                 ## both unmapped
                 if r1.is_unmapped == True and r2.is_unmapped == True:
                     unmapped_pairs_counter += 1
                     continue
-                    
+
                 ## both mapped
                 elif r1.is_unmapped == False and r2.is_unmapped == False:
                     ## quality
                     if mapq != None and (r1.mapping_quality < int(mapq) or r2.mapping_quality < int(mapq)):
                         lowq_pairs_counter += 1
                         continue
-                 
+
                     ## Unique mapping
                     if is_unique_bowtie2(r1) == True and is_unique_bowtie2(r2) == True:
                         uniq_pairs_counter += 1
@@ -253,7 +253,7 @@ if __name__ == "__main__":
                         continue
                     if r1.is_unmapped == False:  ## first end is mapped, second is not
                         ## quality
-                        if mapq != None and (r1.mapping_quality < int(mapq)): 
+                        if mapq != None and (r1.mapping_quality < int(mapq)):
                             lowq_singles_counter += 1
                             continue
                         ## Unique mapping
@@ -265,7 +265,7 @@ if __name__ == "__main__":
                                 continue
                     else:  ## second end is mapped, first is not
                         ## quality
-                        if mapq != None and (r2.mapping_quality < int(mapq)): 
+                        if mapq != None and (r2.mapping_quality < int(mapq)):
                             lowq_singles_counter += 1
                             continue
                         ## Unique mapping
@@ -276,34 +276,95 @@ if __name__ == "__main__":
                             if report_multi == False:
                                 continue
 
-                tot_pairs_counter += 1          
-                (r1, r2) = sam_flag(r1,r2, hr1, hr2)
+                tot_pairs_counter += 1
+                (r1, r2) = sam_flag(r1, r2, hr1, hr2)
 
                 ## Write output
                 outfile.write(r1)
                 outfile.write(r2)
-                
+
             else:
-                print("Forward and reverse reads not paired. Check that BAM files have the same read names and are sorted.")
+                print(
+                    "Forward and reverse reads not paired. Check that BAM files have the same read names and are sorted."
+                )
                 sys.exit(1)
 
         if stat:
-            if output == '-':
+            if output == "-":
                 statfile = "pairing.stat"
             else:
-                statfile = re.sub('\.bam$', '.pairstat', output)
-            with open(statfile, 'w') as handle_stat:
-                handle_stat.write("Total_pairs_processed\t" + str(reads_counter) + "\t" + str(round(float(reads_counter)/float(reads_counter)*100,3)) + "\n")
-                handle_stat.write("Unmapped_pairs\t" + str(unmapped_pairs_counter) + "\t" + str(round(float(unmapped_pairs_counter)/float(reads_counter)*100,3)) + "\n")
-                handle_stat.write("Low_qual_pairs\t" + str(lowq_pairs_counter) + "\t" + str(round(float(lowq_pairs_counter)/float(reads_counter)*100,3)) + "\n")
-                handle_stat.write("Unique_paired_alignments\t" + str(uniq_pairs_counter) + "\t" + str(round(float(uniq_pairs_counter)/float(reads_counter)*100,3)) + "\n")
-                handle_stat.write("Multiple_pairs_alignments\t" + str(multi_pairs_counter) + "\t" + str(round(float(multi_pairs_counter)/float(reads_counter)*100,3)) + "\n")
-                handle_stat.write("Pairs_with_singleton\t" + str(singleton_counter) + "\t" + str(round(float(singleton_counter)/float(reads_counter)*100,3)) + "\n")  
-                handle_stat.write("Low_qual_singleton\t" + str(lowq_singles_counter) + "\t" + str(round(float(lowq_singles_counter)/float(reads_counter)*100,3)) + "\n")
-                handle_stat.write("Unique_singleton_alignments\t" + str(uniq_singles_counter) + "\t" + str(round(float(uniq_singles_counter)/float(reads_counter)*100,3)) + "\n")
-                handle_stat.write("Multiple_singleton_alignments\t" + str(multi_singles_counter) + "\t" + str(round(float(multi_singles_counter)/float(reads_counter)*100,3)) + "\n")
-                handle_stat.write("Reported_pairs\t" + str(tot_pairs_counter) + "\t" + str(round(float(tot_pairs_counter)/float(reads_counter)*100,3)) + "\n")
+                statfile = re.sub("\.bam$", ".pairstat", output)
+            with open(statfile, "w") as handle_stat:
+                handle_stat.write(
+                    "Total_pairs_processed\t"
+                    + str(reads_counter)
+                    + "\t"
+                    + str(round(float(reads_counter) / float(reads_counter) * 100, 3))
+                    + "\n"
+                )
+                handle_stat.write(
+                    "Unmapped_pairs\t"
+                    + str(unmapped_pairs_counter)
+                    + "\t"
+                    + str(round(float(unmapped_pairs_counter) / float(reads_counter) * 100, 3))
+                    + "\n"
+                )
+                handle_stat.write(
+                    "Low_qual_pairs\t"
+                    + str(lowq_pairs_counter)
+                    + "\t"
+                    + str(round(float(lowq_pairs_counter) / float(reads_counter) * 100, 3))
+                    + "\n"
+                )
+                handle_stat.write(
+                    "Unique_paired_alignments\t"
+                    + str(uniq_pairs_counter)
+                    + "\t"
+                    + str(round(float(uniq_pairs_counter) / float(reads_counter) * 100, 3))
+                    + "\n"
+                )
+                handle_stat.write(
+                    "Multiple_pairs_alignments\t"
+                    + str(multi_pairs_counter)
+                    + "\t"
+                    + str(round(float(multi_pairs_counter) / float(reads_counter) * 100, 3))
+                    + "\n"
+                )
+                handle_stat.write(
+                    "Pairs_with_singleton\t"
+                    + str(singleton_counter)
+                    + "\t"
+                    + str(round(float(singleton_counter) / float(reads_counter) * 100, 3))
+                    + "\n"
+                )
+                handle_stat.write(
+                    "Low_qual_singleton\t"
+                    + str(lowq_singles_counter)
+                    + "\t"
+                    + str(round(float(lowq_singles_counter) / float(reads_counter) * 100, 3))
+                    + "\n"
+                )
+                handle_stat.write(
+                    "Unique_singleton_alignments\t"
+                    + str(uniq_singles_counter)
+                    + "\t"
+                    + str(round(float(uniq_singles_counter) / float(reads_counter) * 100, 3))
+                    + "\n"
+                )
+                handle_stat.write(
+                    "Multiple_singleton_alignments\t"
+                    + str(multi_singles_counter)
+                    + "\t"
+                    + str(round(float(multi_singles_counter) / float(reads_counter) * 100, 3))
+                    + "\n"
+                )
+                handle_stat.write(
+                    "Reported_pairs\t"
+                    + str(tot_pairs_counter)
+                    + "\t"
+                    + str(round(float(tot_pairs_counter) / float(reads_counter) * 100, 3))
+                    + "\n"
+                )
     hr1.close()
     hr2.close()
     outfile.close()
-
diff --git a/bin/merge_statfiles.py b/bin/merge_statfiles.py
index dc11bf7..c3986e1 100755
--- a/bin/merge_statfiles.py
+++ b/bin/merge_statfiles.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 
 ## nf-core-hic
-## Copyright (c) 2020 Institut Curie                               
+## Copyright (c) 2020 Institut Curie
 ## Author(s): Nicolas Servant
 ## Contact: nicolas.servant@curie.fr
 ## This software is distributed without any guarantee under the terms of the BSD-3 licence.
@@ -17,6 +17,7 @@ import glob
 import os
 from collections import OrderedDict
 
+
 def num(s):
     try:
         return int(s)
@@ -26,30 +27,30 @@ def num(s):
 
 if __name__ == "__main__":
     ## Read command line arguments
-    parser = argparse.ArgumentParser()      
-    parser.add_argument("-f", "--files", help="List of input file(s)", type=str, nargs='+')
-    parser.add_argument("-v", "--verbose", help="verbose mode", action='store_true')
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-f", "--files", help="List of input file(s)", type=str, nargs="+")
+    parser.add_argument("-v", "--verbose", help="verbose mode", action="store_true")
     args = parser.parse_args()
-               
+
     infiles = args.files
     li = len(infiles)
 
     if li > 0:
         if args.verbose:
             print("## merge_statfiles.py")
-            print("## Merging "+ str(li)+" files")
- 
+            print("## Merging " + str(li) + " files")
+
         ## Reading first file to get the template
         template = OrderedDict()
         if args.verbose:
-            print("## Use "+infiles[0]+" as template")
+            print("## Use " + infiles[0] + " as template")
         with open(infiles[0]) as f:
             for line in f:
                 if not line.startswith("#"):
                     lsp = line.strip().split("\t")
-                    data = map(num, lsp[1:len(lsp)])
+                    data = map(num, lsp[1 : len(lsp)])
                     template[str(lsp[0])] = list(data)
-                
+
         if len(template) == 0:
             print("Cannot find template files !")
             sys.exit(1)
@@ -63,20 +64,21 @@ if __name__ == "__main__":
                         if lsp[0] in template:
                             for i in list(range(1, len(lsp))):
                                 if isinstance(num(lsp[i]), int):
-                                    template[lsp[0]][i-1] += num(lsp[i])
+                                    template[lsp[0]][i - 1] += num(lsp[i])
                                 else:
-                                    template[lsp[0]][i-1] = round((template[lsp[0]][i-1] + num(lsp[i]))/2,3)
+                                    template[lsp[0]][i - 1] = round((template[lsp[0]][i - 1] + num(lsp[i])) / 2, 3)
                         else:
-                            sys.stderr.write("Warning : '"+lsp[0]+"' not found in template ["+infiles[fidx]+"]\n")
-                            
+                            sys.stderr.write(
+                                "Warning : '" + lsp[0] + "' not found in template [" + infiles[fidx] + "]\n"
+                            )
+
         ## Print template
         for x in template:
             sys.stdout.write(x)
             for y in template[x]:
-                sys.stdout.write("\t"+str(y))
+                sys.stdout.write("\t" + str(y))
             sys.stdout.write("\n")
 
     else:
         print("No files to merge - stop")
         sys.exit(1)
-
-- 
GitLab