diff --git a/hi-classifier/classify.py b/hi-classifier/classify.py index f69d72bc788de2ca66aecadfa79d747c49107751..3cc610100722a3d63f160901498aa3eb68698797 100644 --- a/hi-classifier/classify.py +++ b/hi-classifier/classify.py @@ -55,6 +55,8 @@ def categorize(args, logging): input_queue = Queue() Header_Queue = Queue() output_queue = Queue() + Counts_Dang_queue = Queue() + Counts_Biot_queue = Queue() write_processes, compute_processes = partitionning(num_threads) # communicate(write_processes=write_processes, compute_processes=compute_processes) @@ -83,6 +85,8 @@ def categorize(args, logging): args=( input_queue, output_queue, + Counts_Dang_queue, + Counts_Biot_queue, mapq_score, dict_digested, ligation_site_list, @@ -92,6 +96,17 @@ def categorize(args, logging): for _ in range(compute_processes) ] + # Process for processing items + [ + manager.start_worker( + target= + args=( + + ), + ) + for _ in range(compute_processes) + ] + # Process for writing pairs manager.start_worker( target=write_bam_pair, diff --git a/hi-classifier/event_collector.py b/hi-classifier/event_collector.py new file mode 100644 index 0000000000000000000000000000000000000000..417ee75281a7518282628312c7796ff821e01132 --- /dev/null +++ b/hi-classifier/event_collector.py @@ -0,0 +1,111 @@ + +from readmanager import PairedRead + + +def update_restriction_counts(paired_read, label, counts_VL, counts_DL): + """ + Met à jour le dictionnaire 'counts' avec le comptage des sites de restriction + pour la paire de lectures 'paired_read', selon le label de classification. + + - Si le label est dans {"Up", "Down", "Close", "Joined", "Self-Circle"}, + on incrémente le compteur pour les sites re_1 et re_2. + - Si le label contient "DLLeft", on incrémente le compteur pour re_proximal_1. + - Si le label contient "DLRight", on incrémente le compteur pour re_proximal_2. + - Si le label contient "DLBoth", on incrémente les compteurs pour les deux. + + Le dictionnaire 'counts' est organisé de la façon suivante : + { chromosome: { site_position: count, ... } } + + Parameters: + paired_read: Objet (de type PairedRead) qui possède les attributs : + - chrom_1 (et chrom_2, supposés identiques ici) + - re_1 et re_2: tuples (site_num, position) + - re_proximal_1 et re_proximal_2: tuples (site_num, position) + label (str): Le label de classification obtenu (ex: "Up", "Close-DLLeft", etc.) + counts (dict): Dictionnaire de compteurs à mettre à jour. + + Returns: + dict: Le dictionnaire mis à jour. + + Examples: + >>> restr_index = { + ... "chr1": { + ... "sites": [ + ... (1, 100), + ... (2, 200), + ... (3, 300), + ... (4, 400) + ... ], + ... "fragments": [ + ... (1, (0, 100)), + ... (2, (100, 200)), + ... (3, (200, 300)), + ... (4, (300, 400)), + ... (5, (400, 500)) + ... ] + ... } + ... } + >>> from readmanager import PairedRead + >>> from positionfrag import annotate_paired_read + >>> read1 = ("r1", 16, "chr1", 101, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 300, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> annotate_paired_read(pr, restr_index) + >>> label = Classified_inter(pr, tolerance=3) + >>> counts_VL = {} + >>> counts_DL = {} + >>> cvl, cdl = update_restriction_counts(paired_read, label, counts_VL, counts_DL) + >>> cvl + {} + >>> cdl + {} + >>> cvl, cdl = update_restriction_counts(paired_read, label, counts_VL, counts_DL) + >>> cvl + {} + >>> cdl + {} + >>> cvl, cdl = update_restriction_counts(paired_read, label, counts_VL, counts_DL) + >>> cvl + {} + >>> cdl + {} + >>> read1 = ("r1", 16, "chr1", 305, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 135, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> annotate_paired_read(pr, restr_index) + >>> label = Classified_inter(pr, tolerance=3) + >>> cvl, cdl = update_restriction_counts(paired_read, label, counts_VL, counts_DL) + >>> cvl + {} + >>> cdl + {} + """ + # On suppose que les lectures sont sur le même chromosome + chrom = paired_read.chrom_1 + if chrom not in counts: + counts[chrom] = {} + # Si label indique un contact valide, on incrémente re_1 et re_2 + if label in {"Up", "Down", "Close", "Joined", "SelfCircle"}: + for site in [paired_read.re_1, paired_read.re_2]: + if site is not None: + pos = site[1] + counts_VL[chrom][pos] = counts_VL[chrom].get(pos, 0) + 1 + # Si label contient "DanglingLeft", on incrémente re_proximal_1 + if "DLLeft" in label: + if paired_read.re_proximal_1 is not None: + pos = paired_read.re_proximal_1[1] + counts_DL[chrom][pos] = counts_DL[chrom].get(pos, 0) + 1 + # Si label contient "DanglingRight", on incrémente re_proximal_2 + if "DLRight" in label: + if paired_read.re_proximal_2 is not None: + pos = paired_read.re_proximal_2[1] + counts_DL[chrom][pos] = counts_DL[chrom].get(pos, 0) + 1 + # Si label contient "DanglingBoth", on incrémente pour les deux + if "DLBoth" in label: + if paired_read.re_proximal_1 is not None: + pos = paired_read.re_proximal_1[1] + counts_DL[chrom][pos] = counts_DL[chrom].get(pos, 0) + 1 + if paired_read.re_proximal_2 is not None: + pos = paired_read.re_proximal_2[1] + counts_DL[chrom][pos] = counts_DL[chrom].get(pos, 0) + 1 + return counts_VL, counts_DL diff --git a/hi-classifier/filter_interchrom.py b/hi-classifier/filter_interchrom.py new file mode 100644 index 0000000000000000000000000000000000000000..de10b9ddd13943bb8f0422ef14cd6bc4130608dd --- /dev/null +++ b/hi-classifier/filter_interchrom.py @@ -0,0 +1,207 @@ +from filter_intrachrom import ( + condition_dangling_both, + condition_dangling_left, + condition_dangling_right, + condition_orientation_convergent, + condition_orientation_divergent, + condition_orientation_identical_down, + condition_orientation_identical_up, +) + + +def compute_mask_inter(paired_read, tolerance=3): + """ + Calcule un mask binaire (dictionnaire de booléens) pour la paire de lectures. + Ce mask contient les conditions évaluées pour : + - Les fragments (simple ou multiple) : Identical, Adjacent, Neither + - L'orientation : Convergent, Divergent, Identical + - La présence de dangling (gauche et droite) + + + Évalue un ensemble de conditions sur le PairedRead et retourne un + dictionnaire de booléens indiquant si chaque condition est remplie. + + Parameters: + paired_read (PairedRead): La paire de lectures à analyser. + tolerance (int): Tolérance utilisée pour les conditions de type 'dangling'. + + Returns: + Dict[str, bool]: Un dictionnaire contenant les résultats des conditions, + par exemple: + { + "orientation_convergent": ..., + "orientation_divergent": ..., + "orientation_identical_up": ..., + "orientation_identical_down": ..., + "dangling_left": ..., + "dangling_right": ..., + "dangling_both": ... + } + + Examples: + >>> restr_index = { + ... "chr1": { + ... "sites": [ + ... (1, 100), + ... (2, 200), + ... (3, 300), + ... (4, 400) + ... ], + ... "fragments": [ + ... (1, (0, 100)), + ... (2, (100, 200)), + ... (3, (200, 300)), + ... (4, (300, 400)), + ... (5, (400, 500)) + ... ] + ... } + ... } + >>> from readmanager import PairedRead + >>> from positionfrag import annotate_paired_read + >>> read1 = ("r1", 0, "chr1", 120, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 210, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> annotate_paired_read(pr, restr_index) + >>> compute_mask_inter(pr, tolerance=3) + {'orientation_convergent': False, 'orientation_divergent': False, 'orientation_identical_up': True, 'orientation_identical_down': False, 'dangling_left': False, 'dangling_right': False, 'dangling_both': False} + + """ + + mask = {} + + # -- Basé sur l'orientation -- + mask["orientation_convergent"] = condition_orientation_convergent( + paired_read + ) + mask["orientation_divergent"] = condition_orientation_divergent( + paired_read + ) + mask["orientation_identical_up"] = condition_orientation_identical_up( + paired_read + ) + mask["orientation_identical_down"] = condition_orientation_identical_down( + paired_read + ) + + # -- Basé sur le "dangling" (sites de restriction proches) -- + mask["dangling_left"] = condition_dangling_left(paired_read, tolerance) + mask["dangling_right"] = condition_dangling_right(paired_read, tolerance) + mask["dangling_both"] = condition_dangling_both(paired_read, tolerance) + + return mask + + +def Classified_inter(paired_read, tolerance=3): + """ + Classifie la paire de lectures (paired_read) selon les catégories de Contact valide + et ajoute, en information secondaire, la présence de dangling (DLLeft, DLRight, DLBoth). + + Priorité principale (orientation) : + - Si orientation_divergent: "Close" + - Si orientation_identical_up: "Up" + - Si orientation_convergent: "Far" + - Si orientation_identical_down: "Down" + - Sinon: "Other" + + Ensuite, si une condition de dangling est vraie, le label est complété par un suffixe + indiquant la ou les conditions (par exemple, "DLLeft", "DLRight" ou "DLBoth"). + + Returns: + str: Le label de classification, par exemple "Close-DLLeft" ou "Far" etc. + + Examples: + >>> restr_index = { + ... "chr1": { + ... "sites": [ + ... (1, 100), + ... (2, 200), + ... (3, 300), + ... (4, 400) + ... ], + ... "fragments": [ + ... (1, (0, 100)), + ... (2, (100, 200)), + ... (3, (200, 300)), + ... (4, (300, 400)), + ... (5, (400, 500)) + ... ] + ... } + ... } + >>> from readmanager import PairedRead + >>> from positionfrag import annotate_paired_read + >>> read1 = ("r1", 16, "chr1", 121, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 310, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> annotate_paired_read(pr, restr_index) + >>> Classified_inter(pr, tolerance=3) + 'Close-None' + >>> # Test "Up" sans dangling + >>> read1 = ("r1", 0, "chr1", 121, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 310, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> annotate_paired_read(pr, restr_index) + >>> Classified_inter(pr, tolerance=3) + 'Up-None' + >>> # Test "Far" sans dangling + >>> read1 = ("r1", 0, "chr1", 121, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 16, "chr1", 310, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> annotate_paired_read(pr, restr_index) + >>> Classified_inter(pr, tolerance=3) + 'Far-None' + >>> # Test "Down" sans dangling + >>> read1 = ("r1", 16, "chr1", 121, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 16, "chr1", 310, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> annotate_paired_read(pr, restr_index) + >>> Classified_inter(pr, tolerance=3) + 'Down-None' + >>> # Test ajout du suffixe dangling : simulons un cas "Close" avec dangling_left + >>> read1 = ("r1", 16, "chr1", 101, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 310, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> annotate_paired_read(pr, restr_index) + >>> Classified_inter(pr, tolerance=3) + 'Close-DLLeft' + >>> # Test suffixe dangling_both : prioritaire sur les autres + >>> read1 = ("r1", 16, "chr1", 100, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 300, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> annotate_paired_read(pr, restr_index) + >>> Classified_inter(pr, tolerance=3) + 'Close-DLBoth' + >>> # Test suffixe dangling_right seul + >>> read1 = ("r1", 0, "chr1", 120, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 16, "chr1", 300, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> annotate_paired_read(pr, restr_index) + >>> Classified_inter(pr, tolerance=3) + 'Far-DLRight' + """ + mask = compute_mask_inter(paired_read, tolerance) + main_label = [] + + # Détermination du label principal selon l'orientation + if mask["orientation_divergent"]: + main_label = "Close" + elif mask["orientation_identical_up"]: + main_label = "Up" + elif mask["orientation_convergent"]: + main_label = "Far" + elif mask["orientation_identical_down"]: + main_label = "Down" + else: + main_label = "Erreur_Algorythmique" + + # Informations secondaires sur le dangling + dl_labels = [] + if mask["dangling_both"]: + dl_labels = "DLBoth" + elif mask["dangling_left"]: + dl_labels = "DLLeft" + elif mask["dangling_right"]: + dl_labels = "DLRight" + else: + dl_labels = "None" + + return main_label + "-" + dl_labels diff --git a/hi-classifier/filter_intrachrom.py b/hi-classifier/filter_intrachrom.py new file mode 100644 index 0000000000000000000000000000000000000000..20c18866e95af2fd09ec23b6c49c9635460b7bdc --- /dev/null +++ b/hi-classifier/filter_intrachrom.py @@ -0,0 +1,839 @@ +############################################ +# CONDITIONS BASÉES SUR LES FRAGMENTS SIMPLES +############################################ + + +def condition_single_fragment(paired_read): + """ + Renvoie True si chaque read est assigné à UN SEUL fragment. + + Examples: + >>> from readmanager import PairedRead + >>> # On crée un PairedRead factice + >>> read1 = ("r1", 0, "chr1", 100, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 200, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> pr.fragment_1 = [(3, (100,200))] + >>> pr.fragment_2 = [(4, (201,300))] + >>> condition_single_fragment(pr) + True + + >>> pr.fragment_1 = [(3, (100,200))] + >>> pr.fragment_2 = [(4, (201,300)), (5, (301, 400))] + >>> condition_single_fragment(pr) + False + """ + return (len(paired_read.fragment_1) == 1) and ( + len(paired_read.fragment_2) == 1 + ) + + +def condition_frag_identical(paired_read): + """ + Pour une paire de lectures sur un fragment simple, + renvoie True si le numéro du fragment de read1 est égal à celui de read2. + + Examples: + >>> from readmanager import PairedRead + >>> read1 = ("r1", 0, "chr1", 100, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 200, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> pr.fragment_1 = [(3, (100,200))] + >>> pr.fragment_2 = [(3, (201,300))] # même numéro de fragment : 3 + >>> condition_frag_identical(pr) + True + >>> pr.fragment_2 = [(4, (201,300))] + >>> condition_frag_identical(pr) + False + """ + if condition_single_fragment(paired_read): + frag1_num = paired_read.fragment_1[0][0] + frag2_num = paired_read.fragment_2[0][0] + return frag1_num == frag2_num + return False + + +def condition_frag_adjacent(paired_read): + """ + Pour une paire de lectures sur un fragment simple, + renvoie True si les numéros des fragments diffèrent exactement de 1. + + Examples: + >>> from readmanager import PairedRead + >>> read1 = ("r1", 0, "chr1", 100, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 300, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> pr.fragment_1 = [(10, (100,200))] + >>> pr.fragment_2 = [(9, (201,300))] + >>> condition_frag_adjacent(pr) + True + >>> pr.fragment_2 = [(11, (201,300))] + >>> condition_frag_adjacent(pr) + True + >>> pr.fragment_2 = [(12, (201,300))] + >>> condition_frag_adjacent(pr) + False + """ + if condition_single_fragment(paired_read): + frag1_num = paired_read.fragment_1[0][0] + frag2_num = paired_read.fragment_2[0][0] + return abs(frag1_num - frag2_num) == 1 + return False + + +def condition_frag_neither(paired_read): + """ + Pour une paire sur un fragment simple, + renvoie True si les fragments ne sont ni identiques ni adjacents. + + Examples: + >>> from readmanager import PairedRead + >>> read1 = ("r1", 0, "chr1", 100, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 300, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> pr.fragment_1 = [(10, (100,200))] + >>> pr.fragment_2 = [(12, (201,300))] + >>> condition_frag_neither(pr) + True + >>> pr.fragment_2 = [(10, (201,300))] + >>> condition_frag_neither(pr) + False + """ + if condition_single_fragment(paired_read): + frag1_num = paired_read.fragment_1[0][0] + frag2_num = paired_read.fragment_2[0][0] + return (frag1_num != frag2_num) and (abs(frag1_num - frag2_num) != 1) + return False + + +############################################ +# CONDITIONS BASÉES SUR LES FRAGMENTS MULTIPLES +############################################ + + +def condition_multiple_fragment(paired_read): + """ + Renvoie True si chaque read est assigné à UN SEUL fragment. + + Examples: + >>> from readmanager import PairedRead + >>> read1 = ("r1", 0, "chr1", 100, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 200, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> pr.fragment_1 = [(3, (100,200)), (4, (200,300))] + >>> pr.fragment_2 = [(5, (300,400))] + >>> condition_multiple_fragment(pr) + True + >>> pr.fragment_1 = [(3, (100,200))] + >>> condition_multiple_fragment(pr) + False + """ + return (len(paired_read.fragment_1) != 1) or ( + len(paired_read.fragment_2) != 1 + ) + + +def condition_frag_multiple_identical(paired_read): + """ + Pour une paire non simple, renvoie True si au moins un fragment de read1 + est identique à au moins un fragment de read2. + + Examples: + >>> from readmanager import PairedRead + >>> read1 = ("r1", 0, "chr1", 100, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 200, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> pr.fragment_1 = [(2, (100,200)), (3, (200,300))] + >>> pr.fragment_2 = [(1, (50,99)), (3, (200,300))] + >>> # pr est "non simple" (fragment_1 a 2 éléments) + >>> condition_frag_multiple_identical(pr) + True + >>> pr.fragment_2 = [(1, (50,99)), (4, (301,400))] + >>> condition_frag_multiple_identical(pr) + False + """ + if not condition_single_fragment(paired_read): + for frag1 in paired_read.fragment_1: + for frag2 in paired_read.fragment_2: + if frag1[0] == frag2[0]: + return True + return False + + +def condition_frag_multiple_adjacent(paired_read): + """ + Pour une paire non simple, renvoie True si TOUTES les combinaisons de fragments + entre read1 et read2 sont adjacentes (différence de 1). + + Examples: + >>> from readmanager import PairedRead + >>> read1 = ("r1", 0, "chr1", 100, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 200, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> pr.fragment_1 = [(2, (100,200)), (3, (200,300))] + >>> pr.fragment_2 = [(2, (100,200)), (4, (301,400))] + >>> condition_frag_multiple_adjacent(pr) + True + >>> pr.fragment_2 = [(1, (50,99))] + >>> condition_frag_multiple_adjacent(pr) + True + >>> pr.fragment_2 = [(5, (401,500))] + >>> condition_frag_multiple_adjacent(pr) + False + """ + if not condition_single_fragment(paired_read): + for frag1 in paired_read.fragment_1: + for frag2 in paired_read.fragment_2: + if abs(frag1[0] - frag2[0]) <= 1: + return True + return False + return False + + +def condition_frag_multiple_neither(paired_read): + """ + Pour une paire non simple, renvoie True si aucun des fragments de read1 n'est + voisin d'aucun fragment de read2 (ni identique, ni adjacents). + + Examples: + >>> from readmanager import PairedRead + >>> read1 = ("r1", 0, "chr1", 100, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 200, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> pr.fragment_1 = [(2, (100,200)), (3, (200,300))] + >>> pr.fragment_2 = [(5, (400,500)), (6, (501,600))] + >>> condition_frag_multiple_neither(pr) + True + >>> pr.fragment_2 = [(3, (200,300)), (5, (400,500))] + >>> condition_frag_multiple_neither(pr) + False + """ + if not condition_single_fragment(paired_read): + for frag1 in paired_read.fragment_1: + for frag2 in paired_read.fragment_2: + if (frag1[0] == frag2[0]) or (abs(frag1[0] - frag2[0]) == 1): + return False + return True + return False + + +############################################ +# CONDITIONS BASÉES SUR L'ORIENTATION +############################################ + + +def condition_orientation_convergent(paired_read): + """ + Convergence : read1 en '+' et read2 en '-'. + + Examples: + >>> from readmanager import PairedRead + >>> read1 = ("r1", 0, "chr1", 100, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0x10, "chr1", 200, 30, "50M", None, None, 0, "ACGT") # 0x10 => brin négatif + >>> pr = PairedRead(read1, read2) + >>> condition_orientation_convergent(pr) + True + >>> condition_orientation_divergent(pr) + False + """ + return paired_read.strand_1 == "+" and paired_read.strand_2 == "-" + + +def condition_orientation_divergent(paired_read): + """ + Divergence : read1 en '-' et read2 en '+'. + + Examples: + >>> from readmanager import PairedRead + >>> read1 = ("r1", 0x10, "chr1", 100, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 200, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> condition_orientation_divergent(pr) + True + >>> condition_orientation_convergent(pr) + False + """ + return paired_read.strand_1 == "-" and paired_read.strand_2 == "+" + + +def condition_orientation_identical_up(paired_read): + """ + Identique : les deux reads ont le même brin, '+'. + + Examples: + >>> from readmanager import PairedRead + >>> read1 = ("r1", 0, "chr1", 100, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 200, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> condition_orientation_identical_up(pr) + True + >>> condition_orientation_identical_down(pr) + False + """ + return (paired_read.strand_1 == "+") and (paired_read.strand_2 == "+") + + +def condition_orientation_identical_down(paired_read): + """ + Identique : les deux reads ont le même brin, '-'. + + Examples: + >>> from readmanager import PairedRead + >>> read1 = ("r1", 0x10, "chr1", 100, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0x10, "chr1", 200, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> condition_orientation_identical_down(pr) + True + """ + return (paired_read.strand_1 == "-") and (paired_read.strand_2 == "-") + + +############################################ +# CONDITIONS BASÉES SUR LES SITES DE RESTRICTION +############################################ + + +def condition_dangling_left(paired_read, TOLERANCE): + """ + Dangling Left : le début du read1 doit être sur un site de restriction. + Ici, on suppose que paired_read.re_1 contient la position du site de restriction assigné à read1. + + Examples: + >>> from readmanager import PairedRead + >>> read1 = ("r1", 0, "chr1", 105, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 200, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> pr.re_proximal_1 = (1, 100) # site de restriction à 100 + >>> condition_dangling_left(pr, TOLERANCE=10) + True + >>> condition_dangling_left(pr, TOLERANCE=4) + False + """ + return abs(paired_read.start_1 - paired_read.re_proximal_1[1]) <= TOLERANCE + + +def condition_dangling_right(paired_read, TOLERANCE): + """ + Dangling Right : le début du read2 doit être sur un site de restriction. + + Examples: + >>> from readmanager import PairedRead + >>> read1 = ("r1", 0, "chr1", 100, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 208, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> pr.re_proximal_2 = (1, 210) + >>> condition_dangling_right(pr, TOLERANCE=5) + True + >>> condition_dangling_right(pr, TOLERANCE=1) + False + """ + return abs(paired_read.start_2 - paired_read.re_proximal_2[1]) <= TOLERANCE + + +def condition_dangling_both(paired_read, TOLERANCE): + """ + Dangling Both : les deux reads doivent être sur des sites de restriction. + + Examples: + >>> from readmanager import PairedRead + >>> read1 = ("r1", 0, "chr1", 105, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 16, "chr1", 208, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> pr.re_proximal_1 = (1, 100) + >>> pr.re_proximal_2 = (2, 210) + >>> condition_dangling_both(pr, TOLERANCE=5) + True + >>> condition_dangling_both(pr, TOLERANCE=1) + False + """ + return condition_dangling_left( + paired_read, TOLERANCE + ) and condition_dangling_right(paired_read, TOLERANCE) + + +############################################ +# CONDITIONS BASÉES SUR LES SITES DE RESTRICTION +############################################ + + +def condition_identical_RE(paired_read): + """ + Identical RE : les deux reads doivent se rapporter au même site de restriction. + + Examples: + >>> from readmanager import PairedRead + >>> read1 = ("r1", 0, "chr1", 100, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 200, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> pr.re_1 = (10, 100) # (site_num, position) + >>> pr.re_2 = (10, 200) + >>> condition_identical_RE(pr) + True + >>> pr.re_2 = (11, 200) + >>> condition_identical_RE(pr) + False + """ + return paired_read.re_1[0] == paired_read.re_2[0] + + +def condition_adjacent_RE(paired_read): + """ + Adjacent RE : les deux reads doivent se rapporter au même site de restriction. + + Examples: + >>> from readmanager import PairedRead + >>> read1 = ("r1", 0, "chr1", 100, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 200, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> pr.re_1 = (10, 100) + >>> pr.re_2 = (11, 200) + >>> condition_adjacent_RE(pr) + True + >>> pr.re_2 = (12, 200) + >>> condition_adjacent_RE(pr) + False + """ + return abs(paired_read.re_1[0] - paired_read.re_2[0]) == 1 + + +def condition_distant_RE(paired_read): + """ + Distant RE : les deux reads doivent se rapporter au même site de restriction. + + Examples: + >>> from readmanager import PairedRead + >>> read1 = ("r1", 0, "chr1", 100, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 200, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> pr.re_1 = (10, 100) + >>> pr.re_2 = (13, 200) + >>> condition_distant_RE(pr) + True + >>> pr.re_2 = (11, 200) + >>> condition_distant_RE(pr) + False + """ + return abs(paired_read.re_1[0] - paired_read.re_2[0]) > 1 + + +def compute_mask(paired_read, tolerance=3): + """ + Calcule un mask binaire (dictionnaire de booléens) pour la paire de lectures. + Ce mask contient les conditions évaluées pour : + - Les fragments (simple ou multiple) : Identical, Adjacent, Neither + - L'orientation : Convergent, Divergent, Identical + - La présence de dangling (gauche et droite) + + + Évalue un ensemble de conditions sur le PairedRead et retourne un + dictionnaire de booléens indiquant si chaque condition est remplie. + + Parameters: + paired_read (PairedRead): La paire de lectures à analyser. + tolerance (int): Tolérance utilisée pour les conditions de type 'dangling'. + + Returns: + Dict[str, bool]: Un dictionnaire contenant les résultats des conditions, + par exemple: + { + "single_fragment": True/False, + "frag_identical": ..., + "frag_adjacent": ..., + "frag_neither": ..., + "multiple_fragment": ..., + "frag_multiple_identical": ..., + "frag_multiple_adjacent": ..., + "frag_multiple_neither": ..., + "orientation_convergent": ..., + "orientation_divergent": ..., + "orientation_identical_up": ..., + "orientation_identical_down": ..., + "dangling_left": ..., + "dangling_right": ..., + "dangling_both": ..., + "re_identical": ..., + "re_adjacent": ..., + "re_distant": ... + } + """ + mask = {} + + # -- Basé sur les fragments simples -- + mask["single_fragment"] = condition_single_fragment(paired_read) + mask["frag_identical"] = condition_frag_identical(paired_read) + mask["frag_adjacent"] = condition_frag_adjacent(paired_read) + mask["frag_neither"] = condition_frag_neither(paired_read) + + # -- Basé sur les fragments multiples -- + mask["multiple_fragment"] = condition_multiple_fragment(paired_read) + mask["frag_multiple_identical"] = condition_frag_multiple_identical( + paired_read + ) + mask["frag_multiple_adjacent"] = condition_frag_multiple_adjacent( + paired_read + ) + mask["frag_multiple_neither"] = condition_frag_multiple_neither( + paired_read + ) + + # -- Basé sur l'orientation -- + mask["orientation_convergent"] = condition_orientation_convergent( + paired_read + ) + mask["orientation_divergent"] = condition_orientation_divergent( + paired_read + ) + mask["orientation_identical_up"] = condition_orientation_identical_up( + paired_read + ) + mask["orientation_identical_down"] = condition_orientation_identical_down( + paired_read + ) + + # -- Basé sur le "dangling" (sites de restriction proches) -- + mask["dangling_left"] = condition_dangling_left(paired_read, tolerance) + mask["dangling_right"] = condition_dangling_right(paired_read, tolerance) + mask["dangling_both"] = condition_dangling_both(paired_read, tolerance) + + # -- Basé sur les sites de restriction (re_1, re_2) -- + mask["re_identical"] = condition_identical_RE(paired_read) + mask["re_adjacent"] = condition_adjacent_RE(paired_read) + mask["re_distant"] = condition_distant_RE(paired_read) + + return mask + + +def Classified_intra(paired_read, tolerance=3): + """ + Classifie la paire de lectures (paired_read) selon les catégories décrites. + Elle s'appuie sur la fonction compute_mask(paired_read, tolerance) + qui renvoie un dictionnaire de conditions booléennes. + + Ordre d'évaluation (prioritaire) : + 1) Contact valide et biotinylés (condition_distant_RE == True) + - Close : orientation_divergent, single_fragment => ni identique ni adjacent + - Up : orientation_identical_up, single_fragment => ni identique ni adjacent + - Far : orientation_convergent, single_fragment => ni identique ni adjacent + - Down : orientation_identical_down, single_fragment => ni identique ni adjacent + - Si fragment_multiple => on n'applique pas de condition supplémentaire dans cet exemple + + 2) Contact informatif et biotinylés (condition_distant_RE == False) + - SelfCircle : orientation_divergent + re_adjacent + single_fragment + frag_identical + - Joined : orientation_convergent + re_identical + * si frag_multiple => frag_multiple_adjacent ou frag_multiple_identical + * si frag_simple => frag_adjacent + + 3) Semi-biotinylés (single_fragment + frag_identical + orientation_convergent) + - Dangling Left : condition_dangling_left + - Dangling Right : condition_dangling_right + - Dangling Both : condition_dangling_both + + 4) Combinaisons (Joined × Dangling) - exemple + - Joined × Dangling Left : ... + - Joined × Dangling Right : ... + - Joined × Dangling Both : ... + (Ici on peut faire un label combiné ou un label distinct) + + 5) Divers => "Other" + + Returns: + str: Le label de classification (ex: "Close", "Joined", "Semi-Biot Right", etc.) + + Examples: + >>> from readmanager import PairedRead + >>> from positionfrag import annotate_paired_read + >>> restr_index = { + ... "chr1": { + ... "sites": [ + ... (1, 100), + ... (2, 200), + ... (3, 300), + ... (4, 400) + ... ], + ... "fragments": [ + ... (1, (0, 100)), + ... (2, (100, 200)), + ... (3, (200, 300)), + ... (4, (300, 400)), + ... (5, (400, 500)) + ... ] + ... } + ... } + >>> read1 = ("r1", 0, "chr1", 101, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 300, 30, "50M", None, None, 0, "ACGT") # brin négatif + >>> pr = PairedRead(read1, read2) + >>> # On annote pr avec restr_index + >>> annotate_paired_read(pr, restr_index) + >>> pr.re_1 + (2, 200) + >>> pr.strand_1 + '+' + >>> pr.start_1 + 101 + >>> pr.end_1 + 151 + >>> pr.fragment_1 + [(2, (100, 200))] + >>> pr.re_2 + (4, 400) + >>> pr.strand_2 + '+' + >>> pr.start_2 + 300 + >>> pr.end_2 + 350 + >>> pr.fragment_2 + [(4, (300, 400))] + >>> # On appelle maintenant Classified_intra + >>> label = Classified_intra(pr, tolerance=5) + >>> label + 'Up' + >>> ################################################################# + >>> # Exemple 1 : "Close" + >>> # Conditions : re_distant True, orientation_divergent True, + >>> # single_fragment True, frag_identical False, frag_adjacent False. + >>> read1 = ("r1", 16, "chr1", 256, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 326, 30, "50M", None, None, 0, "ACGT") # read2 sur '-' + >>> pr = PairedRead(read1, read2) + >>> annotate_paired_read(pr, restr_index) + >>> label = Classified_intra(pr, tolerance=5) + >>> label + 'Close' + >>> ################################################################# + >>> # Exemple 2 : "Up" + >>> read1 = ("r1", 0, "chr1", 42, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 265, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> annotate_paired_read(pr, restr_index) + >>> label = Classified_intra(pr, tolerance=5) + >>> label + 'Up' + >>> ################################################################# + >>> # Exemple 3 : "Far" + >>> read1 = ("r1", 0, "chr1", 42, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 18, "chr1", 358, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> annotate_paired_read(pr, restr_index) + >>> label = Classified_intra(pr, tolerance=5) + >>> label + 'Far' + >>> ################################################################# + # Exemple 4 : "Joined-Dangling-Left" + >>> read1 = ("r1", 0, "chr1", 102, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 16, "chr1", 291, 30, "70M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> annotate_paired_read(pr, restr_index) + >>> label = Classified_intra(pr, tolerance=4) + >>> label + 'Joined-Dangling-Left' + >>> label = Classified_intra(pr, tolerance=1) + >>> label + 'Joined' + >>> label = Classified_intra(pr, tolerance=10) + >>> label + 'Joined-Dangling-Both' + >>> ################################################################# + >>> # Exemple 5 : "SelfCircle" + >>> read1 = ("r1", 48, "chr1", 140, 30, "32M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 151, 30, "45M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> annotate_paired_read(pr, restr_index) + >>> label = Classified_intra(pr, tolerance=5) + >>> label + 'SelfCircle' + >>> ################################################################# + # Exemple 6 : "Joined" (informative) + >>> read1 = ("r1", 0, "chr1", 120, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 16, "chr1", 254, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> annotate_paired_read(pr, restr_index) + >>> label = Classified_intra(pr, tolerance=5) + >>> label + 'Joined' + >>> ################################################################# + >>> # Exemple 7 : "DLLeft" + >>> read1 = ("r1", 0, "chr1", 110, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 16, "chr1", 185, 30, "30M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> annotate_paired_read(pr, restr_index) + >>> label = Classified_intra(pr, tolerance=1) + >>> label + 'Other' + >>> label = Classified_intra(pr, tolerance=10) + >>> label + 'DLLeft' + >>> ################################################################# + >>> # Exemple 8 : "DLBoth" + >>> read1 = ("r1", 0, "chr1", 100, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 16, "chr1", 195, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> annotate_paired_read(pr, restr_index) + >>> label = Classified_intra(pr, tolerance=5) + >>> label + 'DLBoth' + >>> ################################################################# + >>> # Exemple 9 : "Other" + >>> read1 = ("r1", 0, "chr1", 100, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 0, "chr1", 100, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> annotate_paired_read(pr, restr_index) + >>> label = Classified_intra(pr, tolerance=5) + >>> label + 'Other' + """ + mask = compute_mask(paired_read, tolerance) + + # --------------------------------------------------------- + # 1) CONTACT VALIDE (re_distant == True) + # --------------------------------------------------------- + if mask["re_distant"]: + # On sous-classe ensuite selon l'orientation + # et le fait d'être single_fragment => ni identique ni adjacent + # NOTE : vous pouvez adapter la logique si fragment_multiple est autorisé. + + # => "non identique et non adjacent" => pas identical et pas adjacent + + if mask["orientation_divergent"]: + # => "Close" si single and not identical/adjacent + if mask["dangling_left"]: + return "Close-DLLeft" + elif mask["dangling_right"]: + return "Close-DLRight" + elif mask["dangling_both"]: + return "Close-DLBoth" + else: + return "Close" # par défaut + + elif mask["orientation_identical_up"]: + if mask["dangling_left"]: + return "Up-DLLeft" + elif mask["dangling_right"]: + return "Up-DLRight" + elif mask["dangling_both"]: + return "Up-DLBoth" + else: + return "Up" + + elif mask["orientation_convergent"]: + # => "Far" + if mask["dangling_left"]: + return "Far-DLLeft" + elif mask["dangling_right"]: + return "Far-DLRight" + elif mask["dangling_both"]: + return "Far-DLBoth" + else: + return "Far" + + elif mask["orientation_identical_down"]: + # => "Down" + if mask["dangling_left"]: + return "Down-DLLeft" + elif mask["dangling_right"]: + return "Down-DLRight" + elif mask["dangling_both"]: + return "Down-DLBoth" + else: + return "Down" + + # Si on ne matche aucune orientation "possible", on passe en "Erreur_Algorythmique" + return "Erreur_Algorythmique" # Amas Zingue ! + + # --------------------------------------------------------- + # 2) CONTACT INFORMATIF (re_distant == False) + # --------------------------------------------------------- + # a) SelfCircle: orientation_divergent + re_adjacent + single_fragment + frag_identical + if ( + mask["orientation_divergent"] + and mask["re_adjacent"] + and mask["single_fragment"] + and mask["frag_identical"] + ): + return "SelfCircle" + + # b) Joined : orientation_convergent + re_identical + if mask["orientation_convergent"] and mask["re_identical"]: + # Si fragment multiple => frag_multiple_adjacent ou frag_multiple_identical + if mask["multiple_fragment"] and ( + mask["frag_multiple_adjacent"] or mask["frag_multiple_identical"] + ): + return "Joined" + # Si single => adjacent + if mask["single_fragment"] and mask["frag_adjacent"]: + # Combinaison spécifique : + if mask["dangling_both"]: + return "Joined-DLBoth" + + elif mask["dangling_left"]: + return "Joined-DLLeft" + + elif mask["dangling_right"]: + return "Joined-DLRight" + + else: + return "Joined" + + # --------------------------------------------------------- + # 3) SEMI-BIOTINYLÉS : single + frag_identical + orientation_convergent + # + dangling + # --------------------------------------------------------- + if ( + mask["single_fragment"] + and mask["frag_identical"] + and mask["orientation_convergent"] + ): + # On regarde si c'est dangling left, right, both + dl = mask["dangling_left"] + dr = mask["dangling_right"] + db = mask["dangling_both"] # généralement = dl & dr + + if db: + return "DLBoth" + elif dl: + return "DLLeft" + elif dr: + return "DLRight" + else: + return "Other" # :o + + # --------------------------------------------------------- + # 4) DIVERS => "Other" + # --------------------------------------------------------- + return "Other" + + +""" +Contact valide et biotinylés : (condition première : condition_distant_RE) + Close : sens divergent + - fragment simple : non identique + - fragment multiple : pas de condition spécifique supplémentaire + Up : sens identical right + - fragment simple : ni identique, ni adjacent + - fragment multiple : non identique + Far : sens convergent + - fragment simple : ni identique, ni adjacent + - fragment multiple : non identique + Down : sens identical left + - fragment simple : ni identique, ni adjacent + - fragment multiple : non identique + +Contact informatif et biotinylés : + Self - Circle : sens divergent + condition_adjacent_RE + fragment simple et identitique + Joined : sens convergent + RE identique + - Si frag multiple : frag_multiple_adjacent or identique + - Si frag simple : adjacent + +Catégorie semi-biotinylés : (fragment simple + fragment identique) + Dangling Left : sens convergent + condition_dangling_left + Dangling Right : sens convergent condition_dangling_right + Dangling Both : sens convergent condition_dangling_both + +Combinaisons : + Joined x Dangling Left : toute les conditions de DL Left et de Joined combiné + Joined x Dangling Right : toute les conditions de DL Right et de Joined combiné + Joined x Dangling Both : toute les conditions de DL Both et de Joined combiné + +Divers : + Other + +""" diff --git a/hi-classifier/positionfrag.py b/hi-classifier/positionfrag.py index 00e6f0f53b81fb40ab86b16916b10378d9df170b..7ce4fcaee01be1fd2bcc3af2e895d533a710167f 100644 --- a/hi-classifier/positionfrag.py +++ b/hi-classifier/positionfrag.py @@ -217,7 +217,7 @@ def get_fragments_for_read(chrom, start, end, restr_index): return overlapping_fragments -def annotate_intra_paired_read(paired_read, restr_index): +def annotate_paired_read(paired_read, restr_index): """ Pour une instance de PairedRead, met à jour ses attributs avec les informations extraites à partir de l'index de restriction. @@ -250,7 +250,7 @@ def annotate_intra_paired_read(paired_read, restr_index): >>> read1 = ("r1", 0, "chr1", 149, 30, "50M", None, None, 0, "ACGT") >>> read2 = ("r2", 16, "chr1", 351, 30, "50M", None, None, 0, "ACGT") # brin négatif >>> pr = PairedRead(read1, read2) - >>> annotate_intra_paired_read(pr, restr_index) + >>> annotate_paired_read(pr, restr_index) >>> pr.re_1, pr.re_2 ((2, 200), (3, 300)) >>> pr.re_2 @@ -264,7 +264,7 @@ def annotate_intra_paired_read(paired_read, restr_index): >>> read1 = ("r1", 18, "chr1", 149, 30, "50M", None, None, 0, "ACGT") # brin négatif >>> read2 = ("r2", 16, "chr1", 349, 30, "50M", None, None, 0, "ACGT") # brin négatif >>> pr = PairedRead(read1, read2) - >>> annotate_intra_paired_read(pr, restr_index) + >>> annotate_paired_read(pr, restr_index) >>> pr.fragment_1 [(1, (0, 100)), (2, (100, 200))] >>> pr.fragment_2 diff --git a/hi-classifier/processing.py b/hi-classifier/processing.py index 7e2e9f411fb2721a8326932c22324eff6931d275..97192f70017b2c5fa02b84c6e0ffc6d1fca676af 100644 --- a/hi-classifier/processing.py +++ b/hi-classifier/processing.py @@ -1,6 +1,8 @@ def process_items( input_queue, output_queue, + Counts_Dang_queue, + Counts_Biot_queue, mapq_score, dict_digested, ligation_site_list, @@ -16,16 +18,25 @@ def process_items( import sys from auxiliary import check_data - from positionfrag import annotate_intra_paired_read + from event_collector import update_restriction_counts + from filter_interchrom import Classified_inter + from filter_intrachrom import Classified_intra + from positionfrag import annotate_paired_read from readmanager import PairedRead try: + Counts_Dangling = dict() + Counts_Biotin = dict() while True: # affichage = ok data = input_queue.get() if data is None: + Counts_Dang_queue.put(Counts_Dangling) + Counts_Biot_queue.put(Counts_Biotin) output_queue.put(None) + Counts_Dang_queue.put(None) + Counts_Biot_queue.put(None) break if check_data(data): @@ -37,18 +48,47 @@ def process_items( ): if current_pair.chrom_1 == current_pair.chrom_2: # Récupération des informations sur la paire de read - annotate_intra_paired_read(current_pair, dict_digested) + annotate_paired_read(current_pair, dict_digested) + label = Classified_intra(current_pair, tolerance=1) + # + # Pensez à contrôler la longueur ! + # + if label != "Other" + if len("0") == 1: + Counts_Biotin, Counts_Dangling = ( + update_restriction_counts( + current_pair, + label, + Counts_Biotin, + Counts_Dangling, + ) + ) - else: + output_queue.put( (label, [data[0], data[1]]) ) + + elif current_pair.interchrom: + annotate_paired_read(current_pair, dict_digested) + label = Classified_inter(current_pair, tolerance=1) # - # Pour le moment, il n'y a pas de classification pour les contacts extrachromosomique + # Pensez à contrôler la longueur ! # - # Idée : Prendre uniquement les catégories Up, Down... - # Et considérer toujours que la première lecture est issue - # du chr au chiffre le plus faible ! - pass + if label != "Other" + if len("0") == 1: + Counts_Biotin, Counts_Dangling = ( + update_restriction_counts( + current_pair, + label, + Counts_Biotin, + Counts_Dangling, + ) + ) + + output_queue.put( (label, [data[0], data[1]]) ) + + else: + raise ValueError("Chromosone Name's problems") + - output_queue.put([data[0], data[1]]) else: raise ValueError( diff --git a/hi-classifier/readmanager.py b/hi-classifier/readmanager.py index d4409ed195f02b0386f79eeb62a46aff856715c4..3839483d9a2b6c9ce8e88e4aec4b3ad4611c6534 100644 --- a/hi-classifier/readmanager.py +++ b/hi-classifier/readmanager.py @@ -175,6 +175,9 @@ class PairedRead: else: self.end_2 = self.start_2 - ref_len_2 + # Indique si c'est interchromosomique + self.interchrom = self.chrom_1 != self.chrom_2 + # --- Imposer l'ordre si même chromosome --- if ( self.chrom_1 == self.chrom_2 @@ -196,8 +199,9 @@ class PairedRead: if leftmost_2 < leftmost_1: self._swap_reads() - # Indique si c'est interchromosomique - self.interchrom = self.chrom_1 != self.chrom_2 + # --- Imposer l'ordre si différents chromosomes --- + if self.chrom_1 < self.chrom_2: + self._swap_reads() def _swap_reads(self): """ @@ -208,13 +212,21 @@ class PairedRead: >>> # => le code ne va pas faire le swap dans __init__ car chrom_1 != chrom_2 >>> from readmanager import PairedRead >>> read1 = ("r1", 0, "chr1", 500, 30, "50M", None, None, 0, "ACGT") - >>> read2 = ("r2", 16, "chr2", 100, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 16, "chr1", 100, 30, "50M", None, None, 0, "ACGT") >>> pr = PairedRead(read1, read2) >>> pr.query_name_1, pr.query_name_2 - ('r1', 'r2') + ('r2', 'r1') >>> pr._swap_reads() >>> pr.query_name_1, pr.query_name_2 + ('r1', 'r2') + >>> read1 = ("r1", 0, "chr2", 500, 30, "50M", None, None, 0, "ACGT") + >>> read2 = ("r2", 16, "chr3", 100, 30, "50M", None, None, 0, "ACGT") + >>> pr = PairedRead(read1, read2) + >>> pr.query_name_1, pr.query_name_2 ('r2', 'r1') + >>> pr._swap_reads() + >>> pr.query_name_1, pr.query_name_2 + ('r1', 'r2') """ # On échange : query_name, flag, chrom, start, end, mapq, cigar, strand (self.query_name_1, self.query_name_2) = ( diff --git a/hi-classifier/send_label.py b/hi-classifier/send_label.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391