Skip to content
Snippets Groups Projects
Commit f21e6654 authored by Bertache's avatar Bertache
Browse files

Fin de Classif, Debut Dico Event, Ecriture Non Commencé

parent 63699ea9
No related branches found
No related tags found
No related merge requests found
...@@ -55,6 +55,8 @@ def categorize(args, logging): ...@@ -55,6 +55,8 @@ def categorize(args, logging):
input_queue = Queue() input_queue = Queue()
Header_Queue = Queue() Header_Queue = Queue()
output_queue = Queue() output_queue = Queue()
Counts_Dang_queue = Queue()
Counts_Biot_queue = Queue()
write_processes, compute_processes = partitionning(num_threads) write_processes, compute_processes = partitionning(num_threads)
# communicate(write_processes=write_processes, compute_processes=compute_processes) # communicate(write_processes=write_processes, compute_processes=compute_processes)
...@@ -83,6 +85,8 @@ def categorize(args, logging): ...@@ -83,6 +85,8 @@ def categorize(args, logging):
args=( args=(
input_queue, input_queue,
output_queue, output_queue,
Counts_Dang_queue,
Counts_Biot_queue,
mapq_score, mapq_score,
dict_digested, dict_digested,
ligation_site_list, ligation_site_list,
...@@ -92,6 +96,17 @@ def categorize(args, logging): ...@@ -92,6 +96,17 @@ def categorize(args, logging):
for _ in range(compute_processes) for _ in range(compute_processes)
] ]
# Process for processing items
[
manager.start_worker(
target=
args=(
),
)
for _ in range(compute_processes)
]
# Process for writing pairs # Process for writing pairs
manager.start_worker( manager.start_worker(
target=write_bam_pair, target=write_bam_pair,
......
from readmanager import PairedRead
def update_restriction_counts(paired_read, label, counts_VL, counts_DL):
"""
Met à jour le dictionnaire 'counts' avec le comptage des sites de restriction
pour la paire de lectures 'paired_read', selon le label de classification.
- Si le label est dans {"Up", "Down", "Close", "Joined", "Self-Circle"},
on incrémente le compteur pour les sites re_1 et re_2.
- Si le label contient "DLLeft", on incrémente le compteur pour re_proximal_1.
- Si le label contient "DLRight", on incrémente le compteur pour re_proximal_2.
- Si le label contient "DLBoth", on incrémente les compteurs pour les deux.
Le dictionnaire 'counts' est organisé de la façon suivante :
{ chromosome: { site_position: count, ... } }
Parameters:
paired_read: Objet (de type PairedRead) qui possède les attributs :
- chrom_1 (et chrom_2, supposés identiques ici)
- re_1 et re_2: tuples (site_num, position)
- re_proximal_1 et re_proximal_2: tuples (site_num, position)
label (str): Le label de classification obtenu (ex: "Up", "Close-DLLeft", etc.)
counts (dict): Dictionnaire de compteurs à mettre à jour.
Returns:
dict: Le dictionnaire mis à jour.
Examples:
>>> restr_index = {
... "chr1": {
... "sites": [
... (1, 100),
... (2, 200),
... (3, 300),
... (4, 400)
... ],
... "fragments": [
... (1, (0, 100)),
... (2, (100, 200)),
... (3, (200, 300)),
... (4, (300, 400)),
... (5, (400, 500))
... ]
... }
... }
>>> from readmanager import PairedRead
>>> from positionfrag import annotate_paired_read
>>> read1 = ("r1", 16, "chr1", 101, 30, "50M", None, None, 0, "ACGT")
>>> read2 = ("r2", 0, "chr1", 300, 30, "50M", None, None, 0, "ACGT")
>>> pr = PairedRead(read1, read2)
>>> annotate_paired_read(pr, restr_index)
>>> label = Classified_inter(pr, tolerance=3)
>>> counts_VL = {}
>>> counts_DL = {}
>>> cvl, cdl = update_restriction_counts(paired_read, label, counts_VL, counts_DL)
>>> cvl
{}
>>> cdl
{}
>>> cvl, cdl = update_restriction_counts(paired_read, label, counts_VL, counts_DL)
>>> cvl
{}
>>> cdl
{}
>>> cvl, cdl = update_restriction_counts(paired_read, label, counts_VL, counts_DL)
>>> cvl
{}
>>> cdl
{}
>>> read1 = ("r1", 16, "chr1", 305, 30, "50M", None, None, 0, "ACGT")
>>> read2 = ("r2", 0, "chr1", 135, 30, "50M", None, None, 0, "ACGT")
>>> pr = PairedRead(read1, read2)
>>> annotate_paired_read(pr, restr_index)
>>> label = Classified_inter(pr, tolerance=3)
>>> cvl, cdl = update_restriction_counts(paired_read, label, counts_VL, counts_DL)
>>> cvl
{}
>>> cdl
{}
"""
# On suppose que les lectures sont sur le même chromosome
chrom = paired_read.chrom_1
if chrom not in counts:
counts[chrom] = {}
# Si label indique un contact valide, on incrémente re_1 et re_2
if label in {"Up", "Down", "Close", "Joined", "SelfCircle"}:
for site in [paired_read.re_1, paired_read.re_2]:
if site is not None:
pos = site[1]
counts_VL[chrom][pos] = counts_VL[chrom].get(pos, 0) + 1
# Si label contient "DanglingLeft", on incrémente re_proximal_1
if "DLLeft" in label:
if paired_read.re_proximal_1 is not None:
pos = paired_read.re_proximal_1[1]
counts_DL[chrom][pos] = counts_DL[chrom].get(pos, 0) + 1
# Si label contient "DanglingRight", on incrémente re_proximal_2
if "DLRight" in label:
if paired_read.re_proximal_2 is not None:
pos = paired_read.re_proximal_2[1]
counts_DL[chrom][pos] = counts_DL[chrom].get(pos, 0) + 1
# Si label contient "DanglingBoth", on incrémente pour les deux
if "DLBoth" in label:
if paired_read.re_proximal_1 is not None:
pos = paired_read.re_proximal_1[1]
counts_DL[chrom][pos] = counts_DL[chrom].get(pos, 0) + 1
if paired_read.re_proximal_2 is not None:
pos = paired_read.re_proximal_2[1]
counts_DL[chrom][pos] = counts_DL[chrom].get(pos, 0) + 1
return counts_VL, counts_DL
from filter_intrachrom import (
condition_dangling_both,
condition_dangling_left,
condition_dangling_right,
condition_orientation_convergent,
condition_orientation_divergent,
condition_orientation_identical_down,
condition_orientation_identical_up,
)
def compute_mask_inter(paired_read, tolerance=3):
"""
Calcule un mask binaire (dictionnaire de booléens) pour la paire de lectures.
Ce mask contient les conditions évaluées pour :
- Les fragments (simple ou multiple) : Identical, Adjacent, Neither
- L'orientation : Convergent, Divergent, Identical
- La présence de dangling (gauche et droite)
Évalue un ensemble de conditions sur le PairedRead et retourne un
dictionnaire de booléens indiquant si chaque condition est remplie.
Parameters:
paired_read (PairedRead): La paire de lectures à analyser.
tolerance (int): Tolérance utilisée pour les conditions de type 'dangling'.
Returns:
Dict[str, bool]: Un dictionnaire contenant les résultats des conditions,
par exemple:
{
"orientation_convergent": ...,
"orientation_divergent": ...,
"orientation_identical_up": ...,
"orientation_identical_down": ...,
"dangling_left": ...,
"dangling_right": ...,
"dangling_both": ...
}
Examples:
>>> restr_index = {
... "chr1": {
... "sites": [
... (1, 100),
... (2, 200),
... (3, 300),
... (4, 400)
... ],
... "fragments": [
... (1, (0, 100)),
... (2, (100, 200)),
... (3, (200, 300)),
... (4, (300, 400)),
... (5, (400, 500))
... ]
... }
... }
>>> from readmanager import PairedRead
>>> from positionfrag import annotate_paired_read
>>> read1 = ("r1", 0, "chr1", 120, 30, "50M", None, None, 0, "ACGT")
>>> read2 = ("r2", 0, "chr1", 210, 30, "50M", None, None, 0, "ACGT")
>>> pr = PairedRead(read1, read2)
>>> annotate_paired_read(pr, restr_index)
>>> compute_mask_inter(pr, tolerance=3)
{'orientation_convergent': False, 'orientation_divergent': False, 'orientation_identical_up': True, 'orientation_identical_down': False, 'dangling_left': False, 'dangling_right': False, 'dangling_both': False}
"""
mask = {}
# -- Basé sur l'orientation --
mask["orientation_convergent"] = condition_orientation_convergent(
paired_read
)
mask["orientation_divergent"] = condition_orientation_divergent(
paired_read
)
mask["orientation_identical_up"] = condition_orientation_identical_up(
paired_read
)
mask["orientation_identical_down"] = condition_orientation_identical_down(
paired_read
)
# -- Basé sur le "dangling" (sites de restriction proches) --
mask["dangling_left"] = condition_dangling_left(paired_read, tolerance)
mask["dangling_right"] = condition_dangling_right(paired_read, tolerance)
mask["dangling_both"] = condition_dangling_both(paired_read, tolerance)
return mask
def Classified_inter(paired_read, tolerance=3):
"""
Classifie la paire de lectures (paired_read) selon les catégories de Contact valide
et ajoute, en information secondaire, la présence de dangling (DLLeft, DLRight, DLBoth).
Priorité principale (orientation) :
- Si orientation_divergent: "Close"
- Si orientation_identical_up: "Up"
- Si orientation_convergent: "Far"
- Si orientation_identical_down: "Down"
- Sinon: "Other"
Ensuite, si une condition de dangling est vraie, le label est complété par un suffixe
indiquant la ou les conditions (par exemple, "DLLeft", "DLRight" ou "DLBoth").
Returns:
str: Le label de classification, par exemple "Close-DLLeft" ou "Far" etc.
Examples:
>>> restr_index = {
... "chr1": {
... "sites": [
... (1, 100),
... (2, 200),
... (3, 300),
... (4, 400)
... ],
... "fragments": [
... (1, (0, 100)),
... (2, (100, 200)),
... (3, (200, 300)),
... (4, (300, 400)),
... (5, (400, 500))
... ]
... }
... }
>>> from readmanager import PairedRead
>>> from positionfrag import annotate_paired_read
>>> read1 = ("r1", 16, "chr1", 121, 30, "50M", None, None, 0, "ACGT")
>>> read2 = ("r2", 0, "chr1", 310, 30, "50M", None, None, 0, "ACGT")
>>> pr = PairedRead(read1, read2)
>>> annotate_paired_read(pr, restr_index)
>>> Classified_inter(pr, tolerance=3)
'Close-None'
>>> # Test "Up" sans dangling
>>> read1 = ("r1", 0, "chr1", 121, 30, "50M", None, None, 0, "ACGT")
>>> read2 = ("r2", 0, "chr1", 310, 30, "50M", None, None, 0, "ACGT")
>>> pr = PairedRead(read1, read2)
>>> annotate_paired_read(pr, restr_index)
>>> Classified_inter(pr, tolerance=3)
'Up-None'
>>> # Test "Far" sans dangling
>>> read1 = ("r1", 0, "chr1", 121, 30, "50M", None, None, 0, "ACGT")
>>> read2 = ("r2", 16, "chr1", 310, 30, "50M", None, None, 0, "ACGT")
>>> pr = PairedRead(read1, read2)
>>> annotate_paired_read(pr, restr_index)
>>> Classified_inter(pr, tolerance=3)
'Far-None'
>>> # Test "Down" sans dangling
>>> read1 = ("r1", 16, "chr1", 121, 30, "50M", None, None, 0, "ACGT")
>>> read2 = ("r2", 16, "chr1", 310, 30, "50M", None, None, 0, "ACGT")
>>> pr = PairedRead(read1, read2)
>>> annotate_paired_read(pr, restr_index)
>>> Classified_inter(pr, tolerance=3)
'Down-None'
>>> # Test ajout du suffixe dangling : simulons un cas "Close" avec dangling_left
>>> read1 = ("r1", 16, "chr1", 101, 30, "50M", None, None, 0, "ACGT")
>>> read2 = ("r2", 0, "chr1", 310, 30, "50M", None, None, 0, "ACGT")
>>> pr = PairedRead(read1, read2)
>>> annotate_paired_read(pr, restr_index)
>>> Classified_inter(pr, tolerance=3)
'Close-DLLeft'
>>> # Test suffixe dangling_both : prioritaire sur les autres
>>> read1 = ("r1", 16, "chr1", 100, 30, "50M", None, None, 0, "ACGT")
>>> read2 = ("r2", 0, "chr1", 300, 30, "50M", None, None, 0, "ACGT")
>>> pr = PairedRead(read1, read2)
>>> annotate_paired_read(pr, restr_index)
>>> Classified_inter(pr, tolerance=3)
'Close-DLBoth'
>>> # Test suffixe dangling_right seul
>>> read1 = ("r1", 0, "chr1", 120, 30, "50M", None, None, 0, "ACGT")
>>> read2 = ("r2", 16, "chr1", 300, 30, "50M", None, None, 0, "ACGT")
>>> pr = PairedRead(read1, read2)
>>> annotate_paired_read(pr, restr_index)
>>> Classified_inter(pr, tolerance=3)
'Far-DLRight'
"""
mask = compute_mask_inter(paired_read, tolerance)
main_label = []
# Détermination du label principal selon l'orientation
if mask["orientation_divergent"]:
main_label = "Close"
elif mask["orientation_identical_up"]:
main_label = "Up"
elif mask["orientation_convergent"]:
main_label = "Far"
elif mask["orientation_identical_down"]:
main_label = "Down"
else:
main_label = "Erreur_Algorythmique"
# Informations secondaires sur le dangling
dl_labels = []
if mask["dangling_both"]:
dl_labels = "DLBoth"
elif mask["dangling_left"]:
dl_labels = "DLLeft"
elif mask["dangling_right"]:
dl_labels = "DLRight"
else:
dl_labels = "None"
return main_label + "-" + dl_labels
This diff is collapsed.
...@@ -217,7 +217,7 @@ def get_fragments_for_read(chrom, start, end, restr_index): ...@@ -217,7 +217,7 @@ def get_fragments_for_read(chrom, start, end, restr_index):
return overlapping_fragments return overlapping_fragments
def annotate_intra_paired_read(paired_read, restr_index): def annotate_paired_read(paired_read, restr_index):
""" """
Pour une instance de PairedRead, met à jour ses attributs avec les informations extraites Pour une instance de PairedRead, met à jour ses attributs avec les informations extraites
à partir de l'index de restriction. à partir de l'index de restriction.
...@@ -250,7 +250,7 @@ def annotate_intra_paired_read(paired_read, restr_index): ...@@ -250,7 +250,7 @@ def annotate_intra_paired_read(paired_read, restr_index):
>>> read1 = ("r1", 0, "chr1", 149, 30, "50M", None, None, 0, "ACGT") >>> read1 = ("r1", 0, "chr1", 149, 30, "50M", None, None, 0, "ACGT")
>>> read2 = ("r2", 16, "chr1", 351, 30, "50M", None, None, 0, "ACGT") # brin négatif >>> read2 = ("r2", 16, "chr1", 351, 30, "50M", None, None, 0, "ACGT") # brin négatif
>>> pr = PairedRead(read1, read2) >>> pr = PairedRead(read1, read2)
>>> annotate_intra_paired_read(pr, restr_index) >>> annotate_paired_read(pr, restr_index)
>>> pr.re_1, pr.re_2 >>> pr.re_1, pr.re_2
((2, 200), (3, 300)) ((2, 200), (3, 300))
>>> pr.re_2 >>> pr.re_2
...@@ -264,7 +264,7 @@ def annotate_intra_paired_read(paired_read, restr_index): ...@@ -264,7 +264,7 @@ def annotate_intra_paired_read(paired_read, restr_index):
>>> read1 = ("r1", 18, "chr1", 149, 30, "50M", None, None, 0, "ACGT") # brin négatif >>> read1 = ("r1", 18, "chr1", 149, 30, "50M", None, None, 0, "ACGT") # brin négatif
>>> read2 = ("r2", 16, "chr1", 349, 30, "50M", None, None, 0, "ACGT") # brin négatif >>> read2 = ("r2", 16, "chr1", 349, 30, "50M", None, None, 0, "ACGT") # brin négatif
>>> pr = PairedRead(read1, read2) >>> pr = PairedRead(read1, read2)
>>> annotate_intra_paired_read(pr, restr_index) >>> annotate_paired_read(pr, restr_index)
>>> pr.fragment_1 >>> pr.fragment_1
[(1, (0, 100)), (2, (100, 200))] [(1, (0, 100)), (2, (100, 200))]
>>> pr.fragment_2 >>> pr.fragment_2
......
def process_items( def process_items(
input_queue, input_queue,
output_queue, output_queue,
Counts_Dang_queue,
Counts_Biot_queue,
mapq_score, mapq_score,
dict_digested, dict_digested,
ligation_site_list, ligation_site_list,
...@@ -16,16 +18,25 @@ def process_items( ...@@ -16,16 +18,25 @@ def process_items(
import sys import sys
from auxiliary import check_data from auxiliary import check_data
from positionfrag import annotate_intra_paired_read from event_collector import update_restriction_counts
from filter_interchrom import Classified_inter
from filter_intrachrom import Classified_intra
from positionfrag import annotate_paired_read
from readmanager import PairedRead from readmanager import PairedRead
try: try:
Counts_Dangling = dict()
Counts_Biotin = dict()
while True: while True:
# affichage = ok # affichage = ok
data = input_queue.get() data = input_queue.get()
if data is None: if data is None:
Counts_Dang_queue.put(Counts_Dangling)
Counts_Biot_queue.put(Counts_Biotin)
output_queue.put(None) output_queue.put(None)
Counts_Dang_queue.put(None)
Counts_Biot_queue.put(None)
break break
if check_data(data): if check_data(data):
...@@ -37,18 +48,47 @@ def process_items( ...@@ -37,18 +48,47 @@ def process_items(
): ):
if current_pair.chrom_1 == current_pair.chrom_2: if current_pair.chrom_1 == current_pair.chrom_2:
# Récupération des informations sur la paire de read # Récupération des informations sur la paire de read
annotate_intra_paired_read(current_pair, dict_digested) annotate_paired_read(current_pair, dict_digested)
label = Classified_intra(current_pair, tolerance=1)
#
# Pensez à contrôler la longueur !
#
if label != "Other"
if len("0") == 1:
Counts_Biotin, Counts_Dangling = (
update_restriction_counts(
current_pair,
label,
Counts_Biotin,
Counts_Dangling,
)
)
else: output_queue.put( (label, [data[0], data[1]]) )
elif current_pair.interchrom:
annotate_paired_read(current_pair, dict_digested)
label = Classified_inter(current_pair, tolerance=1)
# #
# Pour le moment, il n'y a pas de classification pour les contacts extrachromosomique # Pensez à contrôler la longueur !
# #
# Idée : Prendre uniquement les catégories Up, Down... if label != "Other"
# Et considérer toujours que la première lecture est issue if len("0") == 1:
# du chr au chiffre le plus faible ! Counts_Biotin, Counts_Dangling = (
pass update_restriction_counts(
current_pair,
label,
Counts_Biotin,
Counts_Dangling,
)
)
output_queue.put( (label, [data[0], data[1]]) )
else:
raise ValueError("Chromosone Name's problems")
output_queue.put([data[0], data[1]])
else: else:
raise ValueError( raise ValueError(
......
...@@ -175,6 +175,9 @@ class PairedRead: ...@@ -175,6 +175,9 @@ class PairedRead:
else: else:
self.end_2 = self.start_2 - ref_len_2 self.end_2 = self.start_2 - ref_len_2
# Indique si c'est interchromosomique
self.interchrom = self.chrom_1 != self.chrom_2
# --- Imposer l'ordre si même chromosome --- # --- Imposer l'ordre si même chromosome ---
if ( if (
self.chrom_1 == self.chrom_2 self.chrom_1 == self.chrom_2
...@@ -196,8 +199,9 @@ class PairedRead: ...@@ -196,8 +199,9 @@ class PairedRead:
if leftmost_2 < leftmost_1: if leftmost_2 < leftmost_1:
self._swap_reads() self._swap_reads()
# Indique si c'est interchromosomique # --- Imposer l'ordre si différents chromosomes ---
self.interchrom = self.chrom_1 != self.chrom_2 if self.chrom_1 < self.chrom_2:
self._swap_reads()
def _swap_reads(self): def _swap_reads(self):
""" """
...@@ -208,13 +212,21 @@ class PairedRead: ...@@ -208,13 +212,21 @@ class PairedRead:
>>> # => le code ne va pas faire le swap dans __init__ car chrom_1 != chrom_2 >>> # => le code ne va pas faire le swap dans __init__ car chrom_1 != chrom_2
>>> from readmanager import PairedRead >>> from readmanager import PairedRead
>>> read1 = ("r1", 0, "chr1", 500, 30, "50M", None, None, 0, "ACGT") >>> read1 = ("r1", 0, "chr1", 500, 30, "50M", None, None, 0, "ACGT")
>>> read2 = ("r2", 16, "chr2", 100, 30, "50M", None, None, 0, "ACGT") >>> read2 = ("r2", 16, "chr1", 100, 30, "50M", None, None, 0, "ACGT")
>>> pr = PairedRead(read1, read2) >>> pr = PairedRead(read1, read2)
>>> pr.query_name_1, pr.query_name_2 >>> pr.query_name_1, pr.query_name_2
('r1', 'r2') ('r2', 'r1')
>>> pr._swap_reads() >>> pr._swap_reads()
>>> pr.query_name_1, pr.query_name_2 >>> pr.query_name_1, pr.query_name_2
('r1', 'r2')
>>> read1 = ("r1", 0, "chr2", 500, 30, "50M", None, None, 0, "ACGT")
>>> read2 = ("r2", 16, "chr3", 100, 30, "50M", None, None, 0, "ACGT")
>>> pr = PairedRead(read1, read2)
>>> pr.query_name_1, pr.query_name_2
('r2', 'r1') ('r2', 'r1')
>>> pr._swap_reads()
>>> pr.query_name_1, pr.query_name_2
('r1', 'r2')
""" """
# On échange : query_name, flag, chrom, start, end, mapq, cigar, strand # On échange : query_name, flag, chrom, start, end, mapq, cigar, strand
(self.query_name_1, self.query_name_2) = ( (self.query_name_1, self.query_name_2) = (
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment