def get_select_addition(global_weight: int, get_weight: bool,
                        same_gene: bool) -> str:
    """
    Return the SQL fragment to append to the SELECT clause in order to \
    recover the weight of the interactions.

    :param global_weight: The global weight to consider. If \
    the global weight is equal to 0, the interactions are selected \
    project by project and the raw weight column is returned. For any \
    other value the projects are merged together (the query groups the \
    rows by exon pair), so the weight is averaged with AVG().
    :param get_weight: Say if we want to recover the weight of the interaction
    :param same_gene: Say if we consider as co-localised exon within the \
    same gene. When False, the weight column is prefixed with the `t1` \
    table alias used by the query joining the exon tables.
    :return: The additional column to get (starting with ', '), or an \
    empty string if `get_weight` is False

    >>> get_select_addition(0, True, True)
    ', weight'
    >>> get_select_addition(0, True, False)
    ', t1.weight'
    >>> get_select_addition(2, True, True)
    ', AVG(weight)'
    >>> get_select_addition(2, True, False)
    ', AVG(t1.weight)'
    >>> get_select_addition(2, False, False)
    ''
    """
    if not get_weight:
        return ''
    # Without the same-gene shortcut the query joins several tables, so the
    # column has to be qualified with the alias of the interaction table.
    column = 'weight' if same_gene else 't1.weight'
    # Mirror the caller's branching: only global_weight == 0 produces an
    # ungrouped, per-project query; every other value produces a grouped
    # query, where the weight must be aggregated.
    if global_weight == 0:
        return f', {column}'
    return f', AVG({column})'
equal to 0 then then density figure are calculated \ by project, else all projet are merge together and the interaction \ seen in `global_weight` project are taken into account + :param get_weight: Say if we want to recover the weight of the interaction + :param same_gene: Say if we consider as co-localised exon within the \ + same gene :return: The table containing the number of interaction by projects """ logging.debug(f'Recovering interaction ({os.getpid()})') + select_add = get_select_addition(global_weight, get_weight, same_gene) if global_weight == 0: if same_gene: - query = "SELECT exon1, exon2 " \ + query = f"SELECT exon1, exon2{select_add} " \ "FROM cin_exon_interaction " \ f"WHERE weight >= {weight} " \ f"AND id_project = '{project}' " else: - query = f"""SELECT t1.exon1, t1.exon2 + query = f"""SELECT t1.exon1, t1.exon2{select_add} FROM cin_exon_interaction t1, cin_exon t2, cin_exon t3 WHERE t1.weight >= {weight} AND id_project = '{project}' @@ -65,13 +92,13 @@ def get_project_colocalisation(cnx: sqlite3.Connection, project: str, AND t2.id_gene != t3.id_gene""" else: if same_gene: - query = f"SELECT exon1, exon2 " \ + query = f"SELECT exon1, exon2{select_add} " \ f"FROM cin_exon_interaction " \ f"WHERE weight >= {weight} " \ f"GROUP BY exon1, exon2 " \ f"HAVING COUNT(*) >= {global_weight}" else: - query = f"""SELECT t1.exon1, t1.exon2 + query = f"""SELECT t1.exon1, t1.exon2{select_add} FROM cin_exon_interaction t1, cin_exon t2, cin_exon t3 WHERE t1.weight >= {weight} AND t1.exon1 = t2.id