diff --git a/src/nt_composition/make_nt_correlation.py b/src/nt_composition/make_nt_correlation.py index f1fcfb709922c12bc1b56b6d333c044887e02b97..2466cce99b74949f623fde86ca0a61aa1b40fddc 100644 --- a/src/nt_composition/make_nt_correlation.py +++ b/src/nt_composition/make_nt_correlation.py @@ -24,7 +24,7 @@ from itertools import product from random import random import multiprocessing as mp import os -from ..find_interaction_cluster.config import get_communities +from ..find_interaction_cluster.config import get_communities_basefile class NoInteractionError(Exception): @@ -346,7 +346,8 @@ def create_density_fig(df: pd.DataFrame, project: str, ft_type: str, ft: str, def create_density_figure(nt: str, ft_type: str, project: str, weight: int, global_weight: int, - same_gene: bool, compute_mean: bool, + same_gene: bool, inflation: float, + compute_mean: bool, community_size: Optional[int], inter_chr: bool = False, level: str = "exon", logging_level: str = "DISABLE" @@ -365,6 +366,7 @@ def create_density_figure(nt: str, ft_type: str, seen in `global_weight` project are taken into account :param same_gene: Say if we consider as co-localised exon within the \ same gene + :param inflation: The inflation parameter :param compute_mean: True to compute the mean frequency of co-localised \ exons, false to only compute the frequency of one co-localized exons. 
:param community_size: The size of the community to consider in the \ @@ -382,8 +384,8 @@ def create_density_figure(nt: str, ft_type: str, inter_chr=inter_chr) if not outfile.is_file(): exons_bc = recover_exon_in_big_communities(community_size, project, - weight, - global_weight) + weight, global_weight, + inflation, level) cnx = sqlite3.connect(ConfigNt.db_file) arr_interaction = get_project_colocalisation(cnx, project, weight, global_weight, same_gene, @@ -454,6 +456,7 @@ def execute_density_figure_function(di: pd.DataFrame, project : str, ft_type: str, ft: str, weight: int, global_weight: int, same_gene: bool, + inflation: float, compute_mean: bool, community_size: Optional[int], inter_chr: bool = False, @@ -473,6 +476,7 @@ def execute_density_figure_function(di: pd.DataFrame, project : str, seen in `global_weight` project are taken into account :param same_gene: Say if we consider as co-localised exon within the \ same gene + :param inflation: The inflation parameter :param compute_mean: True to compute the mean frequency of co-localised \ exons, false to only compute the frequency of one co-localized exons. 
:param community_size: he size of the community to consider in the \ @@ -484,7 +488,8 @@ def execute_density_figure_function(di: pd.DataFrame, project : str, """ logging.info(f'Working on {project}, {ft_type}, {ft} - {os.getpid()}') r, p = create_density_figure(ft, ft_type, project, weight, - global_weight, same_gene, compute_mean, + global_weight, same_gene, inflation, + compute_mean, community_size, inter_chr, level) if global_weight == 0: return {"project": project, "ft_type": ft_type, @@ -511,8 +516,8 @@ def combine_dic(list_dic: List[Dict]) -> Dict: def recover_exon_in_big_communities(community_size: Optional[int], project: str, weight: int, - global_weight: int - ) -> Optional[np.array]: + global_weight: int, inflation: float, + level: str) -> Optional[np.array]: """ Recover the list of exon present in community with a larger size than \ `community_size` @@ -525,19 +530,22 @@ def recover_exon_in_big_communities(community_size: Optional[int], by project, else all projet are merge together and the interaction \ seen in `global_weight` project are taken into account :param weight: The weight of interaction to consider + :param inflation: The inflation parameter + :param level: The kind of feature to analyse (exon or gene) :return: The list of exon of interest """ if community_size is None: return None outfile = ConfigNt.get_community_file(project, weight, global_weight, - True, "_communities.txt") + True, inflation, level, + "_communities.txt") if not outfile.is_file(): msg = f"The file {outfile} was not found ! You must try " \ f"to launch find_interaction_cluster script with " \ f"the same parameters !"
logging.exception(msg) raise FileNotFoundError(msg) - communities = get_communities(outfile, community_size) + communities = get_communities_basefile(outfile) res = [] for c in communities: res += c @@ -546,7 +554,8 @@ def recover_exon_in_big_communities(community_size: Optional[int], def create_all_frequency_figures(ps: int, weight: int = 1, global_weight: int = 0, ft_type: str = "nt", - same_gene = True, compute_mean: bool = True, + same_gene = True, inflation: float = 1.5, + compute_mean: bool = True, community_size: Optional[int] = None, inter_chr: bool = False, level: str = "exon", logging_level: str = "DISABLE"): @@ -562,6 +571,7 @@ def create_all_frequency_figures(ps: int, weight: int = 1, :param ft_type: The kind of feature to analyse :param same_gene: Say if we consider as co-localised exon within the \ same gene + :param inflation: The inflation parameter :param compute_mean: True to compute the mean frequency of co-localised \ exons, false to only compute the frequency of one co-localized exons. :param community_size: The size of the community to consider in the \ @@ -585,7 +595,7 @@ def create_all_frequency_figures(ps: int, weight: int = 1, processes = [] for project, ft, ft_type in param: args = [di, project, ft_type, ft, weight, global_weight, same_gene, - compute_mean, community_size, inter_chr, level] + inflation, compute_mean, community_size, inter_chr, level] processes.append(pool.apply_async(execute_density_figure_function, args)) results = [proc.get(timeout=None) for proc in processes]