diff --git a/src/nt_composition/make_nt_correlation.py b/src/nt_composition/make_nt_correlation.py index c94e1080c24841e0bb743b5a79e022395a8f11cb..aae5b2122d1f439783d3bb82e4a33d92791a24c5 100644 --- a/src/nt_composition/make_nt_correlation.py +++ b/src/nt_composition/make_nt_correlation.py @@ -80,7 +80,9 @@ def get_project_colocalisation(cnx: sqlite3.Connection, project: str, :param exon_bc: exons in big communities :param inter_chr: True to only get inter-chromosomal interactions \ False else - :param the feature that we want to recover + :param level: the feature that we want to recover + :param cell: Interactions are only selected from projects made on \ + a specific cell line (ALL to disable this filter). (default ALL) :return: The table containing the number of interaction by projects """ logging.debug(f'Recovering interaction ({os.getpid()})') @@ -333,7 +335,7 @@ def create_density_fig(df: pd.DataFrame, project: str, ft_type: str, ft: str, plt.xlabel(f"Freq of {ft} in {ml}") plt.ylabel(f"Freq of {ft} in co-localized {level}s") title = f'Freq of {ft} of {level}s and their co-localized partner in ' \ - f'{project}' + f'{project}' if inter_chr: title += '\n(inter chromosomal interactions)' plt.title(title) @@ -347,7 +349,7 @@ def create_density_fig(df: pd.DataFrame, project: str, ft_type: str, ft: str, def create_density_figure(nt: str, ft_type: str, project: str, weight: int, global_weight: int, - same_gene: bool, inflation: float, + same_gene: bool, inflation: float, cell_line: str, compute_mean: bool, community_size: Optional[int], inter_chr: bool = False, level: str = "exon", @@ -368,6 +370,8 @@ def create_density_figure(nt: str, ft_type: str, :param same_gene: Say if we consider as co-localised exon within the \ same gene :param inflation: The inflation parameter + :param cell_line: Interactions are only selected from projects made on \ + a specific cell line (ALL to disable this filter). (default ALL) :param compute_mean: True to compute the mean frequency of co-localised \ exons, false to only compute the frequency of one co-localized exons. :param community_size: The size of the community to consider in the \ @@ -386,7 +390,7 @@ def create_density_figure(nt: str, ft_type: str, if not outfile.is_file(): exons_bc = recover_exon_in_big_communities(community_size, project, weight, global_weight, - inflation, level) + inflation, cell_line, level) cnx = sqlite3.connect(ConfigNt.db_file) arr_interaction = get_project_colocalisation(cnx, project, weight, global_weight, same_gene, @@ -453,15 +457,16 @@ def create_scatterplot(df_cor: pd.DataFrame, ft_type: str, ft: str, plt.close() -def execute_density_figure_function(di: pd.DataFrame, project : str, +def execute_density_figure_function(di: pd.DataFrame, project: str, ft_type: str, ft: str, weight: int, global_weight: int, same_gene: bool, inflation: float, + cell_line: str, compute_mean: bool, community_size: Optional[int], inter_chr: bool = False, - level: str= "exon", + level: str = "exon", ) -> Dict[str, Any]: """ Execute create_density_figure and organized the results in a dictionary. @@ -478,6 +483,8 @@ def execute_density_figure_function(di: pd.DataFrame, project : str, :param same_gene: Say if we consider as co-localised exon within the \ same gene :param inflation: The inflation parameter + :param cell_line: Interactions are only selected from projects made on \ + a specific cell line (ALL to disable this filter). (default ALL) :param compute_mean: True to compute the mean frequency of co-localised \ exons, false to only compute the frequency of one co-localized exons. :param community_size: he size of the community to consider in the \ @@ -490,15 +497,15 @@ def execute_density_figure_function(di: pd.DataFrame, project : str, logging.info(f'Working on {project}, {ft_type}, {ft} - {os.getpid()}') r, p = create_density_figure(ft, ft_type, project, weight, global_weight, same_gene, inflation, - compute_mean, + cell_line, compute_mean, community_size, inter_chr, level) if global_weight == 0: return {"project": project, "ft_type": ft_type, - "ft": ft, "cor": r, "pval": p, - 'nb_interaction': di[di['projects'] == project].iloc[0, 1]} + "ft": ft, "cor": r, "pval": p, + 'nb_interaction': di[di['projects'] == project].iloc[0, 1]} else: return {"project": project, "ft_type": ft_type, - "ft": ft, "cor": r, "pval": p} + "ft": ft, "cor": r, "pval": p} def combine_dic(list_dic: List[Dict]) -> Dict: @@ -518,6 +525,7 @@ def combine_dic(list_dic: List[Dict]) -> Dict: def recover_exon_in_big_communities(community_size: Optional[int], project: str, weight: int, global_weight: int, inflation: float, + cell_line: str, level: str) -> Optional[np.array]: """ Recover the list of exon present in community with a larger size than \ @@ -532,13 +540,15 @@ def recover_exon_in_big_communities(community_size: Optional[int], seen in `global_weight` project are taken into account :param weight: The weight of interaction to consider :parma inflation: The inflation parameter + :param cell_line: Interactions are only selected from projects made on \ + a specific cell line (ALL to disable this filter). (default ALL) :param level: The kind of feature to analyse (exon or gene) :return: The list of exon of interest """ if community_size is None: return None outfile = ConfigNt.get_community_file(project, weight, global_weight, - True, inflation, level, + True, inflation, cell_line, level, "_communities.txt") if not outfile.is_file(): msg = f"The file {outfile} was not found ! You must try " \ @@ -555,7 +565,8 @@ def recover_exon_in_big_communities(community_size: Optional[int], def create_all_frequency_figures(ps: int, weight: int = 1, global_weight: int = 0, ft_type: str = "nt", - same_gene = True, inflation: float = 1.5, + same_gene=True, inflation: float = 1.5, + cell_line: str = "ALL", compute_mean: bool = True, community_size: Optional[int] = None, inter_chr: bool = False, level: str = "exon", @@ -573,6 +584,8 @@ def create_all_frequency_figures(ps: int, weight: int = 1, :param same_gene: Say if we consider as co-localised exon within the \ same gene :param inflation: The inflation parameter + :param cell_line: Interactions are only selected from projects made on \ + a specific cell line (ALL to disable this filter). (default ALL) :param compute_mean: True to compute the mean frequency of co-localised \ exons, false to only compute the frequency of one co-localized exons. :param community_size: The size of the community to consider in the \ @@ -596,7 +609,8 @@ def create_all_frequency_figures(ps: int, weight: int = 1, processes = [] for project, ft, ft_type in param: args = [di, project, ft_type, ft, weight, global_weight, same_gene, - inflation, compute_mean, community_size, inter_chr, level] + inflation, cell_line, compute_mean, community_size, inter_chr, + level] processes.append(pool.apply_async(execute_density_figure_function, args)) results = [proc.get(timeout=None) for proc in processes]