diff --git a/src/nt_composition/__main__.py b/src/nt_composition/__main__.py index 5f4859a4691bd9213c4c43aa3013f8216035e0b3..dd1bb8fa4a62dacbb83b031e61a6935b3b6eeb2f 100644 --- a/src/nt_composition/__main__.py +++ b/src/nt_composition/__main__.py @@ -12,21 +12,23 @@ from .make_nt_correlation import create_all_frequency_figures from .get_projects_interaction import get_interactions_number import lazyparser as lp -@lp.parse(weight='weight > 0') -def launcher(weight: int = 1, global_weight: int = 0, +@lp.parse(weight='weight > 0', ft_type=['nt', 'dnt', 'tnt', 'codon', 'aa', + 'properties']) +def launcher(weight: int = 1, global_weight: int = 0, ft_type: str = 'nt', logging_level: str = "DISABLE"): """ Launch the creation of density file. - :param The weight of interaction to consider + :param weight: The weight of interaction to consider (default 1) :param global_weight: The global weight to consider. if \ the global weight is equal to 0 then then density figure are calculated \ - by project, else all projet are merge together and the interaction \ - seen in `global_weight` project are taken into account - :param logging_level: The level of data to display + by project, else all project are merge together and the interaction \ + seen in `global_weight` project are taken into account (default 0) + :param ft_type: The feature type of interest (default 'nt') + :param logging_level: The level of data to display (default 'DISABLE') """ get_interactions_number(weight, logging_level) - create_all_frequency_figures(ConfigNt.cpu, weight, global_weight, + create_all_frequency_figures(ConfigNt.cpu, weight, global_weight, ft_type, logging_level) diff --git a/src/nt_composition/make_nt_correlation.py b/src/nt_composition/make_nt_correlation.py index 7234a2625dd46c784d8c79d3599a7cf962c30a94..6283ecb396c08c7f6440fac28261eae4346185d4 100644 --- a/src/nt_composition/make_nt_correlation.py +++ b/src/nt_composition/make_nt_correlation.py @@ -102,27 +102,27 @@ def get_all_exon_interacting_with_another(exon: str, arr: np.array return exons[exons != exon] -def get_frequency_dic(cnx: sqlite3.Connection, nt: str, ft_type: str +def get_frequency_dic(cnx: sqlite3.Connection, ft: str, ft_type: str ) -> Dict[str, float]: """ - Get the frequency of a nucleotide for every exon in \ + Get the frequency of a feature for every exon in \ the chia-pet database. :param cnx: Connection to chia-pet database - :param nt: The nucleotide of interest + :param ft: The feature of interest :param ft_type: The type of feature of interest :return: The frequency dic """ logging.debug('Calculating frequency table') query = "SELECT id_exon, frequency " \ "FROM cin_exon_frequency " \ - f"WHERE ft = '{nt}' " \ + f"WHERE ft = '{ft}' " \ f"AND ft_type = '{ft_type}'" c = cnx.cursor() c.execute(query) result = c.fetchall() if len(result) == 0: - msg = f'No Frequencies found for {nt} ({ft_type})' + msg = f'No Frequencies found for {ft} ({ft_type})' logging.exception(msg) raise NoInteractionError(msg) dic = {} @@ -164,10 +164,10 @@ def create_density_table(arr_interaction: np.array, dic_freq: Dict[str, float], return df -def create_density_fig(df: pd.DataFrame, project: str, ft_type: str, nt: str, +def create_density_fig(df: pd.DataFrame, project: str, ft_type: str, ft: str, weight: int, global_weight: int) -> Tuple[float, float]: """ - Compute a density file showing if the nucleotide frequency of \ + Compute a density file showing if the feature frequency of \ an exons correlates with the frequency in other co-localised exons. :param df: The dataframe containing frequency of exons and \ @@ -175,7 +175,7 @@ def create_density_fig(df: pd.DataFrame, project: str, ft_type: str, nt: str, :param project: The name of the project where the co-localisation \ where observed :param ft_type: The type of feature of interest - :param nt: The nucleotide of interest + :param ft: The feature of interest :param weight: The minimum weight of interaction to consider :param global_weight: The global weight to consider. if \ the global weight is equal to 0 then then density figure are calculated \ @@ -195,12 +195,12 @@ def create_density_fig(df: pd.DataFrame, project: str, ft_type: str, nt: str, plt.plot(df.freq_exon, i + s * df.freq_exon, 'r', label=f'r={round(r,4)}, p={round(p, 4)}') plt.legend() - plt.xlabel(f"Freq of {nt} in an exon") - plt.ylabel(f"Freq of {nt} in co-localized exons") - plt.title(f'Freq of {nt} of exons and their co-localized partner in ' + plt.xlabel(f"Freq of {ft} in an exon") + plt.ylabel(f"Freq of {ft} in co-localized exons") + plt.title(f'Freq of {ft} of exons and their co-localized partner in ' f'{project}') plt.savefig(ConfigNt.get_density_file(weight, global_weight, project, - ft_type, nt, fig=True)) + ft_type, ft, fig=True)) plt.close() return r, p @@ -326,7 +326,7 @@ def combine_dic(list_dic: List[Dict]) -> Dict: def create_all_frequency_figures(ps: int, weight: int = 1, - global_weight: int = 0, + global_weight: int = 0, ft_type: str = "nt", logging_level: str = "DISABLE"): """ Make density figure for every selected projects. @@ -337,6 +337,7 @@ def create_all_frequency_figures(ps: int, weight: int = 1, the global weight is equal to 0 then then density figure are calculated \ by project, else all projet are merge together and the interaction \ seen in `global_weight` project are taken into account + :param ft_type: The kind of feature to analyse :param ps: The number of processes to create """ logging_def(ConfigNt.interaction, __file__, logging_level) @@ -349,12 +350,12 @@ def create_all_frequency_figures(ps: int, weight: int = 1, # with open(ConfigNt.selected_project, 'r') as f: # projects = f.read().splitlines() # projects = projects[:-4] - nt_list = ['A', 'C', 'G', 'T', 'S', 'W'] - param = product(projects, nt_list, ['nt']) + ft_list = ConfigNt.get_features(ft_type) + param = product(projects, ft_list, [ft_type]) pool = mp.Pool(processes=ps) processes = [] - for project, nt, ft_type in param: - args = [di, project, ft_type, nt, weight, global_weight] + for project, ft, ft_type in param: + args = [di, project, ft_type, ft, weight, global_weight] processes.append(pool.apply_async(execute_density_figure_function, args)) results = [] @@ -366,8 +367,8 @@ def create_all_frequency_figures(ps: int, weight: int = 1, 'ALL', fig=False), sep="\t") if global_weight == 0: - for nt in nt_list: - create_scatterplot(df_corr, "nt", nt, weight, global_weight) + for ft in ft_list: + create_scatterplot(df_corr, ft_type, ft, weight, global_weight) if __name__ == "__main__":