diff --git a/src/find_interaction_cluster/community_figures/__main__.py b/src/find_interaction_cluster/community_figures/__main__.py index 7cef8dd4ac65fccb0d7296154482de8f1822c3dd..e01081b74bdc737a8ddba3584afa4dc77cfbe615 100644 --- a/src/find_interaction_cluster/community_figures/__main__.py +++ b/src/find_interaction_cluster/community_figures/__main__.py @@ -49,11 +49,11 @@ def load_and_check_table(table: str, feature: str, target_col: str): return df -@lp.parse(table="file", output="folder", test_type=["lm", "permutation"], - iteration="20 < iteration") +@lp.parse(table="file", test_type=["lm", "permutation"], + iteration="20 < iteration", sd_community = ["y", "n", "Y", "N"]) def create_community_figures(table: str, feature: str, target_col: str, output: str, outfile: str, test_type: str, - target_kind: str = "", + target_kind: str = "", sd_community: str = "y", iteration: int = 10000) -> None: """ Create a dataframe with a control community, save it as a table and \ @@ -73,13 +73,17 @@ def create_community_figures(table: str, feature: str, target_col: str, target_col. :param iteration: The number of sub samples to create. This parameter \ is only used if test_type = 'permutation' (default 10000). + :param sd_community: y to display the standard deviation for communities \ + false else. """ df = load_and_check_table(table, feature, target_col) if not outfile.endswith(".pdf"): raise FileNameError("The output figure must be in pdf format !") moutfile = Path(output) / outfile + sd_community = sd_community.lower() == 'y' create_community_fig(df, feature, target_col, moutfile, test_type, - target_kind=target_kind, iteration=iteration) + target_kind=target_kind, iteration=iteration, + sd_community=sd_community) if __name__ == "__main__": diff --git a/src/find_interaction_cluster/community_figures/fig_functions.py b/src/find_interaction_cluster/community_figures/fig_functions.py index 124bf0db76fd59256cf69caf18cfedf6d497548b..20747cae31f8df8677c4f55bb701387f6bbc9dea 100644 --- a/src/find_interaction_cluster/community_figures/fig_functions.py +++ b/src/find_interaction_cluster/community_figures/fig_functions.py @@ -315,7 +315,8 @@ def expand_results_perm(df: pd.DataFrame, rdf: pd.DataFrame, target_col: str, def make_barplot(df_bar: pd.DataFrame, outfile: Path, - target_col: str, feature: str, target_kind: str = "") -> None: + target_col: str, feature: str, target_kind: str = "", + sd_community: Optional[str] = "sd") -> None: """ Create a barplot showing the frequency of `nt` for every community \ of exons/gene in `df_bar`. @@ -327,11 +328,13 @@ def make_barplot(df_bar: pd.DataFrame, outfile: Path, target_col. :param target_col: The name of the column containing the data of interest :param feature: The king of feature of interest + :param sd_community: sd to display community error bar, None to display \ + nothing """ sns.set(context="poster") g = sns.catplot(x="community", y=target_col, data=df_bar, kind="point", - ci="sd", aspect=2.5, height=14, errwidth=0.5, capsize=.4, - scale=0.5, + ci=sd_community, aspect=2.5, height=14, errwidth=0.5, + capsize=.4, scale=0.5, palette=["red"] + ["darkgray"] * (df_bar.shape[0] - 1)) g2 = sns.catplot(x="community", y=target_col, data=df_bar, kind="bar", ci="sd", aspect=2.5, height=14, errwidth=0.5, capsize=.4, @@ -347,7 +350,9 @@ def make_barplot(df_bar: pd.DataFrame, outfile: Path, for i, p in enumerate(g2.ax.patches): stats = "*" if df_bara.iloc[i, :]["p-adj"] < 0.05 else "" com = df_bara.iloc[i, :]["community"] - csd = np.std(df_bar.loc[df_bar["community"] == com, target_col]) + csd = 0 + if sd_community == "sd": + csd = np.std(df_bar.loc[df_bar["community"] == com, target_col]) g.ax.annotate(stats, (p.get_x() + p.get_width() / 2., p.get_height() + csd), ha='center', va='center', xytext=(0, 10), fontsize=12, @@ -357,7 +362,8 @@ def make_barplot(df_bar: pd.DataFrame, outfile: Path, def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path, target_col: str, feature: str, - target_kind: str = "") -> None: + target_kind: str = "", + sd_community: Optional[str] = "sd") -> None: """ Create a barplot showing the frequency of `nt` for every community \ of exons/gene in `df_bar`. @@ -369,6 +375,8 @@ def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path, target_col. :param target_col: The name of the column containing the data of interest :param feature: The king of feature of interest + :param sd_community: sd to display community error bar, None to display \ + nothing """ sns.set(context="poster") df_ctrl = df_bar.loc[df_bar[f"id_{feature}"] == 'ctrl', :] @@ -377,13 +385,17 @@ def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path, ci="sd", aspect=2.5, height=14, errwidth=0.5, capsize=.4, palette=["darkgray"] * (df_bar.shape[0])) g = sns.catplot(x="community", y=target_col, data=df_bar, kind="point", - ci="sd", aspect=2.5, height=14, errwidth=0.5, capsize=.4, - scale=0.5, palette=["darkgray"] * (df_bar.shape[0])) + ci=sd_community, aspect=2.5, height=14, errwidth=0.5, + capsize=.4, scale=0.5, + palette=["darkgray"] * (df_bar.shape[0])) xrange = g.ax.get_xlim() + yrange = g.ax.get_ylim() df_ctrl.plot(x="community", y=target_col, kind="scatter", ax=g.ax, yerr="ctrl_std", legend=False, zorder=10, color=(0.8, 0.2, 0.2, 0.4)) g.ax.set_xlim(xrange) + if sd_community is None: + g.ax.set_ylim(yrange) g.fig.subplots_adjust(top=0.9) target_kind = f" ({target_kind})" if target_kind else "" g.fig.suptitle(f"Mean frequency of {target_col}{target_kind}" @@ -395,7 +407,9 @@ def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path, for i, p in enumerate(g2.ax.patches): stats = "*" if df_bara.iloc[i, :]["p-adj"] < 0.05 else "" com = df_bara.iloc[i, :]["community"] - csd = np.std(df_bar.loc[df_bar["community"] == com, target_col]) + csd = 0 + if sd_community == "sd": + csd = np.std(df_bar.loc[df_bar["community"] == com, target_col]) g.ax.annotate(stats, (p.get_x() + p.get_width() / 2., p.get_height() + csd), ha='center', va='center', xytext=(0, 10), fontsize=12, @@ -405,7 +419,7 @@ def make_barplot_perm(df_bar: pd.DataFrame, outfile: Path, def barplot_creation(df_bar: pd.DataFrame, outfig: Path, cpnt: str, test_type: str, feature: str, - target_kind) -> None: + target_kind: str, sd_community: bool) -> None: """ Reformat a dataframe with the enrichment of a nucleotide frequency \ for every feature for every community and then create a \ @@ -421,11 +435,15 @@ def barplot_creation(df_bar: pd.DataFrame, outfig: Path, :param test_type: The type of test to make (permutation or lm) :param target_kind: An optional name that describe a bit further \ target_col. + :param sd_community: True to display the errors bars for communities, + False else. """ + sd_community = "sd" if sd_community else None if test_type == "lm": make_barplot(df_bar, outfig, cpnt, feature, target_kind) else: - make_barplot_perm(df_bar, outfig, cpnt, feature, target_kind) + make_barplot_perm(df_bar, outfig, cpnt, feature, target_kind, + sd_community) def get_feature_by_community(df: pd.DataFrame, feature: str) -> Dict: @@ -461,7 +479,8 @@ def create_community_fig(df: pd.DataFrame, feature: str, outfile_ctrl: Path, test_type: str, dic_com: Optional[Dict] = None, target_kind: str = "", - iteration: int = 10000) -> None: + iteration: int = 10000, + sd_community: bool = True) -> None: """ Create a dataframe with a control community, save it as a table and \ as a barplot figure. @@ -480,6 +499,8 @@ def create_community_fig(df: pd.DataFrame, feature: str, :param target_kind: An optional name that describe a bit further \ target_col. :param iteration: The number of sub samples to create + :param sd_community: True to display the errors bars for communities, + False else. """ if dic_com is None: dic_com = {} if test_type == 'lm' \ @@ -495,4 +516,4 @@ def create_community_fig(df: pd.DataFrame, feature: str, bar_outfile = str(outfile_ctrl).replace(".pdf", "_bar.txt") df_bar.to_csv(bar_outfile, sep="\t", index=False) barplot_creation(df_bar, outfile_ctrl, target_col, test_type, feature, - target_kind) + target_kind, sd_community) diff --git a/src/nt_composition/__main__.py b/src/nt_composition/__main__.py index 8948a419cae14906965339c7d97260551c1f709e..9b0bd1187bf4a50fead22fe6f52938920a4ca6f7 100644 --- a/src/nt_composition/__main__.py +++ b/src/nt_composition/__main__.py @@ -22,6 +22,7 @@ def launcher(weight: int = 1, global_weight: int = 0, ft_type: str = 'nt', same_gene: bool = False, compute_mean: bool = True, iteration: int = 10000, kind: str = 'density', community_size: int = -1, inter_chr: bool = False, + level: str = "exon", logging_level: str = "DISABLE"): """ Launch the creation of density file. @@ -48,6 +49,7 @@ def launcher(weight: int = 1, global_weight: int = 0, ft_type: str = 'nt', than community size. if community_size = -1. This option is deactivated. :param inter_chr: True to only get inter-chromosomal interactions \ False else (default: False) + :param level: The kind of feature to analyse (exon or gene) :param logging_level: The level of data to display (default 'DISABLE') """ get_interactions_number(weight, same_gene, inter_chr, logging_level) @@ -56,12 +58,12 @@ def launcher(weight: int = 1, global_weight: int = 0, ft_type: str = 'nt', if kind == "density": create_all_frequency_figures(ConfigNt.cpu, weight, global_weight, ft_type, same_gene, compute_mean, - community_size, inter_chr, + community_size, inter_chr, level, logging_level) else: create_all_distance_figures(ConfigNt.cpu, weight, global_weight, ft_type, same_gene, iteration, inter_chr, - logging_level) + level, logging_level) launcher() \ No newline at end of file diff --git a/src/nt_composition/distance.py b/src/nt_composition/distance.py index 5ec3ebdbc1b0695e8f98cfcd80c5a3ed9a9b5e40..41c64c12bc0406061ecaa39c18b05b25f3f6b800 100644 --- a/src/nt_composition/distance.py +++ b/src/nt_composition/distance.py @@ -123,7 +123,11 @@ def compute_mean_distance(arr_interaction: np.array, # for exon1, exon2 in arr_interaction])) res = [] for exon1, exon2 in arr_interaction: - if exon1 != exon2: + if ( + exon1 != exon2 + and dic_freq[exon1] is not None + and dic_freq[exon2] is not None + ): val = abs(dic_freq[exon1] - dic_freq[exon2]) res.append(val) return float(np.nanmean(res)) @@ -204,7 +208,7 @@ def create_distance_table(arr_interaction: np.array, """ dic = {"iteration": [], 'kind': [], 'distance': [], 'coloc': []} - # Â Add ctrl distance to the dictionary + # Add ctrl distance to the dictionary ctrl_distance, nb_int = compute_controls_distances(arr_interaction, dic_freq, iteration) for k, d in enumerate(ctrl_distance): @@ -223,7 +227,7 @@ def create_distance_table(arr_interaction: np.array, def create_distance_figure(df: pd.DataFrame, project: str, figfile: Path, - ft: str, iteration: int): + ft: str, iteration: int, level: str): """ Create a barplot figure showing the nucleotide distance between \ co-localised exons in a ChIA-PET project and what this distance would be \ @@ -234,6 +238,7 @@ def create_distance_figure(df: pd.DataFrame, project: str, figfile: Path, :param figfile: The file where the figure will be created :param ft: The feature of interest :param iteration: Number of iteration + :param level: The kind of feature to analyse (exon or gene) """ sns.set(context='talk') pval = df.loc[-df['kind'].isin(['CTRL', project]), 'kind'].values[0] @@ -245,7 +250,7 @@ def create_distance_figure(df: pd.DataFrame, project: str, figfile: Path, loc='inside', box_pair_list=[['CTRL', project, pval]], linewidth=2, min_pval=1/iteration) plt.title(f"Difference of {ft} frequency distance between " - f"co-localized exons and\nthe one of those same exons " + f"co-localized {level} and\nthe one of those same {level} " f"if the co-localisation where random ({iteration} " f"samples).") g.savefig(figfile) @@ -255,7 +260,7 @@ def create_distance_figure(df: pd.DataFrame, project: str, figfile: Path, def create_figures(ft: str, ft_type: str, project: str, weight: int, global_weight: int, same_gene: bool, iteration: int, inter_chr: bool = False, - logging_level: str = "DISABLE" + level: str = "exon", logging_level: str = "DISABLE" ) -> None: """ Create a distance figure for the project `project` for the nucleotide \ @@ -274,6 +279,7 @@ def create_figures(ft: str, ft_type: str, :param inter_chr: True to only get inter-chromosomal interactions \ False else :param logging_level: The level of information to display + :param level: The kind of feature to analyse (exon or gene) :param iteration: The number of iteration """ logging_def(ConfigNt.interaction, __file__, logging_level) @@ -285,8 +291,9 @@ def create_figures(ft: str, ft_type: str, cnx = sqlite3.connect(ConfigNt.db_file) arr_interaction = get_project_colocalisation(cnx, project, weight, global_weight, same_gene, - inter_chr=inter_chr) - dic_freq = get_frequency_dic(cnx, ft, ft_type) + inter_chr=inter_chr, + level=level) + dic_freq = get_frequency_dic(cnx, ft, ft_type, level) df = create_distance_table(arr_interaction, dic_freq, project, iteration, ft, ft_type) df.to_csv(outfile, sep="\t", index=False) @@ -294,7 +301,7 @@ def create_figures(ft: str, ft_type: str, ft_type, ft, fig=True, kind="distance", inter_chr=inter_chr) - create_distance_figure(df, project, figfile, ft, iteration) + create_distance_figure(df, project, figfile, ft, iteration, level) def execute_density_figure_function(project: str, @@ -302,7 +309,8 @@ def execute_density_figure_function(project: str, global_weight: int, same_gene: bool, iteration: int, - inter_chr: bool = False) -> None: + inter_chr: bool = False, + level: str = "exon") -> None: """ Execute create_density_figure and organized the results in a dictionary. @@ -320,17 +328,18 @@ def execute_density_figure_function(project: str, the control set. :param inter_chr: True to only get inter-chromosomal interactions \ False else + :param level: The kind of feature to analyse (exon or gene) """ logging.info(f'Working on {project}, {ft_type}, {ft} - {os.getpid()}') create_figures(ft, ft_type, project, weight, - global_weight, same_gene, iteration, inter_chr) + global_weight, same_gene, iteration, inter_chr, level) def create_all_distance_figures(ps: int, weight: int = 1, global_weight: int = 0, ft_type: str = "nt", same_gene: bool = True, iteration: int = 10000, - inter_chr: bool = False, + inter_chr: bool = False, level: str = "exon", logging_level: str = "DISABLE"): """ Make density figure for every selected projects. @@ -348,6 +357,7 @@ def create_all_distance_figures(ps: int, weight: int = 1, :param inter_chr: True to only get inter-chromosomal interactions \ False else :param ps: The number of processes to create + :param level: The kind of feature to analyse (exon or gene) """ logging_def(ConfigNt.interaction, __file__, logging_level) if global_weight == 0: @@ -360,7 +370,7 @@ def create_all_distance_figures(ps: int, weight: int = 1, processes = [] for project, ft, ft_type in param: args = [project, ft_type, ft, weight, global_weight, same_gene, - iteration, inter_chr] + iteration, inter_chr, level] processes.append(pool.apply_async(execute_density_figure_function, args)) for proc in processes: diff --git a/src/nt_composition/make_nt_correlation.py b/src/nt_composition/make_nt_correlation.py index 58969b9e7c329f8729656d85dd995a688a0b2d47..f1fcfb709922c12bc1b56b6d333c044887e02b97 100644 --- a/src/nt_composition/make_nt_correlation.py +++ b/src/nt_composition/make_nt_correlation.py @@ -152,8 +152,8 @@ def get_project_colocalisation(cnx: sqlite3.Connection, project: str, return result -def get_frequency_dic(cnx: sqlite3.Connection, ft: str, ft_type: str - ) -> Dict[str, float]: +def get_frequency_dic(cnx: sqlite3.Connection, ft: str, ft_type: str, + level: str = "exon") -> Dict[str, float]: """ Get the frequency of a feature for every exon in \ the chia-pet database. @@ -161,23 +161,30 @@ def get_frequency_dic(cnx: sqlite3.Connection, ft: str, ft_type: str :param cnx: Connection to chia-pet database :param ft: The feature of interest :param ft_type: The type of feature of interest + :param level: The kind of feature to analyse (exon or gene) :return: The frequency dic """ logging.debug('Calculating frequency table') - query = "SELECT id_exon, frequency " \ - "FROM cin_exon_frequency " \ + reg = '' + if level == "gene": + reg = "AND region='gene'" + query = f"SELECT id_{level}, frequency " \ + f"FROM cin_{level}_frequency " \ f"WHERE ft = '{ft}' " \ - f"AND ft_type = '{ft_type}'" + f"AND ft_type = '{ft_type}' " \ + f"{reg}" + c = cnx.cursor() c.execute(query) result = c.fetchall() if len(result) == 0: - msg = f'No Frequencies found for {ft} ({ft_type})' + msg = f'No Frequencies found for {level} {ft} ({ft_type})' logging.exception(msg) raise NoInteractionError(msg) - dic = {} - for exon, freq in result: - dic[exon] = freq + if level == "exon": + dic = {exon: freq for exon, freq in result} + else: + dic = {str(exon): freq for exon, freq in result} logging.debug({k: dic[k] for k in list(dic.keys())[0:5]}) return dic @@ -203,7 +210,8 @@ def get_dic_co_regulated_exon(arr_interaction: np.array): return d -def density_mean(arr_interaction: np.array, dic_freq: Dict[str, float]): +def density_mean(arr_interaction: np.array, dic_freq: Dict[str, float], + level: str): """ Create the density table, a table showing the frequency of \ a nucleotide in every exon in a chia-pet project and the mean \ @@ -211,26 +219,29 @@ def density_mean(arr_interaction: np.array, dic_freq: Dict[str, float]): :param arr_interaction: array of interaction between exons. :param dic_freq: The frequency dataframe. + :param level: The kind of feature to analyse (exon or gene) :return: The density table """ exons_dic = get_dic_co_regulated_exon(arr_interaction) - dic = {'exon': [], 'freq_exon': [], 'freq_coloc_exon': [], 'oexon': []} + dic = {f'{level}': [], f'freq_{level}': [], f'freq_coloc_{level}': [], + f'o{level}': []} for exon in exons_dic.keys(): freq_ex = dic_freq[exon] oexon = np.unique(exons_dic[exon]) freq_oexon = np.nanmean(np.asarray([dic_freq[ex] for ex in oexon], dtype=float)) if freq_ex is not None and not np.isnan(freq_oexon): - dic['exon'].append(exon) - dic['freq_exon'].append(freq_ex) - dic['freq_coloc_exon'].append(freq_oexon) - dic['oexon'].append(", ".join(oexon)) + dic[level].append(exon) + dic[f'freq_{level}'].append(freq_ex) + dic[f'freq_coloc_{level}'].append(freq_oexon) + dic[f'o{level}'].append(", ".join(oexon)) df = pd.DataFrame(dic) logging.debug(df.head()) return df -def simple_density(arr_interaction: np.array, dic_freq: Dict[str, float]): +def simple_density(arr_interaction: np.array, dic_freq: Dict[str, float], + level: str): """ Create the density table, a table showing the frequency of \ a nucleotide in every exon in a chia-pet project and the \ @@ -238,24 +249,27 @@ def simple_density(arr_interaction: np.array, dic_freq: Dict[str, float]): :param arr_interaction: array of interaction between exons. :param dic_freq: The frequency dataframe. + :param level: The kind of feature to analyse (exon or gene) :return: The density table """ - dic = {'exon1': [], 'freq_exon': [], 'freq_coloc_exon': [], 'exon2': []} + dic = {f'{level}1': [], f'freq_{level}': [], + f'freq_coloc_{level}': [], f'{level}2': []} for exon1, exon2 in arr_interaction: freq_ex = dic_freq[exon1] freq_ex2 = dic_freq[exon2] if freq_ex is not None and freq_ex2 is not None: - dic['exon1'].append(exon1) - dic['freq_exon'].append(freq_ex) - dic['freq_coloc_exon'].append(freq_ex2) - dic['exon2'].append(exon2) + dic[f'{level}1'].append(exon1) + dic[f'freq_{level}'].append(freq_ex) + dic[f'freq_coloc_{level}'].append(freq_ex2) + dic[f'{level}2'].append(exon2) df = pd.DataFrame(dic) logging.debug(df.head()) return df def create_density_table(arr_interaction: np.array, dic_freq: Dict[str, float], - compute_mean: bool = False) -> pd.DataFrame: + compute_mean: bool = False, level: str = "exon" + ) -> pd.DataFrame: """ Create the density table, a table showing the frequency of \ a nucleotide in every exon in a chia-pet project and the mean \ @@ -266,19 +280,21 @@ def create_density_table(arr_interaction: np.array, dic_freq: Dict[str, float], :param dic_freq: The frequency dataframe. :param compute_mean: True to compute the mean frequency of co-localised \ exons, false to only compute the frequency of one co-localized exons. + :param level: The kind of feature to analyse (exon or gene) :return: The density table """ logging.debug(f'Calculating density table ({os.getpid()})') if compute_mean: - return density_mean(arr_interaction, dic_freq) + return density_mean(arr_interaction, dic_freq, level) else: - return simple_density(arr_interaction, dic_freq) + return simple_density(arr_interaction, dic_freq, level) def create_density_fig(df: pd.DataFrame, project: str, ft_type: str, ft: str, weight: int, global_weight: int, community_size: Optional[int], - inter_chr: bool) -> Tuple[float, float]: + inter_chr: bool, level: str + ) -> Tuple[float, float]: """ Compute a density file showing if the feature frequency of \ an exons correlates with the frequency in other co-localised exons. @@ -296,23 +312,26 @@ def create_density_fig(df: pd.DataFrame, project: str, ft_type: str, ft: str, seen in `global_weight` project are taken into account :param inter_chr: True to only get inter-chromosomal interactions \ False else + :param level: The kind of feature to analyse (exon or gene) :return: The correlation and the p-value """ logging.debug('Creating the density figure') sns.set() sns.set_context('talk') plt.figure(figsize=(20, 12)) - df2 = df[['freq_exon', 'freq_coloc_exon']].copy() - s, i, r, p, stderr = linregress(df.freq_exon, df.freq_coloc_exon) - sns.kdeplot(df2.freq_exon, df2.freq_coloc_exon, shade=True, + df2 = df[[f'freq_{level}', f'freq_coloc_{level}']].copy() + s, i, r, p, stderr = linregress(df[f"freq_{level}"], + df[f'freq_coloc_{level}']) + sns.kdeplot(df2[f'freq_{level}'], df2[f'freq_coloc_{level}'], shade=True, shade_lowest=False, cmap="YlOrBr") - plt.plot(df.freq_exon, i + s * df.freq_exon, 'r', + plt.plot(df[f"freq_{level}"], i + s * df[f"freq_{level}"], 'r', label=f'r={round(r,4)}, p={round(p, 4)}') plt.legend() - plt.xlabel(f"Freq of {ft} in an exon") - plt.ylabel(f"Freq of {ft} in co-localized exons") - title = f'Freq of {ft} of exons and their co-localized partner in ' \ + ml = "an exon" if level == "exon" else "a gene" + plt.xlabel(f"Freq of {ft} in {ml}") + plt.ylabel(f"Freq of {ft} in co-localized {level}s") + title = f'Freq of {ft} of {level}s and their co-localized partner in ' \ f'{project}' if inter_chr: title += '\n(inter chromosomal interactions)' @@ -329,7 +348,7 @@ def create_density_figure(nt: str, ft_type: str, project: str, weight: int, global_weight: int, same_gene: bool, compute_mean: bool, community_size: Optional[int], - inter_chr: bool = False, + inter_chr: bool = False, level: str = "exon", logging_level: str = "DISABLE" ) -> Tuple[float, float]: """ @@ -352,6 +371,7 @@ def create_density_figure(nt: str, ft_type: str, analysis. :param inter_chr: True to only get inter-chromosomal interactions \ False else + :param level: The kind of feature to analyse (exon or gene) :param logging_level: The level of information to display :return: The correlation and the p-value """ @@ -368,17 +388,20 @@ def create_density_figure(nt: str, ft_type: str, arr_interaction = get_project_colocalisation(cnx, project, weight, global_weight, same_gene, False, exons_bc, - inter_chr) - dic_freq = get_frequency_dic(cnx, nt, ft_type) - df = create_density_table(arr_interaction, dic_freq, compute_mean) + inter_chr, level=level) + dic_freq = get_frequency_dic(cnx, nt, ft_type, level) + df = create_density_table(arr_interaction, dic_freq, compute_mean, + level) df.to_csv(outfile, sep="\t", index=False) r, p = create_density_fig(df, project, ft_type, nt, weight, - global_weight, community_size, inter_chr) + global_weight, community_size, inter_chr, + level) else: logging.debug(f'The file {outfile} exist, recovering data ' f'({os.getpid()})') df = pd.read_csv(outfile, sep="\t") - s, i, r, p, stderr = linregress(df.freq_exon, df.freq_coloc_exon) + s, i, r, p, stderr = linregress(df[f"freq_{level}"], + df[f"freq_coloc_{level}"]) return r, p @@ -433,7 +456,8 @@ def execute_density_figure_function(di: pd.DataFrame, project : str, same_gene: bool, compute_mean: bool, community_size: Optional[int], - inter_chr: bool = False + inter_chr: bool = False, + level: str= "exon", ) -> Dict[str, Any]: """ Execute create_density_figure and organized the results in a dictionary. @@ -455,20 +479,20 @@ def execute_density_figure_function(di: pd.DataFrame, project : str, analysis. :param inter_chr: True to only get inter-chromosomal interactions \ False else + :param level: The kind of feature to analyse (exon or gene) :return: """ logging.info(f'Working on {project}, {ft_type}, {ft} - {os.getpid()}') r, p = create_density_figure(ft, ft_type, project, weight, global_weight, same_gene, compute_mean, - community_size, inter_chr) + community_size, inter_chr, level) if global_weight == 0: - tmp = {"project": project, "ft_type": ft_type, + return {"project": project, "ft_type": ft_type, "ft": ft, "cor": r, "pval": p, 'nb_interaction': di[di['projects'] == project].iloc[0, 1]} else: - tmp = {"project": project, "ft_type": ft_type, + return {"project": project, "ft_type": ft_type, "ft": ft, "cor": r, "pval": p} - return tmp def combine_dic(list_dic: List[Dict]) -> Dict: @@ -524,7 +548,7 @@ def create_all_frequency_figures(ps: int, weight: int = 1, global_weight: int = 0, ft_type: str = "nt", same_gene = True, compute_mean: bool = True, community_size: Optional[int] = None, - inter_chr: bool = False, + inter_chr: bool = False, level: str = "exon", logging_level: str = "DISABLE"): """ Make density figure for every selected projects. @@ -545,6 +569,7 @@ def create_all_frequency_figures(ps: int, weight: int = 1, :param ps: The number of processes to create :param inter_chr: True to only get inter-chromosomal interactions \ False else + :param level: The kind of feature to analyse (exon or gene) """ logging_def(ConfigNt.interaction, __file__, logging_level) if global_weight == 0: @@ -560,7 +585,7 @@ def create_all_frequency_figures(ps: int, weight: int = 1, processes = [] for project, ft, ft_type in param: args = [di, project, ft_type, ft, weight, global_weight, same_gene, - compute_mean, community_size, inter_chr] + compute_mean, community_size, inter_chr, level] processes.append(pool.apply_async(execute_density_figure_function, args)) results = [proc.get(timeout=None) for proc in processes]