diff --git a/src/find_interaction_cluster/community_calibration.py b/src/find_interaction_cluster/community_calibration.py index 3363835827d77b5b111916a2193a33f44564e113..eb7cc0e120af6d28192189ddadb53b7f70c58d47 100644 --- a/src/find_interaction_cluster/community_calibration.py +++ b/src/find_interaction_cluster/community_calibration.py @@ -21,7 +21,22 @@ from pathlib import Path import seaborn as sns from math import log from .community_finder import logging_def, get_project_colocalisation, \ - write_interaction_file, create_graph, find_communities + create_graph, find_communities + + +def write_input(arr_interaction: np.array, outfile: Path, use_weight: bool): + """ + :param arr_interaction: Each couple of co-localized features within a \ + project. + :param outfile: The input file to write + :param use_weight: Say if we want to write the weight into the result file. + :return: + """ + with outfile.open('w') as f: + for exon1, exon2, cweight in arr_interaction: + if not use_weight: + cweight = 1 + f.write(f"{exon1}\t{exon2}\t{cweight}\n") def get_out_name(weight: int, global_weight: int, inflation: float, @@ -41,14 +56,23 @@ def get_out_name(weight: int, global_weight: int, inflation: float, same gene (True) or not (False) (default False) :param feature: The feature we want to analyse (default 'exon') :param use_weight: Say if we want to write the weight into the result file. 
- :return: + :return: The file containing communities, + the hipMCL input file and the hipMCL output file """ w = "weigthed" if use_weight else "unweigthed" if global_weight != 0: project = f"global-weight-{global_weight}" - return ConfigGraph.community_calibration_folder / "community_files" / \ + output = ConfigGraph.community_calibration_folder / "community_files" / \ f"{project}_weight-{weight}_same_gene-{same_gene}_{feature}_" \ f"{inflation}_{w}.txt" + input_hip = output.parent / "hipMCL_files" / \ + output.name.replace(".txt", "_input.txt") + output_hip = input_hip.parent / input_hip.name.replace("_input.txt", + "_output.txt") + output.parent.mkdir(exist_ok=True, parents=True) + input_hip.parent.mkdir(exist_ok=True, parents=True) + return output, input_hip, output_hip + def get_figname(weight: int, global_weight: int, @@ -97,23 +121,18 @@ def community_finder(weight: int, global_weight: int, inflation: float, """ inflation = round(inflation, 2) logging.info(f"Working with inflation {inflation}") - outfile = get_out_name(weight, global_weight, inflation, - project, same_gene, feature, use_weight) - outfile.parent.mkdir(exist_ok=True, parents=True) + outfile, in_hipmcl, out_hipmcl = get_out_name( + weight, global_weight, inflation, project, same_gene, feature, + use_weight) if outfile.is_file(): return pd.read_csv(outfile, sep="\t") cnx = sqlite3.connect(ConfigGraph.db_file) interaction = get_project_colocalisation(cnx, project, weight, global_weight, same_gene, True, level=feature) - outfileg, result_file = write_interaction_file(interaction, project, - weight, global_weight, - same_gene, - inflation, - feature=feature, - use_weight=use_weight) + write_input(interaction, in_hipmcl, use_weight) graph = create_graph(interaction) - df, dic_community = find_communities(graph, project, outfileg, result_file, + df, dic_community = find_communities(graph, project, in_hipmcl, out_hipmcl, feature, inflation=inflation, compute_ec_cov=False) logging.debug('Writing results ...') @@ 
-240,10 +259,10 @@ def create_community_size_fig(df_infl: pd.DataFrame, fig_name: Path) -> None: @lp.parse(weight=range(1, 11), global_weight=range(11), feature=('gene', 'exon'), istart="1.1 <= istart < 2.5", istop="1.1 < istop <= 2.5", istep="0 < istep <= 1") -def make_calibration(weight: int, global_weight: int, istart: float =1.1, - istop: float=2.5, istep: float=0.1, project: str = "", +def make_calibration(weight: int, global_weight: int, istart: float = 1.1, + istop: float = 2.5, istep: float = 0.1, project: str = "", same_gene=True, feature: str = "exon", - use_weight: bool=False, logging_level: str = "INFO"): + use_weight: bool = False, logging_level: str = "INFO"): logging_def(ConfigGraph.output_folder, __file__, logging_level) inflations = np.arange(istart, istop + istep, istep) list_df = [