src/find_interaction_cluster/community_calibration.py: creation of input and...

src/find_interaction_cluster/community_calibration.py: creation of input and output for hipMCL inside its result folder

src/find_interaction_cluster/community_calibration.py: creation of input and...
4a363356 · nfontrod · cc536f0b · 4a363356
Commit 4a363356 authored 4 years ago by nfontrod
--- a/src/find_interaction_cluster/community_calibration.py
+++ b/src/find_interaction_cluster/community_calibration.py
@@ -21,7 +21,22 @@ from pathlib import Path
 import seaborn as sns
 from math import log
 from .community_finder import logging_def, get_project_colocalisation, \
-    write_interaction_file, create_graph, find_communities
+    create_graph, find_communities
+
+
+def write_input(arr_interaction: np.array, outfile: Path, use_weight: bool):
+    """
+    Write the interactions to a tab-separated file, one pair per line.
+
+    :param arr_interaction: Rows of (feature1, feature2, weight) to write.
+    :param outfile: The file to write (opened in 'w' mode, so overwritten).
+    :param use_weight: If False, the weight written on every line is 1.
+    """
+    with outfile.open('w') as f:
+        for exon1, exon2, cweight in arr_interaction:
+            if not use_weight:
+                cweight = 1
+            f.write(f"{exon1}\t{exon2}\t{cweight}\n")


 def get_out_name(weight: int, global_weight: int, inflation: float,
@@ -41,14 +56,23 @@ def get_out_name(weight: int, global_weight: int, inflation: float,
    same gene (True) or not (False) (default False)
    :param feature: The feature we want to analyse (default 'exon')
    :param use_weight: Say if we want to write the weight into the result file.
-    :return:
+    :return: The file containing communities,
+    the input file of hipMCL and its output file
    """
    w = "weigthed" if use_weight else "unweigthed"
    if global_weight != 0:
        project = f"global-weight-{global_weight}"
-    return ConfigGraph.community_calibration_folder / "community_files" / \
+    output = ConfigGraph.community_calibration_folder / "community_files" / \
        f"{project}_weight-{weight}_same_gene-{same_gene}_{feature}_" \
        f"{inflation}_{w}.txt"
+    input_hip = output.parent / "hipMCL_files" / \
+            output.name.replace(".txt", "_input.txt")
+    output_hip = input_hip.parent / input_hip.name.replace("_input.txt",
+                                                           "output.txt")
+    output.parent.mkdir(exist_ok=True, parents=True)
+    input_hip.parent.mkdir(exist_ok=True, parents=True)
+    return output, input_hip, output_hip
+


 def get_figname(weight: int, global_weight: int,
@@ -97,23 +121,18 @@ def community_finder(weight: int, global_weight: int, inflation: float,
    """
    inflation = round(inflation, 2)
    logging.info(f"Working with inflation {inflation}")
-    outfile = get_out_name(weight, global_weight, inflation,
-                           project, same_gene, feature, use_weight)
-    outfile.parent.mkdir(exist_ok=True, parents=True)
+    outfile, in_hipmcl, out_hipmcl = get_out_name(
+        weight, global_weight, inflation, project, same_gene, feature,
+        use_weight)
    if outfile.is_file():
        return pd.read_csv(outfile, sep="\t")
    cnx = sqlite3.connect(ConfigGraph.db_file)
    interaction = get_project_colocalisation(cnx, project, weight,
                                             global_weight, same_gene, True,
                                             level=feature)
-    outfileg, result_file = write_interaction_file(interaction, project,
-                                                   weight, global_weight,
-                                                   same_gene,
-                                                   inflation,
-                                                   feature=feature,
-                                                   use_weight=use_weight)
+    write_input(interaction, in_hipmcl, use_weight)
    graph = create_graph(interaction)
-    df, dic_community = find_communities(graph, project, outfileg, result_file,
+    df, dic_community = find_communities(graph, project, in_hipmcl, out_hipmcl,
                                         feature, inflation=inflation,
                                         compute_ec_cov=False)
    logging.debug('Writing results ...')
@@ -240,10 +259,10 @@ def create_community_size_fig(df_infl: pd.DataFrame, fig_name: Path) -> None:
 @lp.parse(weight=range(1, 11), global_weight=range(11),
          feature=('gene', 'exon'), istart="1.1 <= istart < 2.5",
          istop="1.1 < istop <= 2.5", istep="0 < istep <= 1")
-def make_calibration(weight: int, global_weight: int, istart: float =1.1,
-                     istop: float=2.5, istep: float=0.1, project: str = "",
+def make_calibration(weight: int, global_weight: int, istart: float = 1.1,
+                     istop: float = 2.5, istep: float = 0.1, project: str = "",
                     same_gene=True, feature: str = "exon",
-                     use_weight: bool=False, logging_level: str = "INFO"):
+                     use_weight: bool = False, logging_level: str = "INFO"):
    logging_def(ConfigGraph.output_folder, __file__, logging_level)
    inflations = np.arange(istart, istop + istep, istep)
    list_df = [