From cb228b23853775360556aa9a5d9400b8e450bbc4 Mon Sep 17 00:00:00 2001 From: Fontrodona Nicolas <nicolas.fontrodona@ens-lyon.fr> Date: Fri, 6 Nov 2020 16:08:57 +0100 Subject: [PATCH] modification of get_community_file function to be able to choose the subfolder in which we want to create the file --- src/find_interaction_cluster/config.py | 10 +++++----- src/find_interaction_cluster/nt_and_community.py | 14 ++++++-------- src/find_interaction_cluster/sf_and_communities.py | 11 +++++++---- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/src/find_interaction_cluster/config.py b/src/find_interaction_cluster/config.py index 4c6cbcfb..3c3d262c 100644 --- a/src/find_interaction_cluster/config.py +++ b/src/find_interaction_cluster/config.py @@ -37,7 +37,7 @@ def get_weight_folder(weight: int, global_weight: int): def get_community_file(project: str, weight: int, global_weight: int, same_gene: bool, feature: str = 'exon', - ext: str = ".txt", stat: bool = False): + ext: str = ".txt", sub_fold: str = ''): """ Get the output file of interest. 
@@ -51,13 +51,12 @@ def get_community_file(project: str, weight: int, global_weight: int, same gene :param the kind of feature analyzed :param ext: The file extension - :param stat: True to place the result in 'sf_community_enrichment' \ - subfolder + :param subfolder: if filled, then the data are recovered from a subfolder :return: The filename of interest """ folder = get_weight_folder(weight, global_weight) - if stat: - folder = folder / 'sf_community_enrichment' + if sub_fold != '': + folder = folder / sub_fold folder.mkdir(exist_ok=True, parents=True) if global_weight != 0: project = f"global-weight-{global_weight}" @@ -116,4 +115,5 @@ class ConfigGraph: get_hip_folder = get_hipmcl_folder get_hipmcl_prog = get_hipmcl_prog good_projects = get_good_project() + ppi_threshold = 700 diff --git a/src/find_interaction_cluster/nt_and_community.py b/src/find_interaction_cluster/nt_and_community.py index 3f769be1..0b7a0140 100644 --- a/src/find_interaction_cluster/nt_and_community.py +++ b/src/find_interaction_cluster/nt_and_community.py @@ -253,7 +253,7 @@ def get_stat_nt_communities(df: pd.DataFrame, project: str, weight: int, global_weight, same_gene, feature, f"{nt}_stat.txt", - True) + "sf_community_enrichment") nfolder = outfile.parent / "nt_analysis" nfolder.mkdir(exist_ok=True, parents=True) noutfile = nfolder / outfile.name @@ -269,7 +269,7 @@ def get_stat_nt_communities(df: pd.DataFrame, project: str, weight: int, global_weight, same_gene, feature, f"{nt}_VS_CTRL_stat.txt", - True) + "sf_community_enrichment") noutfile_ctrl = nfolder / outfile_ctrl.name sum_df.to_csv(noutfile_ctrl, sep="\t", index=False) return pd.Series(res) @@ -345,11 +345,9 @@ def multiple_nt_lmm_launcher(ps: int, weights: List[int], else: df = create_dataframe(project, weight, global_weight, same_gene, feature) - nfile_table = ConfigGraph.get_community_file(project, weight, - global_weight, - same_gene, feature, - f"_nt_table.txt", - True) + nfile_table = ConfigGraph.get_community_file( + 
project, weight, global_weight, same_gene, feature, + f"_nt_table.txt", "sf_community_enrichment") df.to_csv(nfile_table, sep="\t", index=False) dic_df[ckey] = df args = [df, project, weight, global_weight, same_gene, nt, feature, @@ -370,7 +368,7 @@ def multiple_nt_lmm_launcher(ps: int, weights: List[int], dic_project[project], same_gene, feature, f"lmm-nt_stat.txt", - True) + "sf_community_enrichment") nfolder = outfile.parent / "nt_analysis" noutfile = nfolder / outfile.name fdf.to_csv(noutfile, sep="\t", index=False) diff --git a/src/find_interaction_cluster/sf_and_communities.py b/src/find_interaction_cluster/sf_and_communities.py index b950c8b2..ac965851 100644 --- a/src/find_interaction_cluster/sf_and_communities.py +++ b/src/find_interaction_cluster/sf_and_communities.py @@ -182,7 +182,7 @@ def glmm_maker(expanded_df: pd.DataFrame, outfile: Path) -> float: ... "%reg in community": [40, 42.85], 'pval': [1, 0.5], 'padj': [1, 1]}) >>> e_df = expand_dataframe(d) >>> outfile = ConfigGraph.get_community_file("Test", 1, 1, True, - ... "_stat.txt", True) + ... 
"_stat.txt", "sf_community_enrichment") >>> glmm_maker(e_df, outfile) 1.0 """ @@ -235,7 +235,8 @@ def glmm_statistics(df: pd.DataFrame, sf_name: str, reg: str, expanded_df = expand_dataframe(ndf) outfile = ConfigGraph.get_community_file(project, weight, global_weight, same_gene, feature, - f"{sf_name}_{reg}_stat.txt", True) + f"{sf_name}_{reg}_stat.txt", + "sf_community_enrichment") noutfold = outfile.parent / "expanded_df" noutfold.mkdir(exist_ok=True, parents=True) noutfile = noutfold / outfile.name @@ -411,7 +412,8 @@ def multiple_stat_launcher(ps: int, weights: List[int], outfile = ConfigGraph.get_community_file(project, weight, dic_project[project], same_gene, feature, - "_stat.txt", True) + "_stat.txt", + "sf_community_enrichment") df.to_csv(outfile, sep="\t", index=False) glm_df = pd.DataFrame(list_series) glm_df["padj"] = multipletests(glm_df['pval'].values, @@ -419,7 +421,8 @@ def multiple_stat_launcher(ps: int, weights: List[int], outfile = ConfigGraph.get_community_file(project, weight, dic_project[project], same_gene, feature, - "_glmm_stat.txt", True) + "_glmm_stat.txt", + "sf_community_enrichment") glm_df.to_csv(outfile, sep="\t", index=False) -- GitLab