src/find_interaction_cluster/nt_and_community.py: modification of the multiple_nt_lmm_launcher launcher

10a59086 · nfontrod · f05e0585 · 10a59086
Commit 10a59086 authored 4 years ago by nfontrod
--- a/src/find_interaction_cluster/nt_and_community.py
+++ b/src/find_interaction_cluster/nt_and_community.py
@@ -22,7 +22,6 @@ from .community_finder import get_projects
 from ..logging_conf import logging_def
 from itertools import product
 import multiprocessing as mp
-from .sf_and_communities import get_key
 def get_nt_frequency(cnx: sqlite3.Connection, list_ft: List[str],
@@ -264,6 +263,7 @@ def get_stat_nt_communities(df: pd.DataFrame, project: str, weight: int,
    res['pval'] = pval
    nt_ctrl_table = noutfile.parent / noutfile.name.replace("_stat.txt",
                                                            "_ctrl.txt")
+    print(df.head())
    ndf = create_ctrl_community(df, nt_ctrl_table, feature, region)
    sum_df = lmm_maker_summary(ndf, outfile, nt)
    outfile_ctrl = ConfigGraph.get_community_file(project, weight,
@@ -334,44 +334,34 @@ def multiple_nt_lmm_launcher(ps: int,
    project = get_projects(global_weight, project)
    nt_list = ["A", "C", "G", "T", "S", "W"]
    condition = list(product([project], [weight], nt_list))
-    processes = {}
+    processes = []
    pool = mp.Pool(processes=min(ps, len(condition)))
    logging.debug("Calculating stats...")
-    dic_df = {}
    for project, weight, nt in condition:
-        ckey = get_key(project, weight)
+        df = create_dataframe(project, weight, global_weight, same_gene,
-        if ckey in dic_df:
+                              feature)
-            df = dic_df[ckey]
+        nfile_table = ConfigGraph.get_community_file(
-        else:
-            df = create_dataframe(project, weight, global_weight, same_gene,
-                                  feature)
-            nfile_table = ConfigGraph.get_community_file(
                project, weight, global_weight, same_gene, feature,
                f"_nt_table.txt", "sf_community_enrichment")
-            df.to_csv(nfile_table, sep="\t", index=False)
+        df.to_csv(nfile_table, sep="\t", index=False)
-            dic_df[ckey] = df
        args = [df, project, weight, global_weight, same_gene, nt, feature,
                region]
-        if ckey in processes:
+        processes.append(pool.apply_async(get_stat_nt_communities, args))
-            processes[ckey].append(
+    results = [p.get(timeout=None) for p in processes]
-                pool.apply_async(get_stat_nt_communities, args))
+    pool.close()
-        else:
+    pool.join()
-            processes[ckey] = [pool.apply_async(get_stat_nt_communities, args)]
+    fdf = pd.DataFrame(results)
-    for p, value in processes.items():
+    fdf["padj"] = multipletests(fdf['pval'].values, method='fdr_bh')[1]
-        project, weight = p.split("_")
+    outfile = ConfigGraph.get_community_file(project, weight,
-        results = [p.get(timeout=None) for p in value]
+                                             global_weight,
-        pool.close()
+                                             same_gene, feature,
-        pool.join()
+                                             f"lmm-nt_stat.txt",
-        fdf = pd.DataFrame(results)
+                                             "sf_community_enrichment")
-        fdf["padj"] = multipletests(fdf['pval'].values, method='fdr_bh')[1]
+    nfolder = outfile.parent / "nt_analysis"
-        outfile = ConfigGraph.get_community_file(project, weight,
+    noutfile = nfolder / outfile.name
-                                                 global_weight,
+    fdf.to_csv(noutfile, sep="\t", index=False)
-                                                 same_gene, feature,
-                                                 f"lmm-nt_stat.txt",
-                                                 "sf_community_enrichment")
-        nfolder = outfile.parent / "nt_analysis"
-        noutfile = nfolder / outfile.name
-        fdf.to_csv(noutfile, sep="\t", index=False)
 if __name__ == "__main__":