From 10a59086a0a49b66d02f8c27ea2ad2e754549301 Mon Sep 17 00:00:00 2001
From: Fontrodona Nicolas <nicolas.fontrodona@ens-lyon.fr>
Date: Mon, 23 Nov 2020 10:05:26 +0100
Subject: [PATCH] src/find_interaction_cluster/nt_and_community.py:
 modification of multiple_nt_lmm_launcher launcher

---
 .../nt_and_community.py | 54 ++++++++-----------
 1 file changed, 22 insertions(+), 32 deletions(-)

diff --git a/src/find_interaction_cluster/nt_and_community.py b/src/find_interaction_cluster/nt_and_community.py
index 8e7b6394..d95347d5 100644
--- a/src/find_interaction_cluster/nt_and_community.py
+++ b/src/find_interaction_cluster/nt_and_community.py
@@ -22,7 +22,6 @@ from .community_finder import get_projects
 from ..logging_conf import logging_def
 from itertools import product
 import multiprocessing as mp
-from .sf_and_communities import get_key
 
 
 def get_nt_frequency(cnx: sqlite3.Connection, list_ft: List[str],
@@ -264,6 +263,7 @@ def get_stat_nt_communities(df: pd.DataFrame, project: str, weight: int,
     res['pval'] = pval
     nt_ctrl_table = noutfile.parent / noutfile.name.replace("_stat.txt",
                                                             "_ctrl.txt")
+    print(df.head())
     ndf = create_ctrl_community(df, nt_ctrl_table, feature, region)
     sum_df = lmm_maker_summary(ndf, outfile, nt)
     outfile_ctrl = ConfigGraph.get_community_file(project, weight,
@@ -334,44 +334,34 @@ def multiple_nt_lmm_launcher(ps: int,
     project = get_projects(global_weight, project)
     nt_list = ["A", "C", "G", "T", "S", "W"]
     condition = list(product([project], [weight], nt_list))
-    processes = {}
+    processes = []
     pool = mp.Pool(processes=min(ps, len(condition)))
     logging.debug("Calculating stats...")
 
-    dic_df = {}
     for project, weight, nt in condition:
-        ckey = get_key(project, weight)
-        if ckey in dic_df:
-            df = dic_df[ckey]
-        else:
-            df = create_dataframe(project, weight, global_weight, same_gene,
-                                  feature)
-            nfile_table = ConfigGraph.get_community_file(
+        df = create_dataframe(project, weight, global_weight, same_gene,
+                              feature)
+        nfile_table = ConfigGraph.get_community_file(
            project, weight, global_weight, same_gene, feature,
            f"_nt_table.txt", "sf_community_enrichment")
-            df.to_csv(nfile_table, sep="\t", index=False)
-            dic_df[ckey] = df
+        df.to_csv(nfile_table, sep="\t", index=False)
+
+        args = [df, project, weight, global_weight, same_gene, nt, feature, region]
 
-        if ckey in processes:
-            processes[ckey].append(
-                pool.apply_async(get_stat_nt_communities, args))
-        else:
-            processes[ckey] = [pool.apply_async(get_stat_nt_communities, args)]
-    for p, value in processes.items():
-        project, weight = p.split("_")
-        results = [p.get(timeout=None) for p in value]
-        pool.close()
-        pool.join()
-        fdf = pd.DataFrame(results)
-        fdf["padj"] = multipletests(fdf['pval'].values, method='fdr_bh')[1]
-        outfile = ConfigGraph.get_community_file(project, weight,
-                                                 global_weight,
-                                                 same_gene, feature,
-                                                 f"lmm-nt_stat.txt",
-                                                 "sf_community_enrichment")
-        nfolder = outfile.parent / "nt_analysis"
-        noutfile = nfolder / outfile.name
-        fdf.to_csv(noutfile, sep="\t", index=False)
+        processes.append(pool.apply_async(get_stat_nt_communities, args))
+    results = [p.get(timeout=None) for p in processes]
+    pool.close()
+    pool.join()
+    fdf = pd.DataFrame(results)
+    fdf["padj"] = multipletests(fdf['pval'].values, method='fdr_bh')[1]
+    outfile = ConfigGraph.get_community_file(project, weight,
+                                             global_weight,
+                                             same_gene, feature,
+                                             f"lmm-nt_stat.txt",
+                                             "sf_community_enrichment")
+    nfolder = outfile.parent / "nt_analysis"
+    noutfile = nfolder / outfile.name
+    fdf.to_csv(noutfile, sep="\t", index=False)
 
 
 if __name__ == "__main__":
--
GitLab
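
Note for review: the sketch below reproduces, outside the repository, the pattern the new multiple_nt_lmm_launcher body follows: every (project, weight, nt) condition is submitted to a single multiprocessing pool with apply_async, the async handles are collected in one flat list, and a single Benjamini-Hochberg correction (multipletests(..., method='fdr_bh')) is applied across all p-values before writing one table. This is a minimal standalone illustration, not project code; compute_stat, the toy conditions, and the fake p-values are stand-ins for get_stat_nt_communities and the real inputs.

import multiprocessing as mp
from itertools import product

import pandas as pd
from statsmodels.stats.multitest import multipletests


def compute_stat(project: str, weight: int, nt: str) -> dict:
    """Stand-in for get_stat_nt_communities: return one result row."""
    # A real worker would fit the LMM here; this only fakes a p-value.
    fake_pval = (hash((project, weight, nt)) % 1000 + 1) / 1001
    return {"project": project, "weight": weight, "nt": nt, "pval": fake_pval}


def launcher(ps: int) -> pd.DataFrame:
    """Submit every condition to one pool, then correct all p-values at once."""
    conditions = list(product(["proj1"], [1], ["A", "C", "G", "T", "S", "W"]))
    pool = mp.Pool(processes=min(ps, len(conditions)))
    # One flat list of async results, mirroring `processes = []` in the patch.
    processes = [pool.apply_async(compute_stat, args) for args in conditions]
    results = [p.get(timeout=None) for p in processes]
    pool.close()
    pool.join()
    fdf = pd.DataFrame(results)
    # Single BH (fdr_bh) pass over every test, as in the new launcher body.
    fdf["padj"] = multipletests(fdf["pval"].values, method="fdr_bh")[1]
    return fdf


if __name__ == "__main__":
    print(launcher(ps=4))

Compared with the removed code, which grouped async results in a dict keyed by get_key(project, weight) and ran one correction and one output file per key, the flat list yields a single lmm-nt_stat.txt with one FDR pass over all nucleotides; since condition is built from a single [project] and [weight], the per-key grouping appears to have been redundant.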