Skip to content
Snippets Groups Projects
Commit 10a59086 authored by nfontrod's avatar nfontrod
Browse files

src/find_interaction_cluster/nt_and_community.py: modification of the multiple_nt_lmm_launcher function

parent f05e0585
No related branches found
No related tags found
No related merge requests found
...@@ -22,7 +22,6 @@ from .community_finder import get_projects ...@@ -22,7 +22,6 @@ from .community_finder import get_projects
from ..logging_conf import logging_def from ..logging_conf import logging_def
from itertools import product from itertools import product
import multiprocessing as mp import multiprocessing as mp
from .sf_and_communities import get_key
def get_nt_frequency(cnx: sqlite3.Connection, list_ft: List[str], def get_nt_frequency(cnx: sqlite3.Connection, list_ft: List[str],
...@@ -264,6 +263,7 @@ def get_stat_nt_communities(df: pd.DataFrame, project: str, weight: int, ...@@ -264,6 +263,7 @@ def get_stat_nt_communities(df: pd.DataFrame, project: str, weight: int,
res['pval'] = pval res['pval'] = pval
nt_ctrl_table = noutfile.parent / noutfile.name.replace("_stat.txt", nt_ctrl_table = noutfile.parent / noutfile.name.replace("_stat.txt",
"_ctrl.txt") "_ctrl.txt")
print(df.head())
ndf = create_ctrl_community(df, nt_ctrl_table, feature, region) ndf = create_ctrl_community(df, nt_ctrl_table, feature, region)
sum_df = lmm_maker_summary(ndf, outfile, nt) sum_df = lmm_maker_summary(ndf, outfile, nt)
outfile_ctrl = ConfigGraph.get_community_file(project, weight, outfile_ctrl = ConfigGraph.get_community_file(project, weight,
...@@ -334,44 +334,34 @@ def multiple_nt_lmm_launcher(ps: int, ...@@ -334,44 +334,34 @@ def multiple_nt_lmm_launcher(ps: int,
project = get_projects(global_weight, project) project = get_projects(global_weight, project)
nt_list = ["A", "C", "G", "T", "S", "W"] nt_list = ["A", "C", "G", "T", "S", "W"]
condition = list(product([project], [weight], nt_list)) condition = list(product([project], [weight], nt_list))
processes = {} processes = []
pool = mp.Pool(processes=min(ps, len(condition))) pool = mp.Pool(processes=min(ps, len(condition)))
logging.debug("Calculating stats...") logging.debug("Calculating stats...")
dic_df = {}
for project, weight, nt in condition: for project, weight, nt in condition:
ckey = get_key(project, weight) df = create_dataframe(project, weight, global_weight, same_gene,
if ckey in dic_df: feature)
df = dic_df[ckey] nfile_table = ConfigGraph.get_community_file(
else:
df = create_dataframe(project, weight, global_weight, same_gene,
feature)
nfile_table = ConfigGraph.get_community_file(
project, weight, global_weight, same_gene, feature, project, weight, global_weight, same_gene, feature,
f"_nt_table.txt", "sf_community_enrichment") f"_nt_table.txt", "sf_community_enrichment")
df.to_csv(nfile_table, sep="\t", index=False) df.to_csv(nfile_table, sep="\t", index=False)
dic_df[ckey] = df
args = [df, project, weight, global_weight, same_gene, nt, feature, args = [df, project, weight, global_weight, same_gene, nt, feature,
region] region]
if ckey in processes: processes.append(pool.apply_async(get_stat_nt_communities, args))
processes[ckey].append( results = [p.get(timeout=None) for p in processes]
pool.apply_async(get_stat_nt_communities, args)) pool.close()
else: pool.join()
processes[ckey] = [pool.apply_async(get_stat_nt_communities, args)] fdf = pd.DataFrame(results)
for p, value in processes.items(): fdf["padj"] = multipletests(fdf['pval'].values, method='fdr_bh')[1]
project, weight = p.split("_") outfile = ConfigGraph.get_community_file(project, weight,
results = [p.get(timeout=None) for p in value] global_weight,
pool.close() same_gene, feature,
pool.join() f"lmm-nt_stat.txt",
fdf = pd.DataFrame(results) "sf_community_enrichment")
fdf["padj"] = multipletests(fdf['pval'].values, method='fdr_bh')[1] nfolder = outfile.parent / "nt_analysis"
outfile = ConfigGraph.get_community_file(project, weight, noutfile = nfolder / outfile.name
global_weight, fdf.to_csv(noutfile, sep="\t", index=False)
same_gene, feature,
f"lmm-nt_stat.txt",
"sf_community_enrichment")
nfolder = outfile.parent / "nt_analysis"
noutfile = nfolder / outfile.name
fdf.to_csv(noutfile, sep="\t", index=False)
if __name__ == "__main__": if __name__ == "__main__":
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment