Skip to content
Snippets Groups Projects
Commit d832cda5 authored by nfontrod's avatar nfontrod
Browse files

src/nt_composition/make_nt_correlation.py: add inflation parameter

parent cc520e2a
No related branches found
No related tags found
No related merge requests found
......@@ -24,7 +24,7 @@ from itertools import product
from random import random
import multiprocessing as mp
import os
from ..find_interaction_cluster.config import get_communities
from ..find_interaction_cluster.config import get_communities_basefile
class NoInteractionError(Exception):
......@@ -346,7 +346,8 @@ def create_density_fig(df: pd.DataFrame, project: str, ft_type: str, ft: str,
def create_density_figure(nt: str, ft_type: str,
project: str, weight: int, global_weight: int,
same_gene: bool, compute_mean: bool,
same_gene: bool, inflation: float,
compute_mean: bool,
community_size: Optional[int],
inter_chr: bool = False, level: str = "exon",
logging_level: str = "DISABLE"
......@@ -365,6 +366,7 @@ def create_density_figure(nt: str, ft_type: str,
seen in `global_weight` project are taken into account
:param same_gene: Say if we consider as co-localised exon within the \
same gene
:param inflation: The inflation parameter
:param compute_mean: True to compute the mean frequency of co-localised \
exons, False to only compute the frequency of one co-localized exon.
:param community_size: The size of the community to consider in the \
......@@ -382,8 +384,8 @@ def create_density_figure(nt: str, ft_type: str,
inter_chr=inter_chr)
if not outfile.is_file():
exons_bc = recover_exon_in_big_communities(community_size, project,
weight,
global_weight)
weight, global_weight,
inflation, level)
cnx = sqlite3.connect(ConfigNt.db_file)
arr_interaction = get_project_colocalisation(cnx, project, weight,
global_weight, same_gene,
......@@ -454,6 +456,7 @@ def execute_density_figure_function(di: pd.DataFrame, project : str,
ft_type: str, ft: str, weight: int,
global_weight: int,
same_gene: bool,
inflation: float,
compute_mean: bool,
community_size: Optional[int],
inter_chr: bool = False,
......@@ -473,6 +476,7 @@ def execute_density_figure_function(di: pd.DataFrame, project : str,
seen in `global_weight` project are taken into account
:param same_gene: Say if we consider as co-localised exon within the \
same gene
:param inflation: The inflation parameter
:param compute_mean: True to compute the mean frequency of co-localised \
exons, False to only compute the frequency of one co-localized exon.
:param community_size: The size of the community to consider in the \
......@@ -484,7 +488,8 @@ def execute_density_figure_function(di: pd.DataFrame, project : str,
"""
logging.info(f'Working on {project}, {ft_type}, {ft} - {os.getpid()}')
r, p = create_density_figure(ft, ft_type, project, weight,
global_weight, same_gene, compute_mean,
global_weight, same_gene, inflation,
compute_mean,
community_size, inter_chr, level)
if global_weight == 0:
return {"project": project, "ft_type": ft_type,
......@@ -511,8 +516,8 @@ def combine_dic(list_dic: List[Dict]) -> Dict:
def recover_exon_in_big_communities(community_size: Optional[int],
project: str, weight: int,
global_weight: int
) -> Optional[np.array]:
global_weight: int, inflation: float,
level: str) -> Optional[np.array]:
"""
Recover the list of exon present in community with a larger size than \
`community_size`
......@@ -525,19 +530,22 @@ def recover_exon_in_big_communities(community_size: Optional[int],
by project, else all projects are merged together and the interaction \
seen in `global_weight` project are taken into account
:param weight: The weight of interaction to consider
:param inflation: The inflation parameter
:param level: The kind of feature to analyse (exon or gene)
:return: The list of exon of interest
"""
if community_size is None:
return None
outfile = ConfigNt.get_community_file(project, weight, global_weight,
True, "_communities.txt")
True, inflation, level,
"_communities.txt")
if not outfile.is_file():
msg = f"The file {outfile} was not found ! You must try " \
f"to launch find_interaction_cluster script with " \
f"the same parameters !"
logging.exception(msg)
raise FileNotFoundError(msg)
communities = get_communities(outfile, community_size)
communities = get_communities_basefile(outfile)
res = []
for c in communities:
res += c
......@@ -546,7 +554,8 @@ def recover_exon_in_big_communities(community_size: Optional[int],
def create_all_frequency_figures(ps: int, weight: int = 1,
global_weight: int = 0, ft_type: str = "nt",
same_gene = True, compute_mean: bool = True,
same_gene = True, inflation: float = 1.5,
compute_mean: bool = True,
community_size: Optional[int] = None,
inter_chr: bool = False, level: str = "exon",
logging_level: str = "DISABLE"):
......@@ -562,6 +571,7 @@ def create_all_frequency_figures(ps: int, weight: int = 1,
:param ft_type: The kind of feature to analyse
:param same_gene: Say if we consider as co-localised exon within the \
same gene
:param inflation: The inflation parameter
:param compute_mean: True to compute the mean frequency of co-localised \
exons, False to only compute the frequency of one co-localized exon.
:param community_size: The size of the community to consider in the \
......@@ -585,7 +595,7 @@ def create_all_frequency_figures(ps: int, weight: int = 1,
processes = []
for project, ft, ft_type in param:
args = [di, project, ft_type, ft, weight, global_weight, same_gene,
compute_mean, community_size, inter_chr, level]
inflation, compute_mean, community_size, inter_chr, level]
processes.append(pool.apply_async(execute_density_figure_function,
args))
results = [proc.get(timeout=None) for proc in processes]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment