Skip to content
Snippets Groups Projects
Commit e2d22cc1 authored by nfontrod's avatar nfontrod
Browse files

src/nt_composition/make_nt_correlation.py src/nt_composition/__main__.py:...

src/nt_composition/make_nt_correlation.py src/nt_composition/__main__.py: Add a new parameter same_gene that controls whether co-localisations between exons of the same gene are kept or removed
parent d37a1c7d
No related branches found
No related tags found
No related merge requests found
......@@ -12,9 +12,12 @@ from .make_nt_correlation import create_all_frequency_figures
from .get_projects_interaction import get_interactions_number
import lazyparser as lp
@lp.flag(same_gene=True)
@lp.parse(weight='weight > 0', ft_type=['nt', 'dnt', 'tnt', 'codon', 'aa',
'properties'])
def launcher(weight: int = 1, global_weight: int = 0, ft_type: str = 'nt',
same_gene: bool = False,
logging_level: str = "DISABLE"):
"""
Launch the creation of density file.
......@@ -25,11 +28,13 @@ def launcher(weight: int = 1, global_weight: int = 0, ft_type: str = 'nt',
by project, else all project are merge together and the interaction \
seen in `global_weight` project are taken into account (default 0)
:param ft_type: The feature type of interest (default 'nt')
:param same_gene: Say if we consider as co-localised, exons within the \
same gene (True) or not (False) (default False)
:param logging_level: The level of data to display (default 'DISABLE')
"""
get_interactions_number(weight, logging_level)
create_all_frequency_figures(ConfigNt.cpu, weight, global_weight, ft_type,
logging_level)
same_gene, logging_level)
launcher()
\ No newline at end of file
......@@ -25,6 +25,7 @@ from itertools import product
from random import random
import multiprocessing as mp
import os
import re
class NoInteractionError(Exception):
......@@ -85,7 +86,7 @@ def get_interacting_exon(arr: np.array) -> np.array:
return np.unique(arr.flatten())
def get_all_exon_interacting_with_another(exon: str, arr: np.array
def get_all_exon_interacting_with_another(exon: str, arr: np.array,
) -> np.array:
"""
From an exons, get the other exons directly co-localised with it.
......@@ -102,6 +103,21 @@ def get_all_exon_interacting_with_another(exon: str, arr: np.array
return exons[exons != exon]
def remove_exon_in_same_gene(prefix: str, oexon: np.array) -> np.array:
    """
    Remove all exons whose identifier starts with `prefix` from `oexon`.

    Only a match at the very beginning of the identifier counts: an \
    exon that merely contains `prefix` further along is kept.

    :param prefix: A prefix
    :param oexon: The array of interacting exons
    :return: `oexon` without the exons starting with `prefix`

    >>> remove_exon_in_same_gene("19_",
    ... np.asarray(["1_5", "1_3", "19_1", "19_19", "3_3"]))
    array(['1_5', '1_3', '3_3'], dtype='<U5')
    >>> remove_exon_in_same_gene("19_", np.asarray(["119_1", "19_1"]))
    array(['119_1'], dtype='<U5')
    """
    # An empty array built from an empty list has a float dtype, on which
    # string operations raise; there is nothing to filter anyway.
    if oexon.size == 0:
        return oexon
    # Public numpy.char API instead of the private np.core.defchararray
    # path; the boolean mask states the prefix test directly.
    return oexon[~np.char.startswith(oexon, prefix)]
def get_frequency_dic(cnx: sqlite3.Connection, ft: str, ft_type: str
) -> Dict[str, float]:
"""
......@@ -133,7 +149,7 @@ def get_frequency_dic(cnx: sqlite3.Connection, ft: str, ft_type: str
def create_density_table(arr_interaction: np.array, dic_freq: Dict[str, float],
analyse_id) -> pd.DataFrame:
analyse_id: str, same_gene: bool) -> pd.DataFrame:
"""
Create the density table, a table showing the frequency of \
a nucleotide in every exon in a chia-pet projet and the mean \
......@@ -142,6 +158,8 @@ def create_density_table(arr_interaction: np.array, dic_freq: Dict[str, float],
:param arr_interaction: array of interaction between exons.
:param dic_freq: The frequency dataframe.
:param analyse_id The id of the current analysis
:param same_gene: Say if we consider as co-localised exon within the \
same gene
:return: The density table
"""
logging.debug(f'Calculating density table ({os.getpid()})')
......@@ -149,9 +167,15 @@ def create_density_table(arr_interaction: np.array, dic_freq: Dict[str, float],
dic = {'exon': [], 'freq_exon': [], 'freq_coloc_exon': [], 'oexon': []}
pbar = tqdm(exons_list, desc=f"Working on {analyse_id} ({os.getpid()}) :",
position=mp.current_process()._identity[0] - 1)
pat = None
if not same_gene:
pat = re.compile(r"_\d+")
for exon in pbar:
freq_ex = dic_freq[exon]
oexon = get_all_exon_interacting_with_another(exon, arr_interaction)
if not same_gene:
prefix = re.sub(pat, '_', exon)
oexon = remove_exon_in_same_gene(prefix, oexon)
freq_oexon = np.nanmean(np.asarray([dic_freq[ex] for ex in oexon],
dtype=float))
if freq_ex is not None and not np.isnan(freq_oexon):
......@@ -207,6 +231,7 @@ def create_density_fig(df: pd.DataFrame, project: str, ft_type: str, ft: str,
def create_density_figure(nt: str, ft_type: str,
project: str, weight: int, global_weight: int,
same_gene: bool,
logging_level: str = "DISABLE"
) -> Tuple[float, float]:
"""
......@@ -221,6 +246,8 @@ def create_density_figure(nt: str, ft_type: str,
the global weight is equal to 0 then then density figure are calculated \
by project, else all projet are merge together and the interaction \
seen in `global_weight` project are taken into account
:param same_gene: Say if we consider as co-localised exon within the \
same gene
:param logging_level: The level of information to display
:return: The correlation and the p-value
"""
......@@ -233,7 +260,8 @@ def create_density_figure(nt: str, ft_type: str,
global_weight)
dic_freq = get_frequency_dic(cnx, nt, ft_type)
analyse_id = f"{project}_{ft_type}_{nt}"
df = create_density_table(arr_interaction, dic_freq, analyse_id)
df = create_density_table(arr_interaction, dic_freq, analyse_id,
same_gene)
df.to_csv(outfile, sep="\t", index=False)
r, p = create_density_fig(df, project, ft_type, nt, weight,
global_weight)
......@@ -283,7 +311,8 @@ def create_scatterplot(df_cor: pd.DataFrame, ft_type: str, ft: str,
def execute_density_figure_function(di: pd.DataFrame, project : str,
ft_type: str, ft: str, weight: int,
global_weight: int) -> Dict[str, Any]:
global_weight: int,
same_gene: bool) -> Dict[str, Any]:
"""
Execute create_density_figure and organized the results in a dictionary.
......@@ -296,11 +325,13 @@ def execute_density_figure_function(di: pd.DataFrame, project : str,
the global weight is equal to 0 then then density figure are calculated \
by project, else all projet are merge together and the interaction \
seen in `global_weight` project are taken into account
:param same_gene: Say if we consider as co-localised exon within the \
same gene
:return:
"""
logging.info(f'Working on {project}, {ft_type}, {ft} - {os.getpid()}')
r, p = create_density_figure(ft, ft_type, project, weight,
global_weight)
global_weight, same_gene)
if global_weight == 0:
tmp = {"project": project, "ft_type": ft_type,
"ft": ft, "cor": r, "pval": p,
......@@ -327,6 +358,7 @@ def combine_dic(list_dic: List[Dict]) -> Dict:
def create_all_frequency_figures(ps: int, weight: int = 1,
global_weight: int = 0, ft_type: str = "nt",
same_gene = True,
logging_level: str = "DISABLE"):
"""
Make density figure for every selected projects.
......@@ -338,6 +370,8 @@ def create_all_frequency_figures(ps: int, weight: int = 1,
by project, else all projet are merge together and the interaction \
seen in `global_weight` project are taken into account
:param ft_type: The kind of feature to analyse
:param same_gene: Say if we consider as co-localised exon within the \
same gene
:param ps: The number of processes to create
"""
logging_def(ConfigNt.interaction, __file__, logging_level)
......@@ -355,7 +389,7 @@ def create_all_frequency_figures(ps: int, weight: int = 1,
pool = mp.Pool(processes=ps)
processes = []
for project, ft, ft_type in param:
args = [di, project, ft_type, ft, weight, global_weight]
args = [di, project, ft_type, ft, weight, global_weight, same_gene]
processes.append(pool.apply_async(execute_density_figure_function,
args))
results = []
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment