Skip to content
Snippets Groups Projects
Commit 30eb71fa authored by nfontrod's avatar nfontrod
Browse files

src/nt_composition/make_nt_correlation.py: modification of...

src/nt_composition/make_nt_correlation.py: modification of get_project_colocalisation function to keep or not the co-localisation comming from the same gene. And deletion of remove_exonsrc/nt_composition/make_nt_correlation.pyin_the_same_gene function
parent d1232a66
No related branches found
No related tags found
No related merge requests found
......@@ -25,7 +25,6 @@ from itertools import product
from random import random
import multiprocessing as mp
import os
import re
class NoInteractionError(Exception):
......@@ -34,7 +33,7 @@ class NoInteractionError(Exception):
def get_project_colocalisation(cnx: sqlite3.Connection, project: str,
weight: int,
global_weight: int,
global_weight: int, same_gene: bool
) -> np.array:
"""
Get the interactions in project `project`
......@@ -50,16 +49,35 @@ def get_project_colocalisation(cnx: sqlite3.Connection, project: str,
"""
logging.debug(f'Recovering interaction ({os.getpid()})')
if global_weight == 0:
query = "SELECT exon1, exon2 " \
"FROM cin_exon_interaction " \
f"WHERE weight >= {weight} " \
f"AND id_project = '{project}' "
if same_gene:
query = "SELECT exon1, exon2 " \
"FROM cin_exon_interaction " \
f"WHERE weight >= {weight} " \
f"AND id_project = '{project}' "
else:
query = f"""SELECT t1.exon1, t1.exon2
FROM cin_exon_interaction t1, cin_exon t2, cin_exon t3
WHERE t1.weight >= {weight}
AND id_project = '{project}'
AND t1.exon1 = t2.id
AND t1.exon2 = t3.id
AND t2.id_gene != t3.id_gene"""
else:
query = f"SELECT exon1, exon2 " \
f"FROM cin_exon_interaction " \
f"WHERE weight >= {weight} " \
f"GROUP BY exon1, exon2 " \
f"HAVING COUNT(*) >= {global_weight}"
if same_gene:
query = f"SELECT exon1, exon2 " \
f"FROM cin_exon_interaction " \
f"WHERE weight >= {weight} " \
f"GROUP BY exon1, exon2 " \
f"HAVING COUNT(*) >= {global_weight}"
else:
query = f"""SELECT t1.exon1, t1.exon2
FROM cin_exon_interaction t1, cin_exon t2, cin_exon t3
WHERE t1.weight >= {weight}
AND t1.exon1 = t2.id
AND t1.exon2 = t3.id
AND t2.id_gene != t3.id_gene
GROUP BY exon1, exon2
HAVING COUNT(*) >= {global_weight}"""
c = cnx.cursor()
c.execute(query)
......@@ -103,21 +121,6 @@ def get_all_exon_interacting_with_another(exon: str, arr: np.array,
return exons[exons != exon]
def remove_exon_in_same_gene(prefix: str, oexon: np.array):
"""
Remove all exons having the prefix `prefix` from `oexon`.
:param prefix: A prefix
:param oexon: The list of interacting exons
:return: Removing all exon with the preifx in oexon
>>> remove_exon_in_same_gene("19_",
... np.asarray(["1_5", "1_3", "19_1", "19_19", "3_3"]))
array(['1_5', '1_3', '3_3'], dtype='<U5')
"""
return oexon[np.nonzero(np.core.defchararray.find(oexon, prefix))]
def get_frequency_dic(cnx: sqlite3.Connection, ft: str, ft_type: str
) -> Dict[str, float]:
"""
......@@ -149,7 +152,7 @@ def get_frequency_dic(cnx: sqlite3.Connection, ft: str, ft_type: str
def create_density_table(arr_interaction: np.array, dic_freq: Dict[str, float],
analyse_id: str, same_gene: bool) -> pd.DataFrame:
analyse_id: str) -> pd.DataFrame:
"""
Create the density table, a table showing the frequency of \
a nucleotide in every exon in a chia-pet projet and the mean \
......@@ -158,24 +161,16 @@ def create_density_table(arr_interaction: np.array, dic_freq: Dict[str, float],
:param arr_interaction: array of interaction between exons.
:param dic_freq: The frequency dataframe.
:param analyse_id The id of the current analysis
:param same_gene: Say if we consider as co-localised exon within the \
same gene
:return: The density table
"""
logging.debug(f'Calculating density table ({os.getpid()})')
exons_list = get_interacting_exon(arr_interaction)
dic = {'exon': [], 'freq_exon': [], 'freq_coloc_exon': [], 'oexon': []}
pbar = tqdm(exons_list, desc=f"Working on {analyse_id} ({os.getpid()}) :",
position=mp.current_process()._identity[0] - 1)
pat = None
if not same_gene:
pat = re.compile(r"_\d+")
position=mp.current_process()._identity[0] - 1, disable=None)
for exon in pbar:
freq_ex = dic_freq[exon]
oexon = get_all_exon_interacting_with_another(exon, arr_interaction)
if not same_gene:
prefix = re.sub(pat, '_', exon)
oexon = remove_exon_in_same_gene(prefix, oexon)
freq_oexon = np.nanmean(np.asarray([dic_freq[ex] for ex in oexon],
dtype=float))
if freq_ex is not None and not np.isnan(freq_oexon):
......@@ -257,11 +252,10 @@ def create_density_figure(nt: str, ft_type: str,
if not outfile.is_file():
cnx = sqlite3.connect(ConfigNt.db_file)
arr_interaction = get_project_colocalisation(cnx, project, weight,
global_weight)
global_weight, same_gene)
dic_freq = get_frequency_dic(cnx, nt, ft_type)
analyse_id = f"{project}_{ft_type}_{nt}"
df = create_density_table(arr_interaction, dic_freq, analyse_id,
same_gene)
df = create_density_table(arr_interaction, dic_freq, analyse_id)
df.to_csv(outfile, sep="\t", index=False)
r, p = create_density_fig(df, project, ft_type, nt, weight,
global_weight)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment