Skip to content
Snippets Groups Projects
Commit 661d27aa authored by nfontrod's avatar nfontrod
Browse files

src/find_interaction_cluster/community_figures/fig_functions.py: deletion of...

src/find_interaction_cluster/community_figures/fig_functions.py: deletion of unused import and functions + creation of dic_com when the test type is permutation
parent 00ca1723
No related branches found
No related tags found
No related merge requests found
......@@ -9,8 +9,6 @@ Description:
import pandas as pd
from pathlib import Path
from typing import Dict, Tuple, List, Optional
import sqlite3
from ..config import ConfigGraph
from tqdm import tqdm
from rpy2.robjects import r, pandas2ri
from statsmodels.stats.multitest import multipletests
......@@ -19,74 +17,6 @@ from ..radomization_test_ppi import get_pvalue
import seaborn as sns
def get_cpnt_frequency(cnx: sqlite3.Connection, list_ft: List[str],
                       feature: str, region: str = "",
                       component_type: str = "nt") -> pd.DataFrame:
    """
    Get the frequency of every component for the features in list_ft.

    The rows are read from the table ``cin_<feature>_frequency`` and
    pivoted so that each distinct ``ft`` value becomes a column.

    :param cnx: Connection to chia-pet database
    :param list_ft: The list of feature ids for which we want to get
    the frequencies. When feature is 'gene' every id is cast to int.
    :param feature: the kind of feature analysed
    :param region: The region of gene analysed if feature is gene
    (an empty string means 'gene'). Ignored for other features.
    :param component_type: The type of component to analyse; It
    can be 'nt', 'dnt' or 'aa'.
    :return: One row per feature id (column ``id_<feature>``, as str)
    and one column per component found in the ``ft`` column.

    >>> d = get_cpnt_frequency(sqlite3.connect(ConfigGraph.db_file),
    ...                        ["1_1", "1_2"], "exon")
    >>> d[["id_exon", 'A', 'C', 'G', 'T']]  # doctest: +NORMALIZE_WHITESPACE
    ft id_exon A C G T
    0 1_1 16.63480 34.60803 32.12237 16.63480
    1 1_2 16.06426 26.10442 39.75904 18.07229
    >>> d = get_cpnt_frequency(sqlite3.connect(ConfigGraph.db_file),
    ...                        ['1', '2'], "gene")
    >>> d[["id_gene", 'A', 'C', 'G', 'T']]  # doctest: +NORMALIZE_WHITESPACE
    ft id_gene A C G T
    0 1 29.49376 18.34271 18.43874 33.72479
    1 2 31.90401 16.40251 18.79033 32.90315
    >>> d = get_cpnt_frequency(sqlite3.connect(ConfigGraph.db_file),
    ...                        ['1', '2'], "gene", 'exon', 'aa')
    >>> d[["id_gene", "R", "K", "D", "Q", "E"]]  # doctest: +NORMALIZE_WHITESPACE
    ft id_gene R K D Q E
    0 1 4.75247 5.19300 5.95391 4.07997 6.96189
    1 2 4.34203 6.23736 6.77708 5.21984 7.01769
    """
    # Scalar filters are bound as parameters instead of being pasted
    # into the SQL text.
    params = [component_type]
    region_clause = ""
    if feature == "gene":
        list_ft = [int(ft) for ft in list_ft]
        if region == "":
            region = "gene"
        region_clause = "AND region = ?"
        params.append(region)
    # The id list is rendered explicitly rather than through
    # ``tuple(list_ft)``: a one-element tuple prints as ``('x',)`` whose
    # trailing comma is a SQL syntax error. The ids stay inline (not
    # bound) so that very long lists are not limited by the maximum
    # number of bound variables of SQLite.
    id_list = ", ".join(
        "'" + ft.replace("'", "''") + "'" if isinstance(ft, str) else str(ft)
        for ft in list_ft
    )
    # NOTE(review): ``feature`` names a table/column and cannot be bound
    # as a parameter; it must come from trusted code, never user input.
    query = f"""
    SELECT ft, id_{feature}, frequency
    FROM cin_{feature}_frequency
    WHERE id_{feature} IN ({id_list})
    AND ft_type = ?
    {region_clause}
    """
    df = pd.read_sql_query(query, cnx, params=params)
    df = df.pivot_table(index=f"id_{feature}", columns="ft",
                        values="frequency").reset_index()
    df[f"id_{feature}"] = df[f"id_{feature}"].astype(str)
    return df
def get_ft_id(cnx: sqlite3.Connection, feature: str = "exon") -> List[str]:
    """
    Collect the distinct ids stored in the ``cin_<feature>`` table.

    :param cnx: A connection to chiapet database
    :param feature: The feature of interest (used to build the table name)
    :return: Every distinct id of the table, each converted to str
    """
    rows = cnx.cursor().execute(
        f"SELECT DISTINCT id FROM cin_{feature}"
    ).fetchall()
    # Each row is a 1-tuple holding the id.
    return [str(ft_id) for (ft_id, *_) in rows]
def get_community_table(communities: List[List[str]],
size_threshold: int, feature: str) -> pd.DataFrame:
"""
......@@ -545,7 +475,8 @@ def create_community_fig(df: pd.DataFrame, feature: str,
:param iteration: The number of sub samples to create
"""
if dic_com is None:
dic_com = get_feature_by_community(df, feature)
dic_com = {} if test_type == 'lm' \
else get_feature_by_community(df, feature)
if test_type == "lm":
ndf, rdf = lm_with_ctrl(df, target_col, outfile_ctrl)
df_bar = expand_results_lm(ndf, rdf, target_col, feature)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment