From 661d27aa95beb6c7d4c3a521a02088f8719cb755 Mon Sep 17 00:00:00 2001 From: Fontrodona Nicolas <nicolas.fontrodona@ens-lyon.fr> Date: Mon, 4 Jan 2021 12:39:10 +0100 Subject: [PATCH] src/find_interaction_cluster/community_figures/fig_functions.py: deletion of unused import and functions + creation of dic_com when the test type is permutation --- .../community_figures/fig_functions.py | 73 +------------------ 1 file changed, 2 insertions(+), 71 deletions(-) diff --git a/src/find_interaction_cluster/community_figures/fig_functions.py b/src/find_interaction_cluster/community_figures/fig_functions.py index 62f9d73a..5e754029 100644 --- a/src/find_interaction_cluster/community_figures/fig_functions.py +++ b/src/find_interaction_cluster/community_figures/fig_functions.py @@ -9,8 +9,6 @@ Description: import pandas as pd from pathlib import Path from typing import Dict, Tuple, List, Optional -import sqlite3 -from ..config import ConfigGraph from tqdm import tqdm from rpy2.robjects import r, pandas2ri from statsmodels.stats.multitest import multipletests @@ -19,74 +17,6 @@ from ..radomization_test_ppi import get_pvalue import seaborn as sns -def get_cpnt_frequency(cnx: sqlite3.Connection, list_ft: List[str], - feature: str, region: str = "", - component_type: str = "nt") -> pd.DataFrame: - """ - Get the frequency of every nucleotides for features in list_ft. - - :param cnx: Connection to chia-pet database - :param list_ft: The list of exons for which we want to get - :param feature: the kind of feature analysed - :param region: The region of gene analysed if feature is gene - :param component_type: The type of component to analyse; It \ - can be 'nt', 'dnt' or 'aa'. - :return: the frequency of nucleotides for the list of exons. - - >>> d = get_cpnt_frequency(sqlite3.connect(ConfigGraph.db_file), - ... ["1_1", "1_2"], "exon") - >>> d[["id_exon", 'A', 'C', 'G', 'T']] - ft id_exon A C G T - 0 1_1 16.63480 34.60803 32.12237 16.63480 - 1 1_2 16.06426 26.10442 39.75904 18.07229 - >>> d = get_cpnt_frequency(sqlite3.connect(ConfigGraph.db_file), - ... ['1', '2'], "gene") - >>> d[["id_gene", 'A', 'C', 'G', 'T']] - ft id_gene A C G T - 0 1 29.49376 18.34271 18.43874 33.72479 - 1 2 31.90401 16.40251 18.79033 32.90315 - >>> d = get_cpnt_frequency(sqlite3.connect(ConfigGraph.db_file), - ... ['1', '2'], "gene", 'exon', 'aa') - >>> d[["id_gene", "R", "K", "D", "Q", "E"]] - ft id_gene R K D Q E - 0 1 4.75247 5.19300 5.95391 4.07997 6.96189 - 1 2 4.34203 6.23736 6.77708 5.21984 7.01769 - """ - query_region = "" - if feature == "gene": - list_ft = [int(ft) for ft in list_ft] - if region == "": - region = "gene" - query_region = f"AND region = '{region}'" - query = f""" - SELECT ft, id_{feature}, frequency - FROM cin_{feature}_frequency - WHERE id_{feature} IN {tuple(list_ft)} - AND ft_type = '{component_type}' - {query_region} - """ - df = pd.read_sql_query(query, cnx) - df = df.pivot_table(index=f"id_{feature}", columns="ft", - values="frequency").reset_index() - df[f"id_{feature}"] = df[f"id_{feature}"].astype(str) - return df - - -def get_ft_id(cnx: sqlite3.Connection, feature: str = "exon") -> List[str]: - """ - Return the id of every gene/exons in chia-pet database. - - :param cnx: A connection to chiapet database - :param feature: The feature of interest - :return: The list of feature id - """ - query = f"SELECT DISTINCT id FROM cin_{feature}" - c = cnx.cursor() - c.execute(query) - res = c.fetchall() - return [str(cid[0]) for cid in res] - - def get_community_table(communities: List[List[str]], size_threshold: int, feature: str) -> pd.DataFrame: """ @@ -545,7 +475,8 @@ def create_community_fig(df: pd.DataFrame, feature: str, :param iteration: The number of sub samples to create """ if dic_com is None: - dic_com = get_feature_by_community(df, feature) + dic_com = {} if test_type == 'lm' \ + else get_feature_by_community(df, feature) if test_type == "lm": ndf, rdf = lm_with_ctrl(df, target_col, outfile_ctrl) df_bar = expand_results_lm(ndf, rdf, target_col, feature) -- GitLab