src/find_interaction_cluster/community_figures/fig_functions.py: deletion of...

src/find_interaction_cluster/community_figures/fig_functions.py: deletion of unused imports and functions + creation of dic_com when the test type is permutation

src/find_interaction_cluster/community_figures/fig_functions.py: deletion of...
src/find_interaction_cluster/community_figures/fig_functions.py: deletion of unused imports and functions + creation of dic_com when the test type is permutation
661d27aa · nfontrod · 00ca1723 · 661d27aa
Commit 661d27aa authored 4 years ago by nfontrod
--- a/src/find_interaction_cluster/community_figures/fig_functions.py
+++ b/src/find_interaction_cluster/community_figures/fig_functions.py
@@ -9,8 +9,6 @@ Description:
 import pandas as pd
 from pathlib import Path
 from typing import Dict, Tuple, List, Optional
-import sqlite3
-from ..config import ConfigGraph
 from tqdm import tqdm
 from rpy2.robjects import r, pandas2ri
 from statsmodels.stats.multitest import multipletests
@@ -19,74 +17,6 @@ from ..radomization_test_ppi import get_pvalue
 import seaborn as sns


-def get_cpnt_frequency(cnx: sqlite3.Connection, list_ft: List[str],
-                       feature: str, region: str = "",
-                       component_type: str = "nt") -> pd.DataFrame:
-    """
-    Get the frequency of every nucleotides for features in list_ft.
-
-    :param cnx: Connection to chia-pet database
-    :param list_ft: The list of exons for which we want to get
-    :param feature: the kind of feature analysed
-    :param region: The region of gene analysed if feature is gene
-    :param component_type: The type of component to analyse; It \
-    can be 'nt', 'dnt' or 'aa'.
-    :return: the frequency of nucleotides for the list of exons.
-
-    >>> d = get_cpnt_frequency(sqlite3.connect(ConfigGraph.db_file),
-    ... ["1_1", "1_2"], "exon")
-    >>> d[["id_exon", 'A', 'C', 'G', 'T']]
-    ft id_exon         A         C         G         T
-    0      1_1  16.63480  34.60803  32.12237  16.63480
-    1      1_2  16.06426  26.10442  39.75904  18.07229
-    >>> d = get_cpnt_frequency(sqlite3.connect(ConfigGraph.db_file),
-    ... ['1', '2'], "gene")
-    >>> d[["id_gene", 'A', 'C', 'G', 'T']]
-    ft id_gene         A         C         G         T
-    0        1  29.49376  18.34271  18.43874  33.72479
-    1        2  31.90401  16.40251  18.79033  32.90315
-    >>> d = get_cpnt_frequency(sqlite3.connect(ConfigGraph.db_file),
-    ... ['1', '2'], "gene", 'exon', 'aa')
-    >>> d[["id_gene", "R", "K", "D", "Q", "E"]]
-        ft id_gene        R        K        D        Q        E
-    0        1  4.75247  5.19300  5.95391  4.07997  6.96189
-    1        2  4.34203  6.23736  6.77708  5.21984  7.01769
-    """
-    query_region = ""
-    if feature == "gene":
-        list_ft = [int(ft) for ft in list_ft]
-        if region == "":
-            region = "gene"
-        query_region = f"AND region = '{region}'"
-    query = f"""
-             SELECT ft, id_{feature}, frequency
-             FROM cin_{feature}_frequency
-             WHERE id_{feature} IN {tuple(list_ft)}
-             AND ft_type = '{component_type}' 
-             {query_region}
-             """
-    df = pd.read_sql_query(query, cnx)
-    df = df.pivot_table(index=f"id_{feature}", columns="ft",
-                        values="frequency").reset_index()
-    df[f"id_{feature}"] = df[f"id_{feature}"].astype(str)
-    return df
-
-
-def get_ft_id(cnx: sqlite3.Connection, feature: str = "exon") -> List[str]:
-    """
-    Return the id of every gene/exons in chia-pet database.
-
-    :param cnx: A connection to chiapet database
-    :param feature: The feature of interest
-    :return: The list of feature id
-    """
-    query = f"SELECT DISTINCT id FROM cin_{feature}"
-    c = cnx.cursor()
-    c.execute(query)
-    res = c.fetchall()
-    return [str(cid[0]) for cid in res]
-
-
 def get_community_table(communities: List[List[str]],
                        size_threshold: int, feature: str) -> pd.DataFrame:
    """
@@ -545,7 +475,8 @@ def create_community_fig(df: pd.DataFrame, feature: str,
    :param iteration: The number of sub samples to create
    """
    if dic_com is None:
-        dic_com = get_feature_by_community(df, feature)
+        dic_com = {} if test_type == 'lm' \
+            else get_feature_by_community(df, feature)
    if test_type == "lm":
        ndf, rdf = lm_with_ctrl(df, target_col, outfile_ctrl)
        df_bar = expand_results_lm(ndf, rdf, target_col, feature)