From d1232a66cd7019cf2bcb4ab3f64db35d0c98eb37 Mon Sep 17 00:00:00 2001 From: Fontrodona Nicolas <nicolas.fontrodona@ens-lyon.fr> Date: Fri, 19 Jun 2020 14:38:02 +0200 Subject: [PATCH] src/nt_composition/get_projects_interaction.py: add a new parameter, same_gene, to choose whether or not to keep the co-localised exons within the same gene + modification of the get_interaction_by_project function to use this new parameter --- .../get_projects_interaction.py | 30 ++++++++++++++----- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/src/nt_composition/get_projects_interaction.py b/src/nt_composition/get_projects_interaction.py index 8a485638..f9dbc9df 100644 --- a/src/nt_composition/get_projects_interaction.py +++ b/src/nt_composition/get_projects_interaction.py @@ -21,20 +21,31 @@ from ..logging_conf import logging_def import logging -def get_interaction_by_project(cnx: sqlite3.Connection, weight: int - ) -> pd.DataFrame: +def get_interaction_by_project(cnx: sqlite3.Connection, weight: int, + same_gene: bool) -> pd.DataFrame: """ Get the number of interactions by projects. 
:param cnx: Connection to chia-pet database :param weight: A weight threshold + :param same_gene: Say if we are considering interaction within the same \ + gene :return: The table containing the number of interaction by projects """ logging.debug('Getting interaction from database') - query = f"SELECT id_project, COUNT(*) " \ - f"FROM cin_exon_interaction " \ - f"WHERE weight >= {weight} " \ - f"GROUP BY id_project" + if same_gene: + query = f"SELECT id_project, COUNT(*) " \ + f"FROM cin_exon_interaction " \ + f"WHERE weight >= {weight} " \ + f"GROUP BY id_project" + else: + query = f"""SELECT id_project, COUNT(*) + FROM cin_exon_interaction t1, cin_exon t2, cin_exon t3 + WHERE t1.weight >= {weight} + AND t1.exon1 = t2.id + AND t1.exon2 = t3.id + AND t2.id_gene != t3.id_gene + GROUP BY id_project""" df = pd.read_sql_query(query, cnx) df.columns = ['projects', 'interaction_count'] df.sort_values('interaction_count', ascending=True, inplace=True) @@ -79,16 +90,19 @@ def select_projects(df: pd.DataFrame): outf.write("\n".join(sp) + "\n") -def get_interactions_number(weight: int = 1, logging_level: str = "DISABLE"): +def get_interactions_number(weight: int = 1, same_gene: bool = False, + logging_level: str = "DISABLE"): """ Get the number of interaction by projects :param weight: The minimum weight of correlation to consider them + :param same_gene: Say if we are considering interaction within the same \ + gene """ logging_def(ConfigNt.interaction, __file__, logging_level) logging.info(f'Recovering interaction count with a weight of {weight}') cnx = sqlite3.connect(ConfigNt.db_file) - df = get_interaction_by_project(cnx, weight) + df = get_interaction_by_project(cnx, weight, same_gene) make_barplot(df, weight) df.to_csv(ConfigNt.get_interaction_file(weight), sep="\t", index=False) -- GitLab