#!/usr/bin/env python3
# -*- coding: UTF-8 -*-

# Provenance: this module was introduced by commit
# 1f8d5beff192539ee4d4052212276537df1a3bac (Fontrodona Nicolas,
# Wed, 10 Jun 2020) as src/nt_composition/get_projects_interaction.py.

"""
Description: The goal of this script is to get the total number of \
interactions by project in the chia-pet database, and then to:

* draw a barplot of the number of interactions for every project,
* write the table of interaction counts to a tab-separated file,
* select a few projects with different numbers of interactions: for each \
interaction-count threshold (2000, 30000, 100000 and 400000 by default), \
the projects (2 by default) having the smallest counts strictly above \
that threshold are kept.
"""

import logging
import sqlite3
from contextlib import closing
from typing import Iterable, List

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

from .config import ConfigNt
from ..logging_conf import logging_def


# Interaction-count thresholds used by default to pick the projects.
DEFAULT_THRESHOLDS = (2000, 30000, 100000, 400000)


def get_interaction_by_project(cnx: sqlite3.Connection) -> pd.DataFrame:
    """
    Get the number of interactions by projects.

    :param cnx: Connection to chia-pet database
    :return: The table containing the number of interaction by projects. \
    It has the columns ``projects`` and ``interaction_count`` and is sorted \
    by increasing ``interaction_count``.
    """
    logging.debug('Getting interaction from database')
    query = "SELECT id_project, COUNT(*) " \
            "FROM cin_exon_interaction " \
            "GROUP BY id_project"
    df = pd.read_sql_query(query, cnx)
    # The SQL result columns are (id_project, COUNT(*)): give them the
    # names used by the plotting and selection steps.
    df.columns = ['projects', 'interaction_count']
    df = df.sort_values('interaction_count', ascending=True)
    logging.debug(df.head())
    return df


def make_barplot(df: pd.DataFrame) -> None:
    """
    Make a barplot displaying the number of interactions for every project.

    The figure is saved next to ``ConfigNt.interaction_file``, with the \
    same file name but a ``.pdf`` suffix.

    :param df: The dataframe containing the number of interaction by \
    projects (columns ``projects`` and ``interaction_count``)
    """
    logging.debug("Creating barplot figure")
    # NOTE(review): presumably ConfigNt.interaction is the folder holding
    # ConfigNt.interaction_file - confirm against ConfigNt.
    ConfigNt.interaction.mkdir(parents=True, exist_ok=True)
    sns.set()
    sns.set_context('talk')
    fig = plt.figure(figsize=(20, 12))
    try:
        sns.barplot(x="projects", y="interaction_count", data=df)
        plt.xticks(rotation=90)
        # with_suffix only touches the extension, whereas replacing the
        # substring 'txt' would also rewrite a 'txt' found elsewhere in
        # the file name.
        plt.savefig(ConfigNt.interaction_file.with_suffix('.pdf'),
                    bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)


def select_projects(df: pd.DataFrame,
                    thresholds: Iterable[int] = DEFAULT_THRESHOLDS,
                    per_threshold: int = 2) -> List[str]:
    """
    Select the wanted projects and write them in a file

    For each threshold, the ``per_threshold`` projects with the smallest \
    number of interactions strictly above the threshold are selected. \
    A project selected for several thresholds is written only once. The \
    selected projects are written, one per line, in \
    ``ConfigNt.selected_project``.

    :param df: The dataframe containing the number of interaction by \
    projects (columns ``projects`` and ``interaction_count``)
    :param thresholds: The interaction-count thresholds to use
    :param per_threshold: The maximum number of projects to select for \
    each threshold
    :return: The selected projects, in selection order
    """
    logging.debug("Selecting projects")
    # The selection relies on increasing counts: sort here so the result
    # does not depend on the caller. A stable sort leaves an already
    # sorted table in the exact same order.
    ordered = df.sort_values('interaction_count', ascending=True,
                             kind='stable')
    candidates = []
    for threshold in thresholds:
        above = ordered.loc[ordered['interaction_count'] > threshold,
                            'projects']
        candidates.extend(above.iloc[:per_threshold].tolist())
    # Thresholds are nested (a project above 30000 is also above 2000), so
    # the same project can be picked twice: keep the first occurrence only.
    # str() makes the join below safe when project ids are not strings.
    selected = list(dict.fromkeys(str(project) for project in candidates))
    with ConfigNt.selected_project.open('w') as outf:
        # One project per line; nothing is written when nothing is selected
        # (instead of a lone empty line).
        outf.write("".join(f"{project}\n" for project in selected))
    return selected


def get_interactions_number(logging_level: str = "DISABLE"):
    """
    Get the number of interaction by projects

    The interaction counts are read from the database \
    ``ConfigNt.db_file``, displayed in a barplot, written in \
    ``ConfigNt.interaction_file`` (tab-separated) and used to select \
    some projects.

    :param logging_level: The level of information to display \
    (forwarded to ``logging_def``)
    """
    logging_def(ConfigNt.interaction, __file__, logging_level)
    # sqlite3 connections used as context managers only commit/rollback;
    # closing() guarantees the connection is actually closed.
    with closing(sqlite3.connect(ConfigNt.db_file)) as cnx:
        df = get_interaction_by_project(cnx)
    # make_barplot creates the output folder, so it must run before the
    # table is written.
    make_barplot(df)
    df.to_csv(ConfigNt.interaction_file, sep="\t", index=False)
    select_projects(df)


if __name__ == "__main__":
    get_interactions_number()