Skip to content
Snippets Groups Projects
Commit 1f8d5bef authored by nfontrod's avatar nfontrod
Browse files

src/nt_composition/get_projects_interaction.py: This script creates a figure...

src/nt_composition/get_projects_interaction.py: This script creates a figure that shows the number of co-localisation in chia-pet projects, and select some projects with different number of interaction
parent 43d32045
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
Description: The goal of this script is to get the total \
number of interactions by project and to select 9 projects meeting the \
following requirements:
* 3 projects must be those with the fewest interactions.
* 3 projects must be those with the greatest number of interactions.
* 3 projects must contain an average number of interactions.
"""
import sqlite3
import pandas as pd
from .config import ConfigNt
import seaborn as sns
import matplotlib.pyplot as plt
from ..logging_conf import logging_def
import logging
def get_interaction_by_project(cnx: sqlite3.Connection) -> pd.DataFrame:
    """
    Get the number of interactions by projects.

    :param cnx: Connection to chia-pet database
    :return: The table containing the number of interaction by projects. \
    It has two columns, ``projects`` and ``interaction_count``, and its \
    rows are sorted by ascending ``interaction_count``.
    """
    logging.debug('Getting interaction from database')
    query = "SELECT id_project, COUNT(*) " \
            "FROM cin_exon_interaction " \
            "GROUP BY id_project"
    df = pd.read_sql_query(query, cnx)
    # The query returns exactly two columns (project id, row count): give
    # them the names used by the rest of the module.
    df.columns = ['projects', 'interaction_count']
    # A stable sort keeps projects with the same count in the order returned
    # by the query, so position-based selections made on this table are
    # reproducible from one run to the next.
    df = df.sort_values('interaction_count', ascending=True,
                        kind='mergesort')
    logging.debug(df.head())
    return df
def make_barplot(df: pd.DataFrame):
    """
    Make a barplot displaying the number of interactions for every project.

    The figure is written next to ``ConfigNt.interaction_file``, under the \
    same file name with a ``.pdf`` extension.

    :param df: The dataframe containing the number of interaction by \
    projects (columns ``projects`` and ``interaction_count``)
    """
    logging.debug("Creating barplot figure")
    ConfigNt.interaction.mkdir(parents=True, exist_ok=True)
    sns.set()
    sns.set_context('talk')
    fig = plt.figure(figsize=(20, 12))
    try:
        sns.barplot(x="projects", y="interaction_count", data=df)
        # Project names are long: display them vertically
        plt.xticks(rotation=90)
        # with_suffix only swaps the extension, whereas a plain
        # str.replace('txt', 'pdf') would also rewrite any 'txt' found in
        # the stem and do nothing for a file that does not end with 'txt'.
        plt.savefig(ConfigNt.interaction_file.with_suffix('.pdf'),
                    bbox_inches='tight')
    finally:
        # Always release the figure, even if plotting or saving failed
        plt.close(fig)
def select_projects(df: pd.DataFrame,
                    thresholds=(2000, 30000, 100000, 400000),
                    per_threshold: int = 2):
    """
    Select the wanted projects and write them in a file

    For each threshold, the ``per_threshold`` projects with the smallest \
    number of interactions strictly above that threshold are kept. The \
    selected projects are written, one per line, in \
    ``ConfigNt.selected_project``.

    :param df: The dataframe containing the number of interaction by \
    projects (columns ``projects`` and ``interaction_count``)
    :param thresholds: The minimum numbers of interactions (exclusive) \
    used to pick projects
    :param per_threshold: The maximum number of projects to pick for each \
    threshold
    """
    logging.debug("Selecting projects")
    # Do not rely on the caller for the ordering: the positional slice below
    # only makes sense on a table sorted by ascending interaction count.
    # The sort is stable, so an already sorted table is left untouched.
    ordered = df.sort_values('interaction_count', ascending=True,
                             kind='mergesort')
    picked = []
    for threshold in thresholds:
        above = ordered.loc[ordered['interaction_count'] > threshold,
                            'projects']
        # str() so that numeric project ids can be joined below
        picked.extend(str(project)
                      for project in above.values[:per_threshold])
    # The threshold bands overlap (count > 30000 implies count > 2000), so
    # a project can be picked several times when a band holds fewer than
    # ``per_threshold`` projects: keep the first occurrence only.
    selection = list(dict.fromkeys(picked))
    with ConfigNt.selected_project.open('w') as outf:
        outf.write("\n".join(selection) + "\n")
def get_interactions_number(logging_level: str = "DISABLE"):
    """
    Get the number of interaction by projects

    The interaction counts are read from the database \
    ``ConfigNt.db_file``, displayed in a barplot, written as a \
    tab-separated table in ``ConfigNt.interaction_file`` and finally used \
    to select some projects.

    :param logging_level: The logging level, forwarded to ``logging_def``
    """
    logging_def(ConfigNt.interaction, __file__, logging_level)
    cnx = sqlite3.connect(ConfigNt.db_file)
    try:
        df = get_interaction_by_project(cnx)
    finally:
        # The table is fully loaded in memory: the connection is no longer
        # needed, and must be released even if the query failed.
        cnx.close()
    # make_barplot creates the output folder, so it runs before to_csv
    make_barplot(df)
    df.to_csv(ConfigNt.interaction_file,
              sep="\t", index=False)
    select_projects(df)
# Script entry point: runs the whole analysis with the default logging level.
# NOTE(review): this module uses relative imports, so it presumably has to be
# launched as part of its package (``python -m ...``) — confirm.
if __name__ == "__main__":
    get_interactions_number()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment