Skip to content
Snippets Groups Projects
Commit d1232a66 authored by nfontrod's avatar nfontrod
Browse files

src/nt_composition/get_projects_interaction.py: add a new parameter, same_gene...

src/nt_composition/get_projects_interaction.py: add a new parameter, same_gene, to choose whether or not to keep the co-localised exons within the same gene; modify the get_interaction_by_project function to use this new parameter
parent e2d22cc1
No related branches found
No related tags found
No related merge requests found
......@@ -21,20 +21,31 @@ from ..logging_conf import logging_def
import logging
def get_interaction_by_project(cnx: sqlite3.Connection, weight: int
) -> pd.DataFrame:
def get_interaction_by_project(cnx: sqlite3.Connection, weight: int,
same_gene: bool) -> pd.DataFrame:
"""
Get the number of interactions by projects.
:param cnx: Connection to chia-pet database
:param weight: A weight threshold
:param same_gene: Say if we are considering interaction within the same \
gene
:return: The table containing the number of interaction by projects
"""
logging.debug('Getting interaction from database')
query = f"SELECT id_project, COUNT(*) " \
f"FROM cin_exon_interaction " \
f"WHERE weight >= {weight} " \
f"GROUP BY id_project"
if same_gene:
query = f"SELECT id_project, COUNT(*) " \
f"FROM cin_exon_interaction " \
f"WHERE weight >= {weight} " \
f"GROUP BY id_project"
else:
query = f"""SELECT id_project, COUNT(*)
FROM cin_exon_interaction t1, cin_exon t2, cin_exon t3
WHERE t1.weight >= {weight}
AND t1.exon1 = t2.id
AND t1.exon2 = t3.id
AND t2.id_gene != t3.id_gene
GROUP BY id_project"""
df = pd.read_sql_query(query, cnx)
df.columns = ['projects', 'interaction_count']
df.sort_values('interaction_count', ascending=True, inplace=True)
......@@ -79,16 +90,19 @@ def select_projects(df: pd.DataFrame):
outf.write("\n".join(sp) + "\n")
def get_interactions_number(weight: int = 1, logging_level: str = "DISABLE"):
def get_interactions_number(weight: int = 1, same_gene: bool = False,
logging_level: str = "DISABLE"):
"""
Get the number of interaction by projects
:param weight: The minimum weight of correlation to consider them
:param same_gene: Say if we are considering interaction within the same \
gene
"""
logging_def(ConfigNt.interaction, __file__, logging_level)
logging.info(f'Recovering interaction count with a weight of {weight}')
cnx = sqlite3.connect(ConfigNt.db_file)
df = get_interaction_by_project(cnx, weight)
df = get_interaction_by_project(cnx, weight, same_gene)
make_barplot(df, weight)
df.to_csv(ConfigNt.get_interaction_file(weight),
sep="\t", index=False)
......
0% Loading Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment