#!/usr/bin/env python3

# -*- coding: UTF-8 -*-

"""
Description: The goal of this script is to create venn diagrams of \
transcription factor projects with a common origin
"""

import sqlite3
from contextlib import closing
from pathlib import Path
from typing import Dict, Set

from matplotlib import pyplot as plt
from matplotlib_venn import venn2

from .config_figures import Config
from .tf_function import get_de_events


def get_project_with_same_origin(cnx: sqlite3.Connection) -> Dict[int, int]:
    """
    Get the projects couples originating from the same Encode experiments.

    :param cnx: Connection to the ChIA-PET database
    :return: A dictionary linking a project with another originating from \
    the same encode dataset.

    >>> get_project_with_same_origin(sqlite3.connect(Config.db_file)) == {
    ... 8: 549, 13: 555, 18: 572, 24: 565, 27: 525, 28: 531, 33: 504,
    ... 34: 501, 45: 527}
    True
    """
    # String literals are single-quoted on purpose: SQLite parses
    # double-quoted tokens as identifiers and only falls back to strings
    # through a legacy quirk that can be disabled at build time.
    query = """SELECT t1.id, t2.id
               FROM cin_project_tf t1, cin_project_tf t2
               WHERE t1.source_db = 'Encode'
               AND t2.source_db = 'KnockTF'
               AND t1.db_id_project = t2.db_id_project"""
    # closing() releases the cursor even if the query raises.
    with closing(cnx.cursor()) as cursor:
        cursor.execute(query)
        rows = cursor.fetchall()
    # If an Encode project is paired with several KnockTF projects, the last
    # row returned wins (same behaviour as before).
    return {encode_id: knocktf_id for encode_id, knocktf_id in rows}


def _get_regulated_genes(cnx: sqlite3.Connection, project: int,
                         regulation: str) -> Set:
    """
    Get the set of genes of a project having the wanted regulation.

    :param cnx: Connection to the ChIA-PET database
    :param project: The id of a project
    :param regulation: The regulation of interest
    :return: The genes of `project` whose regulation is `regulation`
    """
    # NOTE(review): assumes each item returned by get_de_events holds the
    # regulation at index 0 and the gene at index 1 - confirm in tf_function.
    return {event[1] for event in get_de_events(cnx, project)
            if event[0] == regulation}


def create_venn_diagram_for_2_projects(cnx: sqlite3.Connection,
                                       project1: int, project2: int,
                                       regulation: str,
                                       output: Path) -> None:
    """
    Create a venn diagram comparing the genes with a given regulation \
    in two projects.

    The figure is saved in `output` under the name \
    `{project1}-{project2}_{regulation}.pdf`.

    :param cnx: Connection to the ChIA-PET database
    :param project1: The id of a project
    :param project2: Another project id
    :param regulation: The regulation of interest
    :param output: The folder where the result will be created
    """
    genes1 = _get_regulated_genes(cnx, project1, regulation)
    genes2 = _get_regulated_genes(cnx, project2, regulation)
    try:
        venn2([genes1, genes2],
              set_labels=(f"{project1}_{regulation}",
                          f"{project2}_{regulation}"))
        plt.savefig(output / f"{project1}-{project2}_{regulation}.pdf")
    finally:
        # Always release the current figure, even when drawing or saving
        # fails, so figures do not pile up across calls.
        plt.clf()
        plt.close()


def create_venn_figures() -> None:
    """
    Create a venn diagram for every project with the same origin but \
    obtained with different analysis method.

    One figure is produced per couple of projects and per regulation \
    ("up" and "down"), inside the `Config.venn_tf` folder.
    """
    Config.venn_tf.mkdir(parents=True, exist_ok=True)
    # sqlite3 connections used as context managers only handle transactions,
    # they are not closed on exit: closing() guarantees the connection is
    # released even if a figure fails.
    with closing(sqlite3.connect(Config.db_file)) as cnx:
        dic_project = get_project_with_same_origin(cnx)
        for encode_id, knocktf_id in dic_project.items():
            for reg in ("up", "down"):
                create_venn_diagram_for_2_projects(cnx, encode_id, knocktf_id,
                                                   reg, Config.venn_tf)


if __name__ == "__main__":
    create_venn_figures()