From 5975e7c33a9c9743b2604896b41e63df07495e05 Mon Sep 17 00:00:00 2001
From: Fontrodona Nicolas <nicolas.fontrodona@ens-lyon.fr>
Date: Wed, 4 Mar 2020 10:07:18 +0100
Subject: [PATCH] src/db_utils/fill_splicing_lore_tables.py,
 src/db_utils/__main__.py: creation of fill_splicing_lore_tables.py that fills
 cin_ase_event and cin_project_splicing_lore tables

---
 src/db_utils/__main__.py                  | 22 ++------
 src/db_utils/fill_splicing_lore_tables.py | 63 +++++++++++++++++++++++
 2 files changed, 67 insertions(+), 18 deletions(-)
 create mode 100644 src/db_utils/fill_splicing_lore_tables.py

diff --git a/src/db_utils/__main__.py b/src/db_utils/__main__.py
index c6679832..55599b19 100755
--- a/src/db_utils/__main__.py
+++ b/src/db_utils/__main__.py
@@ -10,8 +10,7 @@ from .config import Config
 from ..logging_conf import logging_def
 from .db_creation import main_create_db
 from .fill_exon_n_gene_table import main_fill_exon_n_gene
-# from .create_freq_table import create_freq_table
-# from .populate_database import populate
+from .fill_splicing_lore_tables import fill_splicing_lore_data
 import logging
 
 
@@ -24,21 +23,8 @@ def launcher(logging_level: str = "INFO"):
     main_create_db('DISABLE')
     logging.info('Filling cin_gene and cin_exon')
     main_fill_exon_n_gene('DISABLE')
-    # logging.info(f"Creation of {Config.ctrl_exon_file} file")
-    # get_ctrl(Config.exon_intern)
-    # logging.info(f"Creation of {Config.exon_file} file")
-    # get_exon_table(Config.ctrl_exon_file, Config.gene_file, logging_level)
-    # logging.info(f"Creation of {Config.frequency_file} file")
-    # create_freq_table(Config.bed_orf, Config.bed_exon, Config.ctrl_exon_file,
-    #                   logging_level)
-    #
-    #Â mpopulate = populate.__wrapped__
-    # logging.info(f"Filling {Config.tables[0]} table")
-    # mpopulate(Config.tables[0], Config.gene_file, "y", logging_level)
-    # logging.info(f"Filling {Config.tables[1]} table")
-    # mpopulate(Config.tables[1], Config.exon_file, "y", logging_level)
-    # logging.info(f"Filling {Config.tables[2]} table")
-    # mpopulate(Config.tables[2], Config.frequency_file, "y", logging_level)
+    logging.info('Filling splicing lore tables')
+    fill_splicing_lore_data('DISABLE')
 
 
-launcher(logging_level = "DEBUG")
\ No newline at end of file
+launcher(logging_level="DEBUG")
diff --git a/src/db_utils/fill_splicing_lore_tables.py b/src/db_utils/fill_splicing_lore_tables.py
new file mode 100644
index 00000000..63cadbb0
--- /dev/null
+++ b/src/db_utils/fill_splicing_lore_tables.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+
+# -*- coding: UTF-8 -*-
+
+"""
+Description: The goal of this script is to fill the tables \
+cin_ase_event and cin_project_splicing_lore
+"""
+
+from .config import Config
+import pandas as pd
+import logging
+from ..logging_conf import logging_def
+from .populate_database import populate_df
+
+
+def load_splicing_projects() -> pd.DataFrame:
+    """
+    Load the projects of splicing lore database.
+
+    :return: The dataframe of the splicing lore projects
+    """
+    df = pd.read_csv(Config.splicing_projects, sep="\t")
+    logging.debug(df.head())
+    return df
+
+
+def load_ase_event() -> pd.DataFrame:
+    """
+    Load the alternative splicing events detected in the splicing lore \
+    database.
+
+    :return: The dataframe of splicing lore alternative splicing events
+    """
+    logging.debug('loading cin_project_splicing_lore')
+    df = pd.read_csv(Config.ase_event_file, sep="\t")
+    df.drop(["gene_symbol", "chromosome", "start", "stop", "exons_flanquants"],
+            inplace=True, axis=1)
+    cols = list(df.columns)
+    if 'exon_skipped' in cols:  # list.index() raises ValueError, never -1
+        cols[cols.index('exon_skipped')] = 'pos'
+    df.columns = cols
+    df['exon_id'] = df['gene_id'].astype(str) + '_' + df['pos'].astype(str)
+    logging.debug(df.head())
+    return df
+
+
+def fill_splicing_lore_data(logging_level: str = 'DISABLE') -> None:
+    """
+    Fill the tables cin_ase_event and cin_project_splicing_lore
+    """
+    logging_def(Config.results, __file__, logging_level)
+
+    sf_projects = load_splicing_projects()
+    ase_events = load_ase_event()
+    logging.debug('Filling cin_project_splicing_lore')
+    populate_df(table='cin_project_splicing_lore', df=sf_projects, clean='y')
+    logging.debug('Filling cin_ase_event')
+    populate_df(table='cin_ase_event', df=ase_events, clean='y')
+
+
+if __name__ == "__main__":
+    fill_splicing_lore_data('DEBUG')
-- 
GitLab