Skip to content
Snippets Groups Projects
Commit 4139ade2 authored by alapendr's avatar alapendr
Browse files

Merge branch 'master' of gitbio.ens-lyon.fr:LBMC/regards/chia-pet_network

parents d2437d02 6ba5d245
No related merge requests found
......@@ -10,8 +10,7 @@ from .config import Config
from ..logging_conf import logging_def
from .db_creation import main_create_db
from .fill_exon_n_gene_table import main_fill_exon_n_gene
# from .create_freq_table import create_freq_table
# from .populate_database import populate
from .fill_splicing_lore_tables import fill_splicing_lore_data
import logging
......@@ -24,21 +23,8 @@ def launcher(logging_level: str = "INFO"):
main_create_db('DISABLE')
logging.info('Filling cin_gene and cin_exon')
main_fill_exon_n_gene('DISABLE')
# logging.info(f"Creation of {Config.ctrl_exon_file} file")
# get_ctrl(Config.exon_intern)
# logging.info(f"Creation of {Config.exon_file} file")
# get_exon_table(Config.ctrl_exon_file, Config.gene_file, logging_level)
# logging.info(f"Creation of {Config.frequency_file} file")
# create_freq_table(Config.bed_orf, Config.bed_exon, Config.ctrl_exon_file,
# logging_level)
#
# mpopulate = populate.__wrapped__
# logging.info(f"Filling {Config.tables[0]} table")
# mpopulate(Config.tables[0], Config.gene_file, "y", logging_level)
# logging.info(f"Filling {Config.tables[1]} table")
# mpopulate(Config.tables[1], Config.exon_file, "y", logging_level)
# logging.info(f"Filling {Config.tables[2]} table")
# mpopulate(Config.tables[2], Config.frequency_file, "y", logging_level)
logging.info('Filling splicing lore tables')
fill_splicing_lore_data('DISABLE')
launcher(logging_level = "DEBUG")
\ No newline at end of file
launcher(logging_level="DEBUG")
......@@ -13,8 +13,11 @@ class Config:
"""
A class containing every parameters used in the submodule db_utils
"""
db_file = Path(__file__).parents[2] / "results" / 'chia_pet_database.db'
tables = ["cin_gene", "cin_exon", "cin_frequency", "cin_interaction"]
bed_exon = Path(__file__).parents[2] / 'data' / 'bed' / 'exon.bed'
bed_gene = Path(__file__).parents[2] / 'data' / 'bed' / 'gene.bed'
data = Path(__file__).parents[2] / 'data'
results = Path(__file__).parents[2] / "results"
db_file = results / 'chia_pet_database.db'
bed_exon = data / 'bed' / 'exon.bed'
bed_gene = data / 'bed' / 'gene.bed'
ase_event_file = data / 'splicing_lore_data' / 'ase_event.txt'
splicing_projects = data / 'splicing_lore_data' / \
'splicing_lore_projects.txt'
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
Description: The goal of this script is to fill the tables \
cin_ase_event and cin_project_splicing_lore
"""
from .config import Config
import pandas as pd
import logging
from ..logging_conf import logging_def
from .populate_database import populate_df
def load_splicing_projects() -> pd.DataFrame:
    """
    Read the tab-separated splicing lore projects file into a dataframe.

    The file location is taken from ``Config.splicing_projects``; the first
    rows of the loaded table are emitted at DEBUG level.

    :return: The dataframe of the splicing lore projects
    """
    projects = pd.read_csv(Config.splicing_projects, sep="\t")
    preview = projects.head()
    logging.debug(preview)
    return projects
def load_ase_event() -> pd.DataFrame:
    """
    Load the alternative splicing events detected in the splicing lore
    database.

    The file ``Config.ase_event_file`` is read as a tab-separated table.
    The columns gene_symbol, chromosome, start, stop and exons_flanquants
    are dropped, the column exon_skipped is renamed pos, and an exon_id
    column is built by joining gene_id and pos with an underscore.

    :return: The dataframe of splicing lore alternative splicing events
    """
    logging.debug('loading cin_ase_event')
    df = pd.read_csv(Config.ase_event_file, sep="\t")
    df = df.drop(columns=["gene_symbol", "chromosome", "start", "stop",
                          "exons_flanquants"])
    # list.index() raises ValueError instead of returning -1, so the former
    # "!= -1" guard could never skip the renaming. DataFrame.rename simply
    # leaves the columns untouched when 'exon_skipped' is absent.
    df = df.rename(columns={'exon_skipped': 'pos'})
    df['exon_id'] = df['gene_id'].astype(str) + '_' + df['pos'].astype(str)
    logging.debug(df.head())
    return df
def fill_splicing_lore_data(logging_level: str = 'DISABLE') -> None:
    """
    Populate the tables cin_project_splicing_lore and cin_ase_event.

    Both input tables are loaded first, then each one is written to its
    database table with the previous content of the table removed
    (``clean='y'``).

    :param logging_level: The logging level handed to ``logging_def``.
    """
    logging_def(Config.results, __file__, logging_level)
    # Tuple items are evaluated left to right, so the projects are still
    # loaded before the splicing events, and both before any insertion.
    steps = (
        ('Filling cin_project_splicing_lore',
         'cin_project_splicing_lore', load_splicing_projects()),
        ('Filling cin_ase_event',
         'cin_ase_event', load_ase_event()),
    )
    for message, table_name, content in steps:
        logging.debug(message)
        populate_df(table=table_name, df=content, clean='y')
if __name__ == "__main__":
fill_splicing_lore_data('DEBUG')
......@@ -113,16 +113,31 @@ def insert_data(table: str, content: List[Tuple], cnx: sqlite3.Connection
cursor.close()
def check_table_name(table : str):
def get_table_names(cnx: sqlite3.Connection) -> List[str]:
    """
    Get the list of available table names.

    Only entries of type 'table' in sqlite_master are reported.

    :param cnx: The connection to ChIA-PET database.
    :return: The list of available tables
    """
    cursor = cnx.cursor()
    try:
        cursor.execute(
            """SELECT name FROM sqlite_master WHERE type = 'table';""")
        # Each row is a 1-tuple holding the table name.
        return [row[0] for row in cursor.fetchall()]
    finally:
        # Release the cursor even when the query fails.
        cursor.close()
def check_table_name(cnx: sqlite3.Connection, table: str):
"""
Check if we can use the table name `table`.
:param cnx: Connection to ChIA-PET database
:param table: The name of the table to fille
:return: The same name with the prefix gin if it wasn't here.
"""
if "cin" not in table:
table = f"cin_{table.lower()}"
if table not in Config.tables:
tables = get_table_names(cnx)
if table not in tables:
msg = f"The name {table} is not available." \
f" If the table exist in the database, " \
f"change the config file to add the table name " \
......@@ -175,8 +190,8 @@ def populate_df(table: str, df: pd.DataFrame, clean: str):
tab.
:param clean: y to remove the data in the table, n else.
"""
table = check_table_name(table)
cnx = sqlite3.connect(Config.db_file)
table = check_table_name(cnx, table)
check_content_clean_and_insert(table, df, cnx, clean)
......@@ -195,8 +210,8 @@ def populate(table: str, file: str, clean: str, logging_level: str =
"""
logging_def(Config.db_file.parent, __file__, logging_level)
mfile = Path(file)
table = check_table_name(table)
cnx = sqlite3.connect(Config.db_file)
table = check_table_name(cnx, table)
check_content_clean_and_insert(table, mfile, cnx, clean)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment