diff --git a/src/db_utils/populate_database.py b/src/db_utils/populate_database.py index 2fb89619d9a9e34b1d04ac701ed1ae6caf886cd2..56ff2750217e745a2cf2131d62fe80a65d563950 100755 --- a/src/db_utils/populate_database.py +++ b/src/db_utils/populate_database.py @@ -8,9 +8,10 @@ Description: This file can be used to populate the database of the website. import sqlite3 -from .config import Config, logging_def +from .config import Config +from ..logging_conf import logging_def from pathlib import Path -from typing import List, Tuple +from typing import List, Tuple, Union import lazyparser as lp import logging import pandas as pd @@ -46,7 +47,8 @@ def get_number_columns(table: str, cnx: sqlite3.Connection) -> List[str]: return col_name -def check_file(table: str, file: Path, cnx: sqlite3.Connection) -> List[Tuple]: +def check_file(table: str, file: Union[Path, pd.DataFrame], + cnx: sqlite3.Connection) -> List[Tuple]: """ Check is the file has alwas the same nu_mber of columns and if \ it match the number of columns find in the table ``table``. @@ -54,12 +56,15 @@ def check_file(table: str, file: Path, cnx: sqlite3.Connection) -> List[Tuple]: :param table: The name of the table in the database in which we want \ to add data. :param file: A tabulated file containing the data to insert in the table \ - tab. + tab or a pandas dataframe. :param cnx: Connection to the database :return: The row in file.
""" column_names = get_number_columns(table, cnx) - df = pd.read_csv(file, sep="\t") + if isinstance(file, Path): + df = pd.read_csv(file, sep="\t") + else: + df = file if len(df.columns) != len(column_names): msg = "Wrong number of columns" logging.exception(msg) @@ -70,7 +75,7 @@ def check_file(table: str, file: Path, cnx: sqlite3.Connection) -> List[Tuple]: f"and the columns name of the database table {table} : " \ f"{df.columns} differs" logging.exception(msg) - raise ColumnsNameError(msg) + raise ColumnsNameError(msg) df = df[column_names] return df.values @@ -108,23 +113,15 @@ def insert_data(table: str, content: List[Tuple], cnx: sqlite3.Connection cursor.close() -@lp.parse(file="file", clean=["y", "Y", "n", "N"]) -def populate(table: str, file: str, clean: str, logging_level: str = - "DISABLE"): +def check_table_name(table : str): """ - Update the content of the database of the web interface. + Check if we can use the table name `table`. - :param table: The name of the table in the database in which we want \ - to add data. - :param file: A tabulated file containing the data to insert in the table \ - tab. - :param clean: y to remove the data in the table, n else. - :param logging_level: The level of information to display + :param table: The name of the table to fill + :return: The same name with the prefix cin if it wasn't already there. """ - logging_def(Config.output, logging_level) - mfile = Path(file) - if "gin" not in table: - table = f"gin_{table.lower()}" + if "cin" not in table: + table = f"cin_{table.lower()}" if table not in Config.tables: msg = f"The name {table} is not available."
\ f" If the table exist in the database, " \ @@ -132,15 +129,76 @@ def populate(table: str, file: str, clean: str, logging_level: str = f"wanted in 'tables' field" logging.exception(msg) raise TableNameError(msg) - cnx = sqlite3.connect(Config.db_file) - logging.debug("Checking file ...") - content = check_file(table, mfile, cnx) + return table + + +def cleaning(cnx: sqlite3.Connection, clean: str, table: str) -> None: + """ + Clean the table ``table`` if asked. + + :param cnx: Connection to ChIA-PET database + :param clean: Y to clean the table + :param table: The name of the table to fill in the database + """ if clean.upper() == "Y": logging.debug("Cleaning table") clean_table(table, cnx) + + +def check_content_clean_and_insert(table: str, df: Union[Path, pd.DataFrame], + cnx: sqlite3.Connection, clean: str): + """ + Check content of df, clean database if needed and fill database. + + :param table: The name of the table in the database in which we want \ + to add data. + :param df: A tabulated file containing the data to insert in the table \ + tab or a pandas dataframe. + :param cnx: Connection to ChIA-PET database + :param clean: y to remove the data in the table, n else. + :return: + """ + logging.debug("Checking file ...") + content = check_file(table, df, cnx) + cleaning(cnx, clean, table) logging.debug("Inserting data ...") insert_data(table, content, cnx) +def populate_df(table: str, df: pd.DataFrame, clean: str): + """ + Update the content of the database of the web interface. + + :param table: The name of the table in the database in which we want \ + to add data. + :param df: A pandas dataframe containing the data to insert in the table \ + tab. + :param clean: y to remove the data in the table, n else.
+ """ + table = check_table_name(table) + cnx = sqlite3.connect(Config.db_file) + check_content_clean_and_insert(table, df, cnx, clean) + + +@lp.parse(file="file", clean=["y", "Y", "n", "N"]) +def populate(table: str, file: str, clean: str, logging_level: str = + "DISABLE"): + """ + Update the content of the database of the web interface. + + :param table: The name of the table in the database in which we want \ + to add data. + :param file: A tabulated file containing the data to insert in the table \ + tab. + :param clean: y to remove the data in the table, n else. + :param logging_level: The level of information to display + """ + logging_def(Config.db_file.parent, __file__, logging_level) + mfile = Path(file) + table = check_table_name(table) + cnx = sqlite3.connect(Config.db_file) + check_content_clean_and_insert(table, mfile, cnx, clean) + + if __name__ == "__main__": populate()