Skip to content
Snippets Groups Projects
Commit 181f87ed authored by nfontrod's avatar nfontrod
Browse files

src/db_utils/populate_database.py: creation of populate_df function (and other...

src/db_utils/populate_database.py: creation of populate_df function (and other functions to lower code duplication) that fill the database directly with a pandas dataframe
parent ac389ede
No related branches found
No related tags found
No related merge requests found
......@@ -8,9 +8,10 @@ Description: This file can be used to populate the database of the website.
import sqlite3
from .config import Config, logging_def
from .config import Config
from ..logging_conf import logging_def
from pathlib import Path
from typing import List, Tuple
from typing import List, Tuple, Union
import lazyparser as lp
import logging
import pandas as pd
......@@ -46,7 +47,8 @@ def get_number_columns(table: str, cnx: sqlite3.Connection) -> List[str]:
return col_name
def check_file(table: str, file: Path, cnx: sqlite3.Connection) -> List[Tuple]:
def check_file(table: str, file: Union[Path, pd.DataFrame],
cnx: sqlite3.Connection) -> List[Tuple]:
"""
Check if the file always has the same number of columns and if \
it matches the number of columns found in the table ``table``.
......@@ -54,12 +56,15 @@ def check_file(table: str, file: Path, cnx: sqlite3.Connection) -> List[Tuple]:
:param table: The name of the table in the database in which we want \
to add data.
:param file: A tabulated file containing the data to insert in the table \
tab.
tab or a pandas dataframe.
:param cnx: Connection to the database
:return: The row in file.
"""
column_names = get_number_columns(table, cnx)
df = pd.read_csv(file, sep="\t")
if isinstance(file, Path):
df = pd.read_csv(file, sep="\t")
else:
df = file
if len(df.columns) != len(column_names):
msg = "Wrong number of columns"
logging.exception(msg)
......@@ -70,7 +75,7 @@ def check_file(table: str, file: Path, cnx: sqlite3.Connection) -> List[Tuple]:
f"and the columns name of the database table {table} : " \
f"{df.columns} differs"
logging.exception(msg)
raise ColumnsNameError(msg)
raise ColumnsNameError(msg)
df = df[column_names]
return df.values
......@@ -108,23 +113,15 @@ def insert_data(table: str, content: List[Tuple], cnx: sqlite3.Connection
cursor.close()
@lp.parse(file="file", clean=["y", "Y", "n", "N"])
def populate(table: str, file: str, clean: str, logging_level: str =
"DISABLE"):
def check_table_name(table : str):
"""
Update the content of the database of the web interface.
Check if we can use the table name `table`.
:param table: The name of the table in the database in which we want \
to add data.
:param file: A tabulated file containing the data to insert in the table \
tab.
:param clean: y to remove the data in the table, n else.
:param logging_level: The level of information to display
:param table: The name of the table to fill
:return: The same name with the prefix gin if it wasn't here.
"""
logging_def(Config.output, logging_level)
mfile = Path(file)
if "gin" not in table:
table = f"gin_{table.lower()}"
if "cin" not in table:
table = f"cin_{table.lower()}"
if table not in Config.tables:
msg = f"The name {table} is not available." \
f" If the table exist in the database, " \
......@@ -132,15 +129,76 @@ def populate(table: str, file: str, clean: str, logging_level: str =
f"wanted in 'tables' field"
logging.exception(msg)
raise TableNameError(msg)
cnx = sqlite3.connect(Config.db_file)
logging.debug("Checking file ...")
content = check_file(table, mfile, cnx)
return table
def cleaning(cnx: sqlite3.Connection, clean: str, table: str) -> None:
    """
    Empty the table ``table`` when the caller requested it.

    :param cnx: Connection to ChIA-PET database
    :param clean: Y (case-insensitive) to clean the table; any other
        value leaves the table untouched.
    :param table: The name of the table to fill in the database
    """
    # Guard clause: bail out early unless cleaning was explicitly requested.
    if clean.upper() != "Y":
        return
    logging.debug("Cleaning table")
    clean_table(table, cnx)
def check_content_clean_and_insert(table: str, df: Union[Path, pd.DataFrame],
                                   cnx: sqlite3.Connection, clean: str):
    """
    Validate the input against ``table``, optionally empty the table, \
    then insert the validated rows.

    :param table: The name of the table in the database in which we want \
    to add data.
    :param df: Either the path of a tabulated file or a pandas dataframe \
    holding the data to insert in the table.
    :param cnx: Connection to ChIA-PET database
    :param clean: y to remove the data in the table, n else.
    :return: None
    """
    # Validation runs first: if check_file raises, the table has not been
    # cleaned yet and its current content is left as it was.
    logging.debug("Checking file ...")
    rows = check_file(table, df, cnx)

    cleaning(cnx, clean, table)

    logging.debug("Inserting data ...")
    insert_data(table, rows, cnx)
def populate_df(table: str, df: pd.DataFrame, clean: str):
    """
    Update the content of the database of the web interface directly \
    from a pandas dataframe.

    :param table: The name of the table in the database in which we want \
    to add data.
    :param df: A pandas dataframe containing the data to insert in the \
    table.
    :param clean: y to remove the data in the table, n else.
    :raises TableNameError: If the resolved table name is not listed in \
    ``Config.tables``.
    """
    table = check_table_name(table)
    cnx = sqlite3.connect(Config.db_file)
    try:
        check_content_clean_and_insert(table, df, cnx, clean)
    finally:
        # Always release the connection, even when validation or insertion
        # raises. close() does not commit by itself; this assumes
        # insert_data has already committed its work -- TODO confirm.
        cnx.close()
# NOTE(review): lazyparser appears to build the command-line help from the
# docstring below, so its ``:param`` layout is kept unchanged -- confirm
# before reformatting it.
@lp.parse(file="file", clean=["y", "Y", "n", "N"])
def populate(table: str, file: str, clean: str, logging_level: str =
             "DISABLE"):
    """
    Update the content of the database of the web interface.

    :param table: The name of the table in the database in which we want \
    to add data.
    :param file: A tabulated file containing the data to insert in the table \
    tab.
    :param clean: y to remove the data in the table, n else.
    :param logging_level: The level of information to display
    """
    logging_def(Config.db_file.parent, __file__, logging_level)
    mfile = Path(file)
    # check_table_name raises TableNameError when the resolved name is not
    # listed in Config.tables, so the database is only opened for a valid
    # table.
    table = check_table_name(table)
    cnx = sqlite3.connect(Config.db_file)
    try:
        check_content_clean_and_insert(table, mfile, cnx, clean)
    finally:
        # Always release the connection, even when validation or insertion
        # raises. close() does not commit by itself; this assumes
        # insert_data has already committed its work -- TODO confirm.
        cnx.close()
# Script entry point: populate is called without arguments here; its
# parameters are presumably supplied from the command line by the
# lp.parse decorator -- TODO confirm.
if __name__ == "__main__":
    populate()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment