From c229c568269e33ad537fb45250caf3c0b792f03d Mon Sep 17 00:00:00 2001 From: alapendr <audrey.lapendry@ens-lyon.fr> Date: Tue, 10 Mar 2020 10:40:51 +0100 Subject: [PATCH] get_fill_metadata.py: problems solving --- .../projects_metadata/get_fill_metadata.py | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/src/db_utils/projects_metadata/get_fill_metadata.py b/src/db_utils/projects_metadata/get_fill_metadata.py index 0afb89ea..1886020d 100644 --- a/src/db_utils/projects_metadata/get_fill_metadata.py +++ b/src/db_utils/projects_metadata/get_fill_metadata.py @@ -15,6 +15,8 @@ import pandas as pd from io import StringIO from ..populate_database import populate_df import re +import logging +from ...logging_conf import logging_def def launch_PmagicGEO_script() -> None: @@ -25,8 +27,8 @@ def launch_PmagicGEO_script() -> None: :param GSM_file: A file with the list of GSM or GSE for which we want to \ obtain metadata. """ - subprocess.check_output(f"PmagicGEO.pl -i {Config.geo_id_file} " \ - f"-p {Config.output} " f"-o {Config.outfile}", \ + subprocess.check_output(f"PmagicGEO.pl -i {Config.geo_id_file} " + f"-p {Config.output} " f"-o {Config.outfile}", shell=True, stderr=subprocess.STDOUT) @@ -67,24 +69,27 @@ def merge_metadata() -> pd.DataFrame: """ Merging of metadata obtained through PmagicGEO.pl and manually. """ - projects = load_metadata().merge(load_manual_metadata(), left_on = "gsm", \ - right_on = "GSM") + projects = load_metadata().merge(load_manual_metadata(), left_on = "gsm", + right_on = "GSM") projects = projects.drop(["gsm", "GSM"], axis=1) - projects.rename(columns={"GSM-GSE": "id_project", "GSM-Title": "name", \ - "GSM-Characteristics": "description", \ - "GSM_FAMILYSOFT-contact_institute": "institute", \ - "GSEA-Citation(s)": "citation", "Cell line": "cell_line", \ - "Real source": "database", "Kept GSM": "id_sample", \ + projects.rename(columns={"GSM-GSE": "id_project", "GSM-Title": "name", + "GSM-Characteristics": "description", + "GSM_FAMILYSOFT-contact_institute": "institute", + "GSEA-Citation(s)": "citation", "Cell line": "cell_line", + "Real source": "database", "Kept GSM": "id_sample", "Antibody": "antibody"}, inplace=True) projects = projects.reset_index().rename(columns={"index":"id"}) logging.debug(projects.head()) return projects -def fill_projects_table() -> None: +def fill_projects_table(logging_level: str = "DISABLE",) -> None: """ Fill the table projects + + :param logging_level: The level of data to display. """ + logging_def(Config.output, __file__, logging_level) logging.debug('Filling cin_projects') populate_df(table='cin_projects', df=merge_metadata(), clean='y') -- GitLab