diff --git a/src/figures_utils/config_figures.py b/src/figures_utils/config_figures.py
index 7e3e7044d6f34e03f6a8c3070cbcaad0496555a6..6fc79b4f9af13778808eb36fb78718a9399110f3 100644
--- a/src/figures_utils/config_figures.py
+++ b/src/figures_utils/config_figures.py
@@ -8,6 +8,61 @@ Description: Configuration class
 
 
 from pathlib import Path
+from typing import List
+from itertools import filterfalse
+from doctest import testmod
+import re
+
+
+def get_p(x: str) -> bool:
+    """
+    Tell whether a line lacks the 'Keep' marker.
+
+    :param x: A line of the projects filtering file
+    :return: False if x contains 'Keep', True otherwise
+
+    >>> get_p('Keep: GMX32, GSM78')
+    False
+    >>> get_p('Delete: Bou, foo, bar')
+    True
+    """
+    return "Keep" not in x
+
+
+def get_good_project() -> List[str]:
+    """
+    Return only good ChIA-PET projects based on TAD co-localisation results.
+
+    The projects are read from the first line containing 'Keep' in
+    Config.file_datasets_filtering.
+
+    :return: The list of projects to keep
+    :raises ValueError: If no line of the file contains 'Keep'
+
+    >>> res = get_good_project()
+    >>> type(res)
+    <class 'list'>
+    >>> len(res) > 0
+    True
+    >>> "GSM1018963_GSM1018964" in res
+    True
+    >>> res[0:2]
+    ['GSM1018963_GSM1018964', 'GSM1018961_GSM1018962']
+    >>> pat = re.compile(r"[GSM0-9_]+")
+    >>> sum([re.findall(pat, c)[0] == c for c in res]) == len(res)
+    True
+    """
+
+    with Config.file_datasets_filtering.open('r') as f:
+        # filterfalse keeps the lines for which get_p is False, i.e. the
+        # lines containing 'Keep'; only the first such line is used.
+        line = next(filterfalse(get_p, f), None)
+    if line is None:
+        # A bare StopIteration from next() would be an unclear failure.
+        raise ValueError(
+            "No 'Keep' line found in {}".format(
+                Config.file_datasets_filtering))
+    return line.replace("Keep: ", "").replace('\n', '').split(', ')
 
 
 class Config:
@@ -22,3 +77,7 @@ class Config:
     db_file = results / 'chia_pet_database.db'
     draw_number = 1000
     file_datasets_filtering = results / "projects_filtering.txt"
+
+
+if __name__ == "__main__":
+    testmod()