From cc536f0beda6c23e3ccc326b06a54a8277fc3e27 Mon Sep 17 00:00:00 2001 From: Fontrodona Nicolas <nicolas.fontrodona@ens-lyon.fr> Date: Wed, 27 Jan 2021 11:24:52 +0100 Subject: [PATCH] src/find_interaction_cluster/clip_figures/__main__.py src/find_interaction_cluster/clip_figures/clip_analyser.py: add inflation parameter --- .../clip_figures/__main__.py | 4 ++- .../clip_figures/clip_analyser.py | 33 +++++++++++-------- 2 files changed, 23 insertions(+), 14 deletions(-) diff --git a/src/find_interaction_cluster/clip_figures/__main__.py b/src/find_interaction_cluster/clip_figures/__main__.py index e3c9fffc..5c07cf85 100644 --- a/src/find_interaction_cluster/clip_figures/__main__.py +++ b/src/find_interaction_cluster/clip_figures/__main__.py @@ -15,6 +15,7 @@ from .clip_analyser import clip_folder_analysis @lp.parse(test_type=["permutation", "lm"], feature=["gene", "exon"]) def clip_analysis(clip_folder: str, weight: int = -1, global_weight: int = -1, same_gene: bool = True, + inflation: float = 1.5, feature: str = "exon", project: str = "GSM1018963_GSM1018964", test_type: str = "permutation", iteration: int = 10000, @@ -32,6 +33,7 @@ def clip_analysis(clip_folder: str, weight: int = -1, seen in `global_weight` project are taken into account :param same_gene: Say if we consider as co-localised, exons within the \ same gene (True) or not (False) (default False) + :param inflation: The inflation parameter (default 1.5) :param feature: The feature we want to analyse (default 'exon') :param clip_folder: A folder containing clip file :param test_type: The kind of test to perform for frequency analysis. 
\ @@ -57,7 +59,7 @@ def clip_analysis(clip_folder: str, weight: int = -1, if not clip_folder.is_dir(): raise NotADirectoryError(f"{clip_folder} is not an existing directory") clip_folder_analysis(clip_folder, project, weight, global_weight, - same_gene, feature, test_type, iteration, + same_gene, inflation, feature, test_type, iteration, display_size, community_file, sl_reg, ps, logging_level) diff --git a/src/find_interaction_cluster/clip_figures/clip_analyser.py b/src/find_interaction_cluster/clip_figures/clip_analyser.py index 5e53c78c..eb2c8aae 100644 --- a/src/find_interaction_cluster/clip_figures/clip_analyser.py +++ b/src/find_interaction_cluster/clip_figures/clip_analyser.py @@ -60,7 +60,8 @@ def bedtools_intersect(gene_bed: Path, clip_bed: Path, def find_or_create_community(project: str, weight: int, global_weight: int, - same_gene: bool, feature: str) -> Path: + same_gene: bool, inflation: float, + feature: str) -> Path: """ Find the community file of interest, or if it doesn't exist, create it. @@ -72,24 +73,26 @@ def find_or_create_community(project: str, weight: int, global_weight: int, seen in `global_weight` project are taken into account :param same_gene: Say if we consider as co-localised, exons within the \ same gene (True) or not (False) (default False) + :param inflation: The inflation parameter. 
:param feature: The feature we want to analyse (default 'exon') :return: The file containing community - >>> res = find_or_create_community("", 3, 3, True, "gene") + >>> res = find_or_create_community("", 3, 3, True, 1.5, "gene") >>> res_path = str(res.relative_to(Path(__file__).parents[3])) >>> res_path.endswith("global-weight-3_weight-3_same_gene-True_gene.txt") True >>> f = "results/community_of_co-localized-exons/" + \ - "communities/weight-3_global_weight-3" + "communities/weight-3_global_weight-3-1.5" >>> res_path.startswith(f) True """ cfile = ConfigGraph.get_community_file(project, weight, global_weight, - same_gene, feature, ".txt") + same_gene, inflation, feature, + ".txt") if not cfile.is_file(): project = get_projects(global_weight, project) multiple_community_launcher(weight, global_weight, project, same_gene, - False, feature) + inflation, False, feature) if not cfile.is_file(): raise FileNotFoundError(f"The file {cfile} could not be created !") return cfile @@ -176,7 +179,7 @@ def create_table(feature: str, clip_file: Path, :param com_file: A file containing communities :return: The final dataframe that can be used to create clip figures - >>> cf = find_or_create_community("", 3, 3, True, "gene") + >>> cf = find_or_create_community("", 3, 3, True, 1.5, "gene") >>> create_table("gene", ConfigClip.test_clip_bed, ... ConfigClip.test_gene_bed, cf) id_gene clip_peak peak_density community community_size @@ -203,7 +206,7 @@ def create_table(feature: str, clip_file: Path, def select_community_file(project: str, weight: int, global_weight: int, - same_gene: bool, feature: str, + same_gene: bool, inflation: float, feature: str, community_file: str = "") -> Tuple[Path, Path]: """ Return the community file and output folder that will be used. 
@@ -216,6 +219,7 @@ def select_community_file(project: str, weight: int, global_weight: int, seen in `global_weight` project are taken into account :param same_gene: Say if we consider as co-localised, exons within the \ same gene (True) or not (False) (default False) + :param inflation: The inflation parameter :param feature: The feature we want to analyse (default 'exon') :param community_file: A file containing custom communities. If \ it equals to '' then weight, global weight and same genes parameter are \ @@ -225,7 +229,7 @@ def select_community_file(project: str, weight: int, global_weight: int, """ if community_file == "": com_file = find_or_create_community(project, weight, global_weight, - same_gene, feature) + same_gene, inflation, feature) output = com_file.parent / f"CLIP_community_figures_{feature}" else: com_file = Path(community_file) @@ -297,7 +301,8 @@ def add_regulation_column(df_table: pd.DataFrame, sf_name: str, feature: str, def create_figure(project: str, weight: int, global_weight: int, - same_gene: bool, feature: str, clip_file: Path, + same_gene: bool, inflation: float, feature: str, + clip_file: Path, feature_bed: Path, test_type: str = "permutation", iteration: int = 10000, display_size: bool = False, community_file: str = "", sl_reg: bool = False) -> None: @@ -311,6 +316,7 @@ def create_figure(project: str, weight: int, global_weight: int, seen in `global_weight` project are taken into account :param same_gene: Say if we consider as co-localised, exons within the \ same gene (True) or not (False) (default False) + :param inflation: The inflation parameter :param feature: The feature we want to analyse (default 'exon') :param clip_file: A bed file containing clip :param feature_bed: A bed files containing exons or genes depending on \ @@ -328,7 +334,7 @@ def create_figure(project: str, weight: int, global_weight: int, """ logging.info(f"Working on {clip_file}") com_file, output = select_community_file(project, weight, global_weight, - 
same_gene, feature, + same_gene, inflation, feature, community_file) output.mkdir(exist_ok=True, parents=True) outfile = output / f"{clip_file.name.split('.')[0]}.pdf" @@ -343,8 +349,8 @@ def create_figure(project: str, weight: int, global_weight: int, def clip_folder_analysis(clip_folder: Path, project: str, weight: int, - global_weight: int, same_gene: bool, feature: str, - test_type: str = "permutation", + global_weight: int, same_gene: bool, inflation: float, + feature: str, test_type: str = "permutation", iteration: int = 10000, display_size: bool=False, community_file: str = "", sl_reg: bool = False, ps: int = 1, logging_level: str = "DEBUG") -> None: @@ -358,6 +364,7 @@ def clip_folder_analysis(clip_folder: Path, project: str, weight: int, seen in `global_weight` project are taken into account :param same_gene: Say if we consider as co-localised, exons within the \ same gene (True) or not (False) (default False) + :param inflation: The inflation parameter :param feature: The feature we want to analyse (default 'exon') :param clip_folder: A folder containing clip file :param test_type: The king of test to perform for frequency analysis. \ @@ -381,7 +388,7 @@ def clip_folder_analysis(clip_folder: Path, project: str, weight: int, pool = mp.Pool(processes=min(len(files), ps)) processes = [] for mfile in files: - args = [project, weight, global_weight, same_gene, feature, + args = [project, weight, global_weight, same_gene, inflation, feature, mfile, feature_bed, test_type, iteration, display_size, community_file, sl_reg] processes.append(pool.apply_async(create_figure, args)) -- GitLab