diff --git a/src/find_interaction_cluster/clip_figures/clip_analyser.py b/src/find_interaction_cluster/clip_figures/clip_analyser.py index eb2c8aae99a1ac7470c6b97223e1aae2a14c6fe8..230f64b97e15bbe6cfe90a38409855040d9004c9 100644 --- a/src/find_interaction_cluster/clip_figures/clip_analyser.py +++ b/src/find_interaction_cluster/clip_figures/clip_analyser.py @@ -60,7 +60,7 @@ def bedtools_intersect(gene_bed: Path, clip_bed: Path, def find_or_create_community(project: str, weight: int, global_weight: int, - same_gene: bool, inflation: float, + same_gene: bool, inflation: float, cell_line: str, feature: str) -> Path: """ Find the community file of interest, or if it doesn't exist, create it. @@ -74,10 +74,12 @@ def find_or_create_community(project: str, weight: int, global_weight: int, :param same_gene: Say if we consider as co-localised, exons within the \ same gene (True) or not (False) (default False) :param inflation: The inflation parameter. + :param cell_line: Interactions are only selected from projects made on \ + a specific cell line (ALL to disable this filter). (default ALL) :param feature: The feature we want to analyse (default 'exon') :return: The file containing community - >>> res = find_or_create_community("", 3, 3, True, 1.5, "gene") + >>> res = find_or_create_community("", 3, 3, True, 1.5, "ALL", "gene") >>> res_path = str(res.relative_to(Path(__file__).parents[3])) >>> res_path.endswith("global-weight-3_weight-3_same_gene-True_gene.txt") True @@ -87,12 +89,13 @@ def find_or_create_community(project: str, weight: int, global_weight: int, True """ cfile = ConfigGraph.get_community_file(project, weight, global_weight, - same_gene, inflation, feature, - ".txt") + same_gene, inflation, cell_line, + feature, ".txt") if not cfile.is_file(): project = get_projects(global_weight, project) multiple_community_launcher(weight, global_weight, project, same_gene, - inflation, False, feature) + inflation, cell_line, False, + feature=feature) if not cfile.is_file(): raise FileNotFoundError(f"The file {cfile} could not be created !") return cfile @@ -206,7 +209,8 @@ def create_table(feature: str, clip_file: Path, def select_community_file(project: str, weight: int, global_weight: int, - same_gene: bool, inflation: float, feature: str, + same_gene: bool, inflation: float, cell_line: str, + feature: str, community_file: str = "") -> Tuple[Path, Path]: """ Return the community file and output folder that will be used. @@ -220,6 +224,8 @@ def select_community_file(project: str, weight: int, global_weight: int, :param same_gene: Say if we consider as co-localised, exons within the \ same gene (True) or not (False) (default False) :param inflation: The inflation parameter + :param cell_line: Interactions are only selected from projects made on \ + a specific cell line (ALL to disable this filter). (default ALL) :param feature: The feature we want to analyse (default 'exon') :param community_file: A file containing custom communities. If \ it equals to '' then weight, global weight and same genes parameter are \ @@ -229,7 +235,8 @@ def select_community_file(project: str, weight: int, global_weight: int, """ if community_file == "": com_file = find_or_create_community(project, weight, global_weight, - same_gene, inflation, feature) + same_gene, inflation, cell_line, + feature) output = com_file.parent / f"CLIP_community_figures_{feature}" else: com_file = Path(community_file) @@ -301,8 +308,8 @@ def add_regulation_column(df_table: pd.DataFrame, sf_name: str, feature: str, def create_figure(project: str, weight: int, global_weight: int, - same_gene: bool, inflation: float, feature: str, - clip_file: Path, + same_gene: bool, inflation: float, cell_line: str, + feature: str, clip_file: Path, feature_bed: Path, test_type: str = "permutation", iteration: int = 10000, display_size: bool = False, community_file: str = "", sl_reg: bool = False) -> None: @@ -317,6 +324,8 @@ def create_figure(project: str, weight: int, global_weight: int, :param same_gene: Say if we consider as co-localised, exons within the \ same gene (True) or not (False) (default False) :param inflation: The inflation parameter + :param cell_line: Interactions are only selected from projects made on \ + a specific cell line (ALL to disable this filter). (default ALL) :param feature: The feature we want to analyse (default 'exon') :param clip_file: A bed file containing clip :param feature_bed: A bed files containing exons or genes depending on \ @@ -334,8 +343,8 @@ def create_figure(project: str, weight: int, global_weight: int, """ logging.info(f"Working on {clip_file}") com_file, output = select_community_file(project, weight, global_weight, - same_gene, inflation, feature, - community_file) + same_gene, inflation, cell_line, + feature, community_file) output.mkdir(exist_ok=True, parents=True) outfile = output / f"{clip_file.name.split('.')[0]}.pdf" final_table = create_table(feature, clip_file, feature_bed, com_file) @@ -350,6 +359,7 @@ def create_figure(project: str, weight: int, global_weight: int, def clip_folder_analysis(clip_folder: Path, project: str, weight: int, global_weight: int, same_gene: bool, inflation: float, + cell_line: str, feature: str, test_type: str = "permutation", iteration: int = 10000, display_size: bool=False, community_file: str = "", sl_reg: bool = False, @@ -365,6 +375,8 @@ def clip_folder_analysis(clip_folder: Path, project: str, weight: int, :param same_gene: Say if we consider as co-localised, exons within the \ same gene (True) or not (False) (default False) :param inflation: The inflation parameter + :param cell_line: Interactions are only selected from projects made on \ + a specific cell line (ALL to disable this filter). (default ALL) :param feature: The feature we want to analyse (default 'exon') :param clip_folder: A folder containing clip file :param test_type: The king of test to perform for frequency analysis. \ @@ -388,7 +400,8 @@ def clip_folder_analysis(clip_folder: Path, project: str, weight: int, pool = mp.Pool(processes=min(len(files), ps)) processes = [] for mfile in files: - args = [project, weight, global_weight, same_gene, inflation, feature, + args = [project, weight, global_weight, same_gene, inflation, + cell_line, feature, mfile, feature_bed, test_type, iteration, display_size, community_file, sl_reg] processes.append(pool.apply_async(create_figure, args))