diff --git a/src/find_interaction_cluster/community_calibration.py b/src/find_interaction_cluster/community_calibration.py index eb7cc0e120af6d28192189ddadb53b7f70c58d47..d387f63b034dba5e0ecdd6f5b37bd56e9add2297 100644 --- a/src/find_interaction_cluster/community_calibration.py +++ b/src/find_interaction_cluster/community_calibration.py @@ -41,7 +41,7 @@ def write_input(arr_interaction: np.array, outfile: Path, use_weight: bool): def get_out_name(weight: int, global_weight: int, inflation: float, project: str = "", same_gene=True, feature: str = "exon", - use_weight: bool = False): + use_weight: bool = False, cell_line: str = "ALL"): """ return the output file where the communities are stored. @@ -56,19 +56,22 @@ def get_out_name(weight: int, global_weight: int, inflation: float, same gene (True) or not (False) (default False) :param feature: The feature we want to analyse (default 'exon') :param use_weight: Say if we want to write the weight into the result file. + :param cell_line: Interactions are recovered only from project made \ + on this cell line. :return: The file containing communities, the input of hiMCL and the output """ w = "weigthed" if use_weight else "unweigthed" + cell = f"_{cell_line}" if cell_line != "ALL" else "" if global_weight != 0: project = f"global-weight-{global_weight}" output = ConfigGraph.community_calibration_folder / "community_files" / \ f"{project}_weight-{weight}_same_gene-{same_gene}_{feature}_" \ - f"{inflation}_{w}.txt" + f"{inflation}{cell}_{w}.txt" input_hip = output.parent / "hipMCL_files" / \ output.name.replace(".txt", "_input.txt") output_hip = input_hip.parent / input_hip.name.replace("_input.txt", - "output.txt") + "_output.txt") output.parent.mkdir(exist_ok=True, parents=True) input_hip.parent.mkdir(exist_ok=True, parents=True) return output, input_hip, output_hip @@ -77,7 +80,7 @@ def get_out_name(weight: int, global_weight: int, inflation: float, def get_figname(weight: int, global_weight: int, project: str = "", same_gene=True, feature: str = "exon", - use_weight: bool = False): + use_weight: bool = False, cell_line: str = "ALL"): """ return the output file where the communities are stored. @@ -91,18 +94,22 @@ def get_figname(weight: int, global_weight: int, same gene (True) or not (False) (default False) :param feature: The feature we want to analyse (default 'exon') :param use_weight: Say if we want to write the weight into the result file. + :param cell_line: Interactions are recovered only from project made \ + on this cell line. :return: """ w = "weigthed" if use_weight else "unweigthed" + cell = f"_{cell_line}" if cell_line != "ALL" else "" if global_weight != 0: project = f"global-weight-{global_weight}" return ConfigGraph.community_calibration_folder / \ - f"{project}_weight-{weight}_same_gene-{same_gene}_{feature}_{w}.pdf" + f"{project}_weight-{weight}_same_gene-{same_gene}_{feature}" \ + f"{cell}_{w}.pdf" def community_finder(weight: int, global_weight: int, inflation: float, project: str = "", same_gene=True, feature: str = "exon", - use_weight: bool = False): + use_weight: bool = False, cell_line: str = "ALL"): """ Find communities inside co-localisation between exons found in \ a ChIA-PET project. @@ -118,18 +125,19 @@ def community_finder(weight: int, global_weight: int, inflation: float, same gene (True) or not (False) (default False) :param use_weight: Say if we want to write the weight into the result file. :param feature: The feature we want to analyse (default 'exon') + :param cell_line: The cell line chosen """ inflation = round(inflation, 2) logging.info(f"Working with inflation {inflation}") outfile, in_hipmcl, out_hipmcl = get_out_name( weight, global_weight, inflation, project, same_gene, feature, - use_weight) + use_weight, cell_line) if outfile.is_file(): return pd.read_csv(outfile, sep="\t") cnx = sqlite3.connect(ConfigGraph.db_file) interaction = get_project_colocalisation(cnx, project, weight, global_weight, same_gene, True, - level=feature) + level=feature, cell=cell_line) write_input(interaction, in_hipmcl, use_weight) graph = create_graph(interaction) df, dic_community = find_communities(graph, project, in_hipmcl, out_hipmcl, @@ -262,18 +270,19 @@ def create_community_size_fig(df_infl: pd.DataFrame, fig_name: Path) -> None: def make_calibration(weight: int, global_weight: int, istart: float = 1.1, istop: float = 2.5, istep: float = 0.1, project: str = "", same_gene=True, feature: str = "exon", - use_weight: bool = False, logging_level: str = "INFO"): + use_weight: bool = False, cell_line: str = "ALL", + logging_level: str = "INFO"): logging_def(ConfigGraph.output_folder, __file__, logging_level) inflations = np.arange(istart, istop + istep, istep) list_df = [ community_finder(weight, global_weight, i, project, same_gene, feature, - use_weight=use_weight) + use_weight=use_weight, cell_line=cell_line) for i in inflations ] df_infl = create_dataframe(list_df, inflations) df_size = create_community_size_dataframe(list_df, inflations) figname = get_figname(weight, global_weight, project, same_gene, feature, - use_weight) + use_weight, cell_line) create_scatter(df_infl, figname) create_community_size_fig(df_size, figname.parent / figname.name.replace(".pdf", "_sizes.pdf"))