diff --git a/src/find_interaction_cluster/periodicity_analysis/get_community_periodicity.py b/src/find_interaction_cluster/periodicity_analysis/get_community_periodicity.py index c73e1c0771933efec4af4fc879a0fd56b5e5e072..e173030ab1c96b62e2ef878adeea93ca7b3a9e5e 100644 --- a/src/find_interaction_cluster/periodicity_analysis/get_community_periodicity.py +++ b/src/find_interaction_cluster/periodicity_analysis/get_community_periodicity.py @@ -27,12 +27,15 @@ from .get_periodicity import periodicity_enrichment sys.path.insert(0, str(Path(__file__).parent)) -def load_bed(dic_com: Dict[str, List]) -> Dict: +def load_bed( + dic_com: Dict[str, List], bed_file: Path = ConfigPeriod.bed_gene +) -> Dict: """ Load the bed file containing the features of interest :param dic_com: A dictionary containing the list of features (values) \ contained in each community (key) + :param bed_file: The bed file to use :return: The dataframe of genes contained >>> load_bed({"C1": ['ENSG00000186092', 'ENSG00000284733', @@ -46,7 +49,7 @@ def load_bed(dic_com: Dict[str, List]) -> Dict: True """ df = pd.read_csv( - ConfigPeriod.bed_gene, + bed_file, sep="\t", names=["chr", "start", "stop", "id", "name", "strand"], ) @@ -194,6 +197,7 @@ def get_periodicity_4_communities( @lp.parse def compute_periodicity( com_file: str, + bed_file: str = str(ConfigPeriod.bed_gene), cpnt_type: str = "nt", periods: List[int] = list(range(2, 50)), smooth: int = 3, @@ -207,6 +211,7 @@ def compute_periodicity( """ :param com_file: A community file + :param bed_file: A bed file containing the coordinates of each sequences :param cpnt_type: The type of component we want to analyze (nt, dnt) \ (default nt) :param periods: The periods for which we want to test the periodicity @@ -227,7 +232,7 @@ def compute_periodicity( range_spec: Tuple[int, int] = (range_spectrum[0], range_spectrum[1]) logging_def(ConfigPeriod.output, __file__, logging_level) dic_com = get_communities(Path(com_file), "gene", 1) - dic_bed = load_bed(dic_com) + dic_bed = load_bed(dic_com, Path(bed_file)) genome = Fasta(str(ConfigPeriod.hg38)) df = get_periodicity_4_communities( dic_com,