diff --git a/src/bed_handler/select_regulated_near_ctcf_exons.py b/src/bed_handler/select_regulated_near_ctcf_exons.py index c5fcf6c611ef40e7c5b0e860280b3a5d8e7c642e..4389c9ce578e4bed88b33fe33eb24ce5b9dfeb4b 100644 --- a/src/bed_handler/select_regulated_near_ctcf_exons.py +++ b/src/bed_handler/select_regulated_near_ctcf_exons.py @@ -32,23 +32,19 @@ def filter_ctcf_distance_table(df: pd.DataFrame, reg: str, threshold: int, 0 DSC2_1 -4 siPP_DOWN 1_1 1 DSC2_2 -3 siPP_DOWN 1_2 2 DSC2_3 -2 siPP_DOWN 1_3 - 4 DSC2_5 0 siPP_DOWN 1_5 >>> rdf = filter_ctcf_distance_table(cdf, 'down', 2, 'upstream') >>> rdf[['exon_name', 'dist', 'group', 'id']] exon_name dist group id 2 DSC2_3 -2 siPP_DOWN 1_3 - 4 DSC2_5 0 siPP_DOWN 1_5 >>> rdf = filter_ctcf_distance_table(cdf, 'down', 2, 'downstream') >>> rdf[['exon_name', 'dist', 'group', 'id']] exon_name dist group id - 4 DSC2_5 0 siPP_DOWN 1_5 5 DSC2_6 1 siPP_DOWN 1_6 6 DSC2_7 2 siPP_DOWN 1_7 >>> rdf = filter_ctcf_distance_table(cdf, 'down', 2, 'both') >>> rdf[['exon_name', 'dist', 'group', 'id']] exon_name dist group id 2 DSC2_3 -2 siPP_DOWN 1_3 - 4 DSC2_5 0 siPP_DOWN 1_5 5 DSC2_6 1 siPP_DOWN 1_6 6 DSC2_7 2 siPP_DOWN 1_7 >>> rdf = filter_ctcf_distance_table(cdf, 'up', 2, 'both') @@ -65,7 +61,6 @@ def filter_ctcf_distance_table(df: pd.DataFrame, reg: str, threshold: int, exon_name dist group id 2 DSC2_3 -2 siPP_DOWN 1_3 3 DSC2_4 -1 siPP_UP 1_4 - 4 DSC2_5 0 siPP_DOWN 1_5 5 DSC2_6 1 siPP_DOWN 1_6 6 DSC2_7 2 siPP_DOWN 1_7 >>> filter_ctcf_distance_table(cdf, 'lul', 2, 'both') @@ -77,6 +72,10 @@ def filter_ctcf_distance_table(df: pd.DataFrame, reg: str, threshold: int, ... ValueError: location parameter should be in ['upstream', \ 'downstream', 'both'] + >>> rdf = filter_ctcf_distance_table(cdf, 'down', 0, 'both') + >>> rdf[['exon_name', 'dist', 'group', 'id']] + exon_name dist group id + 4 DSC2_5 0 siPP_DOWN 1_5 """ if reg not in ['down', 'up', 'all']: raise ValueError(f"reg parameter should be one in: " @@ -88,12 +87,16 @@ def filter_ctcf_distance_table(df: pd.DataFrame, reg: str, threshold: int, df = df.loc[df["group"] == f"siPP_{reg.upper()}", :] if location == "upstream": df = df.loc[(df["dist"] >= threshold * -1) & - (df["dist"] <= 0), :] + (df["dist"] < 0), :] elif location == "downstream": df = df.loc[(df["dist"] <= threshold) & - (df["dist"] >= 0), :] + (df["dist"] > 0), :] else: - df = df.loc[abs(df["dist"]) <= threshold, :] + if threshold == 0: + df = df.loc[abs(df["dist"]) == 0, :] + else: + df = df.loc[(abs(df["dist"]) <= threshold) & + (df["dist"] != 0), :] return df