Skip to content
Snippets Groups Projects
Commit a16f26ca authored by nfontrod's avatar nfontrod
Browse files

src/bed_handler/select_regulated_near_ctcf_exons.py: exon containing CTCF...

src/bed_handler/select_regulated_near_ctcf_exons.py: exons containing CTCF sites are dropped when the threshold is above 0
parent 6c9063df
No related branches found
No related tags found
No related merge requests found
Pipeline #117 passed
......@@ -32,23 +32,19 @@ def filter_ctcf_distance_table(df: pd.DataFrame, reg: str, threshold: int,
0 DSC2_1 -4 siPP_DOWN 1_1
1 DSC2_2 -3 siPP_DOWN 1_2
2 DSC2_3 -2 siPP_DOWN 1_3
4 DSC2_5 0 siPP_DOWN 1_5
>>> rdf = filter_ctcf_distance_table(cdf, 'down', 2, 'upstream')
>>> rdf[['exon_name', 'dist', 'group', 'id']]
exon_name dist group id
2 DSC2_3 -2 siPP_DOWN 1_3
4 DSC2_5 0 siPP_DOWN 1_5
>>> rdf = filter_ctcf_distance_table(cdf, 'down', 2, 'downstream')
>>> rdf[['exon_name', 'dist', 'group', 'id']]
exon_name dist group id
4 DSC2_5 0 siPP_DOWN 1_5
5 DSC2_6 1 siPP_DOWN 1_6
6 DSC2_7 2 siPP_DOWN 1_7
>>> rdf = filter_ctcf_distance_table(cdf, 'down', 2, 'both')
>>> rdf[['exon_name', 'dist', 'group', 'id']]
exon_name dist group id
2 DSC2_3 -2 siPP_DOWN 1_3
4 DSC2_5 0 siPP_DOWN 1_5
5 DSC2_6 1 siPP_DOWN 1_6
6 DSC2_7 2 siPP_DOWN 1_7
>>> rdf = filter_ctcf_distance_table(cdf, 'up', 2, 'both')
......@@ -65,7 +61,6 @@ def filter_ctcf_distance_table(df: pd.DataFrame, reg: str, threshold: int,
exon_name dist group id
2 DSC2_3 -2 siPP_DOWN 1_3
3 DSC2_4 -1 siPP_UP 1_4
4 DSC2_5 0 siPP_DOWN 1_5
5 DSC2_6 1 siPP_DOWN 1_6
6 DSC2_7 2 siPP_DOWN 1_7
>>> filter_ctcf_distance_table(cdf, 'lul', 2, 'both')
......@@ -77,6 +72,10 @@ def filter_ctcf_distance_table(df: pd.DataFrame, reg: str, threshold: int,
...
ValueError: location parameter should be in ['upstream', \
'downstream', 'both']
>>> rdf = filter_ctcf_distance_table(cdf, 'down', 0, 'both')
>>> rdf[['exon_name', 'dist', 'group', 'id']]
exon_name dist group id
4 DSC2_5 0 siPP_DOWN 1_5
"""
if reg not in ['down', 'up', 'all']:
raise ValueError(f"reg parameter should be one in: "
......@@ -88,12 +87,16 @@ def filter_ctcf_distance_table(df: pd.DataFrame, reg: str, threshold: int,
df = df.loc[df["group"] == f"siPP_{reg.upper()}", :]
if location == "upstream":
df = df.loc[(df["dist"] >= threshold * -1) &
(df["dist"] <= 0), :]
(df["dist"] < 0), :]
elif location == "downstream":
df = df.loc[(df["dist"] <= threshold) &
(df["dist"] >= 0), :]
(df["dist"] > 0), :]
else:
df = df.loc[abs(df["dist"]) <= threshold, :]
if threshold == 0:
df = df.loc[abs(df["dist"]) == 0, :]
else:
df = df.loc[(abs(df["dist"]) <= threshold) &
(df["dist"] != 0), :]
return df
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment