Skip to content
Snippets Groups Projects
Commit f6f97b98 authored by alapendr
Browse files

db_utils/interactions/features_interactions.py: removal of completely...

db_utils/interactions/features_interactions.py: removal of completely overlapping anchors, genes and exons
parent 5ee057fc
No related branches found
No related tags found
No related merge requests found
......@@ -81,7 +81,8 @@ def del_overlaps(pet: pd.DataFrame):
9:139773532..139778733 9:139778161..139781850 7
:param pet: In this format: chr1:start1..end1 chr2:start2..end2 weight1-2
:return: Pet dataframe without pet that have overlapping anchors
:return: Pet dataframe without pet that have overlapping anchors (partial
and complete)
"""
pet[["chr1", "start1", "space1", "end1"]] = pet["anchor1"].str.\
split(r"[:..]", expand=True)
......@@ -89,6 +90,7 @@ def del_overlaps(pet: pd.DataFrame):
split(r"[:..]", expand=True)
pet = pet.drop(["anchor1", "anchor2", "space1", "space2"], axis=1)
pet.loc[pet["chr1"] != pet["chr2"], "delete"] = "no"
# Removal of a partial overlap
pet.loc[(pet["chr1"] == pet["chr2"]) & ((pet["start1"].astype(int) >=
pet["start2"].astype(int)) &
(pet["start1"].astype(int) <=
......@@ -98,6 +100,12 @@ def del_overlaps(pet: pd.DataFrame):
(pet["end1"].astype(int) <=
pet["end2"].astype(int))),
"delete"] = "yes"
# Removal of a complete overlap, e.g. anchor1 is fully included in anchor2
pet.loc[(pet["chr1"] == pet["chr2"]) & ((pet["start1"] >= pet["start2"]) &
(pet["end2"] >= pet["end1"]) |
(pet["start1"] <= pet["start2"]) &
(pet["end2"] <= pet["end1"])),
"delete"] = "yes"
to_del = pet[pet.delete == "yes"].index.tolist()
pet = pet.drop(to_del)
pet["anchor1"] = pet["chr1"] + ":" + pet["start1"] + ".." + pet["end1"]
......@@ -283,7 +291,8 @@ def add_info_distance_between_features(df: pd.DataFrame) -> pd.DataFrame:
If the result is NULL it is because we study two exons or genes located on
different chromosomes.
If the result is 0, it is because we study two exons or genes which have
different identifiers, but strictly identical coordinates.
different identifiers, but strictly identical coordinates OR two exons or
genes which overlap (partially or completely).
:param df: Result of the "filtering_2" function
:return df: df with distances added or NULL or (null) see before for more
......@@ -312,6 +321,18 @@ def add_info_distance_between_features(df: pd.DataFrame) -> pd.DataFrame:
(df["start1"] - df["stop2"] + 1)
df.loc[(df["start1"] == df["start2"]) & (df["stop1"] == df["stop2"]),
"distance"] = 0
# Partial overlap: the distance is set to 0
df.loc[(df["chr1"] == df["chr2"]) & ((df["start1"] <= df["start2"]) &
(df["start2"] <= df["stop1"]) |
(df["start1"] <= df["stop2"]) &
(df["stop2"] <= df["stop1"])),
"distance"] = 0
# Complete overlap, e.g. exon1 fully included in exon2: distance is 0
df.loc[(df["chr1"] == df["chr2"]) & ((df["start1"] >= df["start2"]) &
(df["stop2"] >= df["stop1"]) |
(df["start1"] <= df["start2"]) &
(df["stop2"] <= df["stop1"])),
"distance"] = 0
df.loc[df["chr1"] != df["chr2"], "distance"] = "NULL"
df.drop(["start1", "ID1", "chr1", "stop1", "ID2", "chr2", "start2",
"stop2"], axis='columns', inplace=True)
......@@ -335,6 +356,18 @@ def add_info_distance_between_features(df: pd.DataFrame) -> pd.DataFrame:
(df["start1"] - df["stop2"] + 1)
df.loc[(df["start1"] == df["start2"]) & (df["stop1"] == df["stop2"]),
"distance"] = 0
# Partial overlap: the distance is set to 0
df.loc[(df["chr1"] == df["chr2"]) & ((df["start1"] <= df["start2"]) &
(df["start2"] <= df["stop1"]) |
(df["start1"] <= df["stop2"]) &
(df["stop2"] <= df["stop1"])),
"distance"] = 0
# Complete overlap, e.g. exon1 fully included in exon2: distance is 0
df.loc[(df["chr1"] == df["chr2"]) & ((df["start1"] >= df["start2"]) &
(df["stop2"] >= df["stop1"]) |
(df["start1"] <= df["start2"]) &
(df["stop2"] <= df["stop1"])),
"distance"] = 0
df.loc[df["chr1"] != df["chr2"], "distance"] = "NULL"
df.drop(["start1", "ID1", "chr1", "stop1", "ID2", "chr2",
"start2", "stop2"], axis='columns', inplace=True)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment