Skip to content
Snippets Groups Projects
Commit 0bf2d5d0 authored by nfontrod
Browse files

Merge branch 'master' of gitlab_lbmc:LBMC/regards/chia-pet_network into dev

parents 2a804a99 52f73b13
No related branches found
No related tags found
No related merge requests found
......@@ -124,7 +124,7 @@ def create_cin_exon_interaction_table(conn: sqlite3.Connection) -> None:
[exon2] VARCHAR(30) NOT NULL,
[id_project] INT NOT NULL,
[level] VARCHAR(25) NOT NULL,
[distance] VARCHAR(30),
[distance] INT,
PRIMARY KEY ([id]),
FOREIGN KEY ([exon1]) REFERENCES cin_exon([id]),
FOREIGN KEY ([exon2]) REFERENCES cin_exon([id]),
......@@ -147,7 +147,7 @@ def create_cin_gene_interaction_table(conn: sqlite3.Connection) -> None:
[gene2] INT NOT NULL,
[id_project] INT NOT NULL,
[level] VARCHAR(25) NOT NULL,
[distance] VARCHAR(30),
[distance] INT,
PRIMARY KEY ([id]),
FOREIGN KEY ([gene1]) REFERENCES cin_gene([id]),
FOREIGN KEY ([gene2]) REFERENCES cin_gene([id]),
......
......@@ -81,7 +81,8 @@ def del_overlaps(pet: pd.DataFrame):
9:139773532..139778733 9:139778161..139781850 7
:param pet: In this format: chr1:start1..end1 chr2:start2..end2 weight1-2
:return: Pet dataframe without pet that have overlapping anchors
:return: Pet dataframe without pet that have overlapping anchors (partial
and complete)
"""
pet[["chr1", "start1", "space1", "end1"]] = pet["anchor1"].str.\
split(r"[:..]", expand=True)
......@@ -89,6 +90,7 @@ def del_overlaps(pet: pd.DataFrame):
split(r"[:..]", expand=True)
pet = pet.drop(["anchor1", "anchor2", "space1", "space2"], axis=1)
pet.loc[pet["chr1"] != pet["chr2"], "delete"] = "no"
# Removal of a partial overlap
pet.loc[(pet["chr1"] == pet["chr2"]) & ((pet["start1"].astype(int) >=
pet["start2"].astype(int)) &
(pet["start1"].astype(int) <=
......@@ -98,6 +100,12 @@ def del_overlaps(pet: pd.DataFrame):
(pet["end1"].astype(int) <=
pet["end2"].astype(int))),
"delete"] = "yes"
# Removal of a complete overlap, e.g. anchor1 is fully included in anchor2
pet.loc[(pet["chr1"] == pet["chr2"]) & ((pet["start1"] >= pet["start2"]) &
(pet["end2"] >= pet["end1"]) |
(pet["start1"] <= pet["start2"]) &
(pet["end2"] <= pet["end1"])),
"delete"] = "yes"
to_del = pet[pet.delete == "yes"].index.tolist()
pet = pet.drop(to_del)
pet["anchor1"] = pet["chr1"] + ":" + pet["start1"] + ".." + pet["end1"]
......@@ -282,8 +290,9 @@ def add_info_distance_between_features(df: pd.DataFrame) -> pd.DataFrame:
studied in interaction and to add this information in the result dataframe.
If the result is NULL it is because we study two exons or genes located in
the same chromosome.
If the result is (null), so NaN, it is because we study two exons or genes
which have different identifiers, but strictly identical coordinates.
If the result is 0, it is because we study two exons or genes which have
different identifiers, but strictly identical coordinates OR two exons or
genes which overlap (partially or completely).
:param df: Result of the "filtering_2" function
:return df: df with distances added or NULL or (null) see before for more
......@@ -310,6 +319,20 @@ def add_info_distance_between_features(df: pd.DataFrame) -> pd.DataFrame:
(df["start1"] - df["stop2"] + 1)
df.loc[df["stop1"] > df["stop2"], "distance"] = \
(df["start1"] - df["stop2"] + 1)
df.loc[(df["start1"] == df["start2"]) & (df["stop1"] == df["stop2"]),
"distance"] = 0
# Removal of a partial overlap
df.loc[(df["chr1"] == df["chr2"]) & ((df["start1"] <= df["start2"]) &
(df["start2"] <= df["stop1"]) |
(df["start1"] <= df["stop2"]) &
(df["stop2"] <= df["stop1"])),
"distance"] = 0
# Removal of a complete overlap, e.g. exon1 is fully included in exon2
df.loc[(df["chr1"] == df["chr2"]) & ((df["start1"] >= df["start2"]) &
(df["stop2"] >= df["stop1"]) |
(df["start1"] <= df["start2"]) &
(df["stop2"] <= df["stop1"])),
"distance"] = 0
df.loc[df["chr1"] != df["chr2"], "distance"] = "NULL"
df.drop(["start1", "ID1", "chr1", "stop1", "ID2", "chr2", "start2",
"stop2"], axis='columns', inplace=True)
......@@ -331,6 +354,20 @@ def add_info_distance_between_features(df: pd.DataFrame) -> pd.DataFrame:
(df["start1"] - df["stop2"] + 1)
df.loc[df["stop1"] > df["stop2"], "distance"] = \
(df["start1"] - df["stop2"] + 1)
df.loc[(df["start1"] == df["start2"]) & (df["stop1"] == df["stop2"]),
"distance"] = 0
# Removal of a partial overlap
df.loc[(df["chr1"] == df["chr2"]) & ((df["start1"] <= df["start2"]) &
(df["start2"] <= df["stop1"]) |
(df["start1"] <= df["stop2"]) &
(df["stop2"] <= df["stop1"])),
"distance"] = 0
# Removal of a complete overlap, e.g. exon1 is fully included in exon2
df.loc[(df["chr1"] == df["chr2"]) & ((df["start1"] >= df["start2"]) &
(df["stop2"] >= df["stop1"]) |
(df["start1"] <= df["start2"]) &
(df["stop2"] <= df["stop1"])),
"distance"] = 0
df.loc[df["chr1"] != df["chr2"], "distance"] = "NULL"
df.drop(["start1", "ID1", "chr1", "stop1", "ID2", "chr2",
"start2", "stop2"], axis='columns', inplace=True)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment