Skip to content
Snippets Groups Projects
Commit 772b8999 authored by nfontrod's avatar nfontrod
Browse files

src/db_utils/interactions/features_interactions.py: Creation of a function...

src/db_utils/interactions/features_interactions.py: Creation of a function create_interaction_table that calls every other function from that file
parent 2904ac24
No related branches found
No related tags found
No related merge requests found
......@@ -21,9 +21,11 @@ duplicate BED6 file of ChIA-PET data, that is in the following format:
import pandas as pd
from datetime import datetime
from .config_interactions import ConfigInteractions as Config
print("Start:", str(datetime.now()))
def get_id_col(m_series: pd.Series) -> str:
"""
Produce a sorted id in the following format: anchor1$anchor2.
......@@ -45,7 +47,8 @@ def work_on_pet():
:return:
"""
pet = pd.read_csv("/home/audrey/IE/ChIA_PET_network/data/interactions_files/chia_pet/GSM1517080.bed",sep="\t", header=None)
pet = pd.read_csv(Config.chia_pet / "GSM1517080.bed", sep="\t",
header=None)
pet = pet[[3]].drop_duplicates()
pet = pet.iloc[:, 0].str.split(r"-|,", expand=True)
pet.columns = ["anchor1", "anchor2", "weight"]
......@@ -57,16 +60,16 @@ def work_on_pet():
return pet
def del_overlaps():
def del_overlaps(pet: pd.DataFrame):
"""
This works on the previous dataframe result (pet). Input format is:
#chr1:start1..end1 chr2:start2..end2 weight1-2
We delete from this dataframe the pet that has overlapping anchors, e.g.
9:139773532..139778733 9:139778161..139781850 7
:param pet:
:return:
"""
pet = work_on_pet()
pet[["chr1", "start1", "space1", "end1"]] = pet["anchor1"].str.\
split(r"[:..]", expand=True)
pet[["chr2", "start2", "space2", "end2"]] = pet["anchor2"].str.\
......@@ -104,7 +107,9 @@ def work_on_intersection():
:return:
"""
inter = pd.read_csv("/home/audrey/IE/ChIA_PET_network/results/interactions/intersections_gene/gene_w200_vs_GSM1517080.bed", sep="\t", header=None)
inter = pd.read_csv(Config.pet_vs_gene_output /
"gene_w200_vs_GSM1517080.bed",
sep="\t", header=None)
inter = inter.iloc[:, [3, 6, 7, 8]]
inter.columns = ["id_region", "chr", "start", "end"]
inter["id_anchor"] = inter["chr"].astype("str") + ":" + inter["start"].\
......@@ -113,7 +118,7 @@ def work_on_intersection():
return inter
def interactions():
def interactions(pet: pd.DataFrame, inter: pd.DataFrame):
"""
Determine which pairs of genomic regions interact, and with
what weight.
......@@ -122,14 +127,14 @@ def interactions():
It means that gene 7832 interacts with gene 16755, according to a weight of
2.
:param pet:
:param inter:
:return:
"""
pet = del_overlaps()
df_final = pd.DataFrame()
for index, row in pet.iterrows():
if index == 50:
break
inter = work_on_intersection()
match_a = inter.loc[inter["id_anchor"] == row[0], :]
match_b = inter.loc[inter["id_anchor"] == row[1], :]
table_a = pd.DataFrame({"id_region": match_a["id_region"],
......@@ -154,7 +159,7 @@ def interactions():
return df_final
def add_level():
def add_level(df_level: pd.DataFrame):
"""
Add the level to the previous dataframe result (df_final):
- intrachromosomique: the two genomic regions that interact are in the same
......@@ -164,9 +169,9 @@ def add_level():
#id_region_a1 id_anchor_a1 id_region_a2 id_anchor_a2 weight level
7832 10:10019900..10020058 16755 11:5834473..5834631 2 interchromosomique
:param df_level:
:return:
"""
df_level = interactions()
df_level[["chr_a1", "coordinates_a1"]] = df_level.id_anchor_a1.str.\
split(":", expand=True)
df_level[["chr_a2", "coordinates_a2"]] = df_level.id_anchor_a2.str.\
......@@ -180,16 +185,16 @@ def add_level():
return df_level
def filtering_1():
def filtering_1(df_filter_1: pd.DataFrame):
"""
Filtering of the previous dataframe result (df_level) by removing:
- the genomic regions that interact with themselves, e.g.
#id_region_a1 id_anchor_a1 id_region_a2 id_anchor_a2 weight level
7832 10:10019900..10020058 7832 10:10019900..10020088 2 intrachromosomique
:param df_filter_1:
:return:
"""
df_filter_1 = add_level()
df_filter_1.loc[df_filter_1["id_region_a1"] == df_filter_1["id_region_a2"],
"identical_or_not"] = "identical"
df_filter_1.loc[df_filter_1["id_region_a1"] != df_filter_1["id_region_a2"],
......@@ -201,7 +206,7 @@ def filtering_1():
return df_filter_1
def filtering_2():
def filtering_2(df_filter_2: pd.DataFrame):
"""
Filtering of the previous dataframe result (df_filter_1) by adding:
- the weights of the interactions that describe the same interaction, e.g.
......@@ -211,12 +216,12 @@ def filtering_2():
--> #id_region_a1 id_region_a2 weight level
--> 7832 16755 4 interchromosomique
:param df_filter_2:
:return:
"""
df_filter_2 = filtering_1()
df_filter_2 = df_filter_2.drop_duplicates()
df_filter_2["id"] = df_filter_2["id_region_a1"].astype(str) + "$" + \
df_filter_2["id_region_a2"].astype(str)
df_filter_2["id_region_a2"].astype(str)
df_filter_2.drop(["id_anchor_a1", "id_anchor_a2", "id_region_a1",
"id_region_a2"], axis="columns", inplace=True)
df_filter_2["weight"] = df_filter_2["weight"].astype(int)
......@@ -230,5 +235,20 @@ def filtering_2():
return df_filter_2
def create_interaction_table():
    """
    Build the interaction table by chaining every step of this module.

    The intersection table is loaded first (work_on_intersection), then the
    table returned by work_on_pet is passed successively through
    del_overlaps, interactions (together with the intersection table),
    add_level, filtering_1 and filtering_2.

    :return: The table of interaction, as returned by filtering_2
    """
    intersections = work_on_intersection()
    pet_table = del_overlaps(work_on_pet())
    paired = interactions(pet_table, intersections)
    leveled = add_level(paired)
    return filtering_2(filtering_1(leveled))
if __name__ == "__main__":
    # Single entry point: create_interaction_table runs every step of the
    # pipeline. filtering_2 requires its input dataframe as an argument, so
    # it must not be called here with no arguments (that raises TypeError).
    create_interaction_table()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment