From 5ee057fc50dde3eac44db751f94f7c9ee933187b Mon Sep 17 00:00:00 2001 From: alapendr <audrey.lapendry@ens-lyon.fr> Date: Thu, 24 Sep 2020 12:01:40 +0200 Subject: [PATCH] db_utils/interactions/features_interactions.py: replace distance (null) with 0 for exons that have different IDs, but identical coordinates --- src/db_utils/interactions/features_interactions.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/db_utils/interactions/features_interactions.py b/src/db_utils/interactions/features_interactions.py index 92253d8b..fe99989a 100644 --- a/src/db_utils/interactions/features_interactions.py +++ b/src/db_utils/interactions/features_interactions.py @@ -282,8 +282,8 @@ def add_info_distance_between_features(df: pd.DataFrame) -> pd.DataFrame: studied in interaction and to add this information in the result dataframe. If the result is NULL it is because we study two exons or genes located in the same chromosome. - If the result is (null), so NaN, it is because we study two exons or genes - which have different identifiers, but strictly identical coordinates. + If the result is 0, it is because we study two exons or genes which have + different identifiers, but strictly identical coordinates. :param df: Result of the "filtering_2" function :return df: df with distances added or NULL or (null) see before for more @@ -310,6 +310,8 @@ def add_info_distance_between_features(df: pd.DataFrame) -> pd.DataFrame: (df["start1"] - df["stop2"] + 1) df.loc[df["stop1"] > df["stop2"], "distance"] = \ (df["start1"] - df["stop2"] + 1) + df.loc[(df["start1"] == df["start2"]) & (df["stop1"] == df["stop2"]), + "distance"] = 0 df.loc[df["chr1"] != df["chr2"], "distance"] = "NULL" df.drop(["start1", "ID1", "chr1", "stop1", "ID2", "chr2", "start2", "stop2"], axis='columns', inplace=True) @@ -331,6 +333,8 @@ def add_info_distance_between_features(df: pd.DataFrame) -> pd.DataFrame: (df["start1"] - df["stop2"] + 1) df.loc[df["stop1"] > df["stop2"], "distance"] = \ (df["start1"] - df["stop2"] + 1) + df.loc[(df["start1"] == df["start2"]) & (df["stop1"] == df["stop2"]), + "distance"] = 0 df.loc[df["chr1"] != df["chr2"], "distance"] = "NULL" df.drop(["start1", "ID1", "chr1", "stop1", "ID2", "chr2", "start2", "stop2"], axis='columns', inplace=True) -- GitLab