From bb8ce1bab3ad098548a2343ac458a4f5848658fa Mon Sep 17 00:00:00 2001
From: Fontrodona Nicolas <nicolas.fontrodona@ens-lyon.fr>
Date: Thu, 7 Jan 2021 19:32:46 +0100
Subject: [PATCH] src/bed_handler/bed_resize.py: add a parameter type

---
 src/bed_handler/bed_resize.py | 130 +++++++++++++++++++++++++++++-----
 1 file changed, 112 insertions(+), 18 deletions(-)

diff --git a/src/bed_handler/bed_resize.py b/src/bed_handler/bed_resize.py
index 8c10bda..51b1e96 100644
--- a/src/bed_handler/bed_resize.py
+++ b/src/bed_handler/bed_resize.py
@@ -12,35 +12,36 @@ from doctest import testmod
 from .config import OutputBed
 
 
-def resize_row_from_start(rowo: pd.Series, size: int, resize_from: str
-                          ) -> pd.Series:
+def resize_row_inner(rowo: pd.Series, size: int, resize_from: str
+                     ) -> pd.Series:
     """
-    Resize the bed feature from start inside a bed row.
+    Resize the bed feature inside a bed row.
 
     :param rowo: A bed row
     :param size: The maximum size the feature must have
     :param resize_from: The coordinate for which we want to resize (default \
     "start")
+    :param type: The type of resize to make
     :return: The row resized
 
     >>> crow = pd.Series({"#ref": 10, "start": 10, "end": 20, "id": 1,
     ... "score": 1, "strand": "+"})
-    >>> resize_row_from_start(crow, 5, "start").to_dict()
+    >>> resize_row_inner(crow, 5, "start").to_dict()
     {'#ref': 10, 'start': 10, 'end': 15, 'id': 1, 'score': 1, 'strand': '+'}
-    >>> resize_row_from_start(crow, 5, "end").to_dict()
+    >>> resize_row_inner(crow, 5, "end").to_dict()
     {'#ref': 10, 'start': 15, 'end': 20, 'id': 1, 'score': 1, 'strand': '+'}
-    >>> resize_row_from_start(crow, 20, "start").to_dict()
-    {'#ref': 10, 'start': 10, 'end': 20, 'id': 1, 'score': 1, 'strand': '+'}
+    >>> resize_row_inner(crow, 7, "start").to_dict()
+    {'#ref': 10, 'start': 10, 'end': 17, 'id': 1, 'score': 1, 'strand': '+'}
     >>> crow = pd.Series({"#ref": 10, "start": 50, "end": 60, "id": 1,
     ... "score": 1, "strand": "-"})
-    >>> resize_row_from_start(crow, 5, "start").to_dict()
+    >>> resize_row_inner(crow, 5, "start").to_dict()
     {'#ref': 10, 'start': 55, 'end': 60, 'id': 1, 'score': 1, 'strand': '-'}
-    >>> resize_row_from_start(crow, 5, "end").to_dict()
+    >>> resize_row_inner(crow, 5, "end").to_dict()
     {'#ref': 10, 'start': 50, 'end': 55, 'id': 1, 'score': 1, 'strand': '-'}
     """
     row = rowo.copy()
     row_strand = row["strand"]
-    d= {"+": "-", "-": "+"}
+    d = {"+": "-", "-": "+"}
     row_strand = d[row_strand] if resize_from == "end" else row_strand
     if row["end"] - row["start"] <= size:
         return row
@@ -51,8 +52,90 @@ def resize_row_from_start(rowo: pd.Series, size: int, resize_from: str
     return row
 
 
+def resize_row_outer(rowo: pd.Series, size: int, resize_from: str
+                     ) -> pd.Series:
+    """
+    Replace a bed feature by a flanking interval located just outside it.
+
+    :param rowo: A bed row (it is copied, the input row is left untouched)
+    :param size: The size of the flanking interval to create
+    :param resize_from: The side of the feature ("start" or "end", read \
+    according to the strand) next to which the interval is placed; the \
+    interval shares that boundary with the original feature
+    :return: The row resized (coordinates are not clamped at 0)
+
+    >>> crow = pd.Series({"#ref": 10, "start": 10, "end": 20, "id": 1,
+    ... "score": 1, "strand": "+"})
+    >>> resize_row_outer(crow, 5, "start").to_dict()
+    {'#ref': 10, 'start': 5, 'end': 10, 'id': 1, 'score': 1, 'strand': '+'}
+    >>> resize_row_outer(crow, 5, "end").to_dict()
+    {'#ref': 10, 'start': 20, 'end': 25, 'id': 1, 'score': 1, 'strand': '+'}
+    >>> crow = pd.Series({"#ref": 10, "start": 50, "end": 60, "id": 1,
+    ... "score": 1, "strand": "-"})
+    >>> resize_row_outer(crow, 5, "start").to_dict()
+    {'#ref': 10, 'start': 60, 'end': 65, 'id': 1, 'score': 1, 'strand': '-'}
+    >>> resize_row_outer(crow, 5, "end").to_dict()
+    {'#ref': 10, 'start': 45, 'end': 50, 'id': 1, 'score': 1, 'strand': '-'}
+    """
+    row = rowo.copy()
+    row_strand = row["strand"]
+    if (
+            resize_from == "start"
+            and row_strand == "+"
+            or resize_from != "start"
+            and row_strand != "+"
+    ):
+        row['end'] = row['start']  # interval ends where the feature began
+        row['start'] -= size
+    else:
+        row['start'] = row["end"]  # interval begins where the feature ended
+        row["end"] += size
+    return row
+
+
+def resize_row(rowo: pd.Series, size: int, resize_from: str,
+               type: str) -> pd.Series:
+    """
+    Resize the bed feature of a bed row with the requested type of resize.
+
+    :param rowo: A bed row
+    :param size: The size passed on to the selected resize function
+    :param resize_from: The coordinate for which we want to resize
+    :param type: The type of resize to make: "inner" for the inner \
+    resize, any other value falls back to the outer resize
+    :return: The row resized
+
+    >>> crow = pd.Series({"#ref": 10, "start": 10, "end": 20, "id": 1,
+    ... "score": 1, "strand": "+"})
+    >>> resize_row(crow, 5, "start", "inner").to_dict()
+    {'#ref': 10, 'start': 10, 'end': 15, 'id': 1, 'score': 1, 'strand': '+'}
+    >>> resize_row(crow, 5, "end", "inner").to_dict()
+    {'#ref': 10, 'start': 15, 'end': 20, 'id': 1, 'score': 1, 'strand': '+'}
+    >>> resize_row(crow, 7, "start", "inner").to_dict()
+    {'#ref': 10, 'start': 10, 'end': 17, 'id': 1, 'score': 1, 'strand': '+'}
+    >>> resize_row(crow, 5, "start", "outer").to_dict()
+    {'#ref': 10, 'start': 5, 'end': 10, 'id': 1, 'score': 1, 'strand': '+'}
+    >>> resize_row(crow, 5, "end", "outer").to_dict()
+    {'#ref': 10, 'start': 20, 'end': 25, 'id': 1, 'score': 1, 'strand': '+'}
+    >>> crow = pd.Series({"#ref": 10, "start": 50, "end": 60, "id": 1,
+    ... "score": 1, "strand": "-"})
+    >>> resize_row(crow, 5, "start", "inner").to_dict()
+    {'#ref': 10, 'start': 55, 'end': 60, 'id': 1, 'score': 1, 'strand': '-'}
+    >>> resize_row(crow, 5, "end", "inner").to_dict()
+    {'#ref': 10, 'start': 50, 'end': 55, 'id': 1, 'score': 1, 'strand': '-'}
+    >>> resize_row(crow, 5, "start", "outer").to_dict()
+    {'#ref': 10, 'start': 60, 'end': 65, 'id': 1, 'score': 1, 'strand': '-'}
+    >>> resize_row(crow, 5, "end", "outer").to_dict()
+    {'#ref': 10, 'start': 45, 'end': 50, 'id': 1, 'score': 1, 'strand': '-'}
+    """
+    if type == "inner":
+        return resize_row_inner(rowo, size, resize_from)
+    else:  # every value other than "inner" is handled as an outer resize
+        return resize_row_outer(rowo, size, resize_from)
+
+
 def update_bed(df_bed: pd.DataFrame, size: int,
-               resize_from: str) -> pd.DataFrame:
+               resize_from: str, type: str) -> pd.DataFrame:
     """
     Resize each feature in a bed dataframe.
 
@@ -61,28 +144,38 @@ def update_bed(df_bed: pd.DataFrame, size: int,
     :param resize_from: The coordinate for which we want to resize (default \
     "start")
     :return: The dataframe resized
+    :param type: The type of resize to make ("inner" or "outer")
 
     >>> cdf = pd.DataFrame({"#ref": [1, 1], "start": [10, 50],
     ... "end": [20, 60], "id": [1, 2], "strand": ["+", "-"]})
-    >>> update_bed(cdf, 5, "start")
+    >>> update_bed(cdf, 5, "start", "inner")
        #ref  start  end  id strand
     0     1     10   15   1      +
     1     1     55   60   2      -
-    >>> update_bed(cdf, 5, "end")
+    >>> update_bed(cdf, 5, "end", "inner")
        #ref  start  end  id strand
     0     1     15   20   1      +
     1     1     50   55   2      -
+    >>> update_bed(cdf, 5, "start", "outer")
+       #ref  start  end  id strand
+    0     1      5   10   1      +
+    1     1     60   65   2      -
+    >>> update_bed(cdf, 5, "end", "outer")
+       #ref  start  end  id strand
+    0     1     20   25   1      +
+    1     1     45   50   2      -
     """
     list_s = [
-        resize_row_from_start(df_bed.iloc[i, :], size, resize_from)
+        resize_row(df_bed.iloc[i, :], size, resize_from, type)
         for i in range(df_bed.shape[0])
     ]
     return pd.DataFrame(list_s)
 
 
-@lp.parse(bed="file", size="size > 0", resize_from=["start", "end"])
+@lp.parse(bed="file", size="size > 0", resize_from=["start", "end"],
+          type=["inner", "outer"])
 def bed_resizer(bed: str, size: int, outfile: str,
-                resize_from: str = "start") -> None:
+                resize_from: str = "start", type: str = "inner") -> None:
     """
     Resize bed features inside a bed file from their start or stop \
     position (according to their strand).
@@ -92,11 +185,12 @@ def bed_resizer(bed: str, size: int, outfile: str,
     :param outfile: The output bed name
     :param resize_from: The coordinate for which we want to resize (default \
     "start")
+    :param type: The type of resize to make ("inner" or "outer")
     """
     df = pd.read_csv(bed, sep="\t")
-    ndf = update_bed(df, size, resize_from)
+    ndf = update_bed(df, size, resize_from, type)
     ndf.to_csv(OutputBed.output / outfile, sep="\t", index=False)
 
 
 if __name__ == "__main__":
-    bed_resizer()
\ No newline at end of file
+    bed_resizer()
-- 
GitLab