Commit bb8ce1ba authored by nfontrod's avatar nfontrod
Browse files

src/bed_handler/bed_resize.py: add a parameter type

parent 1144c187
......@@ -12,35 +12,36 @@ from doctest import testmod
from .config import OutputBed
def resize_row_from_start(rowo: pd.Series, size: int, resize_from: str
) -> pd.Series:
def resize_row_inner(rowo: pd.Series, size: int, resize_from: str
) -> pd.Series:
"""
Resize the bed feature from start inside a bed row.
Resize the bed feature inside a bed row.
:param rowo: A bed row
:param size: The maximum size the feature must have
:param resize_from: The coordinate for which we want to resize (default \
"start")
:param type: The type of resize to make
:return: The row resized
>>> crow = pd.Series({"#ref": 10, "start": 10, "end": 20, "id": 1,
... "score": 1, "strand": "+"})
>>> resize_row_from_start(crow, 5, "start").to_dict()
>>> resize_row_inner(crow, 5, "start").to_dict()
{'#ref': 10, 'start': 10, 'end': 15, 'id': 1, 'score': 1, 'strand': '+'}
>>> resize_row_from_start(crow, 5, "end").to_dict()
>>> resize_row_inner(crow, 5, "end").to_dict()
{'#ref': 10, 'start': 15, 'end': 20, 'id': 1, 'score': 1, 'strand': '+'}
>>> resize_row_from_start(crow, 20, "start").to_dict()
{'#ref': 10, 'start': 10, 'end': 20, 'id': 1, 'score': 1, 'strand': '+'}
>>> resize_row_inner(crow, 7, "start").to_dict()
{'#ref': 10, 'start': 10, 'end': 17, 'id': 1, 'score': 1, 'strand': '+'}
>>> crow = pd.Series({"#ref": 10, "start": 50, "end": 60, "id": 1,
... "score": 1, "strand": "-"})
>>> resize_row_from_start(crow, 5, "start").to_dict()
>>> resize_row_inner(crow, 5, "start").to_dict()
{'#ref': 10, 'start': 55, 'end': 60, 'id': 1, 'score': 1, 'strand': '-'}
>>> resize_row_from_start(crow, 5, "end").to_dict()
>>> resize_row_inner(crow, 5, "end").to_dict()
{'#ref': 10, 'start': 50, 'end': 55, 'id': 1, 'score': 1, 'strand': '-'}
"""
row = rowo.copy()
row_strand = row["strand"]
d= {"+": "-", "-": "+"}
d = {"+": "-", "-": "+"}
row_strand = d[row_strand] if resize_from == "end" else row_strand
if row["end"] - row["start"] <= size:
return row
......@@ -51,8 +52,90 @@ def resize_row_from_start(rowo: pd.Series, size: int, resize_from: str
return row
def resize_row_outer(rowo: pd.Series, size: int, resize_from: str
                     ) -> pd.Series:
    """
    Replace a bed feature by the region of length ``size`` flanking it.

    The returned interval lies outside the feature and touches the boundary
    selected by ``resize_from``, read according to the strand: on a "+"
    feature the "start" boundary is the ``start`` column, on any other
    strand it is the ``end`` column (and conversely for "end").

    The input row is left untouched; a modified copy is returned.

    :param rowo: A bed row (must contain "start", "end" and "strand")
    :param size: The size of the flanking region to create
    :param resize_from: The boundary next to which the region is created: \
"start", any other value is handled as "end"
    :return: The row describing the flanking region. The start coordinate \
is clamped at 0 so the region never extends before the beginning of the \
reference (it is then shorter than ``size``)

    >>> crow = pd.Series({"#ref": 10, "start": 10, "end": 20, "id": 1,
    ... "score": 1, "strand": "+"})
    >>> resize_row_outer(crow, 5, "start").to_dict()
    {'#ref': 10, 'start': 5, 'end': 10, 'id': 1, 'score': 1, 'strand': '+'}
    >>> resize_row_outer(crow, 5, "end").to_dict()
    {'#ref': 10, 'start': 20, 'end': 25, 'id': 1, 'score': 1, 'strand': '+'}
    >>> resize_row_outer(crow, 15, "start").to_dict()
    {'#ref': 10, 'start': 0, 'end': 10, 'id': 1, 'score': 1, 'strand': '+'}
    >>> crow = pd.Series({"#ref": 10, "start": 50, "end": 60, "id": 1,
    ... "score": 1, "strand": "-"})
    >>> resize_row_outer(crow, 5, "start").to_dict()
    {'#ref': 10, 'start': 60, 'end': 65, 'id': 1, 'score': 1, 'strand': '-'}
    >>> resize_row_outer(crow, 5, "end").to_dict()
    {'#ref': 10, 'start': 45, 'end': 50, 'id': 1, 'score': 1, 'strand': '-'}
    """
    row = rowo.copy()
    # The flank sits on the low-coordinate side either for the "start" of a
    # "+" feature or for the "end" of a non-"+" feature, i.e. when both tests
    # give the same answer.
    low_side = (resize_from == "start") == (row["strand"] == "+")
    if low_side:
        row["end"] = row["start"]
        # Bed coordinates cannot be negative: stop at the reference origin.
        row["start"] = max(row["start"] - size, 0)
    else:
        row["start"] = row["end"]
        row["end"] += size
    return row
def resize_row(rowo: pd.Series, size: int, resize_from: str,
               type: str) -> pd.Series:
    """
    Resize a bed row with the strategy selected by ``type``.

    The work is delegated to ``resize_row_inner`` when ``type`` is "inner";
    every other value is handled by ``resize_row_outer``.

    :param rowo: A bed row
    :param size: The size given to the resizing function
    :param resize_from: The coordinate from which we want to resize \
("start" or "end")
    :param type: The type of resize to make ("inner", anything else is \
treated as "outer")
    :return: The row resized

    >>> crow = pd.Series({"#ref": 10, "start": 10, "end": 20, "id": 1,
    ... "score": 1, "strand": "+"})
    >>> resize_row(crow, 5, "start", "inner").to_dict()
    {'#ref': 10, 'start': 10, 'end': 15, 'id': 1, 'score': 1, 'strand': '+'}
    >>> resize_row(crow, 5, "end", "inner").to_dict()
    {'#ref': 10, 'start': 15, 'end': 20, 'id': 1, 'score': 1, 'strand': '+'}
    >>> resize_row(crow, 7, "start", "inner").to_dict()
    {'#ref': 10, 'start': 10, 'end': 17, 'id': 1, 'score': 1, 'strand': '+'}
    >>> resize_row(crow, 5, "start", "outer").to_dict()
    {'#ref': 10, 'start': 5, 'end': 10, 'id': 1, 'score': 1, 'strand': '+'}
    >>> resize_row(crow, 5, "end", "outer").to_dict()
    {'#ref': 10, 'start': 20, 'end': 25, 'id': 1, 'score': 1, 'strand': '+'}
    >>> crow = pd.Series({"#ref": 10, "start": 50, "end": 60, "id": 1,
    ... "score": 1, "strand": "-"})
    >>> resize_row(crow, 5, "start", "inner").to_dict()
    {'#ref': 10, 'start': 55, 'end': 60, 'id': 1, 'score': 1, 'strand': '-'}
    >>> resize_row(crow, 5, "end", "inner").to_dict()
    {'#ref': 10, 'start': 50, 'end': 55, 'id': 1, 'score': 1, 'strand': '-'}
    >>> resize_row(crow, 5, "start", "outer").to_dict()
    {'#ref': 10, 'start': 60, 'end': 65, 'id': 1, 'score': 1, 'strand': '-'}
    >>> resize_row(crow, 5, "end", "outer").to_dict()
    {'#ref': 10, 'start': 45, 'end': 50, 'id': 1, 'score': 1, 'strand': '-'}
    """
    # Pick the resizing routine first, then apply it once.
    resizer = resize_row_inner if type == "inner" else resize_row_outer
    return resizer(rowo, size, resize_from)
def update_bed(df_bed: pd.DataFrame, size: int,
resize_from: str) -> pd.DataFrame:
resize_from: str, type: str) -> pd.DataFrame:
"""
Resize each feature in a bed dataframe.
......@@ -61,28 +144,38 @@ def update_bed(df_bed: pd.DataFrame, size: int,
:param resize_from: The coordinate for which we want to resize (default \
"start")
:return: The dataframe resized
:param type: The type of resize to make
>>> cdf = pd.DataFrame({"#ref": [1, 1], "start": [10, 50],
... "end": [20, 60], "id": [1, 2], "strand": ["+", "-"]})
>>> update_bed(cdf, 5, "start")
>>> update_bed(cdf, 5, "start", "inner")
#ref start end id strand
0 1 10 15 1 +
1 1 55 60 2 -
>>> update_bed(cdf, 5, "end")
>>> update_bed(cdf, 5, "end", "inner")
#ref start end id strand
0 1 15 20 1 +
1 1 50 55 2 -
>>> update_bed(cdf, 5, "start", "outer")
#ref start end id strand
0 1 5 10 1 +
1 1 60 65 2 -
>>> update_bed(cdf, 5, "end", "outer")
#ref start end id strand
0 1 20 25 1 +
1 1 45 50 2 -
"""
list_s = [
resize_row_from_start(df_bed.iloc[i, :], size, resize_from)
resize_row(df_bed.iloc[i, :], size, resize_from, type)
for i in range(df_bed.shape[0])
]
return pd.DataFrame(list_s)
@lp.parse(bed="file", size="size > 0", resize_from=["start", "end"])
@lp.parse(bed="file", size="size > 0", resize_from=["start", "end"],
type=["inner", "outer"])
def bed_resizer(bed: str, size: int, outfile: str,
resize_from: str = "start") -> None:
resize_from: str = "start", type: str = "inner") -> None:
"""
Resize bed features inside a bed file from their start or stop \
position (according to their strand).
......@@ -92,11 +185,12 @@ def bed_resizer(bed: str, size: int, outfile: str,
:param outfile: The output bed name
:param resize_from: The coordinate for which we want to resize (default \
"start")
:param type: The type of resize to make
"""
df = pd.read_csv(bed, sep="\t")
ndf = update_bed(df, size, resize_from)
ndf = update_bed(df, size, resize_from, type)
ndf.to_csv(OutputBed.output / outfile, sep="\t", index=False)
if __name__ == "__main__":
bed_resizer()
\ No newline at end of file
bed_resizer()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment