Skip to content
Snippets Groups Projects
Commit 1144c187 authored by nfontrod's avatar nfontrod
Browse files

src/bed_handler/bed_resize.py: allow to resize the bed file

parent a5e8bfbb
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
Description: The goal of this script is to resize the features of a bed file
"""
import pandas as pd
import lazyparser as lp
from doctest import testmod
from .config import OutputBed
def resize_row_from_start(rowo: pd.Series, size: int, resize_from: str
                          ) -> pd.Series:
    """
    Trim one bed feature down to at most ``size`` bases.

    The side that is kept depends on the strand: with ``resize_from`` set \
    to "start", a "+" feature keeps its first ``size`` bases and any other \
    strand keeps its last ``size`` bases. With "end" the strand is \
    swapped first, so the opposite side is kept; in that mode the strand \
    must be "+" or "-", otherwise a KeyError is raised.

    Features whose length is already lower than or equal to ``size`` are \
    returned unchanged. The input row is never modified: a copy is returned.

    :param rowo: A bed row
    :param size: The maximum size the feature must have
    :param resize_from: The coordinate for which we want to resize \
    ("start" or "end")
    :return: The row resized

    >>> crow = pd.Series({"#ref": 10, "start": 10, "end": 20, "id": 1,
    ... "score": 1, "strand": "+"})
    >>> resize_row_from_start(crow, 5, "start").to_dict()
    {'#ref': 10, 'start': 10, 'end': 15, 'id': 1, 'score': 1, 'strand': '+'}
    >>> resize_row_from_start(crow, 5, "end").to_dict()
    {'#ref': 10, 'start': 15, 'end': 20, 'id': 1, 'score': 1, 'strand': '+'}
    >>> resize_row_from_start(crow, 20, "start").to_dict()
    {'#ref': 10, 'start': 10, 'end': 20, 'id': 1, 'score': 1, 'strand': '+'}
    >>> crow = pd.Series({"#ref": 10, "start": 50, "end": 60, "id": 1,
    ... "score": 1, "strand": "-"})
    >>> resize_row_from_start(crow, 5, "start").to_dict()
    {'#ref': 10, 'start': 55, 'end': 60, 'id': 1, 'score': 1, 'strand': '-'}
    >>> resize_row_from_start(crow, 5, "end").to_dict()
    {'#ref': 10, 'start': 50, 'end': 55, 'id': 1, 'score': 1, 'strand': '-'}
    """
    resized = rowo.copy()
    strand = resized["strand"]
    if resize_from == "end":
        # Resizing from the end is the same as resizing from the start of
        # the opposite strand.
        opposite = {"+": "-", "-": "+"}
        strand = opposite[strand]
    length = resized["end"] - resized["start"]
    if length > size:
        if strand == "+":
            # Anchor on the start coordinate and pull the end in.
            resized["end"] = resized["start"] + size
        else:
            # Anchor on the end coordinate and push the start forward.
            resized["start"] = resized["end"] - size
    return resized
def update_bed(df_bed: pd.DataFrame, size: int,
               resize_from: str) -> pd.DataFrame:
    """
    Resize each feature in a bed dataframe.

    Every row is resized independently with ``resize_row_from_start`` and \
    the resized rows are assembled into a new dataframe; the input \
    dataframe is left untouched. A dataframe without any row is returned \
    as a copy so that its columns are preserved.

    :param df_bed: The dataframe corresponding to a bed to resize
    :param size: The maximum size the feature must have
    :param resize_from: The coordinate for which we want to resize \
    ("start" or "end")
    :return: The dataframe resized

    >>> cdf = pd.DataFrame({"#ref": [1, 1], "start": [10, 50],
    ... "end": [20, 60], "id": [1, 2], "strand": ["+", "-"]})
    >>> update_bed(cdf, 5, "start")
       #ref  start  end  id strand
    0     1     10   15   1      +
    1     1     55   60   2      -
    >>> update_bed(cdf, 5, "end")
       #ref  start  end  id strand
    0     1     15   20   1      +
    1     1     50   55   2      -
    >>> update_bed(cdf.iloc[0:0], 5, "start").columns.tolist()
    ['#ref', 'start', 'end', 'id', 'strand']
    """
    if df_bed.shape[0] == 0:
        # Building a frame from an empty list of rows would yield a frame
        # with no column at all, hence a bed file without header.
        return df_bed.copy()
    resized_rows = [
        resize_row_from_start(row, size, resize_from)
        for _, row in df_bed.iterrows()
    ]
    # Each resized row keeps its original index label as Series name, so the
    # rebuilt frame has the same index as the input.
    return pd.DataFrame(resized_rows)
@lp.parse(bed="file", size="size > 0", resize_from=["start", "end"])
def bed_resizer(bed: str, size: int, outfile: str,
                resize_from: str = "start") -> None:
    """
    Resize bed features inside a bed file from their start or stop \
    position (according to their strand).

    :param bed: A bed file with the features to resize
    :param size: The maximum size the feature must have
    :param outfile: The output bed name
    :param resize_from: The coordinate for which we want to resize (default \
    "start")
    """
    # NOTE(review): lazyparser presumably builds the command line help from
    # the ":param" lines above, so their wording is kept as is - confirm
    # before rephrasing them.
    # The input is read as a tab-separated table whose first line is the
    # header.
    resized = update_bed(pd.read_csv(bed, sep="\t"), size, resize_from)
    # The result is written under the project output location, without the
    # dataframe index.
    destination = OutputBed.output / outfile
    resized.to_csv(destination, sep="\t", index=False)
if __name__ == "__main__":
    # Script entry point. bed_resizer is called without arguments here;
    # presumably the lp.parse decorator supplies them from the command
    # line - confirm against the lazyparser documentation.
    bed_resizer()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment