Skip to content
Snippets Groups Projects
Commit 00ca1723 authored by nfontrod's avatar nfontrod
Browse files

src/find_interaction_cluster/community_figures/*.py: script that creates a...

src/find_interaction_cluster/community_figures/*.py: script that creates a barplot showing the frequency/value of a particular item in every community of genomic features (genes of exons) that are close in the nucleus
parent 74856aba
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
Description:
"""
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
Description: Create a barplot showing the frequency/value of a particular \
item in every community of genomic features (genes or exons) that are close \
in the nucleus compared to a control list of features.
"""
import lazyparser as lp
import pandas as pd
from pathlib import Path
from .fig_functions import create_community_fig
class FileNameError(Exception):
    """Raised when the output figure name does not end with '.pdf'."""
class MissingColumnError(Exception):
    """Raised when a required column is absent from the loaded table."""
def load_and_check_table(table: str, feature: str,
                         target_col: str) -> pd.DataFrame:
    """
    Load a tab-separated table and check that it has the required columns.

    The table must contain the following columns: id_<feature>, \
    <target_col>, community and community_size.

    :param table: A file containing a table with the id of the chosen \
    `feature` (i.e FasterDB id of genes or exons), a column with data of \
    interest (this column must have the name *target_col*) and two columns \
    with the community and the size of the community of the feature if it \
    has one (None, else). Files ending with '.gz' are read as gzip.
    :param feature: The kind of feature analysed. It is used to build the \
    name of the id column (``id_<feature>``).
    :param target_col: The name of the column containing the data of interest
    :return: The loaded dataframe
    :raises MissingColumnError: If one of the required columns is not \
    found in the table (the first missing one is reported).
    """
    # pandas infers the compression from the file extension by default
    # (compression="infer"), so '.gz' files need no dedicated branch.
    df = pd.read_csv(table, sep="\t")
    required_cols = (f"id_{feature}", target_col, "community",
                     "community_size")
    for rqd in required_cols:
        if rqd not in df.columns:
            raise MissingColumnError(f"The column {rqd} is missing !")
    return df
# NOTE(review): the previous constraint was "0 < iteration < 20", which
# contradicts the default (and documented) value of 10000. It is widened
# to a simple positivity check; every previously accepted value stays valid.
@lp.parse(table="file", output="folder", test_type=["lm", "permutation"],
          iteration="iteration > 0")
def create_community_figures(table: str, feature: str, target_col: str,
                             output: str, outfile: str, test_type: str,
                             target_kind: str = "",
                             iteration: int = 10000) -> None:
    """
    Create a dataframe with a control community, save it as a table and \
    as a barplot figure.

    The table is loaded and validated here; the figure creation itself is \
    delegated to `create_community_fig`.

    :param table: A file containing a table with the id of the chosen \
    `feature` (i.e FasterDB id of genes or exons), a column with data of \
    interest (this column must have the name *target_col*) and two columns \
    with the community and the size of the community of the feature if it \
    has one (None, else).
    :param feature: The kind of feature analysed (exons or genes)
    :param target_col: The name of the column containing the data of interest
    :param output: The output folder
    :param outfile: The name of the output figure file (pdf format)
    :param test_type: The type of test to make (permutation or lm)
    :param target_kind: An optional name that describe a bit further \
    target_col.
    :param iteration: The number of sub samples to create. This parameter \
    is only used if test_type = 'permutation' (default 10000).
    :raises FileNameError: If `outfile` does not end with '.pdf'.
    """
    # Validate the cheap argument first so a bad file name fails before
    # the (possibly large) table is read from disk.
    if not outfile.endswith(".pdf"):
        raise FileNameError("The output figure must be in pdf format !")
    df = load_and_check_table(table, feature, target_col)
    moutfile = Path(output) / outfile
    create_community_fig(df, feature, target_col, moutfile, test_type,
                         target_kind=target_kind, iteration=iteration)
# Script entry point: called without arguments because the lazyparser
# decorator on create_community_figures supplies them from the command line.
if __name__ == "__main__":
    create_community_figures()
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment