src/find_interaction_cluster/community_figures/*.py: script that creates a...

src/find_interaction_cluster/community_figures/*.py: script that creates a barplot showing the frequency/value of a particular item in every community of genomic features (genes or exons) that are close in the nucleus

src/find_interaction_cluster/community_figures/*.py: script that creates a...
src/find_interaction_cluster/community_figures/*.py: script that creates a barplot showing the frequency/value of a particular item in every community of genomic features (genes or exons) that are close in the nucleus
00ca1723 · nfontrod · 74856aba · 00ca1723 · 00ca1723 · 00ca1723
Commit 00ca1723 authored 4 years ago by nfontrod
--- a/src/find_interaction_cluster/community_figures/__file__.py
+++ b/src/find_interaction_cluster/community_figures/__file__.py
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+"""
+Description:
+"""
--- a/src/find_interaction_cluster/community_figures/__main__.py
+++ b/src/find_interaction_cluster/community_figures/__main__.py
+#!/usr/bin/env python3
+# -*- coding: UTF-8 -*-
+"""
+Description: Create a barplot showing the frequency/value of a particular \
+item in every community of genomic features (genes or exons) that are close \
+in the nucleus compared to a control list of features.
+"""
+import lazyparser as lp
+import pandas as pd
+from pathlib import Path
+from .fig_functions import create_community_fig
+class FileNameError(Exception):
+    """Raised when the output figure name does not end with '.pdf'."""
+class MissingColumnError(Exception):
+    """Raised when a required column is absent from the loaded table."""
+def load_and_check_table(table: str, feature: str, target_col: str):
+    """
+    Read a tab-separated table and make sure it holds every column \
+    required downstream.
+
+    :param table: Path to a tab-separated file (read as gzip when its name \
+    ends with '.gz'). It must contain the columns id_<feature>, \
+    <target_col>, community and community_size.
+    :param feature: The kind of feature analysed; it is used to build the \
+    name of the identifier column (id_<feature>)
+    :param target_col: The name of the column containing the data of interest
+    :return: The loaded dataframe
+    :raises MissingColumnError: If a required column is absent; the first \
+    missing one, in the order listed above, is reported.
+    """
+    # "infer" is the pandas default, so non-gz files are read as before.
+    compression = "gzip" if table.endswith(".gz") else "infer"
+    df = pd.read_csv(table, sep="\t", compression=compression)
+    expected = (f"id_{feature}", target_col, "community", "community_size")
+    absent = [col for col in expected if col not in df.columns]
+    if absent:
+        # Only the first missing column is reported.
+        raise MissingColumnError(f"The column {absent[0]} is missing !")
+    return df
+@lp.parse(table="file", output="folder", test_type=["lm", "permutation"],
+          iteration="iteration > 0")
+def create_community_figures(table: str, feature: str, target_col: str,
+                             output: str, outfile: str, test_type: str,
+                             target_kind: str = "",
+                             iteration: int = 10000) -> None:
+    """
+    Create a dataframe with a control community, save it as a table and \
+    as a barplot figure.
+
+    :param table: A file containing a table with the id of the chosen \
+    `feature` (i.e FasterDB id of genes or exons), a column with data of \
+    interest (this column must have the name *target_col*) and two columns \
+    with the community and the size of the community of the feature if it \
+    has one (None, else).
+    :param feature: The kind of feature analysed (exons or genes)
+    :param target_col: The name of the column containing the data of interest
+    :param output: The output folder
+    :param outfile: The name of the output figure file (pdf format)
+    :param test_type: The type of test to make (permutation or lm)
+    :param target_kind: An optional name that describes a bit further \
+    target_col.
+    :param iteration: The number of sub samples to create. This parameter \
+    is only used if test_type = 'permutation' (default 10000).
+    """
+    # Check the file name first: it is cheap and avoids reading the whole
+    # table only to fail on a wrong extension afterwards.
+    if not outfile.endswith(".pdf"):
+        raise FileNameError("The output figure must be in pdf format !")
+    df = load_and_check_table(table, feature, target_col)
+    moutfile = Path(output) / outfile
+    create_community_fig(df, feature, target_col, moutfile, test_type,
+                         target_kind=target_kind, iteration=iteration)
+if __name__ == "__main__":
+    # Called without arguments: the @lp.parse decorator is expected to
+    # supply them from the command line.
+    # NOTE(review): confirm against the lazyparser documentation.
+    create_community_figures()
--- a/src/find_interaction_cluster/community_figures/fig_functions.py
+++ b/src/find_interaction_cluster/community_figures/fig_functions.py