From 387d5650cd6ec559583b392fc7e52f3c4bdafa86 Mon Sep 17 00:00:00 2001
From: Fontrodona Nicolas <nicolas.fontrodona@ens-lyon.fr>
Date: Fri, 5 Feb 2021 15:28:57 +0100
Subject: [PATCH] src/find_interaction_cluster/clip_figures/clip_analyser.py:
add a parameter cell_line
---
.../clip_figures/clip_analyser.py | 37 +++++++++++++------
1 file changed, 25 insertions(+), 12 deletions(-)
diff --git a/src/find_interaction_cluster/clip_figures/clip_analyser.py b/src/find_interaction_cluster/clip_figures/clip_analyser.py
index eb2c8aae..230f64b9 100644
--- a/src/find_interaction_cluster/clip_figures/clip_analyser.py
+++ b/src/find_interaction_cluster/clip_figures/clip_analyser.py
@@ -60,7 +60,7 @@ def bedtools_intersect(gene_bed: Path, clip_bed: Path,
def find_or_create_community(project: str, weight: int, global_weight: int,
- same_gene: bool, inflation: float,
+ same_gene: bool, inflation: float, cell_line: str,
feature: str) -> Path:
"""
Find the community file of interest, or if it doesn't exist, create it.
@@ -74,10 +74,12 @@ def find_or_create_community(project: str, weight: int, global_weight: int,
:param same_gene: Say if we consider as co-localised, exons within the \
same gene (True) or not (False) (default False)
:param inflation: The inflation parameter.
+ :param cell_line: Interactions are only selected from projects made on \
+ a specific cell line (ALL to disable this filter). (default ALL)
:param feature: The feature we want to analyse (default 'exon')
:return: The file containing community
- >>> res = find_or_create_community("", 3, 3, True, 1.5, "gene")
+ >>> res = find_or_create_community("", 3, 3, True, 1.5, "ALL", "gene")
>>> res_path = str(res.relative_to(Path(__file__).parents[3]))
>>> res_path.endswith("global-weight-3_weight-3_same_gene-True_gene.txt")
True
@@ -87,12 +89,13 @@ def find_or_create_community(project: str, weight: int, global_weight: int,
True
"""
cfile = ConfigGraph.get_community_file(project, weight, global_weight,
- same_gene, inflation, feature,
- ".txt")
+ same_gene, inflation, cell_line,
+ feature, ".txt")
if not cfile.is_file():
project = get_projects(global_weight, project)
multiple_community_launcher(weight, global_weight, project, same_gene,
- inflation, False, feature)
+ inflation, cell_line, False,
+ feature=feature)
if not cfile.is_file():
raise FileNotFoundError(f"The file {cfile} could not be created !")
return cfile
@@ -206,7 +209,8 @@ def create_table(feature: str, clip_file: Path,
def select_community_file(project: str, weight: int, global_weight: int,
- same_gene: bool, inflation: float, feature: str,
+ same_gene: bool, inflation: float, cell_line: str,
+ feature: str,
community_file: str = "") -> Tuple[Path, Path]:
"""
Return the community file and output folder that will be used.
@@ -220,6 +224,8 @@ def select_community_file(project: str, weight: int, global_weight: int,
:param same_gene: Say if we consider as co-localised, exons within the \
same gene (True) or not (False) (default False)
:param inflation: The inflation parameter
+ :param cell_line: Interactions are only selected from projects made on \
+ a specific cell line (ALL to disable this filter). (default ALL)
:param feature: The feature we want to analyse (default 'exon')
:param community_file: A file containing custom communities. If \
it equals to '' then weight, global weight and same genes parameter are \
@@ -229,7 +235,8 @@ def select_community_file(project: str, weight: int, global_weight: int,
"""
if community_file == "":
com_file = find_or_create_community(project, weight, global_weight,
- same_gene, inflation, feature)
+ same_gene, inflation, cell_line,
+ feature)
output = com_file.parent / f"CLIP_community_figures_{feature}"
else:
com_file = Path(community_file)
@@ -301,8 +308,8 @@ def add_regulation_column(df_table: pd.DataFrame, sf_name: str, feature: str,
def create_figure(project: str, weight: int, global_weight: int,
- same_gene: bool, inflation: float, feature: str,
- clip_file: Path,
+ same_gene: bool, inflation: float, cell_line: str,
+ feature: str, clip_file: Path,
feature_bed: Path, test_type: str = "permutation",
iteration: int = 10000, display_size: bool = False,
community_file: str = "", sl_reg: bool = False) -> None:
@@ -317,6 +324,8 @@ def create_figure(project: str, weight: int, global_weight: int,
:param same_gene: Say if we consider as co-localised, exons within the \
same gene (True) or not (False) (default False)
:param inflation: The inflation parameter
+ :param cell_line: Interactions are only selected from projects made on \
+ a specific cell line (ALL to disable this filter). (default ALL)
:param feature: The feature we want to analyse (default 'exon')
:param clip_file: A bed file containing clip
:param feature_bed: A bed files containing exons or genes depending on \
@@ -334,8 +343,8 @@ def create_figure(project: str, weight: int, global_weight: int,
"""
logging.info(f"Working on {clip_file}")
com_file, output = select_community_file(project, weight, global_weight,
- same_gene, inflation, feature,
- community_file)
+ same_gene, inflation, cell_line,
+ feature, community_file)
output.mkdir(exist_ok=True, parents=True)
outfile = output / f"{clip_file.name.split('.')[0]}.pdf"
final_table = create_table(feature, clip_file, feature_bed, com_file)
@@ -350,6 +359,7 @@ def create_figure(project: str, weight: int, global_weight: int,
def clip_folder_analysis(clip_folder: Path, project: str, weight: int,
global_weight: int, same_gene: bool, inflation: float,
+ cell_line: str,
feature: str, test_type: str = "permutation",
iteration: int = 10000, display_size: bool=False,
community_file: str = "", sl_reg: bool = False,
@@ -365,6 +375,8 @@ def clip_folder_analysis(clip_folder: Path, project: str, weight: int,
:param same_gene: Say if we consider as co-localised, exons within the \
same gene (True) or not (False) (default False)
:param inflation: The inflation parameter
+ :param cell_line: Interactions are only selected from projects made on \
+ a specific cell line (ALL to disable this filter). (default ALL)
:param feature: The feature we want to analyse (default 'exon')
:param clip_folder: A folder containing clip file
:param test_type: The king of test to perform for frequency analysis. \
@@ -388,7 +400,8 @@ def clip_folder_analysis(clip_folder: Path, project: str, weight: int,
pool = mp.Pool(processes=min(len(files), ps))
processes = []
for mfile in files:
- args = [project, weight, global_weight, same_gene, inflation, feature,
+ args = [project, weight, global_weight, same_gene, inflation,
+ cell_line, feature,
mfile, feature_bed, test_type, iteration, display_size,
community_file, sl_reg]
processes.append(pool.apply_async(create_figure, args))
--
GitLab