Skip to content
Snippets Groups Projects
Commit 6f00da86 authored by nfontrod's avatar nfontrod
Browse files

src/find_interaction_cluster/clip_figures/clip_launcher_4_many_communities.py:...

src/find_interaction_cluster/clip_figures/clip_launcher_4_many_communities.py: create for each clip the community figures obtained using many community files
parent 6a6f734d
Branches
No related tags found
No related merge requests found
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
Description: Launch Clip analysis for many communities
"""
import logging
from typing import Tuple, Dict
from pathlib import Path
from .config import ConfigClip
from .clip_analyser import create_table, \
add_regulation_column, create_community_fig, find_or_create_community, \
ConfigGraph, logging_def
import multiprocessing as mp
from itertools import product
import lazyparser as lp
import numpy as np
import subprocess as sp
def select_community_file(project: str, weight: int, global_weight: int,
                          same_gene: bool, inflation: float, cell_line: str,
                          feature: str, clip_file: Path,
                          community_file: str = "") -> Tuple[Path, Path]:
    """
    Pick the community file to analyse and the folder receiving the figures.

    When ``community_file`` is empty, the community file is obtained from \
    ``find_or_create_community`` with the project/weight/... parameters. \
    Otherwise the given path is used as is, provided it is an existing file.

    :param project: The name of the project of interest
    :param weight: The minimum weight of interaction to consider
    :param global_weight: The global weight to consider. With 0 the \
    density figures are computed project by project, otherwise all projects \
    are merged and the interactions seen in `global_weight` projects are kept
    :param same_gene: True to consider exons within the same gene as \
    co-localised, False otherwise
    :param inflation: The inflation parameter
    :param cell_line: Only interactions from projects made on this cell \
    line are selected ('ALL' disables this filter)
    :param feature: The feature to analyse ('exon' or 'gene')
    :param clip_file: A bed file containing clip. The part of its name \
    located before the first dot names the output sub-folder
    :param community_file: A file containing custom communities, or '' \
    to rely on the communities computed with ChIA-PET data
    :return: The community file used and the output folder used.
    :raises FileNotFoundError: If a custom community file is given but \
    is not an existing file
    """
    if community_file != "":
        # Custom communities: only check that the file really exists.
        com_file = Path(community_file)
        if not com_file.is_file():
            raise FileNotFoundError(f"File {com_file} was not found !")
    else:
        com_file = find_or_create_community(project, weight, global_weight,
                                            same_gene, inflation, cell_line,
                                            feature)
    clip_label = clip_file.name.split('.')[0]
    figure_root = ConfigClip.output_folder / \
        f"multiTAD_CLIP_community_figures-{feature}-{cell_line}"
    return com_file, figure_root / clip_label
def create_figure(p: Dict, clip_file: Path,
                  feature_bed: Path,
                  community_file: Tuple[str, str],
                  test_type: str = "permutation",
                  iteration: int = 10000, display_size: bool = False,
                  sl_reg: bool = False) -> Path:
    """
    Build the community figure of one clip file for one community file.

    The figure is written as ``<community name>.tmp.pdf`` in the output \
    folder returned by ``select_community_file``.

    :param p: A dictionary containing the parameters used to compute \
    HipMCL communities (keys: project, weight, global_weight, same_gene, \
    inflation, cell_line, feature)
    :param clip_file: A bed file containing clip
    :param feature_bed: A bed file containing exons or genes depending on \
    the feature parameter.
    :param community_file: A tuple whose first item is a file containing \
    custom communities ('' to use the communities computed with ChIA-PET \
    data) and whose second item is the name given to those communities.
    :param test_type: The kind of test to perform for frequency analysis \
    (default 'permutation') (choose from 'lm', 'permutation')
    :param iteration: The number of iterations to make
    :param display_size: True to display the size of the community. \
    False to display nothing. (default False)
    :param sl_reg: True to display the FaRLine regulation of the \
    same factor, False to not display it.
    :return: Folder containing the figure
    """
    custom_path = community_file[0]
    community_name = community_file[1]
    logging.info(f"Working on {clip_file} - {custom_path} - "
                 f"{community_name}")
    hipmcl_keys = ("project", "weight", "global_weight", "same_gene",
                   "inflation", "cell_line", "feature")
    com_file, output = select_community_file(
        *(p[key] for key in hipmcl_keys), clip_file, custom_path)
    output.mkdir(exist_ok=True, parents=True)
    # The '.tmp.pdf' suffix is the pattern merge_figures looks for.
    outfile = output / f"{community_name}.tmp.pdf"
    feature = p["feature"]
    final_table = create_table(feature, clip_file, feature_bed, com_file)
    if sl_reg:
        # The text before the first '_' of the clip file name is handed to
        # add_regulation_column (presumably the factor name - confirm).
        factor = clip_file.name.split("_")[0]
        final_table = add_regulation_column(final_table, factor, feature)
    create_community_fig(final_table, feature, "peak_density", outfile,
                         test_type, iteration=iteration,
                         display_size=display_size)
    return outfile.parent
def merge_figures(folder: Path) -> None:
    """
    Merge the figures together using imageMagick

    Every ``*.tmp.pdf`` file of ``folder`` is given to ``montage``, which \
    writes the merged figure in ``<folder>/<folder name>.pdf``.

    :param folder: A folder containing pdf files
    :raises FileNotFoundError: If the folder contains no ``*.tmp.pdf`` file
    :raises subprocess.CalledProcessError: If montage exits with a non-zero \
    status
    """
    folder = Path(folder)
    # Expand the pattern here instead of relying on the shell: the command
    # can then be passed as an argument list, so folder or file names with
    # spaces or shell metacharacters are handled verbatim. Dot-files are
    # skipped because a shell '*' would not have matched them either.
    figures = sorted(fig for fig in folder.glob("*.tmp.pdf")
                     if not fig.name.startswith("."))
    if not figures:
        raise FileNotFoundError(
            f"No *.tmp.pdf figure to merge in {folder}")
    merged = folder / f"{folder.name}.pdf"
    cmd = ["montage", "-geometry", "+1+1", "-tile", "1X6",
           "-compress", "jpeg", "-density", "100",
           *(str(fig) for fig in figures), str(merged)]
    sp.check_call(cmd)
# NOTE(review): lazyparser presumably reads the docstring below to build the
# command line help - keep every ':param' on one logical line (hence the
# trailing backslashes).
@lp.parse(test_type=["permutation", "lm"], feature=["gene", "exon"])
def clip_folder_analysis(clip_folder: str, weight: int,
                         global_weight: int, same_gene: bool = True,
                         project: str = "GSM1018963_GSM1018964",
                         inflation: float = 1.5,
                         cell_line: str = "ALL",
                         feature: str = "exon",
                         test_type: str = "permutation",
                         iteration: int = 10000, display_size: bool = False,
                         sl_reg: bool = False) -> None:
    """
    Create the community figures of every clip file found in a folder, \
    one figure per community file declared in ConfigClip, then merge the \
    figures of each clip file into a single pdf. A FileNotFoundError is \
    raised when the folder contains no bed file.

    :param project: The name of the project of interest
    :param weight: The minimum weight of interaction to consider
    :param global_weight: The global weight to consider. if \
    the global weight is equal to 0 then the density figures are calculated \
    by project, else all projects are merged together and the interactions \
    seen in `global_weight` projects are taken into account
    :param same_gene: Say if we consider as co-localised, exons within the \
    same gene (True) or not (False) (default True)
    :param inflation: The inflation parameter
    :param cell_line: Interactions are only selected from projects made on \
    a specific cell line (ALL to disable this filter). (default ALL)
    :param feature: The feature we want to analyse (default 'exon')
    :param clip_folder: A folder containing clip files (.bed or .bed.gz)
    :param test_type: The kind of test to perform for frequency analysis. \
    (default 'permutation') (choose from 'lm', 'permutation')
    :param iteration: The number of iterations to make
    :param display_size: True to display the size of the community. \
    False to display nothing. (default False)
    :param sl_reg: True to display the FaRLine regulation of the \
    same factor, False to not display it.
    """
    logging_def(ConfigGraph.community_folder, __file__, "INFO")
    clip_folder = Path(clip_folder)
    feature_bed = ConfigClip.bed_gene if feature == "gene" \
        else ConfigClip.bed_exon
    # Every clip file is analysed: the former 'files = [files[0]]' kept a
    # single arbitrary file and crashed with IndexError on an empty folder.
    files = sorted(clip_folder.glob("*.bed")) + \
        sorted(clip_folder.glob("*.bed.gz"))
    if not files:
        raise FileNotFoundError(
            f"No .bed or .bed.gz file found in {clip_folder}")
    p = {"project": project, "weight": weight, "global_weight": global_weight,
         "same_gene": same_gene, "inflation": inflation,
         "cell_line": cell_line, "feature": feature}
    tasks = []
    for mfile, community_file in product(
            files, zip(ConfigClip.communities, ConfigClip.communities_name)):
        if community_file[0] == "":
            # No custom file: name the figure after the HipMCL parameters.
            community_file = (
                community_file[0],
                f"HIPMCL_g{global_weight}_w{weight}_{inflation}")
        tasks.append((p, mfile, feature_bed, community_file, test_type,
                      iteration, display_size, sl_reg))
    if not tasks:
        # mp.Pool refuses to start with zero process, so stop here.
        logging.warning("No community file configured: nothing to do")
        return
    # Leaving the 'with' block terminates the pool, hence the results are
    # all collected before the end of the block.
    with mp.Pool(processes=min(len(tasks), ConfigGraph.cpu)) as pool:
        pending = [pool.apply_async(create_figure, args) for args in tasks]
        folders = {result.get(timeout=None) for result in pending}
    # One merged pdf per output folder (i.e. per clip file).
    for my_folder in sorted(folders):
        merge_figures(Path(my_folder))
# Script entry point. No argument is given to the call below although the
# function has required parameters: presumably the ``lp.parse`` decorator
# fills them from the command line (confirm in the lazyparser documentation).
if __name__ == "__main__":
    clip_folder_analysis()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment