Skip to content
Snippets Groups Projects
Commit 8344a444 authored by nfontrod's avatar nfontrod
Browse files

src/visu/figure_maker.py src/visu/__main__.py: bin0_norm -> norm parameter...

 src/visu/figure_maker.py src/visu/__main__.py: bin0_norm -> norm parameter that can either be a file or a bin number or None
parent f9647cba
No related branches found
No related tags found
No related merge requests found
......@@ -7,7 +7,6 @@ Description: Create a figure showing the ChIP-Seq coverage of particular \
gene regions from ChIP-seq experiment.
"""
from .figure_maker import create_figure
import lazyparser as lp
from pathlib import Path
......@@ -16,10 +15,10 @@ from typing import List
@lp.parse(design='file', region_bed='file',
nb_bin="nb_bin > 5", figure_type=['metagene', 'barplot'],
show_replicate=['y', 'n', 'Y', 'N'], norm_bin0=['y', 'n', 'Y', 'N'])
show_replicate=['y', 'n', 'Y', 'N'])
def launcher(design: str, bw_folder: str, region_bed: str,
region_name: str, nb_bin: int = 100,
figure_type: str = 'metagene', norm_bin0: str = 'y',
figure_type: str = 'metagene', norm: str = 'None',
show_replicate: str = 'y', environment: List[int] = (0, 0),
border_names: List[str] = ('', ''),
output: str = '.') -> None:
......@@ -37,7 +36,9 @@ def launcher(design: str, bw_folder: str, region_bed: str,
:param nb_bin: The number of bins used to represent the regions of \
'region_bed'.
:param figure_type: The kind of representation wanted (barplot or metagene)
:param norm_bin0: True to normalize the figure by the 0bin false else.
:param norm: A number corresponding to a bin, or a file with \
the normalisation value to apply for each replicate. 'None' for no \
normalisation (default 'None')
:param show_replicate: 'y' to create a figure showing the replicates, \
'n' otherwise.
:param environment: A list of two int. The first contains the number of \
......@@ -47,14 +48,18 @@ def launcher(design: str, bw_folder: str, region_bed: str,
:param output: Folder where the results will be created
"""
if environment[0] < 0 or environment[1] < 0 or \
environment[0] < environment[1]:
environment[0] < environment[1]:
raise ValueError(f"The two values given with --environment must "
f"be greater than 0 and the first value must be "
f"greater than the second")
show_rep = True if show_replicate.lower() == 'y' else False
norm_b0 = True if norm_bin0.lower() == 'y' else False
norm = int(norm) if norm.isdigit() else None if norm == 'None' else norm
if isinstance(norm, str):
norm = Path(norm)
if not norm.is_file():
raise FileNotFoundError(f"The file {norm} was not found")
create_figure(Path(design), Path(bw_folder), Path(region_bed),
region_name, nb_bin, figure_type, norm_b0, show_rep,
region_name, nb_bin, figure_type, norm, show_rep,
environment, border_names, Path(output))
......
......@@ -214,7 +214,7 @@ def create_df_summary(df_cov: pd.DataFrame, figure_type: str, nb_bin: int,
def figure_metagene(df_sum: pd.DataFrame, show_replicate: bool,
border_names: List[str], nb_bin: int,
environment: List[int], region_name: str,
output: Path, norm_bin0: bool) -> None:
output: Path, norm: Union[int, Path]) -> None:
"""
Create a metagene figure on the region of interest.
......@@ -227,7 +227,9 @@ def figure_metagene(df_sum: pd.DataFrame, show_replicate: bool,
the number of bin used to represent those surrounding regions.
:param output: Folder where the figure will be created
:param region_name: The region of interest
:param norm_bin0: True to normalize the figure by the 0bin false else.
:param norm: an integer corresponding to the bin used to normalise \
the samples or a file containing the normalisations to apply to \
each sample
"""
sns.set(context='poster', style='white')
if show_replicate:
......@@ -254,8 +256,10 @@ def figure_metagene(df_sum: pd.DataFrame, show_replicate: bool,
g.fig.suptitle(title)
outfile_title = f"metagene_{region_name}_{nb_bin}bin_" \
f"{environment[0]}_nt-around-{environment[1]}-bin"
if norm_bin0:
outfile_title += "_b0_norm"
if isinstance(norm, int):
outfile_title += f"_b{norm}_norm"
elif isinstance(norm, Path):
outfile_title += f"_file_norm"
outfile_title += ".pdf"
g.savefig(output / outfile_title)
g.fig.clf()
......@@ -264,7 +268,7 @@ def figure_metagene(df_sum: pd.DataFrame, show_replicate: bool,
def figure_barplot(df_sum: pd.DataFrame, show_replicate: bool,
nb_bin: int,
environment: List[int], region_name: str,
output: Path, norm_bin0: bool) -> None:
output: Path, norm: Union[int, Path]) -> None:
"""
Create a barplot figure on the region of interest.
......@@ -276,7 +280,9 @@ def figure_barplot(df_sum: pd.DataFrame, show_replicate: bool,
the number of bin used to represent those surrounding regions.
:param output: Folder where the figure will be created
:param region_name: The region of interest
:param norm_bin0: True to normalize the figure by the 0bin false else.
:param norm: an integer corresponding to the bin used to normalise \
the samples or a file containing the normalisations to apply to \
each sample
"""
sns.set(context='poster', style='white')
if show_replicate:
......@@ -294,24 +300,42 @@ def figure_barplot(df_sum: pd.DataFrame, show_replicate: bool,
g.fig.suptitle(title)
outfile_title = f"barplot_{region_name}_{nb_bin}bin_" \
f"{environment[0]}_nt-around-{environment[1]}-bin"
if norm_bin0:
outfile_title += "_b0_norm"
if isinstance(norm, int):
outfile_title += f"_b{norm}_norm"
elif isinstance(norm, Path):
outfile_title += f"_file_norm"
outfile_title += ".pdf"
g.savefig(output / outfile_title)
g.fig.clf()
def bin0_normalisation(df: pd.DataFrame) -> pd.DataFrame:
def bin_normalisation(df: pd.DataFrame, norm: Union[int, Path],
outfile: Path) -> pd.DataFrame:
"""
Normalise the bins coverage by the average overage on bin 0.
Normalise the bins coverage by the average coverage on a particular bin \
or by a value given in a particular file.
:param df: The dataframe of coverage
:param norm: The bin used to normalise the sample or a file containing \
the value used to normalise the samples.
:param outfile: The table containing coverage values
:return: the dataframe with normalised coverage
"""
df_val = df.loc[df['bin'] == 0,
['coverage', 'condition', 'replicate']] \
.groupby(['condition', 'replicate']).mean().reset_index()
df_val.rename({"coverage": "coef"}, axis=1, inplace=True)
if isinstance(norm, int):
if norm not in list(df['bin'].unique()):
raise ValueError(f"the bin {norm} was not found in the coverage "
f"dataframe.")
df_val = df.loc[df['bin'] == norm,
['coverage', 'condition', 'replicate']] \
.groupby(['condition', 'replicate']).mean().reset_index()
df_val.rename({"coverage": "coef"}, axis=1, inplace=True)
noutfile = outfile.parent / 'coef_table' / \
(outfile.name.replace(".txt.gz", "") +
f".txt")
noutfile.parent.mkdir(exist_ok=True, parents=True)
df_val.to_csv(noutfile, sep="\t", index=False)
else:
df_val = pd.read_csv(norm, sep="\t")
df = df.merge(df_val, how="left", on=['condition', 'replicate'])
df['coverage'] = df['coverage'] / df['coef']
df.drop('coef', axis=1, inplace=True)
......@@ -320,7 +344,8 @@ def bin0_normalisation(df: pd.DataFrame) -> pd.DataFrame:
def create_figure(design: Path, bw_folder: Path, region_bed: Path,
region_name: str, nb_bin: int = 100,
figure_type: str = 'metagene', norm_bin0: bool = False,
figure_type: str = 'metagene',
norm: Union[int, Path, None] = None,
show_replicate: bool = True, environment: List[int] = (0, 0),
border_names: List[str] = ('', ''),
output: Path = Path('.')) -> None:
......@@ -338,7 +363,9 @@ def create_figure(design: Path, bw_folder: Path, region_bed: Path,
:param nb_bin: The number of bins used to represent the regions of \
'region_bed'.
:param figure_type: The kind of representation wanted (barplot or metagene)
:param norm_bin0: True to normalize the figure by the 0bin false else.
:param norm: an integer corresponding to the bin used to normalise \
the samples, a file containing the normalisations to apply to \
each sample, or None to skip the normalisation
:param show_replicate: True to create a figure showing the replicates, \
False otherwise.
:param environment: A list of two int. The first contains the number of \
......@@ -352,8 +379,10 @@ def create_figure(design: Path, bw_folder: Path, region_bed: Path,
region_bed_name = region_bed.name.replace('.bed', '')
outfile = f'tmp_cov_table_{design.name}_{region_bed_name}_{nb_bin}bin_' \
f'{environment[0]}_nt-around-{environment[1]}-bin'
if norm_bin0:
outfile += '_bin0_norm'
if isinstance(norm, int):
outfile += f'_bin{norm}_norm'
elif isinstance(norm, Path):
outfile += f'_file_norm'
outfile += '.txt.gz'
cov_file = output / outfile
if cov_file.is_file():
......@@ -361,8 +390,8 @@ def create_figure(design: Path, bw_folder: Path, region_bed: Path,
else:
df_cov = create_full_table(df_exp, regions, nb_bin, environment,
bw_folder)
if norm_bin0:
df_cov = bin0_normalisation(df_cov)
if norm is not None:
df_cov = bin_normalisation(df_cov, norm, cov_file)
df_cov.to_csv(cov_file, sep="\t", index=False, compression='gzip')
ordered_condition = []
for condition in df_exp['condition'].to_list():
......@@ -372,16 +401,16 @@ def create_figure(design: Path, bw_folder: Path, region_bed: Path,
region_name, ordered_condition)
if figure_type == "metagene":
figure_metagene(df_sum, show_replicate, border_names, nb_bin,
environment, region_name, output, norm_bin0)
environment, region_name, output, norm)
else:
if 'location' in df_sum.columns:
for cur_region in df_sum['location'].unique():
df_tmp = df_sum.loc[df_sum['location'] == cur_region, :]
figure_barplot(df_tmp, show_replicate, nb_bin, environment,
cur_region, output, norm_bin0)
cur_region, output, norm)
else:
figure_barplot(df_sum, show_replicate, nb_bin, environment,
region_name, output, norm_bin0)
region_name, output, norm)
if __name__ == "__main__":
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment