Skip to content
Snippets Groups Projects
Commit f0558847 authored by nfontrod's avatar nfontrod
Browse files

add a parameter norm_bin0

parent 94c02db1
Branches
No related tags found
No related merge requests found
...@@ -16,10 +16,10 @@ from typing import List ...@@ -16,10 +16,10 @@ from typing import List
@lp.parse(design='file', region_bed='file', @lp.parse(design='file', region_bed='file',
nb_bin="nb_bin > 5", figure_type=['metagene', 'barplot'], nb_bin="nb_bin > 5", figure_type=['metagene', 'barplot'],
show_replicate=['y', 'n', 'Y', 'N']) show_replicate=['y', 'n', 'Y', 'N'], norm_bin0=['y', 'n', 'Y', 'N'])
def launcher(design: str, bw_folder: str, region_bed: str, def launcher(design: str, bw_folder: str, region_bed: str,
region_name: str, nb_bin: int = 100, region_name: str, nb_bin: int = 100,
figure_type: str = 'metagene', figure_type: str = 'metagene', norm_bin0: str = 'y',
show_replicate: str = 'y', environment: List[int] = (0, 0), show_replicate: str = 'y', environment: List[int] = (0, 0),
border_names: List[str] = ('', ''), border_names: List[str] = ('', ''),
output: str = '.') -> None: output: str = '.') -> None:
...@@ -37,6 +37,7 @@ def launcher(design: str, bw_folder: str, region_bed: str, ...@@ -37,6 +37,7 @@ def launcher(design: str, bw_folder: str, region_bed: str,
:param nb_bin: The number of bins used to represents the regions of \ :param nb_bin: The number of bins used to represents the regions of \
'region_bed'. 'region_bed'.
:param figure_type: The kind of representation wanted (barplot or metagene) :param figure_type: The kind of representation wanted (barplot or metagene)
:param norm_bin0: True to normalize the figure by the 0bin false else.
:param show_replicate: True to create a figure showing the replicate \ :param show_replicate: True to create a figure showing the replicate \
false else. false else.
:param environment: A list of two int. The first contains the number of \ :param environment: A list of two int. The first contains the number of \
...@@ -51,9 +52,10 @@ def launcher(design: str, bw_folder: str, region_bed: str, ...@@ -51,9 +52,10 @@ def launcher(design: str, bw_folder: str, region_bed: str,
f"be greater than 0 and the first value must be " f"be greater than 0 and the first value must be "
f"greater than the second") f"greater than the second")
show_rep = True if show_replicate.lower() == 'y' else False show_rep = True if show_replicate.lower() == 'y' else False
norm_b0 = True if norm_bin0.lower() == 'y' else False
create_figure(Path(design), Path(bw_folder), Path(region_bed), create_figure(Path(design), Path(bw_folder), Path(region_bed),
region_name, nb_bin, figure_type, show_rep, environment, region_name, nb_bin, figure_type, norm_b0, show_rep,
border_names, Path(output)) environment, border_names, Path(output))
launcher() launcher()
...@@ -215,7 +215,7 @@ def create_df_summary(df_cov: pd.DataFrame, figure_type: str, nb_bin: int, ...@@ -215,7 +215,7 @@ def create_df_summary(df_cov: pd.DataFrame, figure_type: str, nb_bin: int,
def figure_metagene(df_sum: pd.DataFrame, show_replicate: bool, def figure_metagene(df_sum: pd.DataFrame, show_replicate: bool,
border_names: List[str], nb_bin: int, border_names: List[str], nb_bin: int,
environment: List[int], region_name: str, environment: List[int], region_name: str,
output: Path) -> None: output: Path, norm_bin0: bool) -> None:
""" """
Create a metagene figure on the region of interest. Create a metagene figure on the region of interest.
...@@ -228,6 +228,7 @@ def figure_metagene(df_sum: pd.DataFrame, show_replicate: bool, ...@@ -228,6 +228,7 @@ def figure_metagene(df_sum: pd.DataFrame, show_replicate: bool,
the number of bin used to represent those surrounding regions. the number of bin used to represent those surrounding regions.
:param output: Folder where the figure will be created :param output: Folder where the figure will be created
:param region_name: The region of interest :param region_name: The region of interest
:param norm_bin0: True to normalize the figure by the 0bin false else.
""" """
sns.set(context='poster', style='white') sns.set(context='poster', style='white')
if show_replicate: if show_replicate:
...@@ -252,15 +253,19 @@ def figure_metagene(df_sum: pd.DataFrame, show_replicate: bool, ...@@ -252,15 +253,19 @@ def figure_metagene(df_sum: pd.DataFrame, show_replicate: bool,
if environment[0] != 0: if environment[0] != 0:
title += f"\nand in their surrounding regions of {environment[0]} nt" title += f"\nand in their surrounding regions of {environment[0]} nt"
g.fig.suptitle(title) g.fig.suptitle(title)
g.savefig(output / f"metagene_{region_name}_{nb_bin}bin_" \ outfile_title = f"metagene_{region_name}_{nb_bin}bin_" \
f"{environment[0]}_nt-around-{environment[1]}-bin.pdf") f"{environment[0]}_nt-around-{environment[1]}-bin"
if norm_bin0:
outfile_title += "_b0_norm"
outfile_title += ".pdf"
g.savefig(output / outfile_title)
g.fig.clf() g.fig.clf()
def figure_barplot(df_sum: pd.DataFrame, show_replicate: bool, def figure_barplot(df_sum: pd.DataFrame, show_replicate: bool,
nb_bin: int, nb_bin: int,
environment: List[int], region_name: str, environment: List[int], region_name: str,
output: Path) -> None: output: Path, norm_bin0: bool) -> None:
""" """
Create a barplot figure on the region of interest. Create a barplot figure on the region of interest.
...@@ -272,6 +277,7 @@ def figure_barplot(df_sum: pd.DataFrame, show_replicate: bool, ...@@ -272,6 +277,7 @@ def figure_barplot(df_sum: pd.DataFrame, show_replicate: bool,
the number of bin used to represent those surrounding regions. the number of bin used to represent those surrounding regions.
:param output: Folder where the figure will be created :param output: Folder where the figure will be created
:param region_name: The region of interest :param region_name: The region of interest
:param norm_bin0: True to normalize the figure by the 0bin false else.
""" """
sns.set(context='poster', style='white') sns.set(context='poster', style='white')
if show_replicate: if show_replicate:
...@@ -287,14 +293,35 @@ def figure_barplot(df_sum: pd.DataFrame, show_replicate: bool, ...@@ -287,14 +293,35 @@ def figure_barplot(df_sum: pd.DataFrame, show_replicate: bool,
plt.subplots_adjust(top=0.9) plt.subplots_adjust(top=0.9)
title = f"Average coverage in region '{region_name}'" title = f"Average coverage in region '{region_name}'"
g.fig.suptitle(title) g.fig.suptitle(title)
g.savefig(output / f"barplot_{region_name}_{nb_bin}bin_" \ outfile_title = f"barplot_{region_name}_{nb_bin}bin_" \
f"{environment[0]}_nt-around-{environment[1]}-bin.pdf") f"{environment[0]}_nt-around-{environment[1]}-bin"
if norm_bin0:
outfile_title += "_b0_norm"
outfile_title += ".pdf"
g.savefig(output / outfile_title)
g.fig.clf() g.fig.clf()
def bin0_normalisation(df: pd.DataFrame) -> pd.DataFrame:
    """
    Normalise the bins coverage by the average coverage on bin 0.

    For every ('condition', 'replicate') pair, the mean coverage of the
    rows whose 'bin' equals 0 is computed and every coverage value of that
    pair is divided by it.

    :param df: The dataframe of coverage. It must contain the columns
        'bin', 'coverage', 'condition' and 'replicate'.
    :return: A new dataframe (the input is left untouched) with the same
        columns, the same row order and a fresh 0..n-1 index, in which
        'coverage' holds the normalised values. Pairs without any bin 0
        row get NaN; a bin 0 average of zero yields inf/NaN.
    """
    # Mask the coverage outside bin 0 with NaN: 'mean' skips NaN, so the
    # group-wise transform broadcasts the bin 0 average to every row of its
    # (condition, replicate) pair. No temporary column is added to the
    # frame, so an existing column (e.g. one named 'coef') cannot collide.
    bin0_mean = df['coverage'].where(df['bin'] == 0).groupby(
        [df['condition'], df['replicate']]).transform('mean')
    normalised = df.copy()
    normalised['coverage'] = df['coverage'] / bin0_mean
    # The previous merge-based implementation returned a fresh RangeIndex;
    # keep that contract for callers.
    return normalised.reset_index(drop=True)
def create_figure(design: Path, bw_folder: Path, region_bed: Path, def create_figure(design: Path, bw_folder: Path, region_bed: Path,
region_name: str, nb_bin: int = 100, region_name: str, nb_bin: int = 100,
figure_type: str = 'metagene', figure_type: str = 'metagene', norm_bin0: bool = False,
show_replicate: bool = True, environment: List[int] = (0, 0), show_replicate: bool = True, environment: List[int] = (0, 0),
border_names: List[str] = ('', ''), border_names: List[str] = ('', ''),
output: Path = Path('.')) -> None: output: Path = Path('.')) -> None:
...@@ -312,6 +339,7 @@ def create_figure(design: Path, bw_folder: Path, region_bed: Path, ...@@ -312,6 +339,7 @@ def create_figure(design: Path, bw_folder: Path, region_bed: Path,
:param nb_bin: The number of bins used to represents the regions of \ :param nb_bin: The number of bins used to represents the regions of \
'region_bed'. 'region_bed'.
:param figure_type: The kind of representation wanted (barplot or metagene) :param figure_type: The kind of representation wanted (barplot or metagene)
:param norm_bin0: True to normalize the figure by the 0bin false else.
:param show_replicate: True to create a figure showing the replicate \ :param show_replicate: True to create a figure showing the replicate \
false else. false else.
:param environment: A list of two int. The first contains the number of \ :param environment: A list of two int. The first contains the number of \
...@@ -324,13 +352,18 @@ def create_figure(design: Path, bw_folder: Path, region_bed: Path, ...@@ -324,13 +352,18 @@ def create_figure(design: Path, bw_folder: Path, region_bed: Path,
regions = load_bed(region_bed) regions = load_bed(region_bed)
region_bed_name = region_bed.name.replace('.bed', '') region_bed_name = region_bed.name.replace('.bed', '')
outfile = f'tmp_cov_table_{region_bed_name}_{nb_bin}bin_' \ outfile = f'tmp_cov_table_{region_bed_name}_{nb_bin}bin_' \
f'{environment[0]}_nt-around-{environment[1]}-bin.txt.gz' f'{environment[0]}_nt-around-{environment[1]}-bin'
if norm_bin0:
outfile += '_bin0_norm'
outfile += '.txt.gz'
cov_file = output / outfile cov_file = output / outfile
if cov_file.is_file(): if cov_file.is_file():
df_cov = pd.read_csv(cov_file, sep="\t", compression='gzip') df_cov = pd.read_csv(cov_file, sep="\t", compression='gzip')
else: else:
df_cov = create_full_table(df_exp, regions, nb_bin, environment, df_cov = create_full_table(df_exp, regions, nb_bin, environment,
bw_folder) bw_folder)
if norm_bin0:
df_cov = bin0_normalisation(df_cov)
df_cov.to_csv(cov_file, sep="\t", index=False, compression='gzip') df_cov.to_csv(cov_file, sep="\t", index=False, compression='gzip')
ordered_condition = [] ordered_condition = []
for condition in df_exp['condition'].to_list(): for condition in df_exp['condition'].to_list():
...@@ -340,16 +373,16 @@ def create_figure(design: Path, bw_folder: Path, region_bed: Path, ...@@ -340,16 +373,16 @@ def create_figure(design: Path, bw_folder: Path, region_bed: Path,
region_name, ordered_condition) region_name, ordered_condition)
if figure_type == "metagene": if figure_type == "metagene":
figure_metagene(df_sum, show_replicate, border_names, nb_bin, figure_metagene(df_sum, show_replicate, border_names, nb_bin,
environment, region_name, output) environment, region_name, output, norm_bin0)
else: else:
if 'location' in df_sum.columns: if 'location' in df_sum.columns:
for cur_region in df_sum['location'].unique(): for cur_region in df_sum['location'].unique():
df_tmp = df_sum.loc[df_sum['location'] == cur_region, :] df_tmp = df_sum.loc[df_sum['location'] == cur_region, :]
figure_barplot(df_tmp, show_replicate, nb_bin, environment, figure_barplot(df_tmp, show_replicate, nb_bin, environment,
cur_region, output) cur_region, output, norm_bin0)
else: else:
figure_barplot(df_sum, show_replicate, nb_bin, environment, figure_barplot(df_sum, show_replicate, nb_bin, environment,
region_name, output) region_name, output, norm_bin0)
if __name__ == "__main__": if __name__ == "__main__":
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment