Skip to content
Snippets Groups Projects
Commit a6614856 authored by nfontrod's avatar nfontrod
Browse files

initial commit

parents
No related branches found
No related tags found
No related merge requests found
Showing with 875 additions and 0 deletions
.idea/*
src/visu/__pycache__/*.pyc
src/bed_handler/__pycache__/*.pyc
*
!.gitignore
*
!.gitignore
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
Description:
"""
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
Description: Create the bed file that will be used for the bigwig \
visualisation
"""
from .filter_gene import create_filtered_bed
from .get_gene_locations import create_region_bed
def launcher():
    """
    Build every bed file needed to visualise the bigwig files.

    The filtered gene bed is produced first because the region beds are
    written from it.
    """
    for build_step in (create_filtered_bed, create_region_bed):
        build_step()


launcher()
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
Description: A class containing all the variables used in this submodule
"""
from pathlib import Path
class OutputBed:
    """
    Paths of the bed files written by this submodule.
    """
    # Folder holding every output bed: <root>/results/bed_file, where <root>
    # is two directories above the one containing this file.
    output = Path(__file__).parents[2].joinpath("results", "bed_file")
    # Genes of interest only
    filtered_gene = output.joinpath("filtered_gene.bed")
    # One bed per region extracted from the genes of interest
    body_gene = output.joinpath("body_gene.bed")
    tss_gene = output.joinpath("tss_gene.bed")
    tts_gene = output.joinpath("tts_gene.bed")
    after_gene = output.joinpath("after_gene.bed")
class BedConfig:
    """
    Input locations and parameters used to build the bed files.
    """
    # Project root: two directories above the one containing this file
    base = Path(__file__).parents[2]
    # File listing the ids of the genes of interest
    ddx_genes = base.joinpath("data", "DDX5_17_genes.txt")
    # Input bed files describing the genes and their exons
    gene_bed = base.joinpath("data", "bed", "gene.bed")
    exon_bed = base.joinpath("data", "bed", "exon.bed")
    # Locations of the bed files to create
    bed = OutputBed
    # Size of the region located after a gene (used by write_bed)
    size = 5000
class TestConfig:
    """Paths of the fixture files used by the docstring tests."""
    # Fixture folder: <root>/tests/files, where <root> is two directories
    # above the one containing this file.
    base = Path(__file__).parents[2].joinpath("tests", "files")
    list_genes = base.joinpath("list_genes.txt")
    gene_bed = base.joinpath("genes.bed")
    exon_bed = base.joinpath("exons.bed")
    small_bw = base.joinpath("small.bw")
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
Description: Contains functions to filter the gene of interest in a bed file.
"""
import pandas as pd
from pathlib import Path
from doctest import testmod
from .config import BedConfig, TestConfig
from typing import List
def select_gene_of_interest(gene_file: Path) -> List[int]:
    """
    Get the FasterDB gene ids located in the file `gene_file`.

    :param gene_file: A file containing a list of gene of interest, one
        integer gene id per line. Blank lines are ignored.
    :return: The list of gene of interest
    :raises ValueError: if a non-blank line is not an integer

    >>> select_gene_of_interest(TestConfig.list_genes)
    [73, 75, 89, 123, 128]
    """
    with gene_file.open('r') as infile:
        # The filter must look at each line, not at the whole list: a blank
        # line (e.g. a trailing empty line) cannot be converted to int.
        return [int(gene_id) for gene_id in infile.read().splitlines()
                if gene_id.strip()]
def filter_bed(bed_file: Path, gene_list: List[int]) -> pd.DataFrame:
    """
    Read a tab-separated bed of FasterDB genes and keep the rows whose
    `id` column is listed in `gene_list`.

    :param bed_file: A bed file containing genes (with a header line \
holding an `id` column)
    :param gene_list: a list of gene of interest
    :return: The rows of the bed whose gene id is in gene_list; the \
original row index is kept

    >>> filter_bed(TestConfig.gene_bed,
    ...            [1, 5, 9])  # doctest: +NORMALIZE_WHITESPACE
    #ref start end id score strand
    0 18 28645943 28682388 1 DSC2 -
    4 13 45766989 45775176 5 KCTD4 -
    8 13 45967450 45992516 9 SLC25A30 -
    """
    genes = pd.read_csv(bed_file, sep="\t")
    is_wanted = genes["id"].isin(gene_list)
    return genes.loc[is_wanted]
def create_filtered_bed() -> None:
    """
    Write the bed file restricted to the genes of interest.

    The gene ids are read from `BedConfig.ddx_genes`, the genes from
    `BedConfig.gene_bed`, and the result is written tab-separated to
    `BedConfig.bed.filtered_gene` (the output folder is created if needed).
    """
    wanted_ids = select_gene_of_interest(BedConfig.ddx_genes)
    filtered = filter_bed(BedConfig.gene_bed, wanted_ids)
    out_beds = BedConfig.bed
    out_beds.output.mkdir(parents=True, exist_ok=True)
    filtered.to_csv(out_beds.filtered_gene, sep="\t", index=False)
# Run the doctests embedded in this module when it is executed directly.
if __name__ == "__main__":
    testmod()
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
Description: Create a bed file containing the gene locations of interest.
"""
from pathlib import Path
from typing import Dict, List
from .config import BedConfig, TestConfig
from doctest import testmod
def load_exon_bed(bed_exon: Path) -> Dict:
    """
    Load a bed file containing exons.

    The 4th column of each line must look like `<gene id>_<exon position>`
    (two integers). Lines starting with `#` and blank lines are skipped.

    :param bed_exon: A bed of exons
    :return: A dictionary linking each gene id to a dictionary of its \
exons: `{gene_id: {exon_position: [bed columns as strings]}}`

    >>> d = load_exon_bed(TestConfig.exon_bed)[1]
    >>> d[1]
    ['18', '28681865', '28682388', '1_1', '0', '-']
    >>> d[2]
    ['18', '28681183', '28681432', '1_2', '0', '-']
    """
    dic = {}
    with bed_exon.open('r') as inbed:
        for line in inbed:
            # Header/comment lines and blank lines (e.g. a trailing empty
            # line) do not describe an exon.
            if line.startswith("#") or not line.strip():
                continue
            cline = line.replace("\n", "").split("\t")
            gene, pos = map(int, cline[3].split("_"))
            dic.setdefault(gene, {})[pos] = cline
    return dic
def get_gene_body(gene: List[str], exons: Dict) -> List:
    """
    Restrict the gene `gene` to its body: the interval going from the
    second exon to the second-to-last exon (both included), exons being
    ranked by their key in `exons`.

    `gene` is modified in place and returned.

    :param gene: A gene (bed columns: chromosome, start, end, id, name, \
strand)
    :param exons: A dictionary of the exons inside that gene, keyed by \
their position in the gene
    :return: The gene body of the gene

    >>> e = {1: ['5', '100', '110', '1_1', 'Test', '+'],
    ... 2: ['5', '130', '140', '1_2', 'Test', '+'],
    ... 3: ['5', '160', '200', '1_3', 'Test', '+']}
    >>> get_gene_body(['5', '100', '200', '1', 'Test', '+'], e)
    ['5', '130', '140', '1', 'Test', '+']
    >>> e = {1: ['5', '100', '110', '1_1', 'Test', '+'],
    ... 2: ['5', '130', '140', '1_2', 'Test', '+'],
    ... 3: ['5', '160', '170', '1_3', 'Test', '+'],
    ... 4: ['5', '190', '200', '1_4', 'Test', '+']}
    >>> get_gene_body(['5', '100', '200', '1', 'Test', '+'], e)
    ['5', '130', '170', '1', 'Test', '+']
    >>> e = {4: ['5', '100', '110', '1_4', 'Test', '-'],
    ... 3: ['5', '130', '140', '1_3', 'Test', '-'],
    ... 2: ['5', '160', '170', '1_2', 'Test', '-'],
    ... 1: ['5', '190', '200', '1_1', 'Test', '-']}
    >>> get_gene_body(['5', '100', '200', '1', 'Test', '-'], e)
    ['5', '130', '170', '1', 'Test', '-']
    """
    on_forward_strand = gene[5] == "+"
    ranks = sorted(exons)
    second_exon = exons[ranks[1]]
    penultimate_exon = exons[ranks[-2]]
    # On the reverse strand the exon ranks run against the coordinates, so
    # the exon giving the start and the one giving the end are swapped.
    if on_forward_strand:
        first_kept, last_kept = second_exon, penultimate_exon
    else:
        first_kept, last_kept = penultimate_exon, second_exon
    gene[1] = first_kept[1]
    gene[2] = last_kept[2]
    return gene
def get_gene_tss(gene: List[str], exons: Dict) -> List:
    """
    Restrict the gene `gene` to its tss region: the interval going from
    the beginning of the gene (strand-wise) up to its second exon, exons
    being ranked by their key in `exons`.

    `gene` is modified in place and returned.

    :param gene: A gene (bed columns: chromosome, start, end, id, name, \
strand)
    :param exons: A dictionary of the exons inside that gene, keyed by \
their position in the gene
    :return: The gene tss of the gene

    >>> e = {1: ['5', '100', '110', '1_1', 'Test', '+'],
    ... 2: ['5', '130', '140', '1_2', 'Test', '+'],
    ... 3: ['5', '160', '170', '1_3', 'Test', '+'],
    ... 4: ['5', '190', '200', '1_4', 'Test', '+']}
    >>> get_gene_tss(['5', '100', '200', '1', 'Test', '+'], e)
    ['5', '100', '130', '1', 'Test', '+']
    >>> e = {4: ['5', '100', '110', '1_4', 'Test', '-'],
    ... 3: ['5', '130', '140', '1_3', 'Test', '-'],
    ... 2: ['5', '160', '170', '1_2', 'Test', '-'],
    ... 1: ['5', '190', '200', '1_1', 'Test', '-']}
    >>> get_gene_tss(['5', '100', '200', '1', 'Test', '-'], e)
    ['5', '170', '200', '1', 'Test', '-']
    """
    on_forward_strand = gene[5] == "+"
    second_exon = exons[sorted(exons)[1]]
    if on_forward_strand:
        # The region stops where the second exon starts
        gene[2] = second_exon[1]
    else:
        # Reverse strand: the region starts where the second exon ends
        gene[1] = second_exon[2]
    return gene
def get_gene_tts(gene: List[str], exons: Dict) -> List:
    """
    Restrict the gene `gene` to its tts region: the interval going from
    its second-to-last exon up to the end of the gene (strand-wise), exons
    being ranked by their key in `exons`.

    `gene` is modified in place and returned.

    :param gene: A gene (bed columns: chromosome, start, end, id, name, \
strand)
    :param exons: A dictionary of the exons inside that gene, keyed by \
their position in the gene
    :return: The gene tts of the gene

    >>> e = {1: ['5', '100', '110', '1_1', 'Test', '+'],
    ... 2: ['5', '130', '140', '1_2', 'Test', '+'],
    ... 3: ['5', '160', '170', '1_3', 'Test', '+'],
    ... 4: ['5', '190', '200', '1_4', 'Test', '+']}
    >>> get_gene_tts(['5', '100', '200', '1', 'Test', '+'], e)
    ['5', '170', '200', '1', 'Test', '+']
    >>> e = {4: ['5', '100', '110', '1_4', 'Test', '-'],
    ... 3: ['5', '130', '140', '1_3', 'Test', '-'],
    ... 2: ['5', '160', '170', '1_2', 'Test', '-'],
    ... 1: ['5', '190', '200', '1_1', 'Test', '-']}
    >>> get_gene_tts(['5', '100', '200', '1', 'Test', '-'], e)
    ['5', '100', '130', '1', 'Test', '-']
    """
    on_forward_strand = gene[5] == "+"
    penultimate_exon = exons[sorted(exons)[-2]]
    if on_forward_strand:
        # The region starts where the second-to-last exon ends
        gene[1] = penultimate_exon[2]
    else:
        # Reverse strand: the region stops where that exon starts
        gene[2] = penultimate_exon[1]
    return gene
def get_after_gene(gene: List[str], size: int) -> List:
    """
    Get the region of `size` nucleotides located right after the gene
    `gene` (strand-wise).

    `gene` is modified in place and returned. On the reverse strand the
    region is truncated so that its start never goes below 0. On the
    forward strand the end is not checked against the chromosome length,
    which is unknown here.

    :param gene: A gene (bed columns: chromosome, start, end, id, name, \
strand)
    :param size: The size of the region after the gene to check
    :return: The region located after the gene

    >>> get_after_gene(['5', '100', '200', '1', 'Test', '+'], 100)
    ['5', '200', '300', '1', 'Test', '+']
    >>> get_after_gene(['5', '100', '200', '1', 'Test', '-'], 100)
    ['5', '0', '100', '1', 'Test', '-']
    >>> get_after_gene(['5', '50', '200', '1', 'Test', '-'], 100)
    ['5', '0', '50', '1', 'Test', '-']
    """
    if gene[5] == "+":
        gene[1] = gene[2]
        gene[2] = str(int(gene[2]) + size)
    else:
        gene[2] = gene[1]
        # A bed coordinate cannot be negative: clamp at the chromosome start
        gene[1] = str(max(int(gene[1]) - size, 0))
    return gene
def write_bed(bed_file: Path, dic_exon: Dict, region: str,
              outfile: Path) -> None:
    """
    Copy `bed_file` to `outfile`, replacing every gene by one of its
    regions: its body, its tss, its tts or the region after the gene.

    Header lines (starting with `#`) are copied unchanged.

    :param bed_file: A bed file containing the genes of interest.
    :param dic_exon: A dictionary containing the exons within the genes
    :param region: The region of interest: 'body', 'tss' or 'tts'; any \
other value selects the region after the gene
    :param outfile: The output file of interest
    :raises ValueError: if a gene has fewer than 3 exons
    """
    # Regions whose boundaries are computed from the exons of the gene
    exon_based_regions = {"body": get_gene_body,
                          "tss": get_gene_tss,
                          "tts": get_gene_tts}
    with bed_file.open('r') as infile, outfile.open('w') as out:
        for line in infile:
            if line.startswith("#"):
                out.write(line)
                continue
            cline = line.replace('\n', '').split('\t')
            exons = dic_exon[int(cline[3])]
            if len(exons) < 3:
                raise ValueError(f"The gene have to few exons "
                                 f"{len(exons)}. It should at "
                                 f"least have 3 exons")
            extractor = exon_based_regions.get(region)
            if extractor is None:
                cline = get_after_gene(cline, BedConfig.size)
            else:
                cline = extractor(cline, exons)
            out.write('\t'.join(cline) + '\n')
def create_region_bed() -> None:
    """
    Write one bed file per wanted region (body, tss, tts and the region
    after the gene) from the bed of filtered genes.
    """
    dic_exon = load_exon_bed(BedConfig.exon_bed)
    beds = BedConfig.bed
    targets = (('body', beds.body_gene),
               ('tss', beds.tss_gene),
               ('tts', beds.tts_gene),
               ('after', beds.after_gene))
    for region, outfile in targets:
        write_bed(beds.filtered_gene, dic_exon, region, outfile)
# Run the doctests embedded in this module when it is executed directly.
if __name__ == "__main__":
    testmod()
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
Description:
"""
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
Description: Create a figure showing the ChIP-Seq coverage of particular \
gene regions from ChIP-seq experiment.
"""
from .figure_maker import create_figure
import lazyparser as lp
from pathlib import Path
from typing import List
# NOTE(review): the docstring below is kept verbatim because the lp.parse
# decorator may read it to build the command line help - confirm.
@lp.parse(design='file', region_bed='file',
          nb_bin="nb_bin > 5", figure_type=['metagene', 'barplot'],
          show_replicate=['y', 'n', 'Y', 'N'])
def launcher(design: str, bw_folder: str, region_bed: str,
             region_name: str, nb_bin: int = 100,
             figure_type: str = 'metagene',
             show_replicate: str = 'y', environment: List[int] = (0, 0),
             border_names: List[str] = ('', ''),
             output: str = '.') -> None:
    """
    Create A metagene or a barplot figure from bigwig file on regions defined \
    in the bed file provided with 'region_bed' parameter.
    :param design: A tabulated file containing 3 columns. The first columns \
    contains a bigwig filename, the second contains the condition name and \
    the last one contains the replicate of the condition.
    :param bw_folder: The folder containing the bigwig file mentioned in \
    the first column of the 'design' table.
    :param region_bed: A bed file containing the regions to visualise
    :param region_name: The name of the region analysed
    :param nb_bin: The number of bins used to represents the regions of \
    'region_bed'.
    :param figure_type: The kind of representation wanted (barplot or metagene)
    :param show_replicate: True to create a figure showing the replicate \
    false else.
    :param environment: A list of two int. The first contains the number of \
    nucleotide to represent around the region of interest and the second,
    the number of bin used to represent those surrounding regions.
    :param border_names: The name of the borders
    :param output: Folder where the results will be created
    """
    # Both values must be non-negative and there cannot be more bins than
    # nucleotides in the surrounding regions.
    environment_is_valid = (environment[0] >= 0 and environment[1] >= 0
                            and environment[0] >= environment[1])
    if not environment_is_valid:
        raise ValueError("The two values given with --environment must "
                         "be greater than 0 and the first value must be "
                         "greater than the second")
    show_rep = show_replicate.lower() == 'y'
    create_figure(Path(design), Path(bw_folder), Path(region_bed),
                  region_name, nb_bin, figure_type, show_rep, environment,
                  border_names, Path(output))


launcher()
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
Description:
"""
from ..bed_handler.config import BedConfig
\ No newline at end of file
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
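Description: Build coverage tables from bigwig files and draw metagene or barplot figures of the regions of a bed file.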
"""
from pathlib import Path
from typing import List, Union, Any
from doctest import testmod
from ..bed_handler.config import TestConfig
import pandas as pd
import pyBigWig as pbw
import seaborn as sns
import matplotlib.pyplot as plt
from tqdm import tqdm
def load_bed(bed: Path) -> List[List[Union[int, str]]]:
    """
    Read a bed file and return the lines within it.

    Lines starting with `#` and blank lines are skipped. Only the first six
    columns are kept; start, end and id (columns 2 to 4) are converted to
    int.

    :param bed: A bed file containing the regions of interest
    :return: The list of feature inside the bed

    >>> load_bed(TestConfig.gene_bed)[0]
    ['18', 28645943, 28682388, 1, 'DSC2', '-']
    """
    list_regions = []
    with bed.open('r') as inbed:
        for line in inbed:
            # Header/comment lines and blank lines (e.g. a trailing empty
            # line) do not describe a region.
            if line.startswith("#") or not line.strip():
                continue
            cline = line.replace("\n", "").split("\t")
            list_regions.append([cline[0], int(cline[1]), int(cline[2]),
                                 int(cline[3]), cline[4], cline[5]])
    return list_regions
def inspect_bigwig_regions(bw: Any, region: List,
                           replicate: str, nb_bin: int, resize: List[int],
                           condition_name: str,
                           ) -> pd.DataFrame:
    """
    Get the coverage value inside the bigwig region `region`.

    The region is summarised in `nb_bin` bins numbered from 0. When
    `resize[0]` is greater than 0, the `resize[0]` nucleotides located
    before and after the region (clipped to the chromosome boundaries) are
    each summarised in `resize[1]` extra bins, numbered with negative
    values before the region and from `nb_bin` after it. Values are always
    reported in the orientation of the region: for a region that is not on
    the '+' strand the order of the values is reversed, whether or not
    surrounding regions are requested.

    :param bw: A opened bigwig file
    :param region: The region of interest (chromosome, start, end, id, \
name, strand)
    :param replicate: The replicate name
    :param nb_bin: The number of bin that will represent the region
    :param resize: A list of two int: the number of nucleotide used to \
extend the region in both side and the number of bins used to represent \
each of those extensions
    :param condition_name: the name of the condition
    :return: a table with the coverage of this region (columns coverage, \
bin, condition and replicate)
    :raises ValueError: if the bigwig does not return `nb_bin` values

    >>> my_bw = pbw.open(str(TestConfig.small_bw))
    >>> region = ['1', 10, 25, 1, 'Test', '+']
    >>> inspect_bigwig_regions(my_bw, region, 'R1', 5, [4, 2],
    ...                        'cond1')  # doctest: +NORMALIZE_WHITESPACE
    coverage bin condition replicate
    0 0.000000 -2 cond1 R1
    1 0.500000 -1 cond1 R1
    2 75.000000 0 cond1 R1
    3 20.000000 1 cond1 R1
    4 10.000000 2 cond1 R1
    5 4.666667 3 cond1 R1
    6 2.000000 4 cond1 R1
    7 1.000000 5 cond1 R1
    8 0.500000 6 cond1 R1
    >>> region = ['1', 110, 133, 1, 'Test', '-']
    >>> inspect_bigwig_regions(my_bw, region, 'R1', 5, [4, 2],
    ...                        'cond1')  # doctest: +NORMALIZE_WHITESPACE
    coverage bin condition replicate
    0 0.00 -2 cond1 R1
    1 12.50 -1 cond1 R1
    2 42.00 0 cond1 R1
    3 8.00 1 cond1 R1
    4 4.25 2 cond1 R1
    5 2.00 3 cond1 R1
    6 2.00 4 cond1 R1
    7 1.00 5 cond1 R1
    8 1.00 6 cond1 R1
    """
    chrom, start, end = region[0], region[1], region[2]
    on_forward_strand = region[5] == "+"
    val = bw.stats(chrom, start, end, nBins=nb_bin, exact=True)
    bins = list(range(len(val)))
    if len(bins) != nb_bin:
        raise ValueError("The lenght of bins should be equals to nb_bin")
    if resize[0] > 0:
        # Surrounding windows, clipped to [0, chromosome length]
        max_loc = max(start - resize[0], 0)
        val_before = bw.stats(chrom, max_loc, start, nBins=resize[1],
                              exact=True)
        min_loc = min(end + resize[0], bw.chroms(chrom))
        val_after = bw.stats(chrom, end, min_loc, nBins=resize[1],
                             exact=True)
        if None in val_after:
            # NOTE(review): historical behaviour kept as is; why the values
            # are reversed when some of them are missing is not documented.
            val_after = val_after[::-1]
            print(f"Warning ! None values found in {region} - "
                  f"{[chrom, end, min_loc]}")
        if on_forward_strand:
            upstream, downstream = val_before, val_after
            val = val_before + val + val_after
        else:
            # Reverse strand: what follows the region on the genome comes
            # first, and every block of values is read backward.
            upstream, downstream = val_after, val_before
            val = val_after[::-1] + val[::-1] + val_before[::-1]
        bins = (list(range(-len(upstream), 0)) + bins
                + list(range(nb_bin, nb_bin + len(downstream))))
    elif not on_forward_strand:
        # Without surrounding regions the values must still follow the
        # orientation of the region.
        val = val[::-1]
    df = pd.DataFrame({"coverage": val, "bin": bins})
    df['condition'] = [condition_name] * df.shape[0]
    df['replicate'] = [replicate] * df.shape[0]
    return df
def create_sample_table(bw_file: Path, regions: List[List],
                        replicate: str, nb_bin: int, resize: List[int],
                        condition_name: str,
                        ) -> pd.DataFrame:
    """
    Get the coverage table of one bigwig file for all the regions of
    interest.

    :param bw_file: A bigwig file
    :param regions: The regions of interest
    :param replicate: The replicate name
    :param nb_bin: The number of bin that will represent the region
    :param resize: A list of two int: the number of nucleotide used to \
extend the region in both side and the number of bins used to represent \
each of those extensions
    :param condition_name: the name of the condition
    :return: a table with the coverage of every region, one after the other
    """
    bw = pbw.open(str(bw_file))
    try:
        list_df = [inspect_bigwig_regions(bw, region, replicate, nb_bin,
                                          resize, condition_name)
                   for region in tqdm(regions, desc="scanning coverage ...")]
    finally:
        # Release the file handle even if a region cannot be read
        bw.close()
    return pd.concat(list_df, axis=0, ignore_index=True)
def create_full_table(df_exp: pd.DataFrame, regions: List[List],
                      nb_bin: int, resize: List[int],
                      input_folder: Path) -> pd.DataFrame:
    """
    Build the coverage table of the regions for every bigwig file.

    :param df_exp: A dataframe describing the bigwig files to analyse \
(columns `bigwig`, `condition` and `replicate`)
    :param regions: The regions to visualise
    :param nb_bin: The number of bin used to resize the regions
    :param resize: A list of two int: the number of nucleotides of the \
regions localised at each sides of the genomic regions inside `regions` \
and the number of bins used to represent them
    :param input_folder: Folder where the bigwig file are located
    :return: The full coverage table (one sample after the other)
    """
    sample_tables = []
    for _, sample in df_exp.iterrows():
        bw_file = input_folder / sample['bigwig']
        print(f"working on file {bw_file}")
        condition = sample['condition']
        replicate = sample['replicate']
        sample_tables.append(
            create_sample_table(bw_file, regions, replicate, nb_bin,
                                resize, condition))
    return pd.concat(sample_tables, axis=0, ignore_index=True)
def create_df_summary(df_cov: pd.DataFrame, figure_type: str, nb_bin: int,
                      environment: List[int],
                      region_name: str, order_condition: List[str]
                      ) -> pd.DataFrame:
    """
    Summarize the data in df_cov.

    The coverage is first averaged for each bin, condition and replicate.
    For a metagene this table is returned as is. For a barplot the bins are
    then averaged together; when surrounding regions were requested
    (`environment[0] != 0`) the bins are averaged separately for the region
    located before the region of interest (negative bins), the region of
    interest itself and the region located after it (bins >= `nb_bin`).

    :param df_cov: A dataframe of coverage for each bin.
    :param figure_type: The kind of figure to make (metagene or barplot)
    :param nb_bin: The number of bin used to represent the region of interest
    :param environment: A list of two int. The first contains the number of \
nucleotide to represent around the region of interest and the second, \
the number of bin used to represent those surrounding regions.
    :param region_name: the name of the region analysed
    :param order_condition: The order of conditions
    :return: The summarised dataframe
    """
    df_sum = df_cov.groupby(['bin', 'condition', 'replicate']).mean()\
        .reset_index()
    if figure_type == "metagene":
        return df_sum
    col_merge = ['condition', 'replicate']
    before_name = f"before_{region_name}"
    after_name = f"after_{region_name}"
    if environment[0] != 0:
        # The location must be computed from the bins of the summarised
        # table itself: the rows of df_cov are not in the same order.
        df_sum['location'] = df_sum['bin'].apply(
            lambda x: before_name if x < 0 else
            after_name if x >= nb_bin else region_name)
        col_merge.append('location')
    df_sum = df_sum.drop('bin', axis=1)
    df_sum = df_sum.groupby(col_merge).mean().reset_index()
    if 'location' in df_sum.columns:
        # Ordered categories so that rows follow the order of the
        # conditions and the before / region / after order.
        df_sum['location'] = pd.Categorical(
            df_sum['location'], ordered=True,
            categories=[before_name, region_name, after_name]
        )
        df_sum['condition'] = pd.Categorical(
            df_sum['condition'], ordered=True,
            categories=order_condition
        )
        df_sum = df_sum.sort_values(['condition', 'location'],
                                    ascending=True)
    return df_sum
def figure_metagene(df_sum: pd.DataFrame, show_replicate: bool,
                    border_names: List[str], nb_bin: int,
                    environment: List[int], region_name: str,
                    output: Path) -> None:
    """
    Create a metagene figure on the region of interest.

    The figure is saved in `output` as a pdf whose name contains the region
    name, the number of bins and the two values of `environment`.

    :param df_sum: The summarized coverage table
    :param show_replicate: True to draw one line per replicate, False to \
draw one line per condition with the standard deviation
    :param border_names: The name of borders of the region of interest; \
an empty name disables the corresponding border
    :param nb_bin: The number of bins representing the regions of interest
    :param environment: A list of two int. The first contains the number of \
nucleotide to represent around the region of interest and the second, \
the number of bin used to represent those surrounding regions.
    :param region_name: The region of interest
    :param output: Folder where the figure will be created
    """
    sns.set(context='poster', style='white')
    # x and y are given by keyword: recent seaborn releases only accept
    # `data` as a positional argument.
    if show_replicate:
        g = sns.relplot(x='bin', y='coverage', hue='condition', data=df_sum,
                        kind='line', style='replicate', ci=None,
                        height=12, aspect=1.7)
    else:
        g = sns.relplot(x='bin', y='coverage', hue='condition', data=df_sum,
                        kind='line', ci="sd", height=12, aspect=1.7)
    # Border labels are written just below the top of the y axis
    y_val = g.ax.get_ylim()[1] * 0.99
    borders = ((border_names[0], 0), (border_names[1], nb_bin - 1))
    for border_name, x_pos in borders:
        if border_name != '':
            g.ax.axvline(x=x_pos, color='k', linestyle='--', alpha=0.1)
            g.ax.annotate(border_name, [x_pos, y_val], ha="center",
                          va='center')
    g.set_xlabels('Bins')
    g.set_ylabels('Coverage')
    plt.subplots_adjust(top=0.9)
    title = f"Average coverage in region '{region_name}'"
    if environment[0] != 0:
        title += f"\nand in their surrounding regions of {environment[0]} nt"
    g.fig.suptitle(title)
    g.savefig(output / f"metagene_{region_name}_{nb_bin}bin_"
                       f"{environment[0]}_nt-around-{environment[1]}-bin.pdf")
    g.fig.clf()
def figure_barplot(df_sum: pd.DataFrame, show_replicate: bool,
                   nb_bin: int,
                   environment: List[int], region_name: str,
                   output: Path) -> None:
    """
    Draw the average coverage of each condition as a barplot.

    The figure is saved in `output` as a pdf whose name contains the region
    name, the number of bins and the two values of `environment`.

    :param df_sum: The summarized coverage table
    :param show_replicate: True to draw one bar per replicate, False to \
draw one bar per condition with the standard deviation
    :param nb_bin: The number of bins representing the regions of interest
    :param environment: A list of two int. The first contains the number of \
nucleotide to represent around the region of interest and the second, \
the number of bin used to represent those surrounding regions.
    :param region_name: The region of interest
    :param output: Folder where the figure will be created
    """
    sns.set(context='poster', style='white')
    # Options shared by both kinds of barplot
    common_options = dict(x="condition", y="coverage", kind="bar",
                          data=df_sum, height=12, aspect=1.77)
    if show_replicate:
        g = sns.catplot(hue="replicate", ci=None, **common_options)
    else:
        g = sns.catplot(ci='sd', **common_options)
    g.set_xlabels('')
    g.set_ylabels('Coverage')
    plt.subplots_adjust(top=0.9)
    g.fig.suptitle(f"Average coverage in region '{region_name}'")
    figure_name = (f"barplot_{region_name}_{nb_bin}bin_"
                   f"{environment[0]}_nt-around-{environment[1]}-bin.pdf")
    g.savefig(output / figure_name)
    g.fig.clf()
def create_figure(design: Path, bw_folder: Path, region_bed: Path,
                  region_name: str, nb_bin: int = 100,
                  figure_type: str = 'metagene',
                  show_replicate: bool = True, environment: List[int] = (0, 0),
                  border_names: List[str] = ('', ''),
                  output: Path = Path('.')) -> None:
    """
    Create A metagene or a barplot figure from bigwig file on regions
    defined in the bed file provided with 'region_bed' parameter.

    The coverage table is stored gzipped in `output`; when a table already
    exists for the same bed file name, number of bins and environment it is
    read back instead of being computed again.

    :param design: A tabulated file containing 3 columns. The first columns \
contains a bigwig filename, the second contains the condition name and \
the last one contains the replicate of the condition.
    :param bw_folder: The folder containing the bigwig file mentioned in \
the first column of the 'design' table.
    :param region_bed: A bed file containing the regions to visualise
    :param region_name: The name of the region analysed
    :param nb_bin: The number of bins used to represents the regions of \
'region_bed'.
    :param figure_type: The kind of representation wanted (barplot or \
metagene)
    :param show_replicate: True to create a figure showing the replicate \
false else.
    :param environment: A list of two int. The first contains the number of \
nucleotide to represent around the region of interest and the second, \
the number of bin used to represent those surrounding regions.
    :param border_names: The name of the borders
    :param output: Folder where the results will be created (created if \
it does not exist)
    """
    # Create the result folder first: failing to write the coverage table
    # after the scan of every bigwig file would waste the whole computation.
    output.mkdir(parents=True, exist_ok=True)
    df_exp = pd.read_csv(design, sep="\t")
    regions = load_bed(region_bed)
    region_bed_name = region_bed.name.replace('.bed', '')
    outfile = f'tmp_cov_table_{region_bed_name}_{nb_bin}bin_' \
              f'{environment[0]}_nt-around-{environment[1]}-bin.txt.gz'
    cov_file = output / outfile
    if cov_file.is_file():
        df_cov = pd.read_csv(cov_file, sep="\t", compression='gzip')
    else:
        df_cov = create_full_table(df_exp, regions, nb_bin, environment,
                                   bw_folder)
        df_cov.to_csv(cov_file, sep="\t", index=False, compression='gzip')
    # Conditions in their order of first appearance in the design
    ordered_condition = list(dict.fromkeys(df_exp['condition'].to_list()))
    df_sum = create_df_summary(df_cov, figure_type, nb_bin, environment,
                               region_name, ordered_condition)
    if figure_type == "metagene":
        figure_metagene(df_sum, show_replicate, border_names, nb_bin,
                        environment, region_name, output)
    elif 'location' in df_sum.columns:
        # One barplot per location (before, inside and after the region)
        for cur_region in df_sum['location'].unique():
            df_tmp = df_sum.loc[df_sum['location'] == cur_region, :]
            figure_barplot(df_tmp, show_replicate, nb_bin, environment,
                           cur_region, output)
    else:
        figure_barplot(df_sum, show_replicate, nb_bin, environment,
                       region_name, output)
# Run the doctests embedded in this module when it is executed directly.
if __name__ == "__main__":
    testmod()
1 0 9 0
1 9 10 1
1 10 11 100
1 11 12 75
1 12 13 50
1 13 15 25
1 15 20 10
1 20 25 2
1 25 28 1
1 28 100 0
1 100 110 1
1 110 120 2
1 120 125 5
1 125 128 10
1 128 130 15
1 130 131 30
1 131 132 50
1 132 133 100
1 133 134 20
1 134 135 5
1 135 999 0
#ref start end id score strand
18 28681865 28682388 1_1 0 -
18 28681183 28681432 1_2 0 -
18 28673521 28673606 1_3 0 -
18 28672063 28672263 1_4 0 -
18 28671489 28671530 1_5 0 -
18 28670990 28671110 1_6 0 -
18 28669401 28669557 1_7 0 -
18 28667631 28667776 1_8 0 -
18 28666538 28666705 1_9 0 -
#ref start end id score strand
18 28645943 28682388 1 DSC2 -
18 28709190 28742819 2 DSC1 -
18 28898050 28937394 3 DSG1 +
18 28956739 28994869 4 DSG4 +
13 45766989 45775176 5 KCTD4 -
13 45911001 45915347 6 TPT1 -
18 48918411 49088839 7 AC011260.1 +
18 49866541 51062273 8 DCC +
13 45967450 45992516 9 SLC25A30 -
73
75
89
123
128
File added
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
Description: Run the doctests of every python module found in the src folder.
"""
import doctest
from pathlib import Path
from typing import List
import unittest
import sys
# Put the parent of this file's folder first on the import path; the module
# names built below (src.<...>) are relative to that folder.
sys.path.insert(0, str(Path(__file__).parents[1].resolve()))
# recover ignored files
def get_ignored_files() -> List[str]:
    """
    List the python files mentioned in the .gitignore located in the parent
    of this file's folder, written as dotted module names.

    :return: The ignored modules (an empty list without .gitignore file)
    """
    gitignore = Path(__file__).parents[1] / '.gitignore'
    if not gitignore.is_file():
        return []
    ignored_modules = []
    for entry in gitignore.read_text().splitlines():
        if entry.endswith('.py'):
            # path/to/module.py -> path.to.module
            ignored_modules.append(
                entry.replace('.py', '').replace('/', '.'))
    return ignored_modules
# Loading every python file of the src folder, as paths relative to the
# parent of this file's folder
_project_root = Path(__file__).resolve().parents[1]
list_mod = [str(mfile.relative_to(_project_root))
            for mfile in (_project_root / "src").rglob('*.py')]
# Dotted module names, without the package/entry point files nor the test
# and config modules
list_mod2 = [m.replace('.py', '').replace('/', '.') for m in list_mod
             if '__init__' not in m
             and '__main__' not in m
             and 'test' not in m
             and 'config' not in m]
# The .gitignore file is read once instead of once per candidate module
_ignored_modules = set(get_ignored_files())
final_mod = [mod for mod in list_mod2 if mod not in _ignored_modules]
def load_tests(loader, tests, ignore):
    """
    unittest `load_tests` hook: add the doctests of every module listed in
    `final_mod` to the test suite.

    :param loader: The test loader (unused)
    :param tests: The test suite to complete
    :param ignore: Unused
    :return: The completed test suite
    """
    tests.addTests(doctest.DocTestSuite(module_name)
                   for module_name in final_mod)
    return tests
# Run the tests of this module (the doctests gathered by load_tests) when it
# is executed directly.
if __name__ == "__main__":
    unittest.main()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment