Skip to content
Snippets Groups Projects
Commit fbb7ac9b authored by nfontrod's avatar nfontrod
Browse files

src/find_interaction_cluster/clip_figures/clip_analyser.py: analyse now the clip density

parent 706d947a
No related branches found
No related tags found
No related merge requests found
......@@ -30,18 +30,18 @@ def bedtools_intersect(gene_bed: Path, clip_bed: Path,
>>> bedtools_intersect(ConfigClip.test_gene_bed, ConfigClip.test_clip_bed,
... "gene")
id_gene clip_peak
0 1 2
1 2 1
2 3 3
3 4 0
4 5 0
5 6 0
6 7 0
7 8 0
8 9 0
9 415 0
10 10123 0
id_gene clip_peak peak_density
0 1 2 0.054877
1 2 1 0.029736
2 3 3 0.076251
3 4 0 0.000000
4 5 0 0.000000
5 6 0 0.000000
6 7 0 0.000000
7 8 0 0.000000
8 9 0 0.000000
9 415 0 0.000000
10 10123 0 0.000000
"""
res = sp.check_output(f"bedtools intersect -a {gene_bed} "
......@@ -49,7 +49,9 @@ def bedtools_intersect(gene_bed: Path, clip_bed: Path,
res = StringIO(str(res, "utf-8"))
id_col = f"id_{feature}"
columns = ["chr", "start", "end", id_col, "name", "strand", "clip_peak"]
return pd.read_csv(res, sep="\t", names=columns)[[id_col, "clip_peak"]]
df = pd.read_csv(res, sep="\t", names=columns)
df['peak_density'] = df['clip_peak'] / ((df['end'] - df['start']) / 1000)
return df[[f'id_{feature}', 'clip_peak', 'peak_density']]
def find_or_create_community(project: str, weight: int, global_weight: int,
......@@ -137,20 +139,21 @@ def merge_dataframes(peak_df: pd.DataFrame, com_df: pd.DataFrame,
:return: The dataframe merged
>>> d1 = pd.DataFrame({"id_gene": [0, 1, 2, 3, 4, 5, 6, 7],
... "clip_peak": [2, 1, 3, 0, 0, 0, 0, 0]})
... "peak_density": [0.054877, 0.029736, 0.076251, 0, 0, 0, 0, 0]})
>>> d2 = pd.DataFrame({"id_gene": [0, 1, 2, 3, 4],
... "community": ["C1", "C1", "C2", "C2", "C2"],
... "community_size": [2, 2, 3, 3, 3]})
>>> merge_dataframes(d1, d2, "gene")
id_gene clip_peak community community_size
0 0 2 C1 2.0
1 1 1 C1 2.0
2 2 3 C2 3.0
3 3 0 C2 3.0
4 4 0 C2 3.0
5 5 0 NaN NaN
6 6 0 NaN NaN
7 7 0 NaN NaN
id_gene peak_density community community_size
0 0 0.054877 C1 2.0
1 1 0.029736 C1 2.0
2 2 0.076251 C2 3.0
3 3 0.000000 C2 3.0
4 4 0.000000 C2 3.0
5 5 0.000000 NaN NaN
6 6 0.000000 NaN NaN
7 7 0.000000 NaN NaN
"""
return peak_df.merge(com_df, how="left", on=f"id_{feature}")
......@@ -171,18 +174,19 @@ def create_table(feature: str, clip_file: Path,
>>> cf = find_or_create_community("", 3, 3, True, "gene")
>>> create_table("gene", ConfigClip.test_clip_bed,
... ConfigClip.test_gene_bed, cf)
id_gene clip_peak community community_size
0 1 2 NaN NaN
1 2 1 NaN NaN
2 3 3 NaN NaN
3 4 0 NaN NaN
4 5 0 NaN NaN
5 6 0 NaN NaN
6 7 0 NaN NaN
7 8 0 NaN NaN
8 9 0 NaN NaN
9 415 0 C1 11.0
10 10123 0 NaN NaN
id_gene clip_peak peak_density community community_size
0 1 2 0.054877 NaN NaN
1 2 1 0.029736 NaN NaN
2 3 3 0.076251 NaN NaN
3 4 0 0.000000 NaN NaN
4 5 0 0.000000 NaN NaN
5 6 0 0.000000 NaN NaN
6 7 0 0.000000 NaN NaN
7 8 0 0.000000 NaN NaN
8 9 0 0.000000 NaN NaN
9 415 0 0.000000 C1 11.0
10 10123 0 0.000000 NaN NaN
"""
threshold = 10 if feature == "gene" else 50
......@@ -219,7 +223,7 @@ def create_figure(project: str, weight: int, global_weight: int,
output.mkdir(exist_ok=True)
outfile = output / f"{clip_file.name.split('.')[0]}.pdf"
final_table = create_table(feature, clip_file, feature_bed, com_file)
create_community_fig(final_table, feature, "clip_peak", outfile,
create_community_fig(final_table, feature, "peak_density", outfile,
test_type)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment