Skip to content
Snippets Groups Projects
Commit c296fc19 authored by nfontrod's avatar nfontrod
Browse files

src/bed_handler/select_regulated_near_ctcf_exons.py: add filter_expressed function

parent b2bc05e3
No related branches found
No related tags found
No related merge requests found
......@@ -15,6 +15,7 @@ import warnings
from .get_other_exon_in_same_gene import create_gene_bed4norm
from pathlib import Path
import lazyparser as lp
from typing import List
def filter_ctcf_distance_table(df: pd.DataFrame, reg: str, threshold: int,
......@@ -126,6 +127,17 @@ def filter_ctcf_distance_table(df: pd.DataFrame, reg: str, threshold: int,
return df
def filter_expressed(exon_list: List[str]) -> List[str]:
    """
    Filter only expressed exons.

    An exon is kept when the part of its identifier located before the \
    first underscore is found among the lines of the file \
    ``BedConfig.expressed_genes`` (presumably one gene id per line - \
    to be confirmed against the file producing it).

    :param exon_list: A list of exons (identifiers in the form \
    ``<prefix>_<suffix>``)
    :return: The list of expressed exons, in the same order as in \
    ``exon_list``
    """
    # Use a context manager so the file handle is always closed.
    with BedConfig.expressed_genes.open('r') as gene_file:
        # A set gives constant-time membership tests; the original list
        # was scanned once per exon.
        egenes = set(gene_file.read().splitlines())
    return [exon for exon in exon_list if exon.split("_")[0] in egenes]
def create_bed_ctcf_exon(reg: str, threshold: int,
location: str, include0: bool = False,
near_ctcf: bool = True) -> None:
......@@ -169,6 +181,7 @@ def create_bed_ctcf_exon(reg: str, threshold: int,
bad_id = df['id'].to_list() if include0 \
else df['id'].to_list() + df.loc[df['dist'] == 0, 'id'].to_list()
list_exons = [e for e in tmp_exons if e not in bad_id]
list_exons = filter_expressed(list_exons)
list_genes = [int(exon.split('_')[0]) for exon in list_exons]
df_exon = filter_bed(BedConfig.exon_bed, list_exons)
df_gene = filter_bed(BedConfig.gene_bed, list_genes)
......@@ -239,6 +252,7 @@ def get_bed_ctcf_exon(exon_bed: str, threshold: int,
bad_id = df['id'].to_list() if include0 \
else df['id'].to_list() + df.loc[df['dist'] == 0, 'id'].to_list()
list_exons = [e for e in tmp_exons if e not in bad_id]
list_exons = filter_expressed(list_exons)
list_genes = [int(exon.split('_')[0]) for exon in list_exons]
df_exon = filter_bed(BedConfig.exon_bed, list_exons)
df_gene = filter_bed(BedConfig.gene_bed, list_genes)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment