Skip to content
Snippets Groups Projects
Commit 0d1a73d9 authored by nfontrod's avatar nfontrod
Browse files

src/find_interaction_cluster/sf_and_communities.py: get_sfname: update the...

src/find_interaction_cluster/sf_and_communities.py: get_sfname: update the function to get all the splicing factors stored in the database
parent 13ae4d4e
No related branches found
No related tags found
No related merge requests found
......@@ -25,6 +25,7 @@ import statsmodels.formula.api as api
from statsmodels.stats.multitest import multipletests
from rpy2.robjects import r, pandas2ri
from pathlib import Path
import datetime
def get_ft_regulated_in_communities(community: List[str],
......@@ -341,22 +342,66 @@ def get_stat4communities(sf_name: str, reg: str,
return df, s
def get_sfname(regulation: str = "one", threshold: int = 50) -> List[str]:
    """
    Return the splicing factor names in the ChIA-PET database that up or
    down-regulate at least `threshold` exons.

    The result is cached as text in a file located in
    ``ConfigGraph.results`` (one file per regulation/threshold pair). When
    that file already exists, its content is returned and the database is
    not queried.

    :param regulation: up, down, all, one. If regulation is up or down,
        then the function returns only the splicing factor names up or down
        regulating at least threshold exons. If 'all' is chosen then
        only splicing factors that up-regulate and down-regulate at least
        `threshold` exons are returned. If 'one' is chosen, then
        the splicing factor must have at least `threshold` up-regulated
        exons OR down-regulated exons.
    :param threshold: The minimum number of exons that must be regulated
        by the splicing factor
    :return: The list of splicing factors regulating at least threshold
        exons
    :raises ValueError: If `regulation` is not 'up', 'down', 'all' or 'one'

    >>> get_sfname() == ['PTBP1', 'SRSF1', 'MBNL1_2', 'SF3B1', 'SNRPC',
    ... 'U2AF2', 'DAZAP1', 'HNRNPC', 'PTBP1_2', 'RBFOX2', 'RBM10', 'ESRP2',
    ... 'HNRNPA1', 'HNRNPA2B1', 'HNRNPF', 'HNRNPH1', 'HNRNPM', 'HNRNPU',
    ... 'HNRNPL', 'SRSF3', 'HNRNPK', 'MBNL1', 'SRSF2', 'U2AF1', 'RBM25',
    ... 'RBM47', 'TRA2A_B', 'DDX5_DDX17', 'RBM17', 'RBM39', 'SF1', 'SFPQ',
    ... 'KHSRP', 'PRPF8', 'QKI', 'SF3B4', 'RBM15', 'RBM22', 'SF3A3',
    ... 'SNRNP70', 'SRSF7', 'SRSF9', 'TIA1', 'SRSF5', 'SNRNP200', 'PCBP1',
    ... 'PCBP2', 'FUS', 'RBMX', 'SRSF6', 'MBNL2', 'AGO2', 'AGO3', 'AKAP8L',
    ... 'AQR', 'CCAR1', 'CELF1', 'DDX5', 'EFTUD2', 'EWSR1', 'FMR1', 'FUBP1',
    ... 'GEMIN5', 'GPKOW', 'HNRNPLL', 'KHDRBS1', 'MATR3', 'NCBP2', 'PPIG',
    ... 'PPP1R8', 'PRPF4', 'PUF60', 'RAVER1', 'SAFB2', 'SART3', 'SMN1',
    ... 'SMNDC1', 'SUGP2', 'TAF15', 'TARDBP', 'TIAL1']
    True
    """
    # Reject unknown values up front: otherwise a typo would silently be
    # handled like 'all' and cached under a misleading file name.
    if regulation not in ("up", "down", "all", "one"):
        raise ValueError(
            "regulation must be 'up', 'down', 'all' or 'one', "
            f"not {regulation!r}")
    logging.debug('recovering sf factors ...')
    outfile = ConfigGraph.results / \
        f"sf_list_reg-{regulation}_t{threshold}.txt"
    if outfile.is_file():
        # The cache holds the str() of a list, e.g. "['PTBP1', 'SRSF1']".
        content = outfile.read_text(encoding="UTF-8").strip().strip("[]")
        if not content:
            # A cached empty list must come back as [] and not as [''].
            return []
        return [name.strip("'\"") for name in content.split(", ")]
    # 'up'/'down' look at a single direction; 'one' keeps the best of both
    # directions (max) while 'all' requires both to pass (min).
    if regulation in ("one", "all"):
        directions = ("up", "down")
    else:
        directions = (regulation,)
    combine = min if regulation == "all" else max
    cnx = sqlite3.connect(ConfigGraph.db_file)
    try:
        c = cnx.cursor()
        c.execute("SELECT DISTINCT sf_name from cin_project_splicing_lore")
        list_sf = []
        for row in c.fetchall():
            sf = row[0]
            nb_exons = combine(
                len(get_every_events_4_a_sl(cnx, sf, reg)[0][f"{sf}_{reg}"])
                for reg in directions
            )
            if nb_exons >= threshold:
                list_sf.append(sf)
    finally:
        # Always release the database handle, even if a query fails.
        cnx.close()
    # Make sure the cache directory exists before writing into it.
    outfile.parent.mkdir(parents=True, exist_ok=True)
    outfile.write_text(str(list_sf), encoding="UTF-8")
    return list_sf
def get_key(project: str, weight: int) -> str:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment