Skip to content
Snippets Groups Projects
Verified Commit 9c206182 authored by nfontrod's avatar nfontrod
Browse files

script to merge the communities together

parent 426b2bca
Branches
No related tags found
No related merge requests found
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Description: The goal of this script is to merge multiple
community files together and remove redundant clusters, i.e.
clusters that contain the same genes
"""
from pathlib import Path
from typing import List
import lazyparser as lp
import polars as pl
from loguru import logger
from src.location_pp.config import ConfigLocation
def load_communities(files: list[Path]) -> pl.DataFrame:
    """Load community tables and drop communities with duplicated genes.

    Each file is read as a tab-separated table and reduced to the
    ``community``, ``nodes`` and ``genes`` columns. The tables are stacked
    in the order of ``files`` and, for every distinct ``genes`` value,
    only the first row encountered is kept. The number of removed rows is
    logged at INFO level.

    Args:
        files: Paths of the tab-separated community files to merge.

    Returns:
        The merged table with one row per distinct ``genes`` value, in
        order of first appearance.
    """
    tables = [
        pl.read_csv(file, separator="\t").select(
            ["community", "nodes", "genes"]
        )
        for file in files
    ]
    df = pl.concat(tables)
    row = df.shape[0]
    # maintain_order=True keeps the rows in input order; without it polars
    # does not guarantee the order of the result, so the merged file could
    # differ from one run to the next.
    # NOTE(review): duplicates are detected by exact equality of the
    # `genes` value; if that column is a delimited string, the same genes
    # listed in a different order would not be merged - confirm intended.
    df = df.unique(subset=["genes"], keep="first", maintain_order=True)
    final = df.shape[0]
    logger.info(
        f" {row - final} / {row} communities were duplicated and removed"
    )
    return df
@lp.parse(files="file")
def main_merge(files: List[str], outname: str) -> pl.DataFrame:
    """Merge community files and write the deduplicated table to disk.

    Args:
        files: Paths of the community files to merge.
        outname: Name of the output file, without extension. The table is
            written tab-separated to
            ``ConfigLocation.output / f"{outname}.txt"``.

    Returns:
        The merged, deduplicated table that was written.
    """
    # NOTE(review): the `files="file"` constraint is presumably checked by
    # lazyparser before this body runs - confirm against its documentation.
    df = load_communities([Path(f) for f in files])
    df.write_csv(ConfigLocation.output / f"{outname}.txt", separator="\t")
    # The signature promises a DataFrame; the function used to fall off the
    # end and return None.
    return df
if __name__ == "__main__":
    # Script entry point. main_merge is called without arguments here, so
    # the lp.parse decorator is presumably what supplies `files` and
    # `outname` from the command line - confirm against lazyparser docs.
    main_merge()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment