Select Git revision
docker_init.sh
Forked from
LBMC / nextflow
Source project has a limited visibility.
gc_stats.py 1.26 KiB
#!/usr/bin/env python3
# -*- coding: UTF-8 -*-
"""
Description: The goal of this script is to perform a statistical analysis of gc content.
"""
import pandas as pd
from typing import List, Any
from scipy.stats import mannwhitneyu
from itertools import combinations
def mann_whitney(df: pd.DataFrame) -> List[Any]:
    """
    Run a Mann-Whitney U test comparing the gc content of two regions.

    Only the first pair of distinct regions (in order of appearance in the
    'region' column) is compared; any additional region is ignored.

    :param df: A dataframe of gc content with a 'region' column and a
        'gc_content' column
    :return: The names of the two compared regions followed by the p-value
        of the test
    :raises ValueError: If the dataframe contains fewer than two distinct
        regions, since no comparison can be made
    """
    regions = df['region'].unique()
    # combinations() yields nothing when there are fewer than two regions;
    # fail loudly instead of silently returning None to the caller.
    first_pair = next(combinations(regions, 2), None)
    if first_pair is None:
        raise ValueError(
            "At least two distinct regions are required to run the "
            f"Mann-Whitney test, found {len(regions)}"
        )
    r1, r2 = first_pair
    v1 = df.loc[df['region'] == r1, "gc_content"]
    v2 = df.loc[df['region'] == r2, "gc_content"]
    return [r1, r2, mannwhitneyu(v1, v2).pvalue]
def make_stat(df: pd.DataFrame) -> List[List[Any]]:
    """
    Compute the Mann-Whitney p-values of interest, location by location.

    When the dataframe holds a single location, the test is run on the whole
    dataframe and the region names are returned unchanged. Otherwise the test
    is run once per location and each region name is paired with its
    location as a ``(location, region)`` tuple.

    :param df: A dataframe of gc content with a 'location' column, passed
        on to ``mann_whitney``
    :return: The list of pvalues of interest, one entry per test performed
    """
    locations = df['location'].unique()
    if len(locations) == 1:
        return [mann_whitney(df)]

    results = []
    for location in locations:
        subset = df.loc[df["location"] == location, :]
        outcome = mann_whitney(subset)
        # Tag both region names with the location they were measured in.
        labelled = [(location, outcome[0]), (location, outcome[1]),
                    *outcome[2:]]
        results.append(labelled)
    return results