Skip to content
Snippets Groups Projects
Verified Commit 098c0efe authored by Mia Croiset
Browse files

read all trace files and plot benchmark (to clean)

parent dcc53642
No related branches found
No related tags found
No related merge requests found
import polars as pl
from pathlib import Path
def _load_trace(path):
    """Read one tab-separated trace file as an all-string frame.

    A ``file`` column holding the path of the source file is added so that
    every row can be traced back to the file it came from. All columns are
    cast to strings so frames from different files can be stacked.
    """
    frame = pl.read_csv(path, separator="\t")
    frame = frame.with_columns(file=pl.lit(str(path)))
    return frame.select(pl.all().cast(pl.Utf8))


# Collect every trace file found one directory level below ./wt_AD281.
files = list(Path("./wt_AD281").glob("*/pipeline_info/*.txt"))
list_df = list(map(_load_trace, files))

# Stack all traces and write them out as a single tab-separated recap file.
df = pl.concat(list_df)
df.write_csv("recap.txt", separator="\t")
# Install the required packages only when they are missing. Calling
# install.packages() unconditionally re-downloads them on every run and fails
# in non-interactive sessions where no CRAN mirror is configured.
for (pkg in c("tidyverse", "ggplot2")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

library(tidyverse)
library(ggplot2)
library(dplyr)

# Load the tab-separated recap table (one row per entry of the trace files).
df <- read.csv(
  "/home/mcroiset/HiC/benchmark/recap.txt",
  sep = "\t",
  header = TRUE
)
#' Convert textual durations to minutes
#'
#' Each duration is a space-separated list of `<number><unit>` tokens, for
#' example `"1h 2m 3s"`, `"1.5s"` or `"500ms"`. Supported units are `ms`, `s`,
#' `m`, `h` and `d`. Tokens that do not match this pattern (for example `"-"`)
#' contribute 0 minutes, as they always did.
#'
#' @param x A character vector (or factor) of durations.
#' @return A character vector the same length as `x` holding each duration in
#'   minutes, rounded to 3 decimals. The values are returned as strings, as
#'   before, so existing callers that apply `as.numeric()` keep working. `NA`
#'   inputs yield `NA`.
convert_time <- function(x) {
  # Number of minutes represented by one unit of each supported suffix.
  minutes_per_unit <- c(ms = 1 / 60000, s = 1 / 60, m = 1, h = 60, d = 1440)
  # Anchored pattern: "ms" is listed before "s" and "m", and the whole token
  # must match, so "500ms" is never mistaken for seconds or minutes.
  token_pattern <- "^([0-9]*\\.?[0-9]+)(ms|s|m|h|d)$"

  to_minutes <- function(duration) {
    if (is.na(duration)) {
      return(NA_real_)
    }
    tokens <- strsplit(duration, " ", fixed = TRUE)[[1]]
    pieces <- regmatches(tokens, regexec(token_pattern, tokens))
    total <- 0
    for (piece in pieces) {
      # A successful match yields: full token, number, unit.
      if (length(piece) == 3) {
        total <- total + as.numeric(piece[2]) * minutes_per_unit[[piece[3]]]
      }
    }
    round(total, 3)
  }

  # as.character() lets factor columns through; vapply() avoids growing the
  # result one element at a time.
  minutes <- vapply(as.character(x), to_minutes, numeric(1), USE.NAMES = FALSE)
  as.character(minutes)
}
# Convert the textual durations to minutes; the raw column is echoed as a
# visual check before the numeric column is attached to the table.
list_dur <- convert_time(df$duration)
df$duration
df$duration_minutes <- as.numeric(list_dur)

# Per-name summary: mean (moy) and standard deviation (ect) of the duration.
val <- summarise(
  group_by(df, name),
  moy = mean(duration_minutes),
  ect = sd(duration_minutes)
)

# One violin panel per name, 8 panels per row, y axis limited to 0-500 minutes.
ggplot(data = df, mapping = aes(x = name, y = duration_minutes)) +
  geom_violin() +
  ylim(0, 500) +
  facet_wrap(~ name, ncol = 8)
# for (x in df$duration_minutes) {
# print(class(x))
# }
# mean(df$duration_minutes)
# sd(df$duration_minutes)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment