Skip to content
Snippets Groups Projects
Verified Commit 098c0efe authored by Mia Croiset's avatar Mia Croiset
Browse files

read all trace files and plot benchmark (to clean)

parent dcc53642
Branches
Tags v0.2.8
No related merge requests found
import polars as pl
from pathlib import Path
def _load_as_text(path):
    """Read one tab-separated file, tag it with its path, and cast all columns to strings."""
    table = pl.read_csv(path, separator="\t")
    tagged = table.with_columns(file=pl.lit(str(path)))
    # Casting everything to Utf8 gives every table the same column dtypes
    # before they are stacked.
    return tagged.select(pl.all().cast(pl.Utf8))


# Every .txt file sitting in a pipeline_info/ directory one level below ./wt_AD281.
files = list(Path("./wt_AD281").glob("*/pipeline_info/*.txt"))

# One all-string table per input file.
list_df = [_load_as_text(file) for file in files]

# Stack the per-file tables and write the combined table as a single TSV.
df = pl.concat(list_df)
df.write_csv("recap.txt", separator="\t")
# Install the required packages only when they are missing, so that re-running
# the script does not reinstall them (and does not need network access) every
# time.
required_pkgs <- c("tidyverse", "ggplot2")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

library(tidyverse)
library(ggplot2)
library(dplyr)

# Load the tab-separated recap table (one row per record, header on line 1).
# NOTE(review): the path is absolute and machine-specific; confirm it points at
# the recap.txt produced for this benchmark.
df <- read.csv("/home/mcroiset/HiC/benchmark/recap.txt", sep = "\t", header = TRUE)
#' Convert human-readable durations to minutes
#'
#' Parses strings made of space-separated `<number><unit>` tokens, such as
#' `"1h 2m 3s"`, `"2.5s"`, `"500ms"` or `"1d 4h"`, and expresses each string as
#' a total number of minutes.
#'
#' Supported unit suffixes are `ms`, `s`, `m`, `h` and `d`. Tokens that do not
#' have the `<number><unit>` form (for example `"-"`), empty strings and
#' missing values contribute 0 minutes. If the same unit appears more than
#' once in one string, the last occurrence wins.
#'
#' @param x Character vector (or factor) of duration strings.
#' @return A character vector with one element per element of `x`, holding
#'   the duration in minutes; convert it with `as.numeric()` before doing
#'   arithmetic. The `ms` and `s` parts are rounded to 3 decimals (after
#'   conversion to minutes), the `m`, `h` and `d` parts to 2 decimals.
convert_time <- function(x) {
  # The unit alternation is anchored so "500ms" is read as milliseconds and
  # never as a minutes or seconds token with a stray letter in the number.
  token_pattern <- "^([0-9.]+)(ms|s|m|h|d)$"

  # Total minutes for the tokens of a single duration string.
  duration_to_minutes <- function(tokens) {
    parts <- c(d = 0, h = 0, m = 0, s = 0, ms = 0)
    for (token in tokens) {
      if (is.na(token)) {
        next
      }
      hit <- regmatches(token, regexec(token_pattern, token))[[1]]
      if (length(hit) != 3L) {
        # Not a <number><unit> token: ignore it.
        next
      }
      value <- as.numeric(hit[2])
      parts[[hit[3]]] <- switch(
        hit[3],
        ms = round(value / 60000, 3),
        s = round(value / 60, 3),
        m = round(value, 2),
        h = round(value * 60, 2),
        d = round(value * 1440, 2)
      )
    }
    sum(parts)
  }

  # strsplit() only accepts character input, so coerce first (factor columns).
  token_lists <- strsplit(as.character(x), " ")
  minutes <- vapply(
    token_lists, duration_to_minutes, numeric(1),
    USE.NAMES = FALSE
  )

  # Returned as character to keep the historical return type of this function.
  as.character(minutes)
}
# Convert the raw duration strings to minutes.
list_dur <- convert_time(df$duration)
# Show the raw duration column for a visual check.
df$duration
# convert_time() returns character values, so store them as a numeric column.
df$duration_minutes <- as.numeric(list_dur)

# Per-name mean (moy) and standard deviation (ect) of the duration in minutes.
# na.rm = TRUE keeps one unparsable duration from turning a whole group's
# summary into NA.
val <- df %>%
  group_by(name) %>%
  summarise(
    moy = mean(duration_minutes, na.rm = TRUE),
    ect = sd(duration_minutes, na.rm = TRUE)
  )

# One violin per name, one facet per name, 8 facets per row.
# coord_cartesian() zooms the y axis to 0-500 without discarding observations
# above 500; ylim() would drop them before the violin densities are computed.
ggplot(df, aes(x = name, y = duration_minutes)) +
  geom_violin() +
  coord_cartesian(ylim = c(0, 500)) +
  facet_wrap(~ name, ncol = 8)

# Debugging leftovers:
# for (x in df$duration_minutes) {
# print(class(x))
# }
# mean(df$duration_minutes)
# sd(df$duration_minutes)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment