From 098c0efe8e5870bed6417c34b7b71db8f4fd87b1 Mon Sep 17 00:00:00 2001
From: Mia Croiset <mia.croiset@ens-lyon.fr>
Date: Mon, 17 Jun 2024 16:15:27 +0200
Subject: [PATCH] read all trace files and plot benchmark (to clean)

---
 benchmark/open_traces.py | 39 +++++++++++++++++++++++
 benchmark/plots.r        | 65 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 104 insertions(+)
 create mode 100755 benchmark/open_traces.py
 create mode 100644 benchmark/plots.r

diff --git a/benchmark/open_traces.py b/benchmark/open_traces.py
new file mode 100755
index 0000000..e264b29
--- /dev/null
+++ b/benchmark/open_traces.py
@@ -0,0 +1,39 @@
+#!/usr/bin/env python3
+"""Merge every trace file found under ./wt_AD281 into one TSV, recap.txt."""
+
+from pathlib import Path
+
+import polars as pl
+
+TRACE_ROOT = Path("./wt_AD281")
+TRACE_PATTERN = "*/pipeline_info/*.txt"
+OUTPUT_FILE = "recap.txt"
+
+
+def read_trace(path: Path) -> pl.DataFrame:
+    """Read one tab-separated trace file as an all-string DataFrame.
+
+    A ``file`` column holding the source path is added so rows stay traceable
+    after concatenation. Every column is cast to Utf8, which lets files with
+    differing inferred column types be concatenated.
+    """
+    return (
+        pl.read_csv(path, separator="\t")
+        .with_columns(file=pl.lit(str(path)))
+        .select(pl.all().cast(pl.Utf8))
+    )
+
+
+def main() -> None:
+    """Concatenate all matching trace files and write them to OUTPUT_FILE."""
+    # Sorted so the row order does not depend on directory listing order.
+    paths = sorted(TRACE_ROOT.glob(TRACE_PATTERN))
+    if not paths:
+        # pl.concat rejects an empty list; stop with an explicit message instead.
+        raise SystemExit(f"no trace file matches {TRACE_ROOT / TRACE_PATTERN}")
+    frames = [read_trace(path) for path in paths]
+    pl.concat(frames).write_csv(OUTPUT_FILE, separator="\t")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/benchmark/plots.r b/benchmark/plots.r
new file mode 100644
index 0000000..13dd0f2
--- /dev/null
+++ b/benchmark/plots.r
@@ -0,0 +1,65 @@
+# Plot the distribution of the durations listed in recap.txt.
+#
+# Usage: Rscript plots.r [recap_file]
+
+# Install a package only when it is missing rather than on every run.
+for (pkg in c("tidyverse", "ggplot2")) {
+  if (!requireNamespace(pkg, quietly = TRUE)) {
+    install.packages(pkg)
+  }
+}
+library(tidyverse)
+library(ggplot2)
+library(dplyr)
+
+# The input defaults to the original hard-coded location; an optional first
+# command-line argument overrides it.
+default_recap <- "/home/mcroiset/HiC/benchmark/recap.txt"
+args <- commandArgs(trailingOnly = TRUE)
+recap_file <- if (length(args) >= 1) args[1] else default_recap
+df <- read.csv(recap_file, sep = "\t", header = TRUE)
+
+# Convert duration strings such as "1h 2m 3.5s" into minutes.
+#
+# x: character vector (or factor); each element is a space-separated list of
+#    "<number><unit>" tokens, the unit being one of ms, s, m, h or d.
+#    NOTE(review): ms and d are accepted on the assumption that the traces
+#    use them for sub-second and multi-day durations - confirm.
+# Returns a numeric vector as long as x, with NA where an element is missing,
+# empty or not made only of such tokens.
+convert_time <- function(x) {
+  minutes_per_unit <- c(ms = 1 / 60000, s = 1 / 60, m = 1, h = 60, d = 1440)
+  token_pattern <- "^[0-9]+(\\.[0-9]+)?(ms|s|m|h|d)$"
+  to_minutes <- function(tokens) {
+    tokens <- tokens[!is.na(tokens) & nzchar(tokens)]
+    if (length(tokens) == 0 || !all(grepl(token_pattern, tokens))) {
+      return(NA_real_)
+    }
+    # Split on the whole unit suffix: testing for a bare "s" or "m" would also
+    # match "ms" and then fail to parse the number.
+    value <- as.numeric(sub("[a-z]+$", "", tokens))
+    unit <- sub("^[0-9.]+", "", tokens)
+    sum(value * minutes_per_unit[unit])
+  }
+  vapply(strsplit(as.character(x), " ", fixed = TRUE), to_minutes, numeric(1))
+}
+
+df$duration_minutes <- convert_time(df$duration)
+
+# Mean (moy) and standard deviation (ect) of the duration per value of `name`.
+val <- df %>%
+  group_by(name) %>%
+  summarise(
+    moy = mean(duration_minutes, na.rm = TRUE),
+    ect = sd(duration_minutes, na.rm = TRUE)
+  )
+print(val)
+
+# print() makes the plot render when the script is source()d as well as when
+# it is run with Rscript.
+print(
+  ggplot(df, aes(x = name, y = duration_minutes)) +
+    geom_violin() +
+    ylim(0, 500) +
+    facet_wrap(~ name, ncol = 8)
+)
-- 
GitLab