diff --git a/benchmark/open_traces.py b/benchmark/open_traces.py
new file mode 100755
index 0000000000000000000000000000000000000000..e264b2974cc04a0d207a839f132289463ddd4f05
--- /dev/null
+++ b/benchmark/open_traces.py
@@ -0,0 +1,34 @@
+"""Merge the per-run tables under ./wt_AD281 into a single recap.txt.
+
+Every ``*/pipeline_info/*.txt`` file is read as a tab-separated table, tagged
+with the path it came from in a ``file`` column, cast to strings and appended
+to one table that is written (tab-separated) to ``recap.txt``.
+"""
+from pathlib import Path
+
+import polars as pl
+
+TRACE_ROOT = Path("./wt_AD281")
+TRACE_GLOB = "*/pipeline_info/*.txt"
+OUTPUT = "recap.txt"
+
+
+def main() -> None:
+    """Collect the tables, concatenate them and write the recap file."""
+    # Sorted so the row order of the recap does not depend on directory order.
+    files = sorted(TRACE_ROOT.glob(TRACE_GLOB))
+    if not files:
+        # pl.concat rejects an empty list; report the actual cause instead.
+        raise SystemExit(f"no file matches {TRACE_ROOT / TRACE_GLOB}")
+    frames = [
+        pl.read_csv(path, separator="\t")
+        .with_columns(file=pl.lit(str(path)))  # remember the source file
+        # Strings everywhere, so per-file dtype inference cannot break the concat.
+        .select(pl.all().cast(pl.Utf8))
+        for path in files
+    ]
+    pl.concat(frames).write_csv(OUTPUT, separator="\t")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/benchmark/plots.r b/benchmark/plots.r
new file mode 100644
index 0000000000000000000000000000000000000000..13dd0f232b57b5717088b2eb301e3ecfefbf7426
--- /dev/null
+++ b/benchmark/plots.r
@@ -0,0 +1,68 @@
+# Violin plots of the durations stored in the recap table (presumably the
+# recap.txt written by open_traces.py).
+#
+# Usage: Rscript plots.r [path/to/recap.txt]
+
+# Install only what is missing instead of reinstalling on every run.
+for (pkg in c("tidyverse", "ggplot2")) {
+  if (!requireNamespace(pkg, quietly = TRUE)) {
+    install.packages(pkg)
+  }
+}
+library(tidyverse)
+library(ggplot2)
+library(dplyr)
+
+# The recap path may be given as the first command-line argument; the
+# previously hard-coded location remains the default.
+args <- commandArgs(trailingOnly = TRUE)
+recap_path <- if (length(args) >= 1) args[1] else "/home/mcroiset/HiC/benchmark/recap.txt"
+
+df <- read.csv(recap_path, sep = "\t", header = TRUE)
+
+# Convert duration strings to minutes.
+#
+# x: character vector (or factor) of space-separated "<number><unit>" tokens,
+#    e.g. "1h 5m 30s" or "350ms". Recognised units: ms, s, m, h, d.
+# Returns a numeric vector as long as x. Each token is rounded after
+# conversion (3 decimals for ms/s, 2 for m/h/d) and the tokens are summed.
+# NA, empty strings and tokens that do not match contribute 0 minutes.
+convert_time <- function(x) {
+  minutes_per_unit <- c(ms = 1 / 60000, s = 1 / 60, m = 1, h = 60, d = 1440)
+  digits_per_unit <- c(ms = 3, s = 3, m = 2, h = 2, d = 2)
+  # Anchored, so "350ms" is read as milliseconds and not as minutes or seconds.
+  token_pattern <- "^([0-9]*\\.?[0-9]+)(ms|s|m|h|d)$"
+
+  to_minutes <- function(duration) {
+    if (is.na(duration) || !nzchar(duration)) {
+      return(0)
+    }
+    tokens <- strsplit(duration, " ", fixed = TRUE)[[1]]
+    total <- 0
+    for (hit in regmatches(tokens, regexec(token_pattern, tokens))) {
+      # hit is c(full match, number, unit), or empty when the token did not match.
+      if (length(hit) == 3) {
+        unit <- hit[3]
+        minutes <- as.numeric(hit[2]) * minutes_per_unit[[unit]]
+        total <- total + round(minutes, digits_per_unit[[unit]])
+      }
+    }
+    total
+  }
+
+  vapply(as.character(x), to_minutes, numeric(1), USE.NAMES = FALSE)
+}
+
+df$duration_minutes <- convert_time(df$duration)
+
+# Mean (moy) and standard deviation (ect) of the duration in minutes, per `name`.
+val <- df %>%
+  group_by(name) %>%
+  summarise(moy = mean(duration_minutes), ect = sd(duration_minutes))
+
+# One violin per value of `name`, each in its own facet.
+# NOTE(review): ylim() appears to drop durations above 500 minutes - confirm that is intended.
+ggplot(df, aes(x = name, y = duration_minutes)) +
+  geom_violin() +
+  ylim(0, 500) +
+  facet_wrap(~ name, ncol = 8)