From 098c0efe8e5870bed6417c34b7b71db8f4fd87b1 Mon Sep 17 00:00:00 2001
From: Mia Croiset <mia.croiset@ens-lyon.fr>
Date: Mon, 17 Jun 2024 16:15:27 +0200
Subject: [PATCH] read all trace files and plot benchmark (to clean)

---
 benchmark/open_traces.py |  6 ++++
 benchmark/plots.r        | 68 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 74 insertions(+)
 create mode 100755 benchmark/open_traces.py
 create mode 100644 benchmark/plots.r

diff --git a/benchmark/open_traces.py b/benchmark/open_traces.py
new file mode 100755
index 0000000..e264b29
--- /dev/null
+++ b/benchmark/open_traces.py
@@ -0,0 +1,24 @@
+#!/usr/bin/env python3
+"""Merge the tab-separated tables found under ./wt_AD281 into recap.txt.
+
+Every file matching ``*/pipeline_info/*.txt`` is read, tagged with the path
+it came from in a ``file`` column, and cast to strings (presumably so that
+tables whose inferred column types differ can still be stacked).
+"""
+from pathlib import Path
+
+import polars as pl
+
+# Sorted so the row order of recap.txt does not depend on directory listing order.
+files = sorted(Path("./wt_AD281").glob("*/pipeline_info/*.txt"))
+if not files:
+    # pl.concat() refuses an empty list; stop with a message naming the pattern.
+    raise SystemExit("no file matches ./wt_AD281/*/pipeline_info/*.txt")
+list_df = [
+    pl.read_csv(file, separator="\t")
+    .with_columns(file=pl.lit(str(file)))
+    .select(pl.all().cast(pl.Utf8))
+    for file in files
+]
+df = pl.concat(list_df)
+df.write_csv("recap.txt", separator="\t")
diff --git a/benchmark/plots.r b/benchmark/plots.r
new file mode 100644
index 0000000..13dd0f2
--- /dev/null
+++ b/benchmark/plots.r
@@ -0,0 +1,68 @@
+# Install the packages only when they are missing instead of on every run.
+for (pkg in c("tidyverse", "ggplot2")) if (!requireNamespace(pkg, quietly = TRUE)) install.packages(pkg)
+library(tidyverse)
+library(ggplot2)
+library(dplyr)
+
+df <- read.csv("/home/mcroiset/HiC/benchmark/recap.txt", sep = "\t", header = TRUE)
+
+# Convert duration strings such as "1h 2m 3s" to a number of minutes.
+#
+# Args:
+#   x: character vector (a factor is accepted too). Each element is a
+#      space-separated list of "<number><unit>" tokens, where unit is one
+#      of d (days), h (hours), m (minutes), s (seconds) or ms (milliseconds).
+#
+# Returns:
+#   A numeric vector with one value per element of x, in minutes, rounded to
+#   3 decimals. Tokens that are not "<number><unit>" (for instance "-", ""
+#   or NA) count as 0, so such elements yield 0 rather than NA.
+#
+# Examples:
+#   convert_time("1h 2m 3s")  # 62.05
+#   convert_time("514ms")     # 0.009
+#   convert_time("1d 30s")    # 1440.5
+convert_time <- function(x){
+  # Minutes per unit. "ms" needs its own entry: looking for the letters "s"
+  # and "m" separately made "514ms" parse as NA seconds plus NA minutes, and
+  # day tokens were skipped altogether.
+  minutes_per_unit <- c(d = 24 * 60, h = 60, m = 1, s = 1 / 60, ms = 1 / 60000)
+
+  # Anchored at both ends so that a token must be exactly one amount
+  # followed by exactly one known unit; anything else is ignored below.
+  token_pattern <- "^([0-9]+\\.?[0-9]*)(ms|d|h|m|s)$"
+
+  # Total minutes for the tokens of a single duration string.
+  one_duration <- function(tokens) {
+    tokens <- tokens[grepl(token_pattern, tokens)]
+    amounts <- as.numeric(sub(token_pattern, "\\1", tokens))
+    units <- sub(token_pattern, "\\2", tokens)
+    sum(amounts * minutes_per_unit[units])
+  }
+
+  # as.character() keeps this working when read.csv() returned a factor.
+  parts <- strsplit(as.character(x), " ", fixed = TRUE)
+  durations <- vapply(parts, one_duration, numeric(1), USE.NAMES = FALSE)
+  return(round(durations, 3))
+}
+
+list_dur <- convert_time(df$duration)
+# as.numeric() because convert_time() may hand its result back as text.
+df$duration_minutes <- as.numeric(list_dur)
+
+# Mean (moy) and standard deviation (ect) of duration_minutes per name value.
+val <- df %>% group_by(name) %>%
+  summarise(moy = mean(duration_minutes), ect = sd(duration_minutes))
+
+# NOTE(review): ylim() drops rows above 500 before the violins are computed;
+# coord_cartesian(ylim = c(0, 500)) would only zoom. Confirm which is wanted.
+ggplot(df, aes(x = name, y = duration_minutes)) +
+  geom_violin() + ylim(0, 500) + facet_wrap(~ name, ncol = 8)
+
+
+# for (x in df$duration_minutes) {
+#   print(class(x))
+# }
+
+# mean(df$duration_minutes)
+# sd(df$duration_minutes)
-- 
GitLab