From 5dd5017cb64105d51a8536d53d1ca9a16239f9f7 Mon Sep 17 00:00:00 2001 From: Mia Croiset <mia.croiset@ens-lyon.fr> Date: Mon, 24 Jun 2024 17:11:59 +0200 Subject: [PATCH] order process by category and pipeline order --- benchmark/plots.r | 56 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 50 insertions(+), 6 deletions(-) diff --git a/benchmark/plots.r b/benchmark/plots.r index 4e6d6d1..28a0609 100644 --- a/benchmark/plots.r +++ b/benchmark/plots.r @@ -3,6 +3,7 @@ install.packages("ggplot2") library(tidyverse) library(ggplot2) library(dplyr) +library(forcats) df <- read.csv("/home/mcroiset/HiC/benchmark/recap.txt", sep = "\t", header = TRUE) @@ -79,7 +80,50 @@ df$file <- filenames important_processes <- c(15, 16, 17, 18, 7, 1, 1, 1, 1, 1, 1, 1, 1,1,1,1,1,9,1,2,5,6,1,1,1,1,1,15,11,1,1,1,1,12,1,1,1,1,1) -pdf("process_time.pdf") +categories <- character(0) +for (x in df$name) { + if (grepl('\\w*_ALIGN\\w*', x) || grepl('\\w*TRIM\\w*', x) || grepl('MERGE_BOWTIE2', x)) { + categories <- append(categories, "align") + } + else if (grepl('\\w*COOLER\\w*', x)) { + categories <- append(categories, "cooler") + } + else if (grepl('\\w*PAIRS\\w*', x)) { + categories<- append(categories, "pairs") + } + else if (grepl('\\w*MATRIX\\w*', x)) { + categories<- append(categories, "matrix") + } + else if (grepl('\\w*FILTER\\w*', x) || grepl('\\w*PICARD\\w*', x) || grepl('\\w*SAMTOOLS\\w*', x)) { + categories<- append(categories, "filter") + } + else if (grepl('\\w*CUTSITE\\w*', x)) { + categories<- append(categories, "cutsite") + } + else if (grepl('\\w*ITERALIGN\\w*', x)) { + categories<- append(categories, "iteralign") + } + else if (grepl('\\w*SAMPLESHEET\\w*', x) || (grepl('BOWTIE2_BUILD', x) || grepl('\\w*GETCHROM\\w*', x) || grepl('\\w*GET_RESTRIC\\w*', x))) { + categories<- append(categories, "data_prep") + } + else { + categories <- append(categories, "n") + } +} +# categories +df$categorie <- categories + +print(levels(as.factor(df$name))) +ordered_processes <- tibble(levels(as.factor(df$name))) +order <- c(18, 7, 8, 7, 2, 20, 21, 14, 24, 23, 27, 22, 25, 30, 31, 32, 3, 6, 5, 19, 13, 19, 9, 4, 15, 28, 29, 18, 7, 8, 17, 16, 33, 11, 1, 10, 12, 27, 8) +ordered_processes <- ordered_processes %>% add_column(order = order) +ordered_processes <- rename(ordered_processes, name = `levels(as.factor(df$name))`) + +df <- left_join(df, ordered_processes, by= c("name" = "name")) + + + +# pdf("process_time.pdf") ggplot(df, aes(x = name, y = duration_minutes)) + geom_point(aes(color = name, shape = name)) + @@ -91,15 +135,15 @@ ggplot(df, aes(x = name, y = duration_minutes)) + theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) + ggtitle("Duration time of each process") -ggplot(df, aes(x = name, y = realtime_minutes)) + - geom_point(aes(color = name, shape = name)) + +ggplot(df, aes(x = fct_reorder(name, order), y = realtime_minutes)) + + geom_point(aes(color = categorie)) + scale_shape_manual(values = important_processes) + facet_wrap(~ file, ncol = 8) + - scale_y_continuous(breaks = seq(0, 490, by = 30)) + xlab("Processes") + ylab("Duration in minutes") + theme(axis.text.x=element_blank(), axis.ticks.x=element_blank())+ - ggtitle("Real execution time of each process") + ggtitle("Real execution time of each process") + + scale_y_log10() -dev.off() +# dev.off() -- GitLab