Skip to content
Snippets Groups Projects
Verified Commit 5dd5017c authored by Mia Croiset's avatar Mia Croiset
Browse files

order process by category and pipeline order

parent 96eb4d6e
No related branches found
No related tags found
No related merge requests found
...@@ -3,6 +3,7 @@ install.packages("ggplot2") ...@@ -3,6 +3,7 @@ install.packages("ggplot2")
library(tidyverse) library(tidyverse)
library(ggplot2) library(ggplot2)
library(dplyr) library(dplyr)
library(forcats)
df <- read.csv("/home/mcroiset/HiC/benchmark/recap.txt", sep = "\t", header = TRUE) df <- read.csv("/home/mcroiset/HiC/benchmark/recap.txt", sep = "\t", header = TRUE)
...@@ -79,7 +80,50 @@ df$file <- filenames ...@@ -79,7 +80,50 @@ df$file <- filenames
important_processes <- c(15, 16, 17, 18, 7, 1, 1, 1, 1, 1, 1, 1, 1,1,1,1,1,9,1,2,5,6,1,1,1,1,1,15,11,1,1,1,1,12,1,1,1,1,1) important_processes <- c(15, 16, 17, 18, 7, 1, 1, 1, 1, 1, 1, 1, 1,1,1,1,1,9,1,2,5,6,1,1,1,1,1,15,11,1,1,1,1,12,1,1,1,1,1)
pdf("process_time.pdf") categories <- character(0)
# Assign each process name in df$name to a coarse pipeline category.
#
# Each rule is a regular expression searched anywhere in the process name
# (grepl() is unanchored, so no leading/trailing `\\w*` padding is needed).
# Rules are listed from highest to lowest priority: when a name matches
# several rules, the one listed first wins. Names matching no rule (including
# NA) fall back to the category "n".
category_rules <- c(
  align     = "_ALIGN|TRIM|MERGE_BOWTIE2",
  cooler    = "COOLER",
  pairs     = "PAIRS",
  matrix    = "MATRIX",
  filter    = "FILTER|PICARD|SAMTOOLS",
  cutsite   = "CUTSITE",
  iteralign = "ITERALIGN",
  data_prep = "SAMPLESHEET|BOWTIE2_BUILD|GETCHROM|GET_RESTRIC"
)

process_names <- as.character(df$name)

# Start from the fallback category, then apply the rules from lowest to
# highest priority so that a higher-priority rule overwrites a lower one.
# This is vectorised over all names instead of growing `categories` one
# element at a time with append().
categories <- rep("n", length(process_names))
for (category in rev(names(category_rules))) {
  is_match <- grepl(category_rules[[category]], process_names)
  categories[is_match] <- category
}
# categories
# Store the per-row category computed above as a new column of df.
df$categorie <- categories

# Distinct process names, in factor-level order; printed so the
# hand-written `order` vector below can be checked against them.
process_levels <- levels(as.factor(df$name))
print(process_levels)

# One rank per entry of `process_levels`, in the same order.
# NOTE(review): presumably the position of each process in the pipeline;
# the vector is hard-coded, so it must be kept in sync with the set of
# process names present in df - confirm whenever the input file changes.
order <- c(18, 7, 8, 7, 2, 20, 21, 14, 24, 23, 27, 22, 25, 30, 31, 32, 3, 6, 5, 19, 13, 19, 9, 4, 15, 28, 29, 18, 7, 8, 17, 16, 33, 11, 1, 10, 12, 27, 8)

# Lookup table (name -> order), then join it onto df so every row carries
# the rank of its process in a column named `order`.
ordered_processes <- tibble(name = process_levels) %>%
  add_column(order = order)
df <- left_join(df, ordered_processes, by = "name")
# pdf("process_time.pdf")
ggplot(df, aes(x = name, y = duration_minutes)) + ggplot(df, aes(x = name, y = duration_minutes)) +
geom_point(aes(color = name, shape = name)) + geom_point(aes(color = name, shape = name)) +
...@@ -91,15 +135,15 @@ ggplot(df, aes(x = name, y = duration_minutes)) + ...@@ -91,15 +135,15 @@ ggplot(df, aes(x = name, y = duration_minutes)) +
theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) + theme(axis.text.x=element_blank(), axis.ticks.x=element_blank()) +
ggtitle("Duration time of each process") ggtitle("Duration time of each process")
ggplot(df, aes(x = name, y = realtime_minutes)) + ggplot(df, aes(x = fct_reorder(name, order), y = realtime_minutes)) +
geom_point(aes(color = name, shape = name)) + geom_point(aes(color = categorie)) +
scale_shape_manual(values = important_processes) + scale_shape_manual(values = important_processes) +
facet_wrap(~ file, ncol = 8) + facet_wrap(~ file, ncol = 8) +
scale_y_continuous(breaks = seq(0, 490, by = 30)) +
xlab("Processes") + xlab("Processes") +
ylab("Duration in minutes") + ylab("Duration in minutes") +
theme(axis.text.x=element_blank(), axis.ticks.x=element_blank())+ theme(axis.text.x=element_blank(), axis.ticks.x=element_blank())+
ggtitle("Real execution time of each process") ggtitle("Real execution time of each process") +
scale_y_log10()
dev.off() # dev.off()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment