diff --git a/benchmark/plots.r b/benchmark/plots.r index 07e84b39b395ef5d719ff14f83e8c1083ba64ccf..f44b775d38aaadb59168effae0c2aee35741f344 100644 --- a/benchmark/plots.r +++ b/benchmark/plots.r @@ -32,25 +32,25 @@ convert_time <- function(x){ hours <- 0 mili <- 0 for ( single in part ) { - if (grepl('\\d+s', single)) #for seconds + if (grepl('\\d+s', single)) #for seconds { sec <- as.numeric(substr(single,1,nchar(single)-1)) sec <- sec/60 sec <- as.numeric(format(round(sec, 3), nsmall = 3)) } - if (grepl('\\d+m$', single)) #for minutes + if (grepl('\\d+m$', single)) #for minutes { minute <- as.numeric(substr(single,1,nchar(single)-1)) minute <- as.numeric(format(round(minute, 2), nsmall = 2)) } - if (grepl('\\d+h', single)) #for days + if (grepl('\\d+h', single)) #for days { hours <- as.numeric(substr(single,1,nchar(single)-1)) hours <- hours*60 hours <- as.numeric(format(round(hours, 2), nsmall = 2)) } - if (grepl('\\d+ms', single)) #for milliseconds, may be equal to 0 already + if (grepl('\\d+ms', single)) #for milliseconds, may be equal to 0 already { mili <- as.numeric(substr(single,1,nchar(single)-2)) if (mili > 0) { @@ -169,7 +169,7 @@ ggplot(merge_df, aes(x = fct_reorder(name, order), y = duration_minutes, color = facet_wrap(~ file, ncol = 7) + xlab("Processes") + ylab("Duration in minutes") + - theme(axis.text.x=element_blank(), axis.ticks.x=element_blank())+ + theme(axis.text.x=element_blank(), axis.ticks.x=element_blank())+ ggtitle("Duration time of each process") + scale_y_log10() + theme_bw() @@ -183,7 +183,7 @@ ggplot(merge_df, aes(x = fct_reorder(name, order), y = realtime_minutes, color = ylab("Duration in minutes") + ggtitle("Real execution time of each process") + scale_y_log10() + - theme_bw() + + theme_bw() + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) ggplot(merge_df, aes(x = fct_reorder(name, order), y = realtime_minutes, color = categorie)) + @@ -212,7 +212,7 @@ ggplot(merge_df.ordered, aes(x = fct_reorder(name, 
order), y = cum_time, group = xlab("Processes") + ylab("Duration in minutes") + ggtitle("Duration per conformation") + - theme_bw() + + theme_bw() + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + geom_point(aes(fill = categorie), shape = 21, size = 2.5) + scale_fill_manual(values = categories_colors) + @@ -227,7 +227,7 @@ ggplot(merge_df.ordered, aes(x = fct_reorder(name, order), y = cum_time, group = ####################################### #get the 3 replicates matrices for all conformation -listFiles <- read.csv("/home/mcroiset/HiC/hic/benchmark/matrices.txt", sep = "\n") +listFiles <- read.csv("matrices.txt", sep = "\n") listFiles2 <- read.csv("/home/mcroiset/HiC/hic/benchmark/matrices_rep2.txt", sep = "\n") listFiles3 <- read.csv("/home/mcroiset/HiC/hic/benchmark/matrices_rep3.txt", sep = "\n") @@ -236,6 +236,7 @@ names(listFiles2) <- "File" names(listFiles3) <- "File" merge_lf <- bind_rows(listFiles, listFiles2, listFiles3) +merge_lf <- listFiles #set the dataframe with the file and the number of contact (= number of line in the raw matrix) df.counts <- tibble(File = character(), Counts = numeric()) @@ -311,13 +312,13 @@ df.excludePicard <- df.filtered %>% #plot the number of contacts per conformation, with or without Picard filtering ggplot(df.filtered, aes(x = reorder(File,Counts), y = Counts, shape = align, color = filtering)) + geom_point(size = 5) + - scale_y_log10() + - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+ + scale_y_log10() + + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+ ggtitle("Number of contacts") ggplot(df.excludePicard, aes(x = reorder(File,Counts), y = Counts, shape = align, color = filtering)) + geom_point(size = 5) + - scale_y_log10() + - theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+ + scale_y_log10() + + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+ ggtitle("Number of contacts (without Picard)") diff --git 
a/benchmark/plots_contact.r b/benchmark/plots_contact.r
new file mode 100644
index 0000000000000000000000000000000000000000..82ba61e698873690a497542a17679fe6b2878074
--- /dev/null
+++ b/benchmark/plots_contact.r
@@ -0,0 +1,88 @@
+library(tidyverse)
+library(LaF)
+#######################################
+#                                     #
+#     PLOTS ON NUMBER OF CONTACTS     #
+#                                     #
+#######################################
+
+# Root directory scanned for matrix_sparse log files. It can be overridden by
+# the first command-line argument (Rscript plots_contact.r <results_dir>).
+# Trailing slashes are dropped so that the listed paths contain no "//".
+args <- commandArgs(trailingOnly = TRUE)
+path_root <- if (length(args) > 0) {
+  args[1]
+} else {
+  "/Users/laurent/projects/physbio/hic/results"
+}
+path_root <- sub("/+$", "", path_root)
+
+# Return the first value of the fifth field (X5) of the log file `x`, as
+# parsed by read_log().
+# NOTE(review): presumably that field is the number of contacts - confirm
+# against the format of the matrix_sparse logs.
+count_contact <- function(x) {
+  read_log(x, col_names = FALSE)$X5[1]
+}
+
+# full.names = TRUE yields an empty vector when nothing matches, whereas
+# paste0() of path_root with an empty listing would yield one bogus path.
+log_files <- list.files(
+  path = path_root,
+  pattern = ".*matrix_sparse\\.log",
+  recursive = TRUE,
+  full.names = TRUE
+)
+if (length(log_files) == 0) {
+  stop("No matrix_sparse log file found under ", path_root, call. = FALSE)
+}
+
+data <- tibble(file = log_files) %>%
+  mutate(
+    # name: part of the path shaped like <x>_<y>_<z> that precedes a "/";
+    # correction: its second "_"-separated field.
+    name = str_extract(file, ".*\\/(.*_.*_[^\\/]*)\\/", group = 1),
+    correction = map_chr(name, function(x) str_split(x, "_")[[1]][2]),
+    counts = map(file, count_contact)
+  ) %>%
+  unnest(counts) %>%
+  mutate(
+    algorithm = case_when(
+      str_detect(correction, "cutsite") ~ "cutsite",
+      str_detect(correction, "parasplit") ~ "parasplit",
+      TRUE ~ NA_character_
+    ),
+    # "fr" and cutsite runs are both labelled forward-backward.
+    option = if_else(
+      str_detect(correction, "fr|cutsite"), "forward-backward", "all"
+    ),
+    # NOTE(review): every run not tagged "seed0" is labelled seed=20 - confirm.
+    option = if_else(
+      str_detect(correction, "seed0"),
+      paste0(option, ", seed=0"),
+      paste0(option, ", seed=20")
+    ),
+    name = paste(algorithm, option)
+  )
+
+
+save(data, file = "number_of_contacts.Rdata")
+
+
+contacts_plot <- ggplot(
+  data,
+  aes(x = reorder(name, counts), y = counts, color = algorithm, shape = option)
+) +
+  geom_point(size = 5) +
+  scale_y_log10() +
+  theme_bw() +
+  theme(
+    axis.title.x = element_blank(),
+    axis.text.x = element_blank(),
+    axis.ticks.x = element_blank()
+  ) +
+  ggtitle("Number of contacts")
+contacts_plot
+# Pass the plot explicitly rather than relying on ggplot2's last-plot state.
+ggsave("number_of_contacts.pdf", plot = contacts_plot)
+