Skip to content
Snippets Groups Projects
Verified Commit de0ac1b8 authored by Laurent Modolo's avatar Laurent Modolo
Browse files

update benchmark/plot.r and add plot_contact.r

parent 9d2d7f76
No related branches found
No related tags found
No related merge requests found
......@@ -32,25 +32,25 @@ convert_time <- function(x){
hours <- 0
mili <- 0
for ( single in part ) {
if (grepl('\\d+s', single)) #for seconds
if (grepl('\\d+s', single)) #for seconds
{
sec <- as.numeric(substr(single,1,nchar(single)-1))
sec <- sec/60
sec <- as.numeric(format(round(sec, 3), nsmall = 3))
}
if (grepl('\\d+m$', single)) #for minutes
if (grepl('\\d+m$', single)) #for minutes
{
minute <- as.numeric(substr(single,1,nchar(single)-1))
minute <- as.numeric(format(round(minute, 2), nsmall = 2))
}
# Hours component of the duration string: strip the trailing unit character
# and express the value in minutes.
if (grepl('\\d+h', single)) #for hours
if (grepl('\\d+h', single)) #for hours
{
hours <- as.numeric(substr(single,1,nchar(single)-1))
hours <- hours*60 # hours -> minutes
hours <- as.numeric(format(round(hours, 2), nsmall = 2))
}
if (grepl('\\d+ms', single)) #for milliseconds, may be equal to 0 already
if (grepl('\\d+ms', single)) #for milliseconds, may be equal to 0 already
{
mili <- as.numeric(substr(single,1,nchar(single)-2))
if (mili > 0) {
......@@ -169,7 +169,7 @@ ggplot(merge_df, aes(x = fct_reorder(name, order), y = duration_minutes, color =
facet_wrap(~ file, ncol = 7) +
xlab("Processes") +
ylab("Duration in minutes") +
theme(axis.text.x=element_blank(), axis.ticks.x=element_blank())+
theme(axis.text.x=element_blank(), axis.ticks.x=element_blank())+
ggtitle("Duration time of each process") +
scale_y_log10() +
theme_bw()
......@@ -183,7 +183,7 @@ ggplot(merge_df, aes(x = fct_reorder(name, order), y = realtime_minutes, color =
ylab("Duration in minutes") +
ggtitle("Real execution time of each process") +
scale_y_log10() +
theme_bw() +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
ggplot(merge_df, aes(x = fct_reorder(name, order), y = realtime_minutes, color = categorie)) +
......@@ -212,7 +212,7 @@ ggplot(merge_df.ordered, aes(x = fct_reorder(name, order), y = cum_time, group =
xlab("Processes") +
ylab("Duration in minutes") +
ggtitle("Duration per conformation") +
theme_bw() +
theme_bw() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
geom_point(aes(fill = categorie), shape = 21, size = 2.5) +
scale_fill_manual(values = categories_colors) +
......@@ -227,7 +227,7 @@ ggplot(merge_df.ordered, aes(x = fct_reorder(name, order), y = cum_time, group =
#######################################
#get the 3 replicates matrices for all conformation
listFiles <- read.csv("/home/mcroiset/HiC/hic/benchmark/matrices.txt", sep = "\n")
listFiles <- read.csv("matrices.txt", sep = "\n")
listFiles2 <- read.csv("/home/mcroiset/HiC/hic/benchmark/matrices_rep2.txt", sep = "\n")
listFiles3 <- read.csv("/home/mcroiset/HiC/hic/benchmark/matrices_rep3.txt", sep = "\n")
......@@ -236,6 +236,7 @@ names(listFiles2) <- "File"
names(listFiles3) <- "File"
merge_lf <- bind_rows(listFiles, listFiles2, listFiles3)
merge_lf <- listFiles
#set the dataframe with the file and the number of contact (= number of line in the raw matrix)
df.counts <- tibble(File = character(), Counts = numeric())
......@@ -311,13 +312,13 @@ df.excludePicard <- df.filtered %>%
#plot the number of contacts per conformation, with or without Picard filtering
ggplot(df.filtered, aes(x = reorder(File,Counts), y = Counts, shape = align, color = filtering)) +
geom_point(size = 5) +
scale_y_log10() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
scale_y_log10() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
ggtitle("Number of contacts")
ggplot(df.excludePicard, aes(x = reorder(File,Counts), y = Counts, shape = align, color = filtering)) +
geom_point(size = 5) +
scale_y_log10() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
scale_y_log10() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
ggtitle("Number of contacts (without Picard)")
library(tidyverse)
library(LaF)
#######################################
# #
# PLOTS ON NUMBER OF CONTACTS #
# #
#######################################
# Directory that is searched recursively for the matrix_sparse log files.
# NOTE(review): machine-specific absolute path; adjust before running on
# another host.
path_root <- "/Users/laurent/projects/physbio/hic/results/"
# Extract one value from a log file: the file is parsed with read_log()
# (no header row, so columns are named X1, X2, ...) and the first entry of
# the fifth column is returned.
# NOTE(review): presumably this entry is the number of contacts written by
# the matrix_sparse step; confirm against the log format.
#
# x: path to the log file.
# Returns the first element of column X5 of the parsed log.
count_contact <- function(x) {
  # FALSE instead of F: F is an ordinary variable and can be reassigned.
  parsed_log <- read_log(x, col_names = FALSE)
  parsed_log$X5[1]
}
# Build one row per matrix_sparse log file found under path_root:
#   file       - path to the log file
#   name       - first the path component captured by the regex below, then
#                overwritten with "<algorithm> <option>" for plotting
#   correction - second "_"-separated field of the captured name
#   counts     - value returned by count_contact() for the file
#   algorithm  - "cutsite" or "parasplit" (NA when neither is detected)
#   option     - "forward-backward" or "all", suffixed with a seed label
log_files <- list.files(
  path = path_root,
  pattern = ".*matrix_sparse\\.log",
  recursive = TRUE
)
# paste0() recycles a zero-length vector to "", which would silently create
# a single bogus row pointing at the directory itself; fail loudly instead.
if (length(log_files) == 0) {
  stop("No matrix_sparse log file found under ", path_root, call. = FALSE)
}
data <- tibble(
  file = paste0(path_root, "/", log_files)
) %>%
  mutate(
    name = str_extract(file, ".*\\/(.*_.*_[^\\/]*)\\/", group = 1),
    correction = map(name, function(x) {
      str_split(x, "_")[[1]][2]
    }),
    counts = map(file, count_contact)
  ) %>%
  # map() produced list columns; flatten them to one value per row.
  unnest(c(correction, counts)) %>%
  mutate(
    algorithm = ifelse(
      str_detect(correction, ".*cutsite.*"),
      "cutsite",
      ifelse(str_detect(correction, ".*parasplit.*"), "parasplit", NA)
    ),
    # "fr" and "cutsite" runs are both labelled forward-backward; everything
    # else is labelled "all".
    option = ifelse(
      str_detect(correction, ".*fr.*"),
      "forward-backward",
      ifelse(str_detect(correction, ".*cutsite.*"), "forward-backward", "all")
    ),
    # Any run not tagged seed0 is labelled seed=20.
    option = ifelse(
      str_detect(correction, ".*seed0.*"),
      paste0(option, ", seed=0"),
      paste0(option, ", seed=20")
    ),
    name = paste(algorithm, option)
  )
# Persist the contact table to number_of_contacts.Rdata.
save(data, file = "number_of_contacts.Rdata")

# Scatter plot of the counts per run on a log10 y axis, ordered by
# increasing count; x-axis labels are hidden because the colour (algorithm)
# and shape (option) legends already identify each point.
contacts_plot <- ggplot(
  data,
  aes(x = reorder(name, counts), y = counts, color = algorithm, shape = option)
) +
  geom_point(size = 5) +
  scale_y_log10() +
  theme_bw() +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  ) +
  ggtitle("Number of contacts")
# Top-level reference keeps the plot auto-printed as before.
contacts_plot
# Pass the plot explicitly rather than relying on last_plot().
ggsave("number_of_contacts.pdf", plot = contacts_plot)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment