Skip to content
Snippets Groups Projects
Verified Commit cb6f4067 authored by Mia Croiset
Browse files

Merge branch 'master' of gitbio.ens-lyon.fr:LBMC/hub/hic

parents 776f5128 de0ac1b8
No related branches found
No related tags found
No related merge requests found
...@@ -34,25 +34,25 @@ convert_time <- function(x){ ...@@ -34,25 +34,25 @@ convert_time <- function(x){
hours <- 0 hours <- 0
mili <- 0 mili <- 0
for ( single in part ) { for ( single in part ) {
if (grepl('\\d+s', single)) #for seconds if (grepl('\\d+s', single)) #for seconds
{ {
sec <- as.numeric(substr(single,1,nchar(single)-1)) sec <- as.numeric(substr(single,1,nchar(single)-1))
sec <- sec/60 sec <- sec/60
sec <- as.numeric(format(round(sec, 3), nsmall = 3)) sec <- as.numeric(format(round(sec, 3), nsmall = 3))
} }
if (grepl('\\d+m$', single)) #for minutes if (grepl('\\d+m$', single)) #for minutes
{ {
minute <- as.numeric(substr(single,1,nchar(single)-1)) minute <- as.numeric(substr(single,1,nchar(single)-1))
minute <- as.numeric(format(round(minute, 2), nsmall = 2)) minute <- as.numeric(format(round(minute, 2), nsmall = 2))
} }
if (grepl('\\d+h', single)) #for days if (grepl('\\d+h', single)) #for days
{ {
hours <- as.numeric(substr(single,1,nchar(single)-1)) hours <- as.numeric(substr(single,1,nchar(single)-1))
hours <- hours*60 hours <- hours*60
hours <- as.numeric(format(round(hours, 2), nsmall = 2)) hours <- as.numeric(format(round(hours, 2), nsmall = 2))
} }
if (grepl('\\d+ms', single)) #for milliseconds, may be equal to 0 already if (grepl('\\d+ms', single)) #for milliseconds, may be equal to 0 already
{ {
mili <- as.numeric(substr(single,1,nchar(single)-2)) mili <- as.numeric(substr(single,1,nchar(single)-2))
if (mili > 0) { if (mili > 0) {
...@@ -210,7 +210,7 @@ ggplot(merge_df, aes(x = fct_reorder(name, order), y = duration_minutes, color = ...@@ -210,7 +210,7 @@ ggplot(merge_df, aes(x = fct_reorder(name, order), y = duration_minutes, color =
facet_wrap(~ file, ncol = 7) + facet_wrap(~ file, ncol = 7) +
xlab("Processes") + xlab("Processes") +
ylab("Duration in minutes") + ylab("Duration in minutes") +
theme(axis.text.x=element_blank(), axis.ticks.x=element_blank())+ theme(axis.text.x=element_blank(), axis.ticks.x=element_blank())+
ggtitle("Duration time of each process") + ggtitle("Duration time of each process") +
scale_y_log10() + scale_y_log10() +
theme_bw() theme_bw()
...@@ -224,7 +224,7 @@ ggplot(merge_df, aes(x = fct_reorder(name, order), y = realtime_minutes, color = ...@@ -224,7 +224,7 @@ ggplot(merge_df, aes(x = fct_reorder(name, order), y = realtime_minutes, color =
ylab("Duration in minutes") + ylab("Duration in minutes") +
ggtitle("Real execution time of each process") + ggtitle("Real execution time of each process") +
scale_y_log10() + scale_y_log10() +
theme_bw() + theme_bw() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
ggplot(merge_df, aes(x = fct_reorder(name, order), y = realtime_minutes, color = categorie)) + ggplot(merge_df, aes(x = fct_reorder(name, order), y = realtime_minutes, color = categorie)) +
...@@ -253,7 +253,7 @@ ggplot(merge_df.ordered, aes(x = fct_reorder(name, order), y = cum_time, group = ...@@ -253,7 +253,7 @@ ggplot(merge_df.ordered, aes(x = fct_reorder(name, order), y = cum_time, group =
xlab("Processes") + xlab("Processes") +
ylab("Duration in minutes") + ylab("Duration in minutes") +
ggtitle("Duration per conformation") + ggtitle("Duration per conformation") +
theme_bw() + theme_bw() +
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) + theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust=1)) +
geom_point(aes(fill = categorie), shape = 21, size = 2.5) + geom_point(aes(fill = categorie), shape = 21, size = 2.5) +
scale_fill_manual(values = categories_colors) + scale_fill_manual(values = categories_colors) +
...@@ -366,13 +366,13 @@ df.excludePicard <- df.filtered %>% ...@@ -366,13 +366,13 @@ df.excludePicard <- df.filtered %>%
#plot the number of contacts per conformation, with or without Picard filtering #plot the number of contacts per conformation, with or without Picard filtering
ggplot(df.filtered, aes(x = reorder(File,Counts), y = Counts, shape = align, color = filtering)) + ggplot(df.filtered, aes(x = reorder(File,Counts), y = Counts, shape = align, color = filtering)) +
geom_point(size = 5) + geom_point(size = 5) +
scale_y_log10() + scale_y_log10() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
ggtitle("Number of contacts") ggtitle("Number of contacts")
ggplot(df.excludePicard, aes(x = reorder(File,Counts), y = Counts, shape = align, color = filtering)) + ggplot(df.excludePicard, aes(x = reorder(File,Counts), y = Counts, shape = align, color = filtering)) +
geom_point(size = 5) + geom_point(size = 5) +
scale_y_log10() + scale_y_log10() +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+ theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
ggtitle("Number of contacts (without Picard)") ggtitle("Number of contacts (without Picard)")
library(tidyverse)
library(LaF)
#######################################
# #
# PLOTS ON NUMBER OF CONTACTS #
# #
#######################################
# Root directory that is scanned for the log files used below.
path_root <- "/Users/laurent/projects/physbio/hic/results/"
# Return the first value of the fifth field of a log file.
#
# The file is parsed with read_log() without a header row, so the columns get
# the default names X1, X2, ...; only the first entry of X5 is returned.
# The script uses this value as the contact count of the file.
#
# x: path to the log file to read.
count_contact <- function(x) {
  # Spell out FALSE: `F` is an ordinary variable that can be reassigned.
  parsed_log <- read_log(x, col_names = FALSE)
  parsed_log$X5[1]
}
# Collect every log file matching "matrix_sparse.log" below path_root.
# full.names = TRUE lets list.files() build the paths itself: with no match it
# returns an empty vector, whereas paste0(path_root, "/", character(0)) yields
# a single bogus path that points at the directory.
log_files <- list.files(
  path = path_root,
  pattern = ".*matrix_sparse\\.log",
  recursive = TRUE,
  full.names = TRUE
)
if (length(log_files) == 0) {
  stop("no matrix_sparse log file found under ", path_root, call. = FALSE)
}

# One row per log file:
#   file       : path of the log file
#   correction : second "_"-separated field of the directory name captured
#                from the path
#   counts     : value returned by count_contact() for the file
#   algorithm  : "cutsite" or "parasplit" (NA when neither occurs)
#   option     : "forward-backward" or "all", followed by the seed label
#   name       : label built from algorithm and option (replaces the
#                directory name once correction has been derived from it)
data <- tibble(file = log_files) %>%
  mutate(
    name = str_extract(file, ".*\\/(.*_.*_[^\\/]*)\\/", group = 1),
    # vectorised; gives NA when the name has fewer than two fields
    correction = str_split_i(name, "_", 2),
    counts = map(file, count_contact)
  ) %>%
  unnest(counts) %>%
  mutate(
    algorithm = case_when(
      str_detect(correction, "cutsite") ~ "cutsite",
      str_detect(correction, "parasplit") ~ "parasplit",
      TRUE ~ NA_character_
    ),
    option = case_when(
      # keep a missing correction missing instead of labelling it "all"
      is.na(correction) ~ NA_character_,
      str_detect(correction, "fr|cutsite") ~ "forward-backward",
      TRUE ~ "all"
    ),
    # anything that is not tagged "seed0" is labelled as seed 20
    option = if_else(
      str_detect(correction, "seed0"),
      paste0(option, ", seed=0"),
      paste0(option, ", seed=20")
    ),
    name = paste(algorithm, option)
  )
save(data, file = "number_of_contacts.Rdata")
# Scatter plot of counts (log10 y axis) with one point per row of `data`,
# x positions ordered with reorder(name, counts), then written to a PDF.
# The plot is bound to a name and handed to ggsave() explicitly instead of
# relying on the implicit last_plot(); the outer parentheses keep the value
# visible, so the plot is still displayed the way the bare expression was.
(contacts_plot <- ggplot(
  data,
  aes(x = reorder(name, counts), y = counts, color = algorithm, shape = option)
) +
  geom_point(size = 5) +
  scale_y_log10() +
  theme_bw() +
  # theme() comes after theme_bw() so these settings are not overwritten
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  ) +
  ggtitle("Number of contacts"))
ggsave("number_of_contacts.pdf", plot = contacts_plot)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment