diff --git a/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R b/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R index 835a0f9d86a96a6d0110c10ac9fda184a7650ff7..0efefc1e77e8d72d214d28d18d6a4ff9c7f05bc6 100644 --- a/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R +++ b/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R @@ -2,6 +2,7 @@ ################################################################################ ### NEED TO ADD A CASE OF NO SPLICED-VARIANTS ARE IDENTIFIED !!!!!!!!!!!!!!! ### +### PROBLEM IF THERE IS ONLY MULTIPLE JUNCTIONS CONTAINING READS ############### ################################################################################ library(ggplot2, quietly = TRUE) library(tidyr, quietly = TRUE) @@ -25,8 +26,16 @@ colnames(reads_pos) <- c("id", reads_pos[1,2:length(reads_pos[1,])]) reads_pos <- reads_pos[2:length(reads_pos$id),] # Load Nanosplicer results: -df <- read.csv(opt$jwr) -colnames(df)[1] <- "juncNumber" +if (file.exists(opt$jwr)) { + df <- read.csv(opt$jwr) + colnames(df)[1] <- "juncNumber" +} else { # Define column names + column_names <- c("juncNumber", "id", "mapQ", "transcript_strand", "chrID", "loc", "JAQ") + + # Create an empty dataframe + df <- data.frame(matrix(ncol = length(column_names), nrow = 0)) + colnames(df) <- column_names +} # split donor and acceptor positions: df <- df %>% @@ -164,15 +173,14 @@ SP_assignation_single <- function(site, promoter) { } } + #dplyr version: -single_junction <- single_junction %>% dplyr::group_by(id) %>% - dplyr::mutate(SP_name = SP_assignation_single(junction, promoter)) +single_junction <- single_junction %>% dplyr::group_by(id) -#plyr version -# single_junction <- ddply(single_junction, -# .(id), -# mutate, -# SP_name = SP_assignation_single(junction, promoter)) +if (length(single_junction$promoter) != 0) { + single_junction <- single_junction %>% + dplyr::mutate(SP_name = SP_assignation_single(junction, promoter)) +} else { single_junction$SP_name = "" } SP_assignation_multiple <- function(read_id, combinaison, promoter) { if (promoter == "pgRNA" | promoter == "preCore") { @@ -283,7 +291,9 @@ df_combinaison <- data.frame(matrix(nrow = 0, ncol = 2)) colnames(df_combinaison) <- c("id", "SP_name") for (read_id in list_read_multiple) { - SP_name_computed <- SP_assignation_multiple(read_id, tmp[tmp$id == read_id,]$junction, tmp[tmp$id == read_id,]$promoter[1]) + SP_name_computed <- SP_assignation_multiple(read_id, + tmp[tmp$id == read_id,]$junction, + tmp[tmp$id == read_id,]$promoter[1]) res_vector <- data.frame(t(c(read_id, SP_name_computed))) colnames(res_vector) <- colnames(df_combinaison) df_combinaison <- rbind(df_combinaison, res_vector) @@ -296,7 +306,8 @@ multiple_junction <- merge(multiple_junction, df_combinaison, by="id") df_SPvariants <- rbind(as.data.frame(single_junction), multiple_junction) SP_variant_unique <- df_SPvariants %>% select(id, SP_name) -SP_variant_unique <- SP_variant_unique[!duplicated(SP_variant_unique$id),] # distinct(SP_variant_unique, id) +SP_variant_unique <- SP_variant_unique[!duplicated(SP_variant_unique$id),] +# distinct(SP_variant_unique, id) write.table(df_SPvariants, paste0(opt$barcode, "_identified_SPvariants.csv"),