diff --git a/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R b/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R index ea15c73677d5bd5d9a21cd23eee4d02a6d4c0fad..16d67f3f8dd8a9161ff7006c410bda1291d45e63 100644 --- a/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R +++ b/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R @@ -74,7 +74,7 @@ countSP <- dplyr::inner_join(palette_complete, #print(names(countSP)) countSP$nom <- factor(countSP$nom, levels = all_species_name) -countSP <- mutate(countSP, +countSP <- dplyr::mutate(countSP, proportion = (as.numeric(n)/sum(as.numeric(n))*100)) #print(countSP) ggplot(countSP, aes(x = "percent", @@ -112,7 +112,7 @@ classified_reads <- read.table(file = opt$classification, header = TRUE) not_spliced <- classified_reads[!(classified_reads$read_ID %in% clean_SP$id),] -not_spliced <- mutate(not_spliced, +not_spliced <- dplyr::mutate(not_spliced, species = not_spliced$promoter) #print(not_spliced) not_spliced <- not_spliced %>% select(read_ID, species) @@ -124,7 +124,7 @@ colnames(clean_SP_type) <- c("id", "species") df_species <- rbind.data.frame(not_spliced, clean_SP_type, stringsAsFactors = FALSE) count_species <- df_species %>% count(species) -count_species <- mutate(count_species, +count_species <- dplyr::mutate(count_species, percent = (as.numeric(n)/sum(as.numeric(n))*100)) #print(count_species) write.table(df_species, file = "All_reads_identified.csv", @@ -161,7 +161,7 @@ count_species_SPxx <- rbind.data.frame(count_species_SPxx, stringsAsFactors = FALSE) count_species_SPxx <- count_species_SPxx[count_species_SPxx$species %in% all_species_name[c(1:3,5,35)],] -count_species_SPxx <- mutate(count_species_SPxx, +count_species_SPxx <- dplyr::mutate(count_species_SPxx, percent=(as.numeric(n)/sum(as.numeric(n))*100)) #print(count_species_SPxx) # save the tab: @@ -229,7 +229,7 @@ ggsave(file = "Count_RNAs_species_clear.png", # SP composition clear: count_clear <- clean_SP[clean_SP$SP_name %in% SPvariants,] %>% count(SP_name) -count_clear <- mutate(count_clear, +count_clear <- dplyr::mutate(count_clear, proportion=(as.numeric(n)/sum(as.numeric(n))*100)) #print(count_clear) count_clear <- dplyr::inner_join(palette_complete, diff --git a/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R b/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R index bce38457601babac31813c62f3362a8736683671..5567a378b0c61e716092c77a7702f2360b2bf2b2 100644 --- a/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R +++ b/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R @@ -36,7 +36,7 @@ df <- df %>% df$donor <- str_replace(df$donor, '[(]', '') df$acceptor <- str_replace(df$acceptor, '[)]', '') -df <- mutate(df, +df <- dplyr::mutate(df, pg_donor = as.numeric(donor)-122, pg_acceptor = as.numeric(acceptor)-122) @@ -110,7 +110,7 @@ assignation_acceptor <- function(pg_acceptor) { df$donor_site <- sapply(df$pg_donor, assignation_donor) df$acceptor_site <- sapply(df$pg_acceptor, assignation_acceptor) -df <- mutate(df, +df <- dplyr::mutate(df, junction = paste0(donor_site, acceptor_site)) write.table(df, file = "JWR_check_parsed.csv", row.names = FALSE, sep = "\t") diff --git a/src/.docker_modules/r-bolero/1.0/Start_positions.R b/src/.docker_modules/r-bolero/1.0/Start_positions.R index 683336e0aadc3fa2dd71374f3e4f3a2b4a07c6f5..f31afaae99dbcfd8686576f070d979db06277800 100644 --- a/src/.docker_modules/r-bolero/1.0/Start_positions.R +++ b/src/.docker_modules/r-bolero/1.0/Start_positions.R @@ -47,9 +47,9 @@ parsingData <- function(df) { tmp$Start <- as.numeric(tmp$Start) df2 <- as_tibble(tmp) %>% - mutate(bin = round(Start/binsize)*binsize) %>% + dplyr::mutate(bin = round(Start/binsize)*binsize) %>% group_by(bin) %>% - summarize(nb_reads = sum(Freq, na.rm = T)) + dplyr::summarize(nb_reads = sum(Freq, na.rm = T)) df2[is.na(df2)] <- 0 df2[3] <- rep(df[1,3], length(df2$bin)) colnames(df2) <- c("Start_position", "nb_reads", "Barcode") diff --git a/src/bolero.nf b/src/bolero.nf index f0e8cc3d03697080fde815f4b959ec21617144a0..41e7bda50028b4e50f65d8e00f29b0cda06c1cc3 100755 --- a/src/bolero.nf +++ b/src/bolero.nf @@ -1,6 +1,7 @@ #!/usr/bin/env nextflow nextflow.enable.dsl=2 +//syntax extension DSL2 /* ======================================================================================================================== @@ -73,7 +74,7 @@ if (params.help || params.h) { **************************************************************** */ -/* params in */ +/* Params in */ params.skipBC = true params.gpu_mode = false @@ -100,6 +101,8 @@ params.seqkit_grep_out = "03_fastq/" params.cutadapt_out = "04_cutadapt/" params.minimap2_genome_out = "05_minimap2/" params.start_position_counts_out = "06_start_positions/" +params.nanosplicer_out = "07_nanosplicer/" +params.rna_count_out = "08_RNA_count/" params.pycoQC_out = "pycoQC/" /* @@ -108,6 +111,7 @@ params.pycoQC_out = "pycoQC/" **************************************************************** */ +//to print multiline informations log.info "fast5/q folder : ${params.input}" log.info "5'RACE adapter sequence : ${params.adapt}" if(!params.skipBC) log.info "Guppy basecalling calculation using GPU mode : ${params.gpu_mode}." @@ -161,13 +165,21 @@ if(!params.skipBC) { // Replace concatenate by seqkit fct to parallelization: // include { concatenate } from "./nf_modules/seqkit/main.nf" include { concatenate } from "./nf_modules/concatenate/main.nf" - include { cut_5pRACE } from "./nf_modules/cutadapt/main.nf" include { hbv_genome } from "./nf_modules/minimap2/main.nf" include { seqkit_grep } from "./nf_modules/seqkit/main.nf" -include { sort_bam as sort_bam_genome } from './nf_modules/samtools/main.nf' addParams(sort_bam_out: params.minimap2_genome_out) -include { index_bam as index_bam_genome } from './nf_modules/samtools/main.nf' addParams(index_bam_out: params.minimap2_genome_out) +include { sort_bam } from './nf_modules/samtools/main.nf' addParams(sort_bam_out: params.minimap2_genome_out) +include { index_bam } from './nf_modules/samtools/main.nf' addParams(index_bam_out: params.minimap2_genome_out) +include { sort_index_bam } from './nf_modules/samtools/main.nf' addParams(indexed_bam_out: params.minimap2_genome_out) include { start_position_counts } from "./nf_modules/samtools/main.nf" +include { start_position_individuals } from "./nf_modules/start_positions/main.nf" +include { jwr_checker } from "./nf_modules/nanosplicer/main.nf" +include { junctions_nanosplicer } from "./nf_modules/junction_nanosplicer/main.nf" +include { rna_count } from "./nf_modules/rna_count/main.nf" + +///////////////////////////////////////////////////////// +// script R avec classification des reads par type d'ARN et graphiques associés + // creation des fonctions NanoSplicer: // include { jwr_check } from "./nf_modules/nanosplicer/main.nf" @@ -200,19 +212,33 @@ workflow { } //####################### PREPROCESSING ####################### - /* + + + //Filtration (seqkit_grep looks for the 5'RACE and the gsp patterns in the reads to keep only mature ARNs) seqkit_grep(concatenate.out.merged_fastq, params.adapt, params.gsp) + + //Cut of the 5'RACE sequence cut_5pRACE(seqkit_grep.out.filtered_fastq, params.adapt) //########################## MAPPING ########################## - + + hbv_genome(cut_5pRACE.out.fastq_cutadapt, genome) - sort_bam_genome(hbv_genome.out.bam) - index_bam_genome(sort_bam_genome.out.sorted_bam.collect()) + sort_index_bam(hbv_genome.out.bam) + // index_bam(sort_bam_genome.out.sorted_bam.collect()) - //###################### QUANTIFICATION ####################### + //###################### START POSITIONS ####################### - start_position_counts(sort_bam_genome.out.sorted_bam) - */ + start_position_counts(sort_index_bam.out.indexed_bam) + start_position_individuals(start_position_counts.out.count) -} \ No newline at end of file + //#################### VARIANTS D'EPISSAGE #################### + + jwr_checker(sort_index_bam.out.indexed_bam) + junctions_nanosplicer(start_position_individuals.out.classification_of_reads, jwr_checker.out.nanosplicer_jwr) + + //#################### VARIANTS D'EPISSAGE #################### + + rna_count(junctions_nanosplicer.out.identified_SPvariants, start_position_individuals.out.classification_of_reads) + +} diff --git a/src/nf_modules/junction_nanosplicer/main.nf b/src/nf_modules/junction_nanosplicer/main.nf index 4af81e2a33592f4ec2f53a9c137c365f58b94eba..9000bc89d64e21522d056647e9867bca66a8ec15 100644 --- a/src/nf_modules/junction_nanosplicer/main.nf +++ b/src/nf_modules/junction_nanosplicer/main.nf @@ -1,13 +1,13 @@ version = "1.0" -container_url = "xgrand/r-scripts:${version}" +container_url = "xgrand/r-bolero:${version}" -params.junctions_out = "" +params.nanosplicer_out = "" process junctions_nanosplicer{ container = "${container_url}" label "small_mem_mono_cpus" tag "identification de variants d'épissage" - if (params.junctions_out != "") { - publishDir "results/${params.junctions_out}", mode: 'copy' + if (params.nanosplicer_out != "") { + publishDir "results/${params.nanosplicer_out}", mode: 'copy' } input: @@ -17,10 +17,11 @@ process junctions_nanosplicer{ output: path("Rplots.pdf") path("JWR_check_parsed.csv") + path("*.jpg") path("identified_SPvariants.csv"), emit: identified_SPvariants script: """ - Rscript Junctions_NanoSplicer.R -c txt -j csv + Rscript /Junctions_NanoSplicer.R -c ${txt} -j ${csv} """ } \ No newline at end of file diff --git a/src/nf_modules/ont-guppy/main.fr b/src/nf_modules/ont-guppy/main.nf similarity index 100% rename from src/nf_modules/ont-guppy/main.fr rename to src/nf_modules/ont-guppy/main.nf diff --git a/src/nf_modules/rna_count/main.nf b/src/nf_modules/rna_count/main.nf index 5899f2ed15d6e833006b1e799394ab2346972bcf..4d9b6f40308b9267327d7e3d032757b59cb686f2 100644 --- a/src/nf_modules/rna_count/main.nf +++ b/src/nf_modules/rna_count/main.nf @@ -1,5 +1,5 @@ version = "1.0" -container_url = "xgrand/r-scripts:${version}" +container_url = "xgrand/r-bolero:${version}" params.rna_count_out = "" process rna_count{ @@ -17,9 +17,10 @@ process rna_count{ output: path("*.csv") path("*.pdf") + path("*.jpg") script: """ - Rscript HBV_RNAs_count.R -s spvariants -c classification + Rscript /HBV_RNAs_count.R -s ${spvariants} -c ${classification} """ } diff --git a/src/nf_modules/start_positions/main.nf b/src/nf_modules/start_positions/main.nf index 4a29e9fed7d7dd37dfa1610086c2232e13250794..4b97e0c562d341772229efaed144f31e8ceb53f5 100644 --- a/src/nf_modules/start_positions/main.nf +++ b/src/nf_modules/start_positions/main.nf @@ -1,7 +1,7 @@ version = "1.0" -container_url = "xgrand/r-scripts:${version}" +container_url = "xgrand/r-bolero:${version}" -params.start_position_counts_out = "" +params.start_position_counts_out ="" process start_position_individuals{ container = "${container_url}" label "small_mem_mono_cpus" @@ -9,17 +9,17 @@ process start_position_individuals{ if (params.start_position_counts_out != "") { publishDir "results/${params.start_position_counts_out}", mode: 'copy' } - input: path(start_position_counts) output: path("Rplots.pdf") + path("*.jpg") path("Count_reads_per_promoter.tsv") path("classification_of_reads_per_RNA.txt"), emit: classification_of_reads script: """ - Rscript start_positions.R -i start_position_counts + Rscript /Start_positions.R -i ${start_position_counts} """ } \ No newline at end of file