Skip to content
Snippets Groups Projects
Commit 62517165 authored by Xavier Grand
Browse files

Merge branch 'Alia' into 'master'

Alia

See merge request testoni-lab/bolero!3
parents 9bffecff f048b1cc
No related branches found
No related tags found
No related merge requests found
...@@ -74,7 +74,7 @@ countSP <- dplyr::inner_join(palette_complete, ...@@ -74,7 +74,7 @@ countSP <- dplyr::inner_join(palette_complete,
#print(names(countSP)) #print(names(countSP))
countSP$nom <- factor(countSP$nom, levels = all_species_name) countSP$nom <- factor(countSP$nom, levels = all_species_name)
countSP <- mutate(countSP, countSP <- dplyr::mutate(countSP,
proportion = (as.numeric(n)/sum(as.numeric(n))*100)) proportion = (as.numeric(n)/sum(as.numeric(n))*100))
#print(countSP) #print(countSP)
ggplot(countSP, aes(x = "percent", ggplot(countSP, aes(x = "percent",
...@@ -112,7 +112,7 @@ classified_reads <- read.table(file = opt$classification, ...@@ -112,7 +112,7 @@ classified_reads <- read.table(file = opt$classification,
header = TRUE) header = TRUE)
not_spliced <- classified_reads[!(classified_reads$read_ID %in% clean_SP$id),] not_spliced <- classified_reads[!(classified_reads$read_ID %in% clean_SP$id),]
not_spliced <- mutate(not_spliced, not_spliced <- dplyr::mutate(not_spliced,
species = not_spliced$promoter) species = not_spliced$promoter)
#print(not_spliced) #print(not_spliced)
not_spliced <- not_spliced %>% select(read_ID, species) not_spliced <- not_spliced %>% select(read_ID, species)
...@@ -124,7 +124,7 @@ colnames(clean_SP_type) <- c("id", "species") ...@@ -124,7 +124,7 @@ colnames(clean_SP_type) <- c("id", "species")
df_species <- rbind.data.frame(not_spliced, clean_SP_type, df_species <- rbind.data.frame(not_spliced, clean_SP_type,
stringsAsFactors = FALSE) stringsAsFactors = FALSE)
count_species <- df_species %>% count(species) count_species <- df_species %>% count(species)
count_species <- mutate(count_species, count_species <- dplyr::mutate(count_species,
percent = (as.numeric(n)/sum(as.numeric(n))*100)) percent = (as.numeric(n)/sum(as.numeric(n))*100))
#print(count_species) #print(count_species)
write.table(df_species, file = "All_reads_identified.csv", write.table(df_species, file = "All_reads_identified.csv",
...@@ -161,7 +161,7 @@ count_species_SPxx <- rbind.data.frame(count_species_SPxx, ...@@ -161,7 +161,7 @@ count_species_SPxx <- rbind.data.frame(count_species_SPxx,
stringsAsFactors = FALSE) stringsAsFactors = FALSE)
count_species_SPxx <- count_species_SPxx[count_species_SPxx$species %in% count_species_SPxx <- count_species_SPxx[count_species_SPxx$species %in%
all_species_name[c(1:3,5,35)],] all_species_name[c(1:3,5,35)],]
count_species_SPxx <- mutate(count_species_SPxx, count_species_SPxx <- dplyr::mutate(count_species_SPxx,
percent=(as.numeric(n)/sum(as.numeric(n))*100)) percent=(as.numeric(n)/sum(as.numeric(n))*100))
#print(count_species_SPxx) #print(count_species_SPxx)
# save the tab: # save the tab:
...@@ -229,7 +229,7 @@ ggsave(file = "Count_RNAs_species_clear.png", ...@@ -229,7 +229,7 @@ ggsave(file = "Count_RNAs_species_clear.png",
# SP composition clear: # SP composition clear:
count_clear <- clean_SP[clean_SP$SP_name %in% SPvariants,] %>% count(SP_name) count_clear <- clean_SP[clean_SP$SP_name %in% SPvariants,] %>% count(SP_name)
count_clear <- mutate(count_clear, count_clear <- dplyr::mutate(count_clear,
proportion=(as.numeric(n)/sum(as.numeric(n))*100)) proportion=(as.numeric(n)/sum(as.numeric(n))*100))
#print(count_clear) #print(count_clear)
count_clear <- dplyr::inner_join(palette_complete, count_clear <- dplyr::inner_join(palette_complete,
......
...@@ -36,7 +36,7 @@ df <- df %>% ...@@ -36,7 +36,7 @@ df <- df %>%
df$donor <- str_replace(df$donor, '[(]', '') df$donor <- str_replace(df$donor, '[(]', '')
df$acceptor <- str_replace(df$acceptor, '[)]', '') df$acceptor <- str_replace(df$acceptor, '[)]', '')
df <- mutate(df, df <- dplyr::mutate(df,
pg_donor = as.numeric(donor)-122, pg_donor = as.numeric(donor)-122,
pg_acceptor = as.numeric(acceptor)-122) pg_acceptor = as.numeric(acceptor)-122)
...@@ -110,7 +110,7 @@ assignation_acceptor <- function(pg_acceptor) { ...@@ -110,7 +110,7 @@ assignation_acceptor <- function(pg_acceptor) {
df$donor_site <- sapply(df$pg_donor, assignation_donor) df$donor_site <- sapply(df$pg_donor, assignation_donor)
df$acceptor_site <- sapply(df$pg_acceptor, assignation_acceptor) df$acceptor_site <- sapply(df$pg_acceptor, assignation_acceptor)
df <- mutate(df, df <- dplyr::mutate(df,
junction = paste0(donor_site, acceptor_site)) junction = paste0(donor_site, acceptor_site))
write.table(df, file = "JWR_check_parsed.csv", row.names = FALSE, sep = "\t") write.table(df, file = "JWR_check_parsed.csv", row.names = FALSE, sep = "\t")
......
...@@ -47,9 +47,9 @@ parsingData <- function(df) { ...@@ -47,9 +47,9 @@ parsingData <- function(df) {
tmp$Start <- as.numeric(tmp$Start) tmp$Start <- as.numeric(tmp$Start)
df2 <- as_tibble(tmp) %>% df2 <- as_tibble(tmp) %>%
mutate(bin = round(Start/binsize)*binsize) %>% dplyr::mutate(bin = round(Start/binsize)*binsize) %>%
group_by(bin) %>% group_by(bin) %>%
summarize(nb_reads = sum(Freq, na.rm = T)) dplyr::summarize(nb_reads = sum(Freq, na.rm = T))
df2[is.na(df2)] <- 0 df2[is.na(df2)] <- 0
df2[3] <- rep(df[1,3], length(df2$bin)) df2[3] <- rep(df[1,3], length(df2$bin))
colnames(df2) <- c("Start_position", "nb_reads", "Barcode") colnames(df2) <- c("Start_position", "nb_reads", "Barcode")
......
#!/usr/bin/env nextflow #!/usr/bin/env nextflow
nextflow.enable.dsl=2 nextflow.enable.dsl=2
// Enable the DSL2 syntax extension
/* /*
======================================================================================================================== ========================================================================================================================
...@@ -73,7 +74,7 @@ if (params.help || params.h) { ...@@ -73,7 +74,7 @@ if (params.help || params.h) {
**************************************************************** ****************************************************************
*/ */
/* params in */ /* Params in */
params.skipBC = true params.skipBC = true
params.gpu_mode = false params.gpu_mode = false
...@@ -100,6 +101,8 @@ params.seqkit_grep_out = "03_fastq/" ...@@ -100,6 +101,8 @@ params.seqkit_grep_out = "03_fastq/"
params.cutadapt_out = "04_cutadapt/" params.cutadapt_out = "04_cutadapt/"
params.minimap2_genome_out = "05_minimap2/" params.minimap2_genome_out = "05_minimap2/"
params.start_position_counts_out = "06_start_positions/" params.start_position_counts_out = "06_start_positions/"
params.nanosplicer_out = "07_nanosplicer/"
params.rna_count_out = "08_RNA_count/"
params.pycoQC_out = "pycoQC/" params.pycoQC_out = "pycoQC/"
/* /*
...@@ -108,6 +111,7 @@ params.pycoQC_out = "pycoQC/" ...@@ -108,6 +111,7 @@ params.pycoQC_out = "pycoQC/"
**************************************************************** ****************************************************************
*/ */
// Print the run information, one log line per parameter
log.info "fast5/q folder : ${params.input}" log.info "fast5/q folder : ${params.input}"
log.info "5'RACE adapter sequence : ${params.adapt}" log.info "5'RACE adapter sequence : ${params.adapt}"
if(!params.skipBC) log.info "Guppy basecalling calculation using GPU mode : ${params.gpu_mode}." if(!params.skipBC) log.info "Guppy basecalling calculation using GPU mode : ${params.gpu_mode}."
...@@ -161,13 +165,21 @@ if(!params.skipBC) { ...@@ -161,13 +165,21 @@ if(!params.skipBC) {
// Replace concatenate by seqkit fct to parallelization: // Replace concatenate by seqkit fct to parallelization:
// include { concatenate } from "./nf_modules/seqkit/main.nf" // include { concatenate } from "./nf_modules/seqkit/main.nf"
include { concatenate } from "./nf_modules/concatenate/main.nf" include { concatenate } from "./nf_modules/concatenate/main.nf"
include { cut_5pRACE } from "./nf_modules/cutadapt/main.nf" include { cut_5pRACE } from "./nf_modules/cutadapt/main.nf"
include { hbv_genome } from "./nf_modules/minimap2/main.nf" include { hbv_genome } from "./nf_modules/minimap2/main.nf"
include { seqkit_grep } from "./nf_modules/seqkit/main.nf" include { seqkit_grep } from "./nf_modules/seqkit/main.nf"
include { sort_bam as sort_bam_genome } from './nf_modules/samtools/main.nf' addParams(sort_bam_out: params.minimap2_genome_out) include { sort_bam } from './nf_modules/samtools/main.nf' addParams(sort_bam_out: params.minimap2_genome_out)
include { index_bam as index_bam_genome } from './nf_modules/samtools/main.nf' addParams(index_bam_out: params.minimap2_genome_out) include { index_bam } from './nf_modules/samtools/main.nf' addParams(index_bam_out: params.minimap2_genome_out)
include { sort_index_bam } from './nf_modules/samtools/main.nf' addParams(indexed_bam_out: params.minimap2_genome_out)
include { start_position_counts } from "./nf_modules/samtools/main.nf" include { start_position_counts } from "./nf_modules/samtools/main.nf"
include { start_position_individuals } from "./nf_modules/start_positions/main.nf"
include { jwr_checker } from "./nf_modules/nanosplicer/main.nf"
include { junctions_nanosplicer } from "./nf_modules/junction_nanosplicer/main.nf"
include { rna_count } from "./nf_modules/rna_count/main.nf"
/////////////////////////////////////////////////////////
// script R avec classification des reads par type d'ARN et graphiques associés
// creation des fonctions NanoSplicer: // creation des fonctions NanoSplicer:
// include { jwr_check } from "./nf_modules/nanosplicer/main.nf" // include { jwr_check } from "./nf_modules/nanosplicer/main.nf"
...@@ -200,19 +212,33 @@ workflow { ...@@ -200,19 +212,33 @@ workflow {
} }
//####################### PREPROCESSING ####################### //####################### PREPROCESSING #######################
/*
// Filtering: seqkit_grep searches the reads for the 5'RACE and gsp patterns to keep only mature RNAs
seqkit_grep(concatenate.out.merged_fastq, params.adapt, params.gsp) seqkit_grep(concatenate.out.merged_fastq, params.adapt, params.gsp)
// Trim off the 5'RACE sequence
cut_5pRACE(seqkit_grep.out.filtered_fastq, params.adapt) cut_5pRACE(seqkit_grep.out.filtered_fastq, params.adapt)
//########################## MAPPING ########################## //########################## MAPPING ##########################
hbv_genome(cut_5pRACE.out.fastq_cutadapt, genome) hbv_genome(cut_5pRACE.out.fastq_cutadapt, genome)
sort_bam_genome(hbv_genome.out.bam) sort_index_bam(hbv_genome.out.bam)
index_bam_genome(sort_bam_genome.out.sorted_bam.collect()) // index_bam(sort_bam_genome.out.sorted_bam.collect())
//###################### QUANTIFICATION ####################### //###################### START POSITIONS #######################
start_position_counts(sort_bam_genome.out.sorted_bam) start_position_counts(sort_index_bam.out.indexed_bam)
*/ start_position_individuals(start_position_counts.out.count)
} //#################### VARIANTS D'EPISSAGE ####################
\ No newline at end of file
jwr_checker(sort_index_bam.out.indexed_bam)
junctions_nanosplicer(start_position_individuals.out.classification_of_reads, jwr_checker.out.nanosplicer_jwr)
//######################### RNA COUNT #########################
rna_count(junctions_nanosplicer.out.identified_SPvariants, start_position_individuals.out.classification_of_reads)
}
version = "1.0" version = "1.0"
container_url = "xgrand/r-scripts:${version}" container_url = "xgrand/r-bolero:${version}"
params.junctions_out = "" params.nanosplicer_out = ""
process junctions_nanosplicer{ process junctions_nanosplicer{
container = "${container_url}" container = "${container_url}"
label "small_mem_mono_cpus" label "small_mem_mono_cpus"
tag "identification de variants d'épissage" tag "identification de variants d'épissage"
if (params.junctions_out != "") { if (params.nanosplicer_out != "") {
publishDir "results/${params.junctions_out}", mode: 'copy' publishDir "results/${params.nanosplicer_out}", mode: 'copy'
} }
input: input:
...@@ -17,10 +17,11 @@ process junctions_nanosplicer{ ...@@ -17,10 +17,11 @@ process junctions_nanosplicer{
output: output:
path("Rplots.pdf") path("Rplots.pdf")
path("JWR_check_parsed.csv") path("JWR_check_parsed.csv")
path("*.jpg")
path("identified_SPvariants.csv"), emit: identified_SPvariants path("identified_SPvariants.csv"), emit: identified_SPvariants
script: script:
""" """
Rscript Junctions_NanoSplicer.R -c txt -j csv Rscript /Junctions_NanoSplicer.R -c ${txt} -j ${csv}
""" """
} }
\ No newline at end of file
File moved
version = "1.0" version = "1.0"
container_url = "xgrand/r-scripts:${version}" container_url = "xgrand/r-bolero:${version}"
params.rna_count_out = "" params.rna_count_out = ""
process rna_count{ process rna_count{
...@@ -17,9 +17,10 @@ process rna_count{ ...@@ -17,9 +17,10 @@ process rna_count{
output: output:
path("*.csv") path("*.csv")
path("*.pdf") path("*.pdf")
path("*.jpg")
script: script:
""" """
Rscript HBV_RNAs_count.R -s spvariants -c classification Rscript /HBV_RNAs_count.R -s ${spvariants} -c ${classification}
""" """
} }
version = "1.0" version = "1.0"
container_url = "xgrand/r-scripts:${version}" container_url = "xgrand/r-bolero:${version}"
params.start_position_counts_out = "" params.start_position_counts_out =""
process start_position_individuals{ process start_position_individuals{
container = "${container_url}" container = "${container_url}"
label "small_mem_mono_cpus" label "small_mem_mono_cpus"
...@@ -9,17 +9,17 @@ process start_position_individuals{ ...@@ -9,17 +9,17 @@ process start_position_individuals{
if (params.start_position_counts_out != "") { if (params.start_position_counts_out != "") {
publishDir "results/${params.start_position_counts_out}", mode: 'copy' publishDir "results/${params.start_position_counts_out}", mode: 'copy'
} }
input: input:
path(start_position_counts) path(start_position_counts)
output: output:
path("Rplots.pdf") path("Rplots.pdf")
path("*.jpg")
path("Count_reads_per_promoter.tsv") path("Count_reads_per_promoter.tsv")
path("classification_of_reads_per_RNA.txt"), emit: classification_of_reads path("classification_of_reads_per_RNA.txt"), emit: classification_of_reads
script: script:
""" """
Rscript start_positions.R -i start_position_counts Rscript /Start_positions.R -i ${start_position_counts}
""" """
} }
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment