Skip to content
Snippets Groups Projects
Commit 4f985908 authored by Xavier Grand's avatar Xavier Grand
Browse files

Merge branch 'Alia' of http://10.69.67.13/testoni-lab/bolero

parents 077d1693 ca26a648
No related branches found
No related tags found
No related merge requests found
...@@ -13,7 +13,9 @@ option_list = list( ...@@ -13,7 +13,9 @@ option_list = list(
make_option(c("-s", "--SPvariants"), type="character", default=NULL, make_option(c("-s", "--SPvariants"), type="character", default=NULL,
help="input identified SP variants table (.csv)", metavar="character"), help="input identified SP variants table (.csv)", metavar="character"),
make_option(c("-c", "--classification"), type="character", default=NULL, make_option(c("-c", "--classification"), type="character", default=NULL,
help="input classification of reads file (.txt)", metavar="character")) help="input classification of reads file (.txt)", metavar="character"),
make_option(c("-b", "--barcode"), type="character", default=NULL,
help="input barcode", metavar="character"))
opt_parser = OptionParser(option_list=option_list) opt_parser = OptionParser(option_list=option_list)
opt = parse_args(opt_parser) opt = parse_args(opt_parser)
...@@ -85,7 +87,7 @@ ggplot(countSP, aes(x = "percent", ...@@ -85,7 +87,7 @@ ggplot(countSP, aes(x = "percent",
scale_fill_manual(values = countSP$teinte) + scale_fill_manual(values = countSP$teinte) +
labs(fill = "spliced-variants") labs(fill = "spliced-variants")
ggsave(file = "SP_proportion_camembert.png", ggsave(file = paste0(opt$barcode, "_SP_proportion_piechart.png"),
scale = 2, scale = 2,
width = 1920, width = 1920,
height = 1080, height = 1080,
...@@ -100,7 +102,7 @@ ggplot(countSP, aes(x = nom, y = proportion, fill = nom)) + ...@@ -100,7 +102,7 @@ ggplot(countSP, aes(x = nom, y = proportion, fill = nom)) +
xlab(label = "spliced-variants") + xlab(label = "spliced-variants") +
ylab(label = "percent") ylab(label = "percent")
ggsave(file = "SP_proportion.png", ggsave(file = paste0(opt$barcode, "_SP_proportion.png"),
scale = 2, scale = 2,
width = 1920, width = 1920,
height = 1080, height = 1080,
...@@ -127,7 +129,7 @@ count_species <- df_species %>% count(species) ...@@ -127,7 +129,7 @@ count_species <- df_species %>% count(species)
count_species <- dplyr::mutate(count_species, count_species <- dplyr::mutate(count_species,
percent = (as.numeric(n)/sum(as.numeric(n))*100)) percent = (as.numeric(n)/sum(as.numeric(n))*100))
#print(count_species) #print(count_species)
write.table(df_species, file = "All_reads_identified.csv", write.table(df_species, file = paste0(opt$barcode, "_all_reads_identified.csv"),
sep = "\t", quote = FALSE, row.names = FALSE) sep = "\t", quote = FALSE, row.names = FALSE)
# Null dataset: # Null dataset:
...@@ -165,7 +167,7 @@ count_species_SPxx <- dplyr::mutate(count_species_SPxx, ...@@ -165,7 +167,7 @@ count_species_SPxx <- dplyr::mutate(count_species_SPxx,
percent=(as.numeric(n)/sum(as.numeric(n))*100)) percent=(as.numeric(n)/sum(as.numeric(n))*100))
#print(count_species_SPxx) #print(count_species_SPxx)
# save the tab: # save the tab:
write.csv(count_species_SPxx, file = "Count_canonical_species_SPxx.csv") write.csv(count_species_SPxx, file = paste0(opt$barcode, "_count_canonical_species_SPxx.csv"))
# prepare to plot: # prepare to plot:
count_species_SPxx <- dplyr::inner_join(palette_complete, count_species_SPxx <- dplyr::inner_join(palette_complete,
...@@ -176,7 +178,7 @@ count_species_SPxx <- dplyr::inner_join(palette_complete, ...@@ -176,7 +178,7 @@ count_species_SPxx <- dplyr::inner_join(palette_complete,
count_species_SPxx$nom <- factor(count_species_SPxx$nom, levels = all_species_name) count_species_SPxx$nom <- factor(count_species_SPxx$nom, levels = all_species_name)
# Save: # Save:
write.csv(count_species, file = "Count_species.csv") write.csv(count_species, file = paste0(opt$barcode, "_count_species.csv"))
# RNA species composition all species: # RNA species composition all species:
count_species <- dplyr::inner_join(palette_complete, count_species, count_species <- dplyr::inner_join(palette_complete, count_species,
...@@ -194,7 +196,7 @@ ggplot(count_species, ...@@ -194,7 +196,7 @@ ggplot(count_species,
labs(fill = "RNA species & spliced-variants") + labs(fill = "RNA species & spliced-variants") +
xlab(label = "RNA species & spliced-variants") xlab(label = "RNA species & spliced-variants")
ggsave(file = "Count_RNAs_species.png", ggsave(file = paste0(opt$barcode, "_count_RNAs_species.png"),
scale = 2, scale = 2,
width = 1920, width = 1920,
height = 1080, height = 1080,
...@@ -220,7 +222,7 @@ ggplot(count_species_clear, ...@@ -220,7 +222,7 @@ ggplot(count_species_clear,
labs(fill = "RNA species & spliced-variants") + labs(fill = "RNA species & spliced-variants") +
xlab(label = "RNA species & spliced-variants") xlab(label = "RNA species & spliced-variants")
ggsave(file = "Count_RNAs_species_clear.png", ggsave(file = paste0(opt$barcode, "_count_RNAs_species_clear.png"),
scale = 2, scale = 2,
width = 1920, width = 1920,
height = 1080, height = 1080,
...@@ -246,7 +248,7 @@ ggplot(count_clear, aes(x = "percent", ...@@ -246,7 +248,7 @@ ggplot(count_clear, aes(x = "percent",
scale_fill_manual(values = count_clear$teinte) + scale_fill_manual(values = count_clear$teinte) +
labs(fill = "spliced-variants") labs(fill = "spliced-variants")
ggsave(file = "SP_clear_proportion_camembert.png", ggsave(file = paste0(opt$barcode, "_SP_clear_proportion_piechart.png"),
scale = 2, scale = 2,
width = 1920, width = 1920,
height = 1080, height = 1080,
...@@ -263,7 +265,7 @@ ggplot(count_clear, aes(x = nom, ...@@ -263,7 +265,7 @@ ggplot(count_clear, aes(x = nom,
xlab(label = "spliced-variants") + xlab(label = "spliced-variants") +
ylab(label = "percent") ylab(label = "percent")
ggsave(file = "SP_clear_proportion.png", ggsave(file = paste0(opt$barcode, "_SP_clear_proportion.png"),
scale = 2, scale = 2,
width = 1920, width = 1920,
height = 1080, height = 1080,
...@@ -281,7 +283,7 @@ ggplot(count_species_SPxx, aes(x = "species", ...@@ -281,7 +283,7 @@ ggplot(count_species_SPxx, aes(x = "species",
ylab(label = "TSS usage") + ylab(label = "TSS usage") +
xlab(label = "percent") xlab(label = "percent")
ggsave(file = "Count_RNAs_species_camembert.png", ggsave(file = paste0(opt$barcode, "_count_RNAs_species_piechart.png"),
scale = 2, scale = 2,
width = 1920, width = 1920,
height = 1080, height = 1080,
......
...@@ -15,7 +15,9 @@ option_list = list( ...@@ -15,7 +15,9 @@ option_list = list(
make_option(c("-c", "--classification"), type="character", default="./classification.txt", make_option(c("-c", "--classification"), type="character", default="./classification.txt",
help="input classification or reads file (.txt)", metavar="character"), help="input classification or reads file (.txt)", metavar="character"),
make_option(c("-j", "--jwr"), type="character", default=NULL, make_option(c("-j", "--jwr"), type="character", default=NULL,
help="input nanosplicer results table (.csv)", metavar="character")) help="input nanosplicer results table (.csv)", metavar="character"),
make_option(c("-b", "--barcode"), type="character", default=NULL,
help="input barcode", metavar="character"))
opt_parser = OptionParser(option_list=option_list) opt_parser = OptionParser(option_list=option_list)
opt = parse_args(opt_parser) opt = parse_args(opt_parser)
reads_pos <- read.table(opt$classification, reads_pos <- read.table(opt$classification,
...@@ -113,7 +115,7 @@ df$acceptor_site <- sapply(df$pg_acceptor, assignation_acceptor) ...@@ -113,7 +115,7 @@ df$acceptor_site <- sapply(df$pg_acceptor, assignation_acceptor)
df <- dplyr::mutate(df, df <- dplyr::mutate(df,
junction = paste0(donor_site, acceptor_site)) junction = paste0(donor_site, acceptor_site))
write.table(df, file = "JWR_check_parsed.csv", row.names = FALSE, sep = "\t") write.table(df, file = paste0(opt$barcode, "_JWR_check_parsed.csv"), row.names = FALSE, sep = "\t")
duplicated2 <- function(x){ duplicated2 <- function(x){
if (sum(dup <- duplicated(x))==0) if (sum(dup <- duplicated(x))==0)
...@@ -293,107 +295,8 @@ SP_variant_unique <- df_SPvariants %>% select(id, SP_name) ...@@ -293,107 +295,8 @@ SP_variant_unique <- df_SPvariants %>% select(id, SP_name)
SP_variant_unique <- SP_variant_unique[!duplicated(SP_variant_unique$id),] # distinct(SP_variant_unique, id) SP_variant_unique <- SP_variant_unique[!duplicated(SP_variant_unique$id),] # distinct(SP_variant_unique, id)
write.table(df_SPvariants, write.table(df_SPvariants,
"identified_SPvariants.csv", paste0(opt$barcode, "_identified_SPvariants.csv"),
row.names = FALSE, row.names = FALSE,
sep = "\t", sep = "\t",
quote = FALSE) quote = FALSE)
# Donor-site position distribution (`pg_donor` column of `df`).
# Draws a density-scaled histogram with a kernel-density overlay, then marks:
#   * median                         - blue, dashed, thick
#   * 2.5% / 97.5% quantiles         - default colour, dashed
#   * 0.5% / 99.5% quantiles         - green, dashed
#   * median +/- standard deviation  - red, solid
# The green lines sit on the 0.5% / 99.5% quantiles so that they line up with
# the axis breaks declared in scale_x_continuous() below.
ggplot(df, aes(x = pg_donor)) +
  geom_histogram(aes(y = after_stat(density)),
                 color = "darkblue", fill = "lightblue") +
  geom_density(alpha = .2, fill = "lightblue") +
  geom_vline(aes(xintercept = median(pg_donor)),
             color = "blue", linetype = "dashed", linewidth = 1) +
  geom_vline(aes(xintercept = quantile(pg_donor, 0.025)),
             linetype = "dashed", linewidth = 0.25) +
  geom_vline(aes(xintercept = quantile(pg_donor, 0.975)),
             linetype = "dashed", linewidth = 0.25) +
  geom_vline(aes(xintercept = quantile(pg_donor, 0.005)),
             color = "green", linetype = "dashed", linewidth = 0.25) +
  geom_vline(aes(xintercept = quantile(pg_donor, 0.995)),
             color = "green", linetype = "dashed", linewidth = 0.25) +
  geom_vline(aes(xintercept = (median(pg_donor) + sd(pg_donor))),
             color = "red", linewidth = 0.5) +
  geom_vline(aes(xintercept = (median(pg_donor) - sd(pg_donor))),
             color = "red", linewidth = 0.5) +
  # Breaks are placed at the exact statistics; the labels show them as whole
  # positions (lower quantiles floored, upper quantiles floored + 1).
  # `labels` is spelled out in full rather than relying on partial matching.
  scale_x_continuous(breaks = c(min(df$pg_donor),
                                quantile(df$pg_donor, 0.025),
                                quantile(df$pg_donor, 0.005),
                                median(df$pg_donor) - sd(df$pg_donor),
                                median(df$pg_donor),
                                median(df$pg_donor) + sd(df$pg_donor),
                                quantile(df$pg_donor, 0.975),
                                quantile(df$pg_donor, 0.995),
                                max(df$pg_donor)),
                     labels = c(min(df$pg_donor),
                                floor(quantile(df$pg_donor, 0.025)),
                                floor(quantile(df$pg_donor, 0.005)),
                                round(median(df$pg_donor) - sd(df$pg_donor)),
                                median(df$pg_donor),
                                round(median(df$pg_donor) + sd(df$pg_donor)),
                                floor(quantile(df$pg_donor, 0.975)) + 1,
                                floor(quantile(df$pg_donor, 0.995)) + 1,
                                max(df$pg_donor))) +
  theme(axis.text.x = element_text(angle = 45))
# Write the figure as a 1920 x 1080 px PNG at 320 dpi. No `plot` argument is
# given, so ggsave() falls back to its default plot.
ggsave(filename = "Donor_curve.png",
       device = "png",
       scale = 1,
       width = 1920,
       height = 1080,
       units = "px",
       dpi = 320)
# Acceptor-site position distribution (`pg_acceptor` column of `df`).
# Draws a density-scaled histogram with a kernel-density overlay, then marks:
#   * median                         - red, dashed, thick
#   * 2.5% / 97.5% quantiles         - default colour, dashed
#   * 0.5% / 99.5% quantiles         - green, dashed
#   * median +/- standard deviation  - blue, solid
ggplot(df, aes(x = pg_acceptor)) +
  geom_histogram(aes(y = after_stat(density)),
                 color = "red", fill = "darksalmon") +
  geom_density(alpha = .2, fill = "darksalmon") +
  geom_vline(aes(xintercept = median(pg_acceptor)),
             color = "red", linetype = "dashed", linewidth = 1) +
  geom_vline(aes(xintercept = quantile(pg_acceptor, 0.025)),
             linetype = "dashed", linewidth = 0.25) +
  geom_vline(aes(xintercept = quantile(pg_acceptor, 0.975)),
             linetype = "dashed", linewidth = 0.25) +
  geom_vline(aes(xintercept = quantile(pg_acceptor, 0.005)),
             color = "green", linetype = "dashed", linewidth = 0.25) +
  geom_vline(aes(xintercept = quantile(pg_acceptor, 0.995)),
             color = "green", linetype = "dashed", linewidth = 0.25) +
  geom_vline(aes(xintercept = (median(pg_acceptor) + sd(pg_acceptor))),
             color = "blue", linewidth = 0.5) +
  geom_vline(aes(xintercept = (median(pg_acceptor) - sd(pg_acceptor))),
             color = "blue", linewidth = 0.5) +
  # Breaks are placed at the exact statistics; the labels show them as whole
  # positions (lower quantiles floored, upper quantiles floored + 1).
  # `labels` is spelled out in full rather than relying on partial matching.
  scale_x_continuous(breaks = c(min(df$pg_acceptor),
                                quantile(df$pg_acceptor, 0.025),
                                quantile(df$pg_acceptor, 0.005),
                                median(df$pg_acceptor) - sd(df$pg_acceptor),
                                median(df$pg_acceptor),
                                median(df$pg_acceptor) + sd(df$pg_acceptor),
                                quantile(df$pg_acceptor, 0.975),
                                quantile(df$pg_acceptor, 0.995),
                                max(df$pg_acceptor)),
                     labels = c(min(df$pg_acceptor),
                                floor(quantile(df$pg_acceptor, 0.025)),
                                floor(quantile(df$pg_acceptor, 0.005)),
                                round(median(df$pg_acceptor) - sd(df$pg_acceptor)),
                                median(df$pg_acceptor),
                                round(median(df$pg_acceptor) + sd(df$pg_acceptor)),
                                floor(quantile(df$pg_acceptor, 0.975)) + 1,
                                floor(quantile(df$pg_acceptor, 0.995)) + 1,
                                max(df$pg_acceptor))) +
  theme(axis.text.x = element_text(angle = 45))
# Write the figure as a 1920 x 1080 px PNG at 320 dpi. No `plot` argument is
# given, so ggsave() falls back to its default plot.
ggsave(filename = "Acceptor_curve.png",
       device = "png",
       scale = 1,
       width = 1920,
       height = 1080,
       units = "px",
       dpi = 320)
# Graphs and tests:
# sink("test_shapiro.txt")
# print("Normality test: Shapiro-Wilk")
# print("Donor site:")
# print(shapiro.test(df$pg_donor))
# print("Acceptor site:")
# print(shapiro.test(df$pg_acceptor))
# sink()
\ No newline at end of file
...@@ -14,7 +14,9 @@ conflict_prefer("lag", "dplyr") ...@@ -14,7 +14,9 @@ conflict_prefer("lag", "dplyr")
# Load Start_positions_count files: # Load Start_positions_count files:
option_list = list( option_list = list(
make_option(c("-i", "--input"), type="character", default=NULL, make_option(c("-i", "--input"), type="character", default=NULL,
help="input start position file (.txt)", metavar="character") help="input start position file (.txt)", metavar="character"),
make_option(c("-b", "--barcode"), type="character", default=NULL,
help="input barcode", metavar="character")
) )
opt_parser = OptionParser(option_list=option_list) opt_parser = OptionParser(option_list=option_list)
...@@ -109,7 +111,7 @@ sam_bc01$promoter <- sapply(sam_bc01$start_position, ...@@ -109,7 +111,7 @@ sam_bc01$promoter <- sapply(sam_bc01$start_position,
classify_reads) classify_reads)
write.table(sam_bc01, write.table(sam_bc01,
file = "classification_of_reads_per_RNA.txt", file = paste0(opt$barcode, "_classification_of_reads_per_RNA.txt"),
quote = FALSE, quote = FALSE,
sep = "\t", sep = "\t",
row.names = FALSE) row.names = FALSE)
...@@ -164,7 +166,7 @@ abs_count_reads <- cbind(c(as.vector(promoters),"total"), abs_count_reads) ...@@ -164,7 +166,7 @@ abs_count_reads <- cbind(c(as.vector(promoters),"total"), abs_count_reads)
colnames(abs_count_reads) <- c("promoter", "read_number") colnames(abs_count_reads) <- c("promoter", "read_number")
write.table(abs_count_reads, write.table(abs_count_reads,
file = "Count_reads_per_promoter.tsv", file = paste0(opt$barcode, "_count_reads_per_promoter.tsv"),
quote = FALSE, quote = FALSE,
sep = "\t", sep = "\t",
row.names = FALSE) row.names = FALSE)
...@@ -201,7 +203,7 @@ plot_camembert <- function(barcode, df, tot) { ...@@ -201,7 +203,7 @@ plot_camembert <- function(barcode, df, tot) {
print(camembert) print(camembert)
ggsave(filename = paste0("./Reads_start_promoters_", barcode, "_camembert.png"), ggsave(filename = paste0("./", opt$barcode, "_reads_start_promoters_piechart.png"),
plot = last_plot(), plot = last_plot(),
scale = 1, scale = 1,
width = 1920, width = 1920,
......
...@@ -129,9 +129,9 @@ Channel ...@@ -129,9 +129,9 @@ Channel
.set { input } .set { input }
Channel Channel
.of( params.adapt ) .of( params.adapt )
.ifEmpty { error "No adapter sequence defined." } .ifEmpty { error "No adapter sequence defined." }
.set { adapt } .set { adapt }
Channel Channel
.fromPath( params.genome ) .fromPath( params.genome )
...@@ -143,7 +143,10 @@ Channel ...@@ -143,7 +143,10 @@ Channel
.ifEmpty { error "No annotation defined, a gtf file describing transcripts and splice variants." } .ifEmpty { error "No annotation defined, a gtf file describing transcripts and splice variants." }
.set { gtf } .set { gtf }
// .map( it -> [it.baseName, it]) Channel
.fromPath(params.input+'*/', type: 'dir')
.map(it -> [it.baseName, it])
.set{barcodes}
/* /*
**************************************************************** ****************************************************************
...@@ -161,10 +164,8 @@ if(!params.skipBC) { ...@@ -161,10 +164,8 @@ if(!params.skipBC) {
} }
} }
// Replace concatenate by seqkit fct to parallelization: include { barecode } from "./nf_modules/barecode/main.nf"
include { concatenate } from "./nf_modules/seqkit/main.nf" include { concatenate } from "./nf_modules/seqkit/main.nf"
//include { concatenate } from "./nf_modules/concatenate/main.nf"
include { cut_5pRACE } from "./nf_modules/cutadapt/main.nf" include { cut_5pRACE } from "./nf_modules/cutadapt/main.nf"
include { hbv_genome } from "./nf_modules/minimap2/main.nf" include { hbv_genome } from "./nf_modules/minimap2/main.nf"
include { seqkit_grep } from "./nf_modules/seqkit/main.nf" include { seqkit_grep } from "./nf_modules/seqkit/main.nf"
...@@ -178,9 +179,6 @@ include { junctions_nanosplicer } from "./nf_modules/junction_nanosplicer/main.n ...@@ -178,9 +179,6 @@ include { junctions_nanosplicer } from "./nf_modules/junction_nanosplicer/main.n
include { rna_count } from "./nf_modules/rna_count/main.nf" include { rna_count } from "./nf_modules/rna_count/main.nf"
// creation des fonctions NanoSplicer:
// include { jwr_check } from "./nf_modules/nanosplicer/main.nf"
/* /*
**************************************************************** ****************************************************************
Workflow Workflow
...@@ -189,42 +187,41 @@ include { rna_count } from "./nf_modules/rna_count/main.nf" ...@@ -189,42 +187,41 @@ include { rna_count } from "./nf_modules/rna_count/main.nf"
workflow { workflow {
//######################## BASECALLING ######################## //######################## BASECALLING ########################
if(params.skipBC) { if(params.skipBC) { // we take fastq files as input and skip basecalling
concatenate(params.input) concatenate(barcodes)
// Replace by seqkit scat to parallelization
} }
else {
// TODO: this branch still needs to be adapted
else { // we take fast5 files as input and proceed to basecalling with guppy
if(params.gpu_mode) { if(params.gpu_mode) {
basecall_fast5_gpu(input) basecall_fast5_gpu(input)
concatenate(basecall_fast5_gpu.out.pass) concatenate(basecall_fast5_gpu.out.pass)
// Replace by seqkit scat to parallelization
} }
else { else {
basecall_fast5_cpu(input) basecall_fast5_cpu(input)
concatenate(basecall_fast5_cpu.out.pass) concatenate(basecall_fast5_cpu.out.pass)
// Replace by seqkit scat to parallelization
} }
} }
//####################### PREPROCESSING ####################### //####################### PREPROCESSING #######################
//Filtration (seqkit_grep looks for the 5'RACE and the gsp patterns in the reads to keep only mature ARNs) //Filtration (seqkit_grep looks for the 5'RACE and the gsp patterns in the reads to keep only mature ARNs)
seqkit_grep(concatenate.out.merged_fastq, params.adapt, params.gsp) seqkit_grep(concatenate.out.merged_fastq, params.adapt, params.gsp)
//Cut of the 5'RACE sequence //Cut of the 5'RACE sequence
cut_5pRACE(seqkit_grep.out.filtered_fastq, params.adapt) cut_5pRACE(seqkit_grep.out.filtered_fastq, params.adapt)
//########################## MAPPING ########################## //########################## MAPPING ##########################
hbv_genome(cut_5pRACE.out.fastq_cutadapt, genome.collect())
hbv_genome(cut_5pRACE.out.fastq_cutadapt, genome)
sort_index_bam(hbv_genome.out.bam) sort_index_bam(hbv_genome.out.bam)
// index_bam(sort_bam_genome.out.sorted_bam.collect())
//###################### START POSITIONS ####################### //###################### START POSITIONS #######################
......
...@@ -4,23 +4,23 @@ container_url = "xgrand/cutadapt:${version}" ...@@ -4,23 +4,23 @@ container_url = "xgrand/cutadapt:${version}"
process cut_5pRACE { process cut_5pRACE {
container = "${container_url}" container = "${container_url}"
label "small_mem_mono_cpus" label "small_mem_mono_cpus"
tag "cutadapt" tag "${barcode}"
if (params.cutadapt_out != "") { if (params.cutadapt_out != "") {
publishDir "results/${params.cutadapt_out}", mode: 'copy' publishDir "results/${params.cutadapt_out}", mode: 'copy'
} }
input: input:
path(fastq) tuple val(barcode), path(fastq)
val(adapt) val(adapt)
output: output:
path("*_cut_*"), emit: fastq_cutadapt tuple val(barcode), path("${barcode}_merged_porechoped_cut_fastq.fastq"), emit: fastq_cutadapt
""" """
cutadapt -e 0.2 -g ${adapt} \ cutadapt -e 0.2 -g ${adapt} \
--revcomp \ --revcomp \
-o "merged_porechoped_cut_fastq.fastq" \ -o "${barcode}_merged_porechoped_cut_fastq.fastq" \
${fastq} ${fastq}
""" """
} }
\ No newline at end of file
...@@ -5,23 +5,24 @@ params.nanosplicer_out = "" ...@@ -5,23 +5,24 @@ params.nanosplicer_out = ""
process junctions_nanosplicer{ process junctions_nanosplicer{
container = "${container_url}" container = "${container_url}"
label "small_mem_mono_cpus" label "small_mem_mono_cpus"
tag "identification de variants d'épissage" tag "${barcode}"
if (params.nanosplicer_out != "") { if (params.nanosplicer_out != "") {
publishDir "results/${params.nanosplicer_out}", mode: 'copy' publishDir "results/${params.nanosplicer_out}", mode: 'copy'
} }
input: input:
path(txt) tuple val(barcode), path(txt)
path(csv) tuple val(barcode), path(csv)
output: output:
path("Rplots.pdf") path("${barcode}/JWR_check_parsed.csv")
path("JWR_check_parsed.csv") tuple val(barcode), path("${barcode}/${barcode}_identified_SPvariants.csv"), emit: identified_SPvariants
path("*.png")
path("identified_SPvariants.csv"), emit: identified_SPvariants
script: script:
""" """
Rscript /Junctions_NanoSplicer.R -c ${txt} -j ${csv} mkdir ${barcode}
cd ${barcode}/
Rscript /Junctions_NanoSplicer.R -c ../${txt} -j ../${csv}
mv identified_SPvariants.csv ${barcode}_identified_SPvariants.csv
""" """
} }
\ No newline at end of file
...@@ -89,22 +89,25 @@ params.mapping_hbv_genome = "-ax splice --secondary=no -G 1650 -u n --eqx" ...@@ -89,22 +89,25 @@ params.mapping_hbv_genome = "-ax splice --secondary=no -G 1650 -u n --eqx"
process hbv_genome { process hbv_genome {
container = "${container_url}" container = "${container_url}"
label "big_mem_multi_cpus" label "big_mem_multi_cpus"
tag "${barcode}"
if (params.minimap2_genome_out != "") { if (params.minimap2_genome_out != "") {
publishDir "results/${params.minimap2_genome_out}", mode: 'copy' publishDir "results/${params.minimap2_genome_out}", mode: 'copy'
} }
input: input:
path(fastq) tuple val(barcode), path(fastq)
path(genome) path(genome)
output: output:
path("*"), emit: bam tuple val(barcode), path("${barcode}/${barcode}_res.bam"), emit: bam
script: script:
memory = "${task.memory}" - ~/\s*GB/ memory = "${task.memory}" - ~/\s*GB/
memory = memory.toInteger() / (task.cpus + 1.0) memory = memory.toInteger() / (task.cpus + 1.0)
""" """
minimap2 ${params.mapping_hbv_genome} -t${task.cpus} -K ${memory} ${genome} ${fastq} | mkdir ${barcode}
samtools view -Shb - > res.bam cd ${barcode}/
minimap2 ${params.mapping_hbv_genome} -t ${task.cpus} -K ${memory} ../${genome} ../${fastq} |
samtools view -Shb - > ${barcode}_res.bam
""" """
} }
\ No newline at end of file
// Container image for this module; the image tag is built from `version`.
version = "1.0"
container_url = "xgrand/nanosplicer:${version}"
// Sub-directory of results/ used for publishing; the empty default disables
// publishing (see the publishDir guard inside the process).
params.nanosplicer_out = ""
// Runs NanoSplicer's JWR_checker.py on one BAM file and emits the resulting
// CSV together with the barcode it belongs to.
process jwr_checker {
container = "${container_url}"
label "big_mem_multi_cpus"
tag "${barcode}"
// Publish (copy) the outputs only when an output directory name is configured.
if (params.nanosplicer_out != "") {
publishDir "results/${params.nanosplicer_out}", mode: 'copy'
}
input:
// barcode: value used for the task tag, the working sub-directory and file names.
// bam: alignment file handed to JWR_checker.py.
// index: staged with the task but not referenced in the command below
// (presumably the BAM index the tool expects next to the BAM - TODO confirm).
tuple val(barcode), path(bam), path(index)
output:
// NOTE(review): the command names its output `<barcode>_JWR_check.h5`; the
// `.h5.csv` file declared here is presumably what --output_csv writes - confirm.
tuple val(barcode), path("${barcode}/${barcode}_JWR_check.h5.csv"), emit: nanosplicer_jwr
script:
// The command runs inside a per-barcode directory, hence the `../` prefix on
// the staged BAM path.
"""
mkdir ${barcode}
cd ${barcode}/
python3 /NanoSplicer/bin/JWR_checker.py --output_csv ../${bam} ${barcode}_JWR_check.h5
"""
}
...@@ -5,22 +5,24 @@ params.rna_count_out = "" ...@@ -5,22 +5,24 @@ params.rna_count_out = ""
process rna_count{ process rna_count{
container = "${container_url}" container = "${container_url}"
label "small_mem_mono_cpus" label "small_mem_mono_cpus"
tag "RNA quantification" tag "${barcode}"
if (params.rna_count_out != "") { if (params.rna_count_out != "") {
publishDir "results/${params.rna_count_out}", mode: 'copy' publishDir "results/${params.rna_count_out}", mode: 'copy'
} }
input: input:
path(spvariants) tuple val(barcode), path(spvariants)
path(classification) tuple val(barcode), path(classification)
output: output:
path("*.csv") path("${barcode}/*.csv")
path("*.pdf") path("${barcode}/*.pdf")
path("*.png") path("${barcode}/*.png")
script: script:
""" """
Rscript /HBV_RNAs_count.R -s ${spvariants} -c ${classification} mkdir ${barcode}
cd ${barcode}/
Rscript /HBV_RNAs_count.R -s ../${spvariants} -c ../${classification}
""" """
} }
...@@ -24,21 +24,23 @@ samtools sort -@ ${task.cpus} ${bam} -O BAM -o ${bam.simpleName}_sorted.bam ...@@ -24,21 +24,23 @@ samtools sort -@ ${task.cpus} ${bam} -O BAM -o ${bam.simpleName}_sorted.bam
params.start_position_counts_out = "" params.start_position_counts_out = ""
process start_position_counts { process start_position_counts {
tag "Start positions count" tag "${barcode}"
label "big_mem_multi_cpus" label "big_mem_multi_cpus"
publishDir "results/${params.start_position_counts_out}", mode: 'copy' publishDir "results/${params.start_position_counts_out}", mode: 'copy'
input: input:
tuple path(bam), path(index) tuple val(barcode), path(bam), path(index)
output: output:
path "*", emit: count tuple val(barcode), path("${barcode}/${barcode}_start_positions_counts.txt"), emit: count
script: script:
""" """
samtools view -F 260 ${bam} | mkdir ${barcode}
cd ${barcode}/
samtools view -F 260 ../${bam} |
cut -f 1,4 | cut -f 1,4 |
sort > Start_positions_counts.txt sort > ${barcode}_start_positions_counts.txt
""" """
} }
...@@ -67,20 +69,22 @@ params.indexed_bam_out ="" ...@@ -67,20 +69,22 @@ params.indexed_bam_out =""
process sort_index_bam { process sort_index_bam {
container = "${container_url}" container = "${container_url}"
label "big_mem_multi_cpus" label "big_mem_multi_cpus"
tag "sorting" tag "${barcode}"
if (params.indexed_bam_out != "") { if (params.indexed_bam_out != "") {
publishDir "results/${params.indexed_bam_out}", mode: 'copy' publishDir "results/${params.indexed_bam_out}", mode: 'copy'
} }
input: input:
path(bam) tuple val(barcode), path(bam)
output: output:
tuple path("*sorted.bam"), path("*.bai"), emit: indexed_bam tuple val(barcode), path("${barcode}/*sorted.bam"), path("${barcode}/*.bai"), emit: indexed_bam
script: script:
""" """
samtools sort -@ ${task.cpus} ${bam} -o ${bam.simpleName}_sorted.bam mkdir ${barcode}
samtools index -@ ${task.cpus} ${bam.simpleName}_sorted.bam cd ${barcode}/
samtools sort -@ ${task.cpus} ../${bam} -o ${barcode}_sorted.bam
samtools index -@ ${task.cpus} ${barcode}_sorted.bam
""" """
} }
\ No newline at end of file
...@@ -29,35 +29,37 @@ params.seqkit_grep_out = "" ...@@ -29,35 +29,37 @@ params.seqkit_grep_out = ""
process seqkit_grep { process seqkit_grep {
container = "${container_url}" container = "${container_url}"
label "small_mem_multi_cpus" label "small_mem_multi_cpus"
tag "Filter_reads" tag "${barcode}"
if (params.seqkit_grep_out != "") { if (params.seqkit_grep_out != "") {
publishDir "results/${params.seqkit_grep_out}", mode: 'copy' publishDir "results/${params.seqkit_grep_out}", mode: 'copy'
} }
input: input:
path(fastq) tuple val(barcode), path(fastq)
val(adapt) val(adapt)
val(gsp) val(gsp)
output: output:
path("filtered_5RACE_GSP.fastq"), emit: filtered_fastq tuple val(barcode), path("${barcode}/${barcode}_filtered_5RACE_GSP.fastq"), emit: filtered_fastq
path("seq_stats.csv") path("${barcode}/*.csv")
path("*.txt") path("${barcode}/*.txt")
path("filtered_5RACE.fastq") path("${barcode}/${barcode}_filtered_5RACE.fastq")
script: script:
lgadapt = Math.round(adapt.size().div(10)) lgadapt = Math.round(adapt.size().div(10))
lggsp = Math.round(gsp.size().div(10)) lggsp = Math.round(gsp.size().div(10))
""" """
mkdir ${barcode}
cd ${barcode}/
echo "mismatch allowed to 5'RACE adapter: ${lgadapt}" > mismatch.txt echo "mismatch allowed to 5'RACE adapter: ${lgadapt}" > mismatch.txt
echo "mismatch allowed to Gene Specific primer: ${lggsp}" >> mismatch.txt echo "mismatch allowed to Gene Specific primer: ${lggsp}" >> mismatch.txt
echo ${adapt} > adapt.txt echo ${adapt} > adapt.txt
echo ${gsp} > gsp.txt echo ${gsp} > gsp.txt
seqkit grep -i -f adapt.txt -m ${lgadapt} ${fastq} -o filtered_5RACE.fastq -j ${task.cpus} seqkit grep -i -f adapt.txt -m ${lgadapt} ../${fastq} -o ${barcode}_filtered_5RACE.fastq -j ${task.cpus}
seqkit grep -i -f gsp.txt -m ${lggsp} filtered_5RACE.fastq -o filtered_5RACE_GSP.fastq -j ${task.cpus} seqkit grep -i -f gsp.txt -m ${lggsp} ${barcode}_filtered_5RACE.fastq -o ${barcode}_filtered_5RACE_GSP.fastq -j ${task.cpus}
seqkit stats ${fastq} -T -j ${task.cpus} > seq_stats.csv seqkit stats ../${fastq} -T -j ${task.cpus} > ${barcode}_seq_stats.csv
seqkit stats filtered_5RACE.fastq -T -j ${task.cpus} | tail -n1 >> seq_stats.csv seqkit stats ${barcode}_filtered_5RACE.fastq -T -j ${task.cpus} | tail -n1 >> ${barcode}_seq_stats.csv
seqkit stats filtered_5RACE_GSP.fastq -T -j ${task.cpus} | tail -n1 >> seq_stats.csv seqkit stats ${barcode}_filtered_5RACE_GSP.fastq -T -j ${task.cpus} | tail -n1 >> ${barcode}_seq_stats.csv
""" """
} }
...@@ -65,21 +67,24 @@ params.fastq_out = "" ...@@ -65,21 +67,24 @@ params.fastq_out = ""
process concatenate { process concatenate {
container = "${container_url}" container = "${container_url}"
label "big_mem_multi_cpus" label "big_mem_multi_cpus"
tag "Concatenate_reads" tag "${barcode}"
if (params.fastq_out != "") { if (params.fastq_out != "") {
publishDir "results/${params.fastq_out}", mode: 'copy' publishDir "results/${params.fastq_out}", mode: 'copy'
} }
input: input:
path fastq tuple val(barcode), path(fastq)
output: output:
path "merged.fastq.gz", emit: merged_fastq tuple val(barcode), path("${barcode}/${barcode}_merged.fastq.gz"), emit: merged_fastq
script: script:
""" """
path=\$(readlink -f ${fastq}) mv ${fastq} path_${fastq}
seqkit scat -j ${task.cpus} -f \${path} --gz-only > merged.fastq mkdir ${barcode}
gzip merged.fastq cd ${barcode}/
path=\$(readlink -f ../path_${fastq})
seqkit scat -j ${task.cpus} -f \${path} --gz-only > ${barcode}_merged.fastq
gzip ${barcode}_merged.fastq
""" """
} }
\ No newline at end of file
...@@ -5,21 +5,26 @@ params.start_position_counts_out ="" ...@@ -5,21 +5,26 @@ params.start_position_counts_out =""
process start_position_individuals{ process start_position_individuals{
container = "${container_url}" container = "${container_url}"
label "small_mem_mono_cpus" label "small_mem_mono_cpus"
tag "start positions" tag "${barcode}"
if (params.start_position_counts_out != "") { if (params.start_position_counts_out != "") {
publishDir "results/${params.start_position_counts_out}", mode: 'copy' publishDir "results/${params.start_position_counts_out}", mode: 'copy'
} }
input: input:
path(start_position_counts) tuple val(barcode), path(start_position_counts)
output: output:
path("Rplots.pdf") path("${barcode}/*.pdf")
path("*.png") path("${barcode}/*.png")
path("Count_reads_per_promoter.tsv") path("${barcode}/*.tsv")
path("classification_of_reads_per_RNA.txt"), emit: classification_of_reads tuple val(barcode), path("${barcode}/${barcode}_classification_of_reads_per_RNA.txt"), emit: classification_of_reads
script: script:
""" """
Rscript /Start_positions.R -i ${start_position_counts} mkdir ${barcode}
cd ${barcode}/
Rscript /Start_positions.R -i ../${start_position_counts}
mv classification_of_reads_per_RNA.txt ${barcode}_classification_of_reads_per_RNA.txt
mv Count_reads_per_promoter.tsv ${barcode}_count_reads_per_promoter.tsv
mv Rplots.pdf ${barcode}_Rplots.pdf
""" """
} }
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment