Skip to content
Snippets Groups Projects
Commit 6a48c301 authored by aliarifki
Browse files

Ajout des barcodes

parent f2dca093
No related branches found
No related tags found
No related merge requests found
...@@ -129,9 +129,9 @@ Channel ...@@ -129,9 +129,9 @@ Channel
.set { input } .set { input }
Channel Channel
.of( params.adapt ) .of( params.adapt )
.ifEmpty { error "No adapter sequence defined." } .ifEmpty { error "No adapter sequence defined." }
.set { adapt } .set { adapt }
Channel Channel
.fromPath( params.genome ) .fromPath( params.genome )
...@@ -143,7 +143,10 @@ Channel ...@@ -143,7 +143,10 @@ Channel
.ifEmpty { error "No annotation defined, a gtf file describing transcripts and splice variants." } .ifEmpty { error "No annotation defined, a gtf file describing transcripts and splice variants." }
.set { gtf } .set { gtf }
// .map( it -> [it.baseName, it]) Channel
.fromPath(params.input+'*/', type: 'dir')
.map(it -> [it.baseName, it])
.set{barcodes}
/* /*
**************************************************************** ****************************************************************
...@@ -161,10 +164,8 @@ if(!params.skipBC) { ...@@ -161,10 +164,8 @@ if(!params.skipBC) {
} }
} }
// Replace concatenate by seqkit fct to parallelization: include { barecode } from "./nf_modules/barecode/main.nf"
include { concatenate } from "./nf_modules/seqkit/main.nf" include { concatenate } from "./nf_modules/seqkit/main.nf"
//include { concatenate } from "./nf_modules/concatenate/main.nf"
include { cut_5pRACE } from "./nf_modules/cutadapt/main.nf" include { cut_5pRACE } from "./nf_modules/cutadapt/main.nf"
include { hbv_genome } from "./nf_modules/minimap2/main.nf" include { hbv_genome } from "./nf_modules/minimap2/main.nf"
include { seqkit_grep } from "./nf_modules/seqkit/main.nf" include { seqkit_grep } from "./nf_modules/seqkit/main.nf"
...@@ -178,9 +179,6 @@ include { junctions_nanosplicer } from "./nf_modules/junction_nanosplicer/main.n ...@@ -178,9 +179,6 @@ include { junctions_nanosplicer } from "./nf_modules/junction_nanosplicer/main.n
include { rna_count } from "./nf_modules/rna_count/main.nf" include { rna_count } from "./nf_modules/rna_count/main.nf"
// creation des fonctions NanoSplicer:
// include { jwr_check } from "./nf_modules/nanosplicer/main.nf"
/* /*
**************************************************************** ****************************************************************
Workflow Workflow
...@@ -189,42 +187,41 @@ include { rna_count } from "./nf_modules/rna_count/main.nf" ...@@ -189,42 +187,41 @@ include { rna_count } from "./nf_modules/rna_count/main.nf"
workflow { workflow {
//######################## BASECALLING ######################## //######################## BASECALLING ########################
if(params.skipBC) { if(params.skipBC) { // we take fastq files as input and skip basecalling
concatenate(params.input) concatenate(barcodes)
// Replace by seqkit scat to parallelization
} }
else {
//il reste à adapter ça
else { // we take fast5 files as input and proceed to basecalling with guppy
if(params.gpu_mode) { if(params.gpu_mode) {
basecall_fast5_gpu(input) basecall_fast5_gpu(input)
concatenate(basecall_fast5_gpu.out.pass) concatenate(basecall_fast5_gpu.out.pass)
// Replace by seqkit scat to parallelization
} }
else { else {
basecall_fast5_cpu(input) basecall_fast5_cpu(input)
concatenate(basecall_fast5_cpu.out.pass) concatenate(basecall_fast5_cpu.out.pass)
// Replace by seqkit scat to parallelization
} }
} }
//####################### PREPROCESSING ####################### //####################### PREPROCESSING #######################
//Filtration (seqkit_grep looks for the 5'RACE and the gsp patterns in the reads to keep only mature ARNs) //Filtration (seqkit_grep looks for the 5'RACE and the gsp patterns in the reads to keep only mature ARNs)
seqkit_grep(concatenate.out.merged_fastq, params.adapt, params.gsp) seqkit_grep(concatenate.out.merged_fastq, params.adapt, params.gsp)
//Cut of the 5'RACE sequence //Cut of the 5'RACE sequence
cut_5pRACE(seqkit_grep.out.filtered_fastq, params.adapt) cut_5pRACE(seqkit_grep.out.filtered_fastq, params.adapt)
//########################## MAPPING ########################## //########################## MAPPING ##########################
hbv_genome(cut_5pRACE.out.fastq_cutadapt, genome.collect())
hbv_genome(cut_5pRACE.out.fastq_cutadapt, genome)
sort_index_bam(hbv_genome.out.bam) sort_index_bam(hbv_genome.out.bam)
// index_bam(sort_bam_genome.out.sorted_bam.collect())
//###################### START POSITIONS ####################### //###################### START POSITIONS #######################
......
...@@ -4,23 +4,23 @@ container_url = "xgrand/cutadapt:${version}" ...@@ -4,23 +4,23 @@ container_url = "xgrand/cutadapt:${version}"
process cut_5pRACE { process cut_5pRACE {
container = "${container_url}" container = "${container_url}"
label "small_mem_mono_cpus" label "small_mem_mono_cpus"
tag "cutadapt" tag "${barcode}"
if (params.cutadapt_out != "") { if (params.cutadapt_out != "") {
publishDir "results/${params.cutadapt_out}", mode: 'copy' publishDir "results/${params.cutadapt_out}", mode: 'copy'
} }
input: input:
path(fastq) tuple val(barcode), path(fastq)
val(adapt) val(adapt)
output: output:
path("*_cut_*"), emit: fastq_cutadapt tuple val(barcode), path("${barcode}_merged_porechoped_cut_fastq.fastq"), emit: fastq_cutadapt
""" """
cutadapt -e 0.2 -g ${adapt} \ cutadapt -e 0.2 -g ${adapt} \
--revcomp \ --revcomp \
-o "merged_porechoped_cut_fastq.fastq" \ -o "${barcode}_merged_porechoped_cut_fastq.fastq" \
${fastq} ${fastq}
""" """
} }
\ No newline at end of file
...@@ -5,23 +5,24 @@ params.nanosplicer_out = "" ...@@ -5,23 +5,24 @@ params.nanosplicer_out = ""
process junctions_nanosplicer{ process junctions_nanosplicer{
container = "${container_url}" container = "${container_url}"
label "small_mem_mono_cpus" label "small_mem_mono_cpus"
tag "identification de variants d'épissage" tag "${barcode}"
if (params.nanosplicer_out != "") { if (params.nanosplicer_out != "") {
publishDir "results/${params.nanosplicer_out}", mode: 'copy' publishDir "results/${params.nanosplicer_out}", mode: 'copy'
} }
input: input:
path(txt) tuple val(barcode), path(txt)
path(csv) tuple val(barcode), path(csv)
output: output:
path("Rplots.pdf") path("${barcode}/JWR_check_parsed.csv")
path("JWR_check_parsed.csv") tuple val(barcode), path("${barcode}/${barcode}_identified_SPvariants.csv"), emit: identified_SPvariants
path("*.png")
path("identified_SPvariants.csv"), emit: identified_SPvariants
script: script:
""" """
Rscript /Junctions_NanoSplicer.R -c ${txt} -j ${csv} mkdir ${barcode}
cd ${barcode}/
Rscript /Junctions_NanoSplicer.R -c ../${txt} -j ../${csv}
mv identified_SPvariants.csv ${barcode}_identified_SPvariants.csv
""" """
} }
\ No newline at end of file
...@@ -89,22 +89,25 @@ params.mapping_hbv_genome = "-ax splice --secondary=no -G 1650 -u n --eqx" ...@@ -89,22 +89,25 @@ params.mapping_hbv_genome = "-ax splice --secondary=no -G 1650 -u n --eqx"
process hbv_genome { process hbv_genome {
container = "${container_url}" container = "${container_url}"
label "big_mem_multi_cpus" label "big_mem_multi_cpus"
tag "${barcode}"
if (params.minimap2_genome_out != "") { if (params.minimap2_genome_out != "") {
publishDir "results/${params.minimap2_genome_out}", mode: 'copy' publishDir "results/${params.minimap2_genome_out}", mode: 'copy'
} }
input: input:
path(fastq) tuple val(barcode), path(fastq)
path(genome) path(genome)
output: output:
path("*"), emit: bam tuple val(barcode), path("${barcode}/${barcode}_res.bam"), emit: bam
script: script:
memory = "${task.memory}" - ~/\s*GB/ memory = "${task.memory}" - ~/\s*GB/
memory = memory.toInteger() / (task.cpus + 1.0) memory = memory.toInteger() / (task.cpus + 1.0)
""" """
minimap2 ${params.mapping_hbv_genome} -t${task.cpus} -K ${memory} ${genome} ${fastq} | mkdir ${barcode}
samtools view -Shb - > res.bam cd ${barcode}/
minimap2 ${params.mapping_hbv_genome} -t ${task.cpus} -K ${memory} ../${genome} ../${fastq} |
samtools view -Shb - > ${barcode}_res.bam
""" """
} }
\ No newline at end of file
version = "1.0"
container_url = "xgrand/nanosplicer:${version}"
params.nanosplicer_out = ""
// Runs the NanoSplicer JWR_checker.py script on one barcode's BAM file,
// working inside a sub-directory named after the barcode.
//
// Input : tuple (barcode, bam, index)
//         - barcode : value used as task tag, output sub-directory and file prefix
//         - bam     : BAM file handed to JWR_checker.py
//         - index   : staged with the BAM but never referenced in the script
//                     NOTE(review): presumably the .bai the tool needs next to
//                     the BAM -- confirm
// Output: tuple (barcode, "<barcode>/<barcode>_JWR_check.h5.csv"),
//         emitted as `nanosplicer_jwr`
process jwr_checker {
container = "${container_url}"
label "big_mem_multi_cpus"
tag "${barcode}"
// Results are published (copied) under results/ only when an output
// directory name was supplied through params.nanosplicer_out.
if (params.nanosplicer_out != "") {
publishDir "results/${params.nanosplicer_out}", mode: 'copy'
}
input:
tuple val(barcode), path(bam), path(index)
output:
// The script names its output "<barcode>_JWR_check.h5"; the declared file is
// the ".h5.csv" companion.
// NOTE(review): presumably written because of --output_csv -- confirm against
// the JWR_checker.py usage text.
tuple val(barcode), path("${barcode}/${barcode}_JWR_check.h5.csv"), emit: nanosplicer_jwr
script:
// The staged BAM sits in the task work directory, hence the "../" prefix once
// the script has moved into the per-barcode sub-directory.
"""
mkdir ${barcode}
cd ${barcode}/
python3 /NanoSplicer/bin/JWR_checker.py --output_csv ../${bam} ${barcode}_JWR_check.h5
"""
}
...@@ -5,22 +5,24 @@ params.rna_count_out = "" ...@@ -5,22 +5,24 @@ params.rna_count_out = ""
process rna_count{ process rna_count{
container = "${container_url}" container = "${container_url}"
label "small_mem_mono_cpus" label "small_mem_mono_cpus"
tag "RNA quantification" tag "${barcode}"
if (params.rna_count_out != "") { if (params.rna_count_out != "") {
publishDir "results/${params.rna_count_out}", mode: 'copy' publishDir "results/${params.rna_count_out}", mode: 'copy'
} }
input: input:
path(spvariants) tuple val(barcode), path(spvariants)
path(classification) tuple val(barcode), path(classification)
output: output:
path("*.csv") path("${barcode}/*.csv")
path("*.pdf") path("${barcode}/*.pdf")
path("*.png") path("${barcode}/*.png")
script: script:
""" """
Rscript /HBV_RNAs_count.R -s ${spvariants} -c ${classification} mkdir ${barcode}
cd ${barcode}/
Rscript /HBV_RNAs_count.R -s ../${spvariants} -c ../${classification}
""" """
} }
...@@ -24,21 +24,23 @@ samtools sort -@ ${task.cpus} ${bam} -O BAM -o ${bam.simpleName}_sorted.bam ...@@ -24,21 +24,23 @@ samtools sort -@ ${task.cpus} ${bam} -O BAM -o ${bam.simpleName}_sorted.bam
params.start_position_counts_out = "" params.start_position_counts_out = ""
process start_position_counts { process start_position_counts {
tag "Start positions count" tag "${barcode}"
label "big_mem_multi_cpus" label "big_mem_multi_cpus"
publishDir "results/${params.start_position_counts_out}", mode: 'copy' publishDir "results/${params.start_position_counts_out}", mode: 'copy'
input: input:
tuple path(bam), path(index) tuple val(barcode), path(bam), path(index)
output: output:
path "*", emit: count tuple val(barcode), path("${barcode}/${barcode}_start_positions_counts.txt"), emit: count
script: script:
""" """
samtools view -F 260 ${bam} | mkdir ${barcode}
cd ${barcode}/
samtools view -F 260 ../${bam} |
cut -f 1,4 | cut -f 1,4 |
sort > Start_positions_counts.txt sort > ${barcode}_start_positions_counts.txt
""" """
} }
...@@ -67,20 +69,22 @@ params.indexed_bam_out ="" ...@@ -67,20 +69,22 @@ params.indexed_bam_out =""
process sort_index_bam { process sort_index_bam {
container = "${container_url}" container = "${container_url}"
label "big_mem_multi_cpus" label "big_mem_multi_cpus"
tag "sorting" tag "${barcode}"
if (params.indexed_bam_out != "") { if (params.indexed_bam_out != "") {
publishDir "results/${params.indexed_bam_out}", mode: 'copy' publishDir "results/${params.indexed_bam_out}", mode: 'copy'
} }
input: input:
path(bam) tuple val(barcode), path(bam)
output: output:
tuple path("*sorted.bam"), path("*.bai"), emit: indexed_bam tuple val(barcode), path("${barcode}/*sorted.bam"), path("${barcode}/*.bai"), emit: indexed_bam
script: script:
""" """
samtools sort -@ ${task.cpus} ${bam} -o ${bam.simpleName}_sorted.bam mkdir ${barcode}
samtools index -@ ${task.cpus} ${bam.simpleName}_sorted.bam cd ${barcode}/
samtools sort -@ ${task.cpus} ../${bam} -o ${barcode}_sorted.bam
samtools index -@ ${task.cpus} ${barcode}_sorted.bam
""" """
} }
\ No newline at end of file
...@@ -29,35 +29,37 @@ params.seqkit_grep_out = "" ...@@ -29,35 +29,37 @@ params.seqkit_grep_out = ""
process seqkit_grep { process seqkit_grep {
container = "${container_url}" container = "${container_url}"
label "small_mem_multi_cpus" label "small_mem_multi_cpus"
tag "Filter_reads" tag "${barcode}"
if (params.seqkit_grep_out != "") { if (params.seqkit_grep_out != "") {
publishDir "results/${params.seqkit_grep_out}", mode: 'copy' publishDir "results/${params.seqkit_grep_out}", mode: 'copy'
} }
input: input:
path(fastq) tuple val(barcode), path(fastq)
val(adapt) val(adapt)
val(gsp) val(gsp)
output: output:
path("filtered_5RACE_GSP.fastq"), emit: filtered_fastq tuple val(barcode), path("${barcode}/${barcode}_filtered_5RACE_GSP.fastq"), emit: filtered_fastq
path("seq_stats.csv") path("${barcode}/*.csv")
path("*.txt") path("${barcode}/*.txt")
path("filtered_5RACE.fastq") path("${barcode}/${barcode}_filtered_5RACE.fastq")
script: script:
lgadapt = Math.round(adapt.size().div(10)) lgadapt = Math.round(adapt.size().div(10))
lggsp = Math.round(gsp.size().div(10)) lggsp = Math.round(gsp.size().div(10))
""" """
mkdir ${barcode}
cd ${barcode}/
echo "mismatch allowed to 5'RACE adapter: ${lgadapt}" > mismatch.txt echo "mismatch allowed to 5'RACE adapter: ${lgadapt}" > mismatch.txt
echo "mismatch allowed to Gene Specific primer: ${lggsp}" >> mismatch.txt echo "mismatch allowed to Gene Specific primer: ${lggsp}" >> mismatch.txt
echo ${adapt} > adapt.txt echo ${adapt} > adapt.txt
echo ${gsp} > gsp.txt echo ${gsp} > gsp.txt
seqkit grep -i -f adapt.txt -m ${lgadapt} ${fastq} -o filtered_5RACE.fastq -j ${task.cpus} seqkit grep -i -f adapt.txt -m ${lgadapt} ../${fastq} -o ${barcode}_filtered_5RACE.fastq -j ${task.cpus}
seqkit grep -i -f gsp.txt -m ${lggsp} filtered_5RACE.fastq -o filtered_5RACE_GSP.fastq -j ${task.cpus} seqkit grep -i -f gsp.txt -m ${lggsp} ${barcode}_filtered_5RACE.fastq -o ${barcode}_filtered_5RACE_GSP.fastq -j ${task.cpus}
seqkit stats ${fastq} -T -j ${task.cpus} > seq_stats.csv seqkit stats ../${fastq} -T -j ${task.cpus} > ${barcode}_seq_stats.csv
seqkit stats filtered_5RACE.fastq -T -j ${task.cpus} | tail -n1 >> seq_stats.csv seqkit stats ${barcode}_filtered_5RACE.fastq -T -j ${task.cpus} | tail -n1 >> ${barcode}_seq_stats.csv
seqkit stats filtered_5RACE_GSP.fastq -T -j ${task.cpus} | tail -n1 >> seq_stats.csv seqkit stats ${barcode}_filtered_5RACE_GSP.fastq -T -j ${task.cpus} | tail -n1 >> ${barcode}_seq_stats.csv
""" """
} }
...@@ -65,21 +67,24 @@ params.fastq_out = "" ...@@ -65,21 +67,24 @@ params.fastq_out = ""
process concatenate { process concatenate {
container = "${container_url}" container = "${container_url}"
label "big_mem_multi_cpus" label "big_mem_multi_cpus"
tag "Concatenate_reads" tag "${barcode}"
if (params.fastq_out != "") { if (params.fastq_out != "") {
publishDir "results/${params.fastq_out}", mode: 'copy' publishDir "results/${params.fastq_out}", mode: 'copy'
} }
input: input:
path fastq tuple val(barcode), path(fastq)
output: output:
path "merged.fastq.gz", emit: merged_fastq tuple val(barcode), path("${barcode}/${barcode}_merged.fastq.gz"), emit: merged_fastq
script: script:
""" """
path=\$(readlink -f ${fastq}) mv ${fastq} path_${fastq}
seqkit scat -j ${task.cpus} -f \${path} --gz-only > merged.fastq mkdir ${barcode}
gzip merged.fastq cd ${barcode}/
path=\$(readlink -f ../path_${fastq})
seqkit scat -j ${task.cpus} -f \${path} --gz-only > ${barcode}_merged.fastq
gzip ${barcode}_merged.fastq
""" """
} }
\ No newline at end of file
...@@ -5,21 +5,26 @@ params.start_position_counts_out ="" ...@@ -5,21 +5,26 @@ params.start_position_counts_out =""
process start_position_individuals{ process start_position_individuals{
container = "${container_url}" container = "${container_url}"
label "small_mem_mono_cpus" label "small_mem_mono_cpus"
tag "start positions" tag "${barcode}"
if (params.start_position_counts_out != "") { if (params.start_position_counts_out != "") {
publishDir "results/${params.start_position_counts_out}", mode: 'copy' publishDir "results/${params.start_position_counts_out}", mode: 'copy'
} }
input: input:
path(start_position_counts) tuple val(barcode), path(start_position_counts)
output: output:
path("Rplots.pdf") path("${barcode}/*.pdf")
path("*.png") path("${barcode}/*.png")
path("Count_reads_per_promoter.tsv") path("${barcode}/*.tsv")
path("classification_of_reads_per_RNA.txt"), emit: classification_of_reads tuple val(barcode), path("${barcode}/${barcode}_classification_of_reads_per_RNA.txt"), emit: classification_of_reads
script: script:
""" """
Rscript /Start_positions.R -i ${start_position_counts} mkdir ${barcode}
cd ${barcode}/
Rscript /Start_positions.R -i ../${start_position_counts}
mv classification_of_reads_per_RNA.txt ${barcode}_classification_of_reads_per_RNA.txt
mv Count_reads_per_promoter.tsv ${barcode}_count_reads_per_promoter.tsv
mv Rplots.pdf ${barcode}_Rplots.pdf
""" """
} }
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment