Skip to content
Snippets Groups Projects
Commit eb1df0db authored by Xavier Grand's avatar Xavier Grand
Browse files

Add porechop and compress all fastq files during the workflow.

parent 29498d03
No related branches found
No related tags found
No related merge requests found
......@@ -30,6 +30,4 @@ RUN python3 setup.py install && \
apt remove --purge --yes git build-essential && \
apt autoremove --purge --yes
# Set entrypoint so container can be used as executable
ENTRYPOINT ["porechop"]
CMD ["-h"]
\ No newline at end of file
CMD ["bash"]
\ No newline at end of file
......@@ -7,4 +7,4 @@
# docker pull xgrand/porechop:0.2.4
docker build src/.docker_modules/porechop/0.2.4 -t 'xgrand/porechop:0.2.4'
docker push xgrand/porechop:0.2.4
docker buildx build --platform linux/amd64,linux/arm64 -t "xgrand/porechop:0.2.4" --push src/.docker_modules/porechop/0.2.4
\ No newline at end of file
# docker buildx build --platform linux/amd64,linux/arm64 -t "xgrand/porechop:0.2.4" --push src/.docker_modules/porechop/0.2.4
\ No newline at end of file
......@@ -104,7 +104,8 @@ params.kit_barcoding = ""
params.basecalling_out = "01_basecalling/"
params.barcoding_out = "02_barcoding/"
params.fastq_out = "03_fastq/"
params.seqkit_grep_out = "03_fastq/"
params.seqkit_grep_out = "14_seqkit/"
params.porechop_out = "15_porechop/"
params.cutadapt_out = "04_cutadapt/"
params.minimap2_genome_out = "05_minimap2/"
params.start_position_counts_out = "06_start_positions/"
......@@ -186,6 +187,7 @@ include { start_position_individuals } from "./nf_modules/start_positions/main.n
include { jwr_checker } from "./nf_modules/nanosplicer/main.nf"
include { junctions_nanosplicer } from "./nf_modules/junction_nanosplicer/main.nf"
include { rna_count } from "./nf_modules/rna_count/main.nf"
include { porechop } from "./nf_modules/porechop/main.nf"
/*
****************************************************************
......@@ -244,9 +246,13 @@ workflow {
//Filtration (seqkit_grep looks for the 5'RACE and the GSP (gene-specific primer) patterns in the reads to keep only mature RNAs)
seqkit_grep(concatenate.out.merged_fastq, params.adapt, params.gsp)
//Trimming with porechop
porechop(seqkit_grep.out.filtered_fastq)
//Cut off the 5'RACE sequence
cut_5pRACE(seqkit_grep.out.filtered_fastq, params.adapt)
cut_5pRACE(porechop.out.porechoped_fastq, params.adapt)
//cut_5pRACE(seqkit_grep.out.filtered_fastq, params.adapt)
//########################## MAPPING ##########################
......
......@@ -15,12 +15,12 @@ process cut_5pRACE {
val(adapt)
output:
tuple val(barcode), path("${barcode}_merged_porechoped_cut_fastq.fastq"), emit: fastq_cutadapt
tuple val(barcode), path("${barcode}_merged_porechoped_cut.fastq.gz"), emit: fastq_cutadapt
"""
cutadapt -e 0.2 -g ${adapt} \
--revcomp \
-o "${barcode}_merged_porechoped_cut_fastq.fastq" \
-o "${barcode}_merged_porechoped_cut.fastq.gz" \
${fastq}
"""
}
\ No newline at end of file
......@@ -108,6 +108,6 @@ process hbv_genome {
mkdir ${barcode}
cd ${barcode}/
minimap2 ${params.mapping_hbv_genome} -t ${task.cpus} -K ${memory} ../${genome} ../${fastq} |
samtools view -Shb - > ${barcode}_res.bam
samtools view -Shb -F4 -F2048 -F2064 - > ${barcode}_res.bam
"""
}
\ No newline at end of file
version = "0.2.4"
container_url = "xgrand/porechop:${version}"
params.porechop_out = ""
process porechop {
container = "${container_url}"
label "small_mem_multi_cpus"
tag "$file_id"
tag "$barcode"
if (params.porechop_out != "") {
publishDir "results/${params.porechop_out}", mode: 'copy'
}
input:
path(merged_fastq)
tuple val(barcode), path(fastq)
output:
path("*"), emit: porechoped_fastq
tuple val(barcode), path("*"), emit: porechoped_fastq
script:
"""
porechop -i ${merged_fastq} -o merged_porechoped.fastq --threads ${task.cpus}
porechop --input ${fastq} -o ${barcode}_merged_porechoped.fastq.gz --threads ${task.cpus}
"""
}
\ No newline at end of file
......@@ -40,11 +40,11 @@ process seqkit_grep {
val(gsp)
output:
tuple val(barcode), path("${barcode}/${barcode}_390bp_filtered_5RACE_GSP.fastq"), emit: filtered_fastq
tuple val(barcode), path("${barcode}/${barcode}_390bp_filtered_5RACE_GSP.fastq.gz"), emit: filtered_fastq
path("${barcode}/*.csv")
path("${barcode}/*.txt")
path("${barcode}/${barcode}_filtered_5RACE.fastq")
path("${barcode}/${barcode}_filtered_5RACE_GSP.fastq")
path("${barcode}/${barcode}_filtered_5RACE.fastq.gz")
path("${barcode}/${barcode}_filtered_5RACE_GSP.fastq.gz")
script:
lgadapt = Math.round(adapt.size().div(10))
......@@ -56,13 +56,14 @@ process seqkit_grep {
echo "mismatch allowed to Gene Specific primer: ${lggsp}" >> mismatch.txt
echo ${adapt} > adapt.txt
echo ${gsp} > gsp.txt
seqkit grep -i -f adapt.txt -m ${lgadapt} ../${fastq} -o ${barcode}_filtered_5RACE.fastq -j ${task.cpus}
seqkit grep -i -f gsp.txt -m ${lggsp} ${barcode}_filtered_5RACE.fastq -o ${barcode}_filtered_5RACE_GSP.fastq -j ${task.cpus}
seqkit seq --min-len 390 --remove-gaps ${barcode}_filtered_5RACE_GSP.fastq -j ${task.cpus} > ${barcode}_390bp_filtered_5RACE_GSP.fastq
seqkit grep -i -f adapt.txt -m ${lgadapt} ../${fastq} -o ${barcode}_filtered_5RACE.fastq.gz -j ${task.cpus}
seqkit grep -i -f gsp.txt -m ${lggsp} ${barcode}_filtered_5RACE.fastq.gz -o ${barcode}_filtered_5RACE_GSP.fastq.gz -j ${task.cpus}
seqkit seq --min-len 390 --remove-gaps ${barcode}_filtered_5RACE_GSP.fastq.gz -j ${task.cpus} > ${barcode}_390bp_filtered_5RACE_GSP.fastq
gzip ${barcode}_390bp_filtered_5RACE_GSP.fastq
seqkit stats ../${fastq} -T -j ${task.cpus} > ${barcode}_seq_stats.csv
seqkit stats ${barcode}_filtered_5RACE.fastq -T -j ${task.cpus} | tail -n1 >> ${barcode}_seq_stats.csv
seqkit stats ${barcode}_filtered_5RACE_GSP.fastq -T -j ${task.cpus} | tail -n1 >> ${barcode}_seq_stats.csv
seqkit stats ${barcode}_390bp_filtered_5RACE_GSP.fastq -T -j ${task.cpus} | tail -n1 >> ${barcode}_seq_stats.csv
seqkit stats ${barcode}_filtered_5RACE.fastq.gz -T -j ${task.cpus} | tail -n1 >> ${barcode}_seq_stats.csv
seqkit stats ${barcode}_filtered_5RACE_GSP.fastq.gz -T -j ${task.cpus} | tail -n1 >> ${barcode}_seq_stats.csv
seqkit stats ${barcode}_390bp_filtered_5RACE_GSP.fastq.gz -T -j ${task.cpus} | tail -n1 >> ${barcode}_seq_stats.csv
"""
}
......@@ -91,23 +92,3 @@ process concatenate {
gzip ${barcode}_merged.fastq
"""
}
// Writes the staged input path, followed by its fully resolved location
// (`readlink -f`), into test.txt. Appears to be a debugging aid — TODO confirm.
process concatenate_BC {
container = "${container_url}"
label "big_mem_multi_cpus"
// NOTE(review): `barcode` is not declared among this process's inputs
// (only `path` is) — confirm that this tag resolves at runtime.
tag "${barcode}"
// Publish the outputs (copy mode) under results/<params.fastq_out>, but only
// when that parameter is a non-empty string.
if (params.fastq_out != "") {
publishDir "results/${params.fastq_out}", mode: 'copy'
}
input:
// Single path input; the variable itself is named `path`.
path(path)
output:
path("test.txt")
// In the script below, `\$` escapes the dollar sign so that the
// `$(readlink ...)` command substitution is left for the shell instead of
// being interpolated by Groovy.
script:
"""
echo ${path} \$(readlink -f ${path}) > test.txt
"""
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment