Skip to content
Snippets Groups Projects
Commit eb1df0db authored by Xavier Grand
Browse files

Add porechop and compress all fastq files during the workflow.

parent 29498d03
No related branches found
No related tags found
No related merge requests found
...@@ -30,6 +30,4 @@ RUN python3 setup.py install && \ ...@@ -30,6 +30,4 @@ RUN python3 setup.py install && \
apt remove --purge --yes git build-essential && \ apt remove --purge --yes git build-essential && \
apt autoremove --purge --yes apt autoremove --purge --yes
# Set entrypoint so container can be used as executable CMD ["bash"]
ENTRYPOINT ["porechop"] \ No newline at end of file
CMD ["-h"]
\ No newline at end of file
...@@ -7,4 +7,4 @@ ...@@ -7,4 +7,4 @@
# docker pull xgrand/porechop:0.2.4 # docker pull xgrand/porechop:0.2.4
docker build src/.docker_modules/porechop/0.2.4 -t 'xgrand/porechop:0.2.4' docker build src/.docker_modules/porechop/0.2.4 -t 'xgrand/porechop:0.2.4'
docker push xgrand/porechop:0.2.4 docker push xgrand/porechop:0.2.4
docker buildx build --platform linux/amd64,linux/arm64 -t "xgrand/porechop:0.2.4" --push src/.docker_modules/porechop/0.2.4 # docker buildx build --platform linux/amd64,linux/arm64 -t "xgrand/porechop:0.2.4" --push src/.docker_modules/porechop/0.2.4
\ No newline at end of file \ No newline at end of file
...@@ -104,7 +104,8 @@ params.kit_barcoding = "" ...@@ -104,7 +104,8 @@ params.kit_barcoding = ""
params.basecalling_out = "01_basecalling/" params.basecalling_out = "01_basecalling/"
params.barcoding_out = "02_barcoding/" params.barcoding_out = "02_barcoding/"
params.fastq_out = "03_fastq/" params.fastq_out = "03_fastq/"
params.seqkit_grep_out = "03_fastq/" params.seqkit_grep_out = "14_seqkit/"
params.porechop_out = "15_porechop/"
params.cutadapt_out = "04_cutadapt/" params.cutadapt_out = "04_cutadapt/"
params.minimap2_genome_out = "05_minimap2/" params.minimap2_genome_out = "05_minimap2/"
params.start_position_counts_out = "06_start_positions/" params.start_position_counts_out = "06_start_positions/"
...@@ -186,6 +187,7 @@ include { start_position_individuals } from "./nf_modules/start_positions/main.n ...@@ -186,6 +187,7 @@ include { start_position_individuals } from "./nf_modules/start_positions/main.n
include { jwr_checker } from "./nf_modules/nanosplicer/main.nf" include { jwr_checker } from "./nf_modules/nanosplicer/main.nf"
include { junctions_nanosplicer } from "./nf_modules/junction_nanosplicer/main.nf" include { junctions_nanosplicer } from "./nf_modules/junction_nanosplicer/main.nf"
include { rna_count } from "./nf_modules/rna_count/main.nf" include { rna_count } from "./nf_modules/rna_count/main.nf"
include { porechop } from "./nf_modules/porechop/main.nf"
/* /*
**************************************************************** ****************************************************************
...@@ -244,9 +246,13 @@ workflow { ...@@ -244,9 +246,13 @@ workflow {
//Filtration (seqkit_grep looks for the 5'RACE and the gsp patterns in the reads to keep only mature RNAs) //Filtration (seqkit_grep looks for the 5'RACE and the gsp patterns in the reads to keep only mature RNAs)
seqkit_grep(concatenate.out.merged_fastq, params.adapt, params.gsp) seqkit_grep(concatenate.out.merged_fastq, params.adapt, params.gsp)
//Trimming with porechop
porechop(seqkit_grep.out.filtered_fastq)
//Cut of the 5'RACE sequence //Cut of the 5'RACE sequence
cut_5pRACE(seqkit_grep.out.filtered_fastq, params.adapt) cut_5pRACE(porechop.out.porechoped_fastq, params.adapt)
//cut_5pRACE(seqkit_grep.out.filtered_fastq, params.adapt)
//########################## MAPPING ########################## //########################## MAPPING ##########################
......
...@@ -15,12 +15,12 @@ process cut_5pRACE { ...@@ -15,12 +15,12 @@ process cut_5pRACE {
val(adapt) val(adapt)
output: output:
tuple val(barcode), path("${barcode}_merged_porechoped_cut_fastq.fastq"), emit: fastq_cutadapt tuple val(barcode), path("${barcode}_merged_porechoped_cut.fastq.gz"), emit: fastq_cutadapt
""" """
cutadapt -e 0.2 -g ${adapt} \ cutadapt -e 0.2 -g ${adapt} \
--revcomp \ --revcomp \
-o "${barcode}_merged_porechoped_cut_fastq.fastq" \ -o "${barcode}_merged_porechoped_cut.fastq.gz" \
${fastq} ${fastq}
""" """
} }
\ No newline at end of file
...@@ -108,6 +108,6 @@ process hbv_genome { ...@@ -108,6 +108,6 @@ process hbv_genome {
mkdir ${barcode} mkdir ${barcode}
cd ${barcode}/ cd ${barcode}/
minimap2 ${params.mapping_hbv_genome} -t ${task.cpus} -K ${memory} ../${genome} ../${fastq} | minimap2 ${params.mapping_hbv_genome} -t ${task.cpus} -K ${memory} ../${genome} ../${fastq} |
samtools view -Shb - > ${barcode}_res.bam samtools view -Shb -F4 -F2048 -F2064 - > ${barcode}_res.bam
""" """
} }
\ No newline at end of file
version = "0.2.4" version = "0.2.4"
container_url = "xgrand/porechop:${version}" container_url = "xgrand/porechop:${version}"
params.porechop_out = ""
process porechop { process porechop {
container = "${container_url}" container = "${container_url}"
label "small_mem_multi_cpus" label "small_mem_multi_cpus"
tag "$file_id" tag "$barcode"
if (params.porechop_out != "") { if (params.porechop_out != "") {
publishDir "results/${params.porechop_out}", mode: 'copy' publishDir "results/${params.porechop_out}", mode: 'copy'
} }
input: input:
path(merged_fastq) tuple val(barcode), path(fastq)
output: output:
path("*"), emit: porechoped_fastq tuple val(barcode), path("*"), emit: porechoped_fastq
script: script:
""" """
porechop -i ${merged_fastq} -o merged_porechoped.fastq --threads ${task.cpus} porechop --input ${fastq} -o ${barcode}_merged_porechoped.fastq.gz --threads ${task.cpus}
""" """
} }
\ No newline at end of file
...@@ -40,11 +40,11 @@ process seqkit_grep { ...@@ -40,11 +40,11 @@ process seqkit_grep {
val(gsp) val(gsp)
output: output:
tuple val(barcode), path("${barcode}/${barcode}_390bp_filtered_5RACE_GSP.fastq"), emit: filtered_fastq tuple val(barcode), path("${barcode}/${barcode}_390bp_filtered_5RACE_GSP.fastq.gz"), emit: filtered_fastq
path("${barcode}/*.csv") path("${barcode}/*.csv")
path("${barcode}/*.txt") path("${barcode}/*.txt")
path("${barcode}/${barcode}_filtered_5RACE.fastq") path("${barcode}/${barcode}_filtered_5RACE.fastq.gz")
path("${barcode}/${barcode}_filtered_5RACE_GSP.fastq") path("${barcode}/${barcode}_filtered_5RACE_GSP.fastq.gz")
script: script:
lgadapt = Math.round(adapt.size().div(10)) lgadapt = Math.round(adapt.size().div(10))
...@@ -56,13 +56,14 @@ process seqkit_grep { ...@@ -56,13 +56,14 @@ process seqkit_grep {
echo "mismatch allowed to Gene Specific primer: ${lggsp}" >> mismatch.txt echo "mismatch allowed to Gene Specific primer: ${lggsp}" >> mismatch.txt
echo ${adapt} > adapt.txt echo ${adapt} > adapt.txt
echo ${gsp} > gsp.txt echo ${gsp} > gsp.txt
seqkit grep -i -f adapt.txt -m ${lgadapt} ../${fastq} -o ${barcode}_filtered_5RACE.fastq -j ${task.cpus} seqkit grep -i -f adapt.txt -m ${lgadapt} ../${fastq} -o ${barcode}_filtered_5RACE.fastq.gz -j ${task.cpus}
seqkit grep -i -f gsp.txt -m ${lggsp} ${barcode}_filtered_5RACE.fastq -o ${barcode}_filtered_5RACE_GSP.fastq -j ${task.cpus} seqkit grep -i -f gsp.txt -m ${lggsp} ${barcode}_filtered_5RACE.fastq.gz -o ${barcode}_filtered_5RACE_GSP.fastq.gz -j ${task.cpus}
seqkit seq --min-len 390 --remove-gaps ${barcode}_filtered_5RACE_GSP.fastq -j ${task.cpus} > ${barcode}_390bp_filtered_5RACE_GSP.fastq seqkit seq --min-len 390 --remove-gaps ${barcode}_filtered_5RACE_GSP.fastq.gz -j ${task.cpus} > ${barcode}_390bp_filtered_5RACE_GSP.fastq
gzip ${barcode}_390bp_filtered_5RACE_GSP.fastq
seqkit stats ../${fastq} -T -j ${task.cpus} > ${barcode}_seq_stats.csv seqkit stats ../${fastq} -T -j ${task.cpus} > ${barcode}_seq_stats.csv
seqkit stats ${barcode}_filtered_5RACE.fastq -T -j ${task.cpus} | tail -n1 >> ${barcode}_seq_stats.csv seqkit stats ${barcode}_filtered_5RACE.fastq.gz -T -j ${task.cpus} | tail -n1 >> ${barcode}_seq_stats.csv
seqkit stats ${barcode}_filtered_5RACE_GSP.fastq -T -j ${task.cpus} | tail -n1 >> ${barcode}_seq_stats.csv seqkit stats ${barcode}_filtered_5RACE_GSP.fastq.gz -T -j ${task.cpus} | tail -n1 >> ${barcode}_seq_stats.csv
seqkit stats ${barcode}_390bp_filtered_5RACE_GSP.fastq -T -j ${task.cpus} | tail -n1 >> ${barcode}_seq_stats.csv seqkit stats ${barcode}_390bp_filtered_5RACE_GSP.fastq.gz -T -j ${task.cpus} | tail -n1 >> ${barcode}_seq_stats.csv
""" """
} }
...@@ -91,23 +92,3 @@ process concatenate { ...@@ -91,23 +92,3 @@ process concatenate {
gzip ${barcode}_merged.fastq gzip ${barcode}_merged.fastq
""" """
} }
process concatenate_BC {
container = "${container_url}"
label "big_mem_multi_cpus"
tag "${barcode}"
if (params.fastq_out != "") {
publishDir "results/${params.fastq_out}", mode: 'copy'
}
input:
path(path)
output:
path("test.txt")
script:
"""
echo ${path} \$(readlink -f ${path}) > test.txt
"""
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment