// seqkit wrapper processes.
// `version` and `container_url` are intentionally left without `def` so that
// they stay visible from inside the process definitions below.
version = "2.1.0"
container_url = "xgrand/seqkit:${version}"

/*
 * reversecomp
 *
 * Writes the adapter sequence to a single-record FASTA file and produces a
 * second FASTA file from it with `seqkit seq -r -p` (reverse + complement).
 *
 * input:
 *   val(adapt)  adapter sequence, echoed as-is into adapt.fasta
 * output:
 *   adapt_fst    adapt.fasta   (record named "adapt")
 *   adaptRC_fst  adaptRC.fasta (output of seqkit seq -r -p -t DNA -v)
 *
 * Results are published to results/<params.reversecomp_out> only when that
 * parameter is non-empty.
 */
params.reversecomp_out = ""
process reversecomp {
  container "${container_url}"
  label "small_mem_mono_cpus"
  tag "rev-comp"
  if (params.reversecomp_out != "") {
    publishDir "results/${params.reversecomp_out}", mode: 'copy'
  }

  input:
    val(adapt)

  output:
    path("adapt.fasta"), emit: adapt_fst
    path("adaptRC.fasta"), emit: adaptRC_fst

  script:
"""
# Create the FASTA file from scratch (header), then append the sequence.
echo ">adapt" > adapt.fasta
echo ${adapt} >> adapt.fasta
seqkit seq adapt.fasta -r -p -t DNA -v > adaptRC.fasta
"""
}

/*
 * seqkit_grep
 *
 * Two-step read filter: keeps the reads matching the adapter, then among
 * those keeps the reads matching the gene specific primer. The number of
 * mismatches allowed for each pattern is its length divided by 10, rounded.
 *
 * input:
 *   path(fastq)  reads to filter
 *   val(adapt)   adapter sequence
 *   val(gsp)     gene specific primer sequence
 * output:
 *   filtered_fastq          filtered_5RACE_GSP.fastq (adapter AND primer)
 *   seq_stats.csv           seqkit stats of the input and both filtered files
 *   *.txt                   mismatch.txt, adapt.txt, gsp.txt
 *   filtered_5RACE.fastq    reads kept after the adapter filter only
 *
 * Results are published to results/<params.seqkit_grep_out> only when that
 * parameter is non-empty.
 */
params.seqkit_grep_out = ""
process seqkit_grep {
  container "${container_url}"
  label "small_mem_multi_cpus"
  tag "Filter_reads"
  if (params.seqkit_grep_out != "") {
    publishDir "results/${params.seqkit_grep_out}", mode: 'copy'
  }

  input:
    path(fastq)
    val(adapt)
    val(gsp)

  output:
    path("filtered_5RACE_GSP.fastq"), emit: filtered_fastq
    path("seq_stats.csv")
    path("*.txt")
    path("filtered_5RACE.fastq")

  script:
    // `def` keeps these local to the task; without it they would be
    // script-global and shared between concurrently evaluated tasks.
    def lgadapt = Math.round(adapt.size().div(10))
    def lggsp = Math.round(gsp.size().div(10))
"""
echo "mismatch allowed to 5'RACE adapter: ${lgadapt}" > mismatch.txt
echo "mismatch allowed to Gene Specific primer: ${lggsp}" >> mismatch.txt
echo ${adapt} > adapt.txt
echo ${gsp} > gsp.txt

# -s forces matching against the read sequence, including when the
# mismatch budget is 0 (patterns shorter than 5 nt).
seqkit grep -s -i -f adapt.txt -m ${lgadapt} ${fastq} -o filtered_5RACE.fastq -j ${task.cpus}
seqkit grep -s -i -f gsp.txt -m ${lggsp} filtered_5RACE.fastq -o filtered_5RACE_GSP.fastq -j ${task.cpus}

# First call keeps the header line; the next two append only their data row.
seqkit stats ${fastq} -T -j ${task.cpus} > seq_stats.csv
seqkit stats filtered_5RACE.fastq -T -j ${task.cpus} | tail -n1 >> seq_stats.csv
seqkit stats filtered_5RACE_GSP.fastq -T -j ${task.cpus} | tail -n1 >> seq_stats.csv
"""
}

/*
 * concatenate
 *
 * Runs `seqkit scat` on a directory, restricted to .gz files (--gz-only),
 * redirects the concatenated stream to merged.fastq and gzips it.
 *
 * input:
 *   path(fastq_dir)  directory handed to seqkit scat
 * output:
 *   merged_fastq     merged.fastq.gz
 *
 * Results are published to results/<params.fastq_out> only when that
 * parameter is non-empty.
 */
params.fastq_out = ""
process concatenate {
  container "${container_url}"
  label "big_mem_multi_cpus"
  tag "Concatenate_reads"
  if (params.fastq_out != "") {
    publishDir "results/${params.fastq_out}", mode: 'copy'
  }

  input:
    path(fastq_dir)

  output:
    path "merged.fastq.gz", emit: merged_fastq

  script:
"""
seqkit scat -j ${task.cpus} -f ${fastq_dir} --gz-only > merged.fastq
gzip merged.fastq
"""
}