// SPDX-FileCopyrightText: 2022 Laurent Modolo <laurent.modolo@ens-lyon.fr>
//
// SPDX-License-Identifier: AGPL-3.0-or-later

// Tag of the container image shared by every process in this module.
version = "dd96682"
container_url = "lbmc/alntools:${version}"

// params.bam2ec     : extra command-line options inserted into the
//                     `alntools bam2ec` call.
// params.bam2ec_out : sub-directory of `results/` where outputs are copied;
//                     an empty string disables publishing.
params.bam2ec = ""
params.bam2ec_out = ""

// Run `alntools bam2ec` on a BAM file.
//
// Inputs:
//   - (file_id, bam, bam_idx): the BAM file and its index; the index is staged
//     next to the BAM but is not referenced on the command line.
//   - (transcripts_lengths_id, transcripts_lengths): file handed to the `-t`
//     option (presumably the table produced by gtf_to_transcripts_lengths;
//     confirm against the calling workflow).
// Outputs:
//   - bin    : (file_id, <bam.simpleName>.bin)
//   - tsv    : the transcripts_lengths input, re-emitted unchanged
//   - report : (file_id, <bam.simpleName>_bam2ec_report.txt) holding the
//              combined stdout/stderr of the command
process bam2ec {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  if (params.bam2ec_out != "") {
    publishDir "results/${params.bam2ec_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(bam), path(bam_idx)
    tuple val(transcripts_lengths_id), path(transcripts_lengths)

  output:
    tuple val(file_id), path("${bam.simpleName}.bin"), emit: bin
    tuple val(transcripts_lengths_id), path("${transcripts_lengths}"), emit: tsv
    tuple val(file_id), path("${bam.simpleName}_bam2ec_report.txt"), emit: report

  // ./tmp is created inside the task work directory and passed to `-d`.
  script:
"""
mkdir tmp
alntools bam2ec \
  -c 1 ${params.bam2ec} \
  -d ./tmp \
  -t ${transcripts_lengths} \
  -v \
  ${bam} ${bam.simpleName}.bin &> \
  ${bam.simpleName}_bam2ec_report.txt
"""
}

// params.gtf_to_transcripts_lengths     : reserved for extra options; it is
//                                         not used by the script below.
// params.gtf_to_transcripts_lengths_out : sub-directory of `results/` where
//                                         the TSV is copied; an empty string
//                                         disables publishing.
params.gtf_to_transcripts_lengths = ""
params.gtf_to_transcripts_lengths_out = ""

// Build a two-column TSV (transcript id, summed exon length) from a GTF file.
//
// Input:
//   - (file_id, gtf): the annotation file.
// Output:
//   - tsv : (file_id, <gtf.simpleName>_transcripts_lengths.tsv)
//
// The awk program splits each line on tabs and ';', keeps the rows whose third
// field is "exon", and adds (end - start + 1) to the running total of the
// transcript id extracted from field 11.
// NOTE(review): the transcript_id attribute is assumed to sit in field 11
// (i.e. the third attribute of the record) -- confirm for the GTF flavour
// in use. `gensub` is a GNU awk extension, so the container's awk must be gawk.
process gtf_to_transcripts_lengths {
  container = "${container_url}"
  label "big_mem_mono_cpus"
  tag "$file_id"
  // Publishing is driven by the `_out` parameter, like in bam2ec. The previous
  // code tested and interpolated the options parameter here, so setting
  // `gtf_to_transcripts_lengths_out` never published anything.
  if (params.gtf_to_transcripts_lengths_out != "") {
    publishDir "results/${params.gtf_to_transcripts_lengths_out}", mode: 'copy'
  }

  input:
    tuple val(file_id), path(gtf)

  output:
    tuple val(file_id), path("${gtf.simpleName}_transcripts_lengths.tsv"), emit: tsv

  script:
"""
awk -F"[\\t;]" '
\$3=="exon" {
  ID=gensub(/transcript_id \\"(.*)\\"/, "\\\\1", "g", \$11);
  LEN[ID]+=\$5-\$4+1;
}
END{
  for(i in LEN)
    {print i"\\t"LEN[i]}
}
' ${gtf} > ${gtf.simpleName}_transcripts_lengths.tsv
"""
}