Verified Commit 6231fc3f authored by Laurent Modolo's avatar Laurent Modolo
Browse files

alntools: separate transcript length computation

parent c355cfa8
......@@ -13,16 +13,38 @@ process bam2ec {
input:
tuple val(file_id), path(bam), path(bam_idx)
tuple val(gtf_id), path(gtf)
tuple val(transcripts_lengths_id), path(transcripts_lengths)
output:
tuple val(file_id), path("${bam.simpleName}.bin"), emit: bin
tuple val(gtf_id), path("${gtf.simpleName}_transcripts_lengths.tsv"), emit: tsv
tuple val(gtf_id), path("${transcripts_lengths}"), emit: tsv
script:
"""
cp ${bam} file_bam.bam
cp ${bam_idx} file_bam.bam.bai
alntools bam2ec ${params.bam2ec} -t ${transcripts_lengths} file_bam.bam ${bam.simpleName}.bin
"""
}
params.gtf_to_transcripts_lengths = ""
params.gtf_to_transcripts_lengths_out = ""
process gtf_to_transcripts_lengths {
container = "${container_url}"
label "big_mem_mono_cpus"
tag "$file_id"
if (params.gtf_to_transcripts_lengths != "") {
publishDir "results/${params.gtf_to_transcripts_lengths}", mode: 'copy'
}
input:
tuple val(file_id), path(gtf)
output:
tuple val(file_id), path("${gtf.simpleName}_transcripts_lengths.tsv"), emit: tsv
script:
"""
awk -F"[\\t;]" '
\$3=="exon" {
ID=gensub(/transcript_id \\"(.*)\\"/, "\\\\1", "g", \$11);
......@@ -33,6 +55,5 @@ END{
{print i"\\t"LEN[i]}
}
' ${gtf} > ${gtf.simpleName}_transcripts_lengths.tsv
alntools bam2ec ${params.bam2ec} -t ${gtf.simpleName}_transcripts_lengths.tsv file_bam.bam ${bam.simpleName}.bin
"""
}
\ No newline at end of file
}
version = "1.0"
container_url = "lbmc/bioawk:${version}"
params.fasta_to_transcripts_lengths = ""
params.fasta_to_transcripts_lengths_out = ""
process fasta_to_transcripts_lengths {
container = "${container_url}"
label "big_mem_mono_cpus"
tag "$file_id"
if (params.fasta_to_transcripts_lengths_out != "") {
publishDir "results/${params.fasta_to_transcripts_lengths_out}", mode: 'copy'
}
input:
tuple val(file_id), path(fasta)
output:
tuple val(file_id), path("${fasta.simpleName}_transcripts_lengths.tsv"), emit: tsv
script:
"""
bioawk -c fastx '{print \$name length(\$seq)}' ${fasta} > ${fasta.simpleName}_transcripts_lengths.tsv
"""
}
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment