From c355cfa86a126d36b114db6ffe041d0239fa69fb Mon Sep 17 00:00:00 2001 From: Laurent Modolo <laurent.modolo@ens-lyon.fr> Date: Tue, 25 May 2021 14:23:47 +0200 Subject: [PATCH] gffread: add awk cmd to remove duplicated transcript --- src/nf_modules/gffread/main.nf | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/nf_modules/gffread/main.nf b/src/nf_modules/gffread/main.nf index dcc4bf5e..07e010e7 100644 --- a/src/nf_modules/gffread/main.nf +++ b/src/nf_modules/gffread/main.nf @@ -25,6 +25,7 @@ process gffread { file_prefix = file_id } """ - gffread ${gtf} -g ${fasta} -M -x ${file_prefix}.fasta + gffread ${gtf} -g ${fasta} -M -x dup_${file_prefix}.fasta + awk 'BEGIN {i = 1;} { if (\$1 ~ /^>/) { tmp = h[i]; h[i] = \$1; } else if (!a[\$1]) { s[i] = \$1; a[\$1] = "1"; i++; } else { h[i] = tmp; } } END { for (j = 1; j < i; j++) { print h[j]; print s[j]; } }' < dup_${file_prefix}.fasta > ${file_prefix}.fasta """ } -- GitLab