diff --git a/src/nf_modules/alntools/main.nf b/src/nf_modules/alntools/main.nf
index 9644c02c145cc3bd77be4e44105dfc31ab030476..26ba48f82d5267dfc604c48ba1c00924da7a7485 100644
--- a/src/nf_modules/alntools/main.nf
+++ b/src/nf_modules/alntools/main.nf
@@ -13,16 +13,38 @@ process bam2ec {
 
   input:
     tuple val(file_id), path(bam), path(bam_idx)
-    tuple val(gtf_id), path(gtf)
+    tuple val(transcripts_lengths_id), path(transcripts_lengths) // precomputed transcript-lengths table, passed to alntools via -t
 
   output:
     tuple val(file_id), path("${bam.simpleName}.bin"), emit: bin
-    tuple val(gtf_id), path("${gtf.simpleName}_transcripts_lengths.tsv"), emit: tsv
+    tuple val(transcripts_lengths_id), path("${transcripts_lengths}"), emit: tsv // re-emits the input table; id must be the one bound in input:
 
   script:
 """
 cp ${bam} file_bam.bam
 cp ${bam_idx} file_bam.bam.bai
+alntools bam2ec ${params.bam2ec} -t ${transcripts_lengths} file_bam.bam ${bam.simpleName}.bin
+"""
+}
+
+params.gtf_to_transcripts_lengths = "" // declared but not read by the process below
+params.gtf_to_transcripts_lengths_out = "" // sub-directory of results/ to publish into; empty string disables publishing
+process gtf_to_transcripts_lengths { // runs awk over the GTF and writes <gtf.simpleName>_transcripts_lengths.tsv
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.gtf_to_transcripts_lengths_out != "") {
+    publishDir "results/${params.gtf_to_transcripts_lengths_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(gtf)
+
+  output:
+    tuple val(file_id), path("${gtf.simpleName}_transcripts_lengths.tsv"), emit: tsv
+
+  script:
+"""
 awk -F"[\\t;]" '
 \$3=="exon" {
     ID=gensub(/transcript_id \\"(.*)\\"/, "\\\\1", "g", \$11);
@@ -33,6 +55,5 @@ END{
     {print i"\\t"LEN[i]}
 }
 ' ${gtf} > ${gtf.simpleName}_transcripts_lengths.tsv
-alntools bam2ec ${params.bam2ec} -t ${gtf.simpleName}_transcripts_lengths.tsv file_bam.bam ${bam.simpleName}.bin
 """
-}
\ No newline at end of file
+}
diff --git a/src/nf_modules/bioawk/main.nf b/src/nf_modules/bioawk/main.nf
new file mode 100644
index 0000000000000000000000000000000000000000..2322a0394baab55ee83ff644975fb2eb89807f05
--- /dev/null
+++ b/src/nf_modules/bioawk/main.nf
@@ -0,0 +1,24 @@
+version = "1.0"
+container_url = "lbmc/bioawk:${version}"
+
+params.fasta_to_transcripts_lengths = ""
+params.fasta_to_transcripts_lengths_out = "" // sub-directory of results/ to publish into; empty string disables publishing
+process fasta_to_transcripts_lengths { // writes one tab-separated "name, sequence length" row per FASTA record
+  container = "${container_url}"
+  label "big_mem_mono_cpus"
+  tag "$file_id"
+  if (params.fasta_to_transcripts_lengths_out != "") {
+    publishDir "results/${params.fasta_to_transcripts_lengths_out}", mode: 'copy'
+  }
+
+  input:
+    tuple val(file_id), path(fasta)
+
+  output:
+    tuple val(file_id), path("${fasta.simpleName}_transcripts_lengths.tsv"), emit: tsv
+
+  script:
+"""
+bioawk -c fastx '{print \$name"\\t"length(\$seq)}' ${fasta} > ${fasta.simpleName}_transcripts_lengths.tsv
+"""
+}
\ No newline at end of file