diff --git a/src/.docker_modules/kb/0.26.3/Dockerfile b/src/.docker_modules/kb/0.26.3/Dockerfile
new file mode 100644
index 0000000000000000000000000000000000000000..8ca889e8ba7af9517a322070c797279dd6114742
--- /dev/null
+++ b/src/.docker_modules/kb/0.26.3/Dockerfile
@@ -0,0 +1,17 @@
+FROM python:3.9-slim
+
+ENV KB_VERSION="0.26.3"
+
+# apt-get has a stable CLI for scripted use (apt warns that it does not); the
+# package lists and the pip cache are dropped to keep the image layer small.
+RUN apt-get update \
+    && apt-get install -y --no-install-recommends procps \
+    && rm -rf /var/lib/apt/lists/* \
+    && pip3 install --no-cache-dir kb-python==${KB_VERSION} gffutils==0.10.1
+
+# t2g.py writes the transcript-to-gene table (t2g.txt) from a GTF file.
+COPY t2g.py /usr/bin/
+
+RUN chmod +x /usr/bin/t2g.py
+
+CMD [ "bash" ]
diff --git a/src/.docker_modules/kb/0.26.3/docker_init.sh b/src/.docker_modules/kb/0.26.3/docker_init.sh
new file mode 100755
index 0000000000000000000000000000000000000000..0294274a2518ef8622027bb56b49c4baded4a9d8
--- /dev/null
+++ b/src/.docker_modules/kb/0.26.3/docker_init.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+docker pull lbmc/kb:0.26.3
+docker build src/.docker_modules/kb/0.26.3 -t 'lbmc/kb:0.26.3'
+docker push lbmc/kb:0.26.3
diff --git a/src/.docker_modules/kb/0.26.3/t2g.py b/src/.docker_modules/kb/0.26.3/t2g.py
new file mode 100755
index 0000000000000000000000000000000000000000..f9f0b45dc89b385c3ed52dc252f8f09eb3bc8c74
--- /dev/null
+++ b/src/.docker_modules/kb/0.26.3/t2g.py
@@ -0,0 +1,59 @@
+#!/usr/local/bin/python
+"""Create a transcript-to-gene table (t2g.txt) from a GTF file."""
+import os
+import gffutils
+import argparse
+
+
+def validate_file(f):
+    """Return ``f`` unchanged when the path exists (argparse ``type=`` hook).
+
+    Raises:
+        argparse.ArgumentTypeError: if ``f`` does not exist.
+    """
+    if not os.path.exists(f):
+        # Argparse uses the ArgumentTypeError to give a rejection message like:
+        # error: argument input: x does not exist
+        raise argparse.ArgumentTypeError("{0} does not exist".format(f))
+    return f
+
+
+def strip_version(identifier):
+    """Return ``identifier`` cut at its first dot, e.g. 'T1.2' -> 'T1'."""
+    return str(identifier).split(".")[0]
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(
+        description="create transcript to genes file from a gtf file."
+    )
+    parser.add_argument(
+        "-g", "--gtf", dest="gtf", required=True, type=validate_file,
+        help="gtf file", metavar="FILE"
+    )
+    args = parser.parse_args()
+
+    # Build the annotation database in memory rather than on disk.
+    db = gffutils.create_db(
+        args.gtf,
+        dbfn=":memory:",
+        force=True,
+        merge_strategy="merge",
+        disable_infer_transcripts=False,
+        disable_infer_genes=False
+    )
+    # One "transcript_id<TAB>gene_id" line per transcript child; only
+    # features that have 'transcript' children contribute lines.
+    # NOTE(review): this runs one children() query per feature; iterating
+    # only gene features would likely be faster -- confirm before changing.
+    with open("t2g.txt", "w") as t2g:
+        for gene in db.all_features():
+            for transcript in db.children(
+                gene, featuretype='transcript', order_by='start'
+            ):
+                t2g.write(
+                    strip_version(transcript["transcript_id"][0])
+                    + "\t"
+                    + strip_version(gene["gene_id"][0])
+                    + "\n"
+                )
diff --git a/src/nf_modules/kb/main.nf b/src/nf_modules/kb/main.nf
index a6cbfa759925b1bdff0abde20f1942ca40a22b7a..f7cf0e0841a65ebf692946a39cf67eb2f4f9f963 100644
--- a/src/nf_modules/kb/main.nf
+++ b/src/nf_modules/kb/main.nf
@@ -1,4 +1,4 @@
-version = "0.26.0"
+version = "0.26.3"
 container_url = "lbmc/kb:${version}"
 
 params.index_fasta = ""