Skip to content
Snippets Groups Projects
Verified Commit 6efd55d6 authored by Laurent Modolo
Browse files

kb: add transcript to gene script to Docker

parent c97a2a20
No related branches found
No related tags found
Loading
......@@ -2,6 +2,10 @@ FROM python:3.9-slim
# Version of kb-python to install; interpolated into the pip commands below.
ENV KB_VERSION="0.26.0"
# NOTE(review): this RUN line and the next one look like the removed/added
# pair of a diff hunk -- presumably only the second (which also installs
# gffutils) belongs in the resulting Dockerfile; confirm against the repo.
RUN apt update && apt install -y procps && pip3 install kb-python==${KB_VERSION}
# Install procps, kb-python at KB_VERSION, and gffutils pinned to 0.10.1.
RUN apt update && apt install -y procps && pip3 install kb-python==${KB_VERSION} gffutils==0.10.1
# Copy the t2g.py helper script into /usr/bin and make it executable.
COPY t2g.py /usr/bin/
RUN chmod +x /usr/bin/t2g.py
# Default command when the container is started without arguments.
CMD [ "bash" ]
#!/usr/local/bin/python
import os
import gffutils
import argparse
def validate_file(f):
    """Argparse ``type=`` callback that checks a path before accepting it.

    Args:
        f: Path string supplied on the command line.

    Returns:
        ``f`` unchanged when it names an existing path that is not a
        directory.

    Raises:
        argparse.ArgumentTypeError: If ``f`` does not exist, or if it is a
            directory (which cannot be read as an input file).
    """
    if not os.path.exists(f):
        # Argparse uses the ArgumentTypeError to give a rejection message like:
        # error: argument input: x does not exist
        raise argparse.ArgumentTypeError("{0} does not exist".format(f))
    if os.path.isdir(f):
        # A directory passes the existence check above but is not a readable
        # file; reject it at argument-parsing time with a clear message.
        raise argparse.ArgumentTypeError("{0} is a directory".format(f))
    return f
if __name__ == "__main__":
    # Command-line interface: one required option, -g/--gtf, whose value is
    # checked by validate_file while arguments are parsed.
    parser = argparse.ArgumentParser(
        description="create transcript to genes file from a gtf file."
    )
    parser.add_argument(
        "-g", "--gtf", dest="gtf", required=True, type=validate_file,
        help="gtf file", metavar="FILE",
    )
    args = parser.parse_args()

    # Load the GTF into a gffutils database. dbfn=":memory:" is presumably
    # the SQLite in-memory target, so no database file is left behind --
    # confirm against the gffutils documentation.
    db = gffutils.create_db(
        args.gtf,
        dbfn=":memory:",
        force=True,
        merge_strategy="merge",
        disable_infer_transcripts=False,
        disable_infer_genes=False,
    )

    # Write one "<gene_id>\t<transcript_id>" line per parent/transcript pair
    # to t2g.txt in the current working directory.
    with open("t2g.txt", "w") as t2g:
        # all_features() yields every feature, not only genes; a feature with
        # no children of type 'transcript' simply contributes no lines.
        for gene in db.all_features():
            for transcript in db.children(
                gene, featuretype="transcript", order_by="start"
            ):
                # Attribute values are indexed with [0], i.e. the first value
                # stored under each key is used.
                t2g.write(
                    f"{gene['gene_id'][0]}\t{transcript['transcript_id'][0]}\n"
                )
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment