# GATC_finder.py
import sys
import re
from Bio import SeqIO

# NOTE(review): this path is never read anywhere in the script; it is kept
# only so the module-level name does not disappear - confirm and remove.
out_file_path = "/GATC_finder"

# BED file the positions are written to (created in the working directory)
bed_file_path = "sites.bed"

# Motif we are looking for
motif = "GATC"


def find_motif_sites(sequence, pattern=motif):
    """Yield the (start, end) coordinates of every match of *pattern*.

    Coordinates follow the BED convention: *start* is 0-based and *end* is
    exclusive, so ``sequence[start:end]`` is exactly the matched text.

    Args:
        sequence: The sequence to scan, as a plain string.
        pattern: Regular expression to search for. Defaults to the
            module-level ``motif``.

    Yields:
        ``(start, end)`` tuples, in order of appearance. Matches are
        non-overlapping, as produced by ``finditer``.
    """
    # Matching is case-sensitive: lowercase (e.g. soft-masked) sequence is
    # not reported.
    compiled = re.compile(pattern)
    for match in compiled.finditer(sequence):
        yield match.start(), match.end()


def main(argv=None):
    """Write the position of every motif found in a FASTA file to a BED file.

    Args:
        argv: Command-line arguments, program name first. Defaults to
            ``sys.argv``. ``argv[1]`` is the path of the FASTA file.

    Raises:
        SystemExit: If the FASTA file path is missing from *argv*.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        prog = argv[0] if argv else "GATC_finder.py"
        sys.exit(f"usage: {prog} <genome.fasta>")

    # Gets the arguments in the command line
    genome_file = str(argv[1])

    # Compile once; the same pattern is reused for every chromosome
    compiled = re.compile(motif)

    # Opening the file to write the positions in; the context manager
    # guarantees it is flushed and closed even if parsing fails midway
    with open(bed_file_path, "w") as f:

        # Cycles through the parsed chromosomes from the fasta file
        for seq_record in SeqIO.parse(genome_file, "fasta"):

            # Gets the id of the chromosome in the file
            chrom = seq_record.id

            # Cycles through all the motifs that are found in the chromosome
            for match in compiled.finditer(str(seq_record.seq)):

                # BED intervals are 0-based with an exclusive end, which is
                # exactly what match.start() / match.end() return; adding 1
                # would shift the interval one base off the motif
                start_pos = match.start()
                end_pos = match.end()

                # Writes the position in the .bed file (chrom/start/end)
                f.write(f"{chrom}\t{start_pos}\t{end_pos}\n")


if __name__ == "__main__":
    main()