Commit e8997bda authored by nlecouvr's avatar nlecouvr
Browse files

made finder runnable

parent 0b77a330
import re
from Bio import SeqIO
def main(genome_file, out_file_path):
    """Find every GATC site in a FASTA file and write them to a BED file.

    Every record yielded by ``SeqIO.parse(genome_file, "fasta")`` is scanned
    for the motif, and one tab-separated line (record id, start, end) is
    written per match.

    Args:
        genome_file (str): full path to the FASTA file.
        out_file_path (str): full path to the output BED file; it is
            created, or overwritten if it already exists.
    """
    # Compiled once, outside the loops. Case-insensitive so that sites lying
    # in soft-masked (lower-case) sequence are not silently skipped.
    motif = re.compile("GATC", re.IGNORECASE)

    # The context manager guarantees the output is flushed and closed, even
    # if parsing fails part-way through.
    with open(out_file_path, "w") as bed:
        # Cycle through the sequences parsed from the FASTA file.
        for seq_record in SeqIO.parse(genome_file, "fasta"):
            chrom = seq_record.id
            # Cycle through all the motif occurrences found in this sequence.
            for match in motif.finditer(str(seq_record.seq)):
                # BED intervals are 0-based and half-open, which is exactly
                # what match.start() / match.end() return. Adding 1 to both
                # would shift every interval one base to the right.
                bed.write(f"{chrom}\t{match.start()}\t{match.end()}\n")
if __name__ == "__main__":
    # main() takes two required arguments, so they are read from the command
    # line; calling it bare raises TypeError before anything is written.
    import argparse

    parser = argparse.ArgumentParser(
        description="Write the position of every GATC site found in a "
        "FASTA file to a BED file."
    )
    parser.add_argument("genome_file", help="full path to the fasta file")
    parser.add_argument("out_file_path", help="full path to the output file")
    args = parser.parse_args()
    main(args.genome_file, args.out_file_path)
......@@ -69,9 +69,15 @@ for chrom, regions, name in zip(chromosomes, chrom_regions, id_list):
if j >= 5:
j = 0
i += 1
pos = np.arange(1, int(max(regions)), 1 )
y = np.full(len(pos), 39)
print(len(pos))
print(len(y))
axes[i, j].set_title(name)
axes[i, j].set_ylabel("site number / bin")
axes[i, j].plot(pos, y, color = "black")
axes[i, j].plot(regions, chrom)
j += 1
......
import re
import matplotlib.pyplot as plt
import numpy as np
import pandas
from Bio import SeqIO, motifs
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO
def main(genome_file, out_file_path):
    """Find every GATC site in a FASTA file and write them to a BED file.

    Every record yielded by ``SeqIO.parse(genome_file, "fasta")`` is scanned
    for the motif, and one tab-separated line (record id, start, end) is
    written per match. The paths come from the arguments only; no
    machine-specific location is hard-coded.

    Args:
        genome_file (str): full path to the FASTA file.
        out_file_path (str): full path to the output BED file; it is
            created, or overwritten if it already exists.
    """
    # Compiled once, outside the loops. Case-insensitive so that sites lying
    # in soft-masked (lower-case) sequence are not silently skipped.
    motif = re.compile("GATC", re.IGNORECASE)

    # A single handle, opened on the requested path and closed by the
    # context manager even if parsing fails part-way through.
    with open(out_file_path, "w") as bed:
        # The genome is parsed exactly once; each sequence is one record.
        for seq_record in SeqIO.parse(genome_file, "fasta"):
            chrom = seq_record.id
            # Cycle through all the motif occurrences found in this sequence.
            for match in motif.finditer(str(seq_record.seq)):
                # BED intervals are 0-based and half-open, which is exactly
                # what match.start() / match.end() return. Each site is
                # written once.
                bed.write(f"{chrom}\t{match.start()}\t{match.end()}\n")
if __name__ == "__main__":
    # main() takes two required arguments, so they are read from the command
    # line; calling it bare raises TypeError before anything is written.
    import argparse

    parser = argparse.ArgumentParser(
        description="Write the position of every GATC site found in a "
        "FASTA file to a BED file."
    )
    parser.add_argument("genome_file", help="full path to the fasta file")
    parser.add_argument("out_file_path", help="full path to the output file")
    args = parser.parse_args()
    main(args.genome_file, args.out_file_path)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment