Skip to content
Snippets Groups Projects
Verified Commit 7e59ad40 authored by Laurent Modolo's avatar Laurent Modolo
Browse files

rmi_splitter.py: add a Reads class to replace SeqIO

parent a4e06218
Branches
No related tags found
No related merge requests found
......@@ -11,8 +11,6 @@ from contextlib import ExitStack
import sys
import getopt
from gzip import open as gzopen
from Bio import SeqIO
from Bio.SeqRecord import SeqRecord
import yaml # pip install pyyaml
from . import suffix_tree
......@@ -39,8 +37,8 @@ class Reads:
read a read from an open buffer
"""
try:
self.header = str(fin.readline())
self.seq = str(fin.readline())
self.header = str(fin.readline())[1:-1]
self.seq = str(fin.readline())[:-1]
fin.readline()
self.str2qual(str(fin.readline()))
except IOError as err:
......@@ -55,6 +53,7 @@ class Reads:
del self.qual
self.qual = list()
for base_qual in qual:
if base_qual != "\n":
self.qual.append(ord(base_qual)-64)
def qual2str(self):
......@@ -71,7 +70,7 @@ class Reads:
write a read in an open buffer
"""
try:
fout.write(self.header + "\n")
fout.write("@" + self.header + "\n")
fout.write(self.seq + "\n")
fout.write("+\n")
fout.write(self.qual2str() + "\n")
......@@ -80,6 +79,16 @@ class Reads:
fout.close()
raise err
def __str__(self):
"""
write a read as an str
"""
reads = "@" + self.header + "\n"
reads += self.seq + "\n"
reads += "+\n"
reads += self.qual2str() + "\n"
return reads
def load_yaml(path):
"""
......@@ -194,9 +203,9 @@ def extract_barcode_pos(reads, start, stop, header):
stop = stop + 1
if header:
if start == 0:
seq = reads.head[-stop:]
seq = reads.header[-stop:]
return {'seq': seq, 'qual': [40 for x in range(len(seq))]}
seq = reads.head[-stop:-start]
seq = reads.header[-stop:-start]
return {'seq': seq, 'qual': [40 for x in range(len(seq))]}
if start == 0:
return {
......@@ -240,7 +249,9 @@ def write_umi_in_header(reads, config, adaptator, ntuple=1, verbose=False):
params: adaptator
"""
umi = extract_barcode(reads, config, adaptator, ntuple, verbose)
reads.header += str('_' + str(umi['seq']))
header = reads.header.split(" ")
header[0] += "_" + str(umi['seq'])
reads.header = " ".join(header)
return reads
......@@ -318,7 +329,7 @@ def match_barcode(reads, config, adaptator, barcode_dictionary, ntuple=1,
)
return barcode_dictionary.search_reads(
adaptator=adaptator,
seq=reads['seq'],
seq=read['seq'],
qual=read['qual'],
cache=True,
verbose=verbose
......@@ -326,7 +337,7 @@ def match_barcode(reads, config, adaptator, barcode_dictionary, ntuple=1,
except KeyError:
if verbose:
print("error: match_barcode() \"" +
str(read['seq']) +
str(reads.seq) +
"\" not found in \"" +
str(adaptator) +
"\" for reads " +
......@@ -334,7 +345,7 @@ def match_barcode(reads, config, adaptator, barcode_dictionary, ntuple=1,
except IndexError:
if verbose:
print("error: match_barcode() \"" +
str(read['seq']) +
str(reads.seq) +
"\" not found in \"" +
str(adaptator) +
"\" for reads " +
......@@ -427,15 +438,12 @@ def remove_barcode_pos(reads, start, stop, header, verbose=False):
reads.header[(size-start):]
else:
if start == 0:
reads.qual = \
reads.qual[stop:]
reads.seq = reads.seq[stop:]
reads.qual = reads.qual[stop:]
else:
stop = stop - 1
reads.qual = \
reads.qual[0:start] + \
reads.qual[stop:]
reads.seq = reads.seq[0:start] + reads.seq[stop:]
reads.qual = reads.qual[0:start] + reads.qual[stop:]
return reads
......@@ -522,7 +530,7 @@ def remove_barcodes(reads, config, ntuple=1, verbose=False):
params: reads int()
return: seq
"""
if isinstance(Reads, reads):
if not isinstance(reads, Reads):
if verbose:
print("error: remove_barcode(), reads is not of type Reads")
raise ValueError
......@@ -621,7 +629,7 @@ def parse_ntuples_fastq(ffastqs,
config=config,
barcode_dictionary=barcode_dictionary,
verbose=verbose)
write_seqs(seqs=reads_list,
write_seqs(reads_list=reads_list,
fouts=fouts,
config=config,
sample=sample,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment