Skip to content
Snippets Groups Projects
Verified Commit f7fe919a authored by Laurent Modolo
Browse files

rmi_splitter.py: change ntuple to start from 0

parent 0619e862
Branches
Tags
No related merge requests found
...@@ -133,7 +133,7 @@ def update_config(config): ...@@ -133,7 +133,7 @@ def update_config(config):
return config return config
def test_adaptator(config, adaptator, ntuple=0, verbose=False): def test_adaptator(config, adaptator, ntuple, verbose=False):
""" """
Run tests on the adaptator Run tests on the adaptator
...@@ -237,7 +237,7 @@ def extract_barcode_pos(reads, start, stop, header): ...@@ -237,7 +237,7 @@ def extract_barcode_pos(reads, start, stop, header):
} }
def extract_barcode(reads, config, adaptator, ntuple=0, verbose=False): def extract_barcode(reads, config, adaptator, ntuple, verbose=False):
""" """
Extract barcode from config from adaptator. Extract barcode from config from adaptator.
...@@ -258,7 +258,7 @@ def extract_barcode(reads, config, adaptator, ntuple=0, verbose=False): ...@@ -258,7 +258,7 @@ def extract_barcode(reads, config, adaptator, ntuple=0, verbose=False):
header=coords['header']) header=coords['header'])
def write_umi_in_header(reads, config, adaptator, ntuple=0, verbose=False): def write_umi_in_header(reads, config, adaptator, ntuple, verbose=False):
""" """
Copy the UMI in the header separated by an _ to use later with UMI_tools. Copy the UMI in the header separated by an _ to use later with UMI_tools.
...@@ -273,7 +273,7 @@ def write_umi_in_header(reads, config, adaptator, ntuple=0, verbose=False): ...@@ -273,7 +273,7 @@ def write_umi_in_header(reads, config, adaptator, ntuple=0, verbose=False):
return reads return reads
def list_adaptator_barcode(config, adaptator, ntuple=0, verbose=False): def list_adaptator_barcode(config, adaptator, ntuple, verbose=False):
""" """
Create a list of concatened barecode seq and a list of concatened barcode Create a list of concatened barecode seq and a list of concatened barcode
names from the config. names from the config.
...@@ -327,7 +327,7 @@ def create_barcode_dictionaries(config, mismatch=None): ...@@ -327,7 +327,7 @@ def create_barcode_dictionaries(config, mismatch=None):
return adaptators_dict return adaptators_dict
def match_barcode(reads, config, adaptator, barcode_dictionary, ntuple=0, def match_barcode(reads, config, adaptator, barcode_dictionary, ntuple,
verbose=False): verbose=False):
""" """
Search barcode suffixtree. Search barcode suffixtree.
...@@ -370,7 +370,7 @@ def match_barcode(reads, config, adaptator, barcode_dictionary, ntuple=0, ...@@ -370,7 +370,7 @@ def match_barcode(reads, config, adaptator, barcode_dictionary, ntuple=0,
str(ntuple)) str(ntuple))
def match_barcodes(reads, config, barcode_dictionary, ntuple=0, verbose=False): def match_barcodes(reads, config, barcode_dictionary, ntuple, verbose=False):
""" """
Search all barcodes Search all barcodes
...@@ -467,7 +467,7 @@ def remove_barcode_pos(reads, start, stop, header): ...@@ -467,7 +467,7 @@ def remove_barcode_pos(reads, start, stop, header):
return reads return reads
def remove_barcode(reads, config, adaptator, ntuple=0, verbose=False): def remove_barcode(reads, config, adaptator, ntuple, verbose=False):
""" """
Remove barcode from sequence (i.e trim read). Remove barcode from sequence (i.e trim read).
...@@ -476,7 +476,7 @@ def remove_barcode(reads, config, adaptator, ntuple=0, verbose=False): ...@@ -476,7 +476,7 @@ def remove_barcode(reads, config, adaptator, ntuple=0, verbose=False):
params: adaptator params: adaptator
""" """
if verbose: if verbose:
test_adaptator(config=config, adaptator=adaptator, ntuple=0, test_adaptator(config=config, adaptator=adaptator, ntuple=ntuple,
verbose=verbose) verbose=verbose)
coords = config[adaptator]['coords'] coords = config[adaptator]['coords']
if adaptator == 'UMI': if adaptator == 'UMI':
...@@ -495,7 +495,7 @@ def remove_barcode(reads, config, adaptator, ntuple=0, verbose=False): ...@@ -495,7 +495,7 @@ def remove_barcode(reads, config, adaptator, ntuple=0, verbose=False):
) )
def update_position(config, adaptator, adapt, adaptator_length, ntuple=0): def update_position(config, adaptator, adapt, adaptator_length, ntuple):
""" """
Update barcode position in config file when a barcode is removed Update barcode position in config file when a barcode is removed
...@@ -519,7 +519,7 @@ def update_position(config, adaptator, adapt, adaptator_length, ntuple=0): ...@@ -519,7 +519,7 @@ def update_position(config, adaptator, adapt, adaptator_length, ntuple=0):
return config return config
def update_positions(config, adaptator, ntuple=0): def update_positions(config, adaptator, ntuple):
""" """
Update barcode position in config file when a barcode is removed Update barcode position in config file when a barcode is removed
...@@ -543,7 +543,7 @@ def update_positions(config, adaptator, ntuple=0): ...@@ -543,7 +543,7 @@ def update_positions(config, adaptator, ntuple=0):
return config return config
def remove_barcodes(reads, config, ntuple=0, verbose=False): def remove_barcodes(reads, config, ntuple, verbose=False):
""" """
Remove barcodes from sequence (i.e trim read). Remove barcodes from sequence (i.e trim read).
can be call once by read, otherwise adaptator coords doesn't macht anymore can be call once by read, otherwise adaptator coords doesn't macht anymore
...@@ -559,7 +559,7 @@ def remove_barcodes(reads, config, ntuple=0, verbose=False): ...@@ -559,7 +559,7 @@ def remove_barcodes(reads, config, ntuple=0, verbose=False):
raise ValueError raise ValueError
for adaptator in config: for adaptator in config:
if (not adaptator == 'conditions' and if (not adaptator == 'conditions' and
ntuple == config[adaptator]['coords']['reads']-1): ntuple == config[adaptator]['coords']['reads']):
reads = remove_barcode( reads = remove_barcode(
reads=reads, reads=reads,
config=config, config=config,
...@@ -570,7 +570,7 @@ def remove_barcodes(reads, config, ntuple=0, verbose=False): ...@@ -570,7 +570,7 @@ def remove_barcodes(reads, config, ntuple=0, verbose=False):
return reads return reads
def write_seq(reads, fout, config, ntuple=0, verbose=False): def write_seq(reads, fout, config, ntuple, verbose=False):
""" """
write sequence without adaptor in the correct file write sequence without adaptor in the correct file
...@@ -617,13 +617,13 @@ def read_reads(fins, reads_list, ntuple, line_number): ...@@ -617,13 +617,13 @@ def read_reads(fins, reads_list, ntuple, line_number):
""" """
if line_number == 0: if line_number == 0:
for reads in ntuple: for reads in ntuple:
reads_list[reads-1].header = fins[reads-1] reads_list[reads].header = fins[reads]
if line_number == 1: if line_number == 1:
for reads in ntuple: for reads in ntuple:
reads_list[reads-1].seq = fins[reads-1] reads_list[reads].seq = fins[reads]
if line_number == 3: if line_number == 3:
for reads in ntuple: for reads in ntuple:
reads_list[reads-1].str2qual(fins[reads-1]) reads_list[reads].str2qual(fins[reads])
return reads_list return reads_list
......
...@@ -16,7 +16,7 @@ single end ...@@ -16,7 +16,7 @@ single end
CONFIG_TOY = { CONFIG_TOY = {
'RT': { 'RT': {
'coords': { 'coords': {
'reads': 1, 'reads': 0,
'start': 6, 'start': 6,
'stop': 13, 'stop': 13,
'header': False, 'header': False,
...@@ -32,7 +32,7 @@ CONFIG_TOY = { ...@@ -32,7 +32,7 @@ CONFIG_TOY = {
}, },
'PCR': { 'PCR': {
'coords': { 'coords': {
'reads': 1, 'reads': 0,
'start': 0, 'start': 0,
'stop': 5, 'stop': 5,
'header': True, 'header': True,
...@@ -49,7 +49,7 @@ CONFIG_TOY = { ...@@ -49,7 +49,7 @@ CONFIG_TOY = {
}, },
'UMI': { 'UMI': {
'coords': { 'coords': {
'reads': 1, 'reads': 0,
'start': 0, 'start': 0,
'stop': 5, 'stop': 5,
'header': False, 'header': False,
...@@ -122,7 +122,7 @@ paired-end ...@@ -122,7 +122,7 @@ paired-end
CONFIG_TOY_PAIRED = { CONFIG_TOY_PAIRED = {
'RT': { 'RT': {
'coords': { 'coords': {
'reads': 1, 'reads': 0,
'start': 6, 'start': 6,
'stop': 13, 'stop': 13,
'header': False 'header': False
...@@ -136,7 +136,7 @@ CONFIG_TOY_PAIRED = { ...@@ -136,7 +136,7 @@ CONFIG_TOY_PAIRED = {
}, },
'PCR': { 'PCR': {
'coords': { 'coords': {
'reads': 3, 'reads': 2,
'start': 0, 'start': 0,
'stop': 5, 'stop': 5,
'header': False 'header': False
...@@ -152,7 +152,7 @@ CONFIG_TOY_PAIRED = { ...@@ -152,7 +152,7 @@ CONFIG_TOY_PAIRED = {
}, },
'UMI': { 'UMI': {
'coords': { 'coords': {
'reads': 1, 'reads': 0,
'start': 0, 'start': 0,
'stop': 5, 'stop': 5,
'header': False 'header': False
......
...@@ -40,7 +40,7 @@ class ConfigLoadTest(unittest.TestCase): ...@@ -40,7 +40,7 @@ class ConfigLoadTest(unittest.TestCase):
""" """
test on the adaptator position extraction test on the adaptator position extraction
""" """
pos_object = {'reads': 1, pos_object = {'reads': 0,
'start': 6, 'start': 6,
'start_update': 6, 'start_update': 6,
'stop': 13, 'stop': 13,
...@@ -62,7 +62,9 @@ class ConfigLoadTest(unittest.TestCase): ...@@ -62,7 +62,9 @@ class ConfigLoadTest(unittest.TestCase):
rmi_splitter.list_adaptator_barcode( rmi_splitter.list_adaptator_barcode(
config=rmi_splitter.load_yaml( config=rmi_splitter.load_yaml(
path="rmi_splitter/tests/data/toy_file.yaml"), path="rmi_splitter/tests/data/toy_file.yaml"),
adaptator="RT"), adaptator="RT",
ntuple=0
),
{"RT1": "TAGTGCC", {"RT1": "TAGTGCC",
"RT2": "GCTACCC", "RT2": "GCTACCC",
"RT3": "ATCGACC", "RT3": "ATCGACC",
...@@ -115,7 +117,9 @@ class ReadsReadTest(unittest.TestCase): ...@@ -115,7 +117,9 @@ class ReadsReadTest(unittest.TestCase):
} }
} }
}, },
"RT") "RT",
0
)
except KeyError: except KeyError:
self.assertEqual(1, 1) self.assertEqual(1, 1)
...@@ -134,7 +138,9 @@ class ReadsReadTest(unittest.TestCase): ...@@ -134,7 +138,9 @@ class ReadsReadTest(unittest.TestCase):
} }
} }
}, },
"RT") "RT",
0
)
except KeyError: except KeyError:
self.assertEqual(1, 1) self.assertEqual(1, 1)
...@@ -153,7 +159,9 @@ class ReadsReadTest(unittest.TestCase): ...@@ -153,7 +159,9 @@ class ReadsReadTest(unittest.TestCase):
} }
} }
}, },
"RT") "RT",
0
)
except KeyError: except KeyError:
self.assertEqual(1, 1) self.assertEqual(1, 1)
...@@ -172,7 +180,9 @@ class ReadsReadTest(unittest.TestCase): ...@@ -172,7 +180,9 @@ class ReadsReadTest(unittest.TestCase):
} }
} }
}, },
"RT") "RT",
0
)
except KeyError: except KeyError:
self.assertEqual(1, 1) self.assertEqual(1, 1)
...@@ -260,10 +270,12 @@ class ReadsReadTest(unittest.TestCase): ...@@ -260,10 +270,12 @@ class ReadsReadTest(unittest.TestCase):
reads_test = copy.deepcopy(data_test.Reads_single) reads_test = copy.deepcopy(data_test.Reads_single)
self.assertEqual( self.assertEqual(
rmi_splitter.extract_barcode( rmi_splitter.extract_barcode(
reads_test, reads=reads_test,
rmi_splitter.load_yaml( config=rmi_splitter.load_yaml(
path="rmi_splitter/tests/data/toy_file.yaml"), path="rmi_splitter/tests/data/toy_file.yaml"),
"RT", verbose=True)['seq'], adaptator="RT",
ntuple=0,
verbose=True)['seq'],
data_test.RT_barcode_single data_test.RT_barcode_single
) )
...@@ -274,10 +286,12 @@ class ReadsReadTest(unittest.TestCase): ...@@ -274,10 +286,12 @@ class ReadsReadTest(unittest.TestCase):
reads_test = copy.deepcopy(data_test.Reads_single) reads_test = copy.deepcopy(data_test.Reads_single)
self.assertEqual( self.assertEqual(
rmi_splitter.extract_barcode( rmi_splitter.extract_barcode(
reads_test, reads=reads_test,
rmi_splitter.load_yaml( config=rmi_splitter.load_yaml(
path="rmi_splitter/tests/data/toy_file.yaml"), path="rmi_splitter/tests/data/toy_file.yaml"),
"PCR")['seq'], adaptator="PCR",
ntuple=0
)['seq'],
data_test.PCR_barcode_single data_test.PCR_barcode_single
) )
...@@ -306,7 +320,11 @@ class ModifyReadTest(unittest.TestCase): ...@@ -306,7 +320,11 @@ class ModifyReadTest(unittest.TestCase):
reads_test = copy.deepcopy(data_test.Reads_single) reads_test = copy.deepcopy(data_test.Reads_single)
self.assertEqual( self.assertEqual(
rmi_splitter.write_umi_in_header( rmi_splitter.write_umi_in_header(
reads_test, data_test.CONFIG_TOY, 'UMI').header, reads=reads_test,
config=data_test.CONFIG_TOY,
adaptator='UMI',
ntuple=0
).header,
data_test.Reads_umi_single.header data_test.Reads_umi_single.header
) )
...@@ -405,9 +423,10 @@ class ModifyReadTest(unittest.TestCase): ...@@ -405,9 +423,10 @@ class ModifyReadTest(unittest.TestCase):
reads_test = copy.deepcopy(data_test.Reads_single) reads_test = copy.deepcopy(data_test.Reads_single)
self.assertEqual( self.assertEqual(
rmi_splitter.remove_barcode( rmi_splitter.remove_barcode(
reads_test, reads=reads_test,
data_test.CONFIG_TOY, config=data_test.CONFIG_TOY,
"RT" adaptator="RT",
ntuple=0
).seq, ).seq,
data_test.Reads_single_noRT.seq data_test.Reads_single_noRT.seq
) )
...@@ -423,7 +442,8 @@ class ModifyReadTest(unittest.TestCase): ...@@ -423,7 +442,8 @@ class ModifyReadTest(unittest.TestCase):
self.assertEqual( self.assertEqual(
rmi_splitter.update_positions( rmi_splitter.update_positions(
config=data_test.CONFIG_TOY, config=data_test.CONFIG_TOY,
adaptator="UMI" adaptator="UMI",
ntuple=0
), ),
rmi_splitter.load_yaml( rmi_splitter.load_yaml(
path="rmi_splitter/tests/data/toy_file.yaml") path="rmi_splitter/tests/data/toy_file.yaml")
...@@ -449,15 +469,18 @@ class ModifyReadTest(unittest.TestCase): ...@@ -449,15 +469,18 @@ class ModifyReadTest(unittest.TestCase):
reads=reads_test, reads=reads_test,
config=rmi_splitter.update_positions( config=rmi_splitter.update_positions(
config=data_test.CONFIG_TOY, config=data_test.CONFIG_TOY,
adaptator="UMI" adaptator="UMI",
ntuple=0
), ),
adaptator="PCR", adaptator="PCR",
ntuple=0,
barcode_dictionary=barcode_dic barcode_dictionary=barcode_dic
), ),
rmi_splitter.match_barcode( rmi_splitter.match_barcode(
reads=reads_test2, reads=reads_test2,
config=data_test.CONFIG_TOY, config=data_test.CONFIG_TOY,
adaptator="PCR", adaptator="PCR",
ntuple=0,
barcode_dictionary=barcode_dic barcode_dictionary=barcode_dic
), ),
) )
...@@ -470,7 +493,8 @@ class ModifyReadTest(unittest.TestCase): ...@@ -470,7 +493,8 @@ class ModifyReadTest(unittest.TestCase):
self.assertEqual( self.assertEqual(
rmi_splitter.remove_barcodes( rmi_splitter.remove_barcodes(
reads_test, reads_test,
data_test.CONFIG_TOY, ntuple=0,
config=data_test.CONFIG_TOY,
).seq, ).seq,
data_test.Reads_trim_single.seq data_test.Reads_trim_single.seq
) )
...@@ -495,6 +519,7 @@ class SeachBarcodeTest(unittest.TestCase): ...@@ -495,6 +519,7 @@ class SeachBarcodeTest(unittest.TestCase):
reads=reads_test, reads=reads_test,
config=config, config=config,
adaptator="PCR", adaptator="PCR",
ntuple=0,
barcode_dictionary=barcode_dic barcode_dictionary=barcode_dic
), ),
"PCR1" "PCR1"
...@@ -515,6 +540,7 @@ class SeachBarcodeTest(unittest.TestCase): ...@@ -515,6 +540,7 @@ class SeachBarcodeTest(unittest.TestCase):
reads=reads_test, reads=reads_test,
config=config, config=config,
adaptator="PCT", adaptator="PCT",
ntuple=0,
barcode_dictionary=barcode_dic, barcode_dictionary=barcode_dic,
verbose=True verbose=True
) )
...@@ -537,6 +563,7 @@ class SeachBarcodeTest(unittest.TestCase): ...@@ -537,6 +563,7 @@ class SeachBarcodeTest(unittest.TestCase):
rmi_splitter.match_barcodes( rmi_splitter.match_barcodes(
reads=reads_test, reads=reads_test,
config=config, config=config,
ntuple=0,
barcode_dictionary=barcode_dic barcode_dictionary=barcode_dic
), ),
['RT1', 'PCR1'] ['RT1', 'PCR1']
...@@ -584,7 +611,7 @@ class HandleFastqTest(unittest.TestCase): ...@@ -584,7 +611,7 @@ class HandleFastqTest(unittest.TestCase):
rmi_splitter.list_reads_number( rmi_splitter.list_reads_number(
config=data_test.CONFIG_TOY_PAIRED config=data_test.CONFIG_TOY_PAIRED
), ),
[1, 2, 3] [0, 1, 2]
) )
def test_assign_reads_single(self): def test_assign_reads_single(self):
...@@ -639,7 +666,7 @@ class HandleFastqTest(unittest.TestCase): ...@@ -639,7 +666,7 @@ class HandleFastqTest(unittest.TestCase):
config=rmi_splitter.load_yaml( config=rmi_splitter.load_yaml(
path="rmi_splitter/tests/data/toy_file_paired.yaml"), path="rmi_splitter/tests/data/toy_file_paired.yaml"),
results_path="../results/", results_path="../results/",
ntuple_param=range(3), ntuple_param=3,
verbose=True verbose=True
) )
self.assertEqual(1, 1) self.assertEqual(1, 1)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment