From f7fe919ab8252b764868d5feeed4150ec8103931 Mon Sep 17 00:00:00 2001 From: Laurent Modolo <laurent.modolo@ens-lyon.fr> Date: Wed, 22 May 2019 14:21:45 +0200 Subject: [PATCH] rmi_splitter.py: change ntuple to start from 0 --- src/rmi_splitter/rmi_splitter.py | 32 +++++----- src/rmi_splitter/tests/data_test.py | 12 ++-- src/rmi_splitter/tests/rmi_splitter_test.py | 69 ++++++++++++++------- 3 files changed, 70 insertions(+), 43 deletions(-) diff --git a/src/rmi_splitter/rmi_splitter.py b/src/rmi_splitter/rmi_splitter.py index ad9a77a..095931c 100644 --- a/src/rmi_splitter/rmi_splitter.py +++ b/src/rmi_splitter/rmi_splitter.py @@ -133,7 +133,7 @@ def update_config(config): return config -def test_adaptator(config, adaptator, ntuple=0, verbose=False): +def test_adaptator(config, adaptator, ntuple, verbose=False): """ Run tests on the adaptator @@ -237,7 +237,7 @@ def extract_barcode_pos(reads, start, stop, header): } -def extract_barcode(reads, config, adaptator, ntuple=0, verbose=False): +def extract_barcode(reads, config, adaptator, ntuple, verbose=False): """ Extract barcode from config from adaptator. @@ -258,7 +258,7 @@ def extract_barcode(reads, config, adaptator, ntuple=0, verbose=False): header=coords['header']) -def write_umi_in_header(reads, config, adaptator, ntuple=0, verbose=False): +def write_umi_in_header(reads, config, adaptator, ntuple, verbose=False): """ Copy the UMI in the header separated by an _ to use later with UMI_tools. @@ -273,7 +273,7 @@ def write_umi_in_header(reads, config, adaptator, ntuple=0, verbose=False): return reads -def list_adaptator_barcode(config, adaptator, ntuple=0, verbose=False): +def list_adaptator_barcode(config, adaptator, ntuple, verbose=False): """ Create a list of concatened barecode seq and a list of concatened barcode names from the config. @@ -327,7 +327,7 @@ def create_barcode_dictionaries(config, mismatch=None): return adaptators_dict -def match_barcode(reads, config, adaptator, barcode_dictionary, ntuple=0, +def match_barcode(reads, config, adaptator, barcode_dictionary, ntuple, verbose=False): """ Search barcode suffixtree. @@ -370,7 +370,7 @@ def match_barcode(reads, config, adaptator, barcode_dictionary, ntuple=0, str(ntuple)) -def match_barcodes(reads, config, barcode_dictionary, ntuple=0, verbose=False): +def match_barcodes(reads, config, barcode_dictionary, ntuple, verbose=False): """ Search all barcodes @@ -467,7 +467,7 @@ def remove_barcode_pos(reads, start, stop, header): return reads -def remove_barcode(reads, config, adaptator, ntuple=0, verbose=False): +def remove_barcode(reads, config, adaptator, ntuple, verbose=False): """ Remove barcode from sequence (i.e trim read). @@ -476,7 +476,7 @@ def remove_barcode(reads, config, adaptator, ntuple=0, verbose=False): params: adaptator """ if verbose: - test_adaptator(config=config, adaptator=adaptator, ntuple=0, + test_adaptator(config=config, adaptator=adaptator, ntuple=ntuple, verbose=verbose) coords = config[adaptator]['coords'] if adaptator == 'UMI': @@ -495,7 +495,7 @@ def remove_barcode(reads, config, adaptator, ntuple=0, verbose=False): ) -def update_position(config, adaptator, adapt, adaptator_length, ntuple=0): +def update_position(config, adaptator, adapt, adaptator_length, ntuple): """ Update barcode position in config file when a barcode is removed @@ -519,7 +519,7 @@ def update_position(config, adaptator, adapt, adaptator_length, ntuple=0): return config -def update_positions(config, adaptator, ntuple=0): +def update_positions(config, adaptator, ntuple): """ Update barcode position in config file when a barcode is removed @@ -543,7 +543,7 @@ def update_positions(config, adaptator, ntuple=0): return config -def remove_barcodes(reads, config, ntuple=0, verbose=False): +def remove_barcodes(reads, config, ntuple, verbose=False): """ Remove barcodes from sequence (i.e trim read). can be call once by read, otherwise adaptator coords doesn't macht anymore @@ -559,7 +559,7 @@ def remove_barcodes(reads, config, ntuple=0, verbose=False): raise ValueError for adaptator in config: if (not adaptator == 'conditions' and - ntuple == config[adaptator]['coords']['reads']-1): + ntuple == config[adaptator]['coords']['reads']): reads = remove_barcode( reads=reads, config=config, @@ -570,7 +570,7 @@ def remove_barcodes(reads, config, ntuple=0, verbose=False): return reads -def write_seq(reads, fout, config, ntuple=0, verbose=False): +def write_seq(reads, fout, config, ntuple, verbose=False): """ write sequence without adaptor in the correct file @@ -617,13 +617,13 @@ def read_reads(fins, reads_list, ntuple, line_number): """ if line_number == 0: for reads in ntuple: - reads_list[reads-1].header = fins[reads-1] + reads_list[reads].header = fins[reads] if line_number == 1: for reads in ntuple: - reads_list[reads-1].seq = fins[reads-1] + reads_list[reads].seq = fins[reads] if line_number == 3: for reads in ntuple: - reads_list[reads-1].str2qual(fins[reads-1]) + reads_list[reads].str2qual(fins[reads]) return reads_list diff --git a/src/rmi_splitter/tests/data_test.py b/src/rmi_splitter/tests/data_test.py index f4be63d..447581a 100644 --- a/src/rmi_splitter/tests/data_test.py +++ b/src/rmi_splitter/tests/data_test.py @@ -16,7 +16,7 @@ single end CONFIG_TOY = { 'RT': { 'coords': { - 'reads': 1, + 'reads': 0, 'start': 6, 'stop': 13, 'header': False, @@ -32,7 +32,7 @@ CONFIG_TOY = { }, 'PCR': { 'coords': { - 'reads': 1, + 'reads': 0, 'start': 0, 'stop': 5, 'header': True, @@ -49,7 +49,7 @@ CONFIG_TOY = { }, 'UMI': { 'coords': { - 'reads': 1, + 'reads': 0, 'start': 0, 'stop': 5, 'header': False, @@ -122,7 +122,7 @@ paired-end CONFIG_TOY_PAIRED = { 'RT': { 'coords': { - 'reads': 1, + 'reads': 0, 'start': 6, 'stop': 13, 'header': False @@ -136,7 +136,7 @@ CONFIG_TOY_PAIRED = { }, 'PCR': { 'coords': { - 'reads': 3, + 'reads': 2, 'start': 0, 'stop': 5, 'header': False @@ -152,7 +152,7 @@ CONFIG_TOY_PAIRED = { }, 'UMI': { 'coords': { - 'reads': 1, + 'reads': 0, 'start': 0, 'stop': 5, 'header': False diff --git a/src/rmi_splitter/tests/rmi_splitter_test.py b/src/rmi_splitter/tests/rmi_splitter_test.py index aaa9cca..ac2d0be 100644 --- a/src/rmi_splitter/tests/rmi_splitter_test.py +++ b/src/rmi_splitter/tests/rmi_splitter_test.py @@ -40,7 +40,7 @@ class ConfigLoadTest(unittest.TestCase): """ test on the adaptator position extraction """ - pos_object = {'reads': 1, + pos_object = {'reads': 0, 'start': 6, 'start_update': 6, 'stop': 13, @@ -62,7 +62,9 @@ class ConfigLoadTest(unittest.TestCase): rmi_splitter.list_adaptator_barcode( config=rmi_splitter.load_yaml( path="rmi_splitter/tests/data/toy_file.yaml"), - adaptator="RT"), + adaptator="RT", + ntuple=0 + ), {"RT1": "TAGTGCC", "RT2": "GCTACCC", "RT3": "ATCGACC", @@ -115,7 +117,9 @@ class ReadsReadTest(unittest.TestCase): } } }, - "RT") + "RT", + 0 + ) except KeyError: self.assertEqual(1, 1) @@ -134,7 +138,9 @@ class ReadsReadTest(unittest.TestCase): } } }, - "RT") + "RT", + 0 + ) except KeyError: self.assertEqual(1, 1) @@ -153,7 +159,9 @@ class ReadsReadTest(unittest.TestCase): } } }, - "RT") + "RT", + 0 + ) except KeyError: self.assertEqual(1, 1) @@ -172,7 +180,9 @@ class ReadsReadTest(unittest.TestCase): } } }, - "RT") + "RT", + 0 + ) except KeyError: self.assertEqual(1, 1) @@ -260,10 +270,12 @@ class ReadsReadTest(unittest.TestCase): reads_test = copy.deepcopy(data_test.Reads_single) self.assertEqual( rmi_splitter.extract_barcode( - reads_test, - rmi_splitter.load_yaml( + reads=reads_test, + config=rmi_splitter.load_yaml( path="rmi_splitter/tests/data/toy_file.yaml"), - "RT", verbose=True)['seq'], + adaptator="RT", + ntuple=0, + verbose=True)['seq'], data_test.RT_barcode_single ) @@ -274,10 +286,12 @@ class ReadsReadTest(unittest.TestCase): reads_test = copy.deepcopy(data_test.Reads_single) self.assertEqual( rmi_splitter.extract_barcode( - reads_test, - rmi_splitter.load_yaml( + reads=reads_test, + config=rmi_splitter.load_yaml( path="rmi_splitter/tests/data/toy_file.yaml"), - "PCR")['seq'], + adaptator="PCR", + ntuple=0 + )['seq'], data_test.PCR_barcode_single ) @@ -306,7 +320,11 @@ class ModifyReadTest(unittest.TestCase): reads_test = copy.deepcopy(data_test.Reads_single) self.assertEqual( rmi_splitter.write_umi_in_header( - reads_test, data_test.CONFIG_TOY, 'UMI').header, + reads=reads_test, + config=data_test.CONFIG_TOY, + adaptator='UMI', + ntuple=0 + ).header, data_test.Reads_umi_single.header ) @@ -405,9 +423,10 @@ class ModifyReadTest(unittest.TestCase): reads_test = copy.deepcopy(data_test.Reads_single) self.assertEqual( rmi_splitter.remove_barcode( - reads_test, - data_test.CONFIG_TOY, - "RT" + reads=reads_test, + config=data_test.CONFIG_TOY, + adaptator="RT", + ntuple=0 ).seq, data_test.Reads_single_noRT.seq ) @@ -423,7 +442,8 @@ class ModifyReadTest(unittest.TestCase): self.assertEqual( rmi_splitter.update_positions( config=data_test.CONFIG_TOY, - adaptator="UMI" + adaptator="UMI", + ntuple=0 ), rmi_splitter.load_yaml( path="rmi_splitter/tests/data/toy_file.yaml") @@ -449,15 +469,18 @@ class ModifyReadTest(unittest.TestCase): reads=reads_test, config=rmi_splitter.update_positions( config=data_test.CONFIG_TOY, - adaptator="UMI" + adaptator="UMI", + ntuple=0 ), adaptator="PCR", + ntuple=0, barcode_dictionary=barcode_dic ), rmi_splitter.match_barcode( reads=reads_test2, config=data_test.CONFIG_TOY, adaptator="PCR", + ntuple=0, barcode_dictionary=barcode_dic ), ) @@ -470,7 +493,8 @@ class ModifyReadTest(unittest.TestCase): self.assertEqual( rmi_splitter.remove_barcodes( reads_test, - data_test.CONFIG_TOY, + ntuple=0, + config=data_test.CONFIG_TOY, ).seq, data_test.Reads_trim_single.seq ) @@ -495,6 +519,7 @@ class SeachBarcodeTest(unittest.TestCase): reads=reads_test, config=config, adaptator="PCR", + ntuple=0, barcode_dictionary=barcode_dic ), "PCR1" @@ -515,6 +540,7 @@ class SeachBarcodeTest(unittest.TestCase): reads=reads_test, config=config, adaptator="PCT", + ntuple=0, barcode_dictionary=barcode_dic, verbose=True ) @@ -537,6 +563,7 @@ class SeachBarcodeTest(unittest.TestCase): rmi_splitter.match_barcodes( reads=reads_test, config=config, + ntuple=0, barcode_dictionary=barcode_dic ), ['RT1', 'PCR1'] @@ -584,7 +611,7 @@ class HandleFastqTest(unittest.TestCase): rmi_splitter.list_reads_number( config=data_test.CONFIG_TOY_PAIRED ), - [1, 2, 3] + [0, 1, 2] ) def test_assign_reads_single(self): @@ -639,7 +666,7 @@ class HandleFastqTest(unittest.TestCase): config=rmi_splitter.load_yaml( path="rmi_splitter/tests/data/toy_file_paired.yaml"), results_path="../results/", - ntuple_param=range(3), + ntuple_param=3, verbose=True ) self.assertEqual(1, 1) -- GitLab