From 0fbd47e240271e19d6ab5fefe2f79b685ee06c5a Mon Sep 17 00:00:00 2001 From: Laurent Modolo <laurent.modolo@ens-lyon.fr> Date: Thu, 16 May 2019 17:05:09 +0200 Subject: [PATCH] tests: replace SeqIO by the Reads class --- src/rmi_splitter/tests/data_test.py | 14 +++ src/rmi_splitter/tests/rmi_splitter_test.py | 100 ++++++++++++-------- 2 files changed, 75 insertions(+), 39 deletions(-) diff --git a/src/rmi_splitter/tests/data_test.py b/src/rmi_splitter/tests/data_test.py index 4d8db45..56e3f44 100644 --- a/src/rmi_splitter/tests/data_test.py +++ b/src/rmi_splitter/tests/data_test.py @@ -6,6 +6,7 @@ This module provides data for the unitary tests for the rmi_splitter project """ from io import StringIO from Bio import SeqIO +from .. import rmi_splitter # code to create yaml toy """ @@ -68,37 +69,44 @@ TCGCCGCTGGTGTAGTGGTATCATGCGAGAAGAGATG\n\ +\n\ AFFJJJJJJJFFJJJJJJJJJJJJJJFJJJJFFFFJ-" SeqIO_single = SeqIO.read(StringIO(read_single), "fastq") +Reads_single = rmi_splitter.Reads(StringIO(read_single)) SeqIO_trim_single = SeqIO.read(StringIO(read_trim_single), "fastq") +Reads_trim_single = rmi_splitter.Reads(StringIO(read_trim_single)) umi_readnoumi = "@K00201:182:HM3TMBBXX:6:2228:17706:1226 1:N:0:NCAGTG\n\ TAGTGCCTCGCCGCTGGTGTAGTGGTATCATGCGAGAAGAGATG\n\ +\n\ JJJJJAAAFFJJJJJJJFFJJJJJJJJJJJJJJFJJJJFFFFJ-" SeqIO_single_noumi = SeqIO.read(StringIO(umi_readnoumi), "fastq") +Reads_single_noumi = rmi_splitter.Reads(StringIO(umi_readnoumi)) umi_readnoRT = "@K00201:182:HM3TMBBXX:6:2228:17706:1226 1:N:0:NCAGTG\n\ NTTCTCTCGCCGCTGGTGTAGTGGTATCATGCGAGAAGAGATG\n\ +\n\ #AAFFJAFFJJJJJJJFFJJJJJJJJJJJJJJFJJJJFFFFJ-" SeqIO_single_noRT = SeqIO.read(StringIO(umi_readnoRT), "fastq") +Reads_single_noRT = rmi_splitter.Reads(StringIO(umi_readnoRT)) umi_read = "@K00201:182:HM3TMBBXX:6:2228:17706:1226_NTTCTC 1:N:0:NCAGTG\n\ NTTCTCTAGTGCCTCGCCGCTGGTGTAGTGGTATCATGCGAGAAGAGATG\n\ +\n\ #AAFFJJJJJJAAAFFJJJJJJJFFJJJJJJJJJJJJJJFJJJJFFFFJ-" SeqIO_umi_single = SeqIO.read(StringIO(umi_read), "fastq") +Reads_umi_single = rmi_splitter.Reads(StringIO(umi_read)) umi_readnoumi = "@K00201:182:HM3TMBBXX:6:2228:17706:1226_NTTCTC 1:N:0:NCAGTG\n\ TAGTGCCTCGCCGCTGGTGTAGTGGTATCATGCGAGAAGAGATG\n\ +\n\ JJJJJAAAFFJJJJJJJFFJJJJJJJJJJJJJJFJJJJFFFFJ-" SeqIO_umi_single_noumi = SeqIO.read(StringIO(umi_readnoumi), "fastq") +Reads_umi_single_noumi = rmi_splitter.Reads(StringIO(umi_readnoumi)) umi_readnoRT = "@K00201:182:HM3TMBBXX:6:2228:17706:1226_NTTCTC 1:N:0:NCAGTG\n\ NTTCTCTCGCCGCTGGTGTAGTGGTATCATGCGAGAAGAGATG\n\ +\n\ #AAFFJAFFJJJJJJJFFJJJJJJJJJJJJJJFJJJJFFFFJ-" SeqIO_umi_single_noRT = SeqIO.read(StringIO(umi_readnoRT), "fastq") +Reads_umi_single_noRT = rmi_splitter.Reads(StringIO(umi_readnoRT)) """ paired-end @@ -166,6 +174,9 @@ NCAACA\n\ SeqIO_paired_1 = SeqIO.read(StringIO(read_paired_1), "fastq") SeqIO_paired_2 = SeqIO.read(StringIO(read_paired_2), "fastq") SeqIO_paired_3 = SeqIO.read(StringIO(read_paired_3), "fastq") +Reads_paired_1 = rmi_splitter.Reads(StringIO(read_paired_1)) +Reads_paired_2 = rmi_splitter.Reads(StringIO(read_paired_2)) +Reads_paired_3 = rmi_splitter.Reads(StringIO(read_paired_3)) RT_barcode_paired = 'TAGTGCC' PCR_barcode_paired = 'NCAACA' @@ -186,3 +197,6 @@ results_read_paired_3 = "@GWNJ-0842:360:GW1809071399:6:1101:18243:1625 2:N:0:1\n SeqIO_results_paired_1 = SeqIO.read(StringIO(read_paired_1), "fastq") SeqIO_results_paired_2 = SeqIO.read(StringIO(read_paired_2), "fastq") SeqIO_results_paired_3 = SeqIO.read(StringIO(read_paired_3), "fastq") +Reads_results_paired_1 = rmi_splitter.Reads(StringIO(read_paired_1)) +Reads_results_paired_2 = rmi_splitter.Reads(StringIO(read_paired_2)) +Reads_results_paired_3 = rmi_splitter.Reads(StringIO(read_paired_3)) diff --git a/src/rmi_splitter/tests/rmi_splitter_test.py b/src/rmi_splitter/tests/rmi_splitter_test.py index b2dc578..1c321c0 100644 --- a/src/rmi_splitter/tests/rmi_splitter_test.py +++ b/src/rmi_splitter/tests/rmi_splitter_test.py @@ -197,9 +197,10 @@ class ReadsReadTest(unittest.TestCase): """ test extraction of umi """ + reads_test = copy.deepcopy(data_test.Reads_single) self.assertEqual( rmi_splitter.extract_barcode_pos( - read=data_test.SeqIO_single, + reads=reads_test, start=0, stop=5, header=False)['seq'], @@ -210,9 +211,10 @@ class ReadsReadTest(unittest.TestCase): """ test extraction of umi """ + reads_test = copy.deepcopy(data_test.Reads_single) self.assertEqual( rmi_splitter.extract_barcode_pos( - read=data_test.SeqIO_single, + reads=reads_test, start=1, stop=3, header=True)['seq'], @@ -223,9 +225,10 @@ class ReadsReadTest(unittest.TestCase): """ test extraction of RT barcode from position """ + reads_test = copy.deepcopy(data_test.Reads_single) self.assertEqual( rmi_splitter.extract_barcode_pos( - read=data_test.SeqIO_single, + reads=reads_test, start=6, stop=13, header=False)['seq'], @@ -236,8 +239,9 @@ class ReadsReadTest(unittest.TestCase): """ test extraction of PCR barcode from position """ + reads_test = copy.deepcopy(data_test.Reads_single) self.assertEqual( - rmi_splitter.extract_barcode_pos(data_test.SeqIO_single, + rmi_splitter.extract_barcode_pos(reads_test, 0, 5, True)['seq'], @@ -248,9 +252,10 @@ class ReadsReadTest(unittest.TestCase): """ test extraction of RT barcode from config """ + reads_test = copy.deepcopy(data_test.Reads_single) self.assertEqual( rmi_splitter.extract_barcode( - data_test.SeqIO_single, + reads_test, rmi_splitter.load_yaml( path="rmi_splitter/tests/data/toy_file.yaml"), "RT", verbose=True)['seq'], @@ -261,9 +266,10 @@ class ReadsReadTest(unittest.TestCase): """ test extraction of PCR barcode from config """ + reads_test = copy.deepcopy(data_test.Reads_single) self.assertEqual( rmi_splitter.extract_barcode( - data_test.SeqIO_single, + reads_test, rmi_splitter.load_yaml( path="rmi_splitter/tests/data/toy_file.yaml"), "PCR")['seq'], @@ -292,90 +298,98 @@ class ModifyReadTest(unittest.TestCase): """ test writting umi in header """ + reads_test = copy.deepcopy(data_test.Reads_single) self.assertEqual( rmi_splitter.write_umi_in_header( - data_test.SeqIO_single, data_test.CONFIG_TOY, 'UMI').id, - data_test.SeqIO_umi_single.id + reads_test, data_test.CONFIG_TOY, 'UMI').header, + data_test.Reads_umi_single.header ) def test_remove_barcode_umi_pos(self): """ test removing umi with pos """ + reads_test = copy.deepcopy(data_test.Reads_single) + reads_test2 = copy.deepcopy(data_test.Reads_single) + reads_test3 = copy.deepcopy(data_test.Reads_single) self.assertEqual( rmi_splitter.remove_barcode_pos( - seq=data_test.SeqIO_single, + reads=reads_test, start=0, stop=5, header=False).seq, - data_test.SeqIO_single_noumi.seq + data_test.Reads_single_noumi.seq ) self.assertEqual( rmi_splitter.remove_barcode_pos( - seq=data_test.SeqIO_single, + reads=reads_test2, start=0, stop=5, - header=False).description, - data_test.SeqIO_single_noumi.description + header=False).header, + data_test.Reads_single_noumi.header ) self.assertEqual( rmi_splitter.remove_barcode_pos( - seq=data_test.SeqIO_single, + reads=reads_test3, start=0, stop=5, - header=False).letter_annotations['phred_quality'], - data_test.SeqIO_single_noumi.letter_annotations['phred_quality'] + header=False).qual, + data_test.Reads_single_noumi.qual ) def test_remove_barcode_rt_pos_seq(self): """ test removing rt with pos """ + reads_test = copy.deepcopy(data_test.Reads_single) self.assertEqual( rmi_splitter.remove_barcode_pos( - seq=data_test.SeqIO_single, + reads=reads_test, start=6, stop=13, header=False).seq, - data_test.SeqIO_single_noRT.seq + data_test.Reads_single_noRT.seq ) def test_remove_barcode_rt_pos_description(self): """ test removing rt with pos """ + reads_test = copy.deepcopy(data_test.Reads_single) self.assertEqual( rmi_splitter.remove_barcode_pos( - seq=data_test.SeqIO_single, + reads=reads_test, start=6, stop=13, - header=False).description, - data_test.SeqIO_single_noRT.description + header=False).header, + data_test.Reads_single_noRT.header ) def test_remove_barcode_rt_pos_qual(self): """ test removing rt with pos """ + reads_test = copy.deepcopy(data_test.Reads_single) self.assertEqual( rmi_splitter.remove_barcode_pos( - seq=data_test.SeqIO_single, + reads=reads_test, start=6, stop=13, - header=False).letter_annotations['phred_quality'], - data_test.SeqIO_single_noRT.letter_annotations['phred_quality'] + header=False).qual, + data_test.Reads_single_noRT.qual ) def test_remove_barcode_pos_header(self): """ test remove of umi """ + reads_test = copy.deepcopy(data_test.Reads_single) self.assertEqual( rmi_splitter.remove_barcode_pos( - seq=data_test.SeqIO_single, + reads=reads_test, start=1, stop=3, - header=True).description, + header=True).header, "K00201:182:HM3TMBBXX:6:2228:17706:1226 1:N:0:NCG" ) @@ -383,13 +397,14 @@ class ModifyReadTest(unittest.TestCase): """ test removing rt with config """ + reads_test = copy.deepcopy(data_test.Reads_single) self.assertEqual( rmi_splitter.remove_barcode( - data_test.SeqIO_single, + reads_test, data_test.CONFIG_TOY, "RT" ).seq, - data_test.SeqIO_single_noRT.seq + data_test.Reads_single_noRT.seq ) def test_update_config(self): @@ -411,9 +426,11 @@ class ModifyReadTest(unittest.TestCase): barcode_dic = rmi_splitter.create_barcode_dictionaries( config=config ) + reads_test = copy.deepcopy(data_test.Reads_single) + reads_test2 = copy.deepcopy(data_test.Reads_single) self.assertEqual( rmi_splitter.match_barcode( - read=data_test.SeqIO_single, + reads=reads_test, config=rmi_splitter.update_positions( config=data_test.CONFIG_TOY, adaptator="UMI" @@ -422,7 +439,7 @@ class ModifyReadTest(unittest.TestCase): barcode_dictionary=barcode_dic ), rmi_splitter.match_barcode( - read=data_test.SeqIO_single, + reads=reads_test2, config=data_test.CONFIG_TOY, adaptator="PCR", barcode_dictionary=barcode_dic @@ -433,12 +450,13 @@ class ModifyReadTest(unittest.TestCase): """ test removing rt with config """ + reads_test = copy.deepcopy(data_test.Reads_single) self.assertEqual( rmi_splitter.remove_barcodes( - data_test.SeqIO_single, + reads_test, data_test.CONFIG_TOY, ).seq, - data_test.SeqIO_trim_single.seq + data_test.Reads_trim_single.seq ) @@ -455,9 +473,10 @@ class SeachBarcodeTest(unittest.TestCase): barcode_dic = rmi_splitter.create_barcode_dictionaries( config=config ) + reads_test = copy.deepcopy(data_test.Reads_single) self.assertEqual( rmi_splitter.match_barcode( - read=data_test.SeqIO_single, + reads=reads_test, config=config, adaptator="PCR", barcode_dictionary=barcode_dic @@ -474,9 +493,10 @@ class SeachBarcodeTest(unittest.TestCase): barcode_dic = rmi_splitter.create_barcode_dictionaries( config=config ) + reads_test = copy.deepcopy(data_test.Reads_single) try: rmi_splitter.match_barcode( - read=data_test.SeqIO_single, + reads=reads_test, config=config, adaptator="PCT", barcode_dictionary=barcode_dic, @@ -496,9 +516,10 @@ class SeachBarcodeTest(unittest.TestCase): barcode_dic = rmi_splitter.create_barcode_dictionaries( config=config ) + reads_test = copy.deepcopy(data_test.Reads_single) self.assertEqual( rmi_splitter.match_barcodes( - read=data_test.SeqIO_single, + reads=reads_test, config=config, barcode_dictionary=barcode_dic ), @@ -558,9 +579,10 @@ class HandleFastqTest(unittest.TestCase): config=rmi_splitter.load_yaml( path="rmi_splitter/tests/data/toy_file.yaml") ) + reads_test = copy.deepcopy(data_test.Reads_single) self.assertEqual( rmi_splitter.assign_reads( - reads_list=[data_test.SeqIO_single], + reads_list=[reads_test], config=rmi_splitter.load_yaml( path="rmi_splitter/tests/data/toy_file.yaml"), barcode_dictionary=barcode_dic, @@ -579,9 +601,9 @@ class HandleFastqTest(unittest.TestCase): ) self.assertEqual( rmi_splitter.assign_reads( - reads_list=[data_test.SeqIO_paired_1, - data_test.SeqIO_paired_2, - data_test.SeqIO_paired_3], + reads_list=[data_test.Reads_paired_1, + data_test.Reads_paired_2, + data_test.Reads_paired_3], config=rmi_splitter.load_yaml( path="rmi_splitter/tests/data/toy_file_paired.yaml"), barcode_dictionary=barcode_dic, -- GitLab