From 0fbd47e240271e19d6ab5fefe2f79b685ee06c5a Mon Sep 17 00:00:00 2001
From: Laurent Modolo <laurent.modolo@ens-lyon.fr>
Date: Thu, 16 May 2019 17:05:09 +0200
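Subject: [PATCH] tests: replace SeqIO with the Reads class

data_test.py gains Reads_* fixtures built with rmi_splitter.Reads next to
the existing SeqIO_* ones. rmi_splitter_test.py now passes Reads objects
instead of SeqIO records (the `read=` / `seq=` keywords become `reads=`;
the single-end tests work on a deep copy of the fixture) and compares
`.header` / `.qual` instead of `.id` / `.description` /
`.letter_annotations['phred_quality']`.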

---
 src/rmi_splitter/tests/data_test.py         |  14 +++
 src/rmi_splitter/tests/rmi_splitter_test.py | 100 ++++++++++++--------
 2 files changed, 75 insertions(+), 39 deletions(-)

diff --git a/src/rmi_splitter/tests/data_test.py b/src/rmi_splitter/tests/data_test.py
index 4d8db45..56e3f44 100644
--- a/src/rmi_splitter/tests/data_test.py
+++ b/src/rmi_splitter/tests/data_test.py
@@ -6,6 +6,7 @@ This module provides data for the unitary tests for the rmi_splitter project
 """
 from io import StringIO
 from Bio import SeqIO
+from .. import rmi_splitter
 # code to create yaml toy
 
 """
@@ -68,37 +69,44 @@ TCGCCGCTGGTGTAGTGGTATCATGCGAGAAGAGATG\n\
 +\n\
 AFFJJJJJJJFFJJJJJJJJJJJJJJFJJJJFFFFJ-"
 SeqIO_single = SeqIO.read(StringIO(read_single), "fastq")
+Reads_single = rmi_splitter.Reads(StringIO(read_single))
 SeqIO_trim_single = SeqIO.read(StringIO(read_trim_single), "fastq")
+Reads_trim_single = rmi_splitter.Reads(StringIO(read_trim_single))
 
 umi_readnoumi = "@K00201:182:HM3TMBBXX:6:2228:17706:1226 1:N:0:NCAGTG\n\
 TAGTGCCTCGCCGCTGGTGTAGTGGTATCATGCGAGAAGAGATG\n\
 +\n\
 JJJJJAAAFFJJJJJJJFFJJJJJJJJJJJJJJFJJJJFFFFJ-"
 SeqIO_single_noumi = SeqIO.read(StringIO(umi_readnoumi), "fastq")
+Reads_single_noumi = rmi_splitter.Reads(StringIO(umi_readnoumi))
 
 umi_readnoRT = "@K00201:182:HM3TMBBXX:6:2228:17706:1226 1:N:0:NCAGTG\n\
 NTTCTCTCGCCGCTGGTGTAGTGGTATCATGCGAGAAGAGATG\n\
 +\n\
 #AAFFJAFFJJJJJJJFFJJJJJJJJJJJJJJFJJJJFFFFJ-"
 SeqIO_single_noRT = SeqIO.read(StringIO(umi_readnoRT), "fastq")
+Reads_single_noRT = rmi_splitter.Reads(StringIO(umi_readnoRT))
 
 umi_read = "@K00201:182:HM3TMBBXX:6:2228:17706:1226_NTTCTC 1:N:0:NCAGTG\n\
 NTTCTCTAGTGCCTCGCCGCTGGTGTAGTGGTATCATGCGAGAAGAGATG\n\
 +\n\
 #AAFFJJJJJJAAAFFJJJJJJJFFJJJJJJJJJJJJJJFJJJJFFFFJ-"
 SeqIO_umi_single = SeqIO.read(StringIO(umi_read), "fastq")
+Reads_umi_single = rmi_splitter.Reads(StringIO(umi_read))
 
 umi_readnoumi = "@K00201:182:HM3TMBBXX:6:2228:17706:1226_NTTCTC 1:N:0:NCAGTG\n\
 TAGTGCCTCGCCGCTGGTGTAGTGGTATCATGCGAGAAGAGATG\n\
 +\n\
 JJJJJAAAFFJJJJJJJFFJJJJJJJJJJJJJJFJJJJFFFFJ-"
 SeqIO_umi_single_noumi = SeqIO.read(StringIO(umi_readnoumi), "fastq")
+Reads_umi_single_noumi = rmi_splitter.Reads(StringIO(umi_readnoumi))
 
 umi_readnoRT = "@K00201:182:HM3TMBBXX:6:2228:17706:1226_NTTCTC 1:N:0:NCAGTG\n\
 NTTCTCTCGCCGCTGGTGTAGTGGTATCATGCGAGAAGAGATG\n\
 +\n\
 #AAFFJAFFJJJJJJJFFJJJJJJJJJJJJJJFJJJJFFFFJ-"
 SeqIO_umi_single_noRT = SeqIO.read(StringIO(umi_readnoRT), "fastq")
+Reads_umi_single_noRT = rmi_splitter.Reads(StringIO(umi_readnoRT))
 
 """
 paired-end
@@ -166,6 +174,9 @@ NCAACA\n\
 SeqIO_paired_1 = SeqIO.read(StringIO(read_paired_1), "fastq")
 SeqIO_paired_2 = SeqIO.read(StringIO(read_paired_2), "fastq")
 SeqIO_paired_3 = SeqIO.read(StringIO(read_paired_3), "fastq")
+Reads_paired_1 = rmi_splitter.Reads(StringIO(read_paired_1))
+Reads_paired_2 = rmi_splitter.Reads(StringIO(read_paired_2))
+Reads_paired_3 = rmi_splitter.Reads(StringIO(read_paired_3))
 
 RT_barcode_paired = 'TAGTGCC'
 PCR_barcode_paired = 'NCAACA'
@@ -186,3 +197,6 @@ results_read_paired_3 = "@GWNJ-0842:360:GW1809071399:6:1101:18243:1625 2:N:0:1\n
 SeqIO_results_paired_1 = SeqIO.read(StringIO(read_paired_1), "fastq")
 SeqIO_results_paired_2 = SeqIO.read(StringIO(read_paired_2), "fastq")
 SeqIO_results_paired_3 = SeqIO.read(StringIO(read_paired_3), "fastq")
+Reads_results_paired_1 = rmi_splitter.Reads(StringIO(results_read_paired_1))
+Reads_results_paired_2 = rmi_splitter.Reads(StringIO(results_read_paired_2))
+Reads_results_paired_3 = rmi_splitter.Reads(StringIO(results_read_paired_3))
diff --git a/src/rmi_splitter/tests/rmi_splitter_test.py b/src/rmi_splitter/tests/rmi_splitter_test.py
index b2dc578..1c321c0 100644
--- a/src/rmi_splitter/tests/rmi_splitter_test.py
+++ b/src/rmi_splitter/tests/rmi_splitter_test.py
@@ -197,9 +197,10 @@ class ReadsReadTest(unittest.TestCase):
         """
         test extraction of umi
         """
+        reads_test = copy.deepcopy(data_test.Reads_single)
         self.assertEqual(
             rmi_splitter.extract_barcode_pos(
-                read=data_test.SeqIO_single,
+                reads=reads_test,
                 start=0,
                 stop=5,
                 header=False)['seq'],
@@ -210,9 +211,10 @@ class ReadsReadTest(unittest.TestCase):
         """
         test extraction of umi
         """
+        reads_test = copy.deepcopy(data_test.Reads_single)
         self.assertEqual(
             rmi_splitter.extract_barcode_pos(
-                read=data_test.SeqIO_single,
+                reads=reads_test,
                 start=1,
                 stop=3,
                 header=True)['seq'],
@@ -223,9 +225,10 @@ class ReadsReadTest(unittest.TestCase):
         """
         test extraction of RT barcode from position
         """
+        reads_test = copy.deepcopy(data_test.Reads_single)
         self.assertEqual(
             rmi_splitter.extract_barcode_pos(
-                read=data_test.SeqIO_single,
+                reads=reads_test,
                 start=6,
                 stop=13,
                 header=False)['seq'],
@@ -236,8 +239,9 @@ class ReadsReadTest(unittest.TestCase):
         """
         test extraction of PCR barcode from position
         """
+        reads_test = copy.deepcopy(data_test.Reads_single)
         self.assertEqual(
-            rmi_splitter.extract_barcode_pos(data_test.SeqIO_single,
+            rmi_splitter.extract_barcode_pos(reads_test,
                                              0,
                                              5,
                                              True)['seq'],
@@ -248,9 +252,10 @@ class ReadsReadTest(unittest.TestCase):
         """
         test extraction of RT barcode from config
         """
+        reads_test = copy.deepcopy(data_test.Reads_single)
         self.assertEqual(
             rmi_splitter.extract_barcode(
-                data_test.SeqIO_single,
+                reads_test,
                 rmi_splitter.load_yaml(
                     path="rmi_splitter/tests/data/toy_file.yaml"),
                 "RT", verbose=True)['seq'],
@@ -261,9 +266,10 @@ class ReadsReadTest(unittest.TestCase):
         """
         test extraction of PCR barcode from config
         """
+        reads_test = copy.deepcopy(data_test.Reads_single)
         self.assertEqual(
             rmi_splitter.extract_barcode(
-                data_test.SeqIO_single,
+                reads_test,
                 rmi_splitter.load_yaml(
                     path="rmi_splitter/tests/data/toy_file.yaml"),
                 "PCR")['seq'],
@@ -292,90 +298,98 @@ class ModifyReadTest(unittest.TestCase):
         """
         test writting umi in header
         """
+        reads_test = copy.deepcopy(data_test.Reads_single)
         self.assertEqual(
             rmi_splitter.write_umi_in_header(
-                data_test.SeqIO_single, data_test.CONFIG_TOY, 'UMI').id,
-            data_test.SeqIO_umi_single.id
+                reads_test, data_test.CONFIG_TOY, 'UMI').header,
+            data_test.Reads_umi_single.header
         )
 
     def test_remove_barcode_umi_pos(self):
         """
         test removing umi with pos
         """
+        reads_test = copy.deepcopy(data_test.Reads_single)
+        reads_test2 = copy.deepcopy(data_test.Reads_single)
+        reads_test3 = copy.deepcopy(data_test.Reads_single)
         self.assertEqual(
             rmi_splitter.remove_barcode_pos(
-                seq=data_test.SeqIO_single,
+                reads=reads_test,
                 start=0,
                 stop=5,
                 header=False).seq,
-            data_test.SeqIO_single_noumi.seq
+            data_test.Reads_single_noumi.seq
         )
         self.assertEqual(
             rmi_splitter.remove_barcode_pos(
-                seq=data_test.SeqIO_single,
+                reads=reads_test2,
                 start=0,
                 stop=5,
-                header=False).description,
-            data_test.SeqIO_single_noumi.description
+                header=False).header,
+            data_test.Reads_single_noumi.header
         )
         self.assertEqual(
             rmi_splitter.remove_barcode_pos(
-                seq=data_test.SeqIO_single,
+                reads=reads_test3,
                 start=0,
                 stop=5,
-                header=False).letter_annotations['phred_quality'],
-            data_test.SeqIO_single_noumi.letter_annotations['phred_quality']
+                header=False).qual,
+            data_test.Reads_single_noumi.qual
         )
 
     def test_remove_barcode_rt_pos_seq(self):
         """
         test removing rt with pos
         """
+        reads_test = copy.deepcopy(data_test.Reads_single)
         self.assertEqual(
             rmi_splitter.remove_barcode_pos(
-                seq=data_test.SeqIO_single,
+                reads=reads_test,
                 start=6,
                 stop=13,
                 header=False).seq,
-            data_test.SeqIO_single_noRT.seq
+            data_test.Reads_single_noRT.seq
         )
 
     def test_remove_barcode_rt_pos_description(self):
         """
         test removing rt with pos
         """
+        reads_test = copy.deepcopy(data_test.Reads_single)
         self.assertEqual(
             rmi_splitter.remove_barcode_pos(
-                seq=data_test.SeqIO_single,
+                reads=reads_test,
                 start=6,
                 stop=13,
-                header=False).description,
-            data_test.SeqIO_single_noRT.description
+                header=False).header,
+            data_test.Reads_single_noRT.header
         )
 
     def test_remove_barcode_rt_pos_qual(self):
         """
         test removing rt with pos
         """
+        reads_test = copy.deepcopy(data_test.Reads_single)
         self.assertEqual(
             rmi_splitter.remove_barcode_pos(
-                seq=data_test.SeqIO_single,
+                reads=reads_test,
                 start=6,
                 stop=13,
-                header=False).letter_annotations['phred_quality'],
-            data_test.SeqIO_single_noRT.letter_annotations['phred_quality']
+                header=False).qual,
+            data_test.Reads_single_noRT.qual
         )
 
     def test_remove_barcode_pos_header(self):
         """
         test remove of umi
         """
+        reads_test = copy.deepcopy(data_test.Reads_single)
         self.assertEqual(
             rmi_splitter.remove_barcode_pos(
-                seq=data_test.SeqIO_single,
+                reads=reads_test,
                 start=1,
                 stop=3,
-                header=True).description,
+                header=True).header,
             "K00201:182:HM3TMBBXX:6:2228:17706:1226 1:N:0:NCG"
         )
 
@@ -383,13 +397,14 @@ class ModifyReadTest(unittest.TestCase):
         """
         test removing rt with config
         """
+        reads_test = copy.deepcopy(data_test.Reads_single)
         self.assertEqual(
             rmi_splitter.remove_barcode(
-                data_test.SeqIO_single,
+                reads_test,
                 data_test.CONFIG_TOY,
                 "RT"
             ).seq,
-            data_test.SeqIO_single_noRT.seq
+            data_test.Reads_single_noRT.seq
         )
 
     def test_update_config(self):
@@ -411,9 +426,11 @@ class ModifyReadTest(unittest.TestCase):
         barcode_dic = rmi_splitter.create_barcode_dictionaries(
             config=config
         )
+        reads_test = copy.deepcopy(data_test.Reads_single)
+        reads_test2 = copy.deepcopy(data_test.Reads_single)
         self.assertEqual(
             rmi_splitter.match_barcode(
-                read=data_test.SeqIO_single,
+                reads=reads_test,
                 config=rmi_splitter.update_positions(
                     config=data_test.CONFIG_TOY,
                     adaptator="UMI"
@@ -422,7 +439,7 @@ class ModifyReadTest(unittest.TestCase):
                 barcode_dictionary=barcode_dic
             ),
             rmi_splitter.match_barcode(
-                read=data_test.SeqIO_single,
+                reads=reads_test2,
                 config=data_test.CONFIG_TOY,
                 adaptator="PCR",
                 barcode_dictionary=barcode_dic
@@ -433,12 +450,13 @@ class ModifyReadTest(unittest.TestCase):
         """
         test removing rt with config
         """
+        reads_test = copy.deepcopy(data_test.Reads_single)
         self.assertEqual(
             rmi_splitter.remove_barcodes(
-                data_test.SeqIO_single,
+                reads_test,
                 data_test.CONFIG_TOY,
             ).seq,
-            data_test.SeqIO_trim_single.seq
+            data_test.Reads_trim_single.seq
         )
 
 
@@ -455,9 +473,10 @@ class SeachBarcodeTest(unittest.TestCase):
         barcode_dic = rmi_splitter.create_barcode_dictionaries(
             config=config
         )
+        reads_test = copy.deepcopy(data_test.Reads_single)
         self.assertEqual(
             rmi_splitter.match_barcode(
-                read=data_test.SeqIO_single,
+                reads=reads_test,
                 config=config,
                 adaptator="PCR",
                 barcode_dictionary=barcode_dic
@@ -474,9 +493,10 @@ class SeachBarcodeTest(unittest.TestCase):
         barcode_dic = rmi_splitter.create_barcode_dictionaries(
             config=config
         )
+        reads_test = copy.deepcopy(data_test.Reads_single)
         try:
             rmi_splitter.match_barcode(
-                read=data_test.SeqIO_single,
+                reads=reads_test,
                 config=config,
                 adaptator="PCT",
                 barcode_dictionary=barcode_dic,
@@ -496,9 +516,10 @@ class SeachBarcodeTest(unittest.TestCase):
         barcode_dic = rmi_splitter.create_barcode_dictionaries(
             config=config
         )
+        reads_test = copy.deepcopy(data_test.Reads_single)
         self.assertEqual(
             rmi_splitter.match_barcodes(
-                read=data_test.SeqIO_single,
+                reads=reads_test,
                 config=config,
                 barcode_dictionary=barcode_dic
             ),
@@ -558,9 +579,10 @@ class HandleFastqTest(unittest.TestCase):
             config=rmi_splitter.load_yaml(
                 path="rmi_splitter/tests/data/toy_file.yaml")
         )
+        reads_test = copy.deepcopy(data_test.Reads_single)
         self.assertEqual(
             rmi_splitter.assign_reads(
-                reads_list=[data_test.SeqIO_single],
+                reads_list=[reads_test],
                 config=rmi_splitter.load_yaml(
                     path="rmi_splitter/tests/data/toy_file.yaml"),
                 barcode_dictionary=barcode_dic,
@@ -579,9 +601,9 @@ class HandleFastqTest(unittest.TestCase):
         )
         self.assertEqual(
             rmi_splitter.assign_reads(
-                reads_list=[data_test.SeqIO_paired_1,
-                            data_test.SeqIO_paired_2,
-                            data_test.SeqIO_paired_3],
+                reads_list=[copy.deepcopy(data_test.Reads_paired_1),
+                            copy.deepcopy(data_test.Reads_paired_2),
+                            copy.deepcopy(data_test.Reads_paired_3)],
                 config=rmi_splitter.load_yaml(
                     path="rmi_splitter/tests/data/toy_file_paired.yaml"),
                 barcode_dictionary=barcode_dic,
-- 
GitLab