def test_canonical_cases(self):
    """Check ``split_tag_dict`` against hand-written expected results.

    Every case is a ``(tags, positions, expected)`` triple:

    * ``tags`` maps an integer offset to the list of tag records found
      at that offset,
    * ``positions`` is a list of ``[start, end]`` pairs, one per segment,
    * ``expected`` is the list (one dict per segment) that
      ``split_tag_dict(tags, positions)`` must return.

    In these fixtures, tags sitting on the start offset of the second
    segment are expected to be re-keyed relative to that segment, except
    a leading closing tag, which is expected on the end offset of the
    first segment with ``dec_index`` set.
    """

    def tag(tag_type, text, added_space=False, **extra):
        # Build one tag record. ``extra`` carries optional keys (only
        # ``dec_index`` in this test) that appear in expected outputs.
        record = {'added_space': added_space, 'tag_type': tag_type, 'text': text}
        record.update(extra)
        return record

    cases = [
        # Example 1  <1>A B.</1>C
        (
            {
                0: [tag('open', u'<1>')],
                5: [tag('close', u'</1>', added_space=True)],
            },
            [[0, 4], [5, 6]],
            [
                {
                    0: [tag('open', u'<1>')],
                    4: [tag('close', u'</1>', added_space=True, dec_index=True)],
                },
                {},
            ],
        ),
        # Example 2  A B.<1>C</1>
        (
            {
                5: [tag('open', u'<1>', added_space=True)],
                6: [tag('close', u'</1>')],
            },
            [[0, 4], [5, 6]],
            [
                {},
                {
                    0: [tag('open', u'<1>', added_space=True)],
                    1: [tag('close', u'</1>')],
                },
            ],
        ),
        # Example 3  <1>A B.</1><2>C</2>
        (
            {
                0: [tag('open', u'<1>')],
                5: [
                    tag('close', u'</1>', added_space=True),
                    tag('open', u'<2>'),
                ],
                6: [tag('close', u'</2>')],
            },
            [[0, 4], [5, 6]],
            [
                {
                    0: [tag('open', u'<1>')],
                    4: [tag('close', u'</1>', added_space=True, dec_index=True)],
                },
                {
                    0: [tag('open', u'<2>')],
                    1: [tag('close', u'</2>')],
                },
            ],
        ),
        # Example 4  <1>A B.</1><4></4><2>C</2>
        (
            {
                0: [tag('open', u'<1>')],
                5: [
                    tag('close', u'</1>', added_space=True),
                    tag('open', u'<4>'),
                    tag('close', u'</4>'),
                    tag('open', u'<2>'),
                ],
                6: [tag('close', u'</2>')],
            },
            [[0, 4], [5, 6]],
            [
                {
                    0: [tag('open', u'<1>')],
                    4: [tag('close', u'</1>', added_space=True, dec_index=True)],
                },
                {
                    0: [
                        tag('open', u'<4>'),
                        tag('close', u'</4>'),
                        tag('open', u'<2>'),
                    ],
                    1: [tag('close', u'</2>')],
                },
            ],
        ),
        # Example 5  <1>A B</1>.C
        (
            {
                0: [tag('open', u'<1>')],
                4: [tag('close', u'</1>', added_space=True)],
            },
            [[0, 4], [5, 6]],
            [
                {
                    0: [tag('open', u'<1>')],
                    4: [tag('close', u'</1>', added_space=True)],
                },
                {},
            ],
        ),
    ]

    for index, (tags, positions, expected) in enumerate(cases):
        # One sub-test per example so a failure names the offending case.
        with self.subTest(i=index):
            self.assertEqual(split_tag_dict(tags, positions), expected)
def test_split_join_tags(self):
    """Check that splitting tags and joining them back is a round trip.

    Every case is a ``(tags, positions)`` pair: ``tags`` maps an integer
    offset to the list of tag records at that offset, and ``positions``
    is a list of ``[start, end]`` pairs, one per segment.  The tags are
    split with ``split_tag_dict`` and re-assembled with
    ``join_list_tag_dict`` using the start offset of each segment; the
    result must equal the original ``tags``.
    """

    def tag(tag_type, text, added_space=False):
        # Build one tag record with the three keys used by the fixtures.
        return {'added_space': added_space, 'tag_type': tag_type, 'text': text}

    cases = [
        # Example 1  <1>A B.</1>C
        (
            {
                0: [tag('open', u'<1>')],
                5: [tag('close', u'</1>', added_space=True)],
            },
            [[0, 4], [5, 6]],
        ),
        # Example 2  A B.<1>C</1>
        (
            {
                5: [tag('open', u'<1>', added_space=True)],
                6: [tag('close', u'</1>')],
            },
            [[0, 4], [5, 6]],
        ),
        # Example 3  <1>A B.</1><2>C</2>
        (
            {
                0: [tag('open', u'<1>')],
                5: [
                    tag('close', u'</1>', added_space=True),
                    tag('open', u'<2>'),
                ],
                6: [tag('close', u'</2>')],
            },
            [[0, 4], [5, 6]],
        ),
        # Example 4  <1>A B.</1><4></4><2>C</2>
        (
            {
                0: [tag('open', u'<1>')],
                5: [
                    tag('close', u'</1>', added_space=True),
                    tag('open', u'<4>'),
                    tag('close', u'</4>'),
                    tag('open', u'<2>'),
                ],
                6: [tag('close', u'</2>')],
            },
            [[0, 4], [5, 6]],
        ),
        # Example 5  <1>A B</1>.C
        (
            {
                0: [tag('open', u'<1>')],
                4: [tag('close', u'</1>', added_space=True)],
            },
            [[0, 4], [5, 6]],
        ),
        # Example 7  <1>A B</1><BR/><BR/>.C
        (
            {
                0: [tag('open', u'<1>')],
                4: [
                    tag('close', u'</1>', added_space=True),
                    tag('close', u'<br/>'),
                    tag('close', u'<br/>'),
                ],
            },
            [[0, 4], [5, 6]],
        ),
        # Unlabelled variant of Example 7 with the <br/> tags ahead of
        # the closing </1> (presumably <1>A B<BR/><BR/></1>.C).
        (
            {
                0: [tag('open', u'<1>')],
                4: [
                    tag('close', u'<br/>', added_space=True),
                    tag('close', u'<br/>'),
                    tag('close', u'</1>'),
                ],
            },
            [[0, 4], [5, 6]],
        ),
    ]

    for index, (tags, positions) in enumerate(cases):
        # One sub-test per example so a failure names the offending case.
        with self.subTest(i=index):
            # Start offsets are read before the split, as in the
            # original statement order.
            segment_starts = [segment[0] for segment in positions]
            split_tags = split_tag_dict(tags, positions)
            rejoined = join_list_tag_dict(split_tags, segment_starts)
            self.assertEqual(tags, rejoined)