class TestRegex(unittest.TestCase):
    """Exercise TrieRegEx.regex() against tries with varying contents."""

    def setUp(self):
        """Give every test an empty trie plus a short and a long word list."""
        self.tre = TRE()
        # Each entry is a prefix of the one that follows it.
        self.words = ['p', 'pe', 'pea', 'pear']
        self.more_words = [
            'orange', 'kumquat', 'tangerine', 'mandarin', 'pomelo', 'yuzu',
            'grapefruit', 'lemon', 'lime', 'bergamot', 'citron',
            'clementine', 'satsuma', 'tangelo', 'mikan', 'tangor', 'mint',
            'peppermint', 'spearmint', 'basil', 'cilantro', 'coriander',
            'chives', 'parsley', 'oregano', 'rosemary', 'thyme', 'scallion',
            'ginger', 'garlic', 'onion', 'galangal',
        ]

    def _whole_word_matches(self, text):
        """Return all matches of the trie's regex in *text*.

        The pattern is wrapped in word-boundary anchors so that only
        complete words can match.
        """
        return re.findall(rf'\b{self.tre.regex()}\b', text)

    def test_match_all_incrementals(self):
        """Every word of a prefix chain is matched as a whole word."""
        self.tre.add(*self.words)
        matches = self._whole_word_matches(' '.join(self.words))
        self.assertEqual(sorted(matches), sorted(self.words))

    def test_does_not_match_larger_string(self):
        """A trie holding only 'p' must not match the longer word 'pe'."""
        self.tre.add('p')
        matches = self._whole_word_matches('pe')
        self.assertEqual(matches, [])

    def test_does_not_match_substring(self):
        """A word absent from the trie is not matched via longer words."""
        my_words = self.words[1:]  # everything except 'p'
        self.tre.add(*my_words)
        matches = self._whole_word_matches(' '.join(self.words))
        self.assertEqual(
            matches, sorted(my_words), msg="'p' should not be captured")

    def test_empty_trie_returns_empty_string_regex(self):
        """With nothing added, regex() is the empty string."""
        self.assertEqual(self.tre.regex(), '')

    def test_match_all_words(self):
        """Each word of the long list is found in the joined text."""
        self.tre.add(*self.more_words)
        matches = self._whole_word_matches(' '.join(self.more_words))
        self.assertEqual(sorted(matches), sorted(self.more_words))

    def test_match_all_words_surrounded_by_spaces(self):
        """Only words with a space on both sides are matched."""
        words = sorted(self.more_words)
        self.tre.add(*words)
        # Look-arounds require a space before and after each match, which
        # rules out the first and last word of the joined text.
        pattern = f"(?<= ){self.tre.regex()}(?= )"
        text = ' '.join(words)
        matches = re.findall(pattern, text)
        self.assertEqual(
            matches,
            words[1:-1],
            msg='First and last item in sorted words list should not be matched.',
        )

    def test_added_word_reflected_in_new_regex_call(self):
        """regex() reflects a word added after an earlier regex() call."""
        self.tre.add(*self.words)
        self.assertEqual(
            self.tre.regex(),
            'p(?:e(?:ar?)?)?',
            msg='Setup for the real test in the next assertEqual',
        )

        self.tre.add('peak')
        self.assertEqual(self.tre.regex(), 'p(?:e(?:a[kr]?)?)?')

    def test_removed_word_reflected_in_new_regex_call(self):
        """regex() reflects a word removed after an earlier regex() call."""
        expanded = [*self.words, 'peak']
        self.tre.add(*expanded)
        self.assertEqual(
            self.tre.regex(),
            'p(?:e(?:a[kr]?)?)?',
            msg='Setup for the real test in the next assertEqual',
        )

        self.tre.remove('peak')
        self.assertEqual(self.tre.regex(), 'p(?:e(?:ar?)?)?')

    def test_multiple_adding_removing_reflected(self):
        """Interleaved add/remove calls always yield an up-to-date regex.

        This also checks that the memoizer cache is cleared in the right
        places, so that .add(), .remove() and .regex() keep agreeing with
        one another.
        """
        trie = self.tre
        trie.add(*self.words)
        self.assertEqual(
            trie.regex(),
            'p(?:e(?:ar?)?)?',
            msg='Setup for the real test in the next assertEqual',
        )

        # Order matters: the mutations are applied exactly as listed.
        first_round = (
            (trie.add, 'peak'),
            (trie.remove, 'pe'),
            (trie.add, 'river'),
            (trie.add, 'rich'),
            (trie.remove, 'pea'),
            (trie.remove, 'peak'),
        )
        for mutate, word in first_round:
            mutate(word)
        self.assertEqual(trie.regex(), '(?:ri(?:ver|ch)|p(?:ear)?)')

        second_round = (
            (trie.add, 'peak'),
            (trie.remove, 'peak'),
            (trie.remove, 'pear'),
        )
        for mutate, word in second_round:
            mutate(word)
        trie.add(*self.words)
        self.assertEqual(trie.regex(), '(?:p(?:e(?:ar?)?)?|ri(?:ver|ch))')
class TestTrieRegEx(unittest.TestCase):
    """Smoke tests covering each function of trieregex.TrieRegEx.

    The more thorough tests live in files named after the function they
    cover.
    """

    def setUp(self):
        """Build a trie pre-loaded with a small fixed vocabulary."""
        self.words = [
            'heart', 'healthy', 'pear', 'peach', 'lark', 'look', 'change',
        ]
        self.tre = TRE(*self.words)

    def test_add(self):
        """Words passed to the constructor end up in the nested trie dict."""
        # In these literals a '**' key sits at the end of each word's path.
        expected_trie = {
            'c': {'h': {'a': {'n': {'g': {'e': {'**': {}}}}}}},
            'h': {'e': {'a': {
                'l': {'t': {'h': {'y': {'**': {}}}}},
                'r': {'t': {'**': {}}},
            }}},
            'l': {
                'a': {'r': {'k': {'**': {}}}},
                'o': {'o': {'k': {'**': {}}}},
            },
            'p': {'e': {'a': {
                'c': {'h': {'**': {}}},
                'r': {'**': {}},
            }}},
        }
        self.assertEqual(
            self.tre._trie,
            expected_trie,
            msg="Words were not added to the trie (._trie) properly",
        )

    def test_remove(self):
        """Removing words deletes their paths and leaves the rest intact."""
        self.tre = TRE(*self.words)
        self.tre.remove('healthy', 'change')

        expected_trie = {
            'h': {'e': {'a': {'r': {'t': {'**': {}}}}}},
            'l': {
                'a': {'r': {'k': {'**': {}}}},
                'o': {'o': {'k': {'**': {}}}},
            },
            'p': {'e': {'a': {
                'c': {'h': {'**': {}}},
                'r': {'**': {}},
            }}},
        }
        self.assertEqual(
            self.tre._trie,
            expected_trie,
            msg="'healthy' and 'change' were not properly removed from the trie",
        )

    def test_has(self):
        """has() is true for stored words and false for near misses."""
        for word in self.words:
            self.assertTrue(
                self.tre.has(word),
                msg=f"'{word}' should be searchable in trie",
            )

        # Prefixes, suffixes and other fragments of the stored words.
        absent_words = ('hear', 'ear', 'each', 'hang', 'ok', 'heal', 'pa')
        for word in absent_words:
            self.assertFalse(
                self.tre.has(word),
                msg=f"'{word}' should not be searchable in trie",
            )

    def test_initials_variable(self):
        """_initials maps each first letter to the number of words using it."""
        expected_counts = dict(c=1, h=2, l=2, p=2)
        self.assertEqual(self.tre._initials, expected_counts)

    def test_initials(self):
        """initials() lists the distinct first letters in sorted order."""
        self.assertEqual(self.tre.initials(), list('chlp'))

    def test_finals_variable(self):
        """_finals maps each last letter to the number of words using it."""
        expected_counts = dict(e=1, h=1, k=2, r=1, t=1, y=1)
        self.assertEqual(self.tre._finals, expected_counts)

    def test_finals(self):
        """finals() lists the distinct last letters in sorted order."""
        self.assertEqual(self.tre.finals(), list('ehkrty'))

    def test_regex(self):
        """regex() yields one alternation covering every stored word."""
        expected_pattern = "(?:hea(?:lthy|rt)|l(?:ark|ook)|pea(?:ch|r)|change)"
        self.assertEqual(self.tre.regex(), expected_pattern)