示例#1
0
 def test_remove_one_in_multiple_shared(self):
     """Remove one of three words that all share the prefix 'brand'."""
     shared = TRE('brander', 'brandy', 'brandless')
     shared.remove('brandless')

     # "hard" check: compare with a hand-written trie literal
     after_brand = {
         'y': {'**': {}},
         'e': {'r': {'**': {}}},
     }
     literal = {'b': {'r': {'a': {'n': {'d': after_brand}}}}}
     self.assertEqual(
         shared._trie, literal,
         "'brandless' should have been removed (hard check)")

     # "soft" check: compare with a trie built from the two kept words
     rebuilt = TRE('brander', 'brandy')
     self.assertEqual(
         shared._trie, rebuilt._trie,
         "'brandless' should have been removed (soft check)")
示例#2
0
class TestHas(unittest.TestCase):
    """Membership checks through TrieRegEx.has()."""

    def setUp(self):
        # All fixture words start with 'heal'.
        self.words = ['heal', 'health', 'healthy', 'healthier', 'healthiest']
        self.tre = TRE(*self.words)

    def test_existing_longest_word(self):
        longest = 'healthiest'
        self.assertTrue(self.tre.has(longest))

    def test_existing_substring_word(self):
        # Added words that are also the beginning of other added words.
        for stored in ('health', 'heal'):
            self.assertTrue(self.tre.has(stored))

    def test_nonexisting(self):
        found = self.tre.has('wound')
        self.assertFalse(found)

    def test_nonword_substring_of_existing_word(self):
        # 'he' begins every fixture word but was never added on its own.
        found = self.tre.has('he')
        self.assertFalse(found)

    def test_nonexisting_after_removed(self):
        """Doubles as a check of TrieRegEx.remove()."""
        target = 'healthy'
        self.assertTrue(self.tre.has(target),
                        msg="'healthy' must first exist in trie")
        self.tre.remove(target)
        self.assertFalse(self.tre.has(target))

    def test_existing_after_added(self):
        """Doubles as a check of TrieRegEx.add()."""
        newcomer = 'settled'
        self.assertFalse(self.tre.has(newcomer),
                         msg="'settled' must first NOT exist in trie")
        self.tre.add(newcomer)
        self.assertTrue(self.tre.has(newcomer))

    def test_empty_string(self):
        found = self.tre.has('')
        self.assertFalse(found)
示例#3
0
class TestRegex(unittest.TestCase):
    """Tests for TrieRegEx.regex()"""

    def setUp(self):
        # Start from an empty trie; each test adds the words it needs.
        self.tre = TRE()
        self.words = ['p', 'pe', 'pea', 'pear']
        self.more_words = [
            'orange', 'kumquat', 'tangerine', 'mandarin', 'pomelo', 'yuzu',
            'grapefruit', 'lemon', 'lime', 'bergamot', 'citron', 'clementine',
            'satsuma', 'tangelo', 'mikan', 'tangor', 'mint', 'peppermint',
            'spearmint', 'basil', 'cilantro', 'coriander', 'chives', 'parsley',
            'oregano', 'rosemary', 'thyme', 'scallion', 'ginger', 'garlic',
            'onion', 'galangal'
        ]

    def _whole_word_matches(self, text):
        """Return re.findall() of the current regex wrapped in ``\\b`` anchors.

        Args:
            text: The string to search.

        Returns:
            The list produced by ``re.findall``, in order of appearance.
        """
        return re.findall(rf'\b{self.tre.regex()}\b', text)

    def test_match_all_incrementals(self):
        """Each word that extends the previous one by a letter is matched."""
        self.tre.add(*self.words)
        found = self._whole_word_matches(' '.join(self.words))

        self.assertEqual(sorted(found), sorted(self.words))

    def test_does_not_match_larger_string(self):
        """A regex built from 'p' alone must not match the word 'pe'."""
        self.tre.add('p')
        found = self._whole_word_matches('pe')
        self.assertEqual(found, [])

    def test_does_not_match_substring(self):
        """'p' is left out of the trie, so it must not be found in the text."""
        my_words = self.words[1:]  # leave out 'p'
        self.tre.add(*my_words)
        found = self._whole_word_matches(' '.join(self.words))
        # Sort both sides, as the sibling tests do, so the assertion does not
        # depend on the fixture list happening to be in sorted order.
        self.assertEqual(sorted(found), sorted(my_words),
                         "'p' should not be captured")

    def test_empty_trie_returns_empty_string_regex(self):
        self.assertEqual(self.tre.regex(), '')

    def test_match_all_words(self):
        """Every word of the larger fixture list is matched as a whole word."""
        self.tre.add(*self.more_words)
        found = self._whole_word_matches(' '.join(self.more_words))
        self.assertEqual(sorted(found), sorted(self.more_words))

    def test_match_all_words_surrounded_by_spaces(self):
        """Use space lookarounds instead of word-boundary anchors."""
        words = sorted(self.more_words)
        self.tre.add(*words)
        # The first and last words of the joined text lack a space on one
        # side, so the lookbehind/lookahead pair cannot match them.
        found = re.findall(f"(?<= ){self.tre.regex()}(?= )", ' '.join(words))
        self.assertEqual(
            found, words[1:-1],
            'First and last item in sorted words list should not be matched.')

    def test_added_word_reflected_in_new_regex_call(self):
        self.tre.add(*self.words)
        self.assertEqual(self.tre.regex(), 'p(?:e(?:ar?)?)?',
                         'Setup for the real test in the next assertEqual')
        self.tre.add('peak')
        self.assertEqual(self.tre.regex(), 'p(?:e(?:a[kr]?)?)?')

    def test_removed_word_reflected_in_new_regex_call(self):
        expanded = self.words + ['peak']
        self.tre.add(*expanded)
        self.assertEqual(self.tre.regex(), 'p(?:e(?:a[kr]?)?)?',
                         'Setup for the real test in the next assertEqual')
        self.tre.remove('peak')
        self.assertEqual(self.tre.regex(), 'p(?:e(?:ar?)?)?')

    def test_multiple_adding_removing_reflected(self):
        """This test also checks that the memoizer cache clearing is called
        in the right places so that .add(), .remove(), and .regex() run
        correctly as expected
        """
        self.tre.add(*self.words)
        self.assertEqual(self.tre.regex(), 'p(?:e(?:ar?)?)?',
                         'Setup for the real test in the next assertEqual')
        self.tre.add('peak')
        self.tre.remove('pe')
        self.tre.add('river')
        self.tre.add('rich')
        self.tre.remove('pea')
        self.tre.remove('peak')
        self.assertEqual(self.tre.regex(), '(?:ri(?:ver|ch)|p(?:ear)?)')
        self.tre.add('peak')
        self.tre.remove('peak')
        self.tre.remove('pear')
        self.tre.add(*self.words)
        self.assertEqual(self.tre.regex(), '(?:p(?:e(?:ar?)?)?|ri(?:ver|ch))')
示例#4
0
class TestTrieRegEx(unittest.TestCase):
    """Smoke tests covering each function of trieregex.TrieRegEx.

    Deeper, per-function tests live in the files named after those functions.
    """

    def setUp(self):
        self.words = [
            'heart', 'healthy', 'pear', 'peach', 'lark', 'look', 'change',
        ]
        self.tre = TRE(*self.words)

    def test_add(self):
        # One expected sub-trie per initial letter of the fixture words.
        branch_c = {'h': {'a': {'n': {'g': {'e': {'**': {}}}}}}}
        branch_l = {
            'a': {'r': {'k': {'**': {}}}},
            'o': {'o': {'k': {'**': {}}}},
        }
        branch_h = {'e': {'a': {
            'l': {'t': {'h': {'y': {'**': {}}}}},
            'r': {'t': {'**': {}}},
        }}}
        branch_p = {'e': {'a': {
            'c': {'h': {'**': {}}},
            'r': {'**': {}},
        }}}
        expected = {
            'c': branch_c,
            'l': branch_l,
            'h': branch_h,
            'p': branch_p,
        }
        self.assertEqual(
            self.tre._trie, expected,
            "Words were not added to the trie (._trie) properly")

    def test_remove(self):
        # Rebuild the trie from the fixture words (as setUp already did)
        # before removing two of them.
        self.tre = TRE(*self.words)
        self.tre.remove('healthy', 'change')
        remaining = {
            'l': {
                'a': {'r': {'k': {'**': {}}}},
                'o': {'o': {'k': {'**': {}}}},
            },
            'h': {'e': {'a': {'r': {'t': {'**': {}}}}}},
            'p': {'e': {'a': {
                'c': {'h': {'**': {}}},
                'r': {'**': {}},
            }}},
        }
        self.assertEqual(
            self.tre._trie, remaining,
            "'healthy' and 'change' were not properly removed from the trie")

    def test_has(self):
        # Every fixture word must be reported as present ...
        for entry in self.words:
            self.assertTrue(self.tre.has(entry),
                            f"'{entry}' should be searchable in trie")
        # ... while fragments of them and unrelated strings must not be.
        for absent in ('hear', 'ear', 'each', 'hang', 'ok', 'heal', 'pa'):
            self.assertFalse(self.tre.has(absent),
                             f"'{absent}' should not be searchable in trie")

    def test_initials_variable(self):
        first_letter_counts = dict(c=1, h=2, l=2, p=2)
        self.assertEqual(self.tre._initials, first_letter_counts)

    def test_initials(self):
        first_letters = list('chlp')
        self.assertEqual(self.tre.initials(), first_letters)

    def test_finals_variable(self):
        last_letter_counts = dict(e=1, h=1, k=2, r=1, t=1, y=1)
        self.assertEqual(self.tre._finals, last_letter_counts)

    def test_finals(self):
        last_letters = list('ehkrty')
        self.assertEqual(self.tre.finals(), last_letters)

    def test_regex(self):
        pattern = "(?:hea(?:lthy|rt)|l(?:ark|ook)|pea(?:ch|r)|change)"
        self.assertEqual(self.tre.regex(), pattern)
示例#5
0
class TestInitials(unittest.TestCase):
    """Tests for TrieRegEx.initials() and TrieRegEx._initials"""

    def setUp(self):
        self.words = [
            'all', 'the', 'stars', 'we', 'steal', 'from', 'night', 'sky',
            'will', 'never', 'be', 'enough'
        ]
        self.tre = TRE(*self.words)

    def _expected_counts(self, changes=None):
        """Return the first-letter counts of ``self.words`` with overrides.

        Args:
            changes: Optional mapping of first letter to the count a test
                expects after its own add/remove calls. Letters that are
                not listed keep the value they have right after setUp.

        Returns:
            A fresh dict on every call, so a test cannot affect another.
        """
        counts = {
            'a': 1,
            't': 1,
            's': 3,
            'w': 2,
            'f': 1,
            'n': 2,
            'b': 1,
            'e': 1,
        }
        if changes:
            counts.update(changes)
        return counts

    def test_initials_variable(self):
        self.assertEqual(  # "hard" check against hand-written counts
            self.tre._initials, self._expected_counts())
        initials = defaultdict(int)  # "soft" check, recounted from the words
        for w in self.words:
            initials[w[0]] += 1
        self.assertEqual(self.tre._initials, initials)

    def test_initials(self):
        self.assertEqual(  # "hard" check
            self.tre.initials(), ['a', 'b', 'e', 'f', 'n', 's', 't', 'w'])
        self.assertEqual(  # "soft" check
            self.tre.initials(), sorted({w[0] for w in self.words}))

    def test_add_existing_word_will_not_change_counts(self):
        self.tre.add('the')
        self.assertEqual(self.tre._initials, self._expected_counts(),
                         "key-value pairs should remain the same")

    def test_add_new_word_increase_frequency(self):
        self.tre.add('spotlights')
        self.assertEqual(self.tre._initials,
                         self._expected_counts({'s': 4}),
                         "'s' should be set to 4 (up from 3)")

    def test_add_new_initial(self):
        self.tre.add('dream')
        self.assertEqual(self.tre._initials,
                         self._expected_counts({'d': 1}),
                         "new key 'd' should have a value of 1")

    def test_add_new_escaped_char(self):
        self.tre.add('\nnewline')
        # The message escapes the backslash so a failure report shows the
        # two characters \n rather than breaking the line.
        self.assertEqual(self.tre._initials,
                         self._expected_counts({'\n': 1}),
                         "new key '\\n' should have a value of 1")

    def test_add_new_special_char(self):
        self.tre.add('åll')
        self.assertEqual(self.tre._initials,
                         self._expected_counts({'å': 1}),
                         "new key 'å' should have a value of 1")

    def test_remove_word_lower_frequency(self):
        self.tre.remove('the')
        # The key is expected to stay in the dict with a count of 0.
        self.assertEqual(self.tre._initials,
                         self._expected_counts({'t': 0}),
                         "'t' should have a value of 0 (down from 1)")

    def test_zero_frequency_should_not_appear(self):
        self.tre.remove('the')
        self.assertEqual(self.tre.initials(),
                         ['a', 'b', 'e', 'f', 'n', 's', 'w'],
                         "'t' should not appear in the list")

    def test_remove_nonexisting_initial_with_zero_frequency(self):
        self.tre.remove('the')  # set 't': 1 -> 't': 0
        self.tre.remove('table')  # attempt removal of nonexisting word
        self.assertEqual(self.tre._initials,
                         self._expected_counts({'t': 0}),
                         "'t' should still have a value of 0")

    def test_remove_all(self):
        self.tre.remove(*self.words)
        # Same keys as the baseline, every count dropped to 0.
        self.assertEqual(self.tre._initials,
                         dict.fromkeys(self._expected_counts(), 0),
                         "All keys should be set to a value of 0")
示例#6
0
class TestRemove(unittest.TestCase):
    """Checks of TrieRegEx.remove() against the private ``_trie`` dict.

    Most tests assert twice: a "hard" check against a hand-written literal
    and a "soft" check against a trie freshly built from the kept words.
    """

    def setUp(self):
        self.words = [
            'heart', 'healthy', 'pear', 'peach', 'lark', 'look', 'change',
        ]
        # Each entry extends the previous one by a single letter.
        self.incrementals = ['p', 'pe', 'pea', 'pear']
        self.tre = TRE(*self.words)
        self.tre_incr = TRE(*self.incrementals)

    def test_remove_one(self):
        self.tre.remove('healthy')

        literal = {
            'c': {'h': {'a': {'n': {'g': {'e': {'**': {}}}}}}},
            'l': {
                'a': {'r': {'k': {'**': {}}}},
                'o': {'o': {'k': {'**': {}}}},
            },
            'h': {'e': {'a': {'r': {'t': {'**': {}}}}}},
            'p': {'e': {'a': {
                'c': {'h': {'**': {}}},
                'r': {'**': {}},
            }}},
        }
        self.assertEqual(  # "hard" check
            self.tre._trie, literal,
            "'healthy' should have been removed (hard check)")

        kept = [word for word in self.words if word != 'healthy']
        self.assertEqual(  # "soft" check
            self.tre._trie, TRE(*kept)._trie,
            "'healthy' should have been removed (soft check)")

    def test_remove_two(self):
        self.tre.remove('healthy', 'change')

        literal = {
            'l': {
                'a': {'r': {'k': {'**': {}}}},
                'o': {'o': {'k': {'**': {}}}},
            },
            'h': {'e': {'a': {'r': {'t': {'**': {}}}}}},
            'p': {'e': {'a': {
                'c': {'h': {'**': {}}},
                'r': {'**': {}},
            }}},
        }
        self.assertEqual(  # "hard" check
            self.tre._trie, literal,
            "'healthy' and 'change' should have been removed (hard check)")

        rebuilt = TRE('lark', 'look', 'heart', 'peach', 'pear')
        self.assertEqual(  # "soft" check
            self.tre._trie, rebuilt._trie,
            "'healthy' and 'change' should have been removed (soft check)")

    def test_remove_all(self):
        self.tre.remove(*self.words)
        self.assertEqual(self.tre._trie, {}, 'Trie should be empty')

    def test_remove_second_time(self):
        # Empty the trie, refill it, then remove everything except 'pear'.
        self.tre.remove(*self.words)
        self.tre.add(*self.words)
        all_but_pear = [word for word in self.words if word != 'pear']
        self.tre.remove(*all_but_pear)

        literal = {'p': {'e': {'a': {'r': {'**': {}}}}}}
        self.assertEqual(  # "hard" check
            self.tre._trie, literal,
            "Only 'pear' should be in trie (hard check)")

        rebuilt = TRE('pear')
        self.assertEqual(  # "soft" check
            self.tre._trie, rebuilt._trie,
            "Only 'pear' should be in trie (soft check)")

    def test_remove_first_in_incremental_words(self):
        self.tre_incr.remove('p')

        literal = {
            'p': {
                'e': {
                    '**': {},
                    'a': {
                        '**': {},
                        'r': {'**': {}},
                    },
                },
            },
        }
        self.assertEqual(  # "hard" check
            self.tre_incr._trie, literal,
            "'p' should have been removed (hard check)")

        rebuilt = TRE('pe', 'pea', 'pear')
        self.assertEqual(  # "soft" check
            self.tre_incr._trie, rebuilt._trie,
            "'p' should have been removed (soft check)")

    def test_remove_middle_in_incremental_words(self):
        self.tre_incr.remove('pea')

        literal = {
            'p': {
                '**': {},
                'e': {
                    '**': {},
                    'a': {
                        'r': {'**': {}},
                    },
                },
            },
        }
        self.assertEqual(  # "hard" check
            self.tre_incr._trie, literal,
            "'pea' should have been removed (hard check)")

        rebuilt = TRE('p', 'pe', 'pear')
        self.assertEqual(  # "soft" check
            self.tre_incr._trie, rebuilt._trie,
            "'pea' should have been removed (soft check)")

    def test_remove_last_in_incremental_words(self):
        self.tre_incr.remove('pear')

        literal = {
            'p': {
                '**': {},
                'e': {
                    '**': {},
                    'a': {'**': {}},
                },
            },
        }
        self.assertEqual(  # "hard" check
            self.tre_incr._trie, literal,
            "'pear' should have been removed (hard check)")

        rebuilt = TRE('p', 'pe', 'pea')
        self.assertEqual(  # "soft" check
            self.tre_incr._trie, rebuilt._trie,
            "'pear' should have been removed (soft check)")

    def test_remove_one_in_multiple_shared(self):
        shared = TRE('brander', 'brandy', 'brandless')
        shared.remove('brandless')

        after_brand = {
            'y': {'**': {}},
            'e': {'r': {'**': {}}},
        }
        literal = {'b': {'r': {'a': {'n': {'d': after_brand}}}}}
        self.assertEqual(  # "hard" check
            shared._trie, literal,
            "'brandless' should have been removed (hard check)")

        rebuilt = TRE('brander', 'brandy')
        self.assertEqual(  # "soft" check
            shared._trie, rebuilt._trie,
            "'brandless' should have been removed (soft check)")

    def test_remove_nonexisting_word(self):
        # 'riffraff' was never added, so the trie must be left untouched.
        self.tre_incr.remove('riffraff')

        literal = {
            'p': {
                '**': {},
                'e': {
                    '**': {},
                    'a': {
                        '**': {},
                        'r': {'**': {}},
                    },
                },
            },
        }
        self.assertEqual(  # "hard" check
            self.tre_incr._trie, literal,
            "Trie should remain the same (hard check)")

        rebuilt = TRE(*self.incrementals)
        self.assertEqual(  # "soft" check
            self.tre_incr._trie, rebuilt._trie,
            "Trie should remain the same (soft check)")