Пример #1
0
def find_compound_words(words):
    """Return the set of words that can be assembled from other words.

    Approach: trie + BFS + pruning.

    Each word is split at every prefix reported by the trie and the
    remainder is queued as a ``(word, suffix)`` state. A state whose
    suffix is itself in the trie marks ``word`` as compound; otherwise
    the suffix is split again at each of its prefixes.

    NOTE(review): assumes ``Trie.has_prefixes(s)`` returns the stored
    words that are prefixes of ``s`` and that ``in`` tests whole-word
    membership -- confirm against the Trie implementation.

    Advantages of a trie:
    1. Predictable O(k) lookup time where k is the size of the key.
    2. We can easily get all prefixes of a given word.

    Drawbacks of tries:
    1. Space-consuming; it is a trade-off between time complexity and
       space complexity. A radix tree would use less space, but in
       practice it does not give a reasonable improvement and it takes
       more time than a trie.

    Args:
        words: Iterable of words. The initial split of a word only uses
            what was inserted before it, so iteration order matters.

    Returns:
        A set containing every word found to be a compound.
    """
    compound_words = set()
    trie = Trie()
    queue = collections.deque()
    for word in words:
        # Query before inserting: at this point the trie holds only the
        # words that came earlier in `words`.
        for prefix in trie.has_prefixes(word):
            queue.append((word, word[len(prefix):]))
        trie.insert(word)

    # The trie no longer changes, so a (word, suffix) state always expands
    # the same way; remembering visited states avoids re-exploring them.
    seen = set()
    while queue:
        state = queue.popleft()
        word, suffix = state
        # Pruning: the word is already settled, or this exact state has
        # been expanded before.
        if word in compound_words or state in seen:
            continue
        seen.add(state)
        # Found a compound word: the remainder is itself a stored word.
        if suffix in trie:
            compound_words.add(word)
            continue
        for prefix in trie.has_prefixes(suffix):
            queue.append((word, suffix[len(prefix):]))
    return compound_words
Пример #2
0
class TestTrie(unittest.TestCase):
    """Unit tests for Trie insertion, membership and prefix lookup."""

    def setUp(self):
        """Build a fresh trie holding 'cat', 'cats', 'dog' and 'data'."""
        self.rt = Trie()
        self.rt.insert('cat')
        self.rt.insert('cats')
        self.rt.insert('dog')
        self.rt.insert('data')

    def test_insert(self):
        """Inserted words create the expected child nodes under the root."""
        node = self.rt.root
        # insert 'cat'
        self.assertIn('c', node.children)
        # insert 'cats'
        self.assertEqual('c', node.children['c'].letter)
        self.assertIn('a', node.children['c'].children)
        # insert 'dog'
        # insert 'data'
        # Both words share the 'd' node and branch at the second letter.
        self.assertIn('d', node.children)
        self.assertIn('o', node.children['d'].children)
        self.assertIn('a', node.children['d'].children)

    def test_contains(self):
        """Only whole inserted words are members; partial paths are not."""
        self.assertIn('cat', self.rt)
        self.assertIn('cats', self.rt)
        self.assertIn('dog', self.rt)
        self.assertIn('data', self.rt)
        self.assertNotIn('ca', self.rt)
        self.assertNotIn('c', self.rt)
        self.assertNotIn('d', self.rt)
        self.assertNotIn('dogs', self.rt)

    def test_has_prefixes(self):
        """has_prefixes lists every stored word that prefixes the argument."""
        # assertEqual, not the deprecated assertEquals alias (removed in 3.12).
        self.assertEqual(['cat'], self.rt.has_prefixes('cat'))
        self.assertEqual(['cat', 'cats'], self.rt.has_prefixes('cats'))
        self.assertEqual(['dog'], self.rt.has_prefixes('dog'))