def find_compound_words(words):
    """Return the set of words that can be built by concatenating other words.

    Approach: trie + BFS + pruning.

    While the trie is being filled, every already-inserted word that is a
    prefix of the current word seeds a ``(word, remaining_suffix)`` state.
    The BFS then keeps stripping known prefixes from the suffix; as soon as
    the whole remaining suffix is itself a stored word, the original word is
    recorded as a compound word.

    Design notes (from the original author):

    Advantages of a trie:
        1. Predictable O(k) lookup time, where k is the size of the key.
        2. All prefixes of a given word are easy to collect.

    Drawbacks of a trie:
        1. It is space-consuming: a trade-off between time complexity and
           space complexity. A radix tree would use less space, but in
           practice it gave no reasonable improvement and took more time
           than the plain trie.

    Args:
        words: iterable of strings, consumed in a single pass. Seeding only
            looks at words inserted *before* the current one, so a word must
            appear after the words that are its prefixes (e.g. pass the
            list in sorted order); otherwise compounds can be missed.

    Returns:
        A set containing every word recognised as a compound word.
    """
    compound_words = set()
    trie = Trie()
    queue = collections.deque()
    # (word, suffix) states already scheduled. The same suffix of a word can
    # be reached through many different segmentations; expanding it once is
    # enough because the trie no longer changes while the queue is drained.
    seen = set()

    def _enqueue(word, suffix):
        state = (word, suffix)
        if state not in seen:
            seen.add(state)
            queue.append(state)

    for word in words:
        # Only words inserted so far can be reported as prefixes here.
        for prefix in trie.has_prefixes(word):
            _enqueue(word, word[len(prefix):])
        trie.insert(word)

    while queue:
        word, suffix = queue.popleft()

        # Pruning: another segmentation already proved this word compound.
        if word in compound_words:
            continue

        if suffix in trie:
            # The remainder is itself a word: found a compound word.
            compound_words.add(word)
        else:
            # Strip every known prefix off the remainder and keep searching.
            for prefix in trie.has_prefixes(suffix):
                _enqueue(word, suffix[len(prefix):])

    return compound_words
class TestTrie(unittest.TestCase):
    """Unit tests for Trie: insertion, membership and prefix lookup."""

    def setUp(self):
        # Fresh trie for every test, pre-loaded with words that share
        # prefixes ('cat'/'cats') and a first letter ('dog'/'data').
        self.rt = Trie()
        self.rt.insert('cat')
        self.rt.insert('cats')
        self.rt.insert('dog')
        self.rt.insert('data')

    def test_insert(self):
        """Inserted words create the expected chain of child nodes."""
        node = self.rt.root

        # 'cat' and 'cats' share the branch root -> 'c' -> 'a'.
        self.assertIn('c', node.children)
        self.assertEqual('c', node.children['c'].letter)
        self.assertIn('a', node.children['c'].children)

        # 'dog' and 'data' share 'd' and then split into 'o' and 'a'.
        self.assertIn('d', node.children)
        self.assertIn('o', node.children['d'].children)
        self.assertIn('a', node.children['d'].children)

    def test_contains(self):
        """Only complete inserted words are members of the trie."""
        self.assertIn('cat', self.rt)
        self.assertIn('cats', self.rt)
        self.assertIn('dog', self.rt)
        self.assertIn('data', self.rt)

        # Proper prefixes of stored words are not members themselves.
        self.assertNotIn('ca', self.rt)
        self.assertNotIn('c', self.rt)
        self.assertNotIn('d', self.rt)
        # Neither is an extension of a stored word.
        self.assertNotIn('dogs', self.rt)

    def test_has_prefixes(self):
        """has_prefixes lists stored words that prefix the argument."""
        # assertEqual instead of assertEquals: the latter is a deprecated
        # alias that was removed in Python 3.12.
        self.assertEqual(['cat'], self.rt.has_prefixes('cat'))
        self.assertEqual(['cat', 'cats'], self.rt.has_prefixes('cats'))
        self.assertEqual(['dog'], self.rt.has_prefixes('dog'))