def get_completions(args):
    """Return completion results for the request described by ``args``.

    Keys read from ``args``:
      * "corpus"      -- key into the module-level ``corpusTries`` mapping.
      * "trie_mode"   -- "words", "pattern", "sentences" (any other value is
                         handled like a plain autocomplete on the second trie).
      * "prefix"      -- the text to complete or the pattern to filter by.
      * "max_results" -- result limit; a value equal to 0 means "no limit".
      * "autocorrect" -- read for every mode except "pattern".

    Returns a list of words for "pattern" mode, a list of space-joined
    strings for "sentences" mode, and otherwise whatever ``lab.autocorrect``
    or ``lab.autocomplete`` returns.
    """
    tries = corpusTries[args["corpus"]]
    mode = args["trie_mode"]

    # Word-level modes search the first trie with the raw prefix string;
    # everything else searches the second trie with the prefix split into a
    # tuple of whitespace-separated tokens.
    word_level = mode in ("words", "pattern")
    trie = tries[0] if word_level else tries[1]
    prefix = args["prefix"] if word_level else tuple(args["prefix"].split())

    requested = args["max_results"]
    limit = None if requested == 0 else requested

    if mode == "pattern":
        # word_filter yields (word, frequency) pairs; keep only the words.
        matches = lab.word_filter(trie, prefix)[:limit]
        return [word for word, _freq in matches]

    if args["autocorrect"] and mode == "words":
        return lab.autocorrect(trie, prefix, limit)

    completions = lab.autocomplete(trie, prefix, limit)
    if mode == "sentences":
        # Each sentence completion is a sequence of words; present as text.
        return [' '.join(words) for words in completions]
    return completions
def test_03_big_filter_2(self):
    """Compare word_filter on the frankenstein.txt trie with stored results.

    For each pattern, the expected value is loaded through ``read_expected``
    from ``frank_filter_<index>.pickle`` and compared to the actual result
    by length and then as a set.
    """
    patterns = ('*ing', '*ing?', '****ing', '**ing**',
                '????', 'mon*', '*?*?*?*', '*???')

    corpus_path = os.path.join(TEST_DIRECTORY, 'testing_data',
                               'frankenstein.txt')
    with open(corpus_path, encoding='utf-8') as corpus:
        text = corpus.read()
    trie = lab.make_word_trie(text)

    for index, pattern in enumerate(patterns):
        actual = lab.word_filter(trie, pattern)
        expected = read_expected('frank_filter_%s.pickle' % (index, ))
        failure = 'incorrect word_filter of '+repr(pattern)
        # Length first so duplicated entries are not hidden by the set check.
        self.assertEqual(len(expected), len(actual), msg=failure)
        self.assertEqual(set(expected), set(actual), msg=failure)
def test_filter_big_2():
    """Compare word_filter on the frankenstein.txt trie with stored results.

    For each pattern, the expected value is loaded through ``read_expected``
    from ``frank_filter_<index>.pickle`` and compared to the actual result
    by length and then as a set.
    """
    patterns = ('*ing', '*ing?', '****ing', '**ing**',
                '????', 'mon*', '*?*?*?*', '*???')

    corpus_path = os.path.join(TEST_DIRECTORY, 'testing_data',
                               'frankenstein.txt')
    with open(corpus_path, encoding='utf-8') as corpus:
        text = corpus.read()
    trie = lab.make_word_trie(text)

    for index, pattern in enumerate(patterns):
        actual = lab.word_filter(trie, pattern)
        expected = read_expected('frank_filter_%s.pickle' % (index, ))
        # Length first so duplicated entries are not hidden by the set check.
        assert len(expected) == len(actual), \
            'incorrect word_filter of %r' % pattern
        assert set(expected) == set(actual), \
            'incorrect word_filter of %r' % pattern
def test_02_big_filter_1(self):
    """Run the 'ap*' filter repeatedly on a trie crowded with 'aa....' words.

    The trie holds every six-letter word of the form 'aa' + four lowercase
    letters, a handful of repeated 'ap...' words, and one unrelated long
    word. Only the 'ap...' words (with their counts) should be returned.
    """
    letters = "abcdefghijklmnopqrstuvwxyz"

    words = []
    for first in letters:
        for second in letters:
            for third in letters:
                for fourth in letters:
                    words.append("aa" + first + second + third + fourth)
    words.extend(["apple", "application", "apple",
                  "apricot", "apricot", "apple"])
    words.append("bruteforceisbad")

    trie = lab.make_word_trie(' '.join(words))
    expected = [('apple', 3), ('apricot', 2), ('application', 1)]

    # Repeat the query 20 times, checking the result on every pass.
    for _ in range(20):
        actual = lab.word_filter(trie, "ap*")
        self.assertEqual(len(expected), len(actual),
                         msg='incorrect word_filter of ap*')
        self.assertEqual(set(expected), set(actual),
                         msg='incorrect word_filter of ap*')
def test_filter_big_1():
    """Run the 'ap*' filter repeatedly on a trie crowded with 'aa....' words.

    The trie holds every six-letter word of the form 'aa' + four lowercase
    letters, a handful of repeated 'ap...' words, and one unrelated long
    word. Only the 'ap...' words (with their counts) should be returned.
    """
    letters = "abcdefghijklmnopqrstuvwxyz"

    words = []
    for first in letters:
        for second in letters:
            for third in letters:
                for fourth in letters:
                    words.append("aa" + first + second + third + fourth)
    words.extend(["apple", "application", "apple",
                  "apricot", "apricot", "apple"])
    words.append("bruteforceisbad")

    trie = lab.make_word_trie(' '.join(words))
    expected = {('apple', 3), ('apricot', 2), ('application', 1)}

    # Repeat the query 1000 times, checking the result on every pass.
    for _ in range(1000):
        actual = lab.word_filter(trie, "ap*")
        assert len(expected) == len(actual), 'incorrect word_filter of ap*'
        assert set(expected) == set(actual), 'incorrect word_filter of ap*'
def test_01_filter(self):
    """Check word_filter on a small trie for a series of wildcard patterns.

    Every case asserts that the result is a list and, once sorted, equals
    the expected list of (word, count) pairs.
    """
    trie = lab.make_word_trie(
        "man mat mattress map me met a man a a a map man met")

    every_word = [("a", 4), ("man", 3), ("map", 2), ("mat", 1),
                  ("mattress", 1), ("me", 1), ("met", 2)]

    cases = [
        # Filter to select all words in trie
        ('*', every_word),
        # All three-letter words in trie
        ('???', [("man", 3), ("map", 2), ("mat", 1), ("met", 2)]),
        # Words beginning with 'mat'
        ('mat*', [("mat", 1), ("mattress", 1)]),
        # Words beginning with 'm', third letter is t
        ('m?t*', [("mat", 1), ("mattress", 1), ("met", 2)]),
        # Words with at least 4 letters
        ('*????', [("mattress", 1)]),
        # All words
        ('**', every_word),
    ]

    for pattern, expect in cases:
        result = lab.word_filter(trie, pattern)
        self.assertIsInstance(result, list, "result not a list.")
        # Order of the returned pairs is not specified, so sort first.
        result.sort()
        self.assertEqual(result, expect,
                         msg="incorrect result from filter.")
def test_filter_small():
    """Check word_filter on a small trie for a series of wildcard patterns.

    Every case asserts that the result is a list and, once sorted, equals
    the expected list of (word, count) pairs.
    """
    trie = lab.make_word_trie(
        "man mat mattress map me met a man a a a map man met")

    every_word = [("a", 4), ("man", 3), ("map", 2), ("mat", 1),
                  ("mattress", 1), ("me", 1), ("met", 2)]

    cases = [
        # Filter to select all words in trie
        ('*', every_word),
        # All three-letter words in trie
        ('???', [("man", 3), ("map", 2), ("mat", 1), ("met", 2)]),
        # Words beginning with 'mat'
        ('mat*', [("mat", 1), ("mattress", 1)]),
        # Words beginning with 'm', third letter is t
        ('m?t*', [("mat", 1), ("mattress", 1), ("met", 2)]),
        # Words with at least 4 letters
        ('*????', [("mattress", 1)]),
        # All words
        ('**', every_word),
    ]

    for pattern, expect in cases:
        result = lab.word_filter(trie, pattern)
        assert isinstance(result, list)
        # Order of the returned pairs is not specified, so sort first.
        result.sort()
        assert result == expect