Пример #1
0
def get_completions(args):
    """Return the completion results for the request described by ``args``.

    ``args`` is a mapping read for the keys ``"corpus"``, ``"trie_mode"``,
    ``"prefix"``, ``"max_results"`` and, for every mode except ``"pattern"``,
    ``"autocorrect"``.

    Result by mode:

    * ``"pattern"``: the words from ``lab.word_filter`` (frequencies dropped),
      truncated to ``max_results`` entries.
    * ``"words"`` with a truthy ``autocorrect``: whatever ``lab.autocorrect``
      returns.
    * ``"sentences"``: each ``lab.autocomplete`` result joined with spaces.
    * anything else: the ``lab.autocomplete`` result unchanged.

    A ``max_results`` equal to 0 is replaced by ``None`` before use; for the
    slice in pattern mode that means no truncation.
    """
    corpus_tries = corpusTries[args["corpus"]]
    mode = args["trie_mode"]

    if mode not in ("words", "pattern"):
        # Every other mode uses the second trie, keyed by tuples of the
        # whitespace-separated tokens of the prefix.
        trie = corpus_tries[1]
        prefix = tuple(args["prefix"].split())
    else:
        # Word-level modes use the first trie with the raw prefix string.
        trie = corpus_tries[0]
        prefix = args["prefix"]

    requested = args["max_results"]
    limit = None if requested == 0 else requested

    if mode == "pattern":
        matches = lab.word_filter(trie, prefix)
        return [word for word, _freq in matches[:limit]]

    if args["autocorrect"] and mode == "words":
        return lab.autocorrect(trie, prefix, limit)

    completions = lab.autocomplete(trie, prefix, limit)
    if mode == "sentences":
        return [' '.join(tokens) for tokens in completions]
    return completions
Пример #2
0
 def test_03_big_filter_2(self):
     """Compare ``lab.word_filter`` on the frankenstein.txt trie with stored results.

     For the pattern at index ``n`` the expected value is loaded with
     ``read_expected('frank_filter_n.pickle')``. Results are compared by
     length and then as sets, so ordering does not matter.
     """
     corpus_path = os.path.join(TEST_DIRECTORY, 'testing_data', 'frankenstein.txt')
     with open(corpus_path, encoding='utf-8') as corpus_file:
         corpus = corpus_file.read()
     trie = lab.make_word_trie(corpus)

     wildcard_patterns = ('*ing', '*ing?', '****ing', '**ing**', '????', 'mon*',
                          '*?*?*?*', '*???')
     for index, pattern in enumerate(wildcard_patterns):
         failure = 'incorrect word_filter of '+repr(pattern)
         actual = lab.word_filter(trie, pattern)
         wanted = read_expected('frank_filter_%s.pickle' % (index, ))
         # Length check first: the set comparison alone would hide duplicates.
         self.assertEqual(len(wanted), len(actual), msg=failure)
         self.assertEqual(set(wanted), set(actual), msg=failure)
Пример #3
0
def test_filter_big_2():
    """Compare ``lab.word_filter`` on the frankenstein.txt trie with stored results.

    For the pattern at index ``n`` the expected value is loaded with
    ``read_expected('frank_filter_n.pickle')``. Results are compared by
    length and then as sets, so ordering does not matter.
    """
    corpus_path = os.path.join(TEST_DIRECTORY, 'testing_data',
                               'frankenstein.txt')
    with open(corpus_path, encoding='utf-8') as corpus_file:
        corpus = corpus_file.read()
    trie = lab.make_word_trie(corpus)

    wildcard_patterns = ('*ing', '*ing?', '****ing', '**ing**', '????', 'mon*',
                         '*?*?*?*', '*???')
    for index, pattern in enumerate(wildcard_patterns):
        actual = lab.word_filter(trie, pattern)
        wanted = read_expected('frank_filter_%s.pickle' % (index, ))
        # Length check first: the set comparison alone would hide duplicates.
        assert len(wanted) == len(actual), 'incorrect word_filter of %r' % pattern
        assert set(wanted) == set(actual), 'incorrect word_filter of %r' % pattern
Пример #4
0
    def test_02_big_filter_1(self):
        """Filter ``"ap*"`` against a trie dominated by non-matching ``aa....`` words.

        The corpus holds every six-letter word of the form ``"aa" + 4 letters``,
        a few repeated ``ap...`` words, and one unrelated word. Only the
        ``ap...`` words, with their counts, are expected back. The query is
        run 20 times.
        """
        letters = "abcdefghijklmnopqrstuvwxyz"

        # None of these can match "ap*" because they all start with "aa".
        filler = [
            "aa" + c1 + c2 + c3 + c4
            for c1 in letters
            for c2 in letters
            for c3 in letters
            for c4 in letters
        ]
        repeated = ["apple", "application", "apple", "apricot", "apricot", "apple"]
        corpus = ' '.join(filler + repeated + ["bruteforceisbad"])

        trie = lab.make_word_trie(corpus)
        expected = [('apple', 3), ('apricot', 2), ('application', 1)]
        for _ in range(20):
            result = lab.word_filter(trie, "ap*")
            self.assertEqual(len(expected), len(result), msg='incorrect word_filter of ap*')
            self.assertEqual(set(expected), set(result), msg='incorrect word_filter of ap*')
Пример #5
0
def test_filter_big_1():
    """Filter ``"ap*"`` against a trie dominated by non-matching ``aa....`` words.

    The corpus holds every six-letter word of the form ``"aa" + 4 letters``,
    a few repeated ``ap...`` words, and one unrelated word. Only the
    ``ap...`` words, with their counts, are expected back. The query is run
    1000 times.
    """
    letters = "abcdefghijklmnopqrstuvwxyz"

    # None of these can match "ap*" because they all start with "aa".
    filler = [
        "aa" + c1 + c2 + c3 + c4
        for c1 in letters
        for c2 in letters
        for c3 in letters
        for c4 in letters
    ]
    repeated = ["apple", "application", "apple", "apricot", "apricot", "apple"]
    corpus = ' '.join(filler + repeated + ["bruteforceisbad"])

    trie = lab.make_word_trie(corpus)
    expected = {('apple', 3), ('apricot', 2), ('application', 1)}
    for _ in range(1000):
        result = lab.word_filter(trie, "ap*")
        assert len(expected) == len(result), 'incorrect word_filter of ap*'
        assert set(expected) == set(result), 'incorrect word_filter of ap*'
Пример #6
0
    def test_01_filter(self):
        """Check ``lab.word_filter`` on a small trie for several wildcard patterns.

        Each result must be a list; it is sorted in place and compared with
        the expected ``(word, count)`` pairs.
        """
        trie = lab.make_word_trie(
            "man mat mattress map me met a man a a a map man met")

        every_word = [("a", 4), ("man", 3), ("map", 2), ("mat", 1),
                      ("mattress", 1), ("me", 1), ("met", 2)]
        cases = (
            # Filter to select all words in trie
            ('*', every_word),
            # All three-letter words in trie
            ('???', [("man", 3), ("map", 2), ("mat", 1), ("met", 2)]),
            # Words beginning with 'mat'
            ('mat*', [("mat", 1), ("mattress", 1)]),
            # Words beginning with 'm', third letter is t
            ('m?t*', [("mat", 1), ("mattress", 1), ("met", 2)]),
            # Words with at least 4 letters
            ('*????', [("mattress", 1)]),
            # All words
            ('**', every_word),
        )

        for pattern, expect in cases:
            result = lab.word_filter(trie, pattern)
            self.assertIsInstance(result, list, "result not a list.")
            result.sort()
            self.assertEqual(result, expect, msg="incorrect result from filter.")
Пример #7
0
def test_filter_small():
    """Check ``lab.word_filter`` on a small trie for several wildcard patterns.

    Each result must be a list; it is sorted in place and compared with the
    expected ``(word, count)`` pairs.
    """
    trie = lab.make_word_trie(
        "man mat mattress map me met a man a a a map man met")

    every_word = [("a", 4), ("man", 3), ("map", 2), ("mat", 1),
                  ("mattress", 1), ("me", 1), ("met", 2)]
    cases = (
        # Filter to select all words in trie
        ('*', every_word),
        # All three-letter words in trie
        ('???', [("man", 3), ("map", 2), ("mat", 1), ("met", 2)]),
        # Words beginning with 'mat'
        ('mat*', [("mat", 1), ("mattress", 1)]),
        # Words beginning with 'm', third letter is t
        ('m?t*', [("mat", 1), ("mattress", 1), ("met", 2)]),
        # Words with at least 4 letters
        ('*????', [("mattress", 1)]),
        # All words
        ('**', every_word),
    )

    for pattern, expect in cases:
        result = lab.word_filter(trie, pattern)
        assert isinstance(result, list)
        result.sort()
        assert result == expect