def sample_sentence_autocomplete(data: Dict) -> List[Tuple[str, float]]:
    """Build a sentence autocomplete engine from <data> and run one query.

    Keys read from <data>:
        'file'          - file location
        'autocompleter' - 'simple' or 'compressed'
        'weight_type'   - 'sum' or 'average'
        'search'        - search input
        'limit'         - autocomplete limit

    The first three keys are forwarded to SentenceAutocompleteEngine as its
    configuration; the last two are the arguments of the autocomplete call,
    whose result is returned unchanged.
    """
    # Only these keys belong to the engine configuration.
    engine_keys = ('file', 'autocompleter', 'weight_type')
    config = {key: data[key] for key in engine_keys}
    completer = SentenceAutocompleteEngine(config)

    prefix, limit = data['search'], data['limit']
    return completer.autocomplete(prefix, limit)
# Example #2
# 0
def test_sentence_autocompleter() -> None:
    """Smoke-test SentenceAutocompleteEngine on the sample sentences file.

    The engine is built from 'data/sample_sentences.csv' with the 'simple'
    autocompleter and the 'average' weight type. The data file has to be
    downloaded from the course website. It is only a few lines long, but it
    exercises three details:

        1. The second entry of each csv line is the weight of the sentence,
           and it can be a float (do not assume an int).
        2. Two sentences in the file sanitize to the same string, so that
           value is inserted twice and its weight is the *sum* of the weights
           of the two lines.
        3. Numbers *are allowed* in the strings (true for both text-based
           autocomplete engines) and must not be stripped.
    """
    config = {
        'file': 'data/sample_sentences.csv',
        'autocompleter': 'simple',
        'weight_type': 'average'
    }
    engine = SentenceAutocompleteEngine(config)

    # Simple autocompletion and sanitization: exactly one match, weight 1.0.
    matches = engine.autocomplete('what a')
    assert len(matches) == 1
    only_match = matches[0]
    assert only_match[0] == 'what a wonderful world'
    assert only_match[1] == 1.0

    # Digits inside a sentence must survive sanitization.
    matches = engine.autocomplete('numbers')
    assert len(matches) == 1
    only_match = matches[0]
    assert only_match[0] == 'numbers are 0k4y'

    # A sentence inserted twice appears once, with the two weights added.
    matches = engine.autocomplete('a')
    assert len(matches) == 1
    only_match = matches[0]
    assert only_match[0] == 'a star is born'
    expected_weight = 15.0 + 6.5
    assert only_match[1] == expected_weight
# Example #3
# 0
def test_engine() -> None:
    """Check that two identically configured engines give the same results.

    Two SentenceAutocompleteEngine instances are built from
    'data/google_searches.csv' with the 'simple' autocompleter and 'sum'
    weight type; both are asked for up to 20 completions of 'why' and the
    two result lists must compare equal.
    """
    # Each engine receives its own freshly built configuration dict.
    first_engine, second_engine = (
        SentenceAutocompleteEngine({
            'file': 'data/google_searches.csv',
            'autocompleter': 'simple',
            'weight_type': 'sum'
        })
        for _ in range(2)
    )

    first_results = first_engine.autocomplete('why', 20)
    second_results = second_engine.autocomplete('why', 20)

    assert first_results == second_results
def test_sample_sentence_autocomplete() -> None:
    """Property-test SentenceAutocompleteEngine over every configuration.

    For each combination of data file, autocompleter and weight type:
        1. CompressedPrefixTree properties of autocompleter ('compressed')
            - compressibility check
            - subtrees non increasing order check
            - subtree weight check
        2. SimplePrefixTree properties of autocompleter ('simple')
            - len(subtree.value) == len(spt.value) + 1 (when subtree is List)
            - subtrees non increasing order check
            - subtree weight check
        3. Autocompleter properties
            - num_leaves == total_inputs - duplicate inputs
            - len(output) <= limit (when a limit is given)
            - output weight is non-increasing
            - the weight reported for a duplicated sentence is at least the
              count recorded for it by num_duplicate_inputs

    The structural checks are delegated to helper functions defined
    elsewhere; this test only asserts that they return a truthy value.
    """
    autocompleters = ['simple', 'compressed']
    files = ['data/google_searches.csv']
    weight_types = ['sum', 'average']
    google_searches = ['how', 'why', 'when', 'who', 'what']
    # None exercises the "no limit" path. NOTE(review): the other limits are
    # drawn without a fixed seed, so they differ from run to run.
    limits = [None] + random.sample(range(1, 200), 50)

    for file in files:
        for autocompleter in autocompleters:
            for weight_type in weight_types:
                # create engine
                engine = SentenceAutocompleteEngine({
                    'file': file,
                    'autocompleter': autocompleter,
                    'weight_type': weight_type
                })
                tree = engine.autocompleter

                if autocompleter == 'simple':
                    assert scheck_subtrees_non_increasing_order(tree)
                    assert scheck_subtrees_value(tree)
                    assert stree_weight_check(tree, weight_type)
                else:
                    # autocompleter == 'compressed'
                    assert check_subtrees_non_increasing_order(tree)
                    assert check_subtrees_compressibility(tree)
                    assert tree_weight_check(tree, weight_type)

                # duplicates[0] is indexed by duplicated input and
                # duplicates[1] is used as the total number of inputs
                # (presumably a dict of counts and an int; confirm against
                # num_duplicate_inputs).
                duplicates = num_duplicate_inputs(
                    file, 'SentenceAutocompleteEngine')
                duplicate_counts = duplicates[0]
                total_inputs = duplicates[1]
                total_duplicates = sum(
                    duplicate_counts[key] for key in duplicate_counts)
                assert tree._num_leaves == total_inputs - total_duplicates

                for search in google_searches:
                    for limit in limits:
                        output = engine.autocomplete(search, limit)
                        if limit is not None:
                            assert len(output) <= limit
                        assert autocomplete_non_increasing_order(output)

                # The weight of a duplicated entry must be at least its
                # recorded duplicate count.
                for duplicate in duplicate_counts:
                    output = engine.autocomplete(duplicate, 20)
                    for entry in output:
                        # Each entry is indexed as (sentence, weight).
                        # Comparing the whole entry with the search string
                        # never matched, so the weight assertion was never
                        # reached; compare the sentence itself instead.
                        if entry[0] == duplicate:
                            assert entry[1] >= duplicate_counts[duplicate]