示例#1
0
def test_inv_index_query(load_inverted_index, wiki_docs):
    """Query a pre-loaded index and a freshly built index for the same words.

    Three checks are made, in order:

    1. querying ``load_inverted_index`` for 'after' and 'were' yields a
       result containing documents 25 and 290;
    2. querying it for two words expected to be absent yields an empty
       result;
    3. an index built from ``wiki_docs`` gives a result containing
       documents 25 and 290 for the first pair of words as well.
    """
    common_words = ('after', 'were')
    expected_docs = {25, 290}

    # The query method is called through the class, with the fixture passed
    # explicitly as the instance.
    found = InvertedIndex.query(load_inverted_index, list(common_words))
    assert expected_docs.issubset(found)

    # Transliterated Russian for "unknown words".
    found = InvertedIndex.query(load_inverted_index, ['neizvesnie', 'slova'])
    assert len(found) == 0

    rebuilt_index = InvertedIndex(build_inverted_index(wiki_docs))
    found = rebuilt_index.query(list(common_words))
    assert expected_docs.issubset(found)
def test_query_2_intersect_words(words=None):
    """Query two words and expect exactly document 4 as the answer.

    Args:
        words: Words to query. Defaults to ``['bow', 'tfidf']``.
    """
    if words is None:
        # Build the default list per call: a list literal in the signature
        # is created once and shared by every call, so any mutation of it
        # would leak into later calls.
        words = ['bow', 'tfidf']
    inv_idx = InvertedIndex(TEST_INDEX_TABLE)
    doc_ids = inv_idx.query(words)
    assert doc_ids == {4}
示例#3
0
def test_query2(mock_file):
    """Build an index from ``mock_file`` and query two words at once.

    The query for 'string' and 'long' is expected to return exactly the
    documents 1, 10 and 100500.
    """
    built_index = InvertedIndex()
    built_index.build(mock_file)

    found_docs = built_index.query(['string', 'long'])
    assert found_docs == {1, 10, 100500}
示例#4
0
def test_query_from_loaded2(mock_file):
    """Load an index and query a word list that includes 'non existed'.

    The query is expected to return an empty set, and ``mock_file`` must
    have recorded more than one call.
    """
    loaded_index = InvertedIndex()
    loaded_index.load('test.index')

    found_docs = loaded_index.query(['me', 'test', 'non existed'])
    assert found_docs == set()

    recorded_calls = mock_file.mock_calls
    assert len(recorded_calls) > 1
def test_query_one_doc_in_index():
    """Query two words whose posting sets share a single document.

    'foo' maps to {1, 2, 3} and 'bar' maps to {1}; the query for both is
    expected to return {1}.
    """
    index = InvertedIndex()
    postings = {
        'foo': {1, 2, 3},
        'bar': {1},
        'foobar': {1, 2},
    }
    index.inverted_index = defaultdict(set, postings)

    found_docs = index.query(['foo', 'bar'])
    assert found_docs == {1}, 'didnt find a doc, which present in index'
示例#6
0
def test_query(mock_file):
    """Build an index from ``mock_file`` and query a single word.

    The word 'test' is expected to map to exactly document 1.
    """
    built_index = InvertedIndex()
    built_index.build(mock_file)

    found_docs = built_index.query(['test'])
    assert found_docs == {1}
def test_unicode_query_two_docs_in_index():
    """Query two non-ASCII words whose posting sets share two documents.

    'один' maps to {1, 2, 3} and 'два' maps to {1, 2}; the query for both
    is expected to return {1, 2}.
    """
    index = InvertedIndex()
    postings = {
        'один': {1, 2, 3},
        'bar': {1},
        'два': {1, 2},
    }
    index.inverted_index = defaultdict(set, postings)

    found_docs = index.query(['один', 'два'])
    failure_note = (
        'didnt find a two docs, which are present in index with unicode'
    )
    assert found_docs == {1, 2}, failure_note
示例#8
0
def test_query_from_loaded(mock_file):
    """Load an index from 'test.index' and query two words.

    The query for 'me' and 'test' is expected to return exactly document 1,
    and ``mock_file`` must have recorded more than one call.
    """
    loaded_index = InvertedIndex()
    loaded_index.load('test.index')

    found_docs = loaded_index.query(['me', 'test'])
    assert found_docs == {1}

    recorded_calls = mock_file.mock_calls
    assert len(recorded_calls) > 1
def test_unseen_word(word='fasttext'):
    """Query a single word and expect ``None`` as the result.

    Args:
        word: The word to query; defaults to 'fasttext'.
    """
    found_docs = InvertedIndex(TEST_INDEX_TABLE).query([word])
    assert found_docs is None
def test_query_2_words_without_shared_docs(words=None):
    """Query two words and expect ``None`` as the result.

    Args:
        words: Words to query. Defaults to ``['bow', 'cbow']``.
    """
    if words is None:
        # Build the default list per call: a list literal in the signature
        # is created once and shared by every call, so any mutation of it
        # would leak into later calls.
        words = ['bow', 'cbow']
    inv_idx = InvertedIndex(TEST_INDEX_TABLE)
    doc_ids = inv_idx.query(words)
    assert doc_ids is None
def test_query_single_word(word='bow'):
    """Query one word and compare the result with its table entry.

    Args:
        word: The word to query; defaults to 'bow'. The expected answer is
            ``TEST_INDEX_TABLE[word]``.
    """
    found_docs = InvertedIndex(TEST_INDEX_TABLE).query([word])
    assert found_docs == TEST_INDEX_TABLE[word]
def test_query_not_in_index():
    """Query a word that is not a key of the index.

    The index holds only 'foo' and 'bar'; the query for 'foobar' is
    expected to return an empty set.
    """
    index = InvertedIndex()
    postings = {'foo': {1, 2, 3}, 'bar': {1}}
    index.inverted_index = defaultdict(set, postings)

    found_docs = index.query(['foobar'])
    assert found_docs == set(), 'find a doc, that not in index'