Пример #1
0
def test_can_raise_without_storage_policy(tmpdir, filepath=DATASET_SMALL_FILEPATH):
    """Dump an index with ``storage_policy=None``; loading with ``3`` must fail.

    The index is built from the documents at ``filepath`` and written to
    ``index.dump`` under ``tmpdir``. Loading it back while passing the
    integer ``3`` as the storage policy is expected to raise
    ``AttributeError``.
    """
    dump_path = tmpdir.join("index.dump")
    index = build_inverted_index(load_documents(filepath))
    index.dump(dump_path, storage_policy=None)

    # An int is not a usable storage policy object, so load() should blow up.
    with pytest.raises(AttributeError):
        InvertedIndex.load(dump_path, storage_policy=3)
Пример #2
0
def test_can_dump_and_load_inverted_index_with_array_policy_parametrized(filepath, tmpdir):
    """Round-trip an index through dump/load using ``StoragePolicy``.

    Builds an index from the documents at ``filepath``, dumps it to
    ``index.dump`` under ``tmpdir``, loads it back with the same storage
    policy, and checks that the loaded index equals the one that was dumped.
    """
    dump_path = tmpdir.join("index.dump")
    original = build_inverted_index(load_documents(filepath))
    original.dump(dump_path, storage_policy=StoragePolicy)

    restored = InvertedIndex.load(dump_path, storage_policy=StoragePolicy)

    assert original == restored, "load should return the same inverted index"
Пример #3
0
def arguments(small_wikipedia_inverted_index, tmpdir, filepath=DATASET_SMALL_FILEPATH):
    """Dump an index to ``tmpdir`` and return an object describing it.

    An index built from the documents at ``filepath`` is dumped to
    ``index.dump`` under ``tmpdir`` using ``StoragePolicy``. The returned
    object exposes ``dataset``, ``index``, ``output`` and ``query``
    attributes; ``index`` and ``output`` both point at the dump file.

    ``small_wikipedia_inverted_index`` is accepted but not used in the body.
    """
    dump_path = tmpdir.join("index.dump")
    build_inverted_index(load_documents(filepath)).dump(
        dump_path, storage_policy=StoragePolicy
    )

    class Arguments:
        def __init__(self):
            # NOTE(review): dataset is taken from the module constant, not
            # from the ``filepath`` parameter -- confirm this is intended.
            self.dataset = DATASET_SMALL_FILEPATH
            self.index = self.output = dump_path
            self.query = [["a", "b"]]

    return Arguments()
Пример #4
0
def small_wikipedia_inverted_index(small_sample_wikipedia_documents):
    """Return an inverted index built from ``small_sample_wikipedia_documents``."""
    return build_inverted_index(small_sample_wikipedia_documents)
Пример #5
0
def wikipedia_inverted_index(wikipedia_documents):
    """Return an inverted index built from ``wikipedia_documents``."""
    return build_inverted_index(wikipedia_documents)
Пример #6
0
def test_can_build_and_query_inverted_index(wikipedia_documents):
    """Build an index from ``wikipedia_documents`` and check query's return type.

    Queries the index for the single word ``"wikipedia"`` and asserts that
    the result is a ``list``.
    """
    index = build_inverted_index(wikipedia_documents)
    result = index.query(["wikipedia"])
    assert isinstance(result, list), "inverted index query should return list"
Пример #7
0
def test_query_inverted_index_intersect_results(dataset_tiny_fio, query, expected_res):
    """Check that querying the tiny dataset's index yields ``expected_res``.

    The index is built from the documents loaded from ``dataset_tiny_fio``.
    Result order is ignored: both sides are sorted before comparison.
    """
    index = build_inverted_index(load_documents(dataset_tiny_fio))
    found = index.query(query)
    assert sorted(found) == sorted(expected_res)
Пример #8
0
def test_can_load_without_storage_policy(tmpdir, filepath=DATASET_SMALL_FILEPATH):
    """Dump and load an index with ``storage_policy=None`` on both sides.

    There is no assertion on the loaded value; the test passes as long as
    neither ``dump`` nor ``load`` raises.
    """
    dump_path = tmpdir.join("index.dump")
    index = build_inverted_index(load_documents(filepath))
    index.dump(dump_path, storage_policy=None)
    InvertedIndex.load(dump_path, storage_policy=None)