Exemplo n.º 1
0
def test_can_run_load_documents(tmpdir, dataset, length):
    """Write ``dataset`` to a temporary file and check how many documents load.

    The text is stored as ``dataset.txt`` under ``tmpdir``, passed to
    ``load_documents``, and the size of the result is compared with ``length``.
    """
    source_file = tmpdir.join("dataset.txt")
    source_file.write(dataset)
    loaded = load_documents(source_file)
    loaded_count = len(loaded)
    assert length == loaded_count, (
        f"Wrong loaded length with file {source_file}, "
        f"expected {length}, got {loaded_count}"
    )
Exemplo n.º 2
0
def test_inverted_index_can_dump_load_index(tmpdir):
    """Round-trip an inverted index through ``dump`` and ``load``.

    Builds an index from ``DATASET_SMALL_STR``, dumps it with a
    ``StructStoragePolicy`` to ``test.dump`` under ``tmpdir``, loads it back
    with the same policy type and asserts that the loaded ``_index`` equals
    the original ``_index``.
    """
    datapath = tmpdir.join("dataset.txt")
    datapath.write(DATASET_SMALL_STR)
    documents = load_documents(datapath)
    inverted_index = build_inverted_index(documents)
    tmp_fio = tmpdir.join('test.dump')
    inverted_index.dump(StructStoragePolicy(), tmp_fio)
    loaded_index = inverted_index.load(StructStoragePolicy(), tmp_fio)
    # The failure message describes the check actually performed here
    # (round-trip equality), not file creation.
    assert inverted_index._index == loaded_index._index, (
        "Loaded index differs from the dumped index"
    )
Exemplo n.º 3
0
def test_load_documents_do_correct(tiny_dataset_fio):
    """Check that ``load_documents`` returns the expected id-to-text mapping.

    The result of loading ``tiny_dataset_fio`` is compared with a hard-coded
    reference dictionary.
    """
    loaded = load_documents(tiny_dataset_fio)
    expected_documents = {
        123: "Show must go on!",
        321: "Still loving you",
        547: "The Adventures of Rain Dance Maggie",
        645: "The House of Rising Son",
        789: "A A B",
    }
    assert loaded == expected_documents, "Dataset loaded incorrectly"
Exemplo n.º 4
0
def test_callback_query_list(tiny_dataset_fio, tmpdir, capsys, query, answer):
    """Run ``callback_query`` with an in-memory query and check its stdout.

    An index built from ``tiny_dataset_fio`` is dumped to ``test.dump`` under
    ``tmpdir``; ``callback_query`` is then called with ``query`` and that
    path, and the captured output is parsed into a set of ints that must
    equal ``answer``.
    """
    index = build_inverted_index(load_documents(tiny_dataset_fio))
    dump_path = tmpdir.join('test.dump')
    index.dump(StructStoragePolicy(), dump_path)

    callback_query(Namespace(query=query, index_path=dump_path))

    printed = capsys.readouterr().out
    # A lone newline is treated as "no ids printed"; otherwise the output is
    # parsed as a comma-separated list of integers.
    query_ans = (
        set()
        if printed == "\n"
        else {int(token) for token in printed.rstrip().split(",")}
    )
    assert query_ans == answer, "Wrong answer"
Exemplo n.º 5
0
def test_callback_query_utf8(tiny_dataset_fio, tmpdir, capsys, query, answer):
    """Run ``callback_query`` with the query supplied through a UTF-8 file.

    An index built from ``tiny_dataset_fio`` is dumped to ``test.dump`` under
    ``tmpdir``. The words of ``query`` are written space-separated on one line
    to ``test.utf8``; that file is reopened for reading and passed as
    ``query_file`` together with an empty ``query`` list. The captured stdout
    is parsed into a set of ints that must equal ``answer``.
    """
    documents = load_documents(tiny_dataset_fio)
    inverted_index = build_inverted_index(documents)
    tmp_fio = tmpdir.join('test.dump')
    inverted_index.dump(StructStoragePolicy(), tmp_fio)
    tmp_file = tmpdir.join('test.utf8')
    with open(tmp_file, "w", encoding="utf8") as file:
        file.write(" ".join(query) + "\n")
    # Open the query file in a context manager so the handle is always
    # closed, even if callback_query raises.
    with open(tmp_file, "r", encoding="utf8") as query_file:
        arguments = Namespace(query=[],
                              query_file=query_file,
                              index_path=tmp_fio)
        callback_query(arguments)
    captured = capsys.readouterr()
    # A lone newline is treated as "no ids printed"; otherwise the output is
    # parsed as a comma-separated list of integers.
    if captured.out == "\n":
        query_ans = set()
    else:
        query_ans = {int(var) for var in captured.out.rstrip().split(",")}
    assert query_ans == answer, "Wrong answer"
Exemplo n.º 6
0
def test_can_run_build_index(tmpdir, dataset):
    """Smoke test: building an index from ``dataset`` must not raise.

    ``dataset`` is written to ``dataset.txt`` under ``tmpdir``, loaded with
    ``load_documents`` and passed to ``build_inverted_index``; the result is
    not inspected.
    """
    dataset_file = tmpdir.join("dataset.txt")
    dataset_file.write(dataset)
    build_inverted_index(load_documents(dataset_file))
Exemplo n.º 7
0
def test_error_load_documents_wrong_filepath():
    """Expect ``FileNotFoundError`` when ``load_documents`` gets the path "impresed"."""
    missing_path = "impresed"
    with pytest.raises(FileNotFoundError):
        load_documents(missing_path)
Exemplo n.º 8
0
def test_inverted_index_can_dump_index(tiny_dataset_fio, tmpdir):
    """Check that dumping an index creates a file at the target path.

    The index built from ``tiny_dataset_fio`` is dumped with a
    ``StructStoragePolicy`` to ``test.dump`` under ``tmpdir``; the test then
    asserts that a regular file exists there.
    """
    index = build_inverted_index(load_documents(tiny_dataset_fio))
    dump_path = tmpdir.join('test.dump')
    index.dump(StructStoragePolicy(), dump_path)
    dump_exists = os.path.isfile(dump_path)
    assert dump_exists, "File was not created"
Exemplo n.º 9
0
def test_query_return_right_answer(tiny_dataset_fio, query, answer):
    """Check that ``query`` on the built index matches ``answer``, ignoring order.

    Both the expected ``answer`` and the result of ``inverted_index.query``
    are sorted before being compared.
    """
    index = build_inverted_index(load_documents(tiny_dataset_fio))
    expected_ids = sorted(answer)
    found_ids = sorted(index.query(query))
    assert expected_ids == found_ids
Exemplo n.º 10
0
def test_can_build_index_do_correct(tiny_dataset_fio):
    """Check that the index built from the tiny dataset equals ``ETALON_TINY_INDEX``."""
    built_index = build_inverted_index(load_documents(tiny_dataset_fio))
    actual_mapping = built_index._index
    assert actual_mapping == ETALON_TINY_INDEX, "Wrong build for inverted index"