def test_query_from_loaded2(mock_file):
    """Check that a query containing a non-existent word yields an empty set.

    The index is loaded from ``'test.index'`` first; afterwards more than one
    call must have been recorded on ``mock_file``.
    """
    index = InvertedIndex()
    index.load('test.index')

    found = index.query(['me', 'test', 'non existed'])
    assert found == set()

    recorded_calls = mock_file.mock_calls
    assert len(recorded_calls) > 1
def test_load_index(mock_file):
    """Check that ``load`` fills ``index_data`` with the expected mapping.

    More than one call must also have been recorded on ``mock_file``.
    """
    expected_data = {'me': [1], 'test': [1, 2], 'you': [2]}

    index = InvertedIndex()
    index.load('my_Test.index')

    assert index.index_data == expected_data
    recorded_calls = mock_file.mock_calls
    assert len(recorded_calls) > 1
def test_dump_load_small_index():
    """Dump ``_SMALL_INDEX`` to a file and check it loads back unchanged."""
    # Relative path: the file is created in the current working directory and
    # is not removed by this test.
    path = 'small.index'

    subject = InvertedIndex()
    subject.inverted_index = _SMALL_INDEX

    with open(path, 'wb') as sink:
        subject.dump(sink)
    with open(path, 'rb') as source:
        subject.load(source)

    assert subject.inverted_index == _SMALL_INDEX, 'dumped and loaded index not the same'
def test_query_from_loaded(mock_file):
    """Check that a loaded index answers the ``['me', 'test']`` query with ``{1}``.

    More than one call must also have been recorded on ``mock_file``.
    """
    index = InvertedIndex()
    index.load('test.index')

    matches = index.query(['me', 'test'])
    assert matches == {1}

    recorded_calls = mock_file.mock_calls
    assert len(recorded_calls) > 1
def test_check_compression_good():
    """Check that the JSON and zipped-JSON indexes agree.

    Both indexes are loaded from fixed paths under ``inverted_index/``; they
    must return equal results for the same query and report equal sizes.
    """
    plain_index = InvertedIndex.load(
        "inverted_index/inverted_json.index",
        JsonStoragePolicy(),
    )
    zipped_index = InvertedIndex.load(
        "inverted_index/inverted_json_zip.index",
        JsonZipStoragePolicy(),
    )

    # Each query receives its own list object, exactly as two literals would.
    plain_hits = plain_index.query(["two", "words"])
    zipped_hits = zipped_index.query(["two", "words"])
    assert plain_hits == zipped_hits, "compressin give another answer"

    plain_size = plain_index.get_size()
    zipped_size = zipped_index.get_size()
    assert plain_size == zipped_size, "compressed file has diff num of records"
def test_init_empty_inverted_index_do_not_raise_exception():
    """Dump and load an empty index with each storage policy without raising.

    An ``InvertedIndex`` built from an empty mapping is written to
    ``INDEX_TMP_PATH`` and read back, first with ``JsonStoragePolicy`` and
    then with ``PickleStoragePolicy``. The test passes when none of these
    calls raises.
    """
    inverted_index = InvertedIndex({})

    for policy_cls in (JsonStoragePolicy, PickleStoragePolicy):
        storage_policy = policy_cls()
        inverted_index.dump(INDEX_TMP_PATH, storage_policy=storage_policy)
        # The loaded object is intentionally not bound: comparing the input
        # mapping with itself would always pass and verify nothing.
        # NOTE(review): to check round-trip content, compare the value
        # returned by ``load`` once InvertedIndex's equality contract is
        # confirmed.
        inverted_index.load(INDEX_TMP_PATH, storage_policy=storage_policy)
def test_dump_and_load_index(tmp_path, tiny_sample_document):
    """Build an index, dump it under ``tmp_path`` and check it loads back equal.

    The built index must equal ``TINY_SAMPLE_INV_TABLE`` and the index
    returned by ``InvertedIndex.load``.
    """
    workdir = tmp_path / "tiny_example_dir"
    workdir.mkdir()
    target = workdir / "tiny_example.index"

    built = build_inverted_index(tiny_sample_document)
    built.dump(target)
    assert built == TINY_SAMPLE_INV_TABLE

    restored = InvertedIndex.load(target)
    assert built == restored
def test_can_dump_and_load_inverted_index(tmpdir, small_dataset_index):
    """Dump ``small_dataset_index`` and check that loading returns an equal index.

    The index is written to ``inverted.index`` inside ``tmpdir`` and read
    back with ``InvertedIndex.load``. The loaded index must equal the
    original and must not compare equal to an empty dict.
    """
    # NOTE(review): a later test in this module is defined with the same
    # function name; if both live in one module the later definition replaces
    # this one and this test is never collected. Confirm and rename one.
    index_fio = tmpdir.join('inverted.index')
    small_dataset_index.dump(index_fio)

    # Named ``loaded_index`` so the local does not shadow the module-level
    # ``load_inverted_index`` function.
    loaded_index = InvertedIndex.load(index_fio)

    assert small_dataset_index == loaded_index, (
        "load should return the same inverted index"
    )
    assert {} != loaded_index, (
        "loaded inverted index should not be empty"
    )
def test_binary_dump_and_load_index(tmp_path, tiny_sample_document, words=None):
    """Round-trip an index through the ``'binary'`` storage policy and query it.

    Args:
        tmp_path: directory in which the index file is created.
        tiny_sample_document: documents passed to ``build_inverted_index``.
        words: query words handed to ``query_callback``; defaults to
            ``['of', 'words']`` when omitted.
    """
    # A fresh list per call avoids sharing one mutable default object
    # between invocations.
    if words is None:
        words = ['of', 'words']

    dir_ = tmp_path / "tiny_example_dir"
    dir_.mkdir()
    index_file = dir_ / "tiny_example.bin.index"

    inv_table = build_inverted_index(tiny_sample_document)
    inv_table.dump(index_file, storage_policy='binary')
    assert inv_table == TINY_SAMPLE_INV_TABLE

    loaded_inv_table = InvertedIndex.load(index_file, storage_policy='binary')
    assert inv_table == loaded_inv_table

    # Exercise the query callback with a stand-in object exposing the two
    # attributes set below.
    Args = namedtuple('Args', ['index_path', 'words'])
    args = Args(index_path=index_file, words=words)
    response = query_callback(args)

    expected_response = [{14, 1000}]
    assert response == expected_response
def load_inverted_index():
    """Return whatever ``InvertedIndex.load`` produces for ``'inv_index.dat'``."""
    index_path = 'inv_index.dat'
    loaded = InvertedIndex.load(index_path)
    return loaded
def test_can_dump_and_load_inverted_index(tmpdir, wikipedia_inverted_index):
    """Dump ``wikipedia_inverted_index`` and check that loading returns an equal index.

    The index is written to ``index.dump`` inside ``tmpdir`` and read back
    with ``InvertedIndex.load``.
    """
    # NOTE(review): an earlier test in this module is defined with the same
    # function name; if both live in one module this definition replaces the
    # earlier one. Confirm and rename one.
    dump_target = tmpdir.join("index.dump")
    wikipedia_inverted_index.dump(dump_target)

    restored = InvertedIndex.load(dump_target)
    assert wikipedia_inverted_index == restored, "load should return the same inverted index"