def test_storing_several_fields(temp_db_file):
    """Metadata for several fields round-trips through a single key."""
    manager = MetadataStorageManager(temp_db_file.name)
    stored = [
        Metadata('field_1', 'I', 10, 0),
        Metadata('field_2', 'S', 10, 5),
        Metadata('field_3', None, 10, 10),
    ]
    manager.store_metadata("abc", stored)
    retrieved = list(manager.retrieve_metadata("abc"))
    # Order of retrieval is not part of the contract, so compare sorted.
    assert sorted(stored) == sorted(retrieved)
def test_corrupted_db_file(temp_db_file):
    """Storing into a file that is not a valid DB raises StoringException."""
    # Corrupt the db file by writing arbitrary non-DB content into it.
    temp_db_file.write("This is some content")
    temp_db_file.flush()
    manager = MetadataStorageManager(temp_db_file.name)
    with pytest.raises(StoringException) as exc:
        manager.store_metadata("abc", [Metadata('field_1', 'I', 10, 0)])
    assert exc.value.args[0] == "Could not store metadata into the DB. Is it corrupted?"
def perform_describe(abs_path: str, db_path: str) -> None:
    """
    Print metadata extracted from a given source file.

    If no metadata is stored for ``abs_path``, prints an error message to
    stderr and exits the process with status 1.

    :param abs_path: the file the metadata was extracted from
    :param db_path: path to the db file. It's created if it doesn't exist.
    """
    s = MetadataStorageManager(db_path)
    metadata = list(s.retrieve_metadata(abs_path))
    if not metadata:
        print('Could not find metadata for the entered path', file=sys.stderr)
        sys.exit(1)
    pretty_print(abs_path, metadata)
def perform_crawling(abs_path: str, db_path: str) -> None:
    """
    Extract metadata from abs_path and store it.

    If the file was already crawled, prints an error message to stderr and
    exits the process with status 1 instead of crawling it again.

    :param abs_path: the file to extract metadata from
    :param db_path: path to the db file. It's created if it doesn't exist.
    """
    s = MetadataStorageManager(db_path)
    if is_already_crawled(s, abs_path):
        print(f"File '{abs_path}' already crawled", file=sys.stderr)
        sys.exit(1)
    records = extract_metadata_from_file(abs_path)
    crawled_data = crawl(records)
    s.store_metadata(abs_path, crawled_data)
def test_non_writable_db_file(temp_db_file):
    """Creating the manager over a non-writable db file raises StoringException."""
    # set temp_db_file non-writable
    os.chmod(temp_db_file.name, S_IREAD | S_IRGRP | S_IROTH)
    try:
        with pytest.raises(StoringException) as exc:
            MetadataStorageManager(temp_db_file.name)
        assert exc.value.args[0] == "Could not create db schema. Is it a readable path?"
    finally:
        # Restore write permission so the fixture can delete the temp file
        # (cleanup of read-only files fails on some platforms, e.g. Windows).
        os.chmod(temp_db_file.name, 0o600)
def test_successfully_retrieving_by_key(temp_db_file):
    """Each key returns exactly the metadata stored under that key."""
    manager = MetadataStorageManager(temp_db_file.name)
    per_key = {
        "abc": Metadata('field_1', 'I', 10, 0),
        "def": Metadata('field_2', 'S', 10, 5),
        "ghi": Metadata('field_3', None, 10, 10),
    }
    for key, record in per_key.items():
        manager.store_metadata(key, [record])
    for key, record in per_key.items():
        assert [record] == list(manager.retrieve_metadata(key))
def is_already_crawled(storage_manager: MetadataStorageManager, file_path: str) -> bool:
    """
    Whether a given file was already crawled

    :param storage_manager: an instance of the storage manager to check metadata
    :param file_path: a path
    :return: True if that file was already crawled. False otherwise.
    """
    # Pull at most one record: any stored metadata means the file was crawled.
    sentinel = object()
    first = next(storage_manager.retrieve_metadata(file_path), sentinel)
    return first is not sentinel
def test_successfully_storing_one_metadata(temp_db_file):
    """A single Metadata record survives a store/retrieve round-trip."""
    manager = MetadataStorageManager(temp_db_file.name)
    record = Metadata('field', 'I', 10, 0)
    manager.store_metadata("abc", [record])
    assert [record] == list(manager.retrieve_metadata("abc"))
def test_successfully_storing_several_metadata(temp_db_file):
    """A large batch of Metadata records round-trips in insertion order."""
    manager = MetadataStorageManager(temp_db_file.name)
    count = 1000
    records = [Metadata(f'f_{i}', 'I', 10, 0) for i in range(count)]
    manager.store_metadata("abc", records)
    assert records == list(manager.retrieve_metadata("abc"))