Пример #1
0
def test_storing_several_fields(temp_db_file):
    """Several metadata entries stored under one key can all be read back."""
    storage = MetadataStorageManager(temp_db_file.name)
    expected = [
        Metadata('field_1', 'I', 10, 0),
        Metadata('field_2', 'S', 10, 5),
        Metadata('field_3', None, 10, 10),
    ]
    storage.store_metadata("abc", expected)

    # Both sides are sorted, so the comparison ignores retrieval order.
    retrieved = storage.retrieve_metadata("abc")
    assert sorted(expected) == sorted(retrieved)
Пример #2
0
def test_corrupted_db_file(temp_db_file):
    """Storing into a file that already holds arbitrary text raises StoringException."""
    # Write plain text into the file before the manager is pointed at it.
    temp_db_file.write("This is some content")
    temp_db_file.flush()

    storage = MetadataStorageManager(temp_db_file.name)
    with pytest.raises(StoringException) as raised:
        storage.store_metadata("abc", [Metadata('field_1', 'I', 10, 0)])

    expected_message = "Could not store metadata into the DB. Is it corrupted?"
    assert raised.value.args[0] == expected_message
Пример #3
0
def perform_describe(abs_path: str, db_path: str) -> None:
    """
    Look up the metadata stored for a source file and pretty-print it.

    When nothing is stored for the path, a message is written to stderr and
    the process exits with status 1.

    :param abs_path: the file the metadata was extracted from
    :param db_path: path to the db file. It's created if it doesn't exist.
    """
    storage = MetadataStorageManager(db_path)
    found = list(storage.retrieve_metadata(abs_path))
    if found:
        pretty_print(abs_path, found)
        return

    # Nothing stored for this path: report it and stop with a failure code.
    print('Could not find metadata for the entered path', file=sys.stderr)
    sys.exit(1)
Пример #4
0
def perform_crawling(abs_path: str, db_path: str) -> None:
    """
    Crawl the metadata of a file and persist it under the file's path.

    When the file already has stored metadata, a message is written to
    stderr and the process exits with status 1.

    :param abs_path: the file to extract metadata from
    :param db_path: path to the db file. It's created if it doesn't exist.
    """
    storage = MetadataStorageManager(db_path)
    if not is_already_crawled(storage, abs_path):
        # Pipeline: read the raw records, crawl them, then store the result.
        extracted = extract_metadata_from_file(abs_path)
        storage.store_metadata(abs_path, crawl(extracted))
        return

    print(f"File '{abs_path}' already crawled", file=sys.stderr)
    sys.exit(1)
Пример #5
0
def test_non_writable_db_file(temp_db_file):
    """Building a manager on a read-only file raises StoringException."""
    # Keep only the read bits for owner, group and others.
    read_only_mode = S_IREAD | S_IRGRP | S_IROTH
    os.chmod(temp_db_file.name, read_only_mode)

    with pytest.raises(StoringException) as raised:
        MetadataStorageManager(temp_db_file.name)

    expected_message = "Could not create db schema. Is it a readable path?"
    assert raised.value.args[0] == expected_message
Пример #6
0
def test_successfully_retrieving_by_key(temp_db_file):
    """Each key returns only the metadata that was stored under it."""
    storage = MetadataStorageManager(temp_db_file.name)
    stored = [
        ("abc", Metadata('field_1', 'I', 10, 0)),
        ("def", Metadata('field_2', 'S', 10, 5)),
        ("ghi", Metadata('field_3', None, 10, 10)),
    ]

    # Store everything first so every lookup runs against a db holding all keys.
    for key, entry in stored:
        storage.store_metadata(key, [entry])

    for key, entry in stored:
        assert [entry] == list(storage.retrieve_metadata(key))
Пример #7
0
def is_already_crawled(storage_manager: MetadataStorageManager,
                       file_path: str) -> bool:
    """
    Tell whether any metadata is already stored for a file.

    Only the first item of the retrieved iterator is requested; its value is
    never inspected.

    :param storage_manager: an instance of the storage manager to check metadata
    :param file_path: a path
    :return: True if that file was already crawled. False otherwise.
    """
    # A private sentinel distinguishes "iterator exhausted" from any real
    # item, including falsy ones such as None.
    nothing_stored = object()
    first_entry = next(storage_manager.retrieve_metadata(file_path), nothing_stored)
    return first_entry is not nothing_stored
Пример #8
0
def test_successfully_storing_one_metadata(temp_db_file):
    """A single stored metadata entry is returned unchanged for its key."""
    storage = MetadataStorageManager(temp_db_file.name)
    entry = Metadata('field', 'I', 10, 0)

    storage.store_metadata("abc", [entry])
    retrieved = list(storage.retrieve_metadata("abc"))

    assert [entry] == retrieved
Пример #9
0
def test_successfully_storing_several_metadata(temp_db_file):
    """A batch of 1000 entries stored under one key is read back in the same order."""
    storage = MetadataStorageManager(temp_db_file.name)
    entries = [
        Metadata(f'f_{number}', 'I', 10, 0)
        for number in range(1000)
    ]

    storage.store_metadata("abc", entries)
    retrieved = list(storage.retrieve_metadata("abc"))

    # Plain list equality, so retrieval order must match insertion order.
    assert entries == retrieved