Пример #1
0
def test_item_properties(tmp_dir_fixture):  # NOQA
    """Verify the properties a DiskStorageBroker reports for a stored item.

    One sample file is put into a newly created structure and its
    ``size_in_bytes``, ``utc_timestamp``, ``hash`` and ``relpath``
    properties are checked.
    """
    from dtoolcore.storagebroker import DiskStorageBroker
    from dtoolcore.filehasher import md5sum_hexdigest

    source_fpath = os.path.join(TEST_SAMPLE_DATA, 'tiny.png')
    dataset_dir = os.path.join(tmp_dir_fixture, 'my_proto_dataset')

    broker = DiskStorageBroker(dataset_dir)
    broker.create_structure()
    broker.put_item(fpath=source_fpath, relpath='tiny.png')

    # Only one item was stored, so the first handle is the one of interest.
    first_handle = list(broker.iter_item_handles())[0]
    props = broker.item_properties(first_handle)

    # Size of the sample file.
    assert props['size_in_bytes'] == 276

    # The timestamp must be present and lie within the last few seconds.
    assert 'utc_timestamp' in props
    stamp = float(props['utc_timestamp'])
    item_time = datetime.datetime.fromtimestamp(stamp, tz=pytz.UTC)

    time.sleep(0.1)  # Make tests more robust on Windows.
    elapsed = datetime.datetime.now(tz=pytz.UTC) - item_time

    assert elapsed.days == 0
    assert elapsed.seconds < 20

    # The stored hash must match a digest computed from the source file.
    assert props['hash'] == md5sum_hexdigest(source_fpath)

    # The relpath is the one supplied to put_item.
    assert props['relpath'] == 'tiny.png'
Пример #2
0
def test_proto_dataset_freeze_functional(tmp_uuid_and_uri):  # NOQA
    """Walk a proto dataset through creation, population and freezing.

    Checks that metadata fragments exist before the freeze and are gone
    afterwards, that the URI loads only as the matching dataset type at
    each stage, and that identifiers, readme, item properties, item
    content and the ``namelen`` overlay are readable from the frozen
    dataset.
    """
    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import (generate_admin_metadata, DataSet, ProtoDataSet,
                           DtoolCoreTypeError)
    from dtoolcore.utils import generate_identifier

    def fragments_present():
        # Looked up afresh on every call so the check reflects the
        # current state of the storage broker.
        broker = proto_dataset._storage_broker
        return _prefix_contains_something(broker,
                                          broker.fragments_key_prefix)

    admin_metadata = generate_admin_metadata("func_test_dataset_freeze")
    admin_metadata["uuid"] = uuid

    data_dir = os.path.join(TEST_SAMPLE_DATA)

    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None,
    )
    proto_dataset.create()

    sample_names = ['tiny.png', 'actually_a_png.txt', 'another_file.txt']
    for relpath in sample_names:
        proto_dataset.put_item(os.path.join(data_dir, relpath), relpath)
        proto_dataset.add_item_metadata(relpath, 'namelen', len(relpath))
        proto_dataset.add_item_metadata(relpath, 'firstletter', relpath[0])

    # At this point the temporary fragments should exist.
    assert fragments_present()

    proto_dataset.put_readme(content='Hello world!')

    # An unfrozen dataset must not load as a DataSet.
    with pytest.raises(DtoolCoreTypeError):
        DataSet.from_uri(dest_uri)

    proto_dataset.freeze()

    # Freezing removes the temporary metadata fragments.
    assert not fragments_present()

    # Once frozen, it must no longer load as a ProtoDataSet...
    with pytest.raises(DtoolCoreTypeError):
        ProtoDataSet.from_uri(dest_uri)

    # ...but it does load as a DataSet.
    dataset = DataSet.from_uri(dest_uri)
    assert dataset.name == 'func_test_dataset_freeze'

    # Identifiers correspond to the relpaths that were added.
    wanted_ids = {generate_identifier(relpath) for relpath in sample_names}
    assert set(dataset.identifiers) == wanted_ids

    # Readme contents survive the freeze.
    assert dataset.get_readme_content() == "Hello world!"

    # Properties of a single item.
    png_id = generate_identifier('tiny.png')
    png_hash = md5sum_hexdigest(os.path.join(data_dir, 'tiny.png'))
    png_props = dataset.item_properties(png_id)
    assert png_props['relpath'] == 'tiny.png'
    assert png_props['size_in_bytes'] == 276
    assert png_props['hash'] == png_hash

    # Item content can be read back from a local path.
    text_id = generate_identifier('another_file.txt')
    with open(dataset.item_content_abspath(text_id)) as text_file:
        file_text = text_file.read()

    assert file_text == "Hello\n"

    # The per-item metadata became an overlay keyed by identifier.
    overlay = dataset.get_overlay('namelen')
    assert overlay[text_id] == len('another_file.txt')
Пример #3
0
def test_creation_and_reading(tmp_uuid_and_uri):  # NOQA
    """Create a proto dataset, reload it from its URI and inspect it.

    Covers the dataset name, readme get/put, putting items, item
    properties, item metadata and the generated manifest.
    """
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import __version__
    from dtoolcore.utils import generate_identifier

    uuid, dest_uri = tmp_uuid_and_uri

    admin_metadata = generate_admin_metadata("func_test_dataset")
    admin_metadata["uuid"] = uuid

    sample_dir = os.path.join(TEST_SAMPLE_DATA)

    # Create a proto dataset with an empty readme.
    created = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None,
    )
    created.create()
    created.put_readme("")
    assert created.name == "func_test_dataset"

    # Reload from the URI; everything below uses the reloaded instance.
    proto_dataset = ProtoDataSet.from_uri(dest_uri)
    assert proto_dataset.name == "func_test_dataset"
    broker = proto_dataset._storage_broker

    # Readme round trip.
    assert proto_dataset.get_readme_content() == ""
    proto_dataset.put_readme("Hello world!")
    assert proto_dataset.get_readme_content() == "Hello world!"

    # Put a local file and confirm its handle is listed.
    handle = "tiny.png"
    png_fpath = os.path.join(sample_dir, 'tiny.png')
    proto_dataset.put_item(png_fpath, handle)
    assert handle in list(broker.iter_item_handles())

    # Properties of that file.
    png_hash = md5sum_hexdigest(png_fpath)
    props = broker.item_properties(handle)
    assert props['relpath'] == 'tiny.png'
    assert props['size_in_bytes'] == 276
    assert props['hash'] == png_hash
    assert 'utc_timestamp' in props
    stamp = float(props['utc_timestamp'])
    item_time = datetime.datetime.fromtimestamp(stamp, tz=pytz.UTC)
    elapsed = datetime.datetime.now(tz=pytz.UTC) - item_time
    assert elapsed.days == 0
    assert elapsed.seconds < 100

    # Attach a plain value and a nested value as item metadata.
    proto_dataset.add_item_metadata(handle, 'foo', 'bar')
    proto_dataset.add_item_metadata(
        handle, 'key', {'subkey': 'subval', 'morekey': 'moreval'})

    # Both entries come back under their keys.
    stored_metadata = broker.get_item_metadata(handle)
    assert stored_metadata == {
        'foo': 'bar',
        'key': {'subkey': 'subval', 'morekey': 'moreval'},
    }

    # Add a second item and check it appears in the generated manifest.
    second_handle = 'real_text_file.txt'
    text_fpath = os.path.join(sample_dir, 'real_text_file.txt')
    proto_dataset.put_item(text_fpath, 'real_text_file.txt')
    manifest = proto_dataset.generate_manifest()
    assert manifest['hash_function'] == 'md5sum_hexdigest'
    assert manifest['dtoolcore_version'] == __version__

    text_id = generate_identifier(second_handle)
    assert text_id in manifest['items']
    assert manifest['items'][text_id]['relpath'] == 'real_text_file.txt'
    text_hash = md5sum_hexdigest(text_fpath)
    assert manifest['items'][text_id]['hash'] == text_hash
Пример #4
0
def test_md5sum_hexdigest():
    """Check ``md5sum_hexdigest`` against known digests of sample files.

    Both sample files are checked inside a single function: two
    module-level tests sharing one name shadow each other, so only the
    last definition would be collected and run.
    """
    from dtoolcore.filehasher import md5sum_hexdigest

    # (file name under TEST_SAMPLE_DATA, expected md5 hex digest)
    expected_digests = (
        ('tiny.png', "dc73192d2f81d7009ce5a1ee7bad5755"),
        ('another_file.txt', "09f7e02f1290be211da707a266f153b3"),
    )

    for filename, expected in expected_digests:
        test_file = os.path.join(TEST_SAMPLE_DATA, filename)
        actual = md5sum_hexdigest(test_file)
        # The file name is the assertion message so a failure says which
        # sample file produced the mismatch.
        assert actual == expected, filename