Example #1

def test_works_if_all_set(tmp_uuid_and_uri):  # NOQA

    from dtoolcore import ProtoDataSet, generate_admin_metadata

    bucket_name = S3_TEST_BASE_URI[5:]
    endpoint_key = "DTOOL_S3_ENDPOINT_{}".format(bucket_name)
    access_key = "DTOOL_S3_ACCESS_KEY_ID_{}".format(bucket_name)
    secret_access_key = "DTOOL_S3_SECRET_ACCESS_KEY_{}".format(bucket_name)

    uuid, dest_uri = tmp_uuid_and_uri
    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    env_vars = {
        endpoint_key: "https://s3.amazonaws.com",
        access_key: S3_TEST_ACCESS_KEY_ID,
        secret_access_key: S3_TEST_SECRET_ACCESS_KEY,
    }

    a, b, c = list(env_vars.keys())
    with tmp_env_var(a, env_vars[a]):
        with tmp_env_var(b, env_vars[b]):
            with tmp_env_var(c, env_vars[c]):
                proto_dataset = ProtoDataSet(dest_uri, admin_metadata)
                proto_dataset.create()
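
The three nested ``with`` blocks can be flattened; a minimal sketch using ``contextlib.ExitStack``, assuming the same ``tmp_env_var`` context manager used throughout these tests:

from contextlib import ExitStack

with ExitStack() as stack:
    # Enter one tmp_env_var context per variable; ExitStack unwinds
    # them all in reverse order on exit.
    for key, value in env_vars.items():
        stack.enter_context(tmp_env_var(key, value))
    proto_dataset = ProtoDataSet(dest_uri, admin_metadata)
    proto_dataset.create()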
Example #2
def test_http_enable_with_presigned_url(tmp_uuid_and_uri):  # NOQA

    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(uri=dest_uri,
                                 admin_metadata=admin_metadata,
                                 config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.put_readme("---\nproject: testing\n")
    proto_dataset.freeze()

    dataset = DataSet.from_uri(dest_uri)

    # Add an annotation.
    dataset.put_annotation("project", "dtool-testing")

    # Add tags.
    dataset.put_tag("amazing")
    dataset.put_tag("stuff")

    with tmp_env_var("DTOOL_S3_PUBLISH_EXPIRY", "120"):
        access_url = dataset._storage_broker.http_enable()
    assert access_url.find("?") != -1  # Presigned URLs carry a query string.

    assert access_url.startswith("https://")

    dataset_from_http = DataSet.from_uri(access_url)

    # Assert that the annotation has been copied across.
    assert dataset_from_http.get_annotation("project") == "dtool-testing"

    # Assert that the tags are available.
    assert dataset_from_http.list_tags() == ["amazing", "stuff"]

    from dtoolcore.compare import (diff_identifiers, diff_sizes, diff_content)

    assert len(diff_identifiers(dataset, dataset_from_http)) == 0
    assert len(diff_sizes(dataset, dataset_from_http)) == 0
    assert len(diff_content(dataset_from_http, dataset)) == 0

    # Make sure that all the URLs in the manifest are presigned.
    http_manifest = dataset_from_http._storage_broker.http_manifest
    assert http_manifest["manifest_url"].find("?") != -1
    assert http_manifest["readme_url"].find("?") != -1
    for url in http_manifest["item_urls"].values():
        assert url.find("?") != -1
    for url in http_manifest["annotations"].values():
        assert url.find("?") != -1
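
The ``find("?")`` checks are a quick proxy for "has a query string"; a more explicit sketch, with ``is_presigned`` being a hypothetical helper built on the standard library:

from urllib.parse import urlparse

def is_presigned(url):
    # Presigned S3 URLs carry their auth parameters in the query string.
    return urlparse(url).query != ""

assert is_presigned(access_url)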
Example #3
def test_writing_of_dtool_readme_file(tmp_uuid_and_uri):  # NOQA
    from dtoolcore import ProtoDataSet, generate_admin_metadata

    # Create a proto dataset.
    uuid, dest_uri = tmp_uuid_and_uri
    name = "test_dtool_readme_file"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None
    )
    proto_dataset.create()

    # Check that the ".dtool/README.txt" file exists.
    expected_s3_key = uuid + '/README.txt'
    assert _key_exists_in_storage_broker(
        proto_dataset._storage_broker,
        expected_s3_key
    )

    actual_content = _get_unicode_from_key(
        proto_dataset._storage_broker,
        expected_s3_key
    )
    assert actual_content.startswith("README")
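
``_key_exists_in_storage_broker`` and ``_get_unicode_from_key`` are test helpers defined elsewhere in the suite. A minimal boto3-based sketch, assuming (hypothetically) that the broker exposes ``s3resource`` and ``bucket`` attributes:

import botocore.exceptions

def _key_exists_in_storage_broker(storage_broker, key):
    # load() issues a HEAD request and raises ClientError for a missing key.
    obj = storage_broker.s3resource.Object(storage_broker.bucket, key)
    try:
        obj.load()
        return True
    except botocore.exceptions.ClientError:
        return False

def _get_unicode_from_key(storage_broker, key):
    obj = storage_broker.s3resource.Object(storage_broker.bucket, key)
    return obj.get()["Body"].read().decode("utf-8")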
Example #4
def test_basic_workflow(tmp_dir_fixture):  # NOQA

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')

    proto_dataset.freeze()

    # Read in a dataset
    dataset = DataSet.from_uri(dest_uri)

    expected_identifier = generate_identifier('tiny.png')
    assert expected_identifier in dataset.identifiers
    assert len(dataset.identifiers) == 1
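
``generate_identifier`` derives the identifier from the item's handle (its relpath) rather than from the item's content; in dtoolcore it is the SHA-1 hex digest of the handle:

import hashlib

from dtoolcore.utils import generate_identifier

# The identifier depends only on the handle, so renaming an item
# changes its identifier even when the bytes are unchanged.
assert generate_identifier('tiny.png') == \
    hashlib.sha1('tiny.png'.encode('utf-8')).hexdigest()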
Example #5
def test_basic_workflow_on_first_namespace(tmp_uuid_and_uri):  # NOQA

    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    # Read in a dataset
    dataset = DataSet.from_uri(dest_uri)

    expected_identifier = generate_identifier('tiny.png')

    assert expected_identifier in dataset.identifiers
    assert len(dataset.identifiers) == 1
Example #6
def test_item_local_abspath_with_clean_cache(tmp_uuid_and_uri):  # NOQA

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier

    uuid, dest_uri = tmp_uuid_and_uri

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(uri=dest_uri,
                                 admin_metadata=admin_metadata,
                                 config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    identifier = generate_identifier('tiny.png')

    with tmp_directory() as cache_dir:
        with tmp_env_var("DTOOL_S3_CACHE_DIRECTORY", cache_dir):

            dataset = DataSet.from_uri(dest_uri)
            fpath = dataset.item_content_abspath(identifier)

            assert os.path.isfile(fpath)
Example #7
def test_list_dataset_uris(tmp_uuid_and_uri):  # NOQA

    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=CONFIG_PATH)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny with space.png')
    proto_dataset.freeze()

    from dtool_azure.storagebroker import AzureStorageBroker
    uris = AzureStorageBroker.list_dataset_uris(dest_uri, CONFIG_PATH)
    assert len(uris) > 0
Example #8

def test_uri_property_when_using_relpath(chdir_fixture):  # NOQA

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(name=name,
                                              uuid=admin_metadata["uuid"],
                                              prefix=".")

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(uri=dest_uri,
                                 admin_metadata=admin_metadata,
                                 config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')

    proto_dataset.freeze()

    dataset = DataSet.from_uri("./my_dataset")
    expected_uri = "file://" + os.path.abspath("my_dataset")
    assert dataset.uri == expected_uri
Example #9
def test_annotations(tmp_uuid_and_uri):  # NOQA

    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(uri=dest_uri,
                                 admin_metadata=admin_metadata,
                                 config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    # Read in a dataset
    dataset = DataSet.from_uri(dest_uri)

    assert dataset.list_annotation_names() == []

    dataset.put_annotation("project", "demo")
    assert dataset.get_annotation("project") == "demo"

    assert dataset.list_annotation_names() == ["project"]
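
Annotation values are not restricted to strings; dtoolcore stores them as JSON, so any JSON-serialisable value should round-trip. A short sketch under that assumption:

# Assumes annotation values round-trip through JSON.
dataset.put_annotation("stats", {"n_items": 1, "verified": True})
assert dataset.get_annotation("stats") == {"n_items": 1, "verified": True}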
Example #10
import argparse

from dtoolcore import DataSet, ProtoDataSet


class Processor(object):  # hypothetical enclosing class; this snippet is a method extracted from a larger tool
    def __init__(self):
        parser = argparse.ArgumentParser()

        parser.add_argument(
            '-d',
            '--dataset',
            help='URI of input dataset'
        )
        parser.add_argument(
            '-i',
            '--identifier',
            help='Identifier (hash) to process'
        )
        parser.add_argument(
            '-o',
            '--output-dataset',
            help='URI of output dataset'
        )

        args = parser.parse_args()

        self.input_dataset = DataSet.from_uri(args.dataset)
        self.output_dataset = ProtoDataSet.from_uri(args.output_dataset)

        self.identifier = args.identifier
Example #11
def test_dataset_readme_show_functional(chdir_fixture):  # NOQA
    from dtool_create.dataset import create, show, freeze
    runner = CliRunner()

    dataset_name = "my_dataset"
    result = runner.invoke(create, [dataset_name])
    assert result.exit_code == 0

    dataset_abspath = os.path.abspath(dataset_name)
    dataset_uri = "file://{}".format(dataset_abspath)

    result = runner.invoke(show, [dataset_uri])
    assert result.exit_code == 0
    assert result.output.strip() == ""

    # Update the readme content.
    proto_dataset = ProtoDataSet.from_uri(dataset_uri)
    readme_content = "hello\nworld"
    proto_dataset.put_readme(readme_content)

    result = runner.invoke(show, [dataset_uri])
    assert result.exit_code == 0
    assert result.output.strip() == readme_content

    # Make sure that the command works on a frozen dataset.
    result = runner.invoke(freeze, [dataset_uri])
    assert result.exit_code == 0

    result = runner.invoke(show, [dataset_uri])
    assert result.exit_code == 0
    assert result.output.strip() == readme_content
Example #12

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset-uri', help='Dataset URI')
    parser.add_argument('--identifier', help='Identifier (hash) to process')
    parser.add_argument(
        '--output-uri',
        help='Output dataset uri'
    )

    args = parser.parse_args()

    dataset = DataSet.from_uri(args.dataset_uri)
    output_dataset = ProtoDataSet.from_uri(args.output_uri)

    with temp_working_dir() as working_dir:

        outputs = segment_single_identifier(
            dataset,
            args.identifier,
            working_dir
        )

        overlays_to_copy = ['coords', 'ordering', 'useful_name']

        stage_outputs(
            outputs,
            working_dir,
            dataset,
            output_dataset,
            overlays_to_copy,
            args.identifier
        )
Example #13
def test_dataset_freeze_functional(chdir_fixture):  # NOQA
    from dtool_create.dataset import create, freeze, add
    runner = CliRunner()

    dataset_name = "my_dataset"
    result = runner.invoke(create, [dataset_name])
    assert result.exit_code == 0

    # At this point we have a proto dataset
    dataset_abspath = os.path.abspath(dataset_name)
    dataset_uri = sanitise_uri(dataset_abspath)
    dataset = ProtoDataSet.from_uri(dataset_uri)

    # Create a sample file for the proto dataset.
    sample_file_name = "hello.txt"
    with open(sample_file_name, "w") as fh:
        fh.write("hello world")

    # Put it into the dataset

    result = runner.invoke(add, ["item", sample_file_name, dataset_uri])
    assert result.exit_code == 0

    result = runner.invoke(freeze, [dataset_uri])
    assert result.exit_code == 0

    # Now we have a dataset.
    dataset = DataSet.from_uri(dataset_uri)

    # Manifest has been updated.
    assert len(dataset.identifiers) == 1
Example #14

def test_dataset_name_functional(chdir_fixture):  # NOQA
    from dtool_create.dataset import create, name, freeze
    runner = CliRunner()

    dataset_name = "my_dataset"
    result = runner.invoke(create, [dataset_name])
    assert result.exit_code == 0

    dataset_abspath = os.path.abspath(dataset_name)
    dataset_uri = sanitise_uri(dataset_abspath)

    # Test that the proto dataset has been created.
    dataset = ProtoDataSet.from_uri(dataset_uri)

    # Test that the dataset name is correct.
    assert dataset.name == dataset_name

    result = runner.invoke(name, [dataset_uri])
    assert result.exit_code == 0
    assert result.output.strip() == "my_dataset"

    result = runner.invoke(name, [dataset_uri, "new_name"])
    assert result.exit_code == 0
    assert result.output.strip() == "new_name"

    result = runner.invoke(freeze, [dataset_uri])

    result = runner.invoke(name, [dataset_uri])
    assert result.exit_code == 0
    assert result.output.strip() == "new_name"

    result = runner.invoke(name, [dataset_uri, "frozen_ds_now_allowed"])
    assert result.output.strip() == "frozen_ds_now_allowed"
Example #15

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset-uri')
    parser.add_argument('--resource-uri')
    parser.add_argument('--identifier')
    parser.add_argument('--output-uri')

    args = parser.parse_args()

    dataset = DataSet.from_uri(args.dataset_uri)
    resource_dataset = DataSet.from_uri(args.resource_uri)
    output_dataset = ProtoDataSet.from_uri(args.output_uri)

    with temp_working_dir() as working_dir:
        outputs = separate_plots(
            dataset,
            args.identifier,
            resource_dataset,
            working_dir
        )

        overlays_to_copy = ['ordering', 'date']

        stage_outputs(
            outputs,
            working_dir,
            dataset,
            output_dataset,
            overlays_to_copy,
            args.identifier
        )
Example #16

def test_fails_if_any_endpoint_is_missing(tmp_uuid_and_uri):  # NOQA

    from dtoolcore import ProtoDataSet, generate_admin_metadata

    bucket_name = S3_TEST_BASE_URI[5:]
    endpoint_key = "DTOOL_S3_ENDPOINT_{}".format(bucket_name)
    access_key = "DTOOL_S3_ACCESS_KEY_ID_{}".format(bucket_name)
    secret_access_key = "DTOOL_S3_SECRET_ACCESS_KEY_{}".format(bucket_name)

    uuid, dest_uri = tmp_uuid_and_uri
    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    env_vars = {
        endpoint_key: "https://s3.amazonaws.com",
        access_key: S3_TEST_ACCESS_KEY_ID,
        secret_access_key: S3_TEST_SECRET_ACCESS_KEY,
    }

    from itertools import combinations
    for a, b in combinations(env_vars.keys(), 2):
        with tmp_env_var(a, env_vars[a]):
            with tmp_env_var(b, env_vars[b]):
                with pytest.raises(RuntimeError):
                    ProtoDataSet(dest_uri, admin_metadata)
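
``combinations(env_vars.keys(), 2)`` yields every way of setting exactly two of the three variables, so each "one variable missing" case is exercised once; illustrated here with placeholder names:

from itertools import combinations

keys = ["ENDPOINT", "ACCESS_KEY_ID", "SECRET_ACCESS_KEY"]  # placeholders
# Three pairs, each leaving exactly one variable unset.
assert list(combinations(keys, 2)) == [
    ("ENDPOINT", "ACCESS_KEY_ID"),
    ("ENDPOINT", "SECRET_ACCESS_KEY"),
    ("ACCESS_KEY_ID", "SECRET_ACCESS_KEY"),
]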
Example #17

import argparse

from dtoolcore import DataSet, ProtoDataSet


class Processor(object):  # hypothetical enclosing class; this snippet is a method extracted from a larger tool
    def __init__(self):
        parser = argparse.ArgumentParser()

        parser.add_argument(
            '-d',
            '--dataset-uri',
            help='URI of input dataset'
        )
        parser.add_argument(
            '-i',
            '--identifier',
            help='Identifier to process'
        )
        parser.add_argument(
            '-o',
            '--output-dataset-uri',
            help='URI of output dataset'
        )

        args = parser.parse_args()

        self.input_dataset = DataSet.from_uri(args.dataset_uri)
        self.output_dataset = ProtoDataSet.from_uri(args.output_dataset_uri)

        self.identifier = args.identifier
Example #18
def test_writing_of_dtool_structure_file(tmp_uuid_and_uri):  # NOQA
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtool_ecs import __version__

    # Create a proto dataset.
    uuid, dest_uri = tmp_uuid_and_uri
    name = "test_dtool_structure_file"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None
    )
    proto_dataset.create()

    # Check that the ".dtool/structure.json" file exists.
    expected_s3_key = uuid + '/structure.json'
    assert _key_exists_in_storage_broker(
        proto_dataset._storage_broker,
        expected_s3_key
    )

    expected_content = {
        "dataset_registration_key": "dtool-{}".format(uuid),
        "data_key_infix": "data",
        "fragment_key_infix": "fragments",
        "annotations_key_infix": "annotations",
        "overlays_key_infix": "overlays",
        "structure_key_suffix": "structure.json",
        "dtool_readme_key_suffix": "README.txt",
        "dataset_readme_key_suffix": "README.yml",
        "manifest_key_suffix": "manifest.json",
        "admin_metadata_key_suffix": "dtool",
        "http_manifest_key": "http_manifest.json",
        "storage_broker_version": __version__,
    }
    actual_content = _get_data_structure_from_key(
        proto_dataset._storage_broker,
        expected_s3_key
    )
    assert expected_content == actual_content
Example #19
def test_tags_functional(tmp_uuid_and_uri):  # NOQA

    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()

    # Test put_tag on proto dataset.
    proto_dataset.put_tag("testing")

    proto_dataset.freeze()

    dataset = DataSet.from_uri(proto_dataset.uri)
    assert dataset.list_tags() == ["testing"]

    dataset.put_tag("amazing")
    dataset.put_tag("stuff")
    assert dataset.list_tags() == ["amazing", "stuff", "testing"]

    dataset.delete_tag("stuff")
    assert dataset.list_tags() == ["amazing", "testing"]

    # Putting the same tag is idempotent.
    dataset.put_tag("amazing")
    dataset.put_tag("amazing")
    dataset.put_tag("amazing")
    assert dataset.list_tags() == ["amazing", "testing"]

    # Tags can only be strings.
    from dtoolcore import DtoolCoreValueError
    with pytest.raises(DtoolCoreValueError):
        dataset.put_tag(1)

    # Tags need to adhere to the utils.name_is_valid() rules.
    from dtoolcore import DtoolCoreInvalidNameError
    with pytest.raises(DtoolCoreInvalidNameError):
        dataset.put_tag("!invalid")

    # Deleting a non-existing tag does not raise; it silently succeeds.
    dataset.delete_tag("dontexist")
Example #20
def test_http_enable(tmp_uuid_and_uri):  # NOQA

    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.put_readme("---\nproject: testing\n")
    proto_dataset.freeze()

    dataset = DataSet.from_uri(dest_uri)

    access_url = dataset._storage_broker.http_enable()

    assert access_url.startswith("https://")

    dataset_from_http = DataSet.from_uri(access_url)

    from dtoolcore.compare import (
        diff_identifiers,
        diff_sizes,
        diff_content
    )

    assert len(diff_identifiers(dataset, dataset_from_http)) == 0
    assert len(diff_sizes(dataset, dataset_from_http)) == 0
    assert len(diff_content(dataset_from_http, dataset)) == 0
Example #21
def test_basic_workflow_with_nested_handle(tmp_dir_fixture):  # NOQA

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')
    handle = "subdir/tiny.png"

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, handle)

    proto_dataset.freeze()

    # Read in a dataset
    dataset = DataSet.from_uri(dest_uri)

    expected_identifier = generate_identifier(handle)
    assert expected_identifier in dataset.identifiers
    assert len(dataset.identifiers) == 1

    # Ensure that the file exists in the disk dataset.
    # Particularly on Windows.
    item_abspath = os.path.join(
        tmp_dir_fixture,
        name,
        "data",
        "subdir",
        "tiny.png"
    )
    assert os.path.isfile(item_abspath)
    assert os.path.isfile(dataset.item_content_abspath(expected_identifier))

    # Ensure that the correct abspath is returned.
    # Particularly on Windows.
    assert dataset.item_content_abspath(expected_identifier) == item_abspath  # NOQA
Example #22

def test_writing_of_dtool_structure_file(tmp_uuid_and_uri):  # NOQA
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtool_smb import __version__

    # Create a proto dataset.
    uuid, dest_uri = tmp_uuid_and_uri
    name = "test_dtool_structure_file"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid
    proto_dataset = ProtoDataSet(uri=dest_uri,
                                 admin_metadata=admin_metadata,
                                 config_path=None)
    proto_dataset.create()

    # Check that the "_dtool/structure.json" file exists.
    expected_smb_key = '_dtool/structure.json'
    assert _key_exists_in_storage_broker(proto_dataset._storage_broker,
                                         expected_smb_key)

    expected_content = {
        "data_directory": ["data"],
        "dataset_readme_relpath": ["README.yml"],
        "dtool_directory": ["_dtool"],
        "admin_metadata_relpath": ["_dtool", "dtool"],
        "structure_metadata_relpath": ["_dtool", "structure.json"],
        "dtool_readme_relpath": ["_dtool", "README.txt"],
        "manifest_relpath": ["_dtool", "manifest.json"],
        "overlays_directory": ["_dtool", "overlays"],
        "annotations_directory": ["_dtool", "annotations"],
        "tags_directory": ["_dtool", "tags"],
        "metadata_fragments_directory": ["_dtool", "tmp_fragments"],
        "storage_broker_version": __version__,
    }

    actual_content = _get_data_structure_from_key(
        proto_dataset._storage_broker, expected_smb_key)
    print(actual_content)
    assert expected_content == actual_content
Example #23
def test_update_readme(tmp_uuid_and_uri):  # NOQA

    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_readme("First")
    proto_dataset.put_readme("Hello world")
    proto_dataset.freeze()

    # Read in a dataset
    dataset = DataSet.from_uri(dest_uri)

    assert len(dataset._storage_broker._list_historical_readme_keys()) == 0

    dataset.put_readme("Updated")

    assert len(dataset._storage_broker._list_historical_readme_keys()) == 1

    key = dataset._storage_broker._list_historical_readme_keys()[0]
    content = dataset._storage_broker.get_text(key)
    assert content == 'Hello world'

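    # Brief pause so the next readme backup key gets a later timestamp.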
    time.sleep(0.1)

    dataset.put_readme('Updated again')
    assert dataset.get_readme_content() == 'Updated again'
Example #24
def test_writing_of_dtool_structure_file(tmp_uuid_and_uri):  # NOQA
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtool_azure import __version__

    # Create a proto dataset.
    uuid, dest_uri = tmp_uuid_and_uri
    name = "test_dtool_structure_file"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid
    proto_dataset = ProtoDataSet(uri=dest_uri,
                                 admin_metadata=admin_metadata,
                                 config_path=None)
    proto_dataset.create()

    # Check that the ".dtool/structure.json" file exists.
    expected_azure_key = 'structure.json'
    assert _key_exists_in_storage_broker(proto_dataset._storage_broker,
                                         expected_azure_key)

    expected_content = {
        'http_manifest_key': 'http_manifest.json',
        'fragments_key_prefix': 'fragments/',
        'overlays_key_prefix': 'overlays/',
        'structure_dict_key': 'structure.json',
        'annotations_key_prefix': 'annotations/',
        'tags_key_prefix': 'tags/',
        'admin_metadata_key': 'dtool',
        'storage_broker_version': __version__,
        'dtool_readme_key': 'README.txt',
        'manifest_key': 'manifest.json',
        'dataset_readme_key': 'README.yml'
    }

    actual_content = _get_data_structure_from_key(
        proto_dataset._storage_broker, expected_azure_key)
    print(actual_content)
    assert expected_content == actual_content
Example #25
def test_http_manifest(tmp_uuid_and_uri):  # NOQA

    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(uri=dest_uri,
                                 admin_metadata=admin_metadata,
                                 config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.put_readme("---\nproject: testing\n")
    proto_dataset.freeze()

    dataset = DataSet.from_uri(dest_uri)

    # Test HTTP manifest.
    http_manifest = dataset._storage_broker._generate_http_manifest(
        expiry=None)  # NOQA
    assert "admin_metadata" in http_manifest
    assert http_manifest["admin_metadata"] == dataset._admin_metadata
    assert "overlays" in http_manifest
    assert "readme_url" in http_manifest
    assert "manifest_url" in http_manifest
    assert "item_urls" in http_manifest
    assert "annotations" in http_manifest
    assert "tags" in http_manifest
    assert set(http_manifest["item_urls"].keys()) == set(dataset.identifiers)
Example #26

def test_fails_if_only_endpoint_is_set(tmp_uuid_and_uri):  # NOQA

    from dtoolcore import ProtoDataSet, generate_admin_metadata

    bucket_name = S3_TEST_BASE_URI[5:]
    endpoint_key = "DTOOL_S3_ENDPOINT_{}".format(bucket_name)

    uuid, dest_uri = tmp_uuid_and_uri
    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    with tmp_env_var(endpoint_key, "https://s3.amazonaws.com"):
        with pytest.raises(RuntimeError):
            ProtoDataSet(dest_uri, admin_metadata)
Example #27

def test_dataset_create_functional(chdir_fixture):  # NOQA
    from dtool_create.dataset import create
    runner = CliRunner()

    dataset_name = "my_dataset"
    result = runner.invoke(create, [dataset_name])
    assert result.exit_code == 0

    # Test that the proto dataset has been created.
    dataset_abspath = os.path.abspath(dataset_name)
    dataset_uri = sanitise_uri(dataset_abspath)
    dataset = ProtoDataSet.from_uri(dataset_uri)

    # Test that the dataset name is correct.
    assert dataset.name == dataset_name
Example #28
def test_copy_and_diff(tmp_uuid_and_uri):  # NOQA

    uuid, dest_uri = tmp_uuid_and_uri

    import dtoolcore
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.compare import (
        diff_identifiers,
        diff_sizes,
        diff_content,
    )

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(uri=dest_uri, admin_metadata=admin_metadata)
    proto_dataset.create()
    proto_dataset.put_readme(content='---\ndescription: test')
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    remote_dataset = DataSet.from_uri(dest_uri)

    with tmp_directory() as local_dir:
        local_uri = dtoolcore.copy(dest_uri, local_dir)
        assert local_uri.startswith("file:/")
        local_dataset = DataSet.from_uri(local_uri)
        assert len(diff_identifiers(local_dataset, remote_dataset)) == 0
        assert len(diff_sizes(local_dataset, remote_dataset)) == 0
        assert len(diff_content(local_dataset, remote_dataset)) == 0
Example #29

def test_dataset_create_can_work_outside_current_directory(
        tmp_dir_fixture):  # NOQA
    from dtool_create.dataset import create
    runner = CliRunner()

    dataset_name = "my_dataset"
    dataset_path = os.path.join(tmp_dir_fixture, dataset_name)
    result = runner.invoke(create, [dataset_name, tmp_dir_fixture])
    assert result.exit_code == 0

    # Test that the dataset has been created.
    dataset_uri = sanitise_uri(dataset_path)
    dataset = ProtoDataSet.from_uri(dataset_uri)

    # Test that the dataset name is correct.
    assert dataset.name == dataset_name
Example #30
def test_list_overlays_when_dir_missing(chdir_fixture):  # NOQA
    """
    This test simulates checking out a frozen dataset from Git that has no
    overlays written to it, i.e. where the ``.dtool/overlays`` directory is
    missing.

    See also:
    https://github.com/jic-dtool/dtoolcore/issues/3
    """

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(name=name,
                                              uuid=admin_metadata["uuid"],
                                              base_uri="file://.")

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(uri=dest_uri,
                                 admin_metadata=admin_metadata,
                                 config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')

    proto_dataset.freeze()

    # Simulate the missing overlay directory.
    assert os.path.isdir(proto_dataset._storage_broker._overlays_abspath)
    os.rmdir(proto_dataset._storage_broker._overlays_abspath)
    assert not os.path.isdir(proto_dataset._storage_broker._overlays_abspath)

    dataset = DataSet.from_uri(proto_dataset.uri)

    # This call caused the bug.
    overlay_names = dataset.list_overlay_names()
    assert overlay_names == []
Example #31

def test_uri_property_when_using_relpath(chdir_fixture):  # NOQA

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.storagebroker import DiskStorageBroker
    from dtoolcore.utils import (IS_WINDOWS, windows_to_unix_path, urlparse)

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(name=name,
                                              uuid=admin_metadata["uuid"],
                                              base_uri=".")

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(uri=dest_uri,
                                 admin_metadata=admin_metadata,
                                 config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')

    proto_dataset.freeze()

    dataset = DataSet.from_uri("my_dataset")

    abspath = os.path.abspath("my_dataset")
    if IS_WINDOWS:
        abspath = windows_to_unix_path(abspath)
    assert dataset.uri.startswith("file://")
    assert dataset.uri.endswith(abspath)

    parsed = urlparse(dataset.uri)
    if IS_WINDOWS:
        assert parsed.netloc == ""
    else:
        assert parsed.netloc != ""
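
The netloc assertions hinge on how a ``file://`` URI is split; with the standard library ``urlparse`` (re-exported by ``dtoolcore.utils`` above):

from urllib.parse import urlparse

# "file:///tmp/ds" has an empty netloc; "file://host/tmp/ds" does not.
assert urlparse("file:///tmp/ds").netloc == ""
assert urlparse("file://host/tmp/ds").netloc == "host"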
Example #32

def test_defect_imeta_fails_if_key_already_exists_regression(
        tmp_uuid_and_uri):  # NOQA

    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(uri=dest_uri,
                                 admin_metadata=admin_metadata,
                                 config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')

    # Regression check: with ``imeta add`` this second put_item used to raise SystemExit.
    proto_dataset.put_item(local_file_path, 'tiny.png')
Example #33
def test_proto_dataset_freeze_functional(tmp_uuid_and_uri):  # NOQA

    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import (generate_admin_metadata, DataSet, ProtoDataSet,
                           DtoolCoreTypeError)
    from dtoolcore.utils import generate_identifier

    name = "func_test_dataset_freeze"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)

    proto_dataset = ProtoDataSet(uri=dest_uri,
                                 admin_metadata=admin_metadata,
                                 config_path=None)
    proto_dataset.create()

    filenames = ['tiny.png', 'actually_a_png.txt', 'another_file.txt']
    for filename in filenames:
        local_file_path = os.path.join(sample_data_path, filename)
        proto_dataset.put_item(local_file_path, filename)
        proto_dataset.add_item_metadata(filename, 'namelen', len(filename))
        proto_dataset.add_item_metadata(filename, 'firstletter', filename[0])

    # At this point the temporary fragments should exist.
    assert _prefix_contains_something(
        proto_dataset._storage_broker,
        proto_dataset._storage_broker.fragments_key_prefix)

    proto_dataset.put_readme(content='Hello world!')

    # We shouldn't be able to load this as a DataSet
    with pytest.raises(DtoolCoreTypeError):
        DataSet.from_uri(dest_uri)

    proto_dataset.freeze()

    # Freezing removes the temporary metadata fragments.
    assert not _prefix_contains_something(
        proto_dataset._storage_broker,
        proto_dataset._storage_broker.fragments_key_prefix)

    # Now we shouldn't be able to load this as a ProtoDataSet
    with pytest.raises(DtoolCoreTypeError):
        ProtoDataSet.from_uri(dest_uri)

    # But we can as a DataSet
    dataset = DataSet.from_uri(dest_uri)
    assert dataset.name == 'func_test_dataset_freeze'

    # Test identifiers
    expected_identifiers = map(generate_identifier, filenames)
    assert set(dataset.identifiers) == set(expected_identifiers)

    # Test readme contents
    assert dataset.get_readme_content() == "Hello world!"

    # Test item
    expected_identifier = generate_identifier('tiny.png')
    expected_hash = md5sum_hexdigest(os.path.join(sample_data_path,
                                                  'tiny.png'))
    item_properties = dataset.item_properties(expected_identifier)
    assert item_properties['relpath'] == 'tiny.png'
    assert item_properties['size_in_bytes'] == 276
    assert item_properties['hash'] == expected_hash

    # Test accessing item
    expected_identifier = generate_identifier('another_file.txt')
    fpath = dataset.item_content_abspath(expected_identifier)

    with open(fpath) as fh:
        contents = fh.read()

    assert contents == "Hello\n"

    # Test overlays have been created properly
    namelen_overlay = dataset.get_overlay('namelen')
    expected_identifier = generate_identifier('another_file.txt')
    assert namelen_overlay[expected_identifier] == len('another_file.txt')
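
``md5sum_hexdigest`` is another helper from the test suite; a minimal sketch that streams the file in chunks:

import hashlib

def md5sum_hexdigest(fpath, blocksize=65536):
    # Stream the file so large items need not fit in memory.
    hasher = hashlib.md5()
    with open(fpath, "rb") as fh:
        for chunk in iter(lambda: fh.read(blocksize), b""):
            hasher.update(chunk)
    return hasher.hexdigest()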