示例#1
0
def main(input_uri, output_prefix, output_storage):
    """Copy every dataset found under *input_uri* to the given output
    location, printing how long each individual copy took."""
    broker = dtoolcore._get_storage_broker(input_uri, None)
    base_path = urlparse(input_uri).path

    for dataset_uri in broker.list_dataset_uris(base_path, None):
        # Resolve the dataset name up front so it can be reported later.
        dataset_name = dtoolcore.DataSet.from_uri(dataset_uri).name

        started_at = time.time()
        dtoolcore.copy(dataset_uri, output_prefix, output_storage)
        elapsed = time.time() - started_at
        print("Copying {} took: {}s".format(dataset_name, elapsed))
示例#2
0
def copy(quiet, dataset_uri, dest_location_uri):
    """Copy a dataset to a different location.

    :param quiet: when true, suppress the progress bar and echo only the
        destination URI
    :param dataset_uri: URI of the source dataset
    :param dest_location_uri: URI of the base location to copy the
        dataset into; a URI without a scheme is treated as local
        ("file") storage
    :raises click.UsageError: if a dataset already exists at the
        destination URI, or (for "file" storage) if the destination
        path already exists on disk
    """
    src_dataset = dtoolcore.DataSet.from_uri(dataset_uri)

    # Generate the destination URI.
    parsed_location_uri = urlparse(dest_location_uri)
    prefix = parsed_location_uri.path
    storage = parsed_location_uri.scheme
    if storage == "":
        # No scheme in the URI means a local file system destination.
        storage = "file"

    dest_uri = dtoolcore._generate_uri(
        admin_metadata=src_dataset._admin_metadata,
        prefix=prefix,
        storage=storage)

    # Check if the destination URI is already a dataset
    # and exit gracefully if true.
    if dtoolcore._is_dataset(dest_uri, config_path=CONFIG_PATH):
        raise click.UsageError("Dataset already exists: {}".format(dest_uri))

    # If the destination URI is a "file" dataset one needs to check if
    # the path already exists and exit gracefully if true.
    parsed_dataset_uri = urlparse(dest_uri)
    if storage == "file":
        if os.path.exists(parsed_dataset_uri.path):
            raise click.UsageError("Path already exists: {}".format(
                parsed_dataset_uri.path))

    # Finally do the copy
    if quiet:
        dest_uri = dtoolcore.copy(src_uri=dataset_uri,
                                  prefix=prefix,
                                  storage=storage,
                                  config_path=CONFIG_PATH)
        click.secho(dest_uri)
    else:
        # Bar length is twice the item count — presumably two progress
        # ticks per item inside dtoolcore.copy; TODO confirm.
        num_items = len(list(src_dataset.identifiers))
        with click.progressbar(length=num_items * 2,
                               label="Copying dataset") as progressbar:
            dest_uri = dtoolcore.copy(src_uri=dataset_uri,
                                      prefix=prefix,
                                      storage=storage,
                                      config_path=CONFIG_PATH,
                                      progressbar=progressbar)

        click.secho("Dataset copied to:\n{}".format(dest_uri))
示例#3
0
def test_copy(tmp_uri_fixture):  # NOQA
    """A copied dataset must match the source except for frozen_at /
    utc_timestamp, which may lag by a small amount."""
    import dtoolcore

    base_path = uri_to_path(tmp_uri_fixture)
    src_dir = os.path.join(base_path, "src")
    dest_dir = os.path.join(base_path, "dest")
    for subdir in (src_dir, dest_dir):
        os.mkdir(subdir)

    # Build and populate the source dataset.
    proto_dataset = dtoolcore.generate_proto_dataset(
        admin_metadata=dtoolcore.generate_admin_metadata("test_copy"),
        base_uri=tmp_uri_fixture + "/src")
    proto_dataset.create()
    src_uri = proto_dataset.uri

    proto_dataset.put_readme("---\nproject: exciting\n")

    overlay = "file_extension"
    for fname in os.listdir(TEST_SAMPLE_DATA):
        ext = os.path.splitext(fname)[1]
        proto_dataset.put_item(os.path.join(TEST_SAMPLE_DATA, fname), fname)
        proto_dataset.add_item_metadata(fname, overlay, ext)

    proto_dataset.freeze()

    # Copy the src dataset to dest.
    dest_uri = dtoolcore.copy(src_uri, tmp_uri_fixture + "/dest")

    # Compare the two datasets.
    src_ds = dtoolcore.DataSet.from_uri(src_uri)
    dest_ds = dtoolcore.DataSet.from_uri(dest_uri)

    tolerance = 2  # seconds (number chosen arbitrarily)

    for key, value in src_ds._admin_metadata.items():
        if key == "frozen_at":
            # Timestamps are regenerated on copy; allow a small window.
            assert value <= dest_ds._admin_metadata[key] < value + tolerance
        else:
            assert dest_ds._admin_metadata[key] == value

    assert src_ds.identifiers == dest_ds.identifiers
    for identifier in src_ds.identifiers:
        dest_item_props = dest_ds.item_properties(identifier)
        for key, value in src_ds.item_properties(identifier).items():
            if key == "utc_timestamp":
                assert value <= dest_item_props[key] < value + tolerance
            else:
                assert dest_item_props[key] == value

    assert src_ds.get_readme_content() == dest_ds.get_readme_content()

    assert src_ds.list_overlay_names() == dest_ds.list_overlay_names()
    assert src_ds.get_overlay(overlay) == dest_ds.get_overlay(overlay)
示例#4
0
def test_frozen_at_value_when_copying_dataset(tmp_dir_fixture):  # NOQA
    """Copying a dataset must carry the frozen_at timestamp over verbatim."""
    with dc.DataSetCreator("delete-me", tmp_dir_fixture) as ds_creator:
        src_uri = ds_creator.uri

    dest_base_uri = os.path.join(tmp_dir_fixture, "dest")
    os.mkdir(dest_base_uri)

    original_frozen_at = (
        dc.DataSet.from_uri(src_uri)._admin_metadata["frozen_at"]
    )

    # Pause so that a regenerated timestamp would differ from the original.
    time.sleep(0.1)

    copied_dataset = dc.DataSet.from_uri(dc.copy(src_uri, dest_base_uri))

    assert copied_dataset._admin_metadata["frozen_at"] == original_frozen_at
def test_copy(tmp_dir_fixture):  # NOQA
    """A copied dataset must be indistinguishable from its source."""
    import dtoolcore

    src_dir = os.path.join(tmp_dir_fixture, "src")
    dest_dir = os.path.join(tmp_dir_fixture, "dest")
    os.mkdir(src_dir)
    os.mkdir(dest_dir)

    # Build and populate the source dataset.
    proto_dataset = dtoolcore.generate_proto_dataset(
        admin_metadata=dtoolcore.generate_admin_metadata("test_copy"),
        prefix=src_dir,
        storage="file")
    proto_dataset.create()
    src_uri = proto_dataset.uri

    proto_dataset.put_readme("---\nproject: exciting\n")

    overlay = "file_extension"
    for fname in os.listdir(TEST_SAMPLE_DATA):
        proto_dataset.put_item(os.path.join(TEST_SAMPLE_DATA, fname), fname)
        proto_dataset.add_item_metadata(
            fname, overlay, os.path.splitext(fname)[1])

    proto_dataset.freeze()

    # Copy the src dataset to dest and reload both sides.
    dest_uri = dtoolcore.copy(src_uri, dest_dir, "file")
    src_ds = dtoolcore.DataSet.from_uri(src_uri)
    dest_ds = dtoolcore.DataSet.from_uri(dest_uri)

    # Admin metadata, items, readme and overlays must all match.
    assert src_ds._admin_metadata == dest_ds._admin_metadata

    assert src_ds.identifiers == dest_ds.identifiers
    for identifier in src_ds.identifiers:
        assert (src_ds.item_properties(identifier)
                == dest_ds.item_properties(identifier))

    assert src_ds.get_readme_content() == dest_ds.get_readme_content()

    assert src_ds.list_overlay_names() == dest_ds.list_overlay_names()
    assert src_ds.get_overlay(overlay) == dest_ds.get_overlay(overlay)
示例#6
0
def test_dataset_verify_functional(tmp_dir_fixture):  # NOQA
    """`verify` must report extra, missing and altered items in turn."""
    from dtool_info.dataset import verify

    uri = dtoolcore.copy(lion_dataset_uri, tmp_dir_fixture, "file")
    dataset = dtoolcore.DataSet.from_uri(uri)
    data_abspath = dataset._storage_broker._data_abspath

    runner = CliRunner()

    def write_file(fpath, content):
        # Tamper with the dataset's on-disk data directory.
        with open(fpath, "w") as fh:
            fh.write(content)

    # An untouched dataset verifies cleanly.
    result = runner.invoke(verify, [uri])
    assert result.exit_code == 0
    assert result.output.startswith("All good")

    # An extra file is reported as an unknown item.
    extra_fpath = os.path.join(data_abspath, "extra.txt")
    write_file(extra_fpath, "extra")
    result = runner.invoke(verify, [uri])
    assert result.exit_code == 1
    assert result.output.startswith("Unknown item: ")
    os.unlink(extra_fpath)

    # A deleted file is reported as a missing item.
    item_fpath = os.path.join(data_abspath, "file.txt")
    os.unlink(item_fpath)
    result = runner.invoke(verify, [uri])
    assert result.exit_code == 1
    assert result.output.startswith("Missing item: ")

    # Replacing the content is caught by the size check.
    write_file(item_fpath, "Different content")
    result = runner.invoke(verify, [uri])
    assert result.exit_code == 1
    assert result.output.startswith("Altered item size: ")

    # With --full the altered hash is also reported.
    result = runner.invoke(verify, ["--full", uri])
    assert result.exit_code == 1
    assert result.output.startswith("Altered item size: ")
    assert result.output.find("Altered item hash: ") != -1
示例#7
0
def test_copy_and_diff(tmp_uuid_and_uri):  # NOQA
    """Copying a dataset locally must produce no identifier, size or
    content differences against the remote original."""
    uuid, dest_uri = tmp_uuid_and_uri

    import dtoolcore
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.compare import (
        diff_identifiers,
        diff_sizes,
        diff_content,
    )

    admin_metadata = generate_admin_metadata("my_dataset")
    admin_metadata["uuid"] = uuid

    local_file_path = os.path.join(TEST_SAMPLE_DATA, 'tiny.png')

    # Create and freeze a minimal single-item dataset at dest_uri.
    proto_dataset = ProtoDataSet(uri=dest_uri, admin_metadata=admin_metadata)
    proto_dataset.create()
    proto_dataset.put_readme(content='---\ndescription: test')
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    remote_dataset = DataSet.from_uri(dest_uri)

    with tmp_directory() as local_dir:
        local_uri = dtoolcore.copy(dest_uri, local_dir)
        assert local_uri.startswith("file:/")
        local_dataset = DataSet.from_uri(local_uri)
        # Copy and original must be identical under every diff.
        for diff in (diff_identifiers, diff_sizes, diff_content):
            assert len(diff(local_dataset, remote_dataset)) == 0
def test_annotation_functional(tmp_dir_fixture):  # NOQA
    """Annotations can be read/written on proto and frozen datasets,
    invalid names are rejected, and annotations survive a dataset copy."""

    from dtoolcore import (
        DataSet,
        ProtoDataSet,
        DtoolCoreKeyError,
        DtoolCoreInvalidNameError,
        generate_admin_metadata,
        copy,
    )

    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(name=name,
                                              uuid=admin_metadata["uuid"],
                                              base_uri=tmp_dir_fixture)

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset.
    proto_dataset = ProtoDataSet(uri=dest_uri,
                                 admin_metadata=admin_metadata,
                                 config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')

    # Test working on annotations with a ProtoDataset.
    # Reading an annotation that has never been written raises.
    with pytest.raises(DtoolCoreKeyError):
        proto_dataset.get_annotation(annotation_name="project")

    proto_dataset.put_annotation(annotation_name="project",
                                 annotation="world-peace")
    assert proto_dataset.get_annotation("project") == "world-peace"

    # Writing again overwrites the previous value.
    proto_dataset.put_annotation("project", "food-sustainability")
    assert proto_dataset.get_annotation("project") == "food-sustainability"

    assert proto_dataset.list_annotation_names() == ["project"]

    # Freeze the dataset
    proto_dataset.put_readme("")
    proto_dataset.freeze()

    # Test working on annotations with a frozen DataSet.
    dataset = DataSet.from_uri(dest_uri)
    with pytest.raises(DtoolCoreKeyError):
        dataset.get_annotation(annotation_name="stars")

    dataset.put_annotation(annotation_name="stars", annotation=0)
    assert dataset.get_annotation("stars") == 0

    dataset.put_annotation("stars", 5)
    assert dataset.get_annotation("stars") == 5

    assert dataset.list_annotation_names() == ["project", "stars"]

    # Test invalid names: spaces, commas and slashes are rejected, and
    # names longer than 80 characters are rejected.
    invalid_keys = ["with space", "with,comma", "with/slash", "X" * 81]
    for invalid_key in invalid_keys:
        with pytest.raises(DtoolCoreInvalidNameError):
            dataset.put_annotation(invalid_key, "bad")

    # Test that annotations survive a copy.
    copy_dataset_directory = os.path.join(tmp_dir_fixture, "copy")
    os.mkdir(copy_dataset_directory)
    dest_uri = dataset.base_uri + "/copy"
    copy_uri = copy(dataset.uri, dest_uri)

    copy_dataset = DataSet.from_uri(copy_uri)
    assert copy_dataset.list_annotation_names() == ["project", "stars"]
    assert copy_dataset.get_annotation("stars") == 5
    assert copy_dataset.get_annotation("project") == "food-sustainability"
def test_overlays_functional(tmp_dir_fixture):  # NOQA
    """Overlays enforce type/key/name validation and survive a copy."""
    from dtoolcore import (
        DataSet,
        ProtoDataSet,
        DtoolCoreKeyError,
        DtoolCoreTypeError,
        DtoolCoreValueError,
        DtoolCoreInvalidNameError,
        generate_admin_metadata,
        copy,
    )

    from dtoolcore.utils import generate_identifier
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    local_file_path = os.path.join(TEST_SAMPLE_DATA, 'tiny.png')

    # Build a minimal single-item dataset.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')

    # Freeze it so overlays can be exercised on a DataSet.
    proto_dataset.put_readme("")
    proto_dataset.freeze()

    dataset = DataSet.from_uri(proto_dataset.uri)

    # Reading an overlay that was never written raises.
    with pytest.raises(DtoolCoreKeyError):
        dataset.get_overlay("is_png")

    # Overlay content keyed by the item's identifier.
    is_png_overlay = {generate_identifier('tiny.png'): True}

    # Overlay content must be a dict...
    with pytest.raises(DtoolCoreTypeError):
        dataset.put_overlay("is_png", "not_a_dict")

    # ...keyed by the dataset's item identifiers...
    with pytest.raises(DtoolCoreValueError):
        dataset.put_overlay("is_png", {"incorrect": True})

    # ...under a valid overlay name.
    for bad_name in ("with space", "with,comma", "with/slash", "X" * 81):
        with pytest.raises(DtoolCoreInvalidNameError):
            dataset.put_overlay(bad_name, is_png_overlay)

    dataset.put_overlay("is_png", is_png_overlay)
    assert dataset.get_overlay("is_png") == is_png_overlay

    # Overlays survive a dataset copy.
    os.mkdir(os.path.join(tmp_dir_fixture, "copy"))
    copy_uri = copy(dataset.uri, dataset.base_uri + "/copy")

    copy_dataset = DataSet.from_uri(copy_uri)
    assert copy_dataset.list_overlay_names() == ["is_png"]
    assert copy_dataset.get_overlay("is_png") == is_png_overlay