def test_functional(tmp_dir_fixture, monkeypatch):  # NOQA
    """Functional test: SmartTool renders its base command template into an
    argument list and shells out via ``subprocess.call``.

    Fix: patch ``subprocess.call`` and ``pre_run`` through the
    ``monkeypatch`` fixture (which was already a parameter but unused)
    instead of assigning MagicMocks directly, so the patches are undone on
    teardown and cannot leak into other tests.
    """
    import subprocess

    from smarttoolbase import SmartTool

    # Frozen input dataset the tool reads from.
    input_admin_metadata = dtoolcore.generate_admin_metadata(
        "my_input_ds", "testing_bot")
    input_dataset = dtoolcore.generate_proto_dataset(
        admin_metadata=input_admin_metadata,
        prefix=tmp_dir_fixture,
        storage="file")
    input_dataset.create()
    input_dataset.put_readme("")
    input_dataset.freeze()

    # Proto (unfrozen) output dataset the tool writes into.
    output_admin_metadata = dtoolcore.generate_admin_metadata(
        "my_output_ds", "testing_bot")
    output_dataset = dtoolcore.generate_proto_dataset(
        admin_metadata=output_admin_metadata,
        prefix=tmp_dir_fixture,
        storage="file")
    output_dataset.create()
    output_dataset.put_readme("")

    with SmartTool(
            input_uri=input_dataset.uri,
            output_uri=output_dataset.uri,
    ) as smart_tool:

        assert smart_tool.input_dataset.uri == input_dataset.uri
        assert smart_tool.output_proto_dataset.uri == output_dataset.uri

        smart_tool.base_commands = [
            "bowtie2 -x {reference_prefix} -1 {forward_read_fpath} -2 {reverse_read_fpath} -S {output_fpath}",
        ]
        smart_tool.outputs = []

        smart_tool.base_command_props = {
            "reference_prefix": "/tmp/reference/Athaliana",
            "forward_read_fpath": "/tmp/input/data/read1.fq",
            "reverse_read_fpath": "/tmp/input/data/read2.fq",
            "output_fpath": "/tmp/working/output",
        }

        # The template above, filled in and split into argv form.
        expected_command_list = [
            "bowtie2", "-x", "/tmp/reference/Athaliana", "-1",
            "/tmp/input/data/read1.fq", "-2", "/tmp/input/data/read2.fq", "-S",
            "/tmp/working/output"
        ]

        # Patch via monkeypatch so both mocks are reverted on teardown.
        mock_call = MagicMock()
        monkeypatch.setattr(subprocess, "call", mock_call)
        monkeypatch.setattr(smart_tool, "pre_run", MagicMock())

        smart_tool("identifier")

        mock_call.assert_called_once_with(
            expected_command_list, cwd=smart_tool.working_directory)
        smart_tool.pre_run.assert_called_once()
# ---- Example 2 ----
def test_diff_sizes(tmp_uri_fixture):  # NOQA
    """diff_sizes reports items whose sizes differ between two datasets."""
    from dtoolcore import (
        DataSet,
        generate_admin_metadata,
        generate_proto_dataset,
    )
    from dtoolcore.compare import diff_sizes
    from dtoolcore.utils import generate_identifier

    fpaths = create_test_files(tmp_uri_fixture)

    def frozen_dataset(name, fpath):
        # Helper: one-item frozen dataset storing ``fpath`` as "file.txt".
        proto = generate_proto_dataset(
            admin_metadata=generate_admin_metadata(name),
            base_uri=tmp_uri_fixture)
        proto.create()
        proto.put_item(fpath, "file.txt")
        proto.freeze()
        return DataSet.from_uri(proto.uri)

    ds_a = frozen_dataset("test_compare_1", fpaths["he"])
    ds_b = frozen_dataset("test_compare_2", fpaths["she"])

    # A dataset never differs from itself.
    assert diff_sizes(ds_a, ds_a) == []

    # "he" is 2 bytes, "she" is 3 bytes.
    assert diff_sizes(ds_a, ds_b) == [
        (generate_identifier("file.txt"), 2, 3),
    ]
# ---- Example 3 ----
def test_copy(tmp_dir_fixture):  # NOQA
    """Renaming a proto dataset works before and after ``create()``.

    NOTE(review): despite its name, this test exercises
    ``ProtoDataSet.update_name`` rather than dataset copying.
    """
    import dtoolcore

    proto_dataset = dtoolcore.generate_proto_dataset(
        admin_metadata=dtoolcore.generate_admin_metadata("test_name"),
        prefix=tmp_dir_fixture,
        storage="file")
    assert proto_dataset.name == "test_name"

    # Renaming works before the dataset exists on disk...
    proto_dataset.update_name("test_new_name")
    assert proto_dataset.name == "test_new_name"

    proto_dataset.create()

    # ...and after, in which case the change must be persisted.
    proto_dataset.update_name("test_another_new_name")
    assert proto_dataset.name == "test_another_new_name"

    reloaded = dtoolcore.ProtoDataSet.from_uri(proto_dataset.uri)
    assert reloaded.name == "test_another_new_name"
# ---- Example 4 ----
def test_copy_resume_fixes_broken_files(tmp_uri_fixture):  # NOQA
    """copy_resume must re-copy items whose content is wrong in a
    partially copied destination dataset."""
    import dtoolcore

    # Make src/ and dest/ directories under the fixture URI.
    base_path = uri_to_path(tmp_uri_fixture)
    for subdir in ("src", "dest"):
        os.mkdir(os.path.join(base_path, subdir))

    # Build the source dataset: readme, sample items and one overlay.
    proto_dataset = dtoolcore.generate_proto_dataset(
        admin_metadata=dtoolcore.generate_admin_metadata("test_copy"),
        base_uri=tmp_uri_fixture + "/src")
    proto_dataset.create()
    src_uri = proto_dataset.uri

    proto_dataset.put_readme("---\nproject: exciting\n")

    overlay = "file_extension"
    for fname in os.listdir(TEST_SAMPLE_DATA):
        _, ext = os.path.splitext(fname)
        proto_dataset.put_item(os.path.join(TEST_SAMPLE_DATA, fname), fname)
        proto_dataset.add_item_metadata(fname, overlay, ext)

    proto_dataset.freeze()

    # Simulate an interrupted copy: one item exists but with wrong bytes.
    src_dataset = dtoolcore.DataSet.from_uri(proto_dataset.uri)
    dest_proto_dataset = dtoolcore._copy_create_proto_dataset(
        src_dataset, tmp_uri_fixture + "/dest")
    broken_content_fpath = os.path.join(TEST_SAMPLE_DATA, "another_file.txt")
    dest_proto_dataset.put_item(broken_content_fpath, "random_bytes")

    # Resuming the copy should repair the broken item.
    dest_uri = dtoolcore.copy_resume(src_uri, tmp_uri_fixture + "/dest")

    # The two datasets must now agree on all admin metadata...
    src_ds = dtoolcore.DataSet.from_uri(src_uri)
    dest_ds = dtoolcore.DataSet.from_uri(dest_uri)

    for key, value in src_ds._admin_metadata.items():
        assert dest_ds._admin_metadata[key] == value

    # ...and on every item property, except timestamps which are merely
    # close (the repaired item was re-copied slightly later).
    assert src_ds.identifiers == dest_ds.identifiers
    for identifier in src_ds.identifiers:
        src_props = src_ds.item_properties(identifier)
        dest_props = dest_ds.item_properties(identifier)
        for key, value in src_props.items():
            if key == "utc_timestamp":
                tolerance = 2  # seconds (number chosen arbitrarily)
                assert value <= dest_props[key] < value + tolerance
            else:
                assert dest_props[key] == value

    assert src_ds.get_readme_content() == dest_ds.get_readme_content()

    assert src_ds.list_overlay_names() == dest_ds.list_overlay_names()
    assert src_ds.get_overlay(overlay) == dest_ds.get_overlay(overlay)
# ---- Example 5 ----
def create_proto_dataset(base_uri, name, username):
    """Create, initialise and return a proto dataset with an empty README.

    :param base_uri: base URI under which the dataset is created
    :param name: dataset name
    :param username: creator user name recorded in the admin metadata
    :returns: the newly created proto dataset
    """
    proto_dataset = dtoolcore.generate_proto_dataset(
        admin_metadata=dtoolcore.generate_admin_metadata(name, username),
        base_uri=base_uri,
    )
    proto_dataset.create()
    proto_dataset.put_readme("")
    return proto_dataset
# ---- Example 6 ----
def generate_dataset(base_uri, name, size, num_files):
    """Create and freeze a dataset of random-content files, timing freeze.

    Creates ``num_files`` items of ``size`` random bytes each, tags each
    item with a "number" metadata entry, freezes the dataset, and prints a
    CSV line ``num_files,elapsed_seconds`` to stdout.

    Improvements: dead commented-out profiling/print code removed; the
    measured freeze time is now also returned (backward compatible —
    callers that ignored the implicit ``None`` return are unaffected).

    :param base_uri: base URI in which the dataset is created
    :param name: dataset name
    :param size: size of each generated file in bytes
    :param num_files: number of files to add
    :returns: freeze duration in seconds
    """
    admin_metadata = generate_admin_metadata(name=name,
                                             creator_username="******")
    proto_dataset = generate_proto_dataset(admin_metadata, base_uri)
    proto_dataset.create()
    proto_dataset.put_readme("")

    for i in range(num_files):
        handle = "{}.txt".format(i)

        # NamedTemporaryFile provides a real on-disk path for put_item;
        # flush so the full content is visible before it is copied in.
        with tempfile.NamedTemporaryFile() as fp:
            fp.write(os.urandom(size))
            fp.flush()
            proto_dataset.put_item(fp.name, handle)
            proto_dataset.add_item_metadata(handle, "number", i)

    start = time.time()
    proto_dataset.freeze()
    elapsed = time.time() - start

    print("{},{}".format(num_files, elapsed))
    return elapsed
def test_update_name_of_frozen_dataset(tmp_uri_fixture):  # NOQA
    """Renaming a frozen dataset persists and alters only the name."""
    import dtoolcore

    # Create and immediately freeze a dataset.
    admin_metadata = dtoolcore.generate_admin_metadata("test_name")
    proto_dataset = dtoolcore.generate_proto_dataset(
        admin_metadata=admin_metadata, base_uri=tmp_uri_fixture)
    proto_dataset.create()
    proto_dataset.freeze()

    dataset = dtoolcore.DataSet.from_uri(proto_dataset.uri)
    assert dataset.name == "test_name"

    dataset.update_name("updated_name")
    assert dataset.name == "updated_name"

    # The new name must survive a round trip through storage.
    dataset_again = dtoolcore.DataSet.from_uri(proto_dataset.uri)
    assert dataset_again.name == "updated_name"

    # Make sure that none of the other admin metadata has been altered.
    for key in admin_metadata:
        if key != "name":
            assert dataset_again._admin_metadata[key] == admin_metadata[key]
# ---- Example 8 ----
def test_diff_content(tmp_dir_fixture):  # NOQA
    """diff_content reports items whose hashes differ between datasets."""
    from dtoolcore import (
        DataSet,
        generate_admin_metadata,
        generate_proto_dataset,
    )
    from dtoolcore.compare import diff_content
    from dtoolcore.storagebroker import DiskStorageBroker
    from dtoolcore.utils import generate_identifier

    fpaths = create_test_files(tmp_dir_fixture)

    def frozen_dataset(name, fpath):
        # Helper: single-item frozen dataset holding ``fpath`` as "file.txt".
        proto = generate_proto_dataset(
            admin_metadata=generate_admin_metadata(name),
            prefix=tmp_dir_fixture,
            storage="file")
        proto.create()
        proto.put_item(fpath, "file.txt")
        proto.freeze()
        return DataSet.from_uri(proto.uri)

    ds_a = frozen_dataset("test_compare_1", fpaths["cat"])
    ds_b = frozen_dataset("test_compare_2", fpaths["she"])

    # A dataset never differs from itself.
    assert diff_content(ds_a, ds_a) == []

    # Differing content reported as (identifier, hash_in_a, hash_in_b).
    identifier = generate_identifier("file.txt")
    expected = [
        (identifier,
         DiskStorageBroker.hasher(ds_a.item_content_abspath(identifier)),
         DiskStorageBroker.hasher(ds_b.item_content_abspath(identifier)))
    ]
    assert diff_content(ds_a, ds_b) == expected
# ---- Example 9 ----
def proto_dataset_from_base_uri(name, base_uri):
    """Create and return a proto dataset called ``name`` under ``base_uri``.

    The base URI is normalised via ``generous_parse_uri`` before use.
    """
    parsed_base_uri = dtoolcore.utils.generous_parse_uri(base_uri)
    proto_dataset = dtoolcore.generate_proto_dataset(
        admin_metadata=dtoolcore.generate_admin_metadata(name),
        base_uri=dtoolcore.utils.urlunparse(parsed_base_uri),
    )
    proto_dataset.create()
    return proto_dataset
# ---- Example 10 ----
def test_status_command_on_proto_dataset_functional(tmp_dir_fixture):  # NOQA
    """``dtool status`` reports "proto" for an unfrozen dataset."""
    from dtoolcore import generate_admin_metadata, generate_proto_dataset
    from dtool_info.dataset import status

    proto_dataset = generate_proto_dataset(
        admin_metadata=generate_admin_metadata("test_ds"),
        base_uri=tmp_dir_fixture)
    proto_dataset.create()

    result = CliRunner().invoke(status, [proto_dataset.uri])
    assert result.exit_code == 0
    assert result.output.strip() == "proto"
# ---- Example 11 ----
def create_derived_dataset(parent_dataset, dest_location_uri, name_suffix):
    """Create a proto dataset derived from ``parent_dataset``.

    The new dataset is named ``<parent_name>_<name_suffix>`` and is
    created under ``dest_location_uri``.

    :param parent_dataset: dataset the new one is derived from
    :param dest_location_uri: base URI in which to create the dataset
    :param name_suffix: suffix appended to the parent dataset's name
    :raises click.UsageError: if the storage broker cannot create the
        dataset (the original exception is preserved as the cause)
    :returns: the created proto dataset
    """
    dest_dataset_name = "{}_{}".format(parent_dataset.name, name_suffix)

    admin_metadata = dtoolcore.generate_admin_metadata(dest_dataset_name)
    dest_dataset = dtoolcore.generate_proto_dataset(
        admin_metadata=admin_metadata,
        base_uri=dest_location_uri,
        config_path=CONFIG_PATH)
    try:
        dest_dataset.create()
    except dtoolcore.storagebroker.StorageBrokerOSError as err:
        # Surface storage failures as CLI usage errors; chain the cause
        # so tracebacks still show the underlying OS error.
        raise click.UsageError(str(err)) from err

    return dest_dataset
# ---- Example 12 ----
def test_diff_identifiers(tmp_dir_fixture):  # NOQA
    """diff_identifiers reports identifiers present in only one dataset."""
    from dtoolcore import (
        DataSet,
        generate_admin_metadata,
        generate_proto_dataset,
    )
    from dtoolcore.compare import diff_identifiers
    from dtoolcore.utils import generate_identifier

    fpaths = create_test_files(tmp_dir_fixture)

    def frozen_dataset(name, relpath):
        # Helper: frozen dataset storing the "cat" file as ``relpath``.
        proto = generate_proto_dataset(
            admin_metadata=generate_admin_metadata(name),
            prefix=tmp_dir_fixture,
            storage="file")
        proto.create()
        proto.put_item(fpaths["cat"], relpath)
        proto.freeze()
        return DataSet.from_uri(proto.uri)

    ds_a = frozen_dataset("test_compare_1", "a.txt")
    ds_b = frozen_dataset("test_compare_2", "b.txt")

    # A dataset never differs from itself.
    assert diff_identifiers(ds_a, ds_a) == []

    # Reported as (identifier, present_in_a, present_in_b).
    expected = [(generate_identifier("a.txt"), True, False),
                (generate_identifier("b.txt"), False, True)]
    assert diff_identifiers(ds_a, ds_b) == expected
# ---- Example 13 ----
def create(quiet, name, storage, prefix):
    """Create a proto dataset.

    :param quiet: if true, print only the new dataset's URI
    :param name: name for the new dataset
    :param storage: storage scheme (e.g. "file", "symlink")
    :param prefix: location prefix in which to create the dataset
    :raises click.UsageError: if the storage broker cannot create the
        dataset (the original exception is preserved as the cause)
    """
    admin_metadata = dtoolcore.generate_admin_metadata(name)

    # Create the dataset.
    proto_dataset = dtoolcore.generate_proto_dataset(
        admin_metadata=admin_metadata,
        prefix=prefix,
        storage=storage,
        config_path=CONFIG_PATH)
    try:
        proto_dataset.create()
    except dtoolcore.storagebroker.StorageBrokerOSError as err:
        # Report storage failures as CLI usage errors; chain the cause.
        raise click.UsageError(str(err)) from err

    proto_dataset.put_readme("")

    if quiet:
        click.secho(proto_dataset.uri)
    else:
        # Give the user some feedback and hints on what to do next.
        click.secho("Created proto dataset ", nl=False, fg="green")
        click.secho(proto_dataset.uri)
        click.secho("Next steps: ")

        step = 1
        click.secho("{}. Add descriptive metadata, e.g: ".format(step))
        click.secho("   dtool readme interactive {}".format(proto_dataset.uri),
                    fg="cyan")

        if storage != "symlink":
            step += 1
            click.secho("{}. Add raw data, eg:".format(step))
            click.secho("   dtool add item my_file.txt {}".format(
                proto_dataset.uri),
                        fg="cyan")

            if storage == "file":
                # Find the abspath of the data directory for user feedback.
                data_path = proto_dataset._storage_broker._data_abspath
                click.secho("   Or use your system commands, e.g: ")
                click.secho("   mv my_data_directory {}/".format(data_path),
                            fg="cyan")

        step += 1
        click.secho(
            "{}. Convert the proto dataset into a dataset: ".format(step))
        click.secho("   dtool freeze {}".format(proto_dataset.uri), fg="cyan")
def test_copy(tmp_dir_fixture):  # NOQA
    """Copying a frozen dataset reproduces metadata, items and overlays."""
    import dtoolcore

    src_dir = os.path.join(tmp_dir_fixture, "src")
    dest_dir = os.path.join(tmp_dir_fixture, "dest")
    for directory in (src_dir, dest_dir):
        os.mkdir(directory)

    # Build the source dataset: readme, sample items and one overlay.
    proto_dataset = dtoolcore.generate_proto_dataset(
        admin_metadata=dtoolcore.generate_admin_metadata("test_copy"),
        prefix=src_dir,
        storage="file")
    proto_dataset.create()
    src_uri = proto_dataset.uri

    proto_dataset.put_readme("---\nproject: exciting\n")

    overlay = "file_extension"
    for fname in os.listdir(TEST_SAMPLE_DATA):
        _, ext = os.path.splitext(fname)
        proto_dataset.put_item(os.path.join(TEST_SAMPLE_DATA, fname), fname)
        proto_dataset.add_item_metadata(fname, overlay, ext)

    proto_dataset.freeze()

    # Copy the src dataset to dest.
    dest_uri = dtoolcore.copy(src_uri, dest_dir, "file")

    # The copy must be indistinguishable from the original.
    src_ds = dtoolcore.DataSet.from_uri(src_uri)
    dest_ds = dtoolcore.DataSet.from_uri(dest_uri)

    assert src_ds._admin_metadata == dest_ds._admin_metadata

    assert src_ds.identifiers == dest_ds.identifiers
    for identifier in src_ds.identifiers:
        assert (src_ds.item_properties(identifier) ==
                dest_ds.item_properties(identifier))

    assert src_ds.get_readme_content() == dest_ds.get_readme_content()

    assert src_ds.list_overlay_names() == dest_ds.list_overlay_names()
    assert src_ds.get_overlay(overlay) == dest_ds.get_overlay(overlay)
def test_update_name_raises_DtoolCoreInvalidName(tmp_uri_fixture):  # NOQA
    """After create(), update_name rejects names with invalid characters."""
    import dtoolcore
    from dtoolcore import DtoolCoreInvalidNameError

    proto_dataset = dtoolcore.generate_proto_dataset(
        admin_metadata=dtoolcore.generate_admin_metadata("test_name"),
        base_uri=tmp_uri_fixture)
    assert proto_dataset.name == "test_name"

    # A valid rename is accepted before the dataset exists on disk.
    proto_dataset.update_name("test_new_name")
    assert proto_dataset.name == "test_new_name"

    proto_dataset.create()

    # ":" is not a valid dataset-name character.
    with pytest.raises(DtoolCoreInvalidNameError):
        proto_dataset.update_name("test_another:new_name")
# ---- Example 16 ----
def test_list_dataset_uris(tmp_dir_fixture):  # NOQA
    """DiskStorageBroker.list_dataset_uris finds all datasets in a prefix."""
    import dtoolcore
    from dtoolcore.storagebroker import DiskStorageBroker

    # An empty prefix contains no datasets.
    assert DiskStorageBroker.list_dataset_uris(
        prefix=tmp_dir_fixture, config_path=None) == []

    # Create two datasets to be discovered.
    expected_uris = []
    for name in ("test_ds_1", "test_ds_2"):
        proto_dataset = dtoolcore.generate_proto_dataset(
            admin_metadata=dtoolcore.generate_admin_metadata(name),
            prefix=tmp_dir_fixture,
            storage="file")
        proto_dataset.create()
        expected_uris.append(proto_dataset.uri)

    actual_uris = DiskStorageBroker.list_dataset_uris(
        prefix=tmp_dir_fixture, config_path=None)

    # Order is not specified, so compare as sets.
    assert set(expected_uris) == set(actual_uris)
def test_update_name(tmp_uri_fixture):  # NOQA
    """update_name works before and after create() and is persisted."""
    import dtoolcore

    proto_dataset = dtoolcore.generate_proto_dataset(
        admin_metadata=dtoolcore.generate_admin_metadata("test_name"),
        base_uri=tmp_uri_fixture)
    assert proto_dataset.name == "test_name"

    # Rename before the dataset exists on disk.
    proto_dataset.update_name("test_new_name")
    assert proto_dataset.name == "test_new_name"

    proto_dataset.create()

    # A rename after creation must be written through to storage.
    proto_dataset.update_name("test_another_new_name")
    assert proto_dataset.name == "test_another_new_name"

    reloaded = dtoolcore.ProtoDataSet.from_uri(proto_dataset.uri)
    assert reloaded.name == "test_another_new_name"