def test_works_if_all_set(tmp_uuid_and_uri):  # NOQA
    from dtoolcore import ProtoDataSet, generate_admin_metadata

    bucket_name = S3_TEST_BASE_URI[5:]
    endpoint_key = "DTOOL_S3_ENDPOINT_{}".format(bucket_name)
    access_key = "DTOOL_S3_ACCESS_KEY_ID_{}".format(bucket_name)
    secret_access_key = "DTOOL_S3_SECRET_ACCESS_KEY_{}".format(bucket_name)

    uuid, dest_uri = tmp_uuid_and_uri

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    env_vars = {
        endpoint_key: "https://s3.amazonaws.com",
        access_key: S3_TEST_ACCESS_KEY_ID,
        secret_access_key: S3_TEST_SECRET_ACCESS_KEY,
    }

    a, b, c = list(env_vars.keys())
    with tmp_env_var(a, env_vars[a]):
        with tmp_env_var(b, env_vars[b]):
            with tmp_env_var(c, env_vars[c]):
                proto_dataset = ProtoDataSet(dest_uri, admin_metadata)
                proto_dataset.create()

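# The ``tmp_env_var`` and ``tmp_directory`` context managers used throughout
# these tests are helpers defined elsewhere in the test suite. A minimal
# sketch of what they are assumed to look like (names and behaviour are
# inferred from how the tests use them, not taken from the actual source):

import contextlib
import os
import shutil
import tempfile


@contextlib.contextmanager
def tmp_env_var(key, value):
    """Temporarily set an environment variable, restoring any previous value."""
    previous = os.environ.get(key)
    os.environ[key] = value
    try:
        yield
    finally:
        if previous is None:
            del os.environ[key]
        else:
            os.environ[key] = previous


@contextlib.contextmanager
def tmp_directory():
    """Yield a temporary directory path and remove it afterwards."""
    d = tempfile.mkdtemp()
    try:
        yield d
    finally:
        shutil.rmtree(d)
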
def test_http_enable_with_presigned_url(tmp_uuid_and_uri):  # NOQA
    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.put_readme("---\nproject: testing\n")
    proto_dataset.freeze()

    dataset = DataSet.from_uri(dest_uri)

    # Add an annotation.
    dataset.put_annotation("project", "dtool-testing")

    # Add tags.
    dataset.put_tag("amazing")
    dataset.put_tag("stuff")

    with tmp_env_var("DTOOL_S3_PUBLISH_EXPIRY", "120"):
        access_url = dataset._storage_broker.http_enable()

    assert access_url.find("?") != -1  # This is a presigned URL.
    assert access_url.startswith("https://")

    dataset_from_http = DataSet.from_uri(access_url)

    # Assert that the annotation has been copied across.
    assert dataset_from_http.get_annotation("project") == "dtool-testing"

    # Assert that the tags are available.
    assert dataset_from_http.list_tags() == ["amazing", "stuff"]

    from dtoolcore.compare import (
        diff_identifiers,
        diff_sizes,
        diff_content
    )

    assert len(diff_identifiers(dataset, dataset_from_http)) == 0
    assert len(diff_sizes(dataset, dataset_from_http)) == 0
    assert len(diff_content(dataset_from_http, dataset)) == 0

    # Make sure that all the URLs in the manifest are presigned.
    http_manifest = dataset_from_http._storage_broker.http_manifest
    assert http_manifest["manifest_url"].find("?") != -1
    assert http_manifest["readme_url"].find("?") != -1
    for url in http_manifest["item_urls"].values():
        assert url.find("?") != -1
    for url in http_manifest["annotations"].values():
        assert url.find("?") != -1

def test_writing_of_dtool_readme_file(tmp_uuid_and_uri):  # NOQA
    from dtoolcore import ProtoDataSet, generate_admin_metadata

    # Create a proto dataset.
    uuid, dest_uri = tmp_uuid_and_uri
    name = "test_dtool_readme_file"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None
    )
    proto_dataset.create()

    # Check that the ".dtool/README.txt" file exists.
    expected_s3_key = uuid + '/README.txt'
    assert _key_exists_in_storage_broker(
        proto_dataset._storage_broker,
        expected_s3_key
    )

    actual_content = _get_unicode_from_key(
        proto_dataset._storage_broker,
        expected_s3_key
    )
    assert actual_content.startswith("README")

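# Helpers such as ``_key_exists_in_storage_broker``, ``_get_unicode_from_key``
# and ``_get_data_structure_from_key`` are defined in the test modules these
# snippets come from. A plausible sketch, assuming the storage broker exposes
# a ``get_text(key)`` method (as used in test_update_readme further down);
# the existence check via exception handling is an assumption for
# illustration only:

import json


def _get_unicode_from_key(storage_broker, key):
    # Fetch the raw value stored under ``key`` as text.
    return storage_broker.get_text(key)


def _get_data_structure_from_key(storage_broker, key):
    # Parse the JSON document stored under ``key``.
    return json.loads(_get_unicode_from_key(storage_broker, key))


def _key_exists_in_storage_broker(storage_broker, key):
    # Treat any failure to read the key as "does not exist".
    try:
        storage_broker.get_text(key)
        return True
    except Exception:
        return False
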
def test_basic_workflow(tmp_dir_fixture):  # NOQA
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    # Read in a dataset
    dataset = DataSet.from_uri(dest_uri)

    expected_identifier = generate_identifier('tiny.png')
    assert expected_identifier in dataset.identifiers
    assert len(dataset.identifiers) == 1

def test_basic_workflow_on_first_namespace(tmp_uuid_and_uri):  # NOQA
    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    # Read in a dataset
    dataset = DataSet.from_uri(dest_uri)

    expected_identifier = generate_identifier('tiny.png')
    assert expected_identifier in dataset.identifiers
    assert len(dataset.identifiers) == 1

def test_item_local_abspath_with_clean_cache(tmp_uuid_and_uri):  # NOQA
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier

    uuid, dest_uri = tmp_uuid_and_uri

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    identifier = generate_identifier('tiny.png')

    with tmp_directory() as cache_dir:
        with tmp_env_var("DTOOL_S3_CACHE_DIRECTORY", cache_dir):
            dataset = DataSet.from_uri(dest_uri)
            fpath = dataset.item_content_abspath(identifier)
            assert os.path.isfile(fpath)

def test_list_dataset_uris(tmp_uuid_and_uri):  # NOQA
    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=CONFIG_PATH)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny with space.png')
    proto_dataset.freeze()

    from dtool_azure.storagebroker import AzureStorageBroker
    assert len(AzureStorageBroker.list_dataset_uris(
        dest_uri,
        CONFIG_PATH)
    ) > 0

def test_uri_property_when_using_relpath(chdir_fixture):  # NOQA
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=".")

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    dataset = DataSet.from_uri("./my_dataset")
    expected_uri = "file://" + os.path.abspath("my_dataset")
    assert dataset.uri == expected_uri

def test_annotations(tmp_uuid_and_uri):  # NOQA
    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    # Read in a dataset
    dataset = DataSet.from_uri(dest_uri)

    assert dataset.list_annotation_names() == []
    dataset.put_annotation("project", "demo")
    assert dataset.get_annotation("project") == "demo"
    assert dataset.list_annotation_names() == ["project"]

def __init__(self):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-d',
        '--dataset',
        help='URI of input dataset'
    )
    parser.add_argument(
        '-i',
        '--identifier',
        help='Identifier (hash) to process'
    )
    parser.add_argument(
        '-o',
        '--output-dataset',
        help='URI of output dataset'
    )
    args = parser.parse_args()

    self.input_dataset = DataSet.from_uri(args.dataset)
    self.output_dataset = ProtoDataSet.from_uri(args.output_dataset)
    self.identifier = args.identifier

def test_dataset_readme_show_functional(chdir_fixture):  # NOQA
    from dtool_create.dataset import create, show, freeze
    runner = CliRunner()

    dataset_name = "my_dataset"
    result = runner.invoke(create, [dataset_name])
    assert result.exit_code == 0

    dataset_abspath = os.path.abspath(dataset_name)
    dataset_uri = "file://{}".format(dataset_abspath)

    result = runner.invoke(show, [dataset_uri])
    assert result.exit_code == 0
    assert result.output.strip() == ""

    # Update the readme content.
    proto_dataset = ProtoDataSet.from_uri(dataset_uri)
    readme_content = "hello\nworld"
    proto_dataset.put_readme(readme_content)

    result = runner.invoke(show, [dataset_uri])
    assert result.exit_code == 0
    assert result.output.strip() == readme_content

    # Make sure that the command works on a frozen dataset.
    result = runner.invoke(freeze, [dataset_uri])
    assert result.exit_code == 0

    result = runner.invoke(show, [dataset_uri])
    assert result.exit_code == 0
    assert result.output.strip() == readme_content

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset-uri', help='Dataset URI')
    parser.add_argument('--identifier', help='Identifier (hash) to process')
    parser.add_argument(
        '--output-uri',
        help='Output dataset URI'
    )
    args = parser.parse_args()

    dataset = DataSet.from_uri(args.dataset_uri)
    output_dataset = ProtoDataSet.from_uri(args.output_uri)

    with temp_working_dir() as working_dir:
        outputs = segment_single_identifier(
            dataset,
            args.identifier,
            working_dir
        )

        overlays_to_copy = ['coords', 'ordering', 'useful_name']
        stage_outputs(
            outputs,
            working_dir,
            dataset,
            output_dataset,
            overlays_to_copy,
            args.identifier
        )

def test_dataset_freeze_functional(chdir_fixture):  # NOQA
    from dtool_create.dataset import create, freeze, add
    runner = CliRunner()

    dataset_name = "my_dataset"
    result = runner.invoke(create, [dataset_name])
    assert result.exit_code == 0

    # At this point we have a proto dataset.
    dataset_abspath = os.path.abspath(dataset_name)
    dataset_uri = sanitise_uri(dataset_abspath)
    dataset = ProtoDataSet.from_uri(dataset_uri)

    # Create a sample file to add to the proto dataset.
    sample_file_name = "hello.txt"
    with open(sample_file_name, "w") as fh:
        fh.write("hello world")

    # Put it into the dataset.
    result = runner.invoke(add, ["item", sample_file_name, dataset_uri])
    assert result.exit_code == 0

    result = runner.invoke(freeze, [dataset_uri])
    assert result.exit_code == 0

    # Now we have a dataset.
    dataset = DataSet.from_uri(dataset_uri)

    # The manifest has been updated.
    assert len(dataset.identifiers) == 1

def test_dataset_name_functional(chdir_fixture):  # NOQA
    from dtool_create.dataset import create, name, freeze
    runner = CliRunner()

    dataset_name = "my_dataset"
    result = runner.invoke(create, [dataset_name])
    assert result.exit_code == 0

    dataset_abspath = os.path.abspath(dataset_name)
    dataset_uri = sanitise_uri(dataset_abspath)

    # Test that the proto dataset has been created.
    dataset = ProtoDataSet.from_uri(dataset_uri)

    # Test that the dataset name is correct.
    assert dataset.name == dataset_name

    result = runner.invoke(name, [dataset_uri])
    assert result.exit_code == 0
    assert result.output.strip() == "my_dataset"

    result = runner.invoke(name, [dataset_uri, "new_name"])
    assert result.exit_code == 0
    assert result.output.strip() == "new_name"

    result = runner.invoke(freeze, [dataset_uri])

    result = runner.invoke(name, [dataset_uri])
    assert result.exit_code == 0
    assert result.output.strip() == "new_name"

    result = runner.invoke(name, [dataset_uri, "frozen_ds_now_allowed"])
    assert result.output.strip() == "frozen_ds_now_allowed"

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset-uri')
    parser.add_argument('--resource-uri')
    parser.add_argument('--identifier')
    parser.add_argument('--output-uri')
    args = parser.parse_args()

    dataset = DataSet.from_uri(args.dataset_uri)
    resource_dataset = DataSet.from_uri(args.resource_uri)
    output_dataset = ProtoDataSet.from_uri(args.output_uri)

    with temp_working_dir() as working_dir:
        outputs = separate_plots(
            dataset,
            args.identifier,
            resource_dataset,
            working_dir
        )

        overlays_to_copy = ['ordering', 'date']
        stage_outputs(
            outputs,
            working_dir,
            dataset,
            output_dataset,
            overlays_to_copy,
            args.identifier
        )

def test_fails_if_any_endpoint_is_missing(tmp_uuid_and_uri):  # NOQA
    from dtoolcore import ProtoDataSet, generate_admin_metadata

    bucket_name = S3_TEST_BASE_URI[5:]
    endpoint_key = "DTOOL_S3_ENDPOINT_{}".format(bucket_name)
    access_key = "DTOOL_S3_ACCESS_KEY_ID_{}".format(bucket_name)
    secret_access_key = "DTOOL_S3_SECRET_ACCESS_KEY_{}".format(bucket_name)

    uuid, dest_uri = tmp_uuid_and_uri

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    env_vars = {
        endpoint_key: "https://s3.amazonaws.com",
        access_key: S3_TEST_ACCESS_KEY_ID,
        secret_access_key: S3_TEST_SECRET_ACCESS_KEY,
    }

    # Setting only two of the three variables should raise; all three are
    # needed for the per-bucket configuration to be complete.
    from itertools import combinations
    for a, b in combinations(env_vars.keys(), 2):
        with tmp_env_var(a, env_vars[a]):
            with tmp_env_var(b, env_vars[b]):
                with pytest.raises(RuntimeError):
                    ProtoDataSet(dest_uri, admin_metadata)

def __init__(self):
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-d',
        '--dataset-uri',
        help='URI of input dataset'
    )
    parser.add_argument(
        '-i',
        '--identifier',
        help='Identifier to process'
    )
    parser.add_argument(
        '-o',
        '--output-dataset-uri',
        help='URI of output dataset'
    )
    args = parser.parse_args()

    self.input_dataset = DataSet.from_uri(args.dataset_uri)
    self.output_dataset = ProtoDataSet.from_uri(args.output_dataset_uri)
    self.identifier = args.identifier

def test_writing_of_dtool_structure_file(tmp_uuid_and_uri):  # NOQA
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtool_ecs import __version__

    # Create a proto dataset.
    uuid, dest_uri = tmp_uuid_and_uri
    name = "test_dtool_structure_file"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None
    )
    proto_dataset.create()

    # Check that the ".dtool/structure.json" file exists.
    expected_s3_key = uuid + '/structure.json'
    assert _key_exists_in_storage_broker(
        proto_dataset._storage_broker,
        expected_s3_key
    )

    expected_content = {
        "dataset_registration_key": "dtool-{}".format(uuid),
        "data_key_infix": "data",
        "fragment_key_infix": "fragments",
        "annotations_key_infix": "annotations",
        "overlays_key_infix": "overlays",
        "structure_key_suffix": "structure.json",
        "dtool_readme_key_suffix": "README.txt",
        "dataset_readme_key_suffix": "README.yml",
        "manifest_key_suffix": "manifest.json",
        "admin_metadata_key_suffix": "dtool",
        "http_manifest_key": "http_manifest.json",
        "storage_broker_version": __version__,
    }
    actual_content = _get_data_structure_from_key(
        proto_dataset._storage_broker,
        expected_s3_key
    )
    assert expected_content == actual_content

def test_tags_functional(tmp_uuid_and_uri):  # NOQA
    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()

    # Test put_tag on proto dataset.
    proto_dataset.put_tag("testing")

    proto_dataset.freeze()

    dataset = DataSet.from_uri(proto_dataset.uri)
    assert dataset.list_tags() == ["testing"]

    dataset.put_tag("amazing")
    dataset.put_tag("stuff")
    assert dataset.list_tags() == ["amazing", "stuff", "testing"]

    dataset.delete_tag("stuff")
    assert dataset.list_tags() == ["amazing", "testing"]

    # Putting the same tag is idempotent.
    dataset.put_tag("amazing")
    dataset.put_tag("amazing")
    dataset.put_tag("amazing")
    assert dataset.list_tags() == ["amazing", "testing"]

    # Tags can only be strings.
    from dtoolcore import DtoolCoreValueError
    with pytest.raises(DtoolCoreValueError):
        dataset.put_tag(1)

    # Tags need to adhere to the utils.name_is_valid() rules.
    from dtoolcore import DtoolCoreInvalidNameError
    with pytest.raises(DtoolCoreInvalidNameError):
        dataset.put_tag("!invalid")

    # Deleting a non-existing tag does not raise; it silently succeeds.
    dataset.delete_tag("dontexist")

def test_http_enable(tmp_uuid_and_uri):  # NOQA
    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.put_readme("---\nproject: testing\n")
    proto_dataset.freeze()

    dataset = DataSet.from_uri(dest_uri)

    access_url = dataset._storage_broker.http_enable()
    assert access_url.startswith("https://")

    dataset_from_http = DataSet.from_uri(access_url)

    from dtoolcore.compare import (
        diff_identifiers,
        diff_sizes,
        diff_content
    )

    assert len(diff_identifiers(dataset, dataset_from_http)) == 0
    assert len(diff_sizes(dataset, dataset_from_http)) == 0
    assert len(diff_content(dataset_from_http, dataset)) == 0

def test_basic_workflow_with_nested_handle(tmp_dir_fixture):  # NOQA
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')
    handle = "subdir/tiny.png"

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, handle)
    proto_dataset.freeze()

    # Read in a dataset
    dataset = DataSet.from_uri(dest_uri)

    expected_identifier = generate_identifier(handle)
    assert expected_identifier in dataset.identifiers
    assert len(dataset.identifiers) == 1

    # Ensure that the file exists in the disk dataset.
    # Particularly on Windows.
    item_abspath = os.path.join(
        tmp_dir_fixture,
        name,
        "data",
        "subdir",
        "tiny.png"
    )
    assert os.path.isfile(item_abspath)
    assert os.path.isfile(dataset.item_content_abspath(expected_identifier))

    # Ensure that the correct abspath is returned.
    # Particularly on Windows.
    assert dataset.item_content_abspath(expected_identifier) == item_abspath  # NOQA

def test_writing_of_dtool_structure_file(tmp_uuid_and_uri):  # NOQA
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtool_smb import __version__

    # Create a proto dataset.
    uuid, dest_uri = tmp_uuid_and_uri
    name = "test_dtool_structure_file"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()

    # Check that the "_dtool/structure.json" file exists.
    expected_smb_key = '_dtool/structure.json'
    assert _key_exists_in_storage_broker(
        proto_dataset._storage_broker,
        expected_smb_key)

    expected_content = {
        "data_directory": ["data"],
        "dataset_readme_relpath": ["README.yml"],
        "dtool_directory": ["_dtool"],
        "admin_metadata_relpath": ["_dtool", "dtool"],
        "structure_metadata_relpath": ["_dtool", "structure.json"],
        "dtool_readme_relpath": ["_dtool", "README.txt"],
        "manifest_relpath": ["_dtool", "manifest.json"],
        "overlays_directory": ["_dtool", "overlays"],
        "annotations_directory": ["_dtool", "annotations"],
        "tags_directory": ["_dtool", "tags"],
        "metadata_fragments_directory": ["_dtool", "tmp_fragments"],
        "storage_broker_version": __version__,
    }
    actual_content = _get_data_structure_from_key(
        proto_dataset._storage_broker,
        expected_smb_key)
    print(actual_content)
    assert expected_content == actual_content

def test_update_readme(tmp_uuid_and_uri):  # NOQA
    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_readme("First")
    proto_dataset.put_readme("Hello world")
    proto_dataset.freeze()

    # Read in a dataset
    dataset = DataSet.from_uri(dest_uri)

    assert len(dataset._storage_broker._list_historical_readme_keys()) == 0

    dataset.put_readme("Updated")
    assert len(dataset._storage_broker._list_historical_readme_keys()) == 1

    key = dataset._storage_broker._list_historical_readme_keys()[0]
    content = dataset._storage_broker.get_text(key)
    assert content == 'Hello world'

    time.sleep(0.1)
    dataset.put_readme('Updated again')
    assert dataset.get_readme_content() == 'Updated again'

def test_writing_of_dtool_structure_file(tmp_uuid_and_uri):  # NOQA
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtool_azure import __version__

    # Create a proto dataset.
    uuid, dest_uri = tmp_uuid_and_uri
    name = "test_dtool_structure_file"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()

    # Check that the "structure.json" blob exists.
    expected_azure_key = 'structure.json'
    assert _key_exists_in_storage_broker(
        proto_dataset._storage_broker,
        expected_azure_key)

    expected_content = {
        'http_manifest_key': 'http_manifest.json',
        'fragments_key_prefix': 'fragments/',
        'overlays_key_prefix': 'overlays/',
        'structure_dict_key': 'structure.json',
        'annotations_key_prefix': 'annotations/',
        'tags_key_prefix': 'tags/',
        'admin_metadata_key': 'dtool',
        'storage_broker_version': __version__,
        'dtool_readme_key': 'README.txt',
        'manifest_key': 'manifest.json',
        'dataset_readme_key': 'README.yml'
    }
    actual_content = _get_data_structure_from_key(
        proto_dataset._storage_broker,
        expected_azure_key)
    print(actual_content)
    assert expected_content == actual_content

def test_http_manifest(tmp_uuid_and_uri):  # NOQA
    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.put_readme("---\nproject: testing\n")
    proto_dataset.freeze()

    dataset = DataSet.from_uri(dest_uri)

    # Test HTTP manifest.
    http_manifest = dataset._storage_broker._generate_http_manifest(
        expiry=None)  # NOQA
    assert "admin_metadata" in http_manifest
    assert http_manifest["admin_metadata"] == dataset._admin_metadata
    assert "overlays" in http_manifest
    assert "readme_url" in http_manifest
    assert "manifest_url" in http_manifest
    assert "item_urls" in http_manifest
    assert "annotations" in http_manifest
    assert "tags" in http_manifest
    assert set(http_manifest["item_urls"].keys()) == set(dataset.identifiers)

def test_fails_if_only_endpoint_is_set(tmp_uuid_and_uri):  # NOQA
    from dtoolcore import ProtoDataSet, generate_admin_metadata

    bucket_name = S3_TEST_BASE_URI[5:]
    endpoint_key = "DTOOL_S3_ENDPOINT_{}".format(bucket_name)

    uuid, dest_uri = tmp_uuid_and_uri

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    with tmp_env_var(endpoint_key, "https://s3.amazonaws.com"):
        with pytest.raises(RuntimeError):
            ProtoDataSet(dest_uri, admin_metadata)

def test_dataset_create_functional(chdir_fixture):  # NOQA
    from dtool_create.dataset import create
    runner = CliRunner()

    dataset_name = "my_dataset"
    result = runner.invoke(create, [dataset_name])
    assert result.exit_code == 0

    # Test that the proto dataset has been created.
    dataset_abspath = os.path.abspath(dataset_name)
    dataset_uri = sanitise_uri(dataset_abspath)
    dataset = ProtoDataSet.from_uri(dataset_uri)

    # Test that the dataset name is correct.
    assert dataset.name == dataset_name

def test_copy_and_diff(tmp_uuid_and_uri):  # NOQA
    uuid, dest_uri = tmp_uuid_and_uri

    import dtoolcore
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.compare import (
        diff_identifiers,
        diff_sizes,
        diff_content,
    )

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata)
    proto_dataset.create()
    proto_dataset.put_readme(content='---\ndescription: test')
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    remote_dataset = DataSet.from_uri(dest_uri)

    with tmp_directory() as local_dir:
        local_uri = dtoolcore.copy(dest_uri, local_dir)
        assert local_uri.startswith("file:/")
        local_dataset = DataSet.from_uri(local_uri)
        assert len(diff_identifiers(local_dataset, remote_dataset)) == 0
        assert len(diff_sizes(local_dataset, remote_dataset)) == 0
        assert len(diff_content(local_dataset, remote_dataset)) == 0

def test_dataset_create_can_work_outside_current_directory(
        tmp_dir_fixture):  # NOQA
    from dtool_create.dataset import create
    runner = CliRunner()

    dataset_name = "my_dataset"
    dataset_path = os.path.join(tmp_dir_fixture, dataset_name)
    result = runner.invoke(create, [dataset_name, tmp_dir_fixture])
    assert result.exit_code == 0

    # Test that the dataset has been created.
    dataset_uri = sanitise_uri(dataset_path)
    dataset = ProtoDataSet.from_uri(dataset_uri)

    # Test that the dataset name is correct.
    assert dataset.name == dataset_name

def test_list_overlays_when_dir_missing(chdir_fixture):  # NOQA
    """
    This test simulates checking out a frozen dataset from Git that has no
    overlays written to it, i.e. where the ``.dtool/overlays`` directory is
    missing.

    See also:
    https://github.com/jic-dtool/dtoolcore/issues/3
    """
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri="file://.")

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    # Simulate the missing overlay directory.
    assert os.path.isdir(proto_dataset._storage_broker._overlays_abspath)
    os.rmdir(proto_dataset._storage_broker._overlays_abspath)
    assert not os.path.isdir(proto_dataset._storage_broker._overlays_abspath)

    dataset = DataSet.from_uri(proto_dataset.uri)

    # This call caused the bug.
    overlay_names = dataset.list_overlay_names()
    assert overlay_names == []

def test_uri_property_when_using_relpath(chdir_fixture):  # NOQA
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.storagebroker import DiskStorageBroker
    from dtoolcore.utils import (
        IS_WINDOWS,
        windows_to_unix_path,
        urlparse,
    )

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=".")

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    dataset = DataSet.from_uri("my_dataset")

    abspath = os.path.abspath("my_dataset")
    if IS_WINDOWS:
        abspath = windows_to_unix_path(abspath)
    assert dataset.uri.startswith("file://")
    assert dataset.uri.endswith(abspath)

    parsed = urlparse(dataset.uri)
    if IS_WINDOWS:
        assert parsed.netloc == ""
    else:
        assert parsed.netloc != ""

def test_defect_imeta_fails_if_key_already_exists_regression(
        tmp_uuid_and_uri):  # NOQA
    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')

    # Regression check: when using ``imeta add`` the call below raised
    # SystemExit because the metadata key already existed.
    proto_dataset.put_item(local_file_path, 'tiny.png')

def test_proto_dataset_freeze_functional(tmp_uuid_and_uri):  # NOQA
    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import (
        generate_admin_metadata,
        DataSet,
        ProtoDataSet,
        DtoolCoreTypeError
    )
    from dtoolcore.utils import generate_identifier

    name = "func_test_dataset_freeze"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)

    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_dataset.create()

    filenames = ['tiny.png', 'actually_a_png.txt', 'another_file.txt']
    for filename in filenames:
        local_file_path = os.path.join(sample_data_path, filename)
        proto_dataset.put_item(local_file_path, filename)
        proto_dataset.add_item_metadata(filename, 'namelen', len(filename))
        proto_dataset.add_item_metadata(filename, 'firstletter', filename[0])

    # At this point the temporary fragments should exist.
    assert _prefix_contains_something(
        proto_dataset._storage_broker,
        proto_dataset._storage_broker.fragments_key_prefix)

    proto_dataset.put_readme(content='Hello world!')

    # We shouldn't be able to load this as a DataSet.
    with pytest.raises(DtoolCoreTypeError):
        DataSet.from_uri(dest_uri)

    proto_dataset.freeze()

    # Freezing removes the temporary metadata fragments.
    assert not _prefix_contains_something(
        proto_dataset._storage_broker,
        proto_dataset._storage_broker.fragments_key_prefix)

    # Now we shouldn't be able to load this as a ProtoDataSet.
    with pytest.raises(DtoolCoreTypeError):
        ProtoDataSet.from_uri(dest_uri)

    # But we can as a DataSet.
    dataset = DataSet.from_uri(dest_uri)
    assert dataset.name == 'func_test_dataset_freeze'

    # Test identifiers.
    expected_identifiers = map(generate_identifier, filenames)
    assert set(dataset.identifiers) == set(expected_identifiers)

    # Test readme contents.
    assert dataset.get_readme_content() == "Hello world!"

    # Test item properties.
    expected_identifier = generate_identifier('tiny.png')
    expected_hash = md5sum_hexdigest(
        os.path.join(sample_data_path, 'tiny.png'))
    item_properties = dataset.item_properties(expected_identifier)
    assert item_properties['relpath'] == 'tiny.png'
    assert item_properties['size_in_bytes'] == 276
    assert item_properties['hash'] == expected_hash

    # Test accessing an item.
    expected_identifier = generate_identifier('another_file.txt')
    fpath = dataset.item_content_abspath(expected_identifier)
    with open(fpath) as fh:
        contents = fh.read()
    assert contents == "Hello\n"

    # Test that the overlays have been created properly.
    namelen_overlay = dataset.get_overlay('namelen')
    expected_identifier = generate_identifier('another_file.txt')
    assert namelen_overlay[expected_identifier] == len('another_file.txt')
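

# ``md5sum_hexdigest`` and ``_prefix_contains_something`` are test helpers
# from the module the test above was taken from. A minimal sketch, assuming
# the broker under test is S3-backed; the ``s3resource`` and ``bucket``
# attributes used below are assumptions for illustration, not the broker's
# documented interface:

import hashlib


def md5sum_hexdigest(fpath):
    """Return the MD5 checksum of the file at ``fpath`` as a hex string."""
    checksum = hashlib.md5()
    with open(fpath, "rb") as fh:
        for chunk in iter(lambda: fh.read(8192), b""):
            checksum.update(chunk)
    return checksum.hexdigest()


def _prefix_contains_something(storage_broker, prefix):
    # Hypothetical: assumes a boto3 resource handle on
    # ``storage_broker.s3resource`` and the bucket name on
    # ``storage_broker.bucket``; the real helper uses whatever key-listing
    # mechanism the storage broker provides.
    bucket = storage_broker.s3resource.Bucket(storage_broker.bucket)
    return len(list(bucket.objects.filter(Prefix=prefix))) > 0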