def main():
    """Separate plots for one item and stage the results into an output dataset."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset-uri')
    parser.add_argument('--resource-uri')
    parser.add_argument('--identifier')
    parser.add_argument('--output-uri')
    args = parser.parse_args()

    input_ds = DataSet.from_uri(args.dataset_uri)
    resource_ds = DataSet.from_uri(args.resource_uri)
    output_ds = ProtoDataSet.from_uri(args.output_uri)

    with temp_working_dir() as staging_dir:
        results = separate_plots(
            input_ds,
            args.identifier,
            resource_ds,
            staging_dir
        )
        stage_outputs(
            results,
            staging_dir,
            input_ds,
            output_ds,
            ['ordering', 'date'],  # overlays propagated to the output dataset
            args.identifier
        )
def test_item_local_abspath_with_clean_cache(tmp_uuid_and_uri):  # NOQA
    """Item content must be fetchable into a freshly configured cache dir."""
    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    admin_metadata["uuid"] = uuid

    sample_data_path = os.path.join(TEST_SAMPLE_DATA)
    local_file_path = os.path.join(sample_data_path, 'tiny.png')

    # Create a minimal dataset.
    proto_dataset = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=CONFIG_PATH)
    proto_dataset.create()
    proto_dataset.put_item(local_file_path, 'tiny.png')
    proto_dataset.freeze()

    identifier = generate_identifier('tiny.png')

    # Read the dataset with a clean cache directory and fetch the item.
    # (The original code also loaded the dataset once before entering the
    # cache context, but that binding was dead — rebound before any use.)
    with tmp_directory() as cache_dir:
        with tmp_env_var("DTOOL_AZURE_CACHE_DIRECTORY", cache_dir):
            dataset = DataSet.from_uri(dest_uri, config_path=CONFIG_PATH)
            fpath = dataset.item_content_abspath(identifier)
            assert os.path.isfile(fpath)
def test_http_enable_with_presigned_url(tmp_uuid_and_uri):  # NOQA
    """Publishing with an expiry must produce presigned URLs throughout."""
    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet

    admin_metadata = generate_admin_metadata("my_dataset")
    admin_metadata["uuid"] = uuid

    local_file_path = os.path.join(os.path.join(TEST_SAMPLE_DATA), 'tiny.png')

    # Build a minimal dataset with a readme.
    proto_ds = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_ds.create()
    proto_ds.put_item(local_file_path, 'tiny.png')
    proto_ds.put_readme("---\nproject: testing\n")
    proto_ds.freeze()

    ds = DataSet.from_uri(dest_uri)

    # Add an annotation and some tags.
    ds.put_annotation("project", "dtool-testing")
    ds.put_tag("amazing")
    ds.put_tag("stuff")

    with tmp_env_var("DTOOL_S3_PUBLISH_EXPIRY", "120"):
        access_url = ds._storage_broker.http_enable()

    # A presigned URL carries a query string.
    assert access_url.find("?") != -1
    assert access_url.startswith("https://")

    http_ds = DataSet.from_uri(access_url)

    # Annotation and tags must survive the round trip.
    assert http_ds.get_annotation("project") == "dtool-testing"
    assert http_ds.list_tags() == ["amazing", "stuff"]

    from dtoolcore.compare import (diff_identifiers, diff_sizes, diff_content)
    assert len(diff_identifiers(ds, http_ds)) == 0
    assert len(diff_sizes(ds, http_ds)) == 0
    assert len(diff_content(http_ds, ds)) == 0

    # Every URL in the manifest must also be presigned.
    http_manifest = http_ds._storage_broker.http_manifest
    assert http_manifest["manifest_url"].find("?") != -1
    assert http_manifest["readme_url"].find("?") != -1
    for url in http_manifest["item_urls"].values():
        assert url.find("?") != -1
    for url in http_manifest["annotations"].values():
        assert url.find("?") != -1
def test_diff_sizes(tmp_uri_fixture):  # NOQA
    """diff_sizes reports (identifier, size_a, size_b) for mismatched items."""
    from dtoolcore import (
        DataSet,
        generate_admin_metadata,
        generate_proto_dataset,
    )
    from dtoolcore.utils import generate_identifier
    from dtoolcore.compare import diff_sizes

    fpaths = create_test_files(tmp_uri_fixture)

    def make_frozen(name, fpath):
        # One-item dataset with the content stored under "file.txt".
        proto = generate_proto_dataset(
            admin_metadata=generate_admin_metadata(name),
            base_uri=tmp_uri_fixture)
        proto.create()
        proto.put_item(fpath, "file.txt")
        proto.freeze()
        return DataSet.from_uri(proto.uri)

    ds_a = make_frozen("test_compare_1", fpaths["he"])
    ds_b = make_frozen("test_compare_2", fpaths["she"])

    # Identical datasets diff to nothing.
    assert diff_sizes(ds_a, ds_a) == []

    # "he" is 2 bytes, "she" is 3 bytes.
    assert diff_sizes(ds_a, ds_b) == [
        (generate_identifier("file.txt"), 2, 3),
    ]
def test_windows_abspath_uri(tmp_dir_fixture):  # NOQA
    """A file URI built from an absolute path must load on every platform."""
    from dtoolcore import DataSet, DataSetCreator
    from dtoolcore.utils import IS_WINDOWS

    with DataSetCreator("tmp_ds", tmp_dir_fixture):
        pass

    path = os.path.abspath(os.path.join(tmp_dir_fixture, "tmp_ds"))
    if IS_WINDOWS:
        # Example Win URI: file:///C:/some/path/to/ds.
        # Note that "C:" is part of the path.
        uri = "file:///" + path.replace("\\", "/")
    else:
        uri = "file://" + path

    DataSet.from_uri(uri)
def analyse_dataset(dataset_dir, output_dir):
    """Analyse all the files in the dataset."""
    ds = DataSet.from_uri(dataset_dir)
    logging.info("Analysing items in dataset: {}".format(ds.name))
    for identifier in ds.identifiers:
        analyse_item(dataset_dir, output_dir, identifier)
def cli(dataset_uri):
    """Attach the Illumina metadata, read1 and pair-id overlays to a dataset."""
    ds = DataSet.from_uri(dataset_uri)
    for add_overlay in (
        create_illumina_metadata_overlay,
        create_read1_overlay,
        create_pair_id_overlay,
    ):
        add_overlay(ds)
def main():
    """Segment a single item and stage the results into an output dataset."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--dataset-uri', help='Dataset URI')
    parser.add_argument('--identifier', help='Identifier (hash) to process')
    parser.add_argument('--output-uri', help='Output dataset uri')
    args = parser.parse_args()

    input_ds = DataSet.from_uri(args.dataset_uri)
    output_ds = ProtoDataSet.from_uri(args.output_uri)

    with temp_working_dir() as staging_dir:
        results = segment_single_identifier(
            input_ds,
            args.identifier,
            staging_dir
        )
        stage_outputs(
            results,
            staging_dir,
            input_ds,
            output_ds,
            ['coords', 'ordering', 'useful_name'],  # overlays to copy across
            args.identifier
        )
def main(dataset_path):
    """Build a master CSV from every CSV item in the dataset.

    Removed leftover debugging artefacts from the original: a commented-out
    alternative parsing of relpath, a ``print(relpath)`` per item and a
    ``pprint`` dump of the collected tuples.
    """
    dataset = DataSet.from_uri(dataset_path)
    create_is_csv_overlay(dataset)

    def info_from_identifier(identifier):
        """Derive (label1, label2, label3) from an item's relpath.

        Expects relpaths shaped "<label1>/<compound>/<...>" where compound
        is "<label2>-<label3>"; if compound has no '-', both become "?".
        """
        relpath = dataset.item_properties(identifier)['relpath']
        label1, compound, _ = relpath.split('/')
        try:
            label2, label3 = compound.rsplit('-', 1)
        except ValueError:
            label2, label3 = "?", "?"
        return label1, label2, label3

    fpaths_and_extra_data = [
        (dataset.item_content_abspath(identifier),
         info_from_identifier(identifier))
        for identifier in identifiers_where_overlay_is_true(dataset, "is_csv")
    ]

    build_master_csv(fpaths_and_extra_data, 'all_cells.csv')
def test_basic_workflow(tmp_dir_fixture):  # NOQA
    """A put/freeze/read round trip yields exactly the expected identifier."""
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier
    from dtoolcore.storagebroker import DiskStorageBroker

    admin_metadata = generate_admin_metadata("my_dataset")
    dest_uri = DiskStorageBroker.generate_uri(
        name="my_dataset",
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    local_file_path = os.path.join(os.path.join(TEST_SAMPLE_DATA), 'tiny.png')

    # Create a minimal dataset.
    proto_ds = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_ds.create()
    proto_ds.put_item(local_file_path, 'tiny.png')
    proto_ds.freeze()

    # Read it back and inspect the manifest.
    ds = DataSet.from_uri(dest_uri)
    expected_identifier = generate_identifier('tiny.png')
    assert expected_identifier in ds.identifiers
    assert len(ds.identifiers) == 1
def main(dataset_uri):
    """Launch the interactive image-tagging viewer for the given dataset."""
    ds = DataSet.from_uri(dataset_uri)

    app.dataset = ds
    app.tags = {}
    app.image_generator = dataset_image_generator(ds)

    first_im, app.current_id = next(app.image_generator)
    app.image = scene.visuals.Image(first_im, parent=view.scene)
    app.counter = 0

    caption = scene.visuals.Text(
        "Image {}".format(app.counter),
        parent=app.image,
        color='red',
        pos=(30, 5))
    caption.font_size = 24
    app.t1 = caption

    view.camera = scene.PanZoomCamera(aspect=1)
    view.camera.set_range()
    view.camera.flip = (False, True, False)  # invert the second camera axis

    app.run()
def test_dataset_freeze_functional(chdir_fixture):  # NOQA
    """Freezing a proto dataset via the CLI produces a readable dataset."""
    from dtool_create.dataset import create, freeze, add
    runner = CliRunner()

    dataset_name = "my_dataset"
    result = runner.invoke(create, [dataset_name])
    assert result.exit_code == 0

    # At this point we have a proto dataset.
    dataset_abspath = os.path.abspath(dataset_name)
    dataset_uri = sanitise_uri(dataset_abspath)
    # Sanity check that the URI loads as a proto dataset.  (The original
    # bound this to a variable that was never used before being rebound.)
    ProtoDataSet.from_uri(dataset_uri)

    # Create a sample file and put it into the proto dataset.
    sample_file_name = "hello.txt"
    with open(sample_file_name, "w") as fh:
        fh.write("hello world")

    result = runner.invoke(add, ["item", sample_file_name, dataset_uri])
    assert result.exit_code == 0

    result = runner.invoke(freeze, [dataset_uri])
    assert result.exit_code == 0

    # Now we have a frozen dataset; the manifest has been updated.
    dataset = DataSet.from_uri(dataset_uri)
    assert len(dataset.identifiers) == 1
def test_iter_datasets_in_base_uri(tmp_uri_fixture):  # NOQA
    """Frozen and proto datasets are enumerated by their respective iterators."""
    from dtoolcore import (
        create_proto_dataset,
        DataSet,
        ProtoDataSet,
        iter_datasets_in_base_uri,
        iter_proto_datasets_in_base_uri,
    )

    # Create a proto dataset.
    proto_ds = create_proto_dataset("proto", tmp_uri_fixture)

    # Create a frozen dataset.
    _frozen_ds = create_proto_dataset("frozen", tmp_uri_fixture)
    _frozen_ds.freeze()
    frozen_ds = DataSet.from_uri(_frozen_ds.uri)

    from_iter_datasets = list(iter_datasets_in_base_uri(tmp_uri_fixture))
    from_iter_proto_datasets = list(
        iter_proto_datasets_in_base_uri(tmp_uri_fixture))

    # Each iterator sees exactly one dataset of its own kind.
    # (The original duplicated the first assertion and never checked the
    # length of from_iter_proto_datasets.)
    assert len(from_iter_datasets) == 1
    assert len(from_iter_proto_datasets) == 1
    assert isinstance(from_iter_datasets[0], DataSet)
    assert isinstance(from_iter_proto_datasets[0], ProtoDataSet)
    assert proto_ds.uri == from_iter_proto_datasets[0].uri
    assert frozen_ds.uri == from_iter_datasets[0].uri
def __init__(self):
    """Parse CLI arguments and load the input/output datasets and identifier."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--dataset', help='URI of input dataset')
    parser.add_argument('-i', '--identifier', help='Identifier (hash) to process')
    parser.add_argument('-o', '--output-dataset', help='URI of output dataset')
    args = parser.parse_args()

    self.input_dataset = DataSet.from_uri(args.dataset)
    self.output_dataset = ProtoDataSet.from_uri(args.output_dataset)
    self.identifier = args.identifier
def __init__(self):
    """Parse CLI arguments and load the input/output datasets and identifier."""
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--dataset-uri', help='URI of input dataset')
    parser.add_argument('-i', '--identifier', help='Identifier to process')
    parser.add_argument('-o', '--output-dataset-uri', help='URI of output dataset')
    args = parser.parse_args()

    self.input_dataset = DataSet.from_uri(args.dataset_uri)
    self.output_dataset = ProtoDataSet.from_uri(args.output_dataset_uri)
    self.identifier = args.identifier
def test_basic_workflow_on_first_namespace(tmp_uuid_and_uri):  # NOQA
    """A put/freeze/read round trip works against the fixture-provided URI."""
    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier

    admin_metadata = generate_admin_metadata("my_dataset")
    admin_metadata["uuid"] = uuid

    local_file_path = os.path.join(os.path.join(TEST_SAMPLE_DATA), 'tiny.png')

    # Create a minimal dataset.
    proto_ds = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_ds.create()
    proto_ds.put_item(local_file_path, 'tiny.png')
    proto_ds.freeze()

    # Read it back and inspect the manifest.
    ds = DataSet.from_uri(dest_uri)
    expected_identifier = generate_identifier('tiny.png')
    assert expected_identifier in ds.identifiers
    assert len(ds.identifiers) == 1
def test_dataset_summary_json_functional():
    """`summary --format json` reports name/uuid/items/size/creator/frozen_at."""
    from dtoolcore import DataSet
    from dtool_info.dataset import summary

    # Build the expected summary directly from the dataset.
    lion_ds = DataSet.from_uri(lion_dataset_uri)
    tot_size = sum(
        lion_ds.item_properties(i)["size_in_bytes"]
        for i in lion_ds.identifiers
    )
    expected = {
        "name": lion_ds.name,
        "uuid": lion_ds.uuid,
        "number_of_items": len(lion_ds.identifiers),
        "size_in_bytes": tot_size,
        "creator_username": lion_ds._admin_metadata["creator_username"],
        "frozen_at": lion_ds._admin_metadata["frozen_at"],
    }

    runner = CliRunner()
    result = runner.invoke(summary, ["--format", "json", lion_dataset_uri])
    assert result.exit_code == 0
    assert json.loads(result.output) == expected
def main(dataset_uri):
    """Launch the image/mask overlay viewer for the given dataset."""
    ds = DataSet.from_uri(dataset_uri)

    display.im_ids = iter(identifiers_where_overlay_is_true(ds, "is_image"))
    imid = next(display.im_ids)
    display.mask_overlay = ds.get_overlay("mask_ids")

    im = imread(ds.item_content_abspath(imid))
    mask_im = imread(ds.item_content_abspath(display.mask_overlay[imid]))

    display.dataset = ds
    display.image = scene.visuals.Image(im, parent=view.scene)
    display.mask_image = scene.visuals.Image(mask_im, parent=view.scene)
    display.mask_image.visible = False  # start with the mask hidden

    caption = scene.visuals.Text(
        display.dataset.item_properties(imid)['relpath'],
        parent=display.image,
        color='red',
        pos=(30, 5))
    caption.font_size = 24
    display.t1 = caption

    view.camera = scene.PanZoomCamera(aspect=1)
    view.camera.set_range()
    view.camera.flip = (False, True, False)  # invert the second camera axis

    app.run()
def test_uri_property_when_using_relpath(chdir_fixture):  # NOQA
    """A dataset opened via a relative path reports an absolute file:// URI."""
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.storagebroker import DiskStorageBroker

    admin_metadata = generate_admin_metadata("my_dataset")
    dest_uri = DiskStorageBroker.generate_uri(
        name="my_dataset",
        uuid=admin_metadata["uuid"],
        prefix=".")

    local_file_path = os.path.join(os.path.join(TEST_SAMPLE_DATA), 'tiny.png')

    # Create a minimal dataset in the current working directory.
    proto_ds = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_ds.create()
    proto_ds.put_item(local_file_path, 'tiny.png')
    proto_ds.freeze()

    ds = DataSet.from_uri("./my_dataset")
    assert ds.uri == "file://" + os.path.abspath("my_dataset")
def test_annotations(tmp_uuid_and_uri):  # NOQA
    """Annotations can be listed, added and retrieved on a frozen dataset."""
    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet

    admin_metadata = generate_admin_metadata("my_dataset")
    admin_metadata["uuid"] = uuid

    local_file_path = os.path.join(os.path.join(TEST_SAMPLE_DATA), 'tiny.png')

    # Create a minimal dataset.
    proto_ds = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_ds.create()
    proto_ds.put_item(local_file_path, 'tiny.png')
    proto_ds.freeze()

    # A freshly frozen dataset starts with no annotations.
    ds = DataSet.from_uri(dest_uri)
    assert ds.list_annotation_names() == []

    ds.put_annotation("project", "demo")
    assert ds.get_annotation("project") == "demo"
    assert ds.list_annotation_names() == ["project"]
def __init__(self, uri):
    """Load the dataset at *uri* and initialise per-item tagging state."""
    self.dataset = DataSet.from_uri(uri)
    # Every item starts out untagged; keys are positional indices.
    # (Dropped a redundant list() wrapper around range().)
    self.tags = {
        n: 'Untagged' for n in range(len(self.dataset.identifiers))
    }
    self.load_times = {}
    self.tag_times = {}
def test_prefix_functional():  # NOQA
    """DTOOL_S3_DATASET_PREFIX controls the structure key of new datasets."""
    from dtoolcore import DataSetCreator
    from dtoolcore import DataSet, iter_datasets_in_base_uri

    # Create a minimal dataset without a prefix.
    with tmp_env_var("DTOOL_S3_DATASET_PREFIX", ""):
        with DataSetCreator("no-prefix", S3_TEST_BASE_URI) as ds_creator:
            ds_creator.put_annotation("prefix", "no")
        no_prefix_uri = ds_creator.uri

    dataset_no_prefix = DataSet.from_uri(no_prefix_uri)

    # Basic test that retrieval works.
    assert dataset_no_prefix.get_annotation("prefix") == "no"

    # Without a prefix the structure key starts with the dataset UUID.
    structure_key = dataset_no_prefix._storage_broker.get_structure_key()
    assert structure_key.startswith(dataset_no_prefix.uuid)

    # Create a minimal dataset with a prefix.
    # (Fixed copy-paste: the original also named this dataset "no-prefix".)
    prefix = "u/olssont/"
    with tmp_env_var("DTOOL_S3_DATASET_PREFIX", prefix):
        with DataSetCreator("with-prefix", S3_TEST_BASE_URI) as ds_creator:
            ds_creator.put_annotation("prefix", "yes")
        prefix_uri = ds_creator.uri

    dataset_with_prefix = DataSet.from_uri(prefix_uri)

    # Basic test that retrieval works.
    assert dataset_with_prefix.get_annotation("prefix") == "yes"

    # With a prefix the structure key starts with that prefix.
    structure_key = dataset_with_prefix._storage_broker.get_structure_key()
    assert structure_key.startswith(prefix)

    # Both datasets are discoverable in the base URI.
    dataset_uris = list(
        ds.uri for ds in iter_datasets_in_base_uri(S3_TEST_BASE_URI))
    assert dataset_no_prefix.uri in dataset_uris
    assert dataset_with_prefix.uri in dataset_uris

    _remove_dataset(dataset_no_prefix.uri)
    _remove_dataset(dataset_with_prefix.uri)
def main(input_dataset_uri, output_dspath, param_json):
    """Train on the input dataset and persist results to a derived dataset."""
    # Split the output path into its base URI and dataset name components.
    base_uri = os.path.dirname(output_dspath)
    name = os.path.basename(output_dspath)

    input_ds = DataSet.from_uri(input_dataset_uri)
    params = Parameters.from_json_string(param_json)

    with DerivedDataSet(base_uri, name, input_ds) as output_ds:
        train_and_save_results(input_ds, output_ds, params)
def test_http_enable(tmp_uuid_and_uri):  # NOQA
    """An http-enabled dataset can be read back and matches the original."""
    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet

    admin_metadata = generate_admin_metadata("my_dataset")
    admin_metadata["uuid"] = uuid

    local_file_path = os.path.join(os.path.join(TEST_SAMPLE_DATA), 'tiny.png')

    # Create a minimal dataset with a readme.
    proto_ds = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_ds.create()
    proto_ds.put_item(local_file_path, 'tiny.png')
    proto_ds.put_readme("---\nproject: testing\n")
    proto_ds.freeze()

    ds = DataSet.from_uri(dest_uri)
    access_url = ds._storage_broker.http_enable()
    assert access_url.startswith("https://")

    http_ds = DataSet.from_uri(access_url)

    # The HTTP view must be indistinguishable from the original dataset.
    from dtoolcore.compare import (
        diff_identifiers,
        diff_sizes,
        diff_content
    )
    assert len(diff_identifiers(ds, http_ds)) == 0
    assert len(diff_sizes(ds, http_ds)) == 0
    assert len(diff_content(http_ds, ds)) == 0
def test_diff_content(tmp_dir_fixture):  # NOQA
    """diff_content reports (identifier, hash_a, hash_b) for differing items."""
    from dtoolcore import (
        DataSet,
        generate_admin_metadata,
        generate_proto_dataset,
    )
    from dtoolcore.utils import generate_identifier
    from dtoolcore.compare import diff_content
    from dtoolcore.storagebroker import DiskStorageBroker

    fpaths = create_test_files(tmp_dir_fixture)

    def make_frozen(name, fpath):
        # One-item dataset with the content stored under "file.txt".
        proto = generate_proto_dataset(
            admin_metadata=generate_admin_metadata(name),
            prefix=tmp_dir_fixture,
            storage="file")
        proto.create()
        proto.put_item(fpath, "file.txt")
        proto.freeze()
        return DataSet.from_uri(proto.uri)

    ds_a = make_frozen("test_compare_1", fpaths["cat"])
    ds_b = make_frozen("test_compare_2", fpaths["she"])

    # Identical datasets diff to nothing.
    assert diff_content(ds_a, ds_a) == []

    identifier = generate_identifier("file.txt")
    expected = [
        (identifier,
         DiskStorageBroker.hasher(ds_a.item_content_abspath(identifier)),
         DiskStorageBroker.hasher(ds_b.item_content_abspath(identifier)))
    ]
    assert diff_content(ds_a, ds_b) == expected
def test_dataset_ls_functional():
    """`ls` on the sample directory lists the lion dataset by name."""
    from dtoolcore import DataSet
    from dtool_info.dataset import ls

    lion_name = DataSet.from_uri(lion_dataset_uri).name

    runner = CliRunner()
    result = runner.invoke(ls, [SAMPLE_DATASETS_DIR])
    assert result.exit_code == 0
    assert result.output.find(lion_name) != -1
def test_tags_functional(tmp_uuid_and_uri):  # NOQA
    """Tags can be added, listed and deleted; puts are idempotent and validated."""
    uuid, dest_uri = tmp_uuid_and_uri

    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet

    admin_metadata = generate_admin_metadata("my_dataset")
    admin_metadata["uuid"] = uuid

    # Create a minimal dataset; put_tag works on proto datasets too.
    proto_ds = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_ds.create()
    proto_ds.put_tag("testing")
    proto_ds.freeze()

    ds = DataSet.from_uri(proto_ds.uri)
    assert ds.list_tags() == ["testing"]

    ds.put_tag("amazing")
    ds.put_tag("stuff")
    assert ds.list_tags() == ["amazing", "stuff", "testing"]

    ds.delete_tag("stuff")
    assert ds.list_tags() == ["amazing", "testing"]

    # Putting the same tag repeatedly is idempotent.
    for _ in range(3):
        ds.put_tag("amazing")
    assert ds.list_tags() == ["amazing", "testing"]

    # Tags can only be strings.
    from dtoolcore import DtoolCoreValueError
    with pytest.raises(DtoolCoreValueError):
        ds.put_tag(1)

    # Tags must adhere to the utils.name_is_valid() rules.
    from dtoolcore import DtoolCoreInvalidNameError
    with pytest.raises(DtoolCoreInvalidNameError):
        ds.put_tag("!invalid")

    # Deleting a non-existing tag does not raise; it silently succeeds.
    ds.delete_tag("dontexist")
def test_dataset_uuid_functional():
    """`uuid` prints the dataset's UUID."""
    from dtoolcore import DataSet
    from dtool_info.dataset import uuid

    expected_uuid = DataSet.from_uri(lion_dataset_uri).uuid

    runner = CliRunner()
    result = runner.invoke(uuid, [lion_dataset_uri])
    assert result.exit_code == 0
    assert result.output.strip() == expected_uuid
def analyse_dataset(dataset_dir, output_dir):
    """Analyse all the files in the dataset."""
    ds = DataSet.from_uri(dataset_dir)
    logging.info("Analysing items in dataset: {}".format(ds.name))
    for identifier in ds.identifiers:
        abspath = ds.item_content_abspath(identifier)
        relpath = ds.item_properties(identifier)["relpath"]
        # Each item gets its own output directory derived from its relpath.
        analyse_file(abspath, item_output_path(output_dir, relpath))
def test_basic_workflow_with_nested_handle(tmp_dir_fixture):  # NOQA
    """Items with nested handles round-trip and resolve to on-disk paths."""
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.utils import generate_identifier
    from dtoolcore.storagebroker import DiskStorageBroker

    name = "my_dataset"
    admin_metadata = generate_admin_metadata(name)
    dest_uri = DiskStorageBroker.generate_uri(
        name=name,
        uuid=admin_metadata["uuid"],
        base_uri=tmp_dir_fixture)

    local_file_path = os.path.join(os.path.join(TEST_SAMPLE_DATA), 'tiny.png')
    handle = "subdir/tiny.png"

    # Create a minimal dataset with a nested handle.
    proto_ds = ProtoDataSet(
        uri=dest_uri,
        admin_metadata=admin_metadata,
        config_path=None)
    proto_ds.create()
    proto_ds.put_item(local_file_path, handle)
    proto_ds.freeze()

    # Read it back.
    ds = DataSet.from_uri(dest_uri)
    expected_identifier = generate_identifier(handle)
    assert expected_identifier in ds.identifiers
    assert len(ds.identifiers) == 1

    # The file must exist in the disk dataset (particularly on Windows).
    item_abspath = os.path.join(
        tmp_dir_fixture,
        name,
        "data",
        "subdir",
        "tiny.png"
    )
    assert os.path.isfile(item_abspath)
    assert os.path.isfile(ds.item_content_abspath(expected_identifier))

    # The correct abspath must be returned (particularly on Windows).
    assert ds.item_content_abspath(expected_identifier) == item_abspath  # NOQA
def test_diff_identifiers(tmp_dir_fixture):  # NOQA
    """diff_identifiers reports which identifiers exist only on one side."""
    from dtoolcore import (
        DataSet,
        generate_admin_metadata,
        generate_proto_dataset,
    )
    from dtoolcore.utils import generate_identifier
    from dtoolcore.compare import diff_identifiers

    fpaths = create_test_files(tmp_dir_fixture)

    def make_frozen(name, handle):
        # Same file content but stored under a dataset-specific handle.
        proto = generate_proto_dataset(
            admin_metadata=generate_admin_metadata(name),
            prefix=tmp_dir_fixture,
            storage="file")
        proto.create()
        proto.put_item(fpaths["cat"], handle)
        proto.freeze()
        return DataSet.from_uri(proto.uri)

    ds_a = make_frozen("test_compare_1", "a.txt")
    ds_b = make_frozen("test_compare_2", "b.txt")

    # Identical datasets diff to nothing.
    assert diff_identifiers(ds_a, ds_a) == []

    expected = [(generate_identifier("a.txt"), True, False),
                (generate_identifier("b.txt"), False, True)]
    assert diff_identifiers(ds_a, ds_b) == expected
def test_dataset_item_fetch_functional():
    """`item fetch` prints the item's local content abspath."""
    from dtoolcore import DataSet
    from dtool_info.dataset import item

    expected = DataSet.from_uri(lion_dataset_uri).item_content_abspath(
        item_identifier)

    runner = CliRunner()
    result = runner.invoke(item, ["fetch", lion_dataset_uri, item_identifier])
    assert result.exit_code == 0
    assert expected == result.output.strip()
def main(dataset_uri, config_path=None):
    """Attach a "useful_name" overlay derived from each item's relpath stem."""
    dataset = DataSet.from_uri(dataset_uri, config_path=config_path)

    def name_from_identifier(identifier):
        # Strip the final extension from the relpath to get a friendly name.
        relpath = dataset.item_properties(identifier)['relpath']
        return relpath.rsplit('.', 1)[0]

    overlay = {
        identifier: name_from_identifier(identifier)
        for identifier in dataset.identifiers
    }
    dataset.put_overlay("useful_name", overlay)
def test_copy_and_diff(tmp_uuid_and_uri):  # NOQA
    """A copied dataset has identical identifiers, sizes and content."""
    uuid, dest_uri = tmp_uuid_and_uri

    import dtoolcore
    from dtoolcore import ProtoDataSet, generate_admin_metadata
    from dtoolcore import DataSet
    from dtoolcore.compare import (
        diff_identifiers,
        diff_sizes,
        diff_content,
    )

    admin_metadata = generate_admin_metadata("my_dataset")
    admin_metadata["uuid"] = uuid

    local_file_path = os.path.join(os.path.join(TEST_SAMPLE_DATA), 'tiny.png')

    # Create a minimal dataset with a readme.
    proto_ds = ProtoDataSet(uri=dest_uri, admin_metadata=admin_metadata)
    proto_ds.create()
    proto_ds.put_readme(content='---\ndescription: test')
    proto_ds.put_item(local_file_path, 'tiny.png')
    proto_ds.freeze()

    remote_ds = DataSet.from_uri(dest_uri)

    # Copy locally and verify the copy matches the remote original.
    with tmp_directory() as local_dir:
        local_uri = dtoolcore.copy(dest_uri, local_dir)
        assert local_uri.startswith("file:/")
        local_ds = DataSet.from_uri(local_uri)
        assert len(diff_identifiers(local_ds, remote_ds)) == 0
        assert len(diff_sizes(local_ds, remote_ds)) == 0
        assert len(diff_content(local_ds, remote_ds)) == 0