def test_upload_from_config_bad_version():
    """A single-component version string ('1') must be rejected with ValueError."""
    config = {"post": [{"target": "end_point_1", "data": {"version": "1"}}]}
    with patch("requests.get") as mock_get:
        # Registry reports no existing objects, so the post path is taken
        # and version validation fires before any POST is attempted.
        mock_get.return_value = MockResponse([])
        with pytest.raises(ValueError):
            upload_from_config(config, DATA_REGISTRY_URL, TOKEN)
def test_upload_from_config_with_post_present():
    """No POST is issued when the registry already holds the target object."""
    config = {
        "post": [
            {"target": "end_point_1", "data": {"name": "B", "description": "posted B"}}
        ]
    }
    existing = [{"name": "B", "description": "initial B", "url": "mock_url_b"}]
    with patch("requests.get") as mock_get, patch("requests.post") as mock_post:
        # The GET lookup finds a matching object, so upload_from_config
        # must skip the POST entirely.
        mock_get.return_value = MockResponse(existing)
        upload_from_config(config, DATA_REGISTRY_URL, TOKEN)
        mock_post.assert_not_called()
def test_upload_from_config_with_patch_not_present():
    """No PATCH is issued when the target object does not exist in the registry."""
    config = {
        "patch": [
            {"target": "end_point_1", "data": {"name": "A", "description": "patched A"}}
        ]
    }
    with patch("requests.get") as mock_get, patch("requests.patch") as mock_patch:
        # Empty GET result: there is nothing to patch, so no PATCH call.
        mock_get.return_value = MockResponse([])
        upload_from_config(config, DATA_REGISTRY_URL, TOKEN)
        mock_patch.assert_not_called()
def test_upload_from_config_good_version():
    """A full semver version string ('1.1.1') is accepted and POSTed unchanged."""
    config = {"post": [{"target": "end_point_1", "data": {"version": "1.1.1"}}]}
    with patch("requests.get") as mock_get, patch("requests.post") as mock_post:
        # No pre-existing object, so the data must be POSTed verbatim.
        mock_get.return_value = MockResponse([])
        upload_from_config(config, DATA_REGISTRY_URL, TOKEN)
        mock_post.assert_called_once_with(
            get_end_point(DATA_REGISTRY_URL, "end_point_1"),
            data={"version": "1.1.1"},
            headers=get_headers(TOKEN),
        )
def test_upload_from_config_with_patch_present():
    """An existing object is PATCHed at its registry URL with the new data."""
    existing = [{"name": "A", "description": "initial A", "url": "mock_url_a"}]
    config = {
        "patch": [
            {"target": "end_point_1", "data": {"name": "A", "description": "patched A"}}
        ]
    }
    with patch("requests.get") as mock_get, patch("requests.patch") as mock_patch:
        # GET returns the object to update; the PATCH must target its URL.
        mock_get.return_value = MockResponse(existing)
        upload_from_config(config, DATA_REGISTRY_URL, TOKEN)
        mock_patch.assert_called_once_with(
            "mock_url_a",
            data={"name": "A", "description": "patched A"},
            headers=get_headers(TOKEN),
        )
def test_upload_from_config_with_post_not_present():
    """A POST is issued with the configured data when the object is absent."""
    config = {
        "post": [
            {"target": "end_point_1", "data": {"name": "B", "description": "posted B"}}
        ]
    }
    with patch("requests.get") as mock_get, patch("requests.post") as mock_post:
        # Empty GET result: the object is new, so exactly one POST goes out.
        mock_get.return_value = MockResponse([])
        upload_from_config(config, DATA_REGISTRY_URL, TOKEN)
        mock_post.assert_called_once_with(
            get_end_point(DATA_REGISTRY_URL, "end_point_1"),
            data={"name": "B", "description": "posted B"},
            headers=get_headers(TOKEN),
        )
def upload_model_run(
    config_filename: Union[Path, str],
    model_config_filename: Union[Path, str],
    submission_script_filename: Union[Path, str],
    remote_options: Dict[str, str],
    token: str,
    text_file_table: bool = True,
) -> None:
    """
    Reads the provided configuration files and then calls PATCH or POST with the data
    to the data registry as appropriate, resolving references to other data where
    required.

    :param config_filename: file path to the configuration file
    :param model_config_filename: file path to the model configuration file
    :param submission_script_filename: file path to the submission script file
    :param remote_options: (key, value) pairs that are passed to the remote storage,
        e.g. credentials
    :param token: personal access token
    :param text_file_table: If true, model_config and submission_script are uploaded
        to the text_file table in the data_registry
    """
    config_filename = Path(config_filename)
    with open(config_filename, "r") as cf:
        config = yaml.safe_load(cf)
    accessibility = _get_accessibility(config)
    # All run-level settings live under the "run_metadata" key of the config.
    run_metadata = config["run_metadata"]
    # A relative data directory is resolved against the config file's location.
    data_directory = Path(run_metadata[RunMetadata.data_directory])
    if not data_directory.is_absolute():
        data_directory = config_filename.parent / data_directory
    run_id = run_metadata[RunMetadata.run_id]
    # Optional metadata: .get() means these may be None; per-event namespaces
    # can override the default, but an event with no namespace at all raises below.
    namespace = run_metadata.get(RunMetadata.default_output_namespace)
    model_version_str = run_metadata.get(RunMetadata.model_version)
    model_name = run_metadata.get(RunMetadata.model_name)
    open_timestamp = run_metadata[RunMetadata.open_timestamp]
    model_git_sha = run_metadata[RunMetadata.git_sha]
    model_uri = run_metadata[RunMetadata.git_repo]
    remote_uri = run_metadata[RunMetadata.remote_uri]
    # The override URI (if given) is what gets registered as the storage root,
    # while uploads still go to remote_uri.
    remote_uri_override = run_metadata.get(RunMetadata.remote_uri_override, remote_uri)
    data_registry_url = run_metadata.get(
        RunMetadata.data_registry_url, get_data_registry_url()
    )
    description = run_metadata[RunMetadata.description]
    inputs = []
    outputs = []
    # "posts" accumulates registry payloads from all the _add_*/_upload_* helpers
    # below; it is de-duplicated and submitted in a single call at the end.
    posts = []
    storage_root = _add_storage_root(
        posts, remote_uri_override, accessibility, data_registry_url, token
    )
    code_repo = _add_code_repo(
        posts, model_git_sha, model_uri, model_name, model_version_str
    )
    # Each I/O event recorded by the run becomes either an input reference
    # (reads) or an uploaded output plus registry posts (writes).
    for event in config["io"]:
        read = event["type"] == "read"
        metadata = event["access_metadata"]
        component = metadata.get(MetadataKey.component)
        version = metadata.get(MetadataKey.version, "")
        access_calculated_hash = metadata[MetadataKey.calculated_hash]
        filename = data_directory / Path(metadata[MetadataKey.filename])
        if MetadataKey.data_product in metadata:
            data_product_name = metadata[MetadataKey.data_product]
            event_namespace = metadata.get(MetadataKey.namespace, namespace)
            if event_namespace is None:
                raise ValueError(f"No namespace specified for {event}")
            if read:
                # Reads only need a reference to the already-registered product.
                inputs.append(
                    _get_data_product_url(
                        data_product_name,
                        event_namespace,
                        version,
                        component,
                        data_registry_url,
                        token,
                    )
                )
            else:
                # Writes: check the file still matches the hash recorded at
                # access time, then upload it and queue the registry posts.
                _verify_hash(filename, access_calculated_hash)
                path = upload_to_storage(
                    remote_uri,
                    remote_options,
                    data_directory,
                    filename,
                    path_prefix=namespace,
                )
                object_component = _add_data_product_output_posts(
                    posts,
                    path,
                    data_product_name,
                    event_namespace,
                    run_id,
                    component,
                    access_calculated_hash,
                    storage_root,
                )
                outputs.append(object_component)
        elif MetadataKey.doi_or_unique_name in metadata:
            # External objects (e.g. papers/datasets identified by DOI) are
            # read-only from the pipeline's point of view.
            doi_or_unique_name = metadata[MetadataKey.doi_or_unique_name]
            if read:
                inputs.append(
                    _get_external_object_url(
                        doi_or_unique_name, version, component, data_registry_url, token
                    )
                )
            else:
                raise ValueError("can only read external objects")
        else:
            raise ValueError(
                "metadata did not contain a data product or an external object"
            )
    # The model config and submission script are stored either inline in the
    # registry's text_file table or as files in remote storage.
    if text_file_table:
        model_config_obj = _upload_file_to_text_table(
            posts, model_config_filename, data_registry_url, token
        )
        submission_script_obj = _upload_file_to_text_table(
            posts, submission_script_filename, data_registry_url, token
        )
    else:
        model_config_obj = _upload_file_to_storage(
            posts,
            model_config_filename,
            remote_uri,
            remote_options,
            storage_root,
            namespace,
        )
        submission_script_obj = _upload_file_to_storage(
            posts,
            submission_script_filename,
            remote_uri,
            remote_options,
            storage_root,
            namespace,
        )
    # Tie everything together as a single model_run record.
    _add_model_run(
        posts,
        run_id,
        open_timestamp,
        inputs,
        outputs,
        model_config_obj,
        submission_script_obj,
        code_repo,
        description,
    )
    # Helpers may have queued the same payload more than once; submit each only once.
    posts = unique_dicts(posts)
    upload_from_config({"post": posts}, data_registry_url, token)
def upload_data_product_cli(
    data_product_path,
    namespace,
    storage_root_name,
    storage_location_path,
    accessibility,
    data_product_name,
    data_product_description,
    data_product_version,
    component,
    data_registry,
    token,
    remote_uri,
    remote_option,
    remote_uri_override,
):
    """
    Upload a single data product file to remote storage and register it (with its
    components) in the data registry.

    :param data_product_path: path of the file to upload
    :param namespace: registry namespace the data product belongs to
    :param storage_root_name: name for the storage root; defaults to the netloc of
        the (possibly overridden) remote URI
    :param storage_location_path: explicit upload path within the storage root
    :param accessibility: accessibility value written into the registry
    :param data_product_name: registry name of the data product
    :param data_product_description: registry description of the data product
    :param data_product_version: explicit semver version; when omitted, the latest
        registered version is minor-bumped, or 0.1.0 is used for a new product
    :param component: iterable of (name, description) pairs; when empty, a single
        component mirroring the data product's name/description is created
    :param data_registry: registry URL; defaults to DEFAULT_DATA_REGISTRY_URL
    :param token: personal access token
    :param remote_uri: URI of the remote storage to upload to
    :param remote_option: (key, value) pairs merged over the configured remote options
    :param remote_uri_override: URI registered as the storage root instead of
        remote_uri (e.g. a public read URI); defaults to remote_uri
    """
    configure_cli_logging()
    template_file = Path(__file__).parent / Path("templates/data_product.yaml")
    with open(template_file, "r") as f:
        template = f.read()

    data_registry = data_registry or DEFAULT_DATA_REGISTRY_URL
    remote_uri_override = remote_uri_override or remote_uri
    remote_uri = remote_uri.strip()
    remote_uri_override = remote_uri_override.strip()
    # The storage root is registered under the override URI; its name defaults
    # to that URI's network location.
    storage_root_name = storage_root_name or urllib.parse.urlparse(
        remote_uri_override
    ).netloc
    storage_root = remote_uri_override

    # Merge configured remote options with any --remote-option CLI pairs
    # (CLI values win).
    remote_options = get_remote_options()
    arg_remote_options = dict(remote_option) if remote_option else {}
    remote_options.update(arg_remote_options)

    data_product_path = Path(data_product_path)
    storage_location_hash = FileAPI.calculate_hash(data_product_path)
    # BUG FIX: previously the raw `remote_option` tuple was passed here, so the
    # merged `remote_options` (configured defaults + CLI overrides) was computed
    # but never used; the upload now receives the merged options.
    path = upload_to_storage(
        remote_uri,
        remote_options,
        data_product_path.parent,
        data_product_path,
        upload_path=storage_location_path,
        path_prefix=namespace,
    )

    # Resolve the version: bump the minor of the latest registered version if
    # the product already exists, otherwise start a brand-new namespace/product
    # at 0.1.0.
    namespace_ref = get_reference(
        {DataRegistryField.name: namespace},
        DataRegistryTarget.namespace,
        data_registry,
        token,
    )
    if namespace_ref:
        query = {
            DataRegistryField.name: data_product_name,
            DataRegistryField.namespace: namespace_ref,
        }
        if data_product_version:
            query["version"] = data_product_version
        data_products = get_data(
            query, DataRegistryTarget.data_product, data_registry, token, False
        )
        if data_products:
            latest = next(iter(sort_by_semver(data_products)))
            data_product_version = str(
                semver.VersionInfo.parse(
                    latest[DataRegistryField.version]
                ).bump_minor()
            )
        # NOTE(review): when the namespace exists but the product does not and no
        # version was given, data_product_version stays unset here — confirm this
        # is the intended behavior for that case.
    elif not data_product_version:
        data_product_version = "0.1.0"

    populated_yaml = template.format(
        namespace=namespace,
        storage_root_name=storage_root_name,
        storage_root=storage_root,
        accessibility=accessibility,
        storage_location_path=path,
        storage_location_hash=storage_location_hash,
        data_product_name=data_product_name,
        data_product_description=data_product_description,
        data_product_version=data_product_version,
        component_name="COMPONENT_NAME",
        component_description="COMPONENT_DESCRIPTION",
    )
    config = yaml.safe_load(populated_yaml)

    # The last post in the template is a placeholder object_component; replace
    # it with one entry per requested component (or a single default component
    # mirroring the data product itself).
    component_template = config["post"].pop(-1)

    def _component_post(name, description):
        # One object_component post based on the template's placeholder data.
        data = component_template["data"].copy()
        data["name"] = name
        data["description"] = description
        return {"data": data, "target": DataRegistryTarget.object_component}

    if component:
        for component_name, component_description in component:
            config["post"].append(_component_post(component_name, component_description))
    else:
        config["post"].append(_component_post(data_product_name, data_product_description))

    upload_from_config(config, data_registry, token)