示例#1
0
def test_upload_from_config_bad_version():
    """A post whose data carries version '1' must make the upload raise ValueError.

    The registry lookup is mocked to return no existing entries.
    NOTE(review): presumably '1' is rejected because it is not a full
    major.minor.patch version -- confirm against upload_from_config.
    """
    yaml_text = """
post:
    - 
        target: 'end_point_1'
        data:
            version: '1'
    """
    parsed_config = yaml.safe_load(yaml_text)
    with patch("requests.get") as mock_get:
        # Nothing is registered yet, so only the version value is under test.
        mock_get.return_value = MockResponse([])
        with pytest.raises(ValueError):
            upload_from_config(parsed_config, DATA_REGISTRY_URL, TOKEN)
示例#2
0
def test_upload_from_config_with_post_present():
    """No POST is issued when the registry lookup already returns an entry named 'B'."""
    yaml_text = """
post:
    -
        target: 'end_point_1'
        data:
            name: 'B'
            description: 'posted B'
    """
    parsed_config = yaml.safe_load(yaml_text)
    existing_entry = {"name": "B", "description": "initial B", "url": "mock_url_b"}
    with patch("requests.get") as mock_get, patch("requests.post") as mock_post:
        # The mocked GET reports that 'B' is already registered.
        mock_get.return_value = MockResponse([existing_entry])
        upload_from_config(parsed_config, DATA_REGISTRY_URL, TOKEN)
        mock_post.assert_not_called()
示例#3
0
def test_upload_from_config_with_patch_not_present():
    """No PATCH is issued when the registry lookup returns nothing to patch."""
    yaml_text = """
patch:
    - 
        target: 'end_point_1'
        data:
            name: 'A'
            description: 'patched A'
"""
    parsed_config = yaml.safe_load(yaml_text)
    with patch("requests.get") as mock_get, patch("requests.patch") as mock_patch:
        # The mocked GET returns an empty result set: 'A' does not exist.
        mock_get.return_value = MockResponse([])
        upload_from_config(parsed_config, DATA_REGISTRY_URL, TOKEN)
        mock_patch.assert_not_called()
示例#4
0
def test_upload_from_config_good_version():
    """A post with version '1.1.1' is sent exactly once to the target end point."""
    yaml_text = """
post:
    - 
        target: 'end_point_1'
        data:
            version: '1.1.1'
    """
    parsed_config = yaml.safe_load(yaml_text)
    with patch("requests.get") as mock_get, patch("requests.post") as mock_post:
        # No existing entries, so the item is expected to be posted.
        mock_get.return_value = MockResponse([])
        upload_from_config(parsed_config, DATA_REGISTRY_URL, TOKEN)
        expected_url = get_end_point(DATA_REGISTRY_URL, "end_point_1")
        expected_headers = get_headers(TOKEN)
        mock_post.assert_called_once_with(
            expected_url,
            data={"version": "1.1.1"},
            headers=expected_headers,
        )
示例#5
0
def test_upload_from_config_with_patch_present():
    """An existing entry 'A' is patched once, at the URL returned by the lookup."""
    yaml_text = """
patch:
    - 
        target: 'end_point_1'
        data:
            name: 'A'
            description: 'patched A'
    """
    parsed_config = yaml.safe_load(yaml_text)
    existing_entry = {"name": "A", "description": "initial A", "url": "mock_url_a"}
    with patch("requests.get") as mock_get, patch("requests.patch") as mock_patch:
        # The mocked GET reports 'A' as registered under mock_url_a.
        mock_get.return_value = MockResponse([existing_entry])
        upload_from_config(parsed_config, DATA_REGISTRY_URL, TOKEN)
        expected_headers = get_headers(TOKEN)
        mock_patch.assert_called_once_with(
            "mock_url_a",
            data={"name": "A", "description": "patched A"},
            headers=expected_headers,
        )
示例#6
0
def test_upload_from_config_with_post_not_present():
    """An entry 'B' that the lookup does not find is posted once to the target end point."""
    yaml_text = """
post:
    -
        target: 'end_point_1'
        data:
            name: 'B'
            description: 'posted B'
    """
    parsed_config = yaml.safe_load(yaml_text)
    with patch("requests.get") as mock_get, patch("requests.post") as mock_post:
        # The mocked GET returns an empty result set: 'B' does not exist yet.
        mock_get.return_value = MockResponse([])
        upload_from_config(parsed_config, DATA_REGISTRY_URL, TOKEN)
        expected_url = get_end_point(DATA_REGISTRY_URL, "end_point_1")
        expected_headers = get_headers(TOKEN)
        mock_post.assert_called_once_with(
            expected_url,
            data={"name": "B", "description": "posted B"},
            headers=expected_headers,
        )
def upload_model_run(
    config_filename: Union[Path, str],
    model_config_filename: Union[Path, str],
    submission_script_filename: Union[Path, str],
    remote_options: Dict[str, str],
    token: str,
    text_file_table: bool = True,
) -> None:
    """
    Record a model run in the data registry from its access configuration file.

    The configuration is loaded from YAML; every entry of its ``io`` section is turned either into an input
    (a URL looked up in the registry for a read) or into an output (the file is hash-checked, uploaded to
    storage and described by new posts). The collected posts are de-duplicated and handed to
    ``upload_from_config``.

    :param config_filename: file path to the configuration file
    :param model_config_filename: file path to the model configuration file
    :param submission_script_filename: file path to the submission script file
    :param remote_options: (key, value) pairs that are passed to the remote storage, e.g. credentials
    :param token: personal access token
    :param text_file_table: If true, model_config and submission_script are uploaded to the text_file table in
        the data_registry; otherwise they are uploaded to the remote storage
    :raises ValueError: if a data product event has no namespace, if a write event targets an external object,
        or if an event names neither a data product nor an external object
    """
    config_filename = Path(config_filename)
    with open(config_filename, "r") as config_file:
        config = yaml.safe_load(config_file)

    accessibility = _get_accessibility(config)
    run_metadata = config["run_metadata"]

    # A relative data directory is resolved against the directory holding the configuration file.
    data_directory = Path(run_metadata[RunMetadata.data_directory])
    if not data_directory.is_absolute():
        data_directory = config_filename.parent / data_directory

    run_id = run_metadata[RunMetadata.run_id]
    namespace = run_metadata.get(RunMetadata.default_output_namespace)
    model_version_str = run_metadata.get(RunMetadata.model_version)
    model_name = run_metadata.get(RunMetadata.model_name)
    open_timestamp = run_metadata[RunMetadata.open_timestamp]
    model_git_sha = run_metadata[RunMetadata.git_sha]
    model_uri = run_metadata[RunMetadata.git_repo]
    remote_uri = run_metadata[RunMetadata.remote_uri]
    remote_uri_override = run_metadata.get(RunMetadata.remote_uri_override, remote_uri)
    data_registry_url = run_metadata.get(RunMetadata.data_registry_url, get_data_registry_url())
    description = run_metadata[RunMetadata.description]

    inputs = []
    outputs = []
    posts = []

    storage_root = _add_storage_root(posts, remote_uri_override, accessibility, data_registry_url, token)
    code_repo = _add_code_repo(posts, model_git_sha, model_uri, model_name, model_version_str)

    for event in config["io"]:
        is_read = event["type"] == "read"
        metadata = event["access_metadata"]
        component = metadata.get(MetadataKey.component)
        version = metadata.get(MetadataKey.version, "")
        access_calculated_hash = metadata[MetadataKey.calculated_hash]
        filename = data_directory / Path(metadata[MetadataKey.filename])

        if MetadataKey.data_product in metadata:
            data_product_name = metadata[MetadataKey.data_product]
            # The event's own namespace wins over the run-level default.
            event_namespace = metadata.get(MetadataKey.namespace, namespace)
            if event_namespace is None:
                raise ValueError(f"No namespace specified for {event}")

            if is_read:
                input_url = _get_data_product_url(
                    data_product_name, event_namespace, version, component, data_registry_url, token
                )
                inputs.append(input_url)
                continue

            # Written data product: check the file against the recorded hash before uploading it.
            _verify_hash(filename, access_calculated_hash)
            path = upload_to_storage(
                remote_uri, remote_options, data_directory, filename, path_prefix=namespace
            )
            object_component = _add_data_product_output_posts(
                posts,
                path,
                data_product_name,
                event_namespace,
                run_id,
                component,
                access_calculated_hash,
                storage_root,
            )
            outputs.append(object_component)
            continue

        if MetadataKey.doi_or_unique_name not in metadata:
            raise ValueError(
                "metadata did not contain a data product or an external object"
            )

        doi_or_unique_name = metadata[MetadataKey.doi_or_unique_name]
        if not is_read:
            raise ValueError("can only read external objects")
        input_url = _get_external_object_url(
            doi_or_unique_name, version, component, data_registry_url, token
        )
        inputs.append(input_url)

    # The model configuration is always handled before the submission script.
    run_files = (model_config_filename, submission_script_filename)
    if text_file_table:
        model_config_obj, submission_script_obj = [
            _upload_file_to_text_table(posts, run_file, data_registry_url, token)
            for run_file in run_files
        ]
    else:
        model_config_obj, submission_script_obj = [
            _upload_file_to_storage(posts, run_file, remote_uri, remote_options, storage_root, namespace)
            for run_file in run_files
        ]

    _add_model_run(
        posts,
        run_id,
        open_timestamp,
        inputs,
        outputs,
        model_config_obj,
        submission_script_obj,
        code_repo,
        description,
    )

    upload_from_config({"post": unique_dicts(posts)}, data_registry_url, token)
def upload_data_product_cli(
    data_product_path,
    namespace,
    storage_root_name,
    storage_location_path,
    accessibility,
    data_product_name,
    data_product_description,
    data_product_version,
    component,
    data_registry,
    token,
    remote_uri,
    remote_option,
    remote_uri_override,
):
    """
    Upload a single data product file to storage and register it in the data registry.

    The file is hashed and uploaded, the ``templates/data_product.yaml`` template next to this module is
    filled in, one object component post is appended per requested component, and the resulting
    configuration is passed to ``upload_from_config``.

    :param data_product_path: path to the file to upload
    :param namespace: namespace name; looked up in the registry and used as the storage path prefix
    :param storage_root_name: name of the storage root; defaults to the network location of the
        (overridden) remote URI when empty
    :param storage_location_path: passed to the storage upload as ``upload_path``
    :param accessibility: accessibility value substituted into the template
    :param data_product_name: name of the data product
    :param data_product_description: description of the data product
    :param data_product_version: requested version; when empty it is derived (see below)
    :param component: iterable of (name, description) pairs; when empty a single component named after
        the data product is registered
    :param data_registry: data registry URL; defaults to ``DEFAULT_DATA_REGISTRY_URL`` when empty
    :param token: personal access token
    :param remote_uri: URI of the remote storage the file is uploaded to
    :param remote_option: iterable of (key, value) pairs overriding the options from ``get_remote_options()``
    :param remote_uri_override: URI recorded as the storage root; defaults to ``remote_uri`` when empty

    Version selection: if the registry already holds matching data products in the namespace, the minor
    version of the first entry returned by ``sort_by_semver`` is bumped; otherwise the requested version is
    kept, falling back to ``0.1.0`` when none was given.
    """
    configure_cli_logging()

    template_file = Path(__file__).parent / Path("templates/data_product.yaml")
    with open(template_file, "r") as f:
        template = f.read()

    data_registry = data_registry or DEFAULT_DATA_REGISTRY_URL
    remote_uri_override = remote_uri_override or remote_uri
    remote_uri = remote_uri.strip()
    remote_uri_override = remote_uri_override.strip()
    storage_root_name = storage_root_name or urllib.parse.urlparse(remote_uri_override).netloc
    storage_root = remote_uri_override

    # Copy before merging so the mapping returned by get_remote_options() is not mutated;
    # options given on the command line take precedence.
    remote_options = dict(get_remote_options())
    if remote_option:
        remote_options.update(dict(remote_option))

    data_product_path = Path(data_product_path)
    storage_location_hash = FileAPI.calculate_hash(data_product_path)

    # Pass the merged options: the raw ``remote_option`` argument would drop the
    # options obtained from get_remote_options().
    path = upload_to_storage(
        remote_uri,
        remote_options,
        data_product_path.parent,
        data_product_path,
        upload_path=storage_location_path,
        path_prefix=namespace,
    )

    namespace_ref = get_reference(
        {DataRegistryField.name: namespace}, DataRegistryTarget.namespace, data_registry, token
    )
    data_products = None
    if namespace_ref:
        query = {
            DataRegistryField.name: data_product_name,
            DataRegistryField.namespace: namespace_ref,
        }
        if data_product_version:
            query["version"] = data_product_version
        data_products = get_data(query, DataRegistryTarget.data_product, data_registry, token, False)

    if data_products:
        latest = next(iter(sort_by_semver(data_products)))
        data_product_version = str(
            semver.VersionInfo.parse(latest[DataRegistryField.version]).bump_minor()
        )
    elif not data_product_version:
        # Also reached when the namespace is not registered yet, so the template
        # never receives an empty version.
        data_product_version = "0.1.0"

    populated_yaml = template.format(
        namespace=namespace,
        storage_root_name=storage_root_name,
        storage_root=storage_root,
        accessibility=accessibility,
        storage_location_path=path,
        storage_location_hash=storage_location_hash,
        data_product_name=data_product_name,
        data_product_description=data_product_description,
        data_product_version=data_product_version,
        component_name="COMPONENT_NAME",
        component_description="COMPONENT_DESCRIPTION",
    )
    config = yaml.safe_load(populated_yaml)

    # The last post of the template is a placeholder component; it is removed and
    # cloned once per real component.
    component_template = config["post"].pop(-1)
    components = component or [(data_product_name, data_product_description)]
    for component_name, component_description in components:
        c = component_template["data"].copy()
        c["name"] = component_name
        c["description"] = component_description
        config["post"].append({
            "data": c,
            "target": DataRegistryTarget.object_component,
        })

    upload_from_config(config, data_registry, token)