Example #1
def test_basic_generation(path_to_config_files):
    """
    Model must be included in the config file

    start/end dates ...always included? or default to specific dates if not included?
    """

    project_name = "some-fancy-project-name"
    model_config = '{"sklearn.pipeline.Pipeline": {"steps": ["sklearn.preprocessing.data.MinMaxScaler", {"gordo.machine.model.models.KerasAutoEncoder": {"kind": "feedforward_hourglass"}}]}}'

    config_filename = "config-test-with-models.yml"
    expanded_template = _generate_test_workflow_str(
        path_to_config_files, config_filename, project_name=project_name
    )

    assert (
        project_name in expanded_template
    ), f"Expected to find project name: {project_name} in output: {expanded_template}"

    assert (
        model_config in expanded_template
    ), f"Expected to find model config: {model_config} in output: {expanded_template}"

    yaml_content = wg.get_dict_from_yaml(
        os.path.join(path_to_config_files, config_filename)
    )

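    # Stub asset resolution so every tag resolves to the "default" asset.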
    with patch.object(sensor_tag, "_asset_from_tag_name", return_value="default"):
        machines = NormalizedConfig(yaml_content, project_name=project_name).machines

    assert len(machines) == 2
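For context, a fixture satisfying these assertions might look roughly like the sketch below. This is illustrative only, not the actual config-test-with-models.yml, and it assumes a globals-level default model is honored by NormalizedConfig:

# Hypothetical fixture sketch -- not the real test file.
fixture_sketch = """
machines:
  - name: machine-1
    dataset:
      tags: [TAG-1, TAG-2]
      train_start_date: '2019-01-01T00:00:00+00:00'
      train_end_date: '2019-03-01T00:00:00+00:00'
  - name: machine-2
    dataset:
      tags: [TAG-3, TAG-4]
      train_start_date: '2019-01-01T00:00:00+00:00'
      train_end_date: '2019-03-01T00:00:00+00:00'
globals:
  model:
    sklearn.pipeline.Pipeline:
      steps:
        - sklearn.preprocessing.data.MinMaxScaler
        - gordo.machine.model.models.KerasAutoEncoder:
            kind: feedforward_hourglass
"""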
Example #2
def local_build(
    config_str: str,
) -> Iterable[Tuple[Union[BaseEstimator, None], Machine]]:
    """
    Build model(s) from a bare Gordo config file locally.

    This follows the same steps as the normal workflow generation and
    subsequent Gordo deployment process. It should help with local
    development, and gives a good indication that your config is valid
    for deployment with Gordo.

    Parameters
    ----------
    config_str: str
        The raw YAML config file, as a string.

    Examples
    --------
    >>> import numpy as np
    >>> from gordo.dependencies import configure_once
    >>> configure_once()
    >>> config = '''
    ... machines:
    ...       - dataset:
    ...           tags:
    ...             - SOME-TAG1
    ...             - SOME-TAG2
    ...           target_tag_list:
    ...             - SOME-TAG3
    ...             - SOME-TAG4
    ...           train_end_date: '2019-03-01T00:00:00+00:00'
    ...           train_start_date: '2019-01-01T00:00:00+00:00'
    ...           asset: asgb
    ...           data_provider:
    ...             type: RandomDataProvider
    ...         metadata:
    ...           information: Some sweet information about the model
    ...         model:
    ...           gordo.machine.model.anomaly.diff.DiffBasedAnomalyDetector:
    ...             base_estimator:
    ...               sklearn.pipeline.Pipeline:
    ...                 steps:
    ...                 - sklearn.decomposition.PCA
    ...                 - sklearn.multioutput.MultiOutputRegressor:
    ...                     estimator: sklearn.linear_model.LinearRegression
    ...         name: crazy-sweet-name
    ... '''
    >>> models_n_metadata = local_build(config)
    >>> assert len(list(models_n_metadata)) == 1

    Returns
    -------
    Iterable[Tuple[Union[BaseEstimator, None], Machine]]
        A generator yielding tuples of models and their metadata.
    """

    config = get_dict_from_yaml(io.StringIO(config_str))
    normed = NormalizedConfig(config, project_name="local-build")
    for machine in normed.machines:
        yield ModelBuilder(machine=machine).build()
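Note that local_build is a generator: nothing is built until it is iterated. A minimal consumption sketch, assuming config is a YAML string like the one in the docstring above:

# Each iteration builds one machine's model; unpack the (model, machine) pair.
for model, machine in local_build(config):
    print(machine.name, type(model).__name__)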
Example #3
def test_splited_docker_images():
    config = {"machines": [], "globals": {"runtime": {}}}
    normalized_config = NormalizedConfig(config, "test", "1.0.0")
    config_globals = normalized_config.globals
    config_runtime = config_globals["runtime"]
    assert config_runtime["deployer"]["image"] == "gordo-deploy"
    assert config_runtime["server"]["image"] == "gordo-model-server"
    assert config_runtime["prometheus_metrics_server"]["image"] == "gordo-model-server"
    assert config_runtime["builder"]["image"] == "gordo-model-builder"
    assert config_runtime["client"]["image"] == "gordo-client"
Example #4
def unique_tag_list_cli(machine_config: str, output_file_tag_list: str):
    """
    Print the unique set of tags from a machine config, or write them to
    output_file_tag_list if given.
    """
    yaml_content = wg.get_dict_from_yaml(machine_config)

    machines = NormalizedConfig(yaml_content, project_name="test-proj-name").machines

    tag_list = set(tag for machine in machines for tag in machine.dataset.tag_list)

    if output_file_tag_list:
        with open(output_file_tag_list, "w") as output_file:
            for tag in tag_list:
                output_file.write(f"{tag.name}\n")
    else:
        for tag in tag_list:
            print(tag.name)
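Note that tag_list is a set, so output order is nondeterministic between runs. If reproducible output matters, a sorted variant is a one-line change (a minimal sketch):

# Sort by tag name for stable, diff-friendly output.
for tag in sorted(tag_list, key=lambda t: t.name):
    print(tag.name)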
Example #5
def workflow_generator_cli(gordo_ctx, **ctx):
    """
    Machine Configuration to Argo Workflow
    """

    context: Dict[Any, Any] = ctx.copy()
    yaml_content = wg.get_dict_from_yaml(context["machine_config"])

    try:
        log_level = yaml_content["globals"]["runtime"]["log_level"]
    except KeyError:
        log_level = os.getenv("GORDO_LOG_LEVEL", gordo_ctx.obj["log_level"])

    logging.getLogger("gordo").setLevel(log_level.upper())
    context["log_level"] = log_level.upper()

    # Create normalized config
    config = NormalizedConfig(yaml_content, project_name=context["project_name"])

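    # Default the server replica ceiling to 10 per machine when n_servers is unset.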
    context["max_server_replicas"] = (context.pop("n_servers")
                                      or len(config.machines) * 10)

    # We know these exist since we set them in the default globals
    builder_resources = config.globals["runtime"]["builder"]["resources"]
    context["model_builder_resources_requests_memory"] = builder_resources[
        "requests"]["memory"]
    context["model_builder_resources_requests_cpu"] = builder_resources[
        "requests"]["cpu"]
    context["model_builder_resources_limits_memory"] = builder_resources[
        "limits"]["memory"]
    context["model_builder_resources_limits_cpu"] = builder_resources[
        "limits"]["cpu"]

    context["server_resources"] = config.globals["runtime"]["server"][
        "resources"]

    # These are also set in the default globals, and guaranteed to exist
    client_resources = config.globals["runtime"]["client"]["resources"]
    context["client_resources_requests_memory"] = client_resources["requests"][
        "memory"]
    context["client_resources_requests_cpu"] = client_resources["requests"][
        "cpu"]
    context["client_resources_limits_memory"] = client_resources["limits"][
        "memory"]
    context["client_resources_limits_cpu"] = client_resources["limits"]["cpu"]

    context["client_max_instances"] = config.globals["runtime"]["client"][
        "max_instances"]

    influx_resources = config.globals["runtime"]["influx"]["resources"]
    context["influx_resources_requests_memory"] = influx_resources["requests"][
        "memory"]
    context["influx_resources_requests_cpu"] = influx_resources["requests"][
        "cpu"]
    context["influx_resources_limits_memory"] = influx_resources["limits"][
        "memory"]
    context["influx_resources_limits_cpu"] = influx_resources["limits"]["cpu"]

    nr_of_models_with_clients = len([
        machine for machine in config.machines
        if machine.runtime.get("influx", {}).get("enable", True)
    ])
    context["client_total_instances"] = nr_of_models_with_clients

    # Should we start up influx/grafana at all, i.e. is there at least one
    # machine requesting it?
    enable_influx = nr_of_models_with_clients > 0
    context["enable_influx"] = enable_influx

    context["postgres_host"] = f"gordo-postgres-{config.project_name}"

    # If influx is enabled, set up a postgres reporter to send metadata,
    # allowing the machine to be queried from grafana
    if enable_influx:
        pg_reporter = {
            "gordo.reporters.postgres.PostgresReporter": {
                "host": context["postgres_host"]
            }
        }
        for machine in config.machines:
            machine.runtime["reporters"].append(pg_reporter)

    # Determine if MlFlowReporter should be enabled per machine
    for machine in config.machines:
        try:
            enabled = machine.runtime["builder"]["remote_logging"]["enable"]
        except KeyError:
            continue
        else:
            if enabled:
                machine.runtime["reporters"].append(
                    "gordo.reporters.mlflow.MlFlowReporter")

    context["machines"] = config.machines

    # Context requiring pre-processing
    context["target_names"] = [machine.name for machine in config.machines]

    # Json dump owner_references, if not None, otherwise pop it out of the context
    if context["owner_references"]:
        context["owner_references"] = json.dumps(context["owner_references"])
    else:
        context.pop("owner_references")

    builder_exceptions_report_level = get_builder_exceptions_report_level(config)
    context["builder_exceptions_report_level"] = builder_exceptions_report_level.name
    if builder_exceptions_report_level != ReportLevel.EXIT_CODE:
        context["builder_exceptions_report_file"] = "/tmp/exception.json"

    if context["workflow_template"]:
        template = wg.load_workflow_template(context["workflow_template"])
    else:
        workflow_template = pkg_resources.resource_filename(
            "gordo.workflow.workflow_generator.resources",
            "argo-workflow.yml.template")
        template = wg.load_workflow_template(workflow_template)

    # Clear output file
    if context["output_file"]:
        open(context["output_file"], "w").close()  # type: ignore
    for i in range(0, len(config.machines), context["split_workflows"]):  # type: ignore
        logger.info(
            f"Generating workflow for machines {i} to {i + context['split_workflows']}"
        )
        context["machines"] = config.machines[i:i + context["split_workflows"]]

        if context["output_file"]:
            s = template.stream(**context)
            with open(context["output_file"], "a") as f:  # type: ignore
                if i != 0:
                    f.write("\n---\n")
                s.dump(f)
        else:
            output = template.render(**context)
            if i != 0:
                print("\n---\n")
            print(output)
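The final loop renders one Argo workflow document per chunk of split_workflows machines, joined with YAML document separators. The chunking itself reduces to a plain range step (a minimal sketch):

# split_workflows=2 over five machines yields chunks [0:2], [2:4], [4:5];
# each chunk becomes its own workflow document, separated by "---".
machines = ["m1", "m2", "m3", "m4", "m5"]
split_workflows = 2
for i in range(0, len(machines), split_workflows):
    print(machines[i:i + split_workflows])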