def test_invalid_model(default_globals: dict):
    """
    Test invalid model with 'step' instead of 'steps'
    """
    element_str = """
        name: ct-23-0001-machine
        data_provider:
          threads: 10
        dataset:
          tags: [GRA-TE -23-0733.PV, GRA-TT -23-0719.PV, GRA-YE -23-0751X.PV]
          target_tag_list: [GRA-TE -123-456]
          train_start_date: 2018-01-01T09:00:30Z
          train_end_date: 2018-01-02T09:00:30Z
        model:
          sklearn.pipeline.Pipeline:
            step:
              - sklearn.preprocessing.data.MinMaxScaler
              - gordo.machine.model.models.KerasAutoEncoder:
                  kind: feedforward_hourglass
        evaluation:
          scoring_scaler: Null
        metadata:
          id: special-id
    """
    element = get_dict_from_yaml(StringIO(element_str))
    with pytest.raises(ValueError):
        Machine.from_config(
            element, project_name="test-project-name", config_globals=default_globals
        )

def test_build_cv_mode_cross_val_cache(
    tmpdir,
    should_save_model: bool,
    cv_mode_1: str,
    cv_mode_2: str,
    runner: CliRunner,
    machine: Machine,
):
    """
    Checks that cv_scores uses the cache if run after a full build. Loads the
    same model, and can print the cv_scores from it.
    """
    logger.info(f"MODEL_CONFIG={json.dumps(machine.model)}")

    machine.evaluation = cv_mode_1  # type: ignore
    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()), OUTPUT_DIR=str(tmpdir)):
        runner.invoke(cli.gordo, ["build"])

    machine.evaluation = cv_mode_2  # type: ignore
    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()), OUTPUT_DIR=str(tmpdir)):
        runner.invoke(cli.gordo, ["build"])

    if should_save_model:
        assert len(os.listdir(tmpdir)) > 0
    else:
        assert len(os.listdir(tmpdir)) == 0

def __init__(self, machine: Machine):
    """
    Build a model for a given :class:`gordo.workflow.config_elements.machine.Machine`

    Parameters
    ----------
    machine: Machine

    Example
    -------
    >>> from gordo_dataset.sensor_tag import SensorTag
    >>> from gordo.machine import Machine
    >>> from gordo.dependencies import configure_once
    >>> configure_once()
    >>> machine = Machine(
    ...     name="special-model-name",
    ...     model={"sklearn.decomposition.PCA": {"svd_solver": "auto"}},
    ...     dataset={
    ...         "type": "RandomDataset",
    ...         "train_start_date": "2017-12-25 06:00:00Z",
    ...         "train_end_date": "2017-12-30 06:00:00Z",
    ...         "tag_list": [SensorTag("Tag 1", None), SensorTag("Tag 2", None)],
    ...         "target_tag_list": [SensorTag("Tag 3", None), SensorTag("Tag 4", None)]
    ...     },
    ...     project_name='test-proj',
    ... )
    >>> builder = ModelBuilder(machine=machine)
    >>> model, machine = builder.build()
    """
    # Avoid overwriting the passed machine; a plain copy doesn't work if it holds a
    # reference to a loaded Tensorflow model, so .to_dict() serializes it to a
    # primitive dict representation first.
    self.machine = Machine(**machine.to_dict())

def test_postgres_reporter(postgresdb, metadata):
    """
    Check logging of a machine into postgres
    """
    reporter1 = PostgresReporter(host="localhost")
    machine1 = Machine(**metadata)

    # Before inserting, the machine does not exist.
    with pytest.raises(peewee.DoesNotExist):
        PostgresMachine.get(PostgresMachine.name == machine1.name)

    reporter1.report(machine1)
    record = PostgresMachine.get(PostgresMachine.name == machine1.name)
    assert record.name == machine1.name

    # Create another reporter to ensure nothing happened to the DB
    reporter2 = PostgresReporter(host="localhost")
    machine2 = Machine(**metadata)
    machine2.name = "another-machine"
    reporter2.report(machine2)

    # The first machine is still there
    record = PostgresMachine.get(PostgresMachine.name == machine1.name)
    assert record.name == machine1.name

    # And the second
    record = PostgresMachine.get(PostgresMachine.name == machine2.name)
    assert record.name == machine2.name

def test_build_cv_mode(
    tmpdir, runner: CliRunner, should_save_model: bool, cv_mode: str, machine: Machine
):
    """
    Testing build with cv_mode set to full and cross_val_only. Checks that
    cv_scores are printed and models are only saved when using the default
    (full) value.
    """
    machine.model = MODEL_CONFIG_WITH_PREDICT
    machine.evaluation = cv_mode  # type: ignore

    logger.info(f"MODEL_CONFIG={json.dumps(machine.model)}")

    tmp_model_dir = os.path.join(tmpdir, "tmp")
    os.makedirs(tmp_model_dir, exist_ok=True)

    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()), OUTPUT_DIR=tmp_model_dir):
        result = runner.invoke(cli.gordo, ["build", "--print-cv-scores"])
        assert result.exit_code == 0

        # Checks that the model directory is populated or empty depending on the mode.
        if should_save_model:
            assert len(os.listdir(tmp_model_dir)) != 0
        else:
            assert len(os.listdir(tmp_model_dir)) == 0

        # Checks that the output contains the printed cv scores for each metric
        assert "r2-score" in result.output
        assert "mean-squared-error" in result.output
        assert "mean-absolute-error" in result.output
        assert "explained-variance-score" in result.output

def test_builder_with_reporter(postgresdb, metadata):
    """
    Verify a machine can take a reporter and that .report() will run any
    given reporters
    """
    reporter = PostgresReporter(host="localhost")
    metadata["runtime"]["reporters"].append(reporter.to_dict())

    machine = Machine(**metadata)

    with pytest.raises(peewee.DoesNotExist):
        PostgresMachine.get(PostgresMachine.name == machine.name)
    machine.report()
    PostgresMachine.get(PostgresMachine.name == machine.name)

def test_client_get_dataset(gordo_project, metadata, ml_server):
    data_provider = providers.RandomDataProvider(min_size=10)
    client = Client(project=gordo_project, data_provider=data_provider)
    start = isoparse("2016-01-01T00:00:00+00:00")
    end = isoparse("2016-01-01T12:00:00+00:00")
    machine = Machine(**metadata)
    assert type(machine.dataset) is TimeSeriesDataset
    machine.dataset.row_filter_buffer_size = 12
    machine.dataset.n_samples_threshold = 10
    client_machine = ClientMachine(**machine.to_dict())
    dataset = client._get_dataset(client_machine, start, end)
    assert dataset.row_filter_buffer_size == 0
    assert dataset.n_samples_threshold == 0
    assert dataset.low_threshold is None
    assert dataset.high_threshold is None

def machine():
    return Machine(
        name="test-model",
        model=MODEL_CONFIG,
        dataset=DATA_CONFIG,
        project_name="project-name",
    )

def test_model_builder_metrics_list(metrics_: Optional[List[str]]):
    model_config = {
        "sklearn.multioutput.MultiOutputRegressor": {
            "estimator": "sklearn.linear_model.LinearRegression"
        }
    }
    data_config = get_random_data()

    evaluation_config: Dict[str, Any] = {"cv_mode": "full_build"}
    if metrics_:
        evaluation_config.update({"metrics": metrics_})

    machine = Machine(
        name="model-name",
        dataset=data_config,
        model=model_config,
        evaluation=evaluation_config,
        project_name="test",
    )
    _model, machine = ModelBuilder(machine).build()

    expected_metrics = metrics_ or [
        "sklearn.metrics.explained_variance_score",
        "sklearn.metrics.r2_score",
        "sklearn.metrics.mean_squared_error",
        "sklearn.metrics.mean_absolute_error",
    ]

    assert all(
        metric.split(".")[-1].replace("_", "-")
        in machine.metadata.build_metadata.model.cross_validation.scores
        for metric in expected_metrics
    )

def test_setting_seed(seed, model_config):
    """
    Test that we can set the seed and get the same results.
    """
    data_config = get_random_data()
    evaluation_config = {"cv_mode": "full_build", "seed": seed}

    # Training two instances without a seed should result in different scores,
    # while doing it with a seed should result in the same scores.
    machine = Machine(
        name="model-name",
        dataset=data_config,
        model=model_config,
        evaluation=evaluation_config,
        project_name="test",
    )
    _model, machine1 = ModelBuilder(machine).build()
    _model, machine2 = ModelBuilder(machine).build()

    df1 = pd.DataFrame.from_dict(
        machine1.metadata.build_metadata.model.cross_validation.scores
    )
    df2 = pd.DataFrame.from_dict(
        machine2.metadata.build_metadata.model.cross_validation.scores
    )

    # Equality depends on the seed being set.
    if seed:
        assert df1.equals(df2)
    else:
        assert not df1.equals(df2)

def log_machine(mlflow_client: MlflowClient, run_id: str, machine: Machine):
    """
    Send logs to configured MLflow backend

    Parameters
    ----------
    mlflow_client: MlflowClient
        Client instance to call logging methods from.
    run_id: str
        Unique ID of the MLflow Run to log to.
    machine: Machine
        Machine to log with MlflowClient.
    """
    # Log machine metrics and params
    for batch_kwargs in batch_log_items(*get_machine_log_items(machine)):
        mlflow_client.log_batch(run_id, **batch_kwargs)

    # Send configs as JSON artifacts
    try:
        with tempfile.TemporaryDirectory(dir="./") as tmp_dir:
            fp = os.path.join(tmp_dir, "metadata.json")
            with open(fp, "w") as fh:
                json.dump(machine.to_dict(), fh, cls=MachineEncoder)
            mlflow_client.log_artifacts(run_id=run_id, local_dir=tmp_dir)
    # Map to MlflowLoggingError for coding errors in the model builder
    except Exception as e:
        raise MlflowLoggingError(e)

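# Usage sketch (illustrative, not part of this module): the tests in this file call
# log_machine through the `mlflow_context` context manager, which yields a client and
# a run id. Assuming `machine` is a built gordo Machine, the call looks roughly like:
#
#     with mlflow_context("experiment-name", "unique_key", {}, {}) as (client, run_id):
#         log_machine(client, run_id, machine)
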
def test_n_splits_from_config(mocked_pipeline_from_definition, cv):
    """
    Test that we can set arbitrary splitters and parameters in the config file
    which is called by the serializer.
    """
    data_config = get_random_data()
    evaluation_config = {"cv_mode": "full_build"}
    if cv:
        evaluation_config["cv"] = cv

    model_config = {
        "sklearn.multioutput.MultiOutputRegressor": {
            "estimator": "sklearn.ensemble.forest.RandomForestRegressor"
        }
    }

    machine = Machine(
        name="model-name",
        dataset=data_config,
        model=model_config,
        evaluation=evaluation_config,
        project_name="test",
    )

    ModelBuilder(machine).build()

    if cv:
        mocked_pipeline_from_definition.assert_called_with(cv)
    else:
        mocked_pipeline_from_definition.assert_called_with(
            {"sklearn.model_selection.TimeSeriesSplit": {"n_splits": 3}}
        )

def report(self, machine: GordoMachine):
    """
    Log a machine to Postgres, where the top level keys 'name', 'dataset',
    'model', and 'metadata' map to BinaryJSON fields.

    Parameters
    ----------
    machine: gordo.machine.Machine

    Returns
    -------
    None
    """
    try:
        with self.db.atomic():
            logger.info(f"Inserting machine {machine.name} in sql")  # type: ignore

            # Ensure it's serializable using MachineEncoder
            record = json.loads(json.dumps(machine.to_dict(), cls=MachineEncoder))
            model = dict_to_model(Machine, record, ignore_unknown=True)
            try:
                Machine.get(Machine.name == machine.name)
            except peewee.DoesNotExist:
                model.save()
            else:
                query = Machine.update(**model_to_dict(model)).where(
                    Machine.name == machine.name
                )
                query.execute()
    except Exception as exc:
        raise PostgresReporterException(exc)

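# Usage sketch (illustrative, mirroring the reporter tests in this file): a reporter
# only needs the Postgres host, and report() inserts or updates the row keyed by the
# machine's name:
#
#     reporter = PostgresReporter(host="localhost")
#     reporter.report(machine)
#     reporter.report(machine)  # second call with the same machine.name updates the row
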
def test_builder_calls_machine_report(mocked_report_method, metadata):
    """
    When building a machine, ModelBuilder.build should call Machine.report()
    so that it can run any reporters in the Machine's runtime.
    """
    machine = Machine(**metadata)
    ModelBuilder(machine).build()
    mocked_report_method.assert_called_once()

def test_scores_metadata(raw_model_config):
    data_config = get_random_data()
    model_config = yaml.load(raw_model_config, Loader=yaml.FullLoader)
    machine = Machine(
        dataset=data_config, model=model_config, name="model-name", project_name="test"
    )
    model, machine_out = ModelBuilder(machine).build()
    machine_check(machine_out, False)

def test_get_machine_log_items(metadata):
    """
    Test that dicts are correctly converted to MLflow types or errors raised
    """
    metrics, params = mlu.get_machine_log_items(Machine(**metadata))

    assert all(type(m) == Metric for m in metrics)
    assert all(type(p) == Param for p in params)

def test_overwrite_report(postgresdb, metadata):
    """
    Ensure saving the same machine twice is ok.
    """
    reporter1 = PostgresReporter(host="localhost")
    reporter2 = PostgresReporter(host="localhost")

    machine1 = Machine(**metadata)
    machine2 = Machine(**metadata)

    reporter1.report(machine1)
    # Reporting twice should be ok.
    reporter2.report(machine2)

    results = PostgresMachine.select().where(PostgresMachine.name == machine1.name)
    assert len([result for result in results]) == 1

def _machine_from_server(self, name: str, revision: str) -> Machine:
    resp = self.session.get(
        f"{self.base_url}/gordo/v0/{self.project_name}/{name}/metadata",
        params={"revision": revision},
    )
    metadata = _handle_response(
        resp=resp, resource_name=f"Machine metadata for {name}"
    )
    if isinstance(metadata, dict) and metadata.get("metadata", None):
        return Machine(**metadata.get("metadata", None))
    else:
        raise NotFound(f"Machine {name} not found")

def test_builder_metadata(raw_model_config):
    """
    Ensure the builder works with various model configs and that each has
    expected/valid metadata results.
    """
    model_config = yaml.load(raw_model_config, Loader=yaml.FullLoader)
    data_config = get_random_data()
    machine = Machine(
        name="model-name", dataset=data_config, model=model_config, project_name="test"
    )
    model, machine_out = ModelBuilder(machine).build()
    # Check metadata, and only verify 'history' if it's a *Keras* type model
    machine_check(machine_out, "Keras" in raw_model_config)

def test_provide_saved_model_simple_happy_path(tmpdir):
    """
    Test provide_saved_model with no caching
    """
    model_config = {"sklearn.decomposition.PCA": {"svd_solver": "auto"}}
    data_config = get_random_data()
    output_dir = os.path.join(tmpdir, "model")

    machine = Machine(
        name="model-name", dataset=data_config, model=model_config, project_name="test"
    )
    ModelBuilder(machine).build(output_dir=output_dir)

    # Assert the model was saved at the location.
    # Should be the model file and the metadata.
    assert len(os.listdir(output_dir)) == 2

def test_mlflow_context_log_error(MockClient, metadata):
    """
    Test that an error while logging metadata as an artifact raises
    MlflowLoggingError
    """
    metadata = Machine(**metadata)
    mock_client = MockClient()
    mock_client.log_artifacts.side_effect = Exception("Some unknown exception!")

    with pytest.raises(mlu.MlflowLoggingError):
        with mlu.mlflow_context("returns metadata", "unique_key", {}, {}) as (
            mlflow_client,
            run_id,
        ):
            mlu.log_machine(mlflow_client, run_id, metadata)

def test_build_cv_mode_build_only(tmpdir, runner: CliRunner, machine: Machine):
    """
    Testing build with cv_mode set to build_only. Checks that OUTPUT_DIR gets a
    model saved to it. It also checks that the metadata contains
    cv-duration-sec=None and cv-scores={}
    """
    logger.info(f"MODEL_CONFIG={json.dumps(machine.model)}")
    machine.evaluation = {"cv_mode": "build_only"}

    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()), OUTPUT_DIR=str(tmpdir)):
        metadata_file = f"{os.path.join(tmpdir, 'metadata.json')}"
        runner.invoke(cli.gordo, ["build"])

        # A model has been saved
        assert len(os.listdir(tmpdir)) != 0
        with open(metadata_file) as f:
            metadata_json = json.loads(f.read())
            assert (
                metadata_json["metadata"]["build_metadata"]["model"][
                    "cross_validation"
                ]["cv_duration_sec"]
                is None
            )
            assert (
                metadata_json["metadata"]["build_metadata"]["model"][
                    "cross_validation"
                ]["scores"]
                == {}
            )

def test_output_scores_metadata():
    data_config = get_random_data()
    raw_model_config = f"""
    gordo.machine.model.anomaly.diff.DiffBasedAnomalyDetector:
        scaler: sklearn.preprocessing.MinMaxScaler
        base_estimator:
            sklearn.compose.TransformedTargetRegressor:
                transformer: sklearn.preprocessing.MinMaxScaler
                regressor:
                    sklearn.pipeline.Pipeline:
                        steps:
                        - sklearn.preprocessing.MinMaxScaler
                        - gordo.machine.model.models.KerasAutoEncoder:
                            kind: feedforward_hourglass
                            batch_size: 3
                            compression_factor: 0.5
                            encoding_layers: 1
                            func: tanh
                            out_func: linear
                            epochs: 1
    """
    model_config = yaml.load(raw_model_config, Loader=yaml.FullLoader)
    machine = Machine(
        name="model-name", dataset=data_config, model=model_config, project_name="test"
    )
    model, machine_out = ModelBuilder(machine).build()
    scores_metadata = machine_out.metadata.build_metadata.model.cross_validation.scores

    assert (
        scores_metadata["explained-variance-score-Tag-1"]["fold-mean"]
        + scores_metadata["explained-variance-score-Tag-2"]["fold-mean"]
    ) / 2 == pytest.approx(scores_metadata["explained-variance-score"]["fold-mean"])

    assert (
        scores_metadata["r2-score-Tag-1"]["fold-mean"]
        + scores_metadata["r2-score-Tag-2"]["fold-mean"]
    ) / 2 == pytest.approx(scores_metadata["r2-score"]["fold-mean"])

    assert (
        scores_metadata["mean-squared-error-Tag-1"]["fold-mean"]
        + scores_metadata["mean-squared-error-Tag-2"]["fold-mean"]
    ) / 2 == pytest.approx(scores_metadata["mean-squared-error"]["fold-mean"])

    assert (
        scores_metadata["mean-absolute-error-Tag-1"]["fold-mean"]
        + scores_metadata["mean-absolute-error-Tag-2"]["fold-mean"]
    ) / 2 == pytest.approx(scores_metadata["mean-absolute-error"]["fold-mean"])

def test_provide_saved_model_caching_handle_existing_same_dir(tmpdir):
    """
    If the model exists in the model register, and the path there is the same
    as output_dir, output_dir is returned.
    """
    model_config = {"sklearn.decomposition.PCA": {"svd_solver": "auto"}}
    data_config = get_random_data()
    output_dir = os.path.join(tmpdir, "model")
    registry_dir = os.path.join(tmpdir, "registry")
    machine = Machine(
        name="model-name", dataset=data_config, model=model_config, project_name="test"
    )
    builder = ModelBuilder(machine)
    builder.build(output_dir=output_dir, model_register_dir=registry_dir)

    assert builder.cached_model_path == output_dir

    # Saving to same output_dir as the one saved in the registry just returns the output_dir
    builder.build(output_dir=output_dir, model_register_dir=registry_dir)
    assert builder.cached_model_path == output_dir

def _machine(name: str) -> Machine:
    """
    Helper to build a basic Machine, only defining its name
    """
    from gordo_dataset.sensor_tag import SensorTag

    return Machine.from_config(
        config={
            "name": name,
            "dataset": {
                "tag_list": [SensorTag("tag-1", "foo"), SensorTag("tag-2", "foo")],
                "train_start_date": "2016-01-01T00:00:00Z",
                "train_end_date": "2016-01-05T00:00:00Z",
            },
            "model": {"sklearn.linear_model.LinearRegression": {}},
        },
        project_name="test-project",
    )

def test_mlflow_context_log_metadata(MockClient, tmpdir, metadata):
    """
    Test that call to wrapped function initiates MLflow logging or throws warning
    """
    metadata = Machine(**metadata)

    mlflow.set_tracking_uri(f"file:{tmpdir}")

    mock_client = MockClient()
    mock_client.log_batch.return_value = "test"

    # Function with a metadata dict returned
    with mlu.mlflow_context("returns metadata", "unique_key", {}, {}) as (
        mlflow_client,
        run_id,
    ):
        mlu.log_machine(mlflow_client, run_id, metadata)

    assert mock_client.log_batch.called

def test_output_dir(tmpdir):
    """
    Test building of model will create subdirectories for model saving if needed.
    """
    model_config = {"sklearn.decomposition.PCA": {"svd_solver": "auto"}}
    data_config = get_random_data()
    output_dir = os.path.join(tmpdir, "some", "sub", "directories")

    machine = Machine(
        name="model-name", dataset=data_config, model=model_config, project_name="test"
    )
    builder = ModelBuilder(machine)
    model, machine_out = builder.build()
    machine_check(machine_out, False)

    builder._save_model(model=model, machine=machine_out, output_dir=output_dir)

    # Assert the model was saved at the location.
    # Should be the model file and the metadata.
    assert len(os.listdir(output_dir)) == 2

def __init__(self, config: dict, project_name: str):
    default_globals = self.DEFAULT_CONFIG_GLOBALS
    default_globals["runtime"]["influx"][  # type: ignore
        "resources"
    ] = _calculate_influx_resources(  # type: ignore
        len(config["machines"])
    )

    passed_globals = config.get("globals", dict())
    patched_globals = patch_dict(default_globals, passed_globals)
    if patched_globals.get("runtime"):
        patched_globals["runtime"] = fix_runtime(patched_globals.get("runtime"))

    self.project_name = project_name
    self.machines = [
        Machine.from_config(
            conf, project_name=project_name, config_globals=patched_globals
        )
        for conf in config["machines"]
    ]  # type: List[Machine]

    self.globals = patched_globals

def __init__(
    self,
    config: dict,
    project_name: str,
    gordo_version: Optional[str] = None,
    model_builder_env: Optional[dict] = None,
):
    if gordo_version is None:
        gordo_version = __version__
    default_globals = self.get_default_globals(gordo_version)
    default_globals["runtime"]["influx"][  # type: ignore
        "resources"
    ] = _calculate_influx_resources(  # type: ignore
        len(config["machines"])
    )

    passed_globals = config.get("globals", dict())

    # Kept for backwards compatibility: only apply model_builder_env when the
    # passed globals do not already define runtime.builder.env
    if model_builder_env is not None and not (
        passed_globals
        and "runtime" in passed_globals
        and "builder" in passed_globals["runtime"]
        and "env" in passed_globals["runtime"]["builder"]
    ):
        if "builder" not in default_globals["runtime"]:
            default_globals["runtime"]["builder"] = {}
        default_globals["runtime"]["builder"]["env"] = model_builder_env

    patched_globals = patch_dict(default_globals, passed_globals)
    patched_globals = self.prepare_patched_globals(patched_globals)

    self.project_name = project_name
    self.machines: List[Machine] = [
        Machine.from_config(
            conf, project_name=project_name, config_globals=patched_globals
        )
        for conf in config["machines"]
    ]

    self.globals: dict = patched_globals

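# Behaviour sketch for the model_builder_env back-compat branch above (the enclosing
# class name and values are assumed here for illustration): user globals that already
# pin runtime.builder.env win over the model_builder_env argument, because patch_dict
# merges passed_globals on top of the defaults; otherwise model_builder_env survives:
#
#     config = {
#         "machines": [...],
#         "globals": {"runtime": {"builder": {"env": {"LOG_LEVEL": "DEBUG"}}}},
#     }
#     normalized = NormalizedConfig(
#         config, project_name="proj", model_builder_env={"LOG_LEVEL": "INFO"}
#     )
#     # normalized.globals["runtime"]["builder"]["env"] == {"LOG_LEVEL": "DEBUG"}
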
def test_provide_saved_model_caching_handle_existing_different_register(tmpdir):
    """
    If the model exists in the model register, but output_dir is not where the
    model is, the model is copied to the new location, unless the new location
    already exists; if it does, then that location is returned.
    """
    model_config = {"sklearn.decomposition.PCA": {"svd_solver": "auto"}}
    data_config = get_random_data()
    output_dir1 = os.path.join(tmpdir, "model1")
    output_dir2 = os.path.join(tmpdir, "model2")
    registry_dir = os.path.join(tmpdir, "registry")
    machine = Machine(
        name="model-name", dataset=data_config, model=model_config, project_name="test"
    )
    builder = ModelBuilder(machine)
    builder.build(output_dir=output_dir1, model_register_dir=registry_dir)

    builder.build(output_dir=output_dir2, model_register_dir=registry_dir)
    assert builder.cached_model_path == output_dir2

    builder.build(output_dir=output_dir2, model_register_dir=registry_dir)
    assert builder.cached_model_path == output_dir2