Example No. 1
    def test_build_use_registry(self):
        """
        Using a registry causes the second build of a model to return the path to the
        first.
        """

        with tempfile.TemporaryDirectory() as tmpdir:
            with temp_env_vars(
                    OUTPUT_DIR=os.path.join(tmpdir, "dir1"),
                    DATA_CONFIG=DATA_CONFIG,
                    MODEL_CONFIG=json.dumps(MODEL_CONFIG),
                    MODEL_REGISTER_DIR=tmpdir + "/reg",
            ):
                result1 = self.runner.invoke(cli.gordo, ["build"])

            self.assertEqual(result1.exit_code,
                             0,
                             msg=f"Command failed: {result1}")
            with open("/tmp/model-location.txt") as f:
                first_path = f.read()

            # OUTPUT_DIR is the only difference
            with temp_env_vars(
                    OUTPUT_DIR=os.path.join(tmpdir, "dir2"),
                    DATA_CONFIG=DATA_CONFIG,
                    MODEL_CONFIG=json.dumps(MODEL_CONFIG),
                    MODEL_REGISTER_DIR=tmpdir + "/reg",
            ):
                result2 = self.runner.invoke(cli.gordo, ["build"])
            self.assertEqual(result2.exit_code,
                             0,
                             msg=f"Command failed: {result2}")
            with open("/tmp/model-location.txt") as f:
                second_path = f.read()
            assert first_path == second_path
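
All of these examples revolve around a temp_env_vars helper that sets environment variables only for the duration of a with block. Its implementation is not shown on this page; the following is a minimal sketch of such a helper, assuming a plain contextlib/os.environ approach rather than gordo's actual implementation:

import contextlib
import os


@contextlib.contextmanager
def temp_env_vars(**kwargs):
    """Temporarily set environment variables, restoring the originals on exit."""
    original = dict(os.environ)
    os.environ.update({key: str(value) for key, value in kwargs.items()})
    try:
        yield
    finally:
        os.environ.clear()
        os.environ.update(original)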
Example No. 2
def test_build_cv_mode_cross_val_cache(
    tmpdir,
    should_save_model: bool,
    cv_mode_1: str,
    cv_mode_2: str,
    runner: CliRunner,
    machine: Machine,
):
    """
    Checks that cv_scores uses the cache when run after a full build. Loads the same
    model and can print the cv_scores from it.
    """
    logger.info(f"MODEL_CONFIG={json.dumps(machine.model)}")

    machine.evaluation = cv_mode_1  # type: ignore
    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()),
                       OUTPUT_DIR=str(tmpdir)):
        runner.invoke(cli.gordo, ["build"])

    machine.evaluation = cv_mode_2  # type: ignore
    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()),
                       OUTPUT_DIR=str(tmpdir)):
        runner.invoke(cli.gordo, ["build"])

    if should_save_model:
        assert len(os.listdir(tmpdir)) > 0
    else:
        assert len(os.listdir(tmpdir)) == 0
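
The should_save_model, cv_mode_1 and cv_mode_2 arguments come from pytest parametrization that this snippet omits. A plausible sketch of that decorator is shown below; the exact cv_mode names and pairings are assumptions based on the other examples on this page, not gordo's published parameter sets:

import pytest


@pytest.mark.parametrize(
    "should_save_model,cv_mode_1,cv_mode_2",
    [
        # Assumed pairings: a full build followed by a cross-validation-only run
        # should leave a saved model behind; two cross-val-only runs should not.
        (True, {"cv_mode": "full_build"}, {"cv_mode": "cross_val_only"}),
        (False, {"cv_mode": "cross_val_only"}, {"cv_mode": "cross_val_only"}),
    ],
)
def test_build_cv_mode_cross_val_cache(tmpdir, should_save_model, cv_mode_1, cv_mode_2, runner, machine):
    ...  # body as in the example above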
Example No. 3
def test_build_use_registry(runner, tmpdir, machine):
    """
    Using a registry causes the second build of a model to copy the first to the
    new location.
    """

    output_dir_1 = os.path.join(tmpdir, "dir1")
    output_dir_2 = os.path.join(tmpdir, "dir2")

    with temp_env_vars(
            MACHINE=json.dumps(machine.to_dict()),
            OUTPUT_DIR=output_dir_1,
            MODEL_REGISTER_DIR=os.path.join(tmpdir, "reg"),
    ):
        result1 = runner.invoke(cli.gordo, ["build"])

    assert result1.exit_code == 0, f"Command failed: {result1}"
    # OUTPUT_DIR is the only difference
    with temp_env_vars(
            MACHINE=json.dumps(machine.to_dict()),
            OUTPUT_DIR=output_dir_2,
            MODEL_REGISTER_DIR=os.path.join(tmpdir, "reg"),
    ):
        result2 = runner.invoke(cli.gordo, ["build"])
    assert result2.exit_code == 0, f"Command failed: {result2}"

    first_metadata = serializer.load_metadata(output_dir_1)
    second_metadata = serializer.load_metadata(output_dir_2)

    # The metadata contains the model build date, so if it got rebuilt these two
    # would be different
    assert (first_metadata["metadata"]["build_metadata"]["model"]
            ["model_creation_date"] == second_metadata["metadata"]
            ["build_metadata"]["model"]["model_creation_date"])
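
The registry behaviour that this test and Example No. 1 exercise is not shown here. The idea is that each build is keyed by its configuration under MODEL_REGISTER_DIR, so a second build with an identical configuration reuses the first output instead of retraining. The following is an illustrative sketch of that pattern, not gordo's actual implementation:

import hashlib
import json
import os
import shutil


def build_with_registry(machine_config: dict, output_dir: str, register_dir: str, build_fn):
    """Reuse a previous build when an identical configuration was already built."""
    os.makedirs(register_dir, exist_ok=True)
    key = hashlib.sha256(
        json.dumps(machine_config, sort_keys=True).encode()
    ).hexdigest()
    pointer = os.path.join(register_dir, key)

    if os.path.exists(pointer):
        # Cache hit: copy the previously built model instead of rebuilding.
        with open(pointer) as f:
            shutil.copytree(f.read(), output_dir)
    else:
        build_fn(machine_config, output_dir)
        with open(pointer, "w") as f:
            f.write(output_dir)
    return output_dir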
Example No. 4
def test_mlflow_reporter_set_cli_build(
    MockClient,
    mock_get_workspace_kwargs,
    mock_get_spauth_kwargs,
    monkeypatch,
    runner,
    tmpdir,
    machine,
):
    """
    Tests disabling MlFlow logging via the CLI, and that missing env vars raise an error when logging is enabled
    """

    mlflow.set_tracking_uri(f"file:{tmpdir}")
    machine.runtime = dict(
        reporters=[{
            "gordo.reporters.mlflow.MlFlowReporter": dict()
        }])

    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()),
                       OUTPUT_DIR=str(tmpdir)):
        # Logging enabled, without env vars set:
        # Raise error
        result = runner.invoke(cli.gordo, ["build"])
        assert result.exit_code != 0

        # Logging enabled, with env vars set:
        # Build success, remote logging executed
        with monkeypatch.context() as m:
            m.setenv("DL_SERVICE_AUTH_STR", "test:test:test")
            m.setenv("AZUREML_WORKSPACE_STR", "test:test:test")

            result = runner.invoke(cli.gordo, ["build"])
            assert result.exit_code == 0
            assert MockClient.called
            assert mock_get_workspace_kwargs.called
            assert mock_get_spauth_kwargs.called

        # Reset call counts
        for m in [
                MockClient, mock_get_workspace_kwargs, mock_get_spauth_kwargs
        ]:
            m.reset_mock()

    # Logging not enabled:
    # Build success, remote logging not executed
    machine.runtime = dict(builder=dict(remote_logging=dict(enable=False)))
    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()),
                       OUTPUT_DIR=str(tmpdir)):
        result = runner.invoke(cli.gordo, ["build"])
        assert result.exit_code == 0
        assert not MockClient.called
        assert not mock_get_workspace_kwargs.called
        assert not mock_get_spauth_kwargs.called
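
The MockClient, mock_get_workspace_kwargs and mock_get_spauth_kwargs arguments are supplied by mock.patch decorators that this snippet does not show. A hedged sketch of how such a stack is usually wired is below; the patch targets are illustrative placeholders and may not match gordo's real module paths. Note that decorators apply bottom-up, so the lowest patch corresponds to the first mock argument:

from unittest import mock


# Placeholder patch targets for illustration only; the real paths into
# gordo.reporters.mlflow may differ.
@mock.patch("gordo.reporters.mlflow.get_spauth_kwargs")
@mock.patch("gordo.reporters.mlflow.get_workspace_kwargs")
@mock.patch("gordo.reporters.mlflow.MlflowClient")
def test_mlflow_reporter_set_cli_build(
    MockClient,
    mock_get_workspace_kwargs,
    mock_get_spauth_kwargs,
    monkeypatch,
    runner,
    tmpdir,
    machine,
):
    ...  # body as in the example above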
Example No. 5
def test_build_cv_mode(tmpdir, runner: CliRunner, should_save_model: bool,
                       cv_mode: str, machine: Machine):
    """
    Testing build with cv_mode set to full and cross_val_only. Checks that cv_scores are
    printed and that the model is only saved when using the default (full) value.
    """
    machine.model = MODEL_CONFIG_WITH_PREDICT
    machine.evaluation = cv_mode  # type: ignore

    logger.info(f"MODEL_CONFIG={json.dumps(machine.model)}")

    tmp_model_dir = os.path.join(tmpdir, "tmp")
    os.makedirs(tmp_model_dir, exist_ok=True)

    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()),
                       OUTPUT_DIR=tmp_model_dir):
        result = runner.invoke(cli.gordo, ["build", "--print-cv-scores"])
        assert result.exit_code == 0
        # Checks that the file is empty or not depending on the mode.
        if should_save_model:
            assert len(os.listdir(tmp_model_dir)) != 0
        else:
            assert len(os.listdir(tmp_model_dir)) == 0

        # Check that the output contains the expected score names
        assert "r2-score" in result.output
        assert "mean-squared-error" in result.output
        assert "mean-absolute-error" in result.output
        assert "explained-variance-score" in result.output
Example No. 6
def test_list_revisions_listdir_fail(caplog):
    """
    Verify the server does not fail when listing the directories above its
    current model collection directory fails.
    """
    def listdir_fail(*args, **kwargs):
        raise FileNotFoundError()

    expected_revision = "some-project-revision-123"

    with patch.object(os, "listdir",
                      side_effect=listdir_fail) as mocked_listdir:
        with caplog.at_level(logging.CRITICAL):
            with tu.temp_env_vars(MODEL_COLLECTION_DIR=expected_revision):
                app = server.build_app({"ENABLE_PROMETHEUS": False})
                app.testing = True
                client = app.test_client()
                resp = client.get("/gordo/v0/test-project/revisions")

    mocked_listdir.assert_called_once()
    assert set(
        resp.json.keys()) == {"latest", "available-revisions", "revision"}
    assert resp.json["latest"] == expected_revision
    assert isinstance(resp.json["available-revisions"], list)
    assert resp.json["available-revisions"] == [expected_revision]
Example No. 7
def test_list_revisions(tmpdir, revisions: List[str]):
    """
    Verify the server can return the project revisions it is capable of serving.
    """

    # Server gets the 'latest' directory to serve models from, but knows other
    # revisions should be available a step up from this directory.
    model_dir = os.path.join(tmpdir, revisions[0])

    # Make revision directories under the tmpdir
    for rev in revisions:
        os.mkdir(os.path.join(tmpdir, rev))

    # Request from the server what revisions it can serve, should match
    with tu.temp_env_vars(MODEL_COLLECTION_DIR=model_dir):
        app = server.build_app({"ENABLE_PROMETHEUS": False})
        app.testing = True
        client = app.test_client()
        resp = client.get("/gordo/v0/test-project/revisions")
        resp_with_revision = client.get(
            f"/gordo/v0/test-project/revisions?revision={revisions[-1]}")

    assert set(
        resp.json.keys()) == {"latest", "available-revisions", "revision"}
    assert resp.json["latest"] == os.path.basename(model_dir)
    assert resp.json["revision"] == os.path.basename(model_dir)
    assert isinstance(resp.json["available-revisions"], list)
    assert set(resp.json["available-revisions"]) == set(revisions)

    # The request asking for a specific revision gives back that revision,
    # while still reporting the expected latest available revision.
    assert resp_with_revision.json["latest"] == os.path.basename(model_dir)
    assert resp_with_revision.json["revision"] == revisions[-1]
Example No. 8
def test_build_cv_mode_build_only(tmp_dir: tempfile.TemporaryDirectory):
    """
    Testing build with cv_mode set to build_only. Checks that OUTPUT_DIR gets a model
    saved to it. It also checks that the metadata contains cv-duration-sec=None and
    cv-scores={}
    """

    runner = CliRunner()

    logger.info(f"MODEL_CONFIG={json.dumps(MODEL_CONFIG)}")

    with temp_env_vars(
        MODEL_NAME="model-name",
        OUTPUT_DIR=tmp_dir.name,
        DATA_CONFIG=DATA_CONFIG,
        MODEL_CONFIG=json.dumps(MODEL_CONFIG),
    ):

        metadata_file = os.path.join(tmp_dir.name, "metadata.json")
        runner.invoke(
            cli.gordo, ["build", '--evaluation-config={"cv_mode": "build_only"}']
        )

        # A model has been saved
        assert len(os.listdir(tmp_dir.name)) != 0
        with open(metadata_file) as f:
            metadata_json = json.loads(f.read())
            assert metadata_json["model"]["cross-validation"]["cv-duration-sec"] is None
            assert metadata_json["model"]["cross-validation"]["scores"] == {}
Example No. 9
def test_build_cv_mode_cross_val_cache(
    should_be_equal: bool,
    cv_mode_1: str,
    cv_mode_2: str,
    tmp_dir: tempfile.TemporaryDirectory,
):
    """
    Checks that cv_scores uses the cache when run after a full build. Loads the same
    model and can print the cv_scores from it.
    """

    runner = CliRunner()

    logger.info(f"MODEL_CONFIG={json.dumps(MODEL_CONFIG)}")

    model_register_dir = os.path.join(tmp_dir.name, "reg")

    with temp_env_vars(
        MODEL_NAME="model-name",
        OUTPUT_DIR=tmp_dir.name,
        DATA_CONFIG=DATA_CONFIG,
        MODEL_CONFIG=json.dumps(MODEL_CONFIG),
        MODEL_REGISTER_DIR=model_register_dir,
    ):

        runner.invoke(cli.gordo, ["build", f"--evaluation-config={cv_mode_1}"])
        runner.invoke(cli.gordo, ["build", f"--evaluation-config={cv_mode_2}"])

        if should_be_equal:
            assert os.path.exists(model_register_dir)
        else:
            assert not os.path.exists(model_register_dir)
Example No. 10
def test_build_cv_mode(
    should_save_model: bool, cv_mode: str, tmp_dir: tempfile.TemporaryDirectory
):
    """
    Testing build with cv_mode set to full and cross_val_only. Checks that cv_scores are
    printed and that the model is only saved when using the default (full) value.
    """

    runner = CliRunner()

    logger.info(f"MODEL_CONFIG={json.dumps(MODEL_CONFIG_WITH_PREDICT)}")

    with temp_env_vars(
        MODEL_NAME="model-name",
        OUTPUT_DIR=tmp_dir.name,
        DATA_CONFIG=DATA_CONFIG,
        MODEL_CONFIG=json.dumps(MODEL_CONFIG_WITH_PREDICT),
    ):
        result = runner.invoke(
            cli.gordo, ["build", "--print-cv-scores", f"--evaluation-config={cv_mode}"]
        )
        # Checks that the file is empty or not depending on the mode.
        if should_save_model:
            assert len(os.listdir(tmp_dir.name)) != 0
        else:
            assert len(os.listdir(tmp_dir.name)) == 0

        # Check that the output contains the expected score names
        assert "r2-score" in result.output
        assert "mean-squared-error" in result.output
        assert "mean-absolute-error" in result.output
        assert "explained-variance-score" in result.output
Example No. 11
    def test_build_env_args(self):
        """
        Instead of passing OUTPUT_DIR directly to the CLI, the build should be
        able to read it from environment variables.
        """

        logger.info(f"MODEL_CONFIG={json.dumps(MODEL_CONFIG)}")

        with tempfile.TemporaryDirectory() as tmpdir:
            with temp_env_vars(
                    MODEL_NAME="model-name",
                    OUTPUT_DIR=tmpdir,
                    DATA_CONFIG=DATA_CONFIG,
                    MODEL_CONFIG=json.dumps(MODEL_CONFIG),
            ):
                result = self.runner.invoke(cli.gordo, ["build"])

            self.assertEqual(result.exit_code,
                             0,
                             msg=f"Command failed: {result}")
            self.assertTrue(
                os.path.exists("/tmp/model-location.txt"),
                msg='Building was supposed to create a "model-location.txt", but it did not!',
            )
Example No. 12
    def test_build_use_registry_bust_cache(self):
        """
        Even using a registry we get separate model-paths when we ask for models for
        different configurations.
        """

        with tempfile.TemporaryDirectory() as tmpdir:
            output_dir_1 = os.path.join(tmpdir, "dir1")
            output_dir_2 = os.path.join(tmpdir, "dir2")

            with temp_env_vars(
                MODEL_NAME="model-name",
                OUTPUT_DIR=output_dir_1,
                DATA_CONFIG=DATA_CONFIG,
                MODEL_CONFIG=json.dumps(MODEL_CONFIG),
                MODEL_REGISTER_DIR=tmpdir + "/reg",
            ):
                result1 = self.runner.invoke(cli.gordo, ["build"])

            self.assertEqual(result1.exit_code, 0, msg=f"Command failed: {result1}")

            with temp_env_vars(
                MODEL_NAME="model-name",
                OUTPUT_DIR=output_dir_2,
                # NOTE: Different train dates!
                DATA_CONFIG=(
                    "{"
                    ' "type": "RandomDataset",'
                    ' "train_start_date": "2019-01-01T00:00:00+00:00", '
                    ' "train_end_date": "2019-06-01T00:00:00+00:00",'
                    ' "tags": ["TRC1", "TRC2"]'
                    "}"
                ),
                MODEL_CONFIG=json.dumps(MODEL_CONFIG),
                MODEL_REGISTER_DIR=tmpdir + "/reg",
            ):
                result2 = self.runner.invoke(cli.gordo, ["build"])
            self.assertEqual(result2.exit_code, 0, msg=f"Command failed: {result2}")

            first_metadata = serializer.load_metadata(output_dir_1)
            second_metadata = serializer.load_metadata(output_dir_2)
            # The metadata contains the model build date, so if it got rebuilt these two
            # would be different
            assert (
                first_metadata["model"]["model-creation-date"]
                != second_metadata["model"]["model-creation-date"]
            )
Example No. 13
def gordo_ml_server_client(request, trained_model_directory):

    with tu.temp_env_vars(MODEL_COLLECTION_DIR=trained_model_directory):

        app = server.build_app()
        app.testing = True

        yield app.test_client()
Example No. 14
def test_model_list_view_non_existant_proj():
    with tu.temp_env_vars(MODEL_COLLECTION_DIR=os.path.join("does", "not", "exist")):
        app = server.build_app()
        app.testing = True
        client = app.test_client()
        resp = client.get("/gordo/v0/test-project/models")
        assert resp.status_code == 200
        assert resp.json["models"] == []
Example No. 15
def gordo_ml_server_client(request, trained_model_directory):

    with tu.temp_env_vars(MODEL_LOCATION=trained_model_directory):

        app = server.build_app(data_provider=request.param)
        app.testing = True

        yield app.test_client()
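
This fixture reads request.param when building the app, which means it is indirectly parametrized: every test that requests it runs once per parameter. A small self-contained illustration of that pytest mechanism (the values are placeholders, not gordo's data providers):

import pytest


# Each test requesting this fixture runs once per entry in `params`,
# and the fixture picks up the current value through request.param.
@pytest.fixture(params=["provider-a", "provider-b"])
def data_provider_name(request):
    return request.param


def test_data_provider_name(data_provider_name):
    assert data_provider_name in ("provider-a", "provider-b")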
Example No. 16
    def test_build_use_registry_bust_cache(self):
        """
        Even using a registry we get separate model-paths when we ask for models for
        different configurations.
        """

        with tempfile.TemporaryDirectory() as tmpdir:
            with temp_env_vars(
                    MODEL_NAME="model-name",
                    OUTPUT_DIR=os.path.join(tmpdir, "dir1"),
                    DATA_CONFIG=DATA_CONFIG,
                    MODEL_CONFIG=json.dumps(MODEL_CONFIG),
                    MODEL_REGISTER_DIR=tmpdir + "/reg",
            ):
                result1 = self.runner.invoke(cli.gordo, ["build"])

            self.assertEqual(result1.exit_code,
                             0,
                             msg=f"Command failed: {result1}")
            with open("/tmp/model-location.txt") as f:
                first_path = f.read()

            with temp_env_vars(
                    MODEL_NAME="model-name",
                    OUTPUT_DIR=os.path.join(tmpdir, "dir2"),
                    # NOTE: Different train dates!
                    DATA_CONFIG=(
                        "{"
                        ' "type": "RandomDataset",'
                        ' "train_start_date": "2019-01-01T00:00:00+00:00", '
                        ' "train_end_date": "2019-06-01T00:00:00+00:00",'
                        ' "tags": ["TRC1", "TRC2"]'
                        "}"),
                    MODEL_CONFIG=json.dumps(MODEL_CONFIG),
                    MODEL_REGISTER_DIR=tmpdir + "/reg",
            ):
                result2 = self.runner.invoke(cli.gordo, ["build"])
            self.assertEqual(result2.exit_code,
                             0,
                             msg=f"Command failed: {result2}")
            with open("/tmp/model-location.txt") as f:
                second_path = f.read()
            assert first_path != second_path
Example No. 17
def test_request_specific_revision(trained_model_directory, tmpdir, revisions):

    model_name = "test-model"
    current_revision = revisions[0]
    collection_dir = os.path.join(tmpdir, current_revision)

    # Copy trained model into revision model folders
    for revision in revisions:
        model_dir = os.path.join(tmpdir, revision, model_name)
        shutil.copytree(trained_model_directory, model_dir)

        # Now overwrite the metadata.json file to ensure the server actually reads
        # the metadata for this specific revision
        metadata_file = os.path.join(model_dir, "metadata.json")
        assert os.path.isfile(metadata_file)
        with open(metadata_file, "w") as fp:
            json.dump({"revision": revision, "model": model_name}, fp)

    with tu.temp_env_vars(MODEL_COLLECTION_DIR=collection_dir):
        app = server.build_app({"ENABLE_PROMETHEUS": False})
        app.testing = True
        client = app.test_client()
        for revision in revisions:
            resp = client.get(
                f"/gordo/v0/test-project/{model_name}/metadata?revision={revision}"
            )
            assert resp.status_code == 200
            assert resp.json["revision"] == revision

            # Verify the server read the metadata.json file we had overwritten
            assert resp.json["metadata"] == {
                "revision": revision,
                "model": model_name
            }

        # Asking for a revision which doesn't exist gives a 410 Gone.
        resp = client.get(
            f"/gordo/v0/test-project/{model_name}/metadata?revision=does-not-exist"
        )
        assert resp.status_code == 410
        assert resp.json == {
            "error": "Revision 'does-not-exist' not found.",
            "revision": "does-not-exist",
        }

        # Again, but via the header, to ensure the header is also checked
        resp = client.get(
            f"/gordo/v0/test-project/{model_name}/metadata",
            headers={"revision": "does-not-exist"},
        )
        assert resp.status_code == 410
        assert resp.json == {
            "error": "Revision 'does-not-exist' not found.",
            "revision": "does-not-exist",
        }
Example No. 18
    def test_build_model_with_parameters(self):
        """
        Building a simple model with parameters set works.
        """

        model = """
        {
         "sklearn.decomposition.pca.PCA":
          {
            "svd_solver": "{{svd_solver}}",
            "n_components": {{n_components}}
          }
        }
        """

        svd_solver = "auto"
        n_components = 0.5

        logger.info(f"MODEL_CONFIG={json.dumps(model)}")

        with tempfile.TemporaryDirectory() as tmpdir:
            with temp_env_vars(
                    MODEL_NAME="model-name",
                    OUTPUT_DIR=tmpdir,
                    DATA_CONFIG=DATA_CONFIG,
                    MODEL_CONFIG=model,
            ):
                location_file = os.path.join(tmpdir, "special-model-location.txt")
                args = [
                    "build",
                    "--model-parameter",
                    f"svd_solver,{svd_solver}",
                    "--model-parameter",
                    f"n_components,{n_components}",
                    "--model-location-file",
                    location_file,
                ]

                # Run it twice to ensure the model location in the location file
                # is only written once and not appended.
                for _ in range(2):

                    result = self.runner.invoke(cli.gordo, args=args)

                    self.assertEqual(result.exit_code,
                                     0,
                                     msg=f"Command failed: {result}")
                    self.assertTrue(
                        os.path.exists(location_file),
                        msg=f'Building was supposed to create a model location file at "{location_file}", but it did not!',
                    )
                    with open(location_file, "r") as f:
                        assert f.read() == tmpdir
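
The {{svd_solver}} and {{n_components}} placeholders in the model config are filled in from the repeated --model-parameter flags. A minimal sketch of that substitution step, assuming a Jinja2-style rendering of the config (gordo's actual templating may differ):

import jinja2

model_template = """
{
 "sklearn.decomposition.pca.PCA":
  {
    "svd_solver": "{{svd_solver}}",
    "n_components": {{n_components}}
  }
}
"""

# Each "--model-parameter name,value" flag becomes one key/value pair here.
rendered = jinja2.Template(model_template).render(svd_solver="auto", n_components=0.5)
print(rendered)  # the placeholders are now concrete values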
Example No. 19
def gordo_ml_server_client(
    request, model_collection_directory, trained_model_directory
):

    with tu.temp_env_vars(MODEL_COLLECTION_DIR=model_collection_directory):

        app = server.build_app()
        app.testing = True

        # always return a valid asset for any tag name
        with patch.object(sensor_tag, "_asset_from_tag_name", return_value="default"):
            yield app.test_client()
Example No. 20
def test_server_version_route(model_collection_directory, gordo_revision):
    """
    Simple route which returns the current version
    """
    with tu.temp_env_vars(MODEL_COLLECTION_DIR=model_collection_directory):
        app = server.build_app()
        app.testing = True
        client = app.test_client()

        resp = client.get("/server-version")
        assert resp.status_code == 200
        assert resp.json == {"revision": gordo_revision, "version": __version__}
Example No. 21
    def test_build_use_registry(self):
        """
        Using a registry causes the second build of a model to copy the first to the
        new location.
        """

        with tempfile.TemporaryDirectory() as tmpdir:
            output_dir_1 = os.path.join(tmpdir, "dir1")
            output_dir_2 = os.path.join(tmpdir, "dir2")

            with temp_env_vars(
                MODEL_NAME="model-name",
                OUTPUT_DIR=output_dir_1,
                DATA_CONFIG=DATA_CONFIG,
                MODEL_CONFIG=json.dumps(MODEL_CONFIG),
                MODEL_REGISTER_DIR=tmpdir + "/reg",
            ):
                result1 = self.runner.invoke(cli.gordo, ["build"])

            self.assertEqual(result1.exit_code, 0, msg=f"Command failed: {result1}")
            # OUTPUT_DIR is the only difference
            with temp_env_vars(
                MODEL_NAME="model-name",
                OUTPUT_DIR=output_dir_2,
                DATA_CONFIG=DATA_CONFIG,
                MODEL_CONFIG=json.dumps(MODEL_CONFIG),
                MODEL_REGISTER_DIR=tmpdir + "/reg",
            ):
                result2 = self.runner.invoke(cli.gordo, ["build"])
            self.assertEqual(result2.exit_code, 0, msg=f"Command failed: {result2}")

            first_metadata = serializer.load_metadata(output_dir_1)
            second_metadata = serializer.load_metadata(output_dir_2)

            # The metadata contains the model build date, so if it got rebuilt these two
            # would be different
            assert (
                first_metadata["model"]["model-creation-date"]
                == second_metadata["model"]["model-creation-date"]
            )
Example No. 22
def test_build_use_registry_bust_cache(runner, tmpdir, machine):
    """
    Even using a registry we get separate model-paths when we ask for models for
    different configurations.
    """

    output_dir_1 = os.path.join(tmpdir, "dir1")
    output_dir_2 = os.path.join(tmpdir, "dir2")

    with temp_env_vars(
            MACHINE=json.dumps(machine.to_dict()),
            OUTPUT_DIR=output_dir_1,
            MODEL_REGISTER_DIR=os.path.join(tmpdir, "reg"),
    ):
        result1 = runner.invoke(cli.gordo, ["build"])

    assert result1.exit_code == 0, f"Command failed: {result1}"

    # NOTE: Different train dates!
    machine.dataset = machine.dataset.from_dict({
        "type": "RandomDataset",
        "train_start_date": "2019-01-01T00:00:00+00:00",
        "train_end_date": "2019-06-01T00:00:00+00:00",
        "tags": ["TRC1", "TRC2"],
    })
    with temp_env_vars(
            MACHINE=json.dumps(machine.to_dict()),
            OUTPUT_DIR=output_dir_2,
            MODEL_REGISTER_DIR=os.path.join(tmpdir, "reg"),
    ):
        result2 = runner.invoke(cli.gordo, ["build"])
    assert result2.exit_code == 0, f"Command failed: {result2}"

    first_metadata = serializer.load_metadata(output_dir_1)
    second_metadata = serializer.load_metadata(output_dir_2)
    # The metadata contains the model build date, so if it got rebuilt these two
    # would be different
    assert (first_metadata["metadata"]["build_metadata"]["model"]
            ["model_creation_date"] != second_metadata["metadata"]
            ["build_metadata"]["model"]["model_creation_date"])
Example No. 23
def test_non_existant_model_metadata(tmpdir, gordo_project, api_version):
    """
    Requesting metadata for a model that does not exist returns a 404.
    """
    with tu.temp_env_vars(MODEL_COLLECTION_DIR=str(tmpdir)):
        app = server.build_app({"ENABLE_PROMETHEUS": False})
        app.testing = True
        client = app.test_client()

        resp = client.get(
            f"/gordo/{api_version}/{gordo_project}/model-does-not-exist/metadata"
        )
        assert resp.status_code == 404
Example No. 24
    def test_build_model_with_parameters(self):
        """
        Building a simple model with parameters set works.
        """

        model = """
        {
         "sklearn.decomposition.pca.PCA":
          {
            "svd_solver": "{{svd_solver}}",
            "n_components": {{n_components}}
          }
        }
        """

        svd_solver = "auto"
        n_components = 0.5

        logger.info(f"MODEL_CONFIG={json.dumps(model)}")

        with tempfile.TemporaryDirectory() as tmpdir:
            with temp_env_vars(
                MODEL_NAME="model-name",
                OUTPUT_DIR=tmpdir,
                DATA_CONFIG=DATA_CONFIG,
                MODEL_CONFIG=model,
            ):
                args = [
                    "build",
                    "--model-parameter",
                    f"svd_solver,{svd_solver}",
                    "--model-parameter",
                    f"n_components,{n_components}",
                ]

                # Run it twice to ensure repeated builds into the same
                # OUTPUT_DIR succeed.
                for _ in range(2):

                    result = self.runner.invoke(cli.gordo, args=args)

                    self.assertEqual(
                        result.exit_code, 0, msg=f"Command failed: {result}"
                    )
                    self.assertGreater(
                        len(os.listdir(tmpdir)),
                        1,
                        msg="Building was supposed to create at least two files ("
                        "model and metadata) in OUTPUT_DIR, but it did not!",
                    )
Example No. 25
def test_build_env_args(runner, tmpdir, machine):
    """
    Instead of passing OUTPUT_DIR directly to the CLI, the build should be
    able to read it from environment variables.
    """
    logger.info(f"MODEL_CONFIG={json.dumps(MODEL_CONFIG)}")

    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()),
                       OUTPUT_DIR=str(tmpdir)):
        result = runner.invoke(cli.gordo, ["build"])

    assert result.exit_code == 0, f"Command failed: {result}, {result.exception}"
    assert (
        len(os.listdir(tmpdir)) > 1
    ), "Building was supposed to create at least two files (model and metadata) in OUTPUT_DIR, but it did not!"
Example No. 26
def test_expected_models_route(tmpdir):
    """
    Route that gives back the expected model names, which are just read from
    the 'EXPECTED_MODELS' env var.
    """
    with tu.temp_env_vars(
            MODEL_COLLECTION_DIR=str(tmpdir),
            EXPECTED_MODELS=json.dumps(["model-a", "model-b"]),
    ):
        app = server.build_app({"ENABLE_PROMETHEUS": False})
        app.testing = True
        client = app.test_client()

        resp = client.get("/gordo/v0/test-project/expected-models")
        assert resp.json["expected-models"] == ["model-a", "model-b"]
Example No. 27
def test_models_by_revision_list_view(caplog, tmpdir, revision_to_models):
    """
    Server returns expected models it can serve under specific revisions.

    revision_to_models: Dict[str, Tuple[str, ...]]
        Map revision codes to models belonging to that revision.
        Simulate serving one revision while having access to other
        revisions and their models.
    """

    # Current collection dir for the server, order isn't important.
    if revision_to_models:
        collection_dir = os.path.join(tmpdir,
                                      list(revision_to_models.keys())[0])
    else:
        # This will cause a failure to look up a certain revision
        collection_dir = str(tmpdir)

    # Make all the revision and model subfolders
    for revision in revision_to_models.keys():
        os.mkdir(os.path.join(tmpdir, revision))
        for model in revision_to_models[revision]:
            os.makedirs(os.path.join(tmpdir, revision, model), exist_ok=True)

    with tu.temp_env_vars(MODEL_COLLECTION_DIR=collection_dir):
        app = server.build_app({"ENABLE_PROMETHEUS": False})
        app.testing = True
        client = app.test_client()
        for revision in revision_to_models:
            resp = client.get(
                f"/gordo/v0/test-project/models?revision={revision}")
            assert resp.status_code == 200
            assert "models" in resp.json
            assert sorted(resp.json["models"]) == sorted(
                revision_to_models[revision])
        # Regardless of which revisions exist, asking for an arbitrary unknown
        # revision gives a 410 Gone with an error message.
        resp = client.get(
            "/gordo/v0/test-project/models?revision=revision-does-not-exist"
        )
        assert resp.status_code == 410
        assert resp.json == {
            "error": "Revision 'revision-does-not-exist' not found.",
            "revision": "revision-does-not-exist",
        }
Example No. 28
def test_build_exit_code(exception, exit_code, runner, tmpdir, machine):
    """
    Test that cli build exits with different exit codes for different errors.
    """
    machine.model = MODEL_CONFIG_WITH_PREDICT
    machine.evaluation = {"cv_mode": "cross_val_only"}

    logger.info(f"MODEL_CONFIG={json.dumps(machine.model)}")
    with mock.patch(
            "gordo.cli.cli.ModelBuilder.build",
            mock.MagicMock(side_effect=exception,
                           autospec=True,
                           return_value=None),
    ):
        with temp_env_vars(MACHINE=json.dumps(machine.to_dict()),
                           OUTPUT_DIR=str(tmpdir)):
            result = runner.invoke(cli.gordo, ["build"])
            assert result.exit_code == exit_code
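
The exception and exit_code arguments come from parametrization that is not shown. A hedged sketch of what such a table could look like follows; the exception-to-exit-code pairs are purely hypothetical and not gordo's documented codes:

import pytest


@pytest.mark.parametrize(
    "exception,exit_code",
    [
        # Hypothetical mappings for illustration only; the real codes live in gordo's CLI.
        (FileNotFoundError("missing data"), 30),
        (Exception("unexpected failure"), 1),
    ],
)
def test_build_exit_code(exception, exit_code, runner, tmpdir, machine):
    ...  # body as in the example above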
Example No. 29
def test_build_model_with_parameters(runner, tmpdir, machine):
    """
    Building a simple model with parameters set works.
    """
    machine._strict = False
    machine.model = """
    {
     "sklearn.decomposition.pca.PCA":
      {
        "svd_solver": "{{svd_solver}}",
        "n_components": {{n_components}}
      }
    }
    """

    svd_solver = "auto"
    n_components = 0.5

    logger.info(f"MODEL_CONFIG={json.dumps(machine.model)}")

    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()),
                       OUTPUT_DIR=str(tmpdir)):
        args = [
            "build",
            "--model-parameter",
            f"svd_solver,{svd_solver}",
            "--model-parameter",
            f"n_components,{n_components}",
        ]

        # Run it twice to ensure repeated builds into the same OUTPUT_DIR succeed.
        for _ in range(2):

            result = runner.invoke(cli.gordo, args=args)

            assert (result.exit_code == 0
                    ), f"Command failed: {result}, {result.exception}"
            assert (
                len(os.listdir(tmpdir)) > 1
            ), "Building was supposed to create at least two files (model and metadata) in OUTPUT_DIR, but it did not!"
Example No. 30
def test_build_cv_mode_build_only(tmpdir, runner: CliRunner, machine: Machine):
    """
    Testing build with cv_mode set to build_only. Checks that OUTPUT_DIR gets a model
    saved to it. It also checks that the metadata contains cv-duration-sec=None and
    cv-scores={}
    """

    logger.info(f"MODEL_CONFIG={json.dumps(machine.model)}")
    machine.evaluation = {"cv_mode": "build_only"}

    with temp_env_vars(MACHINE=json.dumps(machine.to_dict()),
                       OUTPUT_DIR=str(tmpdir)):

        metadata_file = os.path.join(tmpdir, "metadata.json")
        runner.invoke(cli.gordo, ["build"])

        # A model has been saved
        assert len(os.listdir(tmpdir)) != 0
        with open(metadata_file) as f:
            metadata_json = json.loads(f.read())
            assert (metadata_json["metadata"]["build_metadata"]["model"]
                    ["cross_validation"]["cv_duration_sec"] is None)
            assert (metadata_json["metadata"]["build_metadata"]["model"]
                    ["cross_validation"]["scores"] == {})