def test_dump_load_models(model):
    """
    Round-trip ``model`` through ``serializer.dump`` / ``serializer.load``
    and check that the reloaded model reproduces the original predictions.
    """
    features = np.random.random(size=100).reshape(10, 10)

    # Each call below is handed its own copy of the data
    model.fit(features.copy(), features.copy())
    expected = model.predict(features.copy())

    with TemporaryDirectory() as dump_dir:
        serializer.dump(model, dump_dir)
        restored = serializer.load(dump_dir)
        actual = restored.predict(features.copy())

        assert np.allclose(expected.flatten(), actual.flatten())
def trained_model_directory(gordo_project: str, gordo_name: str,
                            sensors: List[SensorTag]):
    """
    Fixture: fit a small anomaly detector wrapping a KerasAutoEncoder pipeline
    on random data and dump it, with metadata, to
    ``<temporary dir>/<gordo_project>/<gordo_name>``.

    Yields the collection directory (``<temporary dir>/<gordo_project>``); the
    temporary directory is cleaned up once the generator is resumed.
    """
    with tempfile.TemporaryDirectory() as tmp_root:

        # Directory holding the collection of models for the project
        collection_dir = os.path.join(tmp_root, gordo_project)

        # Directory for the single model trained by this fixture
        model_dir = os.path.join(collection_dir, gordo_name)
        os.makedirs(model_dir, exist_ok=True)

        pipeline_yaml = """
            gordo_components.model.anomaly.diff.DiffBasedAnomalyDetector:
                base_estimator:
                    sklearn.pipeline.Pipeline:
                        steps:
                            - sklearn.preprocessing.data.MinMaxScaler
                            - gordo_components.model.models.KerasAutoEncoder:
                                kind: feedforward_hourglass
                        memory:
            """
        definition = ruamel.yaml.load(pipeline_yaml, Loader=ruamel.yaml.Loader)
        model = serializer.pipeline_from_definition(definition)

        # Ten random samples with one column per sensor; used as input and target
        n_sensors = len(sensors)
        X = np.random.random(size=n_sensors * 10).reshape(10, n_sensors)
        model.fit(X, X)

        dataset_meta = {
            "tag_list": sensors,
            "resolution": "10T",
            "target_tag_list": sensors,
        }
        metadata = {
            "dataset": dataset_meta,
            "name": "machine-1",
            "model": {"model-offset": 0},
            "user-defined": {"model-name": "test-model"},
        }
        serializer.dump(model, model_dir, metadata=metadata)

        yield collection_dir
    def test_dump_load_keras_directly(self):
        """
        A KerasAutoEncoder dumped on its own (not wrapped in a pipeline) and
        loaded back must give the same predictions as the original instance.
        """
        autoencoder = KerasAutoEncoder(kind="feedforward_hourglass")

        features = np.random.random(size=100).reshape(10, 10)
        autoencoder.fit(features.copy(), features.copy())

        with TemporaryDirectory() as dump_dir:
            serializer.dump(autoencoder, dump_dir)
            restored = serializer.load(dump_dir)

            expected = autoencoder.predict(features.copy()).flatten()
            actual = restored.predict(features.copy()).flatten()
            self.assertTrue(np.allclose(expected, actual))
示例#4
0
def download_model(ctx: click.Context, output_dir: str):
    """
    Download the actual model from the target and write to an output directory

    Parameters
    ----------
    ctx: click.Context
        Click context; ``ctx.obj["args"]`` and ``ctx.obj["kwargs"]`` are
        forwarded to ``Client``.
    output_dir: str
        Directory under which one sub directory per downloaded model is
        written. It is created if it does not exist yet.

    Raises
    ------
    FileExistsError
        If the sub directory for one of the models already exists.
    """
    client = Client(*ctx.obj["args"], **ctx.obj["kwargs"])
    models = client.download_model()

    # Iterate over mapping of models and save into their own sub dirs of the output_dir
    for target, model in models.items():
        model_out_dir = os.path.join(output_dir, target)

        # makedirs instead of mkdir so a missing output_dir is created as well.
        # exist_ok stays at its default (False): an already existing model
        # directory is still refused rather than written into.
        os.makedirs(model_out_dir)

        click.secho(
            f"Writing model '{target}' to directory: '{model_out_dir}'...",
            nl=False)
        serializer.dump(model, model_out_dir)
        click.secho("done")

    click.secho(f"Wrote all models to directory: {output_dir}", fg="green")
示例#5
0
def _save_model_for_workflow(model: BaseEstimator, metadata: dict,
                             output_dir: Union[os.PathLike, str]):
    """
    Persist a model and its metadata in the layout the Argo workflow expects.

    Parameters
    ----------
    model: BaseEstimator
        Model handed to the gordo serializer for dumping.
    metadata: dict
        Metadata mappings stored together with the model.
    output_dir: Union[os.PathLike, str]
        Destination directory; missing directories along the path are created.

    Returns
    -------
    Union[os.PathLike, str]
        The ``output_dir`` that was passed in, now containing the saved model
    """
    # An already existing destination (or existing parents) is not an error
    os.makedirs(output_dir, exist_ok=True)

    serializer.dump(model, output_dir, metadata=metadata)

    return output_dir
示例#6
0
def trained_model_directory(sensors: List[SensorTag]):
    """
    Fixture: fit a small anomaly detector wrapping a KerasAutoEncoder pipeline
    on random data and dump it, together with some metadata, into a temporary
    directory.

    Yields the path of that directory; it is cleaned up once the generator is
    resumed.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        pipeline_yaml = """
            gordo_components.model.anomaly.diff.DiffBasedAnomalyDetector:
                base_estimator:
                    sklearn.pipeline.Pipeline:
                        steps:
                            - sklearn.preprocessing.data.MinMaxScaler
                            - gordo_components.model.models.KerasAutoEncoder:
                                kind: feedforward_hourglass
                        memory:
            """
        definition = ruamel.yaml.load(pipeline_yaml, Loader=ruamel.yaml.Loader)
        model = serializer.pipeline_from_definition(definition)

        # Ten random samples with one column per sensor; used as input and target
        n_sensors = len(sensors)
        X = np.random.random(size=n_sensors * 10).reshape(10, n_sensors)
        model.fit(X, X)

        dataset_meta = {
            "tag_list": sensors,
            "resolution": "10T",
            "target_tag_list": sensors,
        }
        metadata = {
            "dataset": dataset_meta,
            "name": "machine-1",
            "user-defined": {"model-name": "test-model"},
        }
        serializer.dump(model, tmp_dir, metadata=metadata)

        yield tmp_dir
    def test_pipeline_serialization(self):
        """
        Dump a nested Pipeline (PCA -> FeatureUnion -> KerasAutoEncoder) to a
        directory, verify the directory layout and the metadata round trip, and
        check that clones obtained through ``load`` and ``dumps``/``loads``
        predict the same values as the original pipeline.
        """
        pca1_step = ("pca1", PCA(n_components=10))
        pca2_step = ("pca2", PCA(n_components=3))
        nested_pipe_step = (
            "pipe",
            Pipeline([
                ("minmax", MinMaxScaler()),
                ("truncsvd", TruncatedSVD(n_components=7)),
            ]),
        )
        union_step = ("fu", FeatureUnion([pca2_step, nested_pipe_step]))
        autoencoder_step = ("ae", KerasAutoEncoder(kind="feedforward_hourglass"))
        pipe = Pipeline([pca1_step, union_step, autoencoder_step])

        X = np.random.random(size=100).reshape(10, 10)
        pipe.fit(X.copy(), X.copy())

        def assert_same_predictions(clone):
            # Original and clone must agree on a fresh copy of the data
            y_hat_original = pipe.predict(X.copy()).flatten()
            y_hat_clone = clone.predict(X.copy()).flatten()
            self.assertTrue(np.allclose(y_hat_original, y_hat_clone))

        with TemporaryDirectory() as tmp:

            # Test dump
            metadata = {"key": "value"}
            serializer.dump(pipe, tmp, metadata=metadata)

            # Directory names that occur more than once in the expected layout
            pipeline_dir = "n_step=000-class=sklearn.pipeline.Pipeline"
            union_dir = "n_step=001-class=sklearn.pipeline.FeatureUnion"
            autoencoder_dir = (
                "n_step=002-class=gordo_components.model.models.KerasAutoEncoder"
            )

            # Expected layout, built from the innermost level outwards: one
            # directory per step, mapped to a file name or a nested layout.
            #
            # NOTE(review): some keys are listed twice in the pair lists below.
            # An OrderedDict keeps only the last value given for a repeated
            # key, so the "metadata.json", FeatureUnion "params.json" and
            # "model.h5" entries are overwritten before the structure is
            # passed on - confirm whether those files were meant to be checked.
            nested_pipe_layout = OrderedDict([
                (
                    "n_step=000-class=sklearn.preprocessing.data.MinMaxScaler",
                    "minmax.pkl.gz",
                ),
                (
                    "n_step=001-class=sklearn.decomposition.truncated_svd.TruncatedSVD",
                    "truncsvd.pkl.gz",
                ),
            ])
            union_layout = OrderedDict([
                ("n_step=000-class=sklearn.decomposition.pca.PCA", "pca2.pkl.gz"),
                ("n_step=001-class=sklearn.pipeline.Pipeline", nested_pipe_layout),
            ])
            pipeline_layout = OrderedDict([
                ("n_step=000-class=sklearn.decomposition.pca.PCA", "pca1.pkl.gz"),
                (union_dir, "params.json"),
                (union_dir, union_layout),
                (autoencoder_dir, "model.h5"),
                (autoencoder_dir, "params.json"),
            ])
            expected_structure = OrderedDict([
                (pipeline_dir, "metadata.json"),
                (pipeline_dir, pipeline_layout),
            ])

            self._structure_verifier(prefix_dir=tmp,
                                     structure=expected_structure)

            # Test load from the serialized pipeline above
            pipe_clone = serializer.load(tmp)
            metadata_clone = serializer.load_metadata(tmp)

            # The metadata must survive the dump / load round trip
            self.assertEqual(metadata, metadata_clone)

            # Clone loaded from the directory behaves like the original
            assert_same_predictions(pipe_clone)

            # Same check for a clone produced through dumps/loads
            serialized = serializer.dumps(pipe)
            pipe_clone = serializer.loads(serialized)
            assert_same_predictions(pipe_clone)