def test_diff_detector_serializability(config):
    """
    Should play well with the gordo serializer

    Round-trips the model through every serializer entry point:
    definition -> pipeline -> definition, then dumps -> loads.
    """
    # yaml.load() without an explicit Loader constructs arbitrary Python
    # objects from tagged input and raises a warning (TypeError on PyYAML 6)
    # when the Loader argument is omitted; safe_load is both safe and
    # sufficient for a plain config document.
    config = yaml.safe_load(config)
    model = serializer.pipeline_from_definition(config)
    serializer.pipeline_into_definition(model)
    serialized_bytes = serializer.dumps(model)
    serializer.loads(serialized_bytes)
def test_download_model(gordo_ml_server_client):
    """
    Test we can download a model, loadable via serializer.loads()
    """
    response = gordo_ml_server_client.get("/download-model")
    raw_model = response.get_data()
    model = serializer.loads(raw_model)

    # All models have a fit method
    assert hasattr(model, "fit")

    # Models MUST have either predict or transform
    assert any(hasattr(model, attr) for attr in ("predict", "transform"))
def download_model(self) -> typing.Dict[str, BaseEstimator]:
    """
    Download the actual model(s) from the ML server /download-model

    Returns
    -------
    Dict[str, BaseEstimator]
        Mapping of target name to the model

    Raises
    ------
    IOError
        If any endpoint responds with a non-OK status; raised on the
        first failure encountered.
    """
    downloaded: typing.Dict[str, BaseEstimator] = {}
    for endpoint in self.endpoints:
        url = f"{self.base_url + endpoint.endpoint}/download-model"
        resp = self.session.get(url)
        # Fail fast on the first bad response rather than returning a
        # partial mapping.
        if not resp.ok:
            raise IOError(f"Failed to download model: '{repr(resp.content)}'")
        downloaded[endpoint.name] = serializer.loads(resp.content)
    return downloaded
def test_pipeline_serialization(self):
    """
    Round-trip a nested Pipeline / FeatureUnion / Keras autoencoder model
    through both serializer.dump/load (directory based) and
    serializer.dumps/loads (bytes based), checking the on-disk layout,
    the saved metadata, and that predictions match after reloading.
    """
    pipe = Pipeline(
        [
            ("pca1", PCA(n_components=10)),
            (
                "fu",
                FeatureUnion(
                    [
                        ("pca2", PCA(n_components=3)),
                        (
                            "pipe",
                            Pipeline(
                                [
                                    ("minmax", MinMaxScaler()),
                                    ("truncsvd", TruncatedSVD(n_components=7)),
                                ]
                            ),
                        ),
                    ]
                ),
            ),
            ("ae", KerasAutoEncoder(kind="feedforward_hourglass")),
        ]
    )

    X = np.random.random(size=100).reshape(10, 10)
    pipe.fit(X.copy(), X.copy())

    with TemporaryDirectory() as tmp:
        # Test dump
        metadata = {"key": "value"}
        serializer.dump(pipe, tmp, metadata=metadata)

        # Assert that a dirs are created for each step in Pipeline
        # NOTE(review): several keys in this OrderedDict literal repeat
        # (e.g. the Pipeline dir appears with both "metadata.json" and a
        # nested dict), so later entries silently overwrite earlier ones
        # and only the last value per key survives -- confirm that this
        # is what _structure_verifier actually expects.
        expected_structure = OrderedDict(
            [
                ("n_step=000-class=sklearn.pipeline.Pipeline", "metadata.json"),
                (
                    "n_step=000-class=sklearn.pipeline.Pipeline",
                    OrderedDict(
                        [
                            (
                                "n_step=000-class=sklearn.decomposition.pca.PCA",
                                "pca1.pkl.gz",
                            ),
                            (
                                "n_step=001-class=sklearn.pipeline.FeatureUnion",
                                "params.json",
                            ),
                            (
                                "n_step=001-class=sklearn.pipeline.FeatureUnion",
                                OrderedDict(
                                    [
                                        (
                                            "n_step=000-class=sklearn.decomposition.pca.PCA",
                                            "pca2.pkl.gz",
                                        ),
                                        (
                                            "n_step=001-class=sklearn.pipeline.Pipeline",
                                            OrderedDict(
                                                [
                                                    (
                                                        "n_step=000-class=sklearn.preprocessing.data.MinMaxScaler",
                                                        "minmax.pkl.gz",
                                                    ),
                                                    (
                                                        "n_step=001-class=sklearn.decomposition.truncated_svd.TruncatedSVD",
                                                        "truncsvd.pkl.gz",
                                                    ),
                                                ]
                                            ),
                                        ),
                                    ]
                                ),
                            ),
                            (
                                "n_step=002-class=gordo_components.model.models.KerasAutoEncoder",
                                "model.h5",
                            ),
                            (
                                "n_step=002-class=gordo_components.model.models.KerasAutoEncoder",
                                "params.json",
                            ),
                        ]
                    ),
                ),
            ]
        )

        self._structure_verifier(prefix_dir=tmp, structure=expected_structure)

        # Test load from the serialized pipeline above
        pipe_clone = serializer.load(tmp)
        metadata_clone = serializer.load_metadata(tmp)

        # Ensure the metadata was saved and loaded back
        self.assertEqual(metadata, metadata_clone)

        # Verify same state for both pipelines
        y_hat_pipe1 = pipe.predict(X.copy()).flatten()
        y_hat_pipe2 = pipe_clone.predict(X.copy()).flatten()
        self.assertTrue(np.allclose(y_hat_pipe1, y_hat_pipe2))

        # Now use dumps/loads
        serialized = serializer.dumps(pipe)
        pipe_clone = serializer.loads(serialized)

        # Verify same state for both pipelines
        y_hat_pipe1 = pipe.predict(X.copy()).flatten()
        y_hat_pipe2 = pipe_clone.predict(X.copy()).flatten()
        self.assertTrue(np.allclose(y_hat_pipe1, y_hat_pipe2))