def test_fit_pipeline(self):
    """Fit a stored pipeline through the runtime and compare with a local fit."""
    # build a small linear dataset, kept as single-column DataFrames
    xs = np.array(list(range(10)))
    frame = pd.DataFrame({'x': xs, 'y': xs * 2})
    X, Y = frame[['x']], frame[['y']]
    # store the data in Omega and read it back
    os.environ['DJANGO_SETTINGS_MODULE'] = ''
    om = Omega()
    om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
    for data, name in ((X, 'datax'), (Y, 'datay')):
        om.datasets.put(data, name)
    for name in ('datax', 'datay'):
        om.datasets.get(name)
    # the pipeline is stored before it is fitted locally
    pipeline = Pipeline([('lr', LinearRegression())])
    om.models.put(pipeline, 'mymodel2')
    self.assertIn('mymodel2', om.models.list('*'))
    # local reference prediction
    pipeline.fit(reshaped(X), reshaped(Y))
    expected = pipeline.predict(reshaped(X))
    # fit through the runtime, wait for completion, then predict
    om.runtime.model('mymodel2').fit('datax', 'datay').get()
    actual = om.runtime.model('mymodel2').predict('datax').get()
    self.assertTrue((expected == actual).all(),
                    "runtimes prediction is different(1)")
def fit_transform(self, modelname, Xname, Yname=None, rName=None,
                  pure_python=True, **kwargs):
    """Fit-transform a stored model on stored data and save the fitted model.

    :param modelname: name of the model in ``self.model_store``
    :param Xname: name of the feature dataset in ``self.data_store``
    :param Yname: optional name of the target dataset; when falsy, ``Y``
        is passed on as None
    :param rName: optional name under which the transformed data is stored
        in ``self.data_store``
    :param pure_python: if True, the result is converted via ``.tolist()``
    :param kwargs: passed through to ``model.fit_transform``
    :return: the transformed data, or the return value of
        ``self.data_store.put`` when ``rName`` is given
    """
    model = self.model_store.get(modelname)
    X = self.data_store.get(Xname)
    metaX = self.data_store.metadata(Xname)
    Y, metaY = None, None
    if Yname:
        Y = self.data_store.get(Yname)
        metaY = self.data_store.metadata(Yname)
    result = model.fit_transform(reshaped(X), reshaped(Y), **kwargs)
    # store information required for retraining
    model_attrs = {
        'metaX': metaX.to_mongo(),
        'metaY': metaY.to_mongo() if metaY is not None else None,
    }
    try:
        import sklearn
        model_attrs['scikit-learn'] = sklearn.__version__
    except Exception:
        # best effort only; unlike a bare except this lets
        # KeyboardInterrupt/SystemExit propagate
        model_attrs['scikit-learn'] = 'unknown'
    # re-store the model, now fitted, under the same name
    self.model_store.put(model, modelname, attributes=model_attrs)
    if pure_python:
        result = result.tolist()
    if rName:
        result = self.data_store.put(result, rName)
    return result
def test_predict_hdf_dataframe(self):
    """Predict through the runtime from data stored with ``as_hdf=True``."""
    # create some data
    x = np.array(list(range(0, 10)))
    y = x * 2
    df = pd.DataFrame({'x': x, 'y': y})
    X = df['x']
    Y = df['y']
    # put into Omega -- assume a client with pandas, scikit learn
    os.environ['DJANGO_SETTINGS_MODULE'] = ''
    om = Omega()
    om.runtime.pure_python = True
    om.runtime.celeryapp.conf.CELERY_ALWAYS_EAGER = True
    om.datasets.put(X, 'datax', as_hdf=True)
    om.datasets.put(Y, 'datay', as_hdf=True)
    # fit and predict locally, then store the fitted model in Omega
    lr = LinearRegression()
    lr.fit(reshaped(X), reshaped(Y))
    pred = lr.predict(reshaped(X))
    om.models.put(lr, 'mymodel2')
    # have the runtime predict from the stored dataset
    result = om.runtime.model('mymodel2').predict('datax')
    pred2 = result.get()
    # the original repeated this identical comparison twice; once is enough
    self.assertTrue(
        (pred == pred2).all(), "runtimes prediction is different(1)")
def partial_fit(self, modelname, Xname, Yname=None, pure_python=True,
                **kwargs):
    """Incrementally fit a stored model on stored data and save it again.

    :param modelname: name of the model in ``self.model_store``
    :param Xname: name of the feature dataset in ``self.data_store``
    :param Yname: optional name of the target dataset; when falsy, ``Y``
        is passed on as None
    :param pure_python: accepted for signature parity; not used here
    :param kwargs: passed through to ``maybe_chunked``
    :return: the return value of ``self.model_store.put`` for the model
    """
    model = self.model_store.get(modelname)
    X, metaX = self.data_store.get(Xname), self.data_store.metadata(Xname)
    Y, metaY = None, None
    if Yname:
        Y, metaY = (self.data_store.get(Yname),
                    self.data_store.metadata(Yname))
    # presumably maybe_chunked feeds X/Y to model.partial_fit piecewise and
    # process() drives it to completion -- confirm against the helpers
    process(maybe_chunked(model.partial_fit,
                          lambda X, Y: as_args(reshaped(X), reshaped(Y)),
                          X, Y, **kwargs))
    # store information required for retraining
    model_attrs = {
        'metaX': metaX.to_mongo(),
        'metaY': metaY.to_mongo() if metaY is not None else None,
    }
    try:
        import sklearn
        model_attrs['scikit-learn'] = sklearn.__version__
    except Exception:
        # best effort only; unlike a bare except this lets
        # KeyboardInterrupt/SystemExit propagate
        model_attrs['scikit-learn'] = 'unknown'
    return self.model_store.put(model, modelname, attributes=model_attrs)
def score(self, modelname, Xname, Yname=None, rName=None, pure_python=True,
          **kwargs):
    """Score a stored model on stored data and log the score as a metric.

    :param modelname: name of the model in ``self.model_store``
    :param Xname: name of the feature dataset in ``self.data_store``
    :param Yname: optional name of the target dataset; when falsy no
        lookup is made and ``Y`` is None
    :param rName: optional name under which the result is stored
    :param pure_python: accepted for signature parity; not used here
    :param kwargs: passed through to ``maybe_chunked``
    :return: the value returned by ``process`` with ``fn=store``
    """
    model = self.model_store.get(modelname)
    X = self.data_store.get(Xname)
    # Yname defaults to None, so only look it up when given; this matches
    # the guard used by fit_transform and partial_fit
    Y = self.data_store.get(Yname) if Yname else None

    def store(result):
        # NOTE(review): the score is stored via model_store, not data_store
        # -- confirm this is intended
        if rName:
            result = self.model_store.put(result, rName)
        return result

    result = process(maybe_chunked(model.score,
                                   lambda X, Y: as_args(reshaped(X),
                                                        reshaped(Y)),
                                   X, Y, **kwargs),
                     fn=store, keep_last=True)
    with self.tracking as exp:
        exp.log_metric('score', result)
    return result
def test_save_mlflow_saved_model_file(self):
    """ test deploying model saved by MLFlow, by file """
    import mlflow
    model_path = os.path.join(omegaml.defaults.OMEGA_TMP, 'mymodel')
    model = LinearRegression()
    X = pd.Series(range(0, 10))
    Y = pd.Series(X) * 2 + 3
    model.fit(reshaped(X), reshaped(Y))
    # start from a clean directory before saving the model
    rmtree(model_path, ignore_errors=True)
    mlflow.sklearn.save_model(model, model_path)
    om = self.om
    # test multiple ways of storing
    for fn in ('mlflow://' + os.path.join(model_path, 'MLmodel'),
               'mlflow://' + model_path):
        # store with just the MLmodel file as a reference, no kind necessary
        om.models.drop('mymodel', force=True)
        meta = om.models.put(fn, 'mymodel')
        # the original asserted this twice in a row; once is sufficient
        self.assertEqual(meta.kind, MLFlowModelBackend.KIND)
        model_ = om.models.get('mymodel')
        self.assertIsInstance(model_, mlflow.pyfunc.PyFuncModel)
        # the runtime prediction must match the restored model's own
        yhat_direct = model_.predict(reshaped(X))
        yhat_rt = om.runtime.model('mymodel').predict(X).get()
        assert_array_equal(yhat_rt, yhat_direct)
def test_save_mlflow_model_run(self):
    """ test deploying a registered MLFlow model referenced by its models URI """
    import mlflow
    mlflow.set_tracking_uri(self.mlflow_tracking_db)
    mlflow.set_registry_uri(self.mlflow_registry_db)
    with mlflow.start_run():
        regressor = LinearRegression()
        X = pd.Series(range(10))
        Y = X * 2 + 3
        regressor.fit(reshaped(X), reshaped(Y))
        # log and register the model within the active run
        mlflow.sklearn.log_model(
            sk_model=regressor,
            artifact_path='sklearn-model',
            registered_model_name='sklearn-model',
        )
    # simulate a new session on another device (tracking URI comes from repo)
    mlflow.set_tracking_uri(None)
    om = self.om
    # store the model as a reference to the MLFlow registry entry
    stored = om.models.put('mlflow+models://sklearn-model/1', 'sklearn-model')
    self.assertEqual(stored.kind, MLFlowRegistryBackend.KIND)
    # load it back and compare direct and runtime predictions
    restored = om.models.get('sklearn-model')
    self.assertIsInstance(restored, mlflow.pyfunc.PyFuncModel)
    expected = restored.predict(reshaped(X))
    actual = om.runtime.model('sklearn-model').predict(X).get()
    assert_array_equal(actual, expected)
def score(self, modelname, Xname, Yname, rName=None, pure_python=True,
          **kwargs):
    """Score a stored model on stored X/Y datasets.

    :param modelname: name of the model in ``self.model_store``
    :param Xname: name of the feature dataset in ``self.data_store``
    :param Yname: name of the target dataset in ``self.data_store``
    :param rName: optional name; when given the score is put into
        ``self.model_store`` and the return value of that put is returned
    :param pure_python: accepted but not used here
    :param kwargs: passed through to ``model.score``
    :return: the score, or the put result when ``rName`` is given
    """
    estimator = self.model_store.get(modelname)
    # fetch both datasets first, then reshape them for the call
    datasets = [self.data_store.get(name) for name in (Xname, Yname)]
    value = estimator.score(*map(reshaped, datasets), **kwargs)
    if not rName:
        return value
    return self.model_store.put(value, rName)
def test_inferred_model_flavor(self):
    """ test storing a fitted sklearn model with kind='mlflow.model' """
    import mlflow
    om = self.om
    features = pd.Series(range(10))
    target = features * 2 + 3
    regressor = LinearRegression()
    regressor.fit(reshaped(features), reshaped(target))
    # store with the mlflow kind given explicitly
    stored = om.models.put(regressor, 'mymodel', kind='mlflow.model')
    self.assertEqual(stored.kind, MLFlowModelBackend.KIND)
    # the stored model comes back as an MLflow pyfunc model
    restored = om.models.get('mymodel')
    self.assertIsInstance(restored, mlflow.pyfunc.PyFuncModel)
def decision_function(self, modelname, Xname, rName=None, pure_python=True,
                      **kwargs):
    """Call ``decision_function`` of a stored model on a stored dataset.

    :param modelname: name of the model in ``self.model_store``
    :param Xname: name of the dataset in ``self.data_store``
    :param rName: optional name under which the result is stored in
        ``self.data_store``
    :param pure_python: if True, the result is converted via ``.tolist()``
    :param kwargs: passed through to ``model.decision_function``
    :return: the result, or the return value of ``self.data_store.put``
        when ``rName`` is given
    """
    estimator = self.model_store.get(modelname)
    dataset = self.data_store.get(Xname)
    scores = estimator.decision_function(reshaped(dataset), **kwargs)
    if pure_python:
        scores = scores.tolist()
    return self.data_store.put(scores, rName) if rName else scores
def fit_transform(self, modelname, Xname, Yname=None, rName=None,
                  pure_python=True, **kwargs):
    """Fit-transform a stored model via the chunking helpers and save it.

    :param modelname: name of the model in ``self.model_store``
    :param Xname: name of the feature dataset in ``self.data_store``
    :param Yname: optional name of the target dataset; when falsy, ``Y``
        is passed on as None
    :param rName: optional name under which the transformed data is stored
        in ``self.data_store``
    :param pure_python: if True, results are converted via ``.tolist()``
    :param kwargs: passed through to ``maybe_chunked``
    :return: the value returned by ``process`` when ``rName`` is given,
        otherwise the return value of ``self.model_store.put`` for the model
    """
    model = self.model_store.get(modelname)
    X, metaX = self.data_store.get(Xname), self.data_store.metadata(Xname)
    Y, metaY = None, None
    if Yname:
        Y, metaY = (self.data_store.get(Yname),
                    self.data_store.metadata(Yname))

    def store(result):
        # convert first so that the plain-python form is what gets stored
        if pure_python:
            result = result.tolist()
        if rName:
            result = self.data_store.put(result, rName)
        return result

    # presumably store() is applied to the chunk results and keep_last
    # selects the final one -- confirm against process()
    result = process(maybe_chunked(model.fit_transform,
                                   lambda X, Y: as_args(reshaped(X),
                                                        reshaped(Y)),
                                   X, Y, **kwargs),
                     fn=store, keep_last=True)
    # store information required for retraining
    model_attrs = {
        'metaX': metaX.to_mongo(),
        'metaY': metaY.to_mongo() if metaY is not None else None,
    }
    try:
        import sklearn
        model_attrs['scikit-learn'] = sklearn.__version__
    except Exception:
        # best effort only; unlike a bare except this lets
        # KeyboardInterrupt/SystemExit propagate
        model_attrs['scikit-learn'] = 'unknown'
    model_meta = self.model_store.put(model, modelname,
                                      attributes=model_attrs)
    return result if rName else model_meta
def predict(self, modelname, Xname, rName=None, pure_python=True, **kwargs):
    """Predict with a stored model on a stored dataset.

    :param modelname: name of the model in ``self.model_store``
    :param Xname: name of the dataset in ``self.data_store``
    :param rName: optional name under which the prediction is stored in
        ``self.data_store``
    :param pure_python: if True, the prediction is converted via
        ``.tolist()``
    :param kwargs: passed through to ``model.predict``
    :return: the prediction, or the return value of
        ``self.data_store.put`` when ``rName`` is given
    """
    # the dataset is loaded before the model
    features = self.data_store.get(Xname)
    estimator = self.model_store.get(modelname)
    prediction = estimator.predict(reshaped(features), **kwargs)
    if pure_python:
        prediction = prediction.tolist()
    if not rName:
        return prediction
    return self.data_store.put(prediction, rName)
def test_save_mlflow_saved_model_path(self):
    """ test deploying a model saved by MLflow, given its directory path """
    import mlflow
    target_dir = os.path.join(omegaml.defaults.OMEGA_TMP, 'mymodel')
    regressor = LinearRegression()
    X = pd.Series(range(10))
    Y = X * 2 + 3
    regressor.fit(reshaped(X), reshaped(Y))
    # clear any previous save before writing the model
    rmtree(target_dir, ignore_errors=True)
    mlflow.sklearn.save_model(regressor, target_dir)
    om = self.om
    # a bare path could be any kind of file, so the kind is given explicitly
    stored = om.models.put(target_dir, 'mymodel', kind='mlflow.model')
    self.assertEqual(stored.kind, MLFlowModelBackend.KIND)
    restored = om.models.get('mymodel')
    self.assertIsInstance(restored, mlflow.pyfunc.PyFuncModel)
    # direct and runtime predictions must agree
    expected = restored.predict(reshaped(X))
    actual = om.runtime.model('mymodel').predict(X).get()
    assert_array_equal(actual, expected)
def test_save_mlflow_pyfunc_model(self):
    """ test storing and running a custom MLFlow PythonModel """
    import mlflow

    class MyModel(mlflow.pyfunc.PythonModel):
        # identity model: the prediction is the input itself
        def predict(self, context, data):
            return data

    X = pd.Series(range(10))
    custom = MyModel()
    om = self.om
    stored = om.models.put(custom, 'mymodel')
    self.assertEqual(stored.kind, MLFlowModelBackend.KIND)
    restored = om.models.get('mymodel')
    self.assertIsInstance(restored, mlflow.pyfunc.PyFuncModel)
    # the runtime result is compared against the reshaped direct result
    expected = restored.predict(X)
    actual = om.runtime.model('mymodel').predict(X).get()
    assert_array_equal(actual, reshaped(expected))
def predict_proba(self, modelname, Xname, rName=None, pure_python=True,
                  **kwargs):
    """Call ``predict_proba`` of a stored model on a stored dataset.

    :param modelname: name of the model in ``self.model_store``
    :param Xname: name of the dataset in ``self.data_store``
    :param rName: optional name under which the result is stored in
        ``self.data_store``
    :param pure_python: if True, results are converted via ``.tolist()``
    :param kwargs: passed through to ``maybe_chunked``
    :return: the value returned by ``process`` with ``fn=store``
    """
    data = self.data_store.get(Xname)
    model = self.model_store.get(modelname)

    def store(result):
        # convert first so that the plain-python form is what gets stored
        if pure_python:
            result = result.tolist()
        if rName:
            result = self.data_store.put(result, rName)
        return result

    result = process(maybe_chunked(model.predict_proba,
                                   lambda data: as_args(reshaped(data)),
                                   data, **kwargs),
                     fn=store, keep_last=True)
    # the original computed the result but never returned it, so callers
    # always received None
    return result
def decision_function(self, modelname, Xname, rName=None, pure_python=True,
                      **kwargs):
    """Call ``decision_function`` of a stored model via the chunking helpers.

    :param modelname: name of the model in ``self.model_store``
    :param Xname: name of the dataset in ``self.data_store``
    :param rName: optional name under which the result is stored in
        ``self.data_store``
    :param pure_python: if True, results are converted via ``.tolist()``
    :param kwargs: passed through to ``maybe_chunked``
    :return: the value returned by ``process`` with ``fn`` set to the
        local post-processing function
    """
    estimator = self.model_store.get(modelname)
    dataset = self.data_store.get(Xname)

    def _finalize(scores):
        # optional list conversion, then optional storage under rName
        converted = scores.tolist() if pure_python else scores
        return self.data_store.put(converted, rName) if rName else converted

    chunks = maybe_chunked(estimator.decision_function,
                           lambda X: as_args(reshaped(X)),
                           dataset, **kwargs)
    return process(chunks, fn=_finalize, keep_last=True)