def generic_blank_constructor():
    """Check that a MOJO can be loaded via an argument-less ``H2OGenericEstimator``.

    A one-tree GBM is trained on the airlines data set and exported with
    ``download_mojo``. A generic estimator is then created with no
    constructor arguments, pointed at the exported file through its ``path``
    attribute and trained. The test asserts that the resulting model reports
    GBM as its original algorithm and that it can score the training frame.

    The temporary directory holding the exported MOJO is removed when the
    test finishes, whether it passes or fails.
    """
    import shutil

    # Train a model
    airlines = h2o.import_file(
        path=pyunit_utils.locate("smalldata/testng/airlines_train.csv"))
    model = H2OGradientBoostingEstimator(ntrees=1)
    model.train(x=["Origin", "Dest"], y="IsDepDelayed", training_frame=airlines)

    # Save the previously created model into a temporary directory.
    # mkdtemp() leaves removal to the caller, hence the try/finally below.
    mojo_dir = tempfile.mkdtemp()
    try:
        original_model_filename = model.download_mojo(mojo_dir)

        # Load the model from the temporary file using an empty constructor
        mojo_model = H2OGenericEstimator()
        mojo_model.path = original_model_filename
        mojo_model.train()
        assert isinstance(mojo_model, H2OGenericEstimator)

        output = mojo_model._model_json["output"]
        assert output["original_model_identifier"] == "gbm"
        assert output["original_model_full_name"] == "Gradient Boosting Machine"

        # Test scoring is available on the model
        predictions = mojo_model.predict(airlines)
        assert predictions is not None
        assert predictions.nrows == 24421
    finally:
        # Best-effort cleanup: a failed removal must not mask a test failure.
        shutil.rmtree(mojo_dir, ignore_errors=True)
def test(x, y, output_test, strip_part, algo_name, generic_algo_name):
    """Round-trip a Deep Learning model through MOJO and the generic estimator.

    A one-epoch ``H2ODeepLearningEstimator`` is trained on the airlines data
    set, exported with ``download_mojo`` and re-imported twice: once through
    ``h2o.lazy_import`` + ``H2OGenericEstimator(model_key=...)`` and once
    through ``H2OGenericEstimator.from_file``. Both generic models must score
    the training frame and expose a non-empty model summary. The MOJO
    downloaded again from the generic model must have the same size on disk
    as the original one.

    :param x: predictor columns passed to ``train``.
    :param y: response column passed to ``train``.
    :param output_test: callable invoked as ``output_test(original_text,
        generic_text, strip_part, algo_name, generic_algo_name)`` with the
        captured ``show()`` output of the original and the generic model.
    :param strip_part: forwarded unchanged to ``output_test``.
    :param algo_name: forwarded unchanged to ``output_test``.
    :param generic_algo_name: forwarded unchanged to ``output_test``.

    Temporary directories created for the MOJO files are removed when the
    test finishes, whether it passes or fails.
    """
    import shutil

    airlines = h2o.import_file(
        path=pyunit_utils.locate("smalldata/testng/airlines_train.csv"))
    deeplearning = H2ODeepLearningEstimator(epochs=1)
    deeplearning.train(x=x, y=y, training_frame=airlines,
                       validation_frame=airlines)
    print(deeplearning)
    with Capturing() as original_output:
        deeplearning.show()

    # mkdtemp() leaves removal to the caller; track every directory created
    # so that the finally clause can delete them.
    temp_dirs = []
    try:
        original_dir = tempfile.mkdtemp()
        temp_dirs.append(original_dir)
        original_model_filename = deeplearning.download_mojo(original_dir)

        # Import the MOJO into the cluster and build a generic model from it
        key = h2o.lazy_import(original_model_filename)
        fr = h2o.get_frame(key[0])
        generic_mojo_model = H2OGenericEstimator(model_key=fr)
        generic_mojo_model.train()
        compare_params(deeplearning, generic_mojo_model)
        print(generic_mojo_model)
        with Capturing() as generic_output:
            generic_mojo_model.show()

        output_test(str(original_output), str(generic_output), strip_part,
                    algo_name, generic_algo_name)

        predictions = generic_mojo_model.predict(airlines)
        assert predictions is not None
        assert predictions.nrows == 24421
        generic_output_json = generic_mojo_model._model_json["output"]
        assert generic_output_json["model_summary"] is not None
        assert len(generic_output_json["model_summary"]._cell_values) > 0

        # Test constructor generating the model from existing MOJO file
        generic_mojo_model_from_file = H2OGenericEstimator.from_file(
            original_model_filename)
        assert generic_mojo_model_from_file is not None
        predictions = generic_mojo_model_from_file.predict(airlines)
        assert predictions is not None
        assert predictions.nrows == 24421
        from_file_output_json = generic_mojo_model_from_file._model_json["output"]
        assert from_file_output_json["model_summary"] is not None
        assert len(from_file_output_json["model_summary"]._cell_values) > 0

        # Download the MOJO back from the generic model and compare sizes
        generic_dir = tempfile.mkdtemp("zip", "genericMojo")
        temp_dirs.append(generic_dir)
        generic_mojo_filename = generic_mojo_model_from_file.download_mojo(
            path=generic_dir)
        assert os.path.getsize(generic_mojo_filename) == os.path.getsize(
            original_model_filename)
    finally:
        # Best-effort cleanup: a failed removal must not mask a test failure.
        for temp_dir in temp_dirs:
            shutil.rmtree(temp_dir, ignore_errors=True)
def stackedensemble_mojo_model_test():
    """Round-trip a Stacked Ensemble through MOJO and the generic estimator.

    A GBM and a Random Forest are trained on the iris training set with
    2-fold cross-validation (``Modulo`` fold assignment, cross-validation
    predictions kept) and combined in an ``H2OStackedEnsembleEstimator``.
    The ensemble is exported with ``download_mojo`` and re-imported twice:
    through ``h2o.lazy_import`` + ``H2OGenericEstimator(model_key=...)`` and
    through ``H2OGenericEstimator.from_file``. Both generic models must be
    able to score the iris test frame. The MOJO downloaded again from the
    generic model must have the same size on disk as the original one.

    Temporary directories created for the MOJO files are removed when the
    test finishes, whether it passes or fails.
    """
    import shutil

    train = h2o.import_file(
        pyunit_utils.locate("smalldata/iris/iris_train.csv"))
    test = h2o.import_file(pyunit_utils.locate("smalldata/iris/iris_test.csv"))
    x = train.columns
    y = "species"

    # Base models share the fold setup and keep their CV predictions
    nfolds = 2
    gbm = H2OGradientBoostingEstimator(nfolds=nfolds,
                                       fold_assignment="Modulo",
                                       keep_cross_validation_predictions=True)
    gbm.train(x=x, y=y, training_frame=train)
    rf = H2ORandomForestEstimator(nfolds=nfolds,
                                  fold_assignment="Modulo",
                                  keep_cross_validation_predictions=True)
    rf.train(x=x, y=y, training_frame=train)

    se = H2OStackedEnsembleEstimator(training_frame=train,
                                     validation_frame=test,
                                     base_models=[gbm.model_id, rf.model_id])
    se.train(x=x, y=y, training_frame=train)
    print(se)
    # show() is only exercised here; its captured output is not inspected.
    with Capturing():
        se.show()

    # mkdtemp() leaves removal to the caller; track every directory created
    # so that the finally clause can delete them.
    temp_dirs = []
    try:
        original_dir = tempfile.mkdtemp()
        temp_dirs.append(original_dir)
        original_model_filename = se.download_mojo(original_dir)

        # Import the MOJO into the cluster and build a generic model from it
        key = h2o.lazy_import(original_model_filename)
        fr = h2o.get_frame(key[0])
        generic_mojo_model = H2OGenericEstimator(model_key=fr)
        generic_mojo_model.train()
        compare_params(se, generic_mojo_model)

        predictions = generic_mojo_model.predict(test)
        assert predictions is not None

        # Test constructor generating the model from existing MOJO file
        generic_mojo_model_from_file = H2OGenericEstimator.from_file(
            original_model_filename)
        assert generic_mojo_model_from_file is not None
        predictions = generic_mojo_model_from_file.predict(test)
        assert predictions is not None

        # Download the MOJO back from the generic model and compare sizes
        generic_dir = tempfile.mkdtemp("zip", "genericMojo")
        temp_dirs.append(generic_dir)
        generic_mojo_filename = generic_mojo_model_from_file.download_mojo(
            path=generic_dir)
        assert os.path.getsize(generic_mojo_filename) == os.path.getsize(
            original_model_filename)
    finally:
        # Best-effort cleanup: a failed removal must not mask a test failure.
        for temp_dir in temp_dirs:
            shutil.rmtree(temp_dir, ignore_errors=True)
def test(x, y, output_test, strip_part, algo_name, generic_algo_name):
    """Round-trip an XGBoost model through MOJO and the generic estimator.

    A one-tree ``H2OXGBoostEstimator`` with 3-fold cross-validation is
    trained on the airlines data set, exported with ``download_mojo`` and
    re-imported twice: through ``h2o.lazy_import`` +
    ``H2OGenericEstimator(model_key=...)`` and through
    ``H2OGenericEstimator.from_file``. Both generic models must score the
    training frame and expose non-empty variable importances and model
    summary tables. The MOJO downloaded again from the generic model must
    have the same size on disk as the original one.

    :param x: predictor columns passed to ``train``.
    :param y: response column passed to ``train``.
    :param output_test: callable invoked as ``output_test(original_text,
        generic_text, strip_part, algo_name, generic_algo_name)`` with the
        captured ``show()`` output of the original and the generic model.
    :param strip_part: forwarded unchanged to ``output_test``.
    :param algo_name: forwarded unchanged to ``output_test``.
    :param generic_algo_name: forwarded unchanged to ``output_test``.

    Temporary directories created for the MOJO files are removed when the
    test finishes, whether it passes or fails.
    """
    import shutil

    airlines = h2o.import_file(
        path=pyunit_utils.locate("smalldata/testng/airlines_train.csv"))
    xgb = H2OXGBoostEstimator(ntrees=1, nfolds=3)
    xgb.train(x=x, y=y, training_frame=airlines, validation_frame=airlines)
    print(xgb)
    with Capturing() as original_output:
        xgb.show()

    # mkdtemp() leaves removal to the caller; track every directory created
    # so that the finally clause can delete them.
    temp_dirs = []
    try:
        original_dir = tempfile.mkdtemp()
        temp_dirs.append(original_dir)
        original_model_filename = xgb.download_mojo(original_dir)

        # Import the MOJO into the cluster and build a generic model from it
        key = h2o.lazy_import(original_model_filename)
        fr = h2o.get_frame(key[0])
        model = H2OGenericEstimator(model_key=fr)
        model.train()
        print(model)
        with Capturing() as generic_output:
            model.show()

        output_test(str(original_output), str(generic_output), strip_part,
                    algo_name, generic_algo_name)

        predictions = model.predict(airlines)
        assert predictions is not None
        assert predictions.nrows == 24421
        output = model._model_json["output"]
        assert output["variable_importances"] is not None
        assert len(output["variable_importances"]._cell_values) > 0
        assert output["model_summary"] is not None
        assert len(output["model_summary"]._cell_values) > 0

        # Test constructor generating the model from existing MOJO file
        model_from_file = H2OGenericEstimator.from_file(original_model_filename)
        assert model_from_file is not None
        predictions = model_from_file.predict(airlines)
        assert predictions is not None
        assert predictions.nrows == 24421
        output = model_from_file._model_json["output"]
        assert output["variable_importances"] is not None
        assert len(output["variable_importances"]._cell_values) > 0
        assert output["model_summary"] is not None
        assert len(output["model_summary"]._cell_values) > 0

        # Download the MOJO back from the generic model and compare sizes
        generic_dir = tempfile.mkdtemp("zip", "genericMojo")
        temp_dirs.append(generic_dir)
        generic_mojo_filename = model_from_file.download_mojo(path=generic_dir)
        assert os.path.getsize(generic_mojo_filename) == os.path.getsize(
            original_model_filename)
    finally:
        # Best-effort cleanup: a failed removal must not mask a test failure.
        for temp_dir in temp_dirs:
            shutil.rmtree(temp_dir, ignore_errors=True)
def mojo_model_test():
    """Round-trip a GBM model through MOJO and the generic estimator.

    A one-tree ``H2OGradientBoostingEstimator`` is trained on the airlines
    data set, exported with ``download_mojo`` and re-imported twice: through
    ``h2o.lazy_import`` + ``H2OGenericEstimator(model_key=...)`` and through
    ``H2OGenericEstimator.from_file``. Both generic models must score the
    training frame and expose non-empty variable importances and model
    summary tables. The MOJO downloaded again from the generic model must
    have the same size on disk as the original one.

    Temporary directories created for the MOJO files are removed when the
    test finishes, whether it passes or fails.
    """
    import shutil

    # GBM
    airlines = h2o.import_file(
        path=pyunit_utils.locate("smalldata/testng/airlines_train.csv"))
    gbm = H2OGradientBoostingEstimator(ntrees=1)
    gbm.train(x=["Origin", "Dest"], y="IsDepDelayed", training_frame=airlines)

    # mkdtemp() leaves removal to the caller; track every directory created
    # so that the finally clause can delete them.
    temp_dirs = []
    try:
        original_dir = tempfile.mkdtemp()
        temp_dirs.append(original_dir)
        original_model_filename = gbm.download_mojo(original_dir)

        # Import the MOJO into the cluster and build a generic model from it
        key = h2o.lazy_import(original_model_filename)
        fr = h2o.get_frame(key[0])
        model = H2OGenericEstimator(model_key=fr)
        model.train()

        predictions = model.predict(airlines)
        assert predictions is not None
        assert predictions.nrows == 24421
        output = model._model_json["output"]
        assert output["variable_importances"] is not None
        assert len(output["variable_importances"]._cell_values) > 0
        assert output["model_summary"] is not None
        assert len(output["model_summary"]._cell_values) > 0

        # Test constructor generating the model from existing MOJO file
        model_from_file = H2OGenericEstimator.from_file(original_model_filename)
        assert model_from_file is not None
        predictions = model_from_file.predict(airlines)
        assert predictions is not None
        assert predictions.nrows == 24421
        output = model_from_file._model_json["output"]
        assert output["variable_importances"] is not None
        assert len(output["variable_importances"]._cell_values) > 0
        assert output["model_summary"] is not None
        assert len(output["model_summary"]._cell_values) > 0

        # Download the MOJO back from the generic model and compare sizes
        generic_dir = tempfile.mkdtemp("zip", "genericMojo")
        temp_dirs.append(generic_dir)
        generic_mojo_filename = model_from_file.download_mojo(path=generic_dir)
        assert os.path.getsize(generic_mojo_filename) == os.path.getsize(
            original_model_filename)
    finally:
        # Best-effort cleanup: a failed removal must not mask a test failure.
        for temp_dir in temp_dirs:
            shutil.rmtree(temp_dir, ignore_errors=True)