def test(x, y, output_test, strip_part, algo_name, generic_algo_name):
    """Round-trip a Deep Learning model through its MOJO and the generic estimator.

    Trains a one-epoch ``H2ODeepLearningEstimator`` on the airlines training
    data, downloads its MOJO and loads it back twice: once from a lazily
    imported key and once via ``H2OGenericEstimator.from_file``. For each
    generic model the predictions and the model summary are checked; for the
    key-based one the printed output is also compared with the original.

    Args:
        x: Predictor columns passed to ``train``.
        y: Response column passed to ``train``.
        output_test: Callable invoked with the captured original and generic
            ``show()`` output followed by the three strings below.
        strip_part: Forwarded unchanged to ``output_test``.
        algo_name: Forwarded unchanged to ``output_test``.
        generic_algo_name: Forwarded unchanged to ``output_test``.
    """
    expected_rows = 24421  # row count every prediction frame is asserted to have

    def check_scoring(estimator):
        # Score the airlines frame and require a non-empty model summary.
        scored = estimator.predict(frame)
        assert scored is not None
        assert scored.nrows == expected_rows
        summary = estimator._model_json["output"]["model_summary"]
        assert summary is not None
        assert len(summary._cell_values) > 0

    frame = h2o.import_file(
        path=pyunit_utils.locate("smalldata/testng/airlines_train.csv"))
    deep_learning = H2ODeepLearningEstimator(epochs=1)
    deep_learning.train(x=x, y=y, training_frame=frame, validation_frame=frame)
    print(deep_learning)
    with Capturing() as original_output:
        deep_learning.show()

    # download_mojo returns the path of the written MOJO file.
    mojo_path = deep_learning.download_mojo(tempfile.mkdtemp())

    # Variant 1: generic model built from a lazily imported MOJO key.
    imported_keys = h2o.lazy_import(mojo_path)
    mojo_frame = h2o.get_frame(imported_keys[0])
    generic_from_key = H2OGenericEstimator(model_key=mojo_frame)
    generic_from_key.train()
    compare_params(deep_learning, generic_from_key)
    print(generic_from_key)
    with Capturing() as generic_output:
        generic_from_key.show()

    output_test(str(original_output), str(generic_output), strip_part,
                algo_name, generic_algo_name)
    check_scoring(generic_from_key)

    # Variant 2: generic model built directly from the MOJO file.
    generic_from_file = H2OGenericEstimator.from_file(mojo_path)
    assert generic_from_file is not None
    check_scoring(generic_from_file)

    # The MOJO downloaded from the generic model must match the original size.
    redownload_dir = tempfile.mkdtemp(suffix="zip", prefix="genericMojo")
    redownloaded_path = generic_from_file.download_mojo(path=redownload_dir)
    assert os.path.getsize(redownloaded_path) == os.path.getsize(mojo_path)
Exemplo n.º 2
0
def stackedensemble_mojo_model_test():
    """Check that a Stacked Ensemble MOJO can be served by the generic estimator.

    Builds GBM and DRF base models with 2-fold cross-validation on the iris
    training data, stacks them, downloads the ensemble's MOJO and loads it
    back both from a lazily imported key and via
    ``H2OGenericEstimator.from_file``. Each generic model must score the iris
    test frame, and the MOJO downloaded again from the generic model must have
    the same file size as the original download.
    """
    iris_train = h2o.import_file(
        pyunit_utils.locate("smalldata/iris/iris_train.csv"))
    iris_test = h2o.import_file(
        pyunit_utils.locate("smalldata/iris/iris_test.csv"))
    predictors = iris_train.columns
    response = "species"

    # Both base learners share the same cross-validation configuration.
    cv_settings = dict(nfolds=2,
                       fold_assignment="Modulo",
                       keep_cross_validation_predictions=True)
    gbm = H2OGradientBoostingEstimator(**cv_settings)
    gbm.train(x=predictors, y=response, training_frame=iris_train)
    drf = H2ORandomForestEstimator(**cv_settings)
    drf.train(x=predictors, y=response, training_frame=iris_train)

    ensemble = H2OStackedEnsembleEstimator(
        training_frame=iris_train,
        validation_frame=iris_test,
        base_models=[gbm.model_id, drf.model_id])
    ensemble.train(x=predictors, y=response, training_frame=iris_train)
    print(ensemble)
    with Capturing() as original_output:
        ensemble.show()

    # download_mojo returns the path of the written MOJO file.
    mojo_path = ensemble.download_mojo(tempfile.mkdtemp())

    # Variant 1: generic model built from a lazily imported MOJO key.
    imported_keys = h2o.lazy_import(mojo_path)
    mojo_frame = h2o.get_frame(imported_keys[0])
    generic_from_key = H2OGenericEstimator(model_key=mojo_frame)
    generic_from_key.train()
    compare_params(ensemble, generic_from_key)

    scored = generic_from_key.predict(iris_test)
    assert scored is not None

    # Variant 2: generic model built directly from the MOJO file.
    generic_from_file = H2OGenericEstimator.from_file(mojo_path)
    assert generic_from_file is not None
    scored = generic_from_file.predict(iris_test)
    assert scored is not None

    # The MOJO downloaded from the generic model must match the original size.
    redownload_dir = tempfile.mkdtemp(suffix="zip", prefix="genericMojo")
    redownloaded_path = generic_from_file.download_mojo(path=redownload_dir)
    assert os.path.getsize(redownloaded_path) == os.path.getsize(mojo_path)
def test(x, y, output_test, strip_part, algo_name, generic_algo_name):
    """Round-trip an XGBoost model through its MOJO and the generic estimator.

    Trains a single-tree ``H2OXGBoostEstimator`` with 3-fold cross-validation
    on the airlines training data, downloads its MOJO and loads it back from a
    lazily imported key and via ``H2OGenericEstimator.from_file``. Both
    generic models must produce predictions and expose non-empty variable
    importances and model summary tables.

    Args:
        x: Predictor columns passed to ``train``.
        y: Response column passed to ``train``.
        output_test: Callable invoked with the captured original and generic
            ``show()`` output followed by the three strings below.
        strip_part: Forwarded unchanged to ``output_test``.
        algo_name: Forwarded unchanged to ``output_test``.
        generic_algo_name: Forwarded unchanged to ``output_test``.
    """
    expected_rows = 24421  # row count every prediction frame is asserted to have
    required_tables = ("variable_importances", "model_summary")

    def check_scoring(estimator):
        # Score the airlines frame, then require each output table to be filled.
        scored = estimator.predict(frame)
        assert scored is not None
        assert scored.nrows == expected_rows
        output = estimator._model_json["output"]
        for table_name in required_tables:
            table = output[table_name]
            assert table is not None
            assert len(table._cell_values) > 0

    frame = h2o.import_file(
        path=pyunit_utils.locate("smalldata/testng/airlines_train.csv"))
    xgboost = H2OXGBoostEstimator(ntrees=1, nfolds=3)
    xgboost.train(x=x, y=y, training_frame=frame, validation_frame=frame)
    print(xgboost)
    with Capturing() as original_output:
        xgboost.show()

    # download_mojo returns the path of the written MOJO file.
    mojo_path = xgboost.download_mojo(tempfile.mkdtemp())

    # Variant 1: generic model built from a lazily imported MOJO key.
    imported_keys = h2o.lazy_import(mojo_path)
    mojo_frame = h2o.get_frame(imported_keys[0])
    generic = H2OGenericEstimator(model_key=mojo_frame)
    generic.train()
    print(generic)
    with Capturing() as generic_output:
        generic.show()

    output_test(str(original_output), str(generic_output), strip_part,
                algo_name, generic_algo_name)
    check_scoring(generic)

    # Variant 2: generic model built directly from the MOJO file.
    generic = H2OGenericEstimator.from_file(mojo_path)
    assert generic is not None
    check_scoring(generic)

    # The MOJO downloaded from the generic model must match the original size.
    redownload_dir = tempfile.mkdtemp(suffix="zip", prefix="genericMojo")
    redownloaded_path = generic.download_mojo(path=redownload_dir)
    assert os.path.getsize(redownloaded_path) == os.path.getsize(mojo_path)
Exemplo n.º 4
0
def mojo_model_ifr_test():
    """Round-trip an Isolation Forest model through its MOJO.

    Trains a single-tree ``H2OIsolationForestEstimator`` on the airlines
    training data, downloads the MOJO, reloads it with
    ``H2OGenericEstimator.from_file`` and checks that parameters, printed
    output, predictions and the model summary agree with expectations. The
    generic model is expected to carry no variable importances.
    """
    frame = h2o.import_file(
        path=pyunit_utils.locate("smalldata/testng/airlines_train.csv"))
    forest = H2OIsolationForestEstimator(ntrees=1)
    forest.train(x=["Origin", "Dest"], y="Distance", training_frame=frame)
    print(forest)
    with Capturing() as original_output:
        forest.show()

    # download_mojo returns the path of the written MOJO file.
    mojo_path = forest.download_mojo(tempfile.mkdtemp())

    generic = H2OGenericEstimator.from_file(mojo_path)
    assert generic is not None
    print(generic)
    compare_params(forest, generic)
    with Capturing() as generic_output:
        generic.show()

    compare_output(
        str(original_output),
        str(generic_output),
        "'Model Summary: '",
        'ModelMetricsAnomaly: isolationforest',
        'ModelMetricsAnomaly: generic',
    )

    scored = generic.predict(frame)
    assert scored is not None
    assert scored.nrows == 24421

    output = generic._model_json["output"]
    assert output["variable_importances"] is None
    summary = output["model_summary"]
    assert summary is not None
    assert len(summary._cell_values) > 0

    # The MOJO downloaded from the generic model must match the original size.
    redownload_dir = tempfile.mkdtemp(suffix="zip", prefix="genericMojo")
    redownloaded_path = generic.download_mojo(path=redownload_dir)
    assert os.path.getsize(redownloaded_path) == os.path.getsize(mojo_path)
    def optimum_threshold(self, hf: h2o.H2OFrame, model: H2OGenericEstimator) -> float:
        """Pick the decision threshold with the lowest cost for ``model``.

        The ``p1`` column of the model's predictions on ``hf`` is used as the
        score. Thresholds 0.01, 0.02, ..., 0.99 are tried in order; for each
        one the module-level helpers ``predict``, ``reconcile`` and
        ``outcome`` are applied and the cost returned by ``outcome`` (called
        with ``self.inverse_costs``) is recorded.

        Args:
            hf: Frame to evaluate on. The column name ``fraud`` is passed to
                ``reconcile`` -- presumably the ground-truth label; confirm
                against that helper.
            model: Model whose ``p1`` predictions are thresholded.

        Returns:
            The threshold with the minimum recorded cost. When several
            thresholds tie, the smallest one is returned.
        """
        scored = hf.as_data_frame()
        scored['model_score'] = model.predict(test_data=hf).as_data_frame()['p1']

        thresholds = []
        costs = []
        # Sweep the candidate thresholds in steps of one hundredth.
        for step in range(1, 100):
            cutoff = step / 100
            confusion_col = f"CM_{cutoff}"
            scored['prediction'] = predict(scored, cutoff, 1, 'model_score')
            scored = reconcile(scored, 'prediction', 'fraud', confusion_col)
            cost, scored = outcome(scored, self.inverse_costs, confusion_col, f"costs_{cutoff}")
            thresholds.append(cutoff)
            costs.append(cost)

        # list.index returns the first match, so ties resolve to the lowest threshold.
        best = thresholds[costs.index(min(costs))]
        print(f"optimum_threshold: {best}")
        return best
Exemplo n.º 6
0
def mojo_model_glm_test():
    """Round-trip a cross-validated GLM through its MOJO.

    Trains an ``H2OGeneralizedLinearEstimator`` with 3 folds on the airlines
    training data (also used as the validation frame), downloads the MOJO,
    reloads it with ``H2OGenericEstimator.from_file`` and checks predictions,
    the model summary and the size of the re-downloaded MOJO.
    """
    frame = h2o.import_file(
        path=pyunit_utils.locate("smalldata/testng/airlines_train.csv"))
    glm = H2OGeneralizedLinearEstimator(nfolds=3)
    glm.train(x=["Origin", "Dest"], y="Distance",
              training_frame=frame, validation_frame=frame)

    # download_mojo returns the path of the written MOJO file.
    mojo_path = glm.download_mojo(tempfile.mkdtemp())

    generic = H2OGenericEstimator.from_file(mojo_path)
    assert generic is not None
    print(generic)

    scored = generic.predict(frame)
    assert scored is not None
    assert scored.nrows == 24421

    summary = generic._model_json["output"]["model_summary"]
    assert summary is not None
    assert len(summary._cell_values) > 0

    # The MOJO downloaded from the generic model must match the original size.
    redownload_dir = tempfile.mkdtemp(suffix="zip", prefix="genericMojo")
    redownloaded_path = generic.download_mojo(path=redownload_dir)
    assert os.path.getsize(redownloaded_path) == os.path.getsize(mojo_path)
def test(x, y, output_test, strip_part, algo_name, generic_algo_name):
    """Round-trip a Random Forest model through its MOJO.

    Trains a single-tree ``H2ORandomForestEstimator`` with 3-fold
    cross-validation on the airlines training data, downloads the MOJO,
    reloads it with ``H2OGenericEstimator.from_file`` and checks the printed
    output, predictions, variable importances and model summary.

    Args:
        x: Predictor columns passed to ``train``.
        y: Response column passed to ``train``.
        output_test: Callable invoked with the captured original and generic
            ``show()`` output followed by the three strings below.
        strip_part: Forwarded unchanged to ``output_test``.
        algo_name: Forwarded unchanged to ``output_test``.
        generic_algo_name: Forwarded unchanged to ``output_test``.
    """
    frame = h2o.import_file(
        path=pyunit_utils.locate("smalldata/testng/airlines_train.csv"))
    forest = H2ORandomForestEstimator(ntrees=1, nfolds=3)
    forest.train(x=x, y=y, training_frame=frame, validation_frame=frame)
    print(forest)
    with Capturing() as original_output:
        forest.show()

    # download_mojo returns the path of the written MOJO file.
    mojo_path = forest.download_mojo(tempfile.mkdtemp())

    generic = H2OGenericEstimator.from_file(mojo_path)
    assert generic is not None
    print(generic)
    with Capturing() as generic_output:
        generic.show()

    output_test(str(original_output), str(generic_output), strip_part,
                algo_name, generic_algo_name)

    scored = generic.predict(frame)
    assert scored is not None
    assert scored.nrows == 24421

    # Each of these output tables must be present and non-empty.
    output = generic._model_json["output"]
    for table_name in ("variable_importances", "model_summary"):
        table = output[table_name]
        assert table is not None
        assert len(table._cell_values) > 0

    # The MOJO downloaded from the generic model must match the original size.
    redownload_dir = tempfile.mkdtemp(suffix="zip", prefix="genericMojo")
    redownloaded_path = generic.download_mojo(path=redownload_dir)
    assert os.path.getsize(redownloaded_path) == os.path.getsize(mojo_path)
Exemplo n.º 8
0
def test(x, y, output_test, strip_part, algo_name, generic_algo_name, family):
    """Round-trip a GLM of the given family through its MOJO.

    Trains an ``H2OGeneralizedLinearEstimator`` (3 folds, ``alpha=1``,
    ``lambda_=1``) on the airlines training data, downloads the MOJO, reloads
    it with ``H2OGenericEstimator.from_file`` and checks parameters, printed
    output, predictions and the model summary.

    Args:
        x: Predictor columns passed to ``train``.
        y: Response column passed to ``train``.
        output_test: Callable invoked with the captured original and generic
            ``show()`` output followed by the three strings below.
        strip_part: Forwarded unchanged to ``output_test``.
        algo_name: Forwarded unchanged to ``output_test``.
        generic_algo_name: Forwarded unchanged to ``output_test``.
        family: GLM family passed to the estimator constructor.
    """
    frame = h2o.import_file(
        path=pyunit_utils.locate("smalldata/testng/airlines_train.csv"))
    glm = H2OGeneralizedLinearEstimator(nfolds=3, family=family,
                                        alpha=1, lambda_=1)
    glm.train(x=x, y=y, training_frame=frame, validation_frame=frame)
    print(glm)
    with Capturing() as original_output:
        glm.show()

    # download_mojo returns the path of the written MOJO file.
    mojo_path = glm.download_mojo(tempfile.mkdtemp())

    generic = H2OGenericEstimator.from_file(mojo_path)
    assert generic is not None
    print(generic)
    compare_params(glm, generic)
    with Capturing() as generic_output:
        generic.show()

    output_test(str(original_output), str(generic_output), strip_part,
                algo_name, generic_algo_name)

    scored = generic.predict(frame)
    assert scored is not None
    assert scored.nrows == 24421

    summary = generic._model_json["output"]["model_summary"]
    assert summary is not None
    assert len(summary._cell_values) > 0

    # The MOJO downloaded from the generic model must match the original size.
    redownload_dir = tempfile.mkdtemp(suffix="zip", prefix="genericMojo")
    redownloaded_path = generic.download_mojo(path=redownload_dir)
    assert os.path.getsize(redownloaded_path) == os.path.getsize(mojo_path)
Exemplo n.º 9
0
def mojo_model_irf_test():
    """Round-trip an Isolation Forest model through its MOJO.

    Trains a single-tree ``H2OIsolationForestEstimator`` on the airlines
    training data, downloads the MOJO, reloads it with
    ``H2OGenericEstimator.from_file`` and compares the printed output of the
    original and the generic model. It also checks predictions, the absence
    of variable importances, the model summary and the size of the
    re-downloaded MOJO.
    """
    airlines = h2o.import_file(
        path=pyunit_utils.locate("smalldata/testng/airlines_train.csv"))
    irf = H2OIsolationForestEstimator(ntrees=1)
    irf.train(x=["Origin", "Dest"], y="Distance", training_frame=airlines)
    print(irf)
    with Capturing() as original_output:
        irf.show()

    # download_mojo returns the path of the written MOJO file.
    original_model_filename = tempfile.mkdtemp()
    original_model_filename = irf.download_mojo(original_model_filename)

    model = H2OGenericEstimator.from_file(original_model_filename)
    assert model is not None
    print(model)
    # Capture the generic model's output, not the original model's; otherwise
    # the comparison below would compare the original output with itself.
    with Capturing() as mojo_output:
        model.show()

    compare_output(str(original_output), str(mojo_output))
    predictions = model.predict(airlines)
    assert predictions is not None
    assert predictions.nrows == 24421
    assert model._model_json["output"]["variable_importances"] is None
    assert model._model_json["output"]["model_summary"] is not None
    assert len(model._model_json["output"]["model_summary"]._cell_values) > 0

    # The MOJO downloaded from the generic model must match the original size.
    generic_mojo_filename = tempfile.mkdtemp("zip", "genericMojo")
    generic_mojo_filename = model.download_mojo(path=generic_mojo_filename)
    assert os.path.getsize(generic_mojo_filename) == os.path.getsize(
        original_model_filename)
Exemplo n.º 10
0
def generic_blank_constructor():
    """Load a GBM MOJO through an argument-less ``H2OGenericEstimator``.

    Trains a single-tree GBM on the airlines training data, downloads its
    MOJO, then creates an empty generic estimator, points its ``path``
    attribute at the MOJO file and calls ``train()``. The resulting model must
    report the GBM identifiers in its output and be able to score the frame.
    """
    frame = h2o.import_file(
        path=pyunit_utils.locate("smalldata/testng/airlines_train.csv"))
    gbm = H2OGradientBoostingEstimator(ntrees=1)
    gbm.train(x=["Origin", "Dest"], y="IsDepDelayed", training_frame=frame)

    # download_mojo returns the path of the written MOJO file.
    mojo_path = gbm.download_mojo(tempfile.mkdtemp())

    # Configure the generic estimator after construction rather than through
    # constructor arguments.
    generic = H2OGenericEstimator()
    generic.path = mojo_path
    generic.train()
    assert isinstance(generic, H2OGenericEstimator)

    output = generic._model_json["output"]
    assert output["original_model_identifier"] == "gbm"
    assert output["original_model_full_name"] == "Gradient Boosting Machine"

    # Scoring must be available on the loaded model.
    scored = generic.predict(frame)
    assert scored is not None
    assert scored.nrows == 24421
Exemplo n.º 11
0
def test(x, ties, stratify_by, use_all_factor_levels):
    """Round-trip a CoxPH model through its MOJO and compare metrics.

    Trains an ``H2OCoxProportionalHazardsEstimator`` on the heart data,
    downloads the MOJO into the located ``results`` path, reloads it with
    ``H2OGenericEstimator.from_file`` and checks that parameters, the
    concordance metrics and the predictions on the heart test data match the
    original model.

    Args:
        x: Predictor columns passed to ``train``.
        ties: Value for the estimator's ``ties`` parameter.
        stratify_by: Columns to stratify by; each is converted to a factor in
            both frames before training.
        use_all_factor_levels: Value for the estimator's
            ``use_all_factor_levels`` parameter.
    """
    heart_train = h2o.import_file(
        path=pyunit_utils.locate("smalldata/coxph_test/heart.csv"))
    heart_holdout = h2o.import_file(
        path=pyunit_utils.locate("smalldata/coxph_test/heart_test.csv"))
    for strat_col in stratify_by:
        heart_train[strat_col] = heart_train[strat_col].asfactor()
        heart_holdout[strat_col] = heart_holdout[strat_col].asfactor()

    coxph = H2OCoxProportionalHazardsEstimator(
        start_column="start",
        stop_column="stop",
        stratify_by=stratify_by,
        use_all_factor_levels=use_all_factor_levels,
        ties=ties)
    coxph.train(x=x, y="event", training_frame=heart_train)
    coxph.show()

    # download_mojo returns the path of the written MOJO file.
    mojo_path = coxph.download_mojo(pyunit_utils.locate("results"))

    from h2o.estimators import H2OGenericEstimator
    generic = H2OGenericEstimator.from_file(mojo_path)
    assert generic is not None
    # Printing must not fail; the text is not compared because it won't match.
    generic.show()
    compare_params(coxph, generic)

    assert isinstance(generic.model_performance(),
                      H2OModelMetricsRegressionCoxPH)
    # Fresh performance objects are fetched for every metric, original first.
    for metric_name in ("concordance", "concordant", "tied_y"):
        assert (getattr(coxph.model_performance(), metric_name)()
                == getattr(generic.model_performance(), metric_name)())

    # Metrics must also be obtainable on new data.
    assert isinstance(generic.model_performance(test_data=heart_holdout),
                      H2OModelMetricsRegressionCoxPH)

    generic_scored = generic.predict(heart_holdout)
    original_scored = coxph.predict(heart_holdout)
    assert generic_scored is not None
    assert generic_scored.nrows == heart_holdout.nrows
    assert original_scored.nrows == heart_holdout.nrows

    pyunit_utils.compare_string_frames_local(generic_scored, original_scored,
                                             0.001)

    # The MOJO downloaded from the generic model must match the original size.
    redownload_dir = tempfile.mkdtemp(suffix="zip", prefix="genericMojo")
    redownloaded_path = generic.download_mojo(path=redownload_dir)
    assert os.path.getsize(redownloaded_path) == os.path.getsize(mojo_path)
def mojo_model_test():
    """Round-trip a GBM model through its MOJO and the generic estimator.

    Trains a single-tree GBM on the airlines training data, downloads its
    MOJO and loads it back from a lazily imported key and via
    ``H2OGenericEstimator.from_file``. Both generic models must produce
    predictions and expose non-empty variable importances and model summary
    tables.
    """
    expected_rows = 24421  # row count every prediction frame is asserted to have
    required_tables = ("variable_importances", "model_summary")

    def check_scoring(estimator):
        # Score the airlines frame, then require each output table to be filled.
        scored = estimator.predict(frame)
        assert scored is not None
        assert scored.nrows == expected_rows
        output = estimator._model_json["output"]
        for table_name in required_tables:
            table = output[table_name]
            assert table is not None
            assert len(table._cell_values) > 0

    frame = h2o.import_file(
        path=pyunit_utils.locate("smalldata/testng/airlines_train.csv"))
    gbm = H2OGradientBoostingEstimator(ntrees=1)
    gbm.train(x=["Origin", "Dest"], y="IsDepDelayed", training_frame=frame)

    # download_mojo returns the path of the written MOJO file.
    mojo_path = gbm.download_mojo(tempfile.mkdtemp())

    # Variant 1: generic model built from a lazily imported MOJO key.
    imported_keys = h2o.lazy_import(mojo_path)
    mojo_frame = h2o.get_frame(imported_keys[0])
    generic = H2OGenericEstimator(model_key=mojo_frame)
    generic.train()
    check_scoring(generic)

    # Variant 2: generic model built directly from the MOJO file.
    generic = H2OGenericEstimator.from_file(mojo_path)
    assert generic is not None
    check_scoring(generic)

    # The MOJO downloaded from the generic model must match the original size.
    redownload_dir = tempfile.mkdtemp(suffix="zip", prefix="genericMojo")
    redownloaded_path = generic.download_mojo(path=redownload_dir)
    assert os.path.getsize(redownloaded_path) == os.path.getsize(mojo_path)
Exemplo n.º 13
0
def test_mojo_ids():
    """Check that MOJO models keep the original model id on every load path.

    Trains a single-tree GBM on the airlines training data, writes its MOJO
    with ``save_mojo`` and ``download_mojo``, and loads it back through
    ``h2o.import_mojo``, ``h2o.upload_mojo`` and
    ``H2OGenericEstimator.from_file``. The id is first passed explicitly;
    ``import_mojo`` and ``upload_mojo`` are then called again without an id.
    In every case the loaded model's id is asserted to equal the original
    model's id.
    """
    # Train a model
    airlines = h2o.import_file(
        path=pyunit_utils.locate("smalldata/testng/airlines_train.csv"))
    model = H2OGradientBoostingEstimator(ntrees=1)
    model.train(x=["Origin", "Dest"],
                y="IsDepDelayed",
                training_frame=airlines,
                verbose=False)

    # Save the previously created model into a temporary file
    original_model_filename = tempfile.mkdtemp()
    original_model_filename = model.save_mojo(original_model_filename)

    original_model_id = model.model_id
    print(original_model_id)

    # Import MOJO from the temporary file
    mojo_model = h2o.import_mojo(original_model_filename,
                                 model_id=original_model_id)
    print(mojo_model.model_id)
    assert_equals(mojo_model.model_id, original_model_id,
                  "Ids should be the same.")

    # Download the MOJO
    original_model_filename = model.download_mojo(original_model_filename)

    # Upload MOJO from the temporary file
    mojo_model_up = h2o.upload_mojo(original_model_filename,
                                    model_id=original_model_id)
    print(mojo_model_up.model_id)
    assert_equals(mojo_model_up.model_id, original_model_id,
                  "Ids should be the same.")

    # Load MOJO model from file
    mojo_model_from_file = H2OGenericEstimator.from_file(
        original_model_filename, original_model_id)
    print(mojo_model_from_file.model_id)
    assert_equals(mojo_model_from_file.model_id, original_model_id,
                  "Ids should be the same.")

    # Without an explicit model_id the id is expected to be initialized from
    # the file path and still equal the original id, so the failure message
    # must state that the ids should match.
    mojo_model_up_wid = h2o.upload_mojo(original_model_filename)
    print(mojo_model_up_wid.model_id)
    assert_equals(mojo_model_up_wid.model_id, original_model_id,
                  "Ids should be the same.")

    mojo_model_im_wid = h2o.import_mojo(original_model_filename)
    print(mojo_model_im_wid.model_id)
    assert_equals(mojo_model_im_wid.model_id, original_model_id,
                  "Ids should be the same.")
def test(output_test, x, ties, stratify_by, use_all_factor_levels):
    """Round-trip a CoxPH model through its MOJO and compare predictions.

    Trains an ``H2OCoxProportionalHazardsEstimator`` on the heart data,
    downloads the MOJO, reloads it with ``H2OGenericEstimator.from_file`` and
    checks that parameters and the predictions on the heart test data match
    the original model.

    Args:
        output_test: Accepted for signature compatibility; not used in the
            body.
        x: Predictor columns passed to ``train``.
        ties: Value for the estimator's ``ties`` parameter.
        stratify_by: Columns to stratify by; each is converted to a factor in
            both frames before training.
        use_all_factor_levels: Value for the estimator's
            ``use_all_factor_levels`` parameter.
    """
    heart_train = h2o.import_file(
        path=pyunit_utils.locate("smalldata/coxph_test/heart.csv"))
    heart_holdout = h2o.import_file(
        path=pyunit_utils.locate("smalldata/coxph_test/heart_test.csv"))
    for strat_col in stratify_by:
        heart_train[strat_col] = heart_train[strat_col].asfactor()
        heart_holdout[strat_col] = heart_holdout[strat_col].asfactor()

    coxph = H2OCoxProportionalHazardsEstimator(
        start_column="start",
        stop_column="stop",
        stratify_by=stratify_by,
        use_all_factor_levels=use_all_factor_levels,
        ties=ties)
    coxph.train(x=x, y="event", training_frame=heart_train)
    # The model is shown twice: once captured (the capture is not inspected
    # afterwards) and once printed normally.
    with Capturing() as original_output:
        coxph.show()
    coxph.show()

    # download_mojo returns the path of the written MOJO file.
    mojo_path = coxph.download_mojo(tempfile.mkdtemp())

    generic = H2OGenericEstimator.from_file(mojo_path)
    assert generic is not None
    compare_params(coxph, generic)

    generic_scored = generic.predict(heart_holdout)
    original_scored = coxph.predict(heart_holdout)
    assert generic_scored is not None
    assert generic_scored.nrows == heart_holdout.nrows
    assert original_scored.nrows == heart_holdout.nrows

    pyunit_utils.compare_string_frames_local(generic_scored, original_scored,
                                             0.001)

    # The MOJO downloaded from the generic model must match the original size.
    redownload_dir = tempfile.mkdtemp(suffix="zip", prefix="genericMojo")
    redownloaded_path = generic.download_mojo(path=redownload_dir)
    assert os.path.getsize(redownloaded_path) == os.path.getsize(mojo_path)
def mojo_model_glm_test():
    """Round-trip a default GLM through its MOJO.

    Trains an ``H2OGeneralizedLinearEstimator`` with default settings on the
    airlines training data, downloads the MOJO, reloads it with
    ``H2OGenericEstimator.from_file`` and checks predictions, the model
    summary and the size of the re-downloaded MOJO.
    """
    frame = h2o.import_file(
        path=pyunit_utils.locate("smalldata/testng/airlines_train.csv"))
    glm = H2OGeneralizedLinearEstimator()
    glm.train(x=["Origin", "Dest"], y="Distance", training_frame=frame)

    # download_mojo returns the path of the written MOJO file.
    mojo_path = glm.download_mojo(tempfile.mkdtemp())

    generic = H2OGenericEstimator.from_file(mojo_path)
    assert generic is not None

    scored = generic.predict(frame)
    assert scored is not None
    assert scored.nrows == 24421

    summary = generic._model_json["output"]["model_summary"]
    assert summary is not None
    assert len(summary._cell_values) > 0

    # The MOJO downloaded from the generic model must match the original size.
    redownload_dir = tempfile.mkdtemp(suffix="zip", prefix="genericMojo")
    redownloaded_path = generic.download_mojo(path=redownload_dir)
    assert os.path.getsize(redownloaded_path) == os.path.getsize(mojo_path)
def mojo_model_eif_test():
    """Round-trip an Extended Isolation Forest model through its MOJO.

    Trains a single-tree ``H2OExtendedIsolationForestEstimator`` (seeded,
    with ``extension_level`` set to the column count minus one) on the
    single-blob anomaly data, downloads the MOJO and reloads it with
    ``H2OGenericEstimator.from_file``. Parameters, printed output and the
    model summary are checked, and the generic model's predictions must equal
    the original model's predictions.
    """
    blob = h2o.import_file(
        path=pyunit_utils.locate("smalldata/anomaly/single_blob.csv"))
    eif = H2OExtendedIsolationForestEstimator(
        ntrees=1, extension_level=blob.ncol - 1, seed=1234)
    eif.train(training_frame=blob)
    original_scored = eif.predict(blob)
    print(eif)
    print(original_scored)
    with Capturing() as original_output:
        eif.show()

    # download_mojo returns the path of the written MOJO file.
    mojo_path = eif.download_mojo(tempfile.mkdtemp())

    generic = H2OGenericEstimator.from_file(mojo_path)
    assert generic is not None
    print(generic)
    compare_params(eif, generic)
    with Capturing() as generic_output:
        generic.show()

    compare_output(
        str(original_output),
        str(generic_output),
        "'Model Summary: '",
        'ModelMetricsAnomaly: extendedisolationforest',
        'ModelMetricsAnomaly: generic',
    )

    generic_scored = generic.predict(blob)
    print(generic_scored)
    assert generic_scored is not None
    assert generic_scored.nrows == 500

    output = generic._model_json["output"]
    assert output["variable_importances"] is None
    summary = output["model_summary"]
    assert summary is not None
    assert len(summary._cell_values) > 0
    assert compare_frames(original_scored, generic_scored,
                          numElements=-1, strict=True)

    # The MOJO downloaded from the generic model must match the original size.
    redownload_dir = tempfile.mkdtemp(suffix="zip", prefix="genericMojo")
    redownloaded_path = generic.download_mojo(path=redownload_dir)
    assert os.path.getsize(redownloaded_path) == os.path.getsize(mojo_path)
Exemplo n.º 17
0
def mojo_model_drf_test():
    """Round-trip a Random Forest model through its MOJO.

    Trains a single-tree ``H2ORandomForestEstimator`` on the airlines
    training data, downloads the MOJO, reloads it with
    ``H2OGenericEstimator.from_file`` and checks predictions, variable
    importances, the model summary and the size of the re-downloaded MOJO.
    """
    # DRF
    frame = h2o.import_file(
        path=pyunit_utils.locate("smalldata/testng/airlines_train.csv"))
    forest = H2ORandomForestEstimator(ntrees=1)
    forest.train(x=["Origin", "Dest"], y="Distance", training_frame=frame)

    # download_mojo returns the path of the written MOJO file.
    mojo_path = forest.download_mojo(tempfile.mkdtemp())

    generic = H2OGenericEstimator.from_file(mojo_path)
    assert generic is not None

    scored = generic.predict(frame)
    assert scored is not None
    assert scored.nrows == 24421

    # Each of these output tables must be present and non-empty.
    output = generic._model_json["output"]
    for table_name in ("variable_importances", "model_summary"):
        table = output[table_name]
        assert table is not None
        assert len(table._cell_values) > 0

    # The MOJO downloaded from the generic model must match the original size.
    redownload_dir = tempfile.mkdtemp(suffix="zip", prefix="genericMojo")
    redownloaded_path = generic.download_mojo(path=redownload_dir)
    assert os.path.getsize(redownloaded_path) == os.path.getsize(mojo_path)
def mojo_model_test():
    """Round-trip a GBM model through its MOJO and the generic estimator.

    Trains a single-tree GBM on the airlines training data, downloads its
    MOJO and loads it back from a lazily imported key and via
    ``H2OGenericEstimator.from_file``. Both generic models must produce
    predictions and expose non-empty variable importances and model summary
    tables.
    """
    expected_rows = 24421  # row count every prediction frame is asserted to have
    required_tables = ("variable_importances", "model_summary")

    def check_scoring(estimator):
        # Score the airlines frame, then require each output table to be filled.
        scored = estimator.predict(frame)
        assert scored is not None
        assert scored.nrows == expected_rows
        output = estimator._model_json["output"]
        for table_name in required_tables:
            table = output[table_name]
            assert table is not None
            assert len(table._cell_values) > 0

    # GBM
    frame = h2o.import_file(
        path=pyunit_utils.locate("smalldata/testng/airlines_train.csv"))
    gbm = H2OGradientBoostingEstimator(ntrees=1)
    gbm.train(x=["Origin", "Dest"], y="IsDepDelayed", training_frame=frame)

    # download_mojo returns the path of the written MOJO file.
    mojo_path = gbm.download_mojo(tempfile.mkdtemp())

    # Variant 1: generic model built from a lazily imported MOJO key.
    imported_keys = h2o.lazy_import(mojo_path)
    mojo_frame = h2o.get_frame(imported_keys[0])
    generic = H2OGenericEstimator(model_key=mojo_frame)
    generic.train()
    check_scoring(generic)

    # Variant 2: generic model built directly from the MOJO file.
    generic = H2OGenericEstimator.from_file(mojo_path)
    assert generic is not None
    check_scoring(generic)

    # The MOJO downloaded from the generic model must match the original size.
    redownload_dir = tempfile.mkdtemp(suffix="zip", prefix="genericMojo")
    redownloaded_path = generic.download_mojo(path=redownload_dir)
    assert os.path.getsize(redownloaded_path) == os.path.getsize(mojo_path)