Exemplo n.º 1
0
    def test_ridge_daal_vs_sklearn(rows=10, columns=9, verbose=False):
        """Check that the 'daal' and 'sklearn' backends of Ridge agree.

        Two ``h2o4gpu.Ridge`` solvers (``normalize=True``, ``alpha=0.0``) are
        fitted on the data returned by ``get_random_array(rows, columns)`` and
        asked to predict that same input.  The DAAL predictions must match, to
        four decimals, the sklearn predictions, the targets ``y`` and the
        result of ``get_daal_prediction(x, y)``.  The ``score`` values of the
        two solvers must compare equal.

        If the ``CHECKPERFORMANCE`` environment variable is set, the DAAL run
        must additionally take no longer than the sklearn run.

        :param rows: first argument forwarded to ``get_random_array``
        :param columns: second argument forwarded to ``get_random_array``
        :param verbose: when true, the predicted values are printed too
        """
        dataset = get_random_array(rows, columns)
        x, y = dataset[0], dataset[1]
        shape_banner = "Prediction for X[{}][{}] and y[{}][{}]"
        print(shape_banner.format(x.shape[0], x.shape[1],
                                  y.shape[0], y.shape[1]))

        # Each timing window covers solver construction, fit and predict.
        sklearn_t0 = time.time()
        sklearn_ridge = h2o4gpu.Ridge(backend='sklearn',
                                      normalize=True,
                                      alpha=0.0)
        sklearn_ridge.fit(x, y)
        sklearn_predicted = sklearn_ridge.predict(x)
        sklearn_elapsed = time.time() - sklearn_t0

        if verbose:
            sklearn_report = ("Sklearn prediction: ", sklearn_predicted)
        else:
            sklearn_report = ""
        print(sklearn_report, end="")

        daal_t0 = time.time()
        daal_ridge = h2o4gpu.Ridge(backend='daal',
                                   normalize=True,
                                   alpha=0.0)
        daal_ridge.fit(x, y)
        daal_predicted = daal_ridge.predict(x)
        daal_elapsed = time.time() - daal_t0

        if verbose:
            daal_report = ("Daal prediction: ", daal_predicted)
        else:
            daal_report = ""
        print(daal_report, end="")

        # Reference prediction obtained through the helper, not the wrapper.
        daal_predicted_man = get_daal_prediction(x, y)
        if verbose:
            manual_report = ("Manual Daal prediction()", daal_predicted_man)
        else:
            manual_report = ""
        print(manual_report)

        print("Prediction calculated:")
        print("+ Sklearn: {}".format(sklearn_elapsed))
        print("+ Daal:    {}".format(daal_elapsed))

        # DAAL output is checked against sklearn, the targets and the helper.
        assert_array_almost_equal(daal_predicted, sklearn_predicted, decimal=4)
        assert_array_almost_equal(daal_predicted, y, decimal=4)
        assert_array_almost_equal(daal_predicted, daal_predicted_man, decimal=4)

        # The timing comparison is opt-in through the environment.
        if os.getenv("CHECKPERFORMANCE") is not None:
            assert daal_elapsed <= sklearn_elapsed

        sklearn_score = sklearn_ridge.score(x, y)
        daal_score = daal_ridge.score(x, y)
        print("Score calculated: ")
        print("+ Sklearn: {}".format(sklearn_score))
        print("+ Daal:    {}".format(daal_score))

        assert daal_score == sklearn_score
Exemplo n.º 2
0
def fit_model(X_train, y_train, X_test, y_test, reg_type='enet'):
    """Fit an h2o4gpu regressor and an sklearn regressor and compare them.

    The pair of models is chosen by ``reg_type``:

    * ``'lasso'`` -- ``elastic_net.ElasticNetH2O`` restricted to a single
      alpha/lambda point versus ``linear_model.LassoSklearn``
    * ``'ridge'`` -- ``h2o4gpu.Ridge`` versus ``linear_model.RidgeSklearn``
    * ``'enet'``  -- ``h2o4gpu.ElasticNet`` versus
      ``linear_model.ElasticNetSklearn``

    Both models are fitted on the training data (each fit is timed with
    ``time.time()``), used to predict ``X_test`` and scored against
    ``y_test`` with ``r2_score``.  Before returning, ``free_sols``,
    ``free_preds`` and ``finish`` are called on the h2o4gpu model, both
    models are deleted and a garbage collection is forced.

    :param X_train: training features passed to both ``fit`` calls
    :param y_train: training targets passed to both ``fit`` calls
    :param X_test: features passed to both ``predict`` calls
    :param y_test: targets the predictions are scored against
    :param reg_type: one of ``'lasso'``, ``'ridge'`` or ``'enet'``
    :return: tuple ``(time_h2o, time_sklearn, r2_h2o, r2_sklearn)`` with
        the two fit durations in seconds and the two R^2 scores
    :raises ValueError: if ``reg_type`` is not one of the supported values
    """
    if reg_type == 'lasso':
        alpha = 1.0

        # One alpha (alpha_min == alpha_max), one lambda and one fold, so
        # the solver evaluates a single configuration.
        reg_h2o = elastic_net.ElasticNetH2O(
            n_threads=None,
            n_gpus=-1,
            fit_intercept=True,
            lambda_min_ratio=1.0,
            n_lambdas=1,
            n_folds=1,
            n_alphas=1,
            tol=1e-2,
            lambda_stop_early=False,
            glm_stop_early=True,
            glm_stop_early_error_fraction=1.0,
            max_iter=5000,
            verbose=False,
            store_full_path=1,
            lambda_max=alpha,
            alpha_max=1.0,
            alpha_min=1.0,
            alphas=None,
            lambdas=None,
            order=None)

        reg_sklearn = linear_model.LassoSklearn()
    elif reg_type == 'ridge':
        reg_h2o = h2o4gpu.Ridge()
        reg_sklearn = linear_model.RidgeSklearn()
    elif reg_type == 'enet':
        reg_h2o = h2o4gpu.ElasticNet()  # update when the wrapper is done
        reg_sklearn = linear_model.ElasticNetSklearn()
    else:
        # Fail fast with a clear message instead of hitting an unbound
        # local variable further down.
        raise ValueError(
            "Unknown reg_type {!r}; expected 'lasso', 'ridge' or "
            "'enet'".format(reg_type))

    start_h2o = time.time()
    reg_h2o.fit(X_train, y_train, free_input_data=1)
    time_h2o = time.time() - start_h2o

    start_sklearn = time.time()
    reg_sklearn.fit(X_train, y_train)
    time_sklearn = time.time() - start_sklearn

    # Predicting test values
    y_pred_h2o = reg_h2o.predict(X_test, free_input_data=1)
    y_pred_h2o = y_pred_h2o.squeeze()

    y_pred_sklearn = reg_sklearn.predict(X_test)

    # Calculating R^2 scores
    r2_h2o = r2_score(y_test, y_pred_h2o)
    r2_sklearn = r2_score(y_test, y_pred_sklearn)

    # Clearing the memory
    reg_h2o.free_sols()
    reg_h2o.free_preds()
    reg_h2o.finish()
    del reg_h2o
    del reg_sklearn
    gc.collect()

    return time_h2o, time_sklearn, r2_h2o, r2_sklearn
Exemplo n.º 3
0
def test_ridge():
    """Run the ``run_stress`` harness on a default-constructed Ridge solver."""
    solver = h2o4gpu.Ridge()
    run_stress(solver)