def test_ridge_daal_vs_sklearn(rows=10, columns=9, verbose=False):
    """Compare ``h2o4gpu.Ridge`` results between the sklearn and daal backends.

    Both backends are fitted on the same data from ``get_random_array`` and
    their predictions are checked (to 4 decimals) against each other, against
    the targets, and against ``get_daal_prediction``. The scores of the two
    solvers must then be equal. If the ``CHECKPERFORMANCE`` environment
    variable is set, the daal run must also not take longer than sklearn.

    :param rows: forwarded to ``get_random_array``
    :param columns: forwarded to ``get_random_array``
    :param verbose: when true, the predicted values are printed as well
    """
    generated = get_random_array(rows, columns)
    features, targets = generated[0], generated[1]
    n_x_rows, n_x_cols = features.shape[0], features.shape[1]
    n_y_rows, n_y_cols = targets.shape[0], targets.shape[1]
    print("Prediction for X[{}][{}] and y[{}][{}]".format(
        n_x_rows, n_x_cols, n_y_rows, n_y_cols))

    def _fit_and_predict(backend):
        # The timed region covers construction, fit and predict together.
        began = time.time()
        solver = h2o4gpu.Ridge(backend=backend, normalize=True, alpha=0.0)
        solver.fit(features, targets)
        predicted = solver.predict(features)
        finished = time.time()
        return solver, predicted, finished - began

    def _show(label, values, end):
        # Non-verbose mode still emits the terminator (only "\n" is visible).
        if verbose:
            print((label, values), end=end)
        else:
            print("", end=end)

    sklearn_solver, sklearn_predicted, sklearn_elapsed = \
        _fit_and_predict('sklearn')
    _show("Sklearn prediction: ", sklearn_predicted, "")

    daal_solver, daal_predicted, daal_elapsed = _fit_and_predict('daal')
    _show("Daal prediction: ", daal_predicted, "")

    daal_predicted_man = get_daal_prediction(features, targets)
    _show("Manual Daal prediction()", daal_predicted_man, "\n")

    print("Prediction calculated:")
    print("+ Sklearn: {}".format(sklearn_elapsed))
    print("+ Daal: {}".format(daal_elapsed))

    for reference in (sklearn_predicted, targets, daal_predicted_man):
        assert_array_almost_equal(daal_predicted, reference, decimal=4)

    if "CHECKPERFORMANCE" in os.environ:
        assert daal_elapsed <= sklearn_elapsed

    sklearn_score = sklearn_solver.score(features, targets)
    daal_score = daal_solver.score(features, targets)
    print("Score calculated: ")
    print("+ Sklearn: {}".format(sklearn_score))
    print("+ Daal: {}".format(daal_score))

    assert daal_score == sklearn_score
def fit_model(X_train, y_train, X_test, y_test, reg_type='enet'):
    """Fit an h2o4gpu regressor and its sklearn counterpart, then compare them.

    Both models are fitted on the training data and used to predict the test
    data; the wall-clock fit time and the R^2 score of each are returned.

    :param X_train: training features passed to both ``fit`` calls
    :param y_train: training targets passed to both ``fit`` calls
    :param X_test: features passed to both ``predict`` calls
    :param y_test: true targets used for the R^2 scores
    :param reg_type: one of ``'lasso'``, ``'ridge'`` or ``'enet'``
    :return: tuple ``(time_h2o, time_sklearn, r2_h2o, r2_sklearn)``
    :raises ValueError: if ``reg_type`` is not one of the supported values
    """
    if reg_type == 'lasso':
        # Single-point solver configuration (one alpha, one lambda, one fold)
        # compared against a default-constructed LassoSklearn.
        reg_h2o = elastic_net.ElasticNetH2O(
            n_threads=None,
            n_gpus=-1,
            fit_intercept=True,
            lambda_min_ratio=1.0,
            n_lambdas=1,
            n_folds=1,
            n_alphas=1,
            tol=1e-2,
            lambda_stop_early=False,
            glm_stop_early=True,
            glm_stop_early_error_fraction=1.0,
            max_iter=5000,
            verbose=False,
            store_full_path=1,
            lambda_max=1.0,
            alpha_max=1.0,
            alpha_min=1.0,
            alphas=None,
            lambdas=None,
            order=None)
        reg_sklearn = linear_model.LassoSklearn()
    elif reg_type == 'ridge':
        reg_h2o = h2o4gpu.Ridge()
        reg_sklearn = linear_model.RidgeSklearn()
    elif reg_type == 'enet':
        reg_h2o = h2o4gpu.ElasticNet()  # update when the wrapper is done
        reg_sklearn = linear_model.ElasticNetSklearn()
    else:
        # Without this guard the models stay unbound and the function fails
        # later with an UnboundLocalError that hides the real cause.
        raise ValueError(
            "Unknown reg_type {!r}; expected 'lasso', 'ridge' or 'enet'"
            .format(reg_type))

    start_h2o = time.time()
    reg_h2o.fit(X_train, y_train, free_input_data=1)
    time_h2o = time.time() - start_h2o

    start_sklearn = time.time()
    reg_sklearn.fit(X_train, y_train)
    time_sklearn = time.time() - start_sklearn

    # Predicting test values
    y_pred_h2o = reg_h2o.predict(X_test, free_input_data=1)
    y_pred_h2o = y_pred_h2o.squeeze()
    y_pred_sklearn = reg_sklearn.predict(X_test)

    # Calculating R^2 scores
    r2_h2o = r2_score(y_test, y_pred_h2o)
    r2_sklearn = r2_score(y_test, y_pred_sklearn)

    # Clearing the memory
    reg_h2o.free_sols()
    reg_h2o.free_preds()
    reg_h2o.finish()
    del reg_h2o
    del reg_sklearn
    gc.collect()

    return time_h2o, time_sklearn, r2_h2o, r2_sklearn
def test_ridge():
    """Run the ``run_stress`` routine on a default-constructed Ridge model."""
    ridge_model = h2o4gpu.Ridge()
    run_stress(ridge_model)