def RunH2Oaiglm(arg):
    import os
    import time
    import h2o4gpu

    trainX, trainY, validX, validY, family, intercept, lambda_min_ratio, \
        n_folds, n_alphas, n_lambdas, n_gpus = arg

    print("Begin Setting up Solver")
    os.system(
        "rm -f error.txt ; touch error.txt ; rm -f varimp.txt ; touch varimp.txt"
    )  # for visualization
    enet = h2o4gpu.ElasticNetH2O(
        n_gpus=n_gpus,
        fit_intercept=intercept,
        lambda_min_ratio=lambda_min_ratio,
        n_lambdas=n_lambdas,
        n_folds=n_folds,
        n_alphas=n_alphas,
        family=family)
    print("End Setting up Solver")

    # Solve
    print("Begin Solving")
    t0 = time.time()
    enet.fit(trainX, trainY, validX, validY)
    t1 = time.time()
    print("End Solving")
    print("Time to train H2O AI ElasticNetH2O: %r" % (t1 - t0))
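# Hypothetical usage sketch (not part of the original tests): shows how the
# argument tuple RunH2Oaiglm unpacks could be assembled. The synthetic data
# and hyperparameter values below are illustrative assumptions.
def example_run_h2oaiglm():
    import numpy as np

    trainX = np.random.rand(1000, 10).astype(np.float32)
    trainY = np.random.rand(1000).astype(np.float32)
    validX = np.random.rand(200, 10).astype(np.float32)
    validY = np.random.rand(200).astype(np.float32)

    arg = (trainX, trainY, validX, validY,
           "elasticnet",  # family
           True,          # fit_intercept
           1e-7,          # lambda_min_ratio
           1,             # n_folds
           8,             # n_alphas
           100,           # n_lambdas
           1)             # n_gpus
    RunH2Oaiglm(arg)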
def func():
    import numpy as np
    import h2o4gpu
    from sklearn import datasets

    # data prep
    iris = datasets.load_iris()
    X = iris.data
    y = iris.target

    # remove the third class, making it a binary problem
    X = X[y != 2]
    y = y[y != 2]
    X -= np.mean(X, 0)

    # split into train and valid frames
    X_test = X[np.r_[40:50, 90:100]]
    y_test = y[np.r_[40:50, 90:100]]
    X = X[np.r_[:40, 50:90]]
    y = y[np.r_[:40, 50:90]]

    classification = True

    logreg = h2o4gpu.LogisticRegression(penalty="l1")
    lr = h2o4gpu.ElasticNetH2O(
        n_threads=None,
        n_alphas=1,
        n_lambdas=1,
        n_folds=1,
        lambda_max=1.0,
        lambda_min_ratio=1.0,
        lambda_stop_early=False,
        store_full_path=0,
        alphas=None,
        lambdas=None,
        family='logistic',
        alpha_max=1.0,
        alpha_min=1.0)

    model = logreg.fit(X, y)
    mm = lr.fit(X, y)

    y_pred = model.predict(X_test)
    print(y_pred)
    y_p = mm.predict(X_test)
    print(y_p)
    print(y_pred, np.round(y_pred))

    # TODO: change the assertions once the logic to convert probabilities to classes is implemented
    assert (y_test == np.round(y_pred)).all()
    assert (y_pred == y_p).all()
def RunH2Oaiglm_ptr(arg):
    import os
    import time
    import h2o4gpu

    (trainX, trainY, validX, validY, trainW, fortran, mTrain, n, mvalid,
     intercept, lambda_min_ratio, n_folds, n_alphas, n_lambdas, n_gpus) = arg

    print("Begin Setting up Solver")
    os.system(
        "rm -f error.txt ; touch error.txt ; rm -f varimp.txt ; touch varimp.txt"
    )  # for visualization
    enet = h2o4gpu.ElasticNetH2O(
        n_gpus=n_gpus,
        fit_intercept=intercept,
        lambda_min_ratio=lambda_min_ratio,
        n_lambdas=n_lambdas,
        n_folds=n_folds,
        n_alphas=n_alphas)
    print("End Setting up Solver")

    # First, upload the data and get back-end pointers
    sourceDev = 0
    t0 = time.time()
    a, b, c, d, e = enet.prepare_and_upload_data(
        trainX, trainY, validX, validY, trainW, source_dev=sourceDev)
    t1 = time.time()
    print("Time to ingest data: %r" % (t1 - t0))

    # Solve directly against the uploaded device pointers
    print("Solving")
    t0 = time.time()
    order = 'c' if fortran else 'r'
    double_precision = 0  # not used
    store_full_path = 0
    enet.fit_ptr(
        mTrain,
        n,
        mvalid,
        double_precision,
        order,
        a, b, c, d, e,
        source_dev=sourceDev)
    t1 = time.time()
    print("Done Solving")
    print("Time to train H2O AI ElasticNetH2O: %r" % (t1 - t0))
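# Hypothetical usage sketch (not part of the original tests): assembles the
# longer argument tuple that RunH2Oaiglm_ptr unpacks, including the per-row
# weight vector and matrix dimensions. Shapes and hyperparameter values are
# illustrative assumptions.
def example_run_h2oaiglm_ptr():
    import numpy as np

    mTrain, n, mvalid = 1000, 10, 200
    trainX = np.random.rand(mTrain, n).astype(np.float32)
    trainY = np.random.rand(mTrain).astype(np.float32)
    validX = np.random.rand(mvalid, n).astype(np.float32)
    validY = np.random.rand(mvalid).astype(np.float32)
    trainW = np.ones(mTrain, dtype=np.float32)  # uniform row weights

    arg = (trainX, trainY, validX, validY, trainW,
           False,    # fortran: data is row-major (C order)
           mTrain, n, mvalid,
           True,     # fit_intercept
           1e-7,     # lambda_min_ratio
           1,        # n_folds
           8,        # n_alphas
           100,      # n_lambdas
           1)        # n_gpus
    RunH2Oaiglm_ptr(arg)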
def RunH2Oaiglm(arg):
    import os
    import time
    import numpy as np
    import h2o4gpu

    trainX, trainY, validX, validY, family, intercept, lambda_min_ratio, \
        n_folds, n_alphas, n_lambdas, n_gpus = arg

    # Assume 32-bit float is acceptable for speed on the GPU when using this wrapper.
    # Note: astype returns a copy, so re-assign rather than discarding the result.
    if trainX is not None:
        trainX = trainX.astype(np.float32)
    if trainY is not None:
        trainY = trainY.astype(np.float32)
    if validX is not None:
        validX = validX.astype(np.float32)
    if validY is not None:
        validY = validY.astype(np.float32)

    print("Begin Setting up Solver")
    os.system(
        "rm -f error.txt ; touch error.txt ; rm -f varimp.txt ; touch varimp.txt"
    )  # for visualization
    enet = h2o4gpu.ElasticNetH2O(
        n_gpus=n_gpus,
        fit_intercept=intercept,
        lambda_min_ratio=lambda_min_ratio,
        n_lambdas=n_lambdas,
        n_folds=n_folds,
        n_alphas=n_alphas,
        family=family)
    print("End Setting up Solver")

    # Solve
    print("Begin Solving")
    t0 = time.time()
    enet.fit(trainX, trainY, validX, validY)
    t1 = time.time()
    print("End Solving")
    print("Time to train H2O AI ElasticNetH2O: %r" % (t1 - t0))
X_test = X[d_train.shape[0]:, :]
y_train = y[0:d_train.shape[0]]
y_test = y[d_train.shape[0]:]

start = time.time()
md = h2o4gpu.RandomForestClassifier(
    n_estimators=100,
    max_depth=10,
    backend="h2o4gpu",
    tree_method="gpu_hist").fit(X_train, y_train)
end = time.time()
print(end - start)

y_pred = md.predict_proba(X_test)[:, 1]
metrics.roc_auc_score(y_test, y_pred)

## GLM needs dense input, but the full set uses too much GPU memory,
## so use a smaller training slice
X_train2 = X_train[0:300000, :].todense()
y_train2 = y_train[0:300000]
X_test2 = X_test.todense()

start = time.time()
md = h2o4gpu.ElasticNetH2O(alphas=[0], lambdas=[0], n_folds=1).fit(X_train2, y_train2)
end = time.time()
print(end - start)

y_pred = md.predict_proba(X_test2)[0, :]
metrics.roc_auc_score(y_test, y_pred)