示例#1
0
def RunH2Oaiglm(arg):
    """Fit an ``h2o4gpu.ElasticNetH2O`` model and print how long it took.

    Relies on ``os`` being imported at module level.

    Args:
        arg: An 11-item sequence unpacked, in order, as ``trainX``,
            ``trainY``, ``validX``, ``validY``, ``family``, ``intercept``,
            ``lambda_min_ratio``, ``n_folds``, ``n_alphas``, ``n_lambdas``
            and ``n_gpus``. The first four are handed to ``fit``; the
            remaining values configure the solver.

    Returns:
        None. Progress messages and the elapsed fit time are printed.
    """
    import time

    import h2o4gpu

    (trainX, trainY, validX, validY, family, intercept, lambda_min_ratio,
     n_folds, n_alphas, n_lambdas, n_gpus) = arg

    solver_options = {
        "n_gpus": n_gpus,
        "fit_intercept": intercept,
        "lambda_min_ratio": lambda_min_ratio,
        "n_lambdas": n_lambdas,
        "n_folds": n_folds,
        "n_alphas": n_alphas,
        "family": family,
    }

    print("Begin Setting up Solver")
    # Recreate error.txt and varimp.txt as empty files in the working
    # directory (the original note says they are used for visualization).
    reset_status = os.system(
        "rm -f error.txt ; touch error.txt ; rm -f varimp.txt ; touch varimp.txt"
    )
    del reset_status  # exit status is intentionally ignored (best effort)
    solver = h2o4gpu.ElasticNetH2O(**solver_options)
    print("End Setting up Solver")

    # Time only the fit call itself.
    print("Begin Solving")
    started = time.time()
    solver.fit(trainX, trainY, validX, validY)
    elapsed = time.time() - started
    print("End Solving")

    print("Time to train H2O AI ElasticNetH2O: %r" % elapsed)
示例#2
0
def func():
    """Check that LogisticRegression and ElasticNetH2O agree on binary iris.

    Loads the iris data, keeps only the rows whose label is not 2, centres
    every feature column, holds out rows 40-49 and 90-99 of the remaining
    data as a test set and trains on the other 80 rows. An L1-penalised
    ``h2o4gpu.LogisticRegression`` and a single-alpha/single-lambda
    logistic ``h2o4gpu.ElasticNetH2O`` are fitted on the same data.

    Relies on ``datasets``, ``np`` and ``h2o4gpu`` being available at
    module level.

    Raises:
        AssertionError: If the rounded LogisticRegression predictions do
            not match the held-out labels, or if the two models'
            predictions differ.
    """
    # data prep
    iris = datasets.load_iris()
    X = iris.data
    y = iris.target

    # Remove the third class (label 2), making it a binary problem.
    binary_rows = y != 2
    X = X[binary_rows]
    y = y[binary_rows]

    # Centre each feature column. X was rebound to the result of boolean-mask
    # indexing above, so this in-place update acts on that new array.
    X -= np.mean(X, 0)

    # Split into train and validation frames.
    holdout_rows = np.r_[40:50, 90:100]
    train_rows = np.r_[:40, 50:90]
    X_test = X[holdout_rows]
    y_test = y[holdout_rows]
    X = X[train_rows]
    y = y[train_rows]

    logreg = h2o4gpu.LogisticRegression(penalty="l1")
    # NOTE(review): alpha fixed at 1.0 with one lambda is presumably meant to
    # mirror the L1-penalised model above -- confirm against the h2o4gpu docs.
    lr = h2o4gpu.ElasticNetH2O(
        n_threads=None,
        n_alphas=1,
        n_lambdas=1,
        n_folds=1,
        lambda_max=1.0,
        lambda_min_ratio=1.0,
        lambda_stop_early=False,
        store_full_path=0,
        alphas=None,
        lambdas=None,
        family='logistic',
        alpha_max=1.0,
        alpha_min=1.0)

    model = logreg.fit(X, y)
    mm = lr.fit(X, y)

    y_pred = model.predict(X_test)
    print(y_pred)
    y_p = mm.predict(X_test)
    print(y_p)
    print(y_pred, np.round(y_pred))

    # TODO: change the assertion once the logic to convert probabilities to
    # classes is implemented.
    assert (y_test == np.round(y_pred)).all()
    assert (y_pred == y_p).all()
示例#3
0
def RunH2Oaiglm_ptr(arg):
    """Fit ``h2o4gpu.ElasticNetH2O`` through its pointer-based interface.

    The data is first uploaded with ``prepare_and_upload_data`` and the
    five values it returns are then passed to ``fit_ptr``. The upload and
    the fit are timed and reported separately.

    Relies on ``os`` being imported at module level.

    Args:
        arg: A 15-item sequence unpacked, in order, as ``trainX``,
            ``trainY``, ``validX``, ``validY``, ``trainW``, ``fortran``,
            ``mTrain``, ``n``, ``mvalid``, ``intercept``,
            ``lambda_min_ratio``, ``n_folds``, ``n_alphas``, ``n_lambdas``
            and ``n_gpus``. ``fortran`` is only used for its truthiness,
            to choose the ``order`` code given to ``fit_ptr``.

    Returns:
        None. Progress messages and both timings are printed.
    """
    import h2o4gpu
    import time

    (trainX, trainY, validX, validY, trainW, fortran, mTrain, n, mvalid,
     intercept, lambda_min_ratio, n_folds, n_alphas, n_lambdas, n_gpus) = arg

    print("Begin Setting up Solver")
    # Recreate error.txt and varimp.txt as empty files in the working
    # directory (the original note says they are used for visualization).
    os.system(
        "rm -f error.txt ; touch error.txt ; rm -f varimp.txt ; touch varimp.txt"
    )
    enet = h2o4gpu.ElasticNetH2O(
        n_gpus=n_gpus,
        fit_intercept=intercept,
        lambda_min_ratio=lambda_min_ratio,
        n_lambdas=n_lambdas,
        n_folds=n_folds,
        n_alphas=n_alphas)
    print("End Setting up Solver")

    # First, get backend pointers.
    sourceDev = 0
    t0 = time.time()
    a, b, c, d, e = enet.prepare_and_upload_data(
        trainX, trainY, validX, validY, trainW, source_dev=sourceDev)
    t1 = time.time()
    print("Time to ingest data: %r" % (t1 - t0))

    # Solve.
    print("Solving")
    t0 = time.time()
    # NOTE(review): 'c' is sent when `fortran` is truthy and 'r' otherwise --
    # presumably column-major vs row-major; confirm against fit_ptr.
    order = 'c' if fortran else 'r'
    double_precision = 0  # Not used (per the original author's note)
    enet.fit_ptr(
        mTrain,
        n,
        mvalid,
        double_precision,
        order,
        a,
        b,
        c,
        d,
        e,
        source_dev=sourceDev)
    t1 = time.time()
    print("Done Solving")
    print("Time to train H2O AI ElasticNetH2O: %r" % (t1 - t0))
示例#4
0
def RunH2Oaiglm(arg):
    """Cast the data to float32, fit ``h2o4gpu.ElasticNetH2O`` and time it.

    Relies on ``os`` and ``np`` being imported at module level.

    Args:
        arg: An 11-item sequence unpacked, in order, as ``trainX``,
            ``trainY``, ``validX``, ``validY``, ``family``, ``intercept``,
            ``lambda_min_ratio``, ``n_folds``, ``n_alphas``, ``n_lambdas``
            and ``n_gpus``. Each of the four data items may be ``None``;
            any that is not must provide ``astype``.

    Returns:
        None. Progress messages and the elapsed fit time are printed.
    """
    import h2o4gpu
    import time

    (trainX, trainY, validX, validY, family, intercept, lambda_min_ratio,
     n_folds, n_alphas, n_lambdas, n_gpus) = arg

    # Assume 32-bit float is acceptable for speed on GPU when using this
    # wrapper. `astype` returns a converted copy instead of modifying the
    # array in place, so the results must be rebound; the caller's arrays
    # are left untouched.
    trainX, trainY, validX, validY = (
        data if data is None else data.astype(np.float32)
        for data in (trainX, trainY, validX, validY)
    )

    print("Begin Setting up Solver")
    # Recreate error.txt and varimp.txt as empty files in the working
    # directory (the original note says they are used for visualization).
    os.system(
        "rm -f error.txt ; touch error.txt ; rm -f varimp.txt ; touch varimp.txt"
    )
    enet = h2o4gpu.ElasticNetH2O(
        n_gpus=n_gpus,
        fit_intercept=intercept,
        lambda_min_ratio=lambda_min_ratio,
        n_lambdas=n_lambdas,
        n_folds=n_folds,
        n_alphas=n_alphas,
        family=family)
    print("End Setting up Solver")

    # Solve; only the fit call is timed.
    print("Begin Solving")
    t0 = time.time()
    enet.fit(trainX, trainY, validX, validY)
    t1 = time.time()
    print("End Solving")

    print("Time to train H2O AI ElasticNetH2O: %r" % (t1 - t0))
示例#5
0
# Script fragment: times a RandomForestClassifier fit and an ElasticNetH2O fit
# from h2o4gpu and computes ROC AUC on the held-out rows for each.
# X, y, d_train, X_train, time, metrics and h2o4gpu are defined before this
# fragment and are not visible here.

# Rows of X from index d_train.shape[0] onward form the test set
# (presumably X stacks the training rows first -- TODO confirm).
X_test = X[d_train.shape[0]:, :]

# Split the labels at the same row boundary.
y_train = y[0:d_train.shape[0]]
y_test = y[d_train.shape[0]:]

# Time model construction plus fit for the random forest.
start = time.time()
md = h2o4gpu.RandomForestClassifier(n_estimators=100,
                                    max_depth=10,
                                    backend="h2o4gpu",
                                    tree_method="gpu_hist").fit(
                                        X_train, y_train)
end = time.time()
print(end - start)

# Column 1 of predict_proba is used as the score
# (presumably the positive-class probability -- TODO confirm).
y_pred = md.predict_proba(X_test)[:, 1]
# NOTE(review): the AUC value is computed but neither stored nor printed; it
# is only shown if this line ends a notebook cell -- confirm intent.
metrics.roc_auc_score(y_test, y_pred)

# The elastic net needs dense input, but that uses too much GPU memory, so
# only the first 300000 training rows are used.
# (X_train / X_test are presumably scipy sparse matrices, given .todense().)
X_train2 = X_train[0:300000, :].todense()
y_train2 = y_train[0:300000]
X_test2 = X_test.todense()

# Time model construction plus fit for the elastic net; `md`, `start` and
# `end` are rebound, replacing the random-forest values above.
start = time.time()
md = h2o4gpu.ElasticNetH2O(alphas=[0], lambdas=[0],
                           n_folds=1).fit(X_train2, y_train2)
end = time.time()
print(end - start)

# NOTE(review): row 0 is taken here, whereas the random forest above used
# column 1 -- confirm the layout of ElasticNetH2O.predict_proba output.
y_pred = md.predict_proba(X_test2)[0, :]
# NOTE(review): as above, the AUC value is discarded unless displayed by a
# notebook.
metrics.roc_auc_score(y_test, y_pred)