Пример #1
0
def test_lasso_default(datatype, nrows, column_info):
    """Check default-constructed cuLasso against default-constructed Lasso.

    A synthetic regression problem is generated, cast to ``datatype`` and
    split 80/20 into train and test parts.  Both estimators are fitted on
    the training part and scored with ``r2_score`` on the test part.

    Parameters
    ----------
    datatype
        dtype the generated features and targets are cast to.
    nrows
        Forwarded to ``make_regression`` as ``n_samples``.
    column_info
        Pair ``(n_features, n_informative)`` forwarded to
        ``make_regression``.
    """
    n_features, n_informative = column_info
    features, target = make_regression(
        n_samples=nrows,
        n_features=n_features,
        n_informative=n_informative,
        random_state=0,
    )
    X_train, X_test, y_train, y_test = train_test_split(
        features.astype(datatype),
        target.astype(datatype),
        train_size=0.8,
        random_state=0,
    )

    # Model under test: fit, make sure coefficients exist, then score.
    cu_model = cuLasso()
    cu_model.fit(X_train, y_train)
    assert cu_model.coef_ is not None
    cu_score = r2_score(y_test, cu_model.predict(X_test))

    # Reference model fitted and scored on exactly the same split.
    sk_model = Lasso()
    sk_model.fit(X_train, y_train)
    sk_score = r2_score(y_test, sk_model.predict(X_test))

    # cuLasso may trail the reference score by at most 0.07.
    assert cu_score >= sk_score - 0.07
Пример #2
0
def test_lasso(datatype, X_type, alpha, algorithm,
               nrows, column_info):
    """Compare the R2 score of a configured cuLasso with a matching Lasso.

    Both estimators are built with the same keyword arguments, fitted on
    the same 80% training split of a synthetic regression problem and
    scored with ``r2_score`` on the remaining rows.

    Parameters
    ----------
    datatype
        dtype the generated features and targets are cast to.
    X_type
        Accepted for the parametrization but not used by this body.
    alpha
        Wrapped in a one-element NumPy array and passed as ``alpha``.
    algorithm
        Passed to both estimators as ``selection``.
    nrows
        Forwarded to ``make_regression`` as ``n_samples``; the reference
        comparison only runs when it is below 500000.
    column_info
        Pair ``(n_features, n_informative)`` forwarded to
        ``make_regression``.
    """
    def configured(estimator_cls):
        # Each call builds a fresh alpha array, so nothing is shared
        # between the two estimators.
        return estimator_cls(
            alpha=np.array([alpha]),
            fit_intercept=True,
            normalize=False,
            max_iter=1000,
            selection=algorithm,
            tol=1e-10,
        )

    n_features, n_informative = column_info
    features, target = make_regression(
        n_samples=nrows,
        n_features=n_features,
        n_informative=n_informative,
        random_state=0,
    )
    X_train, X_test, y_train, y_test = train_test_split(
        features.astype(datatype),
        target.astype(datatype),
        train_size=0.8,
        random_state=0,
    )

    cu_model = configured(cuLasso)
    cu_model.fit(X_train, y_train)
    assert cu_model.coef_ is not None
    cu_score = r2_score(y_test, cu_model.predict(X_test))

    # The reference model is skipped entirely for the largest inputs.
    if nrows >= 500000:
        return

    sk_model = configured(Lasso)
    sk_model.fit(X_train, y_train)
    sk_score = r2_score(y_test, sk_model.predict(X_test))

    # cuLasso may trail the reference score by at most 0.07.
    assert cu_score >= sk_score - 0.07
Пример #3
0
def test_lasso_predict_convert_dtype(train_dtype, test_dtype):
    """Fit cuLasso on ``train_dtype`` data and predict on ``test_dtype`` data.

    The test contains no assertion on the predictions; it passes when
    ``predict`` completes without raising for the given dtype pair.

    Parameters
    ----------
    train_dtype
        dtype of the features and targets used for fitting.
    test_dtype
        dtype the held-out features are cast to before predicting.
    """
    features, target = make_regression(
        n_samples=50,
        n_features=10,
        n_informative=5,
        random_state=0,
    )
    # The held-out targets are never needed, hence the discarded slot.
    X_train, X_test, y_train, _ = train_test_split(
        features.astype(train_dtype),
        target.astype(train_dtype),
        train_size=0.8,
        random_state=0,
    )

    model = cuLasso()
    model.fit(X_train, y_train)

    # Predict on input whose dtype may differ from the training dtype.
    model.predict(X_test.astype(test_dtype))
def test_lasso(datatype, X_type, alpha, algorithm, nrows, ncols, n_info):
    """Compare cuLasso predictions with the reference Lasso predictions.

    A synthetic regression problem is generated; the first 80% of the rows
    are used for fitting and the remaining rows for prediction.  Depending
    on ``X_type`` the cuLasso model receives cudf objects or NumPy arrays.

    Parameters
    ----------
    datatype
        dtype the train/test arrays are converted to.
    X_type
        ``'dataframe'`` (cudf inputs) or ``'ndarray'`` (NumPy inputs).
    alpha
        Wrapped in a one-element NumPy array and passed as ``alpha`` to
        both estimators.
    algorithm
        Passed to both estimators as ``selection``.
    nrows, ncols, n_info
        Forwarded to ``make_regression`` as ``n_samples``, ``n_features``
        and ``n_informative``.  The reference comparison only runs when
        ``nrows`` is below 500000.

    Raises
    ------
    ValueError
        If ``X_type`` is neither ``'dataframe'`` nor ``'ndarray'``.
    """
    # Fail fast on an unknown input kind.  Without this check no branch
    # below would run, leaving ``cu_predict`` unbound (or, for large
    # ``nrows``, letting the test pass without exercising anything).
    if X_type not in ('dataframe', 'ndarray'):
        raise ValueError(
            "Unsupported X_type %r; expected 'dataframe' or 'ndarray'"
            % (X_type,))

    # Plain int: truncates like np.int32 but cannot overflow 32 bits.
    train_rows = int(nrows * 0.8)
    X, y = make_regression(n_samples=nrows,
                           n_features=ncols,
                           n_informative=n_info,
                           random_state=0)
    X_test = np.asarray(X[train_rows:, :], dtype=datatype)
    X_train = np.asarray(X[:train_rows, :], dtype=datatype)
    y_train = np.asarray(y[:train_rows], dtype=datatype)

    cu_lasso = cuLasso(alpha=np.array([alpha]),
                       fit_intercept=True,
                       normalize=False,
                       max_iter=1000,
                       selection=algorithm,
                       tol=1e-10)

    if X_type == 'dataframe':
        # Build the cudf target straight from the 1-D host array, then
        # switch the host-side data to pandas so the reference model
        # below sees the same frames the cudf inputs were created from.
        y_cudf = cudf.Series(y_train)
        y_train = pd.DataFrame({'fea0': y_train})
        X_train = pd.DataFrame(
            {'fea%d' % i: X_train[:, i]
             for i in range(X_train.shape[1])})
        X_test = pd.DataFrame(
            {'fea%d' % i: X_test[:, i]
             for i in range(X_test.shape[1])})
        X_cudf = cudf.DataFrame.from_pandas(X_train)
        X_cudf_test = cudf.DataFrame.from_pandas(X_test)
        cu_lasso.fit(X_cudf, y_cudf)
        cu_predict = cu_lasso.predict(X_cudf_test)
    else:  # X_type == 'ndarray', guaranteed by the check above
        cu_lasso.fit(X_train, y_train)
        cu_predict = cu_lasso.predict(X_test)

    # The reference model is skipped for the largest inputs.
    if nrows < 500000:
        sk_lasso = Lasso(alpha=np.array([alpha]),
                         fit_intercept=True,
                         normalize=False,
                         max_iter=1000,
                         selection=algorithm,
                         tol=1e-10)
        sk_lasso.fit(X_train, y_train)
        sk_predict = sk_lasso.predict(X_test)
        assert array_equal(sk_predict, cu_predict, 1e-1, with_sign=True)
Пример #5
0
        X_train_np = X_train.toarray()
    else:
        X_train_np = X_train

    if args.densify_all:
        X_train = X_train_np

    if args.test == 'ridge':
        sk = Ridge(fit_intercept=False,
                   alpha=regularizer,
                   max_iter=1000000,
                   tol=1e-06)
        cu = cuRidge(fit_intercept=False, alpha=regularizer, solver='eig')
    elif args.test == 'lasso':
        sk = Lasso(fit_intercept=False, alpha=regularizer / X_train.shape[0])
        cu = cuLasso(fit_intercept=False, alpha=regularizer / X_train.shape[0])
    elif args.test == 'logistic':
        sk = Logistic(fit_intercept=False,
                      C=regularizer,
                      dual=True,
                      solver='liblinear')
        cu = cuLogistic(fit_intercept=False,
                        C=regularizer * X_train.shape[0],
                        max_iter=100000,
                        tol=1e-8)
    else:
        raise ("Invalid test")

    if args.densify_sk:
        X_train_sk = X_train_np
    else: