def _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive):
    n_samples, n_features, max_iter = 100, 100, 1000
    n_informative = 10

    X, y = make_sparse_data(n_samples, n_features, n_informative,
                            positive=positive)

    X_train, X_test = X[n_samples // 2:], X[:n_samples // 2]
    y_train, y_test = y[n_samples // 2:], y[:n_samples // 2]

    s_clf = ElasticNet(alpha=alpha, l1_ratio=0.8, fit_intercept=fit_intercept,
                       max_iter=max_iter, tol=1e-7, positive=positive,
                       warm_start=True)
    s_clf.fit(X_train, y_train)

    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert_greater(s_clf.score(X_test, y_test), 0.85)

    # check the convergence is the same as the dense version
    d_clf = ElasticNet(alpha=alpha, l1_ratio=0.8, fit_intercept=fit_intercept,
                       max_iter=max_iter, tol=1e-7, positive=positive,
                       warm_start=True)
    # toarray() (not todense()) avoids np.matrix, which newer scikit-learn rejects
    d_clf.fit(X_train.toarray(), y_train)

    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert_greater(d_clf.score(X_test, y_test), 0.85)

    assert_almost_equal(s_clf.coef_, d_clf.coef_, 5)
    assert_almost_equal(s_clf.intercept_, d_clf.intercept_, 5)

    # check that the coefs are sparse
    assert_less(np.sum(s_clf.coef_ != 0.0), 2 * n_informative)
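
# The snippets in this collection are shown without their imports or the
# make_sparse_data helper they call. A minimal, self-contained sketch of what
# they appear to assume follows; the helper body illustrates the idea (target
# driven by n_informative features, roughly half-sparse X), not the exact
# upstream implementation. (assert_greater / assert_less in the older snippets
# likely came from the since-removed sklearn.utils.testing helpers.)
import numpy as np
import scipy.sparse as sp
from numpy.testing import assert_almost_equal
from sklearn.linear_model import ElasticNet


def make_sparse_data(n_samples=100, n_features=100, n_informative=10,
                     seed=42, positive=False):
    rng = np.random.RandomState(seed)

    # Ground-truth weights: only the first n_informative entries are non-zero.
    w = rng.randn(n_features)
    w[n_informative:] = 0.0
    if positive:
        w = np.abs(w)

    # Design matrix with roughly half of its entries zeroed out.
    X = rng.randn(n_samples, n_features)
    X[rng.uniform(size=(n_samples, n_features)) > 0.5] = 0.0

    y = X.dot(w)
    return sp.csc_matrix(X), y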
Example #2
def _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive):
    n_samples, n_features, max_iter = 100, 100, 1000
    n_informative = 10

    X, y = make_sparse_data(n_samples, n_features, n_informative,
                            positive=positive)

    X_train, X_test = X[n_samples // 2:], X[:n_samples // 2]
    y_train, y_test = y[n_samples // 2:], y[:n_samples // 2]

    s_clf = ElasticNet(alpha=alpha, l1_ratio=0.8, fit_intercept=fit_intercept,
                       max_iter=max_iter, tol=1e-7, positive=positive,
                       warm_start=True)
    s_clf.fit(X_train, y_train)

    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert s_clf.score(X_test, y_test) > 0.85

    # check the convergence is the same as the dense version
    d_clf = ElasticNet(alpha=alpha, l1_ratio=0.8, fit_intercept=fit_intercept,
                       max_iter=max_iter, tol=1e-7, positive=positive,
                       warm_start=True)
    d_clf.fit(X_train.toarray(), y_train)

    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert d_clf.score(X_test, y_test) > 0.85

    assert_almost_equal(s_clf.coef_, d_clf.coef_, 5)
    assert_almost_equal(s_clf.intercept_, d_clf.intercept_, 5)

    # check that the coefs are sparse
    assert np.sum(s_clf.coef_ != 0.0) < 2 * n_informative
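
# A thin wrapper typically drives the parameterized helper above; a minimal
# sketch assuming pytest (the parameter grid here is illustrative, not taken
# from the source):
import pytest


@pytest.mark.parametrize("alpha", [0.1, 1e-3])
@pytest.mark.parametrize("fit_intercept", [True, False])
@pytest.mark.parametrize("positive", [True, False])
def test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive):
    _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive)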
Example #3
def _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive):
    n_samples, n_features, max_iter = 100, 100, 1000
    n_informative = 10

    X, y = make_sparse_data(n_samples,
                            n_features,
                            n_informative,
                            positive=positive)

    X_train, X_test = X[n_samples // 2:], X[:n_samples // 2]
    y_train, y_test = y[n_samples // 2:], y[:n_samples // 2]

    s_clf = ElasticNet(alpha=alpha,
                       rho=0.8,  # 'rho' is the pre-0.13 scikit-learn name for l1_ratio
                       fit_intercept=fit_intercept,
                       max_iter=max_iter,
                       tol=1e-7,
                       positive=positive,
                       warm_start=True)
    s_clf.fit(X_train, y_train)

    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert_greater(s_clf.score(X_test, y_test), 0.85)

    # check the convergence is the same as the dense version
    d_clf = ElasticNet(alpha=alpha,
                       rho=0.8,
                       fit_intercept=fit_intercept,
                       max_iter=max_iter,
                       tol=1e-7,
                       positive=positive,
                       warm_start=True)
    d_clf.fit(X_train.toarray(), y_train)

    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert_greater(d_clf.score(X_test, y_test), 0.85)

    assert_almost_equal(s_clf.coef_, d_clf.coef_, 5)
    assert_almost_equal(s_clf.intercept_, d_clf.intercept_, 5)

    # check that the coefs are sparse
    assert_less(np.sum(s_clf.coef_ != 0.0), 2 * n_informative)

    # check that warm restart leads to the same result with
    # sparse and dense versions

    rng = np.random.RandomState(seed=0)
    coef_init = rng.randn(n_features)

    d_clf.fit(X_train.toarray(), y_train, coef_init=coef_init)
    s_clf.fit(X_train, y_train, coef_init=coef_init)

    assert_almost_equal(s_clf.coef_, d_clf.coef_, 5)
    assert_almost_equal(s_clf.intercept_, d_clf.intercept_, 5)
Example #4
def test_sparse_enet_not_as_toy_dataset():
    n_samples, n_features, max_iter = 100, 100, 1000
    n_informative = 10

    X, y = make_sparse_data(n_samples, n_features, n_informative)

    X_train, X_test = X[n_samples // 2:], X[:n_samples // 2]
    y_train, y_test = y[n_samples // 2:], y[:n_samples // 2]

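    # SparseENet / DenseENet: the era's separate sparse- and dense-input
    # ElasticNet classes (likely sklearn.linear_model.sparse.ElasticNet and
    # sklearn.linear_model.ElasticNet); later releases merged sparse support
    # into a single ElasticNet class.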
    s_clf = SparseENet(alpha=0.1, rho=0.8, fit_intercept=False,
                       max_iter=max_iter, tol=1e-7)
    s_clf.fit(X_train, y_train)
    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert s_clf.score(X_test, y_test) > 0.85

    # check the convergence is the same as the dense version
    d_clf = DenseENet(alpha=0.1, rho=0.8, fit_intercept=False,
                      max_iter=max_iter, tol=1e-7)
    d_clf.fit(X_train, y_train)
    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert d_clf.score(X_test, y_test) > 0.85

    assert_almost_equal(s_clf.coef_, d_clf.coef_, 5)

    # check that the coefs are sparse
    assert np.sum(s_clf.coef_ != 0.0) < 2 * n_informative
Example #6
def _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive):
    n_samples, n_features, max_iter = 100, 100, 1000
    n_informative = 10

    X, y = make_sparse_data(n_samples, n_features, n_informative,
                            positive=positive)

    X_train, X_test = X[n_samples // 2:], X[:n_samples // 2]
    y_train, y_test = y[n_samples // 2:], y[:n_samples // 2]

    s_clf = SparseENet(alpha=alpha, rho=0.8, fit_intercept=fit_intercept,
                       max_iter=max_iter, tol=1e-7, positive=positive)
    s_clf.fit(X_train, y_train)

    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert_greater(s_clf.score(X_test, y_test), 0.85)

    # check the convergence is the same as the dense version
    d_clf = DenseENet(alpha=alpha, rho=0.8, fit_intercept=fit_intercept,
                      max_iter=max_iter, tol=1e-7, positive=positive)
    d_clf.fit(X_train, y_train)

    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert_greater(d_clf.score(X_test, y_test), 0.85)

    assert_almost_equal(s_clf.coef_, d_clf.coef_, 5)
    assert_almost_equal(s_clf.intercept_, d_clf.intercept_, 5)

    # check that the coefs are sparse
    assert_less(np.sum(s_clf.coef_ != 0.0), 2 * n_informative)
Example #7
def _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive):
    n_samples, n_features, max_iter = 100, 100, 1000
    n_informative = 10

    X, y = make_sparse_data(n_samples, n_features, n_informative,
                            positive=positive)

    X_train, X_test = X[n_samples // 2:], X[:n_samples // 2]
    y_train, y_test = y[n_samples // 2:], y[:n_samples // 2]

    s_clf = ElasticNet(alpha=alpha, rho=0.8, fit_intercept=fit_intercept,
                       max_iter=max_iter, tol=1e-7, positive=positive,
                       warm_start=True)
    s_clf.fit(X_train, y_train)

    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert_greater(s_clf.score(X_test, y_test), 0.85)

    # check the convergence is the same as the dense version
    d_clf = ElasticNet(alpha=alpha, rho=0.8, fit_intercept=fit_intercept,
                       max_iter=max_iter, tol=1e-7, positive=positive,
                       warm_start=True)
    # densify so d_clf actually exercises the dense code path
    d_clf.fit(X_train.toarray(), y_train)

    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert_greater(d_clf.score(X_test, y_test), 0.85)

    assert_almost_equal(s_clf.coef_, d_clf.coef_, 5)
    assert_almost_equal(s_clf.intercept_, d_clf.intercept_, 5)

    # check that the coefs are sparse
    assert_less(np.sum(s_clf.coef_ != 0.0), 2 * n_informative)

    # check that warm restart leads to the same result with
    # sparse and dense versions

    rng = np.random.RandomState(seed=0)
    coef_init = rng.randn(n_features)

    d_clf.fit(X_train.toarray(), y_train, coef_init=coef_init)
    s_clf.fit(X_train, y_train, coef_init=coef_init)

    assert_almost_equal(s_clf.coef_, d_clf.coef_, 5)
    assert_almost_equal(s_clf.intercept_, d_clf.intercept_, 5)
Example #8
def test_fit_simple_backupsklearn():
    df = pd.read_csv("./open_data/simple.txt", delim_whitespace=True)
    X = np.array(df.iloc[:, :df.shape[1] - 1], dtype='float32', order='C')
    y = np.array(df.iloc[:, df.shape[1] - 1], dtype='float32', order='C')
    Solver = h2o4gpu.ElasticNet

    enet = Solver(glm_stop_early=False)
    print("h2o4gpu fit()")
    enet.fit(X, y)
    print("h2o4gpu predict()")
    print(enet.predict(X))
    print("h2o4gpu score()")
    print(enet.score(X, y))

    enet_wrapper = Solver(positive=True, random_state=1234)
    print("h2o4gpu scikit wrapper fit()")
    enet_wrapper.fit(X, y)
    print("h2o4gpu scikit wrapper predict()")
    print(enet_wrapper.predict(X))
    print("h2o4gpu scikit wrapper score()")
    print(enet_wrapper.score(X, y))

    from sklearn.linear_model import ElasticNet  # public path; the private coordinate_descent module was removed
    enet_sk = ElasticNet(positive=True, random_state=1234)
    print("Scikit fit()")
    enet_sk.fit(X, y)
    print("Scikit predict()")
    print(enet_sk.predict(X))
    print("Scikit score()")
    print(enet_sk.score(X, y))

    enet_sk_coef = csr_matrix(enet_sk.coef_, dtype=np.float32).toarray()

    print(enet_sk.coef_)

    print(enet_sk_coef)

    print(enet_wrapper.coef_)

    print(enet_sk.intercept_)
    print(enet_wrapper.intercept_)

    print(enet_sk.n_iter_)
    print(enet_wrapper.n_iter_)

    print("Coeffs, intercept, and n_iters should match")
    assert np.allclose(enet_wrapper.coef_, enet_sk_coef)
    assert np.allclose(enet_wrapper.intercept_, enet_sk.intercept_)
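
# Imports the snippet above appears to assume (a sketch; the surrounding
# module is not shown in the source):
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
import h2o4gpu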