def _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive):
    """Check that ElasticNet fits sparse and densified data the same way.

    Data comes from ``make_sparse_data``; the second half of the samples is
    used for training and the first half for scoring. One estimator is fit
    on ``X_train`` as returned, a second one on ``X_train.toarray()``, and
    their duality gaps, scores, coefficients and intercepts are compared.

    Parameters
    ----------
    alpha
        Forwarded to ``ElasticNet(alpha=...)``.
    fit_intercept
        Forwarded to ``ElasticNet(fit_intercept=...)``.
    positive
        Forwarded to both ``make_sparse_data`` and ``ElasticNet``.
    """
    n_samples, n_features, max_iter = 100, 100, 1000
    n_informative = 10

    X, y = make_sparse_data(n_samples, n_features, n_informative,
                            positive=positive)

    # Floor division keeps the split point an int; ``n_samples / 2`` is a
    # float under true division and is rejected as a slice bound.
    half = n_samples // 2
    X_train, X_test = X[half:], X[:half]
    y_train, y_test = y[half:], y[:half]

    s_clf = ElasticNet(alpha=alpha, l1_ratio=0.8, fit_intercept=fit_intercept,
                       max_iter=max_iter, tol=1e-7, positive=positive,
                       warm_start=True)
    s_clf.fit(X_train, y_train)

    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert_greater(s_clf.score(X_test, y_test), 0.85)

    # check the convergence is the same as the dense version
    d_clf = ElasticNet(alpha=alpha, l1_ratio=0.8, fit_intercept=fit_intercept,
                       max_iter=max_iter, tol=1e-7, positive=positive,
                       warm_start=True)
    # toarray() gives a plain ndarray; todense() would give an np.matrix.
    d_clf.fit(X_train.toarray(), y_train)

    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert_greater(d_clf.score(X_test, y_test), 0.85)

    assert_almost_equal(s_clf.coef_, d_clf.coef_, 5)
    assert_almost_equal(s_clf.intercept_, d_clf.intercept_, 5)

    # check that the coefs are sparse
    assert_less(np.sum(s_clf.coef_ != 0.0), 2 * n_informative)
def _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive):
    """Compare ElasticNet fits on sparse versus densified training data.

    The samples produced by ``make_sparse_data`` are split in two: the
    second half trains the models, the first half scores them. Both models
    share the same hyper-parameters; only the training matrix differs
    (``X_train`` as-is versus ``X_train.toarray()``).

    Parameters
    ----------
    alpha
        Passed through to ``ElasticNet``.
    fit_intercept
        Passed through to ``ElasticNet``.
    positive
        Passed through to ``make_sparse_data`` and ``ElasticNet``.
    """
    n_samples, n_features, max_iter = 100, 100, 1000
    n_informative = 10
    X, y = make_sparse_data(
        n_samples, n_features, n_informative, positive=positive
    )

    split = n_samples // 2
    X_train, y_train = X[split:], y[split:]
    X_test, y_test = X[:split], y[:split]

    # Both estimators are built from the exact same settings.
    enet_kwargs = dict(
        alpha=alpha,
        l1_ratio=0.8,
        fit_intercept=fit_intercept,
        max_iter=max_iter,
        tol=1e-7,
        positive=positive,
        warm_start=True,
    )

    sparse_model = ElasticNet(**enet_kwargs)
    sparse_model.fit(X_train, y_train)
    assert_almost_equal(sparse_model.dual_gap_, 0, 4)
    sparse_score = sparse_model.score(X_test, y_test)
    assert sparse_score > 0.85

    # The dense fit must converge to the same solution.
    dense_model = ElasticNet(**enet_kwargs)
    dense_model.fit(X_train.toarray(), y_train)
    assert_almost_equal(dense_model.dual_gap_, 0, 4)
    dense_score = dense_model.score(X_test, y_test)
    assert dense_score > 0.85

    assert_almost_equal(sparse_model.coef_, dense_model.coef_, 5)
    assert_almost_equal(sparse_model.intercept_, dense_model.intercept_, 5)

    # Only a small number of coefficients may be non-zero.
    n_nonzero = np.sum(sparse_model.coef_ != 0.0)
    assert n_nonzero < 2 * n_informative
def _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive):
    """Check ElasticNet agrees on sparse and densified data, cold and warm.

    Data comes from ``make_sparse_data``; the second half of the samples is
    used for training and the first half for scoring. One estimator is fit
    on ``X_train`` as returned and one on its densified copy. After the
    first comparison, both are refit from the same random ``coef_init`` and
    compared again.

    Parameters
    ----------
    alpha
        Forwarded to ``ElasticNet(alpha=...)``.
    fit_intercept
        Forwarded to ``ElasticNet(fit_intercept=...)``.
    positive
        Forwarded to both ``make_sparse_data`` and ``ElasticNet``.
    """
    n_samples, n_features, max_iter = 100, 100, 1000
    n_informative = 10

    X, y = make_sparse_data(n_samples, n_features, n_informative,
                            positive=positive)

    # Floor division keeps the split point an int; ``n_samples / 2`` is a
    # float under true division and is rejected as a slice bound.
    half = n_samples // 2
    X_train, X_test = X[half:], X[:half]
    y_train, y_test = y[half:], y[:half]

    s_clf = ElasticNet(alpha=alpha, rho=0.8, fit_intercept=fit_intercept,
                       max_iter=max_iter, tol=1e-7, positive=positive,
                       warm_start=True)
    s_clf.fit(X_train, y_train)

    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert_greater(s_clf.score(X_test, y_test), 0.85)

    # check the convergence is the same as the dense version
    d_clf = ElasticNet(alpha=alpha, rho=0.8, fit_intercept=fit_intercept,
                       max_iter=max_iter, tol=1e-7, positive=positive,
                       warm_start=True)
    # toarray() gives a plain ndarray (todense() would give an np.matrix);
    # the dense copy is built once and reused for the warm-restart fit.
    X_train_dense = X_train.toarray()
    d_clf.fit(X_train_dense, y_train)

    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert_greater(d_clf.score(X_test, y_test), 0.85)

    assert_almost_equal(s_clf.coef_, d_clf.coef_, 5)
    assert_almost_equal(s_clf.intercept_, d_clf.intercept_, 5)

    # check that the coefs are sparse
    assert_less(np.sum(s_clf.coef_ != 0.0), 2 * n_informative)

    # check that warm restart leads to the same result with
    # sparse and dense versions
    rng = np.random.RandomState(seed=0)
    coef_init = rng.randn(n_features)

    d_clf.fit(X_train_dense, y_train, coef_init=coef_init)
    s_clf.fit(X_train, y_train, coef_init=coef_init)

    assert_almost_equal(s_clf.coef_, d_clf.coef_, 5)
    assert_almost_equal(s_clf.intercept_, d_clf.intercept_, 5)
def test_sparse_enet_not_as_toy_dataset():
    """Check that SparseENet and DenseENet converge to the same solution.

    Data comes from ``make_sparse_data``; the second half of the samples is
    used for training and the first half for scoring. Both estimators are
    built with ``alpha=0.1``, ``rho=0.8`` and no intercept, fit on the same
    training data, and compared on duality gap, score and coefficients.
    """
    n_samples, n_features, max_iter = 100, 100, 1000
    n_informative = 10

    X, y = make_sparse_data(n_samples, n_features, n_informative)

    # Floor division keeps the split point an int; ``n_samples / 2`` is a
    # float under true division and is rejected as a slice bound.
    half = n_samples // 2
    X_train, X_test = X[half:], X[:half]
    y_train, y_test = y[half:], y[:half]

    s_clf = SparseENet(alpha=0.1, rho=0.8, fit_intercept=False,
                       max_iter=max_iter, tol=1e-7)
    s_clf.fit(X_train, y_train)
    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert s_clf.score(X_test, y_test) > 0.85

    # check the convergence is the same as the dense version
    d_clf = DenseENet(alpha=0.1, rho=0.8, fit_intercept=False,
                      max_iter=max_iter, tol=1e-7)
    d_clf.fit(X_train, y_train)
    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert d_clf.score(X_test, y_test) > 0.85

    assert_almost_equal(s_clf.coef_, d_clf.coef_, 5)

    # check that the coefs are sparse
    assert np.sum(s_clf.coef_ != 0.0) < 2 * n_informative
def test_sparse_enet_not_as_toy_dataset():
    """Verify SparseENet matches DenseENet on a generated sparse problem.

    ``make_sparse_data`` supplies ``X`` and ``y``. Samples from the midpoint
    onward train the models; samples before the midpoint score them. Each
    model must reach a near-zero duality gap and a score above 0.85, and
    their coefficients must agree to 5 decimals.
    """
    n_samples, n_features, max_iter = 100, 100, 1000
    n_informative = 10

    X, y = make_sparse_data(n_samples, n_features, n_informative)

    # ``//`` is required here: a float midpoint (what ``/`` yields under
    # true division) is not a valid slice bound.
    midpoint = n_samples // 2
    X_train, X_test = X[midpoint:], X[:midpoint]
    y_train, y_test = y[midpoint:], y[:midpoint]

    s_clf = SparseENet(alpha=0.1, rho=0.8, fit_intercept=False,
                       max_iter=max_iter, tol=1e-7)
    s_clf.fit(X_train, y_train)
    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert s_clf.score(X_test, y_test) > 0.85

    # check the convergence is the same as the dense version
    d_clf = DenseENet(alpha=0.1, rho=0.8, fit_intercept=False,
                      max_iter=max_iter, tol=1e-7)
    d_clf.fit(X_train, y_train)
    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert d_clf.score(X_test, y_test) > 0.85

    assert_almost_equal(s_clf.coef_, d_clf.coef_, 5)

    # check that the coefs are sparse
    assert np.sum(s_clf.coef_ != 0.0) < 2 * n_informative
def _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive):
    """Check that SparseENet and DenseENet converge to the same solution.

    Data comes from ``make_sparse_data``; the second half of the samples is
    used for training and the first half for scoring. Both estimators get
    the same hyper-parameters and training data, and are compared on
    duality gap, score, coefficients and intercept.

    Parameters
    ----------
    alpha
        Forwarded to both estimators as ``alpha``.
    fit_intercept
        Forwarded to both estimators as ``fit_intercept``.
    positive
        Forwarded to ``make_sparse_data`` and to both estimators.
    """
    n_samples, n_features, max_iter = 100, 100, 1000
    n_informative = 10

    X, y = make_sparse_data(n_samples, n_features, n_informative,
                            positive=positive)

    # Floor division keeps the split point an int; ``n_samples / 2`` is a
    # float under true division and is rejected as a slice bound.
    half = n_samples // 2
    X_train, X_test = X[half:], X[:half]
    y_train, y_test = y[half:], y[:half]

    s_clf = SparseENet(alpha=alpha, rho=0.8, fit_intercept=fit_intercept,
                       max_iter=max_iter, tol=1e-7, positive=positive)
    s_clf.fit(X_train, y_train)

    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert_greater(s_clf.score(X_test, y_test), 0.85)

    # check the convergence is the same as the dense version
    d_clf = DenseENet(alpha=alpha, rho=0.8, fit_intercept=fit_intercept,
                      max_iter=max_iter, tol=1e-7, positive=positive)
    d_clf.fit(X_train, y_train)

    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert_greater(d_clf.score(X_test, y_test), 0.85)

    assert_almost_equal(s_clf.coef_, d_clf.coef_, 5)
    assert_almost_equal(s_clf.intercept_, d_clf.intercept_, 5)

    # check that the coefs are sparse
    assert_less(np.sum(s_clf.coef_ != 0.0), 2 * n_informative)
def _test_sparse_enet_not_as_toy_dataset(alpha, fit_intercept, positive):
    """Fit two identically configured ElasticNet models and compare them.

    Data comes from ``make_sparse_data``; the second half of the samples is
    used for training and the first half for scoring. The two estimators
    are compared after a first fit, then refit from the same random
    ``coef_init`` and compared again.

    Parameters
    ----------
    alpha
        Forwarded to ``ElasticNet(alpha=...)``.
    fit_intercept
        Forwarded to ``ElasticNet(fit_intercept=...)``.
    positive
        Forwarded to both ``make_sparse_data`` and ``ElasticNet``.
    """
    n_samples, n_features, max_iter = 100, 100, 1000
    n_informative = 10

    X, y = make_sparse_data(n_samples, n_features, n_informative,
                            positive=positive)

    # Floor division keeps the split point an int; ``n_samples / 2`` is a
    # float under true division and is rejected as a slice bound.
    half = n_samples // 2
    X_train, X_test = X[half:], X[:half]
    y_train, y_test = y[half:], y[:half]

    s_clf = ElasticNet(alpha=alpha, rho=0.8, fit_intercept=fit_intercept,
                       max_iter=max_iter, tol=1e-7, positive=positive,
                       warm_start=True)
    s_clf.fit(X_train, y_train)

    assert_almost_equal(s_clf.dual_gap_, 0, 4)
    assert_greater(s_clf.score(X_test, y_test), 0.85)

    # check the convergence is the same as the dense version
    # NOTE(review): d_clf is fit on the very same X_train object as s_clf.
    # If make_sparse_data returns a sparse matrix, this never exercises a
    # dense code path -- confirm whether X_train should be densified here.
    d_clf = ElasticNet(alpha=alpha, rho=0.8, fit_intercept=fit_intercept,
                       max_iter=max_iter, tol=1e-7, positive=positive,
                       warm_start=True)
    d_clf.fit(X_train, y_train)

    assert_almost_equal(d_clf.dual_gap_, 0, 4)
    assert_greater(d_clf.score(X_test, y_test), 0.85)

    assert_almost_equal(s_clf.coef_, d_clf.coef_, 5)
    assert_almost_equal(s_clf.intercept_, d_clf.intercept_, 5)

    # check that the coefs are sparse
    assert_less(np.sum(s_clf.coef_ != 0.0), 2 * n_informative)

    # check that warm restart leads to the same result with
    # sparse and dense versions
    rng = np.random.RandomState(seed=0)
    coef_init = rng.randn(n_features)

    d_clf.fit(X_train, y_train, coef_init=coef_init)
    s_clf.fit(X_train, y_train, coef_init=coef_init)

    assert_almost_equal(s_clf.coef_, d_clf.coef_, 5)
    assert_almost_equal(s_clf.intercept_, d_clf.intercept_, 5)
def test_fit_simple_backupsklearn():
    """Compare h2o4gpu's ElasticNet with scikit-learn's on ``simple.txt``.

    The data file is read as whitespace-separated columns; the last column
    is the target and all others are features, both cast to C-ordered
    float32 arrays. Three estimators are fit on the full data:

    * ``h2o4gpu.ElasticNet(glm_stop_early=False)`` -- fit, predict and score
      are only printed;
    * ``h2o4gpu.ElasticNet(positive=True, random_state=1234)``;
    * scikit-learn's ``ElasticNet(positive=True, random_state=1234)``.

    The test asserts that the last two agree on ``coef_`` and
    ``intercept_`` (via ``np.allclose``).
    """
    # sep=r"\s+" is the documented equivalent of the deprecated
    # delim_whitespace=True option.
    df = pd.read_csv("./open_data/simple.txt", sep=r"\s+")
    last_col = df.shape[1] - 1
    X = np.array(df.iloc[:, :last_col], dtype='float32', order='C')
    y = np.array(df.iloc[:, last_col], dtype='float32', order='C')

    Solver = h2o4gpu.ElasticNet

    enet = Solver(glm_stop_early=False)
    print("h2o4gpu fit()")
    enet.fit(X, y)
    print("h2o4gpu predict()")
    print(enet.predict(X))
    print("h2o4gpu score()")
    print(enet.score(X, y))

    enet_wrapper = Solver(positive=True, random_state=1234)
    print("h2o4gpu scikit wrapper fit()")
    enet_wrapper.fit(X, y)
    print("h2o4gpu scikit wrapper predict()")
    print(enet_wrapper.predict(X))
    print("h2o4gpu scikit wrapper score()")
    print(enet_wrapper.score(X, y))

    # Import from the public package: the private module path
    # sklearn.linear_model.coordinate_descent is not available in newer
    # scikit-learn releases, while this path works in old and new ones.
    from sklearn.linear_model import ElasticNet

    enet_sk = ElasticNet(positive=True, random_state=1234)
    print("Scikit fit()")
    enet_sk.fit(X, y)
    print("Scikit predict()")
    print(enet_sk.predict(X))
    print("Scikit score()")
    print(enet_sk.score(X, y))

    # Round-trip the reference coefficients through a float32 csr_matrix
    # to obtain a dense float32 array for the comparison below.
    enet_sk_coef = csr_matrix(enet_sk.coef_, dtype=np.float32).toarray()

    print(enet_sk.coef_)
    print(enet_sk_coef)
    print(enet_wrapper.coef_)

    print(enet_sk.intercept_)
    print(enet_wrapper.intercept_)

    print(enet_sk.n_iter_)
    print(enet_wrapper.n_iter_)

    # NOTE(review): the message mentions n_iters, but only coef_ and
    # intercept_ are actually asserted; n_iter_ is printed for inspection.
    print("Coeffs, intercept, and n_iters should match")
    assert np.allclose(enet_wrapper.coef_, enet_sk_coef)
    assert np.allclose(enet_wrapper.intercept_, enet_sk.intercept_)
def test_fit_simple_backupsklearn():
    """Compare h2o4gpu's ElasticNet with scikit-learn's on ``simple.txt``.

    The data file is read as whitespace-separated columns; the last column
    is the target and all others are features, both cast to C-ordered
    float32 arrays. Three estimators are fit on the full data:

    * ``h2o4gpu.ElasticNet(glm_stop_early=False)`` -- fit, predict and score
      are only printed;
    * ``h2o4gpu.ElasticNet(positive=True, random_state=1234)``;
    * scikit-learn's ``ElasticNet(positive=True, random_state=1234)``.

    The test asserts that the last two agree on ``coef_`` and
    ``intercept_`` (via ``np.allclose``).
    """
    # sep=r"\s+" is the documented equivalent of the deprecated
    # delim_whitespace=True option.
    df = pd.read_csv("./open_data/simple.txt", sep=r"\s+")
    last_col = df.shape[1] - 1
    X = np.array(df.iloc[:, :last_col], dtype='float32', order='C')
    y = np.array(df.iloc[:, last_col], dtype='float32', order='C')

    Solver = h2o4gpu.ElasticNet

    enet = Solver(glm_stop_early=False)
    print("h2o4gpu fit()")
    enet.fit(X, y)
    print("h2o4gpu predict()")
    print(enet.predict(X))
    print("h2o4gpu score()")
    print(enet.score(X, y))

    enet_wrapper = Solver(positive=True, random_state=1234)
    print("h2o4gpu scikit wrapper fit()")
    enet_wrapper.fit(X, y)
    print("h2o4gpu scikit wrapper predict()")
    print(enet_wrapper.predict(X))
    print("h2o4gpu scikit wrapper score()")
    print(enet_wrapper.score(X, y))

    # Import from the public package: the private module path
    # sklearn.linear_model.coordinate_descent is not available in newer
    # scikit-learn releases, while this path works in old and new ones.
    from sklearn.linear_model import ElasticNet

    enet_sk = ElasticNet(positive=True, random_state=1234)
    print("Scikit fit()")
    enet_sk.fit(X, y)
    print("Scikit predict()")
    print(enet_sk.predict(X))
    print("Scikit score()")
    print(enet_sk.score(X, y))

    # Round-trip the reference coefficients through a float32 csr_matrix
    # to obtain a dense float32 array for the comparison below.
    enet_sk_coef = csr_matrix(enet_sk.coef_, dtype=np.float32).toarray()

    print(enet_sk.coef_)
    print(enet_sk_coef)
    print(enet_wrapper.coef_)

    print(enet_sk.intercept_)
    print(enet_wrapper.intercept_)

    print(enet_sk.n_iter_)
    print(enet_wrapper.n_iter_)

    # NOTE(review): the message mentions n_iters, but only coef_ and
    # intercept_ are actually asserted; n_iter_ is printed for inspection.
    print("Coeffs, intercept, and n_iters should match")
    assert np.allclose(enet_wrapper.coef_, enet_sk_coef)
    assert np.allclose(enet_wrapper.intercept_, enet_sk.intercept_)