def test_sparsify():
    """Check that sparsify() and densify() leave decision values unchanged.

    Fits a LogisticRegression on the iris data, using the entries of
    ``iris.target_names`` as class labels, then evaluates
    ``decision_function`` for every combination of dense/sparse coefficients
    and dense/sparse input. Each result must match the all-dense baseline.
    """
    target = iris.target_names[iris.target]
    clf = LogisticRegression(random_state=0).fit(iris.data, target)

    # Baseline: dense coefficients, dense input.
    pred_d_d = clf.decision_function(iris.data)

    # Sparse coefficients, dense input.
    clf.sparsify()
    assert sp.issparse(clf.coef_)
    pred_s_d = clf.decision_function(iris.data)

    # Sparse coefficients, sparse (COO) input.
    sp_data = sp.coo_matrix(iris.data)
    pred_s_s = clf.decision_function(sp_data)

    # Dense coefficients again, sparse input.
    clf.densify()
    pred_d_s = clf.decision_function(sp_data)

    assert_array_almost_equal(pred_d_d, pred_s_d)
    assert_array_almost_equal(pred_d_d, pred_s_s)
    assert_array_almost_equal(pred_d_d, pred_d_s)
def test_fit_credit_backupsklearn():
    """Compare h2o4gpu's LogisticRegression with scikit-learn on credit data.

    Reads ``./open_data/creditcard.csv``, taking the last column as the
    target and every other column as features (both as C-ordered float32
    arrays). The test then:

    1. fits ``h2o4gpu.LogisticRegression(glm_stop_early=False)`` and prints
       its predictions and score (no assertions on this model);
    2. fits ``h2o4gpu.LogisticRegression`` with scikit-learn style arguments
       and prints the output of each estimator method;
    3. does the same with scikit-learn's own ``LogisticRegression`` using
       identical arguments;
    4. asserts that coefficients, intercept, iteration counts and
       predictions of (2) and (3) agree under ``np.allclose``.

    NOTE(review): this file defines a second function with this exact name
    further down. The later definition replaces this one when the module is
    imported, so this variant does not run unless one of them is renamed.
    """
    df = pd.read_csv("./open_data/creditcard.csv")
    # Last column is the target; everything before it is a feature.
    X = np.array(df.iloc[:, :-1], dtype='float32', order='C')
    y = np.array(df.iloc[:, -1], dtype='float32', order='C')

    Solver = h2o4gpu.LogisticRegression

    # Run with h2o4gpu-specific arguments; output is printed only.
    enet_h2o4gpu = Solver(glm_stop_early=False)
    print("h2o4gpu fit()")
    enet_h2o4gpu.fit(X, y)
    print("h2o4gpu predict()")
    print(enet_h2o4gpu.predict(X))
    print("h2o4gpu score()")
    print(enet_h2o4gpu.score(X, y))

    # Same class, constructed with scikit-learn style arguments.
    enet = Solver(dual=True, max_iter=100, tol=1E-4, random_state=1234)
    print("h2o4gpu scikit wrapper fit()")
    enet.fit(X, y)
    print("h2o4gpu scikit wrapper predict()")
    print(enet.predict(X))
    print("h2o4gpu scikit wrapper predict_proba()")
    print(enet.predict_proba(X))
    print("h2o4gpu scikit wrapper predict_log_proba()")
    print(enet.predict_log_proba(X))
    print("h2o4gpu scikit wrapper score()")
    print(enet.score(X, y))
    print("h2o4gpu scikit wrapper decision_function()")
    print(enet.decision_function(X))
    print("h2o4gpu scikit wrapper densify()")
    print(enet.densify())
    print("h2o4gpu scikit wrapper sparsify")
    print(enet.sparsify())

    # Reference run with scikit-learn itself. Import from the public package:
    # the private ``sklearn.linear_model.logistic`` module path is not
    # available in newer scikit-learn releases.
    from sklearn.linear_model import LogisticRegression
    enet_sk = LogisticRegression(dual=True, max_iter=100, tol=1E-4,
                                 random_state=1234)
    print("Scikit fit()")
    enet_sk.fit(X, y)
    print("Scikit predict()")
    print(enet_sk.predict(X))
    print("Scikit predict_proba()")
    print(enet_sk.predict_proba(X))
    print("Scikit predict_log_proba()")
    print(enet_sk.predict_log_proba(X))
    print("Scikit score()")
    print(enet_sk.score(X, y))
    print("Scikit decision_function()")
    print(enet_sk.decision_function(X))
    print("Scikit densify()")
    print(enet_sk.densify())
    print("Sciki sparsify")
    print(enet_sk.sparsify())

    # sparsify() was the last call on enet_sk, so convert its coefficients
    # back to a dense float32 array before comparing.
    enet_sk_coef = csr_matrix(enet_sk.coef_, dtype=np.float32).toarray()

    print(enet_sk.coef_)
    print(enet_sk_coef)
    print(enet.coef_)
    print(enet_sk.intercept_)

    print("Coeffs, intercept, and n_iters should match")
    assert np.allclose(enet.coef_, enet_sk_coef)
    assert np.allclose(enet.intercept_, enet_sk.intercept_)
    assert np.allclose(enet.n_iter_, enet_sk.n_iter_)

    print("Preds should match")
    assert np.allclose(enet.predict_proba(X), enet_sk.predict_proba(X))
    assert np.allclose(enet.predict(X), enet_sk.predict(X))
    assert np.allclose(enet.predict_log_proba(X),
                       enet_sk.predict_log_proba(X))
def test_fit_credit_backupsklearn():
    """Compare h2o4gpu's LogisticRegression with scikit-learn on credit data.

    This variant passes ``intercept_scaling=0.99`` to both the h2o4gpu
    scikit wrapper and scikit-learn's ``LogisticRegression``.

    Reads ``./open_data/creditcard.csv``, taking the last column as the
    target and every other column as features (both as C-ordered float32
    arrays). The test then:

    1. fits ``h2o4gpu.LogisticRegression(glm_stop_early=False)`` and prints
       its predictions and score (no assertions on this model);
    2. fits ``h2o4gpu.LogisticRegression`` with scikit-learn style arguments
       and prints the output of each estimator method;
    3. does the same with scikit-learn's own ``LogisticRegression`` using
       identical arguments;
    4. asserts that coefficients, intercept, iteration counts and
       predictions of (2) and (3) agree under ``np.allclose``.

    NOTE(review): an earlier function in this file has this exact name. This
    definition replaces it when the module is imported, so the earlier
    variant does not run unless one of them is renamed.
    """
    df = pd.read_csv("./open_data/creditcard.csv")
    # Last column is the target; everything before it is a feature.
    X = np.array(df.iloc[:, :-1], dtype='float32', order='C')
    y = np.array(df.iloc[:, -1], dtype='float32', order='C')

    Solver = h2o4gpu.LogisticRegression

    # Run with h2o4gpu-specific arguments; output is printed only.
    enet_h2o4gpu = Solver(glm_stop_early=False)
    print("h2o4gpu fit()")
    enet_h2o4gpu.fit(X, y)
    print("h2o4gpu predict()")
    print(enet_h2o4gpu.predict(X))
    print("h2o4gpu score()")
    print(enet_h2o4gpu.score(X, y))

    # Same class, constructed with scikit-learn style arguments.
    enet = Solver(dual=True, max_iter=100, tol=1E-4, intercept_scaling=0.99,
                  random_state=1234)
    print("h2o4gpu scikit wrapper fit()")
    enet.fit(X, y)
    print("h2o4gpu scikit wrapper predict()")
    print(enet.predict(X))
    print("h2o4gpu scikit wrapper predict_proba()")
    print(enet.predict_proba(X))
    print("h2o4gpu scikit wrapper predict_log_proba()")
    print(enet.predict_log_proba(X))
    print("h2o4gpu scikit wrapper score()")
    print(enet.score(X, y))
    print("h2o4gpu scikit wrapper decision_function()")
    print(enet.decision_function(X))
    print("h2o4gpu scikit wrapper densify()")
    print(enet.densify())
    print("h2o4gpu scikit wrapper sparsify")
    print(enet.sparsify())

    # Reference run with scikit-learn itself. Import from the public package:
    # the private ``sklearn.linear_model.logistic`` module path is not
    # available in newer scikit-learn releases.
    from sklearn.linear_model import LogisticRegression
    enet_sk = LogisticRegression(dual=True, max_iter=100, tol=1E-4,
                                 intercept_scaling=0.99, random_state=1234)
    print("Scikit fit()")
    enet_sk.fit(X, y)
    print("Scikit predict()")
    print(enet_sk.predict(X))
    print("Scikit predict_proba()")
    print(enet_sk.predict_proba(X))
    print("Scikit predict_log_proba()")
    print(enet_sk.predict_log_proba(X))
    print("Scikit score()")
    print(enet_sk.score(X, y))
    print("Scikit decision_function()")
    print(enet_sk.decision_function(X))
    print("Scikit densify()")
    print(enet_sk.densify())
    print("Sciki sparsify")
    print(enet_sk.sparsify())

    # sparsify() was the last call on enet_sk, so convert its coefficients
    # back to a dense float32 array before comparing.
    enet_sk_coef = csr_matrix(enet_sk.coef_, dtype=np.float32).toarray()

    print(enet_sk.coef_)
    print(enet_sk_coef)
    print(enet.coef_)
    print(enet_sk.intercept_)

    print("Coeffs, intercept, and n_iters should match")
    assert np.allclose(enet.coef_, enet_sk_coef)
    assert np.allclose(enet.intercept_, enet_sk.intercept_)
    assert np.allclose(enet.n_iter_, enet_sk.n_iter_)

    print("Preds should match")
    assert np.allclose(enet.predict_proba(X), enet_sk.predict_proba(X))
    assert np.allclose(enet.predict(X), enet_sk.predict(X))
    assert np.allclose(enet.predict_log_proba(X),
                       enet_sk.predict_log_proba(X))