def test_write_parameters():
    """Check that ``coef_`` and ``intercept_`` of a fitted model are writable.

    Both arrays are zeroed in place after fitting; the decision function
    evaluated on the training data must then be (almost) zero everywhere.
    """
    model = LogisticRegression(random_state=0)
    model.fit(X, Y1)

    # Slice assignment mutates the existing arrays instead of rebinding them.
    model.coef_[:] = 0
    model.intercept_[:] = 0

    scores = model.decision_function(X)
    assert_array_almost_equal(scores, 0)
class LogisticRegressionImpl():
    """Thin wrapper that forwards every call to an underlying ``SKLModel``.

    The constructor arguments are stored verbatim in ``self._hyperparams``
    and passed as keyword arguments to ``SKLModel``; the resulting object is
    kept in ``self._wrapped_model``.

    NOTE(review): ``SKLModel`` is defined outside this block -- presumably a
    scikit-learn ``LogisticRegression`` alias; confirm at the import site.
    """

    def __init__(self, penalty='l2', dual=False, tol=0.0001, C=1.0,
                 fit_intercept=True, intercept_scaling=1,
                 class_weight='balanced', random_state=None,
                 solver='liblinear', max_iter=100, multi_class='ovr',
                 verbose=0, warm_start=False, n_jobs=None):
        """Record the hyperparameters and build the wrapped model from them."""
        self._hyperparams = dict(
            penalty=penalty,
            dual=dual,
            tol=tol,
            C=C,
            fit_intercept=fit_intercept,
            intercept_scaling=intercept_scaling,
            class_weight=class_weight,
            random_state=random_state,
            solver=solver,
            max_iter=max_iter,
            multi_class=multi_class,
            verbose=verbose,
            warm_start=warm_start,
            n_jobs=n_jobs,
        )
        self._wrapped_model = SKLModel(**self._hyperparams)

    def fit(self, X, y=None):
        """Fit the wrapped model and return ``self``.

        ``y`` is forwarded only when it is not ``None``; otherwise the
        wrapped model's ``fit`` is called with ``X`` alone.
        """
        fit_args = (X,) if y is None else (X, y)
        self._wrapped_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Return the wrapped model's ``predict(X)``."""
        model = self._wrapped_model
        return model.predict(X)

    def predict_proba(self, X):
        """Return the wrapped model's ``predict_proba(X)``."""
        model = self._wrapped_model
        return model.predict_proba(X)

    def decision_function(self, X):
        """Return the wrapped model's ``decision_function(X)``."""
        model = self._wrapped_model
        return model.decision_function(X)
def test_sparsify():
    """Exercise the ``sparsify`` and ``densify`` members.

    The decision function is evaluated for all four combinations of
    dense/sparse coefficients and dense/sparse input, and every result is
    compared against the dense-coefficients / dense-input baseline.
    """
    # Not used below; kept so the shape unpacking still executes.
    n_samples, n_features = iris.data.shape
    labels = iris.target_names[iris.target]
    model = LogisticRegression(random_state=0).fit(iris.data, labels)

    # Baseline: dense coefficients, dense input.
    baseline = model.decision_function(iris.data)

    model.sparsify()
    assert_true(sp.issparse(model.coef_))
    sparse_coef_dense_input = model.decision_function(iris.data)

    sparse_input = sp.coo_matrix(iris.data)
    sparse_coef_sparse_input = model.decision_function(sparse_input)

    model.densify()
    dense_coef_sparse_input = model.decision_function(sparse_input)

    for candidate in (sparse_coef_dense_input,
                      sparse_coef_sparse_input,
                      dense_coef_sparse_input):
        assert_array_almost_equal(baseline, candidate)
def test_multinomial_binary_probabilities():
    """Check multinomial LR probabilities on a binary problem.

    The probabilities returned by ``predict_proba`` must match the values
    derived from the decision function ``d``:
    ``exp(d) / (exp(d) + exp(-d))`` for class 1 and its complement for
    class 0.
    """
    X, y = make_classification()
    model = LogisticRegression(multi_class='multinomial', solver='saga')
    model.fit(X, y)

    scores = model.decision_function(X)
    observed = model.predict_proba(X)

    positive = np.exp(scores)
    negative = np.exp(-scores)
    class_1 = positive / (positive + negative)
    expected = np.c_[1 - class_1, class_1]

    assert_almost_equal(observed, expected)
def test_fit_credit_backupsklearn():
    """Compare ``h2o4gpu.LogisticRegression`` with scikit-learn on credit data.

    Reads ``./open_data/creditcard.csv`` (last column is the target, all
    preceding columns are features), then:

    1. fits ``h2o4gpu.LogisticRegression(glm_stop_early=False)`` and prints
       its predictions and score;
    2. fits ``h2o4gpu.LogisticRegression`` with scikit-learn style arguments
       (labelled "scikit wrapper" in the output) and prints the result of
       each estimator method;
    3. fits scikit-learn's own ``LogisticRegression`` with the same
       arguments and prints the same methods;
    4. asserts that coefficients, intercept, iteration counts and all
       predictions of steps 2 and 3 agree under ``np.allclose``.

    NOTE(review): this file defines another function with the same name
    further down; the later definition replaces this one at import time, so
    this variant is not collected unless one of the two is renamed.
    """
    df = pd.read_csv("./open_data/creditcard.csv")
    # Everything but the last column is a feature; the last one is the label.
    X = np.array(df.iloc[:, :-1], dtype='float32', order='C')
    y = np.array(df.iloc[:, -1], dtype='float32', order='C')

    Solver = h2o4gpu.LogisticRegression

    enet_h2o4gpu = Solver(glm_stop_early=False)
    print("h2o4gpu fit()")
    enet_h2o4gpu.fit(X, y)
    print("h2o4gpu predict()")
    print(enet_h2o4gpu.predict(X))
    print("h2o4gpu score()")
    print(enet_h2o4gpu.score(X, y))

    enet = Solver(dual=True, max_iter=100, tol=1E-4, random_state=1234)
    print("h2o4gpu scikit wrapper fit()")
    enet.fit(X, y)
    print("h2o4gpu scikit wrapper predict()")
    print(enet.predict(X))
    print("h2o4gpu scikit wrapper predict_proba()")
    print(enet.predict_proba(X))
    print("h2o4gpu scikit wrapper predict_log_proba()")
    print(enet.predict_log_proba(X))
    print("h2o4gpu scikit wrapper score()")
    print(enet.score(X, y))
    print("h2o4gpu scikit wrapper decision_function()")
    print(enet.decision_function(X))
    print("h2o4gpu scikit wrapper densify()")
    print(enet.densify())
    print("h2o4gpu scikit wrapper sparsify")
    print(enet.sparsify())

    # Import from the public package: the private module
    # ``sklearn.linear_model.logistic`` was deprecated and later removed,
    # while this path works on both old and new scikit-learn releases.
    from sklearn.linear_model import LogisticRegression
    enet_sk = LogisticRegression(dual=True, max_iter=100, tol=1E-4,
                                 random_state=1234)
    print("Scikit fit()")
    enet_sk.fit(X, y)
    print("Scikit predict()")
    print(enet_sk.predict(X))
    print("Scikit predict_proba()")
    print(enet_sk.predict_proba(X))
    print("Scikit predict_log_proba()")
    print(enet_sk.predict_log_proba(X))
    print("Scikit score()")
    print(enet_sk.score(X, y))
    print("Scikit decision_function()")
    print(enet_sk.decision_function(X))
    print("Scikit densify()")
    print(enet_sk.densify())
    print("Sciki sparsify")
    print(enet_sk.sparsify())

    # sparsify() was just called, so normalise the scikit-learn coefficients
    # to a dense float32 array before comparing.
    enet_sk_coef = csr_matrix(enet_sk.coef_, dtype=np.float32).toarray()
    print(enet_sk.coef_)
    print(enet_sk_coef)
    print(enet.coef_)
    print(enet_sk.intercept_)

    print("Coeffs, intercept, and n_iters should match")
    assert np.allclose(enet.coef_, enet_sk_coef)
    assert np.allclose(enet.intercept_, enet_sk.intercept_)
    assert np.allclose(enet.n_iter_, enet_sk.n_iter_)

    print("Preds should match")
    assert np.allclose(enet.predict_proba(X), enet_sk.predict_proba(X))
    assert np.allclose(enet.predict(X), enet_sk.predict(X))
    assert np.allclose(enet.predict_log_proba(X),
                       enet_sk.predict_log_proba(X))
def test_fit_credit_backupsklearn():
    """Compare h2o4gpu and scikit-learn LR with ``intercept_scaling=0.99``.

    Reads ``./open_data/creditcard.csv`` (last column is the target, all
    preceding columns are features), then:

    1. fits ``h2o4gpu.LogisticRegression(glm_stop_early=False)`` and prints
       its predictions and score;
    2. fits ``h2o4gpu.LogisticRegression`` with scikit-learn style arguments
       including ``intercept_scaling=0.99`` (labelled "scikit wrapper" in
       the output) and prints the result of each estimator method;
    3. fits scikit-learn's own ``LogisticRegression`` with the same
       arguments and prints the same methods;
    4. asserts that coefficients, intercept, iteration counts and all
       predictions of steps 2 and 3 agree under ``np.allclose``.

    NOTE(review): an earlier function in this file has the same name; this
    definition replaces it at import time, so only this variant is
    collected unless one of the two is renamed.
    """
    df = pd.read_csv("./open_data/creditcard.csv")
    # Everything but the last column is a feature; the last one is the label.
    X = np.array(df.iloc[:, :-1], dtype='float32', order='C')
    y = np.array(df.iloc[:, -1], dtype='float32', order='C')

    Solver = h2o4gpu.LogisticRegression

    enet_h2o4gpu = Solver(glm_stop_early=False)
    print("h2o4gpu fit()")
    enet_h2o4gpu.fit(X, y)
    print("h2o4gpu predict()")
    print(enet_h2o4gpu.predict(X))
    print("h2o4gpu score()")
    print(enet_h2o4gpu.score(X, y))

    enet = Solver(dual=True, max_iter=100, tol=1E-4, intercept_scaling=0.99,
                  random_state=1234)
    print("h2o4gpu scikit wrapper fit()")
    enet.fit(X, y)
    print("h2o4gpu scikit wrapper predict()")
    print(enet.predict(X))
    print("h2o4gpu scikit wrapper predict_proba()")
    print(enet.predict_proba(X))
    print("h2o4gpu scikit wrapper predict_log_proba()")
    print(enet.predict_log_proba(X))
    print("h2o4gpu scikit wrapper score()")
    print(enet.score(X, y))
    print("h2o4gpu scikit wrapper decision_function()")
    print(enet.decision_function(X))
    print("h2o4gpu scikit wrapper densify()")
    print(enet.densify())
    print("h2o4gpu scikit wrapper sparsify")
    print(enet.sparsify())

    # Import from the public package: the private module
    # ``sklearn.linear_model.logistic`` was deprecated and later removed,
    # while this path works on both old and new scikit-learn releases.
    from sklearn.linear_model import LogisticRegression
    enet_sk = LogisticRegression(dual=True, max_iter=100, tol=1E-4,
                                 intercept_scaling=0.99, random_state=1234)
    print("Scikit fit()")
    enet_sk.fit(X, y)
    print("Scikit predict()")
    print(enet_sk.predict(X))
    print("Scikit predict_proba()")
    print(enet_sk.predict_proba(X))
    print("Scikit predict_log_proba()")
    print(enet_sk.predict_log_proba(X))
    print("Scikit score()")
    print(enet_sk.score(X, y))
    print("Scikit decision_function()")
    print(enet_sk.decision_function(X))
    print("Scikit densify()")
    print(enet_sk.densify())
    print("Sciki sparsify")
    print(enet_sk.sparsify())

    # sparsify() was just called, so normalise the scikit-learn coefficients
    # to a dense float32 array before comparing.
    enet_sk_coef = csr_matrix(enet_sk.coef_, dtype=np.float32).toarray()
    print(enet_sk.coef_)
    print(enet_sk_coef)
    print(enet.coef_)
    print(enet_sk.intercept_)

    print("Coeffs, intercept, and n_iters should match")
    assert np.allclose(enet.coef_, enet_sk_coef)
    assert np.allclose(enet.intercept_, enet_sk.intercept_)
    assert np.allclose(enet.n_iter_, enet_sk.n_iter_)

    print("Preds should match")
    assert np.allclose(enet.predict_proba(X), enet_sk.predict_proba(X))
    assert np.allclose(enet.predict(X), enet_sk.predict(X))
    assert np.allclose(enet.predict_log_proba(X),
                       enet_sk.predict_log_proba(X))