def test_choice(self):
    """A pipeline built with the choice combinator `|` must refuse `fit`.

    Fix: the original wrapped `fit` in try/except and asserted inside the
    `except` clause, so the test silently PASSED if no exception was raised.
    Using `assertRaises` makes a missing exception a test failure.  The
    expected message text is unchanged.
    """
    planned = LogisticRegression() | KNeighborsClassifier()
    with self.assertRaises(AttributeError) as cm:
        planned.fit(self.X, self.y)
    self.assertEqual(
        str(cm.exception),
        """The pipeline is not trainable, which means you can not call fit on it. Suggested fixes: Fix [A]: You can make the following changes in the pipeline in order to make it trainable: [A.1] Please remove the operator choice `|` from `LogisticRegression | KNeighborsClassifier` and keep only one of those operators. Fix [B]: Alternatively, you could use `auto_configure(X, y, Hyperopt, max_evals=5)` on the pipeline to use Hyperopt for `max_evals` iterations for hyperparameter tuning. `Hyperopt` can be imported as `from lale.lib.lale import Hyperopt`.""",
    )
def test_fit_clones_impl(self):
    """fit must not mutate the trainable: the trained result wraps a fresh impl."""
    from sklearn.datasets import load_iris

    bunch = load_iris()
    before_fit = LogisticRegression()
    after_fit = before_fit.fit(bunch.data, bunch.target)
    # Training returns a new operator whose impl is distinct from the trainable's.
    self.assertIsNot(before_fit._impl, after_fit._impl)
def test_predict_proba(self):
    """predict_proba works on both the trainable and the trained operator.

    Fix: the first result was bound to `predicted` and immediately
    overwritten (dead assignment); both calls are smoke tests, so their
    values are now explicitly discarded with `_`.
    """
    import numpy as np

    trainable_lr = LogisticRegression(n_jobs=1)
    iris = sklearn.datasets.load_iris()
    trained_lr = trainable_lr.fit(
        iris.data, iris.target, sample_weight=np.arange(len(iris.target))
    )
    # Calling predict_proba on the *trainable* is still exercised; a disabled
    # assertWarns(DeprecationWarning) check used to surround this call.
    _ = trainable_lr.predict_proba(iris.data)
    _ = trained_lr.predict_proba(iris.data)
def test_missing_iris(self):
    """Numeric classification data with synthetic NaNs: a bare estimator
    rejects it, while the _fit_predict pipeline is expected to cope."""
    features, labels = sklearn.datasets.load_iris(return_X_y=True)
    features_nan = lale.helpers.add_missing_values(features)
    # sklearn estimators refuse NaN inputs outright.
    with self.assertRaisesRegex(ValueError, "Input contains NaN"):
        trainable = LogisticRegression()
        _ = trainable.fit(features_nan, labels)
    self._fit_predict("classification", features_nan, labels)
def test_clone_of_trained(self):
    """sklearn.base.clone must accept a trained lale operator without error.

    Fix: the clone was bound to an unused local `trained2`; it is now
    explicitly discarded since only the absence of an exception matters.
    """
    from sklearn.base import clone
    from sklearn.datasets import load_iris

    iris = load_iris()
    trained = LogisticRegression().fit(iris.data, iris.target)
    _ = clone(trained)
def test_sample_weight(self):
    """fit accepts a sample_weight fit-param and the trained result predicts."""
    import numpy as np

    classifier = LogisticRegression(n_jobs=1)
    dataset = load_iris()
    weights = np.arange(len(dataset.target))
    fitted = classifier.fit(dataset.data, dataset.target, sample_weight=weights)
    _ = fitted.predict(dataset.data)
def test_compose5(self):
    """An operator works both standalone and composed via >> into a pipeline."""
    encoder = OneHotEncoder(handle_unknown=OneHotEncoder.handle_unknown.ignore)
    digits = sklearn.datasets.load_digits()
    classifier = LogisticRegression()
    # Standalone fit then predict.
    classifier.fit(digits.data, digits.target).predict(digits.data)
    # Same classifier preceded by a one-hot encoding step.
    pipeline = encoder >> classifier
    pipeline.fit(digits.data, digits.target).predict(digits.data)
def test_decision_function(self):
    """A trained classifier exposes decision_function.

    Fix: the result was bound to an unused local `predicted`; this is a
    smoke test, so the scores are now explicitly discarded.
    """
    import numpy as np

    trainable_lr = LogisticRegression(n_jobs=1)
    iris = sklearn.datasets.load_iris()
    trained_lr = trainable_lr.fit(
        iris.data, iris.target, sample_weight=np.arange(len(iris.target))
    )
    _ = trained_lr.decision_function(iris.data)
def test_scorers_np_num(self):
    """Fairness scorers run against a model trained on numeric numpy data."""
    split = self.creditg_np_num
    fairness_info = split["fairness_info"]
    model = LogisticRegression(max_iter=1000).fit(
        split["train_X"], split["train_y"]
    )
    self._attempt_scorers(fairness_info, model, split["test_X"], split["test_y"])
def test_score_trained_op(self):
    """score on a trained operator equals sklearn's accuracy_score."""
    from sklearn.metrics import accuracy_score

    from lale.lib.sklearn import LogisticRegression

    trained = LogisticRegression().fit(self.X_train, self.y_train)
    reported = trained.score(self.X_test, self.y_test)
    expected = accuracy_score(self.y_test, trained.predict(self.X_test))
    self.assertEqual(reported, expected)
def test_grid_search_on_trained(self):
    """GridSearchCV construction accepts a trained lale operator."""
    from sklearn.metrics import accuracy_score, make_scorer
    from sklearn.model_selection import GridSearchCV

    data = load_iris()
    trained = LogisticRegression().fit(data.data, data.target)
    grid = {"solver": ("liblinear", "lbfgs"), "penalty": ["l2"]}
    # Construction only; the search object is never fitted here.
    _ = GridSearchCV(trained, grid, cv=5, scoring=make_scorer(accuracy_score))
def test_grid_search_on_trained(self):
    """GridSearchCV construction accepts a trained lale operator.

    Fix: the search object was bound to an unused local `clf`; it is now
    explicitly discarded since it is never fitted.  Quoting normalized to
    double quotes to match the sibling test of the same name.
    """
    from sklearn.datasets import load_iris
    from sklearn.metrics import accuracy_score, make_scorer
    from sklearn.model_selection import GridSearchCV

    iris = load_iris()
    X, y = iris.data, iris.target
    lr = LogisticRegression()
    trained = lr.fit(X, y)
    parameters = {"solver": ("liblinear", "lbfgs"), "penalty": ["l2"]}
    _ = GridSearchCV(trained, parameters, cv=5, scoring=make_scorer(accuracy_score))
def test_grid_search_on_trained_auto(self):
    """GridSearchCV accepts a trained operator with auto-generated grids.

    Fix: the search object was bound to an unused local `clf`; it is now
    explicitly discarded since it is never fitted.
    """
    from sklearn.datasets import load_iris
    from sklearn.metrics import accuracy_score, make_scorer
    from sklearn.model_selection import GridSearchCV

    iris = load_iris()
    X, y = iris.data, iris.target
    lr = LogisticRegression()
    trained = lr.fit(X, y)
    # Sample a small grid from the operator's hyperparameter schema.
    parameters = get_grid_search_parameter_grids(lr, num_samples=2)
    _ = GridSearchCV(trained, parameters, cv=5, scoring=make_scorer(accuracy_score))
def test_log_fit_predict(self):
    """fit and predict each emit paired enter/exit log lines."""
    import lale.datasets

    (train_X, train_y), (test_X, test_y) = lale.datasets.load_iris_df()
    model = LogisticRegression().fit(train_X, train_y)
    _ = model.predict(test_X)
    self.handler.flush()
    # Exactly four log lines are expected, in call order.
    first, second, third, fourth = self.stream.getvalue().strip().split('\n')
    self.assertTrue(first.endswith('enter fit LogisticRegression'))
    self.assertTrue(second.endswith('exit fit LogisticRegression'))
    self.assertTrue(third.endswith('enter predict LogisticRegression'))
    self.assertTrue(fourth.endswith('exit predict LogisticRegression'))
def test_clone_with_scikit1(self):
    """clone() yields distinct wrapper and impl for trainable and trained ops."""
    from sklearn.base import clone

    trainable = LogisticRegression()
    trainable.get_params()
    trainable_copy = clone(trainable)
    self.assertNotEqual(trainable, trainable_copy)
    self.assertNotEqual(trainable._impl, trainable_copy._impl)
    iris = load_iris()
    trained = trainable.fit(iris.data, iris.target)
    _ = trained.predict(iris.data)
    trained_copy = clone(trained)
    self.assertNotEqual(trained._impl, trained_copy._impl)
def test_score_trained_op_sample_wt(self):
    """Weighted score matches weighted accuracy_score on the same predictions."""
    import numpy as np
    from sklearn.metrics import accuracy_score

    from lale.lib.sklearn import LogisticRegression

    trained = LogisticRegression().fit(self.X_train, self.y_train)
    # Deterministic per-sample weights via a fixed RNG seed.
    weights = np.random.RandomState(0).randint(10, size=self.y_test.shape)
    reported = trained.score(self.X_test, self.y_test, sample_weight=weights)
    expected = accuracy_score(
        self.y_test, trained.predict(self.X_test), sample_weight=weights
    )
    self.assertEqual(reported, expected)
def test_bare_array(self):
    """Plain ndarrays in and out: a scorer on a trained op yields a bare float."""
    import sklearn.metrics
    from numpy import ndarray

    from lale.datasets.data_schemas import NDArrayWithSchema

    X, y = sklearn.datasets.load_iris(return_X_y=True)
    # Inputs are plain numpy arrays, not schema-carrying subclasses.
    for arr in (X, y):
        self.assertIsInstance(arr, ndarray)
        self.assertNotIsInstance(arr, NDArrayWithSchema)
    trained = LogisticRegression().fit(X, y)
    scorer = sklearn.metrics.make_scorer(sklearn.metrics.accuracy_score)
    result = scorer(trained, X, y)
    self.assertIsInstance(result, float)
    self.assertNotIsInstance(result, NDArrayWithSchema)
def test_clone_with_scikit1(self):
    """A cloned trained operator predicts the same labels as the original."""
    from sklearn.base import clone

    trainable = LogisticRegression()
    trainable.get_params()
    trainable_copy = clone(trainable)
    self.assertNotEqual(trainable, trainable_copy)
    self.assertNotEqual(trainable._impl, trainable_copy._impl)
    iris = sklearn.datasets.load_iris()
    trained = trainable.fit(iris.data, iris.target)
    labels = trained.predict(iris.data)
    trained_copy = clone(trained)
    self.assertNotEqual(trained._impl, trained_copy._impl)
    labels_from_copy = trained_copy.predict(iris.data)
    # Element-wise agreement between original and cloned predictions.
    for original, copied in zip(labels, labels_from_copy):
        self.assertEqual(original, copied)
def test_disparate_impact_remover_np_num(self):
    """DisparateImpactRemover preprocessing should push disparate impact
    closer to 1.0 than the unmitigated model on numeric numpy credit-g."""
    split = self.creditg_np_num
    fairness_info = split["fairness_info"]
    baseline = LogisticRegression(max_iter=1000)
    mitigated = DisparateImpactRemover(**fairness_info) >> baseline
    trained_baseline = baseline.fit(split["train_X"], split["train_y"])
    trained_mitigated = mitigated.fit(split["train_X"], split["train_y"])
    scorer = lale.lib.aif360.disparate_impact(**fairness_info)
    impact_orig = scorer(trained_baseline, split["test_X"], split["test_y"])
    self.assertTrue(0.6 < impact_orig < 1.0, f"impact_orig {impact_orig}")
    impact_remi = scorer(trained_mitigated, split["test_X"], split["test_y"])
    self.assertTrue(0.8 < impact_remi < 1.0, f"impact_remi {impact_remi}")
def test_using_individual_operator(self):
    """OptimizeLast wraps an already-trained individual operator and tunes it."""
    from lale.lib.lale import Hyperopt, OptimizeLast

    # Train an individual operator first.
    trained_operator = LogisticRegression().fit(self.X_train, self.y_train)
    # Hand the trained model to OptimizeLast, tuning the last step via Hyperopt.
    opt_last = OptimizeLast(
        estimator=trained_operator,
        last_optimizer=Hyperopt,
        optimizer_args={"scoring": "accuracy", "cv": 3, "max_evals": 2},
    )
    res_last = opt_last.fit(self.X_train, self.y_train)
    predictions = res_last.predict(self.X_test)
    predictions_1 = opt_last.predict(self.X_test)
    best_pipeline = res_last.get_pipeline()
    # Tuned result keeps the operator type, and predicting through the
    # optimizer matches predicting through its fit result.
    self.assertEqual(type(trained_operator), type(best_pipeline))
    assert np.array_equal(predictions_1, predictions)
def test_with_defaults(self):
    """LogisticRegression with default hyperparameters fits and predicts."""
    model = LogisticRegression().fit(self.train_X, self.train_y)
    _ = model.predict(self.test_X)
def test_score_trainable_op(self):
    """score called on the trainable wrapper (after a separate fit) must work."""
    from lale.lib.sklearn import LogisticRegression

    trainable = LogisticRegression()
    # fit returns a new trained operator; the result is deliberately ignored
    # because this test scores through the trainable wrapper itself.
    _ = trainable.fit(self.X_train, self.y_train)
    trainable.score(self.X_test, self.y_test)