def test_estimators(self):
    """Fit and predict with a VotingClassifier whose estimator list has a ``None`` entry.

    The test passes when neither ``fit`` nor ``predict`` raises; the
    predictions themselves are not inspected.
    """
    estimators = [
        ('lr', LogisticRegression()),
        ('dt', DecisionTreeClassifier()),
        # NOTE(review): presumably a None entry stands for a dropped
        # estimator -- confirm against the VotingClassifier schema.
        ('na', None),
    ]
    trainable = VotingClassifier(estimators=estimators)
    trained = trainable.fit(self.train_X, self.train_y)
    # The result is intentionally discarded: only a crash-free call matters.
    _ = trained.predict(self.test_X)
def test_with_defaults(self):
    """Fit and predict with a two-member VotingClassifier, all other hyperparameters left unset.

    Passing means ``fit`` and ``predict`` completed without raising.
    """
    voters = [("lr", LogisticRegression()), ("dt", DecisionTreeClassifier())]
    fitted = VotingClassifier(estimators=voters).fit(self.train_X, self.train_y)
    # Predictions are not checked; the call only has to succeed.
    _ = fitted.predict(self.test_X)
def test_with_lale_classifiers(self):
    """Fit and predict with a VotingClassifier built from a KNN and a logistic regression.

    The test has no assertions; it passes when both calls finish without
    raising.
    """
    members = [("knn", KNeighborsClassifier()), ("lr", LogisticRegression())]
    ensemble = VotingClassifier(estimators=members)
    fitted = ensemble.fit(self.X_train, self.y_train)
    fitted.predict(self.X_test)
def test_with_hyperopt(self):
    """Run ``auto_configure`` with Hyperopt (one evaluation) on a VotingClassifier.

    The returned operator is discarded; the test passes when the call
    does not raise.
    """
    from lale.lib.lale import Hyperopt
    from lale.lib.sklearn import VotingClassifier

    members = [("knn", KNeighborsClassifier()), ("lr", LogisticRegression())]
    ensemble = VotingClassifier(estimators=members)
    # max_evals=1 limits the search to a single evaluation.
    _ = ensemble.auto_configure(
        self.X_train, self.y_train, Hyperopt, max_evals=1
    )
def test_estimators(self):
    """Fit and predict with a VotingClassifier whose estimator list has a ``None`` entry.

    The test passes when neither ``fit`` nor ``predict`` raises; the
    predictions themselves are not inspected.
    """
    estimators = [
        ("lr", LogisticRegression()),
        ("dt", DecisionTreeClassifier()),
        # NOTE(review): presumably a None entry stands for a dropped
        # estimator -- confirm against the VotingClassifier schema.
        ("na", None),
    ]
    trainable = VotingClassifier(estimators=estimators)
    trained = trainable.fit(self.train_X, self.train_y)
    # The result is intentionally discarded: only a crash-free call matters.
    _ = trained.predict(self.test_X)
def test_with_hyperopt(self):
    """Auto-configure a VotingClassifier with Hyperopt, then predict with the result.

    The member operators are passed without being called (no
    parentheses), and the search runs with ``cv=3`` and ``max_evals=3``.
    The test passes when no call raises.
    """
    # NOTE(review): uninstantiated operators are presumably "planned"
    # operators whose hyperparameters the optimizer may choose -- confirm.
    candidates = [("lr", LogisticRegression), ("dt", DecisionTreeClassifier)]
    planned = VotingClassifier(estimators=candidates)
    search_options = {"optimizer": Hyperopt, "cv": 3, "max_evals": 3}
    trained = planned.auto_configure(
        self.train_X, self.train_y, **search_options
    )
    # Predictions are not checked; the call only has to succeed.
    _ = trained.predict(self.test_X)
def test_with_lale_pipeline(self):
    """Fit and predict with a VotingClassifier whose second member is a ``PCA() >> LogisticRegression()`` pipeline.

    The test has no assertions; it passes when both calls finish without
    raising.
    """
    from lale.lib.sklearn import VotingClassifier

    members = [
        ("knn", KNeighborsClassifier()),
        # A composed pipeline used as a single ensemble member.
        ("pca_lr", PCA() >> LogisticRegression()),
    ]
    ensemble = VotingClassifier(estimators=members)
    fitted = ensemble.fit(self.X_train, self.y_train)
    fitted.predict(self.X_test)
def test_higher_order_2(self):
    """Check JSON serialization of a VotingClassifier that nests operators.

    The operator under test has two estimators: a bare ``KNN`` and a
    ``PCA() >> LR`` pipeline. The test asserts that ``to_json()`` equals
    a hand-written expected document, and that ``from_json`` followed by
    ``to_json`` reproduces the same document.
    """
    # Show the full diff if the dictionary comparison fails.
    self.maxDiff = None
    from lale.lib.sklearn import VotingClassifier as Vote
    from lale.lib.sklearn import KNeighborsClassifier as KNN
    from lale.lib.sklearn import PCA
    from lale.lib.sklearn import LogisticRegression as LR
    from lale.json_operator import from_json

    operator = Vote(
        estimators=[("knn", KNN), ("pipeline", PCA() >> LR)], voting="soft"
    )

    # Expected document, assembled inside-out from the nested steps.
    expected_pca = {
        "class": "lale.lib.sklearn.pca.PCAImpl",
        "state": "trainable",
        "operator": "PCA",
        "label": "PCA",
        "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html",
        "hyperparams": {},
        "is_frozen_trainable": False,
    }
    expected_lr = {
        "class": "lale.lib.sklearn.logistic_regression.LogisticRegressionImpl",
        "state": "planned",
        "operator": "LogisticRegression",
        "label": "LR",
        "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html",
    }
    expected_knn = {
        "class": "lale.lib.sklearn.k_neighbors_classifier.KNeighborsClassifierImpl",
        "state": "planned",
        "operator": "KNeighborsClassifier",
        "label": "KNN",
        "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.k_neighbors_classifier.html",
    }
    expected_pipeline = {
        "class": "lale.operators.PlannedPipeline",
        "state": "planned",
        "edges": [["pca", "lr"]],
        "steps": {"pca": expected_pca, "lr": expected_lr},
    }
    json_expected = {
        "class": "lale.lib.sklearn.voting_classifier.VotingClassifierImpl",
        "state": "trainable",
        "operator": "VotingClassifier",
        "is_frozen_trainable": True,
        "label": "Vote",
        "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.voting_classifier.html",
        "hyperparams": {
            # Estimators are referenced by path into the "steps" section.
            "estimators": [
                ("knn", {"$ref": "../steps/knn"}),
                ("pipeline", {"$ref": "../steps/pipeline"}),
            ],
            "voting": "soft",
        },
        "steps": {"knn": expected_knn, "pipeline": expected_pipeline},
    }

    serialized = operator.to_json()
    self.assertEqual(serialized, json_expected)

    # Round trip: deserializing and re-serializing must be lossless.
    round_tripped = from_json(serialized).to_json()
    self.assertEqual(serialized, round_tripped)
def fit(self, X_train, y_train):
    """Run the optimizer, then fit a voting ensemble of its best pipelines.

    Steps:

    1. Instantiate ``self.optimizer`` on ``self.estimator`` with
       ``self.args_to_optimizer`` and fit it on the training data.
    2. Keep the trials whose status equals ``STATUS_OK``, sort them by
       ascending ``loss`` and take at most ``self.k`` of them.
    3. Wrap the corresponding pipelines in a ``VotingClassifier`` and
       run the optimizer on it with ``max_evals`` forced to 1.

    The pipeline returned by the second optimizer is stored in
    ``self._best_estimator``.

    Args:
        X_train: Training features, passed through to the optimizer's ``fit``.
        y_train: Training labels, passed through to the optimizer's ``fit``.

    Returns:
        ``self``.
    """
    optimizer_instance = self.optimizer(
        estimator=self.estimator, **self.args_to_optimizer
    )
    trained_optimizer1 = optimizer_instance.fit(X_train, y_train)

    # NOTE(review): summary() is used like a pandas DataFrame indexed by
    # pipeline name -- confirm against the optimizer's API.
    results = trained_optimizer1.summary()
    results = results[results["status"] == STATUS_OK]  # Consider only successful trials
    results = results.sort_values(by=["loss"], axis=0)
    # Fewer than self.k trials may have succeeded.
    k = min(self.k, results.shape[0])
    top_k_pipelines = results.iloc[0:k]

    pipeline_tuples = [
        (pipeline_name, trained_optimizer1.get_pipeline(pipeline_name))
        for pipeline_name in top_k_pipelines.index
    ]
    voting = VotingClassifier(estimators=pipeline_tuples)

    # Work on a shallow copy so self.args_to_optimizer keeps its
    # max_evals setting for later calls.
    args_to_optimizer = copy.copy(self.args_to_optimizer)
    # Currently, voting classifier has no useful hyperparameters to tune.
    # Plain assignment overrides any existing value, so no prior delete
    # is needed.
    args_to_optimizer["max_evals"] = 1

    optimizer_instance2 = self.optimizer(estimator=voting, **args_to_optimizer)
    trained_optimizer2 = optimizer_instance2.fit(X_train, y_train)
    self._best_estimator = trained_optimizer2.get_pipeline()
    return self
def test_voting_post_estimator_mitigation_ensemble(self):
    """Wrap a VotingClassifier in ``CalibratedEqOddsPostprocessing`` and run the shared fit/predict check.

    The voting ensemble (decision tree + logistic regression) is passed
    as the ``estimator`` argument of the post-processing operator.
    """
    ensemble = VotingClassifier(
        estimators=[
            ("dtc", DecisionTreeClassifier()),
            ("lr", LogisticRegression()),
        ]
    )
    model = CalibratedEqOddsPostprocessing(
        **self.fairness_info, estimator=ensemble
    )
    self._attempt_fit_predict(model)
def test_voting_in_estimator_mitigation_base(self):
    """Use a ``PrejudiceRemover`` as one member of a VotingClassifier and run the shared fit/predict check."""
    # The mitigator is configured from the fixture's fairness_info.
    mitigator = PrejudiceRemover(**self.fairness_info)
    members = [("pr", mitigator), ("lr", LogisticRegression())]
    model = VotingClassifier(estimators=members)
    self._attempt_fit_predict(model)
def test_voting_pre_estimator_mitigation_base(self):
    """Use a ``DisparateImpactRemover >> DecisionTreeClassifier`` pipeline as a VotingClassifier member.

    The resulting ensemble is handed to the shared fit/predict check.
    """
    # The mitigator feeds the decision tree; the pair is one member.
    mitigated_tree = (
        DisparateImpactRemover(**self.fairness_info) >> DecisionTreeClassifier()
    )
    members = [("dir+dtc", mitigated_tree), ("lr", LogisticRegression())]
    model = VotingClassifier(estimators=members)
    self._attempt_fit_predict(model)
def test_with_gridsearch(self):
    """Run ``auto_configure`` with GridSearchCV on a hard-voting VotingClassifier.

    The search uses ``lale_num_samples=1``, ``lale_num_grids=1``,
    ``cv=2`` and an accuracy scorer. The returned operator is discarded;
    the test passes when the call does not raise.
    """
    from sklearn.metrics import accuracy_score, make_scorer

    from lale.lib.lale import GridSearchCV
    from lale.lib.sklearn import VotingClassifier

    members = [("knn", KNeighborsClassifier()), ("rc", RidgeClassifier())]
    ensemble = VotingClassifier(estimators=members, voting="hard")
    accuracy = make_scorer(accuracy_score)
    _ = ensemble.auto_configure(
        self.X_train,
        self.y_train,
        GridSearchCV,
        lale_num_samples=1,
        lale_num_grids=1,
        cv=2,
        scoring=accuracy,
    )
def test_hyperparam_estimator_list(self):
    """Check that ``replace`` swaps an operator inside an ``estimators`` list.

    ``replace(linear_reg, dtc)`` must give an operator whose JSON equals
    that of one built directly with ``dtc`` in the same slot. This is
    checked for a VotingClassifier, and for a StackingClassifier whose
    ``final_estimator`` is that VotingClassifier.
    """
    lr = LogisticRegression()
    linear_reg = LinearRegression()
    dtc = DecisionTreeClassifier()
    original_estimators = [("lr", lr), ("linear_reg", linear_reg)]

    # --- VotingClassifier ---
    voter = VotingClassifier(estimators=original_estimators)
    swapped_voter = voter.replace(linear_reg, dtc)
    # The tuple keeps its name "linear_reg"; only the operator changes.
    voter_expected_estimators = [("lr", lr), ("linear_reg", dtc)]
    expected_voter = VotingClassifier(estimators=voter_expected_estimators)
    self.assertEqual(swapped_voter.to_json(), expected_voter.to_json())

    # --- StackingClassifier, with the voter as final estimator ---
    stacker = StackingClassifier(
        estimators=original_estimators, final_estimator=voter
    )
    swapped_stacker = stacker.replace(linear_reg, dtc)
    stacker_expected_estimators = [("lr", lr), ("linear_reg", dtc)]
    expected_stacker = StackingClassifier(
        estimators=stacker_expected_estimators, final_estimator=expected_voter
    )
    self.assertEqual(swapped_stacker.to_json(), expected_stacker.to_json())
def test_higher_order_2(self):
    """Check JSON serialization of a VotingClassifier that nests operators.

    The operator under test has two estimators: a bare ``KNN`` and a
    ``PCA() >> LR`` pipeline. The test asserts that ``to_json()`` equals
    a hand-written expected document (class names come from each
    operator's ``class_name()``), and that ``from_json`` followed by
    ``to_json`` reproduces the same document.
    """
    # Show the full diff if the dictionary comparison fails.
    self.maxDiff = None
    from lale.json_operator import from_json
    from lale.lib.sklearn import PCA
    from lale.lib.sklearn import KNeighborsClassifier as KNN
    from lale.lib.sklearn import LogisticRegression as LR
    from lale.lib.sklearn import VotingClassifier as Vote

    operator = Vote(
        estimators=[("knn", KNN), ("pipeline", PCA() >> LR)], voting="soft"
    )

    # Top-level fields first, then the nested steps from outermost to
    # innermost.
    json_expected = {
        "class": Vote.class_name(),
        "state": "trainable",
        "operator": "VotingClassifier",
        "is_frozen_trainable": True,
        "label": "Vote",
        "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.voting_classifier.html",
        "hyperparams": {
            # Estimators are referenced by path into the "steps" section.
            "estimators": [
                ("knn", {"$ref": "../steps/knn"}),
                ("pipeline", {"$ref": "../steps/pipeline"}),
            ],
            "voting": "soft",
        },
    }
    expected_knn = {
        "class": KNN.class_name(),
        "state": "planned",
        "operator": "KNeighborsClassifier",
        "label": "KNN",
        "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.k_neighbors_classifier.html",
    }
    expected_pca = {
        "class": PCA.class_name(),
        "state": "trainable",
        "operator": "PCA",
        "label": "PCA",
        "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html",
        "hyperparams": {},
        "is_frozen_trainable": False,
    }
    expected_lr = {
        "class": LR.class_name(),
        "state": "planned",
        "operator": "LogisticRegression",
        "label": "LR",
        "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html",
    }
    json_expected["steps"] = {
        "knn": expected_knn,
        "pipeline": {
            "class": "lale.operators.PlannedPipeline",
            "state": "planned",
            "edges": [["pca", "lr"]],
            "steps": {"pca": expected_pca, "lr": expected_lr},
        },
    }

    serialized = operator.to_json()
    self.assertEqual(serialized, json_expected)

    # Round trip: deserializing and re-serializing must be lossless.
    round_tripped = from_json(serialized).to_json()
    self.assertEqual(serialized, round_tripped)