Пример #1
0
    def test_using_pipeline(self):
        """Tune a planned pipeline with Hyperopt, then re-tune only its last step.

        The pipeline returned by the first Hyperopt run is handed to
        OptimizeLast (again backed by Hyperopt). The test checks that the
        pipeline coming out of OptimizeLast has the same type as the one that
        went in, and that predicting through the fitted result and through
        the OptimizeLast instance yields identical arrays.
        """
        from sklearn.metrics import f1_score, make_scorer

        from lale.lib.lale import Hyperopt, OptimizeLast

        planned_pipeline = (PCA | NoOp) >> LogisticRegression

        # First pass: let Hyperopt pick a pipeline from the planned search space.
        opt = Hyperopt(estimator=planned_pipeline, max_evals=1)
        res = opt.fit(self.X_train, self.y_train)
        best_pipeline = res.get_pipeline()

        # Second pass: use Hyperopt to optimize only the last step
        # (i.e., the classifier) of the pipeline found above.
        hyperopt_args = {
            "scoring": make_scorer(f1_score, average="macro"),
            "cv": 3,
            "max_evals": 2,
        }
        opt_last = OptimizeLast(
            estimator=best_pipeline,
            last_optimizer=Hyperopt,
            optimizer_args=hyperopt_args,
        )

        res_last = opt_last.fit(self.X_train, self.y_train)
        predictions = res_last.predict(self.X_test)
        predictions_1 = opt_last.predict(self.X_test)
        best_pipeline2 = res_last.get_pipeline()

        self.assertEqual(type(best_pipeline), type(best_pipeline2))
        # Use a unittest assertion rather than a bare ``assert`` so the check
        # still runs under ``python -O`` and is reported like the one above.
        self.assertTrue(np.array_equal(predictions_1, predictions))
Пример #2
0
    def test_get_named_pipeline(self):
        """Look up pipelines by name on an NSGA2-backed OptimizeLast result.

        The pipeline named 'p0' is always fetched; 'p1' is fetched only when
        the summary has more than one row. Each fetched pipeline must have
        the same type as the trained pipeline given to OptimizeLast.
        """
        fitted = (MinMaxScaler() >> KNeighborsClassifier()).fit(
            self.X_train, self.y_train
        )
        expected_type = type(fitted)

        optimizer = OptimizeLast(
            estimator=fitted,
            last_optimizer=NSGA2,
            optimizer_args={
                # Two objectives: accuracy and a scorer built from compute_fpr.
                "scoring": [
                    "accuracy",
                    make_scorer(compute_fpr, greater_is_better=False),
                ],
                "best_score": [1, 0],
                "cv": 3,
                "max_evals": 20,
                "population_size": 10,
            },
        )
        result = optimizer.fit(self.X_train, self.y_train)
        summary = result.summary()

        first = result.get_pipeline(pipeline_name="p0")
        self.assertEqual(expected_type, type(first))

        # A second named pipeline is only requested when the summary lists one.
        if summary.shape[0] > 1:
            second = result.get_pipeline(pipeline_name="p1")
            self.assertEqual(expected_type, type(second))
Пример #3
0
    def test_using_pipeline(self):
        """Tune a pipeline with Hyperopt, then run NSGA2 on its last step.

        Uses the 'credit-g' classification dataset fetched through
        ``lale.datasets.openml`` without preprocessing. After the NSGA2-backed
        OptimizeLast run, the test checks that the summary is non-empty, that
        every 'loss1' and 'loss2' value in it lies in [0, 1], and that the
        resulting pipeline has the same type as the one Hyperopt produced.
        The AUC and FPR scorer values on the test split are printed for
        information only.
        """
        import lale.datasets.openml

        (X_train, y_train), (X_test, y_test) = lale.datasets.openml.fetch(
            "credit-g", "classification", preprocess=False
        )

        project_nums = Project(columns={"type": "number"})
        project_cats = Project(columns={"type": "string"})
        # ``>>`` binds tighter than ``&``: each projection is piped into its
        # own transformer before the two branches are combined.
        planned_pipeline = (
            (project_nums >> (Normalizer | NoOp) & project_cats >> OneHotEncoder)
            >> ConcatFeatures
            >> (LGBMClassifier | GradientBoostingClassifier)
        )

        # Let's first use Hyperopt to find the best pipeline.
        opt = Hyperopt(estimator=planned_pipeline, max_evals=3)
        res = opt.fit(X_train, y_train)
        best_pipeline = res.get_pipeline()

        # Now let's use NSGA2 to perform multi-objective optimization on the
        # last step (i.e., classifier) in the best pipeline returned by Hyperopt.
        fpr_scorer = make_scorer(compute_fpr, greater_is_better=False)
        nsga2_args = {
            "scoring": ["roc_auc", fpr_scorer],
            "best_score": [1, 0],
            "cv": 3,
            "max_evals": 20,
            "population_size": 10,
        }
        opt_last = OptimizeLast(
            estimator=best_pipeline,
            last_optimizer=NSGA2,
            optimizer_args=nsga2_args,
        )

        res_last = opt_last.fit(X_train, y_train)
        df_summary = res_last.summary()
        print(df_summary)
        self.assertTrue(df_summary.shape[0] > 0)

        # Check that every row of the summary carries loss values in [0, 1].
        valid_objs = all(
            0 <= record["loss1"] <= 1 and 0 <= record["loss2"] <= 1
            for _, record in df_summary.iterrows()
        )
        self.assertTrue(valid_objs, msg="Invalid loss values in summary")

        _ = res_last.predict(X_test)
        best_pipeline2 = res_last.get_pipeline()
        self.assertEqual(type(best_pipeline), type(best_pipeline2))

        auc_scorer = get_scorer("roc_auc")
        auc_value = auc_scorer(best_pipeline2, X_test, y_test)
        fpr_value = fpr_scorer(best_pipeline2, X_test, y_test)
        print(
            "test_using_pipeline: \n"
            "AUC, FPR scorer values on test split - "
            f"{auc_value:.3f} {fpr_value:.3f}"
        )
Пример #4
0
    def test_unspecified_arguments(self):
        """Fit OptimizeLast without naming an estimator or a last optimizer.

        Only ``optimizer_args`` is supplied. The test checks that predicting
        through the fitted result and through the OptimizeLast instance gives
        identical arrays, and that the resulting pipeline is a
        TrainedIndividualOp.
        """
        from lale.lib.lale import OptimizeLast
        from lale.operators import TrainedIndividualOp

        # ``estimator`` and ``last_optimizer`` are deliberately left out;
        # only the evaluation budget is set to keep the run short.
        opt = OptimizeLast(optimizer_args={"max_evals": 1})
        res = opt.fit(self.X_train, self.y_train)
        predictions = res.predict(self.X_test)
        predictions_1 = opt.predict(self.X_test)
        best_pipeline = res.get_pipeline()

        # Use a unittest assertion rather than a bare ``assert`` so the check
        # still runs under ``python -O`` and is reported like the one below.
        self.assertTrue(np.array_equal(predictions_1, predictions))
        self.assertEqual(type(best_pipeline), TrainedIndividualOp)
Пример #5
0
    def test_get_named_pipeline(self):
        """Fetch the pipeline named 'p1' from a Hyperopt-backed OptimizeLast run.

        When a pipeline is returned, it is fitted again on the training data,
        used once for prediction, and its trained type is compared with the
        type of the trained pipeline originally given to OptimizeLast.
        """
        from lale.lib.lale import Hyperopt, OptimizeLast

        fitted = (MinMaxScaler() >> KNeighborsClassifier()).fit(
            self.X_train, self.y_train
        )

        optimizer = OptimizeLast(
            estimator=fitted,
            last_optimizer=Hyperopt,
            optimizer_args={"cv": 3, "max_evals": 2},
        )
        result = optimizer.fit(self.X_train, self.y_train)

        named = result.get_pipeline(pipeline_name="p1")
        if named is None:
            # get_pipeline returned nothing for "p1"; nothing further to check.
            return

        refitted = named.fit(self.X_train, self.y_train)
        refitted.predict(self.X_test)  # only checks that prediction runs
        self.assertEqual(type(fitted), type(refitted))
Пример #6
0
    def test_using_individual_operator(self):
        """Run OptimizeLast on a single trained operator instead of a pipeline.

        A trained LogisticRegression is passed as the estimator and Hyperopt
        as the last optimizer. The test checks that the result has the same
        type as the trained operator, and that predicting through the fitted
        result and through the OptimizeLast instance gives identical arrays.
        """
        from lale.lib.lale import Hyperopt, OptimizeLast

        lr = LogisticRegression()  # an individual operator, not a pipeline
        trained_operator = lr.fit(self.X_train, self.y_train)

        # Now let's use Hyperopt to optimize the classifier.
        hyperopt_args = {"scoring": "accuracy", "cv": 3, "max_evals": 2}
        opt_last = OptimizeLast(
            estimator=trained_operator,
            last_optimizer=Hyperopt,
            optimizer_args=hyperopt_args,
        )

        res_last = opt_last.fit(self.X_train, self.y_train)
        predictions = res_last.predict(self.X_test)
        predictions_1 = opt_last.predict(self.X_test)
        best_pipeline = res_last.get_pipeline()

        self.assertEqual(type(trained_operator), type(best_pipeline))
        # Use a unittest assertion rather than a bare ``assert`` so the check
        # still runs under ``python -O`` and is reported like the one above.
        self.assertTrue(np.array_equal(predictions_1, predictions))