示例#1
0
    def test_fit1(self):
        """Compare a lale ``Batching`` pipeline against plain scikit-learn.

        The lale side wraps ``MinMaxScaler >> MLPClassifier(random_state=42)``
        in ``Batching`` with ``batch_size=112``. The reference side calls
        ``partial_fit`` once on the corresponding scikit-learn scaler and
        classifier. Both are scored on ``self.X_test`` with ``accuracy_score``
        and the two accuracies must be exactly equal.
        """
        import warnings

        import numpy as np
        # Aliased so the scikit-learn classes do not rebind the lale operator
        # names imported below.
        from sklearn.neural_network import MLPClassifier as SkMLPClassifier
        from sklearn.preprocessing import MinMaxScaler as SkMinMaxScaler

        from lale.lib.sklearn import MinMaxScaler, MLPClassifier

        # Scope the blanket "ignore" filter to this test: the warnings filter
        # list is process-wide, so installing it outside a catch_warnings()
        # block would silence warnings for every test that runs afterwards.
        with warnings.catch_warnings():
            warnings.filterwarnings(action="ignore")

            # lale: batched training of the whole pipeline.
            pipeline = Batching(
                operator=MinMaxScaler() >> MLPClassifier(random_state=42),
                batch_size=112)
            trained = pipeline.fit(self.X_train, self.y_train)
            lale_accuracy = accuracy_score(self.y_test,
                                           trained.predict(self.X_test))

            # scikit-learn reference: one partial_fit call per step.
            # NOTE(review): comparing against a single partial_fit presumably
            # relies on the training data fitting into one batch of 112 rows
            # -- confirm against the fixture set up for this test class.
            trained_prep = SkMinMaxScaler().partial_fit(self.X_train,
                                                        self.y_train)
            X_transformed = trained_prep.transform(self.X_train)
            trained_clf = SkMLPClassifier(random_state=42).partial_fit(
                X_transformed,
                self.y_train,
                # partial_fit needs the full label set on its first call.
                classes=np.unique(self.y_train))
            predictions = trained_clf.predict(
                trained_prep.transform(self.X_test))
            sklearn_accuracy = accuracy_score(self.y_test, predictions)

        self.assertEqual(lale_accuracy, sklearn_accuracy)
示例#2
0
    def test_get_named_pipeline(self):
        """Pipelines fetched by name from an NSGA2 result keep the trained type.

        A ``MinMaxScaler >> KNeighborsClassifier`` pipeline is trained, then
        handed to ``OptimizeLast`` with ``NSGA2`` as the last-step optimizer
        and two objectives (accuracy and a false-positive-rate scorer).
        ``'p0'`` is always checked; ``'p1'`` is only checked when the summary
        has more than one row.
        """
        fitted = (MinMaxScaler() >> KNeighborsClassifier()).fit(
            self.X_train, self.y_train)

        # Two objectives: maximise accuracy, minimise the false-positive rate.
        optimizer_args = dict(
            scoring=['accuracy',
                     make_scorer(compute_fpr, greater_is_better=False)],
            best_score=[1, 0],
            cv=3,
            max_evals=20,
            population_size=10,
        )
        optimizer = OptimizeLast(estimator=fitted,
                                 last_optimizer=NSGA2,
                                 optimizer_args=optimizer_args)
        result = optimizer.fit(self.X_train, self.y_train)

        summary = result.summary()
        expected_type = type(fitted)

        first = result.get_pipeline(pipeline_name='p0')
        self.assertEqual(expected_type, type(first))

        # A second named pipeline only exists when the summary lists it.
        if summary.shape[0] > 1:
            second = result.get_pipeline(pipeline_name='p1')
            self.assertEqual(expected_type, type(second))
示例#3
0
 def test_planned_pipeline_3(self):
     """Run hyperopt over a planned pipeline with two feature-combining stages.

     The plan chains two ``&`` combinations, each followed by
     ``ConcatFeatures()``, and ends in a choice between
     ``LogisticRegression`` and ``KNeighborsClassifier``.
     """
     # Stage 1: scaled features next to the untouched ones, then concatenated.
     plan = (MinMaxScaler() & NoOp()) >> ConcatFeatures()
     # Stage 2: standard scaling next to a NoOp/MinMaxScaler choice.
     second_stage = StandardScaler & (NoOp() | MinMaxScaler())
     plan = plan >> second_stage >> ConcatFeatures()
     # Final stage: the classifier is left as a planned choice.
     plan = plan >> (LogisticRegression | KNeighborsClassifier)
     run_hyperopt_on_planned_pipeline(plan)
示例#4
0
 def test_pipeline_freeze_trained(self):
     """``freeze_trained`` yields a frozen copy and leaves the original as is."""
     from lale.lib.sklearn import LogisticRegression, MinMaxScaler

     features = [[0.0], [1.0], [2.0]]
     labels = [0.0, 0.0, 1.0]
     fitted = (MinMaxScaler() >> LogisticRegression()).fit(features, labels)

     frozen = fitted.freeze_trained()

     # The pipeline that was fitted must not report itself as frozen;
     # only the object returned by freeze_trained() does.
     self.assertFalse(fitted.is_frozen_trained())
     self.assertTrue(frozen.is_frozen_trained())
示例#5
0
 def test_trained_pipeline_freeze_trainable(self):
     """``freeze_trainable`` on a trained pipeline returns a frozen trained one.

     The original pipeline must stay unfrozen both before and after the call,
     and the frozen result must still be a ``TrainedPipeline``.
     """
     from lale.lib.sklearn import LogisticRegression, MinMaxScaler
     from lale.operators import TrainedPipeline

     features = [[0.0], [1.0], [2.0]]
     labels = [0.0, 0.0, 1.0]
     fitted = (MinMaxScaler() >> LogisticRegression()).fit(features, labels)

     self.assertIsInstance(fitted, TrainedPipeline)
     self.assertFalse(fitted.is_frozen_trainable())

     frozen = fitted.freeze_trainable()

     # Checked again after freezing: the call must not alter the original.
     self.assertFalse(fitted.is_frozen_trainable())
     self.assertTrue(frozen.is_frozen_trainable())
     self.assertIsInstance(frozen, TrainedPipeline)
示例#6
0
    def test_fit2(self):
        """Compare a batched transformer-only lale pipeline with scikit-learn.

        The lale side wraps ``MinMaxScaler >> MinMaxScaler`` in ``Batching``
        with ``batch_size=112`` and transforms ``self.X_test``. The reference
        side chains two scikit-learn ``MinMaxScaler`` instances, each trained
        with a single ``partial_fit`` call. The first 5 rows and 2 columns of
        both outputs must agree under ``assertAlmostEqual``.
        """
        import warnings

        # Aliased so the scikit-learn class does not rebind the lale operator
        # name imported below.
        from sklearn.preprocessing import MinMaxScaler as SkMinMaxScaler

        from lale.lib.sklearn import MinMaxScaler

        # Scope the blanket "ignore" filter to this test: the warnings filter
        # list is process-wide, so installing it outside a catch_warnings()
        # block would silence warnings for every test that runs afterwards.
        with warnings.catch_warnings():
            warnings.filterwarnings(action="ignore")

            # lale: batched training of the two-scaler pipeline.
            pipeline = Batching(operator=MinMaxScaler() >> MinMaxScaler(),
                                batch_size=112)
            trained = pipeline.fit(self.X_train, self.y_train)
            lale_transforms = trained.transform(self.X_test)

            # scikit-learn reference: the second scaler is fitted on the
            # output of the first, mirroring the pipeline above.
            first_scaler = SkMinMaxScaler().partial_fit(self.X_train,
                                                        self.y_train)
            X_transformed = first_scaler.transform(self.X_train)
            second_scaler = SkMinMaxScaler().partial_fit(X_transformed,
                                                         self.y_train)
            sklearn_transforms = second_scaler.transform(
                first_scaler.transform(self.X_test))

        # Spot-check the top-left 5x2 corner of both outputs.
        for i in range(5):
            for j in range(2):
                self.assertAlmostEqual(lale_transforms[i, j],
                                       sklearn_transforms[i, j])
示例#7
0
 def test_fit3(self):
     """Smoke test: ``Batching`` used as the second step after ``PCA``.

     Only checks that fitting and predicting run without raising; the
     predictions themselves are not inspected.
     """
     from lale.lib.sklearn import PCA, MinMaxScaler, MLPClassifier

     reducer = PCA()
     batched = Batching(
         operator=MinMaxScaler() >> MLPClassifier(random_state=42),
         batch_size=10,
     )
     fitted = (reducer >> batched).fit(self.X_train, self.y_train)
     _ = fitted.predict(self.X_test)
示例#8
0
    def test_operator_choice(self):
        """Round-trip a planned ``PCA | Scl`` choice through JSON.

        Serialises the operator with ``to_json``, compares the result with
        the hand-written expected dictionary, rebuilds the operator with
        ``from_json`` and checks that serialising it again gives the same
        dictionary.
        """
        # No limit on the length of the diff printed when the dicts differ.
        self.maxDiff = None
        from lale.json_operator import from_json, to_json
        from lale.lib.sklearn import PCA
        from lale.lib.sklearn import MinMaxScaler as Scl

        operator = PCA | Scl
        json_expected = {
            "class": "lale.operators.OperatorChoice",
            "operator": "OperatorChoice",
            "state": "planned",
            "steps": {
                "pca": {
                    "class": PCA.class_name(),
                    "state": "planned",
                    "operator": "PCA",
                    "label": "PCA",
                    "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html",
                },
                # The expected "label" is the import alias ("Scl") while
                # "operator" stays "MinMaxScaler".
                # NOTE(review): presumably the label is picked up from the
                # alias name visible in this scope -- keep the alias as is and
                # confirm against lale.json_operator.
                "scl": {
                    "class": Scl.class_name(),
                    "state": "planned",
                    "operator": "MinMaxScaler",
                    "label": "Scl",
                    "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.min_max_scaler.html",
                },
            },
        }
        # Note: this local name shadows any module-level `json` inside the test.
        json = to_json(operator)
        self.assertEqual(json, json_expected)
        # Deserialise and serialise again: the JSON must be stable.
        operator_2 = from_json(json)
        json_2 = to_json(operator_2)
        self.assertEqual(json_2, json_expected)
示例#9
0
    def test_astype_sklearn(self):
        """Pretty-print a lale pipeline as scikit-learn code.

        Builds ``MinMaxScaler >> (PCA & Nystroem) >> ConcatFeatures >>
        LogisticRegression``, prints it with ``astype="sklearn"`` and hands
        the expected and the printed source to ``self._roundtrip``.
        """
        from lale.lib.lale import ConcatFeatures
        from lale.lib.sklearn import PCA, LogisticRegression, MinMaxScaler, Nystroem

        # The expected output below uses the same identifiers as these locals.
        # NOTE(review): whether the printer depends on the local names is not
        # visible here -- avoid renaming them without checking.
        pca = PCA(copy=False)
        logistic_regression = LogisticRegression(solver="saga", C=0.9)
        pipeline = (
            MinMaxScaler()
            >> (pca & Nystroem())
            # ConcatFeatures is passed as the operator itself, not called.
            >> ConcatFeatures
            >> logistic_regression
        )
        # In the expected code the `&` combination plus ConcatFeatures shows
        # up as a single make_union(...) step.
        expected = """from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.kernel_approximation import Nystroem
from sklearn.pipeline import make_union
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import make_pipeline

pca = PCA(copy=False)
union = make_union(pca, Nystroem())
logistic_regression = LogisticRegression(solver="saga", C=0.9)
pipeline = make_pipeline(MinMaxScaler(), union, logistic_regression)"""
        printed = lale.pretty_print.to_string(pipeline, astype="sklearn")
        # NOTE(review): _roundtrip is defined outside this snippet; presumably
        # it compares the two sources -- confirm in the test class.
        self._roundtrip(expected, printed)
示例#10
0
    def test_invalid_args(self):
        """``NSGA2`` must reject three kinds of invalid constructor arguments.

        * no ``scoring`` at all -> ``jsonschema`` ``ValidationError``
        * a single scorer       -> ``AssertionError``
        * a lale pipeline as the estimator -> ``AssertionError``
        """
        import jsonschema

        base_args = dict(
            estimator=LGBMClassifier(),
            cv=3,
            max_evals=50,
            population_size=10,
        )

        # Scoring omitted entirely: rejected by schema validation.
        with self.assertRaises(jsonschema.exceptions.ValidationError):
            NSGA2(**base_args)

        # Only one scorer given where several are required.
        with self.assertRaises(AssertionError):
            NSGA2(scoring=['accuracy'], **base_args)

        # A lale pipeline as the estimator should raise AssertionError,
        # since multi-objective optimisation over pipelines is not supported.
        lale_pipeline = MinMaxScaler() >> KNeighborsClassifier()
        false_positive_rate = make_scorer(compute_fpr, greater_is_better=False)
        with self.assertRaises(AssertionError):
            NSGA2(estimator=lale_pipeline,
                  scoring=['accuracy', false_positive_rate])
示例#11
0
    def test_get_named_pipeline(self):
        """A pipeline fetched by name from ``OptimizeLast`` can be refitted.

        Uses ``Hyperopt`` (3-fold CV, 2 evaluations) as the last-step
        optimizer. When ``get_pipeline("p1")`` returns a pipeline, it is
        fitted, used for prediction, and its trained type is compared with
        that of the originally trained pipeline. When it returns ``None``
        nothing further is checked.
        """
        from lale.lib.lale import Hyperopt, OptimizeLast

        fitted = (MinMaxScaler() >> KNeighborsClassifier()).fit(
            self.X_train, self.y_train
        )

        optimizer = OptimizeLast(
            estimator=fitted,
            last_optimizer=Hyperopt,
            optimizer_args={"cv": 3, "max_evals": 2},
        )
        result = optimizer.fit(self.X_train, self.y_train)

        candidate = result.get_pipeline(pipeline_name="p1")
        if candidate is None:
            # No pipeline under that name: nothing left to verify.
            return

        refitted = candidate.fit(self.X_train, self.y_train)
        _ = refitted.predict(self.X_test)
        self.assertEqual(type(fitted), type(refitted))
示例#12
0
    def test_batching_with_hyperopt(self):
        """Smoke test: ``auto_configure`` a ``Batching`` pipeline with Hyperopt.

        Only checks that configuring with a single evaluation and predicting
        on ``self.X_test`` run without raising.
        """
        from sklearn.metrics import accuracy_score

        from lale.lib.lale import Batching, Hyperopt
        from lale.lib.sklearn import MinMaxScaler, SGDClassifier

        batched = Batching(operator=MinMaxScaler() >> SGDClassifier())
        configured = batched.auto_configure(
            self.X_train,
            self.y_train,
            optimizer=Hyperopt,
            max_evals=1,
        )
        _ = configured.predict(self.X_test)
示例#13
0
    def test_decision_function_2(self):
        """``decision_function`` works on the pipeline Hyperopt returns.

        Tunes ``MinMaxScaler >> IsolationForest`` without labels, using a
        custom scorer that always returns 1, then calls ``decision_function``
        on the resulting pipeline.
        """
        from lale.lib.lale import Hyperopt
        from lale.lib.sklearn import MinMaxScaler

        def my_scorer(estimator, X, y=None):
            # Constant score: every candidate is rated the same.
            return 1

        detector = MinMaxScaler() >> IsolationForest(max_features=1.0,
                                                     max_samples=1.0)
        optimizer = Hyperopt(
            estimator=detector,
            max_evals=5,
            verbose=True,
            scoring=my_scorer,
        )
        # Fitted on features only; no target is passed.
        fitted = optimizer.fit(self.X_train)

        best = fitted.get_pipeline()
        assert best is not None
        _ = best.decision_function(self.X_test)
示例#14
0
 def test_pipeline_digits_scaler_j48(self):
     """Smoke test: ``MinMaxScaler >> J48`` on the scikit-learn digits data.

     The shuffled data set is split into a 200-sample holdout and a training
     remainder. The pipeline is fitted, rendered with ``to_graphviz`` and
     used for prediction; no result is asserted.
     """
     import sklearn.datasets
     import sklearn.utils

     import lale.helpers
     from lale.lib.sklearn import MinMaxScaler

     digits = sklearn.datasets.load_digits()
     X_all, y_all = sklearn.utils.shuffle(
         digits.data, digits.target, random_state=42
     )

     # The first `holdout_size` shuffled samples are held out for testing.
     holdout_size = 200
     X_test, X_train = X_all[:holdout_size], X_all[holdout_size:]
     y_test, y_train = y_all[:holdout_size], y_all[holdout_size:]

     trainable_pipe = MinMaxScaler() >> J48()
     print('before calling fit on pipeline')
     trained_pipe = trainable_pipe.fit(X_train, y_train)
     print('after calling fit on pipeline')

     lale.helpers.to_graphviz(trained_pipe)
     _ = trained_pipe.predict(X_test)