Exemplo n.º 1
0
 def test_two_estimators_predict1(self):
     """Fit a pipeline containing two PassiveAggressiveClassifier steps and call predict."""
     # Parallel branches; the third branch is itself a classifier.
     branches = PCA() & Nystroem() & PassiveAggressiveClassifier()
     pipeline = (
         StandardScaler()
         >> branches
         >> ConcatFeatures()
         >> NoOp()
         >> PassiveAggressiveClassifier()
     )
     fitted = pipeline.fit(self.X_train, self.y_train)
     fitted.predict(self.X_test)
Exemplo n.º 2
0
 def test_two_estimators_predict_proba(self):
     """Fit a pipeline containing two LogisticRegression steps and call predict_proba."""
     # Parallel branches; the third branch is itself a classifier.
     branches = PCA() & Nystroem() & LogisticRegression()
     pipeline = (
         StandardScaler()
         >> branches
         >> ConcatFeatures()
         >> NoOp()
         >> LogisticRegression()
     )
     fitted = pipeline.fit(self.X_train, self.y_train)
     fitted.predict_proba(self.X_test)
Exemplo n.º 3
0
 def test_pipeline_freeze_trainable(self):
     """Check that freeze_trainable() reduces the grid-search space to a single grid.

     Before freezing, the pipeline must report not being frozen and yield
     more than one parameter grid; after freezing it must report being
     frozen and yield exactly one grid.
     """
     from lale.lib.sklearn import PCA, LogisticRegression
     liquid = PCA() >> LogisticRegression()
     self.assertFalse(liquid.is_frozen_trainable())
     liquid_grid = get_grid_search_parameter_grids(liquid)
     # assertGreater reports both operands on failure, unlike assertTrue(a > b).
     self.assertGreater(len(liquid_grid), 1, f'grid size {len(liquid_grid)}')
     frozen = liquid.freeze_trainable()
     self.assertTrue(frozen.is_frozen_trainable())
     frozen_grid = get_grid_search_parameter_grids(frozen)
     self.assertEqual(len(frozen_grid), 1)
Exemplo n.º 4
0
 def test_clone_with_scikit2(self):
     """Check that sklearn's clone() of a pipeline yields identical CV scores.

     Runs 2-fold cross-validation on the iris data for a pipeline and for
     its clone, first for ``PCA >> LogisticRegression`` and then for that
     pipeline nested inside another linear pipeline, and asserts that the
     per-fold accuracy scores are equal.
     """
     lr = LogisticRegression()
     from sklearn.model_selection import cross_val_score
     from sklearn.metrics import accuracy_score, make_scorer
     from sklearn.datasets import load_iris
     from sklearn.base import clone
     iris = load_iris()
     X, y = iris.data, iris.target

     def fold_scores(estimator):
         # Warnings emitted during cross-validation are silenced, as before.
         with warnings.catch_warnings():
             warnings.simplefilter("ignore")
             return cross_val_score(estimator,
                                    X,
                                    y,
                                    scoring=make_scorer(accuracy_score),
                                    cv=2)

     def assert_clone_scores_match(original):
         # Clone before scoring, matching the order of the original test.
         cloned = clone(original)
         expected = fold_scores(original)
         actual = fold_scores(cloned)
         # Compare lengths explicitly: zip() would silently drop extra folds.
         self.assertEqual(len(expected), len(actual))
         for expected_score, actual_score in zip(expected, actual):
             self.assertEqual(expected_score, actual_score)

     trainable = PCA() >> lr
     assert_clone_scores_match(trainable)
     # Testing clone with nested linear pipelines
     assert_clone_scores_match(PCA() >> trainable)
Exemplo n.º 5
0
    def test_pipeline_choice_with_hyperopt(self):
        """Auto-configure a BaggingClassifier whose base estimator contains a choice."""
        from lale.lib.lale import Hyperopt
        from lale.lib.sklearn import BaggingClassifier

        classifier_choice = LogisticRegression() | KNeighborsClassifier()
        bagging = BaggingClassifier(base_estimator=PCA() >> classifier_choice)
        # Only one optimizer evaluation is requested (max_evals=1).
        bagging.auto_configure(
            self.X_train, self.y_train, Hyperopt, max_evals=1
        )
Exemplo n.º 6
0
    def test_with_lale_pipeline(self):
        """Fit and predict with a VotingClassifier that has a pipeline among its estimators."""
        from lale.lib.sklearn import VotingClassifier

        members = [
            ("knn", KNeighborsClassifier()),
            ("pca_lr", PCA() >> LogisticRegression()),
        ]
        voter = VotingClassifier(estimators=members)
        fitted = voter.fit(self.X_train, self.y_train)
        fitted.predict(self.X_test)
Exemplo n.º 7
0
    def test_higher_order_1(self):
        """Serialize a ``Both`` operator to JSON, compare it, and round-trip it."""
        from lale.json_operator import from_json
        from lale.lib.lale import Both
        from lale.lib.sklearn import PCA, Nystroem

        operator = Both(op1=PCA(n_components=2), op2=Nystroem)

        # Expected serialization of the two nested operators.
        pca_step = {
            "class": PCA.class_name(),
            "state": "trainable",
            "operator": "PCA",
            "label": "PCA",
            "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html",
            "hyperparams": {"n_components": 2},
            "is_frozen_trainable": False,
        }
        nystroem_step = {
            "class": Nystroem.class_name(),
            "state": "planned",
            "operator": "Nystroem",
            "label": "Nystroem",
            "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.nystroem.html",
        }
        # The hyperparams refer to the nested steps through "$ref" entries.
        json_expected = {
            "class": Both.class_name(),
            "state": "trainable",
            "operator": "Both",
            "label": "Both",
            "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.both.html",
            "hyperparams": {
                "op1": {"$ref": "../steps/pca"},
                "op2": {"$ref": "../steps/nystroem"},
            },
            "steps": {"pca": pca_step, "nystroem": nystroem_step},
            "is_frozen_trainable": False,
        }

        serialized = operator.to_json()
        self.assertEqual(serialized, json_expected)
        # Deserializing and serializing again must reproduce the same JSON.
        reloaded = from_json(serialized)
        reserialized = reloaded.to_json()
        self.assertEqual(serialized, reserialized)
Exemplo n.º 8
0
 def test_remove_last4(self):
     """remove_last(inplace=True) leaves six steps in both the result and the original."""
     branches = PCA() & Nystroem() & PassiveAggressiveClassifier()
     pipeline = (
         StandardScaler()
         >> branches
         >> ConcatFeatures()
         >> NoOp()
         >> PassiveAggressiveClassifier()
     )
     shortened = pipeline.remove_last(inplace=True)
     self.assertEqual(len(shortened._steps), 6)
     # With inplace=True the original pipeline object is expected to shrink too.
     self.assertEqual(len(pipeline._steps), 6)
Exemplo n.º 9
0
 def test_make_choice_with_instance(self):
     """Check that a choice of operator instances cannot be fit but can be composed.

     Calling ``fit`` on ``PCA() | Nystroem() | NoOp()`` must raise
     AttributeError. The remaining statements only verify that pipelines
     mixing choices (built with ``|`` or ``make_choice``) can be constructed.
     """
     from lale.operators import make_choice
     from sklearn.datasets import load_iris
     iris = load_iris()
     X, y = iris.data, iris.target
     tfm = PCA() | Nystroem() | NoOp()
     with self.assertRaises(AttributeError):
         tfm.fit(X, y)
     # Construction-only checks: the resulting pipelines are not used further.
     _ = (OneHotEncoder | NoOp) >> tfm >> (LogisticRegression | KNeighborsClassifier)
     _ = (OneHotEncoder | NoOp) >> (PCA | Nystroem) >> (LogisticRegression | KNeighborsClassifier)
     _ = make_choice(OneHotEncoder, NoOp) >> make_choice(PCA, Nystroem) >> make_choice(LogisticRegression, KNeighborsClassifier)
Exemplo n.º 10
0
    def test_with_voting_classifier2(self):
        """Fit sklearn's VotingClassifier with an operator and a pipeline as estimators."""
        from sklearn.datasets import load_iris
        from sklearn.ensemble import VotingClassifier

        lr = LogisticRegression()
        # The same lr instance is used standalone and as the pipeline's last step.
        pipe = PCA() >> lr
        members = [('lr', lr), ('pipe', pipe)]
        vclf = VotingClassifier(estimators=members)

        dataset = load_iris()
        vclf.fit(dataset.data, dataset.target)
Exemplo n.º 11
0
 def test_export_to_sklearn_pipeline(self):
     """Export a trained pipeline to sklearn and compare step params and predictions."""
     trainable = PCA(n_components=3) >> KNeighborsClassifier()
     trained_lale_pipeline = trainable.fit(self.X_train, self.y_train)
     sklearn_pipeline = trained_lale_pipeline.export_to_sklearn_pipeline()
     # Steps are matched by position between the two pipelines.
     for index, (_, sklearn_step) in enumerate(
             sklearn_pipeline.named_steps.items()):
         sklearn_step_params = sklearn_step.get_params()
         lale_step = trained_lale_pipeline.steps()[index]
         lale_sklearn_params = self.get_sklearn_params(lale_step)
         self.assertEqual(sklearn_step_params, lale_sklearn_params)
     self.assert_equal_predictions(sklearn_pipeline, trained_lale_pipeline)
Exemplo n.º 12
0
    def test_fit_args(self):
        """Fit and predict with a TopKVotingClassifier (k=2) over a pipeline of choices."""
        from lale.lib.lale import TopKVotingClassifier
        from lale.lib.sklearn import Nystroem

        transformer_choice = PCA() | Nystroem()
        classifier_choice = LogisticRegression() | KNeighborsClassifier()
        ensemble = TopKVotingClassifier(
            estimator=transformer_choice >> classifier_choice, k=2
        )
        fitted = ensemble.fit(self.X_train, self.y_train)
        fitted.predict(self.X_test)
Exemplo n.º 13
0
    def test_no_max_schema(self):
        """Expect SearchSpaceError when PCA's n_components schema has a min but no max."""
        from lale.search.search_space import SearchSpaceError

        pca = PCA().customize_schema(n_components=schemas.Float(min=0.0))
        # Each choice expression is built separately; none is shared between stages.
        features = pca & (MinMaxScaler | Normalizer)
        rescaler = MinMaxScaler | Normalizer
        classifier = LogisticRegression | KNeighborsClassifier
        plan = features >> ConcatFeatures() >> rescaler >> classifier

        with self.assertRaises(SearchSpaceError):
            run_hyperopt_on_planned_pipeline(plan)
Exemplo n.º 14
0
    def test_fit_args(self):
        """Fit and predict with a TopKVotingClassifier (k=2) over a pipeline of choices.

        The data comes from the ``self.X_train`` / ``self.y_train`` /
        ``self.X_test`` fixtures; nothing is asserted about the predictions.
        """
        from lale.lib.lale import TopKVotingClassifier
        from lale.lib.sklearn import Nystroem

        ensemble = TopKVotingClassifier(
            estimator=(PCA() | Nystroem()) >>
            (LogisticRegression() | KNeighborsClassifier()),
            k=2)
        trained = ensemble.fit(self.X_train, self.y_train)
        trained.predict(self.X_test)
Exemplo n.º 15
0
 def test_multiple_estimators_predict_predict_proba(self):
     """Fit a pipeline with several estimator steps, then call predict_proba and predict.

     Estimators (LogisticRegression, LinearSVC) appear inside the parallel
     branches as well as at the end of the pipeline. Nothing is asserted
     about the outputs; the test only checks that the calls succeed.
     """
     pipeline = (
         StandardScaler()
         >> (LogisticRegression() & PCA())
         >> ConcatFeatures()
         >> (NoOp() & LinearSVC())
         >> ConcatFeatures()
         >> KNeighborsClassifier()
     )
     # Predict with the trained pipeline returned by fit(), as the sibling
     # tests do, rather than with the trainable pipeline object.
     trained = pipeline.fit(self.X_train, self.y_train)
     _ = trained.predict_proba(self.X_test)
     _ = trained.predict(self.X_test)
Exemplo n.º 16
0
    def test_with_gridsearchcv_auto_wrapped_pipe2(self):
        """Fit lale's GridSearchCV on a pipeline whose first step is a choice of two PCAs."""
        from sklearn.datasets import load_iris
        from sklearn.metrics import accuracy_score, make_scorer

        classifier = LogisticRegression()
        first_pca = PCA()
        second_pca = PCA()
        # Give the two PCA instances distinct names.
        first_pca._name = "PCA1"
        second_pca._name = "PCA2"
        trainable = (first_pca | second_pca) >> classifier

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            from lale.lib.lale import GridSearchCV

            search_options = dict(
                lale_num_samples=1,
                lale_num_grids=1,
                cv=2,
                scoring=make_scorer(accuracy_score),
            )
            clf = GridSearchCV(estimator=trainable, **search_options)
            dataset = load_iris()
            clf.fit(dataset.data, dataset.target)
Exemplo n.º 17
0
 def test_pipeline_create_trainable(self):
     """Build lale.lib.sklearn.Pipeline from trainable steps, fit it, and inspect the steps."""
     step_list = [("pca1", PCA()), ("lr1", LogisticRegression())]
     pipeline = lale.lib.sklearn.Pipeline(steps=step_list)
     self.assertIsInstance(pipeline, lale.operators.TrainableIndividualOp)
     trained = pipeline.fit(self.X_train, self.y_train)
     # Each entry of the "steps" hyperparameter is a (name, operator) pair.
     (_, pca_trained), (_, lr_trained) = trained.hyperparams()["steps"]
     for step in (pca_trained, lr_trained):
         self.assertIsInstance(step, lale.operators.TrainedIndividualOp)
     predictions = trained.predict(self.X_test)
     # The score is computed but not asserted on.
     accuracy_score(self.y_test, predictions)
Exemplo n.º 18
0
    def test_empty_schema(self):
        """Expect OperatorSchemaError when PCA's ``whiten`` is given an empty schema."""
        from lale.search.schema2search_space import OperatorSchemaError

        pca = PCA().customize_schema(whiten=schemas.Schema())
        # Each choice expression is built separately; none is shared between stages.
        features = pca & (MinMaxScaler | Normalizer)
        rescaler = MinMaxScaler | Normalizer
        classifier = LogisticRegression | KNeighborsClassifier
        plan = features >> ConcatFeatures() >> rescaler >> classifier

        with self.assertRaises(OperatorSchemaError):
            run_hyperopt_on_planned_pipeline(plan)
Exemplo n.º 19
0
 def test_higher_order_1(self):
     """Serialize a ``Both`` operator to JSON, compare it, and round-trip it."""
     from lale.lib.lale import Both
     from lale.lib.sklearn import PCA, Nystroem
     from lale.json_operator import from_json
     operator = Both(op1=PCA(n_components=2), op2=Nystroem)

     # Expected serialization of the two nested operators.
     pca_step = {
         'class': 'lale.lib.sklearn.pca.PCAImpl',
         'state': 'trainable',
         'operator': 'PCA',
         'label': 'PCA',
         'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html',
         'hyperparams': {'n_components': 2},
         'is_frozen_trainable': False,
     }
     nystroem_step = {
         'class': 'lale.lib.sklearn.nystroem.NystroemImpl',
         'state': 'planned',
         'operator': 'Nystroem',
         'label': 'Nystroem',
         'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.nystroem.html',
     }
     # The hyperparams refer to the nested steps through '$ref' entries.
     json_expected = {
         'class': 'lale.lib.lale.both.BothImpl',
         'state': 'trainable',
         'operator': 'Both',
         'label': 'Both',
         'documentation_url': 'https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.both.html',
         'hyperparams': {
             'op1': {'$ref': '../steps/pca'},
             'op2': {'$ref': '../steps/nystroem'},
         },
         'steps': {'pca': pca_step, 'nystroem': nystroem_step},
         'is_frozen_trainable': False,
     }

     serialized = operator.to_json()
     self.assertEqual(serialized, json_expected)
     # Deserializing and serializing again must reproduce the same JSON.
     reloaded = from_json(serialized)
     reserialized = reloaded.to_json()
     self.assertEqual(serialized, reserialized)
Exemplo n.º 20
0
    def test_pipeline_clone(self):
        """A clone of a Pipeline must reach the same accuracy as the original."""
        from sklearn.base import clone
        from lale.operators import Pipeline

        def fit_and_score(op):
            # Fit on the training fixture and score on the test fixture.
            fitted = op.fit(self.X_train, self.y_train)
            predicted = fitted.predict(self.X_test)
            return accuracy_score(self.y_test, predicted)

        pipeline = Pipeline([('pca1', PCA()), ('lr1', LogisticRegression())])
        orig_acc = fit_and_score(pipeline)
        # The clone is taken after the original pipeline has been fit.
        cloned_acc = fit_and_score(clone(pipeline))
        self.assertEqual(orig_acc, cloned_acc)
Exemplo n.º 21
0
    def test_pipeline_create_trained(self):
        """Wrap already-trained steps in lale.lib.sklearn.Pipeline and predict with it."""
        import lale.lib.sklearn
        import lale.operators

        fitted_pipeline = (PCA() >> LogisticRegression()).fit(
            self.X_train, self.y_train)
        self.assertIsInstance(fitted_pipeline, lale.operators.TrainedPipeline)
        pca_trained, lr_trained = fitted_pipeline.steps()
        trained_steps = [("pca1", pca_trained), ("lr1", lr_trained)]
        pre_trained = lale.lib.sklearn.Pipeline(steps=trained_steps)
        self.assertIsInstance(pre_trained, lale.operators.TrainedIndividualOp)
        # predict is called directly on pre_trained; fit is not called on it here.
        predictions = pre_trained.predict(self.X_test)
        # The score is computed but not asserted on.
        accuracy_score(self.y_test, predictions)
Exemplo n.º 22
0
    def test_with_pandas(self):
        """Fit and predict a ``(PCA & Nystroem) >> ConcatFeatures >> LR`` pipeline.

        The data comes from ``load_iris_df``. Nothing is asserted about the
        predictions; the test only checks that fit and predict succeed.
        """
        from lale.datasets import load_iris_df
        import warnings

        # Scope the "ignore" filter to this test. A bare
        # warnings.filterwarnings("ignore") changes the process-wide filter
        # list and would keep hiding warnings in tests that run afterwards.
        with warnings.catch_warnings():
            warnings.filterwarnings("ignore")
            pca = PCA(n_components=3)
            nys = Nystroem(n_components=10)
            concat = ConcatFeatures()
            lr = LogisticRegression(random_state=42, C=0.1)
            trainable = (pca & nys) >> concat >> lr

            (X_train, y_train), (X_test, _) = load_iris_df()
            trained = trainable.fit(X_train, y_train)
            trained.predict(X_test)
Exemplo n.º 23
0
    def test_concat_with_hyperopt(self):
        """Run Hyperopt (max_evals=2) on a pipeline that concatenates PCA and Nystroem features."""
        from lale.lib.lale import Hyperopt
        from sklearn.datasets import load_iris

        features = PCA(n_components=3) & Nystroem(n_components=10)
        classifier = LogisticRegression(random_state=42, C=0.1)
        trainable = features >> ConcatFeatures() >> classifier

        clf = Hyperopt(estimator=trainable, max_evals=2)
        dataset = load_iris()
        X, y = dataset.data, dataset.target
        clf.fit(X, y)
        # predict is called on the same clf object that fit was called on.
        clf.predict(X)
Exemplo n.º 24
0
    def test_string_labels(self):
        """Fit CondensedNearestNeighbour around a pipeline using string class labels.

        The training labels are remapped to "low" (label 0) and "high" (any
        other label), and ``sampling_strategy`` names the "high" class. Nothing
        is asserted about the predictions.
        """
        from lale.lib.imblearn import CondensedNearestNeighbour
        from lale.operators import make_pipeline

        y_train = ["low" if label == 0 else "high" for label in self.y_train]
        pipeline = CondensedNearestNeighbour(
            operator=make_pipeline(PCA(), LogisticRegression()),
            sampling_strategy=["high"],
        )
        trained = pipeline.fit(self.X_train, y_train)
        _ = trained.predict(self.X_test)
Exemplo n.º 25
0
    def test_fit_smaller_trials(self):
        """With max_evals=3 and k=20, the final ensemble holds at most three estimators."""
        from lale.lib.lale import TopKVotingClassifier
        from lale.lib.sklearn import Nystroem

        transformer_choice = PCA() | Nystroem()
        classifier_choice = LogisticRegression() | KNeighborsClassifier()
        ensemble = TopKVotingClassifier(
            estimator=transformer_choice >> classifier_choice,
            args_to_optimizer={"max_evals": 3},
            k=20,
        )
        fitted = ensemble.fit(self.X_train, self.y_train)
        # Reach into the private attributes to get the wrapped voting model.
        best = fitted._impl._best_estimator
        member_count = len(best._impl._wrapped_model.estimators)
        self.assertLessEqual(member_count, 3)
Exemplo n.º 26
0
    def test_feature_preprocessor(self):
        """Run a series of smoke checks on one feature preprocessor operator.

        The operator is looked up from the dotted path in ``fproc_name``, which
        is not defined inside this method (NOTE(review): presumably bound by an
        enclosing scope that generates one test per operator -- confirm).
        The checks are: schema validation, fit/transform, transform on the
        trainable operator, JSON serialization, use as the first step of a
        pipeline ending in LogisticRegression, and a one-evaluation Hyperopt
        run on that pipeline.
        """
        X_train, y_train = self.X_train, self.y_train
        import importlib

        # Split the dotted path into its module part and its final attribute name.
        module_name = ".".join(fproc_name.split(".")[0:-1])
        class_name = fproc_name.split(".")[-1]
        module = importlib.import_module(module_name)

        # Instantiate the operator with no arguments.
        class_ = getattr(module, class_name)
        fproc = class_()

        from lale.lib.sklearn.one_hot_encoder import OneHotEncoder

        if isinstance(fproc, OneHotEncoder):  # type: ignore
            # fproc = OneHotEncoder(handle_unknown = 'ignore')
            # remove the hack when this is fixed
            # Until then, OneHotEncoder is swapped out for PCA in this test.
            fproc = PCA()
        # test_schemas_are_schemas
        lale.type_checking.validate_is_schema(fproc.input_schema_fit())
        lale.type_checking.validate_is_schema(fproc.input_schema_transform())
        lale.type_checking.validate_is_schema(fproc.output_schema_transform())
        lale.type_checking.validate_is_schema(fproc.hyperparam_schema())

        # test_init_fit_transform
        trained = fproc.fit(self.X_train, self.y_train)
        _ = trained.transform(self.X_test)

        # test_predict_on_trainable
        # transform is called on fproc itself here, not on the result of fit.
        trained = fproc.fit(X_train, y_train)
        fproc.transform(X_train)

        # test_to_json
        fproc.to_json()

        # test_in_a_pipeline
        # This test assumes that the output of feature processing is compatible with LogisticRegression
        from lale.lib.sklearn import LogisticRegression

        pipeline = fproc >> LogisticRegression()
        trained = pipeline.fit(self.X_train, self.y_train)
        _ = trained.predict(self.X_test)

        # Tune the pipeline with LR using Hyperopt
        from lale.lib.lale import Hyperopt

        # max_evals=1 and cv=3 are passed to Hyperopt for this run.
        hyperopt = Hyperopt(estimator=pipeline,
                            max_evals=1,
                            verbose=True,
                            cv=3)
        trained = hyperopt.fit(self.X_train, self.y_train)
        _ = trained.predict(self.X_test)
Exemplo n.º 27
0
 def test_import_from_sklearn_pipeline_nested_pipeline1(self):
     """Import an sklearn pipeline with nested FeatureUnions and pipelines.

     Checks the number of edges of the imported pipeline, the implementation
     type at both ends of every edge, and that the imported pipeline gives
     the same predictions as the sklearn one (via assert_equal_predictions).
     """
     from sklearn.pipeline import FeatureUnion, make_pipeline
     from sklearn.decomposition import PCA
     from sklearn.kernel_approximation import Nystroem
     from sklearn.feature_selection import SelectKBest
     from sklearn.neighbors import KNeighborsClassifier
     union = FeatureUnion([
         ("selectkbest_pca",
          make_pipeline(
              SelectKBest(k=3),
              FeatureUnion([('pca', PCA(n_components=1)),
                            ('nested_pipeline',
                             make_pipeline(SelectKBest(k=2),
                                           Nystroem()))]))),
         ("nys", Nystroem(n_components=2, random_state=42))
     ])
     sklearn_pipeline = make_pipeline(union, KNeighborsClassifier())
     lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
     # Still exercise JSON serialization, but without printing the result.
     lale_pipeline.to_json()
     edges = lale_pipeline.edges()
     self.assertEqual(len(edges), 8)
     #These assertions assume topological sort, which may not be unique. So the assertions are brittle.
     from lale.lib.sklearn.pca import PCAImpl
     from lale.lib.sklearn.nystroem import NystroemImpl
     from lale.lib.lale.concat_features import ConcatFeaturesImpl
     from lale.lib.sklearn.k_neighbors_classifier import KNeighborsClassifierImpl
     # Expected (source impl type, target impl type) for each edge, in order.
     expected_edge_types = [
         (SelectKBest, PCAImpl),
         (SelectKBest, SelectKBest),
         (SelectKBest, NystroemImpl),
         (PCAImpl, ConcatFeaturesImpl),
         (NystroemImpl, ConcatFeaturesImpl),
         (ConcatFeaturesImpl, ConcatFeaturesImpl),
         (NystroemImpl, ConcatFeaturesImpl),
         (ConcatFeaturesImpl, KNeighborsClassifierImpl),
     ]
     for index, (edge, (source_type, target_type)) in enumerate(
             zip(edges, expected_edge_types)):
         self.assertIsInstance(edge[0]._impl, source_type,
                               f'source of edge {index}')
         self.assertIsInstance(edge[1]._impl, target_type,
                               f'target of edge {index}')
     self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
Exemplo n.º 28
0
 def test_compose4(self):
     """Compose three ``|`` combinations of operator instances with ``>>``."""
     from lale.operators import make_choice
     digits = sklearn.datasets.load_digits()
     encoder = OneHotEncoder(handle_unknown=OneHotEncoder.handle_unknown.ignore)
     encoder.get_params()
     # Instantiate every operator up front, then combine them stage by stage.
     passthrough, pca, nys = NoOp(), PCA(), Nystroem()
     lr, knn = LogisticRegression(), KNeighborsClassifier()
     encoding_stage = encoder | passthrough
     transform_stage = pca | nys
     classify_stage = lr | knn
     model_plan = encoding_stage >> transform_stage >> classify_stage
Exemplo n.º 29
0
 def test_import_from_sklearn_pipeline1(self):
     """Import a two-step sklearn pipeline and compare step params and predictions."""
     from sklearn.decomposition import PCA
     from sklearn.neighbors import KNeighborsClassifier
     from sklearn.pipeline import make_pipeline
     sklearn_steps = (PCA(n_components=3), KNeighborsClassifier())
     sklearn_pipeline = make_pipeline(*sklearn_steps)
     lale_pipeline = import_from_sklearn_pipeline(sklearn_pipeline)
     # Steps are matched by position between the two pipelines.
     for index, (_, sklearn_step) in enumerate(
             sklearn_pipeline.named_steps.items()):
         sklearn_step_params = sklearn_step.get_params()
         wrapped_model = lale_pipeline.steps()[index]._impl._wrapped_model
         lale_sklearn_params = wrapped_model.get_params()
         self.assertEqual(sklearn_step_params, lale_sklearn_params)
     self.assert_equal_predictions(sklearn_pipeline, lale_pipeline)
Exemplo n.º 30
0
 def test_export_to_sklearn_pipeline(self):
     """Export a trained pipeline to sklearn and compare step params and predictions.

     Each exported sklearn step's ``get_params()`` must equal those of the
     model wrapped by the step at the same position in the trained pipeline.
     """
     from lale.lib.sklearn import PCA
     from lale.lib.sklearn import KNeighborsClassifier
     lale_pipeline = PCA(n_components=3) >> KNeighborsClassifier()
     trained_lale_pipeline = lale_pipeline.fit(self.X_train, self.y_train)
     sklearn_pipeline = trained_lale_pipeline.export_to_sklearn_pipeline()
     # Steps are matched by position between the two pipelines.
     for i, pipeline_step in enumerate(sklearn_pipeline.named_steps):
         sklearn_step_params = sklearn_pipeline.named_steps[
             pipeline_step].get_params()
         lale_sklearn_params = trained_lale_pipeline.steps(
         )[i]._impl._wrapped_model.get_params()
         self.assertEqual(sklearn_step_params, lale_sklearn_params)
     self.assert_equal_predictions(sklearn_pipeline, trained_lale_pipeline)