Пример #1
0
 def test_export_to_sklearn_pipeline_with_noop_3(self):
     """Smoke-test exporting a fitted pipeline whose last step is ``NoOp``.

     Only checks that ``export_to_sklearn_pipeline`` completes; the
     exported object is not inspected.
     """
     # Probably redundant with neighbouring export tests, but harmless to keep.
     planned = PCA(n_components=3) >> KNeighborsClassifier() >> NoOp()
     fitted = planned.fit(self.X_train, self.y_train)
     fitted.export_to_sklearn_pipeline()
Пример #2
0
 def test_export_to_sklearn_pipeline_with_noop_4(self):
     """Export a fitted ``NoOp >> KNeighborsClassifier`` pipeline.

     The exported pipeline and the fitted Lale pipeline are handed to
     ``assert_equal_predictions`` for comparison.
     """
     fitted = (NoOp() >> KNeighborsClassifier()).fit(self.X_train, self.y_train)
     exported = fitted.export_to_sklearn_pipeline()
     self.assert_equal_predictions(exported, fitted)
Пример #3
0
    def test_classifier(self):
        """Run a series of smoke checks on the classifier named by ``clf_name``.

        ``clf_name`` is a dotted ``module.Class`` path that is read from an
        enclosing scope (it is not defined inside this method). The class is
        imported, instantiated with no arguments, and then exercised through:
        schema validation, fit/predict, ``HyperoptClassifier``,
        cross-validation, grid search, predict on the trainable operator,
        JSON serialisation, and use as the last step of a pipeline.

        Most steps only check that the calls complete; the single explicit
        assertion is on the number of cross-validation results.
        """
        import importlib

        X_train, y_train = self.X_train, self.y_train
        X_test = self.X_test

        # Split "pkg.module.ClassName" at the last dot and load the class.
        module_name, _, class_name = clf_name.rpartition('.')
        module = importlib.import_module(module_name)
        class_ = getattr(module, class_name)
        clf = class_()

        # test_schemas_are_schemas
        from lale.helpers import validate_is_schema
        validate_is_schema(clf.input_schema_fit())
        validate_is_schema(clf.input_schema_predict())
        validate_is_schema(clf.output_schema())
        validate_is_schema(clf.hyperparam_schema())

        # test_init_fit_predict
        trained = clf.fit(X_train, y_train)
        trained.predict(X_test)

        # test_with_hyperopt
        from lale.lib.lale import HyperoptClassifier
        hyperopt = HyperoptClassifier(model=clf, max_evals=1)
        trained = hyperopt.fit(X_train, y_train)
        trained.predict(X_test)

        # test_cross_validation
        from lale.helpers import cross_val_score
        cv_results = cross_val_score(clf, X_train, y_train, cv=2)
        self.assertEqual(len(cv_results), 2)

        # test_with_gridsearchcv_auto_wrapped
        from sklearn.metrics import accuracy_score, make_scorer
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            from lale.lib.sklearn.gradient_boosting_classifier import GradientBoostingClassifierImpl
            from lale.lib.sklearn.mlp_classifier import MLPClassifierImpl
            if isinstance(clf._impl, GradientBoostingClassifierImpl):
                # Exponential loss does not work with the iris dataset because
                # it is not a binary classification problem, so restrict the
                # schema to 'deviance' only.
                import lale.schemas as schemas
                clf = clf.customize_schema(
                    loss=schemas.Enum(default='deviance', values=['deviance']))
            if not isinstance(clf._impl, MLPClassifierImpl):
                # mlp fails due to issue #164.
                grid_search = LaleGridSearchCV(
                    clf,
                    lale_num_samples=1,
                    lale_num_grids=1,
                    cv=2,
                    scoring=make_scorer(accuracy_score))
                grid_search.fit(X_train, y_train)

        # test_predict_on_trainable
        # predict() is called on `clf` itself rather than on the operator
        # returned by fit(); that is the point of this step.
        clf.fit(X_train, y_train)
        clf.predict(X_train)

        # test_to_json
        clf.to_json()

        # test_in_a_pipeline
        pipeline = NoOp() >> clf
        trained = pipeline.fit(X_train, y_train)
        trained.predict(X_test)
Пример #4
0
 def test_no_partial_fit(self):
     """Fitting ``Batching`` around ``NoOp >> LogisticRegression`` must raise ``AttributeError``.

     NOTE(review): judging by the test name, the error is presumably caused
     by a missing ``partial_fit`` on the wrapped steps -- confirm against
     the ``Batching`` implementation.
     """
     batched = Batching(operator=NoOp() >> LogisticRegression())
     with self.assertRaises(AttributeError):
         batched.fit(self.X_train, self.y_train)
Пример #5
0
    def test_classifier(self):
        """Exercise the classifier named by ``clf_name`` end to end.

        ``clf_name`` is not defined in this method; it is read from an
        enclosing scope and split at its last dot into a module path and a
        class name. The default-constructed classifier is then run through
        schema validation, fit/predict/score, ``Hyperopt``, cross-validation,
        ``GridSearchCV``, predict on the trainable operator, ``to_json`` and
        a ``NoOp >> clf`` pipeline.

        The only explicit assertion is that cross-validation with ``cv=2``
        yields two results; every other step just has to run without raising.
        """
        train_X, train_y = self.X_train, self.y_train
        import importlib

        # "pkg.module.ClassName" -> ("pkg.module", "ClassName")
        module_path, _, cls_name = clf_name.rpartition(".")
        clf = getattr(importlib.import_module(module_path), cls_name)()

        # test_schemas_are_schemas
        check_schema = lale.type_checking.validate_is_schema
        check_schema(clf.input_schema_fit())
        check_schema(clf.input_schema_predict())
        check_schema(clf.output_schema_predict())
        check_schema(clf.hyperparam_schema())

        # test_init_fit_predict
        fitted = clf.fit(self.X_train, self.y_train)
        fitted.predict(self.X_test)

        # test score
        fitted.score(self.X_test, self.y_test)

        from lale.lib.sklearn.gradient_boosting_classifier import (
            GradientBoostingClassifier,
        )

        if isinstance(clf, GradientBoostingClassifier):  # type: ignore
            # Exponential loss does not work with the iris dataset (it is not
            # binary classification), so only 'deviance' is allowed.
            import lale.schemas as schemas

            deviance_only = schemas.Enum(default="deviance", values=["deviance"])
            clf = clf.customize_schema(loss=deviance_only)

        # test_with_hyperopt
        from lale.lib.lale import Hyperopt

        optimizer = Hyperopt(estimator=clf, max_evals=1, verbose=True)
        fitted = optimizer.fit(self.X_train, self.y_train)
        fitted.predict(self.X_test)

        # test_cross_validation
        from lale.helpers import cross_val_score

        fold_scores = cross_val_score(clf, train_X, train_y, cv=2)
        self.assertEqual(len(fold_scores), 2)

        # test_with_gridsearchcv_auto_wrapped
        from sklearn.metrics import accuracy_score, make_scorer

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            searcher = lale.lib.lale.GridSearchCV(
                estimator=clf,
                lale_num_samples=1,
                lale_num_grids=1,
                cv=2,
                scoring=make_scorer(accuracy_score),
            )
            searcher.fit(train_X, train_y)

        # test_predict_on_trainable
        # predict() goes to `clf`, not to the operator returned by fit().
        fitted = clf.fit(train_X, train_y)
        clf.predict(train_X)

        # test_to_json
        clf.to_json()

        # test_in_a_pipeline
        fitted = (NoOp() >> clf).fit(self.X_train, self.y_train)
        fitted.predict(self.X_test)
Пример #6
0
 def test_planned_pipeline_3(self):
     """Hand a two-stage planned pipeline to ``run_hyperopt_on_planned_pipeline``.

     Each stage is an ``&`` combination followed by ``ConcatFeatures``; the
     second stage and the final estimator step contain ``|`` choices.
     """
     # Assembled one ``>>`` at a time, left to right, which is how the single
     # chained expression would be evaluated as well.
     planned = (MinMaxScaler() & NoOp()) >> ConcatFeatures()
     planned = planned >> (StandardScaler & (NoOp() | MinMaxScaler()))
     planned = planned >> ConcatFeatures()
     planned = planned >> (LogisticRegression | KNeighborsClassifier)
     run_hyperopt_on_planned_pipeline(planned)
Пример #7
0
 def test_remove_last5(self):
     """Remove the last step in place, then call ``freeze_trainable`` on the result.

     There is no explicit assertion; the test passes when neither call raises.
     """
     pipeline = StandardScaler() >> (PCA() & Nystroem() & PassiveAggressiveClassifier())
     pipeline = pipeline >> ConcatFeatures() >> NoOp() >> PassiveAggressiveClassifier()
     shortened = pipeline.remove_last(inplace=True)
     shortened.freeze_trainable()
Пример #8
0
 def test_transform_schema_NoOp(self):
     """``NoOp.transform_schema`` must return the very same schema object it is given.

     Checked (by identity, not equality) for the ``'X'`` entry of every
     dataset fixture listed below.
     """
     from lale.datasets.data_schemas import to_schema

     datasets = (
         self._irisArr,
         self._irisDf,
         self._digits,
         self._housing,
         self._creditG,
         self._movies,
         self._drugRev,
     )
     for dataset in datasets:
         schema_in = to_schema(dataset['X'])
         schema_out = NoOp.transform_schema(schema_in)
         self.assertIs(schema_in, schema_out)
Пример #9
0
 def test_two_estimators_predict_proba1(self):
     """Fit a pipeline containing two ``GaussianNB`` estimators, then call ``predict_proba``.

     NOTE(review): the operator returned by ``fit`` is discarded and
     ``predict_proba`` is invoked on the original ``pipeline`` object --
     presumably on purpose; confirm before changing.
     """
     pipeline = StandardScaler() >> (PCA() & Nystroem() & GaussianNB())
     pipeline = pipeline >> ConcatFeatures() >> NoOp() >> GaussianNB()
     pipeline.fit(self.X_train, self.y_train)
     pipeline.predict_proba(self.X_test)
Пример #10
0
 def test_two_estimators_predict_proba(self):
     """Fit a pipeline containing two ``LogisticRegression`` estimators and call ``predict_proba``.

     ``predict_proba`` is called on the operator returned by ``fit``; the
     test passes when no call raises.
     """
     scaled_union = StandardScaler() >> (PCA() & Nystroem() & LogisticRegression())
     pipeline = scaled_union >> ConcatFeatures() >> NoOp() >> LogisticRegression()
     fitted = pipeline.fit(self.X_train, self.y_train)
     fitted.predict_proba(self.X_test)
Пример #11
0
    def test_nested(self):
        """Round-trip a planned pipeline with a nested choice through JSON.

        Checks that ``to_json`` produces the expected document and that
        ``from_json`` followed by ``to_json`` reproduces it unchanged.
        """
        self.maxDiff = None
        from lale.json_operator import from_json, to_json
        from lale.lib.lale import NoOp
        from lale.lib.sklearn import PCA
        from lale.lib.sklearn import LogisticRegression as LR

        # ``>>`` binds tighter than ``|``, so the second alternative of the
        # choice is the two-step pipeline ``NoOp >> LR(C=0.19)``.
        planned = PCA >> (LR(C=0.09) | (NoOp >> LR(C=0.19)))

        lr_docs = "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html"
        expected = {
            "class": "lale.operators.PlannedPipeline",
            "state": "planned",
            "edges": [["pca", "choice"]],
            "steps": {
                "pca": {
                    "class": PCA.class_name(),
                    "state": "planned",
                    "operator": "PCA",
                    "label": "PCA",
                    "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html",
                },
                "choice": {
                    "class": "lale.operators.OperatorChoice",
                    "state": "planned",
                    "operator": "OperatorChoice",
                    "steps": {
                        "lr_0": {
                            "class": LR.class_name(),
                            "state": "trainable",
                            "operator": "LogisticRegression",
                            "label": "LR",
                            "documentation_url": lr_docs,
                            "hyperparams": {"C": 0.09},
                            "is_frozen_trainable": False,
                        },
                        "pipeline_1": {
                            "class": "lale.operators.TrainablePipeline",
                            "state": "trainable",
                            "edges": [["no_op", "lr_1"]],
                            "steps": {
                                "no_op": {
                                    "class": NoOp.class_name(),
                                    "state": "trained",
                                    "operator": "NoOp",
                                    "label": "NoOp",
                                    "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html",
                                    "hyperparams": None,
                                    "coefs": None,
                                    "is_frozen_trainable": True,
                                    "is_frozen_trained": True,
                                },
                                "lr_1": {
                                    "class": LR.class_name(),
                                    "state": "trainable",
                                    "operator": "LogisticRegression",
                                    "label": "LR",
                                    "documentation_url": lr_docs,
                                    "hyperparams": {"C": 0.19},
                                    "is_frozen_trainable": False,
                                },
                            },
                        },
                    },
                },
            },
        }

        serialized = to_json(planned)
        self.assertEqual(serialized, expected)

        # Deserialising and serialising again must be lossless.
        restored = from_json(serialized)
        reserialized = to_json(restored)
        self.assertEqual(serialized, reserialized)
Пример #12
0
    def test_pipeline_1(self):
        """Round-trip ``(PCA & NoOp) >> ConcatFeatures >> LR`` through JSON.

        Checks that ``to_json`` produces the expected document and that
        ``from_json`` followed by ``to_json`` reproduces it unchanged.
        """
        self.maxDiff = None
        from lale.json_operator import from_json, to_json
        from lale.lib.lale import ConcatFeatures, NoOp
        from lale.lib.sklearn import PCA
        from lale.lib.sklearn import LogisticRegression as LR

        planned = (PCA & NoOp) >> ConcatFeatures >> LR

        expected_edges = [
            ["pca", "concat_features"],
            ["no_op", "concat_features"],
            ["concat_features", "lr"],
        ]
        expected_steps = {
            "pca": {
                "class": PCA.class_name(),
                "state": "planned",
                "operator": "PCA",
                "label": "PCA",
                "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.pca.html",
            },
            "no_op": {
                "class": NoOp.class_name(),
                "state": "trained",
                "operator": "NoOp",
                "label": "NoOp",
                "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.no_op.html",
                "hyperparams": None,
                "coefs": None,
                "is_frozen_trainable": True,
                "is_frozen_trained": True,
            },
            "concat_features": {
                "class": ConcatFeatures.class_name(),
                "state": "trained",
                "operator": "ConcatFeatures",
                "label": "ConcatFeatures",
                "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.lale.concat_features.html",
                "hyperparams": None,
                "coefs": None,
                "is_frozen_trainable": True,
                "is_frozen_trained": True,
            },
            "lr": {
                "class": LR.class_name(),
                "state": "planned",
                "operator": "LogisticRegression",
                "label": "LR",
                "documentation_url": "https://lale.readthedocs.io/en/latest/modules/lale.lib.sklearn.logistic_regression.html",
            },
        }
        expected = {
            "class": "lale.operators.PlannedPipeline",
            "state": "planned",
            "edges": expected_edges,
            "steps": expected_steps,
        }

        serialized = to_json(planned)
        self.assertEqual(serialized, expected)

        # Deserialising and serialising again must be lossless.
        restored = from_json(serialized)
        reserialized = to_json(restored)
        self.assertEqual(serialized, reserialized)
Пример #13
0
 def test_two_estimators_predict_proba1(self):
     """Fit a pipeline with two ``PassiveAggressiveClassifier`` steps, then call ``predict_proba``.

     NOTE(review): the operator returned by ``fit`` is discarded and
     ``predict_proba`` is invoked on the original ``pipeline`` object --
     presumably on purpose; confirm before changing.
     """
     pipeline = StandardScaler() >> (PCA() & Nystroem() & PassiveAggressiveClassifier())
     pipeline = pipeline >> ConcatFeatures() >> NoOp() >> PassiveAggressiveClassifier()
     pipeline.fit(self.X_train, self.y_train)
     pipeline.predict_proba(self.X_test)