Пример #1
0
    def test_bool_enum(self):
        """Check the search ranges reported for a Bool-or-Null hyperparameter.

        ``shrinking`` of ``SVR`` is re-declared as ``AnyOf(Bool, Null)`` with a
        ``None`` default, and the ranges returned by ``get_param_ranges`` are
        compared against the expected mapping, in which ``shrinking`` is listed
        as ``[False, True, None]``.
        """
        from lale.lib.sklearn import SVR
        from lale.schemas import AnyOf, Bool, Null

        shrinking_schema = AnyOf(
            types=[Bool(), Null()],
            default=None,
            desc="Whether to use the shrinking heuristic.",
        )
        customized_svr = SVR.customize_schema(shrinking=shrinking_schema)

        # Only the ranges are asserted on; the second returned value is unused.
        actual_ranges, _ = customized_svr.get_param_ranges()

        # Disable diff truncation so a failing comparison prints every entry.
        self.maxDiff = None
        self.assertEqual(
            actual_ranges,
            {
                "kernel": ["poly", "rbf", "sigmoid", "linear"],
                "degree": (2, 5, 3),
                "gamma": (3.0517578125e-05, 8, None),
                "tol": (0.0, 0.01, 0.001),
                "C": (0.03125, 32768, 1.0),
                "shrinking": [False, True, None],
            },
        )
Пример #2
0
# When the installed scikit-learn version string is at or after "0.22",
# re-declare ``gamma`` (default "scale") and declare ``break_ties`` on SVC.
# NOTE(review): the check compares version strings lexicographically, not
# numerically.
if sklearn.__version__ >= "0.22":
    # old: https://scikit-learn.org/0.20/modules/generated/sklearn.svm.SVC.html
    # new: https://scikit-learn.org/0.23/modules/generated/sklearn.svm.SVC.html
    from lale.schemas import AnyOf, Bool, Enum, Float

    # ``gamma`` is either one of two keywords or a strictly positive float.
    _svc_gamma_choices = [
        Enum(["scale", "auto"]),
        Float(
            minimum=0.0,
            exclusiveMinimum=True,
            minimumForOptimizer=3.0517578125e-05,
            maximumForOptimizer=8,
            distribution="loguniform",
        ),
    ]
    _svc_gamma_schema = AnyOf(
        types=_svc_gamma_choices,
        desc="Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.",
        default="scale",
    )
    _svc_break_ties_schema = Bool(
        desc="If true, decision_function_shape='ovr', and number of classes > 2, predict will break ties according to the confidence values of decision_function; otherwise the first class among the tied classes is returned.",
        default=False,
    )

    SVC = SVC.customize_schema(
        gamma=_svc_gamma_schema,
        break_ties=_svc_break_ties_schema,
        set_as_available=True,
    )


# Attach the generated documentation to the (possibly customized) operator.
lale.docstrings.set_docstrings(SVC)
Пример #3
0
        "post": []
    },
    "properties": {
        "hyperparams": _hyperparams_schema,
        "input_fit": _input_fit_schema,
        "input_transform": _input_transform_schema,
        "output_transform": _output_transform_schema,
    },
}

# Lale operator built from ``FunctionTransformerImpl`` and the combined schemas.
FunctionTransformer: lale.operators.IndividualOp = lale.operators.make_operator(
    FunctionTransformerImpl, _combined_schemas
)

# NOTE(review): the check compares version strings lexicographically, not
# numerically.
if sklearn.__version__ >= "0.22":
    # old: https://scikit-learn.org/0.20/modules/generated/sklearn.preprocessing.FunctionTransformer.html
    # new: https://scikit-learn.org/0.23/modules/generated/sklearn.preprocessing.FunctionTransformer.html
    from lale.schemas import Bool

    _validate_schema = Bool(
        desc="Indicate that the input X array should be checked before calling ``func``.",
        default=False,
    )
    # ``pass_y=None`` presumably drops the ``pass_y`` hyperparameter -- confirm
    # against ``customize_schema``.
    FunctionTransformer = FunctionTransformer.customize_schema(
        validate=_validate_schema,
        pass_y=None,
    )

# Docstrings are generated from the final schemas of the operator.
lale.docstrings.set_docstrings(
    FunctionTransformerImpl, FunctionTransformer._schemas
)
    },
}

# Lale operator built directly from the scikit-learn estimator class.
GradientBoostingRegressor: lale.operators.PlannedIndividualOp = (
    lale.operators.make_operator(
        sklearn.ensemble.GradientBoostingRegressor, _combined_schemas
    )
)

# NOTE(review): the check compares version strings lexicographically, not
# numerically.
if sklearn.__version__ >= "0.22":
    # old: https://scikit-learn.org/0.20/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html
    # new: https://scikit-learn.org/0.22/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html
    from lale.schemas import AnyOf, Bool, Enum, Float

    # ``presort`` accepts a boolean or one of the two listed keywords.
    _presort_schema = AnyOf(
        types=[Bool(), Enum(["deprecated", "auto"])],
        desc="This parameter is deprecated and will be removed in v0.24.",
        default="deprecated",
    )
    # ``ccp_alpha`` is declared but excluded from the optimizer search space.
    _ccp_alpha_schema = Float(
        desc="Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ccp_alpha will be chosen. By default, no pruning is performed.",
        default=0.0,
        forOptimizer=False,
        min=0.0,
        maxForOptimizer=0.1,
    )

    GradientBoostingRegressor = GradientBoostingRegressor.customize_schema(
        presort=_presort_schema,
        ccp_alpha=_ccp_alpha_schema,
    )

if sklearn.__version__ >= "0.24":
    # old: https://scikit-learn.org/0.22/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html
    # new: https://scikit-learn.org/0.24/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html
Пример #5
0
        "post": []
    },
    "properties": {
        "hyperparams": _hyperparams_schema,
        "input_fit": _input_fit_schema,
        "input_transform": _input_transform_schema,
        "output_transform": _output_transform_schema,
    },
}

# Lale operator built directly from the scikit-learn transformer class.
RobustScaler: lale.operators.PlannedIndividualOp = lale.operators.make_operator(
    sklearn.preprocessing.RobustScaler, _combined_schemas
)

# NOTE(review): the check compares version strings lexicographically, not
# numerically.
if sklearn.__version__ >= "0.24":
    # old: https://scikit-learn.org/0.20/modules/generated/sklearn.preprocessing.RobustScaler.html
    # new: https://scikit-learn.org/0.24/modules/generated/sklearn.preprocessing.RobustScaler.html
    from lale.schemas import Bool

    # ``unit_variance`` is declared and included in the optimizer search space.
    _unit_variance_schema = Bool(
        desc="If True, scale data so that normally distributed features have a variance of 1. In general, if the difference between the x-values of q_max and q_min for a standard normal distribution is greater than 1, the dataset will be scaled down. If less than 1, the dataset will be scaled up.",
        default=False,
        forOptimizer=True,
    )
    RobustScaler = RobustScaler.customize_schema(
        unit_variance=_unit_variance_schema,
        set_as_available=True,
    )

# Attach the generated documentation to the (possibly customized) operator.
lale.docstrings.set_docstrings(RobustScaler)
Пример #6
0
}

# Lale operator built directly from the scikit-learn estimator class.
RidgeClassifier = lale.operators.make_operator(
    sklearn.linear_model.RidgeClassifier,
    _combined_schemas,
)

# NOTE(review): the check compares version strings lexicographically, not
# numerically.
if sklearn.__version__ >= "1.0":
    # old: https://scikit-learn.org/0.24/modules/generated/sklearn.linear_model.RidgeClassifier.html
    # new: https://scikit-learn.org/1.0/modules/generated/sklearn.linear_model.RidgeClassifier.html
    from lale.schemas import Bool

    # Hyperparameters listed as relevant to the optimizer; ``normalize`` is
    # not among them and is additionally declared with forOptimizer=False.
    _relevant_hyperparams = [
        "alpha",
        "fit_intercept",
        "copy_X",
        "max_iter",
        "tol",
        "solver",
    ]
    _normalize_desc = (
        "This parameter is ignored when fit_intercept is set to False.\n"
        "If True, the regressors X will be normalized before regression by subtracting the mean and dividing by the l2-norm.\n"
        "If you wish to standardize, please use StandardScaler before calling fit on an estimator with normalize=False."
    )
    _normalize_schema = Bool(
        desc=_normalize_desc,
        default=False,
        forOptimizer=False,
    )

    RidgeClassifier = RidgeClassifier.customize_schema(
        relevantToOptimizer=_relevant_hyperparams,
        normalize=_normalize_schema,
        set_as_available=True,
    )

# Attach the generated documentation to the (possibly customized) operator.
lale.docstrings.set_docstrings(RidgeClassifier)
Пример #7
0
        "hyperparams": _hyperparams_schema,
        "input_fit": _input_fit_schema,
        "input_predict": _input_predict_schema,
        "output_predict": _output_predict_schema,
        "input_predict_proba": _input_predict_proba_schema,
        "output_predict_proba": _output_predict_proba_schema,
        "input_transform": _input_transform_schema,
        "output_transform": _output_transform_schema,
    },
}


# Lale operator built from ``_PipelineImpl`` and the combined schemas.
Pipeline = lale.operators.make_operator(
    _PipelineImpl,
    _combined_schemas,
)

# NOTE(review): the check compares version strings lexicographically, not
# numerically.
if sklearn.__version__ >= "0.21":
    # old: https://scikit-learn.org/0.20/modules/generated/sklearn.pipeline.Pipeline.html
    # new: https://scikit-learn.org/0.21/modules/generated/sklearn.pipeline.Pipeline.html
    _verbose_schema = Bool(
        desc="If True, the time elapsed while fitting each step will be printed as it is completed.",
        default=False,
    )
    _customized_pipeline = Pipeline.customize_schema(
        verbose=_verbose_schema,
        set_as_available=True,
    )
    # The cast only informs the type checker; the object is used as returned.
    Pipeline = typing.cast(lale.operators.PlannedIndividualOp, _customized_pipeline)


# Attach the generated documentation to the (possibly customized) operator.
lale.docstrings.set_docstrings(Pipeline)
Пример #8
0
    "tags": {
        "pre": [],
        "op": ["transformer"],
        "post": []
    },
    "properties": {
        "hyperparams": _hyperparams_schema,
        "input_fit": _input_fit_schema,
        "input_transform": _input_transform_schema,
        "output_transform": _output_transform_schema,
    },
}

# Lale operator built from ``ColumnTransformerImpl`` and the combined schemas.
ColumnTransformer = lale.operators.make_operator(
    ColumnTransformerImpl, _combined_schemas
)

# NOTE(review): the check compares version strings lexicographically, not
# numerically.
if sklearn.__version__ >= "0.21":
    # old: https://scikit-learn.org/0.20/modules/generated/sklearn.compose.ColumnTransformer.html
    # new: https://scikit-learn.org/0.21/modules/generated/sklearn.compose.ColumnTransformer.html
    _verbose_schema = Bool(
        desc="If True, the time elapsed while fitting each transformer will be printed as it is completed.",
        default=False,
    )
    _customized_transformer = ColumnTransformer.customize_schema(
        verbose=_verbose_schema
    )
    # The cast only informs the type checker; the object is used as returned.
    ColumnTransformer = typing.cast(
        lale.operators.PlannedIndividualOp, _customized_transformer
    )

# Docstrings are generated from the final schemas of the operator.
lale.docstrings.set_docstrings(
    ColumnTransformerImpl, ColumnTransformer._schemas
)
Пример #9
0
        'hyperparams': _hyperparams_schema,
        'input_fit': _input_fit_schema,
        'input_predict': _input_predict_schema,
        'output_predict': _output_predict_schema
    }
}

# Lale operator built from ``GradientBoostingRegressorImpl`` and the combined
# schemas.
GradientBoostingRegressor: lale.operators.IndividualOp = (
    lale.operators.make_operator(
        GradientBoostingRegressorImpl, _combined_schemas
    )
)

# NOTE(review): the check compares version strings lexicographically, not
# numerically.
if sklearn.__version__ >= "0.22":
    # old: https://scikit-learn.org/0.20/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html
    # new: https://scikit-learn.org/0.23/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html
    from lale.schemas import AnyOf, Bool, Enum, Float

    # ``presort`` accepts a boolean or one of the two listed keywords.
    _presort_schema = AnyOf(
        types=[Bool(), Enum(["deprecated", "auto"])],
        desc="This parameter is deprecated and will be removed in v0.24.",
        default="deprecated",
    )
    # ``ccp_alpha`` is declared and included in the optimizer search space.
    _ccp_alpha_schema = Float(
        desc="Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ccp_alpha will be chosen. By default, no pruning is performed.",
        default=0.0,
        forOptimizer=True,
        min=0.0,
        maxForOptimizer=0.1,
    )

    GradientBoostingRegressor = GradientBoostingRegressor.customize_schema(
        presort=_presort_schema,
        ccp_alpha=_ccp_alpha_schema,
    )

# Docstrings are generated from the final schemas of the operator.
lale.docstrings.set_docstrings(
    GradientBoostingRegressorImpl, GradientBoostingRegressor._schemas
)
Пример #10
0
        "op": ["estimator", "transformer"],
        "post": []
    },
    "properties": {
        "hyperparams": _hyperparams_schema,
        "input_fit": _input_fit_schema,
        "input_predict": _input_predict_schema,
        "output_predict": _output_predict_schema,
        "input_predict_proba": _input_predict_proba_schema,
        "output_predict_proba": _output_predict_proba_schema,
        "input_transform": _input_transform_schema,
        "output_transform": _output_transform_schema,
    },
}

# Lale operator built from ``PipelineImpl`` and the combined schemas.
Pipeline = lale.operators.make_operator(
    PipelineImpl,
    _combined_schemas,
)

# NOTE(review): the check compares version strings lexicographically, not
# numerically.
if sklearn.__version__ >= "0.21":
    # old: https://scikit-learn.org/0.20/modules/generated/sklearn.pipeline.Pipeline.html
    # new: https://scikit-learn.org/0.21/modules/generated/sklearn.pipeline.Pipeline.html
    _verbose_schema = Bool(
        desc="If True, the time elapsed while fitting each step will be printed as it is completed.",
        default=False,
    )
    _customized_pipeline = Pipeline.customize_schema(verbose=_verbose_schema)
    # The cast only informs the type checker; the object is used as returned.
    Pipeline = typing.cast(lale.operators.PlannedIndividualOp, _customized_pipeline)

# Docstrings are generated from the final schemas of the operator.
lale.docstrings.set_docstrings(PipelineImpl, Pipeline._schemas)
Пример #11
0
    "type": "object",
    "tags": {"pre": [], "op": ["estimator", "classifier"], "post": []},
    "properties": {
        "hyperparams": _hyperparams_schema,
        "input_fit": _input_fit_schema,
        "input_predict": _input_predict_schema,
        "output_predict": _output_predict_schema,
        "input_predict_proba": _input_predict_proba_schema,
        "output_predict_proba": _output_predict_proba_schema,
    },
}


SnapRandomForestClassifier = lale.operators.make_operator(
    _SnapRandomForestClassifierImpl, _combined_schemas
)


def _snapml_release_tuple(version_string):
    """Return the leading numeric release components of a version string.

    Components are split on "." and converted to integers so that versions
    compare numerically, e.g. "1.10.0" -> (1, 10, 0), which is greater than
    (1, 7, 8). Parsing stops at the first component that is not purely
    numeric; leading digits of that component are kept and everything after
    them is ignored, e.g. "1.8.0rc1" -> (1, 8, 0).
    """
    release = []
    for component in version_string.split("."):
        digits = ""
        for char in component:
            if not char.isdigit():
                break
            digits += char
        if not digits:
            break
        release.append(int(digits))
        if digits != component:
            # A qualifier such as "rc1" follows the digits; stop parsing here.
            break
    return tuple(release)


# ``compress_trees`` is only declared for snapml releases newer than 1.7.8.
# The comparison is done on integer tuples: comparing the raw version strings
# would order "1.10.0" before "1.7.8" and skip the customization.
if snapml_installed and _snapml_release_tuple(snapml.__version__) > (1, 7, 8):  # type: ignore # noqa
    from lale.schemas import Bool

    SnapRandomForestClassifier = SnapRandomForestClassifier.customize_schema(
        compress_trees=Bool(
            desc="""Compress trees after training for fast inference.""",
            default=False,
            forOptimizer=False,
        ),
        set_as_available=True,
    )

# Attach the generated documentation to the (possibly customized) operator.
lale.docstrings.set_docstrings(SnapRandomForestClassifier)
Пример #12
0
        "output_predict": _output_predict_schema,
    },
}

LinearRegression: lale.operators.PlannedIndividualOp
LinearRegression = lale.operators.make_operator(LinearRegressionImpl,
                                                _combined_schemas)

if sklearn.__version__ >= "0.24":
    # old: https://scikit-learn.org/0.20/modules/generated/sklearn.linear_model.LinearRegression.html
    # new: https://scikit-learn.org/0.24/modules/generated/sklearn.linear_model.LinearRegression.html
    from lale.schemas import JSON, Bool

    LinearRegression = LinearRegression.customize_schema(positive=Bool(
        desc="When set to True, forces the coefficients to be positive.",
        default=False,
        forOptimizer=False,
    ))
    LinearRegression = LinearRegression.customize_schema(constraint=JSON({
        "description":
        "Setting positive=True is only supported for dense arrays.",
        "anyOf": [
            {
                "type": "object",
                "properties": {
                    "positive": {
                        "enum": [False]
                    }
                }
            },
            {
Пример #13
0
    "tags": {
        "pre": ["~categoricals"],
        "op": ["transformer", "interpretable"],
        "post": [],
    },
    "properties": {
        "hyperparams": _hyperparams_schema,
        "input_fit": _input_schema_fit,
        "input_transform": _input_transform_schema,
        "output_transform": _output_transform_schema,
    },
}

# Lale operator built from ``MinMaxScalerImpl`` and the combined schemas.
MinMaxScaler: lale.operators.PlannedIndividualOp = lale.operators.make_operator(
    MinMaxScalerImpl, _combined_schemas
)

# NOTE(review): the check compares version strings lexicographically, not
# numerically.
if sklearn.__version__ >= "0.24":
    # old: https://scikit-learn.org/0.22/modules/generated/sklearn.preprocessing.MinMaxScaler.html
    # new: https://scikit-learn.org/0.24/modules/generated/sklearn.preprocessing.MinMaxScaler.html
    from lale.schemas import Bool

    # ``clip`` is declared but excluded from the optimizer search space.
    _clip_schema = Bool(
        desc="Set to True to clip transformed values of held-out data to provided feature range.",
        default=False,
        forOptimizer=False,
    )
    MinMaxScaler = MinMaxScaler.customize_schema(clip=_clip_schema)

# Docstrings are generated from the final schemas of the operator.
lale.docstrings.set_docstrings(MinMaxScalerImpl, MinMaxScaler._schemas)
Пример #14
0
        constraint=AnyOf(
            [Object(n_clusters=Null()),
             Object(distance_threshold=Null())],
            desc="n_clusters must be None if distance_threshold is not None.",
        ),
    )
    # Constraint: either ``compute_full_tree`` is the boolean True or
    # ``distance_threshold`` is None. The enum holds the boolean ``True``;
    # the string "True" would never match a boolean hyperparameter value.
    FeatureAgglomeration = FeatureAgglomeration.customize_schema(
        constraint=AnyOf(
            [
                Object(compute_full_tree=Enum([True])),
                Object(distance_threshold=Null()),
            ],
            desc=
            "compute_full_tree must be True if distance_threshold is not None.",
        ))

# NOTE(review): the check compares version strings lexicographically, not
# numerically.
if sklearn.__version__ >= "0.24":
    # old: https://scikit-learn.org/0.21/modules/generated/sklearn.cluster.FeatureAgglomeration.html
    # new: https://scikit-learn.org/0.24/modules/generated/sklearn.cluster.FeatureAgglomeration.html
    from lale.schemas import Bool

    # ``compute_distances`` is declared but excluded from the optimizer search
    # space.
    _compute_distances_schema = Bool(
        desc="Computes distances between clusters even if distance_threshold is not used. This can be used to make dendrogram visualization, but introduces a computational and memory overhead.",
        default=False,
        forOptimizer=False,
    )
    FeatureAgglomeration = FeatureAgglomeration.customize_schema(
        compute_distances=_compute_distances_schema
    )

# Attach the generated documentation to the (possibly customized) operator.
lale.docstrings.set_docstrings(FeatureAgglomeration)
        'hyperparams': _hyperparams_schema,
        'input_fit': _input_fit_schema,
        'input_predict': _input_predict_schema,
        'output_predict': _output_predict_schema
    }
}

# Lale operator built from ``GradientBoostingRegressorImpl`` and the combined
# schemas.
GradientBoostingRegressor: lale.operators.IndividualOp = (
    lale.operators.make_operator(
        GradientBoostingRegressorImpl, _combined_schemas
    )
)

# NOTE(review): the check compares version strings lexicographically, not
# numerically.
if sklearn.__version__ >= "0.22":
    # old: https://scikit-learn.org/0.20/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html
    # new: https://scikit-learn.org/0.23/modules/generated/sklearn.ensemble.GradientBoostingRegressor.html
    from lale.schemas import AnyOf, Bool, Enum, Float

    # ``presort`` accepts a boolean or the single keyword "deprecated".
    _presort_schema = AnyOf(
        types=[Bool(), Enum(["deprecated"])],
        desc="This parameter is deprecated and will be removed in v0.24.",
        default="deprecated",
    )
    # ``ccp_alpha`` is declared and included in the optimizer search space.
    _ccp_alpha_schema = Float(
        desc="Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ccp_alpha will be chosen. By default, no pruning is performed.",
        default=0.0,
        forOptimizer=True,
        min=0.0,
        maxForOptimizer=0.1,
    )

    GradientBoostingRegressor = GradientBoostingRegressor.customize_schema(
        presort=_presort_schema,
        ccp_alpha=_ccp_alpha_schema,
    )

# Docstrings are generated from the final schemas of the operator.
lale.docstrings.set_docstrings(
    GradientBoostingRegressorImpl, GradientBoostingRegressor._schemas
)
Пример #16
0
    def predict_proba(self, X):
        """Return the result of ``predict_proba(X)`` on the wrapped model."""
        wrapped_model = self._wrapped_model
        return wrapped_model.predict_proba(X)


# Lale operator built from ``DecisionTreeClassifierImpl`` and the combined
# schemas.
DecisionTreeClassifier: lale.operators.IndividualOp = lale.operators.make_operator(
    DecisionTreeClassifierImpl, _combined_schemas
)

# NOTE(review): the check compares version strings lexicographically, not
# numerically.
if sklearn.__version__ >= "0.22":
    # old: https://scikit-learn.org/0.20/modules/generated/sklearn.tree.DecisionTreeClassifier.html
    # new: https://scikit-learn.org/0.23/modules/generated/sklearn.tree.DecisionTreeClassifier.html
    from lale.schemas import AnyOf, Bool, Enum, Float

    # ``presort`` accepts a boolean or the single keyword "deprecated".
    _presort_schema = AnyOf(
        types=[Bool(), Enum(["deprecated"])],
        desc="This parameter is deprecated and will be removed in v0.24.",
        default="deprecated",
    )
    # ``ccp_alpha`` is declared and included in the optimizer search space.
    _ccp_alpha_schema = Float(
        desc="Complexity parameter used for Minimal Cost-Complexity Pruning. The subtree with the largest cost complexity that is smaller than ccp_alpha will be chosen. By default, no pruning is performed.",
        default=0.0,
        forOptimizer=True,
        min=0.0,
        maxForOptimizer=0.1,
    )

    DecisionTreeClassifier = DecisionTreeClassifier.customize_schema(
        presort=_presort_schema,
        ccp_alpha=_ccp_alpha_schema,
    )

# Docstrings are generated from the final schemas of the operator.
lale.docstrings.set_docstrings(
    DecisionTreeClassifierImpl, DecisionTreeClassifier._schemas
)
Пример #17
0
    'type': 'object',
    'tags': {
        'pre': [],
        'op': ['transformer'],
        'post': []
    },
    'properties': {
        'hyperparams': _hyperparams_schema,
        'input_fit': _input_fit_schema,
        'input_transform': _input_transform_schema,
        'output_transform': _output_transform_schema
    }
}

# Lale operator built from ``FunctionTransformerImpl`` and the combined schemas.
FunctionTransformer: lale.operators.IndividualOp = lale.operators.make_operator(
    FunctionTransformerImpl, _combined_schemas
)

# NOTE(review): the check compares version strings lexicographically, not
# numerically.
if sklearn.__version__ >= "0.22":
    # old: https://scikit-learn.org/0.20/modules/generated/sklearn.preprocessing.FunctionTransformer.html
    # new: https://scikit-learn.org/0.23/modules/generated/sklearn.preprocessing.FunctionTransformer.html
    from lale.schemas import Bool

    _validate_schema = Bool(
        desc="Indicate that the input X array should be checked before calling ``func``.",
        default=False,
    )
    # ``pass_y=None`` presumably drops the ``pass_y`` hyperparameter -- confirm
    # against ``customize_schema``.
    FunctionTransformer = FunctionTransformer.customize_schema(
        validate=_validate_schema,
        pass_y=None,
    )

# Docstrings are generated from the final schemas of the operator.
lale.docstrings.set_docstrings(
    FunctionTransformerImpl, FunctionTransformer._schemas
)