Example #1
def test_gbrt_gaussian():
    # estimate quantiles of the normal distribution
    rng = np.random.RandomState(1)
    N = 10000
    X = np.ones((N, 1))
    y = rng.normal(size=N)

    rgr = GradientBoostingQuantileRegressor()
    rgr.fit(X, y)

    estimates = rgr.predict(X)
    assert_almost_equal(stats.norm.ppf(rgr.quantiles),
                        np.mean(estimates, axis=0),
                        decimal=2)
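Note: the assertion works because X is constant, so each quantile sub-model fits an unconditional quantile of y. A minimal sketch of the expected targets, assuming skopt's default quantiles of [0.16, 0.5, 0.84] (roughly one standard deviation around the median of a standard normal):

from scipy import stats

# quantiles assumed to match the regressor's defaults
quantiles = [0.16, 0.5, 0.84]
print(stats.norm.ppf(quantiles))  # approx. [-0.9945, 0.0, 0.9945]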
Example #2
def test_gbrt_with_std():
    # simple test of the interface
    rng = np.random.RandomState(1)
    X = rng.uniform(0, 5, 500)[:, np.newaxis]

    noise_level = 0.5
    y = truth(X) + rng.normal(0, noise_level, len(X))
    X_ = np.linspace(0, 5, 1000)[:, np.newaxis]

    model = GradientBoostingQuantileRegressor()
    model.fit(X, y)

    # three quantiles, so three numbers per sample
    assert_array_equal(
        model.predict(X_, return_quantiles=True).shape, (len(X_), 3))
    # "traditional" API which returns one number per sample, in this case
    # just the median/mean
    assert_array_equal(model.predict(X_).shape, (len(X_),))

    l, c, h = model.predict(X_, return_quantiles=True).T
    assert_equal(l.shape, c.shape)
    assert_equal(c.shape, h.shape)
    assert_equal(l.shape[0], X_.shape[0])

    mean, std = model.predict(X_, return_std=True)
    assert_array_equal(mean, c)
    assert_array_equal(std, (h - l) / 2.0)
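Outside the test harness, the same interface reads as below; a minimal usage sketch, assuming skopt is installed and that predict supports return_std as exercised above (truth() above is a fixture of the test module, replaced here by np.sin):

import numpy as np
from skopt.learning import GradientBoostingQuantileRegressor

rng = np.random.RandomState(0)
X = rng.uniform(0, 5, 200)[:, np.newaxis]
y = np.sin(X).ravel() + rng.normal(0, 0.5, len(X))

model = GradientBoostingQuantileRegressor().fit(X, y)
# std is derived from the quantile spread: half of the 16%-84% interval
mean, std = model.predict(X, return_std=True)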
Example #3
    def __init__(
        self,
        problem,
        num_workers,
        surrogate_model="RF",
        acq_func="gp_hedge",
        acq_kappa=1.96,
        acq_xi=None,
        liar_strategy="cl_max",
        n_jobs=1,
        **kwargs,
    ):

        assert surrogate_model in [
            "RF",
            "ET",
            "GBRT",
            "GP",
            "DUMMY",
        ], f"Unknown scikit-optimize base_estimator: {surrogate_model}"

        if surrogate_model == "RF":
            base_estimator = RandomForestRegressor(n_jobs=n_jobs)
        elif surrogate_model == "ET":
            base_estimator = ExtraTreesRegressor(n_jobs=n_jobs)
        elif surrogate_model == "GBRT":
            base_estimator = GradientBoostingQuantileRegressor(n_jobs=n_jobs)
        else:
            base_estimator = surrogate_model

        self.space = problem.space
        # queue of remaining starting points
        self.starting_points = problem.starting_point

        n_init = (inf if surrogate_model == "DUMMY" else max(
            num_workers, len(self.starting_points)))

        # Set acq_func_kwargs parameters
        acq_func_kwargs = {}
        if isinstance(acq_kappa, float):
            acq_func_kwargs["kappa"] = acq_kappa
        if isinstance(acq_xi, float):
            acq_func_kwargs["xi"] = acq_xi

        self._optimizer = SkOptimizer(
            dimensions=self.space,
            base_estimator=base_estimator,
            acq_optimizer="sampling",
            acq_func=acq_func,
            acq_func_kwargs=acq_func_kwargs,
            random_state=self.SEED,
            n_initial_points=n_init,
        )

        assert liar_strategy in "cl_min cl_mean cl_max".split()
        self.strategy = liar_strategy
        self.evals = {}
        self.counter = 0
        logger.info(
            f"Using skopt.Optimizer with {surrogate_model} base_estimator")
Example #4
def test_gbrt_in_parallel():
    # check that quantile estimates match when fitting in parallel
    rng = np.random.RandomState(1)
    N = 10000
    X = np.ones((N, 1))
    y = rng.normal(size=N)

    rgr = GradientBoostingQuantileRegressor(
        n_jobs=1, random_state=np.random.RandomState(1))
    rgr.fit(X, y)
    estimates = rgr.predict(X)

    rgr.set_params(n_jobs=2, random_state=np.random.RandomState(1))
    rgr.fit(X, y)
    estimates_parallel = rgr.predict(X)

    assert_array_equal(estimates, estimates_parallel)
Example #5
def test_gbrt_with_std():
    # simple test of the interface
    rng = np.random.RandomState(1)
    X = rng.uniform(0, 5, 500)[:, np.newaxis]

    noise_level = 0.5
    y = truth(X) + rng.normal(0, noise_level, len(X))
    X_ = np.linspace(0, 5, 1000)[:, np.newaxis]

    model = GradientBoostingQuantileRegressor()
    model.fit(X, y)

    # three quantiles, so three numbers per sample
    assert_array_equal(model.predict(X_, return_quantiles=True).shape,
                       (len(X_), 3))
    # "traditional" API which returns one number per sample, in this case
    # just the median/mean
    assert_array_equal(model.predict(X_).shape, (len(X_),))

    l, c, h = model.predict(X_, return_quantiles=True).T
    assert_equal(l.shape, c.shape)
    assert_equal(c.shape, h.shape)
    assert_equal(l.shape[0], X_.shape[0])

    mean, std = model.predict(X_, return_std=True)
    assert_array_equal(mean, c)
    assert_array_equal(std, (h - l) / 2.0)
Example #6
    def __init__(self,
                 problem,
                 num_workers,
                 surrogate_model='RF',
                 acq_func='gp_hedge',
                 acq_kappa=1.96,
                 liar_strategy='cl_max',
                 n_jobs=-1,
                 **kwargs):
        assert surrogate_model in [
            "RF", "ET", "GBRT", "GP", "DUMMY"
        ], f"Unknown scikit-optimize base_estimator: {surrogate_model}"
        if surrogate_model == "RF":
            base_estimator = RandomForestRegressor(n_jobs=n_jobs)
        elif surrogate_model == "ET":
            base_estimator = ExtraTreesRegressor(n_jobs=n_jobs)
        elif surrogate_model == "GBRT":
            base_estimator = GradientBoostingQuantileRegressor(n_jobs=n_jobs)
        else:
            base_estimator = surrogate_model

        self.space = problem.space
        cs_kwargs = self.space['create_search_space'].get('kwargs')
        if cs_kwargs is None:
            search_space = self.space['create_search_space']['func']()
        else:
            search_space = self.space['create_search_space']['func'](
                **cs_kwargs)

        # queue of remaining starting points (currently disabled)
        # self.starting_points = problem.starting_point
        n_init = np.inf if surrogate_model == 'DUMMY' else num_workers

        self.starting_points = []  # ! EMPTY for now TODO

        # Building search space for SkOptimizer
        skopt_space = [(0, vnode.num_ops - 1)
                       for vnode in search_space.variable_nodes]

        self._optimizer = SkOptimizer(skopt_space,
                                      base_estimator=base_estimator,
                                      acq_optimizer='sampling',
                                      acq_func=acq_func,
                                      acq_func_kwargs={'kappa': acq_kappa},
                                      random_state=self.SEED,
                                      n_initial_points=n_init)

        assert liar_strategy in "cl_min cl_mean cl_max".split()
        self.strategy = liar_strategy
        self.evals = {}
        self.counter = 0
        logger.info("Using skopt.Optimizer with %s base_estimator" %
                    surrogate_model)
Example #7
def cook_estimator(base_estimator, space=None, **kwargs):
    if isinstance(base_estimator, str):
        base_estimator = base_estimator.upper()
        allowed_estimators = ['GP', 'ET', 'RF', 'GBRT', 'DUMMY']
        if base_estimator not in allowed_estimators:
            raise ValueError(
                'invalid estimator, should be in {}, got {}'.format(
                    allowed_estimators, base_estimator))
    elif not is_regressor(base_estimator):
        raise ValueError('base estimator should be a regressor, got {}'.format(
            base_estimator))

    if base_estimator == 'GP':
        if space is not None:
            # space = Space(space)
            space = Space(normalize_param_space(space))
            n_params = space.transformed_n_params
            is_cat = space.is_categorical
        else:
            raise ValueError('expected a space instance, got None')
        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_params))
        else:
            other_kernel = Matern(length_scale=np.ones(n_params),
                                  length_scale_bounds=[(0.01, 100)] * n_params,
                                  nu=2.5)
        base_estimator = GaussianProcessRegressor(kernel=cov_amplitude *
                                                  other_kernel,
                                                  normalize_y=True,
                                                  noise='gaussian',
                                                  n_restarts_optimizer=2)
    elif base_estimator == 'RF':
        base_estimator = RandomForestRegressor(n_estimators=100,
                                               min_samples_leaf=3)
    elif base_estimator == 'ET':
        base_estimator = ExtraTreesRegressor(n_estimators=100,
                                             min_samples_leaf=3)
    elif base_estimator == 'GBRT':
        gbrt = GradientBoostingRegressor(n_estimators=30, loss='quantile')
        base_estimator = GradientBoostingQuantileRegressor(base_estimator=gbrt)
    elif base_estimator == 'DUMMY':
        return None

    base_estimator.set_params(**kwargs)
    return base_estimator
Example #8
def test_gbrt_base_estimator():
    rng = np.random.RandomState(1)
    N = 10000
    X = np.ones((N, 1))
    y = rng.normal(size=N)

    base = RandomForestRegressor()
    rgr = GradientBoostingQuantileRegressor(base_estimator=base)
    assert_raise_message(ValueError, 'type GradientBoostingRegressor',
                         rgr.fit, X, y)

    base = GradientBoostingRegressor()
    rgr = GradientBoostingQuantileRegressor(base_estimator=base)
    assert_raise_message(ValueError, 'quantile loss', rgr.fit, X, y)

    base = GradientBoostingRegressor(loss='quantile', n_estimators=20)
    rgr = GradientBoostingQuantileRegressor(base_estimator=base)
    rgr.fit(X, y)

    estimates = rgr.predict(X)
    assert_almost_equal(stats.norm.ppf(rgr.quantiles),
                        np.mean(estimates, axis=0),
                        decimal=2)
Example #9
    def __init__(self,
                 problem,
                 num_workers,
                 surrogate_model='RF',
                 acq_func='gp_hedge',
                 acq_kappa=1.96,
                 liar_strategy='cl_max',
                 n_jobs=1,
                 **kwargs):

        assert surrogate_model in [
            "RF", "ET", "GBRT", "GP", "DUMMY"
        ], f"Unknown scikit-optimize base_estimator: {surrogate_model}"

        if surrogate_model == "RF":
            base_estimator = RandomForestRegressor(n_jobs=n_jobs)
        elif surrogate_model == "ET":
            base_estimator = ExtraTreesRegressor(n_jobs=n_jobs)
        elif surrogate_model == "GBRT":
            base_estimator = GradientBoostingQuantileRegressor(n_jobs=n_jobs)
        else:
            base_estimator = surrogate_model

        self.space = problem.space
        # queue of remaining starting points
        self.starting_points = problem.starting_point

        n_init = inf if surrogate_model == 'DUMMY' else max(
            num_workers, len(self.starting_points))

        self._optimizer = SkOptimizer(self.space.values(),
                                      base_estimator=base_estimator,
                                      acq_optimizer='sampling',
                                      acq_func=acq_func,
                                      acq_func_kwargs={'kappa': acq_kappa},
                                      random_state=self.SEED,
                                      n_initial_points=n_init)

        assert liar_strategy in "cl_min cl_mean cl_max".split()
        self.strategy = liar_strategy
        self.evals = {}
        self.counter = 0
        logger.info(
            f"Using skopt.Optimizer with {surrogate_model} base_estimator")
Example #10
def test_gbrt_with_std():
    # simple test of the interface
    rng = np.random.RandomState(1)
    X = rng.uniform(0, 5, 500)[:, np.newaxis]

    noise_level = 0.5
    y = truth(X) + rng.normal(0, noise_level, len(X))
    X_ = np.linspace(0, 5, 1000)[:, np.newaxis]

    model = GradientBoostingQuantileRegressor()
    model.fit(X, y)

    assert_array_equal(model.predict(X_).shape, (len(X_), 3))

    l, c, h = model.predict(X_).T
    assert_equal(l.shape, c.shape)
    assert_equal(c.shape, h.shape)
    assert_equal(l.shape[0], X_.shape[0])

    mean, std = model.predict(X_, return_std=True)
    assert_array_equal(mean, c)
    assert_array_equal(std, (h - l) / 2.0)
Example #11
def test_gbrt_with_std():
    # simple test of the interface
    rng = np.random.RandomState(1)
    X = rng.uniform(0, 5, 500)[:, np.newaxis]

    noise_level = 0.5
    y = truth(X) + sample_noise(X, noise_level, random_state=rng)

    X_ = np.linspace(0, 5, 1000)[:, np.newaxis]

    model = GradientBoostingQuantileRegressor()
    model.fit(X, y)

    assert_array_equal(model.predict(X_).shape, (len(X_), 3))

    l, c, h = model.predict(X_).T
    assert_equal(l.shape, c.shape)
    assert_equal(c.shape, h.shape)
    assert_equal(l.shape[0], X_.shape[0])

    mean, std = model.predict(X_, return_std=True)
    assert_array_equal(mean, c)
    assert_array_equal(std, (h - l) / 2.0)
Example #12
import pytest

from numpy.testing import assert_array_equal
from numpy.testing import assert_equal
from numpy.testing import assert_raises

from skopt import gp_minimize
from skopt import forest_minimize
from skopt.benchmarks import bench1, bench1_with_time
from skopt.benchmarks import branin
from skopt.learning import ExtraTreesRegressor, RandomForestRegressor
from skopt.learning import GradientBoostingQuantileRegressor
from skopt.optimizer import Optimizer
from scipy.optimize import OptimizeResult

TREE_REGRESSORS = (ExtraTreesRegressor(random_state=2),
                   RandomForestRegressor(random_state=2),
                   GradientBoostingQuantileRegressor(random_state=2))
ACQ_FUNCS_PS = ["EIps", "PIps"]
ACQ_FUNCS_MIXED = ["EI", "EIps"]
ESTIMATOR_STRINGS = [
    "GP", "RF", "ET", "GBRT", "DUMMY", "gp", "rf", "et", "gbrt", "dummy"
]


@pytest.mark.fast_test
def test_multiple_asks():
    # calling ask() multiple times without a tell() in between should
    # be a "no-op"
    base_estimator = ExtraTreesRegressor(random_state=2)
    opt = Optimizer([(-2.0, 2.0)],
                    base_estimator,
                    n_initial_points=1,
                    acq_optimizer="sampling")  # assumed closing argument; the snippet was cut off here
Example #13
def cook_estimator(base_estimator, space=None, **kwargs):
    """Cook a default estimator

    For the special `base_estimator` called "DUMMY", the return value is None. This corresponds to
    sampling points at random, hence there is no need for an estimator.

    Parameters
    ----------
    base_estimator: {SKLearn Regressor, "GP", "RF", "ET", "GBRT", "DUMMY"}, default="GP"
        If not string, should inherit from `sklearn.base.RegressorMixin`. In addition, the `predict`
        method should have an optional `return_std` argument, which returns `std(Y | x)`,
        along with `E[Y | x]`.

        If `base_estimator` is a string in {"GP", "RF", "ET", "GBRT", "DUMMY"}, a surrogate model
        corresponding to the relevant `X_minimize` function is created.
    space: `hyperparameter_hunter.space.space_core.Space`
        Required only if the `base_estimator` is a Gaussian Process. Ignored otherwise
    **kwargs: Dict
        Extra parameters provided to the `base_estimator` at initialization time

    Returns
    -------
    SKLearn Regressor
        Regressor instance cooked up according to `base_estimator` and `kwargs`"""
    #################### Validate `base_estimator` ####################
    str_estimators = ["GP", "ET", "RF", "GBRT", "DUMMY"]
    if isinstance(base_estimator, str):
        if base_estimator.upper() not in str_estimators:
            raise ValueError(
                f"Expected `base_estimator` in {str_estimators}. Got {base_estimator}"
            )
        # Convert to upper after error check, so above error shows actual given `base_estimator`
        base_estimator = base_estimator.upper()
    elif not is_regressor(base_estimator):
        raise ValueError("`base_estimator` must be a regressor")

    #################### Get Cooking ####################
    if base_estimator == "GP":
        if space is not None:
            space = Space(space)
            # NOTE: Below `normalize_dimensions` is NOT an unnecessary duplicate of the call in
            #   `Optimizer` - `Optimizer` calls `cook_estimator` before its `dimensions` have been
            #   normalized, so `normalize_dimensions` must also be called here
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
            is_cat = space.is_categorical
        else:
            raise ValueError("Expected a `Space` instance, not None")

        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        # Only special if *all* dimensions are `Categorical`
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_dims))
        else:
            other_kernel = Matern(length_scale=np.ones(n_dims),
                                  length_scale_bounds=[(0.01, 100)] * n_dims,
                                  nu=2.5)

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True,
            noise="gaussian",
            n_restarts_optimizer=2,
        )
    elif base_estimator == "RF":
        base_estimator = RandomForestRegressor(n_estimators=100,
                                               min_samples_leaf=3)
    elif base_estimator == "ET":
        base_estimator = ExtraTreesRegressor(n_estimators=100,
                                             min_samples_leaf=3)
    elif base_estimator == "GBRT":
        gbrt = GradientBoostingRegressor(n_estimators=30, loss="quantile")
        base_estimator = GradientBoostingQuantileRegressor(base_estimator=gbrt)
    elif base_estimator == "DUMMY":
        return None

    base_estimator.set_params(**kwargs)
    return base_estimator
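A hedged usage sketch for the function above: cook a GBRT surrogate and hand it to skopt's Optimizer. The Optimizer import and the sampling acquisition optimizer are assumptions (tree-based surrogates provide no gradients, so "sampling" is the safe choice); extra kwargs are forwarded to the estimator via set_params:

from skopt import Optimizer

gbrt = cook_estimator("GBRT", n_jobs=1)  # n_jobs forwarded via set_params
opt = Optimizer([(0.0, 1.0)], base_estimator=gbrt,
                n_initial_points=5, acq_optimizer="sampling")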
Example #14
    def __init__(self,
                 dimensions,
                 base_estimator="GP",
                 maximize=True,
                 n_random_starts=10,
                 acq_func="LCB",
                 acq_optimizer="lbfgs",
                 random_state=None,
                 n_points=10000,
                 n_restarts_optimizer=5,
                 xi=0.01,
                 kappa=1.96,
                 n_jobs=1):
        if not skopt_available:
            raise ImportError("skopt is not installed correctly")
        self.maximize = maximize
        self.n_params = len(dimensions)

        rng = check_random_state(random_state)
        if isinstance(base_estimator, str):
            if base_estimator == "RF":
                base_estimator = RandomForestRegressor(n_estimators=100,
                                                       min_samples_leaf=3,
                                                       n_jobs=n_jobs,
                                                       random_state=rng)
            elif base_estimator == "ET":
                base_estimator = ExtraTreesRegressor(n_estimators=100,
                                                     min_samples_leaf=3,
                                                     n_jobs=n_jobs,
                                                     random_state=rng)
            elif base_estimator == "GP":
                cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
                matern = Matern(length_scale=np.ones(len(dimensions)),
                                length_scale_bounds=[(0.01, 100)] *
                                len(dimensions),
                                nu=2.5)
                base_estimator = GaussianProcessRegressor(
                    kernel=cov_amplitude * matern,
                    normalize_y=True,
                    random_state=rng,
                    alpha=0.0,
                    noise="gaussian",
                    n_restarts_optimizer=2)
            elif base_estimator == "GBRT":
                gbrt = GradientBoostingRegressor(n_estimators=30,
                                                 loss="quantile")
                base_estimator = GradientBoostingQuantileRegressor(
                    base_estimator=gbrt, n_jobs=n_jobs, random_state=rng)
            else:
                raise ValueError(
                    "Valid strings for the base_estimator parameter"
                    " are: 'RF', 'ET', 'GP', or 'GBRT', not '%s'" % base_estimator)

        acq_func_kwargs = {"xi": xi, "kappa": kappa}
        acq_optimizer_kwargs = {
            "n_points": n_points,
            "n_restarts_optimizer": n_restarts_optimizer,
            "n_jobs": n_jobs
        }
        self.optimizer = _SkOptOptimizer(
            dimensions=dimensions,
            base_estimator=base_estimator,
            n_initial_points=n_random_starts,
            acq_func=acq_func,
            acq_optimizer=acq_optimizer,
            random_state=random_state,
            acq_func_kwargs=acq_func_kwargs,
            acq_optimizer_kwargs=acq_optimizer_kwargs)
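Once constructed, a wrapper like this is driven through skopt's ask/tell protocol. A self-contained sketch against skopt.Optimizer directly (the wrapper's own public methods may differ):

from skopt import Optimizer
from skopt.learning import GradientBoostingQuantileRegressor

opt = Optimizer([(-2.0, 2.0)],
                base_estimator=GradientBoostingQuantileRegressor(),
                n_initial_points=5,
                acq_optimizer="sampling")  # tree/GBRT surrogates need sampling
for _ in range(15):
    x = opt.ask()            # propose the next point
    y = (x[0] - 0.3) ** 2    # toy objective to minimize
    opt.tell(x, y)           # report the observation back
print(min(opt.yi))           # best objective value seen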
Example #15
    def __init__(
        self,
        problem,
        num_workers,
        surrogate_model="RF",
        acq_func="gp_hedge",
        acq_kappa=1.96,
        liar_strategy="cl_max",
        n_jobs=1,
        **kwargs,
    ):
        assert surrogate_model in [
            "RF",
            "ET",
            "GBRT",
            "GP",
            "DUMMY",
        ], f"Unknown scikit-optimize base_estimator: {surrogate_model}"
        if surrogate_model == "RF":
            base_estimator = RandomForestRegressor(n_jobs=n_jobs)
        elif surrogate_model == "ET":
            base_estimator = ExtraTreesRegressor(n_jobs=n_jobs)
        elif surrogate_model == "GBRT":
            base_estimator = GradientBoostingQuantileRegressor(n_jobs=n_jobs)
        else:
            base_estimator = surrogate_model

        self.problem = problem
        cs_kwargs = self.problem.space["create_search_space"].get("kwargs")
        if cs_kwargs is None:
            search_space = self.problem.space["create_search_space"]["func"]()
        else:
            search_space = self.problem.space["create_search_space"]["func"](
                **cs_kwargs)

        n_init = np.inf if surrogate_model == "DUMMY" else num_workers

        self.starting_points = []  # ! EMPTY for now TODO

        # Building search space for SkOptimizer using ConfigSpace
        skopt_space = cs.ConfigurationSpace(seed=self.problem.seed)
        for i, vnode in enumerate(search_space.variable_nodes):
            hp = csh.UniformIntegerHyperparameter(name=f"vnode_{i}",
                                                  lower=0,
                                                  upper=(vnode.num_ops - 1))
            skopt_space.add_hyperparameter(hp)

        self._optimizer = SkOptimizer(
            skopt_space,
            base_estimator=base_estimator,
            acq_optimizer="sampling",
            acq_func=acq_func,
            acq_func_kwargs={"kappa": acq_kappa},
            random_state=self.SEED,
            n_initial_points=n_init,
        )

        assert liar_strategy in "cl_min cl_mean cl_max".split()
        self.strategy = liar_strategy
        self.evals = {}
        self.counter = 0
        logger.info("Using skopt.Optimizer with %s base_estimator" %
                    surrogate_model)
Example #16
def cook_estimator(base_estimator, space=None, **kwargs):
    """
    Cook a default estimator.
    For the special base_estimator called "DUMMY" the return value is None.
    This corresponds to sampling points at random, hence there is no need
    for an estimator.
    Parameters
    ----------
    * `base_estimator` ["GP", "RF", "ET", "GBRT", "DUMMY"
                        or sklearn regressor, default="GP"]:
        Should inherit from `sklearn.base.RegressorMixin`.
        In addition, the `predict` method should have an optional `return_std`
        argument, which returns `std(Y | x)` along with `E[Y | x]`.
        If base_estimator is one of ["GP", "RF", "ET", "GBRT", "DUMMY"], a
        surrogate model corresponding to the relevant `X_minimize` function
        is created.
    * `space` [Space instance]:
        Has to be provided if the base_estimator is a gaussian process.
        Ignored otherwise.
    * `kwargs` [dict]:
        Extra parameters provided to the base_estimator at init time.
    """
    if isinstance(base_estimator, str):
        base_estimator = base_estimator.upper()
        if base_estimator not in ["GP", "ET", "RF", "GBRT", "DUMMY",
                                  "GPM32", "GPM1", "RBF", "RQ"]:
            raise ValueError("Valid strings for the base_estimator parameter "
                             "are: 'GP', 'ET', 'RF', 'GBRT', 'DUMMY', 'GPM32', "
                             "'GPM1', 'RBF' or 'RQ', not %s." % base_estimator)
    elif not is_regressor(base_estimator):
        raise ValueError("base_estimator has to be a regressor.")

    if base_estimator == "GP":
        if space is not None:
            space = Space(space)
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
            is_cat = space.is_categorical

        else:
            raise ValueError("Expected a Space instance, not None.")

        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        # only special if *all* dimensions are categorical
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_dims))
        else:
            other_kernel = Matern(
                length_scale=np.ones(n_dims),
                length_scale_bounds=[(0.01, 100)] * n_dims, nu=2.5)

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, noise="gaussian",
            n_restarts_optimizer=2)

    elif base_estimator == "GPM32":
        if space is not None:
            space = Space(space)
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
            is_cat = space.is_categorical

        else:
            raise ValueError("Expected a Space instance, not None.")

        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        # only special if *all* dimensions are categorical
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_dims))
        else:
            other_kernel = Matern(
                length_scale=np.ones(n_dims),
                length_scale_bounds=[(0.01, 100)] * n_dims, nu=1.5)

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, noise="gaussian",
            n_restarts_optimizer=2)

    elif base_estimator == "GPM1":
        if space is not None:
            space = Space(space)
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
            is_cat = space.is_categorical

        else:
            raise ValueError("Expected a Space instance, not None.")

        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        # only special if *all* dimensions are categorical
        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(n_dims))
        else:
            other_kernel = Matern(
                length_scale=np.ones(n_dims),
                length_scale_bounds=[(0.01, 100)] * n_dims, nu=1.5)

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, noise="gaussian",
            n_restarts_optimizer=2)

    elif base_estimator == "RBF":
        if space is not None:
            space = Space(space)
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
        else:
            raise ValueError("Expected a Space instance, not None.")
        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        other_kernel = RBF(length_scale=np.ones(n_dims))

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, noise="gaussian",
            n_restarts_optimizer=2)

    elif base_estimator == "RQ":
        if space is not None:
            space = Space(space)
            space = Space(normalize_dimensions(space.dimensions))
            n_dims = space.transformed_n_dims
        else:
            raise ValueError("Expected a Space instance, not None.")
        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))
        other_kernel = RationalQuadratic(length_scale=np.ones(n_dims), alpha=0.1)

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, noise="gaussian",
            n_restarts_optimizer=2)

    elif base_estimator == "RF":
        base_estimator = RandomForestRegressor(n_estimators=100,
                                               min_samples_leaf=3)
    elif base_estimator == "ET":
        base_estimator = ExtraTreesRegressor(n_estimators=100,
                                             min_samples_leaf=3)
    elif base_estimator == "GBRT":
        gbrt = GradientBoostingRegressor(n_estimators=30, loss="quantile")
        base_estimator = GradientBoostingQuantileRegressor(base_estimator=gbrt)

    elif base_estimator == "DUMMY":
        return None

    base_estimator.set_params(**kwargs)
    return base_estimator
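The GP variants above differ only in the covariance paired with a constant amplitude: Matern nu=2.5 for "GP", nu=1.5 for "GPM32" (and, as written, "GPM1" repeats nu=1.5), plus RBF and RationalQuadratic options. A standalone sketch of that kernel composition, using sklearn's kernel classes as stand-ins for whatever the module above imports:

import numpy as np
from sklearn.gaussian_process.kernels import ConstantKernel, Matern

n_dims = 3  # stands in for space.transformed_n_dims
kernel = ConstantKernel(1.0, (0.01, 1000.0)) * Matern(
    length_scale=np.ones(n_dims),
    length_scale_bounds=[(0.01, 100)] * n_dims,
    nu=2.5)
print(kernel)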