Exemplo n.º 1
0
def test_reproducible_runs(strategy, surrogate):
    """
    Tests whether two identically seeded runs of the optimizer ask for
    exactly the same points.

    Parameters
    ----------
    * `strategy`:
        Passed as the second positional argument of `Optimizer.ask`.

    * `surrogate`:
        Callable invoked as `surrogate(random_state=1)` to build the
        base estimator of each optimizer.
    """

    def make_optimizer():
        # every run gets its own estimator and the same fixed seeds
        return Optimizer(
            base_estimator=surrogate(random_state=1),
            dimensions=[Real(-5.0, 10.0), Real(0.0, 15.0)],
            acq_optimizer='sampling',
            random_state=1,
        )

    # first run: record every batch that was asked for
    first_run = make_optimizer()
    points = []
    for _ in range(n_steps):
        batch = first_run.ask(n_points, strategy)
        points.append(batch)
        first_run.tell(batch, [branin(p) for p in batch])

    # second run: each batch has to be exactly the recorded one
    second_run = make_optimizer()
    for expected in points:
        batch = second_run.ask(n_points, strategy)

        assert expected == batch

        second_run.tell(batch, [branin(p) for p in batch])
Exemplo n.º 2
0
def test_reproducible_runs(strategy, surrogate):
    """
    Runs the optimizer twice with identical seeds and checks that the
    second run asks for exactly the points recorded during the first.

    Parameters
    ----------
    * `strategy`:
        Passed as the second positional argument of `Optimizer.ask`.

    * `surrogate`:
        Callable invoked as `surrogate(random_state=1)` to build the
        base estimator of each optimizer.
    """

    def new_optimizer():
        # identical configuration and seeds for both runs
        estimator = surrogate(random_state=1)
        space = [Real(-5.0, 10.0), Real(0.0, 15.0)]
        return Optimizer(base_estimator=estimator,
                         dimensions=space,
                         acq_optimizer='sampling',
                         random_state=1)

    def objective_values(batch):
        # branin evaluated at every point of the batch
        return [branin(point) for point in batch]

    reference = new_optimizer()
    points = []
    for _step in range(n_steps):
        asked = reference.ask(n_points, strategy)
        points.append(asked)
        reference.tell(asked, objective_values(asked))

    # the replayed batches should be exactly as they are in `points`
    replay = new_optimizer()
    for step in range(n_steps):
        asked = replay.ask(n_points, strategy)

        assert points[step] == asked

        replay.tell(asked, objective_values(asked))
Exemplo n.º 3
0
def create_guassian_process():
    """
    Run a batched optimisation with a 'gp' base estimator and pickle the
    resulting optimizer.

    Three hand-picked configurations are evaluated first and told to the
    optimizer.  Then 20 rounds follow; each asks for 3 points, evaluates
    them with 3 parallel jobs, prints the objective values and tells them
    back.  Finally the optimizer is pickled to ``df_gp_res.pickle`` in the
    current working directory.

    Relies on the module-level names ``dimensions`` and ``fitness``.
    """
    # the initial configurations differ only in their fourth entry
    x0 = [
        [1e-3, 3, 200, "block", 10, 'relu', 64],
        [1e-3, 3, 200, "bowtie", 10, 'relu', 64],
        [1e-3, 3, 200, "diamond", 10, 'relu', 64],
    ]
    optimizer = Optimizer(dimensions=dimensions,
                          random_state=1,
                          n_initial_points=3,
                          base_estimator='gp')
    y = Parallel(n_jobs=3)(delayed(fitness)(v) for v in x0)
    optimizer.tell(x0, y)
    for _ in range(20):
        x = optimizer.ask(n_points=3)
        y = Parallel(n_jobs=3)(delayed(fitness)(v) for v in x)
        # join the values: printing the bare generator expression would
        # only show the generator object's repr
        print(", ".join(str(val) for val in y))
        optimizer.tell(x, y)
    # the context manager guarantees the file is flushed and closed
    with open("df_gp_res.pickle", "wb") as results_file:
        pickle.dump(optimizer, results_file)
Exemplo n.º 4
0
def test_same_set_of_points_ask(strategy, surrogate):
    """
    Checks that two back-to-back calls to `ask` with the same `n_points`,
    without a `tell` in between, return equal sets of points.

    Parameters
    ----------
    * `strategy` [string]:
        Name of the strategy to use during optimization.

    * `surrogate` [scikit-optimize surrogate class]:
        A class of the scikit-optimize surrogate used in Optimizer.
    """
    estimator = surrogate()
    search_space = [Real(-5.0, 10.0), Real(0.0, 15.0)]
    optimizer = Optimizer(
        base_estimator=estimator,
        dimensions=search_space,
        acq_optimizer='sampling',
        random_state=2,
    )

    for _ in range(n_steps):
        first = optimizer.ask(n_points, strategy)
        second = optimizer.ask(n_points, strategy)
        optimizer.tell(first, [branin(p) for p in first])
        # both answers were produced before the tell above
        assert_equal(first, second)
Exemplo n.º 5
0
def test_constant_liar_runs(strategy, surrogate, acq_func):
    """
    Checks that batched `ask` / `tell` cycles run without error and that
    every `ask` returns exactly `n_points` points.

    Parameters
    ----------
    * `strategy` [string]:
        Name of the strategy to use during optimization.

    * `surrogate` [scikit-optimize surrogate class]:
        A class of the scikit-optimize surrogate used in Optimizer.

    * `acq_func` [string]:
        Acquisition function handed to Optimizer.  When its name contains
        "ps", every objective is told as a `[value, 1.1]` pair.
    """
    estimator = surrogate()
    optimizer = Optimizer(
        base_estimator=estimator,
        dimensions=[Real(-5.0, 10.0), Real(0.0, 15.0)],
        acq_func=acq_func,
        acq_optimizer='sampling',
        random_state=0,
    )

    # test arguments check
    # NOTE(review): each dict is handed to `ask` as one positional argument
    # (i.e. in the `n_points` slot), it is not unpacked into keyword
    # arguments -- confirm that this is what the check intends.
    invalid_arguments = (
        {"strategy": "cl_maen"},
        {"n_points": "0"},
        {"n_points": 0},
    )
    for bad_argument in invalid_arguments:
        assert_raises(ValueError, optimizer.ask, bad_argument)

    for _ in range(n_steps):
        batch = optimizer.ask(n_points=n_points, strategy=strategy)
        # check if actually n_points was generated
        assert_equal(len(batch), n_points)

        if "ps" in acq_func:
            observations = [[branin(p), 1.1] for p in batch]
        else:
            observations = [branin(p) for p in batch]
        optimizer.tell(batch, observations)
Exemplo n.º 6
0
class BayesianOptimizer(BaseAlgorithm):
    """Wrapper around skopt's bayesian optimizer."""

    def __init__(self, space, **kwargs):
        """Create the wrapped optimizer.

        `space` is forwarded to the base class and converted with
        `convert_orion_space_to_skopt_space`; `kwargs` are passed verbatim
        to `GaussianProcessRegressor`.
        """
        super(BayesianOptimizer, self).__init__(space)

        surrogate = GaussianProcessRegressor(**kwargs)
        skopt_dimensions = convert_orion_space_to_skopt_space(space)
        self.optimizer = Optimizer(base_estimator=surrogate,
                                   dimensions=skopt_dimensions)

        # strategy name handed to every `ask` call
        self.strategy = "cl_min"

    def suggest(self, num=1):
        """Suggest a `num`ber of new sets of parameters.

        Returns what the wrapped optimizer's `ask` produces for `num`
        points with `self.strategy`.

        """
        return self.optimizer.ask(n_points=num, strategy=self.strategy)

    def observe(self, points, results):
        """Observe evaluation `results` corresponding to list of `points` in
        space.

        The 'objective' entry of every result is told to the wrapped
        optimizer together with `points`.

        """
        objectives = [result['objective'] for result in results]
        self.optimizer.tell(points, objectives)

    @property
    def is_done(self):
        """Terminating condition; this implementation never terminates."""
        return False
Exemplo n.º 7
0
def test_all_points_different(strategy, surrogate):
    """
    Checks that all points of every batch returned by `ask` are pairwise
    further apart than a small tolerance.

    Parameters
    ----------
    * `strategy` [string]:
        Name of the strategy to use during optimization.

    * `surrogate` [scikit-optimize surrogate class]:
        A class of the scikit-optimize surrogate used in Optimizer.
    """
    estimator = surrogate()
    optimizer = Optimizer(
        base_estimator=estimator,
        dimensions=[Real(-5.0, 10.0), Real(0.0, 15.0)],
        acq_optimizer='sampling',
        random_state=1,
    )

    # points whose distance is not above this value are assumed to be same
    min_separation = 1e-3
    for _ in range(n_steps):
        batch = optimizer.ask(n_points, strategy)
        optimizer.tell(batch, [branin(p) for p in batch])
        assert all(pdist(batch) > min_separation)
Exemplo n.º 8
0
def optimize(cfgFilename):
    """
    Ask the optimizer for the next point and append it to ``tempOpt.txt``.

    The configuration is read with `importJsonCfg` / `unpackVariables`.
    When ``tempOpt.txt`` already exists, the points and scores returned by
    `importFromFile` are told to the optimizer first.  `shouldIContinue`
    is then called with the history and the early-stopping settings before
    a new point is asked for, converted with `convertPoint` and appended,
    space separated, to ``tempOpt.txt``.

    :param cfgFilename: configuration file handed to `importJsonCfg`
    """
    params = importJsonCfg(cfgFilename)
    bo_params = unpackVariables(params)

    search_space = bo_params["Variables"]
    opt = Optimizer(
        search_space,
        base_estimator=params["BaseEstimator"],
        acq_func=params["AcquisitionFunction"],
        acq_optimizer=params["AcquisitionOptimizer"],
    )

    max_iterations = int(params["maxOptIter"])

    history_points = []
    history_scores = []

    # replay earlier evaluations so the optimizer knows about them
    if os.path.isfile("tempOpt.txt"):
        history_points, history_scores = importFromFile(
            bo_params["VariableNames"], bo_params["FixedVars"])
        for index, score in enumerate(history_scores):
            opt.tell(history_points[index], score)

    shouldIContinue(
        history_points,
        history_scores,
        max_iterations,
        int(params["EarlyStoppingNBest"]),
        float(params["EarlyStoppingDelta"]),
    )

    next_point = convertPoint(opt.ask(), bo_params["VariableNames"],
                              bo_params["FixedVars"])
    with open("tempOpt.txt", "a") as fout:
        fout.write(" ".join(str(value) for value in next_point))
Exemplo n.º 9
0
def test_same_set_of_points_ask(strategy, surrogate):
    """
    For n_points not None, verifies that asking twice in a row (with no
    `tell` in between) produces the same set of points both times.

    Parameters
    ----------
    * `strategy` [string]:
        Name of the strategy to use during optimization.

    * `surrogate` [scikit-optimize surrogate class]:
        A class of the scikit-optimize surrogate used in Optimizer.
    """
    optimizer = Optimizer(base_estimator=surrogate(),
                          dimensions=[Real(-5.0, 10.0), Real(0.0, 15.0)],
                          acq_optimizer='sampling',
                          random_state=2)

    remaining = n_steps
    while remaining > 0:
        remaining -= 1
        asked_once = optimizer.ask(n_points, strategy)
        asked_again = optimizer.ask(n_points, strategy)
        objective_values = [branin(point) for point in asked_once]
        optimizer.tell(asked_once, objective_values)
        # the two sets of points generated above have to be equal
        assert_equal(asked_once, asked_again)
Exemplo n.º 10
0
def job(loss):
    """
    Run 128 ask / score / tell iterations for one loss and dump the
    scorer's result to ``<loss>.json``.

    Relies on the module-level names ``X``, ``y``, ``words`` and
    ``postfx`` for building the scorer.

    :param loss: loss name; it is the only candidate of the 'loss'
        dimension and also the stem of the output file name
    """
    import json

    scorer = SklearnScorer(
        X, y, words, postfx,
        rules_apply=0.8,
        max_endings=75
    )

    space = {
        'alpha': (0.0001, 1.0, 'log-uniform'),
        'l1_ratio': (0.001, 0.999),
        'loss': [loss],
        'epsilon': (0.001, 10.0, 'log-uniform'),
        'threshold': (0.00001, 0.001, 'log-uniform'),
    }

    # the space's own values, flattened to a list, serve as dimensions
    opt = Optimizer(point_aslist(space, space))

    for i in range(128):
        p = point_asdict(space, opt.ask())

        f = scorer(p)

        opt.tell(point_aslist(space, p), f)
        print(f)
        print(i, scorer.best_obj, scorer.best_params)

    # the context manager guarantees the file is flushed and closed
    with open(loss + '.json', 'w') as result_file:
        json.dump(scorer.result, result_file, indent=2, sort_keys=True)
Exemplo n.º 11
0
def test_constant_liar_runs(strategy, surrogate, acq_func):
    """
    Exercises repeated batched `ask` / `tell` cycles and verifies that
    each `ask` yields exactly `n_points` points.

    Parameters
    ----------
    * `strategy` [string]:
        Name of the strategy to use during optimization.

    * `surrogate` [scikit-optimize surrogate class]:
        A class of the scikit-optimize surrogate used in Optimizer.

    * `acq_func` [string]:
        Acquisition function handed to Optimizer.  When its name contains
        "ps", every objective is told as a `[value, 1.1]` pair.
    """
    optimizer = Optimizer(base_estimator=surrogate(),
                          dimensions=[Real(-5.0, 10.0), Real(0.0, 15.0)],
                          acq_func=acq_func,
                          acq_optimizer='sampling',
                          random_state=0)

    def expect_value_error(argument):
        # NOTE(review): `argument` reaches `ask` as a single positional
        # argument (the `n_points` slot), not as keyword arguments --
        # confirm that this is intended.
        assert_raises(ValueError, optimizer.ask, argument)

    # test arguments check
    expect_value_error({"strategy": "cl_maen"})
    expect_value_error({"n_points": "0"})
    expect_value_error({"n_points": 0})

    def observe(point):
        # "ps" acquisition functions get a second value per observation
        value = branin(point)
        return [value, 1.1] if "ps" in acq_func else value

    for _ in range(n_steps):
        asked = optimizer.ask(n_points=n_points, strategy=strategy)
        # check if actually n_points was generated
        assert_equal(len(asked), n_points)

        optimizer.tell(asked, [observe(point) for point in asked])
Exemplo n.º 12
0
def evaluate_optimizer(surrogate, model, dataset, n_calls, random_state):
    """
    Runs a surrogate-driven search over the parameters of a model with a
    limited budget of evaluations and records the history of the search.

    Parameters
    ----------
    * `surrogate`:
        Callable invoked as `surrogate(random_state=random_state)`; the
        result is the estimator handed to Optimizer.
    * `model`:
        Model handed to `MLBench`.
    * `dataset`:
        Dataset handed to `MLBench`.
    * `n_calls`: int
        Budget of evaluations
    * `random_state`: seed
        Seeds numpy's global generator, the surrogate and the Optimizer.

    Returns
    -------
    * `trace`: list of tuples
        One (p, f(p), best) tuple per evaluation, where p is a dictionary
        of the form "param name":value, f(p) is the negated result of
        `evaluate` for p and best is the lowest f(p) up to that index.
    """
    # seeding is necessary for processes which fork at the same time
    # so that random numbers generated in processes are different
    np.random.seed(random_state)
    bench = MLBench(model, dataset)
    search_space = bench.space

    # initialization
    estimator = surrogate(random_state=random_state)
    names = sorted(search_space)
    solver = Optimizer(
        [search_space[name][0] for name in names],
        estimator,
        random_state=random_state,
    )

    trace = []
    best_so_far = np.inf

    # optimization loop
    for call_no in range(1, n_calls + 1):
        point = solver.ask()

        # pair every dimension name with the value proposed for it
        point_dct = {name: value for name, value in zip(names, point)}

        # "evaluate" yields accuracy / r^2 (the more the better), hence
        # the sign flip
        score = -bench.evaluate(point_dct)

        best_so_far = score if best_so_far > score else best_so_far

        # remember the point, objective pair
        trace.append((point_dct, score, best_so_far))
        print("Evaluation no. " + str(call_no))

        solver.tell(point, score)
    return trace
Exemplo n.º 13
0
def test_dict_list_space_representation():
    """
    Checks that converting a point from its list form to a dictionary and
    back again yields the original point.
    """
    main_ingredients = [
        'cheese', 'cherimoya', 'chicken', 'chard', 'chocolate', 'chicory'
    ]
    secondary_ingredients = ['love', 'passion', 'dedication']

    chef_space = {
        'Cooking time': (0, 1200),  # in minutes
        'Main ingredient': main_ingredients,
        'Secondary ingredient': secondary_ingredients,
        'Cooking temperature': (-273.16, 10000.0)  # in Celsius
    }

    opt = Optimizer(dimensions=dimensions_aslist(chef_space))
    point = opt.ask()

    # the round-tripped point has to be equivalent to the original one
    round_tripped = point_aslist(chef_space, point_asdict(chef_space, point))
    assert_equal(point, round_tripped)
Exemplo n.º 14
0
def test_all_points_different(strategy, surrogate):
    """
    Verifies that, within every batch the optimizer proposes, each pair
    of points is separated by more than a small tolerance.

    Parameters
    ----------
    * `strategy` [string]:
        Name of the strategy to use during optimization.

    * `surrogate` [scikit-optimize surrogate class]:
        A class of the scikit-optimize surrogate used in Optimizer.
    """
    optimizer = Optimizer(base_estimator=surrogate(),
                          dimensions=[Real(-5.0, 10.0), Real(0.0, 15.0)],
                          acq_optimizer='sampling',
                          random_state=1)

    # pairwise distances at or below this value count as "same point"
    tolerance = 1e-3
    for _step in range(n_steps):
        proposed = optimizer.ask(n_points, strategy)
        objective_values = [branin(point) for point in proposed]
        optimizer.tell(proposed, objective_values)
        pairwise = pdist(proposed)
        assert all(pairwise > tolerance)
Exemplo n.º 15
0
class GPOptimizer(HyperParamSearch):
    """Hyper-parameter search that draws its samples from a skopt
    Optimizer configured with a "GP" base estimator."""

    def __init__(self, space, samples, random_state=1):
        """
        :param space: search dimensions; forwarded to the base class and
            to the Optimizer
        :param samples: maximum number of points `ask` hands out
        :param random_state: seed of the Optimizer
        """
        super().__init__(space)
        self._num_samples = samples
        # honour the caller's seed; previously the argument was ignored
        # and the seed was hard-coded to 1 (still the default)
        self.optimizer = Optimizer(dimensions=space,
                                   random_state=random_state,
                                   base_estimator="GP",
                                   acq_optimizer="auto",
                                   n_initial_points=10)
        self.asked = 0  # number of times `ask` has been called

    def tell(self, args, train_eval, validation_eval, test_eval, model_dir):
        """Record an evaluation with the base class and tell the optimizer
        the negated ``validation_eval["ll_mean"]`` for the point `args`."""
        super().tell(args, train_eval, validation_eval, test_eval, model_dir)
        self.optimizer.tell(args, -validation_eval["ll_mean"])

    def ask(self):
        """Return the optimizer's next point.

        :raises StopIteration: once `ask` has been called more than
            `num_samples` times
        """
        self.asked += 1
        if self.asked > self._num_samples:
            raise StopIteration
        return self.optimizer.ask()

    @property
    def num_samples(self):
        """Maximum number of points `ask` hands out."""
        return self._num_samples
Exemplo n.º 16
0
def test_dict_list_space_representation():
    """
    Round-trips a point of a search space through its dictionary
    representation and checks that the list representation is unchanged.
    """
    chef_space = {
        'Cooking time': (0, 1200),  # in minutes
        'Main ingredient': ['cheese', 'cherimoya', 'chicken',
                            'chard', 'chocolate', 'chicory'],
        'Secondary ingredient': ['love', 'passion', 'dedication'],
        'Cooking temperature': (-273.16, 10000.0)  # in Celsius
    }

    dimensions = dimensions_aslist(chef_space)
    as_list = Optimizer(dimensions=dimensions).ask()

    # list -> dict -> list has to give back an equivalent point
    as_dict = point_asdict(chef_space, as_list)
    assert_equal(as_list, point_aslist(chef_space, as_dict))
Exemplo n.º 17
0
def do_bayesian_optimization(fitness, dimensions, default_parameters,
                             timesteps, n_seq):
    """
    Run 11 ask / evaluate / tell iterations with an 'EIps' optimizer.

    The first candidate is `default_parameters`; later candidates come
    from the optimizer.  A candidate is only evaluated once
    `verifyConditions(candidate, timesteps, n_seq)` accepts it; rejected
    candidates are replaced by a fresh `ask`.

    :param fitness: objective, called with the accepted candidate; its
        return value is told to the optimizer as is
    :param dimensions: search dimensions handed to the Optimizer
    :param default_parameters: candidate tried in the first iteration
    :param timesteps: forwarded to `verifyConditions`
    :param n_seq: forwarded to `verifyConditions`
    :return: the value returned by the last `tell` call
    """
    print("START BAYESIAN OPTIMIZATION")
    print("fitness", fitness)
    print("dimensions", len(dimensions))
    print("default parameters", len(default_parameters))

    opt = Optimizer(dimensions=dimensions, acq_func='EIps')
    n_calls = 11
    res = []
    for call in range(n_calls):
        next_x = default_parameters if call == 0 else opt.ask()

        # assumes verifyConditions returns a bool.
        # NOTE(review): rejected candidates are never told to the
        # optimizer; if `ask` keeps returning the same point without an
        # intermediate `tell`, this loop cannot terminate -- confirm.
        while not verifyConditions(next_x, timesteps, n_seq):
            next_x = opt.ask()

        f_val = fitness(next_x)
        res = opt.tell(next_x, f_val)

    clear_session()
    return res
Exemplo n.º 18
0
class Optimizer:
    """Tunes the parameters of a model with an `SKOptimizer` and plots the
    progress.

    NOTE(review): `__init__` raises NotImplementedError as its very first
    statement, so the class cannot currently be instantiated -- confirm
    whether that is still intended.
    """

    def __init__(self,
                 model,
                 featureset,
                 target,
                 validator,
                 goal='maximize',
                 search_spaces=None):
        """
        :param model: object whose `params` attribute is set to every
            candidate; its type supplies `search_spaces` by default
        :param featureset: passed to `validator.score` with the model
        :param target: stored as is
        :param validator: object providing `score(model, featureset)`
        :param goal: 'maximize' or 'minimize'
        :param search_spaces: mapping of parameter name to search space;
            defaults to `type(model).search_spaces`
        :raises NotImplementedError: always (see the class note)
        """
        raise NotImplementedError
        if search_spaces is None:
            self.search_spaces = type(model).search_spaces
        else:
            self.search_spaces = search_spaces
        self.parameter_names = list(self.search_spaces.keys())
        self.value_spaces = list(self.search_spaces.values())
        self.opt = SKOptimizer(self.value_spaces)
        self.model = model
        self.featureset = featureset
        self.target = target
        self.validator = validator
        self.goal = goal
        # the score is multiplied by this factor before it is told
        if self.goal == 'maximize':
            self.coeff = -1
        elif self.goal == 'minimize':
            self.coeff = 1
        else:
            raise ValueError(
                'Goal should be either to maximize or minimize objective.')

    def optimize(self, n_iters):
        """Run `n_iters` ask / score / tell iterations, redrawing the
        convergence plot after each one."""
        for _ in range(n_iters):
            pt = self.opt.ask()
            self.model.params = dict(zip(self.parameter_names, pt))
            val = self.coeff * self.validator.score(self.model,
                                                    self.featureset)
            clear_output(True)
            plot_convergence(self.opt.tell(pt, val))
            plt.show()

    def _current_result(self):
        """Build a result object from the optimizer's current state."""
        return create_result(Xi=self.opt.Xi,
                             yi=self.opt.yi,
                             space=self.opt.space,
                             rng=self.opt.rng,
                             models=self.opt.models)

    def plot_objective(self):
        """Show the objective plot for the evaluations made so far."""
        plot_objective(self._current_result(),
                       dimensions=self.parameter_names)
        plt.show()

    def plot_evaluations(self):
        """Show the evaluations plot for the evaluations made so far."""
        # imported locally so the fix does not depend on the file's import
        # block; this method used to call plot_objective by mistake
        from skopt.plots import plot_evaluations as skopt_plot_evaluations
        skopt_plot_evaluations(self._current_result(),
                               dimensions=self.parameter_names)
        plt.show()
Exemplo n.º 19
0
def test_purely_categorical_space():
    """Regression test for the bug in #908 (purely categorical space)."""
    lower_case = Categorical(['a', 'b', 'c'])
    upper_case = Categorical(['A', 'B', 'C'])
    optimizer = Optimizer([lower_case, upper_case],
                          n_initial_points=1,
                          random_state=3)

    suggestion = optimizer.ask()
    # telling the result is the call that raised before the fix
    optimizer.tell(suggestion, 1.)
Exemplo n.º 20
0
class SKOptTuner(Tuner):
    """Bayesian Optimizer."""

    def __init__(self, pipeline_hyperparameter_ranges, random_state=0):
        """Init SkOptTuner

        Arguments:
            pipeline_hyperparameter_ranges (dict): A set of hyperparameter ranges corresponding to a pipeline's parameters
            random_state (int): The random state. Defaults to 0.
        """
        super().__init__(pipeline_hyperparameter_ranges,
                         random_state=random_state)
        self.opt = Optimizer(self._search_space_ranges,
                             "ET",
                             acq_optimizer="sampling",
                             random_state=random_state)

    def add(self, pipeline_parameters, score):
        """Add score to sample

        Arguments:
            pipeline_parameters (dict): A dict of the parameters used to evaluate a pipeline
            score (float): The score obtained by evaluating the pipeline with the provided parameters

        Returns:
            None

        Raises:
            ParameterError: If the optimizer rejects the parameters with the
                int / NoneType comparison error matched below.
            Exception: Any other error raised by the optimizer is re-raised unchanged.
        """
        # skip adding nan scores
        if pd.isnull(score):
            return
        flat_parameter_values = self._convert_to_flat_parameters(
            pipeline_parameters)
        try:
            self.opt.tell(flat_parameter_values, score)
        except Exception as e:
            # arguments follow the order of the placeholders: score first,
            # then the parameters (they used to be swapped)
            logger.debug(
                'SKOpt tuner received error during add. Score: {}\nParameters: {}\nFlat parameter values: {}\nError: {}'
                .format(score, pipeline_parameters, flat_parameter_values, e))
            if str(
                    e
            ) == "'<=' not supported between instances of 'int' and 'NoneType'":
                msg = "Invalid parameters specified to SKOptTuner.add: parameters {} error {}" \
                    .format(pipeline_parameters, str(e))
                logger.error(msg)
                raise ParameterError(msg) from e
            # bare raise keeps the original traceback intact
            raise

    def propose(self):
        """Returns a suggested set of parameters to train and score a pipeline with, based off the search space dimensions and prior samples.

        Returns:
            dict: Proposed pipeline parameters
        """
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            # nothing to ask the optimizer when there are no ranges
            if not len(self._search_space_ranges):
                return self._convert_to_pipeline_parameters({})
            flat_parameters = self.opt.ask()
            return self._convert_to_pipeline_parameters(flat_parameters)
Exemplo n.º 21
0
    def runOptimizer(self):
        """
        Run the ask / evaluate / tell loop until `self.budget` distinct
        points have been evaluated.

        The points from `self.points_to_evaluate` (after
        `self.convert_points`) are evaluated and told first.  A point the
        optimizer proposes again is not re-evaluated: its stored value is
        told once more and it does not count against the budget.  The best
        value and its configuration are printed at the end.

        :return: whatever `pickleRead(self.trialsFile)` returns
        """
        opt = Optimizer(self.domain,
                        base_estimator=self.optimizer,
                        n_random_starts=self.initial_samples,
                        acq_optimizer="sampling",
                        acq_func=self.acquisition_method,
                        acq_func_kwargs=self.acq_kwargs)
        count = 0
        trails = []    # points evaluated so far
        results = []   # objective values, parallel to `trails`
        min_x = []
        # start from +inf so that the first evaluated point always becomes
        # the incumbent (a fixed 10000 left `min_x` empty whenever every
        # objective value was >= 10000)
        min_val = float("inf")

        # evaluate the user-supplied initial points
        for point in self.convert_points(self.points_to_evaluate):
            f_val = self.getObjectiveValue(point)
            count += 1
            if f_val < min_val:
                min_val = f_val
                min_x = point
            trails.append(point)
            results.append(f_val)
            opt.tell(point, f_val)

        # optimization runs
        while count < self.budget:
            next_x = opt.ask()
            if next_x not in trails:
                f_val = self.getObjectiveValue(next_x)
                count += 1
                if f_val < min_val:
                    min_val = f_val
                    min_x = next_x
                trails.append(next_x)
                results.append(f_val)
            else:
                # already evaluated: reuse the stored objective value
                f_val = results[trails.index(next_x)]
            opt.tell(next_x, f_val)

        config_type, config_size, config_num = self.convertToConfig(min_x)
        best_parameters = {
            'type': config_type,
            'size': config_size,
            'num': config_num,
        }
        print(min_val, best_parameters)
        trials = pickleRead(self.trialsFile)
        return trials
Exemplo n.º 22
0
    def _step(
        self,
        optimizer: Optimizer,
        data: Dataset,
        metrics: List[str],
        cv: Any,
        n_jobs: int,
        verbose: int,
    ) -> Result:
        """
        Runs a single ask / fit / tell iteration of the Bayesian optimization

        Parameters
        ----------
        optimizer: Optimizer
            An instance of skopt's Optimizer; it is asked for the next
            parameters and told the negated score of the first metric

        data: Dataset
           Instance of data to train on

        metrics: List of str
            List of metrics to calculate results for

        cv: Any
            Either a CV object from sklearn or an int to specify number of folds

        n_jobs
            Number of jobs to calculate in parallel

        verbose
            Verbosity level of the method

        Returns
        -------
        Result
            The result built by ``Result.from_estimator`` for the asked
            parameters
        """
        # unwrap every suggested value through numpy's .item()
        suggested = optimizer.ask()
        params = [np.array(value).item() for value in suggested]

        # turn the flat list into keyword arguments for the estimator
        named_params = point_asdict(self.param_grid, params)
        candidate = clone(self.estimator).set_params(**named_params)
        logger.info("Fitting estimator...")
        logger.debug("Fitting estimator %s", candidate)

        outcome = Result.from_estimator(
            estimator=candidate,
            data=data,
            metrics=metrics,
            cv=cv,
            n_jobs=n_jobs,
            verbose=verbose,
        )
        logger.info("Result: %s", outcome)

        # the sign of the first metric's score is flipped before telling
        negated_score = -outcome.metrics[0].score
        optimizer.tell([params], [negated_score])
        return outcome
Exemplo n.º 23
0
def evaluate_optimizer(surrogate, model, dataset, n_calls, random_state):
    """
    Evaluates some estimator for the task of optimization of parameters of some
    model, given limited number of model evaluations.

    :param surrogate: Estimator to use for optimization; called as
            surrogate(random_state=random_state).
    :param model: str, name of the ML model class to be used for parameter tuning
    :param dataset: str, name of dataset to train ML model on
    :param n_calls: a budget of evaluations
    :param random_state: random seed, used to set the random number generator in numpy
            and passed on to the benchmark, the surrogate and the optimizer
    :return: a list of triples (p, f(p), best), where p is a dictionary of the form
            "param name":value, f(p) is the negated performance measure value achieved
            by the model for configuration p and best is the lowest f(p) seen so far.
            Such list contains history of execution of optimization.
    """

    # below seed is necessary for processes which fork at the same time
    # so that random numbers generated in processes are different
    np.random.seed(random_state)

    problem = MLBench(model, dataset, random_state=random_state)
    space = problem.model_description[MODEL_PARAMETERS]

    # initialization
    estimator = surrogate(random_state=random_state)
    dimensions_names = space.keys()

    dimensions = [space[k][0] for k in dimensions_names]
    solver = Optimizer(dimensions, estimator, random_state=random_state)

    trace = []
    best_y = np.inf

    # optimization loop
    for i in range(n_calls):
        point_list = solver.ask()

        # convert list of dimension values to dictionary
        point_dct = dict(zip(dimensions_names, point_list))

        # the result of "evaluate" is accuracy / r^2, which is the more the
        # better, so it is negated
        objective_at_point = -problem.evaluate(point_dct)

        if best_y > objective_at_point:
            best_y = objective_at_point

        # remember the point, its objective and the best objective so far
        trace.append((point_dct, objective_at_point, best_y))
        print("Eval. #" + str(i))

        solver.tell(point_list, objective_at_point)

    return trace
Exemplo n.º 24
0
Arquivo: rf.py Projeto: ztuowen/tspec
class LeafOptimizer:
    """Optimizer for one list of nodes: asks an 'RF' / 'LCB' optimizer for
    parameter values, runs the scripts compiled from them and tells the
    resulting objective back."""

    # objective value recorded when a script run exits early
    FAIL = sys.float_info.max

    def __init__(self, nlist: List[TNode], rfsearch):
        """
        :param nlist: nodes of this leaf; a shallow copy is kept
        :param rfsearch: owner providing `obj`, `update` and `reporter`
        """
        self.nlist = nlist[:]
        self.path = ""
        self.minimum = self.FAIL
        odims = []
        self.space = 1  # product of all values yielded by get_dims()
        for n in nlist:
            self.path += n.hash()
            odims += n.get_odims()
            for dim in n.get_dims():
                self.space = self.space * dim
        optparam = {'kappa': 1.96}
        self.opt = Optimizer(odims,
                             base_estimator='RF',
                             acq_optimizer='sampling',
                             acq_func='LCB',
                             acq_func_kwargs=optparam)
        self.lv = self.FAIL  # objective value of the latest run
        self.l = 1
        self.exp = 0
        self.rfsearch = rfsearch
        self.reporter = rfsearch.reporter

    def execscript(self):
        """Run one ask / execute / tell iteration.

        :return: True when this run's objective is lower than every
            earlier one, False otherwise
        """
        psel = self.opt.ask()
        # turn numpy scalars into plain Python values; `.item()` replaces
        # the deprecated (and meanwhile removed) `np.asscalar`
        for i, value in enumerate(psel):
            if isinstance(value, np.generic):
                psel[i] = value.item()
        print(self.path, psel)
        pstate = {'global': dict(), 'local': dict()}
        b = 0  # offset of the current node's values inside psel
        try:
            for n in self.nlist:
                pl = len(n.get_dims())
                scr = n.compile_val(psel[b:(b + pl)])
                b += pl
                if not runseg(self.reporter, scr, pstate):
                    raise ScriptExit()
            self.lv = self.rfsearch.obj(self.reporter.metrics)
            self.rfsearch.update(self.nlist, psel)
            self.reporter.finalize(self.path, psel)
        except ScriptExit:
            self.lv = self.FAIL
        self.opt.tell(psel, self.lv)
        self.reporter.clear()
        if self.lv < self.minimum:
            self.minimum = self.lv
            return True
        return False
def bayesian_optimization(X, y, k_folds, random_state, model_space, model_creator, metric,
                          evaluation_handler, args_handler=None, post_evaluation_handler=None,
                          n_iterations=1000, extra_columns=(), n_parallel=1):
    """Run an ask/evaluate/tell loop and collect every evaluation in a DataFrame.

    Each round asks the optimizer for ``n_parallel`` points, evaluates them
    with ``evaluate_single_point`` in parallel, records the outcomes and tells
    the optimizer ``metric.rank(mean_score)`` for each point.

    Args:
        X, y, k_folds, random_state, model_creator, evaluation_handler,
            args_handler: forwarded unchanged to ``evaluate_single_point``.
            ``random_state`` also seeds the optimizer and ``k_folds`` sets the
            number of ``score<i>`` columns.
        model_space: mapping whose keys become result columns; converted to
            optimizer dimensions by ``model_space_to_dims``.
        metric: object whose ``rank(score)`` gives the value to minimize.
        post_evaluation_handler: optional callable invoked as
            ``handler(model_idx, data, mean_score)`` after every evaluation and
            once at the end as ``handler(-model_idx, data, None)``.
        n_iterations: evaluation budget. NOTE: rounds always ask for
            ``n_parallel`` points, so when ``n_iterations`` is not a multiple
            of ``n_parallel`` the last round exceeds the budget.
        extra_columns: names of additional result columns filled from the
            ``extra_values`` mapping returned for each evaluation.
        n_parallel: number of points evaluated per round.

    Returns:
        ``pandas.DataFrame`` built from the collected columns.
    """
    dim_names, dims = model_space_to_dims(model_space)
    opt = Optimizer(dims, random_state=random_state)

    data = {column: [] for column in model_space.keys()}
    data['mean_score'] = []
    for fold in range(k_folds):
        data['score%d' % fold] = []
    for column in extra_columns:
        data[column] = []

    model_idx = 0
    for _ in range(0, n_iterations, n_parallel):
        suggested = opt.ask(n_points=n_parallel)

        results = Parallel(n_jobs=n_parallel)(
            delayed(evaluate_single_point)(
                X, y, k_folds, random_state, point, model_creator,
                evaluation_handler, args_handler, dim_names, model_idx + parallel_offset)
            for point, parallel_offset in zip(suggested, range(n_parallel))
        )

        # save scores
        for model_args, model_mean_score, model_scores, results_model_idx, extra_values in results:
            for arg_name in data:
                if arg_name in model_args:
                    data[arg_name].append(model_args[arg_name])

            for column, value in extra_values.items():
                data[column].append(value)

            data['mean_score'].append(model_mean_score)
            # Separate name so the round counter is never shadowed.
            for fold in range(k_folds):
                data['score%d' % fold].append(model_scores[fold])

            if post_evaluation_handler is not None:
                post_evaluation_handler(
                    results_model_idx, data, model_mean_score)

        opt.tell(suggested, [
            metric.rank(model_mean_score)
            for _args, model_mean_score, _scores, _idx, _extra in results])

        model_idx += n_parallel

    if post_evaluation_handler is not None:
        post_evaluation_handler(-model_idx, data, None)

    return pd.DataFrame(data=data)
Exemplo n.º 26
0
def run_bayopt(conf):
    """Run a Bayesian optimization for every configured algorithm.

    Args:
        conf: configuration dictionary. ``conf['algorithms']`` lists the
            entries to optimize. The optional ``conf['optimize']`` section may
            provide ``iterations`` (default 100), ``iterations_skip``
            (default 0) and ``initial_points`` (default 10).
    """

    def optimize_setting(key, fallback):
        # Value of conf['optimize'][key] when both levels exist, else fallback.
        if 'optimize' in conf and key in conf['optimize']:
            return conf['optimize'][key]
        return fallback

    iterations = optimize_setting('iterations', 100)
    start = optimize_setting('iterations_skip', 0)
    print('run opt with {} iterations starting at {}'.format(
        iterations, start))

    # State shared with run_bayopt_single; the score of the latest run is read
    # back from its 'current' entry after each call.
    shared_state = {'results': []}

    for entry in conf['algorithms']:
        space_dict = generate_space(entry)

        # One optimizer per algorithm, spanning that algorithm's space.
        opt = Optimizer(list(space_dict.values()),
                        n_initial_points=optimize_setting('initial_points', 10))

        for i in range(start, iterations):
            print('start bayesian test ', str(i))
            suggested = opt.ask()
            params = dict(zip(space_dict.keys(), suggested))

            algo_instance = create_algorithm_dict(entry, params)

            run_bayopt_single(conf, algo_instance, i, shared_state)
            # The score is negated before being handed to the optimizer.
            opt.tell(suggested, -1 * shared_state['current'])

    # Merge all per-run result mappings; later entries overwrite earlier keys.
    global_results = {}
    for results in shared_state['results']:
        global_results.update(results.items())

    write_results_csv(global_results, conf)
Exemplo n.º 27
0
def test_names_dimensions():
    """Smoke test: run two ask/tell steps and plot the resulting objective."""

    def objective(x, noise_level=0.1):
        # Noisy 1-D function of the first coordinate of ``x``.
        signal = np.sin(5 * x[0]) * (1 - np.tanh(x[0] ** 2))
        return signal + np.random.randn() * noise_level

    # Single real dimension given as a plain (low, high) tuple.
    opt = Optimizer([(-2.0, 2.0)], n_initial_points=1)

    for _ in range(2):
        candidate = opt.ask()
        score = objective(candidate)
        res = opt.tell(candidate, score)

    # Plot from the result returned by the last ``tell``.
    plots.plot_objective(res)
Exemplo n.º 28
0
class SkoptBackend(Backend):
    """Backend that delegates black box optimization to scikit-optimize."""
    backend_name = "scikit-optimize"
    implemented_funcs = ("choice", "randrange", "uniform")

    def __init__(self, examples, params, base_estimator="GP", **options):
        """Create the optimizer and, when examples exist, feed them to it.

        With no ``params`` no optimizer is created and ``current_values`` is
        left empty.
        """
        self.init_fallback_backend()

        if not params:
            self.current_values = {}
            return

        estimator = py_str(base_estimator) if isinstance(base_estimator, str) else base_estimator
        self.optimizer = Optimizer(create_dimensions(params), estimator, **options)

        if not examples:
            self.current_values = {}
            return
        self.tell_examples(examples, params)

    def tell_examples(self, new_examples, params):
        """Feed new examples to the optimizer and refresh the current values."""
        points, losses = split_examples(new_examples, params)
        self.result = self.optimizer.tell(points, losses)

        # The next suggestion becomes the values handed out by this backend.
        suggestion = self.optimizer.ask()
        self.current_values = make_values(params, suggestion)

    @property
    def space(self):
        """Search space of the underlying optimizer."""
        return self.optimizer.space

    @property
    def model(self):
        """Last entry of the optimizer's list of fitted models."""
        return self.optimizer.models[-1]
Exemplo n.º 29
0
    def __init__(self, examples, params, base_estimator="gp", **options):
        """Fit a fresh optimizer on ``examples`` and store its next suggestion.

        Without examples ``current_values`` is left empty and no optimizer is
        built.
        """
        self.init_fallback_backend()

        if examples:
            points, losses = split_examples(examples, params)

            # One dimension per parameter, in the order given by sorted_items.
            dimensions = []
            for name, (func, args, _kwargs) in sorted_items(params):
                dimensions.append(create_dimension(name, func, *args))

            estimator = py_str(base_estimator) if isinstance(base_estimator, str) else base_estimator

            fitted = Optimizer(dimensions, estimator, **options)
            fitted.tell(points, losses)
            suggestion = fitted.ask()

            self.current_values = make_values(params, suggestion)
        else:
            self.current_values = {}
    def get_next_candidate(self, n_points):
        """Propose the next candidate points using a freshly built skopt optimizer.

        Args:
            n_points (int): Number of candidates desired

        Returns:
            The suggested points converted by ``_to_dict_list``.

        """
        # skopt minimizes, so the recorded values are sign-flipped to maximize.
        negated_scores = [-score for score in self.y_values]
        settings = dict(
            dimensions=self.search_space,
            base_estimator='gp',
            n_initial_points=len(self.x_values),
            acq_func='EI',
        )
        optimizer = Optimizer(**settings)
        # TODO: confirm that ``tell`` actually fits the model on these values.
        optimizer.tell(self.x_values, negated_scores)
        return self._to_dict_list(optimizer.ask(n_points=n_points))
Exemplo n.º 31
0
class SkoptBackend(StandardBackend):
    """Backend that delegates black box optimization to scikit-optimize."""
    backend_name = "scikit-optimize"
    implemented_funcs = ("choice", "randrange", "uniform")

    @override
    def setup_backend(self, params, base_estimator="GP", n_initial_points=None, **options):
        """Create the optimizer for ``params``.

        When ``n_initial_points`` is None it is derived from ``params`` via
        ``guess_n_initial_points``.
        """
        self.params = params
        estimator = py_str(base_estimator) if isinstance(base_estimator, str) else base_estimator
        initial_points = (
            guess_n_initial_points(params) if n_initial_points is None else n_initial_points
        )
        self.optimizer = Optimizer(
            create_dimensions(params),
            estimator,
            n_initial_points=initial_points,
            **options
        )

    @override
    def tell_examples(self, new_examples):
        """Feed new examples to the optimizer and refresh the current values."""
        points, losses = split_examples(new_examples, self.params)
        self.result = self.optimizer.tell(points, losses)

        # The next suggestion becomes the values handed out by this backend.
        suggestion = self.optimizer.ask()
        self.current_values = make_values(self.params, suggestion)

    @property
    def space(self):
        """Search space of the underlying optimizer."""
        return self.optimizer.space

    @property
    def model(self):
        """Last entry of the optimizer's list of fitted models."""
        return self.optimizer.models[-1]
Exemplo n.º 32
0
def skopt_main():
    """Resume (or start) an optimizer, run three parallel rounds and save it.

    The optimizer is loaded from ``optimizer-exp-pendulum-4.pkl`` when that
    succeeds, otherwise a new one is created. Each round asks for ``n_jobs``
    points, evaluates ``f`` on them in a worker pool and tells the results.
    """
    from skopt import Optimizer, dump, load, Space
    from skopt.space import Real, Integer
    fname = 'optimizer-exp-pendulum-4.pkl'
    dims = [Integer(15, 500), Real(0.025, 0.1, prior="log-uniform")]
    try:
        optimizer = load(fname)
        optimizer.space = Space(dims)
    except Exception:
        # Best-effort resume: any failure to restore falls back to a fresh
        # optimizer. ``Exception`` (not a bare ``except``) keeps
        # KeyboardInterrupt and SystemExit propagating.
        optimizer = Optimizer(dimensions=dims, random_state=1)
    n_jobs = 2
    for i in range(3):
        pool = Pool(n_jobs, initializer=mute)
        try:
            x = optimizer.ask(n_points=n_jobs)  # x is a list of n_points points
            print(x)
            y = pool.map(f, x)
        finally:
            # Close the pool even if asking or evaluating fails.
            pool.close()
        optimizer.tell(x, y)
        print('Iteration %d. Best yi %.2f' % (i, min(optimizer.yi)))

    dump(optimizer, fname)
Exemplo n.º 33
0
def submit(n, optimizer: Optimizer, opt_param_names, current_configs,
           param_space: ParamSpace, queue: Queue):
    """Generate n new configurations and put them on a queue.

    Asks the optimizer for n points, turns each point into a configuration,
    assigns it an id and puts it in the given queue.

    Args:
        n: the number of configurations to be generated
        optimizer: the skopt optimizer that suggests the points to explore
        opt_param_names: parameter names, in the same order as the optimizer's dimensions
        current_configs: current list of configurations (extended in place with the new ones)
        param_space: parameter space used to convert optimizer points into fully specified configurations
        queue: the queue in which the new configurations are put
    """
    suggestions = optimizer.ask(n_points=n)

    new_configs = []
    for point in suggestions:
        named_values = dict(zip(opt_param_names, point))
        new_configs.append(values_to_params(named_values, param_space))

    # Ids continue after the configurations that already exist.
    for offset, config in enumerate(new_configs):
        config["id"] = offset + len(current_configs)
        queue.put(config)

    current_configs += new_configs
Exemplo n.º 34
0
class BayesianOptimizedExperimentQueue(ExperimentQueue):
    """Experiment queue whose parameters are proposed by a skopt ``Optimizer``.

    Dimensions are read from a JSON file. Leased parameter sets are remembered
    so that a suggestion which is already leased can be replaced by an
    alternative whose acquisition value is discounted near the leased points.
    """

    def __init__(self, dimensions_file: str, min_num_results_to_fit: int=8, lease_timout='2 days'):
        """Load the dimensions and build the Gaussian-process optimizer.

        :param dimensions_file: path of the JSON file describing the dimensions
        :param min_num_results_to_fit: number of recorded results from which
            ``complete`` starts fitting the model
        :param lease_timout: lease duration, parsed with ``pd.to_timedelta``
        """
        self.__all_experiments = pd.DataFrame()
        self.__all_experiments['status'] = [self.WAITING] * len(self.__all_experiments)
        self.__all_experiments['last_update'] = pd.Series(pd.Timestamp(float('NaN')))
        self.__all_experiments['client'] = [""] * len(self.__all_experiments)

        self.__lease_duration = pd.to_timedelta(lease_timout)
        self.__leased_experiments = []

        dims = self.__load_dimensions(dimensions_file)
        self.__dimension_names = list(dims.keys())
        self.__dimensions = list(dims.values())
        self.__min_num_results_to_fit = min_num_results_to_fit

        # Initialize

        dim_types = [check_dimension(d) for d in self.__dimensions]
        is_cat = all([isinstance(check_dimension(d), Categorical) for d in dim_types])
        if is_cat:
            transformed_dims = [check_dimension(d, transform="identity") for d in self.__dimensions]
        else:
            transformed_dims = []
            for dim_type, dim in zip(dim_types, self.__dimensions):
                if isinstance(dim_type, Categorical):
                    transformed_dims.append(check_dimension(dim, transform="onehot"))
                # To make sure that GP operates in the [0, 1] space
                else:
                    transformed_dims.append(check_dimension(dim, transform="normalize"))

        # The transformed space is only used to size the kernel length scales.
        space = Space(transformed_dims)
        # Default GP
        cov_amplitude = ConstantKernel(1.0, (0.01, 1000.0))

        if is_cat:
            other_kernel = HammingKernel(length_scale=np.ones(space.transformed_n_dims))
        else:
            other_kernel = Matern(
                length_scale=np.ones(space.transformed_n_dims),
                length_scale_bounds=[(0.01, 100)] * space.transformed_n_dims,
                nu=2.5)

        base_estimator = GaussianProcessRegressor(
            kernel=cov_amplitude * other_kernel,
            normalize_y=True, random_state=None, alpha=0.0, noise='gaussian',
            n_restarts_optimizer=2)

        self.__opt = Optimizer(self.__dimensions, base_estimator, acq_optimizer="lbfgs",
                               n_random_starts=100, acq_optimizer_kwargs=dict(n_points=10000))

    @property
    def all_experiments(self) -> pd.DataFrame:
        """
        :return: The PandasFrame containing the details for all the experiments in the queue.
        """
        return self.__all_experiments

    @property
    def completed_percent(self) -> float:
        """Always 0; completion is not tracked by this queue."""
        return 0.

    @property
    def leased_percent(self) -> float:
        """Always 0; leases are not tracked as a percentage by this queue."""
        return 0

    @property
    def experiment_parameters(self) -> List:
        """Names of the dimensions, in optimizer order."""
        return self.__dimension_names

    def lease_new(self, client_name: str) -> Tuple[int, Dict]:
        """
        Lease a new set of experiment parameters suggested by the optimizer.
        If the suggestion is already leased, an alternative is computed instead.

        NOTE(review): the annotation says ``(id, parameters)`` but the code
        returns ``(parameters, -1)``; confirm which order callers expect.

        :param client_name: The name of the leasing client (not used here)
        :return: a tuple (parameters, -1)
        """
        experiment_params = self.__opt.ask()
        if experiment_params in self.__leased_experiments:
            experiment_params = self.__compute_alternative_params()
        self.__leased_experiments.append(experiment_params)
        # TODO: Add to all experiments, use Ids

        def parse_dim_val(value, dim_type):
            # Convert to native Python numbers according to the dimension type.
            if type(dim_type) is Real:
                return float(value)
            elif type(dim_type) is Integer:
                return int(value)
            return value

        named_params = {
            name: parse_dim_val(value, dim_type)
            for name, dim_type, value in zip(self.__dimension_names, self.__dimensions, experiment_params)
        }
        return named_params, -1

    def __compute_alternative_params(self):
        """Pick a point with good expected improvement away from leased points.

        A clone of the base estimator is fitted on the recorded results, EI is
        evaluated on random samples of the space, and the values are discounted
        around every leased point. The discount width shrinks until some
        candidate keeps a useful (sufficiently negative) value.
        """
        # Copied directly from skopt
        transformed_bounds = np.array(self.__opt.space.transformed_bounds)
        est = clone(self.__opt.base_estimator)

        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            est.fit(self.__opt.space.transform(self.__opt.Xi), self.__opt.yi)

        X = self.__opt.space.transform(self.__opt.space.rvs(
            n_samples=self.__opt.n_points, random_state=self.__opt.rng))

        # The acquisition values depend only on X, the fitted model and the
        # best observed value, so they are computed once and reused below.
        acquisition = _gaussian_acquisition(X=X, model=est, y_opt=np.min(self.__opt.yi),
                                            acq_func='EI',
                                            acq_func_kwargs=dict(n_points=10000))

        print('original point ei: %s' % np.min(acquisition))
        discount_width = .5
        values = self.__discount_leased_params(X, acquisition, discount_width)
        while np.min(values) > -1e-5 and discount_width > 1e-2:
            discount_width *= .9
            values = self.__discount_leased_params(X, acquisition, discount_width)
        next_x = X[np.argmin(values)]
        print('new point ei: %s' % np.min(values))

        if not self.__opt.space.is_categorical:
            next_x = np.clip(next_x, transformed_bounds[:, 0], transformed_bounds[:, 1])

        return self.__opt.space.inverse_transform(next_x.reshape((1, -1)))[0]

    @staticmethod
    def leased_discount(center, width, x_values):
        """Triangular (cone) discount: 1 at ``center``, falling linearly to 0 at distance ``width``."""
        distance_from_center = np.linalg.norm(x_values - center, 2, axis=1)
        discount = -distance_from_center / width + 1
        discount[discount < 0] = 0
        return discount

    def __discount_leased_params(self, X, values, discount_width_size):
        """Scale ``values`` towards 0 for candidates close to leased points.

        ``X`` holds candidates in the transformed space, so the leased points
        are transformed as well before distances are measured. The previous
        code measured distances to the untransformed points.
        """
        transformed_leased_params = self.__opt.space.transform(np.array(self.__leased_experiments))
        discount_factor = reduce(lambda x, y: x * y,
                                 (self.leased_discount(p, discount_width_size, X)
                                  for p in transformed_leased_params),
                                 np.ones(values.shape[0]))
        out_vals = values * (1. - discount_factor)
        return out_vals

    def complete(self, experiment_id: int, parameters: Dict, client: str, result: float = 0) -> None:
        """
        Declare an experiment to be completed and report its result to the optimizer.
        :param experiment_id: the id of the experiment or -1 if unknown
        :param parameters: mapping from dimension name to the value that was used
        :param client: the client id
        :param result: the output results of the experiment. This may be used in optimizing queues.
        """
        parameters = [parameters[n] for n in self.__dimension_names]
        if parameters in self.__leased_experiments:
            self.__leased_experiments.remove(parameters)
        do_fit_model = len(self.__opt.yi) >= self.__min_num_results_to_fit
        # Unfortunate hack: this depends on the internals.
        if do_fit_model:
            self.__opt._n_random_starts = 0  # Since we have adequately many results, stop using random
        self.__opt.tell(parameters, result, fit=do_fit_model)

    def __load_dimensions(self, dimensions_file:str)->Dict:
        """Read the JSON dimension specs and return an ordered name -> dimension mapping.

        Each spec needs ``type`` and ``name``; ``Real`` and ``Integer`` also
        need ``low`` and ``high``, ``Categorical`` needs ``categories``.
        """
        with open(dimensions_file) as f:
            dimensions = json.load(f)

        def parse_dimension(specs: Dict[str, Any]):
            if specs['type'] == 'Real':
                return specs['name'], Real(specs['low'], specs['high'])
            elif specs['type'] == 'Integer':
                return specs['name'], Integer(specs['low'], specs['high'])
            elif specs['type'] == 'Categorical':
                return specs['name'], Categorical(specs['categories'])
            else:
                raise Exception('Unrecognized dimension type %s' % specs['type'])

        return OrderedDict([parse_dimension(d) for d in dimensions])
Exemplo n.º 35
0
class SkOptOptimizer(PhotonBaseOptimizer):
    """Hyperparameter optimizer that wraps a scikit-optimize ``Optimizer``.

    ``prepare`` builds the skopt search space from the pipeline elements,
    ``ask`` (a generator) yields configurations as dictionaries and ``tell``
    reports the achieved performance back to skopt.
    """

    def __init__(
        self,
        n_configurations: int = 20,
        acq_func: str = "gp_hedge",
        acq_func_kwargs: dict = None,
    ):
        """Store the settings; the skopt optimizer itself is built in ``prepare``.

        Args:
            n_configurations: number of configurations the generator yields.
            acq_func: acquisition function name passed to skopt.
            acq_func_kwargs: extra acquisition arguments passed to skopt.
        """
        self.optimizer = None
        self.hyperparameter_list = []
        self.metric_to_optimize = ""
        self.ask = self.ask_generator()
        self.n_configurations = n_configurations
        self.acq_func = acq_func
        self.acq_func_kwargs = acq_func_kwargs
        self.maximize_metric = True
        self.constant_dictionary = {}

    def prepare(self, pipeline_elements: list, maximize_metric: bool):
        """Build the search space from the elements and reset the generator.

        Hyperparameters with fewer than two candidate values are stored in
        ``constant_dictionary`` instead of being optimized.
        """

        self.hyperparameter_list = []
        self.maximize_metric = maximize_metric
        # build space
        space = []
        for pipe_element in pipeline_elements:
            if hasattr(pipe_element, "hyperparameters"):
                for name, value in pipe_element.hyperparameters.items():
                    # if we only have one value we do not need to optimize
                    if isinstance(value, list) and len(value) < 2:
                        self.constant_dictionary[name] = value[0]
                        continue
                    if isinstance(value,
                                  PhotonCategorical) and len(value.values) < 2:
                        self.constant_dictionary[name] = value.values[0]
                        continue
                    skopt_param = self._convert_PHOTON_to_skopt_space(
                        value, name)
                    if skopt_param is not None:
                        space.append(skopt_param)
        if len(space) == 0:
            logger.warn(
                "Did not find any hyperparameters to convert into skopt space")
            self.optimizer = None
        else:
            self.optimizer = Optimizer(
                space,
                "ET",
                acq_func=self.acq_func,
                acq_func_kwargs=self.acq_func_kwargs,
            )
        self.ask = self.ask_generator()

    def _convert_PHOTON_to_skopt_space(self, hyperparam: object, name: str):
        """Convert one hyperparameter into a skopt dimension.

        Returns None for a falsy or unsupported hyperparameter. The name is
        added to ``hyperparameter_list`` only when a dimension is returned, so
        the list always lines up with the dimensions of the search space.
        """
        if not hyperparam:
            return None
        if isinstance(hyperparam, PhotonCategorical):
            dimension = skoptCategorical(hyperparam.values, name=name)
        elif isinstance(hyperparam, list):
            dimension = skoptCategorical(hyperparam, name=name)
        elif isinstance(hyperparam, FloatRange):
            if hyperparam.range_type == "linspace":
                dimension = Real(hyperparam.start,
                                 hyperparam.stop,
                                 name=name,
                                 prior="uniform")
            elif hyperparam.range_type == "logspace":
                dimension = Real(hyperparam.start,
                                 hyperparam.stop,
                                 name=name,
                                 prior="log-uniform")
            else:
                dimension = Real(hyperparam.start, hyperparam.stop, name=name)
        elif isinstance(hyperparam, IntegerRange):
            dimension = Integer(hyperparam.start, hyperparam.stop, name=name)
        else:
            # Unsupported type: no dimension, so the name is not registered.
            return None
        self.hyperparameter_list.append(name)
        return dimension

    def ask_generator(self):
        """Yield up to ``n_configurations`` configurations, or one empty dict without an optimizer."""
        if self.optimizer is None:
            yield {}
        else:
            for i in range(self.n_configurations):
                next_config_list = self.optimizer.ask()
                next_config_dict = {
                    self.hyperparameter_list[number]:
                    self._convert_to_native(value)
                    for number, value in enumerate(next_config_list)
                }
                yield next_config_dict

    def _convert_to_native(self, obj):
        """Return ``obj`` as a native Python value when it is a NumPy object."""
        # check if we have a numpy object, if so convert it to python native
        if type(obj).__module__ == np.__name__:
            # ``.item()`` replaces ``np.asscalar`` (deprecated in NumPy 1.16,
            # removed in NumPy 1.23).
            return obj.item()
        else:
            return obj

    def tell(self, config, performance):
        """Report the performance of ``config`` to skopt.

        ``performance[1]`` is used as the metric value. When the metric is
        maximized it is negated (after taking the first element of a list),
        because skopt minimizes.
        """
        # convert dictionary to list in correct order
        if self.optimizer is not None:
            config_values = [config[name] for name in self.hyperparameter_list]
            best_config_metric_performance = performance[1]
            if self.maximize_metric:
                if isinstance(best_config_metric_performance, list):
                    print("BEST CONFIG METRIC PERFORMANCE: " +
                          str(best_config_metric_performance))
                    best_config_metric_performance = best_config_metric_performance[
                        0]
                best_config_metric_performance = -best_config_metric_performance
            # random_accuracy = np.random.randn(1)[0]
            self.optimizer.tell(config_values, best_config_metric_performance)

    def plot_evaluations(self):
        """Plot the evaluated points using the module-level ``plot_evaluations``."""
        results = SkoptResults()
        results.space = self.optimizer.space
        results.x_iters = self.optimizer.Xi
        results = self._convert_categorical_hyperparameters(results)
        results.x = results.x_iters[np.argmin(self.optimizer.yi)]
        plt.figure(figsize=(10, 10))
        return plot_evaluations(results)

    def plot_objective(self):
        """Plot the modelled objective using the module-level ``plot_objective``."""
        results = SkoptResults()
        results.space = self.optimizer.space
        results.x_iters = self.optimizer.Xi
        results = self._convert_categorical_hyperparameters(results)
        results.x = results.x_iters[np.argmin(self.optimizer.yi)]
        results.models = self.optimizer.models
        plt.figure(figsize=(10, 10))
        return plot_objective(results)

    def _convert_categorical_hyperparameters(self, results):
        """Replace categorical values in ``results`` by their transformed form.

        Mutates ``results`` in place (dimension ``categories`` and ``x_iters``)
        and returns it.
        """
        parameter_types = list()

        for i, dim in enumerate(results.space.dimensions):
            if isinstance(dim, skoptCategorical):
                parameter_types.append(dim.transformer)
                setattr(results.space.dimensions[i], "categories",
                        dim.transformed_bounds)
            else:
                # False marks dimensions that need no conversion.
                parameter_types.append(False)

        for i, xs in enumerate(results.x_iters):
            for k, xsk in enumerate(xs):
                if parameter_types[k]:
                    results.x_iters[i][k] = parameter_types[k].transform([xsk])
        return results