Example #1
class HyperoptBackend(StandardBackend):
    """The hyperopt backend uses hyperopt for black box optimization."""
    backend_name = "hyperopt"
    implemented_funcs = ("choice", "randrange", "uniform", "normalvariate")

    @override
    def setup_backend(self,
                      params,
                      algo=tpe.suggest,
                      rstate=np.random.RandomState(),
                      show_progressbar=False,
                      **options):
        """Special method to initialize the backend from params."""
        self.params = params

        space = as_apply(dict(
            (name, create_space(name, func, *args))
            for name, (func, args, kwargs) in sorted_items(params)))

        domain = Domain(self.set_current_values, space)

        self.trials = Trials()

        self.fmin_iter = FMinIter(algo,
                                  domain,
                                  self.trials,
                                  rstate,
                                  show_progressbar=show_progressbar,
                                  **options)

    @override
    def tell_examples(self, new_examples):
        """Special method that allows fast updating of the backend with new examples."""
        trial_list = examples_to_trials(new_examples, self.params)
        self.trials.insert_trial_docs(trial_list)
        self.trials.refresh()

        # run one iteration of hyperparameter optimization, with values saved
        #  to the self.set_current_values callback passed to Domain
        next(self.fmin_iter)

        assert self.current_values is not None, self.current_values
        assert set(self.current_values.keys()) == set(
            self.params), self.current_values

    def set_current_values(self, values):
        """Callback to set the values for this run."""
        assert isinstance(values, dict), values
        self.current_values = values
        return {"status": STATUS_RUNNING}
Example #2
def nu_simple_fmin(hpo_project_key, objective, rseed=1337, full_model_string=None, notebook_name=None, verbose=True, stack=3, keep_temp=False, data_args=None):
    
    # fetch the HPO project configuration from the DB
    db_info = asyncio.run(Requests().get_action(parameter1 = hpo_project_key, parameter2 = "null", url = hpo_url))[0]
    hpo_project_id = db_info["hpoProjectId"]
    algo, space = __transform_db_to_function(method = db_info["method"], config = db_info["config"])

    trials = Trials()
    best = fmin(objective, space, algo=algo, max_evals=50, trials=trials, rstate=np.random.RandomState(rseed), return_argmin=True)
    importances = calculate_importance(trials)
 
    # save the results via the API
    all_info = dict()
    
    all_info["best_result"] = trials.best_trial['result'] 
    all_info["best_hp"] = best
    all_info["trial_result"] = trials.results
    all_info["trial_hp"] = trials.vals
    # convert numpy int64 values so the payload is JSON-serializable
    all_info = __to_int(all_info)

    all_info["hpo_project_key"] = hpo_project_key

    # keep only the first importance score from each nested importance dict
    tmp_importance = list()
    for i in range(len(importances)):
        for key1, value1 in importances[i].items():
            for key2, value2 in value1.items():
                tmp_importance.append(value2)
                break
    all_info["importances"] = tmp_importance

    asyncio.run(Requests().post_action(request_datas = all_info, url = hpo_url))

    return best, trials
Example #3
    def setup_backend(self, params, algo=tpe.suggest, rstate=None, show_progressbar=False, **options):
        """Special method to initialize the backend from params."""
        if rstate is None:
            try:
                rstate = np.random.default_rng()
            except AttributeError:
                rstate = np.random.RandomState()
        self.params = params

        space = as_apply(dict((name, create_space(name, func, *args)) for name, (func, args, kwargs) in sorted_items(params)))

        domain = Domain(self.set_current_values, space)

        self.trials = Trials()

        self.fmin_iter = FMinIter(algo, domain, self.trials, rstate, show_progressbar=show_progressbar, **options)
Example #4
    def foo(self):
        self.bandit = bandit = Bandit(self.expr)
        self.algo = algo = Random(bandit)
        if hasattr(self, 'n_randints'):
            # filter() returns an iterator in Python 3, so materialize it before len()
            n_randints = len([x for x in algo.vh.params.values()
                              if x.name == 'randint'])
            assert n_randints == self.n_randints

        self.trials = Trials()
        # NOTE: `async` became a reserved word in Python 3.7; this snippet targets the old hyperopt Experiment API
        self.experiment = Experiment(self.trials, algo, async=False)
        self.experiment.run(5)
        self.output = output = []
        for trial in self.trials._trials:
            print('')
            tmp = []
            for nid in trial['misc']['idxs']:
                thing = (
                        nid,
                        trial['misc']['idxs'][nid],
                        trial['misc']['vals'][nid])
                print(thing)
                tmp.append(thing)
            tmp.sort()
            output.append(tmp)
        print(repr(output))
        print(repr(self.wanted))
        # -- think of a more robust way to test these things
        #    or, if the sampling style is to be nailed down,
        #    put it in and be sure of it.
        raise nose.SkipTest()
        assert output == self.wanted
Example #5
def main():
    search_space = {}
    search_space["problem_size"] = hp.quniform("problem_size", 1, 64, 1)
    search_space["num_ranks"] = hp.quniform("num_ranks", 1, 8, 1)

    trials = Trials()

    best = fmin(fn=run_hpcg,
                space=search_space,
                algo=tpe.suggest,
                max_evals=50,
                trials=trials)

    print("------------------------------------------------")

    for trial in trials:
        print("{1}x({2}, {3}, {4}) = {0} GF".format(-1.0*trial["result"]["loss"],
                                                    trial["misc"]["vals"]["num_ranks"][0],
                                                    trial["misc"]["vals"]["problem_size"][0],
                                                    trial["misc"]["vals"]["problem_size"][0],
                                                    trial["misc"]["vals"]["problem_size"][0]))

    print("Saving pkl....")
    with open('trials.pkl', 'wb') as output:
        pickle.dump(trials, output)
    print("... done")
Example #6
    def __init__(self, api_config):
        """Build wrapper class to use an optimizer in benchmark.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.
        """
        AbstractOptimizer.__init__(self, api_config)

        self.space_x = JointSpace(api_config)
        self.bounds = self.space_x.get_bounds()
        self.create_opt_prob()  # Sets up the optimization problem (needs self.bounds)
        self.max_evals = np.iinfo(np.int32).max  # NOTE: Largest possible int
        self.turbo_batch_size = None
        self.pysot_batch_size = None
        self.history = []
        self.proposals = []
        self.lb, self.ub = self.bounds[:, 0], self.bounds[:, 1]
        self.dim = len(self.bounds)

        self.turbo = Turbo1(
            f=None,
            lb=self.bounds[:, 0],
            ub=self.bounds[:, 1],
            n_init=2 * self.dim + 1,
            max_evals=self.max_evals,
            batch_size=4,  # We need to update this later
            verbose=False,
        )

        # hyperopt
        self.random = np_random

        space, self.round_to_values = tuSOTOptimizer.get_hyperopt_dimensions(
            api_config)
        self.domain = Domain(dummy_f, space, pass_expr_memo_ctrl=None)
        self.trials = Trials()

        # Some book keeping like opentuner wrapper
        self.trial_id_lookup = {}

        # Store just for data validation
        self.param_set_chk = frozenset(api_config.keys())
Example #7
    def __init__(self,
                 est,
                 X,
                 y,
                 params=None,
                 iters=500,
                 time_to_search=None,
                 cv=5,
                 cv_times=1,
                 scorer="f1",
                 verbose=1,
                 random=False,
                 foldtype="Kfold"):

        # check and get skiperopt style parameters
        if params is None:
            if not hasattr(est, "param_grid"):
                raise ValueError("No parameters supplied")
            # fall back to the estimator's own param_grid when none is given
            params = est.param_grid

        self.params = params
        self.best_params = None
        self.__space = create_hyper(params)
        self.__initparams = est.get_params()
        # set hyper settings
        self.__algo = tpe.suggest
        self.__trial = Trials()

        # set run settings
        self.verbose = verbose
        self.iters = iters
        self.cv = cv
        self.cv_times = cv_times
        self.scorer = scorer
        self.__run = 0

        self.est = est

        self.stats = {}
        self.__init_score = cross_validation(self.est,
                                             X,
                                             y,
                                             cv=self.cv,
                                             cv_times=self.cv_times,
                                             scorer=self.scorer)
        self.best_score = self.__init_score

        self.__X = X
        self.__y = y

        self.__start_now = None

        self.__runok = True
        self.time_to_search = time_to_search

        self.random = random
        self.foldtype = foldtype
Example #8
    def __init__(self, api_config, random=np_random):
        """Build wrapper class to use hyperopt optimizer in benchmark.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.
        """
        AbstractOptimizer.__init__(self, api_config)
        self.random = random

        space, self.round_to_values = HyperoptOptimizer.get_hyperopt_dimensions(api_config)
        self.domain = Domain(dummy_f, space, pass_expr_memo_ctrl=None)
        self.trials = Trials()

        # Some book keeping like opentuner wrapper
        self.trial_id_lookup = {}

        # Store just for data validation
        self.param_set_chk = frozenset(api_config.keys())
Example #9
    def test_seeding(self):
        # -- assert that the seeding works a particular way

        domain = coin_flip()
        docs = rand.suggest(list(range(10)), domain, Trials(), seed=123)
        trials = trials_from_docs(docs)
        idxs, vals = miscs_to_idxs_vals(trials.miscs)

        # Passes Nov 8 / 2013
        self.assertEqual(list(idxs["flip"]), list(range(10)))
        self.assertEqual(list(vals["flip"]), [0, 1, 0, 0, 0, 0, 0, 1, 1, 0])
Example #10
    def test_suggest_1(self):
        print('EXPR', self.bandit.expr)
        docs = self.algo.suggest([0], Trials())
        assert len(docs) == 1
        print('DOCS', docs)
        # -- assert validity of docs
        trials = trials_from_docs(docs)
        print('TRIALS', trials)
        assert docs[0]['misc']['idxs']['flip'] == [0]
        idxs, vals = miscs_to_idxs_vals(trials.miscs)
        assert idxs['flip'] == [0]
Example #11
    def test_arbitrary_range(self):
        new_ids = [-2, 0, 7, 'a', '007']
        docs = self.algo.suggest(new_ids, Trials())
        # -- assert validity of docs
        trials = trials_from_docs(docs)
        idxs, vals = miscs_to_idxs_vals(trials.miscs)
        assert len(docs) == 5
        assert len(idxs) == 1
        assert len(vals) == 1
        print(vals)
        assert idxs['flip'] == new_ids
        assert np.all(vals['flip'] == [0, 1, 0, 1, 1])
Example #12
def test_failure():

    #XXX also test the Bandit.exceptions mechanism that actually catches them
    class BanditE(Exception):
        pass

    class DummyBandit(Bandit):
        param_gen = {"loss": 10}
        def __init__(self):
            super(DummyBandit, self).__init__(self.param_gen)

        def evaluate(self, config, ctrl):
            raise BanditE()

    trials = Trials()
    bandit_algo = Random(DummyBandit())
    # NOTE: `async` became a reserved word in Python 3.7; this snippet targets the old hyperopt Experiment API
    exp = Experiment(trials, bandit_algo, async=False)

    exp.run(1)
    trials.refresh()
    assert len(trials) == 0
    assert len(trials._dynamic_trials) == 1
    assert trials._dynamic_trials[0]['state'] == JOB_STATE_ERROR
    assert trials._dynamic_trials[0]['misc']['error'] is not None

    exp.catch_bandit_exceptions = False
    nose.tools.assert_raises(BanditE, exp.run, 1)
    trials.refresh()
    assert len(trials) == 0
    assert len(trials._dynamic_trials) == 2
    assert trials._dynamic_trials[1]['state'] == JOB_STATE_ERROR
    assert trials._dynamic_trials[1]['misc']['error'] is not None
Example #13
    def test_suggest_5(self):
        docs = self.algo.suggest(list(range(5)), Trials())
        print(docs)
        assert len(docs) == 5
        # -- assert validity of docs
        trials = trials_from_docs(docs)
        idxs, vals = miscs_to_idxs_vals(trials.miscs)
        print(idxs)
        print(vals)
        assert len(idxs) == 1
        assert len(vals) == 1
        assert idxs['flip'] == list(range(5))
        assert np.all(vals['flip'] == [1, 1, 0, 1, 0])
Example #14
    def __init__(self, examples, params, algo=tpe.suggest, rstate=np.random.RandomState(), show_progressbar=False, **options):
        self.init_fallback_backend()

        if not examples:
            self.current_values = {}
            return

        space = as_apply(dict((name, create_space(name, func, *args)) for name, (func, args, kwargs) in sorted_items(params)))

        domain = Domain(self.set_current_values, space)

        trial_list = examples_to_trials(examples, params)

        trials = Trials()
        trials.insert_trial_docs(trial_list)

        # run one iteration of hyperparameter optimization, with values saved
        #  to the self.set_current_values callback passed to Domain
        next(FMinIter(algo, domain, trials, rstate, show_progressbar=show_progressbar, **options))

        assert self.current_values is not None, self.current_values
        assert set(self.current_values.keys()) == set(params), self.current_values
Example #15
def main():

    space = {
        'ltr':
        hp.choice('ltr', [True]),
        'shuffle':
        hp.choice('shuffle', [False]),
        'num_leaves':
        hp.choice('num_leaves', list(np.arange(8, 256, 2, dtype=int))),
        'max_depth':
        hp.choice('max_depth', list(np.arange(4, 64, 2, dtype=int))),
        'max_bin':
        hp.choice('max_bin', list(np.arange(255, 255 * 4, 5, dtype=int))),
        'min_data_in_leaf':
        hp.choice('min_data_in_leaf', list(np.arange(5, 100, 5, dtype=int))),
        'learning_rate':
        hp.uniform('learning_rate', 0.01, 0.3),
        'bagging_fraction':
        hp.uniform('bagging_fraction', 0.2, 1.0),
        'feature_fraction':
        hp.uniform('feature_fraction', 0.2, 1.0),
        'early_stopping':
        hp.uniform('early_stopping', 100, 1000),
    }

    trials_step = 1  # how many additional trials to do after loading saved trials. 1 = save after iteration
    max_trials = 1  # initial max_trials. put something small to not have to wait

    try:  # try to load an already saved trials object, and increase the max
        with open(BASE_PATH + SET + TRAILKEY + '.hyperopt', "rb") as f:
            trials = pickle.load(f)
        print("Found saved Trials! Loading...")
        max_trials = len(trials.trials) + trials_step
        print("Rerunning from {} trials to {} (+{}) trials".format(
            len(trials.trials), max_trials, trials_step))
    except (OSError, EOFError, pickle.UnpicklingError):  # no usable saved trials, so start fresh
        trials = Trials()

    best = fmin(fn=objective,
                space=space,
                algo=tpe.suggest,
                trials=trials,
                max_evals=max_trials)

    print("Best:", best)
    print("Num:", max_trials)

    # save the trials object
    with open(BASE_PATH + SET + TRAILKEY + ".hyperopt", "wb") as f:
        pickle.dump(trials, f)
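
Because max_trials only grows by trials_step on each invocation, this save-and-reload pattern is meant to be run repeatedly. A minimal sketch of such a driver, assuming main() above is importable from the same script (the iteration count is arbitrary):

if __name__ == "__main__":
    # Hypothetical driver: every call reloads the pickled Trials, runs one more
    # evaluation, and saves the result, so progress survives interruptions.
    for _ in range(100):
        main()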
Example #16
    def test_arbitrary_range(self, N=10):
        assert N <= 10
        new_ids = [-2, 0, 7, 'a', '007', 66, 'a3', '899', 23, 2333][:N]
        docs = self.algo.suggest(new_ids, Trials())
        # -- assert validity of docs
        trials = trials_from_docs(docs)
        idxs, vals = miscs_to_idxs_vals(trials.miscs)
        assert len(docs) == N
        assert len(idxs) == 1
        assert len(vals) == 1
        print(vals)
        assert idxs['flip'] == new_ids

        # -- assert that the random seed matches that of Jan 8/2013
        assert np.all(vals['flip'] == [0, 1, 0, 0, 0, 0, 0, 1, 1, 0][:N])
Example #17
    def test_suggest_N(self, N=10):
        assert N <= 10
        docs = self.algo.suggest(list(range(N)), Trials())
        print('docs', docs)
        assert len(docs) == N
        # -- assert validity of docs
        trials = trials_from_docs(docs)
        idxs, vals = miscs_to_idxs_vals(trials.miscs)
        print('idxs', idxs)
        print('vals', vals)
        assert len(idxs) == 1
        assert len(vals) == 1
        assert idxs['flip'] == list(range(N))
        # -- the expected values below depend on the sampling seed
        assert np.all(vals['flip'] == [0, 1, 0, 0, 0, 0, 0, 1, 1, 0][:N])
Example #18
class HyperoptOptimizer(AbstractOptimizer):
    primary_import = "hyperopt"

    def __init__(self, api_config, random=np_random):
        """Build wrapper class to use hyperopt optimizer in benchmark.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.
        """
        AbstractOptimizer.__init__(self, api_config)
        self.random = random

        space, self.round_to_values = HyperoptOptimizer.get_hyperopt_dimensions(
            api_config)
        self.domain = Domain(dummy_f, space, pass_expr_memo_ctrl=None)
        self.trials = Trials()

        # Some book keeping like opentuner wrapper
        self.trial_id_lookup = {}

        # Store just for data validation
        self.param_set_chk = frozenset(api_config.keys())

    @staticmethod
    def hashable_dict(d):
        """A custom function for hashing dictionaries.

        Parameters
        ----------
        d : dict or dict-like
            The dictionary to be converted to immutable/hashable type.

        Returns
        -------
        hashable_object : frozenset of tuple pairs
            Bijective equivalent to dict that can be hashed.
        """
        hashable_object = frozenset(d.items())
        return hashable_object

    @staticmethod
    def get_hyperopt_dimensions(api_config):
        """Help routine to setup hyperopt search space in constructor.

        Take api_config as argument so this can be static.
        """
        # The ordering of iteration probably makes no difference, but just to
        # be safe and consistent with space.py, sort the keys.
        param_list = sorted(api_config.keys())

        space = {}
        round_to_values = {}
        for param_name in param_list:
            param_config = api_config[param_name]

            param_type = param_config["type"]

            param_space = param_config.get("space", None)
            param_range = param_config.get("range", None)
            param_values = param_config.get("values", None)

            # Some setup for case that whitelist of values is provided:
            values_only_type = param_type in ("cat", "ordinal")
            if (param_values is not None) and (not values_only_type):
                assert param_range is None
                param_values = np.unique(param_values)
                param_range = (param_values[0], param_values[-1])
                round_to_values[param_name] = interp1d(
                    param_values,
                    param_values,
                    kind="nearest",
                    fill_value="extrapolate")

            if param_type == "int":
                low, high = param_range
                if param_space in ("log", "logit"):
                    space[param_name] = hp.qloguniform(param_name, np.log(low),
                                                       np.log(high), 1)
                else:
                    space[param_name] = hp.quniform(param_name, low, high, 1)
            elif param_type == "bool":
                assert param_range is None
                assert param_values is None
                space[param_name] = hp.choice(param_name, (False, True))
            elif param_type in ("cat", "ordinal"):
                assert param_range is None
                space[param_name] = hp.choice(param_name, param_values)
            elif param_type == "real":
                low, high = param_range
                if param_space in ("log", "logit"):
                    space[param_name] = hp.loguniform(param_name, np.log(low),
                                                      np.log(high))
                else:
                    space[param_name] = hp.uniform(param_name, low, high)
            else:
                assert False, "type %s not handled in API" % param_type

        return space, round_to_values

    def get_trial(self, trial_id):
        for trial in self.trials._dynamic_trials:
            if trial["tid"] == trial_id:
                assert isinstance(trial, dict)
                # Make sure right kind of dict
                assert "state" in trial and "result" in trial
                assert trial["state"] == JOB_STATE_NEW
                return trial
        assert False, "No matching trial ID"

    def cleanup_guess(self, x_guess):
        assert isinstance(x_guess, dict)
        # Also, check the keys are only the vars we are searching over:
        assert frozenset(x_guess.keys()) == self.param_set_chk

        # Do the rounding
        # Make a copy to be safe, and also unpack singletons
        # We may also need to consider clip_chk at some point like opentuner
        x_guess = {k: only(x_guess[k]) for k in x_guess}
        for param_name, round_f in self.round_to_values.items():
            x_guess[param_name] = round_f(x_guess[param_name])
        # Also ensure this is correct dtype so sklearn is happy
        x_guess = {
            k: DTYPE_MAP[self.api_config[k]["type"]](x_guess[k])
            for k in x_guess
        }
        return x_guess

    def _suggest(self):
        """Helper function to `suggest` that does the work of calling
        `hyperopt` via its dumb API.
        """
        new_ids = self.trials.new_trial_ids(1)
        assert len(new_ids) == 1
        self.trials.refresh()

        seed = random_seed(self.random)
        new_trials = tpe.suggest(new_ids, self.domain, self.trials, seed)
        assert len(new_trials) == 1

        self.trials.insert_trial_docs(new_trials)
        self.trials.refresh()

        new_trial, = new_trials  # extract singleton
        return new_trial

    def suggest(self, n_suggestions=1):
        """Make `n_suggestions` suggestions for what to evaluate next.

        This requires the user observe all previous suggestions before calling
        again.

        Parameters
        ----------
        n_suggestions : int
            The number of suggestions to return.

        Returns
        -------
        next_guess : list of dict
            List of `n_suggestions` suggestions to evaluate the objective
            function. Each suggestion is a dictionary where each key
            corresponds to a parameter being optimized.
        """
        assert n_suggestions >= 1, "invalid value for n_suggestions"

        # Get the new trials; it seems hyperopt either uses random search or
        # guesses one at a time anyway, so we might as well call it serially.
        new_trials = [self._suggest() for _ in range(n_suggestions)]

        X = []
        for trial in new_trials:
            x_guess = self.cleanup_guess(trial["misc"]["vals"])
            X.append(x_guess)

            # Build lookup to get original trial object
            x_guess_ = HyperoptOptimizer.hashable_dict(x_guess)
            assert x_guess_ not in self.trial_id_lookup, "the suggestions should not already be in the trial dict"
            self.trial_id_lookup[x_guess_] = trial["tid"]

        assert len(X) == n_suggestions
        return X

    def observe(self, X, y):
        """Feed the observations back to hyperopt.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated.
        """
        assert len(X) == len(y)

        for x_guess, y_ in zip(X, y):
            x_guess_ = HyperoptOptimizer.hashable_dict(x_guess)
            assert x_guess_ in self.trial_id_lookup, "Appears to be a guess that did not originate from suggest"
            trial_id = self.trial_id_lookup.pop(x_guess_)
            trial = self.get_trial(trial_id)
            assert self.cleanup_guess(
                trial["misc"]["vals"]
            ) == x_guess, "trial ID not consistent with x values stored"

            # Cast to float to ensure native type
            result = {"loss": float(y_), "status": STATUS_OK}
            trial["state"] = JOB_STATE_DONE
            trial["result"] = result
        # hyperopt.fmin.FMinIter.serial_evaluate only does one refresh at end
        # of loop of a bunch of evals, so we will do the same thing here.
        self.trials.refresh()
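
The suggest/observe methods above form an ask/tell loop. A minimal sketch of how a caller might drive the wrapper, where the one-variable api_config and the toy quadratic objective are illustrative assumptions, not part of the original:

# Hypothetical driver for HyperoptOptimizer (api_config format follows the
# code above: per-variable dicts with "type", "space", and "range").
api_config = {"x": {"type": "real", "space": "linear", "range": (-5.0, 5.0)}}
opt = HyperoptOptimizer(api_config)

for _ in range(20):
    X = opt.suggest(n_suggestions=2)               # ask for a batch of candidates
    y = [(guess["x"] - 1.23) ** 2 for guess in X]  # evaluate a toy objective
    opt.observe(X, y)                              # report losses back to hyperopt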
Example #19
    def setUp(self):
        self.trials = Trials()
Example #20
    def setUp(self):
        self.bandit = coin_flip()
        self.algo = RandomStop(5, self.bandit)
        self.trials = Trials()
        # NOTE: `async` became a reserved word in Python 3.7; this snippet targets the old hyperopt Experiment API
        self.experiment = Experiment(self.trials, self.algo, async=False)
Example #21
class tuSOTOptimizer(AbstractOptimizer):
    primary_import = "pysot"

    def __init__(self, api_config):
        """Build wrapper class to use an optimizer in benchmark.

        Parameters
        ----------
        api_config : dict-like of dict-like
            Configuration of the optimization variables. See API description.
        """
        AbstractOptimizer.__init__(self, api_config)

        self.space_x = JointSpace(api_config)
        self.bounds = self.space_x.get_bounds()
        self.create_opt_prob()  # Sets up the optimization problem (needs self.bounds)
        self.max_evals = np.iinfo(np.int32).max  # NOTE: Largest possible int
        self.turbo_batch_size = None
        self.pysot_batch_size = None
        self.history = []
        self.proposals = []
        self.lb, self.ub = self.bounds[:, 0], self.bounds[:, 1]
        self.dim = len(self.bounds)

        self.turbo = Turbo1(
            f=None,
            lb=self.bounds[:, 0],
            ub=self.bounds[:, 1],
            n_init=2 * self.dim + 1,
            max_evals=self.max_evals,
            batch_size=4,  # We need to update this later
            verbose=False,
        )

        # hyperopt
        self.random = np_random

        space, self.round_to_values = tuSOTOptimizer.get_hyperopt_dimensions(
            api_config)
        self.domain = Domain(dummy_f, space, pass_expr_memo_ctrl=None)
        self.trials = Trials()

        # Some book keeping like opentuner wrapper
        self.trial_id_lookup = {}

        # Store just for data validation
        self.param_set_chk = frozenset(api_config.keys())

    def restart(self):
        self.turbo._restart()
        self.turbo._X = np.zeros((0, self.turbo.dim))
        self.turbo._fX = np.zeros((0, 1))
        X_init = latin_hypercube(self.turbo.n_init, self.dim)
        self.X_init = from_unit_cube(X_init, self.lb, self.ub)

    def create_opt_prob(self):
        """Create an optimization problem object."""
        opt = OptimizationProblem()
        opt.lb = self.bounds[:, 0]  # In warped space
        opt.ub = self.bounds[:, 1]  # In warped space
        opt.dim = len(self.bounds)
        opt.cont_var = np.arange(len(self.bounds))
        opt.int_var = []
        assert len(opt.cont_var) + len(opt.int_var) == opt.dim
        opt.objfun = None
        self.opt = opt

    def start(self):
        """Starts a new pySOT run."""
        self.history = []
        self.proposals = []

        # Symmetric Latin hypercube design
        des_pts = max([self.pysot_batch_size, 2 * (self.opt.dim + 1)])
        slhd = SymmetricLatinHypercube(dim=self.opt.dim, num_pts=des_pts)

        # Warped RBF interpolant
        rbf = RBFInterpolant(dim=self.opt.dim,
                             kernel=CubicKernel(),
                             tail=LinearTail(self.opt.dim),
                             eta=1e-4)
        rbf = SurrogateUnitBox(rbf, lb=self.opt.lb, ub=self.opt.ub)

        # Optimization strategy
        self.strategy = SRBFStrategy(
            max_evals=self.max_evals,
            opt_prob=self.opt,
            exp_design=slhd,
            surrogate=rbf,
            asynchronous=True,
            batch_size=1,
            use_restarts=True,
        )

    @staticmethod
    def hashable_dict(d):
        hashable_object = frozenset(d.items())
        return hashable_object

    @staticmethod
    def get_hyperopt_dimensions(api_config):
        param_list = sorted(api_config.keys())

        space = {}
        round_to_values = {}
        for param_name in param_list:
            param_config = api_config[param_name]

            param_type = param_config["type"]

            param_space = param_config.get("space", None)
            param_range = param_config.get("range", None)
            param_values = param_config.get("values", None)

            # Some setup for case that whitelist of values is provided:
            values_only_type = param_type in ("cat", "ordinal")
            if (param_values is not None) and (not values_only_type):
                assert param_range is None
                param_values = np.unique(param_values)
                param_range = (param_values[0], param_values[-1])
                round_to_values[param_name] = interp1d(
                    param_values,
                    param_values,
                    kind="nearest",
                    fill_value="extrapolate")

            if param_type == "int":
                low, high = param_range
                if param_space in ("log", "logit"):
                    space[param_name] = hp.qloguniform(param_name, np.log(low),
                                                       np.log(high), 1)
                else:
                    space[param_name] = hp.quniform(param_name, low, high, 1)
            elif param_type == "bool":
                assert param_range is None
                assert param_values is None
                space[param_name] = hp.choice(param_name, (False, True))
            elif param_type in ("cat", "ordinal"):
                assert param_range is None
                space[param_name] = hp.choice(param_name, param_values)
            elif param_type == "real":
                low, high = param_range
                if param_space in ("log", "logit"):
                    space[param_name] = hp.loguniform(param_name, np.log(low),
                                                      np.log(high))
                else:
                    space[param_name] = hp.uniform(param_name, low, high)
            else:
                assert False, "type %s not handled in API" % param_type

        return space, round_to_values

    def get_trial(self, trial_id):
        for trial in self.trials._dynamic_trials:
            if trial["tid"] == trial_id:
                assert isinstance(trial, dict)
                # Make sure right kind of dict
                assert "state" in trial and "result" in trial
                assert trial["state"] == JOB_STATE_NEW
                return trial
        assert False, "No matching trial ID"

    def cleanup_guess(self, x_guess):
        assert isinstance(x_guess, dict)
        # Also, check the keys are only the vars we are searching over:
        assert frozenset(x_guess.keys()) == self.param_set_chk

        # Do the rounding
        # Make a copy to be safe, and also unpack singletons
        # We may also need to consider clip_chk at some point like opentuner
        x_guess = {k: only(x_guess[k]) for k in x_guess}
        for param_name, round_f in self.round_to_values.items():
            x_guess[param_name] = round_f(x_guess[param_name])
        # Also ensure this is correct dtype so sklearn is happy
        x_guess = {
            k: DTYPE_MAP[self.api_config[k]["type"]](x_guess[k])
            for k in x_guess
        }
        return x_guess

    def pysot_suggest(self, n_suggestions=1):
        if self.pysot_batch_size is None:  # First call to suggest
            self.pysot_batch_size = n_suggestions
            self.start()

        # Set the tolerances pretending like we are running batch
        d, p = float(self.opt.dim), float(n_suggestions)
        self.strategy.failtol = p * int(max(np.ceil(d / p), np.ceil(4 / p)))

        # Now we can make suggestions
        x_w = []
        self.proposals = []
        for _ in range(n_suggestions):
            proposal = self.strategy.propose_action()
            record = EvalRecord(proposal.args, status="pending")
            proposal.record = record
            proposal.accept()  # This triggers all the callbacks

            # It is possible that pySOT proposes a previously evaluated point
            # when all variables are integers, so we just abort in this case
            # since we have likely converged anyway. See PySOT issue #30.
            x = list(proposal.record.params)  # From tuple to list
            x_unwarped, = self.space_x.unwarp(x)
            if x_unwarped in self.history:
                warnings.warn("pySOT proposed the same point twice")
                self.start()
                return self.suggest(n_suggestions=n_suggestions)

            # NOTE: Append unwarped to avoid rounding issues
            self.history.append(copy(x_unwarped))
            self.proposals.append(proposal)
            x_w.append(copy(x_unwarped))

        return x_w

    def pysot_get_suggest(self, suggests):
        turbo_suggest_warps = self.space_x.warp(suggests)
        for i, warps in enumerate(turbo_suggest_warps):
            proposal = self.strategy.make_proposal(warps)
            proposal.add_callback(self.strategy.on_initial_proposal)
            record = EvalRecord(proposal.args, status="pending")
            proposal.record = record
            proposal.accept()

            self.history.append(copy(suggests[i]))
            self.proposals.append(proposal)

    def turbo_suggest(self, n_suggestions=1):
        if self.turbo_batch_size is None:  # Remember the batch size on the first call to suggest
            self.turbo_batch_size = n_suggestions
            self.turbo.batch_size = n_suggestions
            self.turbo.failtol = np.ceil(
                np.max([
                    4.0 / self.turbo_batch_size,
                    self.dim / self.turbo_batch_size
                ]))
            self.turbo.n_init = max([self.turbo.n_init, self.turbo_batch_size])
            self.restart()

        X_next = np.zeros((n_suggestions, self.dim))

        # Pick from the initial points
        n_init = min(len(self.X_init), n_suggestions)
        if n_init > 0:
            X_next[:n_init] = deepcopy(self.X_init[:n_init, :])
            self.X_init = self.X_init[n_init:, :]  # Remove these pending points

        # Get remaining points from TuRBO
        n_adapt = n_suggestions - n_init
        if n_adapt > 0:
            if len(self.turbo._X) > 0:  # Use random points if we can't fit a GP
                X = to_unit_cube(deepcopy(self.turbo._X), self.lb, self.ub)
                fX = copula_standardize(deepcopy(
                    self.turbo._fX).ravel())  # Use Copula
                X_cand, y_cand, _ = self.turbo._create_candidates(
                    X,
                    fX,
                    length=self.turbo.length,
                    n_training_steps=100,
                    hypers={})
                X_next[-n_adapt:, :] = self.turbo._select_candidates(
                    X_cand, y_cand)[:n_adapt, :]
                X_next[-n_adapt:, :] = from_unit_cube(X_next[-n_adapt:, :],
                                                      self.lb, self.ub)

        # Unwarp the suggestions
        suggestions = self.space_x.unwarp(X_next)
        return suggestions

    def _hyperopt_suggest(self):
        new_ids = self.trials.new_trial_ids(1)
        assert len(new_ids) == 1
        self.trials.refresh()

        seed = random_seed(self.random)
        new_trials = tpe.suggest(new_ids, self.domain, self.trials, seed)
        assert len(new_trials) == 1

        self.trials.insert_trial_docs(new_trials)
        self.trials.refresh()

        new_trial, = new_trials  # extract singleton
        return new_trial

    def _hyperopt_transform(self, x):
        new_id = self.trials.new_trial_ids(1)[0]

        domain = self.domain
        rng = np.random.RandomState(1)
        idxs, vals = pyll.rec_eval(domain.s_idxs_vals,
                                   memo={
                                       domain.s_new_ids: [new_id],
                                       domain.s_rng: rng,
                                   })
        rval_miscs = [dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)]
        rval_results = domain.new_result()
        for (k, _) in vals.items():
            vals[k][0] = x[k]
        miscs_update_idxs_vals(rval_miscs, idxs, vals)
        rval_docs = self.trials.new_trial_docs([new_id], [None], rval_results,
                                               rval_miscs)

        return rval_docs[0]

    def hyperopt_suggest(self, n_suggestions=1):
        assert n_suggestions >= 1, "invalid value for n_suggestions"

        # Get the new trials; it seems hyperopt either uses random search or
        # guesses one at a time anyway, so we might as well call it serially.
        new_trials = [self._hyperopt_suggest() for _ in range(n_suggestions)]

        X = []
        for trial in new_trials:
            x_guess = self.cleanup_guess(trial["misc"]["vals"])
            X.append(x_guess)

            # Build lookup to get original trial object
            x_guess_ = tuSOTOptimizer.hashable_dict(x_guess)
            assert x_guess_ not in self.trial_id_lookup, "the suggestions should not already be in the trial dict"
            self.trial_id_lookup[x_guess_] = trial["tid"]

        assert len(X) == n_suggestions
        return X

    def hyperopt_get_suggest(self, suggests):
        trials = [self._hyperopt_transform(x) for x in suggests]
        for trial in trials:
            x_guess = self.cleanup_guess(trial["misc"]["vals"])
            x_guess_ = tuSOTOptimizer.hashable_dict(x_guess)
            assert x_guess_ not in self.trial_id_lookup, "the suggestions should not already be in the trial dict"
            self.trial_id_lookup[x_guess_] = trial["tid"]
        self.trials.insert_trial_docs(trials)
        self.trials.refresh()

    def suggest(self, n_suggestions=1):
        if n_suggestions == 1:
            return self.turbo_suggest(n_suggestions)
        else:
            t_suggestion = n_suggestions // 2
            # p_suggestion = int((n_suggestions - t_suggestion) * 3/4)
            h_suggestion = n_suggestions - t_suggestion
            turbo_suggest = self.turbo_suggest(t_suggestion)
            # pysot_suggest = self.pysot_suggest(p_suggestion)
            hyperopt_suggest = self.hyperopt_suggest(h_suggestion)
            self.hyperopt_get_suggest(turbo_suggest)
            # self.pysot_get_suggest(turbo_suggest + hyperopt_suggest)
            return turbo_suggest + hyperopt_suggest

    def _observe(self, x, y):
        # Find the matching proposal and execute its callbacks
        idx = [x == xx for xx in self.history]
        if np.any(idx):
            i = np.argwhere(idx)[0].item()  # Pick the first index if there are ties
            proposal = self.proposals[i]
            proposal.record.complete(y)
            self.proposals.pop(i)
            self.history.pop(i)

    def observe(self, X, y):
        """Send an observation of a suggestion back to the optimizer.

        Parameters
        ----------
        X : list of dict-like
            Places where the objective function has already been evaluated.
            Each suggestion is a dictionary where each key corresponds to a
            parameter being optimized.
        y : array-like, shape (n,)
            Corresponding values where objective has been evaluated
        """
        assert len(X) == len(y)

        # # pysot observe
        # for x_, y_ in zip(X, y):
        #     # Just ignore, any inf observations we got, unclear if right thing
        #     if np.isfinite(y_):
        #         self._observe(x_, y_)

        # turbo observe
        XX, yy = self.space_x.warp(X), np.array(y)[:, None]

        if len(self.turbo._fX) >= self.turbo.n_init:
            self.turbo._adjust_length(yy)

        self.turbo.n_evals += self.turbo_batch_size

        self.turbo._X = np.vstack((self.turbo._X, deepcopy(XX)))
        self.turbo._fX = np.vstack((self.turbo._fX, deepcopy(yy)))
        self.turbo.X = np.vstack((self.turbo.X, deepcopy(XX)))
        self.turbo.fX = np.vstack((self.turbo.fX, deepcopy(yy)))

        # Check for a restart
        if self.turbo.length < self.turbo.length_min:
            self.restart()

        # hyperopt observe
        for x_guess, y_ in zip(X, y):
            x_guess_ = tuSOTOptimizer.hashable_dict(x_guess)
            assert x_guess_ in self.trial_id_lookup, "Appears to be a guess that did not originate from suggest"
            trial_id = self.trial_id_lookup.pop(x_guess_)
            trial = self.get_trial(trial_id)
            assert self.cleanup_guess(
                trial["misc"]["vals"]
            ) == x_guess, "trial ID not consistent with x values stored"

            # Cast to float to ensure native type
            result = {"loss": float(y_), "status": STATUS_OK}
            trial["state"] = JOB_STATE_DONE
            trial["result"] = result
        self.trials.refresh()
Example #22
    def idxs_vals_from_ids(self, ids, seed):
        docs = self.suggest(ids, self.domain, Trials(), seed)
        trials = trials_from_docs(docs)
        idxs, vals = miscs_to_idxs_vals(trials.miscs)
        return idxs, vals
Example #23
    def __init__(self, client):
        Trials.__init__(self)
        self._client = client
Example #24
                #print dir(tt['ar'])
                #print dir(tt['ar'].metadata)
                #print 'sent', tt['ar'].sent
                #print 'elapsed', tt['ar'].elapsed
                #print 'prog', tt['ar'].progress
                #print 'succ', tt['ar'].successful
                #print tt['ar'].metadata
                if tt['ar'].sent:
                    tt['state'] = JOB_STATE_RUNNING
            #elif (tt['state'] != JOB_STATE_RUNNING):
                    #and tt['ar'].metadata['started']):
            #if tt['state'] == JOB_STATE_NEW:
                #print id(tt['ar']), tt['ar'], tt['ar'].metadata #['status']
            # XXX mark errors

        Trials.refresh(self)

    def fmin(self, fn, space, algo, max_evals,
        random_state=0,
        verbose=0,
        ):
        lb_view = self._client.load_balanced_view()
        random_state = check_random_state(random_state)

        domain = Domain(fn, space,
            rseed=int(random_state.randint(2 ** 20)))  # `2^20` is XOR in Python; 2 ** 20 is intended

        while len(self.trials) < max_evals:
            if lb_view.queue_status()['unassigned']:
                sleep(1e-3)
                continue