def __init__(
        self,
        space: Optional[Dict] = None,
        metric: Optional[str] = None,
        mode: Optional[str] = None,
        points_to_evaluate: Optional[List[Dict]] = None,
        n_initial_points: int = 20,
        random_state_seed: Optional[int] = None,
        gamma: float = 0.25,
        max_concurrent: Optional[int] = None,
        use_early_stopped_trials: Optional[bool] = None,
):
    """Set up a HyperOpt (TPE) searcher.

    Args:
        space: Search space. Only a non-empty ``dict`` is turned into a
            hyperopt ``Domain`` here; anything else leaves ``self.domain``
            as ``None``.
        metric: Forwarded to the parent searcher.
        mode: ``"min"`` or ``"max"`` (or falsy to leave it undecided).
        points_to_evaluate: Optional list/tuple of initial configurations
            seeded into the hyperopt trials object.
        n_initial_points: Passed to TPE as ``n_startup_jobs`` unless ``None``.
        random_state_seed: Seed for the NumPy ``RandomState``.
        gamma: Passed to TPE as ``gamma`` unless ``None``.
        max_concurrent: Forwarded to the parent searcher and stored.
        use_early_stopped_trials: Forwarded to the parent searcher.
    """
    assert hpo is not None, (
        "HyperOpt must be installed! Run `pip install hyperopt`.")
    if mode:
        assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
    from hyperopt.fmin import generate_trials_to_calculate

    super(HyperOptSearch, self).__init__(
        metric=metric,
        mode=mode,
        max_concurrent=max_concurrent,
        use_early_stopped_trials=use_early_stopped_trials)
    self.max_concurrent = max_concurrent

    # hyperopt always minimizes, so a maximized metric is negated.
    # NOTE: metric_op is deliberately left unset when mode is undecided.
    if mode in ("max", "min"):
        self.metric_op = -1. if mode == "max" else 1.

    # Layer the optional TPE settings on top of the plain suggest function.
    suggest = hpo.tpe.suggest
    if n_initial_points is not None:
        suggest = partial(suggest, n_startup_jobs=n_initial_points)
    self.algo = suggest
    if gamma is not None:
        self.algo = partial(self.algo, gamma=gamma)

    if points_to_evaluate is not None:
        assert isinstance(points_to_evaluate, (list, tuple))
        self._hpopt_trials = generate_trials_to_calculate(
            points_to_evaluate)
        self._hpopt_trials.refresh()
        self._points_to_evaluate = len(points_to_evaluate)
    else:
        self._hpopt_trials = hpo.Trials()
        self._points_to_evaluate = 0

    self._live_trial_mapping = {}

    # RandomState(None) is the unseeded generator, same as RandomState().
    self.rstate = np.random.RandomState(random_state_seed)

    self.domain = None
    if isinstance(space, dict) and space:
        _, domain_vars, grid_vars = parse_spec_vars(space)
        needs_conversion = domain_vars or grid_vars
        if needs_conversion:
            logger.warning(
                UNRESOLVED_SEARCH_SPACE.format(par="space", cls=type(self)))
            space = self.convert_search_space(space)
        self.domain = hpo.Domain(lambda spc: spc, space)
def __init__(self,
             space,
             max_concurrent=10,
             reward_attr="episode_reward_mean",
             points_to_evaluate=None,
             **kwargs):
    """Set up a HyperOpt (TPE) searcher over ``space``.

    Args:
        space: hyperopt search space wrapped into a ``Domain``.
        max_concurrent: Positive ``int`` stored as ``_max_concurrent``.
        reward_attr: Name of the result attribute stored as ``_reward_attr``.
        points_to_evaluate: Optional list or tuple of initial configurations
            that are seeded into the hyperopt trials object.
        **kwargs: Forwarded unchanged to the parent constructor.
    """
    assert hpo is not None, "HyperOpt must be installed!"
    from hyperopt.fmin import generate_trials_to_calculate
    assert type(max_concurrent) is int and max_concurrent > 0
    self._max_concurrent = max_concurrent
    self._reward_attr = reward_attr
    self.algo = hpo.tpe.suggest
    self.domain = hpo.Domain(lambda spc: spc, space)
    if points_to_evaluate is None:
        self._hpopt_trials = hpo.Trials()
        self._points_to_evaluate = 0
    else:
        # isinstance (not an exact type comparison) so tuples and list
        # subclasses are accepted; the points are only iterated.
        assert isinstance(points_to_evaluate, (list, tuple))
        self._hpopt_trials = generate_trials_to_calculate(
            points_to_evaluate)
        self._hpopt_trials.refresh()
        self._points_to_evaluate = len(points_to_evaluate)
    self._live_trial_mapping = {}
    self.rstate = np.random.RandomState()

    super(HyperOptSearch, self).__init__(**kwargs)
def update_search_space(self, search_space):
    """Rebuild the hyperopt iterator for a new search space definition.

    The given definition is stored on ``self.json``, converted with
    ``json2space`` and wrapped into a fresh ``FMinIter`` (stored on
    ``self.rval``) that starts from an empty ``Trials`` object.

    Parameters
    ----------
    search_space : dict
    """
    self.json = search_space
    converted_space = json2space(self.json)

    random_state = np.random.RandomState()
    empty_trials = hp.Trials()
    hp_domain = hp.Domain(
        None, converted_space, pass_expr_memo_ctrl=None)
    suggest_fn = self._choose_tuner(self.algorithm_name)

    fmin_iter = hp.FMinIter(
        suggest_fn,
        hp_domain,
        empty_trials,
        max_evals=-1,
        rstate=random_state,
        verbose=0)
    self.rval = fmin_iter
    self.rval.catch_eval_exceptions = False
def _search_condor_parallel(path,
                            space,
                            trials_per_point,
                            setting,
                            objective,
                            max_evals,
                            algo=hyperopt.tpe.suggest,
                            max_queue_len=10,
                            poll_interval_secs=30):
    """Enqueue up to ``max_evals`` trials and wait until none is left NEW.

    A ``CondorTrials`` object is created for ``path``; if ``trials.pck``
    already exists there and was produced with the same ``setting`` and
    ``ids``, the pickled trials are resumed instead. Trials are suggested one
    at a time by ``algo`` while the queue is shorter than ``max_queue_len``,
    and the trials object is re-pickled after every enqueue round.

    ``algo`` may return ``hyperopt.base.StopExperiment`` to break out of the
    enqueuing loop.

    Returns:
        The (possibly resumed) trials object.
    """
    trials = CondorTrials(path=path,
                          ids=range(1, trials_per_point + 1),
                          setting=setting,
                          objective=objective)
    domain = hyperopt.Domain(dummy_f, space, rseed=123)
    trial_path = os.path.join(path, "trials.pck")
    if os.path.exists(trial_path):
        # Pickles must be read/written in binary mode (mandatory on
        # Python 3, and safe on Python 2).
        # NOTE(review): pickle.load executes arbitrary code from the file;
        # only point `path` at trusted directories.
        with open(trial_path, "rb") as f:
            old_trials = pickle.load(f)
        # Parenthesised single-argument form prints identically on
        # Python 2 and Python 3.
        print("Loaded existing trials")
        if (old_trials.setting == trials.setting
                and trials.ids == old_trials.ids):
            trials = old_trials
    n_queued = trials.count_by_state_unsynced(hyperopt.JOB_STATES)

    def get_queue_len():
        # NOTE(review): the count is discarded and the queue length is
        # whatever update_trials returns -- kept as in the original;
        # confirm against CondorTrials.
        trials.count_by_state_unsynced(hyperopt.base.JOB_STATE_NEW)
        return trials.update_trials(trials._trials)

    stopped = False
    while n_queued < max_evals:
        qlen = get_queue_len()
        while qlen < max_queue_len and n_queued < max_evals:
            n_to_enqueue = 1  # min(self.max_queue_len - qlen, N - n_queued)
            new_ids = trials.new_trial_ids(n_to_enqueue)
            trials.refresh()
            new_trials = algo(new_ids, domain, trials)
            if new_trials is hyperopt.base.StopExperiment:
                stopped = True
                break
            assert len(new_ids) >= len(new_trials)
            if not len(new_trials):
                break
            trials.insert_trial_docs(new_trials)
            trials.refresh()
            n_queued += len(new_trials)
            qlen = get_queue_len()

        with open(trial_path, "wb") as f:
            pickle.dump(trials, f)

        # -- wait for workers to fill in the trials
        time.sleep(poll_interval_secs)

        if stopped:
            break

    # NOTE(review): the flattened source does not show whether refresh()
    # belonged inside or after this wait loop. Refreshing on every poll and
    # once more before returning covers both readings.
    while trials.count_by_state_unsynced(hyperopt.base.JOB_STATE_NEW) > 0:
        time.sleep(poll_interval_secs)
        trials.refresh()
    trials.refresh()
    return trials
def __init__(self,
             space,
             max_concurrent=10,
             reward_attr=None,
             metric="episode_reward_mean",
             mode="max",
             points_to_evaluate=None,
             n_initial_points=20,
             random_state_seed=None,
             gamma=0.25,
             **kwargs):
    """Set up a HyperOpt (TPE) searcher over ``space``.

    Args:
        space: hyperopt search space wrapped into a ``Domain``.
        max_concurrent: Positive ``int`` stored as ``_max_concurrent``.
        reward_attr: Deprecated; when given it overrides ``metric`` and
            forces ``mode="max"`` (a warning is logged).
        metric: Name of the result attribute to optimize.
        mode: ``"min"`` or ``"max"``.
        points_to_evaluate: Optional list or tuple of initial configurations
            that are seeded into the hyperopt trials object.
        n_initial_points: Passed to TPE as ``n_startup_jobs`` unless ``None``.
        random_state_seed: Seed for the NumPy ``RandomState``; ``None``
            leaves it unseeded.
        gamma: Passed to TPE as ``gamma`` unless ``None``.
        **kwargs: Forwarded to the parent constructor together with
            ``metric`` and ``mode``.
    """
    assert hpo is not None, "HyperOpt must be installed!"
    from hyperopt.fmin import generate_trials_to_calculate
    assert type(max_concurrent) is int and max_concurrent > 0
    assert mode in ["min", "max"], "`mode` must be 'min' or 'max'!"

    if reward_attr is not None:
        mode = "max"
        metric = reward_attr
        logger.warning(
            "`reward_attr` is deprecated and will be removed in a future "
            "version of Tune. "
            "Setting `metric={}` and `mode=max`.".format(reward_attr))

    self._max_concurrent = max_concurrent
    self._metric = metric
    # hyperopt internally minimizes, so "max" => -1
    if mode == "max":
        self._metric_op = -1.
    elif mode == "min":
        self._metric_op = 1.
    if n_initial_points is None:
        self.algo = hpo.tpe.suggest
    else:
        self.algo = partial(
            hpo.tpe.suggest, n_startup_jobs=n_initial_points)
    if gamma is not None:
        self.algo = partial(self.algo, gamma=gamma)
    self.domain = hpo.Domain(lambda spc: spc, space)
    if points_to_evaluate is None:
        self._hpopt_trials = hpo.Trials()
        self._points_to_evaluate = 0
    else:
        # isinstance (not an exact type comparison) so tuples and list
        # subclasses are accepted; the points are only iterated.
        assert isinstance(points_to_evaluate, (list, tuple))
        self._hpopt_trials = generate_trials_to_calculate(
            points_to_evaluate)
        self._hpopt_trials.refresh()
        self._points_to_evaluate = len(points_to_evaluate)
    self._live_trial_mapping = {}
    if random_state_seed is None:
        self.rstate = np.random.RandomState()
    else:
        self.rstate = np.random.RandomState(random_state_seed)

    super(HyperOptSearch, self).__init__(
        metric=self._metric, mode=mode, **kwargs)
def __init__(
        self,
        space=None,
        metric=None,
        mode=None,
        points_to_evaluate=None,
        n_initial_points=20,
        random_state_seed=None,
        gamma=0.25,
        max_concurrent=None,
        use_early_stopped_trials=None,
):
    """Set up a HyperOpt (TPE) searcher; the search space may come later.

    ``metric``, ``mode``, ``max_concurrent`` and ``use_early_stopped_trials``
    are handed to the parent constructor. ``n_initial_points`` and ``gamma``
    are bound onto ``hpo.tpe.suggest`` when they are not ``None``.
    ``points_to_evaluate`` (list or tuple) seeds the hyperopt trials. A
    truthy ``space`` is wrapped into a ``Domain``; otherwise ``self.domain``
    stays ``None``.
    """
    assert hpo is not None, (
        "HyperOpt must be installed! Run `pip install hyperopt`.")
    if mode:
        assert mode in ["min", "max"], "`mode` must be 'min' or 'max'."
    from hyperopt.fmin import generate_trials_to_calculate

    parent_options = dict(
        metric=metric,
        mode=mode,
        max_concurrent=max_concurrent,
        use_early_stopped_trials=use_early_stopped_trials)
    super(HyperOptSearch, self).__init__(**parent_options)
    self.max_concurrent = max_concurrent

    # hyperopt always minimizes, so a maximized metric is negated.
    # metric_op is left unset while the mode is undecided.
    if mode in ("max", "min"):
        self.metric_op = -1. if mode == "max" else 1.

    tpe_suggest = hpo.tpe.suggest
    if n_initial_points is not None:
        tpe_suggest = partial(tpe_suggest, n_startup_jobs=n_initial_points)
    self.algo = tpe_suggest
    if gamma is not None:
        self.algo = partial(self.algo, gamma=gamma)

    if points_to_evaluate is not None:
        assert isinstance(points_to_evaluate, (list, tuple))
        self._hpopt_trials = generate_trials_to_calculate(
            points_to_evaluate)
        self._hpopt_trials.refresh()
        self._points_to_evaluate = len(points_to_evaluate)
    else:
        self._hpopt_trials = hpo.Trials()
        self._points_to_evaluate = 0

    self._live_trial_mapping = {}

    # RandomState(None) is the unseeded generator, same as RandomState().
    self.rstate = np.random.RandomState(random_state_seed)

    self.domain = None
    if space:
        self.domain = hpo.Domain(lambda spc: spc, space)
def __init__(self,
             space,
             max_concurrent=10,
             reward_attr="episode_reward_mean",
             **kwargs):
    """Set up a plain TPE searcher over ``space``.

    ``max_concurrent`` must be a positive ``int``; ``reward_attr`` names the
    result attribute that is stored for later use. Remaining keyword
    arguments go to the parent constructor, which is called last.
    """
    assert hpo is not None, "HyperOpt must be installed!"
    assert type(max_concurrent) is int and max_concurrent > 0

    # Plain configuration values.
    self._max_concurrent = max_concurrent
    self._reward_attr = reward_attr

    # hyperopt machinery: suggestion algorithm, identity-objective domain,
    # an empty trial store and an unseeded random state.
    self.algo = hpo.tpe.suggest
    self.domain = hpo.Domain(lambda spc: spc, space)
    self._hpopt_trials = hpo.Trials()
    self._live_trial_mapping = dict()
    self.rstate = np.random.RandomState()

    super(HyperOptSearch, self).__init__(**kwargs)
def create_hyperopt_domain(self):
    """Translate ``self.search_space.params`` into a hyperopt ``Domain``.

    INTEGER parameters become ``quniform`` with step 1, DOUBLE parameters
    become ``uniform`` and CATEGORICAL/DISCRETE parameters become ``choice``
    over ``param.list``. Parameters of any other type are ignored. The result
    is stored on ``self.hyperopt_domain``.

    Resulting space, for example:
    {"x": hyperopt.hp.uniform('x', -10, 10),
     "x2": hyperopt.hp.uniform('x2', -10, 10)}
    """
    space_by_name = {}
    for param in self.search_space.params:
        kind = param.type
        if kind == INTEGER:
            expr = hyperopt.hp.quniform(
                param.name, float(param.min), float(param.max), 1)
        elif kind == DOUBLE:
            expr = hyperopt.hp.uniform(
                param.name, float(param.min), float(param.max))
        elif kind == CATEGORICAL or kind == DISCRETE:
            expr = hyperopt.hp.choice(param.name, param.list)
        else:
            # Unknown parameter types contribute nothing to the space.
            continue
        space_by_name[param.name] = expr

    self.hyperopt_domain = hyperopt.Domain(
        None, space_by_name, pass_expr_memo_ctrl=None)
def get_suggestions(self,
                    configs: List[Dict] = None,
                    metrics: List[float] = None) -> List[Dict]:
    """Ask hyperopt for ``self.config.num_runs`` new parameter suggestions.

    Previous ``configs``/``metrics`` are replayed into a hyperopt trials
    object via ``_get_previous_observations``; the run counts as the first
    one when either of them is empty or missing. Suggested values that have
    an entry in ``self._param_to_value`` are mapped back through it.

    Raises:
        ValueError: if ``self.config.num_runs`` is not set.
    """
    if not self.config.num_runs:
        raise ValueError("This search strategy requires `num_runs`.")

    rand_generator = get_random_generator(seed=self.config.seed)
    hyperopt_domain = hyperopt.Domain(
        None, self._search_space, pass_expr_memo_ctrl=None)
    hyperopt_trials = self._get_previous_observations(
        hyperopt_domain=hyperopt_domain, configs=configs, metrics=metrics)
    is_first = not (configs and metrics)

    minimize = hyperopt.FMinIter(
        self.config.algorithm,
        hyperopt_domain,
        hyperopt_trials,
        max_evals=-1,
        rstate=rand_generator,
        verbose=0,
    )
    minimize.catch_eval_exceptions = False

    new_ids = minimize.trials.new_trial_ids(self.config.num_runs)
    minimize.trials.refresh()
    random_state = minimize.rstate.randint(2**31 - 1)
    new_trials = self.run_algorithm(
        is_first, new_ids, minimize.domain, hyperopt_trials, random_state)
    minimize.trials.refresh()

    suggestions = []
    for tid in range(self.config.num_runs):
        vals = new_trials[tid]["misc"]["vals"]
        # Each entry of `vals` is a one-element sequence; translate it back
        # to the user-facing value where a mapping exists.
        suggestions.append({
            param: (self._param_to_value[param][vals[param][0]]
                    if param in self._param_to_value else vals[param][0])
            for param in vals
        })
    return suggestions
def set_search_properties(self, metric, mode, config):
    """Late-bind the search space, metric and mode.

    Returns ``False`` without touching anything when a domain already
    exists. Otherwise ``config`` is converted into a hyperopt ``Domain``,
    truthy ``metric``/``mode`` overwrite the stored ones, ``metric_op`` is
    derived from the resulting mode, and ``True`` is returned.
    """
    if self.domain:
        return False

    converted = self.convert_search_space(config)
    self.domain = hpo.Domain(lambda spc: spc, converted)

    if metric:
        self._metric = metric
    if mode:
        self._mode = mode

    # hyperopt minimizes, so maximization flips the sign of the metric.
    effective_mode = self._mode
    if effective_mode == "max":
        self.metric_op = -1.
    elif effective_mode == "min":
        self.metric_op = 1.

    return True
def update_search_space(self, search_space):
    """Validate a new search space and rebuild the hyperopt iterator.

    After ``validate_search_space`` accepts the definition it is stored on
    ``self.json``, converted with ``json2space`` and wrapped into a fresh
    ``FMinIter`` (stored on ``self.rval``) that starts from empty trials.
    """
    validate_search_space(search_space)
    self.json = search_space
    converted_space = json2space(self.json)

    random_state = np.random.RandomState()
    empty_trials = hp.Trials()
    hp_domain = hp.Domain(
        None, converted_space, pass_expr_memo_ctrl=None)
    suggest_fn = self._choose_tuner(self.algorithm_name)

    fmin_iter = hp.FMinIter(
        suggest_fn,
        hp_domain,
        empty_trials,
        max_evals=-1,
        rstate=random_state,
        verbose=0)
    self.rval = fmin_iter
    self.rval.catch_eval_exceptions = False
def __init__(self,
             space,
             seed=123,
             method="tpe",
             errors_ok=False,
             context=None):
    """Set up a hyperopt-backed suggester.

    Args:
        space: Search space description. It is deep-copied twice: one copy
            is kept untouched on ``og_space``, the other is rewritten in
            place so that hyperopt markers become hyperopt expressions.
        seed: Seed for the NumPy ``RandomState``.
        method: ``"tpe"`` or ``"rand"``.
        errors_ok: Determines whether errors are treated as errors, or as
            trials with infinite loss.
        context: Mapping forwarded to the parent constructor. ``None``
            (the default) means a fresh empty dict per instance.

    Raises:
        ValueError: if ``method`` is not recognised.
    """
    # A `{}` default would be one dict shared by every instance that
    # omits `context`; create a new one per call instead.
    if context is None:
        context = {}
    super().__init__(context=context)
    self.og_space = deepcopy(space)
    self.space = deepcopy(space)
    # Parse space to contain hyperopt functions
    find_pattern_apply_f(self.space, is_hyperopt, to_hyperopt)
    self._needs_postprocessing = len(find_pattern(self.space, is_exp2)) > 0
    self.seed = seed
    self.method = method
    if method == "tpe":
        self.algo = hpo.tpe
    elif method == "rand":
        self.algo = hpo.rand
    else:
        raise ValueError(f"Do not recognise suggestion method {method}.")
    self.errors_ok = errors_ok

    # this is to avoid getting spammed with useless TPE diagnostic info
    algo_logger = logging.getLogger("hpo")
    algo_logger.setLevel(logging.WARNING)
    self.algo.logger = algo_logger

    # this is hyperopt internals
    self.domain = hpo.Domain(lambda spc: spc, self.space)
    self._hpopt_trials = hpo.Trials()
    # this maps internal hyperopt ids to trial ids
    self._live_trial_mapping = {}
    self.rstate = np.random.RandomState(self.seed)
    # used to generate unique, but deterministic trial ids
    self.counter = count()
def add_experiment(self, experiment, trial_runner):
    """Register the single experiment this scheduler drives.

    The experiment spec is deep-copied, forced onto the "HyperOpt"
    scheduler, and its ``config["space"]`` entry is removed and wrapped into
    a hyperopt ``Domain``. A second call fails the assertion below.
    """
    assert self._experiment is None, "HyperOpt only tracks one experiment!"
    self._experiment = experiment
    self._output_path = experiment.name

    spec = copy.deepcopy(experiment.spec)

    # Set Scheduler field, as Tune Parser will default to FIFO
    assert spec.get("scheduler") in [None, "HyperOpt"], (
        "Incorrectly specified scheduler!")
    spec["scheduler"] = "HyperOpt"

    # A top-level "env" entry is moved into the config dict.
    if "env" in spec:
        config = spec.setdefault("config", {})
        config["env"] = spec.pop("env")

    # The search space is consumed here and must not reach the parser.
    space = spec["config"].pop("space")

    self.parser = make_parser()
    self.args = self.parser.parse_args(to_argv(spec))
    self.args.scheduler = "HyperOpt"
    self.default_config = copy.deepcopy(spec["config"])

    self.algo = hpo.tpe.suggest
    self.domain = hpo.Domain(lambda spc: spc, space)
    self._hpopt_trials = hpo.Trials()
    self._tune_to_hp = {}
    self._num_trials_left = self.args.repeat

    # Never allow more concurrent trials than there are repeats.
    if type(self._max_concurrent) is int:
        self._max_concurrent = min(self._max_concurrent, self.args.repeat)

    self.rstate = np.random.RandomState()
    self.trial_generator = self._trial_generator()
    self._add_new_trials_if_needed(trial_runner)
def _setup_hyperopt(self) -> None:
    """Build the hyperopt trials object and domain from the stored settings.

    After this call ``self._points_to_evaluate`` holds the *number* of
    seeded points (0 when none were given), no longer the points themselves.
    """
    from hyperopt.fmin import generate_trials_to_calculate

    if self._metric is None and self._mode:
        # If only a mode was passed, use anonymous metric
        self._metric = DEFAULT_METRIC

    if self._points_to_evaluate is not None:
        assert isinstance(self._points_to_evaluate, (list, tuple))

        for config in self._points_to_evaluate:
            self._convert_categories_to_indices(config)

        # HyperOpt treats initial points as LIFO, reverse to get FIFO
        self._points_to_evaluate = list(reversed(self._points_to_evaluate))
        seeded_points = self._points_to_evaluate
        self._hpopt_trials = generate_trials_to_calculate(seeded_points)
        self._hpopt_trials.refresh()
        self._points_to_evaluate = len(seeded_points)
    else:
        self._hpopt_trials = hpo.Trials()
        self._points_to_evaluate = 0

    self.domain = hpo.Domain(lambda spc: spc, self._space)
def _setup_hyperopt(self):
    """Fill in the default metric if needed and build the hyperopt domain."""
    metric_missing = self._metric is None
    if metric_missing and self._mode:
        # Only a mode was given: fall back to the anonymous metric.
        self._metric = DEFAULT_METRIC

    # The objective is the identity; hyperopt is used for sampling only.
    self.domain = hpo.Domain(lambda spc: spc, self._space)
def get_new_suggestions(self, study_id, trials=None, number=1):
    """Create ``number`` new trials using a value suggested by hyperopt TPE.

    A single suggestion is drawn from a placeholder space
    (``uniform('x', -10, 10)``) and written to every DOUBLE parameter of
    every created trial. INTEGER, DISCRETE and CATEGORICAL parameters are
    not supported yet and are left out of the trial's parameter values.

    Args:
        study_id: Primary key of the study to create trials for.
        trials: Unused; kept for interface compatibility.
        number: How many trials to create.

    Returns:
        The list of newly created and saved trials.
    """
    # TODO: Build the search space from the study configuration instead of
    # this hard-coded placeholder.
    search_space_instance = hyperopt.hp.uniform('x', -10, 10)

    rstate = np.random.RandomState()
    # A local name, so the (unused) `trials` argument is not clobbered.
    hyperopt_trials = hyperopt.Trials()
    domain = hyperopt.Domain(
        None, search_space_instance, pass_expr_memo_ctrl=None)

    rval = hyperopt.FMinIter(
        hyperopt.tpe.suggest,
        domain,
        hyperopt_trials,
        max_evals=-1,
        rstate=rstate,
        verbose=0)
    rval.catch_eval_exceptions = False

    algorithm = rval.algo
    new_ids = rval.trials.new_trial_ids(1)
    rval.trials.refresh()
    random_state = rval.rstate.randint(2**31 - 1)
    new_trials = algorithm(new_ids, rval.domain, hyperopt_trials,
                           random_state)
    rval.trials.refresh()

    # Example: {'x': [8.721658602103911]}
    vals = new_trials[0]['misc']['vals']
    # TODO: Get the specified value from hyperopt
    suggest_value = vals["x"][0]

    study = Study.objects.get(id=study_id)
    study_configuration_json = json.loads(study.study_configuration)
    params = study_configuration_json["params"]

    return_trial_list = []
    for _ in range(number):
        trial = Trial.create(study.id, "TpeTrial")
        # Only DOUBLE parameters receive a value; assigning outside this
        # filter would reuse a stale (or undefined) suggestion for the
        # unsupported parameter types.
        parameter_values_json = {
            param["parameterName"]: suggest_value
            for param in params if param["type"] == "DOUBLE"
        }
        trial.parameter_values = json.dumps(parameter_values_json)
        trial.save()
        return_trial_list.append(trial)

    return return_trial_list
def _split_coef_by_layer(coef, cum_ni):
    """Split a flat coefficient vector into the four per-layer slices."""
    return (coef[:cum_ni[0]],
            coef[cum_ni[0]:cum_ni[1]],
            coef[cum_ni[1]:cum_ni[2]],
            coef[cum_ni[2]:])


def _unit_rankings(layer_coefs):
    """Return the argsort of every layer's coefficients, as strings."""
    return tuple(str(coefs.argsort()) for coefs in layer_coefs)


def main():
    """Run the three-phase pipeline search driven by command-line options.

    Phase 1 evaluates ``--init_budget`` randomly picked pipelines, phase 2
    runs ``--ei_budget`` evaluations chosen by expected improvement on two
    ridge models (result and log run time), and phase 3 either writes the
    selected subspace out for SMAC or optimizes it with hyperopt TPE,
    depending on ``--algo``.
    """
    parser = ArgumentParser()

    parser.add_argument('-p', '--space', dest='spaceFile',
                        help='Where is the space.py located?')
    parser.add_argument(
        '--use_optimal_design', dest='use_optimal_design',
        help='Use optimal design or pure random initialization?')
    parser.add_argument(
        '--init_budget', dest='init_budget',
        help='How many evaluations for random burning period?')
    parser.add_argument(
        '--ei_budget', dest='ei_budget',
        help='How many evaluations for EI controlled online period?')
    parser.add_argument(
        '--bopt_budget', dest='bopt_budget',
        help='How many evaluations for Bayesian optimization after get '
             'subspace?')
    parser.add_argument(
        '--ei_xi', dest='ei_xi',
        help='What is the exploration parameter for computing EI?')
    parser.add_argument(
        '--top_k_pipelines', dest='top_k_pipelines',
        help='How many top (LR predicted) pipelines to cover in subspace?')
    parser.add_argument('-s', '--seed', default='1', dest='seed', type=int,
                        help='Seed for the algorithm')
    parser.add_argument(
        '-a', '--algo', default='SMAC', dest='algo', type=str,
        help='Specify the algorithm after LR, can be SMAC or TPE')
    parser.add_argument(
        '-r', '--restore', action='store_true', dest='restore',
        help='When this flag is set state.pkl is restored in '
             'the current working directory')
    parser.add_argument('--random', default=False, action='store_true',
                        dest='random', help='Use a random search')
    parser.add_argument('--cwd', help='Change the working directory before '
                                      'optimizing.')

    args, unknown = parser.parse_known_args()

    if args.cwd:
        os.chdir(args.cwd)

    if not os.path.exists(args.spaceFile):
        logger.critical('Search space not found: %s' % args.spaceFile)
        sys.exit(1)

    # First remove '.py'
    space, ext = os.path.splitext(os.path.basename(args.spaceFile))

    # Then load dict searchSpace and out function cv.py
    sys.path.append('./')
    sys.path.append('')

    module = import_module(space)
    search_space = module.space
    # number of units in each layer
    ni = [len(d) for d in module.layer_dict_list]
    cum_ni = np.cumsum(ni)

    log_filename = 'lr.pkl'

    # Random burning period as initialization
    init_budget = int(args.init_budget)
    if args.use_optimal_design == '1':
        picks = get_random_picks_by_optimal_design(ni, init_budget)
    else:
        picks = get_pure_random_picks(ni, init_budget)
    for i in range(init_budget):
        times = get_num_of_trials(log_filename, filter_valid=False)
        valid_times = get_num_of_trials(log_filename, filter_valid=True)
        logger.info('IMPORTANT! YOU ARE RUNNING FLASH WITH: %s' % args.algo)
        logger.info('Total evaluation times: %d, valid times: %d' %
                    (times, valid_times))
        logger.info('Random burning period times: %d, valid times: %d' %
                    (times, valid_times))
        subspace = construct_subspace(module, picks[i])
        params = sample(subspace)
        cv.main(params)
    valid_times_in_random_period = get_num_of_trials(log_filename,
                                                     filter_valid=True)

    # Train the first LR model before entering into EI controlled period.
    # NOTE(review): unpickling executes arbitrary code; the log file is
    # assumed to be produced by this experiment only.
    with open(log_filename) as fh:
        log = cPickle.load(fh)
    trials = log['trials']

    X = []
    y = []
    y_time = []
    for trial in trials:
        result = trial['result']
        duration = trial['duration']
        # make sure the logged result is a number (accept evaluations
        # return 100.0)
        if result <= 100:
            params = trial['params']
            rescaling = params['-rescaling']
            balancing = params['-balancing']
            feat_pre = params['-feat_pre']
            clf = params['-classifier']
            # One-hot encode the chosen unit of each of the four layers.
            x = [[0] * n for n in ni]
            x[0][module.d_rescaling[rescaling]] = 1
            x[1][module.d_balancing[balancing]] = 1
            x[2][module.d_feat_pre[feat_pre]] = 1
            x[3][module.d_clf[clf]] = 1
            x_flat = np.array(x[0] + x[1] + x[2] + x[3])
            X.append(x_flat)
            y.append(result)
            y_time.append(np.log(duration))
    X = np.array(X)
    alpha = 1.0
    lr = linear_model.Ridge(alpha=alpha)
    lr.fit(X, y)
    lr_time = linear_model.Ridge(alpha=alpha)
    lr_time.fit(X, y_time)

    # Online period controlled by EI
    ei_budget = int(args.ei_budget)
    for i in range(ei_budget):
        times = get_num_of_trials(log_filename, filter_valid=False)
        valid_times = get_num_of_trials(log_filename, filter_valid=True)
        logger.info('Total evaluation times: %d, valid times: %d' %
                    (times, valid_times))
        logger.info('EI controlled period times: %d, valid times: %d' %
                    (times - init_budget,
                     valid_times - valid_times_in_random_period))
        logger.info('LR model estimated unit ranking: %s %s %s %s' %
                    _unit_rankings(_split_coef_by_layer(lr.coef_, cum_ni)))
        logger.info(
            'LR Time model estimated unit ranking: %s %s %s %s' %
            _unit_rankings(_split_coef_by_layer(lr_time.coef_, cum_ni)))
        # pick the best pipeline by EI
        x_next = get_next_by_EI(ni, alpha, lr, lr_time, X, y,
                                float(args.ei_xi))
        pick = [[np.argmax(x_next_i)] for x_next_i in x_next]
        subspace = construct_subspace(module, pick)
        params = sample(subspace)
        cv.main(params)
        result, duration = get_last_run(log_filename)
        if result <= 100:
            # Only valid runs extend the training set; refit both models.
            x_next_flat = np.array(
                x_next[0] + x_next[1] + x_next[2] + x_next[3])
            X = np.vstack([X, x_next_flat])
            y.append(result)
            y_time.append(np.log(duration))
            lr = linear_model.Ridge(alpha=alpha)
            lr.fit(X, y)
            lr_time = linear_model.Ridge(alpha=alpha)
            lr_time.fit(X, y_time)
    valid_times_in_ei_period = get_num_of_trials(
        log_filename, filter_valid=True) - valid_times_in_random_period

    # Construct subspace based on LR prediction
    final_rankings = _unit_rankings(_split_coef_by_layer(lr.coef_, cum_ni))
    final_time_rankings = _unit_rankings(
        _split_coef_by_layer(lr_time.coef_, cum_ni))
    final_pick = get_covered_units_by_ei(ni, alpha, lr, lr_time, X, y, 0,
                                         int(args.top_k_pipelines))
    final_subspace = construct_subspace(module, final_pick)
    selected_pipelines = (final_pick[0], final_pick[1],
                          final_pick[2], final_pick[3])
    logger.info('LR model estimated unit ranking: %s %s %s %s' %
                final_rankings)
    logger.info('LR Time model estimated unit ranking: %s %s %s %s' %
                final_time_rankings)
    logger.info('Selected pipelines: %s %s %s %s' % selected_pipelines)

    # Phase 3 with SMAC
    if args.algo == 'SMAC':
        with open('pickup.txt', 'w') as fh:
            for layer_pick in final_pick:
                for i in layer_pick:
                    fh.write('%d ' % i)
                fh.write('\n')
        subspace = construct_subspace(module, final_pick)
        new_space = convert_tpe_to_smac_from_object(subspace)
        with open('params.pcs', 'w') as fh:
            fh.write(new_space)
    # Phase 3 with TPE
    elif args.algo == 'TPE':
        fn = cv.main
        domain = hyperopt.Domain(fn, final_subspace, rseed=int(args.seed))
        trials = hyperopt.Trials()
        bopt_budget = int(args.bopt_budget)
        for i in range(bopt_budget):
            times = get_num_of_trials(log_filename, filter_valid=False)
            valid_times = get_num_of_trials(log_filename, filter_valid=True)
            logger.info('Total evaluation times: %d, valid times: %d' %
                        (times, valid_times))
            logger.info(
                'TPE period times: %d, valid times: %d' %
                (times - init_budget - ei_budget,
                 valid_times - valid_times_in_random_period -
                 valid_times_in_ei_period))
            logger.info('LR model estimated unit ranking: %s %s %s %s' %
                        final_rankings)
            logger.info(
                'LR Time model estimated unit ranking: %s %s %s %s' %
                final_time_rankings)
            logger.info('Selected pipelines: %s %s %s %s' %
                        selected_pipelines)

            # in exhaust, the number of evaluations is max_evals - num_done
            tpe_with_seed = partial(hyperopt.tpe.suggest,
                                    seed=int(args.seed))
            rval = hyperopt.FMinIter(tpe_with_seed, domain, trials,
                                     max_evals=i)
            rval.exhaust()
def main():
    """Run (or resume) a TPE / random-search optimization from the CLI.

    The search space module named by ``--space`` is imported and optimized
    one evaluation at a time for ``--maxEvals`` evaluations. After every
    evaluation the trials and domain are pickled to ``state.pkl`` in the
    working directory; with ``--restore`` that file is loaded first.
    """
    prog = "python statistics.py WhatIsThis <manyPickles> WhatIsThis <manyPickles> [WhatIsThis <manyPickles>]"
    description = "Return some statistical information"

    parser = ArgumentParser(description=description, prog=prog)
    parser.add_argument("-p", "--space", dest="spaceFile",
                        help="Where is the space.py located?")
    parser.add_argument("-m", "--maxEvals", dest="maxEvals",
                        help="How many evaluations?")
    parser.add_argument("-s", "--seed", default="1", dest="seed", type=int,
                        help="Seed for the TPE algorithm")
    parser.add_argument(
        "-r", "--restore", action="store_true", dest="restore",
        help="When this flag is set state.pkl is restored in "
             "the current working directory")
    parser.add_argument("--random", default=False, action="store_true",
                        dest="random", help="Use a random search")
    parser.add_argument("--cwd", help="Change the working directory before "
                                      "optimizing.")

    args, unknown = parser.parse_known_args()

    if args.cwd:
        os.chdir(args.cwd)

    cfg = load_experiment_config_file()
    log_level = cfg.getint("HPOLIB", "HPOlib_loglevel")
    logging.basicConfig(
        format='[%(levelname)s] [%(asctime)s:%(name)s] %(message)s',
        datefmt='%H:%M:%S')
    logger.setLevel(log_level)

    if not os.path.exists(args.spaceFile):
        logger.critical("Search space not found: %s" % args.spaceFile)
        sys.exit(1)

    # First remove ".py"
    space, ext = os.path.splitext(os.path.basename(args.spaceFile))

    # Then load dict searchSpace and out function cv.py
    sys.path.append("./")
    sys.path.append("")

    module = import_module(space)
    search_space = module.space

    cli_target = "HPOlib.optimization_interceptor"
    fn = partial(command_line_function, cli_target=cli_target)

    if args.random:
        # We use a random search
        tpe_with_seed = partial(hyperopt.tpe.rand.suggest,
                                seed=int(args.seed))
        logger.info("Using Random Search")
    else:
        tpe_with_seed = partial(hyperopt.tpe.suggest, seed=int(args.seed))

    # Now run TPE, emulate fmin.fmin()
    state_filename = "state.pkl"

    def save_state():
        # Binary mode is what pickle requires; `with` closes the handle even
        # if dumping fails.
        with open(state_filename, "wb") as state_file:
            cPickle.dump({"trials": trials, "domain": domain}, state_file)

    if args.restore:
        # We do not need to care about the state of the trials object since
        # it is only serialized in a synchronized state, there will never be
        # a save with a running experiment.
        # NOTE(review): unpickling executes arbitrary code; state.pkl is
        # assumed to have been written by a previous run of this script.
        with open(state_filename, "rb") as state_file:
            tmp_dict = cPickle.load(state_file)
        domain = tmp_dict['domain']
        trials = tmp_dict['trials']
        print(trials.__dict__)
    else:
        domain = hyperopt.Domain(fn, search_space, rseed=int(args.seed))
        trials = hyperopt.Trials()
        # By this we probably lose the seed; not too critical for a restart
        save_state()

    for i in range(int(args.maxEvals) + 1):
        # in exhaust, the number of evaluations is max_evals - num_done
        rval = hyperopt.FMinIter(tpe_with_seed, domain, trials, max_evals=i)
        rval.exhaust()
        save_state()

    best = trials.argmin
    # Single-argument form so the output is the same on Python 2 and 3.
    print("Best Value found for params: %s" % (best,))
best_trial = trials.best_trial except Exception, e: raise ValueError('problem retrieving best trial: %s' % (e)) dataset_info = {'data_obj': data_obj, 'fn_imgs': 'protocol_imgs', 'fn_eval': 'protocol_eval'} search_space = build_search_space(dataset_info, learning_algo, hp_space=hp_space, n_ok_trials=1000000, batched_lmap_speed_thresh=speed_thresh) ctrl = hyperopt.Ctrl(trials=trials, current_trial=best_trial) domain = hyperopt.Domain(objective, search_space) best_hps = hyperopt.base.spec_from_misc(best_trial['misc']) r_dict = domain.evaluate(best_hps, ctrl, attach_attachments=True) if r_dict['status'] == 'ok': print '\nperformance according to dataset protocol:\n' for key in r_dict: if key == 'int_samples': if int_samples: interesting_samples(r_dict['int_samples']) else: print key, pprint.pformat(r_dict[key]) else: print '\n', r_dict['failure']['tb']
def get_new_suggestions(self, study_name, input_trials=[], number=1):
    """
    Get the new suggested trials with TPE algorithm.

    The hyperopt search space is built from the study's configuration, the
    study's completed trials are replayed into a hyperopt ``Trials`` object,
    and ``self.hyperopt_algorithm`` is then asked for ``number`` new points.

    INTEGER parameters are not supported yet: they are left out of the search
    space and out of the returned parameter values.

    Args:
        study_name: Name of the study whose configuration and completed
            trials are loaded.
        input_trials: Not used by this implementation (and never mutated).
        number: How many new trials to suggest.

    Returns:
        A list with one advisor trial per suggestion, each created with
        ``Trial.create`` and carrying the suggested values as a JSON string
        in ``parameter_values``.
    """

    def feasible_points(param, cast=None):
        # Split the comma separated "feasiblePoints" string of a parameter.
        points = [value.strip() for value in param["feasiblePoints"].split(",")]
        if cast is not None:
            points = [cast(point) for point in points]
        return points

    study = Study.objects.get(name=study_name)
    study_configuration_json = json.loads(study.study_configuration)
    params = study_configuration_json["params"]

    # Construct search space, example: {"x": hyperopt.hp.uniform('x', -10, 10), "x2": hyperopt.hp.uniform('x2', -10, 10)}
    hyperopt_search_space = {}
    for param in params:
        param_name = param["parameterName"]
        param_type = param["type"]

        if param_type == "INTEGER":
            # TODO: Support int type of search space
            pass
        elif param_type == "DOUBLE":
            hyperopt_search_space[param_name] = hyperopt.hp.uniform(
                param_name, param["minValue"], param["maxValue"])
        elif param_type == "DISCRETE" or param_type == "CATEGORICAL":
            hyperopt_search_space[param_name] = hyperopt.hp.choice(
                param_name, feasible_points(param))

    # New hyperopt variables
    hyperopt_rstate = np.random.RandomState()
    hyperopt_domain = hyperopt.Domain(
        None, hyperopt_search_space, pass_expr_memo_ctrl=None)

    hyperopt_trial_specs = []
    hyperopt_trial_results = []
    # Example: [{'tid': 0, 'idxs': {'l1_normalization': [0], 'learning_rate': [0], 'hidden2': [0], 'optimizer': [0]}, 'cmd': ('domain_attachment', 'FMinIter_Domain'), 'vals': {'l1_normalization': [0.1], 'learning_rate': [0.1], 'hidden2': [1], 'optimizer': [1]}, 'workdir': None}]
    hyperopt_trial_miscs = []
    hyperopt_trial_new_ids = []

    # Update hyperopt for trained trials with completed advisor trials
    completed_hyperopt_trials = hyperopt.Trials()
    completed_advisor_trials = Trial.objects.filter(
        study_name=study_name, status="Completed")

    for index, advisor_trial in enumerate(completed_advisor_trials):
        # Example: {"learning_rate": 0.01, "optimizer": "ftrl"}
        parameter_values_json = json.loads(advisor_trial.parameter_values)

        # Example: {'l1_normalization': [0], 'learning_rate': [0], 'hidden2': [0], 'optimizer': [0]}
        hyperopt_trial_miscs_idxs = {}
        # Example: {'l1_normalization': [0.1], 'learning_rate': [0.1], 'hidden2': [1], 'optimizer': [1]}
        hyperopt_trial_miscs_vals = {}

        new_id = index
        hyperopt_trial_new_ids.append(new_id)
        hyperopt_trial_misc = dict(
            tid=new_id,
            cmd=hyperopt_domain.cmd,
            workdir=hyperopt_domain.workdir)

        for param in params:
            param_name = param["parameterName"]
            param_type = param["type"]

            if param_type == "INTEGER":
                pass
            elif param_type == "DOUBLE":
                hyperopt_trial_miscs_idxs[param_name] = [index]
                hyperopt_trial_miscs_vals[param_name] = [
                    parameter_values_json[param_name]
                ]
            elif param_type == "DISCRETE":
                # hp.choice records the index of the chosen option, not the
                # option itself.
                index_of_value_in_list = feasible_points(param, float).index(
                    parameter_values_json[param_name])
                hyperopt_trial_miscs_idxs[param_name] = [index]
                hyperopt_trial_miscs_vals[param_name] = [
                    index_of_value_in_list
                ]
            elif param_type == "CATEGORICAL":
                # Example: "ftrl"
                index_of_value_in_list = feasible_points(param).index(
                    parameter_values_json[param_name])
                hyperopt_trial_miscs_idxs[param_name] = [index]
                hyperopt_trial_miscs_vals[param_name] = [
                    index_of_value_in_list
                ]

        hyperopt_trial_specs.append(None)

        hyperopt_trial_misc["idxs"] = hyperopt_trial_miscs_idxs
        hyperopt_trial_misc["vals"] = hyperopt_trial_miscs_vals
        hyperopt_trial_miscs.append(hyperopt_trial_misc)

        # TODO: Use negative objective value for loss or not
        loss_for_hyperopt = advisor_trial.objective_value
        if study_configuration_json["goal"] == "MAXIMIZE":
            # Now hyperopt only supports fmin and we need to reverse objective value for maximization
            loss_for_hyperopt = -1 * advisor_trial.objective_value

        hyperopt_trial_results.append({
            "loss": loss_for_hyperopt,
            "status": hyperopt.STATUS_OK
        })

    if len(completed_advisor_trials) > 0:
        # Example: {'refresh_time': datetime.datetime(2018, 9, 18, 12, 6, 41, 922000), 'book_time': datetime.datetime(2018, 9, 18, 12, 6, 41, 922000), 'misc': {'tid': 0, 'idxs': {'x2': [0], 'x': [0]}, 'cmd': ('domain_attachment', 'FMinIter_Domain'), 'vals': {'x2': [-8.137088361136204], 'x': [-4.849028446711832]}, 'workdir': None}, 'state': 2, 'tid': 0, 'exp_key': None, 'version': 0, 'result': {'status': 'ok', 'loss': 14.849028446711833}, 'owner': None, 'spec': None}
        hyperopt_trials = completed_hyperopt_trials.new_trial_docs(
            hyperopt_trial_new_ids, hyperopt_trial_specs,
            hyperopt_trial_results, hyperopt_trial_miscs)
        for current_hyperopt_trials in hyperopt_trials:
            current_hyperopt_trials["state"] = hyperopt.JOB_STATE_DONE

        completed_hyperopt_trials.insert_trial_docs(hyperopt_trials)
        completed_hyperopt_trials.refresh()

    rval = hyperopt.FMinIter(
        self.hyperopt_algorithm,
        hyperopt_domain,
        completed_hyperopt_trials,
        max_evals=-1,
        rstate=hyperopt_rstate,
        verbose=0)
    rval.catch_eval_exceptions = False

    new_ids = rval.trials.new_trial_ids(number)
    rval.trials.refresh()

    # Ask for one suggestion per new id, each with a fresh seed.  A single
    # call is not guaranteed to return one document per id (presumably
    # tpe.suggest yields one trial per call -- verify against the installed
    # hyperopt), which previously led to the first suggestion being reused
    # for every returned trial.
    new_trials = []
    for new_id in new_ids:
        random_state = rval.rstate.randint(2**31 - 1)
        new_trials.extend(
            self.hyperopt_algorithm([new_id], rval.domain,
                                    completed_hyperopt_trials, random_state))
    rval.trials.refresh()

    # Construct return advisor trials from new hyperopt trials
    return_trial_list = []
    for new_trial in new_trials:
        # Example: {u'hidden2': [2], u'learning_rate': [0.04633366105812467], u'l1_normalization': [0.16858448611765364], u'optimizer': [3]}
        vals = new_trial['misc']['vals']

        new_advisor_trial = Trial.create(study.name, "TpeTrial")
        parameter_values_json = {}

        for param in params:
            param_name = param["parameterName"]
            param_type = param["type"]

            if param_type == "DOUBLE":
                suggest_value = vals[param_name][0]
            elif param_type == "DISCRETE":
                suggest_value = feasible_points(param, float)[
                    vals[param_name][0]]
            elif param_type == "CATEGORICAL":
                suggest_value = feasible_points(param)[vals[param_name][0]]
            else:
                # INTEGER (unsupported) and unknown types are not part of the
                # search space, so there is nothing to report for them.
                continue

            parameter_values_json[param_name] = suggest_value

        new_advisor_trial.parameter_values = json.dumps(parameter_values_json)
        return_trial_list.append(new_advisor_trial)

    return return_trial_list
def find_hyperparameters(
        setting, path, space=None, max_evals=100, trials_per_point=30,
        parallelization="sequential",
        objective="max_reward", max_concurrent_jobs=100):
    """
    This function does hyperparameter optimization for RLPy experiments with
    the hyperopt library.
    At the end an instance of the optimization trials is stored in
    "path"/trials.pck

    :param setting: file specifying the experimental setup. It contains a
        make_experiment function and a dictionary named param_space if the
        argument space is not used. For each key of param_space there needs
        to be an optional argument in make_experiment
    :param path: directory used to store all intermediate results.
    :param space: (optional) an alternative specification of the
        hyperparameter space
    :param max_evals: maximum number of evaluations of a single
        hyperparameter setting
    :param trials_per_point: specifies the number of independent runs (with
        different seeds) of the experiment for evaluating a single
        hyperparameter setting.
    :param parallelization: either **sequential**, **joblib**,
        **condor_all** or **condor_full**, **condor**. The condor options can
        be used in a computing cluster with a HTCondor machine. The joblib
        option parallelizes runs on one machine and sequential runs every
        experiment in sequence.
    :param objective: (optional) string specifying the objective to
        optimize, possible values are *max_reward*, *min_steps*, *max_steps*
    :param max_concurrent_jobs: only relevant for condor_full
        parallelization. Specifies the maximum number of jobs that should run
        at the same time. (Not referenced inside this function.)
    :return: a tuple containing the best hyperparameter settings and the
        hyperopt trials instance of the optimization procedure
    :raises ValueError: if the objective is evaluated by this function (any
        parallelization other than condor_all / condor_full) and is not one
        of the supported values.
    """
    # objective name -> (quantity to average, whether to negate it so that
    # minimizing the loss maximizes the quantity)
    objective_specs = {
        "max_steps": ("steps", True),
        "min_steps": ("steps", False),
        "max_reward": ("return", True),
    }

    # The condor_all / condor_full branches hand the objective to other code
    # and never call f() below, so only validate when f() will be used.
    # Validating here avoids running a whole experiment before failing.
    evaluated_locally = parallelization not in ("condor_all", "condor_full")
    if evaluated_locally and objective not in objective_specs:
        raise ValueError(
            "unknown objective %r, expected one of %s"
            % (objective, sorted(objective_specs)))

    if space is None:
        space = import_param_space(setting)

    def f(hyperparam):
        """function to optimize by hyperopt"""

        # "temporary" directory to use
        full_path = os.path.join(
            path,
            "-".join([str(v) for v in hyperparam.values()]))

        # execute experiment
        rt.run(setting, location=full_path, ids=range(1, trials_per_point + 1),
               parallelization=parallelization, force_rerun=False, block=True,
               **hyperparam)

        # all jobs should be done
        res = tres.load_results(full_path)

        quantity, negate = objective_specs[objective]
        m, s, n = tres.avg_quantity(res, quantity)
        val = -m if negate else m
        std = s[-1]

        # Quadratically increasing weights: later entries of val count more.
        weights = (np.arange(len(val)) + 1) ** 2
        loss = (val * weights).sum() / weights.sum()
        print(time.ctime())
        print("Parameters %s" % (hyperparam,))
        print("Loss %s" % (loss,))
        # use #steps/eps at the moment
        return {"loss": loss,
                "num_trials": n[-1],
                "status": hyperopt.STATUS_OK,
                "std_last_mean": std}

    if parallelization == "condor_all":
        trials = CondorTrials(path=path, ids=range(1, trials_per_point + 1),
                              setting=setting, objective=objective)
        domain = hyperopt.Domain(dummy_f, space, rseed=123)
        # Random search for the first 30 evaluations, then TPE for the rest.
        rval = hyperopt.FMinIter(hyperopt.rand.suggest, domain, trials,
                                 max_evals=30,
                                 max_queue_len=30)
        rval.exhaust()
        rval = hyperopt.FMinIter(hyperopt.tpe.suggest, domain, trials,
                                 max_evals=max_evals,
                                 max_queue_len=1)
        rval.exhaust()
        best = trials.argmin
    elif parallelization == "condor_full":
        trials = _search_condor_parallel(path=path, setting=setting,
                                         objective=objective,
                                         space=space, max_evals=max_evals,
                                         trials_per_point=trials_per_point)
        best = trials.argmin
    else:
        trials = hyperopt.Trials()
        best = hyperopt.fmin(f, space=space, algo=hyperopt.tpe.suggest,
                             max_evals=max_evals, trials=trials)

    # Named fh so the objective function f above is not shadowed.
    with open(os.path.join(path, 'trials.pck'), 'w') as fh:
        pickle.dump(trials, fh)

    return best, trials
def main():
    """Run hyperopt TPE (or random search) on a user supplied function.

    Command-line options name the search-space module (``--space``), the
    module providing ``doForTPE`` (``--algoExec``), the evaluation budget
    (``--maxEvals``), the seed (``--seed``), whether to resume from
    ``state.pkl`` (``--restore``) and whether to use random search
    (``--random``).

    The trials, domain and random state are pickled to ``state.pkl`` in the
    current working directory before the first evaluation (unless restoring)
    and again after every ``FMinIter.exhaust()`` call.
    """
    # Parse options and arguments
    parser = OptionParser()
    parser.add_option("-p", "--space",
                      dest="spaceFile",
                      help="Where is the space.py located?")
    parser.add_option("-a", "--algoExec",
                      dest="algoExec",
                      help="Which function to load located?")
    parser.add_option("-m", "--maxEvals",
                      dest="maxEvals",
                      help="How many evaluations?")
    parser.add_option("-s", "--seed",
                      dest="seed",
                      default="123",
                      type=int,
                      help="Seed for the TPE algorithm")
    parser.add_option("-r", "--restore",
                      dest="restore",
                      action="store_true",
                      help="When this flag is set state.pkl is restored in " +
                           "the current working directory")
    parser.add_option("--random",
                      default=False,
                      dest="random",
                      action="store_true",
                      help="Use a random search")
    (options, args) = parser.parse_args()

    # Fail with a usage message instead of an opaque error further down
    # (os.path.basename(None) / int(None)).
    missing = [flag for flag, value in (("--space", options.spaceFile),
                                        ("--algoExec", options.algoExec),
                                        ("--maxEvals", options.maxEvals))
               if value is None]
    if missing:
        parser.error("missing required option(s): %s" % ", ".join(missing))

    # First remove ".py"
    algo = os.path.splitext(os.path.basename(options.algoExec))[0]
    space = os.path.splitext(os.path.basename(options.spaceFile))[0]

    # Then load dict searchSpace and out function cv.py
    import sys
    sys.path.append("./")
    sys.path.append("")
    print(os.getcwd())
    module = import_module(space)
    search_space = module.space
    fn = import_module(algo).doForTPE

    if options.random:
        # We use a random search
        suggest = hyperopt.tpe.rand.suggest
    else:
        suggest = hyperopt.tpe.suggest

    rstate = np.random.RandomState(options.seed)

    # Now run TPE, emulate fmin.fmin()
    state_filename = "state.pkl"
    if options.restore:
        # We do not need to care about the state of the trials object since it
        # is only serialized in a synchronized state, there will never be a
        # save with a running experiment.
        # NOTE: unpickling executes arbitrary code; only restore a state.pkl
        # that was written by this program.
        with open(state_filename) as fh:
            tmp_dict = cPickle.load(fh)
        domain = tmp_dict['domain']
        trials = tmp_dict['trials']
        rstate = tmp_dict['rstate']
        print(trials.__dict__)
    else:
        domain = hyperopt.Domain(fn, search_space)
        trials = hyperopt.Trials()
        # By this we probably lose the seed; not too critical for a restart
        with open(state_filename, "w") as fh:
            cPickle.dump({
                "trials": trials,
                "domain": domain,
                "rstate": rstate
            }, fh)

    for i in range(int(options.maxEvals) + 1):
        # in exhaust, the number of evaluations is max_evals - num_done
        rval = hyperopt.FMinIter(suggest,
                                 domain,
                                 trials,
                                 max_evals=i,
                                 rstate=rstate)
        rval.exhaust()
        # Checkpoint after every step so that --restore can resume from here.
        with open(state_filename, "w") as fh:
            cPickle.dump({
                "trials": trials,
                "domain": domain,
                "rstate": rstate
            }, fh)

    best = trials.argmin
    print("Best Value found for params: %s" % (best,))
def getSuggestions(self, search_space, trials, request_number):
    """
    Get the new suggested trials with the given algorithm.

    The hyperopt search space is built from ``search_space.params``, the
    given ``trials`` are replayed into a hyperopt ``Trials`` object as
    finished jobs, and ``self.hyperopt_algorithm`` is then asked for
    ``request_number`` new points.

    Args:
        search_space: Object exposing ``params`` (each with ``name``,
            ``type`` and ``min``/``max`` or ``list``) and ``goal``.
        trials: Evaluated trials; each exposes ``name``, ``assignments``
            (items with ``name`` and ``value``) and ``target_metric.value``.
        request_number: How many new suggestions to produce.

    Returns:
        A list with one entry per suggestion, each produced by
        ``BaseHyperoptService.convert(search_space, vals)``.
    """
    # Construct search space, example: {"x": hyperopt.hp.uniform('x', -10, 10), "x2": hyperopt.hp.uniform('x2', -10, 10)}
    hyperopt_search_space = {}
    for param in search_space.params:
        if param.type == INTEGER:
            hyperopt_search_space[param.name] = hyperopt.hp.quniform(
                param.name,
                float(param.min),
                float(param.max), 1)
        elif param.type == DOUBLE:
            hyperopt_search_space[param.name] = hyperopt.hp.uniform(
                param.name,
                float(param.min),
                float(param.max))
        elif param.type == CATEGORICAL \
                or param.type == DISCRETE:
            hyperopt_search_space[param.name] = hyperopt.hp.choice(
                param.name, param.list)

    # New hyperopt variables
    hyperopt_rstate = np.random.RandomState()
    hyperopt_domain = hyperopt.Domain(
        None, hyperopt_search_space, pass_expr_memo_ctrl=None)

    hyperopt_trial_specs = []
    hyperopt_trial_results = []
    # Example: [{'tid': 0, 'idxs': {'l1_normalization': [0], 'learning_rate': [0], 'hidden2': [0], 'optimizer': [0]}, 'cmd': ('domain_attachment', 'FMinIter_Domain'), 'vals': {'l1_normalization': [0.1], 'learning_rate': [0.1], 'hidden2': [1], 'optimizer': [1]}, 'workdir': None}]
    hyperopt_trial_miscs = []
    hyperopt_trial_new_ids = []

    # Update hyperopt for trained trials with completed advisor trials
    completed_hyperopt_trials = hyperopt.Trials()
    for trial in trials:
        # Example: {'l1_normalization': [0], 'learning_rate': [0], 'hidden2': [0], 'optimizer': [0]}
        hyperopt_trial_miscs_idxs = {}
        # Example: {'l1_normalization': [0.1], 'learning_rate': [0.1], 'hidden2': [1], 'optimizer': [1]}
        hyperopt_trial_miscs_vals = {}
        new_id = trial.name
        hyperopt_trial_new_ids.append(new_id)
        hyperopt_trial_misc = dict(
            tid=new_id, cmd=hyperopt_domain.cmd, workdir=hyperopt_domain.workdir)
        for param in search_space.params:
            # Value this trial assigned to the parameter (None if absent).
            parameter_value = None
            for assignment in trial.assignments:
                if assignment.name == param.name:
                    parameter_value = assignment.value
                    break
            if param.type == INTEGER:
                hyperopt_trial_miscs_idxs[param.name] = [new_id]
                hyperopt_trial_miscs_vals[param.name] = [parameter_value]
            elif param.type == DOUBLE:
                hyperopt_trial_miscs_idxs[param.name] = [new_id]
                hyperopt_trial_miscs_vals[param.name] = [parameter_value]
            elif param.type == DISCRETE or param.type == CATEGORICAL:
                # hp.choice records the index of the chosen option, not the
                # option itself.
                index_of_value_in_list = param.list.index(parameter_value)
                hyperopt_trial_miscs_idxs[param.name] = [trial.name]
                hyperopt_trial_miscs_vals[param.name] = [
                    index_of_value_in_list
                ]

        hyperopt_trial_specs.append(None)

        hyperopt_trial_misc["idxs"] = hyperopt_trial_miscs_idxs
        hyperopt_trial_misc["vals"] = hyperopt_trial_miscs_vals
        hyperopt_trial_miscs.append(hyperopt_trial_misc)

        # TODO: Use negative objective value for loss or not
        objective_for_hyperopt = float(trial.target_metric.value)
        if search_space.goal == MAX_GOAL:
            # Now hyperopt only supports fmin and we need to reverse objective value for maximization
            objective_for_hyperopt = -1 * objective_for_hyperopt
        hyperopt_trial_results.append({
            "loss": objective_for_hyperopt,
            "status": hyperopt.STATUS_OK
        })

    if len(trials) > 0:
        # Example: {'refresh_time': datetime.datetime(2018, 9, 18, 12, 6, 41, 922000), 'book_time': datetime.datetime(2018, 9, 18, 12, 6, 41, 922000), 'misc': {'tid': 0, 'idxs': {'x2': [0], 'x': [0]}, 'cmd': ('domain_attachment', 'FMinIter_Domain'), 'vals': {'x2': [-8.137088361136204], 'x': [-4.849028446711832]}, 'workdir': None}, 'state': 2, 'tid': 0, 'exp_key': None, 'version': 0, 'result': {'status': 'ok', 'loss': 14.849028446711833}, 'owner': None, 'spec': None}
        hyperopt_trials = completed_hyperopt_trials.new_trial_docs(
            hyperopt_trial_new_ids, hyperopt_trial_specs,
            hyperopt_trial_results, hyperopt_trial_miscs)
        for current_hyperopt_trials in hyperopt_trials:
            current_hyperopt_trials["state"] = hyperopt.JOB_STATE_DONE

        completed_hyperopt_trials.insert_trial_docs(hyperopt_trials)
        completed_hyperopt_trials.refresh()

    rval = hyperopt.FMinIter(
        self.hyperopt_algorithm,
        hyperopt_domain,
        completed_hyperopt_trials,
        max_evals=-1,
        rstate=hyperopt_rstate,
        verbose=0)
    rval.catch_eval_exceptions = False

    new_ids = rval.trials.new_trial_ids(request_number)
    rval.trials.refresh()

    # Ask for one suggestion per new id, each with a fresh seed.  A single
    # call is not guaranteed to return one document per id (presumably
    # tpe.suggest yields one trial per call -- verify against the installed
    # hyperopt), in which case indexing the result by request position
    # raised IndexError for request_number > 1.
    new_trials = []
    for new_id in new_ids:
        random_state = rval.rstate.randint(2**31 - 1)
        new_trials.extend(
            self.hyperopt_algorithm([new_id], rval.domain,
                                    completed_hyperopt_trials, random_state))
    rval.trials.refresh()

    # Construct return advisor trials from new hyperopt trials
    list_of_assignments = []
    for new_trial in new_trials:
        vals = new_trial['misc']['vals']
        list_of_assignments.append(
            BaseHyperoptService.convert(search_space, vals))
    return list_of_assignments
def view2(
        host,
        port,
        exp_key,
        bagging_fraction,
        max_n_per_class=None,
        maybe_test_view2=True,
        assume_promising=True,
        tid=None,
        fake=False,
):
    """Re-evaluate one trial of an experiment and checkpoint the result.

    The trial is either the best trial of the experiment or, when ``tid`` is
    given, the trial with that id.  Its configuration is evaluated again
    through a freshly built ``hyperopt.Domain``.

    With ``fake`` set, a throw-away ``hyperopt.Ctrl`` is used, the traces of
    the stored and the new result are printed, and nothing is saved.
    Otherwise the new result is checkpointed through a ``MongoCtrl``: the
    recomputed loss is stored under ``view2_recalculated_loss`` and the
    ``loss`` entry keeps the value found during the search.

    :param host: passed to ``make_trials``.
    :param port: passed to ``make_trials``.
    :param exp_key: passed to ``make_trials``.
    :param bagging_fraction: converted to float and passed to the search
        space builder.
    :param max_n_per_class: passed to ``slm_visitor_lfw_partial``.
    :param maybe_test_view2: converted to int and passed on.
    :param assume_promising: converted to int and passed on.
    :param tid: optional id of the trial to re-evaluate (converted to int).
    :param fake: when true (after int conversion), do not save anything.
    :raises ValueError: if ``tid`` is given but no trial has that id.
    """
    fake = int(fake)
    real_trials = make_trials(host, port, exp_key)
    print('n. real trials %s' % (len(real_trials),))

    if tid is None:
        best_trial = real_trials.best_trial
    else:
        wanted_tid = int(tid)
        matching = [t for t in real_trials.trials if t['tid'] == wanted_tid]
        if not matching:
            # Show which ids exist, then stop: continuing would only fail
            # later on the unbound best_trial.
            print([t['tid'] for t in real_trials.trials])
            raise ValueError('no trial with tid %s' % (tid,))
        best_trial = matching[0]

    print('Best trial')
    print(' ["tid"] %s' % (best_trial['tid'],))

    best_result = best_trial['result']
    print('Best trial had loss %s' % (best_result['loss'],))
    best_trace = best_result.get('trace')

    fake_trials = hyperopt.Trials()
    fn = slm_visitor_lfw_partial(
        max_n_per_class,
        maybe_test_view2=int(maybe_test_view2),
        assume_promising=int(assume_promising),
        foobar_trace_target=list(best_trace),
    )
    #space = search_space
    space = hpconvnet.lfw.build_search_space(
        max_n_features=16000,
        trn='DevTrain',  # -- split used for unsupervised images
        n_unsup=300,  # -- number of images from which to draw patches
        batched_lmap_speed_thresh={'seconds': 60, 'elements': 1},
        bagging_fraction=float(bagging_fraction),
    )
    domain = hyperopt.Domain(fn, space, rseed=123)
    domain.rng = None  # -- this rng is never to be used
    if fake:
        ctrl = hyperopt.Ctrl(trials=fake_trials, current_trial=None)
        print('WARNING: running on fake ctrl object')
    else:
        ctrl = MongoCtrl(trials=real_trials, current_trial=best_trial,
                         read_only=False)

    #real_trials.handle.update(best_result, msg)
    #ctrl.checkpoint(dict(best_trial['result'], foodebug='yes'))
    config = hyperopt.base.spec_from_misc(best_trial['misc'])
    #print 'Config', config
    r_dct = domain.evaluate(config, ctrl, attach_attachments=(not fake))
    print('r_dct')
    print(r_dct)

    if fake:
        print('WARNING: running on fake ctrl object, not saving result')
        attachments = r_dct.pop('attachments', {})
        print('Attachments: %s' % (attachments.keys(),))
        print(' ["Best Result trace"]')

        def print_trace(r):
            # One line per trace entry, indented by two spaces.
            for entry in r['trace']:
                print('  %s' % (entry,))

        print_trace(best_result)
        print(' ["result trace"]')
        print_trace(r_dct)
    else:
        # -- the loss should have been re-computed identically
        r_dct['view2_recalculated_loss'] = r_dct['loss']
        r_dct['loss'] = best_result['loss']
        if (r_dct['view2_recalculated_loss'] > best_result['loss']):
            print('WARNING: recalculated loss was worst than loss during search')
            print(' -> original loss %s' % (best_result['loss'],))
            print(' -> recalculated loss %s'
                  % (r_dct['view2_recalculated_loss'],))
        print('Checkpointing back to db')
        # -- N.B. attachments should have been saved by Domain.evaluate,
        #    since we called it with attach_attachments=True. So they should
        #    not be here anymore.
        assert 'attachments' not in r_dct
        ctrl.checkpoint(r_dct)