import numpy as np


def exhaustive_search(new_ids, domain, trials, seed, nbMaxSucessiveFailures=1000):
    r"""Exhaustive-search `suggest` function for HyperTuning.

    Draws samples from the search space until one is found that has not
    been tried before; gives up after `nbMaxSucessiveFailures` consecutive
    duplicates and returns an empty list, which stops the hyperopt loop.
    """
    from hyperopt import pyll
    from hyperopt.base import miscs_update_idxs_vals

    # Build a hash set of the parameter assignments of all previous trials
    # so duplicates can be detected with a set lookup
    hashset = set([
        hash(frozenset([(key, value[0]) if len(value) > 0 else (key, None)
                        for key, value in trial['misc']['vals'].items()]))
        for trial in trials.trials
    ])

    rng = np.random.RandomState(seed)
    rval = []
    for _, new_id in enumerate(new_ids):
        newSample = False
        nbSucessiveFailures = 0
        while not newSample:
            # -- sample new specs, idxs, vals
            idxs, vals = pyll.rec_eval(
                domain.s_idxs_vals,
                memo={
                    domain.s_new_ids: [new_id],
                    domain.s_rng: rng,
                })
            new_result = domain.new_result()
            new_misc = dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)
            miscs_update_idxs_vals([new_misc], idxs, vals)

            # Compare the hash of the new sample with the previous hashes
            h = hash(frozenset([(key, value[0]) if len(value) > 0 else (key, None)
                                for key, value in vals.items()]))
            if h not in hashset:
                newSample = True
            else:
                # Duplicated sample: ignore it and draw again
                nbSucessiveFailures += 1

            if nbSucessiveFailures > nbMaxSucessiveFailures:
                # No more samples to produce
                return []
        rval.extend(
            trials.new_trial_docs([new_id], [None], [new_result], [new_misc]))
    return rval
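# A minimal usage sketch for the exhaustive search above: it plugs into
# hyperopt's fmin via functools.partial. The `space` and `objective` below
# are illustrative placeholders, not part of the original module.
def _demo_exhaustive_search():
    from functools import partial

    from hyperopt import Trials, fmin, hp

    space = {'x': hp.choice('x', [0, 1, 2, 3])}

    def objective(params):
        # Hypothetical loss: smaller is better
        return float(params['x'] ** 2)

    trials = Trials()
    return fmin(objective, space,
                algo=partial(exhaustive_search, nbMaxSucessiveFailures=100),
                max_evals=10, trials=trials)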
import numpy as np
from hyperopt import rand
from hyperopt.base import miscs_update_idxs_vals
# `GaussianProcessRegressor` is assumed to come from scikit-learn; any
# regressor factory with a compatible fit/predict interface would do.
from sklearn.gaussian_process import GaussianProcessRegressor


def suggest(new_ids, domain, trials, seed, samples_count=200, maxlog=8,
            rand_iters=3,
            create_regressor=lambda: GaussianProcessRegressor(alpha=1e-6)):
    # Fall back to random search until enough trials exist to fit the
    # surrogate model
    rand_iters = max(1, rand_iters)
    if len(trials) < rand_iters:
        return rand.suggest(new_ids, domain, trials, seed)

    rng = np.random.RandomState(seed)
    rval = []
    for ii, new_id in enumerate(new_ids):
        # -- pick the most promising specs, idxs, vals according to the
        # surrogate model (`get_best_eval` is a module-local helper)
        idxs, vals = get_best_eval(new_id, domain, trials, rng,
                                   samples_count, maxlog, create_regressor)
        new_result = domain.new_result()
        new_misc = dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)
        miscs_update_idxs_vals([new_misc], idxs, vals)
        rval.extend(trials.new_trial_docs([new_id], [None],
                                          [new_result], [new_misc]))
    return rval
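# A minimal usage sketch for the GP-backed suggest above; `space` and
# `objective` are illustrative placeholders. Any keyword not in fmin's
# algo signature is bound via functools.partial.
def _demo_gp_suggest():
    from functools import partial

    from hyperopt import Trials, fmin, hp

    space = {'lr': hp.loguniform('lr', -8, 0)}

    def objective(params):
        # Hypothetical loss with a minimum near lr = 0.01
        return float((params['lr'] - 0.01) ** 2)

    trials = Trials()
    return fmin(objective, space,
                algo=partial(suggest, samples_count=200, rand_iters=5),
                max_evals=30, trials=trials)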
# Method of a tuner class that holds `trials` (a hyperopt Trials object)
# and `domain`; requires numpy as np, hyperopt.pyll, and
# hyperopt.base.miscs_update_idxs_vals at module level.
def _hyperopt_transform(self, x):
    # Turn a plain dict of parameter values `x` into a hyperopt trial
    # document: sample a template trial (the fixed seed only shapes the
    # template), then overwrite its values with those in `x`.
    new_id = self.trials.new_trial_ids(1)[0]
    domain = self.domain
    rng = np.random.RandomState(1)
    idxs, vals = pyll.rec_eval(
        domain.s_idxs_vals,
        memo={
            domain.s_new_ids: [new_id],
            domain.s_rng: rng,
        })
    rval_miscs = [dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)]
    # new_trial_docs expects parallel lists, so wrap the single result
    rval_results = [domain.new_result()]
    for (k, _) in vals.items():
        vals[k][0] = x[k]
    miscs_update_idxs_vals(rval_miscs, idxs, vals)
    rval_docs = self.trials.new_trial_docs([new_id], [None],
                                           rval_results, rval_miscs)
    return rval_docs[0]
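# Hypothetical usage of `_hyperopt_transform`. It assumes `tuner` is an
# instance of the enclosing class (which exposes `trials` and `domain`, as
# referenced via `self` above) and that the search space has a parameter
# labelled 'lr'; `insert_trial_docs` and `refresh` are standard hyperopt
# Trials methods.
def _demo_hyperopt_transform(tuner):
    doc = tuner._hyperopt_transform({'lr': 0.01})
    tuner.trials.insert_trial_docs([doc])
    tuner.trials.refresh()
    return doc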
import numpy as np
import hyperopt
from hyperopt.base import miscs_update_idxs_vals
# `variables_from_domain`, `GridMap`, `trial_duration`, `unit_assignment`,
# `unit_to_list`, and `info` are module-local helpers of this spearmint port.


def suggest(new_ids, domain, trials,
            chooser, grid_size, grid_seed,
            expt_dir,  # -- a state object will be maintained here
            verbose=0,
            ):
    variables = variables_from_domain(domain)
    gmap = GridMap(variables.values(), grid_size)

    values = []
    complete = []
    pending = []
    durations = []
    for trial in trials.trials:
        # Spearmint's original driver stored one experiment per line of a
        # results file, whitespace separated, either
        #   <value> <time taken> <space separated list of parameters>
        # indicating a completed experiment, or
        #   P P <space separated list of parameters>
        # indicating a pending experiment. The same information is
        # reconstructed here from the hyperopt trials object.
        state = trial['state']
        status = trial['result']['status']
        val = trial['result'].get('loss')
        dur = trial_duration(trial)
        unit_vals = unit_assignment(trial, variables)
        if state in (hyperopt.JOB_STATE_NEW, hyperopt.JOB_STATE_RUNNING):
            pending.append(unit_vals)
        elif state in (hyperopt.JOB_STATE_DONE,):
            # STATUS_OK is a string, so test with equality, not `in`
            if status == hyperopt.STATUS_OK:
                complete.append(unit_vals)
                durations.append(dur)
                values.append(val)

    # Some stats
    info("#Complete: %d #Pending: %d" % (len(complete), len(pending)))

    # Print the best value so far
    if len(values):
        best_val = np.min(values)
        best_job = np.argmin(values)
        info("Current best: %f (job %d)" % (best_val, best_job))

    # Now get the next job to run.
    # First throw out a set of candidates on the unit hypercube.
    # Increment the seed by the number of observed points so we don't draw
    # the same values twice.
    seed_increment = len(pending) + len(complete)
    candidates = gmap.hypercube_grid(grid_size, grid_seed + seed_increment)

    # Ask the chooser to actually pick one.
    # First mash the data into a format that matches that of the other
    # spearmint drivers to pass to the chooser modules.
    grid = np.asarray(complete + list(candidates) + pending)
    grid_idx = np.hstack((np.zeros(len(complete)),
                          np.ones(len(candidates)),
                          1.0 + np.ones(len(pending))))
    chosen = chooser.next(grid,
                          np.asarray(values),
                          np.asarray(durations),
                          np.nonzero(grid_idx == 1)[0],
                          np.nonzero(grid_idx == 2)[0],
                          np.nonzero(grid_idx == 0)[0])

    # If `chosen` is a tuple, the chooser picked a new job that is not in
    # the candidate list.
    if isinstance(chosen, tuple):
        (chosen, candidate) = chosen
    else:
        candidate = grid[chosen]
        info("Selected job %d from the grid." % (chosen,))

    params = unit_to_list(candidate, variables)

    if len(new_ids) > 1:
        raise NotImplementedError('TODO: recurse for multiple jobs')

    rval = []
    for new_id in new_ids:
        idxs = dict([(v, [new_id]) for v in variables])
        vals = dict([(v, [p]) for v, p in zip(variables, params)])
        new_result = domain.new_result()
        new_misc = dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)
        miscs_update_idxs_vals([new_misc], idxs, vals)
        rval.extend(
            trials.new_trial_docs([new_id], [None], [new_result], [new_misc]))
    return rval
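# A minimal wiring sketch for the spearmint-style suggest above. `chooser`
# is assumed to implement the spearmint chooser interface (the six-argument
# `next` method called above). Since this suggest takes no `seed` argument,
# a small adapter drops the seed that hyperopt's fmin passes to `algo`;
# determinism comes from `grid_seed` instead.
def _demo_spearmint_suggest(chooser, space, objective, expt_dir):
    from hyperopt import Trials, fmin

    def algo(new_ids, domain, trials, seed):
        return suggest(new_ids, domain, trials,
                       chooser, grid_size=1000, grid_seed=1,
                       expt_dir=expt_dir)

    trials = Trials()
    return fmin(objective, space, algo=algo, max_evals=25, trials=trials)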
# Method of the grid-search class (referred to in the body as
# `hyperopt_grid`); requires `warnings` and `numpy as np` at module level,
# plus hyperopt's `pyll` and `miscs_update_idxs_vals`, and the module-local
# helper `dict_to_sorted_str`.
def suggest(self, new_ids, domain, trials, seed):
    rng = np.random.RandomState(seed)
    rval = []
    for ii, new_id in enumerate(new_ids):
        while self._cnt <= self.num_combinations:
            # -- sample new specs, idxs, vals
            idxs, vals = pyll.rec_eval(
                domain.s_idxs_vals,
                memo={
                    domain.s_new_ids: [new_id],
                    domain.s_rng: rng,
                })
            new_result = domain.new_result()
            new_misc = dict(tid=new_id, cmd=domain.cmd,
                            workdir=domain.workdir)
            miscs_update_idxs_vals([new_misc], idxs, vals)
            new_trial = trials.new_trial_docs([new_id], [None],
                                              [new_result], [new_misc])
            # Except for the `while`, the code up to here is copied from
            # rand.suggest; the grid bookkeeping below is new.
            self.executed_params = self.executed_params.union(
                self._get_historical_params(trials))

            # Avoid counting floating-point zero twice (as +0.0 and -0.0)
            this_run_params = hyperopt_grid._convert_neg_zeros_to_zeros(
                dict(new_misc['vals']))
            # Represent the params as a hashable sorted string
            this_run_params_str = dict_to_sorted_str(this_run_params)

            if this_run_params_str not in self.executed_params:
                # First time these params are seen: emit a new trial
                rval.extend(new_trial)
                # Log the new trial as executed, to avoid duplication
                self._cnt += 1
                self.executed_params = \
                    self.executed_params.union([this_run_params_str])
                print(self._cnt, this_run_params)
                break
            else:
                # Params were already seen: skip this trial
                self._cnt_skip += 1

            # Stopping condition (breaks the hyperopt loop)
            if len(self.executed_params) >= self.num_combinations:
                # Returning an empty list breaks the hyperopt loop
                return []
            # "Emergency" stopping condition, breaking the hyperopt loop
            # when it runs for too long without submitting experiments
            if self._cnt_skip >= 100 * self.num_combinations:
                warnings.warn('Exited due to too many skips. This can '
                              'happen when most of the param combinations '
                              'have been encountered, so drawing a new, '
                              'unseen combination has a very low '
                              'probability.')
                # Returning an empty list breaks the hyperopt loop
                return []
    return rval
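# A minimal wiring sketch for the grid-search method above, assuming the
# enclosing class (referred to as `hyperopt_grid` in the body) is
# constructed elsewhere and tracks `num_combinations`, `executed_params`,
# `_cnt`, and `_cnt_skip` internally; `grid` here is such an instance.
def _demo_grid_suggest(grid, space, objective):
    from hyperopt import Trials, fmin

    trials = Trials()
    # The bound method `grid.suggest` already matches fmin's
    # (new_ids, domain, trials, seed) algo signature.
    return fmin(objective, space, algo=grid.suggest,
                max_evals=grid.num_combinations, trials=trials)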