def tune_parameters():
    """Search the topic-model hyper-parameter space with TPE.

    Trial documents are kept in a MongoDB-backed ``MongoTrials`` store whose
    database name is built from ``Constants.ITEM_TYPE`` plus a suffix chosen
    by ``Constants.USE_CONTEXT``.  Once ``fmin`` returns, the sorted losses,
    the best trial and the number of trials are printed.
    """
    from utils.constants import Constants

    if Constants.USE_CONTEXT:
        context_suffix = '_context'
    else:
        context_suffix = '_nocontext'
    mongo_url = (
        'mongo://localhost:1234/topicmodel_'
        + Constants.ITEM_TYPE + context_suffix + '/jobs'
    )

    trials = MongoTrials(mongo_url, exp_key='exp1')
    print('Connected to %s' % mongo_url)

    # Sampled dimensions.  Left out of the search for now: 'lda_alpha',
    # 'lda_beta' and 'topic_weighting_method'.
    epsilon = hp.uniform(Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD, 0, 0.5)
    iterations = hp.quniform(
        Constants.TOPIC_MODEL_ITERATIONS_FIELD, 50, 500, 1)
    passes = hp.quniform(Constants.TOPIC_MODEL_PASSES_FIELD, 1, 100, 1)
    num_topics = hp.quniform(
        Constants.TOPIC_MODEL_NUM_TOPICS_FIELD, 1, 1000, 1)

    candidate = {
        Constants.BUSINESS_TYPE_FIELD: Constants.ITEM_TYPE,
        Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD: epsilon,
        Constants.TOPIC_MODEL_ITERATIONS_FIELD: iterations,
        Constants.TOPIC_MODEL_PASSES_FIELD: passes,
        Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: num_topics,
        Constants.USE_CONTEXT_FIELD: True,
    }
    # The choice has a single option; it only wraps the parameter dict.
    space = hp.choice(Constants.USE_CONTEXT_FIELD, [candidate])

    fmin(
        run_recommender,
        space=space,
        algo=tpe.suggest,
        max_evals=1000,
        trials=trials,
    )

    print('losses', sorted(trials.losses()))
    winner = trials.best_trial
    print('best', winner['result'], winner['misc']['vals'])
    print('num trials: %d' % len(trials.losses()))
def run_optimization(level=1):
    """Run (or resume) the hyperparameter search for one optimization level.

    Each level stores its trials in MongoDB under the experiment key
    ``covid-<level>``.  The level is skipped when trials already exist under
    the next level's key, or when the number of ``'ok'`` trials already
    reaches the evaluation budget.

    For level 1 the budget is ``LEVEL_DEFS[0][1]`` and the objective runs
    for one epoch.  For higher levels ``LEVEL_DEFS[level - 1]`` is unpacked
    as ``(depth, new_budget, extend_budget)``; when the level has no trials
    yet, ``configure_next_level`` seeds it from the previous level and the
    returned documents drive a custom suggestion algorithm.

    Args:
        level: 1-based optimization level.

    Raises:
        ValueError: if ``level`` is smaller than 1.
    """
    # Previously a level below 1 matched neither branch below and surfaced
    # later as a NameError on ``depth``/``max_evals``; reject it up front.
    if level < 1:
        raise ValueError(f"level must be >= 1, got {level!r}")

    mongo_url = 'mongo://localhost:1234/covid/jobs'

    print(f"Optimizing at level {level}")

    set_random_seeds(4)

    # Trials stored under the next level's key mean this level is finished.
    next_lvl_trials = MongoTrials(mongo_url, exp_key=f'covid-{level+1}')
    if next_lvl_trials.trials:
        print(f"Already completed level {level} -- skipping")
        return

    trials = MongoTrials(mongo_url, exp_key=f'covid-{level}')

    suggestion_box = hyperopt.tpe.suggest

    if level == 1:
        max_evals = LEVEL_DEFS[0][1]
        depth = 1
    else:
        depth, new_budget, extend_budget = LEVEL_DEFS[level - 1]

        # Minimum one per node for the expensive ones -- no point wasting
        # compute time: round the count up to a multiple of NUM_NODES.
        num_new = int(np.ceil((new_budget / depth) / NUM_NODES) * NUM_NODES)

        if not trials.trials:
            print("Generating estimates from previous level")
            result_docs = configure_next_level(level, depth, extend_budget)
            suggestion_box = create_suggestion_box(result_docs)

        last_level_trials = MongoTrials(mongo_url,
                                        exp_key=f'covid-{level-1}')
        prev_level_count = sum(
            1 for loss in last_level_trials.losses() if loss is not None)

        max_evals = prev_level_count + num_new
        # Pick up any documents inserted while seeding this level.
        trials.refresh()

    objective = functools.partial(test_parameterization, num_epochs=depth)

    completed = sum(1 for status in trials.statuses() if status == 'ok')
    if completed >= max_evals:
        print(f"Already completed level {level} -- skipping")
    else:
        best = hyperopt.fmin(objective,
                             space=SEARCH_SPACE,
                             algo=suggestion_box,
                             max_evals=max_evals,
                             trials=trials)
        print(best)
def tune_parameters():
    """Tune the topic-model recommender with hyperopt's TPE algorithm.

    The MongoDB URL is assembled from ``Constants.ITEM_TYPE`` and a
    context/no-context suffix, the search is run for up to 1000 evaluations
    of ``run_recommender``, and a short summary (sorted losses, best trial,
    trial count) is printed at the end.
    """
    from utils.constants import Constants

    context_name = '_nocontext'
    if Constants.USE_CONTEXT:
        context_name = '_context'

    mongo_url = 'mongo://localhost:1234/topicmodel_' + \
        Constants.ITEM_TYPE + context_name + '/jobs'
    trials = MongoTrials(mongo_url, exp_key='exp1')
    print('Connected to %s' % mongo_url)

    # Not searched at the moment: 'lda_alpha', 'lda_beta',
    # 'topic_weighting_method'.
    settings = {
        Constants.BUSINESS_TYPE_FIELD: Constants.ITEM_TYPE,
        Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD: hp.uniform(
            Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD, 0, 0.5),
        Constants.TOPIC_MODEL_ITERATIONS_FIELD: hp.quniform(
            Constants.TOPIC_MODEL_ITERATIONS_FIELD, 50, 500, 1),
        Constants.TOPIC_MODEL_PASSES_FIELD: hp.quniform(
            Constants.TOPIC_MODEL_PASSES_FIELD, 1, 100, 1),
        Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: hp.quniform(
            Constants.TOPIC_MODEL_NUM_TOPICS_FIELD, 1, 1000, 1),
        Constants.USE_CONTEXT_FIELD: True,
    }
    space = hp.choice(Constants.USE_CONTEXT_FIELD, [settings])

    search_options = dict(
        space=space, algo=tpe.suggest, max_evals=1000, trials=trials)
    fmin(run_recommender, **search_options)

    ordered_losses = sorted(trials.losses())
    print('losses', ordered_losses)
    print(
        'best',
        trials.best_trial['result'],
        trials.best_trial['misc']['vals'])
    trial_count = len(trials.losses())
    print('num trials: %d' % trial_count)
def hp_parallel(self):
    """Run a MongoDB-backed TPE search for ``self.task`` and summarise it.

    The experiment key is the task id followed by 64 random bits, and the
    search runs for ``self.max_evals * self.n_parallel`` evaluations.
    Scores are the negated trial losses.

    Returns:
        A tuple ``(accumulated, search_path)``: the result of
        ``self.accumulate_max(scores, self.max_evals, batch_size)`` and the
        trials' sampled values with an added ``'score'`` entry.
    """
    experiment_key = self.task.id + str(random.getrandbits(64))
    trials = MongoTrials('mongo://localhost:27017/foo_db/jobs',
                         exp_key=experiment_key)
    batch_size = self.n_parallel

    best_params = fmin(
        fn=self.hp_objective,
        space=self.task.hp_space,
        algo=tpe.suggest,
        max_evals=self.max_evals * batch_size,
        trials=trials,
    )

    # hyperopt minimises, so the score is the loss with its sign flipped.
    scores = []
    for trial in trials.trials:
        scores.append(-trial['result']['loss'])

    top_score = max(scores)
    summary = (self.task.id, top_score, best_params)
    print("hp parallel task: %s, best: %s, params: %s" % summary)

    search_path = trials.vals
    negated_losses = np.array(trials.losses()) * -1
    search_path['score'] = list(negated_losses)

    accumulated = self.accumulate_max(scores, self.max_evals, batch_size)
    return accumulated, search_path
def main():
    """
    Perform hyperparameter optimization with hyperopt.

    The procedure has three steps.  First, create a trials object connected
    to a mongo database where all results are stored.  Second, define the
    stochastic search space from which hyperopt samples hyperparameter
    configurations.  Third, run the minimization of the objective function.
    """
    trials = MongoTrials('mongo://localhost:1234/otto-sqrt-pca-95-5/jobs',
                         exp_key='15-11-03')

    # Search space.  The dense part is a chain of layers: DL1 and DL2 are
    # always present, DL3 is optional (the 'MD2' choice is 0 when absent).
    units_dl1 = hp.quniform('DL1', 512, 2048, 512)
    units_dl2 = hp.quniform('DL2', 512, 2048, 512)
    units_dl3 = hp.quniform('DL3', 512, 2048, 512)

    third_layer = {'num_units': units_dl3, 'more_layers': 0}
    second_layer = {
        'num_units': units_dl2,
        'more_layers': hp.choice('MD2', [0, third_layer]),
    }
    first_layer = {'num_units': units_dl1, 'more_layers': second_layer}

    space = {
        'dense_part': first_layer,
        'leakiness': hp.choice('leak', [0, 0.01, 0.15]),
        'weight_init': hp.choice('weight', ['orto', 'uni']),
        'input_dropout': hp.quniform('p_in', 0.1, 0.4, 0.1),
        'learning_rate': hp.choice('lr', [0.001, 0.01, 0.025, 0.05, 0.1]),
    }

    # Optimize
    best = fmin(
        objective,
        space=space,
        algo=tpe.suggest,
        max_evals=100,
        trials=trials,
    )

    print(trials.losses())
    print(best)
# Summarise a hyperopt experiment stored in MongoDB: number of trials, all
# losses, the smallest loss, the best trial and its sampled values.
from TrainClassifiers import main
from hyperopt import hp
from hyperopt import fmin, tpe, hp
from hyperopt.mongoexp import MongoTrials
import matplotlib as mpl
# The backend is selected before pyplot is imported below.
mpl.use('Agg')
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.colors import LogNorm

trials = MongoTrials('mongo://localhost:23888/foo_db/jobs', exp_key='l11')

print(len(trials.losses()))
print(trials.losses())
# Compare against None explicitly: a plain truthiness test would also throw
# away a legitimate loss of exactly 0 and report the wrong minimum.
print(min([float(x) for x in trials.losses() if x is not None]))
print(trials.best_trial)
print("\n\n")
print(trials.best_trial["misc"]["vals"])
def configure_next_level(lvl: int, depth: int, budget: int = 50):
    """Seed experiment ``covid-<lvl>`` from the trials of ``covid-<lvl-1>``.

    Every previous-level trial that has a loss gets a projected ("forward")
    loss.  Trials are then visited from best to worst projected loss: the
    best ones are returned as new-trial documents until ``budget`` is used
    up, and the remaining ones are copied into the new experiment key under
    freshly allocated trial ids.

    Args:
        lvl: level being configured; its source is level ``lvl - 1``.
        depth: value the projection extrapolates to and that the budget is
            charged against (compared with the first field of the stats
            entries -- presumably an epoch count; TODO confirm).
        budget: total amount of ``depth - epochs_completed`` that may be
            spent on trials selected for extension.

    Returns:
        A list of ``(spec, result, misc)`` tuples, one per trial selected
        for extension, where ``spec`` is ``None``, ``result`` is
        ``{'status': 'new'}`` and ``misc`` is a deep copy of the source
        trial's ``misc``.
    """
    new_exp_key = f'covid-{lvl}'
    src_trials = MongoTrials('mongo://localhost:1234/covid/jobs',
                             exp_key=f'covid-{lvl-1}')
    # No exp_key: used below to find the largest tid among all trials.
    all_trials = MongoTrials('mongo://localhost:1234/covid/jobs')
    dest_trials = MongoTrials('mongo://localhost:1234/covid/jobs',
                              exp_key=new_exp_key)
    # Number of trailing validation points fed to the regression; it
    # depends on depth (2 -> 3, 3 -> 5, 5 -> 8, anything else -> 10).
    hist_length = {2: 3, 3: 5, 5: 8}.get(depth, 10)
    forward_losses = []
    for trial, loss in zip(src_trials.trials, src_trials.losses()):
        if loss is None:
            # Keep positions aligned with src_trials.trials.
            forward_losses.append(None)
            continue
        # Each validation_stats entry unpacks into four fields; only the
        # first (x) and second (validation loss) are used here.
        v_x, vloss, _, _ = zip(*trial['result']['validation_stats'])
        slope, intercept, _, _, _ = linregress(v_x[-hist_length:],
                                               vloss[-hist_length:])
        # Projection: the fitted line at the last x plus the slope summed
        # as a geometric series with ratio 0.8 over (depth - last x) steps,
        # averaged with the current loss and never worse than that loss.
        forward_losses.append(
            min(
                0.5 *
                (loss + intercept + slope * v_x[-1] + slope *
                 (1 - 0.8**(depth - v_x[-1])) / (1 - 0.8)), loss))
    # Trial indices from best to worst projection; trials without a loss
    # sort last because they are treated as infinity.
    ordered_idxs = list(
        np.argsort([x if x is not None else np.inf for x in forward_losses]))
    last_tid = 0 if len(all_trials.tids) == 0 else max(all_trials.tids)
    available_tids = []
    result_docs = []
    # Phase 1: take the most promising trials from the front until the
    # budget is exhausted.
    while len(ordered_idxs) > 0:
        idx = ordered_idxs.pop(0)
        if src_trials.losses()[idx] is None:
            continue
        # First field of the last 'training_loss_hist' entry, or 0 when the
        # trial has no such history.
        epochs_completed = src_trials.trials[idx]['result'].get(
            'training_loss_hist', [(0, np.inf)])[-1][0]
        spec = None
        result = {'status': 'new'}
        misc = copy.deepcopy(src_trials.trials[idx]['misc'])
        result_docs.append((spec, result, misc))
        budget -= (depth - epochs_completed)
        if budget <= 0:
            break
    # Phase 2: whatever is left is taken from the back of the ordering and
    # copied into the new experiment without being extended.
    while len(ordered_idxs) > 0:
        idx = ordered_idxs.pop()
        if src_trials.losses()[idx] is None:
            continue
        if len(available_tids) == 0:
            # NOTE(review): confirm what new_trial_ids expects as its
            # argument (an id or a count) for MongoTrials.
            available_tids = dest_trials.new_trial_ids(last_tid)
        tid = available_tids.pop(0)
        last_tid = tid
        # copy in the ones that aren't worth exploring further
        cpy = copy.deepcopy(src_trials.trials[idx])
        cpy['exp_key'] = new_exp_key
        cpy['tid'] = tid
        cpy['misc']['tid'] = tid
        cpy['misc']['idxs'] = {k: [tid] for k in cpy['misc']['idxs'].keys()}
        # Drop the source document's '_id' before inserting the copy.
        del cpy['_id']
        dest_trials.insert_trial_doc(cpy)
    return result_docs
trials=trials) now = datetime.datetime.now() if use_mongo: for p in workers: p.terminate() #merge the temporary log files filenames = os.listdir(def_logging_dir) temp_logs = filter(lambda x: x.find("temp_" + date_time_string) > -1, filenames) aggregated_log = open(def_logging_dir + "log_" + date_time_string, 'w') for temp_log in temp_logs: t = open(def_logging_dir + temp_log, 'r') aggregated_log.write(t.read()) t.close() os.remove(def_logging_dir + temp_log) aggregated_log.write("Time for fmin: " + str(now - then) + "\n") aggregated_log.write("Trials: " + str(trials.trials) + "\n") aggregated_log.write("Results: " + str(trials.results) + "\n") aggregated_log.write("Losses: " + str(trials.losses()) + "\n") aggregated_log.write("Statuses: " + str(trials.statuses()) + "\n") aggregated_log.close() print best
def tune_parameters():
    """Report the stored trials of the nested cross-validation search.

    Connects to the MongoDB trial store named after ``Constants.ITEM_TYPE``,
    the context flag and the nested cross-validation cycle, builds the
    search space (``FM_ITERATIONS_FIELD`` and ``FM_NUM_FACTORS_FIELD`` are
    sampled with ``hp.quniform``) and prints the sorted losses, the best
    loss with its sampled values, and the number of trials.

    The ``fmin`` call is currently disabled, so no new evaluations are
    started and the constructed space is not consumed.
    """
    from utils.constants import Constants

    if Constants.USE_CONTEXT:
        context_name = '_context'
    else:
        context_name = '_nocontext'
    cycle = '_' + str(Constants.NESTED_CROSS_VALIDATION_CYCLE)
    mongo_url = (
        'mongo://localhost:1234/'
        + Constants.ITEM_TYPE + context_name + '_db_nested' + cycle
        + '/jobs'
    )

    # An in-memory Trials() could be used here instead of MongoTrials.
    trials = MongoTrials(mongo_url, exp_key='exp1')
    print('Connected to %s' % mongo_url)

    params = Constants.get_properties_copy()
    # Other FM, LDA and topic-model dimensions are deliberately not
    # searched at the moment.
    overrides = {
        Constants.BUSINESS_TYPE_FIELD: Constants.ITEM_TYPE,
        Constants.TOPN_NUM_ITEMS_FIELD: Constants.TOPN_NUM_ITEMS,
        Constants.NESTED_CROSS_VALIDATION_CYCLE_FIELD:
            Constants.NESTED_CROSS_VALIDATION_CYCLE,
        Constants.FM_ITERATIONS_FIELD:
            hp.quniform(Constants.FM_ITERATIONS_FIELD, 1, 500, 1),
        Constants.FM_NUM_FACTORS_FIELD:
            hp.quniform(Constants.FM_NUM_FACTORS_FIELD, 0, 200, 1),
        Constants.USE_CONTEXT_FIELD: Constants.USE_CONTEXT,
    }
    params.update(overrides)

    space = hp.choice(Constants.USE_CONTEXT_FIELD, [params])

    if not Constants.USE_CONTEXT:
        # Without context, strip the context-only parameters from the
        # single option of the choice node, editing its list in place.
        context_only_fields = [
            Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD,
            Constants.TOPIC_MODEL_ITERATIONS_FIELD,
            Constants.TOPIC_MODEL_PASSES_FIELD,
            Constants.TOPIC_MODEL_NUM_TOPICS_FIELD,
        ]
        option_args = space.pos_args[1].named_args
        option_args[:] = [
            entry for entry in option_args
            if entry[0] not in context_only_fields
        ]

    # Disabled: fmin(run_recommender, space=space, algo=tpe.suggest,
    #                max_evals=100, trials=trials)

    print('losses', sorted(trials.losses()))
    winner = trials.best_trial
    print('best', winner['result']['loss'], winner['misc']['vals'])
    print('num trials: %d' % len(trials.losses()))
def tune_parameters():
    """Print a summary of the nested cross-validation tuning trials.

    The MongoDB URL is derived from ``Constants.ITEM_TYPE``, the
    context/no-context suffix and ``Constants.NESTED_CROSS_VALIDATION_CYCLE``.
    A search space is assembled on top of ``Constants.get_properties_copy()``
    and, when context is off, pruned of the context-only parameters.  The
    optimisation call itself is disabled, so this only prints what is
    already stored: sorted losses, best loss with its values, trial count.
    """
    from utils.constants import Constants

    context_name = '_nocontext'
    if Constants.USE_CONTEXT:
        context_name = '_context'
    cycle = '_' + str(Constants.NESTED_CROSS_VALIDATION_CYCLE)

    mongo_url = 'mongo://localhost:1234/' + Constants.ITEM_TYPE + \
        context_name + '_db_nested' + cycle + '/jobs'
    # Swap in Trials() here for an in-memory store.
    trials = MongoTrials(mongo_url, exp_key='exp1')
    print('Connected to %s' % mongo_url)

    params = Constants.get_properties_copy()
    # Only the two FM dimensions below are sampled; the remaining FM, LDA
    # and topic-model options stay at their configured values.
    fm_iterations = hp.quniform(Constants.FM_ITERATIONS_FIELD, 1, 500, 1)
    fm_num_factors = hp.quniform(Constants.FM_NUM_FACTORS_FIELD, 0, 200, 1)
    params.update({
        Constants.BUSINESS_TYPE_FIELD: Constants.ITEM_TYPE,
        Constants.TOPN_NUM_ITEMS_FIELD: Constants.TOPN_NUM_ITEMS,
        Constants.NESTED_CROSS_VALIDATION_CYCLE_FIELD:
            Constants.NESTED_CROSS_VALIDATION_CYCLE,
        Constants.FM_ITERATIONS_FIELD: fm_iterations,
        Constants.FM_NUM_FACTORS_FIELD: fm_num_factors,
        Constants.USE_CONTEXT_FIELD: Constants.USE_CONTEXT,
    })

    space = hp.choice(Constants.USE_CONTEXT_FIELD, [params])

    if not Constants.USE_CONTEXT:
        unwanted_args = [
            Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD,
            Constants.TOPIC_MODEL_ITERATIONS_FIELD,
            Constants.TOPIC_MODEL_PASSES_FIELD,
            Constants.TOPIC_MODEL_NUM_TOPICS_FIELD,
        ]
        named_args = space.pos_args[1].named_args
        # Walk backwards so deleting an entry never shifts the positions
        # that are still to be visited.
        for position in reversed(range(len(named_args))):
            if named_args[position][0] in unwanted_args:
                del named_args[position]

    # Disabled: fmin(run_recommender, space=space, algo=tpe.suggest,
    #                max_evals=100, trials=trials)

    ordered_losses = sorted(trials.losses())
    print('losses', ordered_losses)
    print(
        'best',
        trials.best_trial['result']['loss'],
        trials.best_trial['misc']['vals'])
    trial_count = len(trials.losses())
    print('num trials: %d' % trial_count)
exec "trials=" + trials return trials if __name__ == "__main__": trials = MongoTrials('mongo://localhost:1234/first_try/jobs', exp_key='big_run3') x_name = 'test_bias' y_name = 'learn_bias' z_name = 'learning_rate' x = trials.vals[x_name] y = trials.vals[y_name] z = trials.vals[z_name] w = trials.losses() indices = filter(lambda x: w[x] < 1, range(len(w))) x = [x[i] for i in indices] y = [y[i] for i in indices] z = [z[i] for i in indices] w = [w[i] for i in indices] p = ax.scatter(x, y, z, cmap='winter', s=50, c=w) ax.set_xlabel(x_name) ax.set_ylabel(y_name) ax.set_zlabel(z_name) fig.colorbar(p) plt.show() sorted_indices = sorted(range(len(w)), key=lambda x: w[x])
trials = trials.split('Trials: ')[1] exec "trials=" + trials return trials if __name__=="__main__": trials = MongoTrials('mongo://localhost:1234/first_try/jobs', exp_key='big_run3') x_name = 'test_bias' y_name = 'learn_bias' z_name = 'learning_rate' x = trials.vals[x_name] y = trials.vals[y_name] z = trials.vals[z_name] w = trials.losses() indices = filter(lambda x: w[x] < 1, range(len(w))) x = [x[i] for i in indices] y = [y[i] for i in indices] z = [z[i] for i in indices] w = [w[i] for i in indices] p = ax.scatter(x, y, z, cmap='winter', s=50, c=w) ax.set_xlabel(x_name) ax.set_ylabel(y_name) ax.set_zlabel(z_name) fig.colorbar(p) plt.show() sorted_indices = sorted(range(len(w)), key=lambda x: w[x])