def run_optimization(level=1):
    """Run one level of the staged hyperparameter search against MongoDB.

    Skips the level entirely if the *next* level already has trials (meaning
    this level finished earlier). For level 1 the TPE budget comes straight
    from LEVEL_DEFS; for deeper levels the budget is the count of finished
    previous-level trials plus a node-aligned batch of new evaluations.

    Parameters
    ----------
    level : int, optional
        1-based search level; must be >= 1. Defaults to 1.

    Raises
    ------
    ValueError
        If ``level`` is less than 1. (The original code left ``max_evals``
        and ``depth`` unbound in that case and crashed later with a
        NameError at the first use.)
    """
    print(f"Optimizing at level {level}")
    set_random_seeds(4)

    # If the next level's experiment already has trials, this level is done.
    next_lvl_trials = MongoTrials('mongo://localhost:1234/covid/jobs',
                                  exp_key=f'covid-{level+1}')
    if len(next_lvl_trials.trials) > 0:
        print(f"Already completed level {level} -- skipping")
        return

    exp_key = f'covid-{level}'
    trials = MongoTrials('mongo://localhost:1234/covid/jobs', exp_key=exp_key)
    suggestion_box = hyperopt.tpe.suggest

    if level == 1:
        max_evals = LEVEL_DEFS[0][1]
        depth = 1
    elif level > 1:
        depth, new_budget, extend_budget = LEVEL_DEFS[level - 1]
        # Minimum one per node for the expensive ones -- no point wasting
        # compute time: round the per-depth budget up to a multiple of nodes.
        num_new = int(np.ceil((new_budget / depth) / NUM_NODES) * NUM_NODES)
        if len(trials.trials) == 0:
            # Fresh level: seed the suggester from the previous level's
            # results instead of plain TPE.
            print("Generating estimates from previous level")
            result_docs = configure_next_level(level, depth, extend_budget)
            suggestion_box = create_suggestion_box(result_docs)
        # Computed on both fresh and resumed runs so max_evals is always
        # bound when level > 1.
        last_level_trials = MongoTrials('mongo://localhost:1234/covid/jobs',
                                        exp_key=f'covid-{level-1}')
        prev_level_count = len(
            [x for x in last_level_trials.losses() if x is not None])
        max_evals = prev_level_count + num_new
    else:
        # Fail fast rather than letting max_evals/depth stay unbound.
        raise ValueError(f"level must be >= 1, got {level}")

    trials.refresh()
    objective = functools.partial(test_parameterization, num_epochs=depth)
    if len([x for x in trials.statuses() if x == 'ok']) >= max_evals:
        print(f"Already completed level {level} -- skipping")
    else:
        best = hyperopt.fmin(objective, space=SEARCH_SPACE,
                             algo=suggestion_box, max_evals=max_evals,
                             trials=trials)
        print(best)
# NOTE(review): Python 2 chunk (`print best` statement). It begins
# mid-statement: "trials=trials)" closes a call -- presumably an
# hyperopt.fmin(...) invocation -- whose opening lines are outside this view.
# `use_mongo`, `workers`, `def_logging_dir`, `date_time_string`, and `then`
# are all defined elsewhere; `then` is presumably the fmin start timestamp.
                     trials=trials)
# Post-search teardown: stop the mongo worker processes, merge each worker's
# temporary log into one aggregated log file, append run statistics, and
# report the best result.
now = datetime.datetime.now()
if use_mongo:
    for p in workers:
        p.terminate()
#merge the temporary log files
filenames = os.listdir(def_logging_dir)
# Temp logs are recognized by the "temp_<timestamp>" marker in their names.
temp_logs = filter(lambda x: x.find("temp_" + date_time_string) > -1, filenames)
aggregated_log = open(def_logging_dir + "log_" + date_time_string, 'w')
for temp_log in temp_logs:
    # Copy each temp log's contents into the aggregate, then delete it.
    t = open(def_logging_dir + temp_log, 'r')
    aggregated_log.write(t.read())
    t.close()
    os.remove(def_logging_dir + temp_log)
# Append timing and the full trial bookkeeping from the search.
aggregated_log.write("Time for fmin: " + str(now - then) + "\n")
aggregated_log.write("Trials: " + str(trials.trials) + "\n")
aggregated_log.write("Results: " + str(trials.results) + "\n")
aggregated_log.write("Losses: " + str(trials.losses()) + "\n")
aggregated_log.write("Statuses: " + str(trials.statuses()) + "\n")
aggregated_log.close()
print best
# NOTE(review): Python 2 chunk (print statements, dict.iteritems). It begins
# mid-loop: `t` is one hyperopt trial document and `i` its index -- the
# enclosing loop (presumably `for i, t in enumerate(trials.trials)`) is
# outside this view. `res`, `new`, `tdToStr`, and `tabulate` are likewise
# defined elsewhere.
r = t['result']
if r['status'] == 'ok':
    # Finished trial: flatten the sampled hyperparameter values (each stored
    # as a one-element list) plus loss and wall-clock compute time into one
    # flat row for tabulation.
    d = dict([(k, int(v[0])) for k, v in t['misc']['vals'].iteritems()])
    d['Loss'] = r['loss']
    d['i'] = i
    d['Compute Time'] = tdToStr(t['refresh_time'] - t['book_time'])
    res.append(d)
if r['status'] == 'new':
    # In-progress trial: show elapsed time so far and which worker owns it.
    d = dict([(k, int(v[0])) for k, v in t['misc']['vals'].iteritems()])
    #d['c'] = np.prod(d.values())
    if t['book_time']:
        # NOTE(review): utcnow() implies book_time is stored as naive UTC --
        # confirm against how hyperopt/Mongo records it.
        d['Elapsed Time'] = tdToStr(datetime.utcnow() - t['book_time'])
    d['owner'] = t['owner'][0]
    d['i'] = i
    #d['book time'] = t['book_time']
    new.append(d)
#print t
#break

# After the loop: print summary tables of finished and in-progress trials.
print "There are {0} finished results! ({1} total, {2} new)".format(
    len(res), len(trials.trials), sum(s == 'new' for s in trials.statuses()))
print "RESULTS:"
#print tabulate(sorted(res, key=lambda x:x['Compute Time']), headers='keys')
print tabulate(sorted(res, key=lambda x: x['Loss']), headers='keys')
print "IN PROGRESS:"
print tabulate(new, headers='keys')