def returnBest(config_directory): os.chdir("/home/carrknight/code/oxfish/runs/optimization/spearmint") options = get_options(config_directory, config_file="config.json") experiment_name = str(options['experiment-name']) db = MongoDB() resources = parse_resources_from_config(options) resource = resources.itervalues().next() # load hyper parameters chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser']) chooser = chooser_module.init(options) print "chooser", chooser hypers = db.load(experiment_name, "hypers") print "loaded hypers", hypers # from GP.to_dict() jobs = load_jobs(db, experiment_name) remove_broken_jobs(db, jobs, experiment_name, resources) task_options = {task: options["tasks"][task] for task in resource.tasks} task_group = spearmint.main.load_task_group(db, options, resource.tasks) hypers = spearmint.main.load_hypers(db, experiment_name) print "loaded hypers", hypers # from GP.to_dict() hypers = chooser.fit(task_group, hypers, task_options) print "\nfitted hypers:" print(hypers) lp, x = chooser.best() x = x.flatten() print "best", lp, x bestp = task_group.paramify(task_group.from_unit(x)) print "expected best position", bestp return bestp
def runSpearmint(self, name): options, expt_dir = self.get_options([os.path.abspath(os.path.join(self.scratchPath,name))]) resources = main.parse_resources_from_config(options) # Load up the chooser. chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser']) chooser = chooser_module.init(options) experiment_name = options.get("experiment-name", 'unnamed-experiment') # Connect to the database db_address = options['database']['address'] sys.stderr.write('Using database at %s.\n' % db_address) db = MongoDB(database_address=db_address) threshold = 1e-2 look_back = 3 stopping = False while not stopping: for resource_name, resource in resources.iteritems(): jobs = main.load_jobs(db, experiment_name) # resource.printStatus(jobs) # If the resource is currently accepting more jobs # TODO: here cost will eventually also be considered: even if the # resource is not full, we might wait because of cost incurred # Note: I chose to fill up one resource and them move on to the next # You could also do it the other way, by changing "while" to "if" here while resource.acceptingJobs(jobs): # Load jobs from DB # (move out of one or both loops?) would need to pass into load_tasks jobs = main.load_jobs(db, experiment_name) #pprint.pprint(main.load_hypers(db, experiment_name)) # Remove any broken jobs from pending. 
main.remove_broken_jobs(db, jobs, experiment_name, resources) # Get a suggestion for the next job suggested_job = main.get_suggestion(chooser, resource.tasks, db, expt_dir, options, resource_name) # Submit the job to the appropriate resource process_id = resource.attemptDispatch(experiment_name, suggested_job, db_address, expt_dir) # Set the status of the job appropriately (successfully submitted or not) if process_id is None: suggested_job['status'] = 'broken' main.save_job(suggested_job, db, experiment_name) else: suggested_job['status'] = 'pending' suggested_job['proc_id'] = process_id main.save_job(suggested_job, db, experiment_name) jobs = main.load_jobs(db, experiment_name) # Print out the status of the resources # resource.printStatus(jobs) print_resources_status(resources.values(), jobs) stalled = [] for task in main.load_task_group(db, options, resource.tasks).tasks.values(): performance = task.valid_normalized_data_dict["values"][::-1] stalled.append(0) if len(performance) > look_back: print performance[0:look_back] print "Diffs: ", within_thresh = True for i,run in enumerate(performance[0:look_back]): diff = abs(run - performance[i+1]) print str(round(diff,2))+", ", if diff > threshold: within_thresh = False print "...No stall" break if within_thresh: stalled[len(stalled)-1] = 1 if all(stalled): sys.exit("Stalled!") # If no resources are accepting jobs, sleep # (they might be accepting if suggest takes a while and so some jobs already finished by the time this point is reached) if main.tired(db, experiment_name, resources): time.sleep(options.get('polling-time', 5))
def main(): """ Usage: python make_plots.py PATH_TO_DIRECTORY TODO: Some aspects of this function are specific to the simple branin example We should clean this up so that interpretation of plots are more clear and so that it works in more general cases (e.g. if objective likelihood is binomial then values should not be unstandardized) """ options, expt_dir = get_options() print "options:" print_dict(options) # reduce the grid size options["grid_size"] = 400 resources = parse_resources_from_config(options) # Load up the chooser. chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser']) chooser = chooser_module.init(options) print "chooser", chooser experiment_name = options.get("experiment-name", 'unnamed-experiment') # Connect to the database db_address = options['database']['address'] sys.stderr.write('Using database at %s.\n' % db_address) db = MongoDB(database_address=db_address) # testing below here jobs = load_jobs(db, experiment_name) remove_broken_jobs(db, jobs, experiment_name, resources) print "resources:", resources print_dict(resources) resource = resources.itervalues().next() task_options = { task: options["tasks"][task] for task in resource.tasks } print "task_options:" print_dict(task_options) # {'main': {'likelihood': u'NOISELESS', 'type': 'OBJECTIVE'}} task_group = load_task_group(db, options, resource.tasks) print "task_group", task_group # TaskGroup print "tasks:" print_dict(task_group.tasks) # {'main': <spearmint.tasks.task.Task object at 0x10bf63290>} hypers = load_hypers(db, experiment_name) print "loaded hypers", hypers # from GP.to_dict() hypers = chooser.fit(task_group, hypers, task_options) print "\nfitted hypers:" print_dict(hypers) lp, x = chooser.best() x = x.flatten() print "best", lp, x bestp = task_group.paramify(task_group.from_unit(x)) print "expected best position", bestp # get the grid of points grid = chooser.grid # print "chooser objectives:", # print_dict(chooser.objective) print "chooser models:", 
chooser.models print_dict(chooser.models) obj_model = chooser.models[chooser.objective['name']] obj_mean, obj_var = obj_model.function_over_hypers(obj_model.predict, grid) # un-normalize the function values and variances obj_task = task_group.tasks['main'] obj_mean = [obj_task.unstandardize_mean(obj_task.unstandardize_variance(v)) for v in obj_mean] obj_std = [obj_task.unstandardize_variance(np.sqrt(v)) for v in obj_var] # for xy, m, v in izip(grid, obj_mean, obj_var): # print xy, m, v grid = map(task_group.from_unit, grid) # return xymv = [(xy[0], xy[1], m, v) for xy, m, v in izip(grid, obj_mean, obj_std)]# if .2 < xy[0] < .25] x = map(lambda x:x[0], xymv) y = map(lambda x:x[1], xymv) m = map(lambda x:x[2], xymv) sig = map(lambda x:x[3], xymv) # print y fig = plt.figure(dpi=100) ax = fig.add_subplot(111, projection='3d') ax.plot(x, y, m, marker='.', linestyle="None") # plot errorbars for i in np.arange(0, len(x)): ax.plot([x[i], x[i]], [y[i], y[i]], [m[i]+sig[i], m[i]-sig[i]], marker="_", color='k') # get the observed points task = task_group.tasks['main'] idata = task.valid_normalized_data_dict xy = idata["inputs"] xy = map(task_group.from_unit, xy) xy = np.array(xy) vals = idata["values"] vals = [obj_task.unstandardize_mean(obj_task.unstandardize_variance(v)) for v in vals] ax.plot(xy[:,0], xy[:,1], vals, marker='o', color="r", linestyle="None") plt.show()
def main(filter=None): """ Usage: python make_plots.py PATH_TO_DIRECTORY """ parser = argparse.ArgumentParser() parser.add_argument('--clean', action='store_true', help='remove broken jobs') parser.add_argument('--table', action='store_true', help='print table') parser.add_argument('--csv', action='store_true', help='save table as csv') parser.add_argument('--d', type=int, help='sort by distance from dth smallest result') parser.add_argument('--name', help='experiment name', default=None) args, unknown = parser.parse_known_args() options, expt_dir = get_options(unknown) # print "options:" # print_dict(options) # reduce the grid size options["grid_size"] = 400 resources = parse_resources_from_config(options) # Load up the chooser. chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser']) chooser = chooser_module.init(options) # print "chooser", chooser if args.name: experiment_name = args.name else: experiment_name = options.get("experiment-name", 'unnamed-experiment') # Connect to the database db_address = options['database']['address'] # sys.stderr.write('Using database at %s.\n' % db_address) db = MongoDB(database_address=db_address) # testing below here jobs = load_jobs(db, experiment_name) print len(jobs), 'jobs found' # print jobs # remove_broken_jobs if args.clean: for job in jobs: if job['status'] == 'pending': sys.stderr.write('Broken job %s detected.\n' % job['id']) job['status'] = 'broken' db.save(job, experiment_name, 'jobs', {'id' : job['id']}) # print "resources:", resources # print_dict(resources) resource = resources.itervalues().next() task_options = {task: options["tasks"][task] for task in resource.tasks} # print "task_options:" # print_dict(task_options) # {'main': {'likelihood': u'NOISELESS', 'type': 'OBJECTIVE'}} task_group = load_task_group(db, options, experiment_name, resource.tasks) hypers = load_hypers(db, experiment_name) chooser.fit(task_group, hypers, task_options) lp, x = chooser.best() if args.table: 
os.chdir(unknown[0]) out_file = open('results.csv', 'w') if args.csv else sys.stdout # get the observed points task = task_group.tasks.itervalues().next() idata = task.valid_normalized_data_dict inputs = idata["inputs"] inputs = map(lambda i: [paramify(task_group, task_group.from_unit(i)).values(), i], inputs) vals = idata["values"] vals = [task.unstandardize_mean(task.unstandardize_variance(v)) for v in vals] out_file.write('\n%10s' % 'result') lengths = [10] for name, vdict in task.variables_meta.iteritems(): name = '%10s' % name out_file.write(',' + name) lengths.append(len(name)) out_file.write('\n') line_template = '%' + str(lengths[0]) + '.4f,' + ','.join(['%' + str(l) + ('.4f' if 'enum' not in inputs[0][0][i]['type'] else 's') for i, l in enumerate(lengths[1:])]) points = sorted(zip(vals, inputs), key=lambda r: r[0]) if args.d is not None: target = x if args.d >= 0: target = points[args.d][1][1] points = sorted(points, key=lambda r: np.linalg.norm(r[1][1] - target)) for i, point in enumerate(points): subs = [point[0]] + [d['values'][0] for d in point[1][0]] out_file.write(line_template % tuple(subs) + '\n') out_file.close()
def plot( config_directory="/home/carrknight/code/oxfish/runs/optimization/spearmint" ): os.chdir("/home/carrknight/code/oxfish/runs/optimization/spearmint") options = get_options(config_directory, config_file="config.json") experiment_name = str(options['experiment-name']) db = MongoDB() resources = parse_resources_from_config(options) resource = resources.itervalues().next() # load hyper parameters chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser']) chooser = chooser_module.init(options) print "chooser", chooser hypers = db.load(experiment_name, "hypers") print "loaded hypers", hypers # from GP.to_dict() jobs = load_jobs(db, experiment_name) remove_broken_jobs(db, jobs, experiment_name, resources) task_options = {task: options["tasks"][task] for task in resource.tasks} task_group = spearmint.main.load_task_group(db, options, resource.tasks) hypers = spearmint.main.load_hypers(db, experiment_name) print "loaded hypers", hypers # from GP.to_dict() hypers = chooser.fit(task_group, hypers, task_options) print "\nfitted hypers:" print(hypers) lp, x = chooser.best() x = x.flatten() print "best", lp, x bestp = task_group.paramify(task_group.from_unit(x)) print "expected best position", bestp print "chooser models:", chooser.models obj_model = chooser.models[chooser.objective['name']] grid = chooser.grid obj_mean, obj_var = obj_model.function_over_hypers(obj_model.predict, grid) import numpy as np bounds = dict() for task_name, task in task_group.tasks.iteritems(): # make a grid, feed it to the predictor: dimensions = () for key in options['variables'].keys(): type = str(options['variables'][key]["type"]).strip().lower() keyname = key.encode('utf-8') bounds[keyname] = dict() bounds[keyname]["type"] = type if type == "float": dimension = np.linspace(0, 1, num=SPACING) dimensions = dimensions + (dimension, ) bounds[keyname]["min"] = options['variables'][key]["min"] bounds[keyname]["max"] = options['variables'][key]["max"] elif type == 
"int": min = int(options['variables'][key]["min"]) max = int(options['variables'][key]["max"]) dimension = np.linspace(0, 1, num=max - min) bounds[keyname]["min"] = min bounds[keyname]["max"] = max # dimension = np.array([x + min for x in range(max - min + 1)]) dimensions = dimensions + (dimension, ) else: bounds[keyname]["options"] = options['variables'][key][ "options"] assert type == "enum" dimension = tuple([ (0, 1) for i in range(len(options['variables'][key]["options"])) ]) for t in dimension: dimensions = dimensions + (t, ) # print(dimension) data = cartesian(np.array(dimensions)) mean, variance = chooser.models[task_name].predict(data) mean = [ task.unstandardize_mean(task.unstandardize_variance(v)) for v in mean ] variance = [task.unstandardize_variance(np.sqrt(v)) for v in variance] os.chdir(config_directory) # unzip the data new_data = zip(*data.transpose().tolist()) datum = zip(new_data, mean, variance) header = ",".join(options['variables'].keys()) + ",mean,variance" with open(experiment_name + ".csv", 'w') as fileout: fileout.write(header + "\n") for i in range(len(datum)): fileout.write( str(datum[i]).replace("(", "").replace(")", "").replace( ",,", ",") + "\n") with open(experiment_name + "_bounds.yaml", 'w') as outfile: outfile.write(yaml.dump(bounds, default_flow_style=False)) # grid = cartesian(dimensions) # mean, variance = obj_model.function_over_hypers(obj_model.predict, grid) # # mean = [obj_task.unstandardize_mean(obj_task.unstandardize_variance(v)) for v in mean] # variance = [obj_task.unstandardize_variance(np.sqrt(v)) for v in variance] # # # xymv = [([x for x in xy], m, v) for xy, m, v in izip(new_grid, obj_mean, obj_std)] # if .2 < xy[0] < .25] # with open(experiment_name + ".csv", 'w') as fileout: # for i in range(len(mean)): # fileout.write(str(([x for x in grid[i]], mean[i], variance[i])).replace("(", "").replace(")", ""). 
# replace("[", "").replace("]", "") + "\n") xy = np.array(task.inputs) # function values: vals = task.values vals = np.array(vals) np.savetxt(experiment_name + "_" + task_name + "_runs.csv", xy, delimiter=",", fmt='%.3e') np.savetxt(experiment_name + "_" + task_name + "_runs_values.csv", vals, delimiter=",", fmt='%.3e')
def main():
    """
    Usage: python make_plots.py PATH_TO_DIRECTORY

    Fits the configured chooser to the stored experiment, prints the
    intermediate state, and shows a 3D plot of the objective model's
    predictions over the chooser grid (first two input dimensions only),
    with vertical bars for the spread and the observed points in red.

    TODO: Some aspects of this function are specific to the simple branin example
    We should clean this up so that interpretation of plots are more clear and
    so that it works in more general cases
    (e.g. if objective likelihood is binomial then values should not be
    unstandardized)
    """
    options, expt_dir = get_options()
    print("options:")
    print_dict(options)

    # reduce the grid size
    options["grid_size"] = 400

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser = importlib.import_module(
        'spearmint.choosers.' + options['chooser']).init(options)
    print("chooser", chooser)
    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Connect to the database
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    # testing below here
    jobs = load_jobs(db, experiment_name)
    remove_broken_jobs(db, jobs, experiment_name, resources)

    print("resources:", resources)
    print_dict(resources)
    # Only the first configured resource is used for its task list.
    resource = next(resources.itervalues())

    task_options = {}
    for task_name in resource.tasks:
        task_options[task_name] = options["tasks"][task_name]
    print("task_options:")
    print_dict(task_options)  # {'main': {'likelihood': u'NOISELESS', 'type': 'OBJECTIVE'}}

    task_group = load_task_group(db, options, resource.tasks)
    print("task_group", task_group)  # TaskGroup
    print("tasks:")
    print_dict(task_group.tasks)  # {'main': <spearmint.tasks.task.Task object at 0x10bf63290>}

    hypers = load_hypers(db, experiment_name)
    print("loaded hypers", hypers)  # from GP.to_dict()

    hypers = chooser.fit(task_group, hypers, task_options)
    print("\nfitted hypers:")
    print_dict(hypers)

    best_value, best_unit = chooser.best()
    best_unit = best_unit.flatten()
    print("best", best_value, best_unit)
    bestp = task_group.paramify(task_group.from_unit(best_unit))
    print("expected best position", bestp)

    # get the grid of points
    unit_grid = chooser.grid
    print("chooser models:", chooser.models)
    print_dict(chooser.models)
    obj_model = chooser.models[chooser.objective['name']]
    obj_mean, obj_var = obj_model.function_over_hypers(obj_model.predict, unit_grid)

    # un-normalize the function values and variances
    obj_task = task_group.tasks['main']
    obj_mean = [obj_task.unstandardize_mean(obj_task.unstandardize_variance(value))
                for value in obj_mean]
    obj_std = [obj_task.unstandardize_variance(np.sqrt(value)) for value in obj_var]

    # Split the predictions into per-axis lists for plotting.
    xs, ys, means, sigmas = [], [], [], []
    for unit_point, mean_value, std_value in izip(unit_grid, obj_mean, obj_std):
        point = task_group.from_unit(unit_point)
        xs.append(point[0])
        ys.append(point[1])
        means.append(mean_value)
        sigmas.append(std_value)

    fig = plt.figure(dpi=100)
    ax = fig.add_subplot(111, projection='3d')
    ax.plot(xs, ys, means, marker='.', linestyle="None")

    # plot errorbars
    for px, py, pm, ps in izip(xs, ys, means, sigmas):
        ax.plot([px, px], [py, py], [pm + ps, pm - ps], marker="_", color='k')

    # get the observed points
    observed = obj_task.valid_normalized_data_dict
    observed_xy = np.array([task_group.from_unit(unit_point)
                            for unit_point in observed["inputs"]])
    observed_vals = [obj_task.unstandardize_mean(obj_task.unstandardize_variance(value))
                     for value in observed["values"]]

    ax.plot(observed_xy[:, 0], observed_xy[:, 1], observed_vals,
            marker='o', color="r", linestyle="None")

    plt.show()