Exemplo n.º 1
0
def main(expt_dir, repeat=-1):
    """Print every evaluation of every task of an experiment.

    The experiment is described by ``config.json`` inside ``expt_dir``.
    When ``repeat`` is greater than zero the experiment name is replaced by
    ``repeat_experiment_name(name, repeat)`` before the jobs are loaded.

    For each task, and for each of its recorded values, the iteration
    number (1-based), the corresponding inputs and the value are printed.
    """
    config = parse_config_file(expt_dir, 'config.json')
    name = config["experiment-name"]
    if repeat > 0:
        name = repeat_experiment_name(name, repeat)

    space = InputSpace(config["variables"])
    chooser_path = 'spearmint.choosers.' + config['chooser']
    # The chooser and the stored hypers are not used below; the calls are
    # kept so the function performs the same work as before.
    importlib.import_module(chooser_path).init(space, config)
    database = MongoDB(database_address=config['database']['address'])
    job_list = load_jobs(database, name)
    database.load(name, 'hypers')
    task_map = parse_tasks_from_jobs(job_list, name, config, space)

    for task_name, task in task_map.iteritems():
        for idx, value in enumerate(task.values):
            print('Iteration %d' % (idx + 1))
            input_space_row = task.inputs[idx]
            space.paramify_and_print(input_space_row, left_indent=0)
            print('%s: %s' % (task_name, value))
            # two blank lines between iterations
            print('')
            print('')
Exemplo n.º 2
0
def main(expt_dir, repeat=-1):
    """Print the parameters and task values of every completed job.

    The experiment is described by ``config.json`` inside ``expt_dir``.
    When ``repeat`` is greater than zero the experiment name is replaced by
    ``repeat_experiment_name(name, repeat)`` before the jobs are loaded.
    Jobs whose ``status`` is not ``'complete'`` are skipped.
    """
    config = parse_config_file(expt_dir, 'config.json')
    name = config["experiment_name"]
    if repeat > 0:
        name = repeat_experiment_name(name, repeat)

    space = InputSpace(config["variables"])
    chooser_path = 'spearmint.choosers.' + config['chooser']
    # The chooser, hypers and parsed tasks are not used below; the calls are
    # kept so the function performs the same work as before.
    importlib.import_module(chooser_path).init(space, config)
    database = MongoDB(database_address=config['database']['address'])
    job_list = load_jobs(database, name)
    database.load(name, 'hypers')
    parse_tasks_from_jobs(job_list, name, config, space)

    for job in job_list:
        if job['status'] != 'complete':
            continue

        print('Job %d' % job['id'])
        space.print_params(job['params'], left_indent=0)
        for task_name, value in job['values'].iteritems():
            print('%s: %s' % (task_name, value))
        print('')
Exemplo n.º 3
0
def get_optimized_params(tool_name):
    """Return the parameter set with the lowest error rate for a tool.

    Jobs are loaded from the experiment named ``tool_name + '_test'`` in the
    MongoDB instance at ``localhost``. Each job contributes one row made of
    the tool's three parameters plus ``error_rate`` (read from the job's
    ``'branin'`` value).

    Args:
        tool_name: one of ``'macs2'``, ``'cisgenome'``, ``'swembl'`` or
            ``'sicer'``.

    Returns:
        A dict mapping each parameter name and ``'error_rate'`` to the value
        of the best (lowest ``error_rate``) job. An empty dict is returned
        when ``tool_name`` is not supported or when no jobs were found.
    """
    # Parameter names optimized for each supported tool.
    params_by_tool = {
        'macs2': ['q', 'm_s', 'm_d'],
        'cisgenome': ['b', 'e', 'w'],
        'swembl': ['x', 'm', 'f'],
        'sicer': ['fs', 'gs', 'w'],
    }

    # Validate before touching the database so a bad name costs nothing.
    params = params_by_tool.get(tool_name)
    if params is None:
        print('incorrect tool name')
        return {}

    db = MongoDB(database_address='localhost')
    jobs = load_jobs(db, tool_name + '_test')

    # Collect plain dict rows and build the frame once; appending to a
    # DataFrame inside the loop copies the whole frame on every iteration.
    rows = []
    for job in jobs:
        row = {name: float(job['params'][name]['values']) for name in params}
        row['error_rate'] = float(job['values']['branin'])
        rows.append(row)

    if not rows:
        # Nothing to rank; sorting an empty frame by 'error_rate' would fail.
        return {}

    df = pd.DataFrame(rows).sort_values('error_rate').reset_index(drop=True)
    best = df.iloc[0]
    return {column: best[column] for column in df.columns}
Exemplo n.º 4
0
def main(expt_dir, n_repeat):
    """Print how much time each repeat spent in evaluations and in updates.

    For each of the ``n_repeat`` repeats of the experiment configured in
    ``expt_dir/config.json`` the completed jobs are walked in the order
    returned by ``load_jobs``:

    * ``end time - start time`` is credited to the job's first task;
    * the gap between the previous job's end (initially the experiment's
      stored ``start-time``) and this job's start is credited to either the
      "fast" or the "slow" update bucket, depending on ``job['fast update']``.

    All durations are divided by 60 before being accumulated and are
    reported as mean +/- standard deviation across repeats.
    """
    n_repeat = int(n_repeat)
    config = parse_config_file(expt_dir, 'config.json')
    task_names = config['tasks'].keys()

    jobs_by_repeat = {}
    start_by_repeat = {}
    for rep in xrange(n_repeat):
        name = repeat_experiment_name(config["experiment_name"], rep)
        database = MongoDB(database_address=config['database']['address'])
        jobs_by_repeat[rep] = load_jobs(database, name)
        start_by_repeat[rep] = database.load(name, 'start-time')['start-time']

    # One accumulator per task (created on first use), indexed by repeat.
    eval_minutes = defaultdict(lambda: np.zeros(n_repeat))
    fast_minutes = np.zeros(n_repeat)
    slow_minutes = np.zeros(n_repeat)

    for rep in xrange(n_repeat):
        previous_end = start_by_repeat[rep]

        for job in jobs_by_repeat[rep]:
            if job['status'] != 'complete':
                continue

            eval_minutes[job['tasks'][0]][rep] += (
                job['end time'] - job['start time']) / 60.0

            # Time between the previous job finishing and this one starting.
            bucket = fast_minutes if job['fast update'] else slow_minutes
            bucket[rep] += (job['start time'] - previous_end) / 60.0

            previous_end = job['end time']

    for task in task_names:
        per_task = eval_minutes[task]
        print('Average time on task %s over %d repeats: %f +/- %f minutes (mean +/- std)' % (
            task, n_repeat, np.mean(per_task), np.std(eval_minutes[task])))

    all_eval_minutes = sum(eval_minutes.values())
    print('Average time in JOBS over %d repeats: %f +/- %f minutes (mean +/- std)' % (
        n_repeat, np.mean(all_eval_minutes), np.std(all_eval_minutes)))
    print('Average time in FAST over %d repeats: %f +/- %f minutes (mean +/- std)' % (
        n_repeat, np.mean(fast_minutes), np.std(fast_minutes)))
    print('Average time in SLOW over %d repeats: %f +/- %f minutes (mean +/- std)' % (
        n_repeat, np.mean(slow_minutes), np.std(slow_minutes)))

    optimizer_minutes = fast_minutes + slow_minutes
    print('Average time in OPTIMIZER over %d repeats: %f +/- %f minutes (mean +/- std)' % (
        n_repeat, np.mean(optimizer_minutes), np.std(optimizer_minutes)))

    grand_total = np.sum([
        np.mean(all_eval_minutes),
        np.mean(fast_minutes),
        np.mean(slow_minutes),
    ])
    print('Total average time spent: %f' % grand_total)
def main(expt_dir, n_repeat):
    """Plot the cumulative number of evaluations of each task per iteration.

    Jobs are loaded for ``n_repeat`` repeats of the experiment configured in
    ``expt_dir/config.json``. Only the first ``min(len(jobs))`` iterations
    (the shortest repeat) are used. For every repeat a running count of how
    often each task appeared in a job's ``'tasks'`` entry is built, then the
    mean and standard deviation across repeats are drawn as error bars and
    saved to ``expt_dir/plots/cumulative_evals.pdf``.
    """
    n_repeat = int(n_repeat)
    config = parse_config_file(expt_dir, 'config.json')
    task_names = config['tasks'].keys()

    jobs_by_repeat = {}
    for rep in xrange(n_repeat):
        name = repeat_experiment_name(config["experiment_name"], rep)
        database = MongoDB(database_address=config['database']['address'])
        jobs_by_repeat[rep] = load_jobs(database, name)

    lengths = [len(job_list) for job_list in jobs_by_repeat.values()]
    print('Found %s iterations' % lengths)
    n_iter = min(lengths)

    # counts[repeat-or-"avg"/"err"][task][iteration]; missing entries read
    # as 0, which is what seeds the running total at iteration -1.
    counts = defaultdict(lambda: defaultdict(lambda: defaultdict(int)))
    for rep in xrange(n_repeat):
        for it in xrange(n_iter):
            for task in task_names:
                step = 1 if task in jobs_by_repeat[rep][it]['tasks'] else 0
                counts[rep][task][it] = counts[rep][task][it - 1] + step

    # Collapse the repeats into a mean and a spread per (task, iteration).
    for it in xrange(n_iter):
        for task in task_names:
            counts["avg"][task][it] = np.mean(
                [counts[rep][task][it] for rep in xrange(n_repeat)])
            counts["err"][task][it] = np.std(
                [counts[rep][task][it] for rep in xrange(n_repeat)])

    plt.figure()
    iterations = range(n_iter)
    for task in task_names:
        means = [counts["avg"][task][it] for it in xrange(n_iter)]
        spreads = [counts["err"][task][it] for it in xrange(n_iter)]
        plt.errorbar(iterations, means, yerr=spreads, linewidth=2)
    plt.legend(task_names, loc='upper left')
    plt.xlabel('Iteration number', size=25)
    plt.ylabel('Cumulative evaluations', size=25)

    # Make the directory for the plots
    out_dir = os.path.join(expt_dir, 'plots')
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)
    out_path = os.path.join(out_dir, 'cumulative_evals.pdf')
    print('Saving figure at %s' % out_path)
    plt.savefig(out_path)
Exemplo n.º 6
0
def db_to_df(db_address='localhost', db_name="spearmintDB_marfra", experiment_name='rnn_3'):
    """Load the completed jobs of an experiment into a pandas DataFrame.

    Args:
        db_address: address passed to ``MongoDB`` as ``database_address``.
        db_name: name passed to ``MongoDB`` as ``database_name``.
        experiment_name: experiment whose jobs are loaded.

    Returns:
        A DataFrame with one row per job whose ``status`` is ``'complete'``.
        Columns, in order: ``job_id``, ``exp_values`` (``np.exp`` of the
        ``'nlp'`` value), ``values`` (the ``'nlp'`` value), one column per
        entry of ``job['params']`` (first element of its ``'values'``),
        ``duration_h``, ``manual`` (only for jobs that carry that key),
        ``start_time`` and ``end_time``. An empty DataFrame is returned when
        there are no completed jobs.
    """
    db = MongoDB(database_address=db_address, database_name=db_name)

    # Load jobs
    jobs = load_jobs(db, experiment_name)

    frames = []
    for job in jobs:
        # Skip unfinished jobs
        if job['status'] != 'complete':
            continue

        # An OrderedDict keeps the column order stable.
        row = OrderedDict()
        row["job_id"] = [job["id"]]
        row["exp_values"] = [np.exp(job["values"]['nlp'])]
        row["values"] = [job["values"]['nlp']]
        for par in job["params"]:
            row[par] = [job["params"][par]["values"][0]]
        row["duration_h"] = [(job["end time"] - job["start time"]) / 3600.0]
        # Older records have no "manual" entry (backward compatibility).
        # Test for the key explicitly rather than swallowing every exception.
        if "manual" in job:
            row["manual"] = [job["manual"]]
        row["start_time"] = [job["start time"]]
        row["end_time"] = [job["end time"]]

        frames.append(pd.DataFrame.from_dict(row))

    if not frames:
        # pd.concat refuses an empty list; mirror the original empty result.
        return pd.DataFrame()

    # Concatenate once instead of re-copying the frame on every append.
    return pd.concat(frames, ignore_index=True)
Exemplo n.º 7
0
def returnBest(config_directory):
    """Fit the chooser on the stored jobs and return its best parameters.

    The working directory is first changed to a fixed spearmint run
    directory. Options are read from ``config.json`` in
    ``config_directory``, the chooser named in the options is fitted on the
    task group of the first configured resource, and the chooser's best
    point is converted from unit coordinates back to named parameters.

    Progress (chooser, loaded and fitted hypers, best point) is printed
    along the way.

    Returns:
        The result of ``task_group.paramify`` for the chooser's best point.
    """
    os.chdir("/home/carrknight/code/oxfish/runs/optimization/spearmint")
    config = get_options(config_directory, config_file="config.json")
    name = str(config['experiment-name'])
    database = MongoDB()
    all_resources = parse_resources_from_config(config)
    # Only the first resource is considered.
    first_resource = next(all_resources.itervalues())

    # load hyper parameters
    chooser_path = 'spearmint.choosers.' + config['chooser']
    chooser = importlib.import_module(chooser_path).init(config)
    print("chooser %s" % (chooser,))

    stored_hypers = database.load(name, "hypers")
    print("loaded hypers %s" % (stored_hypers,))  # from GP.to_dict()

    job_list = load_jobs(database, name)
    remove_broken_jobs(database, job_list, name, all_resources)
    per_task_options = {
        task: config["tasks"][task] for task in first_resource.tasks
    }
    group = spearmint.main.load_task_group(database, config,
                                           first_resource.tasks)

    stored_hypers = spearmint.main.load_hypers(database, name)
    print("loaded hypers %s" % (stored_hypers,))  # from GP.to_dict()

    fitted_hypers = chooser.fit(group, stored_hypers, per_task_options)
    print("\nfitted hypers:")
    print(fitted_hypers)

    best_value, best_unit = chooser.best()
    best_unit = best_unit.flatten()
    print("best %s %s" % (best_value, best_unit))
    best_params = group.paramify(group.from_unit(best_unit))
    print("expected best position %s" % (best_params,))

    return best_params
Exemplo n.º 8
0
def main():
    """
    Usage: python make_plots.py PATH_TO_DIRECTORY

    Fit the configured chooser on the stored jobs, then draw a 3-D scatter
    of the objective model's predicted mean over the chooser's grid (with
    +/- one predicted standard deviation as vertical bars) together with
    the observed points in red.

    TODO: Some aspects of this function are specific to the simple branin
    example (the task is looked up as 'main' and only the first two input
    dimensions are plotted). We should clean this up so that interpretation
    of plots is clearer and so that it works in more general cases
    (e.g. if the objective likelihood is binomial then values should not be
    unstandardized).
    """
    options, expt_dir = get_options()
    print("options:")
    print_dict(options)

    # reduce the grid size
    options["grid_size"] = 400

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_path = 'spearmint.choosers.' + options['chooser']
    chooser = importlib.import_module(chooser_path).init(options)
    print("chooser %s" % (chooser,))
    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Connect to the database
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    jobs = load_jobs(db, experiment_name)
    remove_broken_jobs(db, jobs, experiment_name, resources)

    print("resources: %s" % (resources,))
    print_dict(resources)
    # Only the first resource is considered.
    resource = next(resources.itervalues())

    task_options = {name: options["tasks"][name] for name in resource.tasks}
    print("task_options:")
    print_dict(task_options)  # {'main': {'likelihood': u'NOISELESS', 'type': 'OBJECTIVE'}}

    task_group = load_task_group(db, options, resource.tasks)
    print("task_group %s" % (task_group,))  # TaskGroup
    print("tasks:")
    print_dict(task_group.tasks)  # {'main': <spearmint.tasks.task.Task object at 0x10bf63290>}

    hypers = load_hypers(db, experiment_name)
    print("loaded hypers %s" % (hypers,))  # from GP.to_dict()

    hypers = chooser.fit(task_group, hypers, task_options)
    print("\nfitted hypers:")
    print_dict(hypers)

    best_value, best_unit = chooser.best()
    best_unit = best_unit.flatten()
    print("best %s %s" % (best_value, best_unit))
    bestp = task_group.paramify(task_group.from_unit(best_unit))
    print("expected best position %s" % (bestp,))

    # get the grid of points
    grid = chooser.grid
    print("chooser models: %s" % (chooser.models,))
    print_dict(chooser.models)
    obj_model = chooser.models[chooser.objective['name']]
    obj_mean, obj_var = obj_model.function_over_hypers(obj_model.predict, grid)

    # un-normalize the function values and variances
    obj_task = task_group.tasks['main']
    obj_mean = [
        obj_task.unstandardize_mean(obj_task.unstandardize_variance(value))
        for value in obj_mean
    ]
    obj_std = [
        obj_task.unstandardize_variance(np.sqrt(value)) for value in obj_var
    ]

    # Map the grid from unit coordinates back to the original input space.
    grid = [task_group.from_unit(point) for point in grid]

    # Keep the first two input coordinates with the predicted mean and std.
    samples = [(point[0], point[1], mean, std)
               for point, mean, std in izip(grid, obj_mean, obj_std)]
    xs = [sample[0] for sample in samples]
    ys = [sample[1] for sample in samples]
    means = [sample[2] for sample in samples]
    stds = [sample[3] for sample in samples]

    fig = plt.figure(dpi=100)
    ax = fig.add_subplot(111, projection='3d')

    ax.plot(xs, ys, means, marker='.', linestyle="None")

    # plot errorbars
    for px, py, pm, ps in zip(xs, ys, means, stds):
        ax.plot([px, px], [py, py], [pm + ps, pm - ps], marker="_", color='k')

    # get the observed points
    observed = task_group.tasks['main'].valid_normalized_data_dict
    observed_xy = np.array(
        [task_group.from_unit(point) for point in observed["inputs"]])
    observed_vals = [
        obj_task.unstandardize_mean(obj_task.unstandardize_variance(value))
        for value in observed["values"]
    ]

    ax.plot(observed_xy[:, 0], observed_xy[:, 1], observed_vals,
            marker='o', color="r", linestyle="None")

    plt.show()
Exemplo n.º 9
0
def main(dirs,
         n_repeat=-1,
         n_iter_spec=None,
         rec_type="model",
         average="mean",
         log_scale=False,
         violation_value=1.,
         constraint_tol=0.,
         make_dist_plot=False,
         mainfile=None,
         stretch_x=False,
         task_comp_x=None,
         plot_wall_time=False,
         bin_size=1.0,
         plot_separate=False,
         labels=None,
         y_axis_label=None,
         x_axis_label=None):

    # Create the figure that plots utility gap
    fig = dict()
    ax = dict()
    # averaging function
    if average == "mean":
        avg = np.mean
    elif average == "median":
        avg = np.median
    else:
        raise Exception("Unknown average %s" % average)

    fig['err'] = plt.figure()
    ax['err'] = fig['err'].add_subplot(1, 1, 1)
    if plot_wall_time:
        ax['err'].set_xlabel("wall time (min)", size=25)
    elif x_axis_label:
        ax['err'].set_xlabel(x_axis_label, size=25)
    else:
        ax['err'].set_xlabel('Number of function evaluations', size=25)
    ax['err'].tick_params(axis='both', which='major', labelsize=20)

    # Create the figure that plots L2 distance from solution
    fig['dist'] = plt.figure()
    ax['dist'] = fig['dist'].add_subplot(1, 1, 1)
    if x_axis_label:
        ax['dist'].set_xlabel(x_axis_label, size=25)
    else:
        ax['dist'].set_xlabel('Number of function evaluations', size=25)
    if y_axis_label:
        ax['dist'].set_ylabel(y_axis_label, size=25)
    elif log_scale:
        ax['dist'].set_ylabel('$\log_{10}\, \ell_2$-distance', size=25)
    else:
        ax['dist'].set_ylabel('$\ell_2$-distance', size=25)
    ax['dist'].tick_params(axis='both', which='major', labelsize=20)

    db_document_name = 'recommendations'

    acq_names = list()
    for expt_dir in dirs:
        options = parse_config_file(expt_dir, 'config.json')
        experiment_name = options["experiment_name"]
        input_space = InputSpace(options["variables"])
        chooser_module = importlib.import_module('spearmint.choosers.' +
                                                 options['chooser'])
        chooser = chooser_module.init(input_space, options)
        db = MongoDB(database_address=options['database']['address'])
        jobs = load_jobs(db, experiment_name)
        hypers = db.load(experiment_name, 'hypers')
        tasks = parse_tasks_from_jobs(jobs, experiment_name, options,
                                      input_space)

        if rec_type == "model":
            if mainfile is None:
                main_file = options['main_file']
            else:
                main_file = mainfile

            sys.path.append(options['main_file_path']
                            )  # TODO: make this nicer with proper importin
            if (main_file[-3:] == u'.py') is True:
                module = importlib.import_module(main_file[:len(main_file) -
                                                           3])
            else:
                module = importlib.import_module(main_file)
            sys.path.remove(options['main_file_path'])

        obj, con = get_objectives_and_constraints(options)  # get the names
        obj = obj[0]  # only one objective
        print 'Found %d constraints' % len(con)
        plot_utility_gap = rec_type == "model" and hasattr(module, 'true_val')

        if plot_utility_gap:
            print 'PLOTTING UTILITY GAP'
            if y_axis_label:
                ax['err'].set_ylabel(y_axis_label, size=25)
            elif log_scale:
                ax['err'].set_ylabel('$\log_{10}$ utility gap', size=25)
            else:
                ax['err'].set_ylabel('utility gap', size=25)
        else:
            if y_axis_label:
                ax['err'].set_ylabel(y_axis_label, size=25)
            elif log_scale:
                ax['err'].set_ylabel('$\log_{10}$ objective value', size=25)
            else:
                ax['err'].set_ylabel('objective value', size=25)

        # Make the directory for the plots
        plots_dir = os.path.join(expt_dir, 'plots')
        if not os.path.isdir(plots_dir):
            os.mkdir(plots_dir)

        # if the module provides the location of the true solution, plot the distance to this solution vs iterations
        if make_dist_plot and not hasattr(module, 'true_sol'):
            raise Exception(
                "make_dist_plot turned on but cannot find true sol in the main_file"
            )

        # If repeat >= 0, then we are averaging a number of experiments
        # We assume the experiments are stored with the original name plus a hyphen plus the number
        n_repeat = int(n_repeat)
        if n_repeat < 0:
            recs = db.load(experiment_name, db_document_name)
            if recs is None:
                raise Exception(
                    "Could not find experiment %s in database at %s" %
                    (experiment_name, options['database']['address']))

            # the x axis represents the number of evals of a particular task given by task_comp_x
            # so we only take the data where this number was incrememted, i.e. when this task was evaluated
            if task_comp_x:
                # only include recommendations when you finish a particular task
                new_recs = list()
                last_complete = 0
                for rec in recs:
                    cur_complete = rec['num_complete_tasks'][task_comp_x]
                    if cur_complete > last_complete:
                        last_complete = cur_complete
                        new_recs.append(rec)
                recs = new_recs

            n_iter = len(recs) if n_iter_spec is None else n_iter_spec

            iters = range(n_iter)

            if plot_wall_time:
                if task_comp_x:
                    raise Exception("Do not use plot wall_time with task_x")
                iters = [rec['total_elapsed_time'] / 60.0 for rec in recs]
                iters = iters[:n_iter]
                iters = np.array(iters, dtype=float)

            print 'Found %d iterations' % len(recs)

            if rec_type == "model":
                values = [
                    true_func(rec, module, violation_value, constraint_tol,
                              obj, con) for rec in recs
                ]

                if log_scale:
                    ax['err'].plot(iters, map(np.log10, values))
                else:
                    ax['err'].plot(iters, values)
            else:
                if rec_type == "observations":
                    observations = [x['obj_o'] for x in recs]
                elif rec_type == "mixed":
                    observations = [x['obj_om'] for x in recs]
                else:
                    raise Exception("unknown rec type")

                for i in xrange(len(observations)):
                    if observations[i] is None or np.isnan(observations[i]):
                        observations[i] = violation_value
                # print observations
                # print len(observations)

                if log_scale:
                    ax['err'].plot(iters, np.log10(observations))
                else:
                    ax['err'].plot(iters, observations)

            if make_dist_plot:
                distances = [
                    params_norm(rec['params'], module.true_sol())
                    for rec in recs
                ]
                if log_scale:
                    ax['dist'].plot(iters, np.log10(distances))
                else:
                    ax['dist'].plot(iters, distances)
        else:
            # MULTIPLE REPEATS
            repeat_recs = [
                db.load(repeat_experiment_name(experiment_name, j),
                        db_document_name) for j in xrange(n_repeat)
            ]
            if None in repeat_recs:
                for i, repeat_rec in enumerate(repeat_recs):
                    if repeat_rec is None:
                        print 'Could not load experiment %s repeat %d' % (
                            experiment_name, i)
                print 'Exiting...'
                return

            if task_comp_x:
                # only include recommendations when you finish a particular task
                new_repeat_recs = list()
                for recs in repeat_recs:
                    recs = sorted(recs, key=lambda k: k['id'])  # sort by id
                    new_recs = list()
                    last_complete = 0
                    for rec in recs:
                        cur_complete = rec['num_complete_tasks'][task_comp_x]
                        if cur_complete == last_complete + 1:
                            last_complete = cur_complete
                            new_recs.append(rec)
                        elif cur_complete == last_complete:
                            pass
                        else:
                            print(
                                'WARNING: cur complete=%d, last_complete=%d' %
                                (cur_complete, last_complete))
                            break
                    new_repeat_recs.append(new_recs)

                repeat_recs = new_repeat_recs

            n_iter_each = map(len, repeat_recs)
            if plot_wall_time:
                """ do everything separately from here if plotting wall time
                here is what we do... we can't have a square array because 
                we don't want to take the minimum number of iterations... 
                we want to take ALL iterations for each repeat, and this number
                may be different for different repeats.
                so we store all times/values in a list of arrays
                then we chop things up into bins
                """
                if rec_type != "model":
                    values = list()
                    wall_times = list()
                    for j in xrange(
                            n_repeat):  # loop over repeated experiments

                        wall_times.append(
                            np.array([
                                repeat_recs[j][i]['total_elapsed_time'] / 60.0
                                for i in xrange(n_iter_each[j])
                            ]))

                        if rec_type == "observations":
                            values.append([
                                repeat_recs[j][i]['obj_o']
                                for i in xrange(n_iter_each[j])
                            ])
                        elif rec_type == "mixed":
                            values.append([
                                repeat_recs[j][i]['obj_om']
                                for i in xrange(n_iter_each[j])
                            ])
                        else:
                            raise Exception("unknown rec type")

                        for i in xrange(n_iter_each[j]):
                            if values[-1][i] is None or np.isnan(
                                    values[-1][i]):
                                values[-1][i] = violation_value

                        values[-1] = np.array(values[-1])

                    # print values

                else:  # if plot wall tiem but using model
                    values = list()
                    wall_times = list()
                    for j in xrange(
                            n_repeat):  # loop over repeated experiments

                        # for this repeat, get all wall times
                        wall_times.append(
                            np.array([
                                repeat_recs[j][i]['total_elapsed_time'] / 60.0
                                for i in xrange(n_iter_each[j])
                            ]))

                        values_j = np.zeros(n_iter_each[j])
                        for i in xrange(
                                n_iter_each[j]):  # loop over iterations
                            val = true_func(repeat_recs[j][i], module, None,
                                            constraint_tol, obj, con)
                            if val is None or np.isnan(
                                    val
                            ):  #set to violation value here so we can print out this info...
                                values_j[i] = violation_value
                                print 'Violation with params %s at repeat %d iter %d' % (
                                    paramify_no_types(
                                        repeat_recs[j][i]['params']), j, i)
                            else:
                                values_j[i] = val
                        values.append(values_j)

                # change the data structure to be time bins and include everything in
                # those time bins across repeats
                end_times = map(max, wall_times)
                for j in xrange(n_repeat):
                    print 'end time for repeat %d: %f' % (j, end_times[j])
                iters = np.arange(0.0, np.round(max(end_times)), bin_size)
                new_values = list()
                for i, timestep in enumerate(iters):
                    # print 'Creating wall time bin from %f to %f. (%d/%d)' % (i, i+bin_size, i, len(iters))
                    new_value = list()
                    for j in xrange(n_repeat):
                        new_value = np.append(
                            new_value, values[j][np.logical_and(
                                wall_times[j] >= timestep, wall_times[j] <
                                timestep + bin_size)].flatten())
                    # if a time bin is empty across all repeats:
                    if len(new_value) == 0:
                        if i == 0:
                            new_value = [violation_value]
                        else:
                            new_value = new_values[-1]
                    new_values.append(new_value)
                values = new_values

                # make the first value equal to the violation value (optional)
                iters = np.append(iters, max(iters) + bin_size)
                values.insert(0, np.array([violation_value]))

                # Average over the repeated experiments
                average_values = map(avg, values)
                errorbars = bootstrap_errorbars(values, log=log_scale, avg=avg)
                # plt.yscale('log', nonposy='clip')

                if log_scale:
                    ax['err'].errorbar(iters,
                                       np.log10(average_values),
                                       yerr=errorbars)
                else:
                    ax['err'].errorbar(iters, average_values, yerr=errorbars)

            else:
                # NOT WALL TIME

                n_iter = reduce(min, n_iter_each, np.inf)
                if n_iter_spec is None:
                    print 'Found %d repeats with at least %d iterations' % (
                        n_repeat, n_iter)
                    print {i: n_iter_each[i] for i in xrange(n_repeat)}
                elif n_iter < n_iter_spec:
                    print 'You specified %d iterations but there are only %d available... so plotting %d' % (
                        n_iter_spec, n_iter, n_iter)
                else:
                    n_iter = n_iter_spec
                    print 'Plotting %d iterations' % n_iter

                iters = range(n_iter)

                if rec_type != "model":
                    values = np.zeros((n_iter, n_repeat))
                    for j in xrange(
                            n_repeat):  # loop over repeated experiments
                        for i in iters[j]:  # loop over iterations
                            if rec_type == "observations":
                                values[i, j] = repeat_recs[j][i]['obj_o']
                            elif rec_type == "mixed":
                                values[i, j] = repeat_recs[j][i]['obj_om']
                            else:
                                raise Exception("unknown rec type")
                            if values[i, j] is None or np.isnan(values[i, j]):
                                values[i, j] = violation_value

                    print values

                else:
                    values = np.zeros((n_iter, n_repeat))
                    distances = np.zeros((n_iter, n_repeat))
                    for j in xrange(
                            n_repeat):  # loop over repeated experiments
                        for i in iters:  # loop over iterations
                            val = true_func(repeat_recs[j][i], module, None,
                                            constraint_tol, obj, con)
                            if val is None:  #set to violation value here so we can print out this info...
                                values[i, j] = violation_value
                                print 'Violation with params %s at repeat %d iter %d' % (
                                    paramify_no_types(
                                        repeat_recs[j][i]['params']), j, i)
                            else:
                                values[i, j] = val

                            if make_dist_plot:
                                distances[i, j] = params_norm(
                                    repeat_recs[j][i]['params'],
                                    module.true_sol())

                if plot_separate:
                    if log_scale:
                        ax['err'].plot(iters, np.log10(values))
                    else:
                        ax['err'].plot(iters, values)

                else:
                    # Average over the repeated experiments
                    average_values = map(avg, values)
                    errorbars = bootstrap_errorbars(values,
                                                    log=log_scale,
                                                    avg=avg)
                    # plt.yscale('log', nonposy='clip')

                    if stretch_x:
                        fctr = float(n_iter_spec) / float(n_iter)
                        iters = np.array(iters) * fctr
                        print 'Stretching x axis by a factor of %f' % fctr

                    if log_scale:
                        ax['err'].errorbar(iters,
                                           np.log10(average_values),
                                           yerr=errorbars)
                    else:
                        ax['err'].errorbar(iters,
                                           average_values,
                                           yerr=errorbars)

                    if make_dist_plot:
                        average_dist = map(avg, distances)
                        errorbars_dist = bootstrap_errorbars(distances,
                                                             log=log_scale,
                                                             avg=avg)
                        if log_scale:
                            ax['dist'].errorbar(iters,
                                                np.log10(average_dist),
                                                yerr=errorbars_dist)
                        else:
                            ax['dist'].errorbar(iters,
                                                average_dist,
                                                yerr=errorbars_dist)

        acq_names.append(options["tasks"].values()[0]["acquisition"])
        if acq_names[-1] == 'PES':
            acq_names[-1] = 'PESC'
        if acq_names[-1] == 'ExpectedImprovement':
            acq_names[-1] = 'EIC'

    if labels:
        ax['err'].legend(labels.split(';'), fontsize=16, loc='lower left')
        ax['dist'].legend(labels.split(';'), fontsize=20)
    elif len(acq_names) > 1:
        ax['err'].legend(acq_names, fontsize=20)
        ax['dist'].legend(acq_names, fontsize=20)

    # save it in the last directory... (if there are multiple directories)
    if not plot_wall_time:
        if n_repeat >= 0:
            print 'Made a plot with %d repeats and %d iterations' % (n_repeat,
                                                                     n_iter)
        else:
            print 'Made a plot with %d iterations' % (n_iter)
    else:
        if n_repeat >= 0:
            print 'Made a plot with %d repeats and %f minutes' % (n_repeat,
                                                                  max(iters))
        else:
            print 'Made a plot with %f minutes' % (max(iters))

    file_prefix = '%s_' % average if n_repeat > 0 else ''
    file_postfix = '_wall_time' if plot_wall_time else ''
    fig['err'].tight_layout()
    figname = os.path.join(plots_dir,
                           '%serror%s' % (file_prefix, file_postfix))
    fig['err'].savefig(figname + '.pdf')
    fig['err'].savefig(figname + '.svg')
    print 'Saved to %s' % figname
    if make_dist_plot:
        fig['dist'].tight_layout()
        figname_dist = os.path.join(
            plots_dir, '%sl2_distance%s.pdf' % (file_prefix, file_postfix))
        fig['dist'].savefig(figname_dist)
        print 'Saved to %s' % figname_dist
Exemplo n.º 10
0
def main(expt_dir):
    """Re-evaluate every stored recommendation with the ``wrapper`` module.

    Changes the working directory to ``expt_dir``, reads ``config.json``
    from it and then walks the experiment's ``recommendations`` collection
    by id (1, 2, ...) until the database returns ``None``.  For each
    recommendation the ``params_om`` entry is evaluated through
    ``wrapper.main``; the negated ``'score'`` is appended to
    ``value_solution_om.txt`` and the parameter vector is appended to
    ``params_om.txt`` (both relative to ``expt_dir``).

    Args:
        expt_dir: Experiment directory holding ``config.json`` and the
            ``wrapper`` module.
    """
    os.chdir(expt_dir)
    sys.path.append(expt_dir)

    options = parse_config_file(expt_dir, 'config.json')
    experiment_name = options["experiment-name"]

    # The module name is fixed here; options['main_file'] is not consulted.
    main_file = 'wrapper'
    if main_file.endswith('.py'):
        main_file = main_file[:-3]
    module = __import__(main_file)

    input_space = InputSpace(options["variables"])
    chooser_module = importlib.import_module('spearmint.choosers.' +
                                             options['chooser'])
    # chooser and hypers are not used below; the calls are kept because
    # they may have side effects -- TODO confirm and drop if they do not.
    chooser = chooser_module.init(input_space, options)
    db = MongoDB(database_address=options['database']['address'])
    jobs = load_jobs(db, experiment_name)
    hypers = db.load(experiment_name, 'hypers')
    # Only the first task parsed from the jobs is treated as the objective.
    objective = parse_tasks_from_jobs(jobs, experiment_name, options,
                                      input_space).values()[0]

    def create_fun(task):
        """Build the evaluation closure (``task`` itself is not used)."""

        def fun(params, gradient=False):
            is_batch = len(params.shape) > 1 and params.shape[1] > 1
            if not is_batch:
                # Single point: passed to the wrapper as given, without
                # any from_unit conversion.
                return module.main(
                    0, paramify_no_types(input_space.paramify(params)))

            # Batch: each row is mapped back from the unit hypercube
            # before being evaluated.
            values = np.zeros(params.shape[0])
            for row in range(params.shape[0]):
                point = params[row, :].flatten()
                point = input_space.from_unit(np.array([point])).flatten()
                values[row] = module.main(
                    0, paramify_no_types(input_space.paramify(point)))
            return values

        return fun

    fun = create_fun(objective)

    # Number of evaluations averaged per recommendation.
    n_evaluations = 1

    # We iterate through each recommendation made (ids start at 1).
    rec_id = 1
    while True:
        recommendation = db.load(experiment_name, 'recommendations',
                                 {'id': rec_id})
        if recommendation is None:
            break

        solution_om = input_space.vectorify(recommendation['params_om'])

        score_sum = 0.0
        for _ in range(n_evaluations):
            score_sum += fun(solution_om, gradient=False)['score']
        # The sign is flipped before the averaged score is stored.
        values_solution_om = -score_sum / float(n_evaluations)

        with open('value_solution_om.txt', 'a') as f:
            f.write("%lf\n" % values_solution_om)

        with open('params_om.txt', 'a') as f_handle:
            np.savetxt(f_handle,
                       np.array([solution_om]),
                       delimiter=' ',
                       newline='\n')

        rec_id += 1
Exemplo n.º 11
0
def main(filter=None):
    """
    Usage: python make_plots.py PATH_TO_DIRECTORY
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--clean', action='store_true', help='remove broken jobs')
    parser.add_argument('--table', action='store_true', help='print table')
    parser.add_argument('--csv', action='store_true', help='save table as csv')
    parser.add_argument('--d', type=int, help='sort by distance from dth smallest result')
    parser.add_argument('--name', help='experiment name', default=None)
    args, unknown = parser.parse_known_args()

    options, expt_dir = get_options(unknown)
    # print "options:"
    # print_dict(options)

    # reduce the grid size
    options["grid_size"] = 400

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser'])
    chooser = chooser_module.init(options)
    # print "chooser", chooser
    if args.name:
        experiment_name = args.name
    else:
        experiment_name     = options.get("experiment-name", 'unnamed-experiment')

    # Connect to the database
    db_address = options['database']['address']
    # sys.stderr.write('Using database at %s.\n' % db_address)
    db         = MongoDB(database_address=db_address)

    # testing below here
    jobs = load_jobs(db, experiment_name)
    print len(jobs), 'jobs found'
    # print jobs

    # remove_broken_jobs
    if args.clean:
        for job in jobs:
            if job['status'] == 'pending':
                sys.stderr.write('Broken job %s detected.\n' % job['id'])
                job['status'] = 'broken'
                db.save(job, experiment_name, 'jobs', {'id' : job['id']})

    # print "resources:", resources
    # print_dict(resources)
    resource = resources.itervalues().next()

    task_options = {task: options["tasks"][task] for task in resource.tasks}
    # print "task_options:"
    # print_dict(task_options) # {'main': {'likelihood': u'NOISELESS', 'type': 'OBJECTIVE'}}

    task_group = load_task_group(db, options, experiment_name, resource.tasks)
    hypers = load_hypers(db, experiment_name)
    chooser.fit(task_group, hypers, task_options)
    lp, x = chooser.best()

    if args.table:
        os.chdir(unknown[0])
        out_file = open('results.csv', 'w') if args.csv else sys.stdout

        # get the observed points
        task = task_group.tasks.itervalues().next()
        idata = task.valid_normalized_data_dict
        inputs = idata["inputs"]
        inputs = map(lambda i: [paramify(task_group, task_group.from_unit(i)).values(), i], inputs)
        vals = idata["values"]
        vals = [task.unstandardize_mean(task.unstandardize_variance(v)) for v in vals]

        out_file.write('\n%10s' % 'result')
        lengths = [10]
        for name, vdict in task.variables_meta.iteritems():
            name = '%10s' % name
            out_file.write(',' + name)
            lengths.append(len(name))
        out_file.write('\n')

        line_template = '%' + str(lengths[0]) + '.4f,' + ','.join(['%' + str(l) +
            ('.4f' if 'enum' not in inputs[0][0][i]['type'] else 's') for i, l in enumerate(lengths[1:])])

        points = sorted(zip(vals, inputs), key=lambda r: r[0])
        if args.d is not None:
            target = x
            if args.d >= 0:
                target = points[args.d][1][1]
            points = sorted(points, key=lambda r: np.linalg.norm(r[1][1] - target))
        for i, point in enumerate(points):
            subs = [point[0]] + [d['values'][0] for d in point[1][0]]
            out_file.write(line_template % tuple(subs) + '\n')
        out_file.close()
def main(expt_dir):

	os.chdir(expt_dir)
	sys.path.append(expt_dir)

	options         = parse_config_file(expt_dir, 'config.json')
	experiment_name = options["experiment-name"]
	options['main_file'] = 'prog_no_noisy'

	main_file = options['main_file']
	if main_file[-3:] == '.py':
		main_file = main_file[:-3]
	module  = __import__(main_file)

	input_space     = InputSpace(options["variables"])
	chooser_module  = importlib.import_module('spearmint.choosers.' + options['chooser'])
	chooser         = chooser_module.init(input_space, options)
	db              = MongoDB(database_address=options['database']['address'])
	jobs            = load_jobs(db, experiment_name)
	hypers          = db.load(experiment_name, 'hypers')
	tasks           = parse_tasks_from_jobs(jobs, experiment_name, options, input_space)

	if len(tasks) < 2:
		print 'Not a multi-objective problem!'
		return -1

	if options['language'] != "PYTHON":
		print 'Only python programs supported!'
		return -1

	for task in tasks:
		if tasks[ task ].type != 'objective':
			print 'Not a multi-objective problem!'
			return -1

	def create_fun(task):
		def fun(params, gradient = False):

			if len(params.shape) > 1 and params.shape[ 1 ] > 1:
				params = params.flatten()

			params = input_space.from_unit(np.array([ params ])).flatten()

			return module.main(0, paramify_no_types(input_space.paramify(params)))[ task ]

		return fun

	funs = [ create_fun(task) for task in tasks ]

	moop = MOOP_basis_functions(funs, input_space.num_dims)

#	moop.evolve(1, 8)

	grid = sobol_grid.generate(input_space.num_dims, grid_size = 1000 * input_space.num_dims)

	moop.solve_using_grid(grid)

#	reference = find_reference_point_using_direct(tasks, module, input_space)

#	reference = reference + np.abs(reference) * 0.1

	reference = np.ones(len(tasks)) * 7

	hyper_volume_solution = moop.get_hypervolume(reference.tolist())

	result = moop.compute_pareto_front_and_set()
	front = result['frontier']
	pareto_set = result['pareto_set']

#	os.remove('hypervolume_solution.txt')

	with open('hypervolume_solution.txt', 'a') as f:
		print >> f, "%lf" % (hyper_volume_solution)

#	os.remove('hypervolumes.txt')

	# We iterate through each recommendation made

	i = 0
	more_recommendations = True
	while more_recommendations:

                recommendation = db.load(experiment_name, 'recommendations', {'id' : i + 1})

		if recommendation == None:
			more_recommendations = False
		else:

			solution = input_space.to_unit(input_space.vectorify(recommendation[ 'params' ]))

			if len(solution.shape) == 1:
				solution = solution.reshape((1, len(solution)))
			
			# We compute the objective values associated to this recommendation
	
			values_solution = np.zeros((solution.shape[ 0 ], len(tasks)))
		
			for j in range(values_solution.shape[ 0 ]):
				for k in range(values_solution.shape[ 1 ]):
					values_solution[ j, k ] = funs[ k ](solution[ j : (j + 1), : ])

			moop = MOOP_basis_functions(funs, input_space.num_dims)

			moop.set_population(solution)

			hyper_volume = moop.get_hypervolume(reference.tolist())

			with open('hypervolumes.txt', 'a') as f:
				print >> f, "%lf" % (hyper_volume)

			with open('mean_min_distance_to_frontier.txt', 'a') as f: 
				print >> f, "%lf" % (average_min_distance(values_solution, front))

			with open('mean_min_distance_from_frontier.txt', 'a') as f: 
				print >> f, "%lf" % (average_min_distance(front, values_solution))

			with open('mean_min_distance_to_pareto_set.txt', 'a') as f: 
				print >> f, "%lf" % (average_min_distance(input_space.from_unit(solution), \
				input_space.from_unit(pareto_set)))

			with open('mean_min_distance_from_pareto_set.txt', 'a') as f: 
				print >> f, "%lf" % (average_min_distance(input_space.from_unit(pareto_set), \
				input_space.from_unit(solution)))

			with open('evaluations.txt','a') as f_handle: 
				np.savetxt(f_handle, np.array([recommendation['num_complete_tasks'].values()]), delimiter = ' ', newline = '\n')

		i += 1
Exemplo n.º 13
0
def main():
    """
    Usage: python make_plots.py PATH_TO_DIRECTORY

    Fits the chooser to the stored jobs, evaluates the objective model over
    the chooser's grid and shows a 3D plot of the predicted mean with error
    bars, overlaid with the observed points.

    TODO: Parts of this function are specific to the simple branin example
    (a task called 'main', two input dimensions). It should be cleaned up so
    that the plots are easier to interpret and so that it works in more
    general cases (e.g. if the objective likelihood is binomial then values
    should not be unstandardized).
    """
    options, expt_dir = get_options()
    print("options:")
    print_dict(options)

    # reduce the grid size
    options["grid_size"] = 400

    resources = parse_resources_from_config(options)

    # Load up the chooser.
    chooser_path = 'spearmint.choosers.' + options['chooser']
    chooser = importlib.import_module(chooser_path).init(options)
    print("chooser", chooser)
    experiment_name = options.get("experiment-name", 'unnamed-experiment')

    # Connect to the database
    db_address = options['database']['address']
    sys.stderr.write('Using database at %s.\n' % db_address)
    db = MongoDB(database_address=db_address)

    jobs = load_jobs(db, experiment_name)
    remove_broken_jobs(db, jobs, experiment_name, resources)

    print("resources:", resources)
    print_dict(resources)
    # Only the first resource yielded by the dict is used.
    resource = resources.itervalues().next()

    task_options = {}
    for task_name in resource.tasks:
        task_options[task_name] = options["tasks"][task_name]
    print("task_options:")
    print_dict(task_options)

    task_group = load_task_group(db, options, resource.tasks)
    print("task_group", task_group)
    print("tasks:")
    print_dict(task_group.tasks)

    hypers = load_hypers(db, experiment_name)
    print("loaded hypers", hypers)

    hypers = chooser.fit(task_group, hypers, task_options)
    print("\nfitted hypers:")
    print_dict(hypers)

    lp, best_unit = chooser.best()
    best_unit = best_unit.flatten()
    print("best", lp, best_unit)
    bestp = task_group.paramify(task_group.from_unit(best_unit))
    print("expected best position", bestp)

    # get the grid of points
    unit_grid = chooser.grid
    print("chooser models:", chooser.models)
    print_dict(chooser.models)
    obj_model = chooser.models[chooser.objective['name']]
    raw_mean, raw_var = obj_model.function_over_hypers(obj_model.predict,
                                                       unit_grid)

    # un-normalize the function values and variances
    obj_task = task_group.tasks['main']
    obj_mean = []
    for value in raw_mean:
        obj_mean.append(
            obj_task.unstandardize_mean(
                obj_task.unstandardize_variance(value)))
    obj_std = []
    for value in raw_var:
        obj_std.append(obj_task.unstandardize_variance(np.sqrt(value)))

    # Map the grid back from the unit hypercube to the original space.
    grid = [task_group.from_unit(point) for point in unit_grid]

    # Split into per-axis lists: the first two input dimensions, the
    # predicted mean and the error-bar half width.
    xs, ys, means, sigmas = [], [], [], []
    for point, mean, std in izip(grid, obj_mean, obj_std):
        xs.append(point[0])
        ys.append(point[1])
        means.append(mean)
        sigmas.append(std)

    fig = plt.figure(dpi=100)
    ax = fig.add_subplot(111, projection='3d')

    ax.plot(xs, ys, means, marker='.', linestyle="None")

    # plot errorbars: one vertical segment per grid point
    for px, py, pm, ps in zip(xs, ys, means, sigmas):
        ax.plot([px, px], [py, py], [pm + ps, pm - ps],
                marker="_",
                color='k')

    # get the observed points
    task = task_group.tasks['main']
    idata = task.valid_normalized_data_dict
    observed = np.array(
        [task_group.from_unit(unit) for unit in idata["inputs"]])
    vals = []
    for value in idata["values"]:
        vals.append(
            obj_task.unstandardize_mean(
                obj_task.unstandardize_variance(value)))

    ax.plot(observed[:, 0],
            observed[:, 1],
            vals,
            marker='o',
            color="r",
            linestyle="None")

    plt.show()
Exemplo n.º 14
0
def main(expt_dir, repeat=None):

    options = parse_config_file(expt_dir, 'config.json')
    experiment_name = options["experiment-name"]

    if repeat is not None:
        experiment_name = repeat_experiment_name(experiment_name,repeat)

    input_space = InputSpace(options["variables"])

    chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser'])
    chooser = chooser_module.init(input_space, options)

    db = MongoDB(database_address=options['database']['address'])

    jobs = load_jobs(db, experiment_name)
    hypers = db.load(experiment_name, 'hypers')

    if input_space.num_dims != 2:
        raise Exception("This plotting script is only for 2D optimizations. This problem has %d dimensions." % input_space.num_dims)

    tasks = parse_tasks_from_jobs(jobs, experiment_name, options, input_space)

    hypers = chooser.fit(tasks, hypers)

    print '\nHypers:'
    print_hypers(hypers)

    recommendation = chooser.best()
    current_best_value = recommendation['model_model_value']
    current_best_location = recommendation['model_model_input']

    plots_dir = os.path.join(expt_dir, 'plots')
    if not os.path.isdir(plots_dir):
        os.mkdir(plots_dir)
    if len(chooser.models) > 1:
        for task_name in chooser.models:
            plots_subdir = os.path.join(plots_dir, task_name)
            if not os.path.isdir(plots_subdir):
                os.mkdir(plots_subdir)

    print 'Plotting...'

    # Plot objective model
    # plot_2d_mean_and_var(chooser.objective_model, plots_dir, 
    #     chooser.objective.name, 
    #     input_space, current_best_location)
    
    # plot_hypers(chooser.objective_model, plots_dir, 'objective_function')
    for task_name, model in chooser.models.iteritems():

        plots_subdir = os.path.join(plots_dir, task_name) if len(chooser.models) > 1 else plots_dir

        plot_hypers(model, plots_subdir, task_name)

        plot_2d_mean_and_var(model, plots_subdir, task_name, input_space, current_best_location)


    if chooser.numConstraints() > 0:
        plot_2d_constraints(chooser, plots_dir, input_space, current_best_location)

    plot_acquisition_function(chooser, plots_dir, input_space, current_best_location, current_best_value)

    print 'Done plotting.'
Exemplo n.º 15
0
	def runSpearmint(self, name):
		"""Run the Spearmint dispatch loop for experiment ``name`` until it stalls.

		Options are read through ``self.get_options`` from the directory
		``<self.scratchPath>/<name>``. Jobs are then suggested and dispatched
		to every resource that is accepting jobs. After each dispatch the
		recent values of every task are inspected; when all tasks look
		stalled the process is terminated with ``sys.exit("Stalled!")``.

		``stopping`` is never set to True in this method, so the loop only
		ends through that ``sys.exit`` call or an exception.

		Args:
			name: Sub-directory of ``self.scratchPath`` holding the experiment.
		"""
		options, expt_dir = self.get_options([os.path.abspath(os.path.join(self.scratchPath,name))])

		resources = main.parse_resources_from_config(options)

		# Load up the chooser.
		chooser_module = importlib.import_module('spearmint.choosers.' + options['chooser'])
		chooser = chooser_module.init(options)
		experiment_name = options.get("experiment-name", 'unnamed-experiment')

		# Connect to the database
		db_address = options['database']['address']
		sys.stderr.write('Using database at %s.\n' % db_address)
		db = MongoDB(database_address=db_address)

		# Stall detection: a task counts as stalled when each of the
		# `look_back` consecutive differences between its latest values is
		# at most `threshold`.
		threshold = 1e-2
		look_back = 3
		stopping = False
		while not stopping:
			for resource_name, resource in resources.iteritems():
				jobs = main.load_jobs(db, experiment_name)
				# resource.printStatus(jobs)
				# If the resource is currently accepting more jobs
				# TODO: here cost will eventually also be considered: even if the
				#       resource is not full, we might wait because of cost incurred
				# Note: I chose to fill up one resource and then move on to the next
				# You could also do it the other way, by changing "while" to "if" here

				while resource.acceptingJobs(jobs):
					# Load jobs from DB
					# (move out of one or both loops?) would need to pass into load_tasks
					jobs = main.load_jobs(db, experiment_name)
					#pprint.pprint(main.load_hypers(db, experiment_name))

					# Remove any broken jobs from pending.
					main.remove_broken_jobs(db, jobs, experiment_name, resources)

					# Get a suggestion for the next job
					suggested_job = main.get_suggestion(chooser, resource.tasks, db, expt_dir, options, resource_name)

					# Submit the job to the appropriate resource
					process_id = resource.attemptDispatch(experiment_name, suggested_job, db_address, expt_dir)

					# Set the status of the job appropriately (successfully submitted or not)
					if process_id is None:
						suggested_job['status'] = 'broken'
						main.save_job(suggested_job, db, experiment_name)
					else:
						suggested_job['status'] = 'pending'
						suggested_job['proc_id'] = process_id
						main.save_job(suggested_job, db, experiment_name)

					# Reload so the status printout and the next acceptingJobs
					# check see the job that was just saved.
					jobs = main.load_jobs(db, experiment_name)

					# Print out the status of the resources
					# resource.printStatus(jobs)
					print_resources_status(resources.values(), jobs)

					# One flag per task: 1 if the task looks stalled, else 0.
					stalled = []
					for task in main.load_task_group(db, options, resource.tasks).tasks.values():
						# Reversed so that index 0 is the last stored value
						# (presumably the most recent one -- confirm the ordering
						# of valid_normalized_data_dict["values"]).
						performance = task.valid_normalized_data_dict["values"][::-1]
						stalled.append(0)
						# More than look_back values are needed so that
						# performance[i+1] below is always in range.
						if len(performance) > look_back:
							print performance[0:look_back]
							print "Diffs: ",
							within_thresh = True
							for i,run in enumerate(performance[0:look_back]):
								diff = abs(run - performance[i+1])
								print str(round(diff,2))+", ",
								if diff > threshold:
									within_thresh = False
									print "...No stall"
									break
							if within_thresh:
								stalled[len(stalled)-1] = 1
					# NOTE(review): all([]) is True, so a task group with no
					# tasks would also exit here -- confirm this is intended.
					if all(stalled):
						sys.exit("Stalled!")
			# If no resources are accepting jobs, sleep
			# (they might be accepting if suggest takes a while and so some jobs already finished by the time this point is reached)
			if main.tired(db, experiment_name, resources):
				time.sleep(options.get('polling-time', 5))
Exemplo n.º 16
0
def main(expt_dir):

    os.chdir(expt_dir)
    sys.path.append(expt_dir)

    options = parse_config_file(expt_dir, 'config.json')
    experiment_name = options["experiment-name"]

    #	main_file = options['main_file']
    main_file = 'OSY_no_noisy'
    if main_file[-3:] == '.py':
        main_file = main_file[:-3]
    module = __import__(main_file)

    input_space = InputSpace(options["variables"])
    chooser_module = importlib.import_module('spearmint.choosers.' +
                                             options['chooser'])
    chooser = chooser_module.init(input_space, options)
    db = MongoDB(database_address=options['database']['address'])
    jobs = load_jobs(db, experiment_name)
    hypers = db.load(experiment_name, 'hypers')
    tasks = parse_tasks_from_jobs(jobs, experiment_name, options, input_space)

    if len(tasks) < 2:
        print 'Not a multi-objective problem!'
        return -1

    if options['language'] != "PYTHON":
        print 'Only python programs supported!'
        return -1

    objectives = dict()
    contraints = dict()

    for task in tasks:
        if tasks[task].type == 'objective':
            objectives[task] = tasks[task]
        else:
            contraints[task] = tasks[task]

    assert len(objectives) >= 2 and len(contraints) >= 1

    def create_fun(task):
        def fun(params, gradient=False):

            if len(params.shape) > 1 and params.shape[1] > 1:

                values = np.zeros(params.shape[0])
                params_orig = params

                for i in range(params_orig.shape[0]):
                    param = params[i, :]
                    param = param.flatten()
                    param = input_space.from_unit(np.array([param])).flatten()

                    values[i] = module.main(
                        0,
                        paramify_no_types(input_space.paramify(param)))[task]

            else:
                return module.main(
                    0, paramify_no_types(input_space.paramify(params)))[task]

            return values

        return fun

    funs_o = [create_fun(task) for task in objectives]
    funs_c = [create_fun(task) for task in contraints]

    moop = MOOP_basis_functions(funs_o,
                                input_space.num_dims,
                                constraints=funs_c)

    grid = sobol_grid.generate(input_space.num_dims,
                               grid_size=1000 * input_space.num_dims)

    # We only retain the feasible points

    moop.solve_using_grid(grid)

    reference = np.ones(len(objectives)) * 1e3

    hyper_volume_solution = moop.get_hypervolume(reference.tolist())

    result = moop.compute_pareto_front_and_set()

    front = result['frontier']
    pareto_set = result['pareto_set']

    with open('hypervolume_solution.txt', 'a') as f:
        print >> f, "%lf" % (hyper_volume_solution)

    # We iterate through each recommendation made

    i = 0
    more_recommendations = True
    while more_recommendations:

        recommendation = db.load(experiment_name, 'recommendations',
                                 {'id': i + 1})

        if recommendation == None:
            more_recommendations = False
        else:

            solution = input_space.to_unit(
                input_space.vectorify(recommendation['params']))

            if len(solution.shape) == 1:
                solution = solution.reshape((1, len(solution)))

            # We compute the objective values associated to this recommendation

            values_solution = np.zeros((solution.shape[0], len(objectives)))

            for j in range(values_solution.shape[0]):
                for k in range(values_solution.shape[1]):
                    values_solution[j, k] = funs_o[k](solution[j:(j + 1), :])

            moop = MOOP_basis_functions(funs_o, input_space.num_dims)

            moop.set_population(solution)

            hyper_volume = moop.get_hypervolume(reference.tolist())

            # We make sure that there are no infeasible points recommended
            # If there are infeasible recommendations we return 0 as the hypervolume

            all_feasible = True

            for k in range(len(funs_c)):
                all_feasible = all_feasible and not np.any(
                    funs_c[k](solution) < 0)

            if not all_feasible:
                hyper_volume = 0.0

            with open('hypervolumes.txt', 'a') as f:
                print >> f, "%lf" % (hyper_volume)

            with open('evaluations.txt', 'a') as f_handle:
                np.savetxt(
                    f_handle,
                    np.array([recommendation['num_complete_tasks'].values()]),
                    delimiter=' ',
                    newline='\n')

        i += 1
Exemplo n.º 17
0
def plot(
    config_directory="/home/carrknight/code/oxfish/runs/optimization/spearmint"
):
    """Dump model predictions and observed runs of an experiment to files.

    Fits the chooser to the stored jobs and then, for every task in the
    task group, predicts mean and variance on a regular grid over the unit
    hypercube and writes:

    * ``<experiment>.csv`` -- grid coordinates with predicted mean/variance
    * ``<experiment>_bounds.yaml`` -- type and bounds/options per variable
    * ``<experiment>_<task>_runs.csv`` -- the task's inputs
    * ``<experiment>_<task>_runs_values.csv`` -- the task's values

    Args:
        config_directory: Directory containing ``config.json``; the output
            files are written there.
    """
    # NOTE(review): this chdir uses a hard-coded path rather than
    # config_directory -- confirm whether that is intended.
    os.chdir("/home/carrknight/code/oxfish/runs/optimization/spearmint")
    options = get_options(config_directory, config_file="config.json")
    experiment_name = str(options['experiment-name'])
    db = MongoDB()
    resources = parse_resources_from_config(options)
    # Only the first resource yielded by the dict is used.
    resource = resources.itervalues().next()

    # load hyper parameters
    chooser_module = importlib.import_module('spearmint.choosers.' +
                                             options['chooser'])
    chooser = chooser_module.init(options)
    print "chooser", chooser

    hypers = db.load(experiment_name, "hypers")
    print "loaded hypers", hypers  # from GP.to_dict()

    jobs = load_jobs(db, experiment_name)
    remove_broken_jobs(db, jobs, experiment_name, resources)
    task_options = {task: options["tasks"][task] for task in resource.tasks}
    task_group = spearmint.main.load_task_group(db, options, resource.tasks)

    # The hypers loaded above are replaced by this second load.
    hypers = spearmint.main.load_hypers(db, experiment_name)
    print "loaded hypers", hypers  # from GP.to_dict()

    hypers = chooser.fit(task_group, hypers, task_options)
    print "\nfitted hypers:"
    print(hypers)

    lp, x = chooser.best()
    x = x.flatten()
    print "best", lp, x
    bestp = task_group.paramify(task_group.from_unit(x))
    print "expected best position", bestp

    print "chooser models:", chooser.models
    obj_model = chooser.models[chooser.objective['name']]
    grid = chooser.grid
    # obj_mean / obj_var are not used again in the visible part of this
    # function.
    obj_mean, obj_var = obj_model.function_over_hypers(obj_model.predict, grid)

    import numpy as np

    # Per-variable metadata dumped to the bounds YAML file.
    bounds = dict()

    for task_name, task in task_group.tasks.iteritems():
        # make a grid, feed it to the predictor:

        # One axis per float/int variable and one (0, 1) axis per enum
        # option, all expressed in unit-hypercube coordinates.
        dimensions = ()
        for key in options['variables'].keys():
            # NOTE(review): `type`, `min` and `max` below shadow builtins
            # for the rest of the function.
            type = str(options['variables'][key]["type"]).strip().lower()
            keyname = key.encode('utf-8')
            bounds[keyname] = dict()
            bounds[keyname]["type"] = type
            if type == "float":
                # SPACING is not defined in this function; presumably a
                # module-level constant.
                dimension = np.linspace(0, 1, num=SPACING)
                dimensions = dimensions + (dimension, )
                bounds[keyname]["min"] = options['variables'][key]["min"]
                bounds[keyname]["max"] = options['variables'][key]["max"]

            elif type == "int":
                min = int(options['variables'][key]["min"])
                max = int(options['variables'][key]["max"])
                # NOTE(review): num=max - min yields one point fewer than
                # the count of integers in [min, max] -- confirm intent.
                dimension = np.linspace(0, 1, num=max - min)
                bounds[keyname]["min"] = min
                bounds[keyname]["max"] = max
                #  dimension = np.array([x + min for x in range(max - min + 1)])
                dimensions = dimensions + (dimension, )
            else:
                bounds[keyname]["options"] = options['variables'][key][
                    "options"]
                assert type == "enum"
                dimension = tuple([
                    (0, 1)
                    for i in range(len(options['variables'][key]["options"]))
                ])
                for t in dimension:
                    dimensions = dimensions + (t, )
                    # print(dimension)

        # `cartesian` is defined elsewhere; presumably it builds the
        # cartesian product of the axes -- confirm.
        data = cartesian(np.array(dimensions))

        mean, variance = chooser.models[task_name].predict(data)
        mean = [
            task.unstandardize_mean(task.unstandardize_variance(v))
            for v in mean
        ]
        # np.sqrt is applied before un-standardizing, so despite its name
        # this list holds square roots of the predicted variances.
        variance = [task.unstandardize_variance(np.sqrt(v)) for v in variance]

        os.chdir(config_directory)

        # unzip the data
        new_data = zip(*data.transpose().tolist())
        datum = zip(new_data, mean, variance)

        # NOTE(review): the file name does not include task_name, so each
        # task overwrites the CSV written for the previous one.
        header = ",".join(options['variables'].keys()) + ",mean,variance"
        with open(experiment_name + ".csv", 'w') as fileout:
            fileout.write(header + "\n")
            for i in range(len(datum)):
                # Rows are produced by stripping the parentheses from the
                # tuple's string representation.
                fileout.write(
                    str(datum[i]).replace("(", "").replace(")", "").replace(
                        ",,", ",") + "\n")

        with open(experiment_name + "_bounds.yaml", 'w') as outfile:
            outfile.write(yaml.dump(bounds, default_flow_style=False))

        # grid = cartesian(dimensions)
        # mean, variance = obj_model.function_over_hypers(obj_model.predict, grid)
        #
        # mean = [obj_task.unstandardize_mean(obj_task.unstandardize_variance(v)) for v in mean]
        # variance = [obj_task.unstandardize_variance(np.sqrt(v)) for v in variance]
        #
        # # xymv = [([x for x in xy], m, v) for xy, m, v in izip(new_grid, obj_mean, obj_std)]  # if .2 < xy[0] < .25]
        # with open(experiment_name + ".csv", 'w') as fileout:
        #     for i in range(len(mean)):
        #         fileout.write(str(([x for x in grid[i]], mean[i], variance[i])).replace("(", "").replace(")", "").
        #                       replace("[", "").replace("]", "") + "\n")

        # The task's inputs and values, saved per task.
        xy = np.array(task.inputs)
        # function values:
        vals = task.values
        vals = np.array(vals)
        np.savetxt(experiment_name + "_" + task_name + "_runs.csv",
                   xy,
                   delimiter=",",
                   fmt='%.3e')
        np.savetxt(experiment_name + "_" + task_name + "_runs_values.csv",
                   vals,
                   delimiter=",",
                   fmt='%.3e')