Exemplo n.º 1
0
def exhaustive_search(new_ids,
                      domain,
                      trials,
                      seed,
                      nbMaxSucessiveFailures=1000):
    r"""Exhaustive-search ``suggest`` function for HyperTuning.

    Repeatedly draws random samples from ``domain`` and keeps only samples
    whose parameter assignment has not been seen before — neither in the
    existing ``trials`` nor earlier in this same call.

    Parameters
    ----------
    new_ids : list
        Trial ids to generate documents for.
    domain : hyperopt.Domain
        Search space to sample from.
    trials : hyperopt.Trials
        Previously executed trials, used to detect duplicates.
    seed : int
        Seed for the internal numpy RNG.
    nbMaxSucessiveFailures : int, optional
        After this many consecutive duplicate draws for a single id, the
        space is assumed exhausted and ``[]`` is returned (which stops the
        hyperopt loop).

    Returns
    -------
    list
        New trial documents, or ``[]`` when no unseen sample could be found.
    """
    from hyperopt import pyll
    from hyperopt.base import miscs_update_idxs_vals

    def _vals_hash(vals):
        # Hash one parameter assignment; conditional parameters that were
        # not sampled (empty value list) are represented as None.
        return hash(
            frozenset([(key, value[0]) if len(value) > 0 else (key, None)
                       for key, value in vals.items()]))

    # Hash set of every previously evaluated parameter assignment.
    hashset = set(_vals_hash(trial['misc']['vals'])
                  for trial in trials.trials)

    rng = np.random.RandomState(seed)
    rval = []
    for new_id in new_ids:
        nbSucessiveFailures = 0
        while True:
            # -- sample new specs, idxs, vals
            idxs, vals = pyll.rec_eval(domain.s_idxs_vals,
                                       memo={
                                           domain.s_new_ids: [new_id],
                                           domain.s_rng: rng,
                                       })
            new_result = domain.new_result()
            new_misc = dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)
            miscs_update_idxs_vals([new_misc], idxs, vals)

            # Compare with previous hashes
            h = _vals_hash(vals)
            if h not in hashset:
                # BUGFIX: record the accepted sample's hash so that later
                # ids in this same call cannot draw the identical
                # assignment again (previously only pre-existing trials
                # were deduplicated).
                hashset.add(h)
                break

            # Duplicated sample, ignore and retry.
            nbSucessiveFailures += 1
            if nbSucessiveFailures > nbMaxSucessiveFailures:
                # No more samples to produce
                return []

        rval.extend(
            trials.new_trial_docs([new_id], [None], [new_result], [new_misc]))
    return rval
Exemplo n.º 2
0
def suggest(new_ids, domain, trials, seed, samples_count=200, maxlog=8, rand_iters=3,
            create_regressor=lambda: GaussianProcessRegressor(alpha=1e-6)):
    """Model-based ``suggest``: delegate to random search for the first few
    iterations, then pick points via ``get_best_eval`` using a regressor
    produced by ``create_regressor``.

    Returns a list of new hyperopt trial documents, one per id in
    ``new_ids``.
    """
    rand_iters = max(1, rand_iters)
    # Bootstrap with purely random suggestions until enough trials exist
    # to fit the surrogate model.
    if len(trials) < rand_iters:
        return rand.suggest(new_ids, domain, trials, seed)

    rng = np.random.RandomState(seed)
    docs = []
    for trial_id in new_ids:
        # -- let the surrogate model propose idxs/vals for this trial
        idxs, vals = get_best_eval(trial_id, domain, trials, rng,
                                   samples_count, maxlog, create_regressor)
        result = domain.new_result()
        misc = dict(tid=trial_id, cmd=domain.cmd, workdir=domain.workdir)
        miscs_update_idxs_vals([misc], idxs, vals)
        docs += trials.new_trial_docs([trial_id], [None], [result], [misc])
    return docs
Exemplo n.º 3
0
    def _hyperopt_transform(self, x):
        """Convert a parameter assignment ``x`` (mapping of hyperparameter
        name -> value) into a hyperopt trial document.

        A random sample is drawn only to obtain a correctly shaped
        ``idxs``/``vals`` structure; its values are then overwritten with
        the ones from ``x``.

        Returns the single new trial document.
        """
        new_id = self.trials.new_trial_ids(1)[0]

        domain = self.domain
        rng = np.random.RandomState(1)
        idxs, vals = pyll.rec_eval(domain.s_idxs_vals,
                                   memo={
                                       domain.s_new_ids: [new_id],
                                       domain.s_rng: rng,
                                   })
        rval_miscs = [dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)]
        # BUGFIX: new_trial_docs expects parallel *lists*; previously the
        # bare result dict was passed, so it was iterated element-wise
        # (over its keys) when zipped with the other arguments.
        rval_results = [domain.new_result()]
        # Overwrite the randomly drawn values with the requested assignment.
        for k in vals:
            vals[k][0] = x[k]
        miscs_update_idxs_vals(rval_miscs, idxs, vals)
        rval_docs = self.trials.new_trial_docs([new_id], [None], rval_results,
                                               rval_miscs)

        return rval_docs[0]
Exemplo n.º 4
0
def suggest(new_ids, domain, trials,
            chooser,
            grid_size, grid_seed,
            expt_dir,  # -- a state object will be maintained here
            verbose=0,
           ):
    """Spearmint-style ``suggest``: summarize past trials, generate a grid
    of candidate points on the unit hypercube, and let ``chooser`` pick the
    next experiment.

    Parameters
    ----------
    new_ids : list
        Trial ids to generate (only a single id is currently supported).
    domain : hyperopt.Domain
        Search space; converted to spearmint variables.
    trials : hyperopt.Trials
        History of completed and pending trials.
    chooser : object
        Spearmint chooser exposing a ``next(...)`` method.
    grid_size : int
        Number of candidate grid points to propose.
    grid_seed : int
        Base seed for the candidate grid (incremented by the number of
        observed points so the same candidates are not proposed twice).
    expt_dir : str
        Experiment state directory (kept for spearmint compatibility).
    verbose : int, optional
        Kept for interface compatibility.

    Returns
    -------
    list
        New trial documents for hyperopt.
    """
    variables = variables_from_domain(domain)
    gmap = GridMap(variables.values(), grid_size)

    values = []      # losses of completed trials
    complete = []    # unit-hypercube assignments of completed trials
    pending = []     # unit-hypercube assignments of pending trials
    durations = []   # runtimes of completed trials

    for trial in trials.trials:
        # Classify each trial as pending or complete, mirroring spearmint's
        # on-disk experiment file of "<value> <time> <params...>" /
        # "P P <params...>" lines.
        state = trial['state']
        status = trial['result']['status']
        val = trial['result'].get('loss')
        dur = trial_duration(trial)
        unit_vals = unit_assignment(trial, variables)

        if state in (hyperopt.JOB_STATE_NEW, hyperopt.JOB_STATE_RUNNING):
            pending.append(unit_vals)
        elif state in (hyperopt.JOB_STATE_DONE,):
            # BUGFIX: was `status in hyperopt.STATUS_OK` — a substring test
            # against the string "ok" that also matched "o" and "k";
            # equality is the intended comparison.
            if status == hyperopt.STATUS_OK:
                complete.append(unit_vals)
                durations.append(dur)
                values.append(val)

    # Some stats
    info("#Complete: %d #Pending: %d" % (len(complete), len(pending)))

    # Report the best value so far, if any trial completed.
    if len(values):
        best_val = np.min(values)
        best_job = np.argmin(values)
        info("Current best: %f (job %d)" % (best_val, best_job))

    # Throw out a set of candidates on the unit hypercube. Increment the
    # seed by the number of observed points so we don't take the same
    # values twice.
    seed_increment = len(pending) + len(complete)
    candidates = gmap.hypercube_grid(grid_size, grid_seed + seed_increment)

    # Mash the data into the format the spearmint chooser modules expect:
    # one array of points plus an index array marking complete(0) /
    # candidate(1) / pending(2).
    grid = np.asarray(complete + list(candidates) + pending)
    grid_idx = np.hstack((np.zeros(len(complete)),
                          np.ones(len(candidates)),
                          1.0 + np.ones(len(pending))))
    chosen = chooser.next(grid, np.asarray(values), np.asarray(durations),
                          np.nonzero(grid_idx == 1)[0],
                          np.nonzero(grid_idx == 2)[0],
                          np.nonzero(grid_idx == 0)[0])

    # If the chosen is a tuple, the chooser picked a brand-new job not from
    # the candidate list.
    if isinstance(chosen, tuple):
        (chosen, candidate) = chosen
    else:
        candidate = grid[chosen]

    info("Selected job %d from the grid." % (chosen,))

    params = unit_to_list(candidate, variables)

    if len(new_ids) > 1:
        raise NotImplementedError('TODO: recurse for multiple jobs')

    rval = []
    for new_id in new_ids:
        idxs = dict([(v, [new_id]) for v in variables])
        vals = dict([(v, [p]) for v, p in zip(variables, params)])
        new_result = domain.new_result()
        new_misc = dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)
        miscs_update_idxs_vals([new_misc], idxs, vals)
        rval.extend(trials.new_trial_docs([new_id],
                [None], [new_result], [new_misc]))

    return rval
Exemplo n.º 5
0
def suggest(
    new_ids,
    domain,
    trials,
    chooser,
    grid_size,
    grid_seed,
    expt_dir,  # -- a state object will be maintained here
    verbose=0,
):
    """Spearmint-style ``suggest``: summarize past trials, generate a grid
    of candidate points on the unit hypercube, and let ``chooser`` pick the
    next experiment.

    Parameters
    ----------
    new_ids : list
        Trial ids to generate (only a single id is currently supported).
    domain : hyperopt.Domain
        Search space; converted to spearmint variables.
    trials : hyperopt.Trials
        History of completed and pending trials.
    chooser : object
        Spearmint chooser exposing a ``next(...)`` method.
    grid_size : int
        Number of candidate grid points to propose.
    grid_seed : int
        Base seed for the candidate grid (incremented by the number of
        observed points so the same candidates are not proposed twice).
    expt_dir : str
        Experiment state directory (kept for spearmint compatibility).
    verbose : int, optional
        Kept for interface compatibility.

    Returns
    -------
    list
        New trial documents for hyperopt.
    """
    variables = variables_from_domain(domain)
    gmap = GridMap(variables.values(), grid_size)

    values = []      # losses of completed trials
    complete = []    # unit-hypercube assignments of completed trials
    pending = []     # unit-hypercube assignments of pending trials
    durations = []   # runtimes of completed trials

    for trial in trials.trials:
        # Classify each trial as pending or complete, mirroring spearmint's
        # on-disk experiment file of "<value> <time> <params...>" /
        # "P P <params...>" lines.
        state = trial['state']
        status = trial['result']['status']
        val = trial['result'].get('loss')
        dur = trial_duration(trial)
        unit_vals = unit_assignment(trial, variables)

        if state in (hyperopt.JOB_STATE_NEW, hyperopt.JOB_STATE_RUNNING):
            pending.append(unit_vals)
        elif state in (hyperopt.JOB_STATE_DONE, ):
            # BUGFIX: was `status in hyperopt.STATUS_OK` — a substring test
            # against the string "ok" that also matched "o" and "k";
            # equality is the intended comparison.
            if status == hyperopt.STATUS_OK:
                complete.append(unit_vals)
                durations.append(dur)
                values.append(val)

    # Some stats
    info("#Complete: %d #Pending: %d" % (len(complete), len(pending)))

    # Report the best value so far, if any trial completed.
    if len(values):
        best_val = np.min(values)
        best_job = np.argmin(values)
        info("Current best: %f (job %d)" % (best_val, best_job))

    # Throw out a set of candidates on the unit hypercube. Increment the
    # seed by the number of observed points so we don't take the same
    # values twice.
    seed_increment = len(pending) + len(complete)
    candidates = gmap.hypercube_grid(grid_size, grid_seed + seed_increment)

    # Mash the data into the format the spearmint chooser modules expect:
    # one array of points plus an index array marking complete(0) /
    # candidate(1) / pending(2).
    grid = np.asarray(complete + list(candidates) + pending)
    grid_idx = np.hstack((np.zeros(len(complete)), np.ones(len(candidates)),
                          1.0 + np.ones(len(pending))))
    chosen = chooser.next(grid, np.asarray(values), np.asarray(durations),
                          np.nonzero(grid_idx == 1)[0],
                          np.nonzero(grid_idx == 2)[0],
                          np.nonzero(grid_idx == 0)[0])

    # If the chosen is a tuple, the chooser picked a brand-new job not from
    # the candidate list.
    if isinstance(chosen, tuple):
        (chosen, candidate) = chosen
    else:
        candidate = grid[chosen]

    info("Selected job %d from the grid." % (chosen, ))

    params = unit_to_list(candidate, variables)

    if len(new_ids) > 1:
        raise NotImplementedError('TODO: recurse for multiple jobs')

    rval = []
    for new_id in new_ids:
        idxs = dict([(v, [new_id]) for v in variables])
        vals = dict([(v, [p]) for v, p in zip(variables, params)])
        new_result = domain.new_result()
        new_misc = dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)
        miscs_update_idxs_vals([new_misc], idxs, vals)
        rval.extend(
            trials.new_trial_docs([new_id], [None], [new_result], [new_misc]))

    return rval
Exemplo n.º 6
0
    def suggest(self, new_ids, domain, trials, seed):
        """Hyperopt ``suggest`` entry point that enumerates unseen parameter
        combinations by rejection-sampling the random search space.

        Random samples are drawn repeatedly; any sample whose parameter
        assignment was already executed (tracked as hashed strings in
        ``self.executed_params``) is skipped. The hyperopt loop is stopped
        (by returning ``[]``) once ``self.num_combinations`` distinct
        combinations have been produced, or after too many consecutive
        duplicate draws.

        NOTE(review): relies on mutable instance state (``self._cnt``,
        ``self._cnt_skip``, ``self.executed_params``) persisting across
        calls — confirm the instance is reused for the whole search.
        """
        rng = np.random.RandomState(seed)
        rval = []
        for ii, new_id in enumerate(new_ids):
            # Keep drawing until an unseen combination is found (break),
            # the quota is reached, or the skip limit triggers.
            while self._cnt <= self.num_combinations:
                # -- sample new specs, idxs, vals
                idxs, vals = pyll.rec_eval(
                    domain.s_idxs_vals,
                    memo={
                        domain.s_new_ids: [new_id],
                        domain.s_rng: rng,
                    })
                new_result = domain.new_result()
                new_misc = dict(tid=new_id, cmd=domain.cmd, workdir=domain.workdir)
                miscs_update_idxs_vals([new_misc], idxs, vals)
                new_trial = trials.new_trial_docs([new_id],
                            [None], [new_result], [new_misc])
                # Except the `while`, until here, code is copied from rand.suggest

                # new code from here
                # Refresh the executed set with params from trials run so far.
                self.executed_params = self.executed_params.union(
                    self._get_historical_params(trials))

                # avoid counting floating zero twice (as +0.0 and -0.0)
                this_run_params = hyperopt_grid._convert_neg_zeros_to_zeros(
                    dict(new_misc['vals']))
                # represent the params as a hashed string
                this_run_params_str = dict_to_sorted_str(this_run_params)

                # if these params are seen for the first time, then generate a new
                # trial for them
                if this_run_params_str not in self.executed_params:

                    # add the new trial to returned list
                    rval.extend(new_trial)

                    # log the new trial as executed, in order to avoid duplication
                    self._cnt += 1
                    self.executed_params = \
                        self.executed_params.union([this_run_params_str])
                    print(self._cnt, this_run_params)
                    break
                else:
                    # otherwise (params were seen), skip this trial
                    # update internal counter
                    self._cnt_skip += 1

                # Stopping condition (breaking the hyperopt loop)
                if len(self.executed_params) >= self.num_combinations:
                    # returning an empty list, breaks the hyperopt loop
                    return []


                # "Emergency" stopping condition, breaking the hyperopt loop when
                # loop runs for too long without submitted experiments
                if self._cnt_skip >= 100*self.num_combinations:
                    warnings.warn('Warning: Exited due to too many skips.'
                          ' This can happen if most of the param combinationa have '
                                  'been encountered, and drawing a new '
                                  'unseen combination, involves a very low probablity.')
                    # returning an empty list, breaks the hyperopt loop
                    return []

        return rval