Exemplo n.º 1
0
def tune_parameters():

    from utils.constants import Constants

    context_name = '_context' if Constants.USE_CONTEXT else '_nocontext'

    mongo_url =\
        'mongo://localhost:1234/topicmodel_' +\
        Constants.ITEM_TYPE + context_name + '/jobs'
    trials = MongoTrials(mongo_url, exp_key='exp1')

    print('Connected to %s' % mongo_url)

    space =\
        hp.choice(Constants.USE_CONTEXT_FIELD, [
            {
                Constants.BUSINESS_TYPE_FIELD: Constants.ITEM_TYPE,
                # 'lda_alpha': hp.uniform('lda_alpha', 0, 1),
                # 'lda_beta': hp.uniform('lda_beta', 0, 2),
                Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD: hp.uniform(
                    Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD, 0, 0.5),
                Constants.TOPIC_MODEL_ITERATIONS_FIELD: hp.quniform(
                    Constants.TOPIC_MODEL_ITERATIONS_FIELD, 50, 500, 1),
                Constants.TOPIC_MODEL_PASSES_FIELD: hp.quniform(
                    Constants.TOPIC_MODEL_PASSES_FIELD, 1, 100, 1),
                Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: hp.quniform(
                    Constants.TOPIC_MODEL_NUM_TOPICS_FIELD, 1, 1000, 1),
                # 'topic_weighting_method': hp.choice(
                #     'topic_weighting_method',
                #     ['probability', 'binary', 'all_topics']),
                Constants.USE_CONTEXT_FIELD: True
            },
        ])

    best = fmin(run_recommender,
                space=space,
                algo=tpe.suggest,
                max_evals=1000,
                trials=trials)

    print('losses', sorted(trials.losses()))
    print('best', trials.best_trial['result'],
          trials.best_trial['misc']['vals'])
    print('num trials: %d' % len(trials.losses()))
Exemplo n.º 2
0
def run_optimization(level=1):
    print(f"Optimizing at level {level}")

    set_random_seeds(4)

    next_lvl_trials = MongoTrials('mongo://localhost:1234/covid/jobs',
                                  exp_key=f'covid-{level+1}')
    if len(next_lvl_trials.trials) > 0:
        print(f"Already completed level {level} -- skipping")
        return

    exp_key = f'covid-{level}'

    trials = MongoTrials('mongo://localhost:1234/covid/jobs', exp_key=exp_key)

    suggestion_box = hyperopt.tpe.suggest

    if level == 1:
        max_evals = LEVEL_DEFS[0][1]
        depth = 1

    elif level > 1:
        depth, new_budget, extend_budget = LEVEL_DEFS[level - 1]
        last_depth, _, _ = LEVEL_DEFS[level - 2]

        # Minimum one per node for the expensive ones -- no point wasting compute time
        num_new = int(np.ceil((new_budget / depth) / NUM_NODES) * NUM_NODES)

        if len(trials.trials) == 0:
            print("Generating estimates from previous level")
            result_docs = configure_next_level(level, depth, extend_budget)
            num_to_extend = len(result_docs)

            suggestion_box = create_suggestion_box(result_docs)

        last_level_trials = MongoTrials('mongo://localhost:1234/covid/jobs',
                                        exp_key=f'covid-{level-1}')
        prev_level_count = len(
            [x for x in last_level_trials.losses() if x is not None])

        max_evals = prev_level_count + num_new
        trials.refresh()

    objective = functools.partial(test_parameterization, num_epochs=depth)

    if len([x for x in trials.statuses() if x == 'ok']) >= max_evals:
        print(f"Already completed level {level} -- skipping")
    else:
        best = hyperopt.fmin(objective,
                             space=SEARCH_SPACE,
                             algo=suggestion_box,
                             max_evals=max_evals,
                             trials=trials)

        print(best)
def tune_parameters():

    from utils.constants import Constants

    context_name = '_context' if Constants.USE_CONTEXT else '_nocontext'

    mongo_url =\
        'mongo://localhost:1234/topicmodel_' +\
        Constants.ITEM_TYPE + context_name + '/jobs'
    trials = MongoTrials(mongo_url, exp_key='exp1')

    print('Connected to %s' % mongo_url)

    space =\
        hp.choice(Constants.USE_CONTEXT_FIELD, [
            {
                Constants.BUSINESS_TYPE_FIELD: Constants.ITEM_TYPE,
                # 'lda_alpha': hp.uniform('lda_alpha', 0, 1),
                # 'lda_beta': hp.uniform('lda_beta', 0, 2),
                Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD: hp.uniform(
                    Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD, 0, 0.5),
                Constants.TOPIC_MODEL_ITERATIONS_FIELD: hp.quniform(
                    Constants.TOPIC_MODEL_ITERATIONS_FIELD, 50, 500, 1),
                Constants.TOPIC_MODEL_PASSES_FIELD: hp.quniform(
                    Constants.TOPIC_MODEL_PASSES_FIELD, 1, 100, 1),
                Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: hp.quniform(
                    Constants.TOPIC_MODEL_NUM_TOPICS_FIELD, 1, 1000, 1),
                # 'topic_weighting_method': hp.choice(
                #     'topic_weighting_method',
                #     ['probability', 'binary', 'all_topics']),
                Constants.USE_CONTEXT_FIELD: True
            },
        ])

    best = fmin(
        run_recommender, space=space, algo=tpe.suggest,
        max_evals=1000, trials=trials)

    print('losses', sorted(trials.losses()))
    print(
        'best', trials.best_trial['result'], trials.best_trial['misc']['vals'])
    print('num trials: %d' % len(trials.losses()))
Exemplo n.º 4
0
    def hp_parallel(self):
        trials = MongoTrials('mongo://localhost:27017/foo_db/jobs',
                             exp_key=self.task.id +
                             str(random.getrandbits(64)))
        batch_size = self.n_parallel
        best_params = fmin(fn=self.hp_objective,
                           space=self.task.hp_space,
                           algo=tpe.suggest,
                           max_evals=self.max_evals * batch_size,
                           trials=trials)
        scores = [-t['result']['loss'] for t in trials.trials]
        print("hp parallel task: %s, best: %s, params: %s" %
              (self.task.id, max(scores), best_params))

        search_path = trials.vals
        search_path['score'] = list(np.array(trials.losses()) * -1)

        return self.accumulate_max(scores, self.max_evals,
                                   batch_size), search_path
Exemplo n.º 5
0
def main():
    """
    Performs hyperparameter optimization with hyperopt. It consists of three
    steps. First define a trials object connected to a mongo database where all 
    the results will be stored. Secondly define a stochastic search space from
    which hyperopt will sample hyperparameter configurations. Thirdly define
    the define the objective function and run the minimization function.  
    """

    
    trials = MongoTrials('mongo://localhost:1234/otto-sqrt-pca-95-5/jobs',
            exp_key='15-11-03')
    
    #Search space
    space={
        

    'dense_part':  {'num_units' : hp.quniform('DL1', 512, 2048, 512),
                    'more_layers' : 
                        {'num_units' : hp.quniform('DL2', 512, 2048, 512),
                         'more_layers' : 
                          hp.choice('MD2', [0,
                            {'num_units' : hp.quniform('DL3', 512, 2048, 512),
                             'more_layers' : 0,} #DL3
                                        ])},#DL2
                                                },#DL1                                        


    'leakiness' : hp.choice('leak', [0, 0.01, 0.15] ),


    'weight_init' : hp.choice('weight',['orto','uni']),
    'input_dropout' : hp.quniform('p_in', 0.1, 0.4, 0.1),
    
    'learning_rate': hp.choice('lr',[0.001,0.01,0.025,0.05,0.1]),
    }
    
    #Optimize
    best = fmin(objective, space=space, algo=tpe.suggest, max_evals=100, 
               trials=trials)

    print(trials.losses())
    print(best)
Exemplo n.º 6
0
from TrainClassifiers import main

from hyperopt import hp
from hyperopt import fmin, tpe, hp
from hyperopt.mongoexp import MongoTrials

import matplotlib as mpl
mpl.use('Agg')
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.colors import LogNorm

trials = MongoTrials('mongo://localhost:23888/foo_db/jobs', exp_key='l11')

print(len(trials.losses()))
print(trials.losses())
print(min([float(x) for x in trials.losses() if x]))
print(trials.best_trial)
print("\n\n")
print(trials.best_trial["misc"]["vals"])



Exemplo n.º 7
0
def configure_next_level(lvl: int, depth: int, budget: int = 50):
    new_exp_key = f'covid-{lvl}'

    src_trials = MongoTrials('mongo://localhost:1234/covid/jobs',
                             exp_key=f'covid-{lvl-1}')
    all_trials = MongoTrials('mongo://localhost:1234/covid/jobs')
    dest_trials = MongoTrials('mongo://localhost:1234/covid/jobs',
                              exp_key=new_exp_key)

    hist_length = {2: 3, 3: 5, 5: 8}.get(depth, 10)

    forward_losses = []
    for trial, loss in zip(src_trials.trials, src_trials.losses()):
        if loss is None:
            forward_losses.append(None)
            continue

        v_x, vloss, _, _ = zip(*trial['result']['validation_stats'])

        slope, intercept, _, _, _ = linregress(v_x[-hist_length:],
                                               vloss[-hist_length:])
        forward_losses.append(
            min(
                0.5 * (loss + intercept + slope * v_x[-1] + slope *
                       (1 - 0.8**(depth - v_x[-1])) / (1 - 0.8)), loss))

    ordered_idxs = list(
        np.argsort([x if x is not None else np.inf for x in forward_losses]))

    last_tid = 0 if len(all_trials.tids) == 0 else max(all_trials.tids)
    available_tids = []

    result_docs = []

    while len(ordered_idxs) > 0:
        idx = ordered_idxs.pop(0)
        if src_trials.losses()[idx] is None:
            continue

        epochs_completed = src_trials.trials[idx]['result'].get(
            'training_loss_hist', [(0, np.inf)])[-1][0]

        spec = None
        result = {'status': 'new'}
        misc = copy.deepcopy(src_trials.trials[idx]['misc'])

        result_docs.append((spec, result, misc))
        budget -= (depth - epochs_completed)
        if budget <= 0:
            break

    while len(ordered_idxs) > 0:
        idx = ordered_idxs.pop()

        if src_trials.losses()[idx] is None:
            continue

        if len(available_tids) == 0:
            available_tids = dest_trials.new_trial_ids(last_tid)

        tid = available_tids.pop(0)
        last_tid = tid

        # copy in the ones that aren't worth exploring further
        cpy = copy.deepcopy(src_trials.trials[idx])
        cpy['exp_key'] = new_exp_key
        cpy['tid'] = tid
        cpy['misc']['tid'] = tid

        cpy['misc']['idxs'] = {k: [tid] for k in cpy['misc']['idxs'].keys()}

        del cpy['_id']

        dest_trials.insert_trial_doc(cpy)

    return result_docs
Exemplo n.º 8
0
        trials=trials)

    now = datetime.datetime.now()

    if use_mongo:
        for p in workers:
            p.terminate()

        #merge the temporary log files
        filenames = os.listdir(def_logging_dir)
        temp_logs = filter(lambda x: x.find("temp_" + date_time_string) > -1, filenames)

        aggregated_log = open(def_logging_dir + "log_" + date_time_string, 'w')
        
        for temp_log in temp_logs:
            t = open(def_logging_dir + temp_log, 'r')
            aggregated_log.write(t.read())
            t.close()
            os.remove(def_logging_dir + temp_log)
        
        aggregated_log.write("Time for fmin: " + str(now - then) + "\n")
        aggregated_log.write("Trials: " + str(trials.trials) + "\n")
        aggregated_log.write("Results: " + str(trials.results) + "\n")
        aggregated_log.write("Losses: " + str(trials.losses()) + "\n")
        aggregated_log.write("Statuses: " + str(trials.statuses()) + "\n")
        
        aggregated_log.close()

print best

Exemplo n.º 9
0
def tune_parameters():

    # trials = Trials()
    from utils.constants import Constants

    context_name = '_context' if Constants.USE_CONTEXT else '_nocontext'
    cycle = '_' + str(Constants.NESTED_CROSS_VALIDATION_CYCLE)

    mongo_url =\
        'mongo://localhost:1234/' +\
        Constants.ITEM_TYPE + context_name + '_db_nested' + cycle + '/jobs'
    trials = MongoTrials(mongo_url, exp_key='exp1')

    print('Connected to %s' % mongo_url)

    params = Constants.get_properties_copy()
    params.update({
        Constants.BUSINESS_TYPE_FIELD:
        Constants.ITEM_TYPE,
        Constants.TOPN_NUM_ITEMS_FIELD:
        Constants.TOPN_NUM_ITEMS,
        Constants.NESTED_CROSS_VALIDATION_CYCLE_FIELD:
        Constants.NESTED_CROSS_VALIDATION_CYCLE,
        # 'fm_init_stdev': hp.uniform('fm_init_stdev', 0, 2),
        Constants.FM_ITERATIONS_FIELD:
        hp.quniform(Constants.FM_ITERATIONS_FIELD, 1, 500, 1),
        Constants.FM_NUM_FACTORS_FIELD:
        hp.quniform(Constants.FM_NUM_FACTORS_FIELD, 0, 200, 1),
        # 'fm_use_1way_interactions': hp.choice('fm_use_1way_interactions', [True, False]),
        # 'fm_use_bias': hp.choice('use_bias', [True, False]),
        # 'lda_alpha': hp.uniform('lda_alpha', 0, 1),
        # 'lda_beta': hp.uniform('lda_beta', 0, 2),
        # Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD: hp.uniform(
        #     Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD, 0, 0.5),
        # Constants.TOPIC_MODEL_ITERATIONS_FIELD: hp.quniform(
        #     Constants.TOPIC_MODEL_ITERATIONS_FIELD, 50, 500, 1),
        # Constants.TOPIC_MODEL_PASSES_FIELD: hp.quniform(
        #     Constants.TOPIC_MODEL_PASSES_FIELD, 1, 100, 1),
        # Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: hp.quniform(
        #     Constants.TOPIC_MODEL_NUM_TOPICS_FIELD, 1, 1000, 1),
        # Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: hp.choice(
        #     Constants.TOPIC_MODEL_NUM_TOPICS_FIELD,
        #     [10, 20, 30, 50, 75, 100, 150, 300]),
        # Constants.TOPIC_MODEL_TYPE_FIELD: hp.choice(
        #     Constants.TOPIC_MODEL_TYPE_FIELD, ['lda', 'mnf']),
        # 'topic_weighting_method': hp.choice(
        #     'topic_weighting_method',
        #     ['probability', 'binary', 'all_topics']),
        # 'use_no_context_topics_sum': hp.choice(
        #     'use_no_context_topics_sum', [True, False]),
        Constants.USE_CONTEXT_FIELD:
        Constants.USE_CONTEXT
    })

    space =\
        hp.choice(Constants.USE_CONTEXT_FIELD, [
            params,
        ])

    if not Constants.USE_CONTEXT:
        unwanted_args = [
            Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD,
            Constants.TOPIC_MODEL_ITERATIONS_FIELD,
            Constants.TOPIC_MODEL_PASSES_FIELD,
            Constants.TOPIC_MODEL_NUM_TOPICS_FIELD
        ]

        for element in space.pos_args[1].named_args[:]:
            if element[0] in unwanted_args:
                space.pos_args[1].named_args.remove(element)

    # best = fmin(
    #     run_recommender, space=space, algo=tpe.suggest,
    #     max_evals=100, trials=trials)

    print('losses', sorted(trials.losses()))
    print('best', trials.best_trial['result']['loss'],
          trials.best_trial['misc']['vals'])
    print('num trials: %d' % len(trials.losses()))
Exemplo n.º 10
0
def tune_parameters():

    # trials = Trials()
    from utils.constants import Constants

    context_name = '_context' if Constants.USE_CONTEXT else '_nocontext'
    cycle = '_' + str(Constants.NESTED_CROSS_VALIDATION_CYCLE)

    mongo_url =\
        'mongo://localhost:1234/' +\
        Constants.ITEM_TYPE + context_name + '_db_nested' + cycle + '/jobs'
    trials = MongoTrials(mongo_url, exp_key='exp1')

    print('Connected to %s' % mongo_url)

    params = Constants.get_properties_copy()
    params.update({
        Constants.BUSINESS_TYPE_FIELD: Constants.ITEM_TYPE,
        Constants.TOPN_NUM_ITEMS_FIELD: Constants.TOPN_NUM_ITEMS,
        Constants.NESTED_CROSS_VALIDATION_CYCLE_FIELD:
            Constants.NESTED_CROSS_VALIDATION_CYCLE,
        # 'fm_init_stdev': hp.uniform('fm_init_stdev', 0, 2),
        Constants.FM_ITERATIONS_FIELD: hp.quniform(
            Constants.FM_ITERATIONS_FIELD, 1, 500, 1),
        Constants.FM_NUM_FACTORS_FIELD: hp.quniform(
            Constants.FM_NUM_FACTORS_FIELD, 0, 200, 1),
        # 'fm_use_1way_interactions': hp.choice('fm_use_1way_interactions', [True, False]),
        # 'fm_use_bias': hp.choice('use_bias', [True, False]),
        # 'lda_alpha': hp.uniform('lda_alpha', 0, 1),
        # 'lda_beta': hp.uniform('lda_beta', 0, 2),
        # Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD: hp.uniform(
        #     Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD, 0, 0.5),
        # Constants.TOPIC_MODEL_ITERATIONS_FIELD: hp.quniform(
        #     Constants.TOPIC_MODEL_ITERATIONS_FIELD, 50, 500, 1),
        # Constants.TOPIC_MODEL_PASSES_FIELD: hp.quniform(
        #     Constants.TOPIC_MODEL_PASSES_FIELD, 1, 100, 1),
        # Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: hp.quniform(
        #     Constants.TOPIC_MODEL_NUM_TOPICS_FIELD, 1, 1000, 1),
        # Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: hp.choice(
        #     Constants.TOPIC_MODEL_NUM_TOPICS_FIELD,
        #     [10, 20, 30, 50, 75, 100, 150, 300]),
        # Constants.TOPIC_MODEL_TYPE_FIELD: hp.choice(
        #     Constants.TOPIC_MODEL_TYPE_FIELD, ['lda', 'mnf']),
        # 'topic_weighting_method': hp.choice(
        #     'topic_weighting_method',
        #     ['probability', 'binary', 'all_topics']),
        # 'use_no_context_topics_sum': hp.choice(
        #     'use_no_context_topics_sum', [True, False]),
        Constants.USE_CONTEXT_FIELD: Constants.USE_CONTEXT
    })

    space =\
        hp.choice(Constants.USE_CONTEXT_FIELD, [
            params,
        ])

    if not Constants.USE_CONTEXT:
        unwanted_args = [
            Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD,
            Constants.TOPIC_MODEL_ITERATIONS_FIELD,
            Constants.TOPIC_MODEL_PASSES_FIELD,
            Constants.TOPIC_MODEL_NUM_TOPICS_FIELD
        ]

        for element in space.pos_args[1].named_args[:]:
            if element[0] in unwanted_args:
                space.pos_args[1].named_args.remove(element)

    # best = fmin(
    #     run_recommender, space=space, algo=tpe.suggest,
    #     max_evals=100, trials=trials)

    print('losses', sorted(trials.losses()))
    print(
        'best', trials.best_trial['result']['loss'],
        trials.best_trial['misc']['vals'])
    print('num trials: %d' % len(trials.losses()))
Exemplo n.º 11
0
    exec "trials=" + trials
    return trials


if __name__ == "__main__":
    trials = MongoTrials('mongo://localhost:1234/first_try/jobs',
                         exp_key='big_run3')

    x_name = 'test_bias'
    y_name = 'learn_bias'
    z_name = 'learning_rate'

    x = trials.vals[x_name]
    y = trials.vals[y_name]
    z = trials.vals[z_name]
    w = trials.losses()

    indices = filter(lambda x: w[x] < 1, range(len(w)))
    x = [x[i] for i in indices]
    y = [y[i] for i in indices]
    z = [z[i] for i in indices]
    w = [w[i] for i in indices]

    p = ax.scatter(x, y, z, cmap='winter', s=50, c=w)
    ax.set_xlabel(x_name)
    ax.set_ylabel(y_name)
    ax.set_zlabel(z_name)
    fig.colorbar(p)
    plt.show()

    sorted_indices = sorted(range(len(w)), key=lambda x: w[x])
Exemplo n.º 12
0
    trials = trials.split('Trials: ')[1]
    exec "trials=" + trials
    return trials

if __name__=="__main__":
    trials = MongoTrials('mongo://localhost:1234/first_try/jobs', exp_key='big_run3')

    x_name = 'test_bias'
    y_name = 'learn_bias'
    z_name = 'learning_rate'

    x = trials.vals[x_name]
    y = trials.vals[y_name]
    z = trials.vals[z_name]
    w = trials.losses()

    indices = filter(lambda x: w[x] < 1, range(len(w)))
    x = [x[i] for i in indices]
    y = [y[i] for i in indices]
    z = [z[i] for i in indices]
    w = [w[i] for i in indices]

    p = ax.scatter(x, y, z, cmap='winter', s=50, c=w)
    ax.set_xlabel(x_name)
    ax.set_ylabel(y_name)
    ax.set_zlabel(z_name)
    fig.colorbar(p)
    plt.show()

    sorted_indices = sorted(range(len(w)), key=lambda x: w[x])