Пример #1
0
def suggest_multiple_from_name(dbname, host, port, bandit_algo_names, bandit_names,
                     exp_keys, N, bandit_args_list, bandit_kwargs_list,
                     bandit_algo_args_list, bandit_algo_kwargs_list,
                     mql=1, refresh=False):
    """Build one (bandit, algo) pair per experiment key and run them interleaved.

    Each bandit and its algorithm are instantiated via `json_call` from name +
    (args, kwargs); all pairs share one MongoDB jobs collection, separated by
    exp_key.  When `N` is not None the experiment runs to completion
    (blocking); otherwise the unstarted Experiment object is returned.

    NOTE(review): the `refresh` parameter is accepted but never used -- both
    MongoTrials objects below are created with refresh=False.
    """
    port = int(port)
    trials = MongoTrials('mongo://%s:%d/%s/jobs' % (host, port, dbname),
                        refresh=False)
    algos = []
    for bn, ban, ba, bk, baa, bak, ek in zip(bandit_names, bandit_algo_names,
            bandit_args_list, bandit_kwargs_list, bandit_algo_args_list, bandit_algo_kwargs_list,
                                             exp_keys):
        bandit = json_call(bn, ba, bk)
        # Per-experiment view over the same jobs collection.
        subtrials = MongoTrials('mongo://%s:%d/%s/jobs' % (host, port, dbname),
                         refresh=False, exp_key=ek)
        if ba or bk:
            # Non-trivial constructor args: pickle the bandit spec into the DB
            # so workers can rebuild it from the driver attachment.
            subtrials.attachments['bandit_data_' + ek] = cPickle.dumps((bn, ba, bk))
            bak['cmd'] = ('driver_attachment', 'bandit_data_' + ek)
        else:
            # Simple case: workers can re-import the bandit by name.
            bak['cmd'] = ('bandit_json evaluate', bn)
        # NOTE(review): this mutates the caller's bandit_algo_kwargs dicts.
        algo = json_call(ban, (bandit,) + baa, bak)
        algos.append(algo)

    algo = InterleaveAlgo(algos, exp_keys)
    exp = hyperopt.Experiment(trials, algo, poll_interval_secs=.1)
    exp.max_queue_len = mql
    if N is not None:
        exp.run(N, block_until_done=True)
    else:
        return exp
Пример #2
0
def test_trial_attachments():
    """End-to-end check: fmin runs in a thread while a MongoWorker consumes
    exactly `max_evals` jobs, all of which must finish in JOB_STATE_DONE."""
    exp_key = "A"
    with TempMongo() as tm:
        jobs = tm.mongo_jobs("foo")
        trials = MongoTrials(tm.connection_string("foo"), exp_key=exp_key)

        space = hp.uniform("x", -10, 10)
        max_evals = 3
        fmin_thread = threading.Thread(target=fmin_thread_fn,
                                       args=(space, trials, max_evals))
        fmin_thread.start()

        worker = MongoWorker(mj=jobs, logfilename=None,
                             workdir="mongoexp_test_dir")
        # Consume one job per expected evaluation; a failed job still counts
        # as one attempt so the loop always terminates.
        for _ in range(max_evals):
            try:
                worker.run_one("hostname", 10.0, erase_created_workdir=True)
                print("worker: ran job")
            except Exception as exc:
                print(f"worker: encountered error : {str(exc)}")
                traceback.print_exc()
        fmin_thread.join()
        all_trials = MongoTrials(tm.connection_string("foo"))

        assert len(all_trials) == max_evals
        assert trials.count_by_state_synced(JOB_STATE_DONE) == max_evals
        assert trials.count_by_state_unsynced(JOB_STATE_DONE) == max_evals
Пример #3
0
def _refresh_trials():
    """Lazily create the global MongoTrials handle and refresh it.

    BUG FIX: record TRIALS_REFRESHED only *after* refresh() succeeds; the
    original updated the timestamp first, so a failed refresh would still be
    reported as fresh.
    """
    global TRIALS, TRIALS_REFRESHED

    if TRIALS is None:
        TRIALS = MongoTrials('mongo://localhost:1234/covid/jobs')

    TRIALS.refresh()
    TRIALS_REFRESHED = datetime.now()
Пример #4
0
def transfer_trials(fromdb, todb):
    """
    Insert all of the documents in `fromdb` into `todb`.
    """
    from_trials = MongoTrials('mongo://localhost:44556/%s/jobs' % fromdb)
    to_trials = MongoTrials('mongo://localhost:44556/%s/jobs' % todb)
    # Deep-copy so stripping '_id' does not mutate the source trials' docs.
    from_docs = [copy.deepcopy(doc) for doc in from_trials]
    for doc in from_docs:
        del doc['_id']  # let the target DB assign fresh ids
    # BUG FIX: insert the whole list. The original passed `doc` -- the loop
    # variable, i.e. only the LAST document -- silently dropping the rest.
    to_trials.insert_trial_docs(from_docs)
Пример #5
0
def validate_from_tids(dbname):
    """Check that every trial's 'from_tid' back-reference points to a known tid.

    FIX: converted Python 2 print statements to print() calls -- the originals
    are syntax errors under Python 3.
    """
    trials = MongoTrials('mongo://localhost:44556/%s/jobs' % dbname,
                         refresh=False)
    trials.refresh()
    tdict = dict([(t['tid'], t) for t in trials])
    print("TIDS", list(tdict.keys()))

    for tid, t in tdict.items():
        assert t['misc']['tid'] == tid
        if 'from_tid' in t['misc']:
            if t['misc']['from_tid'] not in tdict:
                print('WTF gave us', tid, t['misc']['from_tid'])
Пример #6
0
def run_optimization(level=1):
    """Run the hyperopt search for one level of the multi-fidelity schedule.

    Skips entirely when the *next* level already has trials (this level is
    finished).  Level 1 takes its budget straight from LEVEL_DEFS; deeper
    levels seed the suggestion box from the previous level's results and
    extend the evaluation budget accordingly.

    NOTE(review): assumes level >= 1; for other values `max_evals` and
    `depth` would be unbound at the fmin call below.
    """
    print(f"Optimizing at level {level}")

    set_random_seeds(4)

    # If the next level already holds trials, this level must have completed.
    next_lvl_trials = MongoTrials('mongo://localhost:1234/covid/jobs',
                                  exp_key=f'covid-{level+1}')
    if len(next_lvl_trials.trials) > 0:
        print(f"Already completed level {level} -- skipping")
        return

    exp_key = f'covid-{level}'

    trials = MongoTrials('mongo://localhost:1234/covid/jobs', exp_key=exp_key)

    suggestion_box = hyperopt.tpe.suggest

    if level == 1:
        max_evals = LEVEL_DEFS[0][1]
        depth = 1

    elif level > 1:
        depth, new_budget, extend_budget = LEVEL_DEFS[level - 1]
        # NOTE(review): last_depth is unpacked but never used below.
        last_depth, _, _ = LEVEL_DEFS[level - 2]

        # Minimum one per node for the expensive ones -- no point wasting compute time
        num_new = int(np.ceil((new_budget / depth) / NUM_NODES) * NUM_NODES)

        if len(trials.trials) == 0:
            print("Generating estimates from previous level")
            result_docs = configure_next_level(level, depth, extend_budget)
            # NOTE(review): num_to_extend is computed but never used.
            num_to_extend = len(result_docs)

            suggestion_box = create_suggestion_box(result_docs)

        last_level_trials = MongoTrials('mongo://localhost:1234/covid/jobs',
                                        exp_key=f'covid-{level-1}')
        prev_level_count = len(
            [x for x in last_level_trials.losses() if x is not None])

        # Budget = everything the previous level finished plus the new draws.
        max_evals = prev_level_count + num_new
        trials.refresh()

    # Each trial trains for `depth` epochs at this fidelity level.
    objective = functools.partial(test_parameterization, num_epochs=depth)

    if len([x for x in trials.statuses() if x == 'ok']) >= max_evals:
        print(f"Already completed level {level} -- skipping")
    else:
        best = hyperopt.fmin(objective,
                             space=SEARCH_SPACE,
                             algo=suggestion_box,
                             max_evals=max_evals,
                             trials=trials)

        print(best)
Пример #7
0
def main():
    """Entry point: build the experiment runner from config files and launch
    a Mongo-backed hyperparameter search."""
    sys.path.insert(0, PATH_OPT_CONFIGS)
    args = parse_args()

    with open(args.default_config, 'r') as stream:
        default_config = yaml.load(stream, Loader=yaml.FullLoader)
    default_config = replace_path_config(default_config)

    # Strip the trailing ".py" and import the search space from that module.
    space_module = __import__(args.space_config[:-3])
    space = space_module.space

    train_script = os.path.abspath(
        os.path.join(os.getcwd(), "..", "src", "train.py"))
    runner = ExperimentRunner(default_config=default_config,
                              results_dir=os.path.abspath(args.results_dir),
                              tmp_dir=os.path.abspath(args.tmp_dir),
                              python_path=sys.executable,
                              train_path=train_script,
                              debug=args.debug,
                              num_gpus=args.num_gpus)

    trials = MongoTrials('mongo://localhost:1234/tweet_sent/jobs',
                         exp_key=args.exp_name)
    fmin(runner,
         space,
         trials=trials,
         algo=tpe.suggest,
         max_evals=args.num_trials)
Пример #8
0
def do_test(runs, flder):
    """Tune the k/f smoothing parameters with Mongo-backed trials, logging
    each batch into `flder`.

    FIX: the trailing Python 2 print statements are syntax errors under
    Python 3; converted to print() calls.
    """
    df = load_train()
    space = {
        'k': hp.qnormal('k', 25, 10, 1),
        'f': hp.loguniform('f', log(0.1), log(5))
    }
    trials = MongoTrials(
        'mongo://10.20.0.144:27017/bid_exp_family_and_zero_column/jobs',
        exp_key='exp1')

    log_file = os.path.join(flder, 'log.txt')
    objective = partial(bid_and_zero_optimizer.loss_for_batch,
                        df=df,
                        runs=runs,
                        flder=flder,
                        log_file=log_file)
    best = fmin(objective,
                space=space,
                algo=tpe.suggest,
                trials=trials,
                max_evals=10000)

    print()
    print('curr={}'.format(best))
    print('best={}'.format(get_the_best_loss(trials)))
def main(output_path):
    """Tune LightGBM hyperparameters via Mongo-backed hyperopt and dump all
    trials as JSON to `output_path`."""

    # Search space: tree shape, sampling fractions, L1/L2 regularisation.
    space = {
        'max_depth': hp.quniform('max_depth', 13, 13, 1),
        'num_leaves': hp.quniform('num_leaves', 50, 500, 50),
        'bagging_fraction': hp.quniform('bagging_fraction', 0.5, 0.9, 0.05),
        'feature_fraction': hp.quniform('feature_fraction', 0.25, 0.55, 0.05),
        'min_data_in_leaf': hp.quniform('min_data_in_leaf', 100, 500, 50),
        'lambda_l1': hp.loguniform('lambda_l1', -3, 2),
        'lambda_l2': hp.loguniform('lambda_l2', -3, 2),
    }

    trials = MongoTrials('mongo://localhost:27017/allstate/jobs',
                         exp_key='lightgbm_2')

    # Minimize the evaluation objective.
    fmin(fn=evaluate_lightgbm, space=space, algo=tpe.suggest,
         max_evals=200, trials=trials)

    # Persist every trial document.
    with open(output_path, 'w') as f:
        f.write(dumps(trials.trials))
def main(train_data, validation_data, test_data, trials_output, vw_args,
         max_evals, outer_loss_function, mongo, timeout):
    """Tune Vowpal Wabbit's FTRL and L1/L2 regularisation with hyperopt."""
    # Log-uniform priors for regularisation strengths, uniform for ftrl_beta.
    space = {
        '--ftrl_alpha': hp.loguniform('ftrl_alpha', log(1e-5), log(1e-1)),
        '--ftrl_beta': hp.uniform('ftrl_beta', 0.01, 1.),
        '--l1': hp.loguniform('l1', log(1e-8), log(1e-1)),
        '--l2': hp.loguniform('l2', log(1e-8), log(1e-1)),
    }

    # Mongo-backed trials when a connection string is supplied, else local.
    if mongo:
        trials = MongoTrials(mongo)
    else:
        trials = Trials()

    objective = Objective(train_data=train_data,
                          validation_data=validation_data,
                          test_data=test_data,
                          vw_args=vw_args,
                          outer_loss_function=outer_loss_function,
                          timeout=timeout)

    search(space,
           objective,
           trials=trials,
           trials_output=trials_output,
           max_evals=max_evals)

    return 0
def main():
    """Retry connecting to the MongoDB-backed trials store, then minimize
    `obj`; only the parameter-server ("ps") job persists the result."""
    import time  # local: used only for the connection-retry backoff

    mongo_db_host = os.environ["MONGO_DB_HOST"]
    mongo_db_port = os.environ["MONGO_DB_PORT"]
    experiment_name = os.environ["EXPERIMENT_NAME"]

    mongo_connect_str = "mongo://{0}:{1}/foo_db/jobs".format(
        mongo_db_host, mongo_db_port)

    while True:
        try:
            trials = MongoTrials(mongo_connect_str, exp_key=experiment_name)
        except ServerSelectionTimeoutError:
            # BUG FIX: the original retried immediately (`pass`), busy-spinning
            # while the server was down; back off briefly instead.
            time.sleep(1)
        else:
            space = {'x': hp.uniform('x', -2, 2)}
            best = fmin(obj,
                        space=space,
                        trials=trials,
                        algo=tpe.suggest,
                        max_evals=100)

            if os.environ["JOB_NAME"] == "ps":
                save_path = os.path.join(get_logs_path("./logs"),
                                         "results.json")
                # NOTE(review): this double-encodes (dump of a dumps string);
                # kept for compatibility with whatever reads results.json.
                with open(save_path, "w") as f:
                    json.dump(json.dumps(best), f)

            return
Пример #12
0
def load_hp(hp_fname, host, port):
    """Load (hp_space, trials, n_startup_jobs) from a pickle file or MongoDB.

    `hp_fname` takes precedence; failing that, `host` selects the MongoDB
    route; otherwise a ValueError is raised.
    """
    if hp_fname is not None:

        # FIX: open the pickle in binary mode and close it deterministically;
        # the original `cPickle.load(open(hp_fname, 'r'))` used text mode and
        # leaked the file handle.
        with open(hp_fname, 'rb') as f:
            hp = cPickle.load(f)
        hp_space = hp['hp_space']
        trials = hp['trials']
        n_startup_jobs = hp['n_startup_jobs']

    elif host is not None:

        trials = MongoTrials('mongo://%s:%d/%s/jobs' %
                             (host, int(port), mongo_dbname),
                             exp_key=mongo_dbname,
                             refresh=True)

        # -- retrieve hp_space and n_startup_jobs from trials attachment
        cmd0, cmd1 = trials.miscs[0]['cmd']
        assert cmd0 == 'domain_attachment'
        blob = trials.attachments[cmd1]
        domain = cPickle.loads(blob)
        spec = hyperopt.base.spec_from_misc(trials.miscs[0])

        memo = domain.memo_from_config(spec)
        argdict = hyperopt.pyll.rec_eval(domain.expr, memo=memo)

        hp_space = argdict['hp_space']
        n_startup_jobs = argdict['n_startup_trials']

    else:
        raise ValueError('No Pickle file nor MongoDB host informed')

    return hp_space, trials, n_startup_jobs
Пример #13
0
def do_test(runs):
    """Tune bid/manager high-cardinality-category parameters against a
    remote Mongo trials store.

    FIXES: Python 2 print statements converted to print() calls (syntax
    errors under Python 3); local variable typo 'expiriment_name' corrected.
    """
    experiment_name = 'bid_and_mngr_hcc_trials'
    gc_host = '35.187.46.132'

    df = load_train()
    space = {
        'mngr_k': hp.quniform('mngr_k', 3, 50, 1),
        'mngr_f': hp.loguniform('mngr_f', log(0.1), log(5)),
        'mngr_n': hp.choice('mngr_n', [2, 3, 4, 5, 6, 7, 10]),
        'bid_k': hp.quniform('bid_k', 3, 50, 1),
        'bid_f': hp.loguniform('bid_f', log(0.1), log(5)),
        'bid_n': hp.choice('bid_n', [2, 3, 4, 5, 6, 7, 10])
    }

    db_path = 'mongo://{}:27017/{}/jobs'.format(gc_host, experiment_name)
    trials = MongoTrials(db_path, exp_key='exp1')

    objective = partial(bid_and_mngr_optimizer.loss_for_batch,
                        df=df,
                        runs=runs)
    best = fmin(objective,
                space=space,
                algo=tpe.suggest,
                trials=trials,
                max_evals=10000)

    print()
    print('curr={}'.format(best))
    print('best={}'.format(get_the_best_loss(trials)))
Пример #14
0
def main():
    """Repeatedly try to attach Mongo-backed trials, then run the search
    once; the parameter server persists the best result to disk."""
    mongo_db_host = os.environ["MONGO_DB_HOST"]
    mongo_db_port = os.environ["MONGO_DB_PORT"]
    experiment_name = os.environ.get("EXPERIMENT_NAME", 'cifar10-hyperopt')
    data_dir = os.path.abspath(os.environ.get('PS_MODEL_PATH', os.getcwd()))

    mongo_connect_str = "mongo://{0}:{1}/foo_db/jobs".format(
        mongo_db_host, mongo_db_port)

    while True:
        try:
            logging.info(
                'Launching MongoTrials for {}'.format(experiment_name))
            trials = MongoTrials(mongo_connect_str, exp_key=experiment_name)
        except ServerSelectionTimeoutError:
            logging.warning('No MongoDB server is available for an operation')
            continue
        # Connected -- run the optimization once and return.
        space = {'x': hp.uniform('x', -2, 2)}
        best = fmin(obj,
                    space=space,
                    trials=trials,
                    algo=tpe.suggest,
                    max_evals=100)

        if os.environ["TYPE"] == "ps":
            save_path = os.path.join(data_dir, "results.json")
            with open(save_path, "w") as f:
                logging.debug('Saving results.json to {}'.format(data_dir))
                logging.info('Results: {}'.format((str(best))))
                json.dump(json.dumps(best), f)
        return
Пример #15
0
def task2(msg):
    """Optimize `objective` over Mongo-backed trials and print the 10 best
    parameter rows, the optimum, and the caller-supplied message."""
    tpe_trials = MongoTrials('mongo://localhost:27018/foo_db/jobs',
                             exp_key='exp1')
    opt_params = fmin(fn=objective,
                      space=hyper_params_space,
                      algo=tpe.suggest,
                      max_evals=300,
                      trials=tpe_trials,
                      rstate=np.random.RandomState(100))

    # Collect loss and sampled parameter values for every trial.
    sampled = tpe_trials.idxs_vals[1]
    tpe_results = pd.DataFrame({
        'score': [x['loss'] for x in tpe_trials.results],
        'timeperiod': sampled['timeperiod'],
        'nbdevup': sampled['nbdevup'],
        'nbdevdn': sampled['nbdevdn'],
    })
    tpe_results.sort_values(by=['score'], inplace=True)

    print(tpe_results.head(10))
    print(opt_params)
    print(msg)
    print('task2 is running')
    return opt_params
Пример #16
0
def hyper_tune(train_model,
               hparam_def,
               algo=tpe.suggest,
               max_evals=25,
               func=fmin):
    """Prepare and launch a hyperparameter search.

    :param train_model: user model to tune
    :param hparam_def: hyperparameter space definition
    :param algo: search algorithm (defaults to TPE)
    :param max_evals: maximum number of function evaluations before returning
    :param func: routine implementing the tuning loop (hyperopt ``fmin``)
    :return: the tuning result, or None when no connection could be made
    """
    mongo_connect_str = get_mongo_conn_str()

    keep_trying = True
    while keep_trying:
        try:
            trials = MongoTrials(mongo_connect_str, exp_key=_experiment_name())
        except ServerSelectionTimeoutError:
            logger.warning(
                "Hyper Tune - MongoTrials server selection Timeout Error")
            # Ask the retry policy whether another attempt should be made.
            keep_trying = _hyper_tune_check()
        else:
            return func(train_model,
                        space=hparam_def,
                        trials=trials,
                        algo=algo,
                        max_evals=max_evals)
Пример #17
0
def main():
    """Retry until the Mongo trials store is reachable, tune the cifar10
    trainer, and (on the worker) persist the best result to disk."""
    import time  # local: used only for the connection-retry backoff

    mongo_db_host = os.environ["MONGO_DB_HOST"]
    mongo_db_port = os.environ["MONGO_DB_PORT"]
    experiment_name = os.environ.get("EXPERIMENT_NAME", 'cifar10-hyperopt')
    data_dir = os.path.abspath(os.environ.get('PS_MODEL_PATH', os.getcwd()))

    mongo_connect_str = "mongo://{0}:{1}/foo_db/jobs".format(
        mongo_db_host, mongo_db_port)

    while True:
        try:
            trials = MongoTrials(mongo_connect_str, exp_key=experiment_name)
        except ServerSelectionTimeoutError:
            # BUG FIX: the original `pass` retried immediately, busy-spinning
            # while MongoDB was unreachable; back off briefly instead.
            time.sleep(1)
        else:
            space = {
                'lr': hp.loguniform('lr', -10, 2),
                'momentum': hp.uniform('momentum', 0.1, 0.9)
            }
            best = fmin(train_cifar10,
                        space=space,
                        trials=trials,
                        algo=tpe.suggest,
                        max_evals=25)

            if os.environ["TYPE"] == "worker":
                save_path = os.path.join(data_dir, "results.json")
                with open(save_path, "w") as f:
                    logging.debug('Saving results.json to {}'.format(data_dir))
                    logging.info('Results: {}'.format((str(best))))
                    # NOTE(review): double-encodes (dump of a dumps string);
                    # kept for compatibility with the reader of results.json.
                    json.dump(json.dumps(best), f)
            return
Пример #18
0
def _fmin_parallel(
    queue: multiprocessing.Queue,
    fn: Callable,
    exp_key: str,
    space: dict,
    algo: Callable = tpe.suggest,
    max_evals: int = 100,
    show_progressbar: bool = False,
    mongo_port_address: str = "localhost:1234/scvi_db",
):
    """Run a ``hyperopt`` minimization and push the resulting trials object
    onto `queue` once it finishes."""
    logger.debug("Instantiating trials object.")
    mongo_url = as_mongo_str(os.path.join(mongo_port_address, "jobs"))
    trials = MongoTrials(mongo_url, exp_key=exp_key)

    logger.debug("Calling fmin.")
    fmin(
        fn=fn,
        space=space,
        algo=algo,
        max_evals=max_evals,
        trials=trials,
        show_progressbar=show_progressbar,
    )
    logger.debug("fmin returned.")
    # The handle attribute holds a thread lock, which pickle (used under the
    # hood by queue.put) cannot serialize -- drop it before queuing.
    if hasattr(trials, "handle"):
        logger.debug("Deleting Trial handle for pickling.")
        del trials.handle
    logger.debug("Putting Trials in Queue.")
    queue.put(trials)
Пример #19
0
def _fmin_parallel(
    queue: multiprocessing.Queue,
    fn: Callable,
    exp_key: str,
    space: dict,
    algo: Callable = tpe.suggest,
    max_evals: int = 100,
    fmin_timer: float = None,
    show_progressbar: bool = False,
    mongo_port_address: str = "localhost:1234/scvi_db",
):
    """Launches a ``hyperopt`` minimization procedure.

    Runs ``fmin`` in a daemon thread so ``fmin_timer`` can cap the total
    optimization time; when the timer elapses (or fmin finishes) the trials
    object is pushed onto ``queue``.

    CONSISTENCY FIX: the original called the root ``logging`` module in two
    places while using the module-level ``logger`` everywhere else, so those
    two messages bypassed the module's logger configuration.
    """
    logger.debug("Instantiating trials object.")
    # instantiate Trials object
    trials = MongoTrials(
        as_mongo_str(os.path.join(mongo_port_address, "jobs")), exp_key=exp_key
    )

    # run hyperoptimization in another fork to enable the use of fmin_timer
    fmin_kwargs = {
        "fn": fn,
        "space": space,
        "algo": algo,
        "max_evals": max_evals,
        "trials": trials,
        "show_progressbar": show_progressbar,
    }
    fmin_thread = threading.Thread(target=fmin, kwargs=fmin_kwargs)
    logger.debug("Calling fmin.")
    # set fmin thread as daemon so it stops when the main process terminates
    fmin_thread.daemon = True
    fmin_thread.start()
    started_threads.append(fmin_thread)
    if fmin_timer:
        logger.debug(
            "Timer set, fmin will run for at most {timer}".format(timer=fmin_timer)
        )
        start_time = time.monotonic()
        run_time = 0
        # Poll until the timer elapses or fmin finishes on its own.
        while run_time < fmin_timer and fmin_thread.is_alive():
            time.sleep(10)
            run_time = time.monotonic() - start_time
    else:
        logger.debug("No timer, waiting for fmin")
        while fmin_thread.is_alive():
            time.sleep(10)
    logger.debug("fmin returned or timer ran out.")
    # queue.put uses pickle so remove attribute containing thread.lock
    if hasattr(trials, "handle"):
        logger.debug("Deleting Trial handle for pickling.")
        del trials.handle
    logger.debug("Putting Trials in Queue.")
    queue.put(trials)
Пример #20
0
def hyperOptMain(max_evals, max_trials):
    '''Run the training using hyper optimization.

    Runs `max_trials` rounds of fmin, each adding `max_evals` evaluations on
    top of whatever was loaded from a previous trials checkpoint, and pickles
    the trials object after every round so a crash can resume.
    '''
    #('--num_point', type=int, default=1024, help='Point Number [256/512/1024/2048] [default: 1024]')
    #('--max_epoch', type=int, default=250, help='Epoch to run [default: 250]')
    #('--batch_size', type=int, default=32, help='Batch Size during training [default: 32]')
    #('--learning_rate', type=float, default=0.001, help='Initial learning rate [default: 0.001]')
    #('--momentum', type=float, default=0.9, help='Initial learning rate [default: 0.9]')
    #('--optimizer', default='adam', help='adam or momentum [default: adam]')
    #('--decay_step', type=int, default=200000, help='Decay step for lr decay [default: 200000]
    # Search space: currently only batch_size is tuned; the other dimensions
    # are kept (commented out) for easy re-enabling.
    space = {
        #'num_points': hp.choice('num_points',[256,512,1024,2048]),
        'batch_size': hp.choice('batch_size', [2, 4, 8, 16, 32]),
        #'learning_rate': hp.uniform('learning_rate', 0.01, 0.001),
        #'momentum': hp.uniform('momentum',0.1, 0.9),
        #'optimizer': hp.choice('optimizer',['adam','momentum']),
        #'decay_step': hp.uniform('decay_step',10000, 200000),
        #'decay_rate': hp.uniform('decay_rate',0.1, 0.7)
    }
    #max_evals = 3

    #https://github.com/hyperopt/hyperopt/issues/267
    # check if any trials file are given to continue hyperopt on
    trials = Trials()
    if TRIALS_PATH:
        trialFilePath = os.path.join(TRIALS_PATH, TRIALS_FILE_NAME)
        if os.path.exists(trialFilePath):
            with open(trialFilePath, "rb") as f:
                trials = pickle.load(f)
                log_string("Loaded trials.")
    #otherwise create a new one in the log directory
    prevTrialsCount = len(trials)
    if not prevTrialsCount:
        trialFilePath = os.path.join(LOG_DIR, TRIALS_FILE_NAME)

    # NOTE(review): in mongo mode, `prevTrialsCount` still reflects any
    # locally loaded trials even though a fresh MongoTrials replaces them --
    # confirm that offsetting eval_runs by it is intended.
    if FLAGS.mongo_mode == 1:
        trials = MongoTrials('mongo://localhost:27017/hyperopt/jobs',
                             exp_key='exp{}'.format(uuid.uuid4()))

    # https://github.com/hyperopt/hyperopt-sklearn/issues/80
    # Changing the number of initial evaluations to 1 instead of the default 20 runs
    eval_runs = 0
    for i in range(1, max_trials + 1):
        eval_runs = max_evals * i + prevTrialsCount
        #print ("max:{}, i:{} and prev count:{}".format(max_evals,i,prevTrialsCount))
        best = fmin(
            main,
            space=space,
            algo=tpe.
            suggest,  #partial(tpe.suggest, n_startup_jobs=1), #tpe.suggest,
            max_evals=
            eval_runs,  #increase the eval count otherwise only previous runs will be used
            trials=trials)

        summarizeTrials(i, best, trials)
        # Checkpoint trials after every outer round so a crash can resume.
        with open(trialFilePath, "wb") as w:
            pickle.dump(trials, w)
            log_string("Written trials on run {}.".format(i))
Пример #21
0
def make_trials(host, port, exp_key, refresh=True):
    """Return local Trials when host and port are both None, otherwise a
    Mongo-backed trials object.

    NOTE(review): `dbname` is not a parameter here -- it must exist at module
    scope or the Mongo branch raises NameError; confirm against the module.
    """
    if host is None and port is None:
        return Trials()
    url = 'mongo://%s:%d/%s/jobs' % (host, int(port), dbname)
    return MongoTrials(url, exp_key=exp_key, refresh=refresh)
Пример #22
0
def load_trials(trials_path, mongo_key=None, reset_trials=False):
    """Return a trials object: Mongo-backed, unpickled from disk, or fresh.

    BUG FIX: the original `pickle.load(open(trials_path, 'rb'))` never closed
    the file handle; a `with` block closes it deterministically.
    """
    is_mongo = trials_path.startswith('mongo:') if isinstance(trials_path, str) else False
    if is_mongo:
        return MongoTrials(trials_path, exp_key=mongo_key)
    # Reuse a pickled trials file unless the caller asked for a reset.
    if trials_path is not None and os.path.isfile(trials_path) and not reset_trials:
        with open(trials_path, 'rb') as f:
            return pickle.load(f)
    return Trials()
def target_func1(evals):
    """Minimize sin(x) over [-2, 2] with Mongo-backed parallel trials and
    return the best point found."""
    trials = MongoTrials('mongo://localhost:27017/mongo_hpo/jobs',
                         exp_key='exp1')
    search_space = hp.uniform('x', -2, 2)
    return fmin(math.sin,
                search_space,
                trials=trials,
                algo=tpe.suggest,
                max_evals=evals)
Пример #24
0
def tune_parameters():
    """Hyperopt search over topic-model parameters, backed by MongoDB."""
    from utils.constants import Constants

    context_name = '_context' if Constants.USE_CONTEXT else '_nocontext'
    mongo_url = ('mongo://localhost:1234/topicmodel_' +
                 Constants.ITEM_TYPE + context_name + '/jobs')
    trials = MongoTrials(mongo_url, exp_key='exp1')

    print('Connected to %s' % mongo_url)

    # Single-branch choice: every sample uses the context configuration.
    space = hp.choice(Constants.USE_CONTEXT_FIELD, [{
        Constants.BUSINESS_TYPE_FIELD: Constants.ITEM_TYPE,
        Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD: hp.uniform(
            Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD, 0, 0.5),
        Constants.TOPIC_MODEL_ITERATIONS_FIELD: hp.quniform(
            Constants.TOPIC_MODEL_ITERATIONS_FIELD, 50, 500, 1),
        Constants.TOPIC_MODEL_PASSES_FIELD: hp.quniform(
            Constants.TOPIC_MODEL_PASSES_FIELD, 1, 100, 1),
        Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: hp.quniform(
            Constants.TOPIC_MODEL_NUM_TOPICS_FIELD, 1, 1000, 1),
        Constants.USE_CONTEXT_FIELD: True,
    }])

    best = fmin(run_recommender,
                space=space,
                algo=tpe.suggest,
                max_evals=1000,
                trials=trials)

    print('losses', sorted(trials.losses()))
    print('best', trials.best_trial['result'],
          trials.best_trial['misc']['vals'])
    print('num trials: %d' % len(trials.losses()))
Пример #25
0
def main():
    """
    Hyperparameter optimization with hyperopt: attach a Mongo-backed trials
    store, declare a stochastic search space, and minimize `objective`.
    """
    trials = MongoTrials('mongo://localhost:1234/otto-sqrt-pca-95-5/jobs',
            exp_key='15-11-03')

    # Nested choice: up to three dense layers, each 512-2048 units wide.
    third_layer = hp.choice('MD2', [
        0,
        {'num_units': hp.quniform('DL3', 512, 2048, 512),
         'more_layers': 0},
    ])
    dense_part = {
        'num_units': hp.quniform('DL1', 512, 2048, 512),
        'more_layers': {
            'num_units': hp.quniform('DL2', 512, 2048, 512),
            'more_layers': third_layer,
        },
    }

    space = {
        'dense_part': dense_part,
        'leakiness': hp.choice('leak', [0, 0.01, 0.15]),
        'weight_init': hp.choice('weight', ['orto', 'uni']),
        'input_dropout': hp.quniform('p_in', 0.1, 0.4, 0.1),
        'learning_rate': hp.choice('lr', [0.001, 0.01, 0.025, 0.05, 0.1]),
    }

    # Optimize
    best = fmin(objective, space=space, algo=tpe.suggest, max_evals=100,
                trials=trials)

    print(trials.losses())
    print(best)
Пример #26
0
    def hp_parallel(self):
        """Run one parallel hyperopt search for this task; return the
        accumulated best-so-far curve and the search path taken."""
        exp_key = self.task.id + str(random.getrandbits(64))
        trials = MongoTrials('mongo://localhost:27017/foo_db/jobs',
                             exp_key=exp_key)
        batch_size = self.n_parallel
        best_params = fmin(fn=self.hp_objective,
                           space=self.task.hp_space,
                           algo=tpe.suggest,
                           max_evals=self.max_evals * batch_size,
                           trials=trials)

        # Scores are negated losses: higher is better.
        scores = [-t['result']['loss'] for t in trials.trials]
        print("hp parallel task: %s, best: %s, params: %s" %
              (self.task.id, max(scores), best_params))

        search_path = trials.vals
        search_path['score'] = list(np.array(trials.losses()) * -1)

        return self.accumulate_max(scores, self.max_evals,
                                   batch_size), search_path
Пример #27
0
def train(air_model, train_epochs=20):
    """ Runs TPE black box optimization of the neural network to use.
  After evaluating all points, it saves the best model to disk and sets the status flag as TRAINED.

  FIX: converted the Python 2 print statements to print() calls (syntax
  errors under Python 3); translated the Spanish review comments.
  """
    from db import get_model, save_model
    from model import ModelStatus
    info('Running training on new process')
    air_model.status = ModelStatus.TRAINING
    save_model(air_model)

    fspace = {
        'optimizer':
        hp.choice('optimzer', [
            'rmsprop', 'adagrad'
        ]),  # NEQP (I assume so, but is the 'optimzer' label spelled that way on purpose?)
        'layers':
        hp.choice('layers', [(str(x), layer_choice(x))
                             for x in range(10)])  # Choose from 0 to 9 layers.
    }

    if config.DISTRIBUTED_HYPEROPT:
        # TODO: Probably not send all model from json. Just send the ids and make the worker fetch it from the DB.
        fspace['model_json'] = air_model.to_json()
        trials = MongoTrials('mongo://localhost:27017/testdb/jobs',
                             exp_key='userid.trainingid',
                             workdir='/home/paezand/pusher/bottle_air')
        best = fmin(fn=run_model_fn,
                    space=fspace,
                    trials=trials,
                    algo=tpe.suggest,
                    max_evals=train_epochs)
        # Run workers with
        # hyperopt-mongo-worker --mongo=$mongodbURL/testdb --poll-interval=0.1 --workdir=$bottle_air_dir
    else:
        trials = Trials()  # NEQP (Did you look into parallel search with MongoDB?)
        best = fmin(fn=air_model.run_model(),
                    space=fspace,
                    algo=tpe.suggest,
                    max_evals=train_epochs,
                    trials=trials)

    print('best:', space_eval(fspace, best))

    print('trials:')
    for trial in trials.trials[:2]:
        print(trial)

    model_fn = air_model.run_model(persist=True)
    model_fn(space_eval(fspace, best))  # Train and persist best model.

    print('Training finished')
    air_model.status = ModelStatus.TRAINED
    air_model.best_model = best
    save_model(air_model)
Пример #28
0
def tune_parameters():
    """Run a Mongo-backed hyperopt search over the topic-model configuration
    and report losses, the best trial, and the trial count."""
    from utils.constants import Constants

    context_name = '_context' if Constants.USE_CONTEXT else '_nocontext'
    mongo_url = ('mongo://localhost:1234/topicmodel_' +
                 Constants.ITEM_TYPE + context_name + '/jobs')
    trials = MongoTrials(mongo_url, exp_key='exp1')
    print('Connected to %s' % mongo_url)

    # The single choice branch fixes USE_CONTEXT to True for every sample.
    context_space = {
        Constants.BUSINESS_TYPE_FIELD: Constants.ITEM_TYPE,
        Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD: hp.uniform(
            Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD, 0, 0.5),
        Constants.TOPIC_MODEL_ITERATIONS_FIELD: hp.quniform(
            Constants.TOPIC_MODEL_ITERATIONS_FIELD, 50, 500, 1),
        Constants.TOPIC_MODEL_PASSES_FIELD: hp.quniform(
            Constants.TOPIC_MODEL_PASSES_FIELD, 1, 100, 1),
        Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: hp.quniform(
            Constants.TOPIC_MODEL_NUM_TOPICS_FIELD, 1, 1000, 1),
        Constants.USE_CONTEXT_FIELD: True,
    }
    space = hp.choice(Constants.USE_CONTEXT_FIELD, [context_space])

    best = fmin(
        run_recommender, space=space, algo=tpe.suggest,
        max_evals=1000, trials=trials)

    print('losses', sorted(trials.losses()))
    print(
        'best', trials.best_trial['result'], trials.best_trial['misc']['vals'])
    print('num trials: %d' % len(trials.losses()))
Пример #29
0
def make_trials(host, port, exp_key, refresh=True, dbname='dbname'):
    """Create a trials store, backed by MongoDB when a host is given.

    Parameters
    ----------
    host, port : Mongo connection target; pass ``(None, None)`` to get a
        purely in-memory ``Trials`` object (no parallelism).
    exp_key : experiment key used to namespace jobs in the shared DB.
    refresh : whether the MongoTrials should refresh from the DB on creation.
    dbname : Mongo database name; the default is a placeholder and should
        normally be overridden by the caller.

    Returns
    -------
    Trials or MongoTrials
    """
    if (host, port) == (None, None):
        trials = Trials()
    else:
        if dbname == 'dbname':
            # logging.Logger.warn is a deprecated alias; use warning().
            logger.warning('You probably want to override the default dbname')
        trials = MongoTrials('mongo://%s:%d/%s/jobs' %
                             (host, int(port), dbname),
                             exp_key=exp_key,
                             refresh=refresh)
    return trials
Пример #30
0
    def create_mongo_trials(self, mongo_uri):
        """Create a MongoTrials store under a fresh, auto-incremented exp_key.

        Looks up the highest existing ``exp_key`` in the ``netron`` database's
        ``hyperopt_jobs`` collection and uses the next integer, so each run is
        stored under its own experiment key.

        Returns a ``(exp_key, MongoTrials)`` tuple.
        """
        jobs_col = MongoClient(mongo_uri)["netron"]["hyperopt_jobs"]
        # Fetch only the single highest exp_key (descending sort, limit 1).
        last_job = jobs_col.find({}, {"exp_key": 1}).sort("exp_key", pymongo.DESCENDING).limit(1)
        last_job = list(last_job)
        if last_job:
            exp_key = int(last_job[0]["exp_key"]) + 1
        else:
            exp_key = 0
        # print() call form behaves identically under Python 2 and is valid
        # Python 3 (the original py2-only print statement was not).
        print("Current experiment key is %s" % exp_key)

        mongo_uri = mongo_uri + 'netron/hyperopt_jobs'
        return exp_key, MongoTrials(mongo_uri, exp_key=exp_key)
Пример #31
0
 def run_fmin(self,
              online=True,
              upload=True,
              objective=objective_success,
              max_evals=3,
              wrap=None,
              **kwargs):
     """Run fmin with a SigOptTrials wrapper around an optional backend.

     ``wrap`` selects the inner trials backend ('mongo', 'spark', or None
     for hyperopt's default); the SigOptTrials wrapper records results
     either online or for a later explicit upload.

     Returns ``(trials, best)``, where ``best`` is None if every trial
     failed.
     """
     project = 'hyperopt-integration-test'

     # Pick the backend that SigOptTrials will delegate to.
     if wrap == 'mongo':
         inner = MongoTrials('mongo://mongodb:27017/foo_db/jobs',
                             exp_key=str(uuid.uuid4()))
     elif wrap == 'spark':
         inner = SparkTrials()
     else:
         inner = None
     trials = SigOptTrials(project=project,
                           online=(online and upload),
                           trials=inner)

     search_space = {
         'x': hp.uniform('x', -10, 10),
         'y': hp.uniform('y', -10, 10),
     }
     try:
         best = fmin(objective,
                     space=search_space,
                     algo=tpe.suggest,
                     max_evals=max_evals,
                     trials=trials,
                     **kwargs)
     except hyperopt.exceptions.AllTrialsFailed:
         best = None

     # In offline mode the recorded results are pushed in one batch.
     if upload and not online:
         trials.upload()
     return trials, best
Пример #32
0
async def delete_gen(request, gen):
    """Delete every trial of generation ``gen``, after confirmation.

    The deletion only happens when the request carries ``?really=yes``;
    otherwise a confirmation prompt page is returned so the user can
    confirm the destructive action.
    """
    if request.args.get('really', 'no') == 'yes':
        gen_trials = MongoTrials('mongo://localhost:1234/covid/jobs',
                                 exp_key=f'covid-{gen}')
        gen_trials.refresh()
        gen_trials.delete_all()
        # Plain literal: the original f-string had no placeholders (F541).
        return redirect("/status/?refresh=true")
    return html(create_delete_prompt(f"GENERATION 'covid-{gen}'"))
Пример #33
0
def lfw_view2_randomL(host, dbname):
    trials = MongoTrials('mongo://%s:44556/%s/jobs' % (host, dbname),
            refresh=False)
    #B = main_lfw_driver(trials)
    #E = B.get_experiment(name=('random', 'foo'))
    mongo_trials = trials.view(exp_key=exp_keys['randomL'], refresh=True)

    docs = [d for d in mongo_trials.trials
            if d['result']['status'] == hyperopt.STATUS_OK]
    local_trials = hyperopt.trials_from_docs(docs)
    losses = local_trials.losses()
    best_doc = docs[np.argmin(losses)]

    #XXX: Potentially affected by the tid/injected jobs bug,
    #     but unlikely. Rerun just in case once dual svm solver is in.
    print best_doc['spec']
    namebase = '%s_randomL_%s' % (dbname, best_doc['tid'])

    get_view2_features(
            slm_desc=best_doc['spec']['model']['slm'],
            preproc=best_doc['spec']['model']['preproc'],
            comparison=best_doc['spec']['comparison'],
            namebase=namebase,
            basedir=os.getcwd(),
            )

    namebases = [namebase]
    basedirs = [os.getcwd()] * len(namebases)

    #train_view2(namebases=namebases, basedirs=basedirs)
    # running on the try2 database
    # finds id 1674
    #train err mean 0.0840740740741
    #test err mean 0.199666666667

    #running with libsvm:
    train_view2(namebases=namebases, basedirs=basedirs,
                use_libsvm={'kernel':'precomputed'})
Пример #34
0
 def __init__(self, host, port, db, config_key=None, model_name=None):
     """Progress tracker backed by MongoDB.

     Verifies that the Mongo server is reachable, then creates a
     MongoTrials store keyed by the model name and binds the ``results``
     collection for progress records.

     Raises ConnectionFailure (the original exception) if the server
     cannot be reached.
     """
     super(ProgressTrackerMongo, self).__init__(model_name=model_name,
                                                config_key=config_key)
     self.client = MongoClient(host, port)
     try:
         # Cheap command that fails fast when the server is unreachable.
         self.client.admin.command('ismaster')
     except ConnectionFailure:
         print("Server not available")
         # Bare raise re-raises the caught exception with its traceback;
         # the original `raise ConnectionFailure` raised a new, empty one.
         raise
     self.state['trials'] = MongoTrials('mongo://%s:%d/%s/jobs' %
                                        (host, port, db),
                                        exp_key=self.model_name)
     # Use a distinct local name instead of shadowing the `db` parameter.
     database = self.client[db]
     self.mongo_collection = database.results
Пример #35
0
def main(hyperparameter_search_args, tqdm=tqdm, fmin_kwargs=None):
    """Drive one hyperparameter-search rotation.

    Persists the search arguments, builds the objective for the requested
    rotation, selects a trials backend (MongoDB for parallel search, a
    recreated on-disk Trials when resuming, or a fresh in-memory Trials),
    and runs hyperopt's fmin.

    Returns the populated trials object.
    """
    fmin_kwargs = {} if fmin_kwargs is None else fmin_kwargs

    search_dir = hyperparameter_search_args.search_dir
    hyperparameter_search_args.to_json_file(
        os.path.join(search_dir, HYPERPARAMETER_SEARCH_ARGS_FILENAME))

    hyperopt_space, constant_params = read_config(search_dir)

    rotation = hyperparameter_search_args.rotation
    base_dir = os.path.join(search_dir, str(rotation))
    # A rotation directory with more than one entry means a previous run
    # left results behind that we may resume from.
    already_existed = os.path.exists(base_dir) and len(
        os.listdir(base_dir)) > 1
    if not os.path.isdir(base_dir):
        os.makedirs(base_dir)

    objective = ObjectiveFntr(
        base_dir,
        rotation,
        constant_params,
        tqdm,
        single_task=hyperparameter_search_args.single_task_search,
        do_match_train_windows=(
            hyperparameter_search_args.do_match_train_windows),
        do_eicu=hyperparameter_search_args.do_eicu,
    )

    algo = HP_ALGS[hyperparameter_search_args.algo]

    if hyperparameter_search_args.do_use_mongo:
        mongo_addr = '{base}/{db}/jobs'.format(
            base=hyperparameter_search_args.mongo_addr,
            db=hyperparameter_search_args.mongo_db)
        print("Parallelizing search via Mongo DB: %s" % mongo_addr)
        trials = MongoTrials(mongo_addr,
                             exp_key=hyperparameter_search_args.mongo_exp_key)
    elif already_existed:
        # Resume: rebuild the trials object from what is already on disk.
        _, _, _, _, trials = read_or_recreate_trials(search_dir, tqdm=tqdm)
        trials = trials[str(rotation)]
    else:
        trials = Trials()

    fmin(objective,
         space=hyperopt_space,
         algo=algo,
         max_evals=hyperparameter_search_args.max_evals,
         trials=trials,
         **fmin_kwargs)

    return trials
Пример #36
0
def tune_parameters():
    """Tune factorization-machine hyperparameters for one nested-CV cycle.

    Connects to a cycle-specific MongoDB trials store (so hyperopt workers
    can evaluate in parallel), builds the search space from the project
    Constants, prunes context-only parameters from hyperopt's internal
    expression graph when context is disabled, and prints a summary of the
    losses recorded so far.

    NOTE(review): the fmin call is commented out, so this currently only
    reports on existing trials; uncomment it to actually run the search.
    """
    # trials = Trials()
    from utils.constants import Constants

    context_name = '_context' if Constants.USE_CONTEXT else '_nocontext'
    cycle = '_' + str(Constants.NESTED_CROSS_VALIDATION_CYCLE)

    # The database name encodes item type, context mode and CV cycle so
    # that concurrent experiments do not collide.
    mongo_url =\
        'mongo://localhost:1234/' +\
        Constants.ITEM_TYPE + context_name + '_db_nested' + cycle + '/jobs'
    trials = MongoTrials(mongo_url, exp_key='exp1')

    print('Connected to %s' % mongo_url)

    # Start from the full properties dict and overlay the tunable fields;
    # the commented entries document previously-explored dimensions.
    params = Constants.get_properties_copy()
    params.update({
        Constants.BUSINESS_TYPE_FIELD: Constants.ITEM_TYPE,
        Constants.TOPN_NUM_ITEMS_FIELD: Constants.TOPN_NUM_ITEMS,
        Constants.NESTED_CROSS_VALIDATION_CYCLE_FIELD:
            Constants.NESTED_CROSS_VALIDATION_CYCLE,
        # 'fm_init_stdev': hp.uniform('fm_init_stdev', 0, 2),
        Constants.FM_ITERATIONS_FIELD: hp.quniform(
            Constants.FM_ITERATIONS_FIELD, 1, 500, 1),
        Constants.FM_NUM_FACTORS_FIELD: hp.quniform(
            Constants.FM_NUM_FACTORS_FIELD, 0, 200, 1),
        # 'fm_use_1way_interactions': hp.choice('fm_use_1way_interactions', [True, False]),
        # 'fm_use_bias': hp.choice('use_bias', [True, False]),
        # 'lda_alpha': hp.uniform('lda_alpha', 0, 1),
        # 'lda_beta': hp.uniform('lda_beta', 0, 2),
        # Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD: hp.uniform(
        #     Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD, 0, 0.5),
        # Constants.TOPIC_MODEL_ITERATIONS_FIELD: hp.quniform(
        #     Constants.TOPIC_MODEL_ITERATIONS_FIELD, 50, 500, 1),
        # Constants.TOPIC_MODEL_PASSES_FIELD: hp.quniform(
        #     Constants.TOPIC_MODEL_PASSES_FIELD, 1, 100, 1),
        # Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: hp.quniform(
        #     Constants.TOPIC_MODEL_NUM_TOPICS_FIELD, 1, 1000, 1),
        # Constants.TOPIC_MODEL_NUM_TOPICS_FIELD: hp.choice(
        #     Constants.TOPIC_MODEL_NUM_TOPICS_FIELD,
        #     [10, 20, 30, 50, 75, 100, 150, 300]),
        # Constants.TOPIC_MODEL_TYPE_FIELD: hp.choice(
        #     Constants.TOPIC_MODEL_TYPE_FIELD, ['lda', 'mnf']),
        # 'topic_weighting_method': hp.choice(
        #     'topic_weighting_method',
        #     ['probability', 'binary', 'all_topics']),
        # 'use_no_context_topics_sum': hp.choice(
        #     'use_no_context_topics_sum', [True, False]),
        Constants.USE_CONTEXT_FIELD: Constants.USE_CONTEXT
    })

    # Single-choice wrapper keeps the structure hyperopt's fmin expects.
    space =\
        hp.choice(Constants.USE_CONTEXT_FIELD, [
            params,
        ])

    if not Constants.USE_CONTEXT:
        # Context-specific parameters are meaningless without context;
        # drop them from the search space.
        unwanted_args = [
            Constants.CONTEXT_EXTRACTOR_EPSILON_FIELD,
            Constants.TOPIC_MODEL_ITERATIONS_FIELD,
            Constants.TOPIC_MODEL_PASSES_FIELD,
            Constants.TOPIC_MODEL_NUM_TOPICS_FIELD
        ]

        # Iterate over a copy ([:]) because named_args is mutated in place.
        # NOTE(review): pos_args/named_args are hyperopt-internal structures;
        # this is tied to a specific hyperopt version.
        for element in space.pos_args[1].named_args[:]:
            if element[0] in unwanted_args:
                space.pos_args[1].named_args.remove(element)

    # best = fmin(
    #     run_recommender, space=space, algo=tpe.suggest,
    #     max_evals=100, trials=trials)

    print('losses', sorted(trials.losses()))
    print(
        'best', trials.best_trial['result']['loss'],
        trials.best_trial['misc']['vals'])
    print('num trials: %d' % len(trials.losses()))
Пример #37
0
    P_hi = 0.9
    P_lo = 0.9
    V_hi = 10
    V_lo = 20
    threads = 8

    date_time_string = str(datetime.datetime.now()).split('.')[0]
    date_time_string = reduce(lambda y,z: string.replace(y,z,"_"), [date_time_string,":", " ","-"])
    log_file_prefix = def_logging_dir + "temp_" + date_time_string

    objective = make_objective(nengo_path, def_results_dir, log_file_prefix, date_time_string, D, num_neurons, neurons_per_dim,
                                num_vectors, trial_length, learning_pres, testing_pres, clean_learning,
                                learning_noise,testing_noise, num_runs, P_hi, P_lo, V_hi, V_lo, threads, dry_run)

    if use_mongo:
        trials = MongoTrials('mongo://localhost:1234/first_try/jobs', exp_key=exp_key)

        worker_call_string = \
            ["hyperopt-mongo-worker",
            "--mongo=localhost:1234/first_try",
            "--max-consecutive-failures","1",
            "--reserve-timeout", "15.0",
            "--workdir",def_results_dir,
            ]

        print worker_call_string
        workers = []
        for i in range(num_mongo_workers):
            #using Popen causes the processes to run in the background
            p = subprocess.Popen(worker_call_string)
            workers.append(p)
Пример #38
0
    def work(self):
        """
        Run a small experiment with several workers running in parallel
        using Python threads.

        Starts ``n_threads`` worker threads (each consuming
        ``jobs_per_thread`` jobs per experiment key), runs one Experiment
        per exp_key against a temporary MongoDB, then asserts that every
        experiment completed exactly ``n_trials_per_exp`` trials and that
        the synced and unsynced state counts agree.

        NOTE(review): this is Python 2 code (print statements, sys.maxint,
        map() returning a list that is iterated twice).
        """
        n_threads = self.n_threads
        jobs_per_thread = self.jobs_per_thread
        n_trials_per_exp = n_threads * jobs_per_thread
        n_trials_total = n_trials_per_exp * len(self.exp_keys)

        with TempMongo() as tm:
            mj = tm.mongo_jobs('foodb')
            def newth(ii):
                # Each thread must consume enough jobs to cover every
                # experiment key.
                n_jobs = jobs_per_thread * len(self.exp_keys)
                return threading.Thread(
                        target=self.worker_thread_fn,
                        args=(('hostname', ii), n_jobs, 30.0))
            threads = map(newth, range(n_threads))  # list under Python 2
            [th.start() for th in threads]

            exp_list = []
            trials_list = []
            try:
                for key in self.exp_keys:
                    print 'running experiment'
                    trials = MongoTrials(tm.connection_string('foodb'), key)
                    assert len(trials) == 0
                    if hasattr(self, 'prep_trials'):
                        self.prep_trials(trials)
                    # NOTE(review): `bandit` local is assigned but unused;
                    # self.bandit is referenced directly below.
                    bandit = self.bandit
                    if self.use_stop:
                        # RandomStop halts after a fixed trial count, so run
                        # without an eval limit and without blocking.
                        bandit_algo = RandomStop(n_threads * jobs_per_thread,
                                                    self.bandit, cmd=self.cmd)
                        print bandit_algo
                        exp = Experiment(trials, bandit_algo, max_queue_len=1)
                        exp.run(sys.maxint, block_until_done=False)
                    else:
                        bandit_algo = Random(self.bandit, cmd=self.cmd)
                        exp = Experiment(trials, bandit_algo,
                                                       max_queue_len=10000)
                        exp.run(n_threads * jobs_per_thread,
                                 block_until_done=(len(self.exp_keys) == 1))
                    exp_list.append(exp)
                    trials_list.append(trials)
            finally:
                # Always join the workers, even if an experiment raised.
                print 'joining worker thread...'
                [th.join() for th in threads]

            for exp in exp_list:
                exp.block_until_done()

            # Every experiment must have completed exactly its share of
            # trials, whether counted from the synced or unsynced view.
            for trials in trials_list:
                assert trials.count_by_state_synced(JOB_STATE_DONE)\
                        == n_trials_per_exp, (trials.count_by_state_synced(JOB_STATE_DONE), n_trials_per_exp)
                assert trials.count_by_state_unsynced(JOB_STATE_DONE)\
                        == n_trials_per_exp
                assert len(trials) == n_trials_per_exp, (
                    'trials failure %d %d ' % (len(trials) , n_trials_per_exp))
                assert len(trials.results) == n_trials_per_exp, (
                    'results failure %d %d ' % (len(trials.results),
                        n_trials_per_exp))
            # A key-less view sees the union of all experiments.
            all_trials = MongoTrials(tm.connection_string('foodb'))
            assert len(all_trials) == n_trials_total
Пример #39
0
    while line.find('Trials:') < 0:
        line = f.readline()
    
    trials = line
    results = f.readline()
    losses = f.readline()
    statuses = f.readline()
    f.close()

    trials = trials.split('Trials: ')[1]
    exec "trials=" + trials
    return trials

if __name__=="__main__":
    # Load the finished trials of the 'big_run3' experiment from the
    # shared MongoDB jobs collection.
    trials = MongoTrials('mongo://localhost:1234/first_try/jobs', exp_key='big_run3')

    # Hyperparameter names whose sampled values we want to inspect.
    x_name = 'test_bias'
    y_name = 'learn_bias'
    z_name = 'learning_rate'

    x = trials.vals[x_name]
    y = trials.vals[y_name]
    z = trials.vals[z_name]
    w = trials.losses()

    # Keep only trials with loss below 1 (filter returns a list under
    # Python 2, which the rest of this script's syntax implies).
    indices = filter(lambda x: w[x] < 1, range(len(w)))
    x = [x[i] for i in indices]
    y = [y[i] for i in indices]
    z = [z[i] for i in indices]
    w = [w[i] for i in indices]
    # NOTE(review): x/y/z/w presumably feed plotting code beyond this
    # excerpt — confirm against the full script.