Example #1
def get_cnn_model(model_num, search_space):
    space = cnn_space(search_space)
    hparams = {'model_' + model_num: 'CNN',
            'word_vectors_' + model_num: ('word2vec', True),
            'delta_' + model_num: True,
            'flex_' + model_num: (True, .15),
            'filters_' + model_num: hp.quniform('filters_' + model_num, *space['filters_'], 1),
            'kernel_size_' + model_num: hp.quniform('kernel_size_' + model_num, *space['kernel_size_'], 1),
            'kernel_increment_' + model_num: hp.quniform('kernel_increment_' + model_num, *space['kernel_increment_'], 1),
            'kernel_num_' + model_num: hp.quniform('kernel_num_' + model_num, *space['kernel_num_'], 1),
            'dropout_' + model_num: hp.uniform('dropout_' + model_num, *space['dropout_']),
            'batch_size_' + model_num: hp.quniform('batch_size_' + model_num, *space['batch_size_'], 1),
            'activation_fn_' + model_num: hp.choice('activation_fn_' + model_num, space['activation_fn_'])}

    if space['no_reg']:
        hparams['regularizer_cnn_' + model_num] = hp.choice('regularizer_cnn_' + model_num, [
                (None, 0.0),
                ('l2', hp.uniform('l2_strength_cnn_' + model_num, *space['l2_'])),
                ('l2_clip', hp.uniform('l2_clip_norm_' + model_num, *space['l2_clip_']))
            ])

    else:
        hparams['regularizer_cnn_' + model_num] = hp.choice('regularizer_cnn_' + model_num, [
                ('l2', hp.uniform('l2_strength_cnn_' + model_num, *space['l2_'])),
                ('l2_clip', hp.uniform('l2_clip_norm_' + model_num, *space['l2_clip_']))
            ])

    if space['search_lr']:
        hparams['learning_rate_' + model_num] = hp.lognormal('learning_rate_' + model_num, 0, 1) / 3000
    else:
        hparams['learning_rate_' + model_num] = .0003

    return hparams
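
A minimal usage sketch for the space above, assuming the returned dict is handed straight to hyperopt's fmin; train_cnn and search_space are hypothetical placeholders for the caller's objective and base space:

from hyperopt import fmin, tpe, Trials

def train_cnn(hparams):
    # hypothetical objective: train a CNN with `hparams` and return validation loss
    return 0.0

space = get_cnn_model('0', search_space)  # model_num is a string suffix
trials = Trials()
best = fmin(train_cnn, space, algo=tpe.suggest, max_evals=50, trials=trials)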
Example #2
def construct_subspace(module, pick):
    rescaling = hp.choice(
        'rescaling', [module.rescaling_list[i] for i in pick[0]])
    balancing = hp.choice(
        'balancing', [module.balancing_list[i] for i in pick[1]])
    feat_pre = hp.choice(
        'feat_pre', [module.feat_pre_list[i] for i in pick[2]])
    classifier = hp.choice(
        'classifier', [module.classifier_list[i] for i in pick[3]])

    subspace = {
        'rescaling': rescaling,
        'balancing': balancing,
        'feat_pre': feat_pre,
        'classifier': classifier}

    return subspace
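
A hypothetical call, assuming module exposes the four candidate lists and each entry of pick holds indices into the corresponding list:

class module:
    rescaling_list = ['none', 'standardize', 'minmax']
    balancing_list = ['none', 'weighting']
    feat_pre_list = ['none', 'pca', 'kitchen_sinks']
    classifier_list = ['sgd', 'random_forest', 'extra_trees']

# keep rescalings 0 and 2, both balancings, feat_pre 1, classifiers 0 and 2
subspace = construct_subspace(module, pick=([0, 2], [0, 1], [1], [0, 2]))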
Example #3
def lr_bayes_search(train_fname, test_fname, out_fname_prefix='sk-svc-bayes'):
    exp = ExperimentL1(train_fname=train_fname, test_fname=test_fname)

    param_keys = ['model_type', 'C',
                  #'loss',
                  'penalty', 'tol', 'solver', 'class_weight',
                  'random_state']

    param_space = {'model_type': LogisticRegression, 'C': hp.uniform('c', 0.1, 3),
                   #'loss': hp.choice('loss', ['hinge', 'squared_hinge']),
                   #'penalty': hp.choice('pen', ['l1', 'l2']),
                   'penalty': 'l2',
                   'tol': hp.uniform('tol', 1e-6, 3e-4),
                   'solver': hp.choice('solver', ['liblinear', 'lbfgs','newton-cg']),
                   'class_weight': hp.choice('cls_w', [None, 'auto']),
                   'random_state': hp.choice('seed', [1234, 53454, 6676, 12893]),
                   #'n_jobs': 2
                   }

    bs = param_search.BayesSearch(SklearnModel, exp, param_keys, param_space,
                                  cv_out=out_fname_prefix+'-scores.pkl',
                                  cv_pred_out=out_fname_prefix+'-preds.pkl',
                                  refit_pred_out=out_fname_prefix+'-refit-preds.pkl',
                                  dump_round=1)
    best = bs.search_by_cv(max_evals=60)
    param_search.write_cv_res_csv(bs.cv_out, bs.cv_out.replace('.pkl', '.csv'))
    return best
Example #4
def decision_tree(name,
                  criterion=None,
                  splitter=None,
                  max_features=None,
                  max_depth=None,
                  min_samples_split=None,
                  min_samples_leaf=None,
                  presort=False,
                  random_state=None):

    def _name(msg):
        return '%s.%s_%s' % (name, 'decision_tree', msg)

    rval = scope.sklearn_DecisionTreeClassifier(
        criterion=hp.choice(
            _name('criterion'),
            ['gini', 'entropy']) if criterion is None else criterion,
        splitter=hp.choice(
            _name('splitter'),
            ['best', 'random']) if splitter is None else splitter,
        max_features=hp.choice(
            _name('max_features'),
            ['sqrt', 'log2',
             None]) if max_features is None else max_features,
        max_depth=max_depth,
        min_samples_split=hp.quniform(
            _name('min_samples_split'),
            1, 10, 1) if min_samples_split is None else min_samples_split,
        min_samples_leaf=hp.quniform(
            _name('min_samples_leaf'),
            1, 5, 1) if min_samples_leaf is None else min_samples_leaf,
        presort=presort, 
        random_state=_random_state(_name('rstate'), random_state),
        )
    return rval
Example #5
    def set_finetune_space(self, config_file):
        ''' Given the original deep net architecture and a set of pretrained weights
        and biases, define the configuration space to search for finetuning parameters '''

        # we know these fields won't change, so go ahead and set them as
        # defaults now
        model_params = nt.get_model_params(config_file)
        optim_params = nt.get_optim_params(config_file)
        default_finetune_model_params = {k: model_params[k] for k in ('num_hids', 'activs', 'd', 'k')}
        default_finetune_model_params['loss_terms'] = ['cross_entropy']
        default_finetune_optim_params = {k: optim_params[k] for k in ('optim_method', 'optim_type')}

        # define the space of hyperparameters we wish to search over
        search_finetune_model_params = {'l1_reg': hp.choice('l1_reg', [None, hp.loguniform('l1_decay', log(1e-5), log(10))]),
                                        'l2_reg': hp.choice('l2_reg', [None, hp.loguniform('l2_decay', log(1e-5), log(10))])}
        search_finetune_optim_params = {'learn_rate': hp.uniform('learn_rate', 0, 1),
                                        'rho': hp.uniform('rho', 0, 1),
                                        'num_epochs': hp.qloguniform('num_epochs', log(10), log(5e3), 1),
                                        'batch_size': hp.quniform('batch_size', 128, 1024, 1),
                                        'init_method': hp.choice('init_method', ['gauss', 'fan-io']),
                                        'scale_factor': hp.uniform('scale_factor', 0, 1)}

        # combine the default and search parameters into a dictionary to define the
        # full space - this is what will be passed into the objective function
        finetune_model_params = self.merge_default_search(
            default_finetune_model_params, search_finetune_model_params)
        finetune_optim_params = self.merge_default_search(
            default_finetune_optim_params, search_finetune_optim_params)

        finetune_hyperspace = {
            'finetune_model_params': finetune_model_params, 'finetune_optim_params': finetune_optim_params}

        return finetune_hyperspace
Example #6
    def test_basic2(self):
        space = hp.choice('normal_choice', [
            hp.pchoice('fsd',
                       [(.1, 'first'),
                        (.8, 'second'),
                        (.1, 2)]),
            hp.choice('something_else', [10, 20])
        ])
        a, b, c = 0, 0, 0
        rng = np.random.RandomState(123)
        for _ in range(1000):
            nesto = hyperopt.pyll.stochastic.sample(space, rng=rng)
            if nesto == 'first':
                a += 1
            elif nesto == 'second':
                b += 1
            elif nesto == 2:
                c += 1
            elif nesto in (10, 20):
                pass
            else:
                assert 0, nesto
        print((a, b, c))
        assert b > 2 * a
        assert b > 2 * c
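
hp.pchoice is the weighted counterpart of hp.choice: each option carries an explicit probability (.1/.8/.1 above), which is what the frequency assertions check. A standalone sample using the same API:

import numpy as np
import hyperopt.pyll.stochastic
from hyperopt import hp

sp = hp.pchoice('weighted', [(.2, 'a'), (.8, 'b')])
rng = np.random.RandomState(0)
draws = [hyperopt.pyll.stochastic.sample(sp, rng=rng) for _ in range(100)]
print(draws.count('b'))  # roughly 80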
Example #7
def nn_bayes_search(train_fname, test_fname, out_fname_prefix='nn-bayes'):
    exp = ExperimentL1(train_fname=train_fname, test_fname=test_fname)
    param_keys = ['in_size', 'hid_size', 'batch_size', 'in_dropout',
                  'hid_dropout', 'nonlinearity',
                  'updates',
                  'learning_rate',
                  #'l1_reg',
                  #'l2_reg',
                  'num_epochs']
    param_space = {'in_size': exp.train_x.shape[1],
                   'hid_size': hp.quniform('hid', 10, 300, 5),
                   'batch_size': hp.quniform('bsize', 200, 5000, 50),
                   'in_dropout': hp.uniform('in_drop',  0.0, 0.5),
                   'hid_dropout': hp.uniform('hid_drop',  0.0, 0.6),
                   'updates': hp.choice('updates', [nesterov_momentum, adam]),
                   'nonlinearity': hp.choice('nonlinear',  [sigmoid, tanh, rectify]),
                   'learning_rate': hp.uniform('lr', 0.0001, 0.1),

                   #'learning_rate': 0.01,
                   #'l1_reg': hp.uniform('l1_reg', 0.0, 0.000001),
                   #'l2_reg': hp.uniform('l2_reg', 0.0, 0.000001),
                   'num_epochs': hp.quniform('epochs', 200, 1000, 50),
                   }

    bs = param_search.BayesSearch(LasagneModel, exp, model_param_keys=param_keys, model_param_space=param_space,
                     cv_out=out_fname_prefix+'-scores.pkl',
                     cv_pred_out=out_fname_prefix+'-preds.pkl',
                     refit_pred_out=out_fname_prefix+'-refit-preds.pkl',
                     dump_round=1, use_lower=0, n_folds=5)
    bs.search_by_cv(max_evals=301)
    param_search.write_cv_res_csv(bs.cv_out, bs.cv_out.replace('.pkl', '.csv'))
Example #8
        def get_space():
            wemb_dict = {'wemb_dim': hp.choice('wemb_dim', hyperopt_params['wembdim']),
                         'wemb_init': hp.choice('wemb_init', hyperopt_params['wembinit']),
                         'wemb_dropout': hp.choice('wemb_dropout', hyperopt_params['wembdropout']),
                         'optimizer': hp.choice('optimizer', hyperopt_params['optimizer']),}
            model_dict = get_space_func()

            return dict(wemb_dict, **model_dict)
Example #9
def get_linear_model_params(name="linear_common"):
    return scope.get_lr_model(
        C=hp.loguniform(get_full_name(name, 'C'), -15, 15),
        penalty=hp.choice(get_full_name(name, 'penalty'), ('l1', 'l2')),
        class_weight=hp.choice(get_full_name(name, 'class_weight'), (defaultdict(lambda: 1.0), 'balanced')),
        fit_intercept=hp.choice(get_full_name(name, 'fit_intercept'), (False, True)),
        random_state=RANDOM_STATE,
        solver='liblinear',
    )
Example #10
def random_forest(name,
                  n_estimators=None,
                  criterion=None,
                  max_features=None,
                  max_depth=None,
                  min_samples_split=None,
                  min_samples_leaf=None,
                  bootstrap=None,
                  oob_score=None,
                  n_jobs=1,
                  random_state=None,
                  verbose=False):

    def _name(msg):
        return '%s.%s_%s' % (name, 'random_forest', msg)

    """
    Out of bag estimation only available if bootstrap=True
    """

    bootstrap_oob = hp.choice(_name('bootstrap_oob'),
                              [(True, True),
                               (True, False),
                               (False, False)])

    rval = scope.sklearn_RandomForestClassifier(
        n_estimators=scope.int(hp.quniform(
            _name('n_estimators'),
            1, 50, 1)) if n_estimators is None else n_estimators,
        criterion=hp.choice(
            _name('criterion'),
            ['gini', 'entropy']) if criterion is None else criterion,
        max_features=hp.choice(
            _name('max_features'),
            ['sqrt', 'log2',
             None]) if max_features is None else max_features,
        max_depth=max_depth,
        min_samples_split=hp.quniform(
            _name('min_samples_split'),
            1, 10, 1) if min_samples_split is None else min_samples_split,
        min_samples_leaf=hp.quniform(
            _name('min_samples_leaf'),
            1, 5, 1) if min_samples_leaf is None else min_samples_leaf,
        bootstrap=bootstrap_oob[0] if bootstrap is None else bootstrap,
        oob_score=bootstrap_oob[1] if oob_score is None else oob_score,
        #bootstrap=hp.choice(
        #    _name('bootstrap'),
        #    [ True, False ] ) if bootstrap is None else bootstrap,
        #oob_score=hp.choice(
        #    _name('oob_score'),
        #    [ True, False ] ) if oob_score is None else oob_score,
        n_jobs=n_jobs,
        random_state=_random_state(_name('rstate'), random_state),
        verbose=verbose,
        )
    return rval
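
Sampling bootstrap_oob in isolation shows why the paired choice is used: both flags are drawn together, so bootstrap=False with oob_score=True can never occur. A minimal sketch:

import numpy as np
import hyperopt.pyll.stochastic
from hyperopt import hp

bootstrap_oob = hp.choice('bootstrap_oob',
                          [(True, True), (True, False), (False, False)])
print(hyperopt.pyll.stochastic.sample(bootstrap_oob, rng=np.random.RandomState(0)))
# e.g. (True, False) -- bootstrap on, oob_score off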
Example #11
    def dense_space(self, input_shape = None, output_dim = None, activation = None):
        unq = "|" + str(random.randint(1, 99999))

        return  {
            'class_name': 'Dense',
            'config': {
                'output_dim': hp.choice("output_dim" + unq, [32, 64, 128, 256, 512]),
                'activation': hp.choice("activation" + unq, ["linear", "relu", "softmax"])
             }
        }
Example #12
def main():
    client = Client()
    print('n. clients:', len(client))

    digits = load_digits()

    X = MinMaxScaler().fit_transform(digits.data)
    y = digits.target

    pre_processing = hp.choice('preproc_algo', [
        scope.PCA(
            n_components=1 + hp.qlognormal(
                'pca_n_comp', np.log(10), np.log(10), 1),
            whiten=hp.choice(
                'pca_whiten', [False, True])),
        scope.GMM(
            n_components=1 + hp.qlognormal(
                'gmm_n_comp', np.log(100), np.log(10), 1),
            covariance_type=hp.choice(
                'gmm_covtype', ['spherical', 'tied', 'diag', 'full'])),
        ])

    classifier = hp.choice('classifier', [
        scope.DecisionTreeClassifier(
            criterion=hp.choice('dtree_criterion', ['gini', 'entropy']),
            max_features=hp.uniform('dtree_max_features', 0, 1),
            max_depth=hp.quniform('dtree_max_depth', 1, 25, 1)),
        scope.SVC(
            C=hp.lognormal('svc_rbf_C', 0, 3),
            kernel='rbf',
            gamma=hp.lognormal('svc_rbf_gamma', 0, 2),
            tol=hp.lognormal('svc_rbf_tol', np.log(1e-3), 1)),
        ])

    sklearn_space = {'pre_processing': pre_processing,
                     'classifier': classifier}

    digits_cv_split_filenames = mmap_utils.persist_cv_splits(
                X, y, name='digits_10', n_cv_iter=10)

    mmap_utils.warm_mmap_on_cv_splits(client, digits_cv_split_filenames)

    trials = hyperselect.IPythonTrials(client)
    trials.fmin(
        partial(compute_evaluation,
            cv_split_filename=digits_cv_split_filenames[0],
            ),
        sklearn_space,
        algo=hyperopt.tpe.suggest,
        max_evals=30,
        verbose=1,
        )
    trials.wait()
    print(trials.best_trial)
Example #13
def add_MIP():
    space["features"]["personal"] = hp.choice(
        "personal",
        [
            {"use": False},
            {
                "use": True,
                "per_binarize": hp.choice("per_binarize", ["True", "False"]),
                "per_min_doc_threshold": hp.choice("per_min_doc_threshold", [1, 2, 3, 4, 5]),
            },
        ],
    )
Example #14
def add_mccain():
    space["features"]["mccain"] = hp.choice(
        "mccain",
        [
            {"use": False},
            {
                "use": True,
                "om_binarize": hp.choice("om_binarize", ["True", "False"]),
                "om_min_doc_threshold": hp.choice("om_min_doc_threshold", [1, 2, 3, 4, 5]),
            },
        ],
    )
Example #15
def add_obama():
    space["features"]["obama"] = hp.choice(
        "obama",
        [
            {"use": False},
            {
                "use": True,
                "ob_binarize": hp.choice("ob_binarize", ["True", "False"]),
                "ob_min_doc_threshold": hp.choice("ob_min_doc_threshold", [1, 2, 3, 4, 5]),
            },
        ],
    )
Example #16
def helper_naive_type():
    return hp.choice('naive_subtype', [
        {'ktype': 'gaussian'},
        {'ktype': 'multinomial', 'alpha': hp.lognormal('alpha_mult', 0, 1),
         'fit_prior': hp.choice('bool_mult', [False, True])},
        {'ktype': 'bernoulli', 'alpha': hp.lognormal('alpha_ber', 0, 1),
         'fit_prior': hp.choice('bool_ber', [False, True]),
         'binarize': hp.choice('binarize_or_not',
                               [
                                   .0,
                                   hp.lognormal('threshold', 0, 1)
                               ])}
    ])
Example #17
    def set_multilayer_dropout_space(self):
        ''' defines a hyperspace for "modern" neural networks: at least two layers with dropout + ReLU '''

        # Force at least 2 layers, cuz we're modern
        min_layers = 2
        max_layers = 3

        # sets up the neural network
        nnets = [None] * (max_layers - min_layers + 1)

        for i, num_layers in enumerate(range(min_layers, max_layers + 1)):
            num_hids = [None] * num_layers
            for j in range(num_layers):
                num_hids[j] = hp.qloguniform(
                    'num_hid_%i%i' % (i, j), log(100), log(1000), 1)

            nnets[i] = num_hids

        default_mln_model_params = {
            'd': self.d, 'k': self.k, 'loss_terms': ['cross_entropy', 'dropout']}

        search_mln_model_params = {
            'arch': hp.choice('arch', nnets),
            'input_p': hp.uniform('ip', 0, 1),
            'hidden_p': hp.uniform('hp', 0, 1),
            'l1_reg': hp.choice('l1_reg', [None, hp.loguniform('l1_decay', log(1e-5), log(10))]),
            'l2_reg': hp.choice('l2_reg', [None, hp.loguniform('l2_decay', log(1e-5), log(10))])}

        default_mln_optim_params = {
            'optim_type': 'minibatch', 'optim_method': 'RMSPROP'}

        search_mln_optim_params = {
            'learn_rate': hp.uniform('learn_rate', 0, 1),
            'rho': hp.uniform('rho', 0, 1),
            'num_epochs': hp.qloguniform('num_epochs', log(1e2), log(2000), 1),
            'batch_size': hp.quniform('batch_size', 128, 1024, 1),
            'init_method': hp.choice('init_method', ['gauss', 'fan-io']),
            'scale_factor': hp.uniform('scale_factor', 0, 1)}

        # merge the default and search spaces
        mln_model_params = self.merge_default_search(
            default_mln_model_params, search_mln_model_params)
        mln_optim_params = self.merge_default_search(
            default_mln_optim_params, search_mln_optim_params)

        # define the hyperparameter space to search
        hyperspace = {'mln_model_params': mln_model_params,
                      'mln_optim_params': mln_optim_params}

        return hyperspace
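
The arch entry is a choice over lists of per-layer width distributions, so a single draw yields both a depth and the widths. A compact standalone version of the same construction (the sampled value shown is illustrative):

from math import log
import numpy as np
import hyperopt.pyll.stochastic
from hyperopt import hp

nnets = [[hp.qloguniform('num_hid_%i%i' % (i, j), log(100), log(1000), 1)
          for j in range(num_layers)]
         for i, num_layers in enumerate(range(2, 4))]
arch = hp.choice('arch', nnets)
print(hyperopt.pyll.stochastic.sample(arch, rng=np.random.RandomState(0)))
# e.g. [416.0, 203.0] -- a 2- or 3-layer architecture and its layer widths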
Example #18
def add_MOLD():
    space["features"]["like-dislike"] = hp.choice(
        "like-dislike",
        [
            {"use": False},
            {
                "use": True,
                "ld_binarize": hp.choice("ld_binarize", ["True", "False"]),
                "ld_min_doc_threshold": hp.choice("ld_min_doc_threshold", [1, 2, 3, 4, 5]),
            },
        ],
    )
    add_obama()
    add_mccain()
Example #19
def test_expr_to_config():

    z = hp.randint('z', 10)
    a = hp.choice('a',
                  [
                      hp.uniform('b', -1, 1) + z,
                      {'c': 1, 'd': hp.choice('d',
                                              [3 + hp.loguniform('c', 0, 1),
                                               1 + hp.loguniform('e', 0, 1)])
                      }])

    expr = as_apply((a, z))

    hps = {}
    expr_to_config(expr, (True,), hps)

    for label, dct in hps.items():
        print(label)
        print('  dist: %s(%s)' % (
            dct['node'].name,
            ', '.join(map(str, [ii.eval() for ii in dct['node'].inputs()]))))
        if len(dct['conditions']) > 1:
            print('  conditions (OR):')
            for condseq in dct['conditions']:
                print('    ', ' AND '.join(map(str, condseq)))
        elif dct['conditions']:
            for condseq in dct['conditions']:
                print('  conditions :', ' AND '.join(map(str, condseq)))


    assert hps['a']['node'].name == 'randint'
    assert hps['b']['node'].name == 'uniform'
    assert hps['c']['node'].name == 'loguniform'
    assert hps['d']['node'].name == 'randint'
    assert hps['e']['node'].name == 'loguniform'
    assert hps['z']['node'].name == 'randint'

    assert set([(True, EQ('a', 0))]) == set([(True, EQ('a', 0))])
    assert hps['a']['conditions'] == set([(True,)])
    assert hps['b']['conditions'] == set([
        (True, EQ('a', 0))]), hps['b']['conditions']
    assert hps['c']['conditions'] == set([
        (True, EQ('a', 1), EQ('d', 0))])
    assert hps['d']['conditions'] == set([
        (True, EQ('a', 1))])
    assert hps['e']['conditions'] == set([
        (True, EQ('a', 1), EQ('d', 1))])
    assert hps['z']['conditions'] == set([
        (True,),
        (True, EQ('a', 0))])
Example #20
    def get_hp_space():
        space_training = {
            'batch_size': hopt_wrapper.quniform_int('batch_size', 50, 500, 1),
            'temporal_order': hopt_wrapper.qloguniform_int('temporal_order', log(3), log(20), 1),
        }
        space_regularization = {
            'dropout_probability': hp.choice('dropout', [
                0.0,
                hp.normal('dropout_probability', 0.5, 0.1)
            ]),
            'weight_decay_coeff': hp.choice('weight_decay_coeff', [
                0.0,
                hp.uniform('a', 1e-4, 1e-4)
            ])
        }
        space_training.update(space_regularization)
        return space_training
Example #21
def xgb_parameter_search():
    from hyperopt import fmin, tpe, hp
    from kagura.xgbwrapper import XGBWrapper

    xs = load("xs")
    ys = load("ys")

    if args.tiny:
        tmp, xs, tmp, ys = stratified_split(xs, ys)

    train_xs, test_xs, train_ys, test_ys = stratified_split(xs, ys)

    def target_func(params):
        eta, max_depth, subsample, colsample_bytree = params
        global model
        model = XGBWrapper(
            eta=eta, max_depth=max_depth, test=(test_xs, test_ys),
            subsample=subsample, colsample_bytree=colsample_bytree,
            num_class=10
        )

        model.fit(train_xs, train_ys)
        log_loss = model.score(test_xs, test_ys)
        logging.info(
            "hyperopt eta=%f,max_depth=%d,subsample=%f"
            ",colsample_bytree=%f,log_loss=%f,best_iteration=%d",
            eta, max_depth, subsample, colsample_bytree,
            log_loss, model.bst.best_iteration)

        name = 'xgb_%f_%d_%f_%f_%f' % (eta, max_depth, subsample, colsample_bytree, log_loss)
        model.bst.save_model(name)
        return log_loss

    default_space = [
             hp.uniform('eta', 0, 1),
             hp.choice('max_depth', [4, 5, 6, 7, 8, 9]),
             hp.uniform('subsample', 0.4, 1),
             hp.uniform('colsample_bytree', 0.4, 1)]
    narrow_space = [
             hp.uniform('eta', 0.1, 0.4),
             hp.choice('max_depth', [5, 6]),
             hp.uniform('subsample', 0.8, 1),
             hp.uniform('colsample_bytree', 0.8, 1)]
    fmin(fn=target_func,
         space=narrow_space,
         algo=tpe.suggest,
         max_evals=10000)

    return
Example #22
def subspace_to_tpe(label, subspace, escape_char_depth = "/", escape_char_choice = "@"):
    """
        Recursively convert the search space defined by dicts, lists and Parameters
        into a TPE equivalent search space.

        label: The label for the current subspace.
        subspace: The subspace of the global search space.
        escape_char_depth: Escape character used to encode dict nesting into the parameter label.
        escape_char_choice: Escape character used to encode choice branches into the parameter label.
    """
    if isinstance(subspace, dict):
        converted_space = {}
        for item_name, item in subspace.items():
            nested_label = encode_tree_path(label, escape_char_depth, item_name)
            converted_item = subspace_to_tpe(nested_label,
                                             item,
                                             escape_char_depth,
                                             escape_char_choice)
            converted_space[nested_label] = converted_item
        return converted_space
    if isinstance(subspace, list):
        items = []
        for item in subspace:
            assert("type" in item)
            item_type = item["type"]
            item_label = encode_tree_path(label, escape_char_choice, item_type)
            items.append(subspace_to_tpe(item_label,
                                         item,
                                         escape_char_depth,
                                         escape_char_choice))
        return hp.choice(label, items)
    if isinstance(subspace, Parameter):
        return parameter_to_tpe(label, subspace)
    else:
        return subspace
Example #23
def any_sparse_classifier(name):
    return hp.choice('%s' % name, [
        svc(name + '.svc'),
        sgd(name + '.sgd'),
        knn(name + '.knn', sparse_data=True),
        multinomial_nb(name + '.multinomial_nb')
        ])
Example #24
    def string_to_pyll(self):
        line = shlex.split(self.command)

        algorithms = ['sgd']
        for arg in line:
            arg, value = arg.split('=')
            if arg == '--algorithms':
                algorithms = set(self.range_pattern.findall(value)[0].split(','))
                if tuple(self.distr_pattern.findall(value)) not in {(), ('O',)}:
                    raise ValueError(("Distribution options are prohibited for --algorithms flag. "
                                      "Simply list the algorithms instead (like --algorithms=ftrl,sgd)"))
                elif self.distr_pattern.findall(value) == ['O']:
                    algorithms.add('sgd')

                for algo in algorithms:
                    if algo not in self.algorithm_metadata:
                        raise NotImplementedError(("%s algorithm is not found. "
                                                   "Supported algorithms by now are %s")
                                                  % (algo, str(self.algorithm_metadata.keys())))
                break

        self.space = {algo: {'type': algo, 'argument': self.algorithm_metadata[algo]['arg']} for algo in algorithms}
        for algo in algorithms:
            for arg in line:
                arg, value = arg.split('=')
                if arg == '--algorithms':
                    continue
                if arg not in self.algorithm_metadata[algo]['prohibited_flags']:
                    distrib = self._process_vw_argument(arg, value, algo)
                    self.space[algo][arg] = distrib
                else:
                    pass
        self.space = hp.choice('algorithm', list(self.space.values()))
Example #25
    def conv_model_space(self, n_layers, data_filename):
        layers = [hp.choice('l0', [self.first_layer(self.dense_space()), self.first_layer(self.convolution2d_space())])]
        for i in range(n_layers-2):
            layers.append(hp.choice("l%d" % (i+1), self.conv_layers_spaces()))

        layers.append(hp.choice("l%d" % (n_layers - 1), [self.last_layer(self.dense_space()), self.last_layer(self.convolution2d_space())]))

        # Full model search space for layer size `n_layers`
        return {
            'config': layers,
            'loss': 'categorical_crossentropy',
            'class_name': 'Sequential',
            'class_mode': 'categorical',
            #'optimizer': hp.choice("optimizer", [self.sgd_space()]),
            'data_filename': data_filename
        }
Example #26
def optimize(trials, X, y, max_evals):
    space = {
        'n_estimators': hp.quniform('n_estimators', 100, 500, 50),
        'criterion': hp.choice('criterion', ['gini', 'entropy']),
        'max_depth': hp.quniform('max_depth', 1, 7, 1),
        'min_samples_split': hp.quniform('min_samples_split', 1, 9, 2),
        'min_samples_leaf': hp.quniform('min_samples_leaf', 1, 5, 1),
        'bootstrap': True,
        'oob_score': True,
        'n_jobs': -1
    }
    s = Score(X, y)
    best = fmin(s.get_score,
                space,
                algo=tpe.suggest,
                trials=trials,
                max_evals=max_evals
                )
    best['n_estimators'] = int(best['n_estimators'])
    best['max_depth'] = int(best['max_depth'])
    best['min_samples_split'] = int(best['min_samples_split'])
    best['min_samples_leaf'] = int(best['min_samples_leaf'])
    best['criterion'] = ['gini', 'entropy'][best['criterion']]
    best['bootstrap'] = True
    best['oob_score'] = True
    best['n_jobs'] = -1
    del s
    return best
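
The manual index-to-value conversions above can also be done with hyperopt's space_eval, which resolves fmin's raw result (choice indices and raw floats) against the space. A sketch reusing s, space, and trials from the function above:

from hyperopt import space_eval

best_raw = fmin(s.get_score, space, algo=tpe.suggest,
                trials=trials, max_evals=max_evals)
best_params = space_eval(space, best_raw)
# e.g. {'criterion': 'entropy', 'max_depth': 4.0, ...} -- ints still need casting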
Example #27
def xgb_model_stacking(exp_l2, out_fname_prefix, use_lower=0):
    from xgboost.sklearn import XGBClassifier
    param_keys = ['model_type', 'max_depth', 'min_child_weight', 'subsample', 'colsample_bytree',
                  'learning_rate', 'silent', 'objective', 'nthread', 'n_estimators', 'seed']
    param_space = {'model_type': XGBClassifier, 'max_depth': hp.quniform('max_depth', 2, 9, 1),
                   'min_child_weight': hp.quniform('min_child_weight', 1, 7, 1),
                   'subsample': hp.uniform('subsample', 0.1, 1.0),
                   'colsample_bytree': hp.uniform('colsample', 0.3, 1.0),
                   'learning_rate': hp.uniform('eta', 0.01, 0.02),
                   'silent': 1, 'objective': 'binary:logistic',
                   'nthread': 3, 'n_estimators': hp.quniform('n', 100, 1000, 50),
                   'seed': hp.choice('seed', [1234,53454,6676,12893])}
    # param_space = {'model_type': XGBClassifier, 'max_depth': hp.quniform('max_depth', 3, 9, 1),
    #                'min_child_weight': hp.quniform('min_child_weight', 3, 7, 1),
    #                'subsample': hp.uniform('subsample', 0.1, 1.0),
    #                'colsample_bytree': hp.uniform('colsample', 0.1, 0.6),
    #                'learning_rate': hp.uniform('eta', 0.01, 0.02),
    #                'silent': 1, 'objective': 'binary:logistic',
    #                'nthread': 4, 'n_estimators': 600, 'seed': hp.choice('seed', [1234,53454,6676,12893])}
    # l2 model output
    bs = param_search.BayesSearch(SklearnModel, exp_l2, param_keys, param_space,
                                  cv_out=out_fname_prefix+'-scores.pkl',
                                  cv_pred_out=out_fname_prefix+'-preds.pkl',
                                  refit_pred_out=out_fname_prefix+'-refit-preds.pkl',
                                  dump_round=10, use_lower=use_lower)
    best = bs.search_by_cv()
    param_search.write_cv_res_csv(bs.cv_out, bs.cv_out.replace('.pkl', '.csv'))
    return best
Example #28
    def set_lambda(self, max_evals=10, max_iters=100, n_folds=3, max_lambda=10):
        self.cv_indices = KFold(self.X.shape[0], n_folds=n_folds, shuffle=True)
        self.cross_max_iters = max_iters
        space = hp.choice("model", [{"lambda_1": hp.uniform("lambda_1", 0, max_lambda)}])

        best = fmin(self.__cross_validation, space=space, algo=tpe.suggest, max_evals=max_evals)
        self.lambda_1 = best["lambda_1"]
Example #29
def test_landing_screen():

    # define an objective function
    def objective(args):
        case, val = args
        if case == 'case 1':
            return val
        else:
            return val ** 2

    # define a search space
    from hyperopt import hp
    space = hp.choice('a',
        [
            ('case 1', 1 + hp.lognormal('c1', 0, 1)),
            ('case 2', hp.uniform('c2', -10, 10))
        ])

    # minimize the objective over the space
    import hyperopt
    best = hyperopt.fmin(objective, space,
        algo=hyperopt.tpe.suggest,
        max_evals=100)

    print(best)
    # -> {'a': 1, 'c2': 0.01420615366247227}

    print(hyperopt.space_eval(space, best))
Example #30
def helper_neighbors():
    return hp.choice('neighbor_type', [
        {'ktype': 'kneighbors', 'n_neighbors': hp.quniform('num', 3,
                                                           19, 1)},
        {'ktype': 'radiusneighbors', 'radius': hp.uniform('rad', 0, 2),
         'out_label': 1}
    ])
Example #31
def choice(label, options):
    return hp.choice(label, options)
Example #32
    def getOptimizedHyperParametersRange(self):
        optimizedHyperParametersRange = {
            "kernelName": hp.choice("kernelName", ['epa', 'cos', 'gau', 'par']),
        }
        return optimizedHyperParametersRange
Example #33
    print(config)

    for step in range(config["steps"]):
        # Iterative training function - can be any arbitrary training procedure
        intermediate_score = evaluation_fn(step, width, height, mult)
        # Feed the score back back to Tune.
        tune.report(iterations=step, mean_loss=intermediate_score)
        time.sleep(0.1)


config_space = {
    "activation": hp.choice("activation", [
        {
            "activation": "relu",
            "mult": hp.uniform("mult", 1, 2)
        },
        {
            "activation": "tanh"
        },
    ]),
    "width": hp.uniform("width", 0, 20),
    "height": hp.uniform("heright", -100, 100),
    "steps": 100
}


def run_hyperopt_tune(config_dict=config_space, smoke_test=False):
    algo = HyperOptSearch(space=config_dict, metric="mean_loss", mode="min")
    algo = ConcurrencyLimiter(algo, max_concurrent=4)
    scheduler = AsyncHyperBandScheduler()
    analysis = tune.run(
Example #34
    def _config_tuning_space(tuning_space_raw):
        if tuning_space_raw is None:
            return None

        hyper_obj = {}

        # continuous parameters searched on a log scale
        for key in ('learning_rate', 'lmbda', 'cmax', 'cmin'):
            if key in tuning_space_raw:
                hyper_obj[key] = hp.loguniform(
                    key,
                    np.log(tuning_space_raw[key]['min']),
                    np.log(tuning_space_raw[key]['max']))

        # integer parameters searched on a quantized log scale
        for key in ('hidden_size', 'ent_hidden_size', 'rel_hidden_size',
                    'batch_size'):
            if key in tuning_space_raw:
                hyper_obj[key] = scope.int(
                    hp.qloguniform(
                        key,
                        np.log(tuning_space_raw[key]['min']),
                        np.log(tuning_space_raw[key]['max']), 1))

        # continuous parameter searched on a linear scale
        if 'margin' in tuning_space_raw:
            hyper_obj['margin'] = hp.uniform(
                'margin',
                tuning_space_raw['margin']['min'],
                tuning_space_raw['margin']['max'])

        # categorical parameters drawn directly from the supplied lists
        for key in ('distance_measure', 'optimizer', 'bilinear', 'epochs',
                    'feature_map_dropout', 'input_dropout', 'hidden_dropout',
                    'use_bias', 'label_smoothing', 'lr_decay', 'l1_flag',
                    'sampling'):
            if key in tuning_space_raw:
                hyper_obj[key] = hp.choice(key, tuning_space_raw[key])

        return hyper_obj
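
For reference, a hypothetical tuning_space_raw this method would accept (ranges as {min, max} dicts, categoricals as plain lists), assuming it is exposed as a staticmethod:

tuning_space_raw = {
    'learning_rate': {'min': 1e-4, 'max': 1e-1},
    'hidden_size': {'min': 16, 'max': 512},
    'margin': {'min': 0.5, 'max': 4.0},
    'optimizer': ['adam', 'sgd', 'rmsprop'],
    'l1_flag': [True, False],
}
space = _config_tuning_space(tuning_space_raw)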
Example #35
        'max_features': hpparams['max_features'],
        'min_samples_split': hpparams['min_samples_split'],
        'min_samples_leaf': 1,
    }

    pipeline = RandomForestClassifier(**params_est)
    scores = model_selection.cross_val_score(pipeline, X[best_columns], Y['target'],
                                             cv=5, scoring='neg_log_loss', n_jobs=2)

    return scores.mean(), scores.std()


space4dt = {
    'n_estimators': hp.uniform('n_estimators', 50, 5000),
    'max_features': hp.uniform('max_features', 0.3, 1),
    'min_samples_split': hp.choice('min_samples_split', (2, 3, 4, 5)),
    'criterion': hp.choice('criterion', ('gini', 'entropy')),
}


def f(params):
    global log_, counter, params_, std_
    mlog, mstd = hyperopt_train_test(params)
    counter += 1

    log_.append(mlog)
    params_.append(params)
    std_.append(mstd)

    print("Log Loss: %0.4f (+/- %0.3f), %s" % (mlog, mstd, params))
Example #36
    def deeplab(args):
        tf.reset_default_graph()
        model = deepLabNet()
        output = model.train(args)
        tf.reset_default_graph()
        return output

    search = "grid"  # "grid" or "random"

    if search == "random":

        # define a search space
        space = hp.choice(
            'experiment number',
            [(hp.uniform('learning_rate', 0.0001,
                         0.01), hp.uniform('dropout_prob', 0.5, 1.0),
              hp.uniform('weight_decay', 1.0e-6, 1.0e-4),
              hp.quniform('Epochs', OPTIMIZATION_EPOCHS,
                          OPTIMIZATION_EPOCHS + 1, OPTIMIZATION_EPOCHS))])

        best = fmin(deeplab, space, algo=tpe.suggest, max_evals=EVALUATIONS)

        print('Best learningRate: ', best['learning_rate'], 'Best Dropout: ',
              best['dropout_prob'], 'Best weight decay', best['weight_decay'])
        print('-----------------\n')
        print('Starting training with optimized hyperparameters... \n')
        sys.stdout.flush()

        deeplab((best['learning_rate'], best['dropout_prob'],
                 best['weight_decay'], EPOCHS))
            print("Rerunning from %d trials to add another one." %
                  len(trials.trials))
        except:
            trials = Trials()
            max_evals = nb_evals
            ITERATION = 0
            print("Starting from scratch: new trials.")

        search_params = {
            'dropout':
            hp.quniform('dropout', 0.1, 0.5, 0.1),
            'batch_size':
            2**hp.quniform('batch_size', 4, 8, 1),
            'units':
            hp.choice('units', [
                2**hp.quniform('units_a', 5, 10, 1), 25 *
                (2**hp.quniform('units_b', 0, 4, 1))
            ]),
            'learning_rate':
            hp.choice('learning_rate', [
                5 * 10**-hp.quniform('lr_a', 3, 4, 1),
                1 * 10**-hp.quniform('lr_b', 2, 4, 1)
            ]),
            'local':
            hp.choice('local', [3, 5, 7]),
            'n_kernels':
            hp.choice('n_kernels', [32, 50]),
            'window':
            hp.choice('window', [48, 72, 128, 168])
        }

        best = fmin(optimize,
Example #38
def pipeline(path):
    logger = log.init_log()
    max_evals = 50

    _, name, _, _ = tool.splitPath(path)
    logger.info(f'xgb training started for site: {name}')
    print(f'xgb training started for site: {name}')

    data = np.load(path)

    try:
        X, Y = data[:, :-1], data[:, -1]
    except:
        logger.info(f'site: {name} file read error')
        print(f'site: {name} file read error')
        return 0

    if len(np.unique(Y)) == 1:
        logger.info(f'site: {name} has only one class label')
        print(f'site: {name} has only one class label')
        return 0

    tmp = Y.tolist()
    tmp = dict(Counter(tmp))
    if tmp[0] > tmp[1]:
        ma, mi = tmp[0], tmp[1]
    else:
        ma, mi = tmp[1], tmp[0]
    if mi / ma < 0.01:
        logger.info(f'site: {name} is a low-frequency site')
        print(f'site: {name} is a low-frequency site')
        return 0

    space = {
        "max_depth":
        hp.randint("max_depth", 15),  # [0, upper)
        "n_estimators":
        hp.randint("n_estimators", 5),  # [0,1000)
        "learning_rate":
        hp.uniform("learning_rate", 0.001, 2),  # 0.001-2均匀分布
        "min_child_weight":
        hp.randint("min_child_weight", 5),
        "subsample":
        hp.randint("subsample", 4),
        "reg_alpha":
        hp.choice("reg_alpha", [1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1]),
        "reg_lambda":
        hp.choice("reg_lambda", [1e-5, 1e-4, 1e-3, 1e-2, 0.1, 1, 10, 100]),
        "colsample_bytree":
        hp.choice("colsample_bytree", [0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0]),
        "path":
        hp.choice('path', [path])
    }

    star = time.time()
    algo = partial(tpe.suggest, n_startup_jobs=1)  # choice of optimization algorithm
    best = fmin(XGB, space, algo=algo,
                max_evals=max_evals)  # max_evals is the maximum number of models to evaluate; larger values make finding the optimum more likely

    best = RECOVERXGB(best)
    print(best)
    TRAINXGB(X, Y, best, name, save_path + name + '.xgb')
    end = time.time()
    times = end - star
    logger.info(f'site: {name} xgb time elapsed: {times}')
Example #39
def Hyperopt_get_best_parameters(Metrics='roc_auc', evals_num=30):
    from hyperopt import fmin, tpe, hp, STATUS_OK, Trials, partial
    penalty_list = ['l1', 'l2']
    parameter_space = {
        'C': hp.uniform('C', 0, 1),
        'penalty': hp.choice('penalty', penalty_list),
    }

    def hyperopt_train_test(params):
        clf = LogisticRegression(**params, random_state=123)
        auc = cross_val_score(clf, X_train, y_train, cv=5,
                              scoring=Metrics).mean()  # replace 2
        return auc

    count = 0

    def function(params):
        auc = hyperopt_train_test(params)
        global count
        count = count + 1
        print({'loss': auc, 'status': STATUS_OK, 'count': count})
        return -auc

    count = 0

    def fuction_model(params):
        #    print(params)
        folds = KFold(n_splits=5, shuffle=True, random_state=546789)
        train_preds = np.zeros(X_train.shape[0])
        train_class = np.zeros(X_train.shape[0])
        feats = [
            f for f in X_train.columns if f not in ['Survived', 'PassengerId']
        ]  # note: the user ID column must be dropped as well
        for n_fold, (trn_idx, val_idx) in enumerate(folds.split(X_train)):
            trn_x, trn_y = X_train[feats].iloc[trn_idx], y_train.iloc[trn_idx]
            val_x, val_y = X_train[feats].iloc[val_idx], y_train.iloc[val_idx]
            clf = LogisticRegression(**params, random_state=123)
            clf.fit(trn_x, trn_y)
            train_preds[val_idx] = clf.predict_proba(val_x)[:, 1]
            train_class[val_idx] = clf.predict(val_x)

            del clf, trn_x, trn_y, val_x, val_y
            gc.collect()
        global count
        count = count + 1
        if Metrics == 'roc_auc':
            score = roc_auc_score(y_train, train_preds)
        elif Metrics == 'accuracy':
            score = accuracy_score(y_train, train_class)
        elif Metrics == 'f1':
            score = f1_score(y_train, train_class)
        print("第%s次,%s score为:%f" % (str(count), Metrics, score))
        return -score

    algo = partial(tpe.suggest, n_startup_jobs=20)
    trials = Trials()
    # max_evals -- number of iterations used to search for the best parameters
    best = fmin(fuction_model,
                parameter_space,
                algo=algo,
                max_evals=evals_num,
                trials=trials)

    #best["parameter"]返回的是数组下标,因此需要把它还原回来
    best["penalty"] = penalty_list[best['penalty']]
    print('best:\n', best)

    clf = LogisticRegression(**best, random_state=123)
    phsorce = cross_val_score(
        clf, X_train, y_train, cv=5,
        scoring=Metrics).mean()  # replace 4 roc_auc f1 accuracy
    print('Score with Bayesian-optimized parameters:', phsorce)

    clf = LogisticRegression(random_state=123)
    nosorce = cross_val_score(clf, X_train, y_train, cv=5,
                              scoring=Metrics).mean()  # replace 5
    print('Score with default parameters:', nosorce)

    return best
Example #40
from keras import backend as K
from common_defs import *
from keras.layers import Input, Conv2D, MaxPooling2D, Conv2DTranspose, concatenate
from keras.models import Model
from keras.callbacks import EarlyStopping
from keras.layers.advanced_activations import *
# a dict with x_train, y_train, x_test, y_test
from load_data import data
from hyperopt import hp
from hyperopt.pyll.stochastic import sample

im_height = 128
im_width = 128

space = {
    'batch_size': hp.choice('bs', (16, 32, 64, 128, 256)),
    'optimizer': hp.choice('o', ('rmsprop', 'adagrad', 'adadelta', 'adam', 'adamax', 'sgd'))
}
def print_params(params):
    print("batch_size:", params['batch_size'])
    print("\noptimizer:", params['optimizer'])


def dice_coef(y_true, y_pred, smooth=1):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth)

def get_params():
Example #41
    y_train = np.load(data_root + file_train + '_q8.npy')

    X_train_aug, y_train, X_val_aug, y_val = train_val_split(
        True, X_train_aug, y_train)

    return X_train_aug, y_train, X_val_aug, y_val, X_test_aug, y_test


DROPOUT_CHOICES = np.arange(0.0, 0.9, 0.1)
UNIT_CHOICES = [100, 200, 500, 800, 1000, 1200]
GRU_CHOICES = [100, 200, 300, 400, 500, 600]
BATCH_CHOICES = [16, 32]
LR_CHOICES = [0.0001, 0.0005, 0.001, 0.0025, 0.005, 0.01]
space = {
    'dense1':
    hp.choice('dense1', UNIT_CHOICES),
    'dropout1':
    hp.choice('dropout1', DROPOUT_CHOICES),
    'gru1':
    hp.choice('gru1', GRU_CHOICES),
    # nesting the layers ensures they're only un-rolled sequentially
    'gru2':
    hp.choice(
        'gru2',
        [
            False,
            {
                'gru2_units':
                hp.choice('gru2_units', GRU_CHOICES),
                # only make the 3rd layer availabile if the 2nd one is
                'gru3':
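
The snippet cuts off mid-definition, but the nesting idiom it shows is worth completing: placing each deeper layer's parameters inside the previous layer's hp.choice branch makes them conditional, so gru3 settings are only sampled when gru2 is enabled. A self-contained sketch of the pattern (names mirror the fragment above):

from hyperopt import hp

GRU_CHOICES = [100, 200, 300, 400, 500, 600]

nested = {
    'gru1': hp.choice('gru1', GRU_CHOICES),
    'gru2': hp.choice('gru2', [
        False,
        {
            'gru2_units': hp.choice('gru2_units', GRU_CHOICES),
            # gru3 exists only inside the gru2 branch
            'gru3': hp.choice('gru3', [
                False,
                {'gru3_units': hp.choice('gru3_units', GRU_CHOICES)},
            ]),
        },
    ]),
}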
Example #42
def objective(space):
    global best_score
    model = SVC(**space)
    kfold = KFold(n_splits=3, random_state=1985, shuffle=True)
    score = -cross_val_score(
        model, X, Y, cv=kfold, scoring='neg_log_loss', verbose=False).mean()

    if (score < best_score):
        best_score = score

    return score


space = {
    'C': hp.choice('C', np.arange(0.005, 1.0, 0.005)),
    'kernel': hp.choice('x_kernel', ['linear', 'poly', 'rbf']),
    'degree': hp.choice('x_degree', [2, 3, 4]),
    'probability': hp.choice('x_probability', [True])
}

start = time.time()
trials = Trials()
best = fmin(objective,
            space=space,
            algo=tpe.suggest,
            max_evals=200,
            trials=trials)

print("Hyperopt search took %.2f seconds for 200 candidates" %
      ((time.time() - start)))
Example #43
def generate_simulated_queries(BB, proto_fields, measurer_ip, args):
    global simAF
    simAF = BB
    global server_ip

    global time_sleep

    print(proto_fields)
    phase = "random"
    config = configparser.ConfigParser()
    config.read("common_path.ini")
    query_out_dir = os.path.join(config["common_path"]["query_out_dir"],
                                 measurer_ip)

    print(query_out_dir)

    if not os.path.exists(query_out_dir):
        os.makedirs(query_out_dir)

    proto = args[df.PROTO]
    num_rand_queries = args[df.PER_SERVER_RANDOM_SAMPLE]
    print("num random queries ", num_rand_queries)
    server_ip = args["server_ip"]
    time_sleep = float(args[df.TIME_SLEEP])
    buffer_query = int(args["update_db_at_once"])

    queryBuffer = []
    simulated_space = {}
    for f, finfo in proto_fields.items():
        ar = finfo.accepted_range
        e_str = ar[0]
        e_end = ar[len(ar) - 1]
        print(e_end, e_str)
        if type(e_end) is str:
            print("STRING", ar)
            list_ap = ar
            simulated_space[f] = hp.choice(f, ar)
        else:

            len1 = e_end - e_str

            if (len1 >= df.SMALL_FIELD_THRESHOLD):
                print("VERY LARGE")
                simulated_space[f] = hp.quniform(f, e_str, e_end, 100)
            else:
                simulated_space[f] = hp.choice(f, finfo.accepted_range)

        print(f, vars(finfo), ar, e_str, e_end)
        print(simulated_space[f])

    print(simulated_space, len(simulated_space))
    per_server_budget = args["per_server_budget"]

    if ('init_trials' in args):
        points_to_evaluate = args['init_trials']
        print("OLD Trials: \n\n", points_to_evaluate, len(points_to_evaluate),
              "\n\n", " DONE")
        lp = len(points_to_evaluate)

    else:
        points_to_evaluate = None
        lp = 0
    rand_budget = 0
    trials1 = Trials()

    best = fmin(fn=f1, space=simulated_space,
                points_to_evaluate=points_to_evaluate,
                algo=hyperopt.anneal.suggest,
                max_evals=per_server_budget, trials=trials1)

    pq = PriorityQueue()

    for ind_ele in trials1.results:
        loss = ind_ele['loss'] * -1
        print(ind_ele['String']['value'], loss)
        ll = []

        field_values = ind_ele['String']['value']
        af = loss
        insert_data = gen_query_buffer_entry(field_values, af, server_ip,
                                             measurer_ip, 'SA')
        print("Insert data ", insert_data)
        queryBuffer.append(insert_data)

    if len(queryBuffer) != 0:
        print("updating query buffer with len ", len(queryBuffer))
        query_out_filename = os.path.join(query_out_dir, server_ip)
        query_json.write_to_json(queryBuffer, query_out_filename)

        queryBuffer.clear()

    return None
Example #44

best_score = 1.0


def objective(space):
    global best_score
    test_score = ex.run(config_updates=space).result
    score = 1 - test_score
    print("Score:", score)
    return score


space = {
    'estimator__C':
    hp.choice('C', np.arange(0.005, 1.0, 0.005)),
    'features__lower_pipe__tfidf__ngram_range':
    hp.choice('features__lower_pipe__lower_tfidf__ngram_range',
              [(1, 2), (1, 3), (1, 4)]),
    'features__with_tone_char__ngram_range':
    hp.choice('features__with_tone_char__ngram_range', [(1, 4), (1, 5),
                                                        (1, 6)]),
    'features__remove_tone__tfidf__ngram_range':
    hp.choice('features__remove_tone__tfidf__ngram_range', [(1, 2), (1, 3),
                                                            (1, 4)])
}
start = time.time()
trials = Trials()
best = fmin(objective,
            space=space,
            algo=tpe.suggest,
Example #45
# -*- coding: utf-8 -*-
#
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

from hyperopt import hp

common_hyperparameters = {
    'lr': hp.uniform('lr', low=1e-4, high=3e-1),
    'weight_decay': hp.uniform('weight_decay', low=0, high=3e-3),
    'patience': hp.choice('patience', [30]),
    'batch_size': hp.choice('batch_size', [32, 64, 128, 256, 512]),
}

gcn_hyperparameters = {
    'gnn_hidden_feats':
    hp.choice('gnn_hidden_feats', [32, 64, 128, 256]),
    'predictor_hidden_feats':
    hp.choice('predictor_hidden_feats', [16, 32, 64, 128, 256, 512, 1024]),
    'num_gnn_layers':
    hp.choice('num_gnn_layers', [1, 2, 3, 4, 5]),
    'residual':
    hp.choice('residual', [True, False]),
    'batchnorm':
    hp.choice('batchnorm', [True, False]),
    'dropout':
    hp.uniform('dropout', low=0., high=0.6)
}

gat_hyperparameters = {
    'gnn_hidden_feats':
Example #46
            "fc_dropout_coeff":
            hp.uniform("fc_dropout_coeff", 0, 0.9),
            "cnn_dropout_coeff":
            hp.uniform("cnn_dropout_coeff", 0, 0.9),
            "num_cnn_layers":
            hp.quniform("num_cnn_layers", 4, 7, 1),
            "cnn_depth":
            hp.quniform("cnn_depth", 16, 50, 1),
            "num_hidden":
            hp.qloguniform("num_hidden", log(NUM_CLASSES), log(100), 1),

            # "padding"           : hp.choice("padding", ["same", "valid"]),
            # "cnn_kern_width"    : hp.quniform("cnn_kern_width", log(2), log(10), 1),
            # "cnn_kern_height"   : hp.quniform("cnn_kern_height", log(2), log(20), 1),
            "cnn_dim_decay":
            hp.choice("cnn_dim_decay", [1, 0.8, 0.75, 0.5]),
            "cnn_depth_growth":
            hp.choice("cnn_depth_growth", [1, 1.25, 1.5, 2]),
        }
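        # Note: hp.qloguniform takes its bounds in log space, so the
        # log(NUM_CLASSES)..log(100) bounds above yield hidden sizes roughly
        # in [NUM_CLASSES, 100]; quniform/qloguniform return floats, so cast
        # to int before building layers.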

        best = fmin(fn=train_model_with_params,
                    space=hyperopt_space,
                    algo=tpe.suggest,
                    max_evals=30)
        print("best params:", best)

        pred = predict(SoundDatagen(test_files, None), "nofolds")
        pred = encode_predictions(pred)
    elif not ENABLE_KFOLD:
        x_train, x_val, y_train, y_val = train_test_split(train_files,
                                                          train_labels,
Exemplo n.º 47
0
# trim the validation set to a multiple of gpu_count
v_sample = (len(y_valid) // gpu_count) * gpu_count
X_valid = X_valid[:v_sample]
y_valid = y_valid[:v_sample]

# reshape to fit the Conv1D shape (#samples, 3000, 4)
X_train = np.squeeze(X_train)
X_train = np.swapaxes(X_train, 1, 2)
X_valid = np.squeeze(X_valid)
X_valid = np.swapaxes(X_valid, 1, 2)
print('shape: ', X_train.shape)

space = {
    'batch_size': 128,
    'nb_epoch': 200,
    'activation_method': ['relu', 'sigmoid'],
    'learning_rate': hp.choice("learning_rate", [0.001]),
    'optimizer': args.optimizer,
    'loss': 'binary_crossentropy',
    'gpu_count': gpu_count,
    'nb_filters_1': hp.choice('nb_filters_1', [32, 64, 128]),
    'pool_size_1': hp.choice('pool_size_1', [0, 4, 16, 32, 64, 128]),
    'kernel_size_1': hp.choice('kernel_size_1', [16, 32, 64, 128]),
    'dropout_frac_1': hp.choice('dropout_frac_1', [0.25]),
    'nb_filters_2': hp.choice('nb_filters_2', [16, 64, 128, 512]),
    'dropout_frac_2': hp.choice('dropout_frac_2', [0.25]),
    'nb_dense': hp.choice('nb_dense', [16, 32, 128])
}
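# Note: plain values such as 'batch_size', 'nb_epoch', 'loss' and the
# 'activation_method' list above are passed to f_nn unchanged; only the
# hp.* expressions are searched. For hp.choice parameters, `best` below
# holds winning indices; use hyperopt.space_eval(space, best) to recover
# the concrete values.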

trials = Trials()
best = fmin(f_nn, space, algo=tpe.suggest, max_evals=100, trials=trials)
print('best: %s' % (best,))
Exemplo n.º 48
0
from hyperopt import hp
from hyperopt.pyll.stochastic import sample
from keras.callbacks import EarlyStopping
from keras.layers import GlobalMaxPooling2D
from keras.layers.advanced_activations import *
from keras.layers.convolutional import Conv2D
from keras.layers.core import Dense, Dropout, Activation
from keras.models import Sequential
from keras.optimizers import Adadelta

from common_defs import *

# a dict with x_train, y_train, x_test, y_test

space = {
    'DROPOUT': hp.choice('drop', (0.1, 0.5, 0.75)),
    'DELTA': hp.choice('delta', (1e-04, 1e-06, 1e-08)),
    'MOMENT': hp.choice('moment', (0.9, 0.99, 0.999)),
}


def get_params():
    params = sample(space)
    return handle_integers(params)


def print_params(params):
    pprint({k: v for k, v in params.items() if not k.startswith('layer_')})
    print()
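
# A minimal usage sketch: draw one random configuration and print it
# (handle_integers is assumed to come from common_defs, imported above).
params = get_params()
print_params(params)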

Exemplo n.º 49
0
def main(alignments, spectrograms, operation, model_dir, tst_size, n_samples,
         n_splits, trn_size, batch_size, n_epochs, max_hyperparam_sets,
         max_layers):
    # Parse the test set size
    if tst_size is None:
        tst_size = 0.2
    else:
        error_message = 'Error: Invalid value for "--tst_size": "%s" is neither a valid integer nor a valid float' % (
            tst_size, )
        if '.' in tst_size or 'e' in tst_size or 'E' in tst_size:
            try:
                tst_size = float(tst_size)
            except ValueError:
                print(error_message)
        else:
            try:
                tst_size = int(tst_size)
            except ValueError:
                print(error_message)

    # Verify that there was at least one operation requested
    if not (alignments or spectrograms or operation):
        print(
            'No options given, try invoking the command with "--help" for help.'
        )

    # Convert the data to a fast-loading representation
    convert(alignments, spectrograms)

    # Create default model directory if there is none
    if model_dir is None:
        now = datetime.datetime.now()
        model_dir = os.path.dirname(
            os.path.abspath(__file__)) + ('/../models/%s_%s' %
                                          (now.date(), now.time()))
    model_dir = os.path.abspath(os.path.expanduser(model_dir))
    Path(model_dir).mkdir(exist_ok=True)
    print('Output directory: %s' % (model_dir, ))

    # Setup logging
    # Change verbosity to info and log everything to a file (i.e. ../<model_dir>/main.log)
    tf.logging.set_verbosity(logging.INFO)
    logging.getLogger('tensorflow').handlers = [
        logging.FileHandler(model_dir + '/main.log'),
        # logging.StreamHandler(sys.stdout)
    ]
    os.environ[
        'TF_CPP_MIN_LOG_LEVEL'] = '3'  # Suppress tensorflow debugging output
    print('Tensorflow debugging output is suppressed')

    # Handle data loading
    loader = DataLoader(os.path.abspath(
        os.path.dirname(os.path.abspath(__file__)) + '/../dat/fast_load'),
                        tst_size=tst_size,
                        seed=SEED)
    if operation == 'hyperoptimize':  # Hyperparameter optimization
        # Define hyperparameter search space
        @scope.define
        def to_int(number):
            return int(number)

        hyperparam_space = dict(
            lstm_size=scope.to_int(hp.qloguniform('lstm_size', 2.0, 5.0, 1)),
            dense_sizes=hp.choice('n_layers', [
                tuple(
                    scope.to_int(
                        hp.qloguniform('dense_size_%d_%d' %
                                       (i, j), 2.0, 5.0, 1)) for j in range(i))
                for i in range(max_layers + 1)
            ]),
            dropout=hp.uniform('dropout', 0.0, 1.0))
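        # The hp.choice over tuples lets the number of dense layers vary:
        # candidate i is a tuple of i layer sizes, and each inner
        # qloguniform gets a unique label ('dense_size_%d_%d'), since
        # hyperopt requires every label in a space to be unique.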

        # Optimize design via cross-validation
        trial_index = iter(count())

        def objective(hyperparams):
            trial_id = next(trial_index)
            progress.print_bar(trial_id, max_hyperparam_sets, 20,
                               'Trial:            ┃', '┃', '\n')
            trial_dir = model_dir + ('/trial_%d' % (trial_id, ))
            if not os.path.exists(trial_dir):
                os.makedirs(trial_dir)
            with open(trial_dir + '/hyperparams.json',
                      'w') as hyperparams_file:
                json.dump(hyperparams, hyperparams_file)
            loss = cross_validate(trial_dir, loader, n_samples, n_splits,
                                  trn_size, batch_size, n_epochs,
                                  **hyperparams)
            report = {'loss': loss, 'status': STATUS_OK}
            with open(trial_dir + '/report.json', 'w') as report_file:
                json.dump(report, report_file)
            return report

        trials = Trials()
        hyperparams_best = fmin(fn=objective,
                                space=hyperparam_space,
                                algo=tpe.suggest,
                                max_evals=max_hyperparam_sets,
                                trials=trials)
        progress.print_bar(next(trial_index), max_hyperparam_sets, 20,
                           'Trial:            ┃', '┃', '\n')

        # Report results of hyperparameter optimization
        print('Best hyperparameters:')
        for param, value in sorted(hyperparams_best.items()):
            print('    %s: %s' % (param, value))
        with open(model_dir + '/hyperparams_best.json',
                  'w') as hyperparams_best_file:
            json.dump(hyperparams_best, hyperparams_best_file)
    # elif operation == 'train':
    #     train(estimator, loader, n_epochs)
    # elif operation == 'evaluate':
    #     evaluate(estimator, loader)
    elif operation == 'predict':
        predict(model_dir, loader, n_samples, n_splits, trn_size, batch_size,
                n_epochs, lstm_size)  # TODO Add hyperparams
Exemplo n.º 50
0
# X = biomarker.drop('PERSON_ID', 1)
X = analysis.merge(biomarker,
                   on='PERSON_ID').drop(['CODE_REHOSP', 'PERSON_ID'], axis=1)
y = analysis['CODE_REHOSP'].replace(2, 0)
cat_colidx = [
    X.columns.get_loc(col) for col in X.columns if X[col].nunique() <= 10
]

for col in cat_colidx:
    if X[X.columns[col]].dtype == 'float64':
        X[X.columns[col]] = X[X.columns[col]].fillna(-1).astype(int)
    else:
        X[X.columns[col]] = X[X.columns[col]].fillna('')

cbc_params = {
    'max_depth': hp.choice('max_depth', np.arange(2, 11)),
    'l2_leaf_reg': hp.uniform('l2_leaf_reg', 0, 500),
    'colsample_bylevel': hp.uniform('colsample_bylevel', 0.1, 1),
    #     'subsample': hp.uniform('subsample', 0.1, 1),
    'eta': hp.uniform('eta', 0.01, 0.1),
    #     'bootstrap_type': hp.choice('bootstrap_type', ['Bernoulli', 'Poisson', 'No']),
    #     'one_hot_max_size': hp.choice('one_hot_max_size', np.arange(2,6))
}


def f_cbc(params):
    kfold = StratifiedKFold(n_splits=5, shuffle=True, random_state=2019)
    auc = np.zeros(kfold.get_n_splits())
    cbc_pred = np.zeros(len(X))
    featureimp = np.zeros(X.shape[1])
    cbc = CatBoostClassifier(
Exemplo n.º 51
0
from __future__ import print_function


def test_landing_screen():

    # define an objective function
    def objective(args):
        case, val = args
        if case == 'case 1':
            return val
        else:
            return val**2

    # define a search space
    from hyperopt import hp
    space = hp.choice('a', [('case 1', 1 + hp.lognormal('c1', 0, 1)),
                            ('case 2', hp.uniform('c2', -10, 10))])

    # minimize the objective over the space
    import hyperopt
    best = hyperopt.fmin(objective,
                         space,
                         algo=hyperopt.tpe.suggest,
                         max_evals=100)

    print(best)
    # -> {'a': 1, 'c2': 0.01420615366247227}

    print(hyperopt.space_eval(space, best))
    # -> ('case 2', 0.01420615366247227)
Exemplo n.º 52
0
print(best)

# The maximum: maximize (x-1)**2 by minimizing its negation
best = fmin(
    fn=lambda x: -(x-1)**2,
    space=hp.uniform('x', -2, 2),
    algo=tpe.suggest,
    max_evals=100)
print(best)


# Search Spaces
space = {
    'x': hp.uniform('x', 0, 1),
    'y': hp.normal('y', 0, 1),
    'name': hp.choice('name', ['alice', 'bob']),
}

print(hyperopt.pyll.stochastic.sample(space))

# Capturing Info with Trials
# To see exactly what is happening inside the hyperopt black box,
# the Trials object records information about each evaluation as it runs.

fspace = {
    'x': hp.uniform('x', -5, 5)
}

def f(params):
    x = params['x']
    val = x**2
    # assumed completion of the truncated body; STATUS_OK comes from hyperopt
    return {'loss': val, 'status': STATUS_OK}
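
# A minimal continuation sketch (assuming Trials was imported from hyperopt):
# run fmin with a Trials object and inspect what was recorded per evaluation.
trials = Trials()
best = fmin(fn=f, space=fspace, algo=tpe.suggest, max_evals=50, trials=trials)
print('best:', best)
for t in trials.trials[:2]:
    print(t['misc']['vals'], t['result'])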
Exemplo n.º 53
0
    'FEE': 0.002,
    'SYMBOL': 'tBTCUSD',
    'SECTION': 'hist',
    'START': '2018-07-01 00:00:00',
    'END': '2019-01-01 00:00:00',
    'TIMEFRAME': '1h',
    'MAX_NUM_REFS': 300,
    'K1': 0.56,
    'K2': 0.58,
    'NUM_REFS': 11
}

hyperopt_params = {
    'K1': hp.quniform('K1', 0.1, 2.0, 0.02),
    'K2': hp.quniform('K2', 0.1, 2.0, 0.02),
    'NUM_REFS': hp.choice('NUM_REFS', np.arange(10, 300, dtype=int))
}
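# Note: hp.choice('NUM_REFS', np.arange(10, 300, dtype=int)) searches the
# listed values, but fmin reports the winning index; map it back with
# hyperopt.space_eval(hyperopt_params, best) before using the result.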


class Quant_Trader():
    def __init__(self, config_params):
        self.Config = config_params.copy()

        self.path_params = {
            'TimeFrame': self.Config['TIMEFRAME'],
            'Symbol': self.Config['SYMBOL'],
            'Section': self.Config['SECTION']
        }

        self.query_params = {
            'limit': 5000,
Exemplo n.º 54
0
    def model(self):
        #cname = sys._getframe().f_code.co_name
        cname = 'p'
        train, y, test = self.train_, self.y_, self.test_

        train.drop('id', axis=1, inplace=True)
        test.drop('id', axis=1, inplace=True)
        from hyperopt import fmin, tpe, hp, STATUS_OK, Trials, space_eval

        def step_rf(params):
            clf = ensemble.RandomForestRegressor(**params)
            cv = model_selection.cross_val_score(clf,
                                                 train,
                                                 y,
                                                 scoring=metrics.make_scorer(
                                                     metrics.log_loss),
                                                 cv=6,
                                                 n_jobs=-2)
            score = np.mean(cv)
            print(cname, score, params)
            return dict(loss=score, status=STATUS_OK)

        space_rf = dict(
            n_estimators=hp.choice('n_estimators', range(50, 1500)),
            #criterion = hp.choice('criterion', ["gini", "entropy"]),
            min_samples_split=hp.choice('min_samples_split', range(2, 10)),
            min_samples_leaf=hp.choice('min_samples_leaf', range(1, 10)),
            max_features=hp.choice('max_features', range(1, 16)),
            random_state=1)
        trs = self.load('rf_trials')
        if trs is None or self.debug_:
            tr = Trials()
        else:
            tr, _ = trs
        if len(tr.trials) > 0:
            print('reusing %d trials, best was:' % (len(tr.trials)),
                  space_eval(space_rf, tr.argmin))
            best = tr.argmin
        while len(tr.trials) < 30:
            best = fmin(step_rf,
                        space_rf,
                        algo=tpe.suggest,
                        max_evals=len(tr.trials) + 1,
                        trials=tr)
            self.save('rf_trials', (tr, space_rf))
        rf_params = space_eval(space_rf, best)
        print(rf_params)

        N_splits = 9
        N_seeds = 3

        v, z = self.v_, self.z_
        skf = model_selection.StratifiedKFold(n_splits=N_splits, shuffle=True)
        cv = []
        for s in range(N_seeds):
            scores = []
            cname2 = cname + str(s)
            v[cname2], z[cname2] = 0, 0
            rf_params['random_state'] = s + 4242
            for n, (itrain, ival) in enumerate(skf.split(train, y)):
                clf = ensemble.RandomForestRegressor(**rf_params)
                clf.fit(train.iloc[itrain], y[itrain])
                p = clf.predict(train.iloc[ival])
                v.loc[ival, cname2] += p
                score = metrics.log_loss(y[ival], p)
                z[cname2] += clf.predict(test)
                print(
                    cname, 'seed %d step %d of %d: ' %
                    (rf_params['random_state'], n + 1, skf.n_splits), score,
                    self.now())
                scores.append(score)
            z[cname2] /= N_splits
            cv.append(np.mean(scores))
            print('seed %d loss: ' % (rf_params['random_state']), scores,
                  np.mean(scores), np.std(scores))
            z['y'] = z[cname2]

        print('cv:', cv, np.mean(cv), np.std(cv))
        return cv, None
Exemplo n.º 55
0
test_data2 = numpy.reshape(test_data2,
                           (conv_hour_to_day * X_day_e.shape[0],
                            int(std_inv / std_inv2), test_data2.shape[1]))
H_t2 = numpy.reshape(H_t2, (conv_hour_to_day * H_mean_t.shape[0],
                            int(std_inv / std_inv2), H_t2.shape[1]))
H_val2 = numpy.reshape(H_val2, (conv_hour_to_day * H_mean_v.shape[0],
                                int(std_inv / std_inv2), H_val2.shape[1]))
H_e2 = numpy.reshape(H_e2, (conv_hour_to_day * H_mean_e.shape[0],
                            int(std_inv / std_inv2), H_e2.shape[1]))

# This block is for optimizing the LSTM layers
space = {
    'Layer1': hp.quniform('Layer1', 10, 100, 5),
    'Layer2': hp.quniform('Layer2', 10, 100, 5),
    'Layer3': hp.quniform('Layer3', 5, 20, 1),
    'activ_l3': hp.choice('activ_l3', ['relu', 'sigmoid']),
    'activ_l4': hp.choice('activ_l4', ['linear'])
}

space2 = {
    'Layer1': hp.quniform('Layer1', 10, 100, 5),
    'activ_l1': hp.choice('activ_l1', ['tanh']),
    'activ_l2': hp.choice('activ_l2', ['tanh'])
}
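# Note: hp.quniform returns floats (e.g. 85.0), so values such as 'Layer1'
# and 'Layer2' should be cast to int before being handed to the LSTM builder.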


def objective(params):
    optimize_model = build_lstm_v1.lstm_multi_104(
        params, train_data2.shape[2], H_t2.shape[2],
        (std_inv / std_inv2))  #Check code here, relu entering
    loss_out = NNFunctions.model_optimizer_101(optimize_model, train_data2,
Exemplo n.º 56
0
#     if penalty ==
#     return {'loss': x ** 2+y**2+z**2}


def objective(args):
    print(args)
    x, y, c = args
    case = c[0]
    z = c[1]
    t = c[2]
    if case == 'l1':
        return {'loss': x**2 + y**2 + z**2 + t, 'status': STATUS_OK}
    if case == 'l2':
        return {'loss': x**2 + y**2 + z**2 + t, 'status': STATUS_OK}


#def objective2(args):
#    return objective(*args)

space = [
    hp.uniformint('x', -10, 10),
    hp.uniformint('y', -10, 10),
    hp.choice(
        'a', [('l1', hp.randint('z', 1), hp.uniformint('t', 4, 10)),
              ('l2', hp.uniformint('z1', -10, 10), hp.uniformint('t1', 0, 4))])
]
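# Note: the two branches of hp.choice use distinct labels ('z'/'z1' and
# 't'/'t1') because hyperopt requires every label in a space to be unique,
# even across mutually exclusive branches.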

best = fmin(objective, space=space, algo=tpe.suggest, max_evals=100)

print(best)
Exemplo n.º 57
0
    def get_hyperparameter_search_space(dataset_properties=None,
                                        optimizer='smac'):
        if optimizer == 'smac':
            cs = ConfigurationSpace()
            n_estimators = UniformIntegerHyperparameter(name="n_estimators",
                                                        lower=50,
                                                        upper=500,
                                                        default_value=50,
                                                        log=False)
            sampling_strategy = CategoricalHyperparameter(
                name="sampling_strategy",
                choices=["majority", "not minority", "not majority", "all"],
                default_value="not minority")
            replacement = CategoricalHyperparameter("replacement",
                                                    ["True", "False"],
                                                    default_value="False")

            ab_n_estimators = UniformIntegerHyperparameter(
                name="ab_n_estimators",
                lower=50,
                upper=500,
                default_value=50,
                log=False)
            ab_learning_rate = UniformFloatHyperparameter(
                name="ab_learning_rate",
                lower=0.01,
                upper=2,
                default_value=0.1,
                log=True)
            ab_algorithm = CategoricalHyperparameter(
                name="ab_algorithm",
                choices=["SAMME.R", "SAMME"],
                default_value="SAMME.R")
            ab_max_depth = UniformIntegerHyperparameter(name="ab_max_depth",
                                                        lower=1,
                                                        upper=10,
                                                        default_value=1,
                                                        log=False)
            cs.add_hyperparameters([
                n_estimators, sampling_strategy, replacement, ab_n_estimators,
                ab_learning_rate, ab_algorithm, ab_max_depth
            ])
            return cs
        elif optimizer == 'tpe':
            from hyperopt import hp
            space = {
                'n_estimators':
                hp.randint('easy_ensemble_n_estimators', 451) + 50,
                'sampling_strategy':
                hp.choice('easy_ensemble_sampling_strategy',
                          ["majority", "not minority", "not majority", "all"]),
                'replacement':
                hp.choice('easy_ensemble_replacement', ["True", "False"]),
                'ab_n_estimators':
                hp.randint('ab_n_estimators', 451) + 50,
                'ab_learning_rate':
                hp.loguniform('ab_learning_rate', np.log(0.01), np.log(2)),
                'ab_algorithm':
                hp.choice('ab_algorithm', ["SAMME.R", "SAMME"]),
                'ab_max_depth':
                hp.randint('ab_max_depth', 10) + 1
            }
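            # Note: hp.randint(label, n) samples integers from [0, n), so
            # the "+ 50" and "+ 1" offsets above shift the searched ranges
            # to [50, 500] and [1, 10] respectively.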
            init_trial = {
                'n_estimators': 10,
                'sampling_strategy': "not minority",
                'replacement': "False",
                'ab_n_estimators': 50,
                'ab_learning_rate': 0.1,
                'ab_algorithm': "SAMME.R",
                'ab_max_depth': 1
            }
            return space
Exemplo n.º 58
0
                                              test_size=TEST_SIZE)
        x_train, y_train, x_val, y_val, x_test, label_binarizer, \
            clips_per_sample = load_data(train_idx, val_idx)

        '''
        cnn_dropout     = float(params["cnn_dropout"])
        dropout_coeff   = float(params["dropout_coeff"])
        reg_coeff       = float(params["reg_coeff"])
        num_hidden      = int(params["num_hidden"])
        shift           = float(params["shift"])
        erase           = bool(params["erase"])
        alpha           = float(params["alpha"])
        '''

        hyperopt_space = {
            "cnn_dropout"       : hp.choice("cnn_dropout", [0, 0.1]),
            "dropout_coeff"     : hp.uniform("dropout_coeff", 0.3, 0.6),
            "reg_coeff"         : hp.uniform("reg_coeff", -10, -4),
            "num_hidden"        : hp.quniform("num_hidden", 32, 128, 1),
            "shift"             : hp.uniform("shift", 0, 0.5),
            # "erase"             : hp.choice("erase", [True, False]),
            "alpha"             : hp.uniform("alpha", 0.001, 0.999),
        }
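        # Note: "reg_coeff" spans [-10, -4], which suggests train_model
        # applies it on a log scale (e.g. 10**reg_coeff) rather than as a
        # raw regularization strength.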

        best = fmin(fn=train_model, space=hyperopt_space,
                    algo=tpe.suggest, max_evals=HYPEROPT_EVALS)
        print("best params:", best)

        pred = predict(x_test, label_binarizer, clips_per_sample, "nofolds")
    elif not ENABLE_KFOLD:
        train_idx, val_idx = train_test_split(train_indices,
Exemplo n.º 59
0
def _optimize_hyper_params_impl(reconstructor,
                                fn,
                                params,
                                hyperopt_max_evals=1000,
                                hyperopt_rstate=None,
                                show_progressbar=True,
                                tqdm_file=None):
    grid_search_params = []
    grid_search_param_choices = []
    hyperopt_space = {}
    for k, param in params.items():
        method = param['method']
        if method == 'grid_search':
            grid_search_options = param.get('grid_search_options', {})
            choices = param.get('choices')
            if choices is None:
                range_ = param.get('range')
                if range_ is not None:
                    grid_type = grid_search_options.get('type', 'linear')
                    if grid_type == 'linear':
                        n = grid_search_options.get('num_samples', 10)
                        choices = np.linspace(range_[0], range_[1], n)
                    else:
                        raise ValueError(
                            "unknown grid type '{grid_type}' in {reco_cls}."
                            "HYPER_PARAMS['{k}']['grid_search_options']".
                            format(grid_type=grid_type,
                                   reco_cls=reconstructor.__class__.__name__,
                                   k=k))
                else:
                    raise ValueError(
                        "neither 'choices' nor 'range' is specified in "
                        "{reco_cls}.HYPER_PARAMS['{k}'], one of them must be "
                        "specified for grid search".format(
                            reco_cls=reconstructor.__class__.__name__, k=k))
            grid_search_params.append(k)
            grid_search_param_choices.append(choices)
        elif method == 'hyperopt':
            hyperopt_options = param.get('hyperopt_options', {})
            space = hyperopt_options.get('space')
            if space is None:
                choices = param.get('choices')
                if choices is None:
                    range_ = param.get('range')
                    if range_ is not None:
                        space_type = hyperopt_options.get('type', 'uniform')
                        if space_type == 'uniform':
                            space = hp.uniform(k, range_[0], range_[1])
                        else:
                            raise ValueError(
                                "unknown hyperopt space type '{space_type}' "
                                "in {reco_cls}.HYPER_PARAMS['{k}']"
                                "['hyperopt_options']".format(
                                    space_type=space_type,
                                    reco_cls=reconstructor.__class__.__name__,
                                    k=k))
                    else:
                        raise ValueError(
                            "neither 'choices' nor 'range' is specified in "
                            "{reco_cls}.HYPER_PARAMS['{k}']"
                            "['hyperopt_options']. One of these or "
                            "{reco_cls}.HYPER_PARAMS['{k}']"
                            "['hyperopt_options']['space'] must be specified "
                            "for hyperopt param search".format(
                                reco_cls=reconstructor.__class__.__name__,
                                k=k))
                else:
                    space = hp.choice(k, choices)
            hyperopt_space[k] = space
        else:
            raise ValueError("unknown method '{method}' for "
                             "{reco_cls}.HYPER_PARAMS['{k}']".format(
                                 method=method,
                                 reco_cls=reconstructor.__class__.__name__,
                                 k=k))

    best_loss = np.inf

    best_hyper_params = None
    with std_out_err_redirect_tqdm(tqdm_file) as orig_stdout:
        grid_search_total = np.prod(
            [len(c) for c in grid_search_param_choices])
        for grid_search_values in tqdm(
                product(*grid_search_param_choices),
                desc='hyper param opt. for {reco_cls}'.format(
                    reco_cls=type(reconstructor).__name__),
                total=grid_search_total,
                file=orig_stdout,
                leave=False,
                disable=not show_progressbar):
            grid_search_param_dict = dict(
                zip(grid_search_params, grid_search_values))
            reconstructor.hyper_params.update(grid_search_param_dict)
            if len(hyperopt_space) == 0:
                result = fn({})
                if result['loss'] < best_loss:
                    best_loss = result['loss']
                    best_hyper_params = result.get('best_sub_hp', {})
                    best_hyper_params.update(grid_search_param_dict)
            else:
                trials = Trials()
                argmin = fmin(fn=fn,
                              space=hyperopt_space,
                              algo=tpe.suggest,
                              max_evals=hyperopt_max_evals,
                              trials=trials,
                              rstate=hyperopt_rstate,
                              show_progressbar=False)
                best_trial = trials.best_trial
                if best_trial['result']['loss'] < best_loss:
                    best_loss = best_trial['result']['loss']
                    best_hyper_params = best_trial['result'].get(
                        'best_sub_hp', {})
                    best_hyper_params.update(grid_search_param_dict)
                    best_hyper_params.update(space_eval(
                        hyperopt_space, argmin))

    if best_hyper_params is not None:
        reconstructor.hyper_params.update(best_hyper_params)

    return best_hyper_params
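
# A minimal sketch of the `params` layout this helper consumes (the
# parameter names 'gamma', 'iterations' and 'tau' are illustrative only):
example_params = {
    'gamma': {'method': 'grid_search',
              'range': [0.1, 1.0],
              'grid_search_options': {'type': 'linear', 'num_samples': 5}},
    'iterations': {'method': 'hyperopt',
                   'choices': [10, 50, 100]},
    'tau': {'method': 'hyperopt',
            'range': [0.0, 1.0],
            'hyperopt_options': {'type': 'uniform'}},
}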
Exemplo n.º 60
0
    def getOptimizedHyperParametersRange(self):
        optimizedHyperParametersRange = {
            "kernelName": hp.choice("kernelName", ['RBF', 'Matern', 'RationalQuadratic']),
        }
        return optimizedHyperParametersRange
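
# A minimal usage sketch (assuming the kernel names target
# sklearn.gaussian_process, which the snippet itself does not state):
from hyperopt import hp
from hyperopt.pyll.stochastic import sample
from sklearn.gaussian_process.kernels import RBF, Matern, RationalQuadratic

space = {'kernelName': hp.choice('kernelName', ['RBF', 'Matern', 'RationalQuadratic'])}
kernel_map = {'RBF': RBF(), 'Matern': Matern(), 'RationalQuadratic': RationalQuadratic()}
picked = sample(space)['kernelName']   # e.g. 'Matern'
kernel = kernel_map[picked]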