Пример #1
0
def model(X_train, Y_train, X_test, Y_test):
    """Build, train and score one candidate network of a hyperparameter search.

    The double-curly-brace expressions in the body are search-space
    placeholders rather than ordinary Python values; presumably a templating
    layer (hyperas-style) replaces each one with a sampled value before the
    function is executed -- TODO confirm against the caller.

    Args:
        X_train: Training inputs; the first layer declares 784 input features.
        Y_train: Training targets for a 10-unit softmax output trained with
            categorical cross-entropy (presumably one-hot encoded -- verify).
        X_test: Inputs used both as validation data during fitting and for
            the final evaluation.
        Y_test: Targets matching ``X_test``.

    Returns:
        A dict with ``'loss'`` (the negated test accuracy, so that minimising
        the loss maximises accuracy), ``'status'`` and the trained ``'model'``.
    """

    # Fixed first hidden layer followed by a tunable dropout rate.
    model = layers.Sequential()
    model.add(layers.Dense(512, input_shape=(784, )))
    model.add(layers.Activation('relu'))
    model.add(layers.Dropout({{hyperopt.uniform(0, 1)}}))
    # Second hidden layer: width, activation and dropout rate are all searched.
    model.add(layers.Dense({{hyperopt.choice([256, 512, 1024])}}))
    model.add(layers.Activation({{hyperopt.choice(['relu', 'sigmoid'])}}))
    model.add(layers.Dropout({{hyperopt.uniform(0, 1)}}))

    # If we choose 'four', add an additional fourth layer
    if {{hyperopt.choice(['three', 'four'])}} == 'four':
        model.add(layers.Dense(100))
        # The layer placed after the extra Dense is itself a search choice.
        model.add({{
            hyperopt.choice([layers.Dropout(0.5),
                             layers.Activation('linear')])
        }})
        model.add(layers.Activation('relu'))

    # Output head: 10 units with softmax.
    model.add(layers.Dense(10))
    model.add(layers.Activation('softmax'))

    # The optimizer is searched as well; 'accuracy' is requested so that
    # evaluate() below returns a (loss, accuracy) pair.
    model.compile(loss='categorical_crossentropy',
                  optimizer={{hyperopt.choice(['rmsprop', 'adam', 'sgd'])}},
                  metrics=['accuracy'])

    # Train for a single epoch with a searched batch size.
    # NOTE(review): `nb_epoch` looks like the older Keras spelling of
    # `epochs` -- confirm against the Keras version in use.
    model.fit(X_train,
              Y_train,
              batch_size={{hyperopt.choice([64, 128])}},
              nb_epoch=1,
              verbose=2,
              validation_data=(X_test, Y_test))
    score, acc = model.evaluate(X_test, Y_test, verbose=0)
    print('Test accuracy:', acc)
    # Accuracy is negated because the search minimises 'loss'.
    return {'loss': -acc, 'status': hyperopt.STATUS_OK, 'model': model}
Пример #2
0
    FN = CM[1][0]
    TP = CM[1][1]
    FP = CM[0][1]
    print("TP = {}".format(TP))
    print("FP = {}".format(FP))
    print("FN = {}".format(FN))

    f1 = 2. * TP / (2. * TP + FP + FN)
    print("F1 : ", f1)

    return {'loss': 1 - f1, 'status': STATUS_OK}


# Search space. Each hp.choice entry picks one integer from the given
# np.arange grid; 'cw' is a continuous uniform draw between 1 and 5.
space = dict(
    n_estimators=hp.choice('n_estimators',
                           np.arange(200, 501, 25, dtype=int)),
    max_depth=hp.choice('max_depth', np.arange(15, 20, dtype=int)),
    max_features=hp.choice('max_features', np.arange(15, 30, dtype=int)),
    mss=hp.choice('mss', np.arange(2, 40, 1, dtype=int)),
    cw=hp.uniform('cw', 1, 5),
    msl=hp.choice('msl', np.arange(1, 11, dtype=int)),
)

# Minimise `objective` with TPE for 100 evaluations, keeping every trial
# in `trials`.
trials = Trials()
best = fmin(objective, space, algo=tpe.suggest, max_evals=100, trials=trials)
def objective(x):
    """Return a hyperopt result dict for the toy loss ``x ** 2``.

    Besides the mandatory ``'loss'`` and ``'status'`` entries, the result
    carries the evaluation timestamp, an arbitrary nested ``'other_stuff'``
    payload, and an ``'attachments'`` entry holding the pickled ``time.time``
    function.
    """
    outcome = {'loss': x**2, 'status': STATUS_OK}
    outcome['eval_time'] = time.time()
    outcome['other_stuff'] = {'type': None, 'value': [0, 1, 2]}
    outcome['attachments'] = {'time_module': pickle.dumps(time.time)}
    return outcome


# Minimise objective(x) with TPE, drawing x from hp.uniform('x', -10, 10),
# for 100 evaluations; every evaluation is recorded in `trials`.
trials = Trials()
best = fmin(objective,
            space=hp.uniform('x', -10, 10),
            algo=tpe.suggest,
            max_evals=100,
            trials=trials)
print(best)

# Conditional search space: a choice between two tagged tuples, one built
# on a lognormal draw shifted by 1 and one on a uniform draw over [-10, 10].
space = hp.choice('a', [('case 1', 1 + hp.lognormal('c1', 0, 1)),
                        ('case 2', hp.uniform('c2', -10, 10))])

import hyperopt.pyll.stochastic

# Draw one sample from the space above and print it.
print(hyperopt.pyll.stochastic.sample(space))
Пример #4
0
            plt.show()

    return -best_AUC


# ========= use the "hyperopt" library to fine-tune the hyperparameters ==============

from hyperopt import fmin, tpe, hp, partial

batch_list = [32, 64, 128]

for dist in [5.]:
    space = {
        "lr_rate": hp.uniform("lr_rate", 0.0005, 0.01),
        "dp_out": hp.uniform("dp_out", 0.5, 1),
        "bt_size": hp.choice("bt_size", batch_list),
        "distance": hp.choice("distance", [dist])
    }
    # algo = partial(tpe.suggest, n_startup_jobs=10)
    try:
        best = fmin(main, space, algo=tpe.suggest, max_evals=50)
        best["bt_size"] = batch_list[best["bt_size"]]
        best["distance"] = dist
        best_AUC = -main(best)
        with open('finetune.txt', 'a') as f:
            f.write(
                "At distance {}, the best AUC is {}, its lr_rate is {}, drop_out is {}, batch_size is {}\n\n"
                .format(dist, best_AUC, best["lr_rate"], best["dp_out"],
                        best["bt_size"]))
    except Exception as err:
        with open("error_info.txt", "a") as f:
Пример #5
0
    
    
    if booster == "gbtree":
        pred_test = model.predict(X_test)
    elif booster == "dart":
        pred_test = model.predict(X_test, ntree_limit = num_round)
        

    error= MSE(y_test,pred_test)
    r2=-r2_score(y_train,model.predict(X_train))
    
    return float(error)


# DEFINING SEARCH SPACE
search_space = {'booster': hp.choice('booster', ['gbtree',"dart"]),
        'n_estimators': hp.quniform('n_estimators', 50, 3000, 1),
        'eta': hp.uniform('eta', 0, 1),
        'gamma': hp.uniform('gamma', 1, 500),
        'max_depth': hp.quniform('max_depth', 3, 100, 1),
        'min_child_weight': hp.uniform('min_child_weight', 0, 100),
        'random_state': sample(scope.int(hp.quniform('random_state', 4, 8, 1))),
        'subsample': hp.uniform('subsample', 0, 1),
        'alpha': hp.uniform('alpha', 1, 8),
        'colsample_bytree': hp.uniform('colsample_bytree', 0, 1),
        'sample_type': hp.choice('sample_type', ['uniform', 'weighted']),
        'normalize_type': hp.choice('normalize_type', ['tree', 'forest']),
        'grow_policy': hp.choice('grow_policy', ['depthwise', 'lossguide']),
        'rate_drop': hp.uniform('rate_drop', 0, 1),
        'skip_drop': hp.uniform('skip_drop', 0, 1),
        'colsample_bylevel':  hp.uniform('colsample_bylevel', 0, 1),
    hp.quniform("max_depth", 1, 10, 1),
    "subsample":
    hp.quniform("subsample", 0.1, 1, 0.05),
    "colsample_bytree":
    1,
    "colsample_bylevel":
    hp.quniform("colsample_bylevel", 0.1, 1, 0.05),
    "nthread":
    xgb_nthread,
    "seed":
    xgb_random_seed,
}

## search space stored as param_space_clf_skl_random_logistic
param_space_clf_skl_random_logistic = {
    # log-uniform between 0.01 and 20
    "alpha":
    hp.loguniform("alpha", np.log(0.01), np.log(20)),
    "normalize":
    hp.choice("normalize", [True, False]),
    # single-option choice, i.e. always False
    "poly":
    hp.choice("poly", [False]),
    # quantised uniform draws: (low, high, step)
    "n_estimators":
    hp.quniform("n_estimators", 2, 50, 2),
    "max_features":
    hp.quniform("max_features", 0.1, 1, 0.05),
    "bootstrap":
    hp.choice("bootstrap", [True, False]),
    "subsample":
    hp.quniform("subsample", 0.5, 1, 0.05),
    # fixed value taken from skl_random_seed, which is defined elsewhere
    "random_state":
    skl_random_seed,
}

## linear support vector classifier
param_space_clf_skl_lsvc = {
    "normalize":
    hp.choice("normalize", [True, False]),
    "C":
    hp.loguniform("C", np.log(1), np.log(100)),
    "epsilon":
Пример #7
0
def lgb_tuning(lgb_cv, N_FOLDS=5, MAX_EVALS=100, output_file='bayes_test.csv',
               metric='auc', objection='binary', groups=None):
    """Search LightGBM hyperparameters with hyperopt TPE, logging trials to CSV.

    Each sampled configuration is scored with ``lgb.cv`` (up to 4000 boosting
    rounds, early stopping after 300 rounds) and one row per trial is appended
    to ``output_file``.

    Args:
        lgb_cv: Training data handed to ``lgb.cv``. It must also provide
            ``get_group()`` and ``get_label()`` when ``groups`` is truthy.
        N_FOLDS: Number of cross-validation folds.
        MAX_EVALS: Number of hyperopt evaluations to run.
        output_file: CSV path. The file is overwritten with a header row and
            then receives one appended row per trial.
        metric: Metric name passed to ``lgb.cv``. ``'binary_error'`` is used
            directly as the loss; any other metric is converted with
            ``1 - score``. ``'map'`` additionally sets ``eval_at`` to 1.
        objection: Value stored under the LightGBM ``'objective'`` parameter.
        groups: When truthy, folds are built with ``GroupKFold`` from
            ``lgb_cv.get_group()`` instead of letting ``lgb.cv`` split.

    Returns:
        The best assignment returned by ``fmin``.
    """
    # hp.quniform samples floats, but these parameters are cast to int before
    # training. Entries missing from the sampled configuration are skipped.
    integer_parameters = ('num_leaves', 'subsample_for_bin',
                          'min_child_samples', 'max_depth')

    # Count of objective() calls so far; incremented by the closure below.
    ITERATION = 0

    def objective(hyperparameters, groups=groups):
        """Cross-validate one configuration and return its hyperopt result."""
        # Keep track of evals
        nonlocal ITERATION
        ITERATION += 1

        # Work on a copy so the dict sampled by hyperopt is left untouched.
        hyperparameters = dict(hyperparameters)

        # Using early stopping to find number of trees trained
        hyperparameters.pop('n_estimators', None)

        # The search space nests subsample inside the boosting_type choice;
        # lift both values to top-level keys.
        boosting = hyperparameters['boosting_type']
        hyperparameters['subsample'] = boosting.get('subsample', 1.0)
        hyperparameters['boosting_type'] = boosting['boosting_type']

        # Make sure parameters that need to be integers are integers
        for parameter_name in integer_parameters:
            if parameter_name in hyperparameters:
                hyperparameters[parameter_name] = int(
                    hyperparameters[parameter_name])
        hyperparameters['objective'] = objection

        start = timer()

        # Perform n_folds cross validation
        if groups:
            # NOTE(review): GroupKFold.split expects one group label per
            # sample -- confirm that lgb_cv.get_group() returns that shape.
            groups = lgb_cv.get_group()
            folds = GroupKFold(n_splits=N_FOLDS).split(lgb_cv.get_label(),
                                                       groups=groups)
        else:
            folds = None

        if metric.lower() == 'map':
            hyperparameters['eval_at'] = 1

        cv_results = lgb.cv(hyperparameters, lgb_cv, num_boost_round=4000,
                            nfold=N_FOLDS, folds=folds,
                            early_stopping_rounds=300, metrics=metric)

        run_time = timer() - start

        # The alphabetically first result key is used as the score series.
        score_key = sorted(cv_results.keys())[0]
        # Extract the best score
        best_score = cv_results[score_key][-1]

        # Loss must be minimized: binary_error already is one, every other
        # metric is treated as higher-is-better.
        if metric == 'binary_error':
            loss = best_score
        else:
            loss = 1 - best_score

        # Number of boosting rounds kept by cross-validation
        hyperparameters['n_estimators'] = len(cv_results[score_key])

        # Append this trial to the csv file; newline='' is what the csv
        # module expects for files it writes.
        with open(OUT_FILE, 'a', newline='') as of_connection:
            csv.writer(of_connection).writerow(
                [loss, hyperparameters, ITERATION, run_time, best_score])

        # Dictionary with information for evaluation
        return {'loss': loss, 'hyperparameters': hyperparameters,
                'iteration': ITERATION, 'train_time': run_time,
                'status': STATUS_OK}

    # Define the search space
    space = {
        'boosting_type': hp.choice(
            'boosting_type',
            [{'boosting_type': 'gbdt',
              'subsample': hp.uniform('gdbt_subsample', 0.5, 1)},
             {'boosting_type': 'dart',
              'subsample': hp.uniform('dart_subsample', 0.5, 1)},
             {'boosting_type': 'goss', 'subsample': 1.0}]),
        'num_leaves': hp.quniform('num_leaves', 20, 200, 4),
        'learning_rate': hp.loguniform('learning_rate',
                                       np.log(0.005), np.log(0.5)),
        #'subsample_for_bin': hp.quniform('subsample_for_bin', 20000, 300000, 20000),
        'min_child_samples': hp.quniform('min_child_samples', 20, 300, 5),
        'reg_alpha': hp.uniform('reg_alpha', 0.0, 0.2),
        'reg_lambda': hp.uniform('reg_lambda', 0.0, 0.2),
        #'colsample_bytree': hp.uniform('colsample_by_tree', 0.6, 1.0),
        'is_unbalance': hp.choice('is_unbalance', [True, False]),
        'max_depth': hp.quniform('max_depth', 4, 8, 1)
    }

    # Create the algorithm
    tpe_algorithm = tpe.suggest
    # Record results
    trials = Trials()

    # Start a fresh results file containing only the column names
    OUT_FILE = output_file
    headers = ['loss', 'hyperparameters', 'iteration', 'runtime', 'score']
    with open(OUT_FILE, 'w', newline='') as of_connection:
        csv.writer(of_connection).writerow(headers)

    # Run optimization
    best = fmin(fn=objective, space=space, algo=tpe_algorithm, trials=trials,
                max_evals=MAX_EVALS)
    return best