Пример #1
0
def gaussian_nb_model_tpe():
    """Tune a PCA + Gaussian naive Bayes pipeline with TPE and print test metrics.

    Builds a ``HyperoptEstimator`` (150 evaluations, 60 s per trial), fits it
    on ``x_train`` / ``y_train`` and prints the F1 score, the accuracy and the
    best model found. The data arrays and all estimator components are
    resolved from the enclosing module scope.
    """
    estim = HyperoptEstimator(classifier=gaussian_nb('my_clf'),
                              preprocessing=[pca('my_pca')],
                              algo=tpe.suggest,
                              max_evals=150,
                              trial_timeout=60,
                              verbose=0)
    estim.fit(x_train, y_train)

    # Predict once and reuse the result for both metrics instead of running
    # the fitted pipeline over the test set twice.
    y_pred = estim.predict(x_test)

    # scikit-learn metrics are declared as metric(y_true, y_pred); pass the
    # ground truth first so the calls match the documented signature.
    print("f1score", f1_score(y_test, y_pred))
    print("accuracy score", accuracy_score(y_test, y_pred))
    print(estim.best_model())
Пример #2
0
def bench_classifiers(name):
    """Return an ``hp.choice`` over a fixed set of classifier search spaces.

    Each sub-space is labelled ``name`` plus a per-classifier suffix, and the
    resulting choice node itself is labelled ``name``. The xgboost space is
    only added when the module-level ``xgboost`` name is truthy.
    """
    def labelled(suffix):
        # Prefix every component label with the caller-supplied name.
        return name + suffix

    candidates = [
        ada_boost(labelled('.ada_boost')),
        gaussian_nb(labelled('.gaussian_nb')),
        knn(labelled('.knn'), sparse_data=True),
        linear_discriminant_analysis(
            labelled('.linear_discriminant_analysis'),
            n_components=1,
        ),
        random_forest(labelled('.random_forest')),
        sgd(labelled('.sgd')),
    ]

    if xgboost:
        candidates += [xgboost_classification(labelled('.xgboost'))]

    return hp.choice('%s' % name, candidates)
Пример #3
0
def tpe_classifier(name='clf'):
    """Build an ``hp.choice`` search space over seven classifier families.

    The returned node is labelled ``name`` and selects between Gaussian naive
    Bayes, liblinear SVC, decision tree, k-nearest neighbours, extra trees,
    random forest and gradient boosting, each with its own hyperparameter
    sub-space. All inner labels are fixed strings and do not depend on
    ``name``.
    """
    def shifted_int(label, upper, shift):
        # hp.randint draws an integer in [0, upper) per the hyperopt docs;
        # the shift moves that range to start at ``shift``.
        return hp.randint(label, upper) + shift

    def fraction_steps(label):
        # Twenty integer draws scaled to multiples of 0.05, offset by 0.05.
        return hp.randint(label, 20) * 0.05 + 0.05

    # penalty / loss / dual are drawn together from a single choice so the
    # three settings always come from the same listed combination.
    liblinear_combo = hp.choice(
        'liblinear_combination',
        [
            dict(penalty="l1", loss="squared_hinge", dual=False),
            dict(penalty="l2", loss="hinge", dual=True),
            dict(penalty="l2", loss="squared_hinge", dual=True),
            dict(penalty="l2", loss="squared_hinge", dual=False),
        ],
    )

    naive_bayes = gaussian_nb('hpsklearn_gaussian_nb')

    linear_svc = liblinear_svc(
        'hpsklearn_liblinear_svc',
        C=hp.choice(
            'hpsklearn_liblinear_svc_c',
            [1e-4, 1e-3, 1e-2, 1e-1, 0.5, 1., 5., 10., 15., 20., 25.],
        ),
        loss=liblinear_combo['loss'],
        penalty=liblinear_combo['penalty'],
        dual=liblinear_combo['dual'],
        tol=hp.choice('hpsklearn_liblinear_svc_tol',
                      [1e-5, 1e-4, 1e-3, 1e-2, 1e-1]),
    )

    tree = decision_tree(
        'decision_tree',
        criterion=hp.choice('decision_tree_criterion', ["gini", "entropy"]),
        max_depth=shifted_int('decision_tree_max_depth', 10, 1),
        min_samples_split=shifted_int('decision_tree_min_samples_split', 19, 2),
        min_samples_leaf=shifted_int('decision_tree_min_samples_leaf', 20, 1),
    )

    neighbours = knn(
        'knn',
        n_neighbors=shifted_int('knn_n', 100, 1),
        weights=hp.choice('knn_weights', ['uniform', 'distance']),
        p=hp.choice('knn_p', [1, 2]),
    )

    extra = extra_trees(
        'et',
        n_estimators=100,
        criterion=hp.choice('et_criterion', ["gini", "entropy"]),
        max_features=fraction_steps('et_max_features'),
        min_samples_split=shifted_int('et_min_samples_split', 19, 2),
        min_samples_leaf=shifted_int('et_min_samples_leaf', 20, 1),
        bootstrap=hp.choice('et_bootstrap', [True, False]),
    )

    forest = random_forest(
        'rf',
        n_estimators=100,
        criterion=hp.choice('rf_criterion', ["gini", "entropy"]),
        max_features=fraction_steps('rf_max_features'),
        min_samples_split=shifted_int('rf_min_samples_split', 19, 2),
        min_samples_leaf=shifted_int('rf_min_samples_leaf', 20, 1),
        bootstrap=hp.choice('rf_bootstrap', [True, False]),
    )

    boosting = gradient_boosting(
        'gb',
        n_estimators=100,
        learning_rate=hp.choice('gb_lr', [1e-3, 1e-2, 1e-1, 0.5, 1.]),
        max_depth=shifted_int('gb_max_depth', 10, 1),
        min_samples_split=shifted_int('gb_min_samples_split', 19, 2),
        min_samples_leaf=shifted_int('gb_min_samples_leaf', 20, 1),
        subsample=fraction_steps('gb_subsample'),
        max_features=fraction_steps('gb_max_features'),
    )

    return hp.choice(
        name,
        [naive_bayes, linear_svc, tree, neighbours, extra, forest, boosting],
    )
def main():
    """Fit a TPE-tuned Gaussian naive Bayes model on CSV data and print results.

    Reads ``../train_dataset.csv`` and ``../test_dataset.csv``, takes column 0
    as the target and columns from index 2 onward as features (column 1 is
    not used), runs the hyperparameter search and prints the test score
    followed by the best model.
    """
    train_frame = pd.read_csv('../train_dataset.csv')
    test_frame = pd.read_csv('../test_dataset.csv')

    X_train = train_frame.iloc[:, 2:].values
    y_train = train_frame.iloc[:, 0].values
    X_test = test_frame.iloc[:, 2:].values
    y_test = test_frame.iloc[:, 0].values

    search_settings = dict(
        classifier=gaussian_nb('myNB'),
        algo=tpe.suggest,
        max_evals=150,
        trial_timeout=120,
        verbose=True,
    )
    estim = HyperoptEstimator(**search_settings)
    estim.fit(X_train, y_train)

    # Both results are printed padded by two blank lines on each side.
    padded = "\n\n{}\n\n"
    print(padded.format(estim.score(X_test, y_test)))
    print(padded.format(estim.best_model()))
Пример #5
0
                   Y_val_mini.sum(axis=0),
                   Y_test_mini.sum(axis=0)
               ])))
        print(seed_val)

        print("\ndata is loaded  - next step > model testing\n")

        n_job = 6
        select_classes = [0, 1, 2, 3, 4, 5]
        val_dist = X_val_mini.shape[0] / X_train_mini.shape[0]
        name = 'my_est_oVa'

        tic_mod_all = time.time()
        select_alg = [
            ada_boost(name + '.ada_boost'),
            gaussian_nb(name + '.gaussian_nb'),
            knn(name + '.knn', sparse_data=True),
            linear_discriminant_analysis(name +
                                         '.linear_discriminant_analysis',
                                         n_components=1),
            random_forest(name + '.random_forest'),
            sgd(name + '.sgd'),
            xgboost_classification(name + '.xgboost')
        ]

        # fitting models
        estim_one_vs_rest = dict()
        # scoring models
        algo_scoring = dict()
        save_score_path = r'C:/Users/anden/PycharmProjects/NovelEEG/results'
        for alg in [select_alg[args.index]]:
Пример #6
0
        print("\ndata is loaded  - next step > model testing\n")

        n_job = 6
        select_classes = [0, 1, 2, 3, 4, 5]
        val_dist = X_val_mini.shape[0] / X_train_mini.shape[0]
        name = 'my_est_oVa'

        tic_mod_all = time.time()
        #select_alg = [ada_boost(name + '.ada_boost'),
        #              gaussian_nb(name + '.gaussian_nb'),
        #              knn(name + '.knn', sparse_data=True),
        #              linear_discriminant_analysis(name + '.linear_discriminant_analysis', n_components=1),
        #              random_forest(name + '.random_forest'),
        #              sgd(name + '.sgd'),
        #              xgboost_classification(name + '.xgboost')]
        select_alg = [gaussian_nb(name + '.gaussian_nb')]

        # fitting models
        estim_one_vs_rest = dict()
        # scoring models
        algo_scoring = dict()
        save_score_path = os.getcwd() + r'/tmp'
        for alg in select_alg:
            tic_mod = time.time()
            print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n",
                  "running on %s" % (alg.name + '.one_V_all'),
                  "\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~")
            clf_method = one_vs_rest(str(alg.name + '.one_V_all'),
                                     estimator=alg,
                                     n_jobs=n_job)
            estim_one_vs_rest[alg.name + '.one_V_all'] = HyperoptEstimator(