Пример #1
0
def test_bank_svmguide1():
    """Smoke-test the BaNK learner on the svmguide1 dataset and report train/test error."""
    print("========== Test BaNK on svmguide1 dataset ==========")

    data_name = 'svmguide1'
    (x_train, y_train), (x_test, y_test) = demo.load_svmguide1()

    # Hyperparameters are kept identical to the reference run.
    model = BaNK(
        gamma=1.0 / 1.25727,
        rf_dim=384,
        inner_regularization=0.125,
        outer_regularization=0.125,
        alpha=10.0,
        kappa=0.1,
        inner_max_loop=1,
        max_outer_loop=5,
        batch_size=5,
    )
    model.fit(x_train, y_train)

    # Error = 1 - accuracy on each split.
    train_err = 1 - metrics.accuracy_score(y_train, model.predict(x_train))
    test_err = 1 - metrics.accuracy_score(y_test, model.predict(x_test))
    print("Dataset: {}".format(data_name))
    print("Training error = %.4f" % train_err)
    print("Test error = %.4f" % test_err)
Пример #2
0
def test_svmguide1(show_figure=False, block_figure_on_end=False):
    """Train an online SVRG classifier on svmguide1, optionally plotting the losses.

    show_figure: whether the loss Display window is shown.
    block_figure_on_end: whether the figure blocks when training finishes.
    """
    print("========== Test SVRG on svmguide1 dataset ==========")

    data_name = 'svmguide1'
    (x_train, y_train), (x_test, y_test) = demo.load_svmguide1()

    # Live monitor for the training loss and objective value.
    loss_display = Display(
        freq=1,
        dpi='auto',
        show=show_figure,
        block_on_end=block_figure_on_end,
        monitor=[{'metrics': ['train_loss', 'obj_func'],
                  'type': 'line',
                  'title': "Learning losses",
                  'xlabel': "X1",
                  'ylabel': "X2",
                  }],
    )

    print(x_train.shape)

    # Online-mode SVRG with a coverage oracle and logistic loss.
    model = SVRG(
        mode='online',
        regular_param=0.01,
        learning_rate_scale=0.7,
        gamma=2.0,
        rf_dim=400,
        num_epochs=1,
        cache_size=50,
        freq_update_full_model=100,
        oracle=SVRG.COVERAGE,
        core_max=100,
        coverage_radius=0.9,
        loss_func=SVRG.LOGISTIC,
        smooth_hinge_theta=0.5,
        smooth_hinge_tau=0.5,
        callbacks=[loss_display],
        metrics=['train_loss', 'obj_func'],
        freq_calc_metrics=300,
        random_state=random_seed(),
    )
    model.fit(x_train, y_train)

    train_err = 1 - metrics.accuracy_score(y_train, model.predict(x_train))
    test_err = 1 - metrics.accuracy_score(y_test, model.predict(x_test))
    print("Dataset: {}".format(data_name))
    print("Training error = %.4f" % train_err)
    print("Test error = %.4f" % test_err)
Пример #3
0
def test_sgd_svmguide1_bin():
    """Fit a kernel SGD binary classifier on svmguide1 and report error rates."""
    (x_train, y_train), (x_test, y_test) = demo.load_svmguide1()

    # Gaussian-kernel SGD with hinge loss; parameters match the reference run.
    model = KSGD(
        lbd=0.1,
        eps=0.01,
        gamma=2,
        kernel='gaussian',
        loss='hinge',
        batch_size=10,
        avg_weight=False,
        random_state=random_seed(),
    )
    model.fit(x_train, y_train)

    # score() returns accuracy, so error is its complement.
    print("Training error = %.4f" % (1.0 - model.score(x_train, y_train)))
    print("Testing error = %.4f" % (1.0 - model.score(x_test, y_test)))
def test_svmguide1():
    """Run OnlineDualSVRG on svmguide1, tracking mistake rate, then score offline."""
    print("========== Test IGKOL on svmguide1 dataset ==========")

    data_name = 'svmguide1'
    (x_train, y_train), (x_test, y_test) = demo.load_svmguide1()

    # Online mistake-rate monitor.
    display = Display(
        freq=1,
        dpi='auto',
        block_on_end=True,
        monitor=[{'metrics': ['mistake_rate'],
                  'type': 'line',
                  'title': "Mistake Rate",
                  'xlabel': "data points",
                  'ylabel': "Error",
                  }],
    )

    # Budget-oracle dual SVRG with hinge loss; values match the reference run.
    model = OnlineDualSVRG(
        regular_param=0.01,
        learning_rate_scale=1.0,
        gamma=2.0,
        rf_dim=400,
        cache_size=50,
        freq_update_full_model=10,
        oracle='budget',
        core_max=100,
        coverage_radius=0.5,
        loss_func='hinge',
        smooth_hinge_theta=0.5,
        smooth_hinge_tau=0.5,
        callbacks=[display],
        metrics=['mistake_rate'],
        random_state=random_seed(),
    )
    model.fit(x_train, y_train)

    print('OFFLINE Testing')
    train_err = 1 - metrics.accuracy_score(y_train, model.predict(x_train))
    test_err = 1 - metrics.accuracy_score(y_test, model.predict(x_test))
    print("Dataset: {}".format(data_name))
    print("Training error = %.4f" % train_err)
    print("Test error = %.4f" % test_err)
Пример #5
0
def run_svmguide1_gridsearch():
    """Grid-search SVRG hyperparameters on svmguide1 using a fixed train/validation split.

    80% of the training pool is used for fitting and the remaining 20% forms a
    single predefined validation fold for GridSearchCV.  The best parameters are
    then refit on the full training pool and evaluated on the held-out test set.

    NOTE(review): the 80/20 split uses an unseeded np.random.permutation, so the
    chosen hyperparameters may vary between runs even though the model itself is
    seeded via random_seed() — confirm whether that is intended.
    """
    print("========== Tune parameters for SVRG for classification ==========")

    (x_total, y_total), (x_test, y_test) = demo.load_svmguide1()

    # Hold out 20% of the training pool as a validation fold.
    n_total = x_total.shape[0]
    percent = 0.8
    n_train = int(percent * n_total)
    idx_train = np.random.permutation(n_total)[:n_train]
    mask = np.zeros(n_total, dtype=bool)
    mask[idx_train] = True
    x_train = x_total[mask, :]
    x_valid = x_total[~mask, :]

    y_train = y_total[mask]
    y_valid = y_total[~mask]

    print("Number of training samples = {}".format(x_train.shape[0]))
    # BUGFIX: this count is the validation split, not the test set.
    print("Number of validation samples = {}".format(x_valid.shape[0]))

    # Stack train + validation so PredefinedSplit can address both by row index.
    x = np.vstack((x_train, x_valid))
    y = np.concatenate((y_train, y_valid))

    params = {'regular_param': [0.0001, 0.00001],
              'gamma': [0.25, 0.5]}

    # -1 marks rows used only for training; the non-negative entries define the
    # single validation fold evaluated by GridSearchCV.
    ps = PredefinedSplit(test_fold=[-1] * x_train.shape[0] + [1] * x_valid.shape[0])

    # Base estimator; regular_param and gamma are overridden by the grid.
    clf = SVRG(
        mode='batch',
        regular_param=0.01,
        learning_rate_scale=0.8,
        gamma=2.0,
        rf_dim=400,
        num_epochs=1,
        cache_size=50,
        freq_update_full_model=100,
        oracle=SVRG.COVERAGE,
        core_max=100,
        coverage_radius=0.9,
        loss_func=SVRG.LOGISTIC,
        smooth_hinge_theta=0.5,
        smooth_hinge_tau=0.5,
        random_state=random_seed()
    )

    gs = GridSearchCV(clf, params, cv=ps, n_jobs=-1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best error {} @ params {}".format(1.0 - gs.best_score_, gs.best_params_))

    # Refit a fresh clone with the best parameters on the full training pool.
    print("Best param prediction")
    best_clf = clone(clf).set_params(**gs.best_params_)
    best_clf.fit(x_total, y_total)

    y_total_pred = best_clf.predict(x_total)
    y_test_pred = best_clf.predict(x_test)
    total_err = 1 - metrics.accuracy_score(y_total, y_total_pred)
    test_err = 1 - metrics.accuracy_score(y_test, y_test_pred)
    print("Training error = %.4f" % total_err)
    print("Testing error = %.4f" % test_err)