Example No. 1: DualSGD for regression with the eps-insensitive loss
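All of the snippets below share a common preamble. Here is a minimal import sketch: the numpy and scikit-learn imports are standard, but the `male` module paths are assumptions about the package layout and may need adjusting for your version.

import os

import numpy as np
from sklearn import metrics
from sklearn.base import clone
from sklearn.model_selection import GridSearchCV, PredefinedSplit, ShuffleSplit

# the `male` paths below are assumed, not verified against a specific release
from male.configs import model_dir, random_seed
from male.datasets import demo
from male.callbacks import Display, EarlyStopping, ModelCheckpoint
from male.models.linear import GLM, TensorFlowGLM
from male.models.kernel import DualSGD, FOGD, KMM, RKS
from male.models.deep_learning.rbm import SemiSupervisedRBM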
def test_dualsgd_regression():
    print("========== Test DualSGD for Regression ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_housing()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    est = DualSGD(model_name="DualSGD_eps_insensitive",
                  k=20,  # merging size for k-merging budget maintenance
                  D=200,  # dimension of the random-feature approximation
                  gamma=1.0,  # RBF kernel width
                  eps=0.001,  # epsilon of the eps-insensitive loss
                  lbd=0.00128,  # regularization parameter (lambda)
                  loss='eps_insensitive',
                  maintain='k-merging',  # budget maintenance strategy
                  max_budget_size=100,  # cap on the support set size
                  random_state=random_seed())
    est.fit(x_train, y_train)

    print("Mistake rate = %.4f" % est.mistake)
    print("Budget size = %d" % est.budget_size)

    # offline prediction
    print("Offline prediction")
    y_train_pred = est.predict(x_train)
    y_test_pred = est.predict(x_test)
    train_err = metrics.mean_squared_error(y_train, y_train_pred)
    test_err = metrics.mean_squared_error(y_test, y_test_pred)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Example No. 2: GLM for regression
def test_glm_regression():
    print("========== Test GLM for regression ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_housing()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    clf = GLM(
        model_name="GLM_regression",
        task='regression',
        link='linear',  # link function
        loss='quadratic',  # loss function
        l2_penalty=0.0,  # ridge regularization
        l1_penalty=0.0,  # Lasso regularization
        l1_smooth=1E-5,  # smoothing for Lasso regularization
        l1_method='pseudo_huber',  # approximation method for L1-norm
        random_state=random_seed())

    clf.fit(x_train, y_train)

    train_err = -clf.score(x_train, y_train)
    test_err = -clf.score(x_test, y_test)
    print("Training MSE = %.4f" % train_err)
    print("Testing MSE = %.4f" % test_err)
Example No. 3: tuning TensorFlowGLM for regression with grid search
def test_tfglm_regression_gridsearch():
    print(
        "========== Tune parameters for TensorFlowGLM for regression =========="
    )

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_housing()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack((x_train, x_test))
    y = np.concatenate((y_train, y_test))

    params = {'l1_penalty': [0.0, 0.0001], 'l2_penalty': [0.0001, 0.001, 0.01]}

    # PredefinedSplit: samples marked -1 stay in the training set for every
    # split; samples with a non-negative value form that validation fold, so
    # the original train/test split becomes the single validation fold
    ps = PredefinedSplit(test_fold=[-1] * x_train.shape[0] +
                         [1] * x_test.shape[0])

    clf = TensorFlowGLM(
        model_name="TensorFlowGLM_regression_gridsearch",
        task='regression',
        link='linear',  # link function
        loss='quadratic',  # loss function
        l2_penalty=0.0,  # ridge regularization
        l1_penalty=0.0,  # Lasso regularization
        l1_smooth=1E-5,  # smoothing for Lasso regularization
        l1_method='pseudo_huber',  # approximation method for L1-norm
        learning_rate=0.0001,
        catch_exception=True,
        random_state=random_seed())

    gs = GridSearchCV(clf, params, cv=ps, n_jobs=1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best MSE {} @ params {}".format(-gs.best_score_, gs.best_params_))

    best_clf = clone(clf).set_params(**gs.best_params_)
    best_clf.fit(x_train, y_train)

    train_err = -best_clf.score(x_train, y_train)
    test_err = -best_clf.score(x_test, y_test)
    print("Training MSE = %.4f" % train_err)
    print("Testing MSE = %.4f" % test_err)
    assert abs(test_err + gs.best_score_) < 1e-4
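The closing assertion is a consistency check: the grid search validated on exactly the predefined test fold, so refitting the best parameters on the training split should reproduce the best validation MSE (`-gs.best_score_`), which the fixed `random_state` makes deterministic.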
Example No. 4: tuning FOGD for regression with grid search
def test_fogd_regression_gridsearch():
    print("========== Tune parameters for FOGD for regression ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_housing()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack((x_train, x_test))
    y = np.concatenate((y_train, y_test))

    params = {'gamma': [0.5, 1.0], 'learning_rate': [0.01, 0.5, 0.1]}

    ps = PredefinedSplit(test_fold=[-1] * x_train.shape[0] +
                         [1] * x_test.shape[0])

    clf = FOGD(model_name="FOGD_l2",
               D=100,  # number of random Fourier features
               lbd=0.0,  # regularization parameter (lambda)
               gamma=0.5,  # RBF kernel width
               loss='l2',
               catch_exception=True,  # keep grid search alive if a fit fails
               random_state=random_seed())

    gs = GridSearchCV(clf, params, cv=ps, n_jobs=-1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best MSE {} @ params {}".format(-gs.best_score_, gs.best_params_))

    best_clf = clone(clf).set_params(**gs.best_params_)
    best_clf.fit(x_train, y_train)

    print("Mistake rate = %.4f" % best_clf.mistake)

    # offline prediction
    print("Offline prediction")
    y_train_pred = best_clf.predict(x_train)
    y_test_pred = best_clf.predict(x_test)
    train_err = metrics.mean_squared_error(y_train, y_train_pred)
    test_err = metrics.mean_squared_error(y_test, y_test_pred)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
Example No. 5: tuning RKS for regression with grid search
def test_rks_regression_gridsearch():
    print("========== Tune parameters for RKS for regression ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_housing()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack((x_train, x_test))
    y = np.concatenate((y_train, y_test))

    params = {'gamma': [0.5, 1.0], 'learning_rate': [0.01, 0.3, 0.1]}

    ps = PredefinedSplit(test_fold=[-1] * x_train.shape[0] +
                         [1] * x_test.shape[0])

    clf = RKS(model_name="RKS_regression_gridsearch",
              D=100,  # number of random Fourier features
              lbd=0.0,  # regularization parameter (lambda)
              gamma=0.5,  # RBF kernel width
              loss='l2',
              num_epochs=10,
              catch_exception=True,  # keep grid search alive if a fit fails
              random_state=random_seed())

    gs = GridSearchCV(clf, params, cv=ps, n_jobs=-1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best MSE {} @ params {}".format(-gs.best_score_, gs.best_params_))

    best_clf = clone(clf).set_params(**gs.best_params_)
    best_clf.fit(x_train, y_train)

    train_err = -best_clf.score(x_train, y_train)
    test_err = -best_clf.score(x_test, y_test)
    print("Training MSE = %.4f" % train_err)
    print("Testing MSE = %.4f" % test_err)
    assert abs(test_err + gs.best_score_) < 1e-4
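RKS (Random Kitchen Sinks) uses the same random Fourier feature approximation as FOGD but trains offline over `num_epochs` passes, so this example evaluates through `score` like the GLM examples rather than through an online mistake rate.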
Example No. 6: tuning KMM for regression with grid search (batch and online modes)
def test_kmm_regression_gridsearch():
    print("========== Tune parameters for KMM for regression ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_housing()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack((x_train, x_test))
    y = np.concatenate((y_train, y_test))

    params = {'gamma': [0.5, 1.0], 'num_kernels': [1, 2, 4]}

    ps = PredefinedSplit(test_fold=[-1] * x_train.shape[0] +
                         [1] * x_test.shape[0])

    clf = KMM(model_name="KMM_l2",
              D=4,
              lbd=0.01,
              gamma=0.01,
              mode='batch',  # mini-batch training over num_epochs passes
              loss='l2',
              num_kernels=4,
              batch_size=100,
              temperature=0.1,
              num_epochs=10,
              num_nested_epochs=1,
              learning_rate=0.001,
              learning_rate_mu=0.001,
              learning_rate_gamma=0.001,
              learning_rate_alpha=0.001,
              catch_exception=True,
              random_state=random_seed())

    gs = GridSearchCV(clf, params, cv=ps, n_jobs=-1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best MSE {} @ params {}".format(-gs.best_score_, gs.best_params_))

    best_clf = clone(clf).set_params(**gs.best_params_)
    best_clf.fit(x_train, y_train)

    y_train_pred = best_clf.predict(x_train)
    y_test_pred = best_clf.predict(x_test)
    train_err = metrics.mean_squared_error(y_train, y_train_pred)
    test_err = metrics.mean_squared_error(y_test, y_test_pred)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
    assert abs(test_err + gs.best_score_) < 1e-2

    clf = KMM(model_name="KMM_l2",
              D=4,
              lbd=0.01,
              gamma=0.01,
              mode='online',  # single-pass online learning
              loss='l2',
              num_kernels=4,
              batch_size=100,
              temperature=0.1,
              num_epochs=10,
              num_nested_epochs=1,
              learning_rate=0.001,
              learning_rate_mu=0.001,
              learning_rate_gamma=0.001,
              learning_rate_alpha=0.001,
              catch_exception=True,
              random_state=random_seed())

    gs = GridSearchCV(clf, params, cv=ps, n_jobs=-1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best MSE {} @ params {}".format(-gs.best_score_, gs.best_params_))

    best_clf = clone(clf).set_params(**gs.best_params_)
    best_clf.fit(x_train, y_train)
    print("Mistake rate = %.4f" % best_clf.mistake)
    assert abs(best_clf.mistake + gs.best_score_) < 1e-2
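In online mode the model's score is evidently derived from the online mistake rate rather than an offline MSE, so the final assertion compares `best_clf.mistake` directly against `-gs.best_score_`; note that both KMM checks use a looser 1e-2 tolerance than the GLM example.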
Example No. 7: Semi-Supervised RBM for regression, with an SVR baseline
def test_ssrbm_regression(show_figure=False, block_figure_on_end=False):
    print("========== Test Semi-Supervised RBM for Classification ==========")

    num_labeled_data = 100

    from sklearn.svm import SVR
    from sklearn.metrics import mean_squared_error

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_housing()

    # keep labels for only `num_labeled_data` training samples; the rest are
    # flagged as unlabeled with the sentinel value 10**8 (idx_test indexes the
    # kept labeled samples, idx_train those whose labels are removed)
    idx_train, idx_test = next(
        iter(
            ShuffleSplit(n_splits=1,
                         test_size=num_labeled_data,
                         random_state=random_seed()).split(x_train, y_train)))
    y_train[idx_train] = 10**8

    x = np.vstack([x_train, x_test])
    y = np.concatenate([y_train, y_test])

    learning_display = Display(
        title="Learning curves",
        dpi='auto',
        layout=(3, 1),
        freq=1,
        show=show_figure,
        block_on_end=block_figure_on_end,
        monitor=[
            {
                'metrics': ['recon_err', 'val_recon_err'],
                'type': 'line',
                'labels': ["training recon error", "validation recon error"],
                'title': "Reconstruction Errors",
                'xlabel': "epoch",
                'ylabel': "error",
            },
            {
                'metrics': ['loss', 'val_loss'],
                'type': 'line',
                'labels': ["training loss", "validation loss"],
                'title': "Learning Losses",
                'xlabel': "epoch",
                'ylabel': "loss",
            },
            {
                'metrics': ['err', 'val_err'],
                'type': 'line',
                'labels': ["training error", "validation error"],
                'title': "Prediction Errors",
                'xlabel': "epoch",
                'ylabel': "error",
            },
            # {'metrics': ['loglik_csl', 'val_loglik_csl'],
            #  'type': 'line',
            #  'labels': ["training loglik (CSL)", "validation loglik (CSL)"],
            #  'title': "Loglikelihoods using CSL",
            #  'xlabel': "epoch",
            #  'ylabel': "loglik",
            #  },
        ])

    filter_display = Display(
        title="Receptive Fields",
        dpi='auto',
        layout=(1, 1),
        figsize=(8, 8),
        freq=1,
        show=show_figure,
        block_on_end=block_figure_on_end,
        monitor=[
            {
                'metrics': ['filters'],
                'title': "Receptive Fields",
                'type': 'img',
                'num_filters': 9,
                # 'disp_dim': (28, 28),
                'tile_shape': (3, 3),
            },
        ])

    hidden_display = Display(title="Hidden Activations",
                             dpi='auto',
                             layout=(1, 1),
                             figsize=(8, 8),
                             freq=1,
                             show=show_figure,
                             block_on_end=block_figure_on_end,
                             monitor=[
                                 {
                                     'metrics': ['hidden_activations'],
                                     'title': "Hidden Activations",
                                     'type': 'img',
                                     'data': x_train[:100],
                                 },
                             ])

    early_stopping = EarlyStopping(monitor='val_loss', patience=2, verbose=1)
    filepath = os.path.join(
        model_dir(), "male/ssRBM/housing_{epoch:04d}_{val_loss:.6f}.pkl")
    checkpoint = ModelCheckpoint(filepath,
                                 mode='min',
                                 monitor='val_loss',
                                 verbose=0,
                                 save_best_only=True)
    model = SemiSupervisedRBM(task='regression',
                              num_hidden=10,
                              num_visible=x_train.shape[1],
                              batch_size=20,
                              num_epochs=4,
                              w_init=0.01,
                              learning_rate=0.01,
                              momentum_method='sudden',
                              weight_cost=0.0,
                              inference_engine='variational_inference',
                              approx_method='first_order',
                              random_state=random_seed(),
                              metrics=['recon_err', 'loss', 'err'],
                              callbacks=[
                                  filter_display, learning_display,
                                  hidden_display, early_stopping, checkpoint
                              ],
                              # validation split: -1 = train, 0 = validation
                              cv=[-1] * x_train.shape[0] +
                              [0] * x_test.shape[0],
                              verbose=1)

    model.fit(x, y)

    print("Test reconstruction error = %.4f" %
          model.get_reconstruction_error(x_test).mean())
    print("Test loss = %.4f" % model.get_loss(x_test, y_test))

    print("=========== Predicted by Semi-Supervised RBM ============")
    print("Train RMSE = {0:>1.4f}\tTest RMSE = {1:>1.4f}".format(
        -model.score(x_train, y_train), -model.score(x_test, y_test)))

    # fit a Support Vector Regressor on the labeled subset only: y_train was
    # modified in place above, so the 10**8 sentinel values must be excluded
    # or they would corrupt both the fit and the train RMSE
    s = SVR()
    s.fit(x_train[idx_test], y_train[idx_test])
    print("=========== Predicted by Support Vector Regressor ============")
    print("Train RMSE = {0:>1.4f}\tTest RMSE = {1:>1.4f}".format(
        np.sqrt(mean_squared_error(y_train[idx_test],
                                   s.predict(x_train[idx_test]))),
        np.sqrt(mean_squared_error(y_test, s.predict(x_test)))))