def test_dualsgd_regression():
    print("========== Test DualSGD for Regression ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_housing()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    est = DualSGD(model_name="DualSGD_eps_insensitive",
                  k=20,
                  D=200,
                  gamma=1.0,
                  eps=0.001,
                  lbd=0.00128,
                  loss='eps_insensitive',
                  maintain='k-merging',
                  max_budget_size=100,
                  random_state=random_seed())
    est.fit(x_train, y_train)

    print("Mistake rate = %.4f" % est.mistake)
    print("Budget size = %d" % est.budget_size)

    # offline prediction
    print("Offline prediction")
    y_train_pred = est.predict(x_train)
    y_test_pred = est.predict(x_test)
    train_err = metrics.mean_squared_error(y_train, y_train_pred)
    test_err = metrics.mean_squared_error(y_test, y_test_pred)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
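
# For reference, a minimal NumPy sketch of the epsilon-insensitive loss
# selected above via loss='eps_insensitive'. This is the standard SVR-style
# definition; DualSGD's internal implementation may differ in details, so
# treat it as illustrative only.
def _eps_insensitive_loss_sketch(y_true, y_pred, eps=0.001):
    """max(0, |y - f(x)| - eps): errors inside the eps-tube cost nothing."""
    return np.maximum(0.0, np.abs(y_true - y_pred) - eps)
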
def test_glm_regression():
    print("========== Test GLM for regression ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_housing()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    clf = GLM(model_name="GLM_regression",
              task='regression',
              link='linear',             # link function
              loss='quadratic',          # loss function
              l2_penalty=0.0,            # ridge regularization
              l1_penalty=0.0,            # Lasso regularization
              l1_smooth=1E-5,            # smoothing for Lasso regularization
              l1_method='pseudo_huber',  # approximation method for L1-norm
              random_state=random_seed())
    clf.fit(x_train, y_train)

    train_err = -clf.score(x_train, y_train)
    test_err = -clf.score(x_test, y_test)
    print("Training MSE = %.4f" % train_err)
    print("Testing MSE = %.4f" % test_err)
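
# The `-clf.score(...)` pattern above assumes score() follows scikit-learn's
# "higher is better" convention and returns a *negative* MSE, so negating it
# recovers the usual MSE. A self-contained scikit-learn illustration of the
# same convention (LinearRegression here is only a stand-in):
def _demo_neg_mse_convention():
    from sklearn.linear_model import LinearRegression
    from sklearn.metrics import get_scorer
    x = np.arange(20, dtype=float).reshape(-1, 1)
    y = 3.0 * x.ravel()
    neg_mse = get_scorer('neg_mean_squared_error')(
        LinearRegression().fit(x, y), x, y)
    print("MSE = %.4f" % -neg_mse)  # negate to report a positive MSE
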
def test_tfglm_regression_gridsearch():
    print("========== Tune parameters for TensorFlowGLM for regression ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_housing()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack((x_train, x_test))
    y = np.concatenate((y_train, y_test))

    params = {'l1_penalty': [0.0, 0.0001],
              'l2_penalty': [0.0001, 0.001, 0.01]}

    ps = PredefinedSplit(test_fold=[-1] * x_train.shape[0]
                         + [1] * x_test.shape[0])

    clf = TensorFlowGLM(model_name="TensorFlowGLM_regression_gridsearch",
                        task='regression',
                        link='linear',             # link function
                        loss='quadratic',          # loss function
                        l2_penalty=0.0,            # ridge regularization
                        l1_penalty=0.0,            # Lasso regularization
                        l1_smooth=1E-5,            # smoothing for Lasso regularization
                        l1_method='pseudo_huber',  # approximation method for L1-norm
                        learning_rate=0.0001,
                        catch_exception=True,
                        random_state=random_seed())

    gs = GridSearchCV(clf, params, cv=ps, n_jobs=1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best MSE {} @ params {}".format(-gs.best_score_, gs.best_params_))

    best_clf = clone(clf).set_params(**gs.best_params_)
    best_clf.fit(x_train, y_train)

    train_err = -best_clf.score(x_train, y_train)
    test_err = -best_clf.score(x_test, y_test)
    print("Training MSE = %.4f" % train_err)
    print("Testing MSE = %.4f" % test_err)
    assert abs(test_err + gs.best_score_) < 1e-4
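
# The PredefinedSplit construction above encodes a single fixed
# train/validation split: entries with test_fold=-1 are never used for
# validation, while entries sharing the non-negative fold id 1 form the one
# validation fold, so GridSearchCV scores every candidate on exactly the
# held-out test block. A self-contained sketch of the same idiom with plain
# scikit-learn parts (Ridge and the sizes are only illustrative):
def _demo_predefined_split():
    from sklearn.linear_model import Ridge
    x = np.random.randn(100, 5)
    y = x @ np.ones(5)
    # first 80 rows: train only (-1); last 20 rows: the single validation fold
    ps = PredefinedSplit(test_fold=[-1] * 80 + [1] * 20)
    gs = GridSearchCV(Ridge(), {'alpha': [0.1, 1.0]}, cv=ps, refit=False)
    gs.fit(x, y)
    print(gs.best_params_)
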
def test_fogd_regression_gridsearch():
    print("========== Tune parameters for FOGD for regression ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_housing()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack((x_train, x_test))
    y = np.concatenate((y_train, y_test))

    params = {'gamma': [0.5, 1.0],
              'learning_rate': [0.01, 0.5, 0.1]}

    ps = PredefinedSplit(test_fold=[-1] * x_train.shape[0]
                         + [1] * x_test.shape[0])

    clf = FOGD(model_name="FOGD_l2",
               D=100,
               lbd=0.0,
               gamma=0.5,
               loss='l2',
               catch_exception=True,
               random_state=random_seed())

    gs = GridSearchCV(clf, params, cv=ps, n_jobs=-1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best MSE {} @ params {}".format(-gs.best_score_, gs.best_params_))

    best_clf = clone(clf).set_params(**gs.best_params_)
    best_clf.fit(x_train, y_train)

    print("Mistake rate = %.4f" % best_clf.mistake)

    # offline prediction
    print("Offline prediction")
    y_train_pred = best_clf.predict(x_train)
    y_test_pred = best_clf.predict(x_test)
    train_err = metrics.mean_squared_error(y_train, y_train_pred)
    test_err = metrics.mean_squared_error(y_test, y_test_pred)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
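
# FOGD fits a linear model on top of D random Fourier features whose
# frequencies are drawn according to gamma. A minimal NumPy sketch of the
# standard Rahimi-Recht feature map those hyperparameters refer to (the
# library's internal construction may differ in details):
def _random_fourier_features_sketch(x, D=100, gamma=0.5, rng=None):
    """Map x of shape (n, d) to (n, D) features approximating the RBF kernel
    k(a, b) = exp(-gamma * ||a - b||^2)."""
    rng = np.random.RandomState(random_seed()) if rng is None else rng
    w = rng.normal(scale=np.sqrt(2.0 * gamma), size=(x.shape[1], D))  # frequencies
    b = rng.uniform(0.0, 2.0 * np.pi, size=D)                         # phases
    return np.sqrt(2.0 / D) * np.cos(x.dot(w) + b)
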
def test_rks_regression_gridsearch():
    print("========== Tune parameters for RKS for regression ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_housing()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack((x_train, x_test))
    y = np.concatenate((y_train, y_test))

    params = {'gamma': [0.5, 1.0],
              'learning_rate': [0.01, 0.3, 0.1]}

    ps = PredefinedSplit(test_fold=[-1] * x_train.shape[0]
                         + [1] * x_test.shape[0])

    clf = RKS(model_name="RKS_regression_gridsearch",
              D=100,
              lbd=0.0,
              gamma=0.5,
              loss='l2',
              num_epochs=10,
              catch_exception=True,
              random_state=random_seed())

    gs = GridSearchCV(clf, params, cv=ps, n_jobs=-1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best MSE {} @ params {}".format(-gs.best_score_, gs.best_params_))

    best_clf = clone(clf).set_params(**gs.best_params_)
    best_clf.fit(x_train, y_train)

    train_err = -best_clf.score(x_train, y_train)
    test_err = -best_clf.score(x_test, y_test)
    print("Training MSE = %.4f" % train_err)
    print("Testing MSE = %.4f" % test_err)
    assert abs(test_err + gs.best_score_) < 1e-4
def test_kmm_regression_gridsearch():
    print("========== Tune parameters for KMM for regression ==========")

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_housing()
    print("Number of training samples = {}".format(x_train.shape[0]))
    print("Number of testing samples = {}".format(x_test.shape[0]))

    x = np.vstack((x_train, x_test))
    y = np.concatenate((y_train, y_test))

    params = {'gamma': [0.5, 1.0],
              'num_kernels': [1, 2, 4]}

    ps = PredefinedSplit(test_fold=[-1] * x_train.shape[0]
                         + [1] * x_test.shape[0])

    # batch mode
    clf = KMM(model_name="KMM_l2",
              D=4,
              lbd=0.01,
              gamma=0.01,
              mode='batch',
              loss='l2',
              num_kernels=4,
              batch_size=100,
              temperature=0.1,
              num_epochs=10,
              num_nested_epochs=1,
              learning_rate=0.001,
              learning_rate_mu=0.001,
              learning_rate_gamma=0.001,
              learning_rate_alpha=0.001,
              catch_exception=True,
              random_state=random_seed())

    gs = GridSearchCV(clf, params, cv=ps, n_jobs=-1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best MSE {} @ params {}".format(-gs.best_score_, gs.best_params_))

    best_clf = clone(clf).set_params(**gs.best_params_)
    best_clf.fit(x_train, y_train)

    y_train_pred = best_clf.predict(x_train)
    y_test_pred = best_clf.predict(x_test)
    train_err = metrics.mean_squared_error(y_train, y_train_pred)
    test_err = metrics.mean_squared_error(y_test, y_test_pred)
    print("Training error = %.4f" % train_err)
    print("Testing error = %.4f" % test_err)
    assert abs(test_err + gs.best_score_) < 1e-2

    # online mode
    clf = KMM(model_name="KMM_l2",
              D=4,
              lbd=0.01,
              gamma=0.01,
              mode='online',
              loss='l2',
              num_kernels=4,
              batch_size=100,
              temperature=0.1,
              num_epochs=10,
              num_nested_epochs=1,
              learning_rate=0.001,
              learning_rate_mu=0.001,
              learning_rate_gamma=0.001,
              learning_rate_alpha=0.001,
              catch_exception=True,
              random_state=random_seed())

    gs = GridSearchCV(clf, params, cv=ps, n_jobs=-1, refit=False, verbose=True)
    gs.fit(x, y)

    print("Best MSE {} @ params {}".format(-gs.best_score_, gs.best_params_))

    best_clf = clone(clf).set_params(**gs.best_params_)
    best_clf.fit(x_train, y_train)

    print("Mistake rate = %.4f" % best_clf.mistake)
    assert abs(best_clf.mistake + gs.best_score_) < 1e-2
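
# In online mode, the mistake rate checked by the assertion above behaves as
# the per-step loss averaged over a single pass: predict first, score, then
# update. A minimal NumPy sketch of that bookkeeping with plain SGD on a
# linear model (illustrative only, not KMM's actual update rule):
def _online_mistake_rate_sketch(x, y, lr=0.01):
    w = np.zeros(x.shape[1])
    cum_loss = 0.0
    for t in range(x.shape[0]):
        pred = x[t].dot(w)                     # predict before seeing y[t]
        cum_loss += (pred - y[t]) ** 2         # accumulate the per-step loss
        w -= lr * 2.0 * (pred - y[t]) * x[t]   # then take the gradient step
    return cum_loss / x.shape[0]               # averaged online loss
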
def test_ssrbm_regression(show_figure=False, block_figure_on_end=False):
    print("========== Test Semi-Supervised RBM for Regression ==========")

    num_labeled_data = 100

    from sklearn.svm import SVR
    from sklearn.metrics import mean_squared_error

    np.random.seed(random_seed())

    (x_train, y_train), (x_test, y_test) = demo.load_housing()

    # remove some labels: samples indexed by idx_train are marked as
    # unlabeled with the placeholder value 10**8
    idx_train, idx_test = next(
        iter(ShuffleSplit(n_splits=1,
                          test_size=num_labeled_data,
                          random_state=random_seed()).split(x_train, y_train)))
    y_train[idx_train] = 10 ** 8

    x = np.vstack([x_train, x_test])
    y = np.concatenate([y_train, y_test])

    learning_display = Display(
        title="Learning curves",
        dpi='auto',
        layout=(3, 1),
        freq=1,
        show=show_figure,
        block_on_end=block_figure_on_end,
        monitor=[
            {'metrics': ['recon_err', 'val_recon_err'],
             'type': 'line',
             'labels': ["training recon error", "validation recon error"],
             'title': "Reconstruction Errors",
             'xlabel': "epoch",
             'ylabel': "error",
             },
            {'metrics': ['loss', 'val_loss'],
             'type': 'line',
             'labels': ["training loss", "validation loss"],
             'title': "Learning Losses",
             'xlabel': "epoch",
             'ylabel': "loss",
             },
            {'metrics': ['err', 'val_err'],
             'type': 'line',
             'labels': ["training error", "validation error"],
             'title': "Prediction Errors",
             'xlabel': "epoch",
             'ylabel': "error",
             },
            # {'metrics': ['loglik_csl', 'val_loglik_csl'],
            #  'type': 'line',
            #  'labels': ["training loglik (CSL)", "validation loglik (CSL)"],
            #  'title': "Loglikelihoods using CSL",
            #  'xlabel': "epoch",
            #  'ylabel': "loglik",
            #  },
        ])

    filter_display = Display(
        title="Receptive Fields",
        dpi='auto',
        layout=(1, 1),
        figsize=(8, 8),
        freq=1,
        show=show_figure,
        block_on_end=block_figure_on_end,
        monitor=[
            {'metrics': ['filters'],
             'title': "Receptive Fields",
             'type': 'img',
             'num_filters': 9,
             # 'disp_dim': (28, 28),
             'tile_shape': (3, 3),
             },
        ])

    hidden_display = Display(
        title="Hidden Activations",
        dpi='auto',
        layout=(1, 1),
        figsize=(8, 8),
        freq=1,
        show=show_figure,
        block_on_end=block_figure_on_end,
        monitor=[
            {'metrics': ['hidden_activations'],
             'title': "Hidden Activations",
             'type': 'img',
             'data': x_train[:100],
             },
        ])

    early_stopping = EarlyStopping(monitor='val_loss', patience=2, verbose=1)
    filepath = os.path.join(model_dir(),
                            "male/ssRBM/housing_{epoch:04d}_{val_loss:.6f}.pkl")
    checkpoint = ModelCheckpoint(filepath,
                                 mode='min',
                                 monitor='val_loss',
                                 verbose=0,
                                 save_best_only=True)

    model = SemiSupervisedRBM(
        task='regression',
        num_hidden=10,
        num_visible=x_train.shape[1],
        batch_size=20,
        num_epochs=4,
        w_init=0.01,
        learning_rate=0.01,
        momentum_method='sudden',
        weight_cost=0.0,
        inference_engine='variational_inference',
        approx_method='first_order',
        random_state=random_seed(),
        metrics=['recon_err', 'loss', 'err'],
        callbacks=[filter_display, learning_display, hidden_display,
                   early_stopping, checkpoint],
        cv=[-1] * x_train.shape[0] + [0] * x_test.shape[0],
        verbose=1)

    model.fit(x, y)

    print("Test reconstruction error = %.4f"
          % model.get_reconstruction_error(x_test).mean())
    print("Test loss = %.4f" % model.get_loss(x_test, y_test))

    print("=========== Predicted by Semi-Supervised RBM ============")
    print("Train RMSE = {0:>1.4f}\tTest RMSE = {1:>1.4f}".format(
        -model.score(x_train, y_train), -model.score(x_test, y_test)))

    # fit a Support Vector Regressor for comparison
    s = SVR()
    s.fit(x_train, y_train)
    print("=========== Predicted by Support Vector Regressor ============")
    print("Train RMSE = {0:>1.4f}\tTest RMSE = {1:>1.4f}".format(
        np.sqrt(mean_squared_error(y_train, s.predict(x_train))),
        np.sqrt(mean_squared_error(y_test, s.predict(x_test)))))
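
# Convenience runner so the file can be executed directly; this is an
# assumption about how these tests are invoked (the repository may rely on
# pytest collection instead).
if __name__ == '__main__':
    test_dualsgd_regression()
    test_glm_regression()
    test_tfglm_regression_gridsearch()
    test_fogd_regression_gridsearch()
    test_rks_regression_gridsearch()
    test_kmm_regression_gridsearch()
    test_ssrbm_regression(show_figure=False, block_figure_on_end=False)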