def test_predict_and_score(eng):
    """Check ``predict_and_score`` against separate ``predict``/``score`` calls.

    The expected array is the prediction with the score appended as one
    extra trailing column; the combined call must match it within
    ``allclose`` tolerance.

    Parameters
    ----------
    eng
        Engine handle forwarded to ``fromarray`` when building the targets.
    """
    features = randn(10, 2)
    targets = fromarray(randn(10, 4).T, engine=eng)

    fitted = LinearRegression().fit(features, targets)

    # Reference values obtained from the two separate calls.
    predicted = fitted.predict(features).toarray()
    scores = fitted.score(features, targets).toarray()
    expected = hstack([predicted, scores[:, newaxis]])

    # Value under test: both quantities produced by a single call.
    combined = fitted.predict_and_score(features, targets).toarray()

    assert allclose(expected, combined)
def __test_cross_validation_methods():
    """Run a manual comparison of the cross validation methods on toy data.

    Draws random sample points, evaluates ``2x^2 + exp(-2x)`` plus Gaussian
    noise on them, and fits a design matrix with columns ``1, x, x^2`` using
    a plain ``LinearRegression``. The same data is then passed through
    k-fold, kk-fold and Monte Carlo cross validation. For each method the
    R2, MSE, bias and variance attributes are printed, and every prediction
    is added to a single matplotlib figure that is shown at the end.

    Relies on ``np``, ``metrics``, ``kFoldCrossValidation``,
    ``kkFoldCrossValidation`` and ``MCCrossValidation`` being available at
    module level. Takes no arguments and returns ``None``.
    """
    from regression import LinearRegression
    import matplotlib.pyplot as plt

    # Initial values
    n = 100
    N_bs = 1000
    k_splits = 4
    test_percent = 0.2
    noise = 0.3
    np.random.seed(1234)

    # Sets up random matrices
    x = np.random.rand(n, 1)

    def func_exact(_x):
        # Target function with additive Gaussian noise of scale `noise`.
        return 2*_x*_x + np.exp(-2*_x) + noise * \
            np.random.randn(_x.shape[0], _x.shape[1])

    y = func_exact(x)

    def design_matrix(_x):
        # Columns: constant term, x, x^2.
        return np.c_[np.ones(_x.shape), _x, _x * _x]

    def report(cv):
        # Prints the metric summary shared by all cross validation objects.
        print("R2: {:-20.16f}".format(cv.R2))
        print("MSE: {:-20.16f}".format(cv.MSE))
        print("Bias^2:{:-20.16f}".format(cv.bias))
        print("Var(y):{:-20.16f}".format(cv.var))
        print("MSE = Bias^2 + Var(y) = ")
        print("{} = {} + {} = {}".format(cv.MSE, cv.bias, cv.var,
                                         cv.bias + cv.var))
        print("Diff: {}".format(abs(cv.bias + cv.var - cv.MSE)))

    # Sets up design matrix
    X = design_matrix(x)

    # Performs regression
    reg = LinearRegression()
    reg.fit(X, y)
    y = y.ravel()
    y_predict = reg.predict(X).ravel()

    print("Regular linear regression")
    print("R2: {:-20.16f}".format(reg.score(y, y_predict)))
    print("MSE: {:-20.16f}".format(metrics.mse(y, y_predict)))
    print("Bias^2:{:-20.16f}".format(metrics.bias2(y, y_predict)))

    # Small plotter
    plt.plot(x, y, "o", label="data")
    plt.plot(x, y_predict, "o",
             label=r"Pred, $R^2={:.4f}$".format(reg.score(y, y_predict)))

    # NOTE: the fold count is the local `k_splits`. The calls below used to
    # pass an undefined name (`k_fold_size`), which raised NameError.
    print("k-fold Cross Validation")
    kfcv = kFoldCrossValidation(x, y, LinearRegression, design_matrix)
    kfcv.cross_validate(k_splits=k_splits, test_percent=test_percent)
    report(kfcv)

    plt.errorbar(kfcv.x_pred_test, kfcv.y_pred,
                 yerr=np.sqrt(kfcv.y_pred_var), fmt="o",
                 label=r"k-fold CV, $R^2={:.4f}$".format(kfcv.R2))

    print("kk Cross Validation")
    kkcv = kkFoldCrossValidation(x, y, LinearRegression, design_matrix)
    kkcv.cross_validate(k_splits=k_splits, test_percent=test_percent)
    report(kkcv)

    # The kk-fold results are flattened before plotting, as in the
    # original code.
    plt.errorbar(kkcv.x_pred_test.ravel(), kkcv.y_pred.ravel(),
                 yerr=np.sqrt(kkcv.y_pred_var.ravel()), fmt="o",
                 label=r"kk-fold CV, $R^2={:.4f}$".format(kkcv.R2))

    print("Monte Carlo Cross Validation")
    mccv = MCCrossValidation(x, y, LinearRegression, design_matrix)
    mccv.cross_validate(N_bs, k_splits=k_splits, test_percent=test_percent)
    report(mccv)

    print("\nCross Validation methods tested.")

    plt.errorbar(mccv.x_pred_test, mccv.y_pred,
                 yerr=np.sqrt(mccv.y_pred_var), fmt="o",
                 label=r"MC CV, $R^2={:.4f}$".format(mccv.R2))

    plt.xlabel(r"$x$")
    plt.ylabel(r"$y$")
    plt.title(r"$y=2x^2$")
    plt.legend()
    plt.show()
def __test_bootstrap_fit():
    """Manual demo comparing an ordinary fit with a bootstrap on toy data.

    Draws random sample points, evaluates ``2x^2 + exp(-2x)`` plus Gaussian
    noise on them, and fits a design matrix with columns ``1, x, x^2`` using
    ``LinearRegression``. The same data is then bootstrapped through
    ``BootstrapRegression``. Metrics and coefficients of both runs are
    printed, and the data, the plain prediction and the bootstrap prediction
    (with error bars) are shown in one matplotlib figure.

    Relies on ``np``, ``metrics`` and ``BootstrapRegression`` being
    available at module level. Takes no arguments and returns ``None``.
    """
    from regression import LinearRegression

    # Experiment configuration.
    n_samples = 200
    n_bootstraps = 1000
    noise_level = 0.2
    holdout_fraction = 0.35
    np.random.seed(1234)

    # Random sample points arranged as a single column.
    points = np.random.rand(n_samples, 1)

    def noisy_target(_x):
        # Deterministic part first, then the scaled Gaussian noise.
        clean = 2*_x*_x + np.exp(-2*_x)
        n_rows = _x.shape[0]
        n_cols = _x.shape[1]
        return clean + noise_level * np.random.randn(n_rows, n_cols)

    observed = noisy_target(points)

    def design_matrix(_x):
        # Columns: constant term, x, x^2.
        return np.c_[np.ones(_x.shape), _x, _x*_x]

    features = design_matrix(points)

    # Plain regression on the full data set.
    ols = LinearRegression()
    ols.fit(features, observed)
    observed = observed.ravel()
    fitted = ols.predict(features).ravel()

    print("Regular linear regression")
    print("R2: {:-20.16f}".format(ols.score(fitted, observed)))
    print("MSE: {:-20.16f}".format(metrics.mse(observed, fitted)))
    print("Beta: ", ols.coef_.ravel())
    print("var(Beta): ", ols.coef_var.ravel())
    print("")

    # Bootstrap over the same data.
    print("Bootstrapping")
    boot = BootstrapRegression(points, observed, LinearRegression,
                               design_matrix)
    boot.bootstrap(n_bootstraps, test_percent=holdout_fraction)

    print("R2: {:-20.16f}".format(boot.R2))
    print("MSE: {:-20.16f}".format(boot.MSE))
    print("Bias^2:{:-20.16f}".format(boot.bias))
    print("Var(y):{:-20.16f}".format(boot.var))
    print("Beta: ", boot.coef_.ravel())
    print("var(Beta): ", boot.coef_var.ravel())
    print("MSE = Bias^2 + Var(y) = ")
    print("{} = {} + {} = {}".format(boot.MSE, boot.bias, boot.var,
                                     boot.bias + boot.var))
    print("Diff: {}".format(abs(boot.bias + boot.var - boot.MSE)))

    # Plotting: raw data, plain prediction, bootstrap prediction.
    import matplotlib.pyplot as plt

    flat_points = points.ravel()
    plt.plot(flat_points, observed, "o", label="Data")

    plain_label = r"Pred, R^2={:.4f}".format(ols.score(fitted, observed))
    plt.plot(points.ravel(), fitted, "o", label=plain_label)

    print(boot.y_pred.shape, boot.y_pred_var.shape)

    plt.errorbar(
        boot.x_pred_test,
        boot.y_pred,
        yerr=np.sqrt(boot.y_pred_var),
        fmt="o",
        label=r"Bootstrap Prediction, $R^2={:.4f}$".format(boot.R2),
    )

    plt.xlabel(r"$x$")
    plt.ylabel(r"$y$")
    plt.title(r"$2x^2 + \sigma^2$")
    plt.legend()
    plt.show()