def test_ridge():
    """ Check that the implemented Ridge regression method gives the same
    results as the scikit-learn version for a few different functions,
    within a tolerance. """
    N = 100
    deg = 3
    lamb = 1.0
    x = np.random.uniform(0, 1, (N, 1))
    y = np.random.uniform(0, 1, (N, 1))
    z1 = 2 * x**2 + 2 * x * y
    z2 = 3 * y**4
    z3 = FrankeFunction(x, y)

    for z in [z1, z2, z3]:
        # Fit with the self-written ridge implementation
        a = LinReg(x, y, z, deg)
        a.lamb = lamb
        beta = a.ridge()

        # Fit the same data with scikit-learn on an identical design matrix
        poly = PolynomialFeatures(degree=deg)
        XY_sk = poly.fit_transform(np.append(x, y, axis=1))
        ridge = RidgeCV([lamb], fit_intercept=False)
        ridge.fit(XY_sk, z)
        beta_sk = ridge.coef_.reshape(-1, 1)

        assert np.allclose(beta, beta_sk)
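# A minimal sketch (an assumption for illustration, not necessarily the exact
# LinReg.ridge() implementation) of the closed-form ridge estimator the test
# above relies on: beta = (X^T X + lambda I)^(-1) X^T z.
def _ridge_closed_form_sketch(XY, z, lamb):
    """Hypothetical helper: closed-form ridge coefficients for design matrix XY."""
    p = XY.shape[1]
    return np.linalg.solve(XY.T @ XY + lamb * np.eye(p), XY.T @ z)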
def boot_stats():
    """ Bootstrapping and computing bias-variance statistics. """
    lambdas = np.logspace(-4, 5, 10)
    model = LinReg(X, Y)
    models = []
    print("Bootstrapping models:")
    for regmethod in ['ols', 'ridge', 'lasso']:
        method = getattr(model, regmethod)
        for lamb in lambdas:
            model.lamb = lamb
            bias, variance, mse_train, mse_test, r2_train, r2_test = model.bootstrap(
                100, method)
            models.append([regmethod, lamb, mse_train, mse_test,
                           r2_train, r2_test, bias, variance])
            # OLS has no regularization parameter, so run it only once
            if regmethod == 'ols':
                break

    print("\nMODEL ANALYSIS (BOOTSTRAP):")
    print("=" * 85)
    print(" Method | lambda | MSE Train | MSE Test | R2 Train | R2 Test | Bias | Variance")
    print("-" * 85)
    for i in range(len(models)):
        print("%8s|%8g|%11g|%10g|%10f|%10f|%10f|%10g|" % tuple(models[i]))
    print("-" * 85)
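# Sketch of the bias-variance statistics boot_stats() expects from
# model.bootstrap(). This is an assumption about what that method computes,
# written as a hypothetical stand-alone helper: refit on B bootstrap resamples,
# predict on a fixed test set, then average the squared bias and the variance
# of the predictions over the test points.
def _bootstrap_bias_variance_sketch(fit, X_train, y_train, X_test, y_test, B=100):
    """Hypothetical helper: bootstrap estimates of bias^2 and variance."""
    n = X_train.shape[0]
    preds = np.empty((y_test.shape[0], B))
    for b in range(B):
        idx = np.random.randint(0, n, n)          # resample with replacement
        beta = fit(X_train[idx], y_train[idx])    # refit on the bootstrap sample
        preds[:, b] = (X_test @ beta).ravel()     # predict on the fixed test set
    bias = np.mean((y_test.ravel() - np.mean(preds, axis=1))**2)
    variance = np.mean(np.var(preds, axis=1))
    return bias, variance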
def test_design_matrix():
    """ Check that the design matrix made by the self-written code matches
    the one made by scikit-learn, within a tolerance. """
    N = 100
    degrees = [2, 10, 20]
    x = np.random.uniform(0, 1, (N, 1))
    y = np.random.uniform(0, 1, (N, 1))
    z = x + y

    for deg in degrees:
        a = LinReg(x, y, z, deg)
        XY = a.XY

        poly = PolynomialFeatures(degree=deg)
        XY_sk = poly.fit_transform(np.append(x, y, axis=1))

        assert np.allclose(XY, XY_sk)
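# For reference, a minimal sketch of the 2D polynomial design matrix the test
# above assumes LinReg builds: columns x^(k-j) * y^j for every total degree
# k <= deg, ordered like PolynomialFeatures. Hypothetical helper, not the class code.
def _design_matrix_sketch(x, y, deg):
    """Hypothetical helper: polynomial design matrix in x and y up to total degree deg."""
    x, y = x.ravel(), y.ravel()
    cols = []
    for k in range(deg + 1):      # total degree of the term
        for j in range(k + 1):    # exponent of y; exponent of x is k - j
            cols.append(x**(k - j) * y**j)
    return np.column_stack(cols)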
def plot_stuff():
    """ Plotting the matrix of the coupling constants. """
    model = LinReg(X, Y)
    fig, axarr = plt.subplots(nrows=2, ncols=3)
    cmap_args = dict(vmin=-1., vmax=1., cmap='seismic')
    lambdas = [0.0001, 0.01]

    for i in range(len(lambdas)):
        model.lamb = lambdas[i]
        # Fit each method and reshape the coefficients into an L x L coupling matrix
        J_ols = model.ols().reshape(L, L)
        J_ridge = model.ridge().reshape(L, L)
        J_lasso = model.lasso().reshape(L, L)

        axarr[i][0].imshow(J_ols, **cmap_args)
        axarr[i][0].set_title('$\\mathrm{OLS}$', fontsize=16)
        axarr[i][0].tick_params(labelsize=16)

        axarr[i][1].imshow(J_ridge, **cmap_args)
        axarr[i][1].set_title('$\\mathrm{Ridge},\\ \\lambda=%.4f$' % (lambdas[i]),
                              fontsize=16)
        axarr[i][1].tick_params(labelsize=16)

        im = axarr[i][2].imshow(J_lasso, **cmap_args)
        axarr[i][2].set_title('$\\mathrm{LASSO},\\ \\lambda=%.4f$' % (lambdas[i]),
                              fontsize=16)
        axarr[i][2].tick_params(labelsize=16)

        divider = make_axes_locatable(axarr[i][2])
        cax = divider.append_axes("right", size="5%", pad=0.05)
        cbar = fig.colorbar(im, cax=cax)
        cbar.ax.set_yticklabels(np.arange(-1.0, 1.0 + 0.25, 0.25), fontsize=14)
        cbar.set_label('$J_{i,j}$', labelpad=-40, y=1.12, fontsize=16, rotation=0)

    #fig.subplots_adjust(right=2.0)
    plt.show()
def stats():
    """ Testing all linear regression models for different regularization
    strengths and calculating MSE and R2 scores. """
    lambdas = np.logspace(-4, 5, 10)
    model = LinReg(X, Y)
    models = []
    for regmethod in ['ols', 'ridge', 'lasso']:
        method = getattr(model, regmethod)
        for lamb in lambdas:
            model.lamb = lamb
            J = method(model.xTrain, model.yTrain)

            Ypred_train = model.xTrain @ J
            Ypred_test = model.xTest @ J

            mse_train = model.MSE(model.yTrain, Ypred_train)
            mse_test = model.MSE(model.yTest, Ypred_test)
            r2_train = model.R2(model.yTrain, Ypred_train)
            r2_test = model.R2(model.yTest, Ypred_test)

            models.append([regmethod, lamb, mse_train, mse_test,
                           r2_train, r2_test])
            # OLS has no regularization parameter, so run it only once
            if regmethod == 'ols':
                break

    print("\nMODEL ANALYSIS:")
    print("=" * 85)
    print(" Method | lambda | MSE Train | MSE Test | R2 Train | R2 Test |")
    print("-" * 85)
    for i in range(len(models)):
        print("%8s|%8g|%11g|%10g|%10f|%10f|" % tuple(models[i]))
    print("-" * 85)
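# The scores above come from model.MSE and model.R2; the standard definitions
# assumed here are sketched below as hypothetical helpers (not the class methods).
def _mse_sketch(y_true, y_pred):
    """Mean squared error."""
    return np.mean((y_true - y_pred)**2)

def _r2_sketch(y_true, y_pred):
    """Coefficient of determination R^2."""
    ss_res = np.sum((y_true - y_pred)**2)
    ss_tot = np.sum((y_true - np.mean(y_true))**2)
    return 1.0 - ss_res / ss_tot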
import numpy as np
import matplotlib.pylab as plt
from matplotlib.ticker import MaxNLocator

import plotparams

N = 1500
x = np.random.uniform(0, 1, (N, 1))
y = np.random.uniform(0, 1, (N, 1))

for deg in [2, 5]:
    for sigma, noisy in zip([0, 0.5], ['', '_noisy']):
        noise = sigma * np.random.randn(N, 1)
        #z = 2*x**3 + y**2 + noise
        z = FrankeFunction(x, y) + noise

        a = LinReg(x, y, z, deg)
        a.split_data(frac=0.1)

        # OLS fit on the training set, evaluated on the test set
        beta_ols = a.ols(a.XY_Train, a.z_Train)
        zpredict = a.XY_Test @ beta_ols

        mse = a.MSE(a.z_Test, zpredict)
        var = np.diag(a.var_ols)
        conf = a.conf_ols

        #print(beta_ols)
        #print(var)
        #print(conf)
        print("MSE: ", mse)

        ax = plt.figure().gca()
        ax.errorbar(range(len(var)), beta_ols,