def test_bootstrap_resample():
    """Tests the resample method of the Bootstrap class

    The tests comprise simple resampling cases for which we can evaluate
    the exact answer by hand.
    """
    # All data is the same, variance should be zero.
    OLS = LeastSquares()
    DM = DesignMatrix('polynomial', 3)
    bootstrap = Bootstrap(OLS, DM)
    x = np.ones(10) * 2.25
    y = x**3

    #bootstrap.resample(x, y, 10)
    # This fails with a LinAlgError("Singular matrix") on TravisCI, but
    # passes locally. Removing for now.
    #assert bootstrap.betaVariance == pytest.approx(np.zeros(4), abs=1e-15)

    # Ensure that larger noise in the data set gives larger computed
    # variance in the beta values from resampling.
    functions = {
        0:  lambda x: np.sin(x),
        1:  lambda x: np.cos(x),
        2:  lambda x: np.sin(2 * x),
        3:  lambda x: np.cos(2 * x),
        4:  lambda x: np.sin(3 * x),
        5:  lambda x: np.cos(3 * x),
        6:  lambda x: np.sin(4 * x),
        7:  lambda x: np.cos(4 * x),
        8:  lambda x: np.sin(5 * x),
        9:  lambda x: np.cos(5 * x),
        10: lambda x: np.sin(x)**2,
        11: lambda x: np.cos(x)**2,
        12: lambda x: np.sin(2 * x)**2,
        13: lambda x: np.cos(2 * x)**2,
        14: lambda x: np.sin(3 * x)**2,
        15: lambda x: np.cos(3 * x)**2,
    }
    DM = DesignMatrix(lambda j, x: functions[j](x), 9)
    OLS = LeastSquares()
    bootstrap = Bootstrap(OLS, DM)

    N = 100
    x = np.linspace(0, 2 * np.pi, N)
    meanBetaVariance = np.zeros(3)

    ind = 0
    for noiseScale in [0.0, 0.1, 1.0]:
        y = np.sin(1.5 * x) - 0.5 * np.cos(2 * x)**2 + np.random.normal(
            0, noiseScale, N)
        bootstrap.resample(x, y, 100)
        meanBetaVariance[ind] = np.mean(bootstrap.betaVariance)
        if ind > 0:
            assert meanBetaVariance[ind - 1] < meanBetaVariance[ind]
        ind += 1
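# For reference, a minimal sketch of the bootstrap-resampling behaviour the
# test above relies on: resample the data with replacement, refit, and take
# the elementwise variance of the fitted coefficients. This is an assumption
# about what the Bootstrap class does, not its actual code; the helper name
# and the lstsq-based fit are illustrative only.
def bootstrap_beta_variance(X, y, k, rng=None):
    import numpy as np
    rng = np.random.default_rng() if rng is None else rng
    n = X.shape[0]
    betas = np.empty((k, X.shape[1]))
    for i in range(k):
        idx = rng.integers(0, n, size=n)  # sample rows with replacement
        betas[i] = np.linalg.lstsq(X[idx], y[idx], rcond=None)[0]
    return betas.var(axis=0)  # variance of each beta over the k refits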
def test_LeastSquares_predict():
    """Tests the predict method of the Least Squares class

    The test is done with a known beta array, comparing the predictions
    of the manual backend to those of the scikit-learn backend.
    """
    N = 5
    P = 3
    x = np.linspace(0, 1, N)
    random.seed(10)
    y = 3 * x**2 - 9 * x - 2.4 * x**5 - 3.1

    X = np.zeros(shape=(N, P))
    X[:, 0] = 1.0
    for j in range(1, P):
        X[:, j] = x**j

    OLS = LeastSquares(backend='skl')
    beta_skl = OLS.fit(X, y)
    predict_skl = OLS.predict()

    OLS = LeastSquares(backend='manual')
    beta_manual = OLS.fit(X, y)
    # Ensure the exact same beta values are used by both backend versions.
    OLS.beta = beta_skl
    predict_manual = OLS.predict()

    assert predict_manual == pytest.approx(predict_skl, abs=1e-15)
def test_LeastSquares_R2():
    """Tests the R2 score method of the Least Squares class

    The test is done with a known beta array, comparing results to a
    known R2 value.
    """
    N = 5
    P = 3
    x = np.linspace(0, 1, N)
    random.seed(10)
    y = 3 * x**2 - 9 * x - 2.4 * x**5 + 3.1

    X = np.zeros(shape=(N, P))
    X[:, 0] = 1.0
    for j in range(1, P):
        X[:, j] = x**j

    OLS = LeastSquares(backend='skl')
    beta_skl = OLS.fit(X, y)
    R2_skl = OLS.R2()

    OLS = LeastSquares(backend='manual')
    beta_manual = OLS.fit(X, y)
    # Ensure the manual and the skl fit both use the exact same beta
    # values.
    OLS.beta = beta_skl
    R2_manual = OLS.R2()

    yHat = np.dot(X, beta_skl)
    R2_true = 1.0 - np.sum((y - yHat)**2) / np.sum((y - np.mean(y))**2)

    # beta:
    #    2.98147321428571299151
    #   -6.48616071428570872826
    #   -1.66071428571428914012
    #
    # yHat = beta0 + beta1 x + beta2 x^2
    #    2.98147321428571299151
    #    1.25613839285714279370
    #   -0.67678571428571365765
    #   -2.81729910714285569640
    #   -5.16540178571428487686
    #
    # y = 3x^2 - 9x - 2.4x^5 + 3.1
    #    3.10000000000000008882
    #    1.03515625000000000000
    #   -0.72500000000000008882
    #   -2.53203124999999973355
    #   -5.30000000000000071054
    #
    # R2 = 1.0 - sum((y - yHat)**2) / sum((y - mean(y))**2)
    #    0.99605957942938250227

    assert R2_skl == pytest.approx(R2_manual, abs=1e-15)
    assert R2_skl == pytest.approx(R2_true, abs=1e-15)
    assert R2_manual == pytest.approx(R2_true, abs=1e-15)
def test_LeastSquares_fit():
    """Tests the fit method of the Least Squares class

    The tests comprise fitting of models to known data.
    """
    # Ensure fitting polynomials of degree 1 through 4 to y(x) = x results
    # in the beta corresponding to the x term equal to 1.0 and all other
    # beta values zero.
    #
    # Secondly we test on y(x) = x + 2 to also make sure the intercept is
    # calculated correctly.
    for intercept in [0, 2]:
        for method in ['manual', 'skl']:
            N = 10
            x = np.linspace(0, 1, N)
            y = x + intercept

            for i in range(2, 5 + 1):
                P = i
                X = np.zeros(shape=(N, P))
                X[:, 0] = 1.0
                for j in range(1, P):
                    X[:, j] = x**j

                OLS = LeastSquares(backend=method)
                beta = OLS.fit(X, y)

                assert beta[0] == pytest.approx(intercept, abs=1e-10)
                assert beta[1] == pytest.approx(1.0, abs=1e-10)
                for j in range(2, P):
                    assert beta[j] == pytest.approx(0.0, abs=1e-10)

    # Ensure the backend='manual' and the backend='skl' versions of
    # LeastSquares.fit(X, y) give the same result.
    N = 5
    P = 5
    x = np.linspace(0, 1, N)
    y = x + x**2 - (1.0 - x)**5

    X = np.zeros(shape=(N, P))
    X[:, 0] = 1.0
    for j in range(1, P):
        X[:, j] = x**j

    OLS = LeastSquares(backend='manual')
    beta_manual = OLS.fit(X, y)
    OLS = LeastSquares(backend='skl')
    beta_skl = OLS.fit(X, y)

    assert beta_manual == pytest.approx(beta_skl, abs=1e-10)
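# The backend='manual' fit exercised above presumably solves the normal
# equations; a minimal sketch of that estimator (an assumption about the
# implementation, using the pseudo-inverse for rank-deficient safety):
def fit_ols_manual(X, y):
    import numpy as np
    # beta = (X^T X)^+ X^T y
    return np.linalg.pinv(X.T @ X) @ (X.T @ y)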
def test_LeastSquares_meanSquaredError():
    """Tests the meanSquaredError method of the Least Squares class

    The test is done with a known beta array, comparing results to a
    known MSE value.
    """
    N = 5
    P = 3
    x = np.linspace(0, 1, N)
    random.seed(10)
    y = 3 * x**2 - 9 * x - 2.4 * x**5 + 3.1

    X = np.zeros(shape=(N, P))
    X[:, 0] = 1.0
    for j in range(1, P):
        X[:, j] = x**j

    OLS = LeastSquares(backend='skl')
    beta_skl = OLS.fit(X, y)
    MSE_skl = OLS.meanSquaredError()

    OLS = LeastSquares(backend='manual')
    beta_manual = OLS.fit(X, y)
    # Ensure the manual and the skl fit both use the exact same beta
    # values.
    OLS.beta = beta_skl
    MSE_manual = OLS.meanSquaredError()

    # beta:
    #    2.98147321428571299151
    #   -6.48616071428570872826
    #   -1.66071428571428914012
    #
    # yHat = beta0 + beta1 x + beta2 x^2
    #    2.98147321428571299151
    #    1.25613839285714279370
    #   -0.67678571428571365765
    #   -2.81729910714285569640
    #   -5.16540178571428487686
    #
    # MSE = 1/5 * sum((yHat - y)**2)
    #    0.03294015066964287725
    MSE_true = 0.03294015066964287725

    assert MSE_skl == pytest.approx(MSE_manual, abs=1e-15)
    assert MSE_skl == pytest.approx(MSE_true, abs=1e-15)
    assert MSE_manual == pytest.approx(MSE_true, abs=1e-15)
def test_LeastSquares_fit_lasso():
    """Tests the fit method of the Least Squares class with method='lasso'
    """
    N = 500
    P = 5
    x = np.linspace(0, 1, N)
    y = 2.0 + x + x**2

    X = np.zeros(shape=(N, P))
    X[:, 0] = 1.0
    for j in range(1, P):
        X[:, j] = x**j

    lasso = LeastSquares(method='lasso', backend='skl')
    lasso.setLambda(0.01)
    beta_lasso = lasso.fit(X, y)

    # Make sure lasso regression zeroes out the x**3 and x**4 beta terms.
    assert beta_lasso[-2:] == pytest.approx(np.zeros(2), abs=1e-15)
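# The backend='skl' lasso fit presumably wraps sklearn.linear_model.Lasso;
# a minimal sketch of such a wrapper (the helper function is hypothetical,
# the sklearn estimator and its alpha/fit_intercept parameters are real):
def fit_lasso_skl(X, y, lambda_):
    from sklearn.linear_model import Lasso
    # The intercept is already a column of X, so fit_intercept is disabled.
    model = Lasso(alpha=lambda_, fit_intercept=False, max_iter=100000)
    model.fit(X, y)
    return model.coef_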
def plot_MSE_R2():
    leastSquares = LeastSquares(backend='manual')

    N = int(1e4)
    x = np.random.rand(N)
    y = np.random.rand(N)
    x_data = np.zeros(shape=(N, 2))
    x_data[:, 0] = x
    x_data[:, 1] = y
    y_data = np.zeros(shape=(N))

    @jit(nopython=True, cache=True)
    def computeFrankeValues(x_data, y):
        N = x_data.shape[0]
        for i in range(N):
            y[i] = franke(x_data[i, 0], x_data[i, 1])

    computeFrankeValues(x_data, y_data)

    p_max = 10
    p = [i for i in range(2, p_max + 1)]
    R2 = [None for i in range(2, p_max + 1)]
    MSE = [None for i in range(2, p_max + 1)]

    for degree in p:
        designMatrix = DesignMatrix('polynomial2D', degree)
        X = designMatrix.getMatrix(x_data)
        leastSquares.fit(X, y_data)
        _ = leastSquares.predict()
        R2[degree - 2] = leastSquares.R2()
        MSE[degree - 2] = leastSquares.MSE()
        print(R2[degree - 2])
        print(MSE[degree - 2])

    p = np.array(p)
    plt.semilogy(p, 1 - np.array(R2), 'b-o', markersize=3)
    plt.rc('text', usetex=True)
    plt.xlabel(r"$p$", fontsize=10)
    plt.ylabel(r"$1-(R^2$ score$)$", fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2)
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'OLS_R2.png'), transparent=True, bbox_inches='tight')
    plt.show()

    plt.figure()
    ax = plt.gca()
    plt.semilogy(p, MSE, 'r-o', markersize=3)
    plt.rc('text', usetex=True)
    plt.xlabel(r"$p$", fontsize=10)
    plt.ylabel(r"MSE", fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2)
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'OLS_MSE.png'), transparent=True, bbox_inches='tight')
    plt.show()
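# DesignMatrix('polynomial2D', degree).getMatrix(x_data) is used throughout;
# a minimal sketch of the matrix it presumably builds, with column ordering
# 1, x, y, x^2, xy, y^2, ... (matching the monomial labels used in
# plot_beta_ridge below), is an assumption about the implementation:
def polynomial2D_matrix(x_data, degree):
    import numpy as np
    x, y = x_data[:, 0], x_data[:, 1]
    columns = [x**(d - k) * y**k  # every monomial x^(d-k) y^k of total degree d
               for d in range(degree + 1)
               for k in range(d + 1)]
    return np.column_stack(columns)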
def visualize_beta():
    L = 10
    nn = pickle.load(open('nn5_final.p', 'rb'))
    #for w in nn.weights[:1]:
    #    with np.printoptions(precision=2, suppress=True):
    #        print(w.reshape((-1, L)))
    #plt.imshow(nn.weights[0].reshape((L, -1)))
    #plt.show()

    N = 5000
    training_fraction = 0.4
    ising = Ising(L, N)
    #X, y = ising.generateTrainingData1D()
    D, ry = ising.generateDesignMatrix1D()
    #y /= L

    ols = LeastSquares(method='ols', backend='manual')
    ols.setLambda(0.1)
    ols.fit(D, ry)
    print(ising.states.shape)

    #W = nn.weights[0].reshape((-1, L)) * nn.weights[1]
    W = ols.beta.reshape((-1, L))
    J = ising.J

    for i in range(10):
        row = ising.states[i, :]
        des = D[i, :]
        E = ry[i]
        print(row.shape)
        row = np.expand_dims(row, 1)
        print("s W s:", row.T @ W @ row)
        print("s (W+W')/2 s:", row.T @ (W + W.T) / 2 @ row)
        print("s J s:", row.T @ J @ row)
        #print("D w: ", W.T @ des * nn.weights[1])
        #print("pred: ", nn.predict(np.expand_dims(des.T, 1)))
        print("E: ", E)
        print("")

    for i in range(N):
        row = ising.states[i, :]
        des = D[i, :]
        E = ry[i]
        atol = 1e-14
        rtol = 1e-14
        #assert np.allclose(row.T @ W @ row, row.T @ (W+W.T)/2 @ row, atol=atol, rtol=rtol)
        #assert np.allclose(row.T @ (W+W.T)/2 @ row, row.T @ J @ row, atol=atol, rtol=rtol)

    with np.printoptions(precision=2, suppress=True):
        for a in np.linalg.eig(W):
            print(a)
    with np.printoptions(precision=2, suppress=True):
        print("det=", np.linalg.det(W))
        print("cond=", np.linalg.cond(W))
        print("")
        print("J:\n", J)
        print("W+W'/2\n", (W + W.T) / 2)
        print("J+J'/2\n", (J + J.T) / 2)
        print("Tr D:", np.sum(np.diag((W + W.T) / 2)))
from leastSquares import LeastSquares
from designMatrix import DesignMatrix
from franke import franke

np.random.seed(2018)


@jit(nopython=True, cache=True)
def computeFrankeValues(x_data, y, noise_strength=0.1):
    N = x_data.shape[0]
    for i in range(N):
        y[i] = franke(x_data[i, 0], x_data[i, 1]) + np.random.normal(
            0, noise_strength)


leastSquares = LeastSquares(method="ridge", backend='manual')
Lambda = 1
leastSquares.setLambda(Lambda)
#crossvalidation = CrossValidation(leastSquares, designMatrix)

N = int(1e4)
x1 = np.random.rand(N)
x2 = np.random.rand(N)
X = np.zeros(shape=(N, 2))
X[:, 0] = x1
X[:, 1] = x2

# Vector to hold y = franke(x1, x2)
y = np.zeros(shape=(N))
noise_strength = 0.3
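# The franke import above refers to the standard Franke test function. For
# reference, its well-known formula in plain NumPy (a sketch, named
# franke_reference to avoid shadowing the project's own franke module):
def franke_reference(x, y):
    term1 = 0.75 * np.exp(-(9 * x - 2)**2 / 4 - (9 * y - 2)**2 / 4)
    term2 = 0.75 * np.exp(-(9 * x + 1)**2 / 49 - (9 * y + 1) / 10)
    term3 = 0.50 * np.exp(-(9 * x - 7)**2 / 4 - (9 * y - 3)**2 / 4)
    term4 = -0.20 * np.exp(-(9 * x - 4)**2 - (9 * y - 7)**2)
    return term1 + term2 + term3 + term4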
def part_a(plotting=False):
    MSE_degree = []
    R2_degree = []
    betaVariance_degree = []

    for degree in [2, 3, 4, 5]:  # ,6,7,8,9]
        designMatrix = DesignMatrix('polynomial2D', degree)
        leastSquares = LeastSquares(backend='manual')
        bootstrap = Bootstrap(leastSquares, designMatrix)

        N = int(1e4)
        x = np.random.rand(N)
        y = np.random.rand(N)
        x_data = np.zeros(shape=(N, 2))
        x_data[:, 0] = x
        x_data[:, 1] = y
        y_data = np.zeros(shape=(N))

        @jit(nopython=True, cache=True)
        def computeFrankeValues(x_data, y):
            N = x_data.shape[0]
            for i in range(N):
                y[i] = franke(x_data[i, 0], x_data[i, 1])

        computeFrankeValues(x_data, y_data)
        bootstrap.resample(x_data, y_data, 1000)

        MSE_degree.append(leastSquares.MSE())
        R2_degree.append(leastSquares.R2())
        betaVariance_degree.append(bootstrap.betaVariance)

        if plotting:
            print("MSE: ", MSE_degree[-1])
            print("R2: ", R2_degree[-1])
            print("Beta Variance: ")
            for b in betaVariance_degree[-1]:
                print(b)
            print("Beta: ")
            for b in leastSquares.beta:
                print(b)
            print(" ")

            M = 100
            fig = plt.figure()
            ax = fig.gca(projection='3d')
            x = np.linspace(0, 1, M)
            y = np.linspace(0, 1, M)
            X, Y = np.meshgrid(x, y)
            x_data = np.vstack([X.ravel(), Y.ravel()]).T

            # When plotting the Franke function itself, we use these lines.
            yy_data = np.zeros(shape=(x_data.shape[0]))
            computeFrankeValues(x_data, yy_data)

            # When plotting the linear regression model:
            XX = designMatrix.getMatrix(x_data)
            leastSquares.X = XX
            y_data = leastSquares.predict()

            Z = np.reshape(y_data.T, X.shape)
            ZF = np.reshape(yy_data.T, X.shape)

            #ax.plot_surface(X, Y, Z, cmap=cm.coolwarm, linewidth=0, antialiased=False)
            ax.plot_surface(X, Y, abs(Z - ZF), cmap=cm.coolwarm,
                            linewidth=0, antialiased=False)
            ax.set_zlim(-0.10, 1.40)
            ax.zaxis.set_major_locator(LinearLocator(5))
            ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
            ax.view_init(30, 45)
            #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'franke.png'), transparent=True, bbox_inches='tight')
            #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'OLS'+str(degree)+'.png'), transparent=True, bbox_inches='tight')
            #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'OLS'+str(degree)+'_diff.png'), transparent=True, bbox_inches='tight')
            plt.show()

    print("\nMSE :")
    print(MSE_degree)
    print("\nR2 :")
    print(R2_degree)
    print("\nσ²(β) :")
    print(betaVariance_degree)
    return MSE_degree, R2_degree, betaVariance_degree
def fit_franke_noise():
    R2 = []
    MSE = []
    R2_noise = []
    MSE_noise = []
    beta_noise = []
    betaVariance_noise = []
    noise = np.logspace(-4, 0, 50)
    k = 1

    @jit(nopython=True, cache=True)
    def computeFrankeValues(x_data, y):
        N = x_data.shape[0]
        for i in range(N):
            y[i] = franke(x_data[i, 0], x_data[i, 1])

    for eta in noise:
        designMatrix = DesignMatrix('polynomial2D', 10)
        leastSquares = LeastSquares(backend='manual')
        bootstrap = Bootstrap(leastSquares, designMatrix)

        N = int(1e5)
        x = np.random.rand(N)
        y = np.random.rand(N)
        x_data = np.zeros(shape=(N, 2))
        x_data[:, 0] = x
        x_data[:, 1] = y
        y_data = np.zeros(shape=(N))
        computeFrankeValues(x_data, y_data)
        y_data_noise = y_data + eta * np.random.standard_normal(size=N)

        bootstrap.resample(x_data, y_data_noise, k)

        MSE_noise.append(leastSquares.MSE())
        R2_noise.append(leastSquares.R2())
        beta_noise.append(bootstrap.beta)
        betaVariance_noise.append(bootstrap.betaVariance)

        leastSquares.y = y_data
        MSE.append(leastSquares.MSE())
        R2.append(leastSquares.R2())

    """
    betaVariance_noise = np.array(betaVariance_noise)
    for beta in betaVariance_noise:
        print(beta)
    colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', '#1f77b4', '#ff7f0e',
              '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2',
              '#7f7f7f', '#bcbd22', '#17becf']
    for i in range(6):
        plt.loglog(noise, betaVariance_noise[:, i], colors[i] + '-o',
                   markersize=2)
    plt.rc('text', usetex=True)
    plt.xlabel(r"noise scale $\eta$", fontsize=10)
    plt.ylabel(r"$\sigma^2(\beta_j)$", fontsize=10)
    plt.legend([r"intercept", r"$\beta_{x}$", r"$\beta_{y}$",
                r"$\beta_{x^2}$", r"$\beta_{xy}$", r"$\beta_{y^2}$"],
               fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2)
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'beta_variance_OLS_noise.png'), transparent=True, bbox_inches='tight')
    #plt.show()
    """

    print(R2_noise)
    print(1 - np.array(R2_noise))
    fig, ax1 = plt.subplots()
    ax1.loglog(noise, 1 - np.array(R2_noise), 'k-o', markersize=2)
    ax1.loglog(noise, 1 - np.array(R2), 'k--', markersize=2)
    plt.xlabel(r"noise scale $\eta$", fontsize=10)
    plt.ylabel(r"$1-R^2$", color='k', fontsize=10)

    ax2 = ax1.twinx()
    ax2.loglog(noise, np.array(MSE_noise), 'b-o', markersize=2)
    ax2.loglog(noise, np.array(MSE), 'b--', markersize=2)
    plt.ylabel(r"MSE", color='b', fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2, right=0.9)

    ax1.set_ylim([0.95 * min(min(MSE_noise), min(R2_noise)),
                  1.05 * max(max(MSE_noise), max(R2_noise))])
    ax2.set_ylim([0.95 * min(min(MSE_noise), min(R2_noise)),
                  1.05 * max(max(MSE_noise), max(R2_noise))])
    ax2.get_yaxis().set_ticks([])
    plt.savefig(os.path.join(os.path.dirname(__file__), 'figures',
                             'R2MSE_OLS_noise.png'),
                transparent=True, bbox_inches='tight')
    plt.show()
def part_e_2():
    x_train, y_train, x_test, y_test = real_data(file_number=2,
                                                 plotting=False)
    degree = 10

    designMatrix = DesignMatrix('polynomial2D', degree)
    leastSquares = LeastSquares(backend='manual', method='ols')
    X = designMatrix.getMatrix(x_train)
    leastSquares.fit(X, y_train)
    X_test = designMatrix.getMatrix(x_test)
    leastSquares.X = X_test
    leastSquares.predict()
    leastSquares.y = y_test
    ols_MSE = leastSquares.MSE()
    print(ols_MSE)
    ols_MSE = np.array([ols_MSE, ols_MSE])
    ols_lambda = np.array([1e-5, 1])

    ridge_lambda = np.logspace(-5, 0, 20)
    ridge_MSE = []
    for lambda_ in ridge_lambda:
        print("ridge " + str(lambda_))
        designMatrix = DesignMatrix('polynomial2D', degree)
        leastSquares = LeastSquares(backend='manual', method='ridge')
        leastSquares.setLambda(lambda_)
        X = designMatrix.getMatrix(x_train)
        leastSquares.fit(X, y_train)
        X_test = designMatrix.getMatrix(x_test)
        leastSquares.X = X_test
        leastSquares.predict()
        leastSquares.y = y_test
        ridge_MSE.append(leastSquares.MSE())
        print(leastSquares.MSE())
    ridge_MSE = np.array(ridge_MSE)

    lasso_lambda = np.logspace(-4, 0, 20)
    lasso_MSE = []
    for lambda_ in lasso_lambda:
        print("lasso " + str(lambda_))
        designMatrix = DesignMatrix('polynomial2D', degree)
        leastSquares = LeastSquares(backend='manual', method='lasso')
        leastSquares.setLambda(lambda_)
        X = designMatrix.getMatrix(x_train)
        leastSquares.fit(X, y_train)
        X_test = designMatrix.getMatrix(x_test)
        leastSquares.X = X_test
        leastSquares.predict()
        leastSquares.y = y_test
        lasso_MSE.append(leastSquares.MSE())
    lasso_MSE = np.array(lasso_MSE)

    ########################################################
    plt.rc('text', usetex=True)
    plt.loglog(ols_lambda, ols_MSE, 'k--o', markersize=1, linewidth=1,
               label=r'OLS')
    plt.loglog(ridge_lambda, ridge_MSE, 'r-o', markersize=1, linewidth=1,
               label=r'Ridge')
    plt.loglog(lasso_lambda, lasso_MSE, 'b-o', markersize=1, linewidth=1,
               label=r'Lasso')
    plt.xlabel(r"$\lambda$", fontsize=10)
    plt.ylabel(r"MSE", fontsize=10)
    #plt.subplots_adjust(left=0.2, bottom=0.2)
    plt.legend(fontsize=10)
    plt.savefig(os.path.join(os.path.dirname(__file__), 'figures',
                             'lambda_terrain.png'),
                transparent=True, bbox_inches='tight')
    plt.show()
def part_e(plotting=False):
    x_train, y_train, x_test, y_test = real_data(file_number=2,
                                                 plotting=False)

    for method in ['ols', 'ridge', 'lasso']:
        designMatrix = DesignMatrix('polynomial2D', 10)
        leastSquares = LeastSquares(backend='manual', method=method)
        leastSquares.setLambda(1e-3)
        if method == 'lasso':
            leastSquares.setLambda(1e-4)

        X = designMatrix.getMatrix(x_train)
        leastSquares.fit(X, y_train)
        X_test = designMatrix.getMatrix(x_test)
        leastSquares.X = X_test
        leastSquares.predict()
        leastSquares.y = y_test
        print(leastSquares.MSE())

        if plotting:
            x = np.linspace(0, 1, 60)
            y = np.copy(x)
            XX, YY = np.meshgrid(x, y)
            ZZ = np.reshape(leastSquares.yHat, XX.shape)

            fig = plt.figure()
            ax = fig.gca(projection='3d')
            ax.plot_surface(XX, YY, ZZ, cmap=cm.coolwarm,
                            linewidth=0, antialiased=False)
            ax.set_zlim(-0.10, 1.40)
            ax.zaxis.set_major_locator(LinearLocator(5))
            ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
            ax.view_init(30, 45 + 90)
            #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', method+'terrain.png'), transparent=True, bbox_inches='tight')
            plt.show()

    if plotting:
        x = np.linspace(0, 1, 60)
        y = np.copy(x)
        XX, YY = np.meshgrid(x, y)
        ZZ = np.reshape(y_test, XX.shape)

        fig = plt.figure()
        ax = fig.gca(projection='3d')
        ax.plot_surface(XX, YY, ZZ, cmap=cm.coolwarm,
                        linewidth=0, antialiased=False)
        ax.set_zlim(-0.10, 1.40)
        ax.zaxis.set_major_locator(LinearLocator(5))
        ax.zaxis.set_major_formatter(FormatStrFormatter('%.02f'))
        ax.view_init(30, 45 + 90)
        #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'test_terrain.png'), transparent=True, bbox_inches='tight')
        plt.show()
def part_b():
    R2 = []
    MSE = []
    R2_noise = []
    MSE_noise = []
    beta_noise = []
    betaVariance_noise = []
    noise = np.linspace(0, 1.0, 50)
    k = 1

    fig, ax1 = plt.subplots()
    plt.rc('text', usetex=True)

    @jit(nopython=True, cache=True)
    def computeFrankeValues(x_data, y):
        N = x_data.shape[0]
        for i in range(N):
            y[i] = franke(x_data[i, 0], x_data[i, 1])

    ind = -1
    for lambda_ in np.logspace(-2, 0, 3):
        ind += 1
        MSE_noise = []
        for eta in noise:
            designMatrix = DesignMatrix('polynomial2D', 10)
            if ind == 0:
                leastSquares = LeastSquares(backend='manual', method='ols')
            else:
                leastSquares = LeastSquares(backend='manual', method='ridge')
            leastSquares.setLambda(lambda_)
            bootstrap = Bootstrap(leastSquares, designMatrix)

            N = int(1000)
            x = np.random.rand(N)
            y = np.random.rand(N)
            x_data = np.zeros(shape=(N, 2))
            x_data[:, 0] = x
            x_data[:, 1] = y
            y_data = np.zeros(shape=(N))
            computeFrankeValues(x_data, y_data)
            y_data_noise = y_data + eta * np.random.standard_normal(size=N)

            bootstrap.resample(x_data, y_data_noise, k)

            MSE_noise.append(leastSquares.MSE())
            R2_noise.append(leastSquares.R2())
            beta_noise.append(bootstrap.beta)
            betaVariance_noise.append(bootstrap.betaVariance)

            # Different noise, test data
            N = int(1000)
            x = np.random.rand(N)
            y = np.random.rand(N)
            x_data = np.zeros(shape=(N, 2))
            x_data[:, 0] = x
            x_data[:, 1] = y
            y_data = np.zeros(shape=(N))
            computeFrankeValues(x_data, y_data)
            y_data_noise = y_data + eta * np.random.standard_normal(size=N)

            X = designMatrix.getMatrix(x_data)
            leastSquares.X = X
            leastSquares.predict()
            leastSquares.y = y_data_noise
            MSE.append(leastSquares.MSE())
            R2.append(leastSquares.R2())

        colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
        if ind == 0:
            ax1.loglog(noise, np.array(MSE_noise), colors[ind] + '--',
                       markersize=1, label=r"OLS")
        else:
            ax1.loglog(noise, np.array(MSE_noise), colors[ind] + '-',
                       markersize=1,
                       label=r"$\lambda=10^{%d}$" % (int(np.log10(lambda_))))

    plt.ylabel(r"MSE", fontsize=10)
    plt.xlabel(r"noise scale $\eta$", fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2)
    #ax1.set_ylim([0.95*min(min(MSE_noise), min(R2_noise)), 1.05*(max(max(MSE_noise), max(R2_noise)))])
    ax1.legend()
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'MSE_ridge_noise.png'), transparent=True, bbox_inches='tight')
    plt.show()
def test_LeastSquares_fit_ridge():
    """Tests the fit method of the Least Squares class with method='ridge'
    """
    N = 5
    P = 5
    x = np.linspace(0, 1, N)
    y = x + x**2 - (1.0 - x)**5

    X = np.zeros(shape=(N, P))
    X[:, 0] = 1.0
    for j in range(1, P):
        X[:, j] = x**j

    # With lambda = 0, ridge regression reduces to OLS.
    OLS = LeastSquares(method='ols', backend='manual')
    beta_ols = OLS.fit(X, y)

    OLS = LeastSquares(method='ridge', backend='manual')
    OLS.setLambda(0.0)
    beta_lambda0 = OLS.fit(X, y)

    assert beta_lambda0 == pytest.approx(beta_ols, abs=1e-15)

    # Make sure the skl and the manual backends give the same result.
    SKL = LeastSquares(method='ridge', backend='skl')
    SKL.setLambda(0.0)
    beta_skl = SKL.fit(X, y)

    assert beta_lambda0 == pytest.approx(beta_skl, abs=1e-10)

    SKL.setLambda(0.5)
    OLS.setLambda(0.5)
    beta_skl = SKL.fit(X, y)
    beta_manual = OLS.fit(X, y)
    print(beta_manual)
    print(beta_skl)

    assert beta_manual == pytest.approx(beta_skl, abs=1e-10)
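# The manual ridge fit tested above presumably uses the closed-form
# shrinkage estimator; a minimal sketch (an assumption about the
# implementation) follows. With lambda_ = 0 it reduces to OLS, which is
# exactly the property the first assertion checks.
def fit_ridge_manual(X, y, lambda_):
    import numpy as np
    # beta = (X^T X + lambda I)^{-1} X^T y
    P = X.shape[1]
    return np.linalg.solve(X.T @ X + lambda_ * np.eye(P), X.T @ y)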
    ]
    return averages


if __name__ == '__main__':
    Degree = 1 + np.arange(20)
    train_MSE = []
    test_MSE = []
    print("#########################################################")

    for degree in Degree:
        degree = int(degree)
        designMatrix = DesignMatrix('polynomial2D', degree)
        leastSquares = LeastSquares(method="ridge", backend='manual')
        Lambda = 4
        leastSquares.setLambda(Lambda)
        crossvalidation = CrossValidation(leastSquares, designMatrix)

        N = int(1e4)
        x = np.random.rand(N)
        y = np.random.rand(N)
        x_data = np.zeros(shape=(N, 2))
        x_data[:, 0] = x
        x_data[:, 1] = y
        y_data = np.zeros(shape=(N))

        X = designMatrix.getMatrix(x_data)
        XT_X = np.dot(X.T, X)
        print("det(X^T*X): %g" % np.linalg.det(XT_X))
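# The CrossValidation class instantiated above is not shown in this section;
# a minimal sketch of plain k-fold cross-validation as used for this kind of
# degree study (the fit/predict callables are hypothetical stand-ins for the
# regression backend, not the project's actual interface) is:
def kfold_mse(fit, predict, X, y, k=10, rng=None):
    import numpy as np
    rng = np.random.default_rng() if rng is None else rng
    folds = np.array_split(rng.permutation(X.shape[0]), k)
    mse = []
    for i in range(k):
        test = folds[i]
        train = np.concatenate([folds[j] for j in range(k) if j != i])
        fit(X[train], y[train])  # refit on the k-1 training folds
        mse.append(np.mean((y[test] - predict(X[test]))**2))
    return np.mean(mse)  # average held-out MSE over the k folds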
def R2_versus_lasso():
    L = 3
    N = 10000
    training_fraction = 0.4
    ising = Ising(L, N)
    D, ry = ising.generateDesignMatrix1D()
    X, y = ising.generateTrainingData1D()
    y /= L

    D_train = D[int(training_fraction * N):, :]
    ry_train = ry[int(training_fraction * N):]
    D_validation = D[:int(training_fraction * N), :]
    ry_validation = ry[:int(training_fraction * N)]

    lasso = LeastSquares(method='lasso', backend='skl')
    lasso.setLambda(1e-2)
    lasso.fit(D_train, ry_train)
    lasso.y = ry_validation
    # Despite the function name, the comparison below is done on the mean
    # squared error of the validation data.
    lasso_MSE = sklearn.metrics.mean_squared_error(
        ry_validation / L, lasso.predict(D_validation) / L)

    n_samples, n_features = X.shape

    nn = NeuralNetwork(inputs=L * L,
                       neurons=L,
                       outputs=1,
                       activations='identity',
                       cost='mse',
                       silent=False)
    nn.addLayer(neurons=1)
    nn.addOutputLayer(activations='identity')

    validation_skip = 100
    epochs = 50000
    nn.fit(D.T,
           ry,
           shuffle=True,
           batch_size=2000,
           validation_fraction=1 - training_fraction,
           learning_rate=0.0001,
           verbose=False,
           silent=False,
           epochs=epochs,
           validation_skip=validation_skip,
           optimizer='adam')

    plt.rc('text', usetex=True)
    validation_loss = nn.validation_loss_improving
    validation_ep = np.linspace(0, epochs, len(nn.validation_loss_improving))
    plt.semilogy(validation_ep, validation_loss, 'r-', label=r'NN')
    plt.semilogy([0, epochs], np.array([lasso_MSE, lasso_MSE]), 'k--',
                 label=r'Lasso')
    plt.xlabel(r'Epoch', fontsize=10)
    plt.ylabel(r'Mean squared error', fontsize=10)
    plt.legend(fontsize=10)
    plt.xlim((0, epochs))

    ax = plt.gca()
    ymin, ymax = ax.get_ylim()
    if ymin > pow(10, -5):
        ymin = pow(10, -5)
    #plt.ylim((ymin, ymax))
    plt.savefig(os.path.join(os.path.dirname(__file__), 'figures',
                             'NN_compare_lasso.png'),
                transparent=True, bbox_inches='tight')
if len(sys.argv) > 1:
    name = sys.argv[1]
    mode = sys.argv[2]

    if mode == "render_time":
        txt = Txt()
        txt.readTxt('txt/' + name + '.txt')
        renderTime = txt.getRenderTime()
        numberPolygons = txt.getNumberPolygons()

        plot = Plot()
        plot.setChartTile(name.capitalize())
        plot.setVertTitle('Render Time (nanoseconds)')
        lsq = LeastSquares()

        # PLOT NORMAL
        plot.plotChartRT(numberPolygons, renderTime)

        # PLOT LINEAR
        lsq.linearLeastSquares(False, False, numberPolygons, renderTime)
        lsRenderTime = lsq.createLinearEquation("Vertex")
        lsRenderTimeFormula = lsq.getLinearEqFormula("Vertex")
        plot.plotLeastSquareChartRT(numberPolygons, lsRenderTime, renderTime,
                                    'Least Squares: Linear',
                                    lsRenderTimeFormula)
        linearRenderTimeError = lsq.calculateError(renderTime, lsRenderTime)

        # PLOT EXPONENTIAL
        lsq.linearLeastSquares(False, True, numberPolygons, renderTime)
        lsRenderTime = lsq.createExpEquation("Vertex")
        lsRenderTimeFormula = lsq.getExpEqFormula("Vertex")
def plot_beta_ridge():
    beta = []
    betaVariance = []
    MSE = []
    R2 = []
    k = 10000

    fig, ax1 = plt.subplots()
    plt.rc('text', usetex=True)

    @jit(nopython=True, cache=True)
    def computeFrankeValues(x_data, y):
        N = x_data.shape[0]
        for i in range(N):
            y[i] = franke(x_data[i, 0], x_data[i, 1])

    ind = -1
    lam = np.logspace(-3, 5, 20)
    for lambda_ in lam:
        # The first iteration is the OLS baseline; the rest use ridge.
        ind += 1
        if ind == 0:
            leastSquares = LeastSquares(backend='manual', method='ols')
        else:
            leastSquares = LeastSquares(backend='manual', method='ridge')
        designMatrix = DesignMatrix('polynomial2D', 3)
        bootstrap = Bootstrap(leastSquares, designMatrix)
        leastSquares.setLambda(lambda_)

        N = int(1e4)
        x = np.random.rand(N)
        y = np.random.rand(N)
        x_data = np.zeros(shape=(N, 2))
        x_data[:, 0] = x
        x_data[:, 1] = y
        y_data = np.zeros(shape=(N))
        computeFrankeValues(x_data, y_data)
        eta = 1.0
        y_data_noise = y_data + eta * np.random.standard_normal(size=N)

        bootstrap.resample(x_data, y_data_noise, k)

        MSE.append(leastSquares.MSE())
        R2.append(leastSquares.R2())
        beta.append(bootstrap.beta)
        betaVariance.append(bootstrap.betaVariance)

        leastSquares.y = y_data
        MSE.append(leastSquares.MSE())
        R2.append(leastSquares.R2())

    beta = np.array(beta)
    betaVariance = np.array(betaVariance)

    monomial = ['1', 'x', 'y', 'x^2', 'xy', 'y^2',
                'x^3', 'x^2y', 'xy^2', 'y^3']
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd',
              '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
    for i in range(10):
        plt.errorbar(lam[1:], beta[1:, i],
                     yerr=2 * betaVariance[1:, i],
                     fmt='-o', markersize=2, linewidth=1, color=colors[i],
                     elinewidth=0.5, capsize=2, capthick=0.5,
                     label=r"$\beta_{%s}$" % (monomial[i]))
    plt.rc('text', usetex=True)
    plt.ylabel(r"$\beta_j$", fontsize=10)
    plt.xlabel(r"shrinkage parameter $\lambda$", fontsize=10)
    plt.subplots_adjust(left=0.2, bottom=0.2)
    plt.legend(fontsize=8)

    # Plot the OLS baseline (the first entry) separately.
    for i in range(10):
        plt.errorbar(1e-3, beta[0, i],
                     yerr=2 * betaVariance[0, i],
                     fmt='-o', markersize=2, linewidth=1, color=colors[i],
                     elinewidth=0.5, capsize=2, capthick=0.5)
    fig.gca().set_xscale('log')
    #plt.savefig(os.path.join(os.path.dirname(__file__), 'figures', 'beta_ridge.png'), transparent=True, bbox_inches='tight')
    plt.show()