def train(self, X, y, reg=1e-5, num_iters=400, norm=True):
    """
    Train a linear model using scipy's function minimization.

    Inputs:
    - X: N x D array of training data. Each training point is a D-dimensional row.
    - y: 1-dimensional array of length N with values in the reals.
    - reg: (float) regularization strength.
    - num_iters: (integer) number of steps to take when optimizing.
    - norm: a boolean which indicates whether the X matrix is standardized
      before solving the optimization problem.

    Outputs:
    - optimal value for theta
    """
    num_train, dim = X.shape

    # standardize features if norm=True
    if norm:
        # drop the leading column of ones and normalize the remaining features
        X_without_1s = X[:, 1:]
        X_norm, mu, sigma = utils.feature_normalize(X_without_1s)
        # add the column of ones back
        XX = np.vstack([np.ones((X_norm.shape[0],)), X_norm.T]).T
    else:
        XX = X

    # initialize theta
    theta = np.zeros((dim,))

    # run scipy's BFGS optimizer on the regularized loss and its gradient
    theta_opt_norm = scipy.optimize.fmin_bfgs(self.loss, theta,
                                              fprime=self.grad_loss,
                                              args=(XX, y, reg),
                                              maxiter=num_iters)

    if norm:
        # convert theta back so it works with the original (unstandardized) X
        theta_opt = np.zeros(theta_opt_norm.shape)
        theta_opt[1:] = theta_opt_norm[1:] / sigma
        theta_opt[0] = theta_opt_norm[0] - np.dot(theta_opt_norm[1:], mu / sigma)
    else:
        theta_opt = theta_opt_norm

    return theta_opt
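# NOTE: feature_normalize is not shown in this excerpt. The following is a minimal
# sketch of what it is assumed to do, based on how it is called throughout (return
# the standardized matrix together with the per-column mean and standard deviation).
# The actual implementation in utils.py may differ.
def feature_normalize(X):
    """Standardize each column of X to zero mean and unit standard deviation."""
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    X_norm = (X - mu) / sigma
    return X_norm, mu, sigma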
def test_3_1_b1():
    """ Testing the normalize function. """
    file = open("grader_data/norm_test.txt", "r")
    x_str = file.read()
    correct_mu = np.array([
        3.61352356e+00, 1.13636364e+01, 1.11367787e+01, 6.91699605e-02,
        5.54695059e-01, 6.28463439e+00, 6.85749012e+01, 3.79504269e+00,
        9.54940711e+00, 4.08237154e+02, 1.84555336e+01, 3.56674032e+02,
        1.26530632e+01
    ])
    correct_sigma = np.array([
        8.59304135e+00, 2.32993957e+01, 6.85357058e+00, 2.53742935e-01,
        1.15763115e-01, 7.01922514e-01, 2.81210326e+01, 2.10362836e+00,
        8.69865112e+00, 1.68370495e+02, 2.16280519e+00, 9.12046075e+01,
        7.13400164e+00
    ])
    correct_x = np.array([[float(x_val) for x_val in x_row.split(",")]
                          for x_row in x_str.split("\n")])
    X_norm, mu, sigma = feature_normalize(df.values)
    grader.requireIsEqual(correct_mu, mu)
    grader.requireIsEqual(correct_sigma, sigma)
    grader.requireIsEqual(correct_x, X_norm[:10, :])
#######################################################################
## =========== Part 4: Feature Mapping for Polynomial Regression =====#
#######################################################################

from utils import feature_normalize
import sklearn
from sklearn.preprocessing import PolynomialFeatures

# Map X onto polynomial features and normalize.
# We will consider a 6th order polynomial fit for the data.
p = 6
poly = PolynomialFeatures(degree=p, include_bias=False)
X_poly = poly.fit_transform(np.reshape(X, (len(X), 1)))
X_poly, mu, sigma = feature_normalize(X_poly)

# add a column of ones to X_poly
XX_poly = np.vstack([np.ones((X_poly.shape[0],)), X_poly.T]).T

# map Xtest and Xval into the same polynomial features
X_poly_test = poly.transform(np.reshape(Xtest, (len(Xtest), 1)))
X_poly_val = poly.transform(np.reshape(Xval, (len(Xval), 1)))

# normalize these two sets with the same mu and sigma computed on the training set
X_poly_test = (X_poly_test - mu) / sigma
X_poly_val = (X_poly_val - mu) / sigma
df = pd.DataFrame(data=bdata.data, columns=bdata.feature_names)
X = df.values
y = bdata.target

from sklearn.model_selection import train_test_split

X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.3, random_state=10)
X_train, X_val, y_train, y_val = train_test_split(X_train_val, y_train_val, test_size=0.3, random_state=10)

# standardize with statistics computed on the training split only
X_train_norm, mu, sigma = utils.feature_normalize(X_train)
X_test_norm = (X_test - mu) / sigma
X_val_norm = (X_val - mu) / sigma

# add the intercept column of ones
XX_train_norm = np.vstack([np.ones((X_train_norm.shape[0],)), X_train_norm.T]).T
XX_test_norm = np.vstack([np.ones((X_test_norm.shape[0],)), X_test_norm.T]).T
XX_val_norm = np.vstack([np.ones((X_val_norm.shape[0],)), X_val_norm.T]).T

# lambda = 0
reglinear_reg1 = RegularizedLinearReg_SquaredLoss()
theta_opt0 = reglinear_reg1.train(XX_train_norm, y_train, reg=0.0, num_iters=1000)
print('Theta at lambda = 0 is ', theta_opt0)
# The original line was truncated here; evaluating the squared loss on the test
# split (with reg = 0) is an assumed completion.
print('Test error of the best linear model with lambda = 0 is: ' +
      str(reglinear_reg1.loss(theta_opt0, XX_test_norm, y_test, 0.0)))
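# NOTE: the loss and grad_loss methods of RegularizedLinearReg_SquaredLoss are not
# shown in this excerpt. The sketch below shows the standard regularized squared
# loss and gradient they are assumed to implement (the intercept theta[0] is not
# regularized, matching the back-conversion in train above); the actual class may
# differ in detail.
def regularized_loss(theta, X, y, reg):
    """Regularized squared loss J(theta)."""
    m = X.shape[0]
    residual = X.dot(theta) - y
    return np.sum(residual ** 2) / (2.0 * m) + reg * np.sum(theta[1:] ** 2) / (2.0 * m)

def regularized_grad_loss(theta, X, y, reg):
    """Gradient of the regularized squared loss with respect to theta."""
    m = X.shape[0]
    residual = X.dot(theta) - y
    grad = X.T.dot(residual) / m
    grad[1:] += reg * theta[1:] / m   # do not regularize the intercept
    return grad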
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d

data = load_dataset("data2.txt")
X_Original = data[:, 0:3]
y = data[:, 3:4]

plt.scatter(X_Original[:, 0], X_Original[:, 1], c=y, s=50, cmap=plt.cm.Spectral)
X, mu, sigma = feature_normalize(X_Original)
plt.show()

X = add_x0(X)
m = X.shape[0]
n = X.shape[1]

learning_rate = .3
theta = np.zeros((n, 1))
max_iter = 800
his = np.zeros((max_iter, 1))

for i in range(max_iter):
    cost = compute_cost(X, y, theta)
    # The original excerpt ends mid-loop; recording the cost and taking a batch
    # gradient step (assumed completion) finishes a typical gradient-descent loop.
    his[i] = cost
    gradient = X.T.dot(X.dot(theta) - y) / m
    theta = theta - learning_rate * gradient
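# NOTE: compute_cost is not shown in this excerpt; the sketch below is the usual
# squared-error cost assumed here (X already includes the column of ones and theta
# is an (n, 1) column vector). The actual helper may differ.
def compute_cost(X, y, theta):
    """Mean squared error cost J(theta) = 1/(2m) * sum((X @ theta - y)^2)."""
    m = X.shape[0]
    residual = X.dot(theta) - y
    return float(np.sum(residual ** 2) / (2.0 * m))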
print('Reading data ...')
bdata = load_boston()
df = pd.DataFrame(data=bdata.data, columns=bdata.feature_names)

########################################################################
# ======= Part 2: Linear regression with multiple variables ===========#
########################################################################

X = df.values
y = bdata.target

# scale the features (zero-mean scaling)
X_norm, mu, sigma = utils.feature_normalize(X)

# add intercept term to X_norm
XX = np.vstack([np.ones((X.shape[0],)), X_norm.T]).T

print('Running gradient descent ..')

# set up model and train
linear_reg3 = LinearReg_SquaredLoss()
J_history3 = linear_reg3.train(XX, y, learning_rate=0.01, num_iters=5000, verbose=False)

# Plot the convergence graph and save it in fig5.pdf
plot_utils.plot_data(range(len(J_history3)), J_history3, 'Number of iterations', 'Cost J')
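# NOTE: LinearReg_SquaredLoss.train is not shown in this excerpt. Judging from the
# call above, it runs batch gradient descent and returns the per-iteration cost
# history; a minimal sketch under those assumptions is given below (the real class
# presumably stores theta on self and supports the verbose flag).
def gradient_descent_train(X, y, learning_rate=0.01, num_iters=5000):
    m, n = X.shape
    theta = np.zeros(n)
    J_history = []
    for _ in range(num_iters):
        residual = X.dot(theta) - y
        J_history.append(np.sum(residual ** 2) / (2.0 * m))   # record current cost
        theta -= learning_rate * X.T.dot(residual) / m         # batch gradient step
    return theta, J_history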
############################################################
if __name__ == "__main__":
    grader = graderUtil.Grader()
    reg_submission = grader.load('reg_linear_regressor_multi')
    util_submission = grader.load('utils')
    test_regressor = reg_submission.RegularizedLinearReg_SquaredLoss()

    # Load the housing test dataset.
    X, y, Xtest, ytest, Xval, yval = utils.load_mat('ex2data1.mat')
    XX = np.vstack([np.ones((X.shape[0],)), X]).T

    poly = PolynomialFeatures(degree=6, include_bias=False)
    X_poly = poly.fit_transform(np.reshape(X, (len(X), 1)))
    X_poly, mu, sigma = utils.feature_normalize(X_poly)

    # add a column of ones to X_poly
    XX_poly = np.vstack([np.ones((X_poly.shape[0],)), X_poly.T]).T
    print(X, XX_poly)

    # map Xtest and Xval into the same polynomial features
    X_poly_test = poly.transform(np.reshape(Xtest, (len(Xtest), 1)))
    X_poly_val = poly.transform(np.reshape(Xval, (len(Xval), 1)))

    # normalize these two sets with the same mu and sigma
    X_poly_test = (X_poly_test - mu) / sigma
    X_poly_val = (X_poly_val - mu) / sigma
# (continuation: the opening of this call is not included in the excerpt)
                  num_iters, normalize=args['normalize'])
print('theta: ', theta)
print('mu: ', mu)
print('sigma: ', sigma)

# Use learned parameters to make predictions
if X.shape[1] == 1:
    test = np.array([[7]])
    result = predict(test, theta, mu, sigma) * 10000
    print(result)
if X.shape[1] == 2:
    test2 = np.array([[1650, 3]])
    result2 = predict(test2, theta, mu, sigma)
    print(result2)

fig = plt.figure(num=2, figsize=(10, 6))
if normalize:
    X, _, _ = feature_normalize(X)
    plt.title('Population is normalized')
else:
    plt.title('Population is not normalized')
if X.shape[1] == 1:
    plt.plot(X, np.dot(np.insert(X, 0, 1, axis=1), theta), zorder=1)
plot_data(X, Y)

fig = plt.figure(num=3, figsize=(10, 6))
plt.plot([i for i in range(1, len(J_history) + 1)], J_history)
plt.show()
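# NOTE: predict is not shown in this excerpt. Based on how it is called above, it is
# assumed to standardize the raw input with the training mu/sigma, prepend the
# intercept term, and evaluate the linear model; the actual helper may differ.
def predict(X_new, theta, mu, sigma):
    """Predict targets for raw (unnormalized) inputs using the learned theta."""
    X_norm = (X_new - mu) / sigma              # reuse training statistics
    X_aug = np.insert(X_norm, 0, 1, axis=1)    # add the intercept column
    return X_aug.dot(theta)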
grader.addPart('3.1.B1', test_3_1_b1, 5)

X_norm, mu, sigma = feature_normalize(X)
multi_test_case = TestCase(
    grader,
    np.vstack([np.ones((X.shape[0],)), X_norm.T]).T,
    bdata.target)
mult_regressor = mult_submission.LinearReg_SquaredLoss()


def test_3_1_b2():
    """ Test the multivariable loss and gradient descent. """
    loss = np.array([296.07345849, 301.90740686])
    grad1 = np.array([
        -22.53280632, 3.56774723, -3.31177597, 4.44447236, -1.61029253,
        3.92622819, -6.38897522, 3.4634629, -2.29634809, 3.50638621,
        4.30491357, 4.66554993, -3.06384186, 6.77765364
    ])