def output(partId):
    # Random Test Cases
    X = np.stack([np.ones(20),
                  np.exp(1) * np.sin(np.arange(1, 21)),
                  np.exp(0.5) * np.cos(np.arange(1, 21))], axis=1)
    y = np.reshape((np.sin(X[:, 0] + X[:, 1]) > 0).astype(float), (20, 1))
    if partId == '1':
        out = formatter('%0.5f ', sigmoid(X))
    elif partId == '2':
        out = formatter(
            '%0.5f ',
            costFunction(np.reshape(np.array([0.25, 0.5, -0.5]), (3, 1)), X, y))
    elif partId == '3':
        cost, grad = costFunction(
            np.reshape(np.array([0.25, 0.5, -0.5]), (3, 1)), X, y)
        out = formatter('%0.5f ', grad)
    elif partId == '4':
        out = formatter(
            '%0.5f ',
            predict(np.reshape(np.array([0.25, 0.5, -0.5]), (3, 1)), X))
    elif partId == '5':
        out = formatter(
            '%0.5f ',
            costFunctionReg(np.reshape(np.array([0.25, 0.5, -0.5]), (3, 1)), X, y, 0.1))
    elif partId == '6':
        cost, grad = costFunctionReg(
            np.reshape(np.array([0.25, 0.5, -0.5]), (3, 1)), X, y, 0.1)
        out = formatter('%0.5f ', grad)
    return out
def stochasticGradientDescent(w, x, y, tolerance, batch_size, alpha, decay):
    """Use stochastic gradient descent to minimize cost."""
    epochs = 1
    iterations = 0
    while True:
        # Shuffle the training set each epoch (was len(train_x); use the local x)
        order = np.random.permutation(len(x))
        x = x[order]
        y = y[order]
        b = 0
        while b < len(x):
            tx = x[b:b + batch_size]
            ty = y[b:b + batch_size]
            gradient = costFunction(w, tx, ty)[0]
            error = costFunction(w, x, y)[1]
            w -= alpha * gradient
            iterations += 1
            b += batch_size
        # Keep track of our performance
        if epochs % 100 == 0:
            new_error = costFunction(w, x, y)[1]
            print("Epoch: %d - Error: %.4f" % (epochs, new_error))
            drawLine(fig, ax, x, x.dot(w), 'yellow', 'estimate')
            # Stopping condition
            if abs(new_error - error) < tolerance:
                print("Converged.")
                break
        alpha = alpha * (decay ** int(epochs / 1000))
        epochs += 1
    return w, error, iterations
def gradientCheck(thetas, X, y):
    """Numerically estimate the gradient of the cost w.r.t. each theta matrix."""
    epsilon = 1e-7
    gradientEstimate = [0] * len(thetas)
    for i, theta in enumerate(thetas):
        gradientEstimate[i] = theta.flatten()
        for j, ele in enumerate(theta.flatten()):
            # Perturb element j upward and rebuild the full parameter list
            thetaPlus = theta.flatten()
            thetaPlus[j] = ele + epsilon
            thetaPlus = thetaPlus.reshape(theta.shape)
            tempPlus = list(thetas)
            tempPlus[i] = thetaPlus
            thetaPlus = tempPlus
            # Perturb element j downward
            thetaMinus = theta.flatten()
            thetaMinus[j] = ele - epsilon
            thetaMinus = thetaMinus.reshape(theta.shape)
            tempMinus = list(thetas)
            tempMinus[i] = thetaMinus
            thetaMinus = tempMinus
            # Central-difference approximation
            gradientEstimate[i][j] = (costFunction(X, y, thetaPlus, 0)[0]
                                      - costFunction(X, y, thetaMinus, 0)[0]) / (2 * epsilon)
        gradientEstimate[i] = gradientEstimate[i].reshape(theta.shape)
    return gradientEstimate
def miniBatchGradientDescent(thetas, thetaShapes, X, y, lam, alpha, iterations, batchSize):
    m = y[:, 0].size
    for i in range(iterations):
        shuffle_in_unison(X, y)
        if m > batchSize:
            passes = m // batchSize
            for j in range(passes):
                Xstoch = X[j * batchSize:(j + 1) * batchSize, :]
                ystoch = y[j * batchSize:(j + 1) * batchSize]
                J, DS = costFunction(thetas, thetaShapes, Xstoch, ystoch, lam)
                thetas = thetas - np.multiply(alpha, DS)
        else:
            # Fewer examples than the batch size: fall back to a full-batch step
            # (the original sampled random indices here but never used them)
            J, DS = costFunction(thetas, thetaShapes, X, y, lam)
            thetas = thetas - np.multiply(alpha, DS)
        J, DS = costFunction(thetas, thetaShapes, X[0:10000, :], y[0:10000], lam)
        print("The cost at iteration %s is approximately: " % i, J)
    return thetas
def gradientDescent(X, Y, theta, alpha, iters):
    m = len(Y)
    J_history = np.zeros((iters, 1))
    for run in range(iters):
        h_theta = X @ theta
        theta = theta - ((alpha / m) * X.T @ (h_theta - Y))
        # Record the cost once per iteration (the original computed it twice)
        J_history[run] = costFunction.costFunction(X, Y, theta)
    return theta, J_history
def computeNumericalGradient(theta, layers, X, y, num_labels, l):
    numgrad = np.zeros(theta.shape)
    perturb = np.zeros(theta.shape)
    e = 0.0001
    for i in range(theta.size):
        perturb[i] = e
        loss1 = costFunction(theta - perturb, layers, X, y, num_labels, l)
        loss2 = costFunction(theta + perturb, layers, X, y, num_labels, l)
        # Compute Numerical Gradient
        numgrad[i] = (loss2 - loss1) / (2 * e)
        perturb[i] = 0.0
    return numgrad
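As a quick self-contained sanity check of the central-difference scheme used by computeNumericalGradient, the sketch below estimates the gradient of a simple quadratic cost (standing in for the neural-network costFunction; the layers/num_labels arguments are omitted here) and compares it against the known analytic gradient:

import numpy as np

def quad_cost(theta):
    # 0.5 * ||theta||^2 has the analytic gradient theta itself
    return 0.5 * np.sum(theta ** 2)

theta = np.array([1.0, -2.0, 3.0])
numgrad = np.zeros(theta.shape)
perturb = np.zeros(theta.shape)
e = 0.0001
for i in range(theta.size):
    perturb[i] = e
    numgrad[i] = (quad_cost(theta + perturb) - quad_cost(theta - perturb)) / (2 * e)
    perturb[i] = 0.0

print(np.allclose(numgrad, theta))  # True: numerical estimate matches the analytic gradient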
def costFunctionReg(theta, X, y, Lambda):
    """
    Compute cost and gradient for logistic regression with regularization

    computes the cost of using theta as the parameter for regularized logistic
    regression and the gradient of the cost w.r.t. the parameters.
    """
    # Initialize some useful values
    m = len(y)  # number of training examples

    # ====================== YOUR CODE HERE ======================
    # Instructions: Compute the cost of a particular choice of theta.
    #               You should set J to the cost.
    #               Compute the partial derivatives and set grad to the partial
    #               derivatives of the cost w.r.t. each parameter in theta

    uregJ = costFunction(theta, X, y)
    squr_theta = np.power(theta, 2)
    # Exclude the bias term theta[0] from the penalty
    reg_theta = np.sum(squr_theta) - np.power(theta[0], 2)
    J = uregJ + (1.0 * Lambda / (2.0 * m)) * reg_theta

    # =============================================================
    return J
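For reference, every costFunctionReg variant in this file computes the standard regularized logistic-regression cost, in which the bias term $\theta_0$ is excluded from the penalty:

$$J(\theta) = \frac{1}{m}\sum_{i=1}^{m}\left[-y^{(i)}\log h_\theta(x^{(i)}) - \left(1-y^{(i)}\right)\log\left(1-h_\theta(x^{(i)})\right)\right] + \frac{\lambda}{2m}\sum_{j=1}^{n}\theta_j^2, \qquad h_\theta(x) = \frac{1}{1+e^{-\theta^{T}x}}$$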
def gradientDescent(X, Y, theta, alpha, iterations):
    """
    Performs gradient descent to learn theta.

    theta = gradientDescent(X, Y, theta, alpha, iterations) updates theta by
    taking `iterations` gradient steps with learning rate alpha.
    """
    m = len(Y)
    J_history = np.zeros((iterations, 1))
    for i in range(iterations):
        h_theta = X @ theta
        theta = theta - ((alpha / m) * X.T @ (h_theta - Y))
        # Record the cost once per iteration (the original computed it twice)
        J_history[i] = costFunction.costFunction(X, Y, theta)
    return theta, J_history
def gradientDescent(X, y, theta, alpha, iterations):
    J_history = []
    for i in range(iterations):
        J, grad = costFunction(theta, X, y, requires_grad=True)
        J_history.append(J)
        theta = theta - alpha * grad
    return theta, np.array(J_history).reshape(iterations)
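A quick way to sanity-check any of the gradientDescent variants above is to plot the recorded cost history, which should decrease steadily when alpha is small enough. A minimal sketch, assuming X, y, and an initial theta are already set up as in the driver scripts later in this file:

import matplotlib.pyplot as plt

theta, J_history = gradientDescent(X, y, theta, alpha=0.01, iterations=400)
plt.plot(J_history)  # a rising curve means alpha is too large
plt.xlabel('Iteration')
plt.ylabel('Cost J')
plt.show()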
def costFunctionReg(theta, X, y, Lambda):
    """
    Compute cost and gradient for logistic regression with regularization

    computes the cost of using theta as the parameter for regularized logistic
    regression and the gradient of the cost w.r.t. the parameters.
    """
    # Initialize some useful values
    m = len(y)  # number of training examples

    # the unregularized cost and the penalty term
    sum_1 = 0
    sum_2 = 0
    # the cost
    J = 0

    # ====================== YOUR CODE HERE ======================
    # Instructions: Compute the cost of a particular choice of theta.
    #               You should set J to the cost.
    #               Compute the partial derivatives and set grad to the partial
    #               derivatives of the cost w.r.t. each parameter in theta

    sum_1 = costFunction(theta, X.values, y.values)
    # Skip the bias term theta[0] in the penalty
    for i in range(1, theta.shape[0]):
        sum_2 += theta[i]**2
    J = sum_1 + (sum_2 * Lambda) / (2 * m)

    # =============================================================
    return J
def test_costFunction_1(self):
    'Not regularized, hypo matches data perfectly'
    m = 3
    regParam = 0
    hypo = verticalize(np.ones(m))
    labels = verticalize(np.ones(m))
    theta = verticalize(np.ones(m))
    # TODO: make the cost function respond to theta; it isn't actually
    # used here since regParam = 0
    self.assertEqual(0, costFunction(hypo, theta, labels, regParam, m))
def test_costFunction_2(self):
    'Not regularized, hypo opposite of data'
    m = 3
    regParam = 0
    hypo = verticalize(np.zeros(m))
    labels = verticalize(np.ones(m))
    theta = verticalize(np.ones(m))
    self.assertEqual(m / (2 * m), costFunction(hypo, theta, labels, regParam, m))
def gradientDescent(x_data, y_data, alpha, theta, delta):
    m = np.size(x_data, 0)  # number of training examples
    J_0, grad = cf.costFunction(x_data, y_data, theta)  # gradient for the current parameters
    i = 0
    while True:
        thetat = theta - alpha * grad  # take one gradient descent step
        J, grad = cf.costFunction(x_data, y_data, thetat)
        if np.abs(J_0 - J) < delta:
            print("Number of gradient descent iterations: %d" % i)
            break
        else:
            i = i + 1
            if J > J_0:
                # If the cost increased, halve the learning rate
                alpha = alpha / 2
            J_0 = J
            theta = thetat
    return theta
def trainNN(X, Y, Input_layer_size, First_hidden_layer_size,
            Second_hidden_layer_size, num_labels, alpha):
    # Initialize random weights in (-1, 1)
    W2 = np.random.rand(First_hidden_layer_size, Input_layer_size + 1) * 2 * 10e-1 - 10e-1
    W3 = np.random.rand(Second_hidden_layer_size, First_hidden_layer_size + 1) * 2 * 10e-1 - 10e-1
    W4 = np.random.rand(num_labels, Second_hidden_layer_size + 1) * 2 * 10e-1 - 10e-1

    # Begin gradient descent
    i = 1        # number of iterations
    cost = 10e4  # previous cost, for the stopping test
    while True:
        # Unroll parameters to calculate gradients and cost
        # (dtype=object keeps the ragged weight arrays separate in newer numpy)
        nnParams = np.array([W2.reshape(W2.size, order='F'),
                             W3.reshape(W3.size, order='F'),
                             W4.reshape(W4.size, order='F')], dtype=object)

        # Compute cost and gradients
        [J, grad] = costFunction(nnParams, Input_layer_size, First_hidden_layer_size,
                                 Second_hidden_layer_size, num_labels, X, Y)

        # Print progress for monitoring
        print('Iteration: ', i, '| Cost: ', J)

        # Check stopping conditions
        if J < 0.1:
            break
        if abs(J - cost) < 10e-20:
            break

        # Update cost for the next comparison
        cost = J

        # Update weights
        W2_grad = np.reshape(grad[0], (First_hidden_layer_size, Input_layer_size + 1), order='F')
        W3_grad = np.reshape(grad[1], (Second_hidden_layer_size, First_hidden_layer_size + 1), order='F')
        W4_grad = np.reshape(grad[2], (num_labels, Second_hidden_layer_size + 1), order='F')
        W2 = W2 - alpha * W2_grad
        W3 = W3 - alpha * W3_grad
        W4 = W4 - alpha * W4_grad

        # Next iteration
        i = i + 1

    # Return final weights
    nnParams = np.array([W2.reshape(W2.size, order='F'),
                         W3.reshape(W3.size, order='F'),
                         W4.reshape(W4.size, order='F')], dtype=object)
    return nnParams
def gradientDescent(x, y, theta, alpha, iterations):
    J = [None]
    for i in range(iterations - 1):
        prediction = x.dot(theta)
        error = prediction - y
        # Gradient of the squared-error cost
        gradient = (np.transpose(error)).dot(x) / len(x)
        theta = theta - (alpha * gradient)
        J.append(costFunction(x, y, theta))
    return J, theta
def output(partId):
    # Random Test Cases
    X = column_stack((ones(20),
                      exp(1) * sin(arange(1, 21, 1)),
                      exp(0.5) * cos(arange(1, 21, 1))))
    y = (sin(X[:, 0] + X[:, 1]) > 0).astype(int)
    if partId == '1':
        return sprintf('%0.5f ', sigmoid(X))
    elif partId == '2':
        return sprintf('%0.5f ', costFunction(array([0.25, 0.5, -0.5]), X, y))
    elif partId == '3':
        cost, grad = costFunction(array([0.25, 0.5, -0.5]), X, y)
        return sprintf('%0.5f ', grad)
    elif partId == '4':
        return sprintf('%0.5f ', predict(array([0.25, 0.5, -0.5]), X))
    elif partId == '5':
        return sprintf('%0.5f ', costFunctionReg(array([0.25, 0.5, -0.5]), X, y, 0.1))
    elif partId == '6':
        cost, grad = costFunctionReg(array([0.25, 0.5, -0.5]), X, y, 0.1)
        return sprintf('%0.5f ', grad)
def costFunctionReg(theta, X, y, lambdaa):
    m = X.shape[0]
    J = costFunction.costFunction(theta, X, y)
    t = 0
    # Sum of squared parameters, skipping the bias term theta[0]
    for i in range(1, theta.shape[0]):
        t += theta[i]**2
    l = lambdaa / (2 * m)
    J += l * t
    return J
def gradientDescent(X, Y, Theta, learningRate, numIter):
    m = X.shape[0]
    for i in range(numIter):
        H = sigmoid(np.matmul(X, np.transpose(Theta)))
        Theta = Theta - learningRate / m * np.matmul(np.transpose(H - Y), X)
        cost = costFunction(X, Y, Theta)
        if i % 100 == 0:
            print(i, ":", cost)
    return Theta
def fminunc(func, theta, max_iter, alpha, X, y):
    lastCost = 1000000
    for i in range(max_iter):
        # Use the passed-in cost function (the original hard-coded costFunction,
        # leaving the func parameter unused)
        cost, grad = func(theta, X, y)
        # if i % 10 == 0: print("iter,cost:{},{}".format(i, cost))
        if lastCost - cost < 0.01:
            break
        else:
            lastCost = cost
            theta = theta - alpha * grad
    return theta
def costFunctionReg(theta, X, y, Lambda):
    """
    Compute cost and gradient for logistic regression with regularization

    computes the cost of using theta as the parameter for regularized logistic
    regression and the gradient of the cost w.r.t. the parameters.
    """
    # Initialize some useful values
    m = len(y)  # number of training examples

    # Penalize every parameter except the bias term theta[0]
    J = costFunction(theta, X, y) + (sum(theta ** 2) - theta[0]**2) * Lambda / (2 * m)
    return J
def gradientDescent(w, x, y, tolerance):
    iterations = 1
    while True:
        gradient, error = costFunction(w, x, y)
        new_w = w - alpha * gradient
        if np.sum(abs(new_w - w)) < tolerance:
            print("Converged.")
            break
        # Print error every 50 iterations (np.int is deprecated; iterations is already an int)
        if iterations % 50 == 0:
            drawLine(fig, ax, x, x.dot(w), 'yellow', 'estimate')
            print("Iteration: %d: - Error: %.8f\nwith w: " % (iterations, error))
            print(w)
        iterations += 1
        w = new_w
    print("final cost: %8f" % costFunction(w, train_x, train_y)[1])
    return w, error
def gradientDescent(X, y, theta, iterations, alpha):
    m = y.shape[0]
    J_history = np.zeros((iterations))
    for turn in range(iterations):
        h0 = 0
        h1 = 0
        for i in range(m):
            h0 += (np.dot(X[i, 0:2], theta) - y[i])
            h1 += (np.dot(X[i, 0:2], theta) - y[i]) * X[i, 1]
        temp0 = theta[0] - (alpha / m) * h0
        temp1 = theta[1] - (alpha / m) * h1
        theta[0] = temp0
        theta[1] = temp1
        J_history[turn] = costFunction(X, y, theta)
    return J_history, theta
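For comparison, the per-example loop above accumulates exactly the same update as the vectorized form used by the other gradientDescent variants in this file (assuming, as in these exercises, that X[:, 0] is the all-ones intercept column, which is why h0 carries no feature factor):

# Equivalent vectorized step for the two-parameter case above
theta = theta - (alpha / m) * X.T @ (X @ theta - y)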
def gradientDescent(X, y, theta, m, iteration, alpha):
    # One history entry per iteration (the original sized this (m, 1)
    # and overwrote it with a scalar)
    Jhistory = np.zeros((iteration, 1))
    for i in range(iteration):
        Hx = X.dot(theta)
        diff = np.subtract(Hx, y)
        Xtranspose = X.T
        partialDerivative = (1.0 / float(m)) * (Xtranspose.dot(diff))
        theta = theta - (alpha * partialDerivative)
        Jhistory[i] = costFunction(X, y, theta, m)
    return theta
def gradientDescent(X, y, theta, alpha, num_iters, lamb=0):
    m = len(y)
    x1 = X[:, [1]]
    x2 = X[:, [2]]
    print("Initial Theta -\n", theta)
    J_history = np.zeros((num_iters, 1))
    for i in range(num_iters):
        h = sigmoid(X @ theta)
        theta[0] = theta[0] - alpha * (1 / m) * sum(h - y)
        theta[1] = theta[1] - alpha * (1 / m) * sum((h - y) * x1)
        # Bug fix: the update must start from theta[2], not theta[0]
        theta[2] = theta[2] - alpha * (1 / m) * sum((h - y) * x2)
        J_history[i] = costFunction(theta, X, y)
        print(i, " -", J_history[i])
    print(theta)
    return (J_history, theta)
def main():
    df = pd.read_csv("ex2data1.txt", sep=",", header=None)
    m = df.shape[0]
    X = df.values[:, 0:2]
    X = np.concatenate((np.ones((m, 1)), X), axis=1)
    y = df.values[:, 2:3]
    # Three parameters: intercept plus the two exam-score features
    # (the original np.zeros(()) produced a 0-d array)
    initial_theta = np.zeros((3, 1))
    [cost, grad] = costFunction(initial_theta, X, y)
    theta = fminunc(costFunction, initial_theta, 300, 0.01, X, y)

    # Overlay the scatter plot of the data
    color = ["y", "b"]
    for index, rec in df.iterrows():
        plt.scatter(rec[0], rec[1], c=color[int(rec[2])])
    plt.show()
def checkNNCost(lambd):
    input_layer_size = 3
    hidden_layer_size = 5
    num_labels = 3
    m = 5
    layers = [3, 5, 3]

    Theta = []
    Theta.append(debugInitializeWeights(hidden_layer_size, input_layer_size))
    Theta.append(debugInitializeWeights(num_labels, hidden_layer_size))
    nn_params = unroll_params(Theta)

    X = debugInitializeWeights(m, input_layer_size - 1)
    y = remainder(arange(m) + 1, num_labels)

    cost = costFunction(nn_params, layers, X, y, num_labels, lambd)
    print('Cost: ' + str(cost))
def costFunctionReg(theta, X, y, Lambda):
    """
    Compute cost and gradient for logistic regression with regularization

    computes the cost of using theta as the parameter for regularized logistic
    regression and the gradient of the cost w.r.t. the parameters.
    """
    # Initialize some useful values
    m = len(y)  # number of training examples

    # ====================== YOUR CODE HERE ======================
    # Instructions: Compute the cost of a particular choice of theta.
    #               You should set J to the cost.
    #               Compute the partial derivatives and set grad to the partial
    #               derivatives of the cost w.r.t. each parameter in theta
    # =============================================================

    J = Lambda * np.sum(np.square(theta[1:])) / float(2 * m) + costFunction(theta, X, y)
    return J
def costFunctionReg(theta, X, y, Lambda):
    """
    @brief  Compute cost and gradient for logistic regression with regularization

    @param  theta   The theta
    @param  X       features
    @param  y       target
    @param  Lambda  The lambda

    @return the cost
    """
    m = y.size
    # skip x_0
    theta_ = theta[1:]
    J = costFunction(theta, X, y) + Lambda * np.sum(theta_**2) / (2 * m)
    return J
def computeGradient(x, y, theta, alpha, iterations):
    J = [None]
    for i in range(iterations - 1):
        m = len(y)
        # Gradient descent step. numpy's dot(...) handles the matrix
        # multiplication, so written this way the update also works for
        # multivariate linear regression.
        hypothesis = np.dot(x, theta)
        error = hypothesis - y
        xTrans = np.transpose(x)
        x_mat = np.dot(xTrans, error)
        gradient = (alpha / m) * x_mat
        theta = theta - gradient
        # After each update, call costFunction to get the cost with the new theta.
        J.append(costFunction(x, y, theta))
    return J, theta
def costFunctionReg(theta, X, y, lambda_):
    """Computes the cost of using theta as the parameter for regularized
    logistic regression and the gradient of the cost w.r.t. the parameters.
    """
    # Initialize some useful values
    m = len(y)  # number of training examples

    # ====================== YOUR CODE HERE ======================
    # Instructions: Compute the cost of a particular choice of theta.
    #               You should set J to the cost.
    #               Compute the partial derivatives and set grad to the partial
    #               derivatives of the cost w.r.t. each parameter in theta

    J, grad = costFunction(theta, X, y)
    # Zero out the bias term so it is not regularized
    theta = np.r_[0, theta[1:]]
    J += lambda_ * sum(theta**2) / (2 * m)
    grad += lambda_ * theta / m

    # =============================================================
    return J, grad
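Because this variant returns both the cost and its gradient, it plugs directly into scipy's minimize with jac=True. A sketch, assuming X (with intercept column) and y are prepared as in the driver scripts below; the value lambda_ = 1.0 is an arbitrary example:

import numpy as np
from scipy.optimize import minimize

initial_theta = np.zeros(X.shape[1])
res = minimize(costFunctionReg, initial_theta,
               args=(X, y, 1.0),         # (X, y, lambda_)
               method='BFGS', jac=True,  # jac=True: the objective returns (cost, grad)
               options={'maxiter': 400})
theta, final_cost = res.x, res.fun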
# plt.close()

## ============ Part 2: Compute Cost and Gradient ============
# In this part of the exercise, you will implement the cost and gradient
# for logistic regression. You need to complete the code in
# costFunction.py

# Set up the data matrix appropriately, and add ones for the intercept term
m, n = X.shape
X_padded = np.column_stack((np.ones((m, 1)), X))

# Initialize fitting parameters
initial_theta = np.zeros((n + 1, 1))

# Compute and display initial cost and gradient
cost, grad = cf.costFunction(initial_theta, X_padded, y, return_grad=True)

print('Cost at initial theta (zeros): {:f}'.format(cost))
print('Gradient at initial theta (zeros):')
print(grad)

input('Program paused. Press enter to continue.\n')

## ============= Part 3: Optimizing using fmin (and fmin_bfgs) =============
# In this exercise, you will use a built-in function (fmin) to find the
# optimal parameters theta.

# Run fmin and fmin_bfgs to obtain the optimal theta
# This function will return theta and the cost
# fmin followed by fmin_bfgs inspired by stackoverflow.com/a/23089696/583834
## ============ Part 2: Compute Cost and Gradient ============
# In this part of the exercise, you will implement the cost and gradient
# for logistic regression. You need to complete the code in
# costFunction.py

# Set up the data matrix appropriately, and add ones for the intercept term
m, n = X.shape

# Add intercept term to X and X_test
X = np.concatenate((np.ones((m, 1)), X), axis=1)

# Initialize fitting parameters
initial_theta = np.zeros((n + 1, 1))

# Compute and display initial cost and gradient
cost = costFunction.costFunction(initial_theta, X, y)
grad = gradfun.gradfun(initial_theta, X, y)

print('Cost at initial theta (zeros): ', cost)
print('Expected cost (approx): 0.693\n')
print('Gradient at initial theta (zeros): ')
print(grad)
print('Expected gradients (approx):\n -0.1000\n -12.0092\n -11.2628\n')

# Compute and display cost and gradient with non-zero theta
test_theta = np.array([[-24], [0.2], [0.2]])
cost = costFunction.costFunction(test_theta, X, y)
grad = gradfun.gradfun(test_theta, X, y)

print('\nCost at test theta: ', cost)
print('\nExpected cost (approx): 0.218\n')
## ============ Part 2: Compute Cost and Gradient ============
# In this part of the exercise, you will implement the cost and gradient
# for logistic regression. You need to complete the code in
# costFunction.py

# Set up the data matrix appropriately, and add ones for the intercept term
[m, n] = X.shape

# Add intercept term to x and X_test
X = np.vstack((np.ones(m), X.T)).T
y = y.reshape(-1, 1)

# Initialize fitting parameters
theta = np.zeros(n + 1)

# Compute and display initial cost and gradient
cost, grad = costFunction(theta, X, y)

print('Cost at initial theta (zeros): %f\n' % cost)
print('Gradient at initial theta (zeros): \n')
print(grad)

input('\nProgram paused. Press enter to continue.\n')

## ============= Part 3: Optimizing using minimize =============
# In this exercise, you will use a scipy function (minimize) to find the
# optimal parameters theta.

# Set options for minimize
res = minimize(costFunction, theta, method='BFGS', jac=True,
               options={'maxiter': 400}, args=(X, y))
# Add Polynomial Features

# Note that mapFeature also adds a column of ones for us, so the intercept
# term is handled
X = mapFeature(X[:, 0], X[:, 1])

# Initialize fitting parameters
initial_theta = np.zeros((X.shape[1], 1))

# Set regularization parameter lambda to 1
reg_lambda = 1

# Compute and display initial cost and gradient for regularized logistic
# regression
cost, grad = costFunction(initial_theta, X, y, reg_lambda), gradient(initial_theta, X, y, reg_lambda)

print('Cost at initial theta (zeros): %f\n' % cost)
print('Expected cost (approx): 0.693\n')
print('Gradient at initial theta (zeros) - first five values only:\n')
print(grad[0:5])
print('Expected gradients (approx) - first five values only:\n')
print(' 0.0085\n 0.0188\n 0.0001\n 0.0503\n 0.0115\n')

print('\nProgram paused. Press enter to continue.\n')
pause()

# Compute and display cost and gradient
# with all-ones theta and lambda = 10
test_theta = np.ones((X.shape[1], 1))
cost, grad = costFunction(test_theta, X, y, 10), gradient(test_theta, X, y, 10)
print('\nProgram paused. Press enter to continue.\n')
input()

# Getting the size of matrix X in the form [m, n]
m = X.shape[0]
n = X.shape[1]

# Adding an intercept column of ones to the matrix
X = np.column_stack((np.ones(m), X))

# Initial theta
initial_theta = np.zeros(n + 1)

cost, grad = cF.costFunction(initial_theta, X, y)

print('Cost at initial theta (zeros): %s\n' % cost)
print('Gradient at initial theta (zeros): \n')
print(' %s \n' % grad)

print('\nProgram paused. Press enter to continue.\n')
input()

"""
%% ============= Part 3: Optimizing using fminunc =============
% In this exercise, you will use a built-in function (fminunc) to find the
% optimal parameters theta.

% Set options for fminunc
"""
# Stack a column of ones as the intercept term onto X
# Optimisation note: it is faster to copy into a matrix of ones than to use
# numpy's hstack function
# X = np.hstack([np.ones([m, 1]), X])
temp = np.copy(X)
X = np.ones([m, n + 1])
X[:, 1:] = temp
del temp

# Initialize fitting parameters
initial_theta = np.zeros([n + 1, 1])

from sigmoid import sigmoid

# Compute and display initial cost and gradient
from costFunction import costFunction
[cost, grad] = costFunction(initial_theta, X, y)

print('Cost at initial theta (zeros): %f\n' % cost)
print('Gradient at initial theta (zeros)', grad)

from scipy.optimize import fmin_bfgs  # minimize, fmin_ncg
from costFunction import fun_costFunction, jac_costFunction
res = fmin_bfgs(f=fun_costFunction, x0=initial_theta, args=(X, y),
                maxiter=400, fprime=jac_costFunction)
# options = {'maxiter': 400}
# res = fmin(costFunction, x0=initial_theta, args=(X, y))
#           # maxiter=500, full_output=True, jac=jac_costFunction
# the problem we are working with.
print('Plotting data with + indicating (y = 1) examples,',
      'and o indicating (y = 0) examples.\n')
plotData(X, y, xlabel='Exam 1 score', ylabel='Exam 2 score',
         legends=['Admitted', 'Not Admitted'])

# ============ Part 2: Compute Cost and Gradient ============
# In this part of the exercise, you will implement the cost and gradient
# for logistic regression. You need to complete the code in
# costFunction.py
m, n = X.shape
X = np.hstack((np.ones((m, 1)), X))
initial_theta = np.zeros(n + 1)

cost, grad = costFunction(initial_theta, X, y)
print('Cost at initial theta (zeros):', cost)
print('Gradient at initial theta (zeros):', grad, '\n')

# =========== Part 3: Optimizing using fmin_bfgs ===========
# In this exercise, you will use a built-in function (fmin_bfgs) to find the
# optimal parameters theta.
cost_function = lambda p: costFunction(p, X, y)[0]
grad_function = lambda p: costFunction(p, X, y)[1]

theta = fmin_bfgs(cost_function, initial_theta, fprime=grad_function)
print('theta:', theta, '\n')

plotDecisionBoundary(theta, X[:, 1:], y, xlabel='Exam 1 score',
                     ylabel='Exam 2 score',
                     legends=['Admitted', 'Not Admitted', 'Decision Boundary'])
pause()

"""## Part 2: Compute Cost and Gradient """

# Set up the data matrix appropriately, and add ones for the intercept term
m, n = X.shape

# Add intercept term to x and X_test
X = np.c_[np.ones((m, 1)), X]

# Initialize fitting parameters
initial_theta = np.zeros((n + 1, 1))

# Compute and display initial cost and gradient
cost, grad = costFunction(initial_theta, X, y), gradient(initial_theta, X, y)

print("Cost at initial theta (zeros): ", cost, "\n")
print("Expected cost (approx): 0.693\n")
print('Gradient at initial theta (zeros): \n')
print(grad)
print("Expected gradients (approx):\n -0.1000\n -12.0092\n -11.2628\n")

# Compute and display cost and gradient with non-zero theta
test_theta = np.array([[-24], [0.2], [0.2]])
cost, grad = costFunction(test_theta, X, y), gradient(test_theta, X, y)

print("\nCost at test theta:", cost, "\n")
print("Expected cost (approx): 0.218\n")
print("Gradient at test theta: \n")
print(grad)
plt.ylabel('Exam 2 score')

# ============ Part 2: Compute Cost and Gradient ============

# Set up the data matrix appropriately, and add ones for the intercept term
m, n = X.shape

# Add intercept term to x and X_test
X = np.concatenate((np.ones((m, 1)), X), axis=1)

# Initialize fitting parameters
initial_theta = np.zeros(n + 1)

test_theta = np.array([-0.5, -1.0, -1.0])
cost = costFunction(test_theta, X, y)
print('Cost at test theta: %f' % cost)

# Compute and display initial cost and gradient
cost = costFunction(initial_theta, X, y)
print('Cost at initial theta (zeros): %f' % cost)

grad = gradientFunction(initial_theta, X, y)
print('Gradient at initial theta (zeros): ' + str(grad))

# ============= Part 3: Optimizing using scipy =============
res = minimize(costFunction, initial_theta, method='TNC',
               jac=False, args=(X, y),
               options={'gtol': 1e-3, 'disp': True, 'maxiter': 1000})
theta = res.x
m, n = data.shape
print(m, n)
n = n - 1
X = data[:, 0:2]
print(X.shape)
y = data[:, 2].reshape(m, 1)
print(y.shape)

# x1 = X[:, 0]
# x2 = X[:, 1]
# plotData('scatter', x1[np.nonzero(y == 1)[0]], x2[np.nonzero(y == 1)[0]], 'data1', 'Exam 1 score', 'Exam 2 score')
# plotData('scatter', x1[np.nonzero(y == 0)[0]], x2[np.nonzero(y == 0)[0]], 'data1', 'Exam 1 score', 'Exam 2 score', marker='o')
# plt.show()

X = np.concatenate((np.ones((m, 1)), X), axis=1)
print(X.shape)
init_theta = np.zeros((n + 1,))
print(init_theta.shape)

cost = costFunction(init_theta, X, y)
print(cost, cost.shape)
grad = gradFunction(init_theta, X, y)
print(grad.shape)

result = opt.minimize(costFunction, x0=init_theta, method='BFGS',
                      jac=gradFunction, args=(X, y))
theta = result.x
print('Cost at theta found by fmin_bfgs: ', result.fun)
print('theta: ', theta)
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import fmin

from plotData import plotData
from costFunction import costFunction
from plotDecisionBoundary import plotDecisionBoundary

data = np.loadtxt("ex2data1.txt", usecols=(0, 1, 2), delimiter=',', dtype=None)
X = data[:, 0:2]
y = data[:, 2]
y = y[:, np.newaxis]
m, n = X.shape

plotData(X, y)
plt.show()

X = np.concatenate((np.ones((m, 1)), X), axis=1)
theta = np.zeros((1, n + 1))
# costFunction(X, y, theta)

options = {'full_output': True, 'maxiter': 400}
theta, cost, _, _, _ = fmin(lambda t: costFunction(X, y, t), theta, **options)

plotDecisionBoundary(X, y, theta)
plt.show()