def gradientDescent(X, y, theta, alpha, num_iters):
    """Take ``num_iters`` gradient steps on ``theta`` and log the cost of each.

    The update multiplies X, theta and y with ``*``, which is a matrix
    product only for ``np.matrix`` operands -- assumed here, confirm against
    the caller.

    Returns the two-element list ``[theta, J_history]`` where ``J_history``
    holds the value of ``computeCost.computeCost`` after every step.
    """
    n_examples = len(y)
    J_history = np.zeros(num_iters)

    for step in range(num_iters):
        residual = X * theta - y
        theta = theta - (alpha / n_examples) * X.T * residual
        J_history[step] = computeCost.computeCost(X, y, theta)

    return [theta, J_history]
def O_u_fit(X_train, y_train, X_val, y_val, theta):
    """Print training and validation cost to help judge over-/under-fitting.

    ``computeCost`` is evaluated for ``theta`` on the training pair and on
    the validation pair; both values are printed as a labelled pandas
    Series, followed by a rule-of-thumb hint. Nothing is returned.
    """
    labels = ["train_J", "val_J"]
    datasets = ((X_train, y_train), (X_val, y_val))

    # Cost on the training set first, then on the validation set.
    costs = [computeCost(features, targets, theta) for features, targets in datasets]

    # Show both values side by side.
    print(pd.Series(costs, index=labels))

    # Hint text (Chinese): train_J << val_J suggests over-fitting; train_J
    # roughly equal to val_J with both large suggests under-fitting.
    print("一般情况下,如果train_J<<val_J,则是过拟合,如果train_J约等于val_J,且其值都比较大,则为欠拟合")
def gradientDescent(X, y, theta, alpha, num_iters):
    """
    Performs gradient descent to learn theta

    theta, J_history = gradientDescent(X, y, theta, alpha, num_iters)
    takes num_iters gradient steps with learning rate alpha and returns the
    final theta together with the list of costs recorded after each step.

    The caller's ``theta`` array is left untouched; a new array is returned.
    Assumes ``theta`` and ``y`` are 1-D arrays -- TODO confirm with callers.
    """
    J_history = []
    m = y.size  # number of training examples

    for _ in range(num_iters):
        # The residual is taken once from the current theta so that every
        # parameter is updated from the same pre-step values.
        residual = X.dot(theta) - y
        theta = theta - (alpha / m) * X.T.dot(residual)

        # Save the cost J in every iteration
        J_history.append(computeCost(X, y, theta))

    return theta, J_history
def gradientDescent(X, y, theta, alpha, num_iters):
    """
    Performs gradient descent to learn theta

    theta, J_history = gradientDescent(X, y, theta, alpha, num_iters)
    takes num_iters gradient steps with learning rate alpha. The cost after
    each step is printed (``'%0.4f \\n'`` format) and appended to J_history.

    Assumes ``theta`` and ``y`` are 1-D arrays -- TODO confirm with callers.
    """
    J_history = []
    m = y.size  # number of training examples

    for _ in range(num_iters):
        error = X.dot(theta) - y
        theta = theta - np.dot(error.T, X) * (alpha / m)

        # Evaluate the cost once and reuse it for both the progress line
        # and the history.
        cost = computeCost(X, y, theta)
        print('%0.4f \n' % cost)
        J_history.append(cost)

    return theta, J_history
def k_fold2(X, y, numval=10):
    """Repeated k-fold cross-validation for the gradient-descent model.

    Performs ``numval`` rounds. Each round reshuffles the data with
    ``random_data`` and then runs ``numval`` folds: the model is fitted with
    ``gd.grad(X_train, y_train, 0.003, 1500)`` and the held-out fold is
    scored with ``computeCost``.

    Returns a pair ``(train, test)``: the mean over rounds of the per-round
    mean of the training value returned by ``gd.grad`` and of the held-out
    cost, respectively.
    """
    y = np.array(y)
    fold_size = int(len(y) / numval)
    round_train_means = []  # one averaged training value per round
    round_test_means = []   # one averaged held-out cost per round

    for _ in range(numval):
        # Every round starts from a fresh random ordering of the samples.
        X, y = random_data(X, y)
        fold_train = []
        fold_test = []

        for j in range(numval):
            lo, hi = j * fold_size, (j + 1) * fold_size

            # Split into the held-out fold and the remaining training rows.
            if j == 0:
                X_test, y_test = X[:hi, :], y[:hi, :]
                X_train, y_train = X[hi:, :], y[hi:, :]
            elif j == numval - 1:
                # The last fold also takes any rows left over by the
                # integer division above.
                X_test, y_test = X[lo:, :], y[lo:, :]
                X_train, y_train = X[:lo, :], y[:lo, :]
            else:
                X_test, y_test = X[lo:hi, :], y[lo:hi, :]
                X_train = np.vstack([X[:lo, :], X[hi:, :]])
                y_train = np.vstack([y[:lo, :], y[hi:, :]])

            theta1, J_train = gd.grad(X_train, y_train, 0.003, 1500)
            J_test = computeCost(X_test, y_test, theta1)

            fold_train.append(J_train)
            fold_test.append(J_test)

        # A round is summarised by the mean over its folds.
        round_train_means.append(np.mean(fold_train))
        round_test_means.append(np.mean(fold_test))

    # The final figures are the means over all rounds.
    return np.mean(round_train_means), np.mean(round_test_means)
def gradientDescent(X, y, theta, alpha, num_iters):
    """
    Performs gradient descent to learn theta

    theta, cost_history, theta_history = gradientDescent(X, y, theta, alpha,
    num_iters) takes num_iters gradient steps with learning rate alpha.

    cost_history has one entry per step; theta_history has shape
    (theta.size, num_iters) with column i holding theta after step i.
    The caller's ``theta`` array is not modified.
    """
    m = y.size  # number of training examples
    n = theta.size  # number of parameters
    cost_history = np.zeros(num_iters)  # cost over iters
    theta_history = np.zeros((n, num_iters))  # theta over iters

    # Flatten the targets once so the residual is always 1-D, whether y was
    # passed as a vector or as a column.
    targets = np.reshape(y, -1)

    for i in range(num_iters):
        residual = np.reshape(X.dot(theta), -1) - targets
        gradient = np.reshape(X.T.dot(residual), theta.shape)
        theta = theta - (alpha / m) * gradient

        cost_history[i] = computeCost(X, y, theta)
        # Use n rather than a literal 2 so any number of parameters fits.
        theta_history[:, i] = theta.reshape((n,))

    return theta, cost_history, theta_history
def gradientDescentMulti(Xdata, y, theta, alpha, num_iters):
    """Perform gradient descent to learn theta.

    Inputs (as described by the original header):
        Xdata     - input data, size n x D
        y         - target values for the input data
        theta     - initial theta values, size D x 1
        alpha     - learning rate
        num_iters - number of iterations
    where n is the number of samples and D is the sample dimension plus 1
    (the extra one accounts for the constant column).

    Returns ``(theta, J_history)``. Once at least one step has run, the
    returned theta is a 1-D array of length ``theta.shape[0]``; J_history
    holds the value of ``computeCost`` after each iteration.
    """
    n = y.shape[0]
    n_params = theta.shape[0]
    J_history = np.zeros(num_iters)

    for step in range(num_iters):
        # Predictions as a column; the residual is shared by all parameters
        # so the update is simultaneous.
        residual = np.dot(Xdata, theta).reshape((-1, 1)) - y

        updated = np.zeros(n_params)
        for k in range(n_params):
            column = Xdata[:, k].reshape((-1, 1))
            updated[k] = theta[k] - (alpha / n) * np.sum(np.multiply(residual, column))
        theta = updated

        # save the cost J in every iteration
        J_history[step] = computeCost(Xdata, y, theta)

    return (theta, J_history)
def gradientDescent(X, y, theta, alpha, num_iters):
    """
    GRADIENTDESCENT Performs gradient descent to learn theta

    theta, J_history = gradientDescent(X, y, theta, alpha, num_iters)
    takes num_iters gradient steps with learning rate alpha. Every entry of
    theta is updated (not only the first two), and the caller's array is
    left unmodified.

    J_history has shape (num_iters, 1) and stores the cost after each step.
    """
    m = len(y)  # number of training examples
    J_history = np.zeros((num_iters, 1))

    for i in range(num_iters):
        # One residual per step, shared by all parameters, so the update is
        # simultaneous and the hypothesis is evaluated only once.
        residual = (np.matmul(X, theta) - y).ravel()
        gradient = np.reshape(np.dot(X.T, residual), np.shape(theta))
        theta = theta - (alpha / m) * gradient

        # Save the cost J in every iteration
        J_history[i] = computeCost(X, y, theta)

    return theta, J_history
def Linear(self, X, y, iters, alp):
    """Fit a two-parameter linear model by gradient descent and plot it in Tk.

    ``iters`` is converted with ``int`` and ``alp`` with ``float``. The
    fitted parameters are stored on ``self.theta``. A Tk window titled
    "Linear Fit Plot" is created and ``mainloop()`` is called on the
    ``PlotFig`` object before returning.

    Returns ``[J, self.theta[0], self.theta[1]]`` where ``J`` is the cost at
    the all-zero starting parameters.
    """
    # Gradient descent settings.
    iterations = int(iters)
    alpha = float(alp)

    # Starting parameters: a 2x1 column of zeros.
    start = np.matrix(np.zeros(2)).T

    # Cost at the starting point.
    J = computeCost.computeCost(X, y, start)

    # Run gradient descent; the cost history is not used here.
    self.theta, _unused_history = gradientDescent.gradientDescent(
        X, y, start, alpha, iterations)

    # Build the window and draw the data together with the fitted line.
    root = Tk.Tk()
    root.wm_title("Linear Fit Plot")
    figure = Figure(figsize=(5, 4), dpi=100)
    axes = figure.add_subplot(111)
    feature = X[:, 1]
    axes.plot(feature, y, 'o', label='Training data', color='blue')
    axes.plot(feature, X * self.theta, '-', label='Linear regression', color='red')
    axes.legend(loc=4)
    axes.set_title('Linear fitting')
    axes.set_xlabel('population')
    axes.set_ylabel('Profit')

    PlotFig(root, figure).mainloop()
    return [J, self.theta[0], self.theta[1]]
def gradientDescent(X, y, theta, alpha, num_iters):
    """Performs gradient descent to learn theta

    theta, J_history = gradientDescent(X, y, theta, alpha, num_iters)
    takes num_iters gradient steps with learning rate alpha.

    args:
        X: numpy array, Input
        y: numpy array, Output
        theta: numpy array, initial weights (not modified by this function)
        alpha: learning rate
        num_iters: number of epochs

    return:
        theta: the last theta after updates
        J_history: values of cost function, one per step
    """
    J_history = []
    m = len(y)  # number of samples

    for _ in range(num_iters):
        error = X.dot(theta) - y
        gradient = X.T.dot(error)
        # Rebind instead of using ``+=``: the in-place form would overwrite
        # the caller's array and raises for an integer-dtype theta.
        theta = theta - alpha / m * gradient
        J_history.append(computeCost(X, y, theta))

    return theta, J_history
def gradientDescent(X, y, theta, alpha, num_iters):
    """Perform gradient descent to learn theta.

    Takes ``num_iters`` vectorised gradient steps with learning rate
    ``alpha`` and returns ``(theta, J_history)``, where ``J_history`` is the
    list of ``computeCost`` values recorded after each step.
    """
    n_examples = y.size  # number of training examples
    J_history = []

    for _ in range(num_iters):
        residual = X.dot(theta) - y
        theta = theta - alpha * X.T.dot(residual) / n_examples

        # Save the cost J in every iteration
        J_history.append(computeCost(X, y, theta))

    return theta, J_history
def gradientDescent(X, y, theta, alpha, num_iters):
    """
    Performs gradient descent to learn theta

    theta, J_history = gradientDescent(X, y, theta, alpha, num_iters)
    takes num_iters gradient steps with learning rate alpha.

    All parameters are updated simultaneously from the same residuals, and
    the caller's ``theta`` array is not modified. Returns the final theta
    and the list of costs recorded after each step.
    """
    J_history = []
    m = y.size  # number of training examples
    targets = np.ravel(y)

    for _ in range(num_iters):
        errors = X.dot(theta).flatten() - targets
        # Each parameter moves along its own partial derivative, all taken
        # at the pre-step theta; no parameter is derived from another's
        # freshly updated value.
        gradient = np.reshape(X.T.dot(errors), np.shape(theta))
        theta = theta - alpha * (1.0 / m) * gradient

        # Save the cost J in every iteration
        J_history.append(computeCost(X, y, theta))

    return theta, J_history
def gradientDescent(X, y, theta, alpha, num_iters):
    """Perform gradient descent to learn theta.

    theta, J_history = gradientDescent(X, y, theta, alpha, num_iters)
    takes exactly num_iters gradient steps with learning rate alpha.

    J_history has shape (num_iters, 1); row i holds the cost after step i.
    """
    m = y.size  # number of training examples
    J_history = np.zeros((num_iters, 1))

    # Start at 0: Python indices are zero-based, so starting at 1 would skip
    # a step and leave J_history[0] unset.
    for i in range(num_iters):
        hypothesis = X @ theta
        diff = np.subtract(hypothesis, y)
        theta = np.subtract(theta, (alpha / m) * X.transpose() @ diff)

        # Save the cost J in every iteration
        J_history[i] = computeCost(X, y, theta)

    return theta, J_history
def gradientDescent(X, y, theta, alpha, num_iters):
    """Template for gradient descent; the update step is not implemented.

    As written, theta is never changed inside the loop, so the function
    returns the input theta together with a (num_iters, 1) array holding the
    cost of that same theta for every iteration.
    """
    n_examples = len(y)  # number of training examples (for the update step)
    cost_log = zeros((num_iters, 1))

    for step in range(num_iters):
        # ====================== YOUR CODE HERE ======================
        # Instructions: Perform a single gradient step on the parameter
        #               vector theta.
        #
        # Hint: While debugging, it can be useful to print out the values
        #       of the cost function (computeCost) and gradient here, or to
        #       call "pdb.set_trace()" to drop into the debugger.
        # ============================================================

        # Save the cost J in every iteration
        cost_log[step] = computeCost(X, y, theta)

    return (theta, cost_log)
def gradientDescent(X, y, theta, alpha, num_iters):
    """
    Performs gradient descent to learn theta

    theta, J_history = gradientDescent(X, y, theta, alpha, num_iters)
    takes num_iters gradient steps with learning rate alpha.

    The inputs are viewed as ``np.matrix`` objects and theta is transposed
    on entry, so the returned theta is the transposed (column) form of the
    input. ``computeCost`` receives ``theta.T``.
    NOTE(review): the subtraction ``X * theta - y`` presumably expects y as
    an (m, 1) column -- confirm with callers.
    """
    # ``np.asmatrix`` is used rather than ``np.mat``: the ``np.mat`` alias
    # is not available in NumPy 2.x.
    theta = np.asmatrix(theta).T
    X = np.asmatrix(X)
    y = np.asmatrix(y)
    print("zheli", X.shape, y.shape, theta.shape)

    J_history = []
    m = y.size  # number of training examples

    for _ in range(num_iters):
        # alpha / m rather than 1 / m * alpha: the latter evaluates to 0
        # wherever ``/`` is integer division.
        theta = theta - (alpha / m) * X.T * (X * theta - y)

        # Save the cost J in every iteration
        J_history.append(computeCost(X, y, theta.T))

    return theta, J_history
def gradientDescent(X, y, theta, alpha, num_iters):
    """
    Performs gradient descent to learn theta

    theta, theta_history, J_history = gradientDescent(X, y, theta, alpha,
    num_iters) takes num_iters gradient steps with learning rate alpha.

    theta_history is the list of theta values after each step and J_history
    the array of matching costs. The cost is printed every 100th iteration.
    """
    theta_history = []
    J_history = np.zeros(num_iters)
    m = y.size  # number of training examples

    for i in range(num_iters):
        y_hat = np.dot(X, theta)
        theta = theta - alpha * (1.0 / m) * np.dot(X.T, y_hat - y)

        # Save theta and the cost J in every iteration
        theta_history.append(theta)
        J_history[i] = computeCost(X, y, theta)

        # Progress output; the call form of print works on Python 3.
        if i % 100 == 0:
            print(J_history[i])

    return theta, theta_history, J_history
def runModel(L, params, activations, X, y, debug=False, printcost=False):
    """Run a model forward and report its cost and accuracy.

    Arguments (as described by the original header):
        L           - number of layers (excluding the input)
        params      - dictionary of parameters with keys W1..WL and b1..bL
        activations - list of activation functions, of size L
        X           - input of shape [nx, m]
        y           - correct labels of shape [n_y, m]; values 0. or 1.
        debug       - forwarded to ``fp.forwardProp`` and ``cc.computeCost``
        printcost   - when true, print the cost and the accuracy

    Returns ``(yhat, cost, accuracy)``. Accuracy is the sum of the entries
    of ``y`` found at each column's arg-max row of ``yhat``, divided by the
    number of columns of ``y``.
    """
    n_examples = y.shape[1]

    _cache, yhat = fp.forwardProp(
        L, params, activations, X,
        regularization_technique="None", keep_prob=None, debug=debug)
    cost = cc.computeCost(y, yhat, debug=debug)

    # For every column take the label value at the predicted (arg-max) row.
    predicted_rows = np.argmax(yhat, axis=0)
    hits = y[predicted_rows, range(n_examples)]
    accuracy = np.sum(hits) / n_examples

    if printcost:
        print("Final cost:", cost)
        print("Final accuracy:", accuracy)

    return yhat, cost, accuracy
def gradientDescent(X, y, theta, alpha, num_iters):
    """
    Performs gradient descent to learn theta

    theta, J_history = gradientDescent(X, y, theta, alpha, num_iters)
    takes num_iters gradient steps with learning rate alpha.

    Every entry of theta is updated simultaneously (not only the first two)
    and the caller's array is left unmodified. Returns the final theta and
    the list of costs recorded after each step.
    """
    J_history = []
    m = y.size  # number of training examples

    for _ in range(num_iters):
        residual = np.dot(X, theta) - y
        # X.T.dot(residual) gives, per column j, the sum over samples of
        # residual * X[:, j] -- the per-parameter sums in a single product.
        gradient = np.dot(X.T, residual)
        theta = theta - alpha * (1. / m) * gradient

        # Save the cost J in every iteration
        J_history.append(computeCost(X, y, theta))

    return theta, J_history
def output(partId):
    """Return the formatted result string for one graded part.

    Builds fixed test inputs, runs the function that belongs to ``partId``
    ('1' through '7') on them and passes the result to ``formatter`` with
    the ``'%0.5f '`` format.

    Raises:
        ValueError: if ``partId`` is not one of '1'..'7'.
    """
    # Fixed test cases shared by all parts.
    X1 = np.column_stack(
        (np.ones(20), np.exp(1) + np.exp(2) * np.linspace(0.1, 2, 20)))
    Y1 = X1[:, 1] + np.sin(X1[:, 0]) + np.cos(X1[:, 1])
    X2 = np.column_stack((X1, X1[:, 1]**0.5, X1[:, 1]**0.25))
    Y2 = np.power(Y1, 0.5) + Y1

    if partId == '1':
        out = formatter('%0.5f ', warmUpExercise())
    elif partId == '2':
        out = formatter('%0.5f ', computeCost(X1, Y1, np.array([0.5, -0.5])))
    elif partId == '3':
        out = formatter(
            '%0.5f ',
            gradientDescent(X1, Y1, np.array([0.5, -0.5]), 0.01, 10))
    elif partId == '4':
        out = formatter('%0.5f ', featureNormalize(X2[:, 1:4]))
    elif partId == '5':
        out = formatter(
            '%0.5f ',
            computeCostMulti(X2, Y2, np.array([0.1, 0.2, 0.3, 0.4])))
    elif partId == '6':
        out = formatter(
            '%0.5f ',
            gradientDescentMulti(X2, Y2, np.array([-0.1, -0.2, -0.3, -0.4]),
                                 0.01, 10))
    elif partId == '7':
        out = formatter('%0.5f ', normalEqn(X2, Y2))
    else:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError('unknown partId: %r' % (partId,))
    return out
def gradientDescentMulti(X, y, theta, alpha, num_iters):
    """Perform gradient descent to learn theta.

    Takes ``num_iters`` gradient steps with learning rate ``alpha``.
    ``y`` is wrapped with ``np.transpose([y])`` before it is subtracted from
    the predictions.

    Returns ``(theta, J_history)``; J_history has shape (num_iters, 1) and
    holds ``cc.computeCost(X, y, theta)`` after each step.
    """
    n_examples = len(y)  # number of training examples
    J_history = np.zeros((num_iters, 1))

    for step in range(num_iters):
        targets = np.transpose([y])
        residual = X.dot(theta) - targets
        theta = theta - alpha * (1.0 / n_examples) * np.transpose(X).dot(residual)

        # Save the cost J in every iteration
        J_history[step] = cc.computeCost(X, y, theta)

    return theta, J_history
def gradientDescentMulti(X, y, theta, alpha, num_iters):
    """Perform gradient descent to learn theta.

    theta, J_history = gradientDescentMulti(X, y, theta, alpha, num_iters)
    takes num_iters gradient steps with learning rate alpha.

    ``y`` is wrapped with ``np.transpose([y])`` before it is subtracted from
    the predictions. J_history has shape (num_iters, 1) and holds
    ``cc.computeCost(X, y, theta)`` after each step.
    """
    m = len(y)  # number of training examples
    J_history = np.zeros((num_iters, 1))

    # ``range`` is used because ``xrange`` does not exist on Python 3.
    for i in range(num_iters):
        theta = theta - alpha * (1.0 / m) * np.transpose(X).dot(
            X.dot(theta) - np.transpose([y]))

        # Save the cost J in every iteration
        J_history[i] = cc.computeCost(X, y, theta)

    return theta, J_history
def gradientDescent(X, y, theta, alpha, num_iters):
    """Perform gradient descent to learn theta.

    Takes ``num_iters`` gradient steps with learning rate ``alpha`` and
    returns ``(theta, J_history)``; ``J_history`` is a 1-D array with the
    value of ``computeCost`` after each step.
    """
    n_examples = np.size(y, 0)  # number of training examples
    J_history = np.zeros((num_iters, ))

    for step in range(num_iters):
        residual = X.dot(theta) - y
        gradient = X.T.dot(residual) / n_examples
        theta = theta - alpha * gradient
        J_history[step] = computeCost(X, y, theta)

    return theta, J_history
def main():
    """Run the single-variable linear regression walkthrough.

    Loads ``data/ex1data1.txt`` (comma separated), plots the data, computes
    the cost at theta = 0, runs gradient descent for 1500 iterations with
    alpha = 0.01, prints the result and a prediction for the input 3.5, then
    plots the fitted line. Output goes to stdout and to plot windows;
    nothing is returned.
    """
    set_printoptions(precision=6, linewidth=200)

    A = eye(5)
    print(A)

    print('load data from labeled txt file which delimited by ","')
    data = genfromtxt('data/ex1data1.txt', delimiter=',')
    X, y = data[:, 0], data[:, 1]
    m = len(y)
    y = y.reshape(m, 1)
    print('The length of matrix labeled file is ', m)

    print('Show 2D data')
    plot(X, y)
    pyplot.show(block=True)

    # Prepend a column of ones for the intercept term.
    X = c_[ones((m, 1)), X]
    theta = zeros((2, 1))
    iterations = 1500
    alpha = 0.01

    cost = computeCost(X, y, theta)
    print(cost)

    # NOTE(review): this unpacking expects gradientDescent to return
    # (cost history, theta) in that order -- confirm against the
    # implementation that is actually imported.
    cost, theta = gradientDescent(X, y, theta, alpha, iterations)
    print('theta = ', theta)

    print('prediction1: population city in ', 3.5, 's')
    predict1 = array([1, 3.5]).dot(theta)
    print('profit is ', predict1)

    plot(X[:, 1], y)
    pyplot.plot(X[:, 1], X.dot(theta), 'b-')
    pyplot.show(block=True)
def gradientDescent(X, y, theta, alpha, num_iters):
    """Perform gradient descent to learn theta.

    Takes ``num_iters`` gradient steps with learning rate ``alpha`` and
    returns only the final ``theta``. The per-step cost is recorded in a
    local (num_iters, 1) array that is not returned.
    """
    n_examples = len(y)
    cost_log = np.zeros((num_iters, 1))

    for step in range(num_iters):
        residual = np.matmul(X, theta) - y
        theta = theta - (alpha / n_examples) * np.matmul(np.transpose(X), residual)

        # Save the cost J in every iteration
        cost_log[step] = computeCost(X, y, theta)

    return theta
def gradientDescent(X, y, theta, alpha, num_iters):
    """
    Performs gradient descent to learn theta

    theta, J_history = gradientDescent(X, y, theta, alpha, num_iters)
    takes num_iters gradient steps with learning rate alpha, using
    ``computeTheta0`` and ``computeTheta1`` for the two parameter updates.

    The updates are applied to a copy, so the caller's ``theta`` is not
    modified. The full cost history is printed once the loop has finished.
    """
    J_history = []
    m = y.size  # number of training examples
    theta = theta.copy()

    for _ in range(num_iters):
        # Both new values are computed from the current theta before either
        # one is stored, which keeps the update simultaneous.
        temp0 = computeTheta0(theta[0], theta[1], X, y, alpha, m)
        temp1 = computeTheta1(theta[0], theta[1], X, y, alpha, m)
        theta[0] = temp0
        theta[1] = temp1

        # Save the cost J in every iteration
        J_history.append(computeCost(X, y, theta))

    print(J_history)
    return theta, J_history
def gradientDescent(X, y, theta, alpha, num_iters):
    """
    Run gradient descent to find the model parameters theta, using the
    object-feature matrix X, the label vector y, the convergence parameter
    alpha and the number of algorithm iterations num_iters.

    Returns ``(theta, J_history)`` where J_history is the list of cost
    values recorded after each iteration.
    """
    J_history = []
    m = y.shape[0]

    # Treat the labels as a column so the residual has one row per sample.
    targets = np.reshape(y, (-1, 1))

    for _ in range(num_iters):
        # Predictions for all samples at once; the residual is shared by
        # every parameter instead of being recomputed for each of them.
        residual = np.reshape(np.dot(X, theta), (-1, 1)) - targets
        s = np.reshape(np.dot(np.transpose(X), residual), np.shape(theta))
        theta = theta - alpha / m * s

        # store the value of the cost function on every iteration
        J_history.append(computeCost(X, y, theta))

    return theta, J_history
def gradientDescent(X, y, theta, alpha, num_iters):
    """Performs gradient descent to learn theta.

    Takes ``num_iters`` gradient steps with learning rate ``alpha`` and
    returns ``(theta, J_history)``. The caller's ``theta`` array is not
    modified; ``J_history`` holds the cost after each step.
    """
    m = len(y)  # number of training examples
    J_history = np.zeros(num_iters)

    for step in range(num_iters):
        h = X @ theta
        # Rebind instead of using ``-=``: the in-place form would overwrite
        # the caller's array and raises for an integer-dtype theta.
        theta = theta - alpha * (h - y) @ X / m

        # Save the cost J in every iteration
        J_history[step] = computeCost(X, y, theta)

    return theta, J_history
def gradientDescentMulti(X, y, theta, alpha, num_iters):
    """Perform gradient descent to learn theta.

    theta, J_history = gradientDescentMulti(X, y, theta, alpha, num_iters)
    takes num_iters gradient steps with learning rate alpha. Works for any
    number of parameters, not only three.

    ``y`` is reshaped to an (m, 1) column. ``theta`` is expected as a
    column (the update treats it as 2-D). After at least one step the
    returned theta is an ``np.matrix`` column; J_history has shape
    (num_iters, 1).
    """
    m = len(y)  # number of training examples
    J_history = np.zeros((num_iters, 1))
    y = np.reshape(y, (len(y), 1))

    for i in range(num_iters):
        H = np.dot(X, theta)
        # Row k of X.T.dot(H - y) is the sum over samples of
        # (H - y) * X[:, k], i.e. the per-parameter sums in one product.
        gradient = np.dot(np.transpose(X), H - y)
        theta = np.asmatrix(theta - alpha * gradient / m)

        # Save the cost J in every iteration
        J_history[i] = computeCost(X, y, theta)

    return theta, J_history
def gradientDescent(X, y, theta=None, alpha=0.01, num_iters=1500):
    """
    GRADIENTDESCENT Performs gradient descent to learn theta

    gradientDescent(X, y, theta, alpha, num_iters) takes num_iters gradient
    steps with learning rate alpha and returns ``(theta, J_history)``.

    When ``theta`` is omitted (or None) a fresh ``np.zeros((2, 1))`` is used
    for each call, so no array is shared between calls.
    """
    if theta is None:
        theta = np.zeros((2, 1))

    J_history = []
    m = y.size  # number of training examples

    for _ in range(num_iters):
        h = X.dot(theta)
        theta = theta - alpha / m * (X.T.dot(h - y))

        # Save the cost J in every iteration
        J_history.append(computeCost(X, y, theta))

    return theta, J_history
def gradientDescent(X, y, theta, alpha, num_iters):
    """Perform gradient descent to learn theta.

    Takes ``num_iters`` gradient steps with learning rate ``alpha`` and
    returns ``(theta, J_history)``, where ``J_history`` lists the value of
    ``computeCost`` after each step.
    """
    J_history = []
    n_examples = y.size  # number of training examples

    for _ in range(num_iters):
        # Prediction error for the current theta.
        residual = np.dot(X, theta) - y

        # Gradient direction: X transposed times the error.
        direction = np.dot(np.transpose(X), residual)

        theta = theta - (alpha * direction / n_examples)

        # Save the cost J in every iteration
        J_history.append(computeCost(X, y, theta))

    return theta, J_history
def gradientDescent(X, y, theta, alpha, num_iters):
    """Perform gradient descent to learn theta.

    Takes ``num_iters`` gradient steps with learning rate ``alpha`` and
    returns ``(theta, J_history)``; ``J_history`` is a (num_iters, 1) array
    with the value of ``computeCost`` after each step.
    """
    n_examples = y.shape[0]  # number of training examples
    J_history = np.zeros((num_iters, 1)).reshape((num_iters, 1))

    for step in range(num_iters):
        residual = np.subtract(np.dot(X, theta), y)
        # (residual^T X)^T, kept in this operand order.
        direction = np.dot(residual.T, X).T
        theta = np.subtract(theta, (alpha / n_examples) * direction)

        # Save the cost J in every iteration
        J_history[step, 0] = computeCost(X, y, theta)

    return (theta, J_history)
def gradientDescent(X, y, num_labels, Theta1, Theta2, alpha, num_iters, lam):
    """
    Run gradient descent to find the model parameters Theta1 and Theta2,
    using the object-feature matrix X, the label vector y, the number of
    classes num_labels, the convergence parameter alpha, the number of
    algorithm iterations num_iters and the regularisation parameter lam.

    Returns ``(Theta1, Theta2, J_history)`` where J_history lists the cost
    recorded after each iteration.
    """
    J_history = []
    m = y.shape[0]

    # Indicator matrix: row r gets a 1 in column c wherever y == c.
    Y = np.zeros((m, num_labels))
    for label in range(num_labels):
        Y[np.where(y == label)[0], label] = 1

    for epoch in range(num_iters):
        print('Эпоха обучения №', epoch + 1)

        # Back-propagation: accumulate the partial derivatives of the cost
        # with respect to the model parameters over all samples.
        D1 = np.zeros(Theta1.shape)
        D2 = np.zeros(Theta2.shape)

        for row in range(m):
            a1 = X[row:row + 1, :]
            z2 = np.dot(a1, Theta1.transpose())
            a2 = np.concatenate((np.ones((1, 1)), sigmoid(z2)), axis=1)
            a3 = sigmoid(np.dot(a2, Theta2.transpose()))

            delta3 = (a3 - Y[row:row + 1]).transpose()
            back = np.dot(Theta2.transpose(), delta3)
            # Drop the first row (it pairs with the constant unit prepended
            # to a2) before scaling by the activation gradient.
            delta2 = back[1:, :] * sigmoidGradient(z2.transpose())

            D1 = D1 + np.dot(delta2, a1)
            D2 = D2 + np.dot(delta3, a2)

        # Regularisation term; the first column is zeroed so it is excluded.
        Temp1 = np.copy(Theta1)
        Temp1[:, 0] = 0
        Temp2 = np.copy(Theta2)
        Temp2[:, 0] = 0

        Theta1_grad = D1 / m + Temp1 * lam / m
        Theta2_grad = D2 / m + Temp2 * lam / m

        Theta1 = Theta1 - alpha * Theta1_grad
        Theta2 = Theta2 - alpha * Theta2_grad

        # store the value of the cost function on every iteration
        J_history.append(computeCost(X, y, num_labels, Theta1, Theta2, lam))

    return Theta1, Theta2, J_history
def gradientDescent(X, y, theta, alpha, iterations):
    """Run ``iterations`` gradient steps and return ``(theta, costs)``.

    The cost is recorded *before* each update, so entry i of the returned
    1-D array is the cost of the theta that step i started from.
    """
    n_examples = X.shape[0]
    costs = []

    for _ in range(iterations):
        costs.append(computeCost(X, y, theta))
        residual = X @ theta - y
        gradient = 1.0 / n_examples * X.T @ residual
        theta = theta - alpha * gradient

    return theta, np.array(costs).reshape(iterations, )
def gradientDescent(X, y, theta, alpha, iterations):
    """Run ``iterations`` gradient steps with learning rate ``alpha``.

    Returns ``(theta, J_history)``; J_history is an (iterations, 1) array
    holding the value of ``computeCost`` after each step.
    """
    # Imported once per call rather than on every loop iteration.
    from computeCost import computeCost

    m = len(y)  # number of training examples
    J_history = np.zeros((iterations, 1))

    for i in range(iterations):
        temp = np.dot(X.T, np.dot(X, theta) - y)
        theta = theta - alpha / m * temp
        J_history[i] = computeCost(X, y, theta)

    return theta, J_history
def gradientDescent(X, y, theta, alpha, iterations):
    """Run ``iterations`` gradient steps with learning rate ``alpha``.

    Returns ``(theta, J_history)``; J_history is an (iterations, 1) array
    holding the value of ``computeCost`` after each step.
    """
    J_history = np.zeros([iterations, 1])
    m = len(y)  # number of training examples; constant across iterations

    for i in range(iterations):
        # Matrix form of the update for all parameters at once.
        step = np.dot(X.T, (np.dot(X, theta) - y)) * alpha / m
        theta = theta - step
        J_history[i] = computeCost(X, y, theta)

    return theta, J_history
def gradientDescent(X, y, theta, alpha, num_iters):
    """Perform gradient descent to learn theta.

    Takes ``num_iters`` gradient steps with learning rate ``alpha`` and
    returns ``(theta, J_history)``; ``J_history`` is a (num_iters, 1) array
    with the value of ``computeCost`` after each step.
    """
    n_examples = len(y)
    J_history = np.zeros((num_iters, 1))

    for step in range(num_iters):
        residual = np.dot(X, theta) - y
        # Column-wise sums of residual * X: one value per parameter.
        column_sums = np.sum(residual * X, axis=0)
        scaled = (alpha / n_examples) * column_sums
        # Add an axis and transpose so the step lines up with theta.
        theta = theta - scaled[np.newaxis].T
        J_history[step] = computeCost(X, y, theta)

    return theta, J_history
def gradientDescent(X, y, theta, alpha, num_iters):
    """Perform gradient descent to learn theta.

    theta, J_history = gradientDescent(X, y, theta, alpha, num_iters)
    takes exactly num_iters gradient steps with learning rate alpha.
    J_history has shape (num_iters, 1); row i holds the cost after step i.

    Raises:
        Exception: with message 'bang' (after printing "Bang") when, past
            the first few steps, the cost fails to decrease from one step
            to the next.
    """
    m = len(y)  # number of training examples
    J_history = np.zeros((num_iters, 1))

    # Zero-based loop: starting at 1 would skip a step and leave
    # J_history[0] unset.
    for i in range(num_iters):
        residual = np.subtract(X.dot(theta), y)
        step = ((alpha / m) * residual.T.dot(X)).T
        theta = np.subtract(theta, step)

        # Save the cost J in every iteration
        J_history[i] = computeCost(X, y, theta)

        # Divergence guard. J_history is an array, so it is indexed with
        # square brackets rather than called.
        if i > 2 and J_history[i] >= J_history[i - 1]:
            print("Bang")
            raise Exception('bang')

    return theta, J_history
def gradientDescent(X, y, theta, alpha, iterations):
    """Run ``iterations`` gradient steps starting from a copy of ``theta``.

    Predictions come from ``hypothesis`` and the cost from ``computeCost``.
    Note the return order: ``(J_history, parameters)`` -- the cost list
    first, the learned parameters second.
    """
    params = copy(theta)  # work on a copy of the starting parameters
    n_examples = len(y)
    J_history = []

    for _ in range(iterations):
        residual = hypothesis(X, params) - y
        params -= alpha / n_examples * X.T.dot(residual)
        J_history.append(computeCost(X, y, params))

    return J_history, params
def gradientDescent(X,y,theta,alpha,num_iters):
    """Fit theta by batch gradient descent, updating all parameters at once.

    Here ``X`` is laid out features-by-examples: the prediction is
    ``np.dot(theta, X)`` and feature ``jj`` is the row ``X[jj, :]``.

    Args:
        X: Feature matrix with one row per parameter.
        y: Targets; ``len(y)`` is the number of examples.
        theta: Parameters; updated IN PLACE element by element and also
            returned.
        alpha: Learning rate.
        num_iters: Number of gradient steps.

    Returns:
        The updated ``theta`` (same object that was passed in).

    Side effects:
        When the module-level flag ``plotJ`` is truthy, the cost after each
        step is recorded and plotted through ``p``.
    """
    m = len(y)
    nfeatures = len(theta)
    J_history = np.zeros(num_iters)
    for ii in range(num_iters):
        # Evaluate the residual and the full gradient BEFORE touching theta so
        # every component is stepped from the same parameter vector. Updating
        # theta[jj] and then recomputing the prediction for jj+1 is not a
        # simultaneous gradient-descent update.
        errors = np.dot(theta, X) - y
        gradient = np.dot(X, errors)
        for jj in range(nfeatures):
            theta[jj] -= alpha * gradient[jj] / m
        if plotJ:
            J_history[ii] = computeCost(X,y,theta)
    if plotJ:
        p.plot(J_history)
        p.show()
    return theta
def gradientDescent(X, y, theta, alpha, num_iters):
    """Take ``num_iters`` vectorized gradient steps with learning rate ``alpha``.

    Returns:
        Tuple ``(theta, J_history)``; ``J_history`` is ``(num_iters, 1)`` and
        stores the ``computeCost`` value after every step.
    """
    n_examples = len(y)
    J_history = np.zeros((num_iters, 1))

    for step in range(num_iters):
        residual = np.dot(X, theta) - y
        theta = theta - alpha * np.dot(X.T, residual) / n_examples
        J_history[step][0] = computeCost(X, y, theta)

    return theta, J_history
def gradientDescent(X, y, theta, alpha, num_iters):
    """Exercise skeleton for gradient descent.

    The gradient step itself is left unimplemented, so ``theta`` is returned
    unchanged; only the cost for the current ``theta`` is logged on every
    pass.

    Returns:
        Tuple ``(theta, J_history)`` with ``J_history`` of shape
        ``(num_iters, 1)``.
    """
    # Number of training examples; available for the step to be written below.
    m = y.shape[0]
    J_history = np.zeros((num_iters, 1))

    step = 0
    while step < num_iters:
        # ====================== YOUR CODE HERE ======================

        # ============================================================

        # Record the cost J for this iteration.
        J_history[step] = computeCost(X, y, theta)
        step += 1

    return theta, J_history
def gradientDescent(X,y,theta,alpha,num_iters):
    """Run ``num_iters`` batch gradient steps and log the cost of each.

    Returns:
        A two-element list ``[theta, J_history]``; ``J_history`` is a float
        array of shape ``(num_iters, 1)``.
    """
    # Dependencies are imported at call time, inside the function.
    import numpy as np
    # Project module providing computeCost(X, y, theta).
    import computeCost as cC

    m = len(y)  # number of training examples
    J_history = np.zeros((num_iters, 1), dtype=float)

    for step in range(num_iters):
        residual = np.dot(X, theta) - y
        theta = theta - (alpha / m) * np.dot(X.T, residual)
        # Cost is stored after the update, one row per iteration.
        J_history[step, 0] = cC.computeCost(X, y, theta)

    return [theta, J_history]
def output(partId):
    """Return the formatted result of the exercise part named by ``partId``.

    Fixed test inputs are built first; the function matching ``partId``
    ('1' through '7') is then evaluated on them and passed to ``sprintf``
    with the format ``'%0.5f '``. Any other ``partId`` yields ``None``.
    """
    # Test-case inputs shared by all parts.
    X1 = column_stack((ones(20), exp(1) + dot(exp(2), arange(0.1, 2.1, 0.1))))
    Y1 = X1[:, 1] + sin(X1[:, 0]) + cos(X1[:, 1])
    X2 = column_stack((X1, X1[:, 1] ** 0.5, X1[:, 1] ** 0.25))
    Y2 = Y1 ** 0.5 + Y1

    # Ordered (part id, thunk) pairs; only the matching thunk is ever called.
    cases = (
        ('1', lambda: warmUpExercise()),
        ('2', lambda: computeCost(X1, Y1, array([0.5, -0.5]))),
        ('3', lambda: gradientDescent(X1, Y1, array([0.5, -0.5]), 0.01, 10)),
        ('4', lambda: featureNormalize(X2[:, 1:3])),
        ('5', lambda: computeCostMulti(X2, Y2, array([0.1, 0.2, 0.3, 0.4]))),
        ('6', lambda: gradientDescentMulti(
            X2, Y2, array([-0.1, -0.2, -0.3, -0.4]), 0.01, 10)),
        ('7', lambda: normalEqn(X2, Y2)),
    )
    for part, evaluate in cases:
        if partId == part:
            return sprintf('%0.5f ', evaluate())
    return None
def gradientDescent(X, y, theta, alpha, num_iters):
    """Learn theta by taking ``num_iters`` gradient steps of size ``alpha``.

    Prints the shape of the incoming ``theta`` once before training starts.

    Returns:
        Tuple ``(J_history, theta)`` - the cost history comes FIRST.
        ``J_history`` is ``(num_iters, 1)`` and is filled from
        ``costModule.computeCost`` after each update.
    """
    m = y.size  # number of training examples
    J_history = np.zeros((num_iters, 1))
    print("theta before for", theta.shape)

    for step in range(num_iters):
        # Linear model: the prediction is X.theta with no activation applied.
        residual = np.subtract(np.dot(X, theta), y)
        delta = (1.0/m) * np.dot(X.T, residual)
        theta = theta - alpha * delta
        J_history[step] = costModule.computeCost(X, y, theta)

    return J_history, theta
# Script fragment (Python 2 print statements): build the design matrix, plot
# the data, run gradient descent and open a figure for the fitted line.
# `m`, `data` and the helper functions are defined outside this fragment.

# Pair a 1 (intercept term) with each value from the first data column.
X = np.vstack(zip(np.ones(m),data[:,0]))
# Second data column is used as the target.
y = data[:, 1]

# Plot Data
# Note: You have to complete the code in plotData.py
print 'Plotting Data ...'
plotData(data)
#show()
#raw_input("Program paused. Press Enter to continue...")

# =================== Part 3: Gradient descent ===================
print 'Running Gradient Descent ...'
theta = np.zeros(2)

# compute and display initial cost
J = computeCost(X, y, theta)
print 'cost: %0.4f ' % J

# Some gradient descent settings
iterations = 1500
alpha = 0.01

# run gradient descent
theta, J_history = gradientDescent(X, y, theta, alpha, iterations)

# print theta to screen
print 'Theta found by gradient descent: '
print '%s %s \n' % (theta[0], theta[1])

# Plot the linear fit
plt.figure()
import numpy as np
import matplotlib.pyplot as plt
import computeCost
import gradientDescent

# Load the two-column data set and scatter-plot it.
foodtruck = np.loadtxt('ex1data1.txt', delimiter = ',')
plt.plot(foodtruck[:, 0], foodtruck[:, 1], '^')
plt.xlabel('Population of City in 10,000s')
plt.ylabel('Profit in $10,000s')
#plt.show()

# Turn both columns into column vectors. -1 lets NumPy infer the row count,
# so the script no longer depends on the file having exactly 97 rows.
x = foodtruck[:, 0].reshape(-1, 1)
y = foodtruck[:, 1].reshape(-1, 1)
m = len(x)

# Prepend a column of ones so the first parameter acts as the intercept.
x_intercept = np.ones((m, 1))
x_total = np.concatenate((x_intercept, x), axis = 1)

thetas = np.zeros((2, 1))
iterations = 1500
alpha = 0.01

# Cost at the all-zero starting point.
J = computeCost.computeCost(x_total, y, thetas)
print(J)

# NOTE(review): argument order here is (thetas, iterations, X, y, alpha) and
# the result is used directly as the parameter vector below - confirm both
# against the project's gradientDescent module.
thetas = gradientDescent.gradientDescent(thetas, iterations, x_total, y, alpha)
print(thetas)

# Overlay the fitted line on the scatter plot.
plt.plot(x_total[:, 1], np.dot(x_total, thetas), '-')
__date__="January 8, 2015"

import numpy as np
import readData
import plotData
from computeCost import computeCost
from gradientDescent import logisiticDeriv
from scipy.optimize import fmin_bfgs

# Script entry point (Python 2 print statement): read the first data set,
# plot it, and fit theta with BFGS using the supplied cost and derivative.
if __name__=="__main__":
    (x,y,nexamples) = readData.readFirst()
    plotData.plotPoints(x,y)
    # x is laid out features-by-examples: its first dimension is the feature count.
    nfeatures = x.shape
    nfeatures = nfeatures[0]
    # Row 0 stays all ones (intercept term); the remaining rows are the features.
    X = np.ones([nfeatures+1,nexamples])
    X[1:,:] = x[:,:]
    theta = np.zeros(nfeatures+1)
    # should return 0.693 for the first data set
    print computeCost(theta,X,y)
    #iterations = 100000
    #alpha = 0.001
    #gradientDescent.gradientDescent(X,y,theta,alpha,iterations)
    # Minimise the cost with BFGS; (X, y) are forwarded to both callbacks.
    theta=fmin_bfgs(computeCost,theta,fprime=logisiticDeriv,args=(X,y))
    plotData.plotTheta(x,y,theta)
def testComputeCost2():
    """computeCost should give 0.5 when every prediction is off by one.

    With theta = [1, 2] the linear prediction is 1 + 2*x while the target is
    2*x, so each residual is exactly 1.
    """
    inputs = arange(10)
    design = column_stack((ones(10), inputs))
    targets = inputs * 2
    params = array([1., 2.])
    cost = computeCost(design, targets, params)
    assert_almost_equal(cost, 0.5)
y=y.reshape(m,1) #calling the function to plot data ax=plt.plotData(X,y) """%% =================== Part 3: Gradient descent =================== """ #initializing theta to zeros that is for initial values for theta we set it to zeros with a data type float theta=np.zeros((2,1),dtype=float) #adding a ones column to X so that we can use the preceeding column as a feature X=np.c_[np.ones(m),X] #compute the cost of the initial values J=cC.computeCost(X, y,theta) #setting variables needed by the gradient descent which requires X,theta,y alpha ,num_iters iterations = 1500; alpha = 0.01; #printing the cost with one variable when theta is initialized to zeros .it should be approximately 32.07 print("the cost should be approximately equal to 32.07 \n %s"%(J)) #calculating the gradientDescent [theta,J_history ] = gD.gradientDescent(X, y, theta, alpha, iterations); #dot multiplication of the array same as X* theta in matlab df=np.dot(X,theta) #plotting the data for the linear regression curve pl.plot((X[:,1]), (df[:,0]), '-') pl.legend('dt') pl.ion() pl.show(ax)#former data plot
# Script entry point: linear regression with one or two features.
# `nfeatures` and the helper functions are defined outside this block.
if __name__=="__main__":
    if nfeatures == 1:
        (x,y,nexamples) = readData.readSingleFeature()
    elif nfeatures == 2:
        (x,y,nexamples) = readData.readMultiFeature()

    # Features-by-examples matrix; row 0 stays all ones for the theta_zero term.
    X = np.ones((nfeatures+1,nexamples))
    X[1:,:]=x[:,:]
    theta = np.zeros(nfeatures+1)

    if nfeatures==2:
        (X_norm,mu,sigma) = featureNormalization(X)
    else:
        # The single-feature run uses the raw matrix. Without this binding
        # X_norm is undefined below and the script stops with a NameError.
        X_norm = X

    # computes the cost as a test, should return 32.07
    # (single-argument print(...) behaves the same in Python 2 and 3)
    print(computeCost(X_norm,y,theta))

    if nfeatures == 1:
        iterations = 1500
    elif nfeatures == 2:
        iterations = 400
    alpha = 0.01

    # computes the linear regression coefficients using gradient descent
    theta = gradientDescent(X_norm,y,theta,alpha,iterations)

    if nfeatures == 2:
        # Prediction for inputs (1650, 3), normalised with the statistics
        # from featureNormalization; mu/sigma only exist on this path.
        print(theta[0]+theta[1]*((1650-mu[0])/sigma[0])+theta[2]*((3-mu[1])/sigma[1]))

    if nfeatures==1:
        plot.plot(x,y,'o',x,np.dot(theta,X))
        plot.show()
input('Program paused. Press enter to continue.\n') ## =================== Part 3: Gradient descent =================== print('Running Gradient Descent ...\n') X = np.vstack((np.ones(m), X)).T # Add a column of ones to x y = y.reshape(-1,1) theta = np.zeros((2, 1)) # initialize fitting parameters # Some gradient descent settings iterations = 1500 alpha = 0.01 # compute and display initial cost computeCost(X, y, theta) # run gradient descent theta, J_history = gradientDescent(X, y, theta, alpha, iterations) print(theta) # print theta to screen print('Theta found by gradient descent: ') print('%lf %lf \n'%(theta[0], theta[1])) # Plot the linear fit plt.plot(X[:,1], np.dot(X,theta), '-') plt.legend(['Training data', 'Linear regression']) # Predict values for population sizes of 35,000 and 70,000 predict1 = np.dot(np.array([1, 3.5]),theta);
# Script fragment (Python 2): show the data plot, wait for the user, then run
# gradient descent with np.matrix operands and plot the fitted line.
# `m`, `data`, `y` and the imported modules are defined outside this fragment.
plt.show()
print "'Program paused. Press enter to continue."
raw_input("Press ENTER to continue")

print "Running gradient descent."
X = np.matrix([np.ones(m), data[:,0]]).T # add a column of ones to X
theta = np.matrix(np.zeros(2)).T # initialize fitting parameters as a column

# gradient descent settings
iterations = 1500
alpha = 0.01

# compute initial cost (the return value is discarded here)
computeCost.computeCost(X, y, theta)

# Run gradient descent
theta, J_hisotry = gradientDescent.gradientDescent(X, y, theta, alpha, iterations)

# Print theta to screen
print "Theta found by gradient descent: ", theta[0], theta[1]

# Training data as points, fitted line on top (X*theta is a matrix product
# because both operands are np.matrix).
plt.figure()
plt.plot(X[:,1], y, 'o', label = 'Training data', color = 'blue')
plt.plot(X[:,1], X*theta, '-', label = 'Linear regression', color = 'red')
plt.legend(loc = 4)
plt.show()

# Predict values for population sizes of 35,000 and 70,000
predict1 = [1, 3.5]*theta
# print('Program paused. Press enter to continue.\n')# raw_input(">>>") # # ## =================== Part 3: Gradient descent =================== print('Running Gradient Descent ...\n') # X = np.column_stack((np.ones((m, 1)), data[:,1]))# # Add a column of ones to x theta = np.zeros((2, 1))# # initialize fitting parameters # # # Some gradient descent settings iterations = 1500# alpha = 0.01# # # # compute and display initial cost computeCost(X, y, theta) # # # run gradient descent theta = gradientDescent(X, y, theta, alpha, iterations)# # # # print theta to screen # print('Theta found by gradient descent: ')# # print('#f #f \n', theta(1), theta(2))# # # # Plot the linear fit # hold on# # keep previous plot visible # plot(X(:,2), X*theta, '-') # legend('Training data', 'Linear regression') # hold off # don't overlay any more plots on this figure # # # Predict values for population sizes of 35,000 and 70,000
def testComputeCost1():
    """computeCost should be exactly 0 for one example with zero target and zero theta."""
    design = array([[1., 0.]])
    targets = array([0.])
    params = array([0., 0.])
    cost = computeCost(design, targets, params)
    assert_equal(cost, 0)
def Prediction(self): #Predict values for population sizes of 35,000 and 70,000 predict1 = [1, 3.5]*self.theta print 'For population = 35,000, we predict a profit of ', predict1*1000 predict2 = [1, 7]*self.theta print "For population = 70,000, we predict a profit of", predict2*10000 # Visualizing J(theta_0, theta_1) print "Visualizing J(theta_0, theta_1)" theta0_vals = np.linspace(-10, 10, 100) theta1_vals = np.linspace(-1, 4, 100) J_vals = np.zeros((len(theta0_vals), len(theta1_vals))) for i in range(len(theta0_vals)): for j in range(len(theta1_vals)): t = np.matrix([theta0_vals[i], theta1_vals[j]]).T J_vals[i,j] = computeCost.computeCost(self.X, self.y, t) # transpose J_vals J_vals = J_vals.T #surface plot root = Tk.Tk() root.wm_title("Scatter Plot") f = Figure(figsize=(5, 4), dpi=100) a = f.add_subplot(111) a.contour(theta0_vals, theta1_vals, J_vals, np.logspace(-2, 3, 20)) a.xlabel('theta_0') a.ylabel('theta_1') # a tk.DrawingArea canvas = FigureCanvasTkAgg(f, master=root) canvas.show() canvas.get_tk_widget().pack(side=Tk.TOP, fill=Tk.BOTH, expand=1) toolbar = NavigationToolbar2TkAgg(canvas, root) toolbar.update() canvas._tkcanvas.pack(side=Tk.TOP, fill=Tk.BOTH, expand=1) def on_key_event(event): print('you pressed %s' % event.key) key_press_handler(event, canvas, toolbar) canvas.mpl_connect('key_press_event', on_key_event) def _quit(): root.quit() # stops mainloop root.destroy() # this is necessary on Windows to prevent # Fatal Python Error: PyEval_RestoreThread: NULL tstate button = Tk.Button(master=root, text='Quit', command=_quit) button.pack(side=Tk.BOTTOM) Tk.mainloop() ax = Axes3D(a) #ax = fig.add_subplot(111, projection='3d') ax.plot_surface(theta0_vals, theta1_vals, J_vals) ax.set_xlabel('theta_0') ax.set_ylabel('theta_1') ax.set_zlabel('J Values')
def testComputeCost3():
    """computeCost of an all-zero theta against sin(x) on [0, 10] should be ~0.23699618."""
    grid = linspace(0, 10, 101)
    design = column_stack((ones(101), grid))
    targets = sin(grid)
    params = array([0., 0.])
    cost = computeCost(design, targets, params)
    assert_almost_equal(cost, 0.23699618)
import numpy as np
from computeCost import computeCost
from gradientDescent import gradientDescent

# Hand-checked test cases for computeCost and gradientDescent.
# Source: https://www.coursera.org/learn/machine-learning/discussions/5wftpZnyEeWKNwpBrKr_Fw

# Expected cost: 11.9450 (the result is stored but not printed or asserted)
J_1 = computeCost(np.array([[1, 2], [1, 3], [1, 4], [1, 5]]),
                  np.array([[7],[6],[5],[4]]),
                  np.array([[0.1], [0.2]]) )

# Expected cost: 7.0175 (the result is stored but not printed or asserted)
J_2 = computeCost(np.array([[1, 2, 3], [1, 3, 4], [1, 4, 5], [1, 5, 6]]),
                  np.array([[7],[6],[5],[4]]),
                  np.array([[0.1], [0.2], [0.3]]) )

# Expected results for the run below:
#   theta        = 5.2148 -0.5733
#   J_hist(1)    = 5.9794
#   J_hist(1000) = 0.85426
theta_1, J_hist_1= gradientDescent(np.array([[1, 5], [1, 2], [1, 4], [1, 5]]),
                                   np.array([[1], [6], [4], [2]]),
                                   np.array([[0], [0]]),
                                   0.01, 1000);

# The labels are 1-based (as in the reference answers); the indices used to
# read J_hist_1 are the 0-based equivalents, 0 and 999.
print ("====gradientDescent Test Case 1====\ntheta = %f, %f \nJ_hist(1): %f, \n\
 J_hist(1000): %f" % (theta_1[0], theta_1[1], J_hist_1[0], J_hist_1[999]))
# Script fragment: load the data, plot it, pause, then run gradient descent
# and print the result. The imported modules are defined outside this fragment.
print("Plotting Data ...\n")
data = np.genfromtxt("../data/ex1data1.txt", delimiter = ",")
X = data[:, 0]
y = data[:, 1]
plotData.plotData(X, y)

# Wait for the user via an InteractiveConsole's raw_input.
pause = code.InteractiveConsole()
pause.raw_input(prompt = "Press Enter to continue: ")

# ============================== Gradient descent ================================
print("Running Gradient Descent ...\n")
m = len(y)
# Design matrix: a column of ones followed by the first data column.
X = np.c_[np.ones((m, 1)), data[:, 0]]
X = np.reshape(X, (m, 2))
# Targets as a column vector.
y = np.reshape(y, (m, 1))
theta = np.zeros((2, 1))

iterations = 1500
alpha = 0.01

# Cost at the all-zero starting point.
temp = computeCost.computeCost(X, y, theta)
print("The first J: ", temp)

# The result is unpacked as [theta, J].
[theta, J] = gradientDescent.gradientDescent(X, y, theta, alpha, iterations)
print("Theta found by gradient descent: ")
print("%f %f \n" % (theta[0], theta[1]))
print("The sequence of J: \n")
print(J)
input() ## =================== Part 3: Gradient descent =================== print('Running Gradient Descent ...') X = column_stack((ones(m), data[:,0])) # Add a column of ones to x theta = zeros(2) # initialize fitting parameters # Some gradient descent settings iterations = 1500 alpha = 0.01 # compute and display initial cost print(computeCost(X, y, theta)) # run gradient descent theta, J_history = gradientDescent(X, y, theta, alpha, iterations) #pdb.set_trace() # print theta to screen print('Theta found by gradient descent: ',) print('%f %f ' % (theta[0], theta[1])) # Plot the linear fit hold(True) # keep previous plot visible plot(X[:,1], X.dot(theta), '-') legend(('Training data', 'Linear regression')) firstPlot.show() # not sure how to avoid overlaying any more plots on this figure - call figure()?