def Linear(self, X, y, iters, alp):
    theta = np.matrix(np.zeros(2)).T  # initialize fitting parameters

    # Gradient descent settings
    iterations = int(iters)
    alpha = float(alp)

    # Compute and display initial cost
    J = computeCost.computeCost(X, y, theta)

    # Run gradient descent
    self.theta, J_history = gradientDescent.gradientDescent(X, y, theta, alpha, iterations)

    # Plot the linear fit in a Tk window
    root = Tk.Tk()
    root.wm_title("Linear Fit Plot")
    f = Figure(figsize=(5, 4), dpi=100)
    a = f.add_subplot(111)
    a.plot(X[:, 1], y, 'o', label='Training data', color='blue')
    a.plot(X[:, 1], X * self.theta, '-', label='Linear regression', color='red')
    a.legend(loc=4)
    a.set_title('Linear fitting')
    a.set_xlabel('Population')
    a.set_ylabel('Profit')
    # a.text(7, 20, 'Initial cost: %s \n Theta: %s ' % (J[0, 0], self.theta))
    PlotFig(root, f).mainloop()
    return [J, self.theta[0], self.theta[1]]
def sgd(self, X, y, w, compute_objF, compute_gradF, **kwargs):
    print "start sgd"
    h = kwargs.get('h', 0.3)
    c = kwargs.get('c', 1)
    maxiter = kwargs.get('maxiter', 5)
    ita = kwargs.get('ita', 0.11)
    Step_backtrack = kwargs.get('Step_backtrack', False)
    stopMethod = kwargs.get('stopMethod', None)  # user can specify the function
    mysgd = gd.gradientDescent(X, y)
    itaOverIteration = kwargs.get('itaOverIteration', False)
    tnot = kwargs.get('tnot', 1)
    if compute_objF == "Default" or compute_gradF == "Default":
        return mysgd.my_sgd(w, h=h, c=c, maxiter=maxiter, ita=ita,
                            Step_backtrack=Step_backtrack, stopMethod=stopMethod,
                            itaOverIteration=itaOverIteration, tnot=tnot)
    else:
        return mysgd.my_sgd(w, compute_obj=compute_objF, compute_grad=compute_gradF,
                            h=h, c=c, maxiter=maxiter, ita=ita,
                            Step_backtrack=Step_backtrack, stopMethod=stopMethod,
                            itaOverIteration=itaOverIteration, tnot=tnot)
def gd(self, X, y, w, compute_objF, compute_gradF, **kwargs):
    print "start gd"
    h = kwargs.get('h', 0.3)
    c = kwargs.get('c', 1)
    maxiter = kwargs.get('maxiter', 100)
    ita = kwargs.get('ita', 0.11)
    Step_backtrack = kwargs.get('Step_backtrack', False)
    stopMethod = kwargs.get('stopMethod', None)  # user can specify the function
    mygd = gd.gradientDescent(X, y)
    if compute_objF == "Default" or compute_gradF == "Default":
        return mygd.my_gradient_decent(w, h=h, c=c, maxiter=maxiter, ita=ita,
                                       Step_backtrack=Step_backtrack, stopMethod=stopMethod)
    else:
        return mygd.my_gradient_decent(w, compute_obj=compute_objF, compute_grad=compute_gradF,
                                       h=h, c=c, maxiter=maxiter, ita=ita,
                                       Step_backtrack=Step_backtrack, stopMethod=stopMethod)
def main():
    set_printoptions(precision=6, linewidth=200)

    A = eye(5)
    print A

    print 'load data from a labeled txt file delimited by ","'
    data = genfromtxt('data/ex1data1.txt', delimiter=',')
    X, y = data[:, 0], data[:, 1]
    m = len(y)
    y = y.reshape(m, 1)
    print 'The number of examples in the labeled file is ', m

    print 'Show 2D data'
    plot(X, y)
    pyplot.show(block=True)

    X = c_[ones((m, 1)), X]
    theta = zeros((2, 1))
    iterations = 1500
    alpha = 0.01

    cost = computeCost(X, y, theta)
    print cost

    cost, theta = gradientDescent(X, y, theta, alpha, iterations)
    # print cost
    print 'theta = ', theta

    print 'prediction for a city with population ', 3.5, ' (in 10,000s)'
    predict1 = array([1, 3.5]).dot(theta)
    print 'profit is ', predict1

    plot(X[:, 1], y)
    pyplot.plot(X[:, 1], X.dot(theta), 'b-')
    pyplot.show(block=True)
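For reference, here is a minimal sketch of what the computeCost and gradientDescent helpers called throughout these scripts are assumed to compute (squared-error cost and batch gradient descent). The actual module implementations in each snippet may differ in argument order and return values; the names below are hypothetical.

import numpy as np

def compute_cost_sketch(X, y, theta):
    # J(theta) = 1/(2m) * sum((X.theta - y)^2)
    m = y.shape[0]
    residual = X.dot(theta) - y
    return float(residual.T.dot(residual)) / (2 * m)

def gradient_descent_sketch(X, y, theta, alpha, num_iters):
    # Batch update: theta <- theta - (alpha/m) * X'(X.theta - y)
    m = y.shape[0]
    J_history = np.zeros(num_iters)
    for i in range(num_iters):
        theta = theta - (alpha / m) * X.T.dot(X.dot(theta) - y)
        J_history[i] = compute_cost_sketch(X, y, theta)
    return theta, J_history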
def logicRegressionRegularized(data):
    X = mapFeature(data[:, :-1], 6)
    y = data[:, -1]
    theta = np.zeros(shape=X.shape[1])
    theta, loss = gradientDescent(X, y, theta, options)
    accurates = []
    # for i in range(25):
    #     theta, _ = gradientDescent(X, y, theta, options)
    #     # test
    #     predict = (np.round(sigmoid(np.dot(X, theta))) == y)
    #     accurate = 1.0 * np.sum(predict == True) / len(y)
    #     accurates.append(accurate)
    #     print i * options["iterations"], accurate
    # print theta
    # test
    # predict = (np.round(sigmoid(np.dot(X, theta))) == y)
    # print 1.0 * np.sum(predict == True) / len(y)
    # print 1.0 * np.sum(y == 0) / len(y)
    # plotLoss(accurates, 50)
    plotLoss(loss, options["iterations"])
    plotSortBlock(data, theta)
    plotSortScatter(data)
    plotShow()
def output(partId):
    # Random Test Cases
    X1 = np.column_stack(
        (np.ones(20), np.exp(1) + np.exp(2) * np.linspace(0.1, 2, 20)))
    Y1 = X1[:, 1] + np.sin(X1[:, 0]) + np.cos(X1[:, 1])
    X2 = np.column_stack((X1, X1[:, 1]**0.5, X1[:, 1]**0.25))
    Y2 = np.power(Y1, 0.5) + Y1
    if partId == '1':
        out = formatter('%0.5f ', warmUpExercise())
    elif partId == '2':
        out = formatter('%0.5f ', computeCost(X1, Y1, np.array([0.5, -0.5])))
    elif partId == '3':
        out = formatter(
            '%0.5f ', gradientDescent(X1, Y1, np.array([0.5, -0.5]), 0.01, 10))
    elif partId == '4':
        out = formatter('%0.5f ', featureNormalize(X2[:, 1:4]))
    elif partId == '5':
        out = formatter(
            '%0.5f ', computeCostMulti(X2, Y2, np.array([0.1, 0.2, 0.3, 0.4])))
    elif partId == '6':
        out = formatter(
            '%0.5f ',
            gradientDescentMulti(X2, Y2, np.array([-0.1, -0.2, -0.3, -0.4]), 0.01, 10))
    elif partId == '7':
        out = formatter('%0.5f ', normalEqn(X2, Y2))
    return out
def thinkValueGD(self, X, y):
    # TODO: grid search to minimize the loss function
    X_new = np.apply_along_axis(lambda x: np.append(x, 0), 1, X)
    n, dim = X_new.shape
    w = np.zeros(dim)
    # w = np.random.random_sample((dim,))
    # w = (self.w if self.w != None else np.zeros(dim))
    mygd = gd.gradientDescent(X_new, y)
    if self.iter >= 0:
        bestIta = -1
        bestw = None
        bestAccuracy = -100
        for ita in [0.2, 0.15, 0.13, 0.12, 0.11, 0.09, 0.07, 0.05, 0.03, 0.01]:
            # averagedwIter, allw, w, iterCount = mygd.my_sgd(w, maxiter=50, ita=ita, c=1, Step_backtrack=False, stopMethod="performance")
            wIter, w, iterCount = mygd.my_gradient_decent(w, maxiter=10, ita=ita, c=1, Step_backtrack=False)
            accu_b = self.getAccuracy(w, self.y)
            # accu_b = self.compute_obj(np.delete(w, -1, 0))
            if bestAccuracy < accu_b:
                bestAccuracy = accu_b
                bestw = w
                bestIta = ita
        print "thinkValueGD ita update:", self.itaV, "-->", bestIta, "bestAccuracy", bestAccuracy
        self.itaV = bestIta
        w = bestw
    # averagedwIter, allw, w, iterCount = mygd.my_sgd(w, maxiter=self.maxiterV, ita=self.itaV, c=1, Step_backtrack=False, stopMethod="performance")
    # print "Done iterCount:", iterCount
    wIter, w, iterCount = mygd.my_gradient_decent(w, maxiter=self.maxiterV, ita=self.itaV, c=1, Step_backtrack=False, stopMethod="optimize")
    # self.w = w
    # accuracy = gd.getAccuracyOverIteration(wIter, X_new, y)
    print "iterCount:", iterCount
    # print "thinkValueGD accuracy", accuracy, "\niterCount:", iterCount
    return np.delete(w, -1, 0)
def main():
    set_printoptions(precision=6, linewidth=200)

    # loading data
    data = genfromtxt('data/ex1data2.txt', delimiter=',')
    X = data[:, 0:2]
    y = data[:, 2:3]
    m = shape(X)[0]

    X, mu, sigma = featureNormalize(X)
    # print X
    # print mu
    # print sigma

    X = c_[ones((m, 1)), X]
    m, n = shape(X)[0], shape(X)[1]

    iterations = 400
    alphas = [0.01, 0.03, 0.1, 0.3, 1.0]
    for alpha in alphas:
        theta = zeros((n, 1))
        J_history, theta = gradientDescent(X, y, theta, alpha, iterations)
        # print theta
        number_of_iterations = array([x for x in range(1, iterations + 1)]).reshape(iterations, 1)
        pyplot.plot(number_of_iterations, J_history, '-b')
        pyplot.title("Alpha = %f" % (alpha))
        pyplot.xlabel('Number of iterations')
        pyplot.xlim([0, 50])
        pyplot.show(block=True)
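The multivariate scripts above also rely on a featureNormalize helper that returns the normalized matrix together with the per-column mean and standard deviation. A minimal sketch, assuming plain z-score normalization (the helper name is hypothetical):

import numpy as np

def feature_normalize_sketch(X):
    # Subtract each column's mean and divide by its standard deviation
    mu = X.mean(axis=0)
    sigma = X.std(axis=0)
    return (X - mu) / sigma, mu, sigma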
def simpleLinerRegression(data):
    X = np.c_[data[:, :-1], np.ones(shape=data.shape[0])]
    y = data[:, -1]
    theta = np.ones(shape=data.shape[1])
    theta, loss = gradientDescent(X, y, theta, options)
    print theta
    plotLine(data, theta)
    plotLoss(loss, options["iterations"])
def Xtest_1(self):
    # TODO: add back 'Base case: everything 0'
    m = 5
    n = 5
    X = np.zeros((m, n))
    theta = np.zeros((n, 1))
    alpha = 1
    regParam = 0
    iterations = 50
    y = np.zeros((m, 1))
    theta = gradientDescent(theta, X, y, m, alpha, regParam, iterations)
    self.assertTrue(np.array_equal(theta, np.zeros((n, 1))))
    self.assertEqual(type(theta), type(np.array([])))
def gradientdescentfunc(X, y):
    theta = numpy.zeros([2, 1])
    # set up gradient descent with 1500 iterations and a learning rate of 0.01
    stgradientDescent = gradientDescent(X, y, 1500, 0.01, theta)
    stgradientDescent.printcomputeCost()
    thetagradient = stgradientDescent.cal_gradient()
    print('gradient result:')
    print(thetagradient)
    plotdatafunc(thetagradient)
    print('For population = 35,000, we predict a profit of')
    print(numpy.dot(numpy.matrix([[1, 3.5]]), thetagradient)[0][0] * 10000)
    print('For population = 70,000, we predict a profit of')
    print(numpy.dot(numpy.matrix([[1, 7]]), thetagradient)[0][0] * 10000)
    return thetagradient
def oneVsAll(X, y, num_labels, all_theta, alpha, iterations, flag):
    """
    Trains num_labels logistic-regression classifiers for a multi-class
    classification problem using the one-vs-all approach. Training requires
    the feature matrix X, the label vector y, the convergence parameter alpha,
    and the number of gradient-descent iterations. The learned model
    parameters are stored in the matrix all_theta, in which each column is the
    parameter vector of the i-th classifier; the number of parameters per
    column equals the number of features describing each object in X. The
    formal parameter flag enables or disables visualization of the
    gradient-descent convergence for the i-th classifier.
    """

    J_history = []

    for i in range(num_labels):
        print('Training classifier no.', i + 1)

        # ====================== Your code here ======================
        # Instructions: train several logistic-regression classifiers for the
        # multi-class classification problem using the one-vs-all approach

        all_theta[:, i:i + 1] = gradientDescent(X, (y == i).astype('uint8'),
                                                all_theta[:, i:i + 1], alpha, iterations)

        # =============================================================

        # Visualize the convergence of the i-th classifier
        if flag == True:
            plt.figure()
            plt.plot(np.arange(len(J_history)) + 1, J_history, '-b', linewidth=2)
            plt.xlabel('Number of iterations')
            plt.ylabel('Cost function value')
            plt.title('Classifier no. ' + str(i + 1))
            plt.grid()
            plt.show()

        print('done \n')

    return all_theta
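The docstring above describes all_theta as holding one parameter column per classifier. A hypothetical prediction helper built on that assumption scores each example against every column and picks the class with the highest score:

import numpy as np

def predict_one_vs_all_sketch(all_theta, X):
    # One row of scores per example, one column per class; the sigmoid is
    # monotone, so the raw linear scores are enough to take the arg max.
    return np.argmax(X.dot(all_theta), axis=1)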
def logicRegressionLine(data):
    X = np.c_[data[:, :-1], np.ones(shape=data.shape[0])]
    y = data[:, -1]
    theta = np.zeros(shape=data.shape[1])
    theta, loss = gradientDescent(X, y, theta, options)
    print theta
    # test (use train data)
    predict = (np.round(sigmoid(np.dot(X, theta))) == y)
    print 1.0 * np.sum(predict == True) / len(y)
    plotLoss(loss, options["iterations"])
    plotSortScatter(data)
    plotSortLine(data, theta)
    plotShow()
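The logistic-regression snippets above assume a sigmoid helper. A minimal sketch of it, together with the cross-entropy cost it usually pairs with (names are hypothetical; the actual modules are not shown here):

import numpy as np

def sigmoid_sketch(z):
    # Logistic function mapping scores to (0, 1)
    return 1.0 / (1.0 + np.exp(-z))

def logistic_cost_sketch(X, y, theta):
    # J(theta) = -1/m * sum(y*log(h) + (1-y)*log(1-h)) with h = sigmoid(X.theta)
    h = sigmoid_sketch(X.dot(theta))
    return float(-np.mean(y * np.log(h) + (1 - y) * np.log(1 - h)))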
def mulLinerRegression(data):
    X = data[:, :-1]
    X_norm, mu, sigma = featureNormalize(X)
    X_norm = np.c_[X_norm, np.ones(shape=data.shape[0])]
    y = data[:, -1]
    theta = np.zeros(shape=data.shape[1])
    theta, loss = gradientDescent(X_norm, y, theta, options)
    plotLoss(loss, options["iterations"])
    print theta, mu, sigma
    plot3D(data, theta, mu, sigma)
    # test
    x = [[1380, 3], [1494, 3], [1940, 4]]
    x = np.c_[(x - mu) / sigma, np.ones(3)]
    print np.dot(x, theta)
def learningCurve(X, y, X_val, y_val, alpha, num_iters, lam):
    """
    Computes the error on the training set (X, y) and on the validation set
    (X_val, y_val). The computed errors are needed to build learning curves.
    Here alpha is the convergence parameter, num_iters is the number of
    gradient-descent iterations, and lam is the regularization parameter.
    """

    m = y.shape[0]
    error_train = np.zeros([m, 1])
    error_val = np.zeros([m, 1])

    for i in range(m):
        # ====================== Your code here ======================
        # Instructions: compute the errors on the training and validation sets.
        # Train the model and evaluate the training error on the first i
        # training examples, i.e. (X[0:i + 1, :], y[0:i + 1, :]). When computing
        # the validation error, use the entire validation set (X_val, y_val)
        # at every step.

        theta = np.zeros([X.shape[1], 1])
        theta = gradientDescent(X[0:i + 1, :], y[0:i + 1, :], theta, alpha, num_iters, lam)[0]
        error_train[i] = computeCost(X[0:i + 1, :], y[0:i + 1, :], theta, 0.0)
        error_val[i] = computeCost(X_val, y_val, theta, 0.0)
        for j in range(theta.shape[0]):
            print('{:.4f} '.format(theta[j, 0]))

        # Train models on 1, 2, ..., 12 examples. For each case compute the
        # training error (formula: lecture 5, slide 23) and the validation
        # error (same formula) over the entire validation set.
        # =============================================================

    return error_train, error_val
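The two error vectors returned above are meant to be plotted against the number of training examples to form the learning curves. A minimal plotting sketch (the helper name is hypothetical):

import numpy as np
import matplotlib.pyplot as plt

def plot_learning_curve_sketch(error_train, error_val):
    # Training and validation error as a function of training-set size
    m = error_train.shape[0]
    examples = np.arange(1, m + 1)
    plt.plot(examples, error_train.ravel(), label='Train error')
    plt.plot(examples, error_val.ravel(), label='Validation error')
    plt.xlabel('Number of training examples')
    plt.ylabel('Error')
    plt.legend()
    plt.show()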
def oneVsAll(images, labels, K):
    images = np.c_[images, np.ones(images.shape[0])]
    print images.shape
    all_theta = np.zeros(shape=(K, images.shape[1]))
    splices = images.shape[0] / 10
    data = np.split(images, splices, axis=0)
    y = np.split(labels, splices, axis=0)
    losses = []
    for i in range(K):
        for j in range(splices):
            # print images[j].shape
            # print y[j].shape
            # print all_theta[i].shape
            print j
            all_theta, loss = gradientDescent(data[j], (y[j] == i), all_theta[i], options)
            # losses.append(loss)
    plotLoss(loss, options["iterations"])
def output(partId):
    # Random Test Cases
    X1 = column_stack((ones(20), exp(1) + dot(exp(2), arange(0.1, 2.1, 0.1))))
    Y1 = X1[:, 1] + sin(X1[:, 0]) + cos(X1[:, 1])
    X2 = column_stack((X1, X1[:, 1]**0.5, X1[:, 1]**0.25))
    Y2 = Y1**0.5 + Y1
    if partId == '1':
        return sprintf('%0.5f ', warmUpExercise())
    elif partId == '2':
        return sprintf('%0.5f ', computeCost(X1, Y1, array([0.5, -0.5])))
    elif partId == '3':
        return sprintf('%0.5f ', gradientDescent(X1, Y1, array([0.5, -0.5]), 0.01, 10))
    elif partId == '4':
        return sprintf('%0.5f ', featureNormalize(X2[:, 1:3]))
    elif partId == '5':
        return sprintf('%0.5f ', computeCostMulti(X2, Y2, array([0.1, 0.2, 0.3, 0.4])))
    elif partId == '6':
        return sprintf('%0.5f ', gradientDescentMulti(X2, Y2, array([-0.1, -0.2, -0.3, -0.4]), 0.01, 10))
    elif partId == '7':
        return sprintf('%0.5f ', normalEqn(X2, Y2))
print("Plotting Data ...\n") data = np.genfromtxt("../data/ex1data1.txt", delimiter = ",") X = data[:, 0] y = data[:, 1] plotData.plotData(X, y) pause = code.InteractiveConsole() pause.raw_input(prompt = "Press Enter to continue: ") # ============================== Gradient descent ================================ print("Running Gradient Descent ...\n") m = len(y) X = np.c_[np.ones((m, 1)), data[:, 0]] X = np.reshape(X, (m, 2)) y = np.reshape(y, (m, 1)) theta = np.zeros((2, 1)) iterations = 1500 alpha = 0.01 temp = computeCost.computeCost(X, y, theta) print("The first J: ", temp) [theta, J] = gradientDescent.gradientDescent(X, y, theta, alpha, iterations) print("Theta found by gradient descent: ") print("%f %f \n" % (theta[0], theta[1])) print("The sequence of J: \n") print(J)
## =================== Part 3: Gradient descent ===================
print('Running Gradient Descent...')

X_padded = np.column_stack((np.ones((m, 1)), X))  # Add a column of ones to x
theta = np.zeros((2, 1))  # initialize fitting parameters

# Some gradient descent settings
iterations = 1500
alpha = 0.01

# compute and display initial cost
print cc.computeCost(X_padded, y, theta)

# run gradient descent
theta = gd.gradientDescent(X_padded, y, theta, alpha, iterations)

# print theta to screen
print('Theta found by gradient descent: ')
print("{:f}, {:f}".format(theta[0, 0], theta[1, 0]))

# Plot the linear fit
plt.plot(X, X_padded.dot(theta), '-', label='Linear regression')
plt.legend(loc='lower right')
plt.draw()
plt.hold(False)  # prevents further plotting on the same figure

# Predict values for population sizes of 35,000 and 70,000
predict1 = np.array([1, 3.5]).dot(theta)
print("For population = 35,000, we predict a profit of {:f}".format(
    float(predict1 * 10000)))
predict2 = np.array([1, 7]).dot(theta)
print('\nTesting the cost function ...')
# compute and display initial cost
J = computeCost(X, y, theta)
print('With theta = [0 0]\nCost computed = {:.2f}'.format(J))
print('Expected cost value (approx) 32.07')

# further testing of the cost function
J = computeCost(X, y, np.array([-1, 2]))
print('\nWith theta = [-1 2]\nCost computed = {:.2f}'.format(J))
print('Expected cost value (approx) 54.24')

input('Program paused. Press enter to continue.\n')

print('\nRunning Gradient Descent ...')
# run gradient descent
theta, _ = gradientDescent(X, y, theta, alpha, iterations)

# print theta to screen
print('Theta found by gradient descent:')
print('{}'.format(theta))
print('Expected theta values (approx)')
print('[-3.6303  1.1664]')

# Plot the linear fit
plt.ion()  # keep previous plot visible
plt.plot(X[:, 1], X @ theta, '-')
plt.legend(['Training data', 'Linear regression'], loc='lower right')
plt.ioff()  # don't overlay any more plots on this figure

# Predict values for population sizes of 35,000 and 70,000
predict1 = np.array([1, 3.5]) @ theta
# add ones column
data2.insert(0, 'Ones', 1)

# set X (training data) and y (target variable)
cols = data2.shape[1]
X2 = data2.iloc[:, 0:cols - 1]
y2 = data2.iloc[:, cols - 1:cols]

# convert to matrices and initialize theta
X2 = np.matrix(X2.values)
y2 = np.matrix(y2.values)
theta2 = np.matrix(np.array([0, 0, 0]))

# initialize variables for learning rate and iterations
alpha = 0.01  # learning rate
iters = 2000

# perform linear regression on the data set
g2, cost2 = gd.gradientDescent(X2, y2, theta2, alpha, iters)

# get the cost (error) of the model
cf.computeCost(X2, y2, g2)
print("Gradient Descent: ", g2)
print("cost: ", cf.computeCost(X2, y2, g2))

fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(np.arange(iters), cost2, 'r')
ax.set_xlabel('Iterations')
ax.set_ylabel('Cost')
ax.set_title('Error vs. Training Epoch')
J = computeLoss(X, y, theta)
print("The initial value of the loss is J = %f" % J)
pause()
plt.clf()

###################################################################################
print colored(
    "Part 3: Computing the gradient of the cost function and updating the model parameters:\n",
    'red', attrs=['bold'])

learning_rate = 0.01
n_iter = 1500

start_time = time.time()
print("Training in progress ...")
theta, costHistory = gradientDescent(X, y, theta, learning_rate, n_iter)
stop_time = time.time()

print("\n")
print("\nComputation time = %f seconds" % (stop_time - start_time))
print("\n")
print("Parameter vector returned by the algorithm:")
print theta
print("\n")
pause()

###################################################################################
print colored("Part 4: Displaying the learned model:\n", 'red', attrs=['bold'])
                np.array([[7], [6], [5], [4]]),
                np.array([[0.1], [0.2]]))  # ans should be 7.0175

J_2 = computeCost(np.array([[1, 2, 3], [1, 3, 4], [1, 4, 5], [1, 5, 6]]),
                  np.array([[7], [6], [5], [4]]),
                  np.array([[0.1], [0.2], [0.3]]))

# theta = 5.2148 -0.5733
# J_hist(1) = 5.9794
# J_hist(1000) = 0.85426
theta_1, J_hist_1 = gradientDescent(np.array([[1, 5], [1, 2], [1, 4], [1, 5]]),
                                    np.array([[1], [6], [4], [2]]),
                                    np.array([[0], [0]]), 0.01, 1000)
print("====gradientDescent Test Case 1====\ntheta = %f, %f \nJ_hist(1): %f, \n\
J_hist(1000): %f" % (theta_1[0], theta_1[1], J_hist_1[0], J_hist_1[999]))

theta_2, J_hist_2 = gradientDescent(np.array([[1, 5], [1, 2]]),
                                    np.array([[1], [6]]),
                                    np.array([[0.5], [0.5]]), 0.1, 10)
print("====gradientDescent Test Case 2====\ntheta = %f, %f \nJ_hist(1): %f, \n\
(x, y, nexamples) = readData.readMultiFeature()

# transforming the X array into a matrix to simplify the
# matrix multiplication with the theta_zero feature
X = np.ones((nfeatures + 1, nexamples))
X[1:, :] = x[:, :]
theta = np.zeros(nfeatures + 1)

if nfeatures == 2:
    (X_norm, mu, sigma) = featureNormalization(X)
    # computes the cost as a test, should return 32.07
    print computeCost(X_norm, y, theta)

if nfeatures == 1:
    iterations = 1500
elif nfeatures == 2:
    iterations = 400
alpha = 0.01

# computes the linear regression coefficients using gradient descent
theta = gradientDescent(X_norm, y, theta, alpha, iterations)

print theta[0] + theta[1] * ((1650 - mu[0]) / sigma[0]) + theta[2] * ((3 - mu[1]) / sigma[1])

if nfeatures == 1:
    plot.plot(x, y, 'o', x, np.dot(theta, X))
    plot.show()
    # plot.plot(x[0,:],y,'o',x[0,:],np.dot(theta[:1],X[:1,:])
    # plot.show()
def problem1():
    alpha = 0.05        # gradient descent learning rate for housing
    beta = 0.005        # gradient descent (linear) learning rate for spambase
    gama = 0.05         # gradient descent (logistic) learning rate for spambase
    iterations = 1000   # gradient descent maximum iterations
    linear = True
    logistic = False
    threshold = 0.4
    K = 10

    print "============================================="
    print "loading housing data..."
    housing_train = np.loadtxt("../dataset/housing/housing_train.txt")
    housing_test = np.loadtxt("../dataset/housing/housing_test.txt")
    h_X, h_y = gd.extractData(housing_train)
    ht_X, ht_y = gd.extractData(housing_test)

    print "training for linear regression with gradient descent..."
    h_X_norm, h_X_means, h_X_stds = gd.normalize(h_X)
    w = gd.gradientDescent(h_X_norm, h_y, alpha, iterations, linear)

    print "predict for housing training data, mse is:"
    htrain_predict = gd.predict(h_X_norm, w)
    print np.mean((htrain_predict - h_y)**2)

    print "predict for housing test data, mse is:"
    ht_X_norm = gd.normalizeMS(ht_X, h_X_means, h_X_stds)
    htest_predict = gd.predict(ht_X_norm, w)
    print np.mean((htest_predict - ht_y)**2)

    print "============================================="
    print "loading spambase data..."
    spambase = np.loadtxt("../dataset/spambase/spambase.data", delimiter=",")
    np.random.shuffle(spambase)
    k_folds = np.array_split(spambase, K)
    mses = np.zeros(K)
    mses_train = np.zeros(K)
    conf_m = np.zeros(4, dtype=int)

    print "============================================="
    print "training (linear regression with gradient descent)"
    print "with %d folds cross-validation..." % K
    for i in range(K):
        print "iteration %d..." % i
        start = time.time()
        test = k_folds[i]
        train = np.vstack(np.delete(k_folds, i, axis=0))
        train_X, train_y = gd.extractData(train)
        test_X, test_y = gd.extractData(test)
        train_X_norm, t_means, t_stds = gd.normalize(train_X)
        w = gd.gradientDescent(train_X_norm, train_y, beta, iterations, linear)
        test_X_norm = gd.normalizeMS(test_X, t_means, t_stds)
        predict = gd.predictBoolean(test_X_norm, w, threshold)
        mses[i] = np.mean((predict - test_y)**2)
        print "mse is %f, time is %f seconds." % (mses[i], (time.time() - start))
        conf_m += cm.confusionMatrix(predict, test_y)
        predict_train = gd.predictBoolean(train_X_norm, w, threshold)
        mses_train[i] = np.mean((predict_train - train_y)**2)
    # plot ROC for last iteration
    plt.plotROC(test_X_norm, test_y, w, "Linear")

    print "the average acc (train) is: %f" % (1 - np.mean(mses_train))
    print "the average acc (test) is: %f" % (1 - np.mean(mses))
    print "the confusion matrix is: (TP,FP,TN,FN)"
    print conf_m / K

    print "============================================="
    mses = np.zeros(K)
    mses_train = np.zeros(K)
    conf_m = np.zeros(4, dtype=int)
    print "training (logistic regression with gradient descent)"
    print "with %d folds cross-validation..." % K
    for i in range(K):
        print "iteration %d..." % i
        start = time.time()
        test = k_folds[i]
        train = np.vstack(np.delete(k_folds, i, axis=0))
        train_X, train_y = gd.extractData(train)
        test_X, test_y = gd.extractData(test)
        train_X_norm, t_means, t_stds = gd.normalize(train_X)
        w = gd.gradientDescent(train_X_norm, train_y, gama, iterations, logistic)
        test_X_norm = gd.normalizeMS(test_X, t_means, t_stds)
        predict = gd.predictBoolean(test_X_norm, w, threshold)
        mses[i] = np.mean((predict - test_y)**2)
        print "mse is %f, time is %f seconds." % (mses[i], (time.time() - start))
        conf_m += cm.confusionMatrix(predict, test_y)
        predict_train = gd.predictBoolean(train_X_norm, w, threshold)
        mses_train[i] = np.mean((predict_train - train_y)**2)
    # plot ROC for last iteration
    plt.plotROC(test_X_norm, test_y, w, "Logistic")

    print "the average acc (train) is: %f" % (1 - np.mean(mses_train))
    print "the average acc (test) is: %f" % (1 - np.mean(mses))
    print "the confusion matrix is: (TP,FP,TN,FN)"
    print conf_m / K
    print "============================================="
import numpy as np
import matplotlib.pyplot as plt
import computeCost
import gradientDescent

foodtruck = np.loadtxt('ex1data1.txt', delimiter=',')

plt.plot(foodtruck[:, 0], foodtruck[:, 1], '^')
plt.xlabel('Population of City in 10,000s')
plt.ylabel('Profit in $10,000s')
# plt.show()

x = foodtruck[:, 0]
y = foodtruck[:, 1]
x = x.reshape(97, 1)
y = y.reshape(97, 1)
m = len(x)

x_intercept = np.ones((m, 1))
x_total = np.concatenate((x_intercept, x), axis=1)
thetas = np.zeros((2, 1))
iterations = 1500
alpha = 0.01

J = computeCost.computeCost(x_total, y, thetas)
print(J)

thetas = gradientDescent.gradientDescent(thetas, iterations, x_total, y, alpha)
print(thetas)

plt.plot(x_total[:, 1], np.dot(x_total, thetas), '-')
def testGradientDescent3():
    X = column_stack((ones(10), arange(10)))
    y = arange(10) * 2
    theta = array([1., 2.])
    th = gradientDescent(X, y, theta, 1, 1)[0]
    assert_array_almost_equal(th, array([0., -2.5]))
def testGradientDescent4():
    X = column_stack((ones(10), arange(10)))
    y = arange(10) * 2
    theta = array([1., 2.])
    th = gradientDescent(X, y, theta, 0.05, 100)[0]
    assert_array_almost_equal(th, array([0.2353, 1.9625]), decimal=3)
import numpy as np
import matplotlib.pyplot as plt
import featureNormalize as fn
import gradientDescent as gd
import normalEqn as ne

house = np.loadtxt('ex1data2.txt', delimiter=',')

x = house[:, 0:2]
y = house[:, 2]
y = y.reshape(47, 1)
x_fn = fn.featureNormalize(x)
m = len(x)
x_intercept = np.ones((m, 1))
x_total = np.concatenate((x_intercept, x_fn), axis=1)
thetas1 = np.zeros((x_total.shape[1], 1))
iterations = 400
alpha = 0.03

# using the gradientDescent function from univariate linear regression
thetas1 = gd.gradientDescent(thetas1, iterations, x_total, y, alpha)

# normal equations
x = house[:, 0:2]
y = house[:, 2]
y = y.reshape(47, 1)
x_intercept = np.ones((m, 1))
x_total = np.concatenate((x_intercept, x), axis=1)
thetas2 = np.zeros((x_total.shape[1], 1))
thetas2 = ne.normalEquation(thetas2, x_total, y)
input("Program paused. Press Enter to continue...") # =================== Part 3: Gradient descent =================== print('Running Gradient Descent ...') theta = np.zeros(2) # compute and display initial cost J = computeCost(X, y, theta) print('cost: %0.4f ' % J) # Some gradient descent settings iterations = 1500 alpha = 0.01 # run gradient descent theta, J_history = gradientDescent(X, y, theta, alpha, iterations) # print theta to screen print('Theta found by gradient descent: ') print('%s %s \n' % (theta[0], theta[1])) # Plot the linear fit plt.figure() plotData(data) plt.plot(X[:, 1], X.dot(theta), '-', label='Linear regression') plt.legend(loc='upper right', shadow=True, fontsize='x-large', numpoints=1) plt.show() input("Program paused. Press Enter to continue...") # Predict values for population sizes of 35,000 and 70,000
def testGradientDescent5():
    X = column_stack((ones(101), linspace(0, 10, 101)))
    y = sin(linspace(0, 10, 101))
    theta = array([1., -1.])
    th = gradientDescent(X, y, theta, 0.05, 100)[0]
    assert_array_almost_equal(th, array([0.5132, -0.0545]), decimal=3)
print('Running Gradient Descent ...\n')

x = np.array([np.ones(y.shape)])
x = np.concatenate((x, [X]), axis=0)
theta = np.zeros((2, 1))
iteration = 1500
alpha = 0.01

# compute and display initial cost
from computeCost import computeCost as compC
x = np.matrix(x.T)
y = np.matrix(np.array([y])).T
print compC(x, y, theta)

# run gradient descent
from gradientDescent import gradientDescent
theta = gradientDescent(x, y, theta, alpha, iteration)
print theta

# Predict values for population sizes of 35,000 and 70,000
# (populations are expressed in units of 10,000s, so 35,000 -> 3.5 and 70,000 -> 7)
prex1 = 3.5
prex2 = 7
print('For population = 35,000, we predict a profit of %f\n' % (np.matrix([1, prex1]) * theta)[0, 0])
print('For population = 70,000, we predict a profit of %f\n' % (np.matrix([1, prex2]) * theta)[0, 0])

raw_input('Program paused. Press enter to continue.\n')

## ============= Part 4: Visualizing J(theta_0, theta_1) =============
print('Visualizing J(theta_0, theta_1) ...\n')

theta_0 = np.array([np.linspace(-10, 10, num=100)])
theta_1 = np.array([np.linspace(-1, 4, num=100)])
J_theta = np.array(np.empty((100, 100)))
for i in range(100):
    for j in range(100):
@author: Trey
"""

import numpy as np
import gradientDescent as gd
import featureNormalize as fn
import normalEquation as ne

data = np.loadtxt(open("data2.txt", "rb"), delimiter=",")
num_features = len(data[1, ...])
X = data[..., range(num_features - 1)]
y = np.array([data[..., num_features - 1]]).T
m = len(y)

X_norm = fn.featureNormalize(X)
X_norm = np.append(np.ones((m, 1)), X_norm, axis=1)
theta = np.zeros((3, 1))
theta = gd.gradientDescent(X_norm, y, theta, 0.01, 400)
print(theta)

# NOTE: strictly, this single example should be normalized with the
# training-set mean and std rather than its own statistics
test = np.array([[1650, 3]]).T
test = fn.featureNormalize(test)
test = np.insert(test, 0, [[1]])
print(test @ theta)

X = np.append(np.ones((m, 1)), X, axis=1)
test = np.array([[1, 1650, 3]])
theta = ne.normalEquation(X, y)
print(test @ theta)
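Several snippets above end with a normal-equation solver (normalEqn / normalEquation). A minimal sketch of the closed-form least-squares solution they are assumed to compute (the function name is hypothetical; the repos' own signatures differ):

import numpy as np

def normal_eqn_sketch(X, y):
    # theta = (X'X)^-1 X'y; pinv is used for numerical robustness
    return np.linalg.pinv(X.T.dot(X)).dot(X.T).dot(y)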
theta = np.matrix(np.array([0, 0]))
print(X.shape)
print(theta.shape)
print(y.shape)

print("Cost Function:", cf.computeCost(X, y, theta))
print("my Cost Function:", cf.myComputeCost(X, y, theta))
print("my Cost Function 2:", cf.myComputeCost2(X, y, theta))

# initialize variables for learning rate and iterations
alpha = 0.01  # learning rate
iters = 2000

# perform gradient descent to "fit" the model parameters
g, cost = gd.gradientDescent(X, y, theta, alpha, iters)
print("Gradient Descent: ", g)
print("cost: ", cost)

myGD, myCost = gd.myGD(X, y, theta, alpha, iters)
print("my Gradient Descent: ", myGD)
print("my cost: ", myCost)

myGD2, myCost2 = gd.myGD2(X, y, theta, alpha, iters)
print("my Gradient Descent 2: ", myGD2)
print("my cost 2: ", myCost2)

print("Cost Function with GD: ", cf.computeCost(X, y, g))
print("my Cost Function with GD: ", cf.myComputeCost(X, y, g))

x = np.linspace(data.Population.min(), data.Population.max(), 100)
from readData import readData
import numpy as np

X, Y = readData()
m = len(X)
t0 = 0
t1 = 0
alpha = .01
iter = 9

from gradientDescent import gradientDescent
t0, t1 = gradientDescent(X, Y, t0, t1, alpha, iter, m)
print t0, t1
testX = []
testIndex = [i for i in range(len(data)) if i not in trainIndex]
if len(testIndex) == 0:
    testIndex = trainIndex
else:
    for i in testIndex:
        testX.append(data[i])
if len(testX) == 0:
    testX = trainX

################ TRAINING MODEL ################

model = gradientDescent(eta, theta)
model.train(trainX, trainY)
print('Learned Weights:')
print(model.weights[1:])
print('\nPrediction:')
predictions = model.predict(testX)
print(predictions)
print('\nDistance from Origin:')
print(model.distToOrigin())

################ SAVING PREDICTIONS ################

if '-save' in sys.argv:
#raw_input("Program paused. Press Enter to continue...") # =================== Part 3: Gradient descent =================== print 'Running Gradient Descent ...' theta = np.zeros(2) # compute and display initial cost J = computeCost(X, y, theta) print 'cost: %0.4f ' % J # Some gradient descent settings iterations = 1500 alpha = 0.01 # run gradient descent theta, J_history = gradientDescent(X, y, theta, alpha, iterations) # print theta to screen print 'Theta found by gradient descent: ' print '%s %s \n' % (theta[0], theta[1]) # Plot the linear fit plt.figure() plotData(data) plt.plot(X[:, 1], X.dot(theta), '-', label='Linear regression') plt.legend(loc='upper right', shadow=True, fontsize='x-large', numpoints=1) #show() #raw_input("Program paused. Press Enter to continue...") # Predict values for population sizes of 35,000 and 70,000
print('Running Gradient Descent ...\n')

# X = [ones(m, 1), data(:,1)];  # Add a column of ones to x
ones = np.ones((m, 1))
X = np.hstack((ones, X))
theta = np.zeros((2, 1))  # initialize fitting parameters

# Some gradient descent settings
iterations = 1500
alpha = 0.01

# compute and display initial cost
computeCost(X, y, theta)

# run gradient descent
theta_J_Hist = gradientDescent(X, y, theta, alpha, iterations)
theta = theta_J_Hist[0]
J_Hist = theta_J_Hist[1]

# print theta to screen
print('Coefficients found by gradient descent: ')
print(theta)

# Plot the linear fit
plt.hold(True)  # keep previous plot visible
plt.plot(X[:, 1], X.dot(theta), '-')
# -*- coding: utf-8 -*-
"""
Created on Sun Jan 14 16:51:28 2018

@author: Trey

Practicing machine learning with python:
running gradient descent with one feature.
"""

import numpy as np
import computeCost as cc
import gradientDescent as gd

data = np.loadtxt(open("data1.txt", "rb"), delimiter=",")
X = data[..., 0]
y = np.array([data[..., 1]]).T
m = len(y)
X = np.array([np.ones(m), X]).T

theta = np.zeros((2, 1))
J = cc.computeCost(X, y, theta)
print("With theta = [0 0] the cost computed is: " + str(J))

theta = np.zeros((2, 1))
theta = gd.gradientDescent(X, y, theta, 0.01, 2000)
print("Value of theta: " + str(theta))
%
% Hint: By using the 'hold on' command, you can plot multiple
%       graphs on the same figure.
%
% Hint: At prediction, make sure you do the same feature normalization.
%
'''

print('Running gradient descent ...\n')

# Choose some alpha value
alpha = 0.01
num_iters = 400

# Init Theta and Run Gradient Descent
theta = np.zeros((3, 1))
theta_J_history = gradientDescent(X, y, theta, alpha, num_iters)
theta = theta_J_history[0]
J_history = theta_J_history[1]

alpha = 0.001
theta = np.zeros((3, 1))
J_history1 = gradientDescent(X, y, theta, alpha, num_iters)[1]

alpha = 1
theta = np.zeros((3, 1))
J_history2 = gradientDescent(X, y, theta, alpha, num_iters)[1]

# Plot the convergence graph
x = np.arange(0, np.size(J_history))
iterations = 1500
alpha = 0.01

print('\nTesting the cost function ...')
j = computeCost(x, y, theta)
print('With theta = [0 ; 0]\nCost computed = ' + str(j))
print('Expected cost value (approx) 32.07\n')

# further testing of the cost function
j = computeCost(x, y, [-1, 2])
print('With theta = [-1 ; 2]\nCost computed = ' + str(j))
print('Expected cost value (approx) 54.24\n')

print('\nRunning Gradient Descent ...')
# run gradient descent
theta = gradientDescent(x, y, theta, alpha, iterations)

# print theta to screen
print('Theta found by gradient descent:')
print(theta)
print('Expected theta values (approx)')
print(' -3.6303\n 1.1664\n')

# Plot the linear fit
plt.plot(x[:, 1], np.dot(x, theta), 'b-')
plt.legend(["Training data", "Linear regression"])
plt.draw()

# Predict values for population sizes of 35,000 and 70,000
predict1 = np.dot([1, 3.5], theta)
print("For population = 35,000, we predict a profit of " + str(predict1 * 10000))
import numGen as g
import matplotlib.pyplot as plt
import numpy as np
import gradientDescent as gd

n = 100
thetaArr = [3, 5]
(x, y) = g.gen((n, 2), thetaArr)

xArr = np.squeeze(np.asarray(x))
yArr = np.squeeze(np.asarray(y))
# print(xArr, yArr)
plt.plot(xArr, yArr, 'ro')
plt.show()

alpha = 0.005
theta = gd.gradientDescent(x, y, alpha, 2000)
print('Designed theta: ', thetaArr)
print('Trained theta: ', theta[:, [1, theta.shape[1] - 1]])
J = costFunction.costFunction(X, Y, theta)
print(f'\nWith theta at [0, 0], the cost function is {J}')
print('Expected cost function value (approx) = 32.07\n')

print('Program paused for 1.5 seconds\n')
time.sleep(1.5)  # pause for 1.5 secs

# Further testing of the cost function
J = costFunction.costFunction(X, Y, [[-1], [2]])
print(f'With theta = [-1 ; 2]\nthe cost computed is {J}')
print('Expected cost value (approx) 54.24\n')

print('Program paused for 1.5 seconds\n')
time.sleep(1.5)  # pause for 1.5 secs

print('\nRunning Gradient Descent ...\n')
theta, costFunc = gradientDescent.gradientDescent(X, Y, theta, alpha, iterations)

# print theta to screen
print(f'Theta found by gradient descent: {theta}\n')
print('Expected theta values (approx)\n')
print(' -3.6303\n 1.1664\n\n')

plt.figure(2)
plt.plot(X[:, 1], X @ theta, '-', color='red', label='Linear regression')
plt.plot(x_1, Y, 'x', label='Training data')
plt.title('Plot of Population against Profits')
plt.xlabel('Population of cities in 10,000s')
plt.ylabel('Profit in $10,000s')
plt.legend()
## =================== Part 3: Gradient descent ===================
print('Running Gradient Descent ...\n')

# X = np.column_stack((np.ones((m, 1)), data[:, 1]))  # Add a column of ones to x
theta = np.zeros((2, 1))  # initialize fitting parameters

# Some gradient descent settings
iterations = 1500
alpha = 0.01

# compute and display initial cost
computeCost(X, y, theta)

# run gradient descent
theta = gradientDescent(X, y, theta, alpha, iterations)

# print theta to screen
# print('Theta found by gradient descent: ')
# print('#f #f \n', theta(1), theta(2))

# Plot the linear fit
# hold on          # keep previous plot visible
# plot(X(:,2), X*theta, '-')
# legend('Training data', 'Linear regression')
# hold off         # don't overlay any more plots on this figure

# Predict values for population sizes of 35,000 and 70,000
# predict1 = [1, 3.5] * theta
# print('For population = 35,000, we predict a profit of #f\n', ...
#       predict1*10000)
plt.ylabel('Profit in $10,000s')  # set Y label
plt.show()  # make the graph visible to us

X = X[:, np.newaxis]  # converting X from shape (m,) to (m,1)
y = y[:, np.newaxis]  # converting y from shape (m,) to (m,1)
ones = np.ones((m, 1))  # initializing an array of 1s as value for intercept terms
X = np.hstack((ones, X))  # adding the intercept term to X
theta = np.zeros([2, 1])  # Initializing parameters (theta0 and theta1)
iterations = 1500  # number of iterations to run
alpha = 0.01  # value of alpha

input("Press enter if you have completed computeCost file, else Ctrl+C then enter to exit")

'''Functions defined in other files will be imported here'''
from computeCost import computeCost
from gradientDescent import gradientDescent

J = computeCost(X, y, theta)  # calling function computeCost from computeCost.py file
print('Cost function J value :', J)

input("Press enter if you have completed gradient descent file, else Ctrl+C then enter to exit")

theta = gradientDescent(X, y, theta, alpha, iterations)  # calling function gradientDescent from gradientDescent.py file
J = computeCost(X, y, theta)
print('New Cost function value:', J)
# Inserting the first column X0 with the ones
x_norm.insert(loc=0, column='X0', value=np.ones(m))

# Calculate the number of features (columns) and rows (training examples)
num_of_feat = len(x_norm.columns)
num_of_train = len(x_norm)

# Choose the header feature index
# Variables for Gradient Descent
alpha = 0.1
num_iters = 400
theta_0 = pd.DataFrame(np.zeros([num_of_feat, 1]))

# Gradient Descent and Cost Function History
[theta, Jhist, thetahist] = gd.gradientDescent(x_norm, y, theta_0, alpha, num_iters)

#%% Part 5 - Plotting the Learning Curve

# Iterations list values
iterations = pd.DataFrame(list(range(num_iters)))
iterations.columns = ['Iter']

# Plot the learning curve (alpha is the variable)
Leacur_plot = pl.plot2D(iterations, Jhist)
Leacur_plot.set_title(r'Learning curve for $\alpha$={0}'.format(alpha))
Leacur_plot.set_xlabel('Iterations')
Leacur_plot.set_ylabel('Cost Function')

#%% Part 6 - Plotting the Regression
def logisticRegression(X, y, alpha, regParam, iterations):
    'Runs logistic regression, returning trained theta'
    m, n = X.shape  # 'n' includes intercept
    theta = _initTheta(n)
    return gradientDescent(theta, X, y, m, alpha, regParam, iterations)
def testGradientDescent1():
    X = array([[1., 0.]])
    y = array([0.])
    theta = array([0., 0.])
    th = gradientDescent(X, y, theta, 0, 0)[0]
    assert_array_equal(th, theta)