def gradient_descent(X, y, theta, alpha, num_iters):
    """Performs gradient descent to learn theta.

    theta, J_history = gradient_descent(X, y, theta, alpha, num_iters) updates
    theta by taking num_iters gradient steps with learning rate alpha.
    """
    # Initialize some useful values
    m = len(y)  # number of training examples
    J_history = [compute_cost(X, y, theta)]

    for _ in range(num_iters):
        # Perform a single gradient step on the parameter vector theta.
        # Hint: while debugging, it can be useful to print the cost and gradient here.
        delta = np.dot(X, theta).transpose() - y
        delta = np.multiply(delta.transpose(), X).transpose()
        delta = np.dot(delta, np.ones((m, 1)))  # sum over the training examples
        theta = theta - alpha * delta / m

        # Save the cost J in every iteration
        J_history.append(compute_cost(X, y, theta))

    return theta, J_history
def model(X_train, Y_train, layers_dims, learning_rate, num_iter, lambd, print_cost):
    with tf.device('/device:GPU:0'):
        tf.reset_default_graph()  # to be able to rerun the model without overwriting tf variables

        (n_x, m) = X_train.shape            # number of features and number of training examples
        n_y = Y_train.shape[0]              # number of classes
        n_hidden_layers = len(layers_dims)  # number of hidden layers
        costs = []                          # keep track of the cost

        ### Create placeholders ###
        X, Y = create_placeholders(n_x, n_y)

        ### Initialize parameters ###
        parameters = init_params(layers_dims)

        ### Forward propagation - build the forward propagation in the tensorflow graph ###
        ZL = forward_propagation(X, parameters)

        ### Cost - add the cost function to the tensorflow graph ###
        cost_function = compute_cost(ZL, Y, parameters, n_hidden_layers, lambd, m)

        ### Backpropagation - define the tensorflow optimizer ###
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost_function)
        # optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(cost_function)

        ### Initialize all the variables ###
        init = tf.global_variables_initializer()

        ### Start the session to compute the tensorflow graph ###
        with tf.Session() as sess:
            # Run the initialization
            sess.run(init)

            # Training loop
            for i in range(num_iter):
                # Run the session to execute the optimizer and the cost
                _, cost_value = sess.run([optimizer, cost_function],
                                         feed_dict={X: X_train, Y: Y_train})

                # Record the cost every 1000 iterations
                # if print_cost == True and i % 1000 == 0:
                #     print("Cost after iteration %i: %f" % (i, cost_value))
                if print_cost == True and i % 1000 == 0:
                    costs.append(cost_value)

            # Save the parameters in a variable
            parameters = sess.run(parameters)

    return parameters, costs
def gradient_descent(X, y, theta, alpha, num_iters): """ Performs gradient descent to learn theta. Parameters ---------- X : ndarray, shape (n_samples, n_features) Training data, where n_samples is the number of samples and n_features is the number of features. y : ndarray, shape (n_samples,) Labels. theta : ndarray, shape (n_features,) Initial linear regression parameter. alpha : float Learning rate. num_iters: int Number of iteration. Returns ------- theta : ndarray, shape (n_features,) Linear regression parameter. J_history: ndarray, shape (num_iters,) Cost history. """ m = len(y) J_history = np.zeros(num_iters) for i in range(num_iters): theta -= alpha / m * ((X.dot(theta) - y).T.dot(X)) J_history[i] = compute_cost(X, y, theta) return theta, J_history
def L_layer_model(X, Y, layers_dims, learning_rate=0.0075, num_iterations=2400, print_cost=False):  # lr was 0.009
    """
    Implements an L-layer neural network: [LINEAR->RELU]*(L-1)->LINEAR->SIGMOID.

    Arguments:
    X -- data, numpy array of shape (number of examples, num_px * num_px * 3)
    Y -- true "label" vector (containing 0 if cat, 1 if non-cat), of shape (1, number of examples)
    layers_dims -- list containing the input size and each layer size, of length (number of layers + 1)
    learning_rate -- learning rate of the gradient descent update rule
    num_iterations -- number of iterations of the optimization loop
    print_cost -- if True, it prints the cost every 100 steps

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    np.random.seed(1)
    costs = []  # keep track of cost

    # Parameters initialization.
    parameters = initialize_parameters_deep(layers_dims)

    # Loop (gradient descent)
    for i in range(0, num_iterations):
        # Forward propagation: [LINEAR -> RELU]*(L-1) -> LINEAR -> SIGMOID.
        AL, caches = L_model_forward(X=X, parameters=parameters)

        # Compute cost.
        cost = compute_cost(AL, Y)

        # Backward propagation.
        grads = L_model_backward(AL, Y, caches)

        # Update parameters.
        parameters = update_parameters(parameters, grads, learning_rate)

        # Print and record the cost every 100 iterations
        if print_cost and i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
            costs.append(cost)

    # plot the cost
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per hundreds)')
    plt.title("Learning rate = " + str(learning_rate))
    plt.show()

    return parameters
def train_model(x, y, ax):
    # gradient descent settings
    (_, n) = x.shape
    iters = 1500
    alpha = 0.01
    theta = np.zeros(n)

    # compute and display initial cost
    print('Testing the cost function ...\n')
    j = compute_cost.compute_cost(x, y, theta)
    print(' With theta = [0.0, 0.0]')
    print(' Cost computed = %0.2f' % j)
    print(' Expected cost value (approx) 32.07\n')

    # run gradient descent
    print('Running Gradient Descent ...\n')
    (theta, j_history) = gradient_descent.gradient_descent(x, y, theta, alpha, iters)
    print(' Theta found by gradient descent:')
    print(' ', theta)
    print(' Expected theta values (approx):')
    print('  [-3.6303, 1.1664]\n')

    return (alpha, theta, j_history)
def gradient_descent(X: np.ndarray, y: np.ndarray, theta: np.ndarray,
                     alpha: float, num_iters: int):
    m = len(y)
    J_history = np.zeros((num_iters, 1))

    for i in range(num_iters):
        theta = theta - (alpha / m) * X.T.dot(X.dot(theta) - y)
        J_history[i] = compute_cost(X, y, theta)

    return [theta, J_history]
def gradient_descent_multi(x, y, theta, alpha, n):
    j_history = np.zeros((n, 1))
    am = alpha / len(y)

    for i in range(n):
        theta -= am * np.dot(np.transpose(x), np.dot(x, theta) - y)
        j_history[i] = compute_cost(x, y, theta)

    return theta, j_history
def model(X, Y, layers_dims, learning_rate=0.01, initialization='random', init_const=0.01,
          num_of_iterations=10000, print_cost=True, print_cost_after=1000, seed=None):
    L = len(layers_dims) - 1  # number of layers

    # Initialize parameters
    parameters = initialize_parameters(layers_dims, initialization, init_const, seed)

    # Gradient descent
    for i in range(num_of_iterations):
        # Forward propagation
        AL, caches = forward_propagation(X, parameters, L)

        # Compute cost
        cost = compute_cost(AL, Y)

        # Backward propagation
        grads = backward_propagation(AL, Y, caches)

        # Update parameters
        parameters = update_parameters(parameters, grads, learning_rate, L)

        # Print cost after the given number of iterations
        if print_cost and i % print_cost_after == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    return parameters
def nn_model(X, Y, n_h, num_iterations=1500, print_cost=False):
    np.random.seed(3)
    n_x = layer_sizes(X, Y)[0]
    n_y = layer_sizes(X, Y)[2]

    parameters = initialize_parameters(n_x, n_h, n_y)
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    for i in range(0, num_iterations):
        A2, cache = forward_propagation(X, parameters)
        cost = compute_cost(A2, Y, parameters)
        grads = backward_propagation(parameters, cache, X, Y)
        parameters = update_parameters(parameters, grads)

        if print_cost and i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
            plt.scatter(i + 1, cost)

    plt.title('cost curve')
    plt.xlabel('iteration times')
    plt.ylabel('cost')
    plt.savefig('cost curve.jpg')

    return parameters
def gradient_descent(x, y, theta, alpha, num_iters):
    m = len(y)
    j_hist = zeros((num_iters, 1))

    for i in range(num_iters):
        error = x.dot(theta) - y            # residual: prediction minus target
        theta -= (alpha / m) * x.T.dot(error)
        j_hist[i] = compute_cost(x, y, theta)

    return [theta, j_hist]
def model_using_sgd(X, Y, layers_dims, learning_rate=0.01, initialization='random', _lambda=0,
                    keep_prob=1, init_const=0.01, num_of_iterations=10000, print_cost=True,
                    print_cost_after=1000, seed=None):
    L = len(layers_dims) - 1  # number of layers
    m = X.shape[1]            # number of training examples

    # Initialize parameters
    parameters = initialize_parameters(layers_dims, initialization, init_const, seed)

    # Stochastic gradient descent: update on one training example at a time
    for i in range(num_of_iterations):
        for j in range(m):
            # Forward propagation
            if keep_prob == 1:
                AL, caches = forward_propagation(X[:, j], parameters, L)
            elif keep_prob < 1:
                AL, caches = forward_propagation_with_dropout(X[:, j], parameters, L, keep_prob)

            # Compute cost
            if _lambda == 0:
                cost = compute_cost(AL, Y[:, j])
            else:
                cost = compute_cost_with_regularization(AL, Y[:, j], parameters, _lambda, L)

            # Backward propagation
            if _lambda == 0 and keep_prob == 1:
                grads = backward_propagation(AL, Y[:, j], caches)
            elif _lambda != 0:
                grads = backward_propagation_with_regularization(AL, Y[:, j], caches, _lambda)
            elif keep_prob < 1:
                grads = backward_propagation_with_dropout(AL, Y[:, j], caches, keep_prob)

            # Update parameters
            parameters = update_parameters_using_gd(parameters, grads, learning_rate, L)

        # Print cost after the given number of iterations
        if print_cost and i % print_cost_after == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    # Gradient checking
    gradient_checking(parameters, grads, X, Y, layers_dims, _lambda=_lambda)

    return parameters
def gradient_descent(X, y, theta, alpha, iterations):
    m = len(y)
    J_history = np.zeros(iterations)

    for i in range(iterations):
        theta -= ((X.dot(theta) - y).T.dot(X)) * alpha / m
        J_history[i] = compute_cost.compute_cost(X, y, theta)

    return theta, J_history
def gradient_descent(x, y, learning_step, number_of_iterations):
    # initial theta vector set to zero
    theta = np.zeros((x.shape[1], 1))
    # initial cost function history vector set to zero
    j_history = np.zeros((number_of_iterations, 1))
    am = learning_step / len(y)

    for i in range(number_of_iterations):
        theta -= am * np.dot(np.transpose(x), (np.dot(x, theta) - y))
        j_history[i] = compute_cost(x, y, theta)

    return theta, j_history
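# A small usage sketch of the vectorized variant above on synthetic data, assuming the
# mean-squared-error compute_cost sketched earlier. The data, shapes, and learning rate
# here are purely illustrative and do not come from the original exercise.
import numpy as np

rng = np.random.default_rng(0)
x_raw = rng.uniform(0, 5, size=(50, 1))
x = np.hstack([np.ones((50, 1)), x_raw])                     # bias column plus one feature
y = 2 + 3 * x_raw + rng.normal(scale=0.1, size=(50, 1))      # y = 2 + 3*x plus noise

theta, j_history = gradient_descent(x, y, learning_step=0.05, number_of_iterations=2000)
print(theta.ravel())    # should approach [2, 3]
print(j_history[-1])    # final cost should be close to zero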
def gradient_descent(x, y, size, theta, alpha, iterations):
    """
    Performs gradient descent to optimize the 'theta' parameters.

    Updates theta for a total of the inputted 'iterations', with a learning rate 'alpha'.

    Parameters
    ----------
    x : array_like
        Shape (m, n+1), where m is the number of examples and n is the number of
        features, including the vector of ones for the zeroth parameter.
    y : array_like
        Shape (m,), the value of the function at each point.
    size : int
        Number of total training points.
    theta : array_like
        Shape (n+1, 1). Starting parameters of the regression function.
    alpha : float
        The learning rate.
    iterations : int
        The number of iterations for gradient descent.

    Returns
    -------
    theta : array_like
        Shape (n+1, 1). The optimized linear regression parameters.
    cost_history : list
        A list of the values of the cost function after each iteration.
    """
    cost_history = []
    converge = False

    for i in range(iterations):
        temp_cost = compute_cost(x, y, size, theta)
        try:
            if cost_history[-1] - temp_cost <= 0.0001:
                converge = True
        except IndexError:
            pass
        cost_history.append(temp_cost)

        delta = (1 / size) * ((np.dot(theta.T, x)) - y) * x
        delta2 = delta.sum(axis=1, keepdims=True)
        theta = theta - (alpha * delta2)

    if converge:
        print("The function converged; fewer iterations would suffice.")

    print(f"The new optimized parameters are: \n{theta}\n")
    return theta, cost_history
def nn_model(X, Y, n_h, num_iterations=10000, learning_rate=0.01, print_cost=False):
    """
    Parameters
    ----------
    X : dataset of shape (2, number of examples)
    Y : labels of shape (1, number of examples)
    n_h : size of the hidden layer
    num_iterations : number of iterations in the gradient descent loop
    print_cost : if True, print the cost every 1000 iterations

    Returns
    -------
    parameters : parameters learnt by the model. They can then be used to predict.
    """
    np.random.seed(3)
    n_x = network_structure(X, Y, n_h)[0]
    n_h = network_structure(X, Y, n_h)[1]
    n_y = network_structure(X, Y, n_h)[2]

    # Initialize parameters
    parameters = initialize_parameters(n_x, n_h, n_y)
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]

    # Loop (gradient descent)
    for i in range(0, num_iterations):
        # Forward propagation. Inputs: "X, parameters". Outputs: "A2, cache".
        A2, cache = forward_propagation(X, parameters)

        # Cost function. Inputs: "A2, Y". Outputs: "cost".
        cost = compute_cost(A2, Y)

        # Backpropagation. Inputs: "parameters, cache, X, Y". Outputs: "grads".
        grads = backward_propagation(parameters, cache, X, Y)

        # Gradient descent parameter update. Inputs: "parameters, grads". Outputs: "parameters".
        parameters = update_parameters(parameters, grads, learning_rate=learning_rate)

        # Print the cost every 1000 iterations
        if print_cost and i % 1000 == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    return parameters
def plot_j_history(x, y, theta, ax3, ax4):
    print('Visualizing J(theta_0, theta_1) ...\n')

    # grid over which we will calculate j_vals
    theta0_vals = np.linspace(-10, 10, 100)
    theta1_vals = np.linspace(-1, 4, 100)

    # calculate j_vals
    j_vals = np.zeros([len(theta0_vals), len(theta1_vals)])
    for i in range(len(theta0_vals)):
        for j in range(len(theta1_vals)):
            t = [theta0_vals[i], theta1_vals[j]]
            j_vals[i, j] = compute_cost.compute_cost(x, y, t)

    # make x, y and z data objects
    axis_z = np.transpose(j_vals)
    axis_x, axis_y = np.meshgrid(theta0_vals, theta1_vals)

    # plot a new 3d surface figure
    surf = ax3.plot_surface(axis_x, axis_y, axis_z, rstride=1, cstride=1,
                            cmap=cm.coolwarm, linewidth=0, antialiased=False)
    ax3.get_figure().colorbar(surf, shrink=0.5, aspect=10)
    ax3.set_title('Surface')
    ax3.set_xlabel('$\\theta_0$')
    ax3.set_ylabel('$\\theta_1$')
    ax3.set_xticks(range(-10, 11, 5))
    ax3.set_yticks(range(-1, 5, 1))
    plt.show()

    # plot the corresponding contour figure
    cs = ax4.contour(axis_x, axis_y, np.log10(axis_z))
    ax4.plot(theta[0], theta[1], color='r', marker='x', linewidth=0.5)
    ax4.set_title('Contour, showing minimum')
    ax4.set_xlabel('$\\theta_0$')
    ax4.set_ylabel('$\\theta_1$')
    ax4.set_xticks(range(-10, 11, 2))
    ax4.set_yticks(np.linspace(-1, 4, 11))
    plt.show()
def nn_model(X, Y, n_h, num_iterations=10000, print_cost=False):
    """
    Arguments:
    X -- dataset of shape (2, number of examples)
    Y -- labels of shape (1, number of examples)
    n_h -- size of the hidden layer
    num_iterations -- number of iterations in the gradient descent loop
    print_cost -- if True, print the cost every 1000 iterations

    Returns:
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    np.random.seed(3)
    n_x = layer_sizes(X, Y)[0]
    n_y = layer_sizes(X, Y)[2]

    # Initialize parameters, then retrieve W1, b1, W2, b2.
    parameters = initialize_parameters(n_x, n_h, n_y)
    W1 = parameters['W1']
    b1 = parameters['b1']
    W2 = parameters['W2']
    b2 = parameters['b2']

    # Loop (gradient descent)
    for i in range(0, num_iterations):
        # Forward propagation. Inputs: "X, parameters". Outputs: "A2, cache".
        A2, cache = forward_propagation(X, parameters)

        # Cost function. Inputs: "A2, Y, parameters". Outputs: "cost".
        cost = compute_cost(A2, Y, parameters)

        # Backpropagation. Inputs: "parameters, cache, X, Y". Outputs: "grads".
        grads = backward_propagation(parameters, cache, X, Y)

        # Gradient descent parameter update. Inputs: "parameters, grads". Outputs: "parameters".
        parameters = update_parameters(parameters, grads)

        # Print the cost every 1000 iterations
        if print_cost and i % 1000 == 0:
            print("Cost after iteration %i: %f" % (i, cost))

    return parameters
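# For completeness, a minimal sketch of the update_parameters step the two-layer
# nn_model variants call, assuming the conventional dW1/db1/dW2/db2 keys in grads.
# Only the call sites appear in this section, so the body and the default learning
# rate here are assumptions.
import copy

def update_parameters(parameters, grads, learning_rate=1.2):
    """Plain gradient step for a two-layer network (assumed helper)."""
    parameters = copy.deepcopy(parameters)
    for key in ("W1", "b1", "W2", "b2"):
        parameters[key] = parameters[key] - learning_rate * grads["d" + key]
    return parameters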
def compute_cost_with_regularization(AL, Y, parameters, _lambda, num_of_layers):
    m = Y.shape[1]  # number of examples

    # Compute the sum of squares of the weight matrices
    W = 0
    for i in range(1, num_of_layers + 1):
        W += np.sum(np.square(parameters[f'W{i}']))

    # L2 regularization term
    L2_regularization_cost = (1 / m) * (_lambda / 2) * W

    # Cross-entropy cost
    cross_entropy_cost = compute_cost(AL, Y)

    cost = cross_entropy_cost + L2_regularization_cost
    return cost
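# The neural-network snippets also rely on a cross-entropy compute_cost that is not
# shown here. A minimal sketch of the two-argument form, assuming the usual binary
# cross-entropy over a (1, m) label matrix; the clipping constant is an assumption
# added only to avoid log(0).
import numpy as np

def compute_cost(AL, Y):
    """Binary cross-entropy cost averaged over m examples (assumed implementation)."""
    m = Y.shape[1]
    AL = np.clip(AL, 1e-12, 1 - 1e-12)   # guard against log(0); tolerance is an assumption
    cost = -np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL)) / m
    return float(np.squeeze(cost))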
def plot_figures(x, y, theta):
    # Make data.
    theta0_values = np.linspace(-10, 10, 100)
    theta1_values = np.linspace(-1, 4, 100)
    j_values = np.zeros((len(theta0_values), len(theta1_values)))

    # Fill out j_values over the whole grid
    for i in range(len(theta0_values)):
        for j in range(len(theta1_values)):
            t = np.transpose(np.matrix([theta0_values[i], theta1_values[j]]))
            j_values[i, j] = compute_cost(x, y, t)

    x_plot, y_plot = np.meshgrid(theta0_values, theta1_values, indexing='ij')

    plt.figure()
    cs = plt.contour(x_plot, y_plot, j_values, np.logspace(-2, 3, 20))
    plt.plot(theta[0], theta[1], 'rx')
    plt.xlabel(r'$\theta_0$')
    plt.ylabel(r'$\theta_1$')
    plt.clabel(cs, inline=1, fontsize=8)
    plt.title('Contour plot for cost function J()\n')
    plt.show()

    fig = plt.figure(figsize=plt.figaspect(0.5))
    ax = fig.add_subplot(121, projection='3d')
    ax.plot_surface(x_plot, y_plot, j_values, cmap='bwr')
    ax.set_xlabel(r'$\theta_0$')
    ax.set_ylabel(r'$\theta_1$')
    ax.set_zlabel('Cost function')

    # Customize the z axis.
    # ax.set_zlim(0, 700)
    # ax.zaxis.set_major_locator(LinearLocator(10))
    # ax.zaxis.set_major_formatter(FormatStrFormatter('%3.0f'))

    # Add a color bar which maps values to colors.
    # fig.colorbar(surf, shrink=0.5, aspect=5)

    plt.show()
    return
def gradient_descent(x, y, theta, alpha, iters):
    j_history = np.zeros(iters)
    (m, n) = x.shape

    for it in range(iters):
        # copy theta so every parameter is computed from the old values
        temp_theta = theta.copy()
        for j in range(n):
            sum_j = 0
            for i in range(m):
                h = np.dot(x.iloc[i], theta)
                sum_j = sum_j + (h - y.iloc[i]) * x.iloc[i, j]
            temp_theta[j] = theta[j] - alpha * sum_j / m

        # update theta values simultaneously
        theta = temp_theta
        j_history[it] = compute_cost.compute_cost(x, y, theta)

    return (theta, j_history)
parameters = initialize_parameters(n_x, n_h, n_y)
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
print("b2 = " + str(parameters["b2"]))

print('=============== 4.3 - The Loop ====================')

# forward_propagation
X_assess, parameters = forward_propagation_test_case()
A2, cache = forward_propagation(X_assess, parameters)
print(np.mean(cache['Z1']), np.mean(cache['A1']), np.mean(cache['Z2']), np.mean(cache['A2']))

# compute_cost
A2, Y_assess, parameters = compute_cost_test_case()
print("cost = " + str(compute_cost(A2, Y_assess, parameters)))

# backward_propagation
parameters, cache, X_assess, Y_assess = backward_propagation_test_case()
grads = backward_propagation(parameters, cache, X_assess, Y_assess)
print("dW1 = " + str(grads["dW1"]))
print("db1 = " + str(grads["db1"]))
print("dW2 = " + str(grads["dW2"]))
print("db2 = " + str(grads["db2"]))

# update_parameters
parameters, grads = update_parameters_test_case()
parameters = update_parameters(parameters, grads)
print("W1 = " + str(parameters["W1"]))
print("b1 = " + str(parameters["b1"]))
print("W2 = " + str(parameters["W2"]))
X = data[:, 0].reshape(-1, 1)
y = data[:, 1].reshape(-1, 1)
m = len(y)

plot_data(X, y, 'x')

X = np.c_[np.ones((m, 1)), data[:, 0]]
theta = np.zeros((2, 1))
iterations = 1500
alpha = 0.01

print('\nTesting the cost function ...\n')
J = compute_cost(X, y, theta)
print('With theta = [0 ; 0]\nCost computed = %f\n' % J)
print('Expected cost value (approx) 32.07\n')

J = compute_cost(X, y, np.mat('-1 ; 2'))
print('With theta = [-1 ; 2]\nCost computed = %f\n' % J)
print('Expected cost value (approx) 54.24\n')

[theta, J_history] = gradient_descent(X, y, theta, alpha, iterations)
print('Theta found by gradient descent:\n')
print('%s\n' % theta)
print('Expected theta values (approx)\n')
print(' -3.6303\n 1.1664\n\n')
# Set train parameters.
# lambdav = 0.00001
lambdav = 0
# alpha = 0.0000001
# iterations = 1000000
alpha = 0.1
iterations = 1200

# print("Solving normal equation.")
theta = solve_normal_equation(music_train.X, music_train.y, lambdav)

print("Solving using gradient descent.")
# theta = gradient_descent(music_train.X, music_train.y, None, alpha, lambdav, iterations)
# theta, J_history = gradient_descent_with_J_history(music_train.X, music_train.y, None, alpha, lambdav, iterations)
# plot_history(J_history)

print("Computing cost.")
print(compute_cost(music_train.X, music_train.y, theta, lambdav))
print(compute_cost(music_validation.X, music_validation.y, theta, lambdav))
print(compute_cost(music_test.X, music_test.y, theta, lambdav))

for delta_year in range(10):
    print(delta_year)
    print("Computing train accuracy.")
    print(compute_accuracy(music_train.X, music_train.y, theta, delta_year))
    print(compute_accuracy(music_validation.X, music_validation.y, theta, delta_year))
    print(compute_accuracy(music_test.X, music_test.y, theta, delta_year))
plt.show()

# =================== Part 3: Gradient descent ===================
print('Running Gradient Descent...')

# Add a column of ones to x
X = np.hstack((np.ones((m, 1)), X.reshape(m, 1)))

# Initialize fitting parameters
theta = np.zeros(2)

# Some gradient descent settings
iterations = 1500
alpha = 0.01

# Compute and display initial cost
cost = compute_cost(X, y, theta)
print(cost)

# Run gradient descent
theta, _ = gradient_descent(X, y, theta, alpha, iterations)

# Print theta to screen
print("Theta found by gradient descent:", theta)

plt.figure()
plot_data(X[:, 1], y)
plt.plot(X[:, 1], X.dot(theta), label='Linear Regression')
plt.legend(loc='upper left', numpoints=1)
plt.show()

# Predict values for population sizes of 35,000 and 70,000
W, b, activation="relu") print("With ReLU: A = " + str(A)) #L-Layer Model X, parameters = L_model_forward_test_case() AL, caches = L_model_forward(X, parameters) print("AL = " + str(AL)) print("Length of caches list = " + str(len(caches))) #Cost function Y, AL = compute_cost_test_case() print("cost = " + str(compute_cost(AL, Y))) #Linear backward # Set up some test inputs dZ, linear_cache = linear_backward_test_case() dA_prev, dW, db = linear_backward(dZ, linear_cache) print("dA_prev = " + str(dA_prev)) print("dW = " + str(dW)) print("db = " + str(db)) #Linear-Activation backward AL, linear_activation_cache = linear_activation_backward_test_case() dA_prev, dW, db = linear_activation_backward(AL, linear_activation_cache,
# Set train parameters.
lambdav = 0.0000000001
n = len(music_train.X[0])

print("Solving normal equation.")
# Get thetas to reduce data.
theta = solve_normal_equation(music_train.X, music_train.y, lambdav)
ordered_theta = np.argsort(np.abs(theta).reshape(len(theta)))
ordered_theta = ordered_theta[::-1]

# Initialize costs.
J_history_train = np.zeros(n)
J_history_validation = np.zeros(n)

for iteration in range(n):
    theta = solve_normal_equation(music_train.X[:, ordered_theta[:(n - iteration)]],
                                  music_train.y, lambdav)
    J_history_train[iteration] = compute_cost(music_train.X[:, ordered_theta[:(n - iteration)]],
                                              music_train.y, theta, 0)
    J_history_validation[iteration] = compute_cost(music_validation.X[:, ordered_theta[:(n - iteration)]],
                                                   music_validation.y, theta, 0)
    print("Theta size: " + str(n - iteration))
    print("J_train: %f" % J_history_train[iteration])
    print("J_validation: %f" % J_history_validation[iteration])
    print("Accuracy: %f" % compute_accuracy(music_test.X[:, ordered_theta[:(n - iteration)]],
                                            music_test.y, theta, 9))
    ordered_theta = np.argsort(np.abs(theta).reshape(len(theta)))
    ordered_theta = ordered_theta[::-1]

plot_history_train_validation(J_history_train, J_history_validation)
plot_history(J_history_train - J_history_validation)
X, mu, sigma = feature_normalize(X_Original)
plt.show()

X = add_x0(X)
m = X.shape[0]
n = X.shape[1]

learning_rate = .3
theta = np.zeros((n, 1))
max_iter = 800
his = np.zeros((max_iter, 1))

for i in range(max_iter):
    cost = compute_cost(X, y, theta)
    grad = gradient_descent(X, y, theta, learning_rate, m)
    theta = theta - grad
    his[i, :] = cost
    if i % 100 == 99:
        print("iterate number: " + str(i + 1) + " -- cost: " + str(cost))

plt.plot(his, label='cost')
plt.ylabel('cost')
plt.xlabel('step')
plt.title('logistic regression')
plt.legend(loc='upper center', shadow=True)
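# A minimal sketch of the feature_normalize helper called above, assuming standard
# z-score normalization; the column-wise mean and standard deviation are returned so
# the same scaling can be applied to new inputs. The body is an assumption, as only
# the call site appears in this section.
import numpy as np

def feature_normalize(X):
    """Z-score normalize each feature column (assumed implementation)."""
    mu = np.mean(X, axis=0)
    sigma = np.std(X, axis=0)
    X_norm = (X - mu) / sigma
    return X_norm, mu, sigma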
X_dat = np.hstack([ones, X_dat])

X = np.linspace(-5, 1, 30)
Y = np.linspace(-1, 2, 30)
X, Y = np.meshgrid(X, Y)

Z = []
Z_flat = []
for i in range(0, len(X)):
    Z.append([])
    Z_flat.append([])
    for j in range(0, len(Y)):
        Z[i].append(compute_cost(X_dat, y_dat, np.matrix([X[i][j], Y[i][j]]).T))
        Z_flat[i].append(4.48339)

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1, projection='3d')
ax.plot_wireframe(X, Y, Z)
ax.plot_surface(X, Y, Z_flat)
ax.scatter(0, 0, compute_cost(X_dat, y_dat, np.matrix([0, 0]).T), c='g')
ax.scatter(-3.89530051, 1.19298539, 4.48339, c='r')
ax.set_xlabel("Theta_0")
ax.set_ylabel("Theta_1")
ax.set_zlabel("Cost")
def model(X_train, Y_train, X_test, Y_test, learning_rate=0.009, num_epochs=100,
          minibatch_size=64, print_cost=True, operation='save', predict=None):
    """
    Implements a three-layer ConvNet in Tensorflow:
    CONV2D -> RELU -> MAXPOOL -> CONV2D -> RELU -> MAXPOOL -> FLATTEN -> FULLYCONNECTED

    Arguments:
    X_train -- training set, of shape (None, 64, 64, 3)
    Y_train -- training labels, of shape (None, n_y = 6)
    X_test -- test set, of shape (None, 64, 64, 3)
    Y_test -- test labels, of shape (None, n_y = 6)
    learning_rate -- learning rate of the optimization
    num_epochs -- number of epochs of the optimization loop
    minibatch_size -- size of a minibatch
    print_cost -- True to print the cost every 5 epochs

    Returns:
    train_accuracy -- real number, accuracy on the train set (X_train)
    test_accuracy -- real number, testing accuracy on the test set (X_test)
    parameters -- parameters learnt by the model. They can then be used to predict.
    """
    ops.reset_default_graph()  # to be able to rerun the model without overwriting tf variables
    tf.set_random_seed(1)      # to keep results consistent (tensorflow seed)
    seed = 3                   # to keep results consistent (numpy seed)
    (m, n_H0, n_W0, n_C0) = X_train.shape
    n_y = Y_train.shape[1]
    costs = []                 # to keep track of the cost

    X, Y = create_placeholders(n_H0, n_W0, n_C0, n_y)
    parameters = initialize_parameters()
    Z3 = forward_propagation(X, parameters)
    cost = compute_cost(Z3, Y)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cost)
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    with tf.Session() as sess:
        if operation == 'save':
            sess.run(init)
            for epoch in range(num_epochs):
                minibatch_cost = 0.
                num_minibatches = int(m / minibatch_size)  # number of minibatches of size minibatch_size in the train set
                seed = seed + 1
                minibatches = random_mini_batches(X_train, Y_train, minibatch_size, seed)

                for minibatch in minibatches:
                    (minibatch_X, minibatch_Y) = minibatch
                    _, temp_cost = sess.run([optimizer, cost],
                                            feed_dict={X: minibatch_X, Y: minibatch_Y})
                    minibatch_cost += temp_cost / num_minibatches

                if print_cost == True and epoch % 5 == 0:
                    print("Cost after epoch %i: %f" % (epoch, minibatch_cost))
                if print_cost == True and epoch % 1 == 0:
                    costs.append(minibatch_cost)

            save_path = saver.save(sess, "model.ckpt")
            print("Model saved in path: %s" % save_path)

            predict_op = tf.argmax(Z3, 1)
            correct_prediction = tf.equal(predict_op, tf.argmax(Y, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
            print(accuracy)
            train_accuracy = accuracy.eval({X: X_train, Y: Y_train})
            test_accuracy = accuracy.eval({X: X_test, Y: Y_test})
            print("Train Accuracy:", train_accuracy)
            print("Test Accuracy:", test_accuracy)

        elif operation == 'restore':
            saver.restore(sess, "model.ckpt")
            predict_op = tf.argmax(Z3, 1)
            result = predict_op.eval({X: predict})
            print(result)
# Linear Regression: Company Profit per City Population

# Load File and Set Initial Parameters
filename = 'city_profit.txt'
x, y = load_data(filename)
size = y.size
theta = np.array([[0.0], [0.0]])
alpha = 0.01
iterations = 1500
population = 175000

# Cost Function
cost = compute_cost(x, y, size, theta=theta)
print(f"With given theta: \n\tCost computed = {cost}\n")

# Gradient Descent
new_theta, cost_history = gradient_descent(x, y, size, theta=theta, alpha=alpha, iterations=iterations)

# Plot Data and Regression Line
plot_data(x, y, new_theta)
ax1.plot(X, y, 'rx')
plt.xlabel('Population of City in 10,000s')
plt.ylabel('Profit in $10,000s')

print(" =================== Part 3: Gradient descent ===================")
print('Running Gradient Descent ...')

X = np.array([np.ones(m), X]).transpose()  # Add a column of ones to x
theta = np.zeros((2, 1))                   # initialize fitting parameters

# Some gradient descent settings
iterations = 1500
alpha = 0.01

# compute and display initial cost
print('Initial cost is', compute_cost(X, y, theta))

# run gradient descent
theta, J_history = gradient_descent(X, y, theta, alpha, iterations)

# print theta to screen
print('Theta found by gradient descent:\n', theta)
print('J_history =', J_history)

# Plot the linear fit
ax1.plot(X[:, 1], np.dot(X, theta), 'k-')

# Predict the profit for a population of 100,000 (10 in units of 10,000s)
predict1 = np.dot([1, 10], theta)
print('For population = 100,000, we predict a profit of', predict1 * 10000)

print('============= Part 4: Visualizing J(theta_0, theta_1) ============= ')