def nnCostFunction(nn_params, input_layer_size, hidden_layer_size, num_labels,
                   X, y, lambda_nn):
    import numpy as np
    from sigmoid_function import sigmoid_function
    from sigmoid_gradient import sigmoid_gradient

    # Reshape nn_params back into the weight matrices Theta1 and Theta2
    Theta1 = np.reshape(
        nn_params[0:hidden_layer_size * (input_layer_size + 1)],
        [hidden_layer_size, input_layer_size + 1])
    Theta2 = np.reshape(nn_params[hidden_layer_size * (input_layer_size + 1):],
                        [num_labels, hidden_layer_size + 1])
    m = len(X[:, 0])

    # Feedforward pass: add the bias column, then propagate through both layers
    X = np.concatenate((np.ones([m, 1]), X), axis=1)
    a1 = X
    z2 = np.dot(a1, Theta1.T)
    a2 = sigmoid_function(z2)
    a2 = np.concatenate((np.ones([m, 1]), a2), axis=1)
    a3 = sigmoid_function(np.dot(a2, Theta2.T))

    # Recode the 1-indexed labels as one-hot rows
    #num_labels_eye=np.eye(num_labels)
    #ry=num_labels_eye[y,:]
    ry = np.zeros([m, num_labels])
    for i in range(m):
        ry[i, y[i] - 1] = 1

    # Regularized cross-entropy cost; bias columns are excluded from the penalty
    cost = ry * np.log(a3) + (1 - ry) * np.log(1 - a3)
    J = -np.sum(cost) / m
    reg = np.sum(Theta1[:, 1:]**2) + np.sum(Theta2[:, 1:]**2)
    J = J + lambda_nn * 1.0 / (2 * m) * reg

    # Backpropagation algorithm
    delta3 = a3 - ry
    temp = np.dot(delta3, Theta2)
    delta2 = temp[:, 1:] * sigmoid_gradient(z2)
    Delta1 = np.dot(delta2.T, a1)
    Delta2 = np.dot(delta3.T, a2)
    Theta1_grad = Delta1 / m + lambda_nn * np.concatenate(
        (np.zeros([hidden_layer_size, 1]), Theta1[:, 1:]), axis=1) / m
    Theta2_grad = Delta2 / m + lambda_nn * np.concatenate(
        (np.zeros([num_labels, 1]), Theta2[:, 1:]), axis=1) / m

    # Unroll the gradients into a single column vector
    Theta1_grad = np.reshape(Theta1_grad, [Theta1_grad.size, 1])
    Theta2_grad = np.reshape(Theta2_grad, [Theta2_grad.size, 1])
    grad = np.concatenate((Theta1_grad, Theta2_grad), axis=0)
    return J, grad
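# The sigmoid_function and sigmoid_gradient modules imported above are not
# shown in this repo snippet. A minimal sketch of what they would contain,
# assuming the standard definitions g(z) = 1 / (1 + e^(-z)) and
# g'(z) = g(z) * (1 - g(z)):
import numpy as np

# sigmoid_function.py (hypothetical contents)
def sigmoid_function(z):
    # Elementwise sigmoid; works for scalars, vectors, and matrices
    return 1.0 / (1.0 + np.exp(-z))

# sigmoid_gradient.py (hypothetical contents)
def sigmoid_gradient(z):
    # Derivative of the sigmoid, expressed in terms of the sigmoid itself
    g = 1.0 / (1.0 + np.exp(-z))
    return g * (1.0 - g)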
import numpy
# sigmoid and sigmoid_gradient are assumed to be defined (or imported) at
# module level alongside this function.

def gradients(gradient_parameters):
    """Gradient of the regularized two-layer network cost w.r.t. theta."""
    theta = gradient_parameters['theta']
    input_layer_size = gradient_parameters['input_layer_size']
    hidden_layer_size = gradient_parameters['hidden_layer_size']
    number_of_labels = gradient_parameters['number_of_labels']
    x_values = gradient_parameters['x_values']
    y_values = gradient_parameters['y_values']
    lambda_value = gradient_parameters['lambda_value']

    # Reshape the unrolled parameter vector into the two weight matrices
    theta_1_params = theta[0:(hidden_layer_size * (input_layer_size + 1))]
    theta_2_params = theta[(hidden_layer_size * (input_layer_size + 1)):]
    theta_1 = theta_1_params.reshape(hidden_layer_size, input_layer_size + 1)
    theta_2 = theta_2_params.reshape(number_of_labels, (hidden_layer_size + 1))

    input_examples_size = x_values.shape[0]

    # Feedforward pass; numpy.c_ prepends the bias column
    hidden_layer_input = numpy.c_[numpy.ones(input_examples_size),
                                  x_values].dot(theta_1.T)
    hidden_layer_output = sigmoid(hidden_layer_input)
    output_layer_input = numpy.c_[numpy.ones(hidden_layer_output.shape[0]),
                                  hidden_layer_output].dot(theta_2.T)
    output = sigmoid(output_layer_input)

    # Backpropagate the output-layer errors to the hidden layer
    errors = output - y_values
    backpropagated_errors = errors.dot(
        theta_2[:, 1:]) * sigmoid_gradient(hidden_layer_input)
    delta_1 = backpropagated_errors.T.dot(
        numpy.c_[numpy.ones(input_examples_size), x_values])
    delta_2 = errors.T.dot(numpy.c_[numpy.ones(hidden_layer_output.shape[0]),
                                    hidden_layer_output])

    # Zero the bias columns so they are not regularized
    theta_1[:, 0] = 0
    theta_2[:, 0] = 0
    theta_1_gradient = ((1.0 / input_examples_size) * delta_1) + (
        (lambda_value / input_examples_size) * theta_1)
    theta_2_gradient = ((1.0 / input_examples_size) * delta_2) + (
        (lambda_value / input_examples_size) * theta_2)

    # Unroll both gradient matrices into a single vector
    gradient = numpy.append(theta_1_gradient.flatten(),
                            theta_2_gradient.flatten())
    return gradient
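# gradients() computes `errors = output - y_values`, which only makes sense
# if y_values is already a one-hot matrix of shape (m, number_of_labels).
# A minimal sketch of that encoding, assuming raw integer labels in
# 1..number_of_labels (the variable names here are illustrative):
import numpy

raw_labels = numpy.array([1, 3, 2])           # example integer labels
number_of_labels = 3
y_values = numpy.eye(number_of_labels)[raw_labels - 1]
# y_values is now [[1, 0, 0], [0, 0, 1], [0, 1, 0]]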
# Compute the cost (feedforward) with lambda_nn = 0
print("Feedforward Using Neural Network...")
lambda_nn = 0
J, grad = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                         num_labels, data, labels, lambda_nn)
print("Cost at parameters (loaded from ex4weights): (%s)" % (J))

# Compute the cost with lambda_nn = 1
print("Feedforward Using Neural Network... (with lambda=1)")
lambda_nn = 1
J, grad = nnCostFunction(nn_params, input_layer_size, hidden_layer_size,
                         num_labels, data, labels, lambda_nn)
print("Cost at parameters (loaded from ex4weights): (%s)" % (J))

# Sigmoid gradient
print("Evaluating sigmoid gradient...")
test = np.array([-1, -0.5, 0, 0.5, 1])
g = sigmoid_gradient(test)
print("Sigmoid gradient at [-1, -0.5, 0, 0.5, 1]: (%s)" % (g))

# Initializing parameters
print("Initializing Neural Network Parameters...")
from randInitializeWeights import randInitializeWeights
initial_Theta1 = randInitializeWeights(input_layer_size, hidden_layer_size)
initial_Theta2 = randInitializeWeights(hidden_layer_size, num_labels)
initial_Theta1 = np.reshape(initial_Theta1, [initial_Theta1.size, 1])
initial_Theta2 = np.reshape(initial_Theta2, [initial_Theta2.size, 1])
initial_nn_params = np.concatenate((initial_Theta1, initial_Theta2))

def costFunction(p):
    J, gradient = nnCostFunction(p, input_layer_size, hidden_layer_size,
                                 num_labels, data, labels, lambda_nn)
    print("training")
    print(J)
    return J, gradient.flatten()
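# randInitializeWeights is imported above but its source is not shown. A
# minimal sketch, assuming the usual symmetry-breaking scheme of drawing each
# weight uniformly from [-epsilon_init, epsilon_init]; the default 0.12 is an
# assumption, not taken from this repo:
import numpy as np

def randInitializeWeights(L_in, L_out, epsilon_init=0.12):
    # One row per unit of the next layer, one column per incoming
    # connection plus the bias term
    return np.random.rand(L_out, L_in + 1) * 2 * epsilon_init - epsilon_init

# costFunction above returns both the cost and the gradient, so it can be
# handed straight to an optimizer. A hedged usage sketch with SciPy; the
# method and iteration count are illustrative choices, not from the source:
from scipy.optimize import minimize

result = minimize(costFunction, initial_nn_params.ravel(), jac=True,
                  method='CG', options={'maxiter': 50})
nn_params_trained = result.x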
print('Feedforward Using Neural Network...')
l = 0.0
j, _ = nn_cost_function(params_trained, input_layer_size, hidden_layer_size,
                        num_labels, X, y, l)
print('Cost at parameters (loaded from ex4weights):', j,
      '(this value should be about 0.287629)')

# =============== Part 4: Implement Regularization ===============
print('Checking Cost Function (w/ Regularization)...')
l = 1.0
j, _ = nn_cost_function(params_trained, input_layer_size, hidden_layer_size,
                        num_labels, X, y, l)
print('Cost at parameters (loaded from ex4weights):', j,
      '(this value should be about 0.383770)')

# ================ Part 5: Sigmoid Gradient ================
print('Evaluating sigmoid gradient...')
g = sigmoid_gradient(np.array([-1, -0.5, 0, 0.5, 1]))
print('Sigmoid gradient evaluated at [-1 -0.5 0 0.5 1]:')
print(g)

# ================ Part 6: Initializing Parameters ================
print('Initializing Neural Network Parameters...')
initial_theta_1 = rand_initialize_weights(input_layer_size, hidden_layer_size)
initial_theta_2 = rand_initialize_weights(hidden_layer_size, num_labels)
initial_nn_params = np.hstack((initial_theta_1.ravel(),
                               initial_theta_2.ravel()))

# =============== Part 7: Implement Backpropagation ===============
print('Checking Backpropagation...')
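# The gradient check that Part 7 announces is not shown here. A minimal
# central-difference sketch under stated assumptions: cost_func(p) returns
# (cost, gradient) for an unrolled 1-D parameter vector p, as
# nn_cost_function does; the function and variable names are hypothetical:
def check_gradient(cost_func, p, epsilon=1e-4, n_checks=10):
    import numpy as np
    _, analytic = cost_func(p)
    for idx in np.random.choice(p.size, n_checks, replace=False):
        perturb = np.zeros(p.size)
        perturb[idx] = epsilon
        loss1, _ = cost_func(p - perturb)
        loss2, _ = cost_func(p + perturb)
        numeric = (loss2 - loss1) / (2 * epsilon)
        # The two estimates should agree to several decimal places
        print(idx, numeric, analytic[idx])

# Example: check_gradient(lambda p: nn_cost_function(
#     p, input_layer_size, hidden_layer_size, num_labels, X, y, l),
#     initial_nn_params)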
import numpy as np
from sigmoid_gradient import sigmoid_gradient

def nn_cost_function(nn_params, input_layer_size, hidden_layer_size,
                     num_labels, X, Y, lamda):
    # Implements the cost function and gradient for a two-layer neural
    # network. Y is expected to be one-hot encoded (m x num_labels).

    # Reshape nn_params back into the parameters params_1 and params_2
    # (column-major, MATLAB-style unrolling, hence the transposes)
    params_1 = nn_params[0:hidden_layer_size * (input_layer_size + 1)]
    params_1 = (np.reshape(params_1, (input_layer_size + 1, hidden_layer_size))).T
    params_2 = nn_params[hidden_layer_size * (input_layer_size + 1):]
    params_2 = (np.reshape(params_2, (hidden_layer_size + 1, -1))).T
    m = len(X)

    def sigmoid(x):
        # Elementwise sigmoid (np.exp already broadcasts over arrays)
        return 1 / (1 + np.exp(-x))

    # Add the bias unit to the input layer and feed forward
    X = np.insert(X, 0, 1, axis=1)
    z2 = X @ params_1.T
    a2 = sigmoid(z2)
    a2 = np.insert(a2, 0, 1, axis=1)  # bias unit in the hidden layer
    h = sigmoid(a2 @ params_2.T)

    # Regularized cross-entropy cost; this matches the output error
    # error_h = h - Y used in backpropagation below, and the bias columns
    # are excluded from the penalty
    J = np.sum(-Y * np.log(h) - (1 - Y) * np.log(1 - h)) / m
    params_1_reg = np.square(params_1[:, 1:])
    params_2_reg = np.square(params_2[:, 1:])
    Jreg = (np.sum(params_1_reg) + np.sum(params_2_reg)) * (lamda / (2 * m))
    J = J + Jreg

    # Backpropagation
    error_h = h - Y
    error_a2 = np.multiply(error_h @ params_2,
                           np.insert(sigmoid_gradient(z2), 0, 1, axis=1))
    delta_a2 = error_h.T @ a2
    delta_a1 = (error_a2[:, 1:]).T @ X
    params_1_grad = delta_a1 / m
    params_2_grad = delta_a2 / m
    params_1_grad[:, 1:] += (lamda / m) * params_1[:, 1:]
    params_2_grad[:, 1:] += (lamda / m) * params_2[:, 1:]

    # Unroll gradients (column-major, mirroring the reshape above)
    grad = np.concatenate(((params_1_grad.T).ravel(),
                           (params_2_grad.T).ravel()), axis=None)
    print(J)
    return J, grad
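# A small shape sanity check for nn_cost_function on random data; every size
# below is an illustrative assumption, not taken from the ex4 dataset:
if __name__ == '__main__':
    rng = np.random.default_rng(0)
    in_size, hid_size, n_labels, m_demo = 4, 5, 3, 10
    demo_params = rng.standard_normal(
        hid_size * (in_size + 1) + n_labels * (hid_size + 1))
    X_demo = rng.standard_normal((m_demo, in_size))
    Y_demo = np.eye(n_labels)[rng.integers(0, n_labels, m_demo)]  # one-hot
    J_demo, grad_demo = nn_cost_function(demo_params, in_size, hid_size,
                                         n_labels, X_demo, Y_demo, 1.0)
    assert grad_demo.size == demo_params.size  # gradient matches parameter count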
# J_data = []
for i in range(NUM_ITERATIONS):
    # forward propagation
    J, h, new_a2_sets, z2 = cost_function.cost_function(
        new_training_sets, Theta1, Theta2, number_features_sets,
        y_training_sets)
    # J_data.append(J)
    print(J)
    if J < COST_THRESHOLD:
        break

    # back propagation
    delta3 = h - y_training_sets.T
    Theta2_grad = (delta3 @ new_a2_sets.T) / number_features_sets
    Fake_theta2 = Theta2[:, 1:]  # Theta2 without its bias column
    delta2 = (Fake_theta2.T @ delta3) * sigmoid_gradient.sigmoid_gradient(z2)
    Theta1_grad = (delta2 @ training_sets) / number_features_sets

    # gradient descent
    Theta1 = Theta1 - LEARNING_RATE * Theta1_grad
    Theta2 = Theta2 - LEARNING_RATE * Theta2_grad

# J_datas.append(J_data)
np.savetxt("Theta1.csv", Theta1, delimiter=",")
np.savetxt("Theta2.csv", Theta2, delimiter=",")

# for i in range(len(J_datas)):
#     plt.plot(J_datas[i])
#     plt.xlabel("Number of iterations")
#     plt.ylabel(f"Cost function for learning rate = {LEARNING_RATES[i]}")
#     plt.show()
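# A hedged sketch of reloading the saved weights for prediction: a forward
# pass plus argmax. The bias-column layout mirrors the training loop above;
# `examples` and the inline sigmoid are assumptions, not from this repo:
import numpy as np

def predict(examples):
    theta1 = np.loadtxt("Theta1.csv", delimiter=",")
    theta2 = np.loadtxt("Theta2.csv", delimiter=",")
    a1 = np.c_[np.ones(examples.shape[0]), examples]   # add bias column
    a2 = 1 / (1 + np.exp(-(a1 @ theta1.T)))            # hidden-layer activations
    a2 = np.c_[np.ones(a2.shape[0]), a2]
    a3 = 1 / (1 + np.exp(-(a2 @ theta2.T)))            # output activations
    return np.argmax(a3, axis=1)                       # predicted class index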