def hypothesis(x, Th1, Th2):
    """
    Calculate the output layer of the neural network and return the most
    probable class for the datapoint x.
    x does not contain the bias node.
    Uses parameter matrices Th1 and Th2; arguments are lists, not numpy arrays.
    """
    x.insert(0, 1)  # prepend the bias node
    x, Th1, Th2 = np.array(x), np.array(Th1), np.array(Th2)
    a2 = np.hstack(([1], sig(x @ Th1)))  # hidden layer activations with bias
    a3 = sig(a2 @ Th2)                   # output layer activations
    return np.argmax(a3)
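# A minimal usage sketch (not from the original file): the shapes below are made
# up for illustration -- 2 input features, a hidden layer of 3 nodes plus bias,
# and 2 output classes -- and hypothesis() and its sig() helper (the logistic
# sigmoid used throughout these files) are assumed to be in scope.
import numpy as np

np.random.seed(0)
Th1_demo = np.random.rand(3, 3).tolist()  # (n+1) x (h-1): 3 x 3, hypothetical values
Th2_demo = np.random.rand(4, 2).tolist()  # h x k: 4 x 2, hypothetical values
print(hypothesis([0.5, -1.2], Th1_demo, Th2_demo))  # predicted class index, 0 or 1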
def gradient(theta, X, y, greek_lambda):
    m = len(y)
    h = sigmoid.sig(X @ theta)
    j0 = (1 / m) * (X.T @ (h - y))[0]                                 # bias term: not regularized
    j1 = (1 / m) * ((X.T @ (h - y))[1:] + greek_lambda * theta[1:])   # remaining terms: regularized
    grad = np.vstack((j0[:, np.newaxis], j1))
    return grad
def cost(theta, X, y, greek_lambda):
    m = len(y)
    J = (-1 / m) * (y.transpose() @ np.log(sigmoid.sig(X @ theta))
                    + (1 - y.transpose()) @ np.log(1 - sigmoid.sig(X @ theta)))
    reg = (greek_lambda / (2 * m)) * (theta[1:].transpose() @ theta[1:])  # skip the bias term
    J = J + reg
    return J
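# A minimal sketch (not part of the original files) of a finite-difference check on
# the regularized gradient: perturb each component of theta by a small eps and compare
# the numerical slope of cost() with the analytic gradient(). The toy X, y, and lambda
# below are made up; cost() and gradient() above are assumed to be in scope.
import numpy as np

np.random.seed(0)
eps = 1e-4
X_toy = np.hstack((np.ones((5, 1)), np.random.rand(5, 2)))  # 5 examples: bias + 2 features
y_toy = np.array([[0.], [1.], [1.], [0.], [1.]])
theta_toy = np.random.rand(3, 1)
num_grad = np.zeros_like(theta_toy)
for i in range(len(theta_toy)):
    step = np.zeros_like(theta_toy)
    step[i] = eps
    num_grad[i] = ((cost(theta_toy + step, X_toy, y_toy, 1)
                    - cost(theta_toy - step, X_toy, y_toy, 1)) / (2 * eps)).item()
print(np.max(np.abs(num_grad - gradient(theta_toy, X_toy, y_toy, 1))))  # should be tiny, ~1e-9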
def node_output(in_put, W_l_i):
    # Given input and W[l][i], returns output of node i in layer l
    j_max = len(in_put) - 1
    j = 0  # counter over node outputs from previous layer
    z = 0  # initialize the argument for sigmoid
    while j <= j_max:
        z += in_put[j] * W_l_i[j]
        j += 1
    return sigmoid.sig(z - W_l_i[j])  # subtract the bias term (last element of W_l_i)
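# A minimal usage sketch (values are made up): W_l_i must hold one more entry than
# the input list -- the trailing entry is the bias that node_output() subtracts.
# sigmoid.sig is the same logistic function the other files in this repo import.
print(node_output([0.2, 0.7], [0.5, -0.3, 0.1]))  # sig(0.2*0.5 + 0.7*(-0.3) - 0.1)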
def cost(theta, X, y):
    m = len(y)
    J = (-1 / m) * np.sum(np.multiply(y, np.log(sigmoid.sig(X @ theta)))
                          + np.multiply((1 - y), np.log(1 - sigmoid.sig(X @ theta))))
    return J
def cost(theta, X, y, greek_lambda):
    m = len(y)
    h = sigmoid.sig(X @ theta)  # matrix product; h is a column vector, one row per training example
    c = sum((y * np.log(h)) + ((1 - y) * np.log(1 - h))) / (-m)
    # np.log and y * np.log(h) are element-wise, not matrix, operations;
    # sum() adds up all the elements.
    regularized_c = c + greek_lambda / (2 * m) * sum(theta[1:]**2)  # bias term is not regularized
    return regularized_c
def propagate(self):
    # Propagates an input across the ann.
    # Also regenerates a 2D outputs matrix, indexed outputs[layer][node], that is used in backpropagation.
    # Start building a new outputs matrix:
    outputs = [[1]]
    outputs[0].extend(self.inputs)  # 0th element of the outputs list holds the inputs
    # Forward propagate across the ann, continuing to build the outputs matrix.
    shape = self.shape      # layer l and node n index bounds come from this
    weights = self.weights  # we calculate the arguments of sig from this
    l = 1                   # start at layer 1 to calculate the outputs of layer 1
    l_max = len(shape)
    while l < l_max:
        outputs_update = [1]
        n = 0
        while n < shape[l]:
            outputs_update.append(sig(dotProd(outputs[l - 1], weights[l][n])))
            n += 1
        outputs.append(outputs_update)
        l += 1
    self.outputs = outputs
y = y[:, np.newaxis]
theta = np.zeros((n, 1))
greek_lambda = 1
J = regularlizedcostfuntion.cost(theta, X, y, greek_lambda)
print(J)

output = opt.fmin_tnc(func=regularlizedcostfuntion.cost, x0=theta.flatten(),
                      fprime=regularizedgradient.grad,
                      args=(X, y.flatten(), greek_lambda))
theta = output[0]
print(theta)  # theta contains the optimized values

pred = [sigmoid.sig(X @ theta) >= 0.5]
print(np.mean(pred == y.flatten()) * 100)

# evaluate the decision boundary over a grid of feature values
a = np.linspace(-1, 1.5, 100)
b = np.linspace(-1, 1.5, 100)
z = np.zeros((len(a), len(b)))
for i in range(len(a)):
    for j in range(len(b)):
        z[i, j] = np.dot(mapfeature.feature_plotting(a[i], b[j]), theta)

admitted = y.flatten() == 1
X = data.iloc[:, :-1]
passed = plt.scatter(X[admitted][0].values, X[admitted][1].values,
def accuracy(X, y, theta):
    pred = [sigmoid.sig(np.dot(X, theta)) >= 0.5]
    acc = np.mean(pred == y)
    print(acc * 100)
# parameters for multiplying with first and second layers
h = n + 2        # hidden layer size
Th1 = np.random.rand(n + 1, h - 1)
Th2 = np.random.rand(h, k)
lamb = 0.01      # regularization parameter
niter = 100000   # number of iterations for learning
alpha = 0.01     # learning rate

# cost function
J = []
for N in range(niter):
    # forward propagation
    Z2 = X @ Th1
    A2 = np.hstack((np.ones((m, 1)), sig(Z2)))
    Z3 = A2 @ Th2
    A3 = sig(Z3)
    J.append(cost_func(Y, A3, Th1, Th2, m, lamb))

    # back propagation
    del3 = A3 - Y
    del2 = ((del3 @ np.transpose(Th2)) * np.hstack((np.ones((m, 1)), sigd(Z2))))[:, 1:]
    Th2_grad = 1 / m * (np.transpose(A2) @ del3)
    Th1_grad = 1 / m * (np.transpose(X) @ del2)
    Th2_grad += lamb / m * np.hstack((np.zeros((h, 1)), Th2[:, 1:]))
    Th1_grad += lamb / m * np.hstack((np.zeros((n + 1, 1)), Th1[:, 1:]))
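    # The snippet above stops at the regularized gradients; the original does not show
    # the parameter update, but a plain batch gradient-descent step using the learning
    # rate alpha defined earlier would presumably sit here, inside the same loop.
    # A minimal sketch of that step, not taken from the source:
    Th1 -= alpha * Th1_grad
    Th2 -= alpha * Th2_grad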
def grad(theta, X, y, greek_lambda):
    m = len(y)
    grad = (1 / m) * X.T @ (sigmoid.sig(X @ theta) - y)
    grad[1:] = grad[1:] + (greek_lambda / m) * theta[1:]  # do not regularize the bias term
    return grad
def grad(theta, X, y):
    m = len(y)
    temp = sigmoid.sig(np.dot(X, theta)) - y
    return np.dot(X.transpose(), temp) / m
def cost(nn_params, input_layer_size, hidden_layer_size, num_labels, X, y, greek_lambda):
    # Retrieve theta1 and theta2 from the unrolled nn_params vector:
    # the first (input_layer_size + 1) * hidden_layer_size entries belong to theta1.
    theta1 = nn_params[:((input_layer_size + 1) * hidden_layer_size)]
    theta1 = theta1.reshape(hidden_layer_size, input_layer_size + 1)
    theta2 = nn_params[((input_layer_size + 1) * hidden_layer_size):]
    theta2 = theta2.reshape(num_labels, hidden_layer_size + 1)

    m = X.shape[0]
    J = 0
    X = np.hstack((np.ones((m, 1)), X))
    y10 = np.zeros((m, num_labels))  # one-hot encoding of y; its size is 5000 x 10 here

    a1 = sigmoid.sig(X @ theta1.T)
    a1 = np.hstack((np.ones((m, 1)), a1))
    a2 = sigmoid.sig(a1 @ theta2.T)

    for i in range(1, num_labels + 1):
        y10[:, i - 1][:, np.newaxis] = np.where(y == i, 1, 0)
    # y10[:, i-1] gives a 1-D array, so [:, np.newaxis] adds a column axis.
    # The loop sets the element corresponding to the correct digit to 1.

    for j in range(num_labels):
        J = J + sum(-y10[:, j] * np.log(a2[:, j]) - (1 - y10[:, j]) * np.log(1 - a2[:, j]))
    cost = 1 / m * J
    # [:, 1:] means the whole array except the first column (0th index), i.e. column 1 onwards.
    reg_J = cost + greek_lambda / (2 * m) * (np.sum(theta1[:, 1:]**2) + np.sum(theta2[:, 1:]**2))

    # backpropagation to compute the gradients
    grad1 = np.zeros((theta1.shape))
    grad2 = np.zeros((theta2.shape))

    for i in range(m):
        xi = X[i, :]          # xi is a 1-D array of size 401
        a1i = a1[i, :]        # a1i is a 1-D array of size 26
        a2i = a2[i, :]        # a2i is a 1-D array of size 10
        d2 = a2i - y10[i, :]  # d2 is a 1-D array of size 10
        d1 = theta2.T @ d2.T * sigmoidgradient.sigmoidgrad(np.hstack((1, xi @ theta1.T)))
        # theta2.T @ d2.T is a 1-D array of shape (26,) and
        # np.hstack((1, xi @ theta1.T)) is also (26,); * is element-wise, so d1 is (26,).
        grad1 = grad1 + d1[1:][:, np.newaxis] @ xi[:, np.newaxis].T   # 25x1 @ 1x401 = 25x401
        grad2 = grad2 + d2.T[:, np.newaxis] @ a1i[:, np.newaxis].T    # 10x1 @ 1x26 = 10x26

    grad1 = 1 / m * grad1
    grad2 = 1 / m * grad2

    temp_theta1 = theta1
    temp_theta2 = theta2
    temp_theta1[:, 0] = 0  # zero out the bias column so it is not regularized in the gradient
    temp_theta2[:, 0] = 0
    grad1_reg = grad1 + (greek_lambda / m) * temp_theta1
    grad2_reg = grad2 + (greek_lambda / m) * temp_theta2

    return cost, grad1, grad2, reg_J, grad1_reg, grad2_reg
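# A minimal sketch (not from the original file) of calling cost() above with the
# pre-trained weights from ex3weights.mat, as loaded in the prediction script in
# this repo. The unrolled nn_params ordering (theta1 first, then theta2, both
# flattened row-wise) is what the slicing/reshaping at the top of cost() assumes;
# cost() itself is assumed to be in scope or imported from its module.
from scipy.io import loadmat
import numpy as np

mat = loadmat("Data/ex3data1.mat")
weights = loadmat("Data/ex3weights.mat")
nn_params = np.hstack((weights["Theta1"].ravel(), weights["Theta2"].ravel()))
J, g1, g2, J_reg, g1_reg, g2_reg = cost(nn_params, 400, 25, 10, mat["X"], mat["y"], 1)
print(J, J_reg)  # unregularized and regularized cost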
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
"""
Created on Mon Aug 21 22:19:05 2017

@author: camera1
"""
import numpy as np
import sigmoid as sg

print sg.sig(10)
print np.version.version
from scipy.io import loadmat
import pandas as pd
import numpy as np
import sigmoid

data = loadmat('Data/ex3data1.mat')
X = data['X']
y = data['y']

mat2 = loadmat("Data/ex3weights.mat")
Theta1 = mat2["Theta1"]  # Theta1 has size 25 x 401
Theta2 = mat2["Theta2"]  # Theta2 has size 10 x 26

m = X.shape[0]
X = np.hstack((np.ones((m, 1)), X))
a1 = sigmoid.sig(X @ Theta1.T)
a1 = np.hstack((np.ones((m, 1)), a1))  # hidden layer
a2 = sigmoid.sig(a1 @ Theta2.T)        # output layer

pred2 = np.argmax(a2, axis=1) + 1      # labels are 1-indexed
acc = sum(pred2[:, np.newaxis] == y)[0] / 5000 * 100
print("Accuracy:", acc, "%")
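# An equivalent, slightly more direct accuracy computation (a sketch, not part of
# the original script): np.mean over the boolean comparison avoids the sum()/[0]
# indexing and the hard-coded dataset size of 5000.
acc2 = np.mean(pred2 == y.flatten()) * 100
print("Accuracy:", acc2, "%")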