def backpropagation(self, theta, nn, X, y, lamb):
    layersNumb = len(nn['structure'])
    thetaDelta = [0] * layersNumb
    m = len(X)
    # calculate the matrix of output values for all input vectors X
    hLoc = self.runAll(nn, X).copy()
    yLoc = np.array(y)
    thetaLoc = nn['theta'].copy()
    # elementwise derivative of the sigmoid: s(x) * (1 - s(x))
    derFunct = np.vectorize(lambda x: 1.0 / (1.0 + np.exp(-x)) * (1.0 - 1.0 / (1.0 + np.exp(-x))))
    zLoc = nn['z'].copy()
    aLoc = nn['a'].copy()
    for n in range(0, len(X)):
        delta = [0] * (layersNumb + 1)  # fill the list with zeros
        delta[len(delta)-1] = (hLoc[n] - yLoc[n]).T  # error delta of the output layer
        delta[len(delta)-1] = delta[len(delta)-1].reshape(1, -1)
        for i in range(layersNumb-1, 0, -1):
            if i > 1:  # delta[0] is never computed: there is no theta[0], and it is not needed
                z = zLoc[i-1][n]
                z = np.concatenate(([[1]], z.reshape((1,)*(2-z.ndim) + z.shape)), axis=1)  # prepend 1 for the bias unit
                delta[i] = np.dot(thetaLoc[i].T, delta[i+1]).reshape(-1, 1) * derFunct(z).T
                delta[i] = delta[i][1:]  # drop the bias component
            # accumulate the gradient: outer product of this layer's delta and the previous layer's activations
            thetaDelta[i] = thetaDelta[i] + np.dot(delta[i+1].reshape(-1, 1), aLoc[i-1][n].reshape(1, -1))
    for i in range(1, len(thetaDelta)):
        thetaDelta[i] = thetaDelta[i] / m
        thetaDelta[i][:, 1:] = thetaDelta[i][:, 1:] + thetaLoc[i][:, 1:] * (lamb / m)  # regularization (bias column excluded)
    if type(theta) == np.ndarray:
        return np.asarray(self.unroll(thetaDelta)).reshape(-1)  # unrolled form, so it also works with fmin_cg
    return thetaDelta
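# Hedged side note (not part of the original class): a quick standalone check that
# the vectorized sigmoid derivative used in backpropagation() agrees with a central
# finite-difference estimate. All names below (sigmoid, der, xs, eps) are local to
# this sketch.
import numpy as np

sigmoid = lambda x: 1.0 / (1.0 + np.exp(-x))
der = np.vectorize(lambda x: sigmoid(x) * (1.0 - sigmoid(x)))

xs = np.array([-2.0, -0.5, 0.0, 0.5, 2.0])
eps = 1e-6
numeric = (sigmoid(xs + eps) - sigmoid(xs - eps)) / (2 * eps)  # central difference
assert np.allclose(der(xs), numeric, atol=1e-8)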
def run(self, nn, input):
    z = [0]
    a = []
    a.append(copy.deepcopy(input))
    a[0] = np.array(a[0]).T  # n x 1 column vector
    logFunc = self.logisticFunction()
    for i in range(1, len(nn['structure'])):
        a[i-1] = np.vstack(([1], a[i-1]))  # prepend the bias unit for correct matrix multiplication
        z.append(np.dot(nn['theta'][i], a[i-1]))
        a.append(logFunc(z[i]))
    nn['z'] = z
    nn['a'] = a
    return a[len(nn['structure'])-1]
def runAll(self, nn, X):
    z = [0]
    m = len(X)
    a = [copy.deepcopy(X)]  # a[0] is the input matrix, one row per example
    logFunc = self.logisticFunction()
    for i in range(1, len(nn['structure'])):  # for each layer except the input
        a[i-1] = np.concatenate((np.ones((m, 1)), a[i-1]), axis=1)  # add a bias column to the previous layer's activation matrix
        z.append(np.dot(a[i-1], nn['theta'][i].T))  # multiply the previous layer's activations by the corresponding weights and sum the products
        a.append(logFunc(z[i]))  # apply the activation function to each value
    nn['z'] = z
    nn['a'] = a
    return a[len(nn['structure'])-1]
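# Hedged side note (assumed shapes, not the original API): the same vectorized forward
# pass as runAll(), written standalone for a made-up 2-3-1 network so the bias-column
# trick and the resulting shapes are easy to follow. theta1/theta2 are illustrative weights.
import numpy as np

rng = np.random.default_rng(0)
X = rng.random((4, 2))              # 4 examples, 2 input features
theta1 = rng.random((3, 3))         # 3 hidden units x (1 bias + 2 inputs)
theta2 = rng.random((1, 4))         # 1 output unit  x (1 bias + 3 hidden units)
sigmoid = lambda v: 1.0 / (1.0 + np.exp(-v))

a0 = np.concatenate((np.ones((4, 1)), X), axis=1)     # (4, 3): bias column added
a1 = sigmoid(np.dot(a0, theta1.T))                    # (4, 3): hidden activations
a1 = np.concatenate((np.ones((4, 1)), a1), axis=1)    # (4, 4): bias column added again
a2 = sigmoid(np.dot(a1, theta2.T))                    # (4, 1): one output per example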
def cost(self, h, y):
    logH = np.log(h)
    log1H = np.log(1 - h)
    y_t = y.T  # transpose y for the matrix multiplication
    cost = np.dot(-1 * y_t, logH) - np.dot((1 - y_t), log1H)
    return cost.sum()  # sum the costs over all output neurons and input vectors
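# Hedged side note (not part of the original class): for a single output neuron the
# dot-product form of cost() reduces to the usual elementwise cross-entropy sum.
# h and y below are made-up (m, 1) arrays used only for this check.
import numpy as np

h = np.array([[0.9], [0.2], [0.7]])   # predicted outputs for 3 examples
y = np.array([[1.0], [0.0], [1.0]])   # target labels
dot_form = (np.dot(-1 * y.T, np.log(h)) - np.dot((1 - y.T), np.log(1 - h))).sum()
elementwise = (-y * np.log(h) - (1 - y) * np.log(1 - h)).sum()
assert np.isclose(dot_form, elementwise)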