Exemplo n.º 1
0
    def backpropagation(self, theta, nn, X, y, lamb):
        """Compute the regularized gradient of the cost w.r.t. every weight matrix.

        A forward pass is run through ``self.runAll(nn, X)``; the per-layer
        values are then read from ``nn['z']`` and ``nn['a']`` and the error is
        propagated backwards one training sample at a time.

        Args:
            theta: Only its type is inspected. When it is a ``numpy.ndarray``
                the gradient is returned unrolled into a flat vector (the form
                optimizers such as ``fmin_cg`` expect); otherwise the
                per-layer list is returned.
            nn: Network dict. Keys read here: ``'structure'`` (its length is
                the number of layers), ``'theta'`` (weight matrices, index 0
                unused), ``'z'`` and ``'a'`` (per-layer values indexed as
                ``[layer][sample]``).
                NOTE(review): presumably ``runAll`` fills ``nn['z']`` and
                ``nn['a']``, and ``nn['a'][l][n]`` already contains the bias
                unit -- confirm against ``runAll``.
            X: Sequence of input vectors; ``len(X)`` is the sample count ``m``.
            y: Target outputs, one entry per sample.
            lamb: Regularization strength (applied to non-bias columns only).

        Returns:
            ``list`` whose entry ``i`` (``i >= 1``) is the gradient for
            ``nn['theta'][i]`` and whose entry 0 is the placeholder ``0``;
            or, when ``theta`` is an ndarray, a 1-D array built from
            ``self.unroll`` of that list.

        Raises:
            ValueError: If ``X`` is empty (the average over samples would
                divide by zero).
        """
        def sigmoid_gradient(values):
            # Derivative of the logistic function: s(v) * (1 - s(v)).
            # Overflow in exp() for very negative inputs saturates s to 0,
            # which is the correct limit, so the warning is silenced.
            with np.errstate(over='ignore'):
                s = 1.0 / (1.0 + np.exp(-np.asarray(values, dtype=float)))
            return s * (1.0 - s)

        m = len(X)
        if m == 0:
            raise ValueError("backpropagation requires at least one training sample")

        layersNumb = len(nn['structure'])
        thetaDelta = [0] * layersNumb  # index 0 stays a placeholder

        # Matrix of output values for all input vectors X. This call must come
        # before nn['z'] / nn['a'] are read below.
        hLoc = self.runAll(nn, X)
        yLoc = np.array(y)
        thetaLoc = nn['theta']
        zLoc = nn['z']
        aLoc = nn['a']

        for n in range(m):
            # Error of the output layer, kept as a column vector so that it
            # also works for networks with more than one output unit.
            err = np.asarray(hLoc[n] - yLoc[n]).reshape(-1, 1)

            for i in range(layersNumb - 1, 0, -1):
                # Accumulate the gradient contribution of this sample for theta[i].
                thetaDelta[i] = thetaDelta[i] + np.dot(err, aLoc[i - 1][n].reshape(1, -1))

                # There is no theta[0], so the error is not propagated below layer 1.
                if i > 1:
                    z = np.asarray(zLoc[i - 1][n])
                    # Promote to a row vector and prepend a bias entry so the
                    # shape lines up with theta[i].T @ err.
                    z = np.concatenate(
                        ([[1]], z.reshape((1,) * (2 - z.ndim) + z.shape)), axis=1
                    )
                    err = np.dot(thetaLoc[i].T, err).reshape(-1, 1) * sigmoid_gradient(z).T
                    err = err[1:]  # drop the bias row

        for i in range(1, len(thetaDelta)):
            thetaDelta[i] = thetaDelta[i] / m
            # Regularization: the bias column (index 0) is not penalized.
            thetaDelta[i][:, 1:] = thetaDelta[i][:, 1:] + thetaLoc[i][:, 1:] * (lamb / m)

        if isinstance(theta, np.ndarray):
            # Flat vector form, so the result also works with fmin_cg.
            return np.asarray(self.unroll(thetaDelta)).reshape(-1)
        return thetaDelta
Exemplo n.º 2
0
 def logisticFunction(self):
     """Return a callable that applies the logistic sigmoid element-wise.

     The returned function accepts a scalar or array-like ``x`` and returns
     ``1 / (1 + exp(-x))`` computed with NumPy (a NumPy scalar for scalar
     input, an array of the same shape otherwise).
     """
     def sigmoid(x):
         # exp() overflows to inf for very negative x, which makes the result
         # saturate at 0 -- the correct limit -- so the warning is silenced.
         with np.errstate(over='ignore'):
             return 1.0 / (1.0 + np.exp(-np.asarray(x, dtype=float)))

     return sigmoid