def checkDeriv(self):
    """Compare the analytic gradient with a central-difference estimate.

    ``propagation.calcDeriv(self)`` is called first; the analytic gradient
    is then read from ``self.inputDeriv`` and ``self.hiddenDeriv`` and
    flattened into one vector (input part first, hidden part second).
    The numerical gradient is built by perturbing one entry of a weight
    matrix at a time by ``1e-5`` and evaluating ``self.cost`` on either
    side of the unperturbed value.

    Returns:
        ``norm(analytic - numeric) / norm(analytic + numeric)``; values
        close to zero mean the two gradients agree.
    """
    propagation.calcDeriv(self)
    analytic = np.concatenate((
        np.reshape(self.inputDeriv, -1),
        np.reshape(self.hiddenDeriv, -1),
    ))

    step = 1e-5
    w_input = np.copy(self.inputWeight)
    w_hidden = np.copy(self.hiddenWeight)

    def central_differences(weights, cost_at):
        """Return the list of central-difference slopes of ``cost_at``."""
        bump = np.zeros(weights.shape)
        slopes = []
        # Row-major walk over the first two axes of the weight matrix.
        for idx in np.ndindex(weights.shape[0], weights.shape[1]):
            bump[idx] = step
            below = cost_at(weights - bump)
            above = cost_at(weights + bump)
            slopes.append((above - below) / (2 * step))
            bump[idx] = 0  # restore so only one entry is ever perturbed
        return slopes

    # Input-layer weights are probed first so the ordering matches `analytic`.
    input_slopes = central_differences(
        w_input, lambda w: self.cost(w, w_hidden))
    hidden_slopes = central_differences(
        w_hidden, lambda w: self.cost(w_input, w))
    numeric = np.asarray(input_slopes + hidden_slopes)

    return np.linalg.norm(analytic - numeric) / np.linalg.norm(analytic + numeric)
def hybrDescent(Network, numIter, gamma, beta, delta, eta, eps, nhat, batchSize):
    """Train ``Network`` with a hybrid incremental / aggregated gradient scheme.

    Each epoch shuffles the sample indices, walks over mini-batches of
    ``batchSize`` samples, and after every batch sets the weights to the
    epoch-start weights minus ``alpha`` times a running direction ``h``.
    ``h`` is a ``mu``-discounted accumulation of the ``xi``-weighted batch
    gradients.  With ``mu == 0`` every ``xi`` is 1, so ``h`` is simply the
    sum of the batch gradients seen so far in the epoch.
    (Presumably this follows Bertsekas' hybrid incremental gradient
    method -- confirm against the project notes.)

    Training stops once ``calcAccuracy(Network)`` reaches 0.95 or after
    ``numIter`` epochs, whichever comes first.

    Args:
        Network: object exposing ``m``, ``train``, ``inputWeight``,
            ``hiddenWeight``, ``inputDeriv`` and ``hiddenDeriv``.
        numIter: maximum number of epochs.
        gamma, eta: forwarded to ``stepSize(mu, gamma, eta)`` to obtain
            the step length ``alpha``.
        beta, delta: ``mu`` is updated as ``beta * mu + delta``.
        eps: if the input weights move less than this (Frobenius norm)
            over an epoch, ``mu`` is updated.
        nhat: ``mu`` is also updated once more than ``nhat`` epochs have
            passed since its last update.
        batchSize: number of samples per mini-batch; samples beyond the
            last full batch are not used in an epoch.
    """
    j = 0
    mu = 0
    lastUpdate = 0
    # Floor division: only full batches are processed (same result as the
    # Python 2 integer `/`, and still an int on Python 3).
    numBatches = Network.m // batchSize
    alpha = stepSize(mu, gamma, eta)

    while calcAccuracy(Network) < .95 and j < numIter:
        j += 1
        # list(...) keeps shuffle() working where range() is lazy.
        randvec = list(range(Network.m))
        shuffle(randvec)

        # Snapshot of the weights at the start of the epoch; every in-epoch
        # iterate is expressed relative to this point.
        oldInputWeight = Network.inputWeight.copy()
        oldHiddenWeight = Network.hiddenWeight.copy()
        gInput = np.zeros(Network.inputWeight.shape)
        hInput = np.zeros(Network.inputWeight.shape)
        gHidden = np.zeros(Network.hiddenWeight.shape)
        hHidden = np.zeros(Network.hiddenWeight.shape)

        # Visit every full batch.  The previous bound of numBatches - 1
        # skipped the last batch and did no work when numBatches == 1.
        for i in range(numBatches):
            obs = randvec[i * batchSize:(i + 1) * batchSize]
            prop.calcDeriv(Network, Network.train[obs], obs)

            # xi = 1 / (1 + mu + ... + mu**(numBatches - i - 1)).
            # Force float arithmetic: with an integer mu the Python 2 `/`
            # would truncate xi to 0 whenever the sum exceeds 1.
            xi = 1.0 / sum(np.power(float(mu), range(numBatches - i)))

            gInput += xi * Network.inputDeriv
            gHidden += xi * Network.hiddenDeriv
            hInput = mu * hInput + gInput
            hHidden = mu * hHidden + gHidden
            Network.inputWeight = oldInputWeight - alpha * hInput
            Network.hiddenWeight = oldHiddenWeight - alpha * hHidden

        # Epoch-level bookkeeping: raise mu when it has been unchanged for
        # too long or when the epoch barely moved the input weights.
        lastUpdate += 1
        if lastUpdate > nhat or np.linalg.norm(Network.inputWeight - oldInputWeight) < eps:
            mu = beta * mu + delta
            alpha = stepSize(mu, gamma, eta)
            lastUpdate = 0

    # NOTE(review): accuracy is reported once, after training; confirm this
    # placement against the intended layout.
    print("Training set accuracy: %.3f" % calcAccuracy(Network))
def gradDescent(Network, numIter, alpha):
    """Run full-batch gradient descent on ``Network``.

    Every iteration calls ``prop.calcDeriv(Network)`` and then moves
    ``inputWeight`` and ``hiddenWeight`` in place by ``-alpha`` times the
    corresponding derivative.  Iteration stops as soon as
    ``calcAccuracy(Network)`` is no longer below 0.95 or ``numIter``
    iterations have been performed.

    Args:
        Network: object exposing ``inputWeight``, ``hiddenWeight``,
            ``inputDeriv`` and ``hiddenDeriv``.
        numIter: maximum number of iterations.
        alpha: step length.
    """
    sweeps = 0
    while True:
        # Accuracy is checked first, then the iteration cap.
        if not (calcAccuracy(Network) < .95):
            break
        if not (sweeps < numIter):
            break
        sweeps += 1

        prop.calcDeriv(Network)
        Network.inputWeight -= alpha * Network.inputDeriv
        Network.hiddenWeight -= alpha * Network.hiddenDeriv

    # NOTE(review): accuracy is reported once, after training; confirm this
    # placement against the intended layout.
    print("Training set accuracy: %.3f" % calcAccuracy(Network))
def incrDescent(Network, numIter, alpha):
    """Run incremental (one sample at a time) gradient descent on ``Network``.

    Each epoch visits the indices ``0 .. Network.m - 1`` in a freshly
    shuffled order.  For every index, ``prop.calcDeriv`` is called with the
    matching entry of ``Network.train`` and the index itself, after which
    both weight arrays are updated in place by ``-alpha`` times their
    derivative.  Epochs repeat until ``calcAccuracy(Network)`` is no longer
    below 0.95 or ``numIter`` epochs have run.

    Args:
        Network: object exposing ``m``, ``train``, ``inputWeight``,
            ``hiddenWeight``, ``inputDeriv`` and ``hiddenDeriv``.
        numIter: maximum number of epochs.
        alpha: step length.
    """
    epoch = 0
    while True:
        # Accuracy is checked first, then the epoch cap.
        if not (calcAccuracy(Network) < .95):
            break
        if not (epoch < numIter):
            break
        epoch += 1

        visit_order = list(range(Network.m))
        shuffle(visit_order)
        for sample in visit_order:
            prop.calcDeriv(Network, Network.train[sample], sample)
            Network.inputWeight -= alpha * Network.inputDeriv
            Network.hiddenWeight -= alpha * Network.hiddenDeriv

    # NOTE(review): accuracy is reported once, after training; confirm this
    # placement against the intended layout.
    print("Training set accuracy: %.3f" % calcAccuracy(Network))