import numpy as np
import matplotlib.pyplot as plot

# `fns` (CrossEntropy, WriteLog), SetInitialETA and HandleAneeling are expected to be
# provided by the project's helper module / elsewhere in this file.


def MiniBatchGradientDecentWithMomentum(net, trainData, trainTargets, itr, batchSize,
                                        eta=0.5, gamma=0.5, valData=None, valTargets=None,
                                        testData=None, testTargets=None, annel=False,
                                        regularization=False, lamda=0.1):
    deltaWeights = [None] * (net.noOfLayers + 1)
    batchStart = 0
    step = 0
    epoch = 0
    aneelCount = 0
    previousEpochValLoss = np.inf
    eta, gamma = SetInitialETA(net, trainData[:, 0:batchSize],
                               trainTargets[:, 0:batchSize], eta, gamma)
    for i in range(0, itr):
        step = step + 1
        batchData = trainData[:, batchStart:batchStart + batchSize]
        batchTargets = trainTargets[:, batchStart:batchStart + batchSize]
        batchStart = batchStart + batchSize
        networkOutput, layerOutputs = net.FeedForward(batchData)
        if batchStart >= trainData.shape[1]:
            # One full pass over the training data completed: start a new epoch.
            epoch = epoch + 1
            batchStart = batchStart - trainData.shape[1]
            step = 0
            if annel and valData is not None:
                # Anneal the learning rate when the validation loss stops improving.
                previousEpochValLoss, tempNet = HandleAneeling(
                    net, valData, valTargets, previousEpochValLoss)
                if tempNet is not None:
                    net = tempNet
                    eta = eta * 3.0 / 4.0
                    aneelCount += 1
                    if aneelCount > 3:
                        return net
        print('Mini Batch Loss:',
              net.LossFunction[net.lossFunctionName](networkOutput, batchTargets))
        gradients = net.BackProbGradients(batchTargets, networkOutput, layerOutputs)
        for j in range(0, net.noOfLayers + 1):
            if regularization:
                # L2 regularization: add lamda * w to the gradient.
                gradients[j] = gradients[j] + lamda * net.weights[j]
            if deltaWeights[j] is None:
                deltaWeights[j] = eta / batchSize * gradients[j]
            else:
                # Momentum: current step plus a decayed fraction of the previous step.
                deltaWeights[j] = eta / batchSize * gradients[j] + gamma * deltaWeights[j]
            net.weights[j] = net.weights[j] - deltaWeights[j]
        if net.logDir is not None and step % 250 == 0:
            fns.WriteLog(net, batchData, batchTargets, step, epoch, eta,
                         valData, valTargets, testData, testTargets)
    return net
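
# Minimal standalone sketch of the momentum rule used above
# (deltaW = eta/m * grad + gamma * deltaW; w -= deltaW), shown on a toy
# least-squares problem. Nothing below depends on the project's network classes;
# all names (toyX, trueW, ...) are illustrative assumptions only.
def _momentum_sketch():
    import numpy as np
    rng = np.random.default_rng(0)
    toyX = rng.normal(size=(5, 100))      # (features, samples), matching the layout above
    trueW = rng.normal(size=(1, 5))
    toyY = trueW @ toyX
    w = np.zeros((1, 5))
    deltaW = np.zeros_like(w)
    eta, gamma, m = 0.1, 0.9, toyX.shape[1]
    for _ in range(200):
        grad = (w @ toyX - toyY) @ toyX.T            # gradient of 0.5 * ||wX - Y||^2
        deltaW = (eta / m) * grad + gamma * deltaW   # momentum accumulation
        w = w - deltaW                               # weight step
    return w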
def BatchGradientDecent(net, trainData, trainTargets, eta, itr, valData=None,
                        valTargets=None, testData=None, testTargets=None, annel=False):
    eta, _ = SetInitialETA(net, trainData, trainTargets, eta)
    lossToPlotTrain = []
    lossToPlotVal = []
    for i in range(0, itr):
        networkOutput, layerOutputs = net.FeedForward(trainData)
        print('Loss:',
              net.LossFunction[net.lossFunctionName](networkOutput, trainTargets))
        gradients = net.BackProbGradients(trainTargets, networkOutput, layerOutputs)
        for j in range(0, net.noOfLayers + 1):
            # Full-batch update: gradient averaged over all training samples.
            net.weights[j] = net.weights[j] - (eta / trainData.shape[1]) * gradients[j]
        plot.close('all')
        # lossToPlotTrain.append(CrossEntropy.CrossEntropy(networkOutput, trainTargets))
        # valOutput, _ = net.FeedForward(valData)
        # lossToPlotVal.append(CrossEntropy.CrossEntropy(valOutput, valTargets))
        # plot.plot(lossToPlotTrain)
        # plot.plot(lossToPlotVal)
        # plot.legend(['TrainErr', 'ValErr'])
        # plot.show()
        valOutput, _ = net.FeedForward(valData)
        valLoss = fns.CrossEntropy(valOutput, valTargets)
        print('Val Loss: ', valLoss)
        if net.logDir is not None and i % 250 == 0:
            fns.WriteLog(net, trainData, trainTargets, i, i, eta, valData,
                         valTargets, testData, testTargets)
    return net
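
# Minimal standalone sketch of the full-batch rule above (w -= eta/N * grad, with the
# gradient computed on the whole training set every iteration). Purely illustrative;
# the toy data and names below are not part of the project.
def _batch_gd_sketch():
    import numpy as np
    rng = np.random.default_rng(1)
    X = rng.normal(size=(3, 50))             # (features, samples), matching the layout above
    Y = np.array([[1.0, -2.0, 0.5]]) @ X
    w = np.zeros((1, 3))
    eta, N = 0.5, X.shape[1]
    for _ in range(100):
        grad = (w @ X - Y) @ X.T             # full-batch gradient of 0.5 * ||wX - Y||^2
        w = w - (eta / N) * grad             # plain gradient descent step
    return w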
def AdamOptimizer(net, trainData, trainTargets, itr, batchSize, eta=0.5, b1=0.9,
                  b2=0.999, valData=None, valTargets=None, testData=None,
                  testTargets=None, annel=False, regularization=False, lamda=0.1):
    mt = [None] * (net.noOfLayers + 1)
    vt = [None] * (net.noOfLayers + 1)
    batchStart = 0
    step = 0
    epoch = 0
    aneelCount = 0
    previousEpochValLoss = np.inf
    eta, _ = SetInitialETA(net, trainData[:, 0:batchSize],
                           trainTargets[:, 0:batchSize], eta)
    lossToPlotTrain = []
    lossToPlotVal = []
    for i in range(0, itr):
        step = step + 1
        batchData = trainData[:, batchStart:batchStart + batchSize]
        batchTargets = trainTargets[:, batchStart:batchStart + batchSize]
        batchStart = batchStart + batchSize
        networkOutput, layerOutputs = net.FeedForward(batchData)
        if batchStart >= trainData.shape[1]:
            # One full pass over the training data completed: start a new epoch.
            epoch = epoch + 1
            batchStart = batchStart - trainData.shape[1]
            step = 0
            if annel and valData is not None:
                # Anneal the learning rate when the validation loss stops improving.
                previousEpochValLoss, tempNet = HandleAneeling(
                    net, valData, valTargets, previousEpochValLoss)
                if tempNet is not None:
                    net = tempNet
                    eta = eta * 3.0 / 4.0
                    aneelCount += 1
                    if aneelCount > 3:
                        return net
        print('Mini Batch Loss:',
              net.LossFunction[net.lossFunctionName](networkOutput, batchTargets))
        gradients = net.BackProbGradients(batchTargets, networkOutput, layerOutputs)
        for j in range(0, net.noOfLayers + 1):
            if regularization:
                # L2 regularization: add lamda * w to the gradient.
                gradients[j] += lamda * net.weights[j]
            if mt[j] is None:
                mt[j] = (1 - b1) * gradients[j]
                vt[j] = (1 - b2) * np.square(gradients[j])
            else:
                # Exponential moving averages of the gradient and its square.
                mt[j] = b1 * mt[j] + (1 - b1) * gradients[j]
                vt[j] = b2 * vt[j] + (1 - b2) * np.square(gradients[j])
            # Adam step with bias-corrected first and second moment estimates.
            mHat = mt[j] / (1 - b1 ** (i + 1))
            vHat = vt[j] / (1 - b2 ** (i + 1))
            net.weights[j] = net.weights[j] - (eta / batchSize) * np.multiply(
                1 / (np.sqrt(vHat) + 1e-8), mHat)
        plot.close('all')
        lossToPlotTrain.append(fns.CrossEntropy(networkOutput, batchTargets))
        valOutput, _ = net.FeedForward(valData)
        valLoss = fns.CrossEntropy(valOutput, valTargets)
        print('Val Loss: ', valLoss)
        if net.logDir is not None and step % 250 == 0:
            fns.WriteLog(net, batchData, batchTargets, step, epoch, eta,
                         valData, valTargets, testData, testTargets)
        lossToPlotVal.append(valLoss)
    plot.plot(lossToPlotTrain)
    plot.plot(lossToPlotVal)
    plot.legend(['TrainErr', 'ValErr'])
    plot.show()
    return net
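
# Minimal standalone sketch of the Adam update used above (EMAs of the gradient and of
# its square, bias correction, then a step scaled by 1/(sqrt(vHat) + eps)). Illustrative
# only; the toy problem below is unrelated to the project's network classes.
def _adam_sketch():
    import numpy as np
    rng = np.random.default_rng(2)
    X = rng.normal(size=(4, 80))
    Y = rng.normal(size=(1, 4)) @ X
    w = np.zeros((1, 4))
    m = np.zeros_like(w)
    v = np.zeros_like(w)
    eta, b1, b2, eps = 0.05, 0.9, 0.999, 1e-8
    for t in range(1, 501):
        grad = (w @ X - Y) @ X.T / X.shape[1]        # mean gradient over the batch
        m = b1 * m + (1 - b1) * grad                 # first-moment EMA
        v = b2 * v + (1 - b2) * np.square(grad)      # second-moment EMA
        mHat = m / (1 - b1 ** t)                     # bias correction
        vHat = v / (1 - b2 ** t)
        w = w - eta * mHat / (np.sqrt(vHat) + eps)   # Adam step
    return w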