Example #1
    def BackProbGradients(self, output, networkOutput, layerOutputs):
        # Backward pass: returns one weight-gradient matrix per layer, with
        # the bias gradients folded in via IntergrateBiasAndData.
        weightGradients = [None] * (self.noOfLayers + 1)
        if self.outputFunctionName == "SoftMax" and self.lossFunctionName == "CrossEntropy":
            # Combined softmax + cross-entropy gradient (networkOutput - output).
            gradientsWRTActivation = self.LossAndOutputGradients[
                'CrossEntropyWithSoftMax'](networkOutput, output)
        else:
            gradientsWRTActivation = self.LossGradients[self.lossFunctionName](
                networkOutput, output)
            gradientsWRTActivation = self.OutputGradients[
                self.outputFunctionName](networkOutput, gradientsWRTActivation)
        weightGradients[self.noOfLayers] = np.matmul(
            gradientsWRTActivation,
            np.transpose(
                fns.IntergrateBiasAndData(layerOutputs[self.noOfLayers])))
        for i in reversed(range(0, self.noOfLayers)):
            # Push the gradient back through the next layer's weights
            # (bias column excluded) ...
            backProbGradient = np.matmul(
                np.transpose(
                    fns.DisIntergrateBiasFromWeights(self.weights[i + 1])),
                gradientsWRTActivation)
            # ... and through this layer's activation function.
            gradientsWRTActivation = self.ActivationGradients[
                self.activationFunctionNames[i]](layerOutputs[i + 1],
                                                 backProbGradient)
            weightGradients[i] = np.matmul(
                gradientsWRTActivation,
                np.transpose(fns.IntergrateBiasAndData(layerOutputs[i])))
        return weightGradients
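
The first branch of BackProbGradients uses the standard identity that, for a softmax output trained with cross-entropy, the gradient of the loss with respect to the pre-softmax activations is simply networkOutput - output (presumably what the 'CrossEntropyWithSoftMax' entry computes). A small standalone NumPy check of that identity, using the excerpt's column-vector convention:

import numpy as np

# One sample with 5 classes and a one-hot target in class 2.
z = np.random.randn(5, 1)
y = np.zeros((5, 1))
y[2] = 1.0

def softmax(v):
    e = np.exp(v - v.max())
    return e / e.sum()

def ce_loss(v):
    return -(y.T @ np.log(softmax(v))).item()

analytic = softmax(z) - y      # the combined softmax + cross-entropy gradient
numeric = np.zeros_like(z)
eps = 1e-6
for k in range(z.size):
    dz = np.zeros_like(z)
    dz[k] = eps
    numeric[k] = (ce_loss(z + dz) - ce_loss(z - dz)) / (2 * eps)

print(np.allclose(analytic, numeric, atol=1e-6))  # prints True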
Example #2
    def FeedForward(self, data):
        # Forward pass that also records every layer's output, which
        # BackProbGradients needs for the backward pass.
        layersOutputs = [data]
        data = fns.IntergrateBiasAndData(data)
        for i in range(0, self.noOfLayers):
            data = self.Activation[self.activationFunctionNames[i]](
                data, self.weights[i])
            layersOutputs.append(data)
            data = fns.IntergrateBiasAndData(data)

        return self.OutputFunction[self.outputFunctionName](
            data, self.weights[self.noOfLayers]), layersOutputs
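
These excerpts assume numpy imported as np, matplotlib.pyplot imported as plot, and the project's helper module fns, which is not shown. Judging from how fns.IntergrateBiasAndData and fns.DisIntergrateBiasFromWeights are used (the bias is folded into each weight matrix as an extra column), plausible minimal stand-ins would look like the following; the real helpers may differ:

import numpy as np

def IntergrateBiasAndData(data):
    # Hypothetical stand-in: data is (features x samples); append a row of
    # ones so the bias column of each weight matrix multiplies a constant 1.
    return np.vstack([data, np.ones((1, data.shape[1]))])

def DisIntergrateBiasFromWeights(weights):
    # Hypothetical stand-in: drop the last (bias) column so gradients are not
    # propagated back through the constant-1 row.
    return weights[:, :-1]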
def MiniBatchGradientDecentWithMomentum(net,
                                        trainData,
                                        trainTargets,
                                        itr,
                                        batchSize,
                                        eta=0.5,
                                        gamma=0.5,
                                        valData=None,
                                        valTargets=None,
                                        testData=None,
                                        testTargets=None,
                                        annel=False,
                                        regularization=False,
                                        lamda=0.1):
    # One momentum (velocity) buffer per weight matrix, filled lazily below.
    deltaWeights = [None] * (net.noOfLayers + 1)
    batchStart = 0
    step = 0
    epoch = 0
    aneelCount = 0
    previousEpochValLoss = np.inf
    eta, gamma = SetInitialETA(net, trainData[:, 0:batchSize],
                               trainTargets[:, 0:batchSize], eta, gamma)
    for i in range(0, itr):
        step = step + 1
        batchData = trainData[:, batchStart:batchStart + batchSize]
        batchTargets = trainTargets[:, batchStart:batchStart + batchSize]
        batchStart = batchSize + batchStart
        networkOutput, layerOutputs = net.FeedForward(batchData)
        if batchStart >= trainData.shape[1]:
            epoch = epoch + 1
            batchStart = batchStart - trainData.shape[1]
            step = 0
            if annel and valData is not None:
                previousEpochValLoss, tempNet = HandleAneeling(
                    net, valData, valTargets, previousEpochValLoss)
                if tempNet is not None:
                    # Annealing returned a replacement network: adopt it,
                    # lower the learning rate, and stop training once it has
                    # fired more than three times.
                    net = tempNet
                    eta = eta * 3.0 / 4.0
                    aneelCount += 1
                    if aneelCount > 3:
                        return net
        print(
            'Mini Batch Loss:',
            net.LossFunction[net.lossFunctionName](networkOutput,
                                                   batchTargets))
        gradients = net.BackProbGradients(batchTargets, networkOutput,
                                          layerOutputs)
        for j in range(0, net.noOfLayers + 1):
            if regularization:
                # Simple L2 penalty added straight to the gradient.
                gradients[j] = gradients[j] + lamda * net.weights[j]
            if deltaWeights[j] is None:
                deltaWeights[j] = (eta / batchSize) * gradients[j]
            else:
                # Momentum: reuse a fraction gamma of the previous step.
                deltaWeights[j] = (eta / batchSize) * gradients[j] \
                    + gamma * deltaWeights[j]
            net.weights[j] = net.weights[j] - deltaWeights[j]
        if net.logDir is not None and step % 250 == 0:
            fns.WriteLog(net, batchData, batchTargets, step, epoch, eta,
                         valData, valTargets, testData, testTargets)
    return net
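
The inner update above is classical momentum: each layer keeps a velocity deltaWeights[j] that mixes the newly scaled gradient with a fraction gamma of the previous step. A standalone toy illustration of the same recurrence on f(w) = 0.5 * ||w||^2, whose gradient is just w:

import numpy as np

eta, gamma, batchSize = 0.5, 0.5, 1
w = np.array([4.0, -2.0])
deltaW = np.zeros_like(w)
for _ in range(50):
    grad = w                                          # gradient of the toy objective
    deltaW = gamma * deltaW + (eta / batchSize) * grad
    w = w - deltaW
print(w)  # both entries end up very close to the minimum at 0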
def BatchGradientDecent(net,
                        trainData,
                        trainTargets,
                        eta,
                        itr,
                        valData=None,
                        valTargets=None,
                        testData=None,
                        testTargets=None,
                        annel=False):
    eta, _ = SetInitialETA(net, trainData, trainTargets, eta)
    lossToPlotTrain = []
    lossToPlotVal = []
    for i in range(0, itr):
        networkOutput, layerOutputs = net.FeedForward(trainData)
        print(
            'Loss:', net.LossFunction[net.lossFunctionName](networkOutput,
                                                            trainTargets))
        gradients = net.BackProbGradients(trainTargets, networkOutput,
                                          layerOutputs)
        for j in range(0, net.noOfLayers + 1):
            net.weights[j] = net.weights[j] - (
                eta / trainData.shape[1]) * gradients[j]
        plot.close('all')
        # lossToPlotTrain.append(CrossEntropy.CrossEntropy(networkOutput, trainTargets))
        # valOutput,_ = net.FeedForward(valData)
        # lossToPlotVal.append(CrossEntropy.CrossEntropy(valOutput, valTargets))
        # plot.plot(lossToPlotTrain)
        # plot.plot(lossToPlotVal)
        # plot.legend(['TrainErr', 'ValErr'])
        # plot.show()

        if valData is not None:
            valOutput, _ = net.FeedForward(valData)
            valLoss = fns.CrossEntropy(valOutput, valTargets)
            print('Val Loss: ', valLoss)
        if net.logDir is not None and i % 250 == 0:
            fns.WriteLog(net, trainData, trainTargets, i, i, eta, valData,
                         valTargets, testData, testTargets)
    return net
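
BatchGradientDecent and AdamOptimizer call fns.CrossEntropy directly for the validation loss. A minimal stand-in, assuming probability outputs and one-hot targets stored column-wise (the actual fns implementation may differ):

import numpy as np

def CrossEntropy(output, targets, eps=1e-12):
    # Hypothetical stand-in: mean cross-entropy over the samples (columns).
    return float(-np.sum(targets * np.log(output + eps)) / output.shape[1])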
def AdamOptimizer(net,
                  trainData,
                  trainTargets,
                  itr,
                  batchSize,
                  eta=0.5,
                  b1=0.9,
                  b2=0.999,
                  valData=None,
                  valTargets=None,
                  testData=None,
                  testTargets=None,
                  annel=False,
                  regularization=False,
                  lamda=0.1):
    # First- and second-moment estimates, one per layer, initialised lazily.
    mt = [None] * (net.noOfLayers + 1)
    vt = [None] * (net.noOfLayers + 1)
    batchStart = 0
    step = 0
    epoch = 0
    aneelCount = 0
    previousEpochValLoss = np.inf
    eta, _ = SetInitialETA(net, trainData[:, 0:batchSize],
                           trainTargets[:, 0:batchSize], eta)

    lossToPlotTrain = []
    lossToPlotVal = []
    for i in range(0, itr):
        step = step + 1
        batchData = trainData[:, batchStart:batchStart + batchSize]
        batchTargets = trainTargets[:, batchStart:batchStart + batchSize]
        batchStart = batchSize + batchStart
        networkOutput, layerOutputs = net.FeedForward(batchData)
        if batchStart >= trainData.shape[1]:
            epoch = epoch + 1
            batchStart = batchStart - trainData.shape[1]
            step = 0
            if annel and valData is not None:
                previousEpochValLoss, tempNet = HandleAneeling(
                    net, valData, valTargets, previousEpochValLoss)
                if tempNet is not None:
                    # Annealing returned a replacement network: adopt it,
                    # lower the learning rate, and stop training once it has
                    # fired more than three times.
                    net = tempNet
                    eta = eta * 3.0 / 4.0
                    aneelCount += 1
                    if aneelCount > 3:
                        return net
        print(
            'Mini Batch Loss:',
            net.LossFunction[net.lossFunctionName](networkOutput,
                                                   batchTargets))
        gradients = net.BackProbGradients(batchTargets, networkOutput,
                                          layerOutputs)
        for j in range(0, net.noOfLayers + 1):
            if regularization:
                gradients[j] += lamda * net.weights[j]
            if mt[j] is None:
                mt[j] = (1 - b1) * gradients[j]
                vt[j] = (1 - b2) * np.square(gradients[j])
            else:
                mt[j] = b1 * mt[j] + (1 - b1) * gradients[j]
                vt[j] = b2 * vt[j] + (1 - b2) * np.square(gradients[j])
            # Adam-style step: the first-moment estimate scaled by the inverse
            # RMS of the second-moment estimate.
            net.weights[j] = net.weights[j] - (eta / batchSize) * mt[j] / (
                np.sqrt(vt[j]) + 1e-8)
        plot.close('all')
        lossToPlotTrain.append(fns.CrossEntropy(networkOutput, batchTargets))
        if valData is not None:
            valOutput, _ = net.FeedForward(valData)
            valLoss = fns.CrossEntropy(valOutput, valTargets)
            lossToPlotVal.append(valLoss)
            print('Val Loss: ', valLoss)
        if net.logDir is not None and step % 250 == 0:
            fns.WriteLog(net, batchData, batchTargets, step, epoch, eta,
                         valData, valTargets, testData, testTargets)

    plot.plot(lossToPlotTrain)
    plot.plot(lossToPlotVal)
    plot.legend(['TrainErr', 'ValErr'])
    plot.show()
    return net
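
For reference, the textbook Adam step (Kingma & Ba, 2015) also bias-corrects the two moment estimates before applying the update, which the loop above skips. A self-contained sketch using the same variable names:

import numpy as np

def adam_step(w, grad, mt, vt, t, eta=0.001, b1=0.9, b2=0.999, eps=1e-8):
    # t is the 1-based step count; mt and vt start as zero arrays.
    mt = b1 * mt + (1 - b1) * grad
    vt = b2 * vt + (1 - b2) * np.square(grad)
    mt_hat = mt / (1 - b1 ** t)    # bias-corrected first moment
    vt_hat = vt / (1 - b2 ** t)    # bias-corrected second moment
    w = w - eta * mt_hat / (np.sqrt(vt_hat) + eps)
    return w, mt, vt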