def SGD(self, train_X, train_y, test_X, test_y):
    trainLossVec = []
    testAccVec = []
    testLossVec = []
    trainAccVec = []
    # Epoch loop
    for i in range(1, EPOCHS + 1):
        self.correct = 0
        self.used = 0
        # Batch loop
        for j in range(1, int(len(train_X) * USED_SHARE_OF_DATA / BATCH_SIZE) + 1):
            dw, db = self.stochasticGradient(train_X, train_y, trainLossVec)
            self.used += BATCH_SIZE
            # Layer loop: plain gradient-descent step on each weight matrix and bias vector
            for l in range(1, self.L):
                self.weights[l - 1] -= self.lr * dw[l - 1]
                self.biases[l - 1] -= self.lr * db[l - 1]
            hf.progress_bar(
                j, int(len(train_X) * USED_SHARE_OF_DATA / BATCH_SIZE),
                EPOCHS, i, round(100 * self.correct / self.used, 4))
        # Evaluate once per epoch and record the learning curves
        testAcc, testLoss = hf.test(self, 50, test_X, test_y)
        testAccVec.append(testAcc)
        testLossVec.append(testLoss)
        trainAcc = self.correct / self.used
        trainAccVec.append(trainAcc)
        self.updateLearningRate(i)
    return trainAccVec, trainLossVec, testAccVec, testLossVec
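# hf.test is defined in a separate helper module that is not shown here. A minimal sketch
# of the interface assumed above (number of samples and test data in, accuracy and average
# loss out); the random sampling and the quadratic loss are assumptions for illustration,
# not the repo's exact implementation.
import random
import numpy as np

def test(network, nSamples, test_X, test_y):
    correct = 0
    lossSum = 0.0
    for _ in range(nSamples):
        k = random.randint(0, len(test_X) - 1)
        a = network.predict(test_X[k])            # output-layer activations
        if int(np.argmax(a)) == int(test_y[k]):
            correct += 1
        y = np.zeros(10)                          # one-hot label, assuming 10 classes
        y[int(test_y[k])] = 1.0
        lossSum += 0.5 * float(np.sum(np.square(np.asarray(a) - y)))
    return correct / nSamples, lossSum / nSamples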
def updateLearningRate(self, epoch):
    if self.sc == "exponentialDecay":
        self.lr = hf.exponentialDecay(self.initialLR, self.dr, epoch)
    elif self.sc == "polynomialDecay":
        self.lr = hf.polynomialDecay(self.initialLR, END_LEARNING_RATE, epoch, EPOCHS)
    elif self.sc == "inverseTimeDecay":
        self.lr = hf.inverseTimeDecay(self.initialLR, self.dr, epoch)
    elif self.sc == "piecewiseConstantDecay":
        self.lr = hf.piecewiseConstantDecay(self.initialLR, epoch)
    else:
        pass
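# The decay helpers called above live in the hf module and are not shown in this file.
# A minimal sketch of what they might look like, modelled on the standard schedules that
# share these names; the boundaries and factors in piecewiseConstantDecay and the default
# power in polynomialDecay are assumptions, not the repo's values.
def exponentialDecay(initialLR, decayRate, epoch):
    # lr = lr0 * decayRate^epoch
    return initialLR * decayRate**epoch

def inverseTimeDecay(initialLR, decayRate, epoch):
    # lr = lr0 / (1 + decayRate * epoch)
    return initialLR / (1 + decayRate * epoch)

def polynomialDecay(initialLR, endLR, epoch, totalEpochs, power=1.0):
    # Interpolates from lr0 down to endLR over the run (linear when power = 1)
    frac = min(epoch, totalEpochs) / totalEpochs
    return (initialLR - endLR) * (1 - frac)**power + endLR

def piecewiseConstantDecay(initialLR, epoch, boundaries=(10, 20), factors=(1.0, 0.5, 0.1)):
    # Constant learning rate within each epoch interval, dropped at the boundaries
    for boundary, factor in zip(boundaries, factors):
        if epoch <= boundary:
            return initialLR * factor
    return initialLR * factors[-1]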
def SGDADAM(self, train_X, train_y, test_X, test_y):
    trainLossVec = []
    testAccVec = []
    testLossVec = []
    trainAccVec = []
    # Adam hyperparameters
    beta1 = 0.9
    beta2 = 0.999
    epsilon = 1e-8
    alpha = self.lr
    t = 1
    # First (V) and second (S) moment estimates for weights and biases, one entry per layer
    Vdw = [0] * (self.L - 1)
    Sdw = [0] * (self.L - 1)
    Vdb = [0] * (self.L - 1)
    Sdb = [0] * (self.L - 1)
    # Epoch loop
    for i in range(1, EPOCHS + 1):
        self.correct = 0
        self.used = 0
        # Minibatch loop
        for j in range(1, int(len(train_X) * USED_SHARE_OF_DATA / BATCH_SIZE) + 1):
            dw, db = self.stochasticGradient(train_X, train_y, trainLossVec)
            self.used += BATCH_SIZE
            # Layer loop
            for l in range(1, self.L):
                # Update first and second moments
                Vdw[l - 1] = beta1 * Vdw[l - 1] + (1 - beta1) * dw[l - 1]
                Vdb[l - 1] = beta1 * Vdb[l - 1] + (1 - beta1) * db[l - 1]
                Sdw[l - 1] = beta2 * Sdw[l - 1] + (1 - beta2) * np.square(dw[l - 1])
                Sdb[l - 1] = beta2 * Sdb[l - 1] + (1 - beta2) * np.square(db[l - 1])
                # Bias-corrected moment estimates
                Vdwcor = Vdw[l - 1] / (1 - beta1**t)
                Vdbcor = Vdb[l - 1] / (1 - beta1**t)
                Sdwcor = Sdw[l - 1] / (1 - beta2**t)
                Sdbcor = Sdb[l - 1] / (1 - beta2**t)
                # Update weights and biases
                cw = np.divide(Vdwcor, np.sqrt(Sdwcor) + epsilon)
                cb = np.divide(Vdbcor, np.sqrt(Sdbcor) + epsilon)
                self.weights[l - 1] -= alpha * cw
                self.biases[l - 1] -= alpha * cb
            t += 1
        # Evaluate once per epoch and record the learning curves
        testAcc, testLoss = hf.test(self, 10000, test_X, test_y)
        trainAcc = self.correct / self.used
        trainAccVec.append(trainAcc)
        testAccVec.append(testAcc)
        testLossVec.append(testLoss)
    return trainAccVec, trainLossVec, testAccVec, testLossVec
def stochasticGradient(self, train_X, train_y, trainLossVec):
    changeWeights = [0] * (self.L - 1)
    changeBiases = [0] * (self.L - 1)
    lossFuncSum = 0
    for i in range(0, BATCH_SIZE):
        aVec = []
        DVec = []
        deltaVec = []
        # Draw a random training sample
        k = random.randint(0, len(train_X) - 1)
        xk = train_X[k]
        yk = train_y[k]
        a = hf.flatten(xk)
        aVec.append(a)
        # Forward pass: store activations a_l and diagonal derivative matrices D_l
        for l in range(0, self.L - 1):
            z = np.matmul(self.weights[l], a) + self.biases[l]
            a = hf.relu(z)
            D = np.diag(hf.reluPrim(z))
            # Alternative activation: a = hf.sigmoid(z); D = np.diag(hf.sigmoidPrim(z))
            aVec.append(a)
            DVec.append(D)
        # Backward pass: output-layer delta, then propagate backwards through the layers
        delta_L = np.matmul(DVec[-1], (a - hf.formatY(yk)))
        deltaVec.append(delta_L)
        for l in reversed(range(-self.L + 1, -1)):
            delta_l = np.matmul(
                DVec[l],
                np.matmul(np.transpose(self.weights[l + 1]), deltaVec[l + 1]))
            deltaVec.insert(0, delta_l)
        # Accumulate the gradient contribution from this sample
        for l in reversed(range(-self.L + 1, 0)):
            changeBiases[l] += deltaVec[l]
            changeWeights[l] += np.outer(deltaVec[l], aVec[l - 1])
        # Track training accuracy and loss on the samples in the batch
        if int(np.argmax(aVec[-1])) == int(yk):
            self.correct += 1
        lossFuncSum += hf.lossFunc(aVec[-1], yk)
    trainLossVec.append(lossFuncSum / BATCH_SIZE)
    # Average the accumulated gradients over the batch
    dw = [cw / BATCH_SIZE for cw in changeWeights]
    db = [cb / BATCH_SIZE for cb in changeBiases]
    return dw, db
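# The hf helpers called in stochasticGradient are defined in a separate module. A minimal
# sketch of what they might look like for 28x28 MNIST images and 10 classes; the names and
# call signatures match the code above, but the exact implementations are assumptions.
import numpy as np

def flatten(image):
    # 28x28 pixel grid -> length-784 input vector
    return np.asarray(image).flatten()

def relu(z):
    return np.maximum(z, 0)

def reluPrim(z):
    # Derivative of ReLU, used to build the diagonal matrix D_l
    return (z > 0).astype(float)

def formatY(y):
    # Integer label -> one-hot target vector
    out = np.zeros(10)
    out[int(y)] = 1.0
    return out

def lossFunc(a, y):
    # Quadratic loss between the output activations and the one-hot target
    return 0.5 * float(np.sum(np.square(np.asarray(a) - formatY(y))))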
def main():
    (train_X, train_y), (test_X, test_y) = mnist.load_data()
    train_X = train_X / 255.0
    test_X = test_X / 255.0
    steppingSchedules = [
        "inverseTimeDecay", "inverseTimeDecay", "inverseTimeDecay",
        "piecewiseConstantDecay", "polynomialDecay", "ADAM"
    ]
    learningRates = [0.5, 0.5, 0.5, 0.3, 0.3, 0.003]
    decayRates = [0.1, 0.5, 2, 0, 2.5, 0]
    # Loop through the stepping schedules
    for i, steppingSchedule in enumerate(steppingSchedules):
        trainLossMtrx = []
        testLossMtrx = []
        trainAccMtrx = []
        testAccMtrx = []
        learningRate = learningRates[i]
        decayRate = decayRates[i]
        # Each scheme is run 10 times
        for run in range(1, 11):
            NN = Network()
            print("-" * 50)
            print('Run ', run)
            trainAccVec, trainLossVec, testAccVec, testLossVec = NN.train(
                train_X, train_y, test_X, test_y, steppingSchedule,
                learningRate, decayRate)
            trainLossMtrx.append(trainLossVec)
            testLossMtrx.append(testLossVec)
            trainAccMtrx.append(trainAccVec)
            testAccMtrx.append(testAccVec)
        # Write data to file
        nameEnd = '_' + steppingSchedule + '_' + str(learningRate) + '_' + str(decayRate)
        hf.writeToFile(trainLossMtrx, 'TrainLoss' + nameEnd)
        hf.writeToFile(testLossMtrx, 'TestLoss' + nameEnd)
        hf.writeToFile(trainAccMtrx, 'TrainAcc' + nameEnd)
        hf.writeToFile(testAccMtrx, 'TestAcc' + nameEnd)
def predict(self, image):
    a = hf.flatten(image)
    for l in range(0, self.L - 1):
        a = self.nextLayer(a, l)
    return a
def nextLayer(self, a, layer):
    # One layer of the forward pass: affine transform followed by ReLU
    b = self.biases[layer]
    w = self.weights[layer]
    # Alternative activation: a1 = hf.sigmoid(np.matmul(w, a) + b)
    a1 = hf.relu(np.matmul(w, a) + b)
    return a1
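# A hedged usage sketch: training one network and classifying a single test image with
# predict(). It assumes the same imports as main() above (the keras mnist loader, numpy
# as np) and the Network/train signature used there; the variable names are illustrative.
if __name__ == "__main__":
    (train_X, train_y), (test_X, test_y) = mnist.load_data()
    train_X, test_X = train_X / 255.0, test_X / 255.0

    NN = Network()
    NN.train(train_X, train_y, test_X, test_y, "inverseTimeDecay", 0.5, 0.5)

    activations = NN.predict(test_X[0])
    print("Predicted digit:", int(np.argmax(activations)),
          "true label:", int(test_y[0]))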