예제 #1
0
    def start_Gpu_training(self,
                           datafolder,
                           batchsize,
                           learning_rate,
                           number_of_epoch,
                           display=1000):
        """Load the dataset, split it and delegate to ``run_trainingepoch``.

        Args:
            datafolder: Mapping indexed with the keys ``"data"``, ``"test"``
                and ``"label"``; each value is handed to
                ``Dataloader.loaddata``.
            batchsize: Forwarded unchanged to ``run_trainingepoch``.
            learning_rate: Unused by this method.
            number_of_epoch: Forwarded unchanged to ``run_trainingepoch``.
            display: Unused by this method.
        """
        samples = Dataloader.loaddata(datafolder["data"])
        held_out = Dataloader.loaddata(datafolder["test"])
        targets = Dataloader.loaddata(datafolder["label"])

        # The split is requested with test_size=0.3; the second half of the
        # result is used as the validation set.
        partition = train_test_split(np.array(samples), targets, test_size=0.3)
        train_x, val_x, train_y, val_y = partition

        train_onehot = Dataloader.toHotEncoding(train_y)
        val_onehot = Dataloader.toHotEncoding(val_y)

        # The last argument is the number of training samples.
        run_trainingepoch(number_of_epoch,
                          train_x,
                          train_onehot,
                          val_x,
                          val_onehot,
                          held_out,
                          batchsize,
                          train_x.shape[0])
예제 #2
0
    def start_training(self,
                       datafolder,
                       batchsize,
                       learning_rate,
                       number_of_epoch,
                       display=1000):
        """Train with mini-batches and ADAM, checkpointing after every epoch.

        The method re-initialises the weights (``self.initWeigth()``) and the
        ADAM state (``self.optimizer.initADAM(5, 5)``), loads the data, splits
        it with ``test_size=0.2`` and then runs ``number_of_epoch`` epochs.
        Every batch is normalised with ``self.mean`` / ``self.std_v`` and
        reshaped to ``(batchsize, 108 * 108)``, so each sample must hold
        exactly ``108 * 108`` values. Trailing samples that do not fill a
        complete batch are skipped.

        After each epoch the parameters are pickled to
        ``Assignment_weight_4.pkl`` and the accuracy/loss histories to
        ``Assignment_parameter.pkl`` (both overwritten every epoch). When the
        validation accuracy exceeds 0.55, ``self.doTest(testData)`` is run and
        its mapping is written to ``result_ov_<accuracy>_.csv``.

        Args:
            datafolder: Mapping indexed with the keys ``"data"``, ``"test"``
                and ``"label"``; each value is handed to
                ``Dataloader.loaddata``.
            batchsize: Number of samples per update; must be positive and not
                larger than the training split.
            learning_rate: Initial step size. It is overridden with ``1e-5``,
                ``1e-6`` and ``1e-7`` once ``epoch`` exceeds 70, 130 and 175.
            number_of_epoch: Number of passes over the training split.
            display: Progress is printed (and a validation pass is run)
                whenever the batch start offset is a multiple of this value.

        Raises:
            ValueError: If ``batchsize`` is not positive or exceeds the number
                of training samples (no batch could be formed).
        """
        if batchsize <= 0:
            # A non-positive batch size would never advance the batch offset.
            raise ValueError("batchsize must be a positive integer")

        self.initWeigth()
        self.optimizer.initADAM(5, 5)
        data = np.array(Dataloader.loaddata(datafolder["data"]))
        testData = Dataloader.loaddata(datafolder["test"])
        all_labels = Dataloader.loaddata(datafolder["label"])
        train, val, temptrainlabel, tempvallabel = train_test_split(
            data, all_labels, test_size=0.2)
        vallabel = Dataloader.toHotEncoding(tempvallabel)

        total_train_image = train.shape[0]
        if total_train_image < batchsize:
            # Without this guard an epoch would process zero batches and the
            # per-epoch averages would divide by zero.
            raise ValueError(
                "batchsize ({}) exceeds the number of training samples ({})"
                .format(batchsize, total_train_image))

        t = 0
        pEpochTrainLoss = []
        pEpochTrainAccuracy = []
        pEpochTestLoss = []
        pEpochTestAccuracy = []
        for epoch in range(number_of_epoch):
            train, temptrainlabel = sklearn.utils.shuffle(train,
                                                          temptrainlabel,
                                                          random_state=1)
            trainlabel = Dataloader.toHotEncoding(temptrainlabel)

            # Step-wise learning-rate decay; later thresholds win.
            if epoch > 70:
                learning_rate = 0.00001
            if epoch > 130:
                learning_rate = 0.000001
            if epoch > 175:
                learning_rate = 0.0000001

            avgLoss = 0
            count = 0.0          # correct predictions in this epoch
            countacc = 0.0       # correct predictions since the last report
            countiter = 0.0      # samples seen in this epoch
            countitertemp = 0.0  # samples seen since the last report
            loss_iter = 0.0      # batches processed in this epoch

            # NOTE(review): the step counter passed to ADAM advances once per
            # epoch here, not once per parameter update - confirm this is what
            # the optimizer expects.
            t += 1

            offset = 0
            # "<=" so that the final complete batch is trained on as well.
            while offset + batchsize <= total_train_image:
                image = np.array(train[offset:offset + batchsize, :, :],
                                 dtype=np.float32)
                labels = trainlabel[offset:offset + batchsize, :]
                image = np.divide(np.subtract(image, self.mean), self.std_v)
                input_data = np.reshape(image, (batchsize, 108 * 108))
                input_data, labels = sklearn.utils.shuffle(input_data,
                                                           labels,
                                                           random_state=1)

                loss, outputs = self.Train(input_data, labels)
                # Called with a third argument of 0 (presumably the
                # regularisation strength - verify against the optimizer).
                self.grad, reg_loss = self.optimizer.l2_regularization(
                    self.parameter, self.grad, 0)
                loss += reg_loss

                # Row-wise argmax over the first `batchsize` rows.
                predictions = np.argmax(outputs[:batchsize], axis=1)
                targets = np.argmax(labels[:batchsize], axis=1)
                hits = float(np.sum(predictions == targets))
                count += hits
                countacc += hits
                countiter += batchsize
                countitertemp += batchsize

                avgLoss += loss
                if offset % display == 0:
                    # Report the probability of sample 0's own prediction.
                    first_pred = int(np.argmax(outputs[0, :]))
                    print("Preiction: ", outputs[0, :])
                    print("Train Accuracy {} with prob : {}".format(
                        (countacc / float(countitertemp)),
                        outputs[0, first_pred]))
                    print("Train Loss: ", loss)
                    countacc = 0.0
                    countitertemp = 0.0
                    # The returned (loss, accuracy) pair is not used here; the
                    # call is kept for whatever self.Test does on its own.
                    self.Test(val, vallabel)

                self.parameter = self.optimizer.ADAM(self.parameter, self.grad,
                                                     learning_rate, t)
                offset += batchsize
                loss_iter += 1.0

            trainAcc = (float(count) / float(countiter))
            print("##################Overall Accuracy & Loss Calculation")
            print(offset, ":TrainAccuracy: ", trainAcc)
            print(offset, ":TrainLoss: ", (float(avgLoss) / float(loss_iter)))
            avgtestloss, avgtestacc = self.Test(val, vallabel)
            # NOTE(review): the stored history divides the summed batch losses
            # by the image count, while the printed loss above divides by the
            # batch count - confirm which normalisation is intended.
            totaloss = float(avgLoss) / float(total_train_image)
            pEpochTrainLoss.append(totaloss)
            pEpochTrainAccuracy.append(trainAcc)
            pEpochTestLoss.append(avgtestloss)
            pEpochTestAccuracy.append(avgtestacc)

            # Context managers guarantee the files are closed even if
            # pickling raises.
            with open("Assignment_weight_4.pkl", "wb") as weight_file:
                pickle.dump(self.parameter, weight_file)
            with open("Assignment_parameter.pkl", "wb") as history_file:
                pickle.dump((pEpochTrainAccuracy, pEpochTrainLoss,
                             pEpochTestAccuracy, pEpochTestLoss),
                            history_file)
            print("############################################")

            if avgtestacc > 0.55:
                assignmentOut = self.doTest(testData)
                fileName = "result_ov_" + str(avgtestacc) + "_.csv"
                with open(fileName, 'w') as f:
                    for key, value in assignmentOut.items():
                        f.write("%s,%s\n" % (key, value))
예제 #3
0
    def start_training_mnist(self,
                             data_folder,
                             batch_size,
                             learning_rate,
                             NumberOfEpoch,
                             display=1000):
        """Train on MNIST one image at a time with ADAM.

        The method re-initialises the weights (``self.initWeigth()``) and the
        ADAM state (``self.optimizer.initADAM(5, 5)``), loads the ``'train'``
        and ``'t10k'`` sets through ``Dataloader.loadMNIST`` and runs
        ``NumberOfEpoch`` passes over the training images. After each epoch
        the training-loss curve is drawn with matplotlib, the parameters are
        pickled to ``Assignment_test_2.pkl`` and the accuracy/loss histories
        to ``Assignment_parameter.pkl`` (both overwritten every epoch).

        Args:
            data_folder: Location handed to ``Dataloader.loadMNIST``.
            batch_size: Unused by this method; every update uses one image.
            learning_rate: Step size passed to ``self.optimizer.ADAM``.
            NumberOfEpoch: Number of passes over the training images.
            display: Progress is printed (and a test pass is run) whenever
                the image index is a multiple of this value.
        """
        self.initWeigth()
        self.optimizer.initADAM(5, 5)
        trainingImages, trainingLabels = Dataloader.loadMNIST(
            'train', data_folder)
        testImages, testLabels = Dataloader.loadMNIST('t10k', data_folder)
        trainLabelsHotEncoding = Dataloader.toHotEncoding(trainingLabels)
        testLabelsHotEncoding = Dataloader.toHotEncoding(testLabels)
        numberOfImages = trainingImages.shape[0]

        pEpochTrainLoss = []
        pEpochTrainAccuracy = []
        pEpochTestLoss = []
        pEpochTestAccuracy = []
        print("Training started")
        t = 0  # ADAM step counter, advanced once per parameter update
        for epoch in range(NumberOfEpoch):
            avgLoss = 0
            count = 0.0     # correct predictions in this epoch
            countacc = 0.0  # correct predictions since the last report
            print("##############EPOCH : {}##################".format(epoch))
            for index in range(numberOfImages):
                t += 1
                image = trainingImages[index, :, :]
                labels = trainLabelsHotEncoding[index, :]
                loss, output = self.Train(image, labels)
                output = output[0]
                pred = np.argmax(output)
                if pred == np.argmax(labels):
                    count += 1.0
                    countacc += 1.0

                avgLoss += loss
                if index % display == 0:
                    # The windowed accuracy is normalised by `display`, so the
                    # very first report (index 0) covers a single sample only.
                    print("Train Accuracy {} with prob : {}".format(
                        (countacc / float(display)), output[pred]))
                    print("Train Loss: ", loss)
                    countacc = 0.0
                    # The returned (loss, accuracy) pair is not used here; the
                    # call is kept for whatever self.Test does on its own.
                    self.Test(testImages, testLabelsHotEncoding)

                self.parameter = self.optimizer.ADAM(self.parameter, self.grad,
                                                     learning_rate, t)
                # NOTE(review): here l2_regularization takes two arguments and
                # its result replaces the parameters; confirm this matches the
                # optimizer's actual signature and return value.
                self.parameter = self.optimizer.l2_regularization(
                    self.parameter, 0.001)

            trainAcc = (float(count) / float(numberOfImages))
            print("##################Overall Accuracy & Loss Calculation")
            print("TrainAccuracy: ", trainAcc)
            print("TrainLoss: ", (float(avgLoss) / float(numberOfImages)))
            avgtestloss, avgtestacc = self.Test(testImages,
                                                testLabelsHotEncoding)
            totaloss = float(avgLoss) / float(numberOfImages)
            pEpochTrainLoss.append(totaloss)
            pEpochTrainAccuracy.append(trainAcc)
            pEpochTestLoss.append(avgtestloss)
            pEpochTestAccuracy.append(avgtestacc)

            x_axis = np.linspace(0, epoch, len(pEpochTrainLoss), endpoint=True)
            plt.semilogy(x_axis, pEpochTrainLoss)
            plt.xlabel('epoch')
            plt.ylabel('loss')
            plt.draw()

            # Context managers guarantee the files are closed even if
            # pickling raises.
            with open("Assignment_test_2.pkl", "wb") as weight_file:
                pickle.dump(self.parameter, weight_file)
            with open("Assignment_parameter.pkl", "wb") as history_file:
                pickle.dump((pEpochTrainAccuracy, pEpochTrainLoss,
                             pEpochTestAccuracy, pEpochTestLoss),
                            history_file)