Пример #1
0
    def _runEpoch(self, id, Xtrain, Ytrain, Xvalid, Yvalid):
        """Run one training epoch followed by a validation pass.

        The training set is shuffled, the model is trained on it, and the
        training score (scaled by 100) and elapsed time are stored in
        ``self.trainAccuracies[id]`` and ``self.trainTimes[id]``. The
        validation accuracy is stored in ``self.validAccuracies[id]``.
        Progress is printed on a single line without a trailing newline.

        Args:
            id: Zero-based epoch index; used both for display (``id + 1``)
                and as the index into the per-epoch statistics containers.
            Xtrain, Ytrain: Training samples and labels.
            Xvalid, Yvalid: Validation samples and labels.

        Returns:
            The result of ``self._test(Xvalid, Yvalid)``; this method reads
            its ``"accuracy"``, ``"time"`` and ``"extra_output"`` entries.
        """

        def emit(text):
            # Keep all progress output on one line and visible immediately.
            print(text, end='', flush=True)

        shuffledX, shuffledY = self._shuffleDataset(Xtrain, Ytrain)

        emit("Train epoch {}/{}: ".format(id + 1, self.EPOCHS))

        # Training
        stopwatch = Timing()
        stopwatch.start()
        predictions = self._model.train(shuffledX, shuffledY)

        # Score against the shuffled labels, i.e. the order the model saw.
        accuracyPct = 100 * self.evaluateScore(shuffledY, predictions)
        self.trainAccuracies[id] = accuracyPct
        self.trainTimes[id] = stopwatch.get_elapsed_secs()
        emit("{:.1f}% in {}.\t".format(accuracyPct,
                                       stopwatch.get_elapsed_time()))

        # Validation
        validation = self._test(Xvalid, Yvalid)
        self.validAccuracies[id] = validation["accuracy"]
        emit('Validation: {:.1f}% ({}).\t'.format(
            validation["accuracy"],
            Timing.secondsToString(validation["time"])))

        # Any extra output is only shown once, on the first epoch.
        if id == 0:
            extra = validation["extra_output"]
            if extra is not None:
                print(extra, end=' ', flush=True)

        return validation
Пример #2
0
    def run(self,
            Xtrain,
            Ytrain,
            Xvalid,
            Yvalid,
            Xtest,
            Ytest,
            modelLogPath=None,
            returnBestValidatedModel=False):
        """Train for ``self.EPOCHS`` epochs, then report test accuracy and loss.

        After every epoch whose validation accuracy beats the best seen so
        far, the model is checkpointed to ``modelLogPath`` (when given).
        Once training ends the best checkpoint is optionally reloaded, the
        model is evaluated on the test set, and the average binary loss on
        the training set is printed.

        Args:
            Xtrain, Ytrain: Training samples and labels.
            Xvalid, Yvalid: Validation samples and labels.
            Xtest, Ytest: Test samples and labels.
            modelLogPath: Where to checkpoint the model with ``numpy.savez``,
                or ``None`` to disable checkpointing. ``numpy.savez`` appends
                ``".npz"`` when the path does not already end with it.
            returnBestValidatedModel: When true, replace ``self._model`` with
                the best checkpoint written during this call before testing.
                Requires ``modelLogPath``.

        Raises:
            ValueError: If ``returnBestValidatedModel`` is set without a
                ``modelLogPath`` (there would be nothing to reload).
        """
        # Fail before training rather than after it: without a path there is
        # no checkpoint to restore the best model from.
        if returnBestValidatedModel and modelLogPath is None:
            raise ValueError(
                "returnBestValidatedModel=True requires modelLogPath")

        bestRes = 0
        checkpointSaved = False

        if modelLogPath is not None:
            # If we need to save the model, we increase the recursion limit
            # so we can save objects like the tree in the code manager etc.
            # The code itself is not recursive!
            import sys
            # Never lower a limit that is already higher than what we need.
            sys.setrecursionlimit(max(sys.getrecursionlimit(), 10**5))

        # Run training epochs
        for epoch in range(self.EPOCHS):
            validRes = self._runEpoch(epoch, Xtrain, Ytrain, Xvalid, Yvalid)

            # If our validation score is the highest so far
            if validRes["accuracy"] > bestRes:
                bestRes = validRes["accuracy"]

                # We save the model to file whenever we reach a better
                # validation performance, so that if the simulation is
                # terminated for some reason (usually this only happens when
                # not enough space was allocated for the process) we will
                # have a backup.
                if modelLogPath is not None:
                    tSave = Timing()
                    tSave.start()

                    # Important:
                    # numpy.savez is originally meant for saving arrays, not
                    # objects. We use it here for simplicity but it sometimes
                    # causes very high additional memory requirements
                    # (it processes the data before saving).
                    #
                    # A better way would be to save the data of the binary
                    # learners (e.g. means of AROW), and the coding matrix
                    # and allocation, one by one, and then load them with a
                    # special method.
                    np.savez(modelLogPath, ltls=self._model)
                    checkpointSaved = True
                    print("Saved model ({}).".format(tSave.get_elapsed_time()),
                          end='',
                          flush=True)
                    del tSave

            print("")

        # If we need to return the best validated model, load it from file
        # before continuing. When no checkpoint was written during this call
        # (no epoch improved on the initial score), keep the in-memory model
        # instead of loading a missing or stale file.
        if returnBestValidatedModel and checkpointSaved:
            # Drop the current model first to keep peak memory down.
            del self._model

            # Mirror numpy.savez's naming: ".npz" is appended only if absent.
            checkpointPath = str(modelLogPath)
            if not checkpointPath.endswith(".npz"):
                checkpointPath += ".npz"

            # The model is stored as a pickled 0-d object array, which
            # numpy.load refuses to read unless allow_pickle=True.
            # NOTE: unpickling executes arbitrary code; this is only safe
            # because the file was written by this very call.
            with np.load(checkpointPath, allow_pickle=True) as checkpoint:
                self._model = checkpoint["ltls"][()]

        # Test
        testRes = self._test(Xtest, Ytest)
        print('Test accuracy: {:.1f}% ({})'.format(
            testRes["accuracy"], Timing.secondsToString(testRes["time"])))

        # Calculate average binary loss
        decodingLoss = self._calcBitwiseLoss(Xtrain, Ytrain)
        print("Average binary loss: {:.2f}".format(decodingLoss))