示例#1
0
class TestTrainer(TestCase):
    """Smoke test that wires a ``Trainer`` to the IAM handwriting fixtures."""

    def setUp(self):
        """Create the generators, model and trainer used by the tests."""
        fixture_dir = '../../fixtures/iam_handwriting/'
        self.img_height = 48
        self.epochs = 1

        # Training and test generators both read the same fixture directory.
        self.train_batch_generator = BatchGeneratorIAMHandwriting(
            fixture_dir, self.img_height)
        self.test_batch_generator = BatchGeneratorIAMHandwriting(
            fixture_dir, self.img_height)

        # The model is built from the test generator's alphabet.
        self.alphabet = self.test_batch_generator.alphabet
        self.model = ModelOcropy(self.alphabet, self.img_height)
        self.trainer = Trainer(
            self.model,
            self.train_batch_generator,
            self.test_batch_generator,
            epochs=self.epochs,
        )

    def test_train(self):
        """``Trainer.train()`` should complete without raising."""
        self.trainer.train()
示例#2
0
    def start_train(self):
        """Build the data generators, model and callbacks, then run training.

        Calls ``self.progress()`` before any work starts and
        ``self.end_train()`` after ``Trainer.train()`` has returned. The model
        is stored on ``self.model``; the learning rate and the number of
        epochs are read from ``self.lrate`` and ``self.nb_epoch``.
        """
        self.progress()

        # data generators
        # NOTE(review): absolute, machine-specific path -- consider making it
        # configurable.
        data_path = '/home/arsleust/projects/simple-ocr/data/bodmer'
        img_height = 48
        train_data_generator = BatchGeneratorManuscript(data_path,
                                                        img_height=img_height)
        # The test generator is given the training generator's alphabet.
        test_data_generator = BatchGeneratorManuscript(
            data_path,
            img_height=img_height,
            sample_size=10,
            alphabet=train_data_generator.alphabet)

        # model
        self.model = ModelOcropy(train_data_generator.alphabet, img_height)
        print(self.model.summary())

        # callbacks
        # NOTE(review): the timestamp contains a space and ':' characters,
        # which are not valid in file names on Windows.
        str_date_time = strftime("%Y-%m-%d %H:%M:%S", gmtime())

        # exist_ok=True replaces the race-prone "exists() then mkdir()" pair.
        os.makedirs("checkpoints", exist_ok=True)
        checkpoints_path = os.path.join("checkpoints",
                                        str_date_time + '.hdf5')
        callback_checkpoint = keras.callbacks.ModelCheckpoint(
            checkpoints_path,
            monitor='val_loss',
            verbose=1,
            save_best_only=True,
            save_weights_only=True)
        callback_gui = GUICallback(test_data_generator, self)
        callbacks = [callback_checkpoint, callback_gui]

        # trainer
        trainer = Trainer(self.model,
                          train_data_generator,
                          test_data_generator,
                          lr=self.lrate,
                          epochs=self.nb_epoch,
                          steps_per_epochs=20,
                          callbacks=callbacks)

        trainer.train()
        print("Training done")

        self.end_train()
示例#3
0
def _str2bool(value):
    """Parse a command-line boolean such as ``true``/``false`` or ``1``/``0``.

    ``argparse`` with ``type=bool`` calls ``bool()`` on the raw string, so
    every non-empty value -- including ``"False"`` -- becomes ``True``. This
    parser accepts the usual spellings instead; the empty string stays falsy.

    Raises:
        argparse.ArgumentTypeError: if *value* is not a recognised boolean.
    """
    normalized = value.strip().lower()
    if normalized in ('true', 't', 'yes', 'y', 'on', '1'):
        return True
    if normalized in ('false', 'f', 'no', 'n', 'off', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value, got %r" % (value,))


def _build_data_generators(generator_type, data_path, img_height):
    """Return the ``(train, test)`` batch generators for *generator_type*."""
    generator_classes = {
        'iam': BatchGeneratorIAMHandwriting,
        'bodmer': BatchGeneratorManuscript,
    }
    generator_class = generator_classes.get(generator_type)
    if generator_class is None:
        raise ValueError("Data generator is not defined.")

    train_data_generator = generator_class(data_path, img_height=img_height)
    # The test generator is given the training generator's alphabet.
    test_data_generator = generator_class(
        data_path,
        img_height=img_height,
        sample_size=100,
        alphabet=train_data_generator.alphabet)
    return train_data_generator, test_data_generator


def _build_callbacks(test_data_generator, reduce_lr_on_plateau, levenshtein,
                     tensorboard):
    """Return the list of training callbacks selected by the flags.

    A weights checkpoint callback is always appended last.
    """
    str_date_time = strftime("%Y-%m-%d %H:%M:%S", gmtime())
    callbacks = []
    if reduce_lr_on_plateau:
        # NOTE(review): this monitors 'val_ctc_loss' while the checkpoint
        # below monitors 'val_loss' -- confirm both metrics are reported.
        callbacks.append(keras.callbacks.ReduceLROnPlateau(
            monitor='val_ctc_loss', factor=0.1, patience=4, verbose=1))
    if levenshtein:
        callbacks.append(LevenshteinCallback(test_data_generator, size=10))
    if tensorboard:
        callbacks.append(keras.callbacks.TensorBoard(
            log_dir=os.path.join("logs", str_date_time),
            batch_size=1,
        ))

    # exist_ok=True replaces the race-prone "exists() then mkdir()" pair.
    os.makedirs("checkpoints", exist_ok=True)
    checkpoints_path = os.path.join("checkpoints", str_date_time + '.hdf5')
    callbacks.append(keras.callbacks.ModelCheckpoint(
        checkpoints_path,
        monitor='val_loss',
        verbose=1,
        save_best_only=True,
        save_weights_only=True))
    return callbacks


def main(argv=None):
    """Command-line entry point: parse the options and train an OCR model.

    Args:
        argv: Optional list of command-line arguments (without the program
            name). ``None`` makes ``argparse`` read ``sys.argv[1:]``.
    """
    # cmd args
    # The text is passed as ``description``; the first positional parameter
    # of ArgumentParser is ``prog`` and would replace the program name.
    parser = argparse.ArgumentParser(
        description="A Python command-line tool for training ocr models")
    parser.add_argument('generator', choices=['iam', 'bodmer'])
    parser.add_argument('data_path', type=str)
    parser.add_argument('--epochs', type=int, default=1)
    parser.add_argument('--steps-epochs', type=int, default=None)
    parser.add_argument('--lr', type=float, default=0.01)
    parser.add_argument('--plateau-reduce-lr', type=_str2bool, default=True)
    parser.add_argument('--image-height', type=int, default=48)
    parser.add_argument('--levenshtein', type=_str2bool, default=True)
    parser.add_argument('--tensorboard', type=_str2bool, default=True)
    args = parser.parse_args(argv)

    img_height = args.image_height

    # data generators
    train_data_generator, test_data_generator = _build_data_generators(
        args.generator, args.data_path, img_height)

    # model
    model = ModelOcropy(train_data_generator.alphabet, img_height)
    print(model.summary())

    # callbacks
    callbacks = _build_callbacks(
        test_data_generator,
        reduce_lr_on_plateau=args.plateau_reduce_lr,
        levenshtein=args.levenshtein,
        tensorboard=args.tensorboard)

    # trainer
    trainer = Trainer(model,
                      train_data_generator,
                      test_data_generator,
                      lr=args.lr,
                      epochs=args.epochs,
                      steps_per_epochs=args.steps_epochs,
                      callbacks=callbacks)

    trainer.train()