示例#1
0
def test_filename(tmpdir):
    """A relative log filepath is created in the current working directory."""
    name = "test"
    with tmpdir.as_cwd():
        # point the logger at a relative path, emit one record, then
        # tear the logger down so handlers flush and close the file
        log.config(filepath=name)
        log.info("test!")
        log.clear()
    assert tmpdir.join(name).exists()
示例#2
0
def common_main(args: Dict[str, Any]) -> Dict[str, Any]:
    """Shared post-processing of parsed CLI arguments.

    Removes the raw "config" entry, configures logging (resolving a
    relative log filepath against the experiment directory), and logs
    the remaining arguments plus any installed package versions.

    Returns:
        The (mutated) ``args`` mapping, without "config" and "logging".
    """
    del args["config"]
    # extract and apply the logging configuration
    logging = args.pop("logging")
    filepath = logging["filepath"]
    if filepath is not None:
        # keep log files under the experiment directory
        logging["filepath"] = join(args["common"].experiment_dirpath, filepath)
    log.config(**logging)
    log.info(f"Arguments: {args}")
    versions = get_installed_versions()
    if versions:
        log.info("Installed:", versions)
    return args
示例#3
0
def run(
    syms: str,
    fixed_input_height: Optional[NonNegativeInt] = 0,
    adaptive_pooling: str = "avgpool-16",
    common: CommonArgs = CommonArgs(),
    crnn: CreateCRNNArgs = CreateCRNNArgs(),
    save_model: bool = False,
) -> LaiaCRNN:
    """Build (and optionally save) a LaiaCRNN model.

    Args:
        syms: filepath of the symbols table mapping strings to integers.
        fixed_input_height: if non-zero, input images are assumed to have
            this fixed height and the image sequencer is sized from the
            CNN's output; otherwise ``adaptive_pooling`` is used.
        adaptive_pooling: sequencer spec for variable-height input.
        common: common experiment arguments (seed, paths, filenames).
        crnn: architecture arguments; mutated in place before building.
        save_model: if True, persist the model arguments with ModelSaver.

    Returns:
        The constructed LaiaCRNN model.
    """
    seed_everything(common.seed)

    # Fix: the original dereferenced ``crnn`` (num_output_labels) before
    # the None-check, making the guard useless. All accesses are now
    # inside the guard.
    if crnn is not None:
        # one output label per symbol in the table
        crnn.num_output_labels = len(SymbolsTable(syms))
        if fixed_input_height:
            # NOTE(review): both size dimensions are set to the fixed
            # height; only the axis selected below is actually used.
            conv_output_size = LaiaCRNN.get_conv_output_size(
                size=(fixed_input_height, fixed_input_height),
                cnn_kernel_size=crnn.cnn_kernel_size,
                cnn_stride=crnn.cnn_stride,
                cnn_dilation=crnn.cnn_dilation,
                cnn_poolsize=crnn.cnn_poolsize,
            )
            # pick the axis along which text flows
            fixed_size_after_conv = conv_output_size[
                1 if crnn.vertical_text else 0
            ]
            assert (
                fixed_size_after_conv > 0
            ), "The image size is too small for the CNN architecture"
            crnn.image_sequencer = f"none-{fixed_size_after_conv}"
        else:
            crnn.image_sequencer = adaptive_pooling
        # resolve string names into the actual torch.nn classes
        crnn.rnn_type = getattr(nn, crnn.rnn_type)
        crnn.cnn_activation = [getattr(nn, act) for act in crnn.cnn_activation]

    model = LaiaCRNN(**vars(crnn))
    log.info(
        "Model has {} parameters",
        sum(param.numel() for param in model.parameters()),
    )
    if save_model:
        ModelSaver(common.train_path, common.model_filename).save(
            LaiaCRNN, **vars(crnn)
        )
    return model
                 default=[1, 2, 3, 4, 5],
                 nargs='+',
                 help='PHOC levels used to encode the transcript')
    add_argument('syms', help='Symbols table mapping from strings to integers')
    add_argument('img_dir', help='Directory containing word images')
    add_argument('queries', help='Transcription of each query image')
    add_argument('model_checkpoint', help='Filepath of the model checkpoint')
    add_argument('output',
                 type=argparse.FileType('w'),
                 help='Filepath of the output file')
    args = args()

    syms = laia.utils.SymbolsTable(args.syms)
    phoc_size = sum(args.phoc_levels) * len(syms)
    model = build_dortmund_model(phoc_size)
    log.info('Model has {} parameters',
             sum(param.data.numel() for param in model.parameters()))
    model.load_state_dict(torch.load(args.model_checkpoint))
    model = model.cuda(args.gpu - 1) if args.gpu > 0 else model.cpu()
    model.eval()

    queries_dataset = TextImageFromTextTableDataset(
        args.queries, args.img_dir, img_transform=ImageToTensor())
    queries_loader = DataLoader(queries_dataset)

    def process_image(sample):
        """Run the model on one image tensor and return log-sigmoid PHOC scores.

        Closes over ``args`` (for GPU selection) and ``model``.
        """
        # wrap as a non-differentiable Variable (legacy pre-0.4 torch API)
        sample = Variable(sample, requires_grad=False)
        # move the input to the same device as the model
        sample = sample.cuda(args.gpu - 1) if args.gpu > 0 else sample.cpu()
        phoc = torch.nn.functional.logsigmoid(model(sample))
        # return a squeezed CPU tensor of log-probabilities
        return phoc.data.cpu().squeeze()

    # Predict PHOC vectors
示例#5
0
if __name__ == '__main__':
    # CLI setup: GPU defaults plus dataset/model/output arguments.
    add_defaults('gpu')
    add_argument('--phoc_levels', type=int, default=[1, 2, 3, 4, 5], nargs='+',
                 help='PHOC levels used to encode the transcript')
    add_argument('syms', help='Symbols table mapping from strings to integers')
    add_argument('img_dir', help='Directory containing word images')
    add_argument('gt_txt', help='Transcription of each image')
    add_argument('model_checkpoint', help='Filepath of the model checkpoint')
    add_argument('output', type=argparse.FileType('w'),
                 help='Filepath of the output file')
    args = args()

    # PHOC vector size is (sum of levels) x (alphabet size).
    syms = laia.utils.SymbolsTable(args.syms)
    phoc_size = sum(args.phoc_levels) * len(syms)
    model = build_dortmund_model(phoc_size)
    log.info('Model has {} parameters',
             sum(param.data.numel() for param in model.parameters()))
    # restore trained weights and place the model on GPU or CPU
    model.load_state_dict(torch.load(args.model_checkpoint))
    model = model.cuda(args.gpu - 1) if args.gpu > 0 else model.cpu()
    model.eval()

    # dataset of (image, transcript) pairs read from the ground-truth table
    dataset = TextImageFromTextTableDataset(
        args.gt_txt, args.img_dir, img_transform=ImageToTensor())
    loader = DataLoader(dataset)

    def process_image(sample):
        """Run the model on one image tensor and return sigmoid PHOC scores.

        Closes over ``args`` (for GPU selection) and ``model``; returns a
        numpy array of per-attribute probabilities.
        """
        # wrap as a non-differentiable Variable (legacy pre-0.4 torch API)
        sample = Variable(sample, requires_grad=False)
        # move the input to the same device as the model
        sample = sample.cuda(args.gpu - 1) if args.gpu > 0 else sample.cpu()
        # torch.sigmoid replaces the deprecated torch.nn.functional.sigmoid;
        # numerics are identical
        phoc = torch.sigmoid(model(sample))
        return phoc.data.cpu().numpy()

    # Predict PHOC vectors
示例#6
0
                 default=[1, 2, 3, 4, 5],
                 nargs='+',
                 help='PHOC levels used to encode the transcript')
    add_argument('--distractors',
                 help='Transcription of each distractor image')
    add_argument('syms', help='Symbols table mapping from strings to integers')
    add_argument('img_dir', help='Directory containing word images')
    add_argument('queries', help='Transcription of each query image')
    add_argument('model_checkpoint', help='Filepath of the model checkpoint')
    add_argument('output', type=FileType('w'), help='Output file')
    args = args()

    syms = laia.utils.SymbolsTable(args.syms)
    phoc_size = sum(args.phoc_levels) * len(syms)
    model = build_dortmund_model(phoc_size)
    log.info('Model has {} parameters',
             sum(param.data.numel() for param in model.parameters()))
    model.load_state_dict(torch.load(args.model_checkpoint))
    model = model.cuda(args.gpu - 1) if args.gpu > 0 else model.cpu()
    model.eval()

    def process_image(sample):
        """Run the model on one image tensor and return sigmoid PHOC scores.

        Closes over ``args`` (for GPU selection) and ``model``; returns a
        numpy array of per-attribute probabilities.
        """
        # wrap as a non-differentiable Variable (legacy pre-0.4 torch API)
        sample = Variable(sample, requires_grad=False)
        # move the input to the same device as the model
        sample = sample.cuda(args.gpu - 1) if args.gpu > 0 else sample.cpu()
        # torch.sigmoid replaces the deprecated torch.nn.functional.sigmoid;
        # numerics are identical
        phoc = torch.sigmoid(model(sample))
        return phoc.data.cpu().numpy()

    def process_dataset(filename):
        dataset = TextImageFromTextTableDataset(filename,
                                                args.img_dir,
                                                img_transform=ImageToTensor())
        data_loader = DataLoader(dataset)
示例#7
0
def run(
        syms: str,
        img_dirs: List[str],
        tr_txt_table: str,
        va_txt_table: str,
        common: CommonArgs = CommonArgs(),
        train: TrainArgs = TrainArgs(),
        optimizer: OptimizerArgs = OptimizerArgs(),
        scheduler: SchedulerArgs = SchedulerArgs(),
        data: DataArgs = DataArgs(),
        trainer: TrainerArgs = TrainerArgs(),
):
    """Train an HTR model with pytorch-lightning.

    Args:
        syms: filepath of the symbols table mapping strings to integers.
        img_dirs: directories containing the line images.
        tr_txt_table: filepath of the training transcripts table.
        va_txt_table: filepath of the validation transcripts table.
        common: common experiment arguments (seed, paths, monitor).
        train: training arguments (resume, delimiters, checkpointing, ...).
        optimizer: optimizer arguments.
        scheduler: learning-rate scheduler arguments.
        data: data arguments (batch size, color mode).
        trainer: pytorch-lightning Trainer arguments.
    """
    pl.seed_everything(common.seed)

    loader = ModelLoader(common.train_path,
                         filename=common.model_filename,
                         device="cpu")
    # maybe load a checkpoint
    checkpoint = None
    if train.resume:
        checkpoint = loader.prepare_checkpoint(common.checkpoint,
                                               common.experiment_dirpath,
                                               common.monitor)
        # extend training for ``train.resume`` epochs past the checkpoint
        trainer.max_epochs = torch.load(checkpoint)["epoch"] + train.resume
        log.info(f'Using checkpoint "{checkpoint}"')
        log.info(f"Max epochs set to {trainer.max_epochs}")

    # load the non-pytorch_lightning model
    model = loader.load()
    assert (
        model is not None
    ), "Could not find the model. Have you run pylaia-htr-create-model?"

    # prepare the symbols
    syms = SymbolsTable(syms)
    for d in train.delimiters:
        assert d in syms, f'The delimiter "{d}" is not available in the symbols file'

    # prepare the engine
    engine_module = HTREngineModule(
        model,
        [syms[d] for d in train.delimiters],
        optimizer=optimizer,
        scheduler=scheduler,
        batch_input_fn=Compose([ItemFeeder("img"),
                                ImageFeeder()]),
        batch_target_fn=ItemFeeder("txt"),
        batch_id_fn=ItemFeeder("id"),  # Used to print image ids on exception
    )

    # prepare the data
    data_module = DataModule(
        syms=syms,
        img_dirs=img_dirs,
        tr_txt_table=tr_txt_table,
        va_txt_table=va_txt_table,
        batch_size=data.batch_size,
        color_mode=data.color_mode,
        shuffle_tr=not bool(trainer.limit_train_batches),
        augment_tr=train.augment_training,
        stage="fit",
    )

    # prepare the training callbacks
    # TODO: save on lowest_va_wer and every k epochs https://github.com/PyTorchLightning/pytorch-lightning/issues/2908
    checkpoint_callback = pl.callbacks.ModelCheckpoint(
        dirpath=common.experiment_dirpath,
        filename="{epoch}-lowest_" + common.monitor,
        monitor=common.monitor,
        verbose=True,
        save_top_k=train.checkpoint_k,
        mode="min",
        save_last=True,
    )
    checkpoint_callback.CHECKPOINT_NAME_LAST = "{epoch}-last"
    early_stopping_callback = pl.callbacks.EarlyStopping(
        monitor=common.monitor,
        patience=train.early_stopping_patience,
        verbose=True,
        mode="min",
        strict=False,  # training_step may return None
    )
    # Fix: the original registered ``checkpoint_callback`` twice in this
    # list; each callback is listed once now.
    callbacks = [
        ProgressBar(refresh_rate=trainer.progress_bar_refresh_rate),
        checkpoint_callback,
        early_stopping_callback,
    ]
    if train.gpu_stats:
        callbacks.append(ProgressBarGPUStats())
    if scheduler.active:
        callbacks.append(LearningRate(logging_interval="epoch"))

    # prepare the trainer (note: rebinds ``trainer`` from TrainerArgs
    # to the pl.Trainer built from those args)
    trainer = pl.Trainer(
        default_root_dir=common.train_path,
        resume_from_checkpoint=checkpoint,
        callbacks=callbacks,
        logger=EpochCSVLogger(common.experiment_dirpath),
        checkpoint_callback=True,
        **vars(trainer),
    )

    # train!
    trainer.fit(engine_module, datamodule=data_module)

    # training is over
    if early_stopping_callback.stopped_epoch:
        log.info(
            "Early stopping triggered after epoch"
            f" {early_stopping_callback.stopped_epoch + 1} (waited for"
            f" {early_stopping_callback.wait_count} epochs). The best score was"
            f" {early_stopping_callback.best_score}")
    log.info(f"Model has been trained for {trainer.current_epoch + 1} epochs"
             f" ({trainer.global_step + 1} steps)")
    log.info(
        f"Best {checkpoint_callback.monitor}={checkpoint_callback.best_model_score} "
        f"obtained with model={checkpoint_callback.best_model_path}")
示例#8
0
                 nargs='+',
                 help='PHOC levels used to encode the transcript')
    add_argument('syms', help='Symbols table mapping from strings to integers')
    add_argument('img_dir', help='Directory containing word images')
    add_argument('candidates', help='Transcription of each candidate image')
    add_argument('queries', help='Transcription of each query image')
    add_argument('model_checkpoint', help='Filepath of the model checkpoint')
    add_argument('output',
                 type=argparse.FileType('w'),
                 help='Filepath of the output file')
    args = args()

    syms = laia.utils.SymbolsTable(args.syms)
    phoc_size = sum(args.phoc_levels) * len(syms)
    model = build_dortmund_model(phoc_size)
    log.info('Model has {} parameters',
             sum(param.data.numel() for param in model.parameters()))
    model.load_state_dict(torch.load(args.model_checkpoint))
    model = model.cuda(args.gpu - 1) if args.gpu > 0 else model.cpu()
    model.eval()

    candidates_dataset = TextImageFromTextTableDataset(
        args.candidates, args.img_dir, img_transform=ImageToTensor())
    candidates_loader = DataLoader(candidates_dataset)

    queries_dataset = TextImageFromTextTableDataset(
        args.queries, args.img_dir, img_transform=ImageToTensor())
    queries_loader = DataLoader(queries_dataset)

    def process_image(sample):
        sample = Variable(sample, requires_grad=False)
        sample = sample.cuda(args.gpu - 1) if args.gpu > 0 else sample.cpu()
示例#9
0
def test_filepath(tmpdir):
    """An absolute log filepath creates exactly that file."""
    target = tmpdir / "test"
    # configure, emit one record, then shut the logger down so the
    # file handler is flushed and closed
    log.config(filepath=target)
    log.info("test!")
    log.clear()
    assert target.exists()
示例#10
0
                 help='PHOC levels used to encode the transcript')
    add_argument('--distractors',
                 help='Transcription of each distractor image')
    add_argument('syms', help='Symbols table mapping from strings to integers')
    add_argument('img_dir', help='Directory containing word images')
    add_argument('queries', help='Transcription of each query image')
    add_argument('model_checkpoint', help='Filepath of the model checkpoint')
    add_argument('output',
                 type=argparse.FileType('w'),
                 help='Filepath of the output file')
    args = args()

    syms = laia.utils.SymbolsTable(args.syms)
    phoc_size = sum(args.phoc_levels) * len(syms)
    model = build_dortmund_model(phoc_size)
    log.info('Model has {} parameters',
             sum(param.data.numel() for param in model.parameters()))
    model.load_state_dict(torch.load(args.model_checkpoint))
    model = model.cuda(args.gpu - 1) if args.gpu > 0 else model.cpu()
    model.eval()

    def process_image(sample):
        """Run the model on one image tensor and return log-sigmoid PHOC scores.

        Closes over ``args`` (for GPU selection) and ``model``.
        """
        # wrap as a non-differentiable Variable (legacy pre-0.4 torch API)
        sample = Variable(sample, requires_grad=False)
        # move the input to the same device as the model
        sample = sample.cuda(args.gpu - 1) if args.gpu > 0 else sample.cpu()
        phoc = torch.nn.functional.logsigmoid(model(sample))
        # return a squeezed CPU tensor of log-probabilities
        return phoc.data.cpu().squeeze()

    def process_dataset(filename):
        dataset = TextImageFromTextTableDataset(filename,
                                                args.img_dir,
                                                img_transform=ImageToTensor())
        data_loader = DataLoader(dataset)