示例#1
0
 def setup(self, _):
     """Build the training and validation datasets from ``self.root``.

     Reads the ground-truth tables ``tr.gt`` / ``va.gt`` and the image
     directories ``tr`` / ``va`` under the data root.
     """
     # NOTE(review): the validation split reuses the *training* image
     # transforms — confirm this is intentional (other setup variants in
     # this project use a separate validation transform).
     make_split = TextImageFromTextTableDataset
     self.tr_ds = make_split(
         self.root / "tr.gt",
         self.root / "tr",
         img_transform=self.train_transforms,
     )
     self.va_ds = make_split(
         self.root / "va.gt",
         self.root / "va",
         img_transform=self.train_transforms,
     )
示例#2
0
 def process_dataset(filename):
     """Compute PHOC vectors for every sample listed in *filename*.

     Returns a tuple ``(phocs, ids)``: the row-wise concatenation of the
     per-image PHOC arrays, and the sample identifiers in dataset order.
     """
     ds = TextImageFromTextTableDataset(
         filename, args.img_dir, img_transform=ImageToTensor())
     loader = DataLoader(ds)
     pairs = [
         (process_image(item['img']), item['id'][0])
         for item in tqdm(loader)
     ]
     phoc_chunks = [phoc for phoc, _ in pairs]
     ids = [sample_id for _, sample_id in pairs]
     return np.concatenate(phoc_chunks), ids
def test_text_image_from_text_table_dataset(tmpdir, monkeypatch):
    """A one-row text table yields a single sample exposing img, txt, id."""
    # Stub out image loading so no real image file needs to exist.
    monkeypatch.setattr(ImageDataset, "__getitem__", lambda *_: {"img": None})
    img_path = tmpdir / "foo"
    img_path.write(None)
    transcript = "12 3 4"
    dataset = TextImageFromTextTableDataset([f"{img_path} {transcript}"])
    assert len(dataset) == 1
    assert list(dataset[0].keys()) == ["img", "txt", "id"]
    assert dataset[0]["id"] == img_path
    assert dataset[0]["txt"] == transcript
 def process_dataset(filename):
     """Compute PHOC vectors for every sample listed in *filename*.

     Returns ``(phocs, ids)``: a double-precision tensor stacking one
     PHOC row per image, and the list of sample identifiers.
     """
     ds = TextImageFromTextTableDataset(
         filename, args.img_dir, img_transform=ImageToTensor()
     )
     loader = DataLoader(ds)
     phoc_list, id_list = [], []
     for item in tqdm(loader):
         phoc_list.append(process_image(item["img"]))
         id_list.append(item["id"][0])
     stacked = torch.stack(phoc_list)
     return stacked.type("torch.DoubleTensor"), id_list
示例#5
0
 def setup(self, stage: Optional[str] = None):
     """Instantiate the datasets for the given Lightning *stage*.

     ``"fit"`` builds the training and validation datasets; ``"test"``
     builds the test dataset. Any other value — including the default
     ``None`` — raises ``ValueError``.
     """
     if stage not in ("fit", "test"):
         raise ValueError
     if stage == "fit":
         img_tr, txt_tr = self.train_transforms
         self.tr_ds = TextImageFromTextTableDataset(
             self.tr_txt_table,
             self.img_dirs,
             img_transform=img_tr,
             txt_transform=txt_tr,
         )
         self.va_ds = TextImageFromTextTableDataset(
             self.va_txt_table,
             self.img_dirs,
             img_transform=self.val_transforms,
             txt_transform=txt_tr,
         )
     else:  # stage == "test"
         self.te_ds = ImageFromListDataset(
             self.te_img_list,
             img_dirs=self.img_dirs,
             img_transform=self.test_transforms,
         )
示例#6
0
    # Load checkpoint
    # A training checkpoint stores the weights under the 'model' key
    # (alongside 'optimizer'); otherwise the file is the state dict itself.
    ckpt = torch.load(args.checkpoint)
    if 'model' in ckpt and 'optimizer' in ckpt:
        model.load_state_dict(ckpt['model'])
    else:
        model.load_state_dict(ckpt)

    # Ensure parameters are in the correct device
    model.eval()
    # args.gpu is 1-based; 0 selects the CPU.
    if args.gpu > 0:
        model = model.cuda(args.gpu - 1)
    else:
        model = model.cpu()

    # Dataset of (image, transcript) pairs read from a ground-truth table.
    dataset = TextImageFromTextTableDataset(args.gt_file,
                                            args.img_dir,
                                            img_transform=ImageToTensor(),
                                            txt_transform=TextToTensor(syms))
    dataset_loader = ImageDataLoader(dataset=dataset,
                                     image_channels=1,
                                     num_workers=8)

    with torch.cuda.device(args.gpu - 1):
        for batch in dataset_loader:
            if args.gpu > 0:
                x = batch['img'].data.cuda(args.gpu - 1)
            else:
                x = batch['img'].data.cpu()
            # Legacy (pre-0.4) PyTorch API: inputs are wrapped in Variable.
            y = model(torch.autograd.Variable(x)).data
            if args.add_softmax:
                y = torch.nn.functional.log_softmax(y, dim=-1)
            # NOTE(review): this call is truncated in the visible excerpt.
            dump_output_matrix(batch['id'], [y], args.output,
示例#7
0
    # A training checkpoint stores the weights under the "model" key
    # (alongside "optimizer"); otherwise the file is the state dict itself.
    ckpt = torch.load(args.checkpoint)
    if "model" in ckpt and "optimizer" in ckpt:
        model.load_state_dict(ckpt["model"])
    else:
        model.load_state_dict(ckpt)

    # Ensure parameters are in the correct device
    model.eval()
    # args.gpu is 1-based; 0 selects the CPU.
    if args.gpu > 0:
        model = model.cuda(args.gpu - 1)
    else:
        model = model.cpu()

    dataset = TextImageFromTextTableDataset(
        args.gt_file,
        args.img_dir,
        img_transform=transforms.vision.ToTensor(),
        txt_transform=transforms.text.ToTensor(syms),
    )
    dataset_loader = ImageDataLoader(dataset=dataset,
                                     image_channels=1,
                                     num_workers=8)

    # NOTE(review): this import appears unused in the visible excerpt.
    import sys

    with torch.cuda.device(args.gpu - 1):
        for batch in dataset_loader:
            if args.gpu > 0:
                x = batch["img"].data.cuda(args.gpu - 1)
            else:
                x = batch["img"].data.cpu()
            # Legacy (pre-0.4) PyTorch API: inputs are wrapped in Variable.
            y = model(torch.autograd.Variable(x))
    add_argument('model_checkpoint', help='Filepath of the model checkpoint')
    add_argument('output',
                 type=argparse.FileType('w'),
                 help='Filepath of the output file')
    args = args()

    # PHOC dimensionality: sum of the pyramid levels times the symbol count.
    syms = laia.utils.SymbolsTable(args.syms)
    phoc_size = sum(args.phoc_levels) * len(syms)
    model = build_dortmund_model(phoc_size)
    log.info('Model has {} parameters',
             sum(param.data.numel() for param in model.parameters()))
    model.load_state_dict(torch.load(args.model_checkpoint))
    # args.gpu is 1-based; 0 selects the CPU.
    model = model.cuda(args.gpu - 1) if args.gpu > 0 else model.cpu()
    model.eval()

    queries_dataset = TextImageFromTextTableDataset(
        args.queries, args.img_dir, img_transform=ImageToTensor())
    queries_loader = DataLoader(queries_dataset)

    def process_image(sample):
        # Forward a single image and return its log-sigmoid PHOC scores as
        # a CPU tensor with singleton dimensions squeezed out.
        sample = Variable(sample, requires_grad=False)
        sample = sample.cuda(args.gpu - 1) if args.gpu > 0 else sample.cpu()
        phoc = torch.nn.functional.logsigmoid(model(sample))
        return phoc.data.cpu().squeeze()

    # Predict PHOC vectors
    # NOTE(review): `samples` is never appended to in the visible excerpt.
    phocs = []
    labels = []
    samples = []
    for query in tqdm(queries_loader):
        phocs.append(process_image(query['img']))
        labels.append(query['txt'][0])
示例#9
0
    add_argument('gt_txt', help='Transcription of each image')
    add_argument('model_checkpoint', help='Filepath of the model checkpoint')
    add_argument('output', type=argparse.FileType('w'),
                 help='Filepath of the output file')
    args = args()

    # PHOC dimensionality: sum of the pyramid levels times the symbol count.
    syms = laia.utils.SymbolsTable(args.syms)
    phoc_size = sum(args.phoc_levels) * len(syms)
    model = build_dortmund_model(phoc_size)
    log.info('Model has {} parameters',
             sum(param.data.numel() for param in model.parameters()))
    model.load_state_dict(torch.load(args.model_checkpoint))
    # args.gpu is 1-based; 0 selects the CPU.
    model = model.cuda(args.gpu - 1) if args.gpu > 0 else model.cpu()
    model.eval()

    dataset = TextImageFromTextTableDataset(
        args.gt_txt, args.img_dir, img_transform=ImageToTensor())
    loader = DataLoader(dataset)

    def process_image(sample):
        # Forward a single image and return its sigmoid PHOC scores as a
        # NumPy array; the indexing below assumes a single-image batch.
        sample = Variable(sample, requires_grad=False)
        sample = sample.cuda(args.gpu - 1) if args.gpu > 0 else sample.cpu()
        phoc = torch.nn.functional.sigmoid(model(sample))
        return phoc.data.cpu().numpy()

    # Predict PHOC vectors
    # Output format: one line per sample, "<id> <p1> <p2> ...", each score
    # printed with 12 significant digits.
    for query in tqdm(loader):
        phoc = process_image(query['img'])
        print(query['id'][0], file=args.output, end='')
        for j in range(phoc.shape[1]):
          print(' %.12g' % phoc[0, j], file=args.output, end='')
        print('', file=args.output)
    # Function-scope imports for this tool's laia dependencies.
    import laia.random
    from laia.data import TextImageFromTextTableDataset, ImageDataLoader
    from laia.plugins.arguments import add_argument, add_defaults, args

    add_defaults("seed")
    add_argument("--num_images",
                 type=int,
                 help="Show only this number of images")
    add_argument("--shuffle",
                 action="store_true",
                 help="Shuffle the list of images")
    add_argument("img_dir", help="Directory containing images")
    add_argument("txt_table", help="Transcriptions of each image")
    args = args()
    # Seed the RNGs so that --shuffle produces a reproducible order.
    laia.random.manual_seed(args.seed)

    dataset = TextImageFromTextTableDataset(
        args.txt_table, args.img_dir, img_transform=DortmundImageToTensor())
    dataset_loader = ImageDataLoader(dataset=dataset,
                                     image_channels=1,
                                     shuffle=args.shuffle)

    # Display each image with its transcription as the plot title,
    # stopping after --num_images images when that limit is given.
    for i, batch in enumerate(dataset_loader, 1):
        if args.num_images and i > args.num_images:
            break
        # Note: batch['img'] is a PaddedTensor
        img = batch["img"].data.squeeze().numpy()
        imgplt = plt.imshow(img, cmap="gray")
        imgplt.axes.set_title(" ".join(batch["txt"][0]))
        plt.show()
    # Load the model definition from the training directory, then restore
    # its weights from the requested checkpoint.
    model = ModelLoader(args.train_path,
                        filename=args.model_filename,
                        device=device).load()
    if model is None:
        log.error("Could not find the model")
        exit(1)
    state = CheckpointLoader(device=device).load_by(
        os.path.join(args.train_path, args.checkpoint))
    # args.source selects whether the checkpoint holds a bare model state
    # dict ("model") or a full experiment state that wraps one.
    model.load_state_dict(state if args.source ==
                          "model" else Experiment.get_model_state_dict(state))
    model = model.to(device)
    model.eval()

    dataset = TextImageFromTextTableDataset(
        args.txt_table,
        args.img_dirs,
        img_transform=ImageToTensor(),
        txt_transform=transforms.text.ToTensor(syms),
    )

    dataset_loader = ImageDataLoader(
        dataset=dataset,
        image_channels=1,
        batch_size=args.batch_size,
        num_workers=multiprocessing.cpu_count(),
    )

    # Feeders extract the relevant entries from each batch dict; the image
    # feeder also moves its tensor onto the target device.
    batch_input_fn = ImageFeeder(device=device,
                                 parent_feeder=ItemFeeder("img"))
    batch_target_fn = ItemFeeder("txt")
    batch_id_fn = ItemFeeder("id")
示例#12
0
        # NOTE(review): the enclosing branch is cut off above this excerpt —
        # this path builds a fresh optimizer and persists the trainer setup.
        optimizer = SGD(model.parameters(),
                        lr=args.learning_rate,
                        momentum=args.momentum)
        parameters = {
            'model': model,
            'criterion': None,  # Set automatically by HtrEngineWrapper
            'optimizer': optimizer,
            'batch_target_fn': ItemFeeder('txt'),
            'progress_bar': 'Train' if args.show_progress_bar else None
        }
        trainer = Trainer(**parameters)
        TrainerSaver(args.train_path).save(Trainer, **parameters)

    tr_ds = TextImageFromTextTableDataset(
        args.tr_txt_table,
        args.img_dir,
        img_transform=DortmundImageToTensor(fixed_height=args.fixed_height),
        txt_transform=TextToTensor(syms))

    # When --train_samples_per_epoch is set, draw a fixed number of samples
    # per epoch via the sampler instead of shuffling the whole dataset.
    tr_ds_loader = ImageDataLoader(
        dataset=tr_ds,
        image_channels=1,
        batch_size=args.batch_size,
        num_workers=mp.cpu_count(),
        shuffle=not bool(args.train_samples_per_epoch),
        sampler=FixedSizeSampler(tr_ds, args.train_samples_per_epoch)
        if args.train_samples_per_epoch else None)

    # Set all these separately because they might change between executions
    trainer.iterations_per_update = args.iterations_per_update
    trainer.set_data_loader(tr_ds_loader)
def test_text_image_from_text_table_dataset_empty():
    """An empty text table produces a dataset with zero samples."""
    empty_dataset = TextImageFromTextTableDataset([])
    assert len(empty_dataset) == 0