def setup(self, _):
    """Build the training and validation datasets from ``self.root``.

    Expects ``tr.gt``/``va.gt`` ground-truth tables next to the ``tr``/``va``
    image directories under the root path.

    NOTE(review): the validation dataset reuses ``self.train_transforms`` —
    confirm a separate validation transform is not intended here.
    """
    make_split = TextImageFromTextTableDataset
    self.tr_ds = make_split(
        self.root / "tr.gt",
        self.root / "tr",
        img_transform=self.train_transforms,
    )
    self.va_ds = make_split(
        self.root / "va.gt",
        self.root / "va",
        img_transform=self.train_transforms,
    )
def process_dataset(filename):
    """Compute PHOC vectors for every image listed in *filename*.

    Returns a tuple ``(phocs, ids)``: a single numpy array with one PHOC
    row per sample, and the list of matching sample ids.
    """
    loader = DataLoader(
        TextImageFromTextTableDataset(
            filename, args.img_dir, img_transform=ImageToTensor()
        )
    )
    phoc_rows, sample_ids = [], []
    for batch in tqdm(loader):
        phoc_rows.append(process_image(batch["img"]))
        # DataLoader wraps each field in a batch; take the single id.
        sample_ids.append(batch["id"][0])
    return np.concatenate(phoc_rows), sample_ids
def test_text_image_from_text_table_dataset(tmpdir, monkeypatch):
    """A one-line "<img_path> <transcript>" table yields one sample dict."""
    # Stub out image loading so no real image file is needed.
    monkeypatch.setattr(ImageDataset, "__getitem__", lambda *_: {"img": None})
    img_file = tmpdir / "foo"
    img_file.write(None)
    transcript = "12 3 4"
    ds = TextImageFromTextTableDataset([f"{img_file} {transcript}"])
    assert len(ds) == 1
    sample = ds[0]
    assert list(sample.keys()) == ["img", "txt", "id"]
    assert sample["id"] == img_file
    assert sample["txt"] == transcript
def process_dataset(filename):
    """Compute PHOC vectors for every image listed in *filename*.

    Returns a tuple ``(phocs, ids)``: a stacked double-precision tensor
    with one PHOC row per sample, and the list of matching sample ids.
    """
    dataset = TextImageFromTextTableDataset(
        filename, args.img_dir, img_transform=ImageToTensor()
    )
    phoc_rows = []
    sample_ids = []
    for batch in tqdm(DataLoader(dataset)):
        phoc_rows.append(process_image(batch["img"]))
        # DataLoader wraps each field in a batch; take the single id.
        sample_ids.append(batch["id"][0])
    stacked = torch.stack(phoc_rows).type("torch.DoubleTensor")
    return stacked, sample_ids
def setup(self, stage: Optional[str] = None):
    """Instantiate the datasets needed for the given stage.

    ``"fit"`` builds the training and validation datasets, ``"test"``
    builds the test dataset; any other value (including the default
    ``None``) raises ``ValueError``.
    """
    if stage == "fit":
        fit_img_transform, fit_txt_transform = self.train_transforms
        self.tr_ds = TextImageFromTextTableDataset(
            self.tr_txt_table,
            self.img_dirs,
            img_transform=fit_img_transform,
            txt_transform=fit_txt_transform,
        )
        # Validation shares the text transform but uses its own image
        # transform (no training-time augmentation).
        self.va_ds = TextImageFromTextTableDataset(
            self.va_txt_table,
            self.img_dirs,
            img_transform=self.val_transforms,
            txt_transform=fit_txt_transform,
        )
        return
    if stage == "test":
        self.te_ds = ImageFromListDataset(
            self.te_img_list,
            img_dirs=self.img_dirs,
            img_transform=self.test_transforms,
        )
        return
    raise ValueError
# Load checkpoint ckpt = torch.load(args.checkpoint) if 'model' in ckpt and 'optimizer' in ckpt: model.load_state_dict(ckpt['model']) else: model.load_state_dict(ckpt) # Ensure parameters are in the correct device model.eval() if args.gpu > 0: model = model.cuda(args.gpu - 1) else: model = model.cpu() dataset = TextImageFromTextTableDataset(args.gt_file, args.img_dir, img_transform=ImageToTensor(), txt_transform=TextToTensor(syms)) dataset_loader = ImageDataLoader(dataset=dataset, image_channels=1, num_workers=8) with torch.cuda.device(args.gpu - 1): for batch in dataset_loader: if args.gpu > 0: x = batch['img'].data.cuda(args.gpu - 1) else: x = batch['img'].data.cpu() y = model(torch.autograd.Variable(x)).data if args.add_softmax: y = torch.nn.functional.log_softmax(y, dim=-1) dump_output_matrix(batch['id'], [y], args.output,
# Load the model weights: a checkpoint may be a full training state
# (holding both "model" and "optimizer" entries) or a bare state_dict.
ckpt = torch.load(args.checkpoint)
if "model" in ckpt and "optimizer" in ckpt:
    model.load_state_dict(ckpt["model"])
else:
    model.load_state_dict(ckpt)
# Ensure parameters are in the correct device
model.eval()
if args.gpu > 0:
    model = model.cuda(args.gpu - 1)  # args.gpu is 1-based; device ids are 0-based
else:
    model = model.cpu()
# Dataset yielding image tensors plus their tokenized transcriptions.
dataset = TextImageFromTextTableDataset(
    args.gt_file,
    args.img_dir,
    img_transform=transforms.vision.ToTensor(),
    txt_transform=transforms.text.ToTensor(syms),
)
dataset_loader = ImageDataLoader(dataset=dataset, image_channels=1, num_workers=8)
import sys  # NOTE(review): unused in the visible code — possibly used further down the file
with torch.cuda.device(args.gpu - 1):
    for batch in dataset_loader:
        # Move the padded image batch to the same device as the model.
        if args.gpu > 0:
            x = batch["img"].data.cuda(args.gpu - 1)
        else:
            x = batch["img"].data.cpu()
        y = model(torch.autograd.Variable(x))
add_argument('model_checkpoint', help='Filepath of the model checkpoint')
add_argument('output', type=argparse.FileType('w'),
             help='Filepath of the output file')
args = args()
syms = laia.utils.SymbolsTable(args.syms)
# PHOC dimensionality: one unit per symbol at each pyramid level.
phoc_size = sum(args.phoc_levels) * len(syms)
model = build_dortmund_model(phoc_size)
log.info('Model has {} parameters',
         sum(param.data.numel() for param in model.parameters()))
model.load_state_dict(torch.load(args.model_checkpoint))
# args.gpu is 1-based; device ids are 0-based.
model = model.cuda(args.gpu - 1) if args.gpu > 0 else model.cpu()
model.eval()
queries_dataset = TextImageFromTextTableDataset(
    args.queries, args.img_dir, img_transform=ImageToTensor())
queries_loader = DataLoader(queries_dataset)

def process_image(sample):
    # Forward one image; return its log-sigmoid PHOC as a squeezed CPU tensor.
    sample = Variable(sample, requires_grad=False)
    sample = sample.cuda(args.gpu - 1) if args.gpu > 0 else sample.cpu()
    phoc = torch.nn.functional.logsigmoid(model(sample))
    return phoc.data.cpu().squeeze()

# Predict PHOC vectors
phocs = []
labels = []
samples = []  # NOTE(review): never appended to in the visible code — verify downstream use
for query in tqdm(queries_loader):
    phocs.append(process_image(query['img']))
    labels.append(query['txt'][0])
add_argument('gt_txt', help='Transcription of each image')
add_argument('model_checkpoint', help='Filepath of the model checkpoint')
add_argument('output', type=argparse.FileType('w'),
             help='Filepath of the output file')
args = args()
syms = laia.utils.SymbolsTable(args.syms)
# PHOC dimensionality: one unit per symbol at each pyramid level.
phoc_size = sum(args.phoc_levels) * len(syms)
model = build_dortmund_model(phoc_size)
log.info('Model has {} parameters',
         sum(param.data.numel() for param in model.parameters()))
model.load_state_dict(torch.load(args.model_checkpoint))
# args.gpu is 1-based; device ids are 0-based.
model = model.cuda(args.gpu - 1) if args.gpu > 0 else model.cpu()
model.eval()
dataset = TextImageFromTextTableDataset(
    args.gt_txt, args.img_dir, img_transform=ImageToTensor())
loader = DataLoader(dataset)

def process_image(sample):
    # Forward one image; return sigmoid PHOC probabilities as a numpy array.
    sample = Variable(sample, requires_grad=False)
    sample = sample.cuda(args.gpu - 1) if args.gpu > 0 else sample.cpu()
    phoc = torch.nn.functional.sigmoid(model(sample))
    return phoc.data.cpu().numpy()

# Predict PHOC vectors, one output line per sample: "<id> v1 v2 ...".
for query in tqdm(loader):
    phoc = process_image(query['img'])
    print(query['id'][0], file=args.output, end='')
    for j in range(phoc.shape[1]):
        print(' %.12g' % phoc[0, j], file=args.output, end='')
    print('', file=args.output)
import laia.random
from laia.data import TextImageFromTextTableDataset, ImageDataLoader
from laia.plugins.arguments import add_argument, add_defaults, args

# CLI: display each image of a dataset together with its transcription.
add_defaults("seed")
add_argument("--num_images", type=int, help="Show only this number of images")
add_argument("--shuffle", action="store_true", help="Shuffle the list of images")
add_argument("img_dir", help="Directory containing images")
add_argument("txt_table", help="Transcriptions of each image")
args = args()

laia.random.manual_seed(args.seed)

dataset = TextImageFromTextTableDataset(
    args.txt_table, args.img_dir, img_transform=DortmundImageToTensor())
dataset_loader = ImageDataLoader(
    dataset=dataset, image_channels=1, shuffle=args.shuffle)

for shown, batch in enumerate(dataset_loader, 1):
    if args.num_images and shown > args.num_images:
        break
    # Note: batch["img"] is a PaddedTensor; squeeze the singleton dims
    # before handing the array to matplotlib.
    image = batch["img"].data.squeeze().numpy()
    axes_image = plt.imshow(image, cmap="gray")
    axes_image.axes.set_title(" ".join(batch["txt"][0]))
    plt.show()
# Load the model definition, then its weights from a checkpoint.
model = ModelLoader(args.train_path, filename=args.model_filename,
                    device=device).load()
if model is None:
    log.error("Could not find the model")
    exit(1)
state = CheckpointLoader(device=device).load_by(
    os.path.join(args.train_path, args.checkpoint))
# "model" checkpoints hold the state_dict directly; otherwise extract it
# from a full experiment state.
model.load_state_dict(state if args.source == "model"
                      else Experiment.get_model_state_dict(state))
model = model.to(device)
model.eval()
dataset = TextImageFromTextTableDataset(
    args.txt_table,
    args.img_dirs,
    img_transform=ImageToTensor(),
    txt_transform=transforms.text.ToTensor(syms),
)
dataset_loader = ImageDataLoader(
    dataset=dataset,
    image_channels=1,
    batch_size=args.batch_size,
    num_workers=multiprocessing.cpu_count(),
)
# Feeders that extract the relevant field from each batch dict.
batch_input_fn = ImageFeeder(device=device, parent_feeder=ItemFeeder("img"))
batch_target_fn = ItemFeeder("txt")
batch_id_fn = ItemFeeder("id")
optimizer = SGD(model.parameters(), lr=args.learning_rate,
                momentum=args.momentum)

# Trainer configuration, also persisted so the run can be resumed.
trainer_kwargs = {
    'model': model,
    'criterion': None,  # Set automatically by HtrEngineWrapper
    'optimizer': optimizer,
    'batch_target_fn': ItemFeeder('txt'),
    'progress_bar': 'Train' if args.show_progress_bar else None,
}
trainer = Trainer(**trainer_kwargs)
TrainerSaver(args.train_path).save(Trainer, **trainer_kwargs)

tr_ds = TextImageFromTextTableDataset(
    args.tr_txt_table,
    args.img_dir,
    img_transform=DortmundImageToTensor(fixed_height=args.fixed_height),
    txt_transform=TextToTensor(syms))

# With a fixed number of samples per epoch, a sampler replaces shuffling.
if args.train_samples_per_epoch:
    tr_sampler = FixedSizeSampler(tr_ds, args.train_samples_per_epoch)
else:
    tr_sampler = None
tr_ds_loader = ImageDataLoader(
    dataset=tr_ds,
    image_channels=1,
    batch_size=args.batch_size,
    num_workers=mp.cpu_count(),
    shuffle=tr_sampler is None,
    sampler=tr_sampler)

# Set all these separately because they might change between executions
trainer.iterations_per_update = args.iterations_per_update
trainer.set_data_loader(tr_ds_loader)
def test_text_image_from_text_table_dataset_empty():
    """An empty table produces a zero-length dataset."""
    empty_ds = TextImageFromTextTableDataset([])
    assert not len(empty_ds)