def __call__(self, batch):
    """Collate ``(image, label)`` pairs into one stacked image tensor.

    Each image is passed through ``resizePadding`` with the collator's
    ``imgW`` and ``imgH``; the results are stacked along a new leading
    axis.

    Args:
        batch: Iterable of ``(image, label)`` pairs.

    Returns:
        A ``(images, labels)`` tuple, where ``images`` is the stacked
        tensor and ``labels`` is a tuple of the labels in batch order.

    Raises:
        ValueError: If ``batch`` is empty or its items are not pairs
            (the ``zip(*batch)`` unpacking fails).
    """
    images, labels = zip(*batch)
    # Stacking needs equally shaped tensors; assumes resizePadding yields
    # a fixed-size tensor for a given (imgW, imgH) -- TODO confirm.
    resized = [
        resizePadding(image, self.imgW, self.imgH) for image in images
    ]
    return torch.stack(resized, 0), labels
def predict(self, image):
    """Run the model on a single image and decode the result.

    Args:
        image: Array accepted by ``Image.fromarray``.

    Returns:
        The ``(sim_pred, sent_prob)`` pair produced by ``self.decode``.
    """
    pil_image = Image.fromarray(image)
    # The width argument is None; presumably resizePadding then derives
    # the width from the target height -- TODO confirm against utils.
    tensor = utils.resizePadding(pil_image, None, self.height)
    # Add a leading batch axis of size 1 before moving to the device.
    model_input = tensor.unsqueeze(0).to(self.device)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        preds = self.model(model_input)
    preds = preds.squeeze(1)
    sim_pred, sent_prob = self.decode(preds)
    return sim_pred, sent_prob
def predicts(self, images):
    """Run the model on several images in one batch and decode each.

    All images are resized/padded to a common width (computed by
    ``utils.maxWidth`` from their sizes and ``self.height``) so they can
    be stacked into a single tensor.

    Args:
        images: Sequence of arrays accepted by ``Image.fromarray``.

    Returns:
        A list with one ``(sim_pred, sent_prob)`` tuple per input image,
        in input order. An empty input yields an empty list.
    """
    pil_images = [Image.fromarray(image) for image in images]
    if not pil_images:
        # Nothing to stack or feed to the model.
        return []

    sizes = [pil_image.size for pil_image in pil_images]
    max_width = utils.maxWidth(sizes, self.height)
    tensors = [
        utils.resizePadding(pil_image, max_width, self.height)
        for pil_image in pil_images
    ]
    model_input = torch.stack(tensors, 0).to(self.device)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        preds = self.model(model_input)

    results = []
    for index in range(len(tensors)):
        # The sample index lives on the second axis of the model output.
        sim_pred, sent_prob = self.decode(preds[:, index, :])
        results.append((sim_pred, sent_prob))
    return results
opt = parser.parse_args()

# Read the alphabet inside a context manager so the file handle is closed
# deterministically. The platform default encoding is kept unchanged.
with open(opt.alphabet) as alphabet_file:
    alphabet = alphabet_file.read().rstrip()

# One class per alphabet character plus one extra
# (presumably the CTC blank -- TODO confirm).
nclass = len(alphabet) + 1
# Passed to CRNN below; 3 lines up with the RGB conversion of the input
# image (presumably the input channel count -- TODO confirm).
nc = 3

model = crnn.CRNN(opt.imgH, nc, nclass, 256)
if torch.cuda.is_available():
    model = model.cuda()
print('loading pretrained model from %s' % opt.model)
# Weights are deserialized onto the CPU and then copied into the model's
# existing parameters, wherever those currently live.
model.load_state_dict(torch.load(opt.model, map_location='cpu'))

converter = strLabelConverter(alphabet, ignore_case=False)

image = Image.open(opt.img).convert('RGB')
image = resizePadding(image, opt.imgW, opt.imgH)
if torch.cuda.is_available():
    image = image.cuda()
# Add a leading batch axis of size 1.
image = image.unsqueeze(0)

model.eval()

start_time = time.time()
# Inference only: skip building the autograd graph.
with torch.no_grad():
    preds = model(image)

# Per position: the highest softmax probability and the index achieving it.
values, prob = softmax(preds, dim=-1).max(2)
# Keep positions whose winning index is non-zero, then average their
# probabilities into a single confidence value.
preds_idx = (prob > 0).nonzero()
sent_prob = values[preds_idx[:, 0], preds_idx[:, 1]].mean().item()