def detect_plate(wpod, vocab, ocr, raw, dims, threshold, plt_hw, beam, beam_size, context):
    """Detect license plates in `raw` with WPOD-NET, visualize the results
    with matplotlib, and run OCR on every reconstructed plate crop.

    Args:
        wpod: WPOD-NET detector; returns per-cell probabilities and affines.
        vocab: token vocabulary used by the OCR model.
        ocr: encoder/decoder OCR network.
        raw: input image as an MXNet NDArray (H, W, C) -- assumed from the
            shape indexing below; confirm against callers.
        dims: anchor dimension passed through to `plate_labels`.
        threshold: probability threshold for accepting a detection cell.
        plt_hw: (height, width) of the reconstructed plate crops.
        beam: if True, OCR uses beam search; otherwise greedy decoding.
        beam_size: beam width for beam search.
        context: MXNet device context.
    """
    h, w = raw.shape[0], raw.shape[1]
    # Scale factor keeps the resized short side within WPOD's working range.
    f = min(288 * max(h, w) / min(h, w), 608) / min(h, w)
    ts = time.time()
    # Pad the *scaled* size up to a multiple of 16 so it matches the
    # network stride. (The previous code added the remainder of the
    # unscaled size to the scaled size, which does not guarantee
    # divisibility by 16 after resizing.)
    sw, sh = int(w * f), int(h * f)
    img = mx.image.imresize(raw,
                            sw + (16 - sw % 16) % 16,
                            sh + (16 - sh % 16) % 16)
    x = color_normalize(img).transpose((2, 0, 1)).expand_dims(0)
    y = wpod(x.as_in_context(context))
    probs = y[0, :, :, 0]
    affines = y[0, :, :, 2:]
    labels = plate_labels(img, probs, affines, dims, 16, threshold)
    plates = reconstruct_plates(raw, [pts for pts, _ in labels],
                                (plt_hw[1], plt_hw[0]))
    print("wpod profiling: %f" % (time.time() - ts))
    # Plot grid: one cell for the annotated overview, one for the
    # thresholded probability mask, then one per reconstructed plate.
    rows = math.ceil((len(plates) + 2) / 2)
    plt.subplot(rows, 2, 1)
    visualize(img, [(pts.reshape((-1)).asnumpy().tolist(), str(prob))
                    for pts, prob in labels])
    plt.subplot(rows, 2, 2)
    visualize(probs > threshold)
    for i, plate in enumerate(plates):
        plt.subplot(rows, 2, i + 3)
        visualize(plate)
        print("plate[%d]:" % i)
        recognize_plate(vocab, ocr, plate, beam, beam_size, context)
    plt.show()
def _recognize_plate(self, img):
    """Beam-search OCR over a plate crop.

    Returns:
        (text, confidence): the decoded string of the best beam (GO/EOS
        stripped) and the softmax weight of that beam among the survivors.
    """
    batch = color_normalize(img).transpose((2, 0, 1)).expand_dims(0)
    enc_y, self_attn = self.ocr.encode(batch.as_in_context(self.context))
    eos = self.vocab.char2idx("<EOS>")
    beams = [([self.vocab.char2idx("<GO>")], 0.0)]
    while True:
        expanded = []
        for tokens, logp in beams:
            # Finished hypotheses (EOS emitted or length cap hit) are
            # carried over unchanged.
            if tokens[-1] == eos or len(tokens) >= self.seq_len + 2:
                expanded.append((tokens, logp))
                continue
            tgt = mx.nd.array(tokens, ctx=self.context).reshape((1, -1))
            tgt_len = mx.nd.array([len(tokens)], ctx=self.context)
            out, context_attn = self.ocr.decode(tgt, tgt_len, enc_y)
            dist = mx.nd.softmax(out, axis=2)
            top = dist[0, -1].topk(k=self.beam_size, ret_typ="both")
            for j in range(self.beam_size):
                expanded.append((tokens + [int(top[1][j].asscalar())],
                                 logp + math.log(top[0][j].asscalar())))
        # Nothing grew this round -> every hypothesis has terminated.
        if len(expanded) <= len(beams):
            break
        beams = sorted(expanded, key=lambda t: t[1],
                       reverse=True)[:self.beam_size]
    scores = mx.nd.array([logp for _, logp in beams], ctx=self.context)
    conf = mx.nd.softmax(scores)
    text = "".join(self.vocab.idx2char(tok) for tok in beams[0][0][1:-1])
    return text, conf[0].asscalar()
def __call__(self, data):
    """Produce one OCR training sample.

    With truthy `data = (path, points, label)` the real image is warped to
    the annotated plate; with falsy `data` a synthetic plate is generated.
    In both cases the plate corners are jittered, the crop is re-augmented
    and color-normalized.

    Returns:
        (image NCHW NDArray, list of label token ids, label length)
    """
    if data:
        img = load_image(data[0])
        img, pts = augment_sample(img, data[1], self._dims, 0.0)
        img = reconstruct_plates(img, [mx.nd.array(pts).reshape((2, 4))],
                                 (self._out_hw[1], self._out_hw[0]))[0]
        lbl = data[2]
    else:
        img, lbl = fake_plate(self._smudge)
    # Unit-square corners (x0..x3, y0..y3) with +/-0.1 jitter; previously
    # duplicated verbatim in both branches.
    pts = [v + random.uniform(-0.1, 0.1)
           for v in (0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0)]
    img, pts = augment_sample(img, pts, self._dims, 0.0)
    # Renamed from `plt`, which shadowed matplotlib.pyplot (used as `plt`
    # elsewhere in this file).
    plate = reconstruct_plates(img, [mx.nd.array(pts).reshape((2, 4))],
                               (self._out_hw[1], self._out_hw[0]))[0]
    plate = color_normalize(plate)
    return (plate.transpose((2, 0, 1)).expand_dims(0),
            [self._vocab.char2idx(ch) for ch in lbl],
            len(lbl))
def __call__(self, data):
    """Build one WPOD training sample.

    Returns:
        (augmented image as NCHW NDArray, detection label grid at stride 16,
        both with a leading batch dimension)
    """
    image = load_image(data[0])
    # With probability self._fake, paste a synthetic plate over the
    # annotated region before augmentation.
    if random.random() < self._fake:
        synthetic, _ = fake_plate(self._smudge)
        image = apply_plate(image, data[1], synthetic)
    image, corners = augment_sample(image, data[1], self._dims)
    image = color_normalize(image)
    label = object_label(corners, self._dims, 16)
    return image.transpose((2, 0, 1)).expand_dims(0), label.expand_dims(0)
def _detect_plates(self, raw):
    """Run WPOD-NET on `raw` and return a list of (plate_crop, probability)
    pairs, one per accepted detection.
    """
    h, w = raw.shape[0], raw.shape[1]
    # Scale factor keeps the resized short side within WPOD's working range.
    f = min(288 * max(h, w) / min(h, w), 608) / min(h, w)
    # Round the *scaled* size up to a multiple of 16 (the network stride).
    # The previous code padded by the remainder of the unscaled size, which
    # does not make the resized image divisible by 16.
    sw, sh = int(w * f), int(h * f)
    img = mx.image.imresize(raw,
                            sw + (16 - sw % 16) % 16,
                            sh + (16 - sh % 16) % 16)
    x = color_normalize(img).transpose((2, 0, 1)).expand_dims(0)
    y = self.wpod(x.as_in_context(self.context))
    probs = y[0, :, :, 0]
    affines = y[0, :, :, 2:]
    labels = plate_labels(img, probs, affines, self.dims, 16, self.threshold)
    plates = reconstruct_plates(raw, [pts for pts, _ in labels],
                                (self.plt_hw[1], self.plt_hw[0]))
    return [(plate, prob.item()) for plate, (_, prob) in zip(plates, labels)]
def recognize_plate(vocab, ocr, plate, beam, beam_size, context, max_len=64):
    """OCR a reconstructed plate crop and print the decoded text.

    Args:
        vocab: token vocabulary with char2idx/idx2char.
        ocr: encoder/decoder OCR network.
        plate: plate crop as an MXNet NDArray (H, W, C).
        beam: if True use beam search, otherwise greedy decoding.
        beam_size: beam width for beam search.
        context: MXNet device context.
        max_len: hard cap on decoded sequence length. Guards against a
            decoder that never emits <EOS>, which previously made both
            loops spin forever.
    """
    ts = time.time()
    x = color_normalize(plate).transpose((2, 0, 1)).expand_dims(0)
    enc_y, self_attn = ocr.encode(x.as_in_context(context))
    go = vocab.char2idx("<GO>")
    eos = vocab.char2idx("<EOS>")
    if beam:
        sequences = [([go], 0.0)]
        while True:
            candidates = []
            for seq, score in sequences:
                # Finished hypotheses (EOS or length cap) pass through.
                if seq[-1] == eos or len(seq) >= max_len:
                    candidates.append((seq, score))
                    continue
                tgt = mx.nd.array(seq, ctx=context).reshape((1, -1))
                tgt_len = mx.nd.array([len(seq)], ctx=context)
                y, context_attn = ocr.decode(tgt, tgt_len, enc_y)
                probs = mx.nd.softmax(y, axis=2)
                # `top` replaces the original reassignment of `beam`, which
                # clobbered the boolean parameter with the topk result.
                top = probs[0, -1].topk(k=beam_size, ret_typ="both")
                for i in range(beam_size):
                    candidates.append((seq + [int(top[1][i].asscalar())],
                                       score + math.log(top[0][i].asscalar())))
            # No hypothesis grew this round -> all beams terminated.
            if len(candidates) <= len(sequences):
                break
            sequences = sorted(candidates, key=lambda tup: tup[1],
                               reverse=True)[:beam_size]
        scores = mx.nd.array([score for _, score in sequences], ctx=context)
        probs = mx.nd.softmax(scores)
        print("ocr profiling: %f" % (time.time() - ts))
        for i, (seq, score) in enumerate(sequences):
            print("".join([vocab.idx2char(token) for token in seq[1:-1]]),
                  score, probs[i].asscalar())
            print(seq)
    else:
        sequence = [go]
        while len(sequence) < max_len:
            tgt = mx.nd.array(sequence, ctx=context).reshape((1, -1))
            tgt_len = mx.nd.array([len(sequence)], ctx=context)
            y, context_attn = ocr.decode(tgt, tgt_len, enc_y)
            index = mx.nd.argmax(y, axis=2)
            char_token = index[0, -1].asscalar()
            sequence += [char_token]
            if char_token == eos:
                break
            print(vocab.idx2char(char_token), end="", flush=True)
        print("")
        print(sequence)