Example #1
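End-to-end detection and recognition of a single image: the input frame is rescaled and padded, run through the WPOD detector, the detected plates are rectified with reconstruct_plates and plotted with matplotlib, and each plate is passed to recognize_plate for OCR.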
def detect_plate(wpod, vocab, ocr, raw, dims, threshold, plt_hw, beam,
                 beam_size, context):
    h = raw.shape[0]
    w = raw.shape[1]
    f = min(288 * max(h, w) / min(h, w), 608) / min(h, w)
    ts = time.time()
    img = mx.image.imresize(raw,
                            int(w * f) + (0 if w % 16 == 0 else 16 - w % 16),
                            int(h * f) + (0 if h % 16 == 0 else 16 - h % 16))
    x = color_normalize(img).transpose((2, 0, 1)).expand_dims(0)
    y = wpod(x.as_in_context(context))
    probs = y[0, :, :, 0]
    affines = y[0, :, :, 2:]
    labels = plate_labels(img, probs, affines, dims, 16, threshold)
    plates = reconstruct_plates(raw, [pts for pts, _ in labels],
                                (plt_hw[1], plt_hw[0]))
    print("wpod profiling: %f" % (time.time() - ts))
    plt.subplot(math.ceil((len(plates) + 2) / 2), 2, 1)
    visualize(img, [(pts.reshape((-1)).asnumpy().tolist(), str(prob))
                    for pts, prob in labels])
    plt.subplot(math.ceil((len(plates) + 2) / 2), 2, 2)
    visualize(probs > threshold)
    for i, plate in enumerate(plates):
        plt.subplot(math.ceil((len(plates) + 2) / 2), 2, i + 3)
        visualize(plate)
        print("plate[%d]:" % i)
        recognize_plate(vocab, ocr, plate, beam, beam_size, context)
    plt.show()
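The resize arithmetic at the top of the function is easy to check in isolation. Below is a minimal standalone sketch in plain Python (the resize_dims name and the 720x1280 input are illustrative only) that reproduces the same scale factor and padding terms:

def resize_dims(h, w, stride=16):
    # Same scale factor f as detect_plate above.
    f = min(288 * max(h, w) / min(h, w), 608) / min(h, w)
    # The padding term is taken from the original dimension modulo the stride,
    # exactly as in the call to mx.image.imresize above.
    new_w = int(w * f) + (0 if w % stride == 0 else stride - w % stride)
    new_h = int(h * f) + (0 if h % stride == 0 else stride - h % stride)
    return new_h, new_w

print(resize_dims(720, 1280))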
Example #2
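Beam-search OCR as a class method: the plate image is encoded once, candidate token sequences are extended step by step with the decoder, and the best sequence is returned as a string together with a softmax-normalized confidence.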
def _recognize_plate(self, img):
    x = color_normalize(img).transpose((2, 0, 1)).expand_dims(0)
    enc_y, self_attn = self.ocr.encode(x.as_in_context(self.context))
    # Beam search: each candidate is a (token sequence, cumulative log-prob) pair.
    sequences = [([self.vocab.char2idx("<GO>")], 0.0)]
    while True:
        candidates = []
        for seq, score in sequences:
            if (seq[-1] == self.vocab.char2idx("<EOS>")
                    or len(seq) >= self.seq_len + 2):
                candidates.append((seq, score))
            else:
                tgt = mx.nd.array(seq, ctx=self.context).reshape((1, -1))
                tgt_len = mx.nd.array([len(seq)], ctx=self.context)
                y, context_attn = self.ocr.decode(tgt, tgt_len, enc_y)
                probs = mx.nd.softmax(y, axis=2)
                beam = probs[0, -1].topk(k=self.beam_size, ret_typ="both")
                for i in range(self.beam_size):
                    candidates.append(
                        (seq + [int(beam[1][i].asscalar())],
                         score + math.log(beam[0][i].asscalar())))
        # Stop once every beam has terminated (no new candidates were generated).
        if len(candidates) <= len(sequences):
            break
        sequences = sorted(candidates,
                           key=lambda tup: tup[1],
                           reverse=True)[:self.beam_size]
    scores = mx.nd.array([score for _, score in sequences],
                         ctx=self.context)
    probs = mx.nd.softmax(scores)
    # Drop the first (<GO>) and last token, and report the softmax score
    # of the best beam.
    return "".join([
        self.vocab.idx2char(token) for token in sequences[0][0][1:-1]
    ]), probs[0].asscalar()
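The loop above is a standard beam search. A toy, self-contained version over a fixed next-character distribution shows the same control flow; the vocabulary, probabilities, beam width, and length limit below are invented purely for illustration:

import math

GO, EOS = "<GO>", "<EOS>"
# Deliberately trivial stand-in for the decoder: the next-character
# distribution is the same at every step.
next_probs = {"A": 0.6, EOS: 0.3, "B": 0.1}
beam_size, max_len = 2, 4

sequences = [([GO], 0.0)]  # (token sequence, cumulative log-probability)
while True:
    candidates = []
    for seq, score in sequences:
        if seq[-1] == EOS or len(seq) >= max_len + 2:
            candidates.append((seq, score))  # finished beams are carried over
        else:
            top = sorted(next_probs.items(), key=lambda kv: kv[1],
                         reverse=True)[:beam_size]
            for tok, p in top:
                candidates.append((seq + [tok], score + math.log(p)))
    if len(candidates) <= len(sequences):  # every beam has terminated
        break
    sequences = sorted(candidates, key=lambda t: t[1],
                       reverse=True)[:beam_size]

print(sequences[0])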
Example #3
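Dataset transform for OCR training: a real sample (or a synthetic plate from fake_plate when no data is given) is augmented, warped to the output size with slightly jittered corner points, normalized, and returned with its label as character indices.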
def __call__(self, data):
    if data:
        # Real sample: load the image, augment it, and crop out the plate.
        img = load_image(data[0])
        img, pts = augment_sample(img, data[1], self._dims, 0.0)
        img = reconstruct_plates(img, [mx.nd.array(pts).reshape((2, 4))],
                                 (self._out_hw[1], self._out_hw[0]))[0]
        pts = [
            val + random.uniform(-0.1, 0.1)
            for val in [0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0]
        ]
        lbl = data[2]
    else:
        # No sample given: synthesize a plate image and its label.
        img, lbl = fake_plate(self._smudge)
        pts = [
            val + random.uniform(-0.1, 0.1)
            for val in [0.0, 1.0, 1.0, 0.0, 0.0, 0.0, 1.0, 1.0]
        ]
        img, pts = augment_sample(img, pts, self._dims, 0.0)
    # Warp the plate to the output size using the (jittered) corner points.
    plate = reconstruct_plates(img, [mx.nd.array(pts).reshape((2, 4))],
                               (self._out_hw[1], self._out_hw[0]))[0]
    plate = color_normalize(plate)
    return (plate.transpose((2, 0, 1)).expand_dims(0),
            [self._vocab.char2idx(ch) for ch in lbl],
            len(lbl))
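The label is returned as a list of character indices through char2idx. The project's vocabulary class is not shown on this page; a hypothetical minimal stand-in, only to make the encode/decode round trip concrete, could look like this:

class ToyVocab:
    def __init__(self, chars="0123456789ABCDEFGHJKLMNPQRSTUVWXYZ"):
        # Two special tokens plus the plate alphabet; the real class may differ.
        self._chars = ["<GO>", "<EOS>"] + list(chars)
        self._index = {ch: i for i, ch in enumerate(self._chars)}

    def char2idx(self, ch):
        return self._index[ch]

    def idx2char(self, idx):
        return self._chars[int(idx)]

vocab = ToyVocab()
encoded = [vocab.char2idx(ch) for ch in "AB123"]
print(encoded, "".join(vocab.idx2char(i) for i in encoded))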
Example #4
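Dataset transform for detector training: with probability self._fake a synthetic plate is pasted over the annotated one, the sample is augmented and normalized, and the ground-truth grid from object_label is returned alongside the image batch.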
def __call__(self, data):
    img = load_image(data[0])
    if random.random() < self._fake:
        # Occasionally paste a synthetic plate over the annotated one.
        fake, _ = fake_plate(self._smudge)
        img = apply_plate(img, data[1], fake)
    img, pts = augment_sample(img, data[1], self._dims)
    img = color_normalize(img)
    lbl = object_label(pts, self._dims, 16)
    return img.transpose((2, 0, 1)).expand_dims(0), lbl.expand_dims(0)
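Both training transforms end by turning the HWC image into a single-image NCHW batch. A quick shape check with a dummy array (assuming mxnet is installed; the 208x208 size is arbitrary):

import mxnet as mx

img = mx.nd.zeros((208, 208, 3))                 # H x W x C, like a loaded image
batch = img.transpose((2, 0, 1)).expand_dims(0)  # C x H x W, then add a batch axis
print(batch.shape)                               # (1, 3, 208, 208)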
Example #5
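The same detection pipeline as Example #1, but packaged as a method that returns (plate image, confidence) pairs instead of plotting and printing its results.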
def _detect_plates(self, raw):
    h = raw.shape[0]
    w = raw.shape[1]
    f = min(288 * max(h, w) / min(h, w), 608) / min(h, w)
    img = mx.image.imresize(
        raw,
        int(w * f) + (0 if w % 16 == 0 else 16 - w % 16),
        int(h * f) + (0 if h % 16 == 0 else 16 - h % 16)
    )
    x = color_normalize(img).transpose((2, 0, 1)).expand_dims(0)
    y = self.wpod(x.as_in_context(self.context))
    probs = y[0, :, :, 0]
    affines = y[0, :, :, 2:]
    labels = plate_labels(img, probs, affines, self.dims, 16, self.threshold)
    plates = reconstruct_plates(raw, [pts for pts, _ in labels],
                                (self.plt_hw[1], self.plt_hw[0]))
    return [(plates[i], labels[i][1].item()) for i in range(len(labels))]
Example #6
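Standalone OCR routine that supports both beam search and greedy decoding, selected by the beam flag; decoded sequences, scores, and profiling times are printed to stdout.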
def recognize_plate(vocab, ocr, plate, beam, beam_size, context):
    ts = time.time()
    x = color_normalize(plate).transpose((2, 0, 1)).expand_dims(0)
    enc_y, self_attn = ocr.encode(x.as_in_context(context))
    if beam:
        sequences = [([vocab.char2idx("<GO>")], 0.0)]
        while True:
            candidates = []
            for seq, score in sequences:
                if seq[-1] == vocab.char2idx("<EOS>"):
                    candidates.append((seq, score))
                else:
                    tgt = mx.nd.array(seq, ctx=context).reshape((1, -1))
                    tgt_len = mx.nd.array([len(seq)], ctx=context)
                    y, context_attn = ocr.decode(tgt, tgt_len, enc_y)
                    probs = mx.nd.softmax(y, axis=2)
                    # Top-k expansion of the last decoded position (kept in a
                    # separate name so the beam flag is not shadowed).
                    topk = probs[0, -1].topk(k=beam_size, ret_typ="both")
                    for i in range(beam_size):
                        candidates.append(
                            (seq + [int(topk[1][i].asscalar())],
                             score + math.log(topk[0][i].asscalar())))
            if len(candidates) <= len(sequences):
                break
            sequences = sorted(candidates,
                               key=lambda tup: tup[1],
                               reverse=True)[:beam_size]
        scores = mx.nd.array([score for _, score in sequences], ctx=context)
        probs = mx.nd.softmax(scores)
        print("ocr profiling: %f" % (time.time() - ts))
        for i, (seq, score) in enumerate(sequences):
            print("".join([vocab.idx2char(token) for token in seq[1:-1]]),
                  score, probs[i].asscalar())
            print(seq)
    else:
        sequence = [vocab.char2idx("<GO>")]
        while True:
            tgt = mx.nd.array(sequence, ctx=context).reshape((1, -1))
            tgt_len = mx.nd.array([len(sequence)], ctx=context)
            y, context_attn = ocr.decode(tgt, tgt_len, enc_y)
            index = mx.nd.argmax(y, axis=2)
            char_token = int(index[0, -1].asscalar())
            sequence += [char_token]
            if char_token == vocab.char2idx("<EOS>"):
                break
            print(vocab.idx2char(char_token), end="", flush=True)
        print("")
        print(sequence)
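The non-beam branch is plain greedy decoding: keep appending the argmax token until <EOS> is produced. The same control flow with a made-up, deterministic stand-in for the decoder:

GO, EOS = 0, 1

def fake_next_token(seq):
    # Pretend the decoder always reads the plate as tokens 2, 3, 4 and then stops.
    script = [2, 3, 4, EOS]
    return script[len(seq) - 1]

sequence = [GO]
while True:
    token = fake_next_token(sequence)
    sequence.append(token)
    if token == EOS:
        break
print(sequence)  # [0, 2, 3, 4, 1]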