def decode(out):
    """Convert the output maps in ``out`` into boxes and landmark lists.

    The ``'hm'`` map is passed through ``VisionKit.nms(..., kernel=3)``;
    every cell of the result that is non-zero and not below
    ``cfg.threshold`` yields one box and one landmark list.

    The tensors stored in ``out`` are left untouched: squeezing is done
    out of place and the threshold is applied through a mask rather than
    by writing into the map.

    Args:
        out: Mapping providing the entries ``'hm'``, ``'wh'``, ``'off'``
            and ``'lm'``. Each value must support ``.squeeze()``; after
            squeezing, ``off`` and ``wh`` are indexed as
            ``[channel][row, col]`` with channels 0-1 and ``lm`` with
            channels 0-9.
            NOTE(review): presumably torch tensors with a batch
            dimension of 1 -- confirm against the caller.

    Returns:
        A ``(bboxes, landmarks)`` tuple of two parallel lists. Each
        ``bboxes`` item is ``[left, top, right, bottom]``; each
        ``landmarks`` item is a flat list of ten values
        ``[x0, y0, ..., x4, y4]``.
    """
    # Factor applied to grid centres and decoded sizes.
    # NOTE(review): presumably the network's output stride -- confirm.
    scale = 4

    hm = out['hm']
    wh = out['wh']
    off = out['off']
    lm = out['lm']

    hm = VisionKit.nms(hm, kernel=3)

    # Out-of-place squeeze so the caller's tensors keep their shape.
    hm = hm.squeeze().numpy()
    off = off.squeeze()
    wh = wh.squeeze()
    lm = lm.squeeze()

    # Same cells as "zero everything below the threshold, then take the
    # non-zero ones", but without writing into the array, which shares
    # memory with the NMS result.
    keep = np.logical_not(hm < cfg.threshold) & (hm != 0)
    rows, cols = np.nonzero(keep)

    bboxes = []
    landmarks = []
    for row, col in zip(rows, cols):
        # The second-axis index drives the x centre, the first-axis index
        # the y centre; channel 0 of ``off``/``wh`` pairs with x/width.
        cx = (off[0][row, col] + col) * scale
        cy = (off[1][row, col] + row) * scale
        width = np.exp(wh[0][row, col]) * scale
        height = np.exp(wh[1][row, col]) * scale

        left = cx - width / 2
        top = cy - height / 2
        right = cx + width / 2
        bottom = cy + height / 2
        bboxes.append([left, top, right, bottom])

        # Landmark channels come in (x, y) pairs, expressed relative to
        # the box's top-left corner and scaled by the box size.
        points = []
        for i in range(0, 10, 2):
            points.append(lm[i][row, col] * width + left)
            points.append(lm[i + 1][row, col] * height + top)
        landmarks.append(points)

    return bboxes, landmarks
def visualize(im, bboxes, landmarks):
    """Forward the image, boxes and landmarks to ``VisionKit.visualize``.

    The call always passes ``skip=2``.
    NOTE(review): presumably 2 is the number of values per landmark
    point -- confirm against VisionKit.

    Args:
        im: Image handed to ``VisionKit.visualize`` unchanged.
        bboxes: Boxes handed through unchanged.
        landmarks: Landmarks handed through unchanged.

    Returns:
        Whatever ``VisionKit.visualize`` returns.
    """
    rendered = VisionKit.visualize(im, bboxes, landmarks, skip=2)
    return rendered
def postprocess(bboxes, landmarks, params):
    """Run boxes and landmarks through ``VisionKit.letterbox_inverse``.

    ``params`` is unpacked into the leading positional arguments of the
    call, followed by ``bboxes``, ``landmarks`` and ``skip=2``.

    Args:
        bboxes: Boxes to convert.
        landmarks: Landmarks to convert.
        params: Iterable of leading arguments for
            ``VisionKit.letterbox_inverse``.
            NOTE(review): presumably the ``params`` returned by
            ``preprocess`` -- confirm against the caller.

    Returns:
        A ``(bboxes, landmarks)`` tuple holding the two values produced
        by ``VisionKit.letterbox_inverse``.
    """
    restored_boxes, restored_landmarks = VisionKit.letterbox_inverse(
        *params, bboxes, landmarks, skip=2
    )
    return restored_boxes, restored_landmarks
def preprocess(im):
    """Letterbox ``im`` with ``VisionKit.letterbox`` using ``cfg.insize``.

    Args:
        im: Image handed to ``VisionKit.letterbox``.

    Returns:
        A ``(new_im, params)`` tuple: the first value produced by
        ``VisionKit.letterbox`` and a list of every value after the
        third. The second and third values are discarded.
    """
    letterboxed, _second, _third, *remaining = VisionKit.letterbox(
        im, cfg.insize
    )
    return letterboxed, remaining