# Imports assumed by the detector functions below; `common`, `nms`, `_nms`,
# `_topk`, and `HAS_CUDA` are project-level helpers defined elsewhere in the repo.
import typing

import numpy as np
import torch
import torch.nn.functional as F


def detect(interpreter, input_blob, output_blob, image, threshold=0.4, nms_iou=0.5):
    # TFLite inference: feed the preprocessed image and run the graph.
    interpreter.set_tensor(input_blob[0]['index'], image)
    interpreter.invoke()

    # hm, box, landmark = outputs['1028'], outputs['1029'], outputs['1027']
    # TFLite outputs are NHWC; transpose to NCHW to match the decode code below.
    lm = interpreter.get_tensor(output_blob[0]['index']).transpose((0, 3, 1, 2))   # (1, h, w, 10) -> (1, 10, h, w)
    box = interpreter.get_tensor(output_blob[1]['index']).transpose((0, 3, 1, 2))  # (1, h, w, 4)  -> (1, 4, h, w)
    hm = interpreter.get_tensor(output_blob[2]['index']).transpose((0, 3, 1, 2))   # (1, h, w, 1)  -> (1, 1, h, w)

    x = torch.from_numpy(hm).clone()
    y = torch.from_numpy(box).clone()
    z = torch.from_numpy(lm).clone()

    # Reassign by channel count so the decode does not depend on the TFLite
    # output ordering: 1 channel -> heatmap, 4 -> box offsets, 10 -> landmarks.
    for var in [x, y, z]:
        if var.shape[1] == 1:
            hm = var
        elif var.shape[1] == 4:
            box = var
        elif var.shape[1] == 10:
            landmark = var

    # Keep only local maxima of the heatmap (3x3 max-pool peak picking),
    # then take the top-1000 candidate centers.
    hm_pool = F.max_pool2d(hm, 3, 1, 1)
    scores, indices = ((hm == hm_pool).float() * hm).view(1, -1).cpu().topk(1000)
    hm_height, hm_width = hm.shape[2:]

    scores = scores.squeeze()
    indices = indices.squeeze()
    ys = list((indices / hm_width).int().data.numpy())
    xs = list((indices % hm_width).int().data.numpy())
    scores = list(scores.data.numpy())
    box = box.cpu().squeeze().data.numpy()
    landmark = landmark.cpu().squeeze().data.numpy()

    stride = 4
    objs = []
    for cx, cy, score in zip(xs, ys, scores):
        if score < threshold:
            break

        # Decode left/top/right/bottom offsets relative to the center cell,
        # then scale from heatmap coordinates to image pixels by the stride.
        x, y, r, b = box[:, cy, cx]
        xyrb = (np.array([cx, cy, cx, cy]) + [-x, -y, r, b]) * stride
        x5y5 = landmark[:, cy, cx]
        x5y5 = (common.exp(x5y5 * 4) + ([cx] * 5 + [cy] * 5)) * stride
        box_landmark = list(zip(x5y5[:5], x5y5[5:]))
        objs.append(common.BBox(0, xyrb=xyrb, score=score, landmark=box_landmark))
    return nms(objs, iou=nms_iou)
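
def example_tflite_detect(model_path="dbface.tflite", image_path="test.jpg"):
    # Hypothetical usage sketch for the TFLite detect() above, not part of the
    # original code: the model/image paths are placeholders, and preprocessing
    # is assumed to mirror the PyTorch detect() further below (pad + mean/std
    # normalization), fed as an NHWC batch of one.
    import cv2
    import tensorflow as tf

    interpreter = tf.lite.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()
    input_blob = interpreter.get_input_details()
    output_blob = interpreter.get_output_details()

    image = cv2.imread(image_path)
    image = common.pad(image)
    mean = [0.408, 0.447, 0.47]
    std = [0.289, 0.274, 0.278]
    image = ((image / 255.0 - mean) / std).astype(np.float32)
    image = image[None]  # (1, h, w, 3); the converted model is assumed NHWC

    # If the padded size differs from the model's fixed input shape,
    # interpreter.resize_tensor_input(...) would be needed before allocate_tensors().
    return detect(interpreter, input_blob, output_blob, image)
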
def detect_images_giou_with_netout(output_hm, output_tlrb, output_landmark,
                                   threshold=0.4, ibatch=0):
    stride = 4
    _, num_classes, hm_height, hm_width = output_hm.shape
    hm = output_hm[ibatch].reshape(1, num_classes, hm_height, hm_width)
    tlrb = output_tlrb[ibatch].cpu().data.numpy().reshape(
        1, num_classes * 4, hm_height, hm_width)
    landmark = output_landmark[ibatch].cpu().data.numpy().reshape(
        1, num_classes * 10, hm_height, hm_width)

    # Peak-pick the heatmap and keep the top-2000 candidates.
    nmskey = _nms(hm, 3)
    kscore, kinds, kcls, kys, kxs = _topk(nmskey, 2000)
    kys = kys.cpu().data.numpy().astype(int)
    kxs = kxs.cpu().data.numpy().astype(int)
    kcls = kcls.cpu().data.numpy().astype(int)

    key = [[], [], [], []]
    for ind in range(kscore.shape[1]):
        score = kscore[0, ind]
        if score > threshold:
            key[0].append(kys[0, ind])
            key[1].append(kxs[0, ind])
            key[2].append(score)
            key[3].append(kcls[0, ind])

    imboxs = []
    if key[0] is not None and len(key[0]) > 0:
        ky, kx = key[0], key[1]
        classes = key[3]
        scores = key[2]

        for i in range(len(kx)):
            class_ = classes[i]
            cx, cy = kx[i], ky[i]
            x1, y1, x2, y2 = tlrb[0, class_ * 4:(class_ + 1) * 4, cy, cx]
            x1, y1, x2, y2 = (np.array([cx, cy, cx, cy]) +
                              np.array([-x1, -y1, x2, y2])) * stride

            x5y5 = landmark[0, 0:10, cy, cx]
            x5y5 = np.array(common.exp(x5y5 * 4))
            x5y5 = (x5y5 + np.array([cx] * 5 + [cy] * 5)) * stride
            boxlandmark = list(zip(x5y5[:5], x5y5[5:]))
            imboxs.append(
                common.BBox(label=str(class_),
                            xyrb=common.floatv([x1, y1, x2, y2]),
                            score=scores[i].item(),
                            landmark=boxlandmark))
    return imboxs
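
def example_decode_netout(model, torch_image):
    # Hypothetical sketch, not from the original repo: decode batch item 0
    # from the raw network outputs using detect_images_giou_with_netout().
    # `model` and `torch_image` are assumed to be prepared exactly as in the
    # PyTorch detect() below, and the output order (heatmap, box, landmark)
    # follows that function.
    output_hm, output_tlrb, output_landmark = model(torch_image)
    return detect_images_giou_with_netout(output_hm, output_tlrb,
                                          output_landmark,
                                          threshold=0.4, ibatch=0)
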
def detect(model, image, threshold=0.4, nms_iou=0.5) -> typing.List[common.BBox]:
    mean = [0.408, 0.447, 0.47]
    std = [0.289, 0.274, 0.278]

    image = common.pad(image)
    image = ((image / 255.0 - mean) / std).astype(np.float32)
    image = image.transpose(2, 0, 1)

    torch_image = torch.from_numpy(image)[None]
    if HAS_CUDA:
        torch_image = torch_image.cuda()

    hm, box, landmark = model(torch_image)
    hm_pool = F.max_pool2d(hm, 3, 1, 1)
    scores, indices = ((hm == hm_pool).float() * hm).view(1, -1).cpu().topk(1000)
    hm_height, hm_width = hm.shape[2:]

    scores = scores.squeeze()
    indices = indices.squeeze()
    ys = list((indices / hm_width).int().data.numpy())
    xs = list((indices % hm_width).int().data.numpy())
    scores = list(scores.data.numpy())
    box = box.cpu().squeeze().data.numpy()
    landmark = landmark.cpu().squeeze().data.numpy()

    stride = 4
    objs = []
    for cx, cy, score in zip(xs, ys, scores):
        if score < threshold:
            break

        x, y, r, b = box[:, cy, cx]
        xyrb = (np.array([cx, cy, cx, cy]) + [-x, -y, r, b]) * stride
        x5y5 = landmark[:, cy, cx]
        x5y5 = (common.exp(x5y5 * 4) + ([cx] * 5 + [cy] * 5)) * stride
        box_landmark = list(zip(x5y5[:5], x5y5[5:]))
        objs.append(common.BBox(0, xyrb=xyrb, score=score, landmark=box_landmark))
    return nms(objs, iou=nms_iou)
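
def example_torch_detect(model, image_path="test.jpg"):
    # Hypothetical usage sketch for the PyTorch detect() above; `model` is the
    # already-constructed detection network with heatmap / box / landmark
    # heads, and the image path is a placeholder.
    import cv2

    model.eval()
    if HAS_CUDA:
        model = model.cuda()

    image = cv2.imread(image_path)
    objs = detect(model, image, threshold=0.4, nms_iou=0.5)
    print(len(objs), "faces kept after NMS")
    return objs
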
def detect(exec_net, input_blob, image, threshold=0.4, nms_iou=0.5):
    outputs = exec_net.infer(inputs={input_blob: image})
    # print('outputs:', outputs)
    # print('outputs[\'Sigmoid_526\'].shape:', outputs['Sigmoid_526'].shape)
    # print('outputs[\'Exp_527\'].shape:', outputs['Exp_527'].shape)
    # print('outputs[\'Conv_525\'].shape:', outputs['Conv_525'].shape)
    hm, box, landmark = outputs['Sigmoid_526'], outputs['Exp_527'], outputs['Conv_525']

    hm = torch.from_numpy(hm).clone()
    box = torch.from_numpy(box).clone()
    landmark = torch.from_numpy(landmark).clone()

    hm_pool = F.max_pool2d(hm, 3, 1, 1)
    scores, indices = ((hm == hm_pool).float() * hm).view(1, -1).cpu().topk(1000)
    hm_height, hm_width = hm.shape[2:]

    scores = scores.squeeze()
    indices = indices.squeeze()
    ys = list((indices / hm_width).int().data.numpy())
    xs = list((indices % hm_width).int().data.numpy())
    scores = list(scores.data.numpy())
    box = box.cpu().squeeze().data.numpy()
    landmark = landmark.cpu().squeeze().data.numpy()

    stride = 4
    objs = []
    for cx, cy, score in zip(xs, ys, scores):
        if score < threshold:
            break

        x, y, r, b = box[:, cy, cx]
        xyrb = (np.array([cx, cy, cx, cy]) + [-x, -y, r, b]) * stride
        x5y5 = landmark[:, cy, cx]
        x5y5 = (common.exp(x5y5 * 4) + ([cx] * 5 + [cy] * 5)) * stride
        box_landmark = list(zip(x5y5[:5], x5y5[5:]))
        objs.append(common.BBox(0, xyrb=xyrb, score=score, landmark=box_landmark))
    return nms(objs, iou=nms_iou)
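
def example_openvino_detect(xml_path="dbface.xml", bin_path="dbface.bin",
                            image_path="test.jpg"):
    # Hypothetical usage sketch for the OpenVINO detect() above, written
    # against the legacy Inference Engine Python API implied by
    # exec_net.infer(); the IR paths are placeholders and preprocessing is
    # assumed to mirror the PyTorch detect() (NCHW batch of one).
    import cv2
    from openvino.inference_engine import IECore

    ie = IECore()
    net = ie.read_network(model=xml_path, weights=bin_path)
    input_blob = next(iter(net.input_info))
    exec_net = ie.load_network(network=net, device_name="CPU")

    image = cv2.imread(image_path)
    image = common.pad(image)
    mean = [0.408, 0.447, 0.47]
    std = [0.289, 0.274, 0.278]
    image = ((image / 255.0 - mean) / std).astype(np.float32)
    image = image.transpose(2, 0, 1)[None]  # (1, 3, h, w)

    return detect(exec_net, input_blob, image, threshold=0.4, nms_iou=0.5)
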
def detect_images_giou_with_retinaface_style_eval(output_hm, output_tlrb,
                                                  output_landmark,
                                                  threshold=0.4, ibatch=0):
    stride = 4
    _, _, hm_height, hm_width = output_hm.shape
    hm = output_hm[ibatch].reshape(1, 1, hm_height, hm_width)
    tlrb = output_tlrb[ibatch]
    landmark = output_landmark[ibatch]

    area = hm_height * hm_width
    keep = (hm > threshold).view(area)
    indices = torch.arange(0, area)[keep]

    hm = hm.view(1, area).cpu().data.numpy()
    tlrb = tlrb.view(4, area).cpu().data.numpy()
    landmark = landmark.view(10, area).cpu().data.numpy()

    cx, cy = indices % hm_width, indices // hm_width
    scores = hm[0, indices]
    x1, y1, x2, y2 = tlrb[0:4, indices]
    cts = np.vstack([cx, cy, cx, cy])
    locs = np.vstack([-x1, -y1, x2, y2])
    x1, y1, x2, y2 = (cts + locs) * stride

    x5y5 = landmark[0:10, indices]
    x5y5 = common.exp(x5y5 * 4)
    x5y5 = (x5y5 + np.vstack([cx] * 5 + [cy] * 5)) * stride

    imboxs = []
    for i in range(len(indices)):
        boxlandmark = list(zip(x5y5[0:5, i], x5y5[5:, i]))
        imboxs.append(
            common.BBox(label="facial",
                        xyrb=common.floatv([x1[i], y1[i], x2[i], y2[i]]),
                        score=scores[i],
                        landmark=boxlandmark))
    return imboxs