def detect_pnet(im, min_face_size, scale_factor, thresh):
    """Run P-Net over an image pyramid and return calibrated candidate boxes.

    Args:
        im: Input image. It is only handed to ``processed_image`` here; the
            resized result is read as ``(_, height, width)``.
        min_face_size: Smallest face size to look for, in input-image pixels.
            Must be positive.
        scale_factor: Multiplier applied to the scale between two pyramid
            levels. Must lie strictly between 0 and 1 so that every level is
            smaller than the previous one.
        thresh: Score threshold forwarded to ``generate_bbox``.

    Returns:
        ``None`` when no pyramid level produced a candidate. Otherwise an
        array with one row per box: the four corner coordinates shifted by
        their regression offsets, followed by the score.

    Raises:
        ValueError: If ``min_face_size`` is not positive or ``scale_factor``
            is not inside the open interval (0, 1).
    """
    # Reject arguments that would divide by zero or keep the pyramid from
    # ever shrinking (which would make the loop below run forever).
    if min_face_size <= 0:
        raise ValueError(
            "min_face_size must be positive, got %r" % (min_face_size,))
    if not 0 < scale_factor < 1:
        raise ValueError(
            "scale_factor must be in the open interval (0, 1), got %r"
            % (scale_factor,))

    net_size = 12
    # Scale that maps a face of `min_face_size` pixels onto the network window.
    current_scale = float(net_size) / min_face_size
    im_resized = processed_image(im, current_scale)
    _, current_height, current_width = im_resized.shape

    all_boxes = []
    # Image pyramid: keep shrinking until the image no longer exceeds the window.
    while min(current_height, current_width) > net_size:
        # Class map and box regression for the current level.
        cls_cls_map, reg = predict_pnet(im_resized)
        boxes = generate_bbox(cls_cls_map[1, :, :], reg, current_scale, thresh)

        # Prepare the next (smaller) level before any `continue`, otherwise
        # an empty level would never advance the pyramid.
        current_scale *= scale_factor
        im_resized = processed_image(im, current_scale)
        _, current_height, current_width = im_resized.shape

        if boxes.size == 0:
            continue
        # Per-level non-maximum suppression on (x1, y1, x2, y2, score).
        keep = py_nms(boxes[:, :5], 0.5, mode='Union')
        all_boxes.append(boxes[keep])

    if not all_boxes:
        return None

    all_boxes = np.vstack(all_boxes)
    # Second suppression pass across the boxes collected from all levels.
    keep = py_nms(all_boxes[:, 0:5], 0.7, mode='Union')
    all_boxes = all_boxes[keep]

    # Box width and height.
    bbw = all_boxes[:, 2] - all_boxes[:, 0] + 1
    bbh = all_boxes[:, 3] - all_boxes[:, 1] + 1
    # Apply the regression offsets (columns 5-8), scaled by the box size,
    # to the corners and append the score (column 4).
    boxes_c = np.vstack([all_boxes[:, 0] + all_boxes[:, 5] * bbw,
                         all_boxes[:, 1] + all_boxes[:, 6] * bbh,
                         all_boxes[:, 2] + all_boxes[:, 7] * bbw,
                         all_boxes[:, 3] + all_boxes[:, 8] * bbh,
                         all_boxes[:, 4]])
    return boxes_c.T
def detect_rnet(im, dets, thresh):
    """Refine P-Net candidate boxes with R-Net.

    Args:
        im: Input image, indexed as (row, column, channel).
        dets: Boxes selected by P-Net, in absolute coordinates of the
            original image. Column 4 of the kept rows is overwritten with
            the R-Net score.
        thresh: Minimum R-Net score (class column 1) a box must exceed.

    Returns:
        ``(boxes, boxes_c)``: the surviving square boxes and the same boxes
        after ``calibrate_box`` applied the R-Net regression. ``(None, None)``
        when no candidate is large enough or none passes ``thresh``.
    """
    h, w, _ = im.shape
    # Turn every P-Net box into the square that contains it, which avoids
    # losing information when the crop is resized.
    dets = convert_to_square(dets)
    dets[:, 0:4] = np.round(dets[:, 0:4])
    # Clip boxes that reach outside the image.
    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(dets, w, h)

    # Only boxes whose shorter side is at least 20 pixels are evaluated.
    # Select them by index so that crops, scores and `dets` rows stay
    # aligned; counting them and iterating over the first `count` rows would
    # drop valid boxes that follow a too-small one.
    valid = np.where(np.minimum(tmpw, tmph) >= 20)[0]
    if valid.size == 0:
        print('P模型检测结果为空!')
        return None, None
    dets = dets[valid]

    cropped_ims = np.zeros((valid.size, 3, 24, 24), dtype=np.float32)
    for row, i in enumerate(valid):
        # Crop the box from the original image; parts that fall outside the
        # image stay zero.
        tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
        try:
            tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = \
                im[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
            img = cv2.resize(tmp, (24, 24))
        except Exception:
            # Best effort: a crop that cannot be built is left as zeros and
            # is still scored by the network below.
            continue
        img = img.transpose((2, 0, 1))
        cropped_ims[row, :, :, :] = (img - 127.5) / 128

    cls_scores, reg = predict_rnet(cropped_ims)
    cls_scores = cls_scores[:, 1]
    keep_inds = np.where(cls_scores > thresh)[0]
    if len(keep_inds) == 0:
        return None, None

    boxes = dets[keep_inds]
    boxes[:, 4] = cls_scores[keep_inds]
    reg = reg[keep_inds]

    keep = py_nms(boxes, 0.6, mode='Union')
    boxes = boxes[keep]
    # Calibrate the cropped boxes with the regression output to obtain the
    # R-Net face boxes in absolute coordinates of the original image.
    boxes_c = calibrate_box(boxes, reg[keep])
    return boxes, boxes_c
def predict(self, image):
    """Detect objects in one image.

    The image is preprocessed to the network input size, run through the
    session, and post-processed back to the original image size. When
    ``cfg.use_nms`` is set, non-maximum suppression is applied separately to
    the detections of each class.

    Args:
        image: Image array with exactly three axes; the first two are used
            as the original image size.

    Returns:
        ``(bboxes, scores, classes)`` taken from detection columns 0-3, 4
        and 5. When suppression is enabled and nothing survives, the result
        is ``([], [0], [0])``.
    """
    snapshot = np.copy(image)
    # Three-way unpacking raises unless the image has exactly three axes.
    _height, _width, _channels = snapshot.shape
    source_hw = snapshot.shape[:2]

    net_input = utils.image_preporcess(
        image, [self.input_size, self.input_size])
    net_input = net_input[np.newaxis, ...]  # add the batch axis

    raw = self.sess.run(self.det, feed_dict={self.input_data: net_input})
    detections = utils.post_process(
        raw,
        source_hw,
        [cfg.input_image_h, cfg.input_image_w],
        cfg.down_ratio,
        cfg.score_threshold,
    )

    if not cfg.use_nms:
        return detections[:, 0:4], detections[:, 4], detections[:, 5]

    # Suppress overlapping boxes class by class.
    survivors = []
    for label in set(detections[:, 5]):
        same_class = detections[detections[:, 5] == label]
        picked = utils.py_nms(
            same_class[:, :4], same_class[:, 4], max_boxes=50, iou_thresh=0.5)
        survivors.extend(same_class[picked])
    survivors = np.asarray(survivors)

    if len(survivors) == 0:
        return [], [0], [0]
    return survivors[:, 0:4], survivors[:, 4], survivors[:, 5]
def detect_onet(im, dets, thresh):
    """Filter candidate boxes with O-Net and return boxes plus landmarks.

    Works much like the R-Net stage, but additionally returns landmark
    coordinates for every kept box.

    Args:
        im: Input image, indexed as (row, column, channel).
        dets: Candidate boxes; column 4 of the kept rows is overwritten with
            the O-Net score.
        thresh: Minimum O-Net score (class column 1) a box must exceed.

    Returns:
        ``(boxes_c, landmark)`` after calibration and non-maximum
        suppression, or ``(None, None)`` when no box passes ``thresh``.
    """
    img_h, img_w, _ = im.shape
    squares = convert_to_square(dets)
    squares[:, 0:4] = np.round(squares[:, 0:4])
    [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(squares, img_w, img_h)

    count = squares.shape[0]
    patches = np.zeros((count, 3, 48, 48), dtype=np.float32)
    for idx in range(count):
        # Paste the in-image part of the box onto a zero canvas.
        canvas = np.zeros((tmph[idx], tmpw[idx], 3), dtype=np.uint8)
        canvas[dy[idx]:edy[idx] + 1, dx[idx]:edx[idx] + 1, :] = \
            im[y[idx]:ey[idx] + 1, x[idx]:ex[idx] + 1, :]
        resized = cv2.resize(canvas, (48, 48), interpolation=cv2.INTER_LINEAR)
        channels_first = resized.transpose((2, 0, 1))
        patches[idx, :, :, :] = (channels_first - 127.5) / 128

    cls_scores, reg, landmark = predict_onet(patches)
    face_scores = cls_scores[:, 1]
    keep_inds = np.where(face_scores > thresh)[0]
    if len(keep_inds) == 0:
        return None, None

    boxes = squares[keep_inds]
    boxes[:, 4] = face_scores[keep_inds]
    reg = reg[keep_inds]
    landmark = landmark[keep_inds]

    box_w = boxes[:, 2] - boxes[:, 0] + 1
    box_h = boxes[:, 3] - boxes[:, 1] + 1
    # Scale the landmark values by the box size and shift them by the box
    # origin; even columns use the width / x origin, odd columns the
    # height / y origin.
    scaled_x = np.tile(box_w, (5, 1)) * landmark[:, 0::2].T
    landmark[:, 0::2] = (scaled_x + np.tile(boxes[:, 0], (5, 1)) - 1).T
    scaled_y = np.tile(box_h, (5, 1)) * landmark[:, 1::2].T
    landmark[:, 1::2] = (scaled_y + np.tile(boxes[:, 1], (5, 1)) - 1).T

    boxes_c = calibrate_box(boxes, reg)
    keep = py_nms(boxes_c, 0.6, mode='Minimum')
    return boxes_c[keep], landmark[keep]
# Script fragment: run the detection graph, post-process the raw output and,
# optionally, apply per-class non-maximum suppression.
# NOTE(review): `sess`, `det`, `inputs`, `image_data`, `original_image_size`,
# `original_image`, `class_names` and `t0` are set outside this fragment.
detections = sess.run(det, feed_dict={inputs: image_data})
detections = post_process(detections, original_image_size, [cfg.input_image_h, cfg.input_image_w], cfg.down_ratio, cfg.score_threshold)
# Report the time elapsed since `t0`. `time.time()` is sampled twice, so the
# millisecond and fps figures come from slightly different instants.
print('Inferencce took %.1f ms (%.2f fps)' % ((time.time() - t0) * 1000, 1 / (time.time() - t0)))
if cfg.use_nms:
    # Distinct class ids found in column 5 of the detections.
    cls_in_img = list(set(detections[:, 5]))
    results = []
    for c in cls_in_img:
        # Suppress overlapping boxes within one class at a time.
        cls_mask = (detections[:, 5] == c)
        classified_det = detections[cls_mask]
        classified_bboxes = classified_det[:, :4]
        classified_scores = classified_det[:, 4]
        inds = py_nms(classified_bboxes, classified_scores, max_boxes=50, iou_thresh=0.5)
        results.extend(classified_det[inds])
    results = np.asarray(results)
    print("results is", results)
    if len(results) != 0:
        # Columns 0-3 are the box, column 4 the score, column 5 the class.
        bboxes = results[:, 0:4]
        scores = results[:, 4]
        classes = results[:, 5]
        bboxes_draw_on_img(original_image, classes, scores, bboxes, class_names)
# NOTE(review): the original indentation was lost; this `else` is assumed to
# pair with `if cfg.use_nms` (use the unsuppressed detections) - confirm.
else:
    bboxes = detections[:, 0:4]
    scores = detections[:, 4]
    classes = detections[:, 5]