def detect_pnet(self, im):
    """Run the proposal network over an image pyramid and merge the results.

    The image is resized to a series of decreasing scales, ``self.pnet`` is
    evaluated on each of them, ``self.generate_bbox`` turns the network
    outputs into candidate boxes, and ``py_nms`` prunes the candidates first
    within each scale (threshold 0.5) and then across all scales (0.7).

    Args:
        im: Image array whose ``shape`` unpacks as ``(h, w, c)``.

    Returns:
        The candidate rows kept by the final ``py_nms`` call, or ``None``
        when no pyramid level applies or no candidate survives.

    Raises:
        ValueError: If at least one pyramid level is needed but
            ``self.scalor`` is not below 1; the scale would then never
            shrink and building the pyramid would not terminate.
    """
    h, w, _ = im.shape
    net_size = 12
    minl = np.min((w, h))
    base_scale = net_size / float(self.min_face_size)

    # Image pyramid: keep shrinking until the shorter side, once scaled, no
    # longer exceeds the network's input size.
    scales = []
    if minl > net_size:
        level = 0
        while True:
            s = base_scale * self.scalor ** level
            if np.floor(minl * s) <= net_size:
                break
            if not self.scalor < 1:
                # The scale sequence cannot decrease, so the break condition
                # above would never become true.
                raise ValueError(
                    "scalor must be < 1 to build a finite image pyramid, "
                    f"got {self.scalor!r}"
                )
            scales.append(s)
            level += 1

    if not scales:
        # Nothing to evaluate; skip the network entirely.
        return None

    # Switching to inference mode does not depend on the scale, so do it once.
    self.pnet.eval()

    # Predict on every pyramid level.
    total_boxes = []
    for scale in scales:
        hs = int(np.ceil(h * scale))
        ws = int(np.ceil(w * scale))
        im_data = cv2.resize(im, (ws, hs))
        net_input = Image2Tensor(im_data, (127.5, 127.5, 127.5))
        net_input = net_input.to(self.device)
        with torch.no_grad():
            output_cls, output_reg = self.pnet(net_input)
        output_cls = output_cls.squeeze_(0).detach().cpu().numpy()
        output_reg = output_reg.squeeze_(0).detach().cpu().numpy()
        bboxes = self.generate_bbox(output_cls, output_reg, scale, self.threshold[0])

        # NMS among the candidates of this single scale.
        if len(bboxes) <= 0:
            continue
        keep = py_nms(bboxes, 0.5, 'Union')
        if len(keep) <= 0:
            continue
        total_boxes.extend(bboxes[keep])

    # Once every pyramid level is done, run NMS again over all candidates.
    if len(total_boxes) <= 0:
        return None
    total_boxes = np.array(total_boxes)
    keep = py_nms(total_boxes, 0.7, 'Union')
    if len(keep) <= 0:
        return None
    return total_boxes[keep]
def detect_pnet(self, im):
    """Run the proposal network over an image pyramid and merge the results.

    The image is resized to a series of decreasing scales, ``self.pnet`` is
    evaluated on each of them, ``self.generate_bbox`` turns the network
    outputs into candidate boxes, and ``py_nms`` prunes the candidates first
    within each scale (threshold 0.5) and then across all scales (0.7).

    Args:
        im: Image array whose ``shape`` unpacks as ``(h, w, c)``.

    Returns:
        The candidate rows kept by the final ``py_nms`` call, or ``None``
        when no pyramid level applies or no candidate survives.

    Raises:
        ValueError: If at least one pyramid level is needed but
            ``self.scalor`` is not below 1; the scale would then never
            shrink and building the pyramid would not terminate.
    """
    h, w, _ = im.shape
    net_size = 12
    minl = np.min((w, h))
    base_scale = net_size / float(self.min_face_size)

    # Image pyramid: keep shrinking until the shorter side, once scaled, no
    # longer exceeds the network's input size.
    scales = []
    if minl > net_size:
        level = 0
        while True:
            s = base_scale * self.scalor ** level
            if np.floor(minl * s) <= net_size:
                break
            if not self.scalor < 1:
                # The scale sequence cannot decrease, so the break condition
                # above would never become true.
                raise ValueError(
                    "scalor must be < 1 to build a finite image pyramid, "
                    f"got {self.scalor!r}"
                )
            scales.append(s)
            level += 1

    if not scales:
        # Nothing to evaluate; skip the network entirely.
        return None

    # Predict on every pyramid level.
    total_boxes = []
    for scale in scales:
        hs = int(np.ceil(h * scale))
        ws = int(np.ceil(w * scale))
        im_data = cv2.resize(im, (ws, hs))
        net_input = Image2NArray(im_data, [127.5, 127.5, 127.5])
        net_input = net_input.as_in_context(self.ctx)
        output_cls, output_reg = self.pnet(net_input)
        output_cls = output_cls.asnumpy().squeeze(axis=0)
        output_reg = output_reg.asnumpy().squeeze(axis=0)
        bboxes = self.generate_bbox(output_cls, output_reg, scale, self.threshold[0])

        # NMS among the candidates of this single scale.
        if len(bboxes) <= 0:
            continue
        keep = py_nms(bboxes, 0.5, 'Union')
        if len(keep) <= 0:
            continue
        total_boxes.extend(bboxes[keep])

    # NMS across the candidates gathered from every scale.
    if len(total_boxes) <= 0:
        return None
    total_boxes = np.array(total_boxes)
    keep = py_nms(total_boxes, 0.7, 'Union')
    if len(keep) <= 0:
        return None
    return total_boxes[keep]
def detect_ronet(self, img, bboxes, image_size):
    """Refine candidate boxes with ``self.rnet`` or ``self.onet``.

    Each candidate is squared with ``square_bbox``, cropped out of ``img``
    (zero-padded where ``pad_bbox`` says so), resized to
    ``image_size x image_size`` and normalised as ``(pixel - 127.5) / 128``.
    The batch goes through ``self.onet`` when ``image_size == 48`` and
    through ``self.rnet`` otherwise. Candidates scoring above the matching
    entry of ``self.threshold`` are mapped back to image coordinates and
    pruned with ``py_nms``.

    Args:
        img: Image array whose ``shape`` unpacks as ``(H, W, C)``.
        bboxes: 2-D array of candidate boxes, one per row.
        image_size: Network input side; 24 and 48 are the supported values.

    Returns:
        Array with one row per kept detection laid out as
        ``[dx, dy, dw, dh, score, <landmark columns>]``, or ``None`` when
        there is nothing to evaluate, nothing passes the threshold, nothing
        survives NMS, or ``image_size`` is neither 24 nor 48.
    """
    H, W, _ = img.shape
    mean = np.array([127.5, 127.5, 127.5], dtype=np.float32)

    # 1-2. Square every candidate, then crop it with padding.
    crops = []
    origin_bbox = []
    for row in range(bboxes.shape[0]):
        sq = square_bbox(bboxes[row, :])
        size = sq[2]
        sx0, sy0, sx1, sy1, dx0, dy0, dx1, dy1 = pad_bbox(sq, W, H)
        # Skip boxes whose source or destination window leaves its valid
        # range; checked before allocating the crop buffer.
        src_ok = 0 <= sx0 <= W and 0 <= sx1 <= W and 0 <= sy0 <= H and 0 <= sy1 <= H
        dst_ok = all(0 <= v <= size for v in (dx0, dy0, dx1, dy1))
        if not (src_ok and dst_ok):
            continue
        crop = np.zeros((size, size, 3), dtype=np.uint8)
        crop[dy0:dy1, dx0:dx1, :] = img[sy0:sy1, sx0:sx1, :]
        out = cv2.resize(crop, (image_size, image_size))
        out = (out.astype(np.float32) - mean) / 128
        # HWC -> CHW
        crops.append(out.transpose(2, 0, 1))
        origin_bbox.append(sq)

    if not crops:
        # No usable candidate: an empty batch must not reach the network.
        return None

    # 3. Predict.
    origin_bbox = np.array(origin_bbox)
    batch = torch.from_numpy(np.array(crops)).to(self.device)
    detector = self.rnet
    threshold = self.threshold[1]
    if image_size == 48:
        detector = self.onet
        threshold = self.threshold[2]
    detector.eval()
    with torch.no_grad():
        out = detector(batch)

    # 4. Map the outputs back to image coordinates.
    # out[0] -> N x 2 class scores, out[1] -> N x 4 box regression;
    # out[2] is read as the landmark output.
    cls_map = out[0].detach().cpu().numpy()
    reg = out[1].detach().cpu().numpy()
    landmark = out[2].detach().cpu().numpy()
    face_map = cls_map[:, 1]
    selected = face_map > threshold
    if not selected.any():
        return None

    origin_bbox = origin_bbox[selected]
    score = face_map[selected]
    reg_map = reg[selected]
    landmark_map = landmark[selected]

    dx = reg_map[:, 0]
    dy = reg_map[:, 1]
    dw = reg_map[:, 2]
    dh = reg_map[:, 3]
    # Undo the regression-target encoding (original note: "backward for
    # smooth l1 loss (RCNN)").
    dx *= image_size
    dy *= image_size
    dw = np.exp(dw) * image_size
    dh = np.exp(dh) * image_size
    landmark_map *= image_size

    # Rescale from network-input pixels to the square box and add its origin.
    G = origin_bbox.astype(np.float32)
    scale_x = float(image_size) / G[:, 2]
    scale_y = float(image_size) / G[:, 3]
    dx = dx / scale_x + G[:, 0]
    dy = dy / scale_y + G[:, 1]
    dw = dw / scale_x
    dh = dh / scale_y
    # Five landmarks stored as interleaved x, y columns 0..9.
    landmark_map[:, 0:10:2] = landmark_map[:, 0:10:2] / scale_x[:, None] + G[:, 0:1]
    landmark_map[:, 1:10:2] = landmark_map[:, 1:10:2] / scale_y[:, None] + G[:, 1:2]

    # Compose [dx, dy, dw, dh, score, landmarks...] rows.
    bbox = np.column_stack([dx, dy, dw, dh, score, landmark_map])

    # NMS mode depends on the stage.
    nms_mode = {24: "Union", 48: "Minimum"}.get(image_size)
    if nms_mode is None:
        # Unsupported size: no result, as before.
        return None
    keep = py_nms(bbox, 0.6, nms_mode)
    if len(keep) <= 0:
        return None
    return bbox[keep]
def detect_ronet(self, img, bboxes, image_size):
    """Refine candidate boxes with ``self.rnet`` or ``self.onet``.

    Each candidate is squared with ``square_bbox``, cropped out of ``img``
    (zero-padded where ``pad_bbox`` says so), resized to
    ``image_size x image_size`` and mean-subtracted. The batch goes through
    ``self.onet`` when ``image_size == 48`` and through ``self.rnet``
    otherwise. Candidates scoring above the matching entry of
    ``self.threshold`` are mapped back to image coordinates and pruned with
    ``py_nms``.

    Args:
        img: Image array whose ``shape`` unpacks as ``(H, W, C)``.
        bboxes: 2-D array of candidate boxes, one per row.
        image_size: Network input side; 24 and 48 are the supported values.

    Returns:
        Array with one ``[dx, dy, dw, dh, score]`` row per kept detection,
        or ``None`` when there is nothing to evaluate, nothing passes the
        threshold, nothing survives NMS, or ``image_size`` is neither 24
        nor 48.
    """
    H, W, _ = img.shape
    mean = np.array([127.5, 127.5, 127.5], dtype=np.float32)

    # Square every candidate, then crop it with padding.
    crops = []
    origin_bbox = []
    for row in range(bboxes.shape[0]):
        sq = square_bbox(bboxes[row, :])
        size = sq[2]
        sx0, sy0, sx1, sy1, dx0, dy0, dx1, dy1 = pad_bbox(sq, W, H)
        # Skip boxes whose windows start below zero or end past their limit;
        # checked before allocating the crop buffer.
        if (sx0 < 0 or sy0 < 0 or dx0 < 0 or dy0 < 0
                or sx1 > W or sy1 > H or dx1 > size or dy1 > size):
            continue
        crop = np.zeros((size, size, 3), dtype=np.uint8)
        crop[dy0:dy1, dx0:dx1, :] = img[sy0:sy1, sx0:sx1, :]
        out = cv2.resize(crop, (image_size, image_size))
        # NOTE(review): crops are mean-subtracted but not rescaled here;
        # confirm this matches the preprocessing the networks expect.
        out = out.astype(np.float32) - mean
        # HWC -> CHW
        crops.append(out.transpose(2, 0, 1))
        origin_bbox.append(sq)

    if not crops:
        # No usable candidate: an empty batch must not reach the network.
        return None

    # Predict.
    origin_bbox = np.array(origin_bbox)
    batch = nd.array(crops).as_in_context(self.ctx)
    detector = self.rnet
    threshold = self.threshold[1]
    if image_size == 48:
        detector = self.onet
        threshold = self.threshold[2]
    out = detector(batch)

    cls_map = out[0].asnumpy()
    reg = out[1].asnumpy()
    face_map = cls_map[:, 1]
    selected = face_map > threshold
    if not selected.any():
        return None

    origin_bbox = origin_bbox[selected]
    score = face_map[selected]
    reg_map = reg[selected]

    dx = reg_map[:, 0]
    dy = reg_map[:, 1]
    dw = reg_map[:, 2]
    dh = reg_map[:, 3]
    # Undo the regression-target encoding: offsets are scaled by the input
    # side, sizes are exponentiated first.
    dx *= image_size
    dy *= image_size
    dw = np.exp(dw) * image_size
    dh = np.exp(dh) * image_size

    # Rescale from network-input pixels to the square box and add its origin.
    G = origin_bbox.astype(np.float32)
    scale_x = float(image_size) / G[:, 2]
    scale_y = float(image_size) / G[:, 3]
    dx = dx / scale_x + G[:, 0]
    dy = dy / scale_y + G[:, 1]
    dw = dw / scale_x
    dh = dh / scale_y

    # Compose [dx, dy, dw, dh, score] rows.
    bbox = np.column_stack([dx, dy, dw, dh, score])

    # NMS mode depends on the stage.
    nms_mode = {24: "Union", 48: "Minimum"}.get(image_size)
    if nms_mode is None:
        # Unsupported size: no result, as before.
        return None
    keep = py_nms(bbox, 0.7, nms_mode)
    if len(keep) <= 0:
        return None
    return bbox[keep]