def __rnet_detect(self, image, pnet_boxes):
    """Refine P-Net candidate boxes with R-Net and return the NMS survivors.

    Each candidate is squared via ``util.convert_to_square``, cropped out of
    ``image``, resized to 24x24, and scored by ``self.r_net``. Candidates
    whose confidence exceeds 0.7 are shifted by the predicted offsets
    (scaled by the square's width/height) and passed to ``util.NMS``.

    Args:
        image: Source image; must provide ``crop``/``resize``
            (presumably a PIL image -- confirm against the caller).
        pnet_boxes: Candidate boxes whose first four entries are
            ``x1, y1, x2, y2``.

    Returns:
        The result of ``util.NMS`` over rows of
        ``[x1, y1, x2, y2, confidence]``, or an empty ``numpy`` array when
        ``pnet_boxes`` is empty.
    """
    # torch.stack() raises on an empty list, so there is nothing to refine
    # (and nothing safe to run) when no candidates were supplied.
    if len(pnet_boxes) == 0:
        return np.array([])

    square_boxes = util.convert_to_square(pnet_boxes)  # convert to squares

    crops = []
    for box in square_boxes:
        left, top, right, bottom = (int(v) for v in box[:4])
        patch = image.crop((left, top, right, bottom))
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        patch = patch.resize((24, 24), Image.LANCZOS)
        crops.append(self.transform(patch))
    batch = torch.stack(crops)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        # cond has shape (batch, 1); offset has shape (batch, 4)
        cond, offset = self.r_net(batch)
    cond = cond.detach().numpy()
    offset = offset.detach().numpy()

    keep, _ = np.where(cond > 0.7)
    boxes = []
    for index in keep:
        box = square_boxes[index]
        _x1 = int(box[0])
        _y1 = int(box[1])
        _x2 = int(box[2])
        _y2 = int(box[3])
        ow = _x2 - _x1
        oh = _y2 - _y1

        # Offsets are fractions of the square's side length.
        x1 = offset[index][0] * ow + _x1
        y1 = offset[index][1] * oh + _y1
        x2 = offset[index][2] * ow + _x2
        y2 = offset[index][3] * oh + _y2
        boxes.append([x1, y1, x2, y2, cond[index][0]])

    return util.NMS(np.array(boxes), thresh=0.5)
def __onet_detect(self, image, rnet_boxes):
    """Refine R-Net candidate boxes with O-Net and return the NMS survivors.

    Each candidate is squared via ``util.convert_to_square``, cropped out of
    ``image``, resized to 48x48, and scored by ``self.o_net``. Candidates
    whose confidence exceeds 0.97 are shifted by the predicted offsets
    (scaled by the square's width/height) and passed to ``util.NMS``.

    Args:
        image: Source image; must provide ``crop``/``resize``
            (presumably a PIL image -- confirm against the caller).
        rnet_boxes: Candidate boxes whose first four entries are
            ``x1, y1, x2, y2``.

    Returns:
        The result of ``util.NMS`` over rows of
        ``[x1, y1, x2, y2, confidence]``, or an empty ``numpy`` array when
        ``rnet_boxes`` is empty.
    """
    # torch.stack() raises on an empty list, so there is nothing to refine
    # (and nothing safe to run) when no candidates were supplied.
    if len(rnet_boxes) == 0:
        return np.array([])

    square_boxes = util.convert_to_square(rnet_boxes)

    crops = []
    for box in square_boxes:
        left, top, right, bottom = (int(v) for v in box[:4])
        patch = image.crop((left, top, right, bottom))
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        patch = patch.resize((48, 48), Image.LANCZOS)
        crops.append(self.transform(patch))
    batch = torch.stack(crops)

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        _cls, _offset = self.o_net(batch)
    cls = _cls.detach().numpy()
    offset = _offset.detach().numpy()

    boxes = []
    idxs, _ = np.where(cls > 0.97)
    for idx in idxs:
        box = square_boxes[idx]
        _x1 = int(box[0])
        _y1 = int(box[1])
        _x2 = int(box[2])
        _y2 = int(box[3])
        ow = _x2 - _x1
        oh = _y2 - _y1

        # Offsets are fractions of the square's side length.
        x1 = _x1 + ow * offset[idx][0]
        y1 = _y1 + oh * offset[idx][1]
        x2 = _x2 + ow * offset[idx][2]
        y2 = _y2 + oh * offset[idx][3]
        boxes.append([x1, y1, x2, y2, cls[idx][0]])

    # The O-Net stage finishes with NMS whose overlap is computed as
    # intersection / min-area rather than intersection / union.
    return util.NMS(np.array(boxes), isMin=True, thresh=0.7)
def __pnet_detect(self, image):
    """Run P-Net over an image pyramid and return the NMS-filtered proposals.

    The image is repeatedly shrunk by a factor of 0.9 until its shorter
    side is no longer greater than 12. At every level ``self.p_net`` is
    evaluated and each location with confidence above 0.6 is mapped back
    to a box through ``self.__restore_box`` using the current scale.

    Args:
        image: Source image; must provide ``size`` and ``resize``
            (presumably a PIL image -- confirm against the caller).

    Returns:
        The result of ``util.NMS`` (threshold 0.5) over all collected boxes.
    """
    boxes = []
    img = image
    w, h = img.size
    min_side_len = min(w, h)  # length of the shorter side
    scale = 1  # current scale relative to the input image
    rate = 0.9  # pyramid shrink factor per level

    # Build the image pyramid by looping.
    while min_side_len > 12:  # P-Net proposal boxes have side length 12
        img_data = self.transform(img)  # convert the image to a tensor
        img_data.unsqueeze_(0)  # add a batch dimension: CHW -> 1CHW

        # Inference only: without this the scores handed to __restore_box
        # would still carry autograd history.
        with torch.no_grad():
            cond, offset = self.p_net(img_data)  # 1CHW

        cond_ = cond[0][0]
        offset_ = offset[0]

        cond_mask = cond_ > 0.6  # confidence above 0.6
        indexs = torch.nonzero(cond_mask)  # positions above the threshold
        for index in indexs:
            orignal_box = self.__restore_box(
                index, cond_[index[0], index[1]], offset_, scale)
            boxes.append(orignal_box)

        scale *= rate
        _w = int(w * scale)
        _h = int(h * scale)

        # resize() is documented to take a 2-tuple, not a list.
        img = img.resize((_w, _h))
        min_side_len = min(_w, _h)

    return util.NMS(np.array(boxes), 0.5)  # non-maximum suppression
for id_, box in enumerate(neg_box): neg_db_tmp[id_, :] = util.img2array(box[5], param.img_size_12) calib_result = net_12_calib.prediction.eval( feed_dict={input_12_node: neg_db_tmp}) neg_box = util.calib_box(neg_box, calib_result, img) #NMS for each scale scale_cur = 0 scale_box = [] suppressed = [] for id_, box in enumerate(neg_box): if box[6] == scale_cur: scale_box.append(box) if box[6] != scale_cur or id_ == len(neg_box) - 1: suppressed += util.NMS(scale_box) scale_cur = box[6] scale_box = [box] neg_box = suppressed suppressed = [] if sys.argv[1] == str(param.img_size_48): #24-net result_db_12 = np.zeros((len(neg_box), param.img_size_12, param.img_size_12, param.input_channel), np.float32) result_db_24 = np.zeros((len(neg_box), param.img_size_24, param.img_size_24, param.input_channel), np.float32) for bid, box in enumerate(neg_box):
for id_, box in enumerate(result_box): result_db_tmp[id_, :] = util.img2array(box[5], param.img_size_12) calib_result = net_12_calib.prediction.eval( feed_dict={input_12_node: result_db_tmp}) result_box = util.calib_box(result_box, calib_result, img) #NMS for each scale scale_cur = 0 scale_box = [] suppressed = [] for id_, box in enumerate(result_box): if box[6] == scale_cur: scale_box.append(box) if box[6] != scale_cur or id_ == len(result_box) - 1: suppressed += util.NMS(scale_box) scale_cur = box[6] scale_box = [box] result_box = suppressed suppressed = [] #24-net result_db_12 = np.zeros((len(result_box), param.img_size_12, param.img_size_12, param.input_channel), np.float32) result_db_24 = np.zeros((len(result_box), param.img_size_24, param.img_size_24, param.input_channel), np.float32) for bid, box in enumerate(result_box): resized_img_12 = util.img2array(box[5], param.img_size_12)