def oNetDetect(self, imge, rNetBoxes):
    # Return an empty result when the R-network produced no boxes
    if len(rNetBoxes) == 0:
        return np.array([])
    imgDataset = []
    rNetBoxes = utils.convert_to_square(rNetBoxes)
    for box in rNetBoxes:
        imgeCrop = imge.crop((int(box[0]), int(box[1]), int(box[2]), int(box[3])))
        imgeCrop = imgeCrop.resize((48, 48))
        imgData = self.imgTransform(imgeCrop)
        imgDataset.append(imgData)
    imgDataset = torch.stack(imgDataset)
    imgDataset = imgDataset.to(self.device)
    cons, offsets, landmark = self.oNet(imgDataset)
    return self.boxDetect(rNetBoxes, cons, offsets, conMax=self.oCon,
                          nmsMax=self.oNms, iouMode='min', landMark=landmark)
def rNetDetect(self, imge, pNetBoxes):
    # Return an empty result when the P-network produced no boxes
    if len(pNetBoxes) == 0:
        return []
    imgDatasets = []
    pNetBoxes = utils.convert_to_square(pNetBoxes)
    # Crop out the regions proposed by the P-network
    for pBox in pNetBoxes:
        imgeCrop = imge.crop((int(pBox[0]), int(pBox[1]), int(pBox[2]), int(pBox[3])))
        imgeCrop = imgeCrop.resize((24, 24))
        imgData = self.imgTransform(imgeCrop)
        imgDatasets.append(imgData)
    # Stack the crops into one batch tensor
    imgDatasets = torch.stack(imgDatasets)
    imgDatasets = imgDatasets.to(self.device)
    cons, offsets, landMark = self.rNet(imgDatasets)
    return self.boxDetect(pNetBoxes, cons, offsets, conMax=self.rCon,
                          nmsMax=self.rNms, landMark=landMark)
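# Every variant in this section squares the candidate boxes with
# utils.convert_to_square before cropping, so the crops keep their aspect
# ratio when resized to the fixed network input. The helper itself is not
# included here; the following is a minimal sketch of the usual MTCNN
# behavior, assuming boxes arrive as an (N, 5) float array of
# [x1, y1, x2, y2, score] -- the actual repo helpers may differ in signature.
import numpy as np

def convert_to_square(bbox):
    """Expand each box to a square around its own center (sketch)."""
    square = bbox.copy()
    w = bbox[:, 2] - bbox[:, 0]
    h = bbox[:, 3] - bbox[:, 1]
    side = np.maximum(w, h)  # side length of the enclosing square
    square[:, 0] = bbox[:, 0] + w * 0.5 - side * 0.5
    square[:, 1] = bbox[:, 1] + h * 0.5 - side * 0.5
    square[:, 2] = square[:, 0] + side
    square[:, 3] = square[:, 1] + side
    return square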
def __onet_detect(self, image, rnet_boxes):
    _img_dataset = []
    _rnet_boxes = utils.convert_to_square(rnet_boxes)
    for _box in _rnet_boxes:
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        img = image.crop((_x1, _y1, _x2, _y2))
        img = img.resize((48, 48))
        img_data = self.__image_transform(img)
        _img_dataset.append(img_data)
    img_dataset = Variable(torch.stack(_img_dataset))
    if self.isCuda:
        img_dataset = img_dataset.cuda()
    _cls, _offset, landmark = self.onet(img_dataset)
    cls = _cls.cpu().data.numpy()
    offset = _offset.cpu().data.numpy()
    landmark = landmark.cpu().data.numpy()
    boxes = []
    idxs, _ = np.where(cls > 0.7)
    for idx in idxs:
        _box = _rnet_boxes[idx]
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        ow = _x2 - _x1
        oh = _y2 - _y1
        x1 = _x1 + ow * offset[idx][0]
        y1 = _y1 + oh * offset[idx][1]
        x2 = _x2 + ow * offset[idx][2]
        y2 = _y2 + oh * offset[idx][3]
        px1 = _x1 + ow * landmark[idx][0]
        py1 = _y1 + oh * landmark[idx][1]
        px2 = _x1 + ow * landmark[idx][2]
        py2 = _y1 + oh * landmark[idx][3]
        px3 = _x1 + ow * landmark[idx][4]
        py3 = _y1 + oh * landmark[idx][5]
        px4 = _x1 + ow * landmark[idx][6]
        py4 = _y1 + oh * landmark[idx][7]
        px5 = _x1 + ow * landmark[idx][8]
        py5 = _y1 + oh * landmark[idx][9]
        boxes.append([x1, y1, x2, y2, cls[idx][0],
                      px1, py1, px2, py2, px3, py3, px4, py4, px5, py5])
    return utils.nms(np.array(boxes), 0.7, isMin=True)
def __onet_detect(self, image, rnet_boxes):
    boxes = []
    _img_dataset = []
    _rnet_boxes = utils.convert_to_square(rnet_boxes)
    for _box in _rnet_boxes:
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        img = image.crop((_x1, _y1, _x2, _y2))
        img = img.resize((48, 48))
        img_data = self.__image_transform(img)
        _img_dataset.append(img_data)
    # Full-convolution variant: concatenate the crops side by side into one
    # wide image instead of batching them.
    # img_dataset = torch.stack(_img_dataset)
    img_dataset = torch.cat(tuple(_img_dataset), 2)
    img_dataset = torch.unsqueeze(img_dataset, 0)
    if self.isCuda:
        img_dataset = img_dataset.cuda()
    _cls, _offset = self.onet(img_dataset)
    _cls = _cls.view(1, -1).permute(1, 0)
    _offset = _offset.view(4, -1).permute(1, 0)
    # Each crop contributes several spatial outputs; the stride-6 slicing
    # keeps the first output column per crop.
    cls = []
    offset = []
    for j in range(len(rnet_boxes)):
        cls.append(_cls[j * 6:j * 6 + 1])
        offset.append(_offset[j * 6:j * 6 + 1])
    cls = torch.cat(tuple(cls), 0).cpu().detach().numpy()
    offset = torch.cat(tuple(offset), 0).cpu().detach().numpy()
    idxs, _ = np.where(cls > 0.999)
    for idx in idxs:
        _box = rnet_boxes[idx]
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        ow = _x2 - _x1
        oh = _y2 - _y1
        x1 = _x1 + ow * offset[idx][0]
        y1 = _y1 + oh * offset[idx][1]
        x2 = _x2 + ow * offset[idx][2]
        y2 = _y2 + oh * offset[idx][3]
        boxes.append([x1, y1, x2, y2, cls[idx][0]])
    return utils.nms(np.array(boxes), 0.2, isMin=True)
def __rnet_detect(self, image, pnet_boxes):
    _img_dataset = []
    _pnet_boxes = utils.convert_to_square(pnet_boxes)
    for _box in _pnet_boxes:
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        img = image.crop((_x1, _y1, _x2, _y2))
        img = img.resize((24, 24))
        img_data = self.__image_transform(img)
        _img_dataset.append(img_data)
    img_dataset = torch.stack(_img_dataset)
    if self.isCuda:
        img_dataset = img_dataset.cuda()
    _cls, _offset = self.rnet(img_dataset)
    cls = _cls.cpu().data
    offset = _offset.cpu().data
    # For the full-convolution head, flatten the outputs back to (N, 1) / (N, 4)
    cls = cls.view(-1, 1)
    offset = offset.view(-1, 4)
    # idxs, _ = np.where(cls > 0.9)
    # idxs = torch.nonzero((cls > 0.90) & (cls < 0.95))[:, 0]
    idxs = torch.nonzero(cls > 0.99)[:, 0]
    _boxs = torch.tensor(_pnet_boxes)[idxs]
    _x1 = _boxs[:, 0]
    _y1 = _boxs[:, 1]
    _x2 = _boxs[:, 2]
    _y2 = _boxs[:, 3]
    ow = _x2 - _x1
    oh = _y2 - _y1
    x1 = _x1 + ow * offset[idxs][:, 0]
    y1 = _y1 + oh * offset[idxs][:, 1]
    x2 = _x2 + ow * offset[idxs][:, 2]
    y2 = _y2 + oh * offset[idxs][:, 3]
    boxes = torch.stack((x1, y1, x2, y2, cls[idxs][:, 0]), dim=1)
    return utils.nms(np.array(boxes), 0.1)
def __rnet_detect(self, image, pnet_boxes):
    _img_dataset = []
    _pnet_boxes = utils.convert_to_square(pnet_boxes)
    for _box in _pnet_boxes:
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        img = image.crop((_x1, _y1, _x2, _y2))
        img = img.resize((24, 24))
        img_data = self._image_transform(img)
        _img_dataset.append(img_data)
    img_dataset = torch.stack(_img_dataset)
    if self.isCuda:
        img_dataset = img_dataset.cuda()
    _cls, _offset = self.rnet(img_dataset)
    cls = _cls.cpu().data.numpy()
    offset = _offset.cpu().data.numpy()
    idxs, _ = np.where(cls > 0.7)  # typical range: 0.6-0.8
    _box = _pnet_boxes[idxs]  # e.g. shape (16, 5)
    _x1 = np.array(_box[:, 0])
    _y1 = np.array(_box[:, 1])
    _x2 = np.array(_box[:, 2])
    _y2 = np.array(_box[:, 3])
    ow = _x2 - _x1
    oh = _y2 - _y1
    offset = offset[idxs].T
    cls = cls[idxs].T
    # Map the predicted offsets back to absolute box coordinates
    x1 = _x1 + ow * offset[0, :]
    y1 = _y1 + oh * offset[1, :]
    x2 = _x2 + ow * offset[2, :]
    y2 = _y2 + oh * offset[3, :]
    out_boxes = np.dstack((x1, y1, x2, y2, cls))  # shape (1, n, 5)
    out_boxes = np.squeeze(out_boxes, 0)          # shape (n, 5)
    return utils.nms(np.array(out_boxes), 0.5)
def __onet_detect(self, image, rnet_boxes):
    _img_dataset = []
    _rnet_boxes = utils.convert_to_square(rnet_boxes)
    for _box in _rnet_boxes:
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        img = image.crop((_x1, _y1, _x2, _y2))
        img = img.resize((48, 48))
        img_data = self.__image_transform(img)
        _img_dataset.append(img_data)
    img_dataset = torch.stack(_img_dataset)
    if self.isCuda:
        img_dataset = img_dataset.cuda()
    _cls, _offset, _alli = self.onet(img_dataset)
    cls = _cls.cpu().data.numpy()        # shape (n, 1)
    offset = _offset.cpu().data.numpy()  # shape (n, 4)
    alli = _alli.cpu().data.numpy()      # shape (n, 10), five landmark points
    idxs, _ = np.where(cls > o_cls)
    _box = _rnet_boxes[idxs]
    _x1 = _box[:, 0].astype('int32')
    _y1 = _box[:, 1].astype('int32')
    _x2 = _box[:, 2].astype('int32')
    _y2 = _box[:, 3].astype('int32')
    ow = _x2 - _x1
    oh = _y2 - _y1
    x1 = _x1 + ow * offset[idxs, 0]
    y1 = _y1 + oh * offset[idxs, 1]
    x2 = _x2 + ow * offset[idxs, 2]
    y2 = _y2 + oh * offset[idxs, 3]
    # Landmark offsets in this variant are anchored to different corners of
    # the square crop (e.g. right eye to the right edge, mouth to the bottom
    # edge), presumably matching how its training labels were generated.
    le_x = _x1 + ow * alli[idxs, 0]
    le_y = _y1 + oh * alli[idxs, 1]
    re_x = _x2 + ow * alli[idxs, 2]
    re_y = _y1 + oh * alli[idxs, 3]
    n_x = _x1 + ow * alli[idxs, 4]
    n_y = _y1 + oh * alli[idxs, 5]
    lm_x = _x1 + ow * alli[idxs, 6]
    lm_y = _y2 + oh * alli[idxs, 7]
    rm_x = _x2 + ow * alli[idxs, 8]
    rm_y = _y2 + oh * alli[idxs, 9]
    boxes = np.stack((x1, y1, x2, y2, cls[idxs].T[0],
                      le_x, le_y, re_x, re_y, n_x, n_y,
                      lm_x, lm_y, rm_x, rm_y)).T
    # 'min' mode: IoU divides by the smaller box area instead of the union
    return utils.nms(np.array(boxes), o_nms, isMin=True)
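# Several O-net variants above call utils.nms(..., isMin=True), i.e. NMS with
# the overlap measured as intersection / min(area) rather than over the union,
# which suppresses small boxes nested inside larger ones. The utility itself
# is not part of this section; the following is a minimal sketch under the
# assumption that boxes are (N, 5+) arrays whose fifth column is the confidence.
import numpy as np

def nms(boxes, thresh, isMin=False):
    """Greedy NMS; with isMin=True the overlap is inter / min(area) (sketch)."""
    if boxes.shape[0] == 0:
        return np.array([])
    order = boxes[:, 4].argsort()[::-1]  # highest confidence first
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        rest = order[1:]
        xx1 = np.maximum(boxes[i, 0], boxes[rest, 0])
        yy1 = np.maximum(boxes[i, 1], boxes[rest, 1])
        xx2 = np.minimum(boxes[i, 2], boxes[rest, 2])
        yy2 = np.minimum(boxes[i, 3], boxes[rest, 3])
        inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        area_r = (boxes[rest, 2] - boxes[rest, 0]) * (boxes[rest, 3] - boxes[rest, 1])
        if isMin:
            ovr = inter / np.minimum(area_i, area_r)
        else:
            ovr = inter / (area_i + area_r - inter)
        order = rest[ovr < thresh]  # keep only boxes below the overlap threshold
    return boxes[keep]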
def _rnet_detect(self, inputs, boxes):
    """Refine the PNet boxes and run them through RNet.

    :param boxes: candidate boxes from the PNet stage
    :return: refined boxes and their scores
    """
    # Squaring the PNet boxes avoids losing information when resizing;
    # this variant's convert_to_square appears to modify `boxes` in place
    # (its return value is unused).
    convert_to_square(boxes)
    rnet_box = Detector._pad(inputs, boxes, 24)
    cls, reg = self.rnet.predict(rnet_box)
    keep = np.where(cls[:, 1] > self._r_pro)
    if keep[0].size == 0:
        return np.empty((0, 4)), np.empty((0, 1))
    scores = cls[:, 1][keep]
    boxes = Detector._get_reg_box(boxes[keep], reg[keep])
    keep = nms(boxes, scores, 0.7, 'union')
    return boxes[keep], scores[keep]
def __rnet_detect(self, image, pnet_boxes):
    boxes = []
    _img_dataset = []
    _pnet_boxes = utils.convert_to_square(pnet_boxes)
    for _box in _pnet_boxes:
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        img = image.crop((_x1, _y1, _x2, _y2))
        img = img.resize((24, 24))
        img_data = self.__image_transform(img)
        _img_dataset.append(img_data)
    # Full-convolution variant: concatenate the crops along the width
    # instead of batching them.
    # img_dataset = torch.stack(_img_dataset)
    img_dataset = torch.cat(tuple(_img_dataset), 2)
    img_dataset = torch.unsqueeze(img_dataset, 0)
    if self.isCuda:
        img_dataset = img_dataset.cuda()
    _cls, _offset = self.rnet(img_dataset)
    _cls = _cls.view(1, -1).permute(1, 0)
    _offset = _offset.view(4, -1).permute(1, 0)
    cls = _cls.cpu().detach().numpy()
    offset = _offset.cpu().detach().numpy()
    idxs, _ = np.where(cls > 0.99)
    for idx in idxs:
        _box = _pnet_boxes[idx]
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        ow = _x2 - _x1
        oh = _y2 - _y1
        x1 = _x1 + ow * offset[idx][0]
        y1 = _y1 + oh * offset[idx][1]
        x2 = _x2 + ow * offset[idx][2]
        y2 = _y2 + oh * offset[idx][3]
        boxes.append([x1, y1, x2, y2, cls[idx][0]])
    return utils.nms(np.array(boxes), 0.4)
def _onet_detect(self, inputs, boxes):
    # Squares the boxes (this variant's helper appears to work in place)
    convert_to_square(boxes)
    onet_box = Detector._pad(inputs, boxes, 48)
    cls, reg, marks = self.onet.predict(onet_box)
    keep = np.where(cls[:, 1] > self._o_pro)
    if keep[0].size == 0:
        return np.empty((0, 4)), np.empty((0, 1)), np.empty((0, 10))
    scores = cls[:, 1][keep]
    marks = marks[keep]
    boxes = boxes[keep]
    # Landmarks are predicted relative to the box; scale them back to
    # absolute image coordinates.
    marks = np.reshape(marks, (-1, 5, 2))
    marks[:, :, 0] = marks[:, :, 0] * (boxes[:, 2:3] - boxes[:, 0:1]) + boxes[:, 0:1]
    marks[:, :, 1] = marks[:, :, 1] * (boxes[:, 3:4] - boxes[:, 1:2]) + boxes[:, 1:2]
    marks = np.int32(marks)
    boxes = Detector._get_reg_box(boxes, reg[keep])
    keep = nms(boxes, scores, 0.7, 'min')
    return boxes[keep], scores[keep], marks[keep]
def __rnet_detect(self, image, pnet_boxes):
    _img_dataset = []
    _pnet_boxes = utils.convert_to_square(pnet_boxes)
    # Normalize the input to a PIL image once, outside the crop loop
    image = Image.fromarray(np.array(image))
    for _box in _pnet_boxes:
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        img = image.crop((_x1, _y1, _x2, _y2))
        img = img.resize((24, 24))
        img_data = self._image_transform(img)
        _img_dataset.append(img_data)
    img_dataset = torch.stack(_img_dataset)
    if self.isCuda:
        img_dataset = img_dataset.cuda()
    _cls, _offset = self.rnet(img_dataset)
    cls = _cls.cpu().data.numpy()
    offset = _offset.cpu().data.numpy()
    boxes = []
    idxs, _ = np.where(cls > 0.6)  # typical range: 0.7-0.8
    for idx in idxs:
        # Fetch the crop (proposal) box by index so the offsets can be
        # mapped back to absolute coordinates
        _box = _pnet_boxes[idx]
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        ow = _x2 - _x1
        oh = _y2 - _y1
        x1 = _x1 + ow * offset[idx][0]
        y1 = _y1 + oh * offset[idx][1]
        x2 = _x2 + ow * offset[idx][2]
        y2 = _y2 + oh * offset[idx][3]
        boxes.append([x1, y1, x2, y2, cls[idx][0]])  # cls[idx][0] is a scalar
    return utils.nms(np.array(boxes), 0.5)
def __rnet_detect(self, image, pnet_boxes):
    # List to hold the cropped patches
    _img_dataset = []
    # Expand each P-network box around its center into a square before cropping
    _pnet_boxes = utils.convert_to_square(pnet_boxes)
    # For each box: take its four coordinates, crop, resize, convert to a
    # tensor, and append to the list
    for _box in _pnet_boxes:
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        img = image.crop((_x1, _y1, _x2, _y2))  # crop by the four coordinates
        img = img.resize((24, 24))              # resize to the fixed input size
        img_data = self.data_for(img)           # image array -> tensor
        _img_dataset.append(img_data)
    # stack (default dim 0) turns the list of CHW tensors into an NCHW batch
    img_dataset = torch.stack(_img_dataset)
    if self.isCuda:
        img_dataset = img_dataset.cuda()
    # Run the batch through the R-network
    _cond, _offset_face = self.rnet(img_dataset)
    cond = _cond.cpu().data.numpy()  # move to CPU, then convert to numpy
    offset_face = _offset_face.cpu().data.numpy()
    # A confidence threshold of 0.6 is on the low side (many useless boxes
    # survive; print them to check) and can be raised. idxs are the row
    # indices of boxes whose confidence exceeds r_cls; together with the
    # column indices (ignored here) they locate the matching elements.
    idxs, _ = np.where(cond > r_cls)
    # Gather the selected boxes and their corner coordinates
    _box = _pnet_boxes[idxs]
    _x1 = _box[:, 0].astype(np.int32)
    _y1 = _box[:, 1].astype(np.int32)
    _x2 = _box[:, 2].astype(np.int32)
    _y2 = _box[:, 3].astype(np.int32)
    # Width and height of each box
    ow = _x2 - _x1
    oh = _y2 - _y1
    # Map the offsets back to absolute coordinates
    x1 = _x1 + ow * offset_face[idxs][:, 0]
    y1 = _y1 + oh * offset_face[idxs][:, 1]
    x2 = _x2 + ow * offset_face[idxs][:, 2]
    y2 = _y2 + oh * offset_face[idxs][:, 3]
    cls = cond[idxs][:, 0]
    boxes_out = np.array([x1, y1, x2, y2, cls], dtype=np.float64)
    boxes_out = np.swapaxes(boxes_out, 1, 0)  # (5, n) -> (n, 5)
    # r_nms defaults to 0.5 and should be lowered (and the 0.6 threshold
    # raised); boxes whose overlap is below the threshold are kept
    return utils.nms(np.array(boxes_out), r_nms)
def __onet_detect(self, image, rnet_boxes):
    _img_dataset = []  # list to hold the cropped patches
    # Expand each R-network box around its center into a square
    _rnet_boxes = utils.convert_to_square(rnet_boxes)
    # For each box kept by the R-network: compute coordinates, crop, resize,
    # convert to a tensor, append, then stack
    for _box in _rnet_boxes:
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        img = image.crop((_x1, _y1, _x2, _y2))  # crop by the coordinates
        img = img.resize((48, 48))
        img_data = self.data_for(img)  # cropped image -> tensor
        _img_dataset.append(img_data)
    img_dataset = torch.stack(_img_dataset)  # list of CHW -> NCHW batch
    if self.isCuda:
        img_dataset = img_dataset.cuda()
    _cls, _offset, _offset_facial = self.onet(img_dataset)
    cls = _cls.cpu().data.numpy()        # shape (n, 1)
    offset = _offset.cpu().data.numpy()  # shape (n, 4)
    boxes = []  # results of the O-network stage
    # o_cls = 0.97 is on the low side; for a final confidence standard of
    # 0.99999 a value like 0.99998 keeps essentially only faces. idxs are
    # the row indices of boxes above the threshold; the column indices
    # (ignored here) complete the element positions.
    idxs, _ = np.where(cls > o_cls)
    for idx in idxs:  # iterate over the qualifying boxes
        _box = _rnet_boxes[idx]  # use the R-network box as the reference
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        ow = _x2 - _x1  # reference width; the box is square, so ow == oh
        oh = _y2 - _y1
        # Final O-network box; during sampling the offset was
        # delta = (x1 - _x1) / side_len
        x1 = _x1 + ow * offset[idx][0]
        y1 = _y1 + oh * offset[idx][1]
        x2 = _x2 + ow * offset[idx][2]
        y2 = _y2 + oh * offset[idx][3]
        boxes.append([x1, y1, x2, y2, cls[idx][0]])  # four coords + confidence
    # NMS with the min-area IoU; boxes overlapping less than o_nms (0.7) survive
    return utils.nms(np.array(boxes), o_nms, isMin=True)
def __rnet_detect(self, image, pnet_boxes):
    _img_dataset = []
    _pnet_boxes = utils.convert_to_square(pnet_boxes)
    # Crop the proposals in parallel with a small worker pool
    pool = mp.Pool(5)
    for _box in _pnet_boxes:
        _img_dataset.append(pool.apply_async(self.__crop_box, args=(_box, image)))
    pool.close()
    pool.join()
    _img_dataset = [_img_data.get() for _img_data in _img_dataset]
    img_dataset = torch.stack(tuple(_img_dataset))
    if self.isCuda:
        img_dataset = img_dataset.cuda()
    _cls, _offset = self.rnet(img_dataset)
    cls = _cls.cpu().data
    offset = _offset.cpu().data
    # idxs, _ = np.where(cls > 0.9)
    idxs = torch.nonzero(cls > 0.9)[:, 0]
    _boxs = torch.tensor(_pnet_boxes)[idxs]
    _x1 = _boxs[:, 0]
    _y1 = _boxs[:, 1]
    _x2 = _boxs[:, 2]
    _y2 = _boxs[:, 3]
    ow = _x2 - _x1
    oh = _y2 - _y1
    x1 = _x1 + ow * offset[idxs][:, 0]
    y1 = _y1 + oh * offset[idxs][:, 1]
    x2 = _x2 + ow * offset[idxs][:, 2]
    y2 = _y2 + oh * offset[idxs][:, 3]
    boxes = torch.stack((x1, y1, x2, y2, cls[idxs][:, 0]), dim=1)
    return utils.nms(np.array(boxes), 0.4)
def __onet_detect(self, image, rnet_boxes):
    _img_dataset = []
    _rnet_boxes = utils.convert_to_square(rnet_boxes)
    for _box in _rnet_boxes:
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        img = image.crop((_x1, _y1, _x2, _y2))
        img = img.resize((48, 48))
        img_data = self.__image_transform(img)
        _img_dataset.append(img_data)
    img_dataset = torch.stack(_img_dataset)
    if self.isCuda:
        img_dataset = img_dataset.cuda()
    _cls, _offset = self.onet(img_dataset)
    cls = _cls.cpu().data
    offset = _offset.cpu().data
    # For the full-convolution head, flatten the outputs back to (N, 1) / (N, 4)
    cls = cls.view(-1, 1)
    offset = offset.view(-1, 4)
    idxs = torch.nonzero(cls > 0.9999)[:, 0]
    _boxs = torch.tensor(rnet_boxes)[idxs]
    _x1 = _boxs[:, 0]
    _y1 = _boxs[:, 1]
    _x2 = _boxs[:, 2]
    _y2 = _boxs[:, 3]
    ow = _x2 - _x1
    oh = _y2 - _y1
    x1 = _x1 + ow * offset[idxs][:, 0]
    y1 = _y1 + oh * offset[idxs][:, 1]
    x2 = _x2 + ow * offset[idxs][:, 2]
    y2 = _y2 + oh * offset[idxs][:, 3]
    boxes = torch.stack((x1, y1, x2, y2, cls[idxs][:, 0]), dim=1)
    return utils.nms(np.array(boxes), 0.2, isMin=True)
def __onet_detect(self, image, rnet_boxes):
    _img_dataset = []  # list to hold the cropped patches
    # Expand each R-network box around its center into a square
    _rnet_boxes = utils.convert_to_square(rnet_boxes)
    # Crop, resize, convert to a tensor, append, then stack
    for _box in _rnet_boxes:
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        img = image.crop((_x1, _y1, _x2, _y2))
        img = img.resize((48, 48))
        img_data = self.data_for(img)
        _img_dataset.append(img_data)
    img_dataset = torch.stack(_img_dataset)  # list of CHW -> NCHW batch
    if self.isCuda:
        img_dataset = img_dataset.cuda()
    _cls, _offset = self.onet(img_dataset)
    cls = _cls.cpu().data.numpy()        # shape (n, 1)
    offset = _offset.cpu().data.numpy()  # shape (n, 4)
    # o_cls = 0.97 is on the low side; values close to 0.99999 keep
    # essentially only faces
    idxs, _ = np.where(cls > o_cls)
    # Gather the selected boxes and their corner coordinates
    _box = _rnet_boxes[idxs]
    _x1 = _box[:, 0].astype(np.int32)
    _y1 = _box[:, 1].astype(np.int32)
    _x2 = _box[:, 2].astype(np.int32)
    _y2 = _box[:, 3].astype(np.int32)
    # Box width and height
    ow = _x2 - _x1
    oh = _y2 - _y1
    # Map the offsets back to absolute coordinates
    x1 = _x1 + ow * offset[idxs][:, 0]
    y1 = _y1 + oh * offset[idxs][:, 1]
    x2 = _x2 + ow * offset[idxs][:, 2]
    y2 = _y2 + oh * offset[idxs][:, 3]
    cls = cls[idxs][:, 0]
    boxes_out = np.array([x1, y1, x2, y2, cls], dtype=np.float64)
    boxes_out = np.swapaxes(boxes_out, 1, 0)  # (5, n) -> (n, 5)
    # NMS with the min-area IoU; boxes overlapping less than o_nms (0.7) survive
    return utils.nms(np.array(boxes_out), o_nms, isMin=True)
def __onet_detect(self, image, rnet_boxes):
    _img_dataset = []
    _rnet_boxes = utils.convert_to_square(rnet_boxes)
    for _box in _rnet_boxes:
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        img = image.crop((_x1, _y1, _x2, _y2))
        img = img.resize((48, 48))
        img_data = self._image_transform(img)
        _img_dataset.append(img_data)
    img_dataset = torch.stack(_img_dataset)
    if self.isCuda:
        img_dataset = img_dataset.cuda()
    _cls, _offset = self.onet(img_dataset)
    cls = _cls.cpu().data.numpy()
    offset = _offset.cpu().data.numpy()
    boxes = []
    idxs, _ = np.where(cls > 0.95)  # usually 0.99
    for idx in idxs:
        _box = _rnet_boxes[idx]
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        ow = _x2 - _x1
        oh = _y2 - _y1
        # The crop coordinates are already known at input time, so there is
        # no need to recover the proposal box; only the offsets from the
        # proposal to the ground-truth box have to be decoded.
        x1 = _x1 + ow * offset[idx][0]
        y1 = _y1 + oh * offset[idx][1]
        x2 = _x2 + ow * offset[idx][2]
        y2 = _y2 + oh * offset[idx][3]
        boxes.append([x1, y1, x2, y2, cls[idx][0]])
    # Threshold 0.7: boxes whose min-area IoU is below 0.7 are kept
    return utils.nms(np.array(boxes), 0.7, isMin=True)
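# Almost all variants in this section decode the regression output the same
# way: the network predicts offsets relative to the square crop, so an
# absolute coordinate is recovered as x = x_crop + w_crop * offset. A
# vectorized sketch of that decode (decode_offsets is a hypothetical helper,
# not part of any of the repos above), assuming offsets are an (N, 4) array
# aligned row-by-row with (N, 4+) square boxes:
import numpy as np

def decode_offsets(square_boxes, offsets):
    """Map (N, 4) regression offsets back to absolute coordinates (sketch)."""
    x1, y1, x2, y2 = (square_boxes[:, i] for i in range(4))
    ow = x2 - x1  # crop width
    oh = y2 - y1  # crop height
    return np.stack((x1 + ow * offsets[:, 0],
                     y1 + oh * offsets[:, 1],
                     x2 + ow * offsets[:, 2],
                     y2 + oh * offsets[:, 3]), axis=1)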
def __rnet_detect(self, image, pnet_boxes):
    _img_dataset = []
    _pnet_boxes = utils.convert_to_square(pnet_boxes)
    for _box in _pnet_boxes:
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        img = image.crop((_x1, _y1, _x2, _y2))
        img = img.resize((24, 24))
        img_data = self.__image_transform(img)
        # img_data = torch.Tensor(np.array(img) / 255. - 0.5)
        # img_data = img_data.permute(2, 0, 1)
        _img_dataset.append(img_data)
    img_dataset = torch.stack(_img_dataset)
    if self.isCuda:
        img_dataset = img_dataset.cuda()
    _cls, _offset = self.rnet(img_dataset)
    cls = _cls.cpu().data.numpy()
    offset = _offset.cpu().data.numpy()
    boxes = []
    idxs, _ = np.where(cls > 0.9)
    for idx in idxs:
        _box = _pnet_boxes[idx]
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        ow = _x2 - _x1
        oh = _y2 - _y1
        x1 = _x1 + ow * offset[idx][0]
        y1 = _y1 + oh * offset[idx][1]
        x2 = _x2 + ow * offset[idx][2]
        y2 = _y2 + oh * offset[idx][3]
        boxes.append([x1, y1, x2, y2, cls[idx][0]])
    return utils.nms(np.array(boxes), 0.4)
def __rnet_detect(self, image, pnet_boxes):
    _img_dataset = []
    _pnet_boxes = utils.convert_to_square(pnet_boxes)  # convert boxes to squares
    for _box in _pnet_boxes:  # this Python-level loop is slow
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        img = image.crop((_x1, _y1, _x2, _y2))  # crop
        img = img.resize((24, 24))              # resize
        img_data = self.__image_transform(img)
        _img_dataset.append(img_data)
    img_dataset = torch.stack(_img_dataset)
    if self.isCuda:
        img_dataset = img_dataset.cuda()
    _cls, _offset, _ = self.rnet(img_dataset)
    cls = _cls.cpu().data.numpy()
    offset = _offset.cpu().data.numpy()
    idxs, _ = np.where(cls > r_cls)
    _box = _pnet_boxes[idxs]
    _x1 = _box[:, 0].astype('int32')
    _y1 = _box[:, 1].astype('int32')
    _x2 = _box[:, 2].astype('int32')
    _y2 = _box[:, 3].astype('int32')
    ow = _x2 - _x1
    oh = _y2 - _y1
    x1 = _x1 + ow * offset[idxs, 0]
    y1 = _y1 + oh * offset[idxs, 1]
    x2 = _x2 + ow * offset[idxs, 2]
    y2 = _y2 + oh * offset[idxs, 3]
    boxes = np.stack((x1, y1, x2, y2, cls[idxs].T[0])).T
    return utils.nms(np.array(boxes), r_nms)
def __detect_rnet(self, image, pnet_boxes):
    _img_dataset = []
    _pnet_boxes = convert_to_square(pnet_boxes, image)
    for _box in _pnet_boxes:
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        img = image.crop([_x1, _y1, _x2, _y2])
        img = img.resize((24, 24))
        img_data = self.__image_transform(img).to(self.device)
        _img_dataset.append(img_data)
    img_dataset = torch.stack(_img_dataset)
    _cls, _offset, _landmark = self.rnet(img_dataset)
    cls = _cls.cpu().data.numpy()
    offset = _offset.cpu().data.numpy()
    landmark = _landmark.cpu().data.numpy()
    index, _ = np.where(cls > 0.9)
    if len(index) == 0:
        return np.array([])
    boxes_ = pnet_boxes[index]
    cls = cls[index].reshape(-1, 1)
    offset = offset[index]
    landmark = landmark[index]
    # Per-box width/height and center, tiled so the (N, 4) offsets and the
    # (N, 10) landmarks can be decoded in one vectorized step
    wh = np.array([boxes_[:, i + 3] - boxes_[:, i + 1]
                   for i in range(2)]).transpose(1, 0)
    center = np.array([0.5 * (boxes_[:, i + 1] + boxes_[:, i + 3])
                       for i in range(2)]).transpose(1, 0)
    # This variant decodes relative to the box center rather than a corner
    offset_boxes = offset * np.tile(wh, (1, 2)) + np.tile(center, (1, 2))
    landmark_boxes = landmark * np.tile(wh, (1, 5)) + np.tile(center, (1, 5))
    boxes = np.concatenate([cls, offset_boxes, landmark_boxes], axis=1)
    return nms(boxes, 0.6)
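# Note on decoding conventions: most variants in this section anchor the
# regression offsets at the corners of the square crop,
#     x1 = x1_square + w_square * dx1,
# while __detect_rnet above anchors them at the crop center,
#     x = cx + w_square * dx,  with cx = (x1_square + x2_square) / 2.
# Either works, but the convention at inference time has to match the one
# used when the training labels were generated.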
def save_hard_example(net, data, save_path):
    # Load ground truth from the annotation file; each line has the format:
    # image/path [x1, y1, x2, y2] for every gt box in that image
    im_idx_list = data['images']
    gt_boxes_list = data['bboxes']
    num_of_images = len(im_idx_list)
    print("processing %d images in total" % num_of_images)

    # Label files for the three sample classes
    neg_label_file = "../DATA/no_LM%d/neg_%d.txt" % (net, image_size)
    neg_file = open(neg_label_file, 'w+')
    pos_label_file = "../DATA/no_LM%d/pos_%d.txt" % (net, image_size)
    pos_file = open(pos_label_file, 'w+')
    part_label_file = "../DATA/no_LM%d/part_%d.txt" % (net, image_size)
    # Make sure the parent directory exists before opening the label file
    os.makedirs(os.path.dirname(part_label_file), exist_ok=True)
    part_file = open(part_label_file, 'w+')

    # Read the detection results produced by the previous stage
    det_boxes = pickle.load(
        open(os.path.join(save_path, 'detections.pkl'), 'rb'))
    assert len(det_boxes) == num_of_images, \
        "incorrect detections or ground truths"

    # Indices of neg, pos and part faces, used as their image names
    n_idx = 0
    p_idx = 0
    d_idx = 0
    image_done = 0
    # im_idx_list: image paths; det_boxes: detection results; gt_boxes_list: gts
    for im_idx, dets, gts in zip(im_idx_list, det_boxes, gt_boxes_list):
        gts = np.array(gts, dtype=np.float32).reshape(-1, 4)
        if image_done % 100 == 0:
            print("%d images done" % image_done)
        image_done += 1

        if dets.shape[0] == 0:
            continue
        img = cv2.imread(im_idx)
        # Change the detections to squares
        dets = convert_to_square(dets)
        dets[:, 0:4] = np.round(dets[:, 0:4])
        neg_num = 0
        for box in dets:
            x_left, y_top, x_right, y_bottom, _ = box.astype(int)
            width = x_right - x_left + 1
            height = y_bottom - y_top + 1

            # Ignore boxes that are too small or reach beyond the image border
            if width < 20 or x_left < 0 or y_top < 0 \
                    or x_right > img.shape[1] - 1 or y_bottom > img.shape[0] - 1:
                continue

            # Intersection over union (IoU) between this box and all gt boxes
            Iou = IoU(box, gts)
            cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
            resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                    interpolation=cv2.INTER_LINEAR)

            # Save negative images and write labels:
            # IoU with every gt must be below 0.3
            if np.max(Iou) < 0.3 and neg_num < 60:
                save_file = get_path(neg_dir, "%s.jpg" % n_idx)
                neg_file.write(save_file + ' 0\n')
                cv2.imwrite(save_file, resized_im)
                n_idx += 1
                neg_num += 1
            else:
                # Find the gt box with the highest IoU
                idx = np.argmax(Iou)
                assigned_gt = gts[idx]
                x1, y1, x2, y2 = assigned_gt

                # Compute the bbox regression labels
                offset_x1 = (x1 - x_left) / float(width)
                offset_y1 = (y1 - y_top) / float(height)
                offset_x2 = (x2 - x_right) / float(width)
                offset_y2 = (y2 - y_bottom) / float(height)

                # Save positive and part-face images and write labels
                if np.max(Iou) >= 0.65:
                    save_file = get_path(pos_dir, "%s.jpg" % p_idx)
                    pos_file.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' % (
                        offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1
                elif np.max(Iou) >= 0.4:
                    save_file = os.path.join(part_dir, "%s.jpg" % d_idx)
                    part_file.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' % (
                        offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1
    neg_file.close()
    part_file.close()
    pos_file.close()
def inference():
    sess = tf.Session()
    with gfile.FastGFile(os.path.dirname(__file__) + '/model.pb', 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        sess.graph.as_default()
        tf.import_graph_def(graph_def, name='')
    sess.run(tf.global_variables_initializer())
    input_image = sess.graph.get_tensor_by_name('input_image:0')
    landmark = sess.graph.get_tensor_by_name('ONet/landmark_fc/BiasAdd:0')

    data_file = "/mnt/data/changshuang/data/flickr/"
    anno_file = "/mnt/data/changshuang/data/aflw_anno.txt"
    # data: {'images': images, 'bboxes': bboxes, 'landmarks': landmarks}
    data = read_annotation(data_file, anno_file)
    img_data = list(zip(data["images"], data["bboxes"], data["landmarks"]))
    for img_path, img_bbox, img_landmarks in img_data:
        img = cv.imread(img_path)
        bbox = np.array(img_bbox)
        dets = convert_to_square(bbox)
        dets[:, 0:4] = np.round(dets[:, 0:4])
        h, w, c = img.shape
        [dy, edy, dx, edx, y, ey, x, ex, tmpw, tmph] = pad(dets, w, h)
        num_boxes = dets.shape[0]
        cropped_ims = np.zeros((num_boxes, 48, 48, 3), dtype=np.float32)
        for i in range(num_boxes):
            tmp = np.zeros((tmph[i], tmpw[i], 3), dtype=np.uint8)
            tmp[dy[i]:edy[i] + 1, dx[i]:edx[i] + 1, :] = \
                img[y[i]:ey[i] + 1, x[i]:ex[i] + 1, :]
            cropped_ims[i, :, :, :] = (cv.resize(tmp, (48, 48)) - 127.5) / 128
        t1 = time()
        # Optionally, cropped_ims can be fed in minibatches of batch_size,
        # padding the final short chunk by repeating indices so every feed
        # has exactly batch_size rows; here all crops are fed at once.
        pre_landmarks = sess.run(landmark, feed_dict={input_image: cropped_ims})
        print(time() - t1)
        w = bbox[:, 2] - bbox[:, 0] + 1
        h = bbox[:, 3] - bbox[:, 1] + 1
        # Scale the normalized landmark outputs back to image coordinates
        pre_landmarks[:, 0::2] = (np.tile(w, (5, 1)) * pre_landmarks[:, 0::2].T
                                  + np.tile(bbox[:, 0], (5, 1)) - 1).T
        pre_landmarks[:, 1::2] = (np.tile(h, (5, 1)) * pre_landmarks[:, 1::2].T
                                  + np.tile(bbox[:, 1], (5, 1)) - 1).T
        # Draw the ground-truth face boxes
        for i in range(bbox.shape[0]):
            box_gt = bbox[i, :4]
            corpbbox_gt = [int(box_gt[0]), int(box_gt[1]),
                           int(box_gt[2]), int(box_gt[3])]
            cv.rectangle(img, (corpbbox_gt[0], corpbbox_gt[1]),
                         (corpbbox_gt[2], corpbbox_gt[3]), (0, 225, 255), 2)
        # Draw the predicted (red) and annotated (yellow) landmarks
        for i in range(pre_landmarks.shape[0]):
            for j in range(len(pre_landmarks[i]) // 2):
                cv.circle(img, (int(pre_landmarks[i][2 * j]),
                                int(pre_landmarks[i][2 * j + 1])),
                          3, (0, 0, 255), -1)
                cv.circle(img, (int(img_landmarks[i][2 * j]),
                                int(img_landmarks[i][2 * j + 1])),
                          3, (0, 255, 255), -1)
        cv.imshow('show image', img)
        k = cv.waitKey(0) & 0xFF
        if k == ord('q'):
            break
    cv.destroyAllWindows()
def gen_onet_data(data_dir, anno_file, p_model_path, r_model_path, prefix=''):
    '''
    :param data_dir: train dataset dir
    :param anno_file: annotation file
    :param p_model_path: pnet model file
    :param r_model_path: rnet model file
    :param prefix: origin image root dir
    :return:
    '''
    neg_save_dir = os.path.join(data_dir, '48_train/negative')
    pos_save_dir = os.path.join(data_dir, '48_train/positive')
    part_save_dir = os.path.join(data_dir, '48_train/part')
    neg_save_dir_val = os.path.join(data_dir, '48_val/negative')
    pos_save_dir_val = os.path.join(data_dir, '48_val/positive')
    part_save_dir_val = os.path.join(data_dir, '48_val/part')
    neg_save_dir_test = os.path.join(data_dir, '48_test/negative')
    pos_save_dir_test = os.path.join(data_dir, '48_test/positive')
    part_save_dir_test = os.path.join(data_dir, '48_test/part')
    per_train = 0.7
    per_val = 0.2
    per_test = 0.1
    image_size = 48
    for dir_path in [neg_save_dir, pos_save_dir, part_save_dir,
                     neg_save_dir_val, pos_save_dir_val, part_save_dir_val,
                     neg_save_dir_test, pos_save_dir_test, part_save_dir_test]:
        if not os.path.exists(dir_path):
            os.makedirs(dir_path)  # makedirs: the parent dirs may not exist yet

    post_save_file = os.path.join(config.ANNO_STORE_DIR, config.ONET_POSITIVE_ANNO_FILENAME)
    neg_save_file = os.path.join(config.ANNO_STORE_DIR, config.ONET_NEGATIVE_ANNO_FILENAME)
    part_save_file = os.path.join(config.ANNO_STORE_DIR, config.ONET_PART_ANNO_FILENAME)
    post_save_test_file = os.path.join(config.ANNO_STORE_DIR, config.ONET_POSITIVE_TEST_ANNO_FILENAME)
    neg_save_test_file = os.path.join(config.ANNO_STORE_DIR, config.ONET_NEGATIVE_TEST_ANNO_FILENAME)
    part_save_test_file = os.path.join(config.ANNO_STORE_DIR, config.ONET_PART_TEST_ANNO_FILENAME)
    post_save_val_file = os.path.join(config.ANNO_STORE_DIR, config.ONET_POSITIVE_VALID_ANNO_FILENAME)
    neg_save_val_file = os.path.join(config.ANNO_STORE_DIR, config.ONET_NEGATIVE_VALID_ANNO_FILENAME)
    part_save_val_file = os.path.join(config.ANNO_STORE_DIR, config.ONET_PART_VALID_ANNO_FILENAME)
    f1 = open(post_save_file, 'w')
    f2 = open(neg_save_file, 'w')
    f3 = open(part_save_file, 'w')
    f1_test = open(post_save_test_file, 'w')
    f2_test = open(neg_save_test_file, 'w')
    f3_test = open(part_save_test_file, 'w')
    f1_val = open(post_save_val_file, 'w')
    f2_val = open(neg_save_val_file, 'w')
    f3_val = open(part_save_val_file, 'w')

    with open(anno_file, 'r') as f:
        annotations = f.readlines()
    random.shuffle(annotations)
    num = len(annotations)

    pnet, rnet = creat_prnet(p_model_path, r_model_path, 'cuda:1')
    prnetDetector = PRnetDetector(pnet=pnet, rnet=rnet, min_face_size=12)

    p_idx = 0
    n_idx = 0
    d_idx = 0
    image_done = 0
    all_boxes = list()
    for annotation in annotations[:10000]:
        try:
            annotation = annotation.strip().split(' ')
            path = os.path.join(prefix, annotation[0])
            # Ground-truth boxes; negatives come from detections that miss them
            bbox = list(map(float, annotation[1:]))
            boxes = np.array(bbox, dtype=np.float32).reshape(-1, 4)
            per = random.randint(0, 10000)  # random train/val/test assignment
            img = cv2.imread(path)
            b, boxes_align = prnetDetector.detect_face(img)
            if isinstance(boxes_align, tuple):
                continue
            if boxes_align is None:
                continue
            if boxes_align.shape[0] == 0:
                continue
            all_boxes.append(boxes_align)
            if image_done % 100 == 0:
                print("%d images done" % image_done)
            image_done += 1
            dets = convert_to_square(boxes_align)
            dets[:, 0:4] = np.round(dets[:, 0:4])
            for box in dets:
                x_left, y_top, x_right, y_bottom = box[0:4].astype(int)
                width = x_right - x_left + 1
                height = y_bottom - y_top + 1
                if width < 20 or x_left < 0 or y_top < 0 \
                        or x_right > img.shape[1] - 1 \
                        or y_bottom > img.shape[0] - 1:
                    continue
                Iou = IoU(box, boxes)
                cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
                resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                        interpolation=cv2.INTER_LINEAR)
                # Save negative images and write labels
                if np.max(Iou) < 0.3:  # IoU with all gts must be below 0.3
                    if per < 1000:
                        save_file = os.path.join(neg_save_dir_test, "%s.jpg" % n_idx)
                        f2_test.write(save_file + ' 0\n')
                    elif per < 3000:
                        save_file = os.path.join(neg_save_dir_val, "%s.jpg" % n_idx)
                        f2_val.write(save_file + ' 0\n')
                    else:
                        save_file = os.path.join(neg_save_dir, "%s.jpg" % n_idx)
                        f2.write(save_file + ' 0\n')
                    cv2.imwrite(save_file, resized_im)
                    n_idx += 1
                else:
                    # Find the gt box with the highest IoU
                    idx = np.argmax(Iou)
                    assigned_gt = boxes[idx]
                    x1, y1, x2, y2 = assigned_gt
                    # Compute the bbox regression labels
                    offset_x1 = (x1 - x_left) / float(width)
                    offset_y1 = (y1 - y_top) / float(height)
                    offset_x2 = (x2 - x_right) / float(width)
                    offset_y2 = (y2 - y_bottom) / float(height)
                    # Save positive and part-face images and write labels
                    if np.max(Iou) >= 0.65:
                        if per < 1000:
                            save_file = os.path.join(pos_save_dir_test, "%s.jpg" % p_idx)
                            f1_test.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n'
                                          % (offset_x1, offset_y1, offset_x2, offset_y2))
                        elif per < 3000:
                            save_file = os.path.join(pos_save_dir_val, "%s.jpg" % p_idx)
                            f1_val.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n'
                                         % (offset_x1, offset_y1, offset_x2, offset_y2))
                        else:
                            save_file = os.path.join(pos_save_dir, "%s.jpg" % p_idx)
                            f1.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n'
                                     % (offset_x1, offset_y1, offset_x2, offset_y2))
                        cv2.imwrite(save_file, resized_im)
                        p_idx += 1
                    elif np.max(Iou) >= 0.4:
                        # Part faces are named with their own counter, d_idx
                        if per < 1000:
                            save_file = os.path.join(part_save_dir_test, "%s.jpg" % d_idx)
                            f3_test.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n'
                                          % (offset_x1, offset_y1, offset_x2, offset_y2))
                        elif per < 3000:
                            save_file = os.path.join(part_save_dir_val, "%s.jpg" % d_idx)
                            f3_val.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n'
                                         % (offset_x1, offset_y1, offset_x2, offset_y2))
                        else:
                            save_file = os.path.join(part_save_dir, "%s.jpg" % d_idx)
                            f3.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n'
                                     % (offset_x1, offset_y1, offset_x2, offset_y2))
                        cv2.imwrite(save_file, resized_im)
                        d_idx += 1
        except RuntimeError as e:
            if 'out of memory' in str(e):
                print('| WARNING: ran out of memory')
                if hasattr(torch.cuda, 'empty_cache'):
                    torch.cuda.empty_cache()
            else:
                raise e
    f1.close()
    f2.close()
    f3.close()
    f1_val.close()
    f2_val.close()
    f3_val.close()
    f1_test.close()
    f2_test.close()
    f3_test.close()
def save_hard_example(target, data, save_path):
    # Load ground truth from the annotation file; each line has the format:
    # image/path [x1, y1, x2, y2] for every gt box in that image
    image_size = cfg.resize[target]
    im_idx_list = data['images']
    gt_boxes_list = data['bboxes']
    num_of_images = len(im_idx_list)

    neg_file = open(join(cfg.path_output_txt, '%s_neg.txt' % target), 'w')
    pos_file = open(join(cfg.path_output_txt, '%s_pos.txt' % target), 'w')
    part_file = open(join(cfg.path_output_txt, '%s_part.txt' % target), 'w')

    dirs = ['neg', 'part', 'pos']
    dirs = [join(cfg.path_output_files, '%s_%s' % (target, d)) for d in dirs]
    for d in dirs:
        if not os.path.exists(d):
            os.makedirs(d)

    det_boxes = pickle.load(open(save_path, 'rb'))
    assert len(det_boxes) == num_of_images, \
        "incorrect detections or ground truths"

    # Indices of neg, pos and part faces, used as their image names
    n_idx = 0
    p_idx = 0
    d_idx = 0
    # im_idx_list: image paths; det_boxes: detection results; gt_boxes_list: gts
    for im_idx, dets, gts in tqdm(zip(im_idx_list, det_boxes, gt_boxes_list),
                                  total=num_of_images):
        gts = np.array(gts, dtype=np.float32).reshape(-1, 4)
        if dets.shape[0] == 0:
            continue
        img = cv2.imread(im_idx)
        # Change the detections to squares
        dets = convert_to_square(dets)
        dets[:, 0:4] = np.round(dets[:, 0:4])
        neg_num = 0
        for box in dets:
            x_left, y_top, x_right, y_bottom, _ = box.astype(int)
            width = x_right - x_left + 1
            height = y_bottom - y_top + 1
            # Ignore boxes that are too small or reach beyond the image border
            if width < 20 or x_left < 0 or y_top < 0 \
                    or x_right > img.shape[1] - 1 or y_bottom > img.shape[0] - 1:
                continue
            # Intersection over union (IoU) between this box and all gt boxes
            Iou = IoU(box, gts)
            cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
            resized_im = cv2.resize(cropped_im, (image_size, image_size),
                                    interpolation=cv2.INTER_LINEAR)
            # Save negative images and write labels:
            # IoU with every gt must be below 0.3
            if np.max(Iou) < 0.3 and neg_num < 60:
                content = '0\n'
                SaveTxt(neg_file, target, n_idx, content, 'neg')
                SaveImg(resized_im, target, n_idx, 'neg')
                n_idx += 1
                neg_num += 1
            else:
                # Find the gt box with the highest IoU
                idx = np.argmax(Iou)
                assigned_gt = gts[idx]
                x1, y1, x2, y2 = assigned_gt
                # Compute the bbox regression labels
                offset_x1 = (x1 - x_left) / float(width)
                offset_y1 = (y1 - y_top) / float(height)
                offset_x2 = (x2 - x_right) / float(width)
                offset_y2 = (y2 - y_bottom) / float(height)
                # Save positive and part-face images and write labels
                if np.max(Iou) >= 0.65:
                    content = '1 %.2f %.2f %.2f %.2f\n' % (
                        offset_x1, offset_y1, offset_x2, offset_y2)
                    SaveTxt(pos_file, target, p_idx, content, 'pos')
                    SaveImg(resized_im, target, p_idx, 'pos')
                    p_idx += 1
                elif np.max(Iou) >= 0.4:
                    content = '-1 %.2f %.2f %.2f %.2f\n' % (
                        offset_x1, offset_y1, offset_x2, offset_y2)
                    SaveTxt(part_file, target, d_idx, content, 'part')
                    SaveImg(resized_im, target, d_idx, 'part')
                    d_idx += 1
    neg_file.close()
    part_file.close()
    pos_file.close()
def __onet_detect(self, image, rnet_boxes):
    # List to hold the cropped patches
    _img_dataset = []
    # Expand each R-network box around its center into a square along its
    # longest side
    _rnet_boxes = utils.convert_to_square(rnet_boxes)
    # For each box kept by the R-network: compute coordinates, crop, resize,
    # convert to a tensor, append, then stack
    for _box in _rnet_boxes:
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        # Crop by the coordinates
        img = image.crop((_x1, _y1, _x2, _y2))
        img = img.resize((48, 48))
        # Convert the crop to a tensor
        img_data = self.__image_transform(img)
        _img_dataset.append(img_data)
    # Stack: list of CHW tensors -> one NCHW batch
    img_dataset = torch.stack(_img_dataset)
    if self.isCuda:
        img_dataset = img_dataset.cuda()
    _cls, _offset = self.onet(img_dataset)
    cls = _cls.cpu().data.numpy()
    offset = _offset.cpu().data.numpy()
    # Results of the O-network stage
    boxes = []
    # o_cls = 0.97 is on the low side; for a final confidence standard of
    # 0.99999 a value like 0.99998 keeps essentially only faces. idxs are the
    # row indices, _ the column indices; together they locate the elements.
    idxs, _ = np.where(cls > o_cls)
    # Iterate over the qualifying boxes
    for idx in idxs:
        # Use the R-network box as the reference
        _box = _rnet_boxes[idx]
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        # Reference width; the box is square, so ow == oh
        ow = _x2 - _x1
        oh = _y2 - _y1
        # Final O-network box; during sampling the offset was
        # delta = (x1 - _x1) / side_len
        x1 = _x1 + ow * offset[idx][0]
        y1 = _y1 + oh * offset[idx][1]
        x2 = _x2 + ow * offset[idx][2]
        y2 = _y2 + oh * offset[idx][3]
        # Four coordinates plus one confidence
        boxes.append([x1, y1, x2, y2, cls[idx][0]])
    # NMS with the min-area IoU; boxes overlapping less than o_nms (0.7) survive
    return utils.nms(np.array(boxes), o_nms, isMin=True)
def __rnet_detect(self, image, pnet_boxes):
    # Empty list to hold the cropped patches
    _img_dataset = []
    # Expand each P-network box around its center into a square before cropping
    _pnet_boxes = utils.convert_to_square(pnet_boxes)
    # For each box: take its four coordinates, crop, resize, convert, append
    for _box in _pnet_boxes:
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        # Crop by the four coordinates
        img = image.crop((_x1, _y1, _x2, _y2))
        # Resize to the fixed input size
        img = img.resize((24, 24))
        # Image array -> tensor
        img_data = self.__image_transform(img)
        _img_dataset.append(img_data)
    # stack (default dim 0) turns the list of CHW tensors into an NCHW batch
    img_dataset = torch.stack(_img_dataset)
    if self.isCuda:
        img_dataset = img_dataset.cuda()
    # Feed the 24x24 crops through the network for another round of filtering
    _cls, _offset = self.rnet(img_dataset)
    # Move the GPU tensors to the CPU, then convert to numpy arrays
    cls = _cls.cpu().data.numpy()
    offset = _offset.cpu().data.numpy()
    # Boxes the R-network keeps
    boxes = []
    # A threshold of 0.6 is on the low side (many useless boxes survive;
    # print them to check) and can be raised. idxs are the row indices of
    # boxes whose confidence exceeds r_cls; the column indices (ignored)
    # complete the element positions.
    idxs, _ = np.where(cls > r_cls)
    # Iterate over the qualifying boxes
    for idx in idxs:
        _box = _pnet_boxes[idx]
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        # Width and height of the reference box
        ow = _x2 - _x1
        oh = _y2 - _y1
        # Map the offsets back to absolute coordinates
        x1 = _x1 + ow * offset[idx][0]
        y1 = _y1 + oh * offset[idx][1]
        x2 = _x2 + ow * offset[idx][2]
        y2 = _y2 + oh * offset[idx][3]
        # Four coordinates plus the confidence
        boxes.append([x1, y1, x2, y2, cls[idx][0]])
    # r_nms defaults to 0.5 and should be lowered (and the 0.6 threshold
    # raised); boxes whose overlap is below the threshold are kept
    return utils.nms(np.array(boxes), r_nms)
n_idx = 0
p_idx = 0
d_idx = 0
counter = 0
for image, boxes in tqdm(zip(im_idx_list, gt_boxes_list)):
    # dtype was changed from np.float32 to an integer type; np.int is
    # deprecated, so use the builtin int
    boxes = np.array(boxes, dtype=int).reshape(-1, 4)
    img = cv2.imread(image)
    dets, _ = mtcnn_detector.detect(img)
    if dets.shape[0] == 0:  # no boxes generated, skip this image
        continue
    dets = convert_to_square(dets)
    dets = dets.astype(int)
    neg_num = 0
    pos_num = 0
    part_num = 0
    for det in dets:
        x1, y1, x2, y2, _ = det
        wd = x2 - x1 + 1
        ht = y2 - y1 + 1
        # Skip boxes that are too small or reach beyond the image border
        # (x2 is checked against the image width, img.shape[1])
        if wd < 24 or ht < 24 or x1 < 0 or y1 < 0 or \
                x2 > img.shape[1] or y2 > img.shape[0]:
            continue
def __rnet_detect(self, image, pnet_boxes):
    # Empty list to hold the cropped patches
    _img_dataset = []
    # Expand each P-network box around its center into a square before cropping
    _pnet_boxes = utils.convert_to_square(pnet_boxes)
    # For each box: take its four coordinates, crop, resize, convert, append
    for _box in _pnet_boxes:
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        img = image.crop((_x1, _y1, _x2, _y2))  # crop by the four coordinates
        img = img.resize((24, 24))              # resize to the fixed input size
        img_data = self.data_for(img)           # image array -> tensor
        _img_dataset.append(img_data)
    # stack (default dim 0) turns the list of CHW tensors into an NCHW batch
    img_dataset = torch.stack(_img_dataset)
    if self.isCuda:
        img_dataset = img_dataset.cuda()
    # Run the batch through the R-network
    _cond, _offset_face, _offset_facial = self.rnet(img_dataset)
    cond = _cond.cpu().data.numpy()  # move to CPU, then convert to numpy
    offset_face = _offset_face.cpu().data.numpy()
    offset_facial = _offset_facial.cpu().data.numpy()
    boxes = []
    # A threshold of 0.6 is on the low side and can be raised; idxs are the
    # row indices of boxes whose confidence exceeds r_cls
    idxs, _ = np.where(cond > r_cls)
    # Vectorized alternative to the loop below:
    # _box = _pnet_boxes[idxs]
    # _x1 = _box[:, 0].astype(np.int32)
    # _y1 = _box[:, 1].astype(np.int32)
    # _x2 = _box[:, 2].astype(np.int32)
    # _y2 = _box[:, 3].astype(np.int32)
    # ow = _x2 - _x1
    # oh = _y2 - _y1
    # x1 = _x1 + ow * offset_face[idxs][:, 0]
    # y1 = _y1 + oh * offset_face[idxs][:, 1]
    # x2 = _x2 + ow * offset_face[idxs][:, 2]
    # y2 = _y2 + oh * offset_face[idxs][:, 3]
    # cls = cond[idxs][:, 0]
    # boxes_out = np.swapaxes(np.array([x1, y1, x2, y2, cls], dtype=np.float64), 1, 0)
    for idx in idxs:  # iterate over the qualifying boxes
        _box = _pnet_boxes[idx]
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        ow = _x2 - _x1  # reference box width
        oh = _y2 - _y1
        x1 = _x1 + ow * offset_face[idx][0]  # absolute box coordinates
        y1 = _y1 + oh * offset_face[idx][1]
        x2 = _x2 + ow * offset_face[idx][2]
        y2 = _y2 + oh * offset_face[idx][3]
        # Landmark decoding from offset_facial (drafted, not used yet):
        # lefteye_x = _x1 + offset_facial[0]    # left eye x
        # lefteye_y = _y1 + offset_facial[1]    # left eye y
        # righteye_x = _x1 + offset_facial[2]   # right eye x
        # righteye_y = _y1 + offset_facial[3]   # right eye y
        # nose_x = _x1 + offset_facial[4]       # nose x
        # nose_y = _y1 + offset_facial[5]       # nose y
        # leftmouth_x = _x1 + offset_facial[6]  # left mouth corner x
        # leftmouth_y = _y1 + offset_facial[7]  # left mouth corner y
        # rightmouth_x = _x1 + offset_facial[8] # right mouth corner x
        # rightmouth_y = _y1 + offset_facial[9] # right mouth corner y
        boxes.append([x1, y1, x2, y2, cond[idx][0]])  # coords + confidence
    # r_nms defaults to 0.5 and should be lowered (and the 0.6 threshold
    # raised); boxes whose overlap is below the threshold are kept
    return utils.nms(np.array(boxes), r_nms)
def save_hard_example(save_size, data, neg_dir, pos_dir, part_dir, detectors):
    '''Crop the original images with the boxes this network detected, to be
    used as input samples for the next network.'''
    im_idx_list = data['images']
    gt_boxes_list = data['bboxes']
    num_of_images = len(im_idx_list)

    # Label files for the three sample classes
    neg_label_file = "data/%d/neg_%d.txt" % (save_size, save_size)
    neg_file = open(neg_label_file, 'w')
    pos_label_file = "data/%d/pos_%d.txt" % (save_size, save_size)
    pos_file = open(pos_label_file, 'w')
    part_label_file = "data/%d/part_%d.txt" % (save_size, save_size)
    part_file = open(part_label_file, 'w')

    # Read the detection results
    det_boxes = detectors
    assert len(det_boxes) == num_of_images, "bboxes length equals not images"

    n_idx = 0
    p_idx = 0
    d_idx = 0
    image_done = 0
    for im_idx, dets, gts in tqdm(zip(im_idx_list, det_boxes, gt_boxes_list)):
        gts = np.array(gts, dtype=np.float32).reshape(-1, 4)
        image_done += 1
        if dets.shape[0] == 0:
            continue
        img = cv2.imread(im_idx)
        # Change the detections to squares
        dets = convert_to_square(dets)
        dets[:, 0:4] = np.round(dets[:, 0:4])
        neg_num = 0
        for box in dets:
            x_left, y_top, x_right, y_bottom, _ = box.astype(int)
            width = x_right - x_left + 1
            height = y_bottom - y_top + 1
            # Drop boxes that are too small or reach beyond the image border
            if width < 24 or x_left < 0 or y_top < 0 \
                    or x_right > img.shape[1] - 1 or y_bottom > img.shape[0] - 1:
                continue
            Iou = iou(box, gts)
            cropped_im = img[y_top:y_bottom + 1, x_left:x_right + 1, :]
            resized_im = cv2.resize(cropped_im, (save_size, save_size),
                                    interpolation=cv2.INTER_LINEAR)
            # Assign the sample class
            if np.max(Iou) < 0.3 and neg_num < 60:
                save_file = os.path.join(neg_dir, "%s.jpg" % n_idx)
                neg_file.write(save_file + ' 0\n')
                cv2.imwrite(save_file, resized_im)
                n_idx += 1
                neg_num += 1
            else:
                idx = np.argmax(Iou)
                assigned_gt = gts[idx]
                x1, y1, x2, y2 = assigned_gt
                # Regression offsets
                offset_x1 = (x1 - x_left) / float(width)
                offset_y1 = (y1 - y_top) / float(height)
                offset_x2 = (x2 - x_right) / float(width)
                offset_y2 = (y2 - y_bottom) / float(height)
                # pos and part samples
                if np.max(Iou) >= 0.65:
                    save_file = os.path.join(pos_dir, "%s.jpg" % p_idx)
                    pos_file.write(save_file + ' 1 %.2f %.2f %.2f %.2f\n' % (
                        offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    p_idx += 1
                elif np.max(Iou) >= 0.4:
                    save_file = os.path.join(part_dir, "%s.jpg" % d_idx)
                    part_file.write(save_file + ' -1 %.2f %.2f %.2f %.2f\n' % (
                        offset_x1, offset_y1, offset_x2, offset_y2))
                    cv2.imwrite(save_file, resized_im)
                    d_idx += 1
    neg_file.close()
    part_file.close()
    pos_file.close()
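# The hard-example miners above all score a detection against every ground
# truth with IoU(box, gts) / iou(box, gts). The helper itself is not included
# in this section; the following is a minimal sketch, assuming box is
# [x1, y1, x2, y2, ...] and gts is an (M, 4) array, with the same +1
# pixel-inclusive width/height convention used in the crop loops above.
import numpy as np

def IoU(box, gts):
    """IoU of one detection against all ground-truth boxes (sketch)."""
    box_area = (box[2] - box[0] + 1) * (box[3] - box[1] + 1)
    gt_area = (gts[:, 2] - gts[:, 0] + 1) * (gts[:, 3] - gts[:, 1] + 1)
    xx1 = np.maximum(box[0], gts[:, 0])
    yy1 = np.maximum(box[1], gts[:, 1])
    xx2 = np.minimum(box[2], gts[:, 2])
    yy2 = np.minimum(box[3], gts[:, 3])
    inter = np.maximum(0, xx2 - xx1 + 1) * np.maximum(0, yy2 - yy1 + 1)
    return inter / (box_area + gt_area - inter)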
def rotate(image):
    # Rotate landscape images into portrait orientation
    w, h = image.size
    if w > h:
        return image.rotate(-90, expand=True)
    return image  # already portrait; return the image unchanged


if __name__ == '__main__':
    image_path = r"test_images"
    output_path = r"output"
    for i in os.listdir(image_path):
        detector = Detector()
        with Image.open(os.path.join(image_path, i)) as im:
            print(i)
            print("----------------------------")
            im.load()
            im = rotate(im)
            boxes = detector.detect(im)
            boxes = utils.convert_to_square(boxes)
            print("size:", im.size)
            imDraw = ImageDraw.Draw(im)
            cx = im.size[0] / 2
            cy = im.size[1] / 2
            for box in boxes:
                x1 = int(box[0])
                y1 = int(box[1])
                x2 = int(box[2])
                y2 = int(box[3])
                le_x = int(box[5])
                le_y = int(box[6])
                re_x = int(box[7])
                re_y = int(box[8])
                n_x = int(box[9])
                n_y = int(box[10])