def rnet_boxes(img, rnet, bounding_boxes, thresholds=THRESHOLDS,
               nms_thresholds=NMS_THRESHOLDS, show_boxes=True):
    rnet.eval()
    # Crop every P-Net proposal out of the image and resize it to 24x24.
    img_boxes = get_image_boxes(bounding_boxes, img, size=24)
    img_boxes = torch.FloatTensor(img_boxes)
    img_boxes = img_boxes.to(
        torch.device('cuda' if torch.cuda.is_available() else 'cpu'))
    output = rnet(img_boxes)
    probs = output[0].data.cpu().numpy()    # shape [n_boxes, 1]
    offsets = output[1].data.cpu().numpy()  # shape [n_boxes, 4]

    # Keep only boxes whose face probability clears the R-Net threshold.
    keep = np.where(probs[:, 0] > thresholds[1])[0]
    bounding_boxes = bounding_boxes[keep]
    bounding_boxes[:, 4] = probs[keep, 0].reshape((-1,))
    offsets = offsets[keep]

    # Suppress overlapping boxes, then refine the survivors with the
    # regression offsets and square them up for the O-Net crops.
    keep = nms(bounding_boxes, nms_thresholds[1])
    bounding_boxes = bounding_boxes[keep]
    bounding_boxes = calibrate_box(bounding_boxes, offsets[keep])
    bounding_boxes = convert_to_square(bounding_boxes)
    bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
    if show_boxes:
        show_bboxes(img, bounding_boxes, []).show()
    return bounding_boxes
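# Both stages above call convert_to_square before cropping for the next
# network. A minimal sketch of what that helper typically does in MTCNN
# pipelines (assumed behavior for illustration; the repo's own util may differ
# in details): expand the shorter side of each box around its center so the
# crop is square and keeps its aspect ratio.
import numpy as np

def convert_to_square_sketch(bboxes):
    """bboxes: float array [n, 5] of x1, y1, x2, y2, score."""
    square = bboxes.copy()
    w = bboxes[:, 2] - bboxes[:, 0] + 1.0
    h = bboxes[:, 3] - bboxes[:, 1] + 1.0
    side = np.maximum(w, h)
    # Re-center the square on the original box center.
    square[:, 0] = bboxes[:, 0] + w * 0.5 - side * 0.5
    square[:, 1] = bboxes[:, 1] + h * 0.5 - side * 0.5
    square[:, 2] = square[:, 0] + side - 1.0
    square[:, 3] = square[:, 1] + side - 1.0
    return square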
def pnet_boxes(img, pnet, min_face_size=MIN_FACE_SIZE, thresholds=THRESHOLDS,
               nms_thresholds=NMS_THRESHOLDS, show_boxes=True):
    pnet.eval()
    width, height = img.size
    min_length = min(height, width)
    min_detection_size = 12
    factor = 0.707  # sqrt(0.5)
    scales = []
    # Scale the image so that the smallest face we want to detect
    # (min_face_size) maps onto P-Net's 12-px receptive field.
    m = min_detection_size / min_face_size
    min_length *= m
    # Build the image pyramid: keep shrinking by `factor` until the whole
    # image is down at 12 px, recording each scale along the way.
    factor_count = 0
    while min_length > min_detection_size:
        scales.append(m * factor ** factor_count)
        min_length *= factor
        factor_count += 1

    # STAGE 1: run P-Net at every scale and pool the proposals.
    bounding_boxes = []
    for s in scales:
        boxes = run_first_stage(img, pnet, scale=s, threshold=thresholds[0])
        bounding_boxes.append(boxes)
    # Drop scales that produced nothing and stack the rest into one
    # [total_boxes_num, 9] array (x1, y1, x2, y2, score, 4 offsets).
    bounding_boxes = [i for i in bounding_boxes if i is not None]
    if len(bounding_boxes) == 0:
        return None
    bounding_boxes = np.vstack(bounding_boxes)

    keep = nms(bounding_boxes[:, 0:5], nms_thresholds[0])
    bounding_boxes = bounding_boxes[keep]
    # Refine x1, y1, x2, y2 with the regression offsets (scaled by w and h),
    # then square the boxes up for the R-Net crops.
    bounding_boxes = calibrate_box(bounding_boxes[:, 0:5],
                                   bounding_boxes[:, 5:])
    bounding_boxes = convert_to_square(bounding_boxes)
    bounding_boxes[:, 0:4] = np.round(bounding_boxes[:, 0:4])
    if show_boxes:
        show_bboxes(img, bounding_boxes, []).show()
    return bounding_boxes
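# A minimal usage sketch chaining the two stages above. It assumes `pnet` and
# `rnet` are already-constructed network instances with loaded weights (how
# they are built and loaded is repo-specific and not shown here).
from PIL import Image

def detect_two_stage(img_path, pnet, rnet):
    img = Image.open(img_path).convert('RGB')
    # Stage 1: P-Net proposals over the image pyramid.
    boxes = pnet_boxes(img, pnet, show_boxes=False)
    if boxes is None or len(boxes) == 0:
        return None
    # Stage 2: R-Net refinement of the surviving proposals.
    return rnet_boxes(img, rnet, boxes, show_boxes=False)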
def __rnet_detect(self, image, pnet_boxes):
    # Square up the P-Net boxes so the 24x24 crops keep their aspect ratio.
    pnet_boxes_ = util.convert_to_square(pnet_boxes)
    img_datas = []
    for box in pnet_boxes_:
        x1_ = int(box[0])
        y1_ = int(box[1])
        x2_ = int(box[2])
        y2_ = int(box[3])
        img = image.crop((x1_, y1_, x2_, y2_))
        # Image.LANCZOS replaces Image.ANTIALIAS, which was removed in Pillow 10.
        img = img.resize((24, 24), Image.LANCZOS)
        img_data = self.transform(img)
        img_datas.append(img_data)
    img_datas = torch.stack(img_datas)

    # cond has shape (batch, 1); offset has shape (batch, 4).
    cond, offset = self.r_net(img_datas)
    cond = cond.detach().numpy()
    offset = offset.detach().numpy()

    # Keep crops whose confidence clears the R-Net threshold and map their
    # regression offsets back to absolute coordinates.
    indices, _ = np.where(cond > 0.7)
    boxes = []
    for index in indices:
        box = pnet_boxes_[index]
        _x1 = int(box[0])
        _y1 = int(box[1])
        _x2 = int(box[2])
        _y2 = int(box[3])
        ow = _x2 - _x1
        oh = _y2 - _y1
        x1 = offset[index][0] * ow + _x1
        y1 = offset[index][1] * oh + _y1
        x2 = offset[index][2] * ow + _x2
        y2 = offset[index][3] * oh + _y2
        boxes.append([x1, y1, x2, y2, cond[index][0]])
    return util.NMS(np.array(boxes), thresh=0.5)
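# The per-index loop above applies the bounding-box regression offsets one box
# at a time. A vectorized sketch of the same arithmetic (same formulas as in
# __rnet_detect and __onet_detect, just NumPy broadcasting; `boxes` is [n, 4+]
# and `offsets` is [n, 4]):
import numpy as np

def apply_offsets(boxes, offsets):
    ow = boxes[:, 2] - boxes[:, 0]   # box widths
    oh = boxes[:, 3] - boxes[:, 1]   # box heights
    out = boxes[:, :4].astype(np.float64).copy()
    out[:, 0] += offsets[:, 0] * ow  # x1
    out[:, 1] += offsets[:, 1] * oh  # y1
    out[:, 2] += offsets[:, 2] * ow  # x2
    out[:, 3] += offsets[:, 3] * oh  # y2
    return out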
def __onet_detect(self, image, rnet_boxes):
    _img_dataset = []
    _rnet_boxes = util.convert_to_square(rnet_boxes)
    for _box in _rnet_boxes:
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        img = image.crop((_x1, _y1, _x2, _y2))
        # Image.LANCZOS replaces Image.ANTIALIAS, which was removed in Pillow 10.
        img = img.resize((48, 48), Image.LANCZOS)
        img_data = self.transform(img)
        _img_dataset.append(img_data)
    img_dataset = torch.stack(_img_dataset)

    _cls, _offset = self.o_net(img_dataset)
    cls = _cls.detach().numpy()
    offset = _offset.detach().numpy()

    # O-Net uses a stricter confidence threshold than the earlier stages.
    boxes = []
    idxs, _ = np.where(cls > 0.97)
    for idx in idxs:
        _box = _rnet_boxes[idx]
        _x1 = int(_box[0])
        _y1 = int(_box[1])
        _x2 = int(_box[2])
        _y2 = int(_box[3])
        ow = _x2 - _x1
        oh = _y2 - _y1
        x1 = _x1 + ow * offset[idx][0]
        y1 = _y1 + oh * offset[idx][1]
        x2 = _x2 + ow * offset[idx][2]
        y2 = _y2 + oh * offset[idx][3]
        boxes.append([x1, y1, x2, y2, cls[idx][0]])
    # The final NMS computes IoU as intersection / min(area), which also
    # suppresses boxes nested inside larger ones.
    return util.NMS(np.array(boxes), isMin=True, thresh=0.7)
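# A minimal sketch of an NMS supporting the intersection-over-minimum mode the
# comment above describes (assumed to mirror util.NMS for illustration; boxes
# are [n, 5] with the score in column 4):
import numpy as np

def nms_sketch(boxes, thresh=0.7, is_min=False):
    if len(boxes) == 0:
        return np.array([])
    order = np.argsort(-boxes[:, 4])  # indices, highest score first
    keep = []
    while len(order) > 0:
        i = order[0]
        keep.append(boxes[i])
        rest = order[1:]
        # Intersection between the current box and all remaining boxes.
        xx1 = np.maximum(boxes[i, 0], boxes[rest, 0])
        yy1 = np.maximum(boxes[i, 1], boxes[rest, 1])
        xx2 = np.minimum(boxes[i, 2], boxes[rest, 2])
        yy2 = np.minimum(boxes[i, 3], boxes[rest, 3])
        inter = np.maximum(0, xx2 - xx1) * np.maximum(0, yy2 - yy1)
        area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
        areas = (boxes[rest, 2] - boxes[rest, 0]) * (boxes[rest, 3] - boxes[rest, 1])
        # is_min=True: intersection / min(area); otherwise standard IoU.
        denom = np.minimum(area_i, areas) if is_min else (area_i + areas - inter)
        order = rest[inter / denom < thresh]
    return np.stack(keep)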
def landmark_dataset(landmark_faces, output_path, save_dir_name, crop_size):
    """
    :param landmark_faces: list of [absolute_img_path, [x1, x2, y1, y2],
        (x, y) pairs for [left_eye, right_eye, nose, mouth_left, mouth_right]]
    :param output_path: path to save the dataset dir
    :param save_dir_name:
    :param crop_size: resize each face crop to this size
    :return: saves the landmark dataset at output_path/save_dir_name/landmark,
        and the .txt index at output_path/save_dir_name/save_dir_name.txt
    """
    # boxes: [sample_num, 4] in the annotation order x1, x2, y1, y2
    # (CNN facial point annotations; origin (0, 0) at the top-left).
    boxes = np.array(
        [landmark_faces[i][1] for i in range(len(landmark_faces))])
    # Reorder to the two-corner layout x1, y1, x2, y2: [sample_num, 4].
    boxes_two_point = np.array(
        [boxes[:, 0], boxes[:, 2], boxes[:, 1], boxes[:, 3]]).T
    square_boxes = convert_to_square(boxes_two_point)
    # landmark: [sample_num, 10]
    landmark = np.array(
        [landmark_faces[i][2] for i in range(len(landmark_faces))])
    # square_boxes_length: [sample_num]
    square_boxes_length = square_boxes[:, 2] - square_boxes[:, 0] + 1
    # offset: [sample_num, 4], box corners relative to the square crop.
    offset = np.array([
        (boxes_two_point[:, 0] - square_boxes[:, 0]) / square_boxes_length,
        (boxes_two_point[:, 1] - square_boxes[:, 1]) / square_boxes_length,
        (boxes_two_point[:, 2] - square_boxes[:, 2]) / square_boxes_length,
        (boxes_two_point[:, 3] - square_boxes[:, 3]) / square_boxes_length,
    ]).T
    # Normalize the landmarks into the square crop: x values are offset by the
    # square's x1 (even columns), y values by its y1 (odd columns).
    landmark = np.array([
        (landmark[:, i] - square_boxes[:, i % 2]) / square_boxes_length
        for i in range(landmark.shape[1])
    ]).T
    landmark_faces_path = [
        landmark_faces[i][0] for i in range(len(landmark_faces))
    ]
    dataset_txt_save_path = osp.join(output_path, save_dir_name,
                                     save_dir_name + '.txt')
    dataset_save_path = osp.join(output_path, save_dir_name, 'landmark/')
    # makedirs also creates output_path/save_dir_name if it is missing.
    os.makedirs(dataset_save_path, exist_ok=True)
    with open(dataset_txt_save_path, 'a') as f:
        for img_path, sqbx, ofst, ldmk in zip(landmark_faces_path,
                                              square_boxes, offset, landmark):
            file_name = osp.split(img_path)[1]
            img = Image.open(img_path)
            img = img.convert('RGB')
            # np.array(img) is h x w x c; slice rows by y, columns by x.
            sqbx = sqbx.astype(int)
            img_np_crop = np.array(img)[sqbx[1]:sqbx[3], sqbx[0]:sqbx[2], :]
            img_resized = Image.fromarray(img_np_crop).resize(
                (crop_size, crop_size))
            img_resized.save(osp.join(dataset_save_path, file_name))
            ofst_str = ' '.join(str(v) for v in ofst)
            ldmk_str = ' '.join(str(v) for v in ldmk)
            f.write('landmark/' + file_name +
                    ' l {} {}'.format(ofst_str, ldmk_str) + '\n')
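# A hypothetical call to landmark_dataset, included only to document the
# expected input layout; the path and every coordinate below are made up.
if __name__ == '__main__':
    example_faces = [
        ['/data/faces/000001.jpg',
         [95, 226, 71, 313],             # bbox in annotation order x1, x2, y1, y2
         [165, 184, 244, 176, 196, 249,  # (x, y) for left eye, right eye, nose,
          194, 271, 266, 260]],          # mouth-left, mouth-right
    ]
    landmark_dataset(example_faces, output_path='./datasets',
                     save_dir_name='48', crop_size=48)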