class DataWriter:
    def __init__(self, cfg, opt, save_video=False,
                 video_save_opt=DEFAULT_VIDEO_SAVE_OPT,
                 queueSize=1024):
        self.cfg = cfg
        self.opt = opt
        self.video_save_opt = video_save_opt

        self.eval_joints = EVAL_JOINTS
        self.save_video = save_video
        self.heatmap_to_coord = get_func_heatmap_to_coord(cfg)
        # initialize the queue used to store frames read from
        # the video file
        if opt.sp:
            self.result_queue = Queue(maxsize=queueSize)
        else:
            self.result_queue = mp.Queue(maxsize=queueSize)

        if opt.save_img:
            if not os.path.exists(opt.outputpath + '/vis'):
                os.mkdir(opt.outputpath + '/vis')

        if opt.pose_flow:
            from trackers.PoseFlow.poseflow_infer import PoseFlowWrapper
            self.pose_flow_wrapper = PoseFlowWrapper(
                save_path=os.path.join(opt.outputpath, 'poseflow'))

    def start_worker(self, target):
        if self.opt.sp:
            p = Thread(target=target, args=())
        else:
            p = mp.Process(target=target, args=())
        # p.daemon = True
        p.start()
        return p

    def start(self):
        # start a thread to read pose estimation results per frame
        self.result_worker = self.start_worker(self.update)
        return self

    def update(self):
        final_result = []
        norm_type = self.cfg.LOSS.get('NORM_TYPE', None)
        hm_size = self.cfg.DATA_PRESET.HEATMAP_SIZE
        if self.save_video:
            # initialize the file video stream, adapt output video resolution to original video
            stream = cv2.VideoWriter(*[self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize']])
            if not stream.isOpened():
                print("Try to use other video encoders...")
                ext = self.video_save_opt['savepath'].split('.')[-1]
                fourcc, _ext = self.recognize_video_ext(ext)
                self.video_save_opt['fourcc'] = fourcc
                self.video_save_opt['savepath'] = self.video_save_opt['savepath'][:-4] + _ext
                stream = cv2.VideoWriter(*[self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize']])
            assert stream.isOpened(), 'Cannot open video for writing'
        # keep looping infinitely
        while True:
            # ensure the queue is not empty and get item
            (boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name) = self.wait_and_get(self.result_queue)
            if orig_img is None:
                # if the thread indicator variable is set (img is None), stop the thread
                if self.save_video:
                    stream.release()
                write_json(final_result, self.opt.outputpath, form=self.opt.format, for_eval=self.opt.eval)
                print("Results have been written to json.")
                return
            # image channel RGB->BGR
            orig_img = np.array(orig_img, dtype=np.uint8)[:, :, ::-1]
            if boxes is None or len(boxes) == 0:
                if self.opt.save_img or self.save_video or self.opt.vis:
                    self.write_image(orig_img, im_name, stream=stream if self.save_video else None)
            else:
                # location prediction (n, kp, 2) | score prediction (n, kp, 1)
                assert hm_data.dim() == 4
                # pred = hm_data.cpu().data.numpy()
                if hm_data.size()[1] == 136:
                    self.eval_joints = [*range(0, 136)]
                elif hm_data.size()[1] == 26:
                    self.eval_joints = [*range(0, 26)]
                pose_coords = []
                pose_scores = []
                for i in range(hm_data.shape[0]):
                    bbox = cropped_boxes[i].tolist()
                    pose_coord, pose_score = self.heatmap_to_coord(hm_data[i][self.eval_joints], bbox, hm_shape=hm_size, norm_type=norm_type)
                    pose_coords.append(torch.from_numpy(pose_coord).unsqueeze(0))
                    pose_scores.append(torch.from_numpy(pose_score).unsqueeze(0))
                preds_img = torch.cat(pose_coords)
                preds_scores = torch.cat(pose_scores)
                if not self.opt.pose_track:
                    boxes, scores, ids, preds_img, preds_scores, pick_ids = \
                        pose_nms(boxes, scores, ids, preds_img, preds_scores, self.opt.min_box_area)

                _result = []
                for k in range(len(scores)):
                    _result.append({
                        'keypoints': preds_img[k],
                        'kp_score': preds_scores[k],
                        'proposal_score': torch.mean(preds_scores[k]) + scores[k] + 1.25 * max(preds_scores[k]),
                        'idx': ids[k],
                        'box': [boxes[k][0], boxes[k][1],
                                boxes[k][2] - boxes[k][0], boxes[k][3] - boxes[k][1]]
                    })

                result = {
                    'imgname': im_name,
                    'result': _result
                }

                if self.opt.pose_flow:
                    poseflow_result = self.pose_flow_wrapper.step(orig_img, result)
                    for i in range(len(poseflow_result)):
                        result['result'][i]['idx'] = poseflow_result[i]['idx']

                final_result.append(result)
                if self.opt.save_img or self.save_video or self.opt.vis:
                    if hm_data.size()[1] == 49:
                        from alphapose.utils.vis import vis_frame_dense as vis_frame
                    elif self.opt.vis_fast:
                        from alphapose.utils.vis import vis_frame_fast as vis_frame
                    else:
                        from alphapose.utils.vis import vis_frame
                    img = vis_frame(orig_img, result, self.opt)
                    self.write_image(img, im_name, stream=stream if self.save_video else None)

    def write_image(self, img, im_name, stream=None):
        if self.opt.vis:
            cv2.imshow("AlphaPose Demo", img)
            cv2.waitKey(30)
        if self.opt.save_img:
            cv2.imwrite(os.path.join(self.opt.outputpath, 'vis', im_name), img)
        if self.save_video:
            stream.write(img)

    def wait_and_put(self, queue, item):
        queue.put(item)

    def wait_and_get(self, queue):
        return queue.get()

    def save(self, boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name):
        # save next frame in the queue
        self.wait_and_put(self.result_queue, (boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name))

    def running(self):
        # indicate that the thread is still running
        return not self.result_queue.empty()

    def count(self):
        # indicate the remaining images
        return self.result_queue.qsize()

    def stop(self):
        # indicate that the thread should be stopped
        self.save(None, None, None, None, None, None, None)
        self.result_worker.join()

    def terminate(self):
        # directly terminate
        self.result_worker.terminate()

    def clear_queues(self):
        self.clear(self.result_queue)

    def clear(self, queue):
        while not queue.empty():
            queue.get()

    def results(self):
        # return final result
        print(self.final_result)
        return self.final_result

    def recognize_video_ext(self, ext=''):
        if ext == 'mp4':
            return cv2.VideoWriter_fourcc(*'mp4v'), '.' + ext
        elif ext == 'avi':
            return cv2.VideoWriter_fourcc(*'XVID'), '.' + ext
        elif ext == 'mov':
            return cv2.VideoWriter_fourcc(*'XVID'), '.' + ext
        else:
            print("Unknown video format {}, will use .mp4 instead".format(ext))
            return cv2.VideoWriter_fourcc(*'mp4v'), '.mp4'
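# --- Usage sketch (not part of the original file) ----------------------------
# A minimal sketch of how this DataWriter is typically driven: a producer
# pushes per-frame pose results with save(), and stop() sends the None
# sentinel that makes update() flush the video stream and dump the JSON.
# `cfg` and `opt` are assumed to come from the usual AlphaPose config and
# argument parsing; `frame_iter` is a hypothetical iterable of per-frame
# outputs and is not defined here.
#
#   writer = DataWriter(cfg, opt, save_video=False).start()
#   for boxes, scores, ids, hm, cropped_boxes, img, name in frame_iter:
#       writer.save(boxes, scores, ids, hm, cropped_boxes, img, name)
#   while writer.running():          # wait for the result queue to drain
#       time.sleep(0.1)
#   writer.stop()                    # joins the worker after the sentinel
# ------------------------------------------------------------------------------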
class DataWriter():
    def __init__(self, cfg, opt, save_video=False,
                 video_save_opt=DEFAULT_VIDEO_SAVE_OPT,
                 queueSize=1024):
        self.cfg = cfg
        self.opt = opt
        self.video_save_opt = video_save_opt

        self.eval_joints = EVAL_JOINTS
        self.save_video = save_video
        self.heatmap_to_coord = get_func_heatmap_to_coord(cfg)
        # initialize the queue used to store frames read from
        # the video file
        if opt.sp:
            self.result_queue = Queue(maxsize=queueSize)
        else:
            self.result_queue = mp.Queue(maxsize=queueSize)

        # if opt.save_img:
        #     if not os.path.exists(opt.outputpath + '/vis'):
        #         os.mkdir(opt.outputpath + '/vis')

        if opt.pose_flow:
            from trackers.PoseFlow.poseflow_infer import PoseFlowWrapper
            self.pose_flow_wrapper = PoseFlowWrapper(
                save_path=os.path.join(opt.outputpath, 'poseflow'))

    def start_worker(self, target):
        if self.opt.sp:
            p = Thread(target=target, args=())
        else:
            p = mp.Process(target=target, args=())
        # p.daemon = True
        p.start()
        return p

    def start(self):
        # start a thread to read pose estimation results per frame
        self.result_worker = self.start_worker(self.update)
        return self

    def update(self):
        final_result = []
        norm_type = self.cfg.LOSS.get('NORM_TYPE', None)
        hm_size = self.cfg.DATA_PRESET.HEATMAP_SIZE
        if self.save_video:
            # initialize the file video stream, adapt output video resolution to original video
            stream = cv2.VideoWriter(*[self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize']])
            if not stream.isOpened():
                print("Try to use other video encoders...")
                ext = self.video_save_opt['savepath'].split('.')[-1]
                fourcc, _ext = self.recognize_video_ext(ext)
                self.video_save_opt['fourcc'] = fourcc
                self.video_save_opt['savepath'] = self.video_save_opt['savepath'][:-4] + _ext
                stream = cv2.VideoWriter(*[self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize']])
            assert stream.isOpened(), 'Cannot open video for writing'
        # keep looping infinitely
        while True:
            # ensure the queue is not empty and get item
            (boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name) = self.wait_and_get(self.result_queue)
            if orig_img is None:
                # if the thread indicator variable is set (img is None), stop the thread
                if self.save_video:
                    stream.release()
                if self.opt.save_json:
                    pathjson = os.path.join(self.opt.outputpath, 'pose_estimation')
                    os.makedirs(pathjson, exist_ok=True)
                    write_json(final_result, pathjson, form=self.opt.format, for_eval=self.opt.eval)
                    print("Results have been written to json.")
                else:
                    print('JSON file not saved. If needed, enable the save_json option.')
                return
            # image channel RGB->BGR
            if self.opt.save_RGB:
                # case: draw the labels on the original image (default)
                orig_img = np.array(orig_img, dtype=np.uint8)[:, :, ::-1]
                # print('case: drawing the labels on the original image')
            if self.opt.save_black:
                # case: draw the labels on a black background
                black_img = np.array(orig_img, dtype=np.uint8)[:, :, ::-1]
                black_img[:, :, :] = [0, 0, 0]
                # print('case: drawing the labels on a black image')
            if self.opt.save_other:
                # case: draw the labels on another, user-chosen image
                Pathlab = self.opt.path_other
                other_img = cv2.imread(Pathlab + im_name, cv2.IMREAD_UNCHANGED)
                if other_img is None:
                    # cv2.imread returns None when the path is wrong or the file name differs
                    print('\033[91m', 'PATH TO OTHER DATA NOT DEFINED OR THE NAME IS DIFFERENT', '\033[0m')
                    sys.exit(1)
                # print('case: drawing the labels on a chosen image')
            if boxes is None or len(boxes) == 0:
                if self.opt.save_img or self.save_video or self.opt.vis:
                    if self.opt.save_prev_images:
                        # cas=1: save the plain original frame to the RGB folder
                        self.write_image(orig_img, im_name, 1, stream=stream if self.save_video else None)
            else:
                assert hm_data.dim() == 4
                if hm_data.size()[1] == 136:
                    self.eval_joints = [*range(0, 136)]
                elif hm_data.size()[1] == 26:
                    self.eval_joints = [*range(0, 26)]
                pose_coords = []
                pose_scores = []
                for i in range(hm_data.shape[0]):
                    bbox = cropped_boxes[i].tolist()
                    pose_coord, pose_score = self.heatmap_to_coord(hm_data[i][self.eval_joints], bbox, hm_shape=hm_size, norm_type=norm_type)
                    pose_coords.append(torch.from_numpy(pose_coord).unsqueeze(0))
                    pose_scores.append(torch.from_numpy(pose_score).unsqueeze(0))
                preds_img = torch.cat(pose_coords)
                preds_scores = torch.cat(pose_scores)
                if not self.opt.pose_track:
                    boxes, scores, ids, preds_img, preds_scores, pick_ids = \
                        pose_nms(boxes, scores, ids, preds_img, preds_scores, self.opt.min_box_area)

                _result = []
                for k in range(len(scores)):
                    _result.append({
                        'keypoints': preds_img[k],  # multiplying by 2 would shift positions for an image twice as large
                        'kp_score': preds_scores[k],
                        # 'proposal_score': torch.mean(preds_scores[k]) + scores[k] + 1.25 * max(preds_scores[k]),
                        'idx': ids[k],
                        'box': [boxes[k][0], boxes[k][1],
                                boxes[k][2] - boxes[k][0], boxes[k][3] - boxes[k][1]]
                    })

                result = {
                    'imgname': im_name,
                    'result': _result
                }

                if self.opt.pose_flow:
                    poseflow_result = self.pose_flow_wrapper.step(orig_img, result)
                    for i in range(len(poseflow_result)):
                        result['result'][i]['idx'] = poseflow_result[i]['idx']

                final_result.append(result)
                if self.opt.save_img or self.save_video or self.opt.vis:
                    if hm_data.size()[1] == 49:
                        from alphapose.utils.vis_pose_estimation import vis_frame_dense as vis_frame
                    elif self.opt.vis_fast:
                        from alphapose.utils.vis_pose_estimation import vis_frame_fast as vis_frame
                    else:
                        from alphapose.utils.vis_pose_estimation import vis_frame, vis_frame_bbox
                    # never write the original image together with the output in this program
                    if self.opt.save_RGB:
                        img = vis_frame(orig_img, result, self.opt)
                        cas = 1
                        self.write_image(img, im_name, 1, stream=stream if self.save_video else None)
                    if self.opt.save_black:
                        img2 = vis_frame(black_img, result, self.opt)
                        self.write_image(img2, im_name, 2, stream=stream if self.save_video else None)
                    if self.opt.save_other:
                        img3 = vis_frame(other_img, result, self.opt)
                        self.write_image(img3, im_name, 3, stream=stream if self.save_video else None)
                else:
                    print('Predictions drawn on the input image are not saved. If needed, change --save_prev_images to True.')
                # SAVE ALL DETECTED BBOX AS NEW IMAGE
                # vis_frame_bbox(orig_img, result, self.opt, im_name)

    def write_image(self, img, im_name, cas, stream=None):
        if self.opt.vis:
            cv2.imshow("AlphaPose Demo", img)
            cv2.waitKey(30)
        if self.opt.save_img:
            if cas == 1:
                path = os.path.join(self.opt.outputpath, 'pose_estimation', 'vis_pose_est_RGB')
                os.makedirs(path, exist_ok=True)
                cv2.imwrite(os.path.join(path, im_name), img)
            if cas == 2:
                path = os.path.join(self.opt.outputpath, 'pose_estimation', 'vis_pose_est_black')
                os.makedirs(path, exist_ok=True)
                cv2.imwrite(os.path.join(path, im_name), img)
            if cas == 3:
                path = os.path.join(self.opt.outputpath, 'pose_estimation', 'vis_pose_est_other')
                os.makedirs(path, exist_ok=True)
                cv2.imwrite(os.path.join(path, im_name), img)
        if self.save_video:
            stream.write(img)

    def wait_and_put(self, queue, item):
        queue.put(item)

    def wait_and_get(self, queue):
        return queue.get()

    def save(self, boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name):
        # save next frame in the queue
        self.wait_and_put(self.result_queue, (boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name))

    def running(self):
        # indicate that the thread is still running
        return not self.result_queue.empty()

    def count(self):
        # indicate the remaining images
        return self.result_queue.qsize()

    def stop(self):
        # indicate that the thread should be stopped
        self.save(None, None, None, None, None, None, None)
        self.result_worker.join()

    def terminate(self):
        # directly terminate
        self.result_worker.terminate()

    def clear_queues(self):
        self.clear(self.result_queue)

    def clear(self, queue):
        while not queue.empty():
            queue.get()

    def results(self):
        # return final result
        print(self.final_result)
        return self.final_result

    def recognize_video_ext(self, ext=''):
        if ext == 'mp4':
            return cv2.VideoWriter_fourcc(*'mp4v'), '.' + ext
        elif ext == 'avi':
            return cv2.VideoWriter_fourcc(*'XVID'), '.' + ext
        elif ext == 'mov':
            return cv2.VideoWriter_fourcc(*'XVID'), '.' + ext
        else:
            print("Unknown video format {}, will use .mp4 instead".format(ext))
            return cv2.VideoWriter_fourcc(*'mp4v'), '.mp4'
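# --- Output-mode sketch (not part of the original file) -----------------------
# This variant can render the same skeleton on three canvases, selected by the
# opt flags used above (save_RGB / save_black / save_other).  A minimal
# illustration of how the three canvases are prepared, assuming `orig_img` is
# an RGB uint8 array and `opt.path_other` points at a folder holding images
# whose file names match `im_name`:
#
#   rgb_canvas   = np.array(orig_img, dtype=np.uint8)[:, :, ::-1]   # RGB -> BGR copy
#   black_canvas = np.zeros_like(rgb_canvas)                        # same size, all black
#   other_canvas = cv2.imread(opt.path_other + im_name, cv2.IMREAD_UNCHANGED)
#
# Each canvas is passed through vis_frame() and written by write_image() with
# cas = 1, 2 or 3, which selects the vis_pose_est_RGB / _black / _other folder.
# ------------------------------------------------------------------------------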
class DataDealer: def __init__(self, cfg, opt, save_video=False, video_save_opt=DEFAULT_VIDEO_SAVE_OPT, queueSize=1024): self.cfg = cfg self.opt = opt self.video_save_opt = video_save_opt self.head_pose = opt.head_pose # 是否开启头部姿态相关内容 self.eval_joints = EVAL_JOINTS self.save_video = save_video self.heatmap_to_coord = get_func_heatmap_to_coord(cfg) # initialize the queue used to store frames read from # the video file if opt.sp: self.result_queue = Queue(maxsize=queueSize) else: self.result_queue = mp.Queue(maxsize=queueSize) if opt.save_img: if not os.path.exists(opt.outputpath + '/vis'): os.mkdir(opt.outputpath + '/vis') if opt.pose_flow: from trackers.PoseFlow.poseflow_infer import PoseFlowWrapper self.pose_flow_wrapper = PoseFlowWrapper( save_path=os.path.join(opt.outputpath, 'poseflow')) if opt.tracking: # 实例状态 self.reid_states = {} self.reid_global_states = { 'frame': 0, "interval": 1, "time": time.time() } def start_worker(self, target): if self.opt.sp: p = Thread(target=target, args=()) else: p = mp.Process(target=target, args=()) # p.daemon = True p.start() return p def start(self): # start a thread to read pose estimation results per frame self.result_worker = self.start_worker(self.update) return self def update(self): final_result = [] norm_type = self.cfg.LOSS.get('NORM_TYPE', None) hm_size = self.cfg.DATA_PRESET.HEATMAP_SIZE if self.save_video: # initialize the file video stream, adapt ouput video resolution to original video stream = cv2.VideoWriter(*[ self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize'] ]) if not stream.isOpened(): print("Try to use other video encoders...") ext = self.video_save_opt['savepath'].split('.')[-1] fourcc, _ext = self.recognize_video_ext(ext) self.video_save_opt['fourcc'] = fourcc self.video_save_opt[ 'savepath'] = self.video_save_opt['savepath'][:-4] + _ext stream = cv2.VideoWriter(*[ self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize'] ]) assert stream.isOpened(), 'Cannot open video for writing' # ======头部姿态估计准备========= if self.head_pose: # 进行头部姿态估计 pose_estimator = PoseEstimator(img_size=self.opt.img_size) # Introduce scalar stabilizers for pose. 
pose_stabilizers = [ Stabilizer(state_num=2, measure_num=1, cov_process=0.1, cov_measure=0.1) for _ in range(6) ] masks_list = [] # 头部关键点列表 emoji_available_list = [] # 需要进行表情识别的目标的索引 face_naked_rate = [] # 所有人的脸部露出率 # keep looping infinitelyd while True: if self.opt.tracking: # 处理重识别状态 reid_states = self.reid_states reid_global_states = self.reid_global_states reid_global_states["frame"] = (reid_global_states["frame"] + 1) % 9999 current_time = time.time() reid_global_states[ "interval"] = current_time - reid_global_states['time'] reid_global_states['time'] = current_time # ensure the queue is not empty and get item (boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name) = self.wait_and_get(self.result_queue) if orig_img is None: # if the thread indicator variable is set (img is None), stop the thread if self.save_video: stream.release() write_json(final_result, self.opt.outputpath, form=self.opt.format, for_eval=self.opt.eval) print("Results have been written to json.") return # ==========================进一步处理================================= # image channel RGB->BGR orig_img = np.array(orig_img, dtype=np.uint8)[:, :, ::-1] if boxes is None or len(boxes) == 0: if self.opt.save_img or self.save_video or self.opt.vis: self.write_image( orig_img, im_name, stream=stream if self.save_video else None) else: # location prediction (n, kp, 2) | score prediction (n, kp, 1) assert hm_data.dim() == 4 # pred = hm_data.cpu().data.numpy() if hm_data.size()[1] == 136: self.eval_joints = [*range(0, 136)] elif hm_data.size()[1] == 26: self.eval_joints = [*range(0, 26)] pose_coords = [] pose_scores = [] for i in range(hm_data.shape[0]): bbox = cropped_boxes[i].tolist() pose_coord, pose_score = self.heatmap_to_coord( hm_data[i][self.eval_joints], bbox, hm_shape=hm_size, norm_type=norm_type) pose_coords.append( torch.from_numpy(pose_coord).unsqueeze(0)) pose_scores.append( torch.from_numpy(pose_score).unsqueeze(0)) preds_img = torch.cat(pose_coords) preds_scores = torch.cat(pose_scores) if not self.opt.pose_track: boxes, scores, ids, preds_img, preds_scores, pick_ids = \ pose_nms(boxes, scores, ids, preds_img, preds_scores, self.opt.min_box_area) if len(preds_img) != 0: preds_img = torch.stack(preds_img) # print(boxes[0], cropped_boxes[0],hm_data[0].shape) # =========================目标检测对象处理=========================== if len(preds_img) != 0: if self.head_pose: masks_list.clear() emoji_available_list.clear() for i in range(preds_img.shape[0]): if self.opt.tracking: self_state = self.get_reid_state( ids[i], reid_states, reid_global_states) self_state['index'] = i # ===头部姿态估计相关====== if self.head_pose: # 取出脸部关键点 face_keypoints = preds_img[i, 26:94] face_keypoints_scores = preds_scores[i, 26:94] # 获取标准化后的人脸关键点坐标 # scale_face_keypoints, _ = self.get_scaled_face_keypoints(face_keypoints) # =====脸部露出判定====== face_naked = torch.sum(face_keypoints_scores[27:48] > 0.01) / 21 # 这部分暂时不包括嘴部数据 mouth_naked = torch.sum( face_keypoints_scores[48:68] > 0.1 ) / 20 # 这部分是嘴部的裸露程度 if face_naked > 0.5 or mouth_naked > 0.5: # 判断是否能够识别表情 emoji_available_list.append(i) # ====指标====脸部遮挡检测======= if self_state is not None: self.face_hide(self_state, reid_global_states, face_naked) # ====进行头部姿态估计===== self.estimate_head_pose(pose_estimator, face_keypoints, masks_list) # ==口型识别== 打哈欠和说话 if mouth_naked > 0.5 and False: scaled_mouth_keypoints, _ = self.get_scaled_mouth_keypoints( face_keypoints) mouth_distance = self.mouth_open_degree( scaled_mouth_keypoints) if mouth_distance[1] > 0.3: open_mouth = "open mouth!!!!" 
elif mouth_distance[1] > 0.2: open_mouth = "open" else: open_mouth = "" print(mouth_distance, open_mouth) # =====开始表情识别===== # =========================目标检测对象处理完成========================= # =========================整理数据======================== _result = [] for k in range(len(scores)): _result.append({ 'keypoints': preds_img[k], 'kp_score': preds_scores[k], 'proposal_score': torch.mean(preds_scores[k]) + scores[k] + 1.25 * max(preds_scores[k]), 'idx': ids[k], 'box': [ boxes[k][0], boxes[k][1], boxes[k][2] - boxes[k][0], boxes[k][3] - boxes[k][1] ] }) result = {'imgname': im_name, 'result': _result} if self.opt.pose_flow: poseflow_result = self.pose_flow_wrapper.step( orig_img, result) for i in range(len(poseflow_result)): result['result'][i]['idx'] = poseflow_result[i]['idx'] final_result.append(result) # ==========================绘图================================= if self.opt.save_img or self.save_video or self.opt.vis: if hm_data.size()[1] == 49: from alphapose.utils.vis import vis_frame_dense as vis_frame, DEFAULT_FONT elif self.opt.vis_fast: from alphapose.utils.vis import vis_frame_fast as vis_frame, DEFAULT_FONT else: from alphapose.utils.vis import vis_frame, DEFAULT_FONT # 开始绘图============== img = vis_frame(orig_img, result, self.opt) if self.head_pose and len(masks_list) != 0: for p in masks_list: pose_estimator.draw_annotation_box(img, p[0], p[1], color=(128, 255, 128)) if self.opt.tracking: # 行人重识别状态 for _id in ids: _state = reid_states[_id] index = _state['index'] bbox = _result[index]['box'] bbox = [ bbox[0], bbox[0] + bbox[2], bbox[1], bbox[1] + bbox[3] ] cv2.putText( img, f'no focus: {round(_state["face_hide_rate"], 2)}', (int(bbox[0]), int((bbox[2] + 52))), DEFAULT_FONT, 1, (255, 0, 0), 2) # 结束绘图==============》显示图片 self.write_image( img, im_name, stream=stream if self.save_video else None) @staticmethod def get_reid_state(idx, reid_states, reid_global_states): # 获取重识别状态 if idx in reid_states: self_state = reid_states[idx] if (reid_global_states['time'] - self_state['time']) > reid_loss_interval: self_state = {"time": time.time()} reid_states[idx] = self_state else: self_state['time'] = time.time() else: self_state = {"time": time.time()} reid_states[idx] = self_state return self_state @staticmethod def get_scaled_face_keypoints(face_keypoints): """ 获取标准化后的人脸关键点坐标 :param face_keypoints: 脸部关键点 :return: 标准化后的人脸关键点坐标,人脸框的位置 """ face_outline_keypoints = face_keypoints[:27] face_x1 = torch.min(face_outline_keypoints[:, 0]) face_y1 = torch.min(face_outline_keypoints[:, 1]) face_x2 = torch.max(face_outline_keypoints[:, 0]) face_y2 = torch.max(face_outline_keypoints[:, 1]) # 获取标准化的脸部坐标 face_x1_y1 = torch.tensor([face_x1, face_y1]) face_width = torch.tensor([face_x2 - face_x1, face_y2 - face_y1]) scaled_face_keypoints = (face_keypoints - face_x1_y1) / face_width return scaled_face_keypoints, (face_x1, face_y1, face_x2, face_y2) @staticmethod def get_scaled_mouth_keypoints(face_keypoints): mouth_keypoints = face_keypoints[48:68] mouth_x1 = torch.min(mouth_keypoints[:, 0]) mouth_y1 = torch.min(mouth_keypoints[:, 1]) mouth_x2 = torch.max(mouth_keypoints[:, 0]) mouth_y2 = torch.max(mouth_keypoints[:, 1]) mouth_x1_y1 = torch.tensor([mouth_x1, mouth_y1]) mouth_width = torch.tensor([mouth_x2 - mouth_x1, mouth_y2 - mouth_y1]) scaled_mouth_keypoints = (mouth_keypoints - mouth_x1_y1) / mouth_width return scaled_mouth_keypoints, (mouth_x1, mouth_y1, mouth_x2, mouth_y2) @staticmethod def mouth_open_degree(scaled_mouth_keypoints): """ 计算张嘴程度 :param scaled_mouth_keypoints: 按嘴部框范围标准化后的关键点坐标 
:return: """ up_mouth_keypoints = scaled_mouth_keypoints[13:16] down_mouth_keypoints = scaled_mouth_keypoints[17:20] # 计算嘴对应点之间的距离 mouth_distance = torch.linalg.norm(up_mouth_keypoints - down_mouth_keypoints, axis=1) return mouth_distance @staticmethod def estimate_head_pose(pose_estimator, face_68_keypoints, masks_list=None): """ :param pose_estimator: 姿态评估器 :param face_68_keypoints: 人脸68关键点 :param masks_list: 保存姿态评估结果的列表 :return: """ if masks_list is None: masks_list = [] marks = face_68_keypoints marks = np.float32(marks) pose = pose_estimator.solve_pose(marks) # Stabilize the pose. # 这段使用卡尔曼滤波平滑结果,目前没有必要 # steady_pose = [] # pose_np = np.array(pose).flatten() # for value, ps_stb in zip(pose_np, pose_stabilizers): # ps_stb.update([value]) # steady_pose.append(ps_stb.state[0]) # steady_pose = np.reshape(steady_pose, (-1, 3)) # masks_list.append(steady_pose) masks_list.append(pose) return masks_list @staticmethod def face_hide(self_state, reid_global_states, face_naked, face_hide_lambda=face_hide_lambda): """ :param self_state: 目标的状态对象 :param reid_global_states: 全局的状态对象 :param face_naked: 人脸裸露率 :param face_hide_lambda: 平滑人脸遮挡率 :return: 修改后的状态字典 """ face_hide_time = self_state.get("face_hide_time", 0) if face_naked < 0.5: face_hide_time += reid_global_states['interval'] else: face_hide_time = 0 # 动态遮挡率 face_hide_rate = self_state.get("face_hide_rate", 0) if face_hide_time > face_hide_refresh_interval: face_hide_rate = ( 1 - face_hide_lambda) + face_hide_lambda * face_hide_rate elif face_hide_time == 0: face_hide_rate = face_hide_lambda * face_hide_rate self_state["face_hide_time"] = face_hide_time self_state["face_hide_rate"] = face_hide_rate return self_state def write_image(self, img, im_name, stream=None): if self.opt.vis: cv2.imshow("AlphaPose Demo", img) cv2.waitKey(30) if self.opt.save_img: cv2.imwrite(os.path.join(self.opt.outputpath, 'vis', im_name), img) if self.save_video: stream.write(img) @staticmethod def wait_and_put(queue, item): queue.put(item) @staticmethod def wait_and_get(queue): return queue.get() def save(self, boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name): # save next frame in the queue self.wait_and_put( self.result_queue, (boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name)) def running(self): # indicate that the thread is still running return not self.result_queue.empty() def count(self): # indicate the remaining images return self.result_queue.qsize() def stop(self): # indicate that the thread should be stopped self.save(None, None, None, None, None, None, None) self.result_worker.join() def terminate(self): # directly terminate self.result_worker.terminate() def clear_queues(self): self.clear(self.result_queue) @staticmethod def clear(queue): while not queue.empty(): queue.get() def results(self): # return final result print(self.final_result) return self.final_result @staticmethod def recognize_video_ext(ext=''): if ext == 'mp4': return cv2.VideoWriter_fourcc(*'mp4v'), '.' + ext elif ext == 'avi': return cv2.VideoWriter_fourcc(*'XVID'), '.' + ext elif ext == 'mov': return cv2.VideoWriter_fourcc(*'XVID'), '.' + ext else: print("Unknow video format {}, will use .mp4 instead of it".format( ext)) return cv2.VideoWriter_fourcc(*'mp4v'), '.mp4'
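# --- Illustration (not part of the original file) -----------------------------
# A standalone sketch of the face-exposure test used in update() above: a face
# (or mouth) region counts as exposed when more than half of its keypoints have
# a confidence above a small threshold.  The thresholds follow the values used
# above (roughly 0.01 for the face contour block and 0.1 for the mouth block);
# this helper is illustrative only and is not called by the class.
def exposure_ratio(keypoint_scores, threshold):
    """Fraction of keypoints whose confidence exceeds `threshold`.

    keypoint_scores: 1-D torch tensor of per-keypoint confidences.
    """
    return float(torch.sum(keypoint_scores > threshold)) / len(keypoint_scores)

# Example (hypothetical slice of the 20 mouth keypoints of one person):
#   mouth_naked = exposure_ratio(preds_scores[i, 26:94][48:68], 0.1)
#   can_do_emotion = mouth_naked > 0.5
# ------------------------------------------------------------------------------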
class DataDealer: def __init__(self, cfg, opt, save_video=False, video_save_opt=DEFAULT_VIDEO_SAVE_OPT, queueSize=1024): self.cfg = cfg self.opt = opt self.video_save_opt = video_save_opt self.head_pose = opt.head_pose # 是否开启头部姿态相关内容 self.eval_joints = EVAL_JOINTS self.save_video = save_video self.heatmap_to_coord = get_func_heatmap_to_coord(cfg) # initialize the queue used to store frames read from # the video file if opt.sp: self.result_queue = Queue(maxsize=queueSize) else: self.result_queue = mp.Queue(maxsize=queueSize) if opt.save_img: if not os.path.exists(opt.outputpath + '/vis'): os.mkdir(opt.outputpath + '/vis') if opt.pose_flow: from trackers.PoseFlow.poseflow_infer import PoseFlowWrapper self.pose_flow_wrapper = PoseFlowWrapper( save_path=os.path.join(opt.outputpath, 'poseflow')) if opt.tracking: # 实例状态 self.reid_states = ReIDStates() # 是否使用场景蒙版 self.scene_mask = SceneMasker( opt.scene_mask) if opt.scene_mask else None if self.opt.analyse_cheating and self.opt.save_cheaters: try: os.mkdir(os.path.join(self.opt.outputpath, 'cheating')) except FileExistsError: pass def start_worker(self, target): if self.opt.sp: p = Thread(target=target, args=()) else: p = mp.Process(target=target, args=()) # p.daemon = True p.start() return p def start(self): # start a thread to read pose estimation results per frame self.result_worker = self.start_worker(self.update) return self def update(self): final_result = [] norm_type = self.cfg.LOSS.get('NORM_TYPE', None) hm_size = self.cfg.DATA_PRESET.HEATMAP_SIZE if self.save_video: # initialize the file video stream, adapt ouput video resolution to original video stream = cv2.VideoWriter(*[ self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize'] ]) if not stream.isOpened(): print("Try to use other video encoders...") ext = self.video_save_opt['savepath'].split('.')[-1] fourcc, _ext = self.recognize_video_ext(ext) self.video_save_opt['fourcc'] = fourcc self.video_save_opt[ 'savepath'] = self.video_save_opt['savepath'][:-4] + _ext stream = cv2.VideoWriter(*[ self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize'] ]) assert stream.isOpened(), 'Cannot open video for writing' # ======头部姿态估计准备========= if self.head_pose: # 进行头部姿态估计 self.pose_estimator = PoseEstimator(img_size=self.opt.img_size) # Introduce scalar stabilizers for pose. 
# pose_stabilizers = [Stabilizer( # state_num=2, # measure_num=1, # cov_process=0.1, # cov_measure=0.1) for _ in range(6)] face_naked_rate = [] # 所有人的脸部露出率 # keep looping infinitelyd while True: if self.opt.tracking: # 处理重识别状态 self.reid_states.next_frame() # ensure the queue is not empty and get item (boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name) = self.wait_and_get(self.result_queue) if orig_img is None: # if the thread indicator variable is set (img is None), stop the thread if self.save_video: stream.release() write_json(final_result, self.opt.outputpath, form=self.opt.format, for_eval=self.opt.eval) print("Results have been written to json.") return # ==========================进一步处理================================= # image channel RGB->BGR orig_img = np.array(orig_img, dtype=np.uint8)[:, :, ::-1] if boxes is None or len(boxes) == 0: if self.opt.save_img or self.save_video or self.opt.vis: self.write_image( orig_img, im_name, stream=stream if self.save_video else None) else: # location prediction (n, kp, 2) | score prediction (n, kp, 1) assert hm_data.dim() == 4 # pred = hm_data.cpu().data.numpy() if hm_data.size()[1] == 136: self.eval_joints = [*range(0, 136)] elif hm_data.size()[1] == 26: self.eval_joints = [*range(0, 26)] pose_coords = [] pose_scores = [] for i in range(hm_data.shape[0]): bbox = cropped_boxes[i].tolist() pose_coord, pose_score = self.heatmap_to_coord( hm_data[i][self.eval_joints], bbox, hm_shape=hm_size, norm_type=norm_type) pose_coords.append( torch.from_numpy(pose_coord).unsqueeze(0)) pose_scores.append( torch.from_numpy(pose_score).unsqueeze(0)) preds_img = torch.cat(pose_coords) preds_scores = torch.cat(pose_scores) if not self.opt.pose_track: boxes, scores, ids, preds_img, preds_scores, pick_ids = \ pose_nms(boxes, scores, ids, preds_img, preds_scores, self.opt.min_box_area) preds_img = torch.stack(preds_img) else: preds_scores = torch.tensor(preds_scores) # print(boxes[0], cropped_boxes[0],hm_data[0].shape) # =========================目标检测对象处理=========================== if len(preds_img) != 0: self.deal_objects(np.stack(boxes), scores, ids, hm_data, cropped_boxes, orig_img, im_name, preds_img, preds_scores) # =========================目标检测对象处理完成========================= # =========================整理数据======================== _result = [] for k in range(len(scores)): _result.append({ 'keypoints': preds_img[k], 'kp_score': preds_scores[k], 'proposal_score': torch.mean(preds_scores[k]) + scores[k] + 1.25 * max(preds_scores[k]), 'idx': ids[k], 'box': [ boxes[k][0], boxes[k][1], boxes[k][2] - boxes[k][0], boxes[k][3] - boxes[k][1] ] }) result = {'imgname': im_name, 'result': _result} if self.opt.pose_flow: poseflow_result = self.pose_flow_wrapper.step( orig_img, result) for i in range(len(poseflow_result)): result['result'][i]['idx'] = poseflow_result[i]['idx'] final_result.append(result) # ==========================绘图================================= if self.opt.save_img or self.save_video or self.opt.vis: if hm_data.size()[1] == 49: from alphapose.utils.vis import vis_frame_dense as vis_frame, DEFAULT_FONT elif self.opt.vis_fast: from alphapose.utils.vis import vis_frame_fast as vis_frame, DEFAULT_FONT else: from alphapose.utils.vis import vis_frame, DEFAULT_FONT # 开始绘图============== img = vis_frame(orig_img, result, self.opt) if self.head_pose and hasattr( self, 'pose_list') and len(self.pose_list) != 0: pose_list = self.pose_list self.draw_pose(self.pose_estimator, img, pose_list) if self.opt.tracking: # 行人重识别状态 for reid in ids: 
self.draw_objects(img, reid, _result, DEFAULT_FONT) if self.scene_mask and self.opt.show_scene_mask: img = self.scene_mask.mask_on_img(img) # 结束绘图==============》显示图片 self.write_image( img, im_name, stream=stream if self.save_video else None) if self.opt.analyse_cheating and self.opt.save_cheaters: self.save_cheaters(img, boxes, ids, self.cheating_indexs) def deal_objects(self, boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name, preds_img, preds_scores): """ 处理识别出来的目标 """ object_num = preds_img.shape[0] # 目标数量 # 场景位置识别 场景遮罩 # print(self.scene_mask.is_in_seat(boxes)) indexes = torch.arange(0, len(preds_img)) # 辅助索引 if self.opt.classroom_action: self.actions = action_classifier.action_classify(preds_img) self.actions_texts = [ action_classifier.action_type[i] for i in self.actions ] # self.actions_colors = [(255, 0, 0) if i <= 3 else (0, 0, 255) for i in self.actions] self.actions_colors = [(0, 0, 255) for i in self.actions] if self.head_pose: # 头部状态估计 # 取出脸部关键点 face_keypoints = preds_img[:, 26:94] face_keypoints_scores = preds_scores[:, 26:94] # =====脸部露出判定====== self.naked_faces = torch.sum( face_keypoints_scores[:, 27:48, 0] > 0.005, dim=1) / 21 # 这部分暂时不包括嘴部数据 self.naked_mouths = torch.sum( face_keypoints_scores[:, 48:68, 0] > 0.05, dim=1) / 20 # 这部分是嘴部的裸露程度 # 判断是否能够识别表情 self.emotion_available_list = (self.naked_faces > 0.5) & (self.naked_mouths > 0.5) # 头部姿态估计 也包括其他姿态估计 pose_estimator = self.pose_estimator self.pose_list = [{ 'head': self.estimate_head_pose(pose_estimator, face_keypoints[i]), 'body': None, 'neck': self.estimate_neck_pose(pose_estimator, preds_img[i, [17, 18, 5, 6]]), 'index': i } for i in range(object_num)] # self.angles_yaw0 = [turn_head_angle_yaw(pose_estimator.get_euler(*pose['head'])[0][0], pose['head'][1]) # for pose in self.pose_list] # self.angles_pitch0 = [turn_head_angle_yaw(pose_estimator.get_euler(*pose['head'])[1][0], pose['head'][1]) # for pose in self.pose_list] if self.opt.analyse_focus or self.opt.analyse_cheating: self.angles_pitch = [ pose['head'][0][0][0] * 57.3 for pose in self.pose_list ] # self.angles_yaw = [pose['head'][0][1][0] * 57.3 for pose in self.pose_list] self.angles_yaw = [ action_analysis.turn_head_angle(*pose['head']) * 57.3 for pose in self.pose_list ] if self.opt.analyse_focus: # 是否分析专注度 self.attention_results = attention_degrees( face_keypoints, self.angles_pitch) if self.opt.analyse_cheating: # self.actions = action_classifier.action_classify(preds_img) # self.actions_texts = [action_classifier.action_type[i] for i in self.actions] # self.actions_colors = [(255, 0, 0) if i <= 3 else (0, 0, 255) for i in self.actions] # 伸手识别处理 self.is_passing = action_analysis.is_passing(preds_img) self.passing_texts = [ "left" if p == 1 else "right" if p == -1 else 'no' for p in self.is_passing ] self.passing_colors = [(0, 0, 255) if abs(p) == 1 else (255, 0, 0) for p in self.is_passing] # 低头识别 self.peeps = torch.tensor( [pitch < peep_angle_gate for pitch in self.angles_pitch]) self.peep_texts = ['yes' if p else 'no' for p in self.peeps] self.peep_colors = [(0, 0, 255) if p else (255, 0, 0) for p in self.peeps] # 转头识别 if self.opt.tracking: # self.probes = [self.turn_head_rate(ids[i], i) > cheating_turn_head_gate for i in indexes] self.probes = torch.tensor( [a > cheating_turn_head_gate for a in self.angles_yaw]) self.probe_texts = [ 'yes' if p else 'no' for p in self.probes ] self.probe_colors = [(0, 0, 255) if p else (255, 0, 0) for p in self.probes] if self.opt.save_cheaters: self.cheating_indexs = indexes[self.probes | self.peeps | 
(self.is_passing != 0)] # 逐个处理 for i in range(object_num): if self.opt.tracking: self_state = self.reid_states[ids[i]] self_state['index'] = i if self.head_pose: # ====指标====脸部遮挡检测======= if self_state is not None and self.opt.analyse_focus: self.focus_rates(ids[i], self.attention_results[i][0], self.naked_faces[i]) # ==口型识别== 打哈欠和说话 if self.naked_mouths[i] > 0.5 and False: scaled_mouth_keypoints, _ = self.get_scaled_mouth_keypoints( face_keypoints) mouth_distance = self.mouth_open_degree( scaled_mouth_keypoints) if mouth_distance[1] > 0.3: open_mouth = "open mouth!!!!" elif mouth_distance[1] > 0.2: open_mouth = "open" else: open_mouth = "" print(mouth_distance, open_mouth) # =====伸手识别===== # =====开始表情识别===== def draw_objects(self, img, reid, result, font): self_state = self.reid_states[reid] i = self_state['index'] bbox = result[i]['box'] bbox = [bbox[0], bbox[0] + bbox[2], bbox[1], bbox[1] + bbox[3]] text_pos = 52 text_gap = 20 text_scale = 0.5 text_width = 1 if self.opt.analyse_focus: focus_rate = round(self_state["focus_rate"], 2) text = f'focus:{focus_rate}' cv2.putText(img, text, (int(bbox[0]), int((bbox[2] + text_pos))), font, text_scale, (0, 255, 0), text_width) text_pos += text_gap emotion_available = self.emotion_available_list[i] emotion = expression_names[self.attention_results[i] [1]] if emotion_available else 'no' expression_color = expression_colors[self.attention_results[i][1]] emotion = f'emotion:{emotion}' cv2.putText(img, emotion, (int(bbox[0]), int( (bbox[2] + text_pos))), font, text_scale, expression_color if emotion_available else (255, 0, 0), text_width) text_pos += text_gap # angle_pitch = f'angle_pitch:{round(self.angles_pitch[i], 2)}' # cv2.putText(img, angle_pitch, # (int(bbox[0]), int((bbox[2] + text_pos))), font, # text_scale, (0, 255, 0), text_width) # text_pos += text_gap if self.opt.classroom_action: action_text = self.actions_texts[i] action_color = self.actions_colors[i] cv2.putText(img, action_text, (int(bbox[0]), int( (bbox[2] + text_pos))), font, 0.6, action_color, 2) text_pos += 20 if self.opt.analyse_cheating: passing_text = f'passing: {self.passing_texts[i]}' passing_color = self.passing_colors[i] cv2.putText(img, passing_text, (int(bbox[0]), int((bbox[2] + text_pos))), font, text_scale, passing_color, text_width) text_pos += text_gap peep_text = f'peep: {self.peep_texts[i]}' peep_color = self.peep_colors[i] cv2.putText(img, peep_text, (int(bbox[0]), int( (bbox[2] + text_pos))), font, text_scale, peep_color, text_width) text_pos += text_gap if self.opt.tracking: probe_text = f'probe: {self.probe_texts[i]}' probe_color = self.probe_colors[i] cv2.putText(img, probe_text, (int(bbox[0]), int((bbox[2] + text_pos))), font, text_scale, probe_color, text_width) text_pos += text_gap # angle_pitch = f'angle_pitch:{round(self.angles_pitch[i], 2)}' # cv2.putText(img, angle_pitch, # (int(bbox[0]), int((bbox[2] + text_pos))), font, # text_scale, (0, 255, 0), text_width) # text_pos += text_gap # angle_yaw = f'angle_yaw:{round(self.angles_yaw0[i], 2)}' # cv2.putText(img, angle_yaw, # (int(bbox[0]), int((bbox[2] + text_pos))), font, # 1, (20, 20, 255), 2) # text_pos += text_gap # angle_yaw = f'angle_pitch:{round(self.angles_pitch0[i], 2)}' # cv2.putText(img, angle_yaw, # (int(bbox[0]), int((bbox[2] + text_pos))), font, # 1, (20, 20, 255), 2) # text_pos += text_gap def save_cheaters(self, img, boxes, ids, cheating_indexs): save_time_str = time.strftime('%Y%m%d%H%M%S') for i in cheating_indexs: box = boxes[i].astype(int) chipped_img = img[box[1]:box[3], box[0]:box[2]] 
cv2.imwrite( os.path.join(self.opt.outputpath, 'cheating', f'{save_time_str}_{ids[i]}.jpg'), chipped_img) @staticmethod def draw_pose(pose_estimator, img, pose_list): for pose in pose_list: # pose_estimator.draw_annotation_box( # img, p[0], p[1], color=(128, 255, 128)) head_pose = pose['head'] # 头部姿态 pose_estimator.draw_axis(img, head_pose[0], head_pose[1]) # neck_pose = pose['neck'] # 颈部姿态 # pose_estimator.draw_axis( # img, neck_pose[0], neck_pose[1]) # print(action_analysis.turn_head_angle(*head_pose)) # print(r1, r2, abs(r1 - r2)) # body_pose = pose['body'] # if body_pose is not None: # pose_estimator.draw_axis( # img, body_pose[0], body_pose[1]) # r1 = body_pose[0][1] # r2 = head_pose[0][1] # print(abs(r1 - r2), r1, r2) @staticmethod def get_scaled_face_keypoints(face_keypoints): """ 获取标准化后的人脸关键点坐标 :param face_keypoints: 脸部关键点 :return: 标准化后的人脸关键点坐标,人脸框的位置 """ face_outline_keypoints = face_keypoints[:27] face_x1 = torch.min(face_outline_keypoints[:, 0]) face_y1 = torch.min(face_outline_keypoints[:, 1]) face_x2 = torch.max(face_outline_keypoints[:, 0]) face_y2 = torch.max(face_outline_keypoints[:, 1]) # 获取标准化的脸部坐标 face_x1_y1 = torch.tensor([face_x1, face_y1]) face_width = torch.tensor([face_x2 - face_x1, face_y2 - face_y1]) scaled_face_keypoints = (face_keypoints - face_x1_y1) / face_width return scaled_face_keypoints, (face_x1, face_y1, face_x2, face_y2) @staticmethod def get_scaled_mouth_keypoints(face_keypoints): mouth_keypoints = face_keypoints[48:68] mouth_x1 = torch.min(mouth_keypoints[:, 0]) mouth_y1 = torch.min(mouth_keypoints[:, 1]) mouth_x2 = torch.max(mouth_keypoints[:, 0]) mouth_y2 = torch.max(mouth_keypoints[:, 1]) mouth_x1_y1 = torch.tensor([mouth_x1, mouth_y1]) mouth_width = torch.tensor([mouth_x2 - mouth_x1, mouth_y2 - mouth_y1]) scaled_mouth_keypoints = (mouth_keypoints - mouth_x1_y1) / mouth_width return scaled_mouth_keypoints, (mouth_x1, mouth_y1, mouth_x2, mouth_y2) @staticmethod def mouth_open_degree(scaled_mouth_keypoints): """ 计算张嘴程度 :param scaled_mouth_keypoints: 按嘴部框范围标准化后的关键点坐标 :return: """ up_mouth_keypoints = scaled_mouth_keypoints[13:16] down_mouth_keypoints = scaled_mouth_keypoints[17:20] # 计算嘴对应点之间的距离 mouth_distance = torch.linalg.norm(up_mouth_keypoints - down_mouth_keypoints, axis=1) return mouth_distance @staticmethod def estimate_head_pose(pose_estimator, face_68_keypoints): """ :param pose_estimator: 姿态评估器 :param face_68_keypoints: 人脸68关键点 :param masks_list: 保存姿态评估结果的列表 :return: """ marks = face_68_keypoints marks = np.float32(marks) pose = pose_estimator.solve_pose(marks) # Stabilize the pose. # 这段使用卡尔曼滤波平滑结果,目前没有必要 # steady_pose = [] # pose_np = np.array(pose).flatten() # for value, ps_stb in zip(pose_np, pose_stabilizers): # ps_stb.update([value]) # steady_pose.append(ps_stb.state[0]) # steady_pose = np.reshape(steady_pose, (-1, 3)) # masks_list.append(steady_pose) return pose @staticmethod def estimate_body_pose(pose_estimator, body_keypoints): """ :param body_keypoints: :param pose_estimator: 姿态评估器 :return: """ marks = body_keypoints marks = np.float32(marks) pose = pose_estimator.solve_body_pose(marks) # Stabilize the pose. 
# 这段使用卡尔曼滤波平滑结果,目前没有必要 # steady_pose = [] # pose_np = np.array(pose).flatten() # for value, ps_stb in zip(pose_np, pose_stabilizers): # ps_stb.update([value]) # steady_pose.append(ps_stb.state[0]) # steady_pose = np.reshape(steady_pose, (-1, 3)) # masks_list.append(steady_pose) return pose @staticmethod def estimate_neck_pose(pose_estimator, neck_keypoints): """ :param neck_keypoints: :param pose_estimator: 姿态评估器 :return: """ marks = neck_keypoints marks = np.float32(marks) pose = pose_estimator.solve_neck_pose(marks) # Stabilize the pose. # 这段使用卡尔曼滤波平滑结果,目前没有必要 # steady_pose = [] # pose_np = np.array(pose).flatten() # for value, ps_stb in zip(pose_np, pose_stabilizers): # ps_stb.update([value]) # steady_pose.append(ps_stb.state[0]) # steady_pose = np.reshape(steady_pose, (-1, 3)) # masks_list.append(steady_pose) return pose def focus_rates(self, reid, attention_score, face_naked, focus_lambda=default_focus_lambda): """ :param attention_score: 注意力分数 :param reid: 目标的重识别id :param face_naked: 人脸露出率 :param focus_lambda: 平滑专注度 :return: 修改后的状态字典 """ if face_naked < 0.5: face_hide_time = self.reid_states.timer_set(reid, "face_hide_time") else: self.reid_states.timer_reset(reid, "face_hide_time") face_hide_time = 0 # 动态遮挡率 if face_hide_time > face_hide_refresh_interval: focus_rate = self.reid_states.smooth_set(reid, "focus_rate", 0, focus_lambda) elif face_hide_time == 0: focus_rate = self.reid_states.smooth_set(reid, "focus_rate", attention_score, focus_lambda) else: focus_rate = self.reid_states.get(reid, "focus_rate") return focus_rate def turn_head_rate(self, reid, index): self_state = self.reid_states[reid] angle_yaw = self.angles_yaw[index] if 'right_front' not in self_state: self_state['right_front'] = angle_yaw self_state['angle_yaw'] = angle_yaw old_yaw_angle = self_state['angle_yaw'] right_front = self_state['right_front'] angle_yaw = self.reid_states.smooth_set(reid, 'angle_yaw', angle_yaw, limit=60) global_turn_head_angle = abs(angle_yaw - right_front) current_turn_head_angle = abs(angle_yaw - old_yaw_angle) reset_condition = global_turn_head_angle < turn_head_tolerance and current_turn_head_angle < turn_head_tolerance if reset_condition: right_front_timer = self.reid_states.timer_reset( reid, 'right_front_timer') else: right_front_timer = self.reid_states.timer_set( reid, 'right_front_timer') if right_front_timer > right_font_reset_interval: self_state['right_front'] = angle_yaw # print( # f'right_front:{right_front}\n' # f'global_turn_head:{global_turn_head_angle}\n' # f'angle_yaw:{angle_yaw}\n' # f'right_front_timer:{right_front_timer}' # ) return global_turn_head_angle def write_image(self, img, im_name, stream=None): if self.opt.vis: cv2.imshow("AlphaPose Demo", img) cv2.waitKey(30) if self.opt.save_img: cv2.imwrite(os.path.join(self.opt.outputpath, 'vis', im_name), img) if self.save_video: stream.write(img) @staticmethod def wait_and_put(queue, item): queue.put(item) @staticmethod def wait_and_get(queue): return queue.get() def save(self, boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name): # save next frame in the queue self.wait_and_put( self.result_queue, (boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name)) def running(self): # indicate that the thread is still running return not self.result_queue.empty() def count(self): # indicate the remaining images return self.result_queue.qsize() def stop(self): # indicate that the thread should be stopped self.save(None, None, None, None, None, None, None) self.result_worker.join() def terminate(self): # directly 
terminate self.result_worker.terminate() def clear_queues(self): self.clear(self.result_queue) @staticmethod def clear(queue): while not queue.empty(): queue.get() def results(self): # return final result print(self.final_result) return self.final_result @staticmethod def recognize_video_ext(ext=''): if ext == 'mp4': return cv2.VideoWriter_fourcc(*'mp4v'), '.' + ext elif ext == 'avi': return cv2.VideoWriter_fourcc(*'XVID'), '.' + ext elif ext == 'mov': return cv2.VideoWriter_fourcc(*'XVID'), '.' + ext else: print("Unknow video format {}, will use .mp4 instead of it".format( ext)) return cv2.VideoWriter_fourcc(*'mp4v'), '.mp4'
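# --- Illustration (not part of the original file) -----------------------------
# focus_rates() above delegates smoothing to ReIDStates.smooth_set().  The
# underlying idea is a simple exponential moving average: the stored rate is
# pulled toward the new observation by a factor (1 - lambda) at each update.
# A hedged, standalone sketch of that update rule (the real smooth_set API may
# differ and also supports a clamping `limit`):
def ema_update(previous, observation, smoothing_lambda):
    """One exponential-moving-average step: keep `smoothing_lambda` of the old
    value and blend in (1 - smoothing_lambda) of the new observation."""
    return smoothing_lambda * previous + (1.0 - smoothing_lambda) * observation

# e.g. focus_rate = ema_update(focus_rate, attention_score, default_focus_lambda)
# ------------------------------------------------------------------------------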
class DataWriter(): def __init__(self, cfg, opt, save_video=False, video_save_opt=DEFAULT_VIDEO_SAVE_OPT, queueSize=1024): self.cfg = cfg self.opt = opt self.video_save_opt = video_save_opt self.eval_joints = EVAL_JOINTS self.save_video = save_video self.heatmap_to_coord = get_func_heatmap_to_coord(cfg) # initialize the queue used to store frames read from # the video file if opt.sp: self.result_queue = Queue(maxsize=queueSize) else: self.result_queue = mp.Queue(maxsize=queueSize) if opt.save_img: if not os.path.exists(opt.outputpath + '/vis'): os.mkdir(opt.outputpath + '/vis') if opt.pose_flow: from trackers.PoseFlow.poseflow_infer import PoseFlowWrapper self.pose_flow_wrapper = PoseFlowWrapper( save_path=os.path.join(opt.outputpath, 'poseflow')) def start_worker(self, target): if self.opt.sp: p = Thread(target=target, args=()) else: p = mp.Process(target=target, args=()) # p.daemon = True p.start() return p def start(self): # start a thread to read pose estimation results per frame self.result_worker = self.start_worker(self.update) return self def update(self): #### person_height = 165 frame_offset = 20 max_diff_angle = 15 max_diff_distance = 10 N_angle = 23 N_distance = 20 # frames = [] ground_points = [] head_points = [] final_result = [] final_angles = {'Frame': []} final_min_angles = {'Frame': []} final_max_angles = {'Frame': []} final_distances = {'Frame': []} final_min_distances = {'Frame': []} final_max_distances = {'Frame': []} # for i in range(1, N_angle + 1): final_angles['Angle_' + str(i)] = [] final_min_angles['Angle_' + str(i)] = [] final_max_angles['Angle_' + str(i)] = [] for i in range(1, N_distance + 1): final_distances['Distance_' + str(i)] = [] final_min_distances['Distance_' + str(i)] = [] final_max_distances['Distance_' + str(i)] = [] # frame = 0 min_angle = 180 max_angle = 0 min_distance = person_height + 100 max_distance = 0 ##### norm_type = self.cfg.LOSS.get('NORM_TYPE', None) hm_size = self.cfg.DATA_PRESET.HEATMAP_SIZE if self.save_video: # initialize the file video stream, adapt ouput video resolution to original video stream = cv2.VideoWriter(*[ self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize'] ]) if not stream.isOpened(): print("Try to use other video encoders...") ext = self.video_save_opt['savepath'].split('.')[-1] fourcc, _ext = self.recognize_video_ext(ext) self.video_save_opt['fourcc'] = fourcc self.video_save_opt[ 'savepath'] = self.video_save_opt['savepath'][:-4] + _ext stream = cv2.VideoWriter(*[ self.video_save_opt[k] for k in ['savepath', 'fourcc', 'fps', 'frameSize'] ]) assert stream.isOpened(), 'Cannot open video for writing' # keep looping infinitelyd while True: # ensure the queue is not empty and get item (boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name) = self.wait_and_get(self.result_queue) if orig_img is None: # if the thread indicator variable is set (img is None), stop the thread if self.save_video: stream.release() write_json(final_result, self.opt.outputpath, form=self.opt.format, for_eval=self.opt.eval) print("Results have been written to json.") return # image channel RGB->BGR orig_img = np.array(orig_img, dtype=np.uint8)[:, :, ::-1] if boxes is None or len(boxes) == 0: if self.opt.save_img or self.save_video or self.opt.vis: self.write_image( orig_img, im_name, stream=stream if self.save_video else None) else: # location prediction (n, kp, 2) | score prediction (n, kp, 1) assert hm_data.dim() == 4 #pred = hm_data.cpu().data.numpy() if hm_data.size()[1] == 136: self.eval_joints = [*range(0, 136)] 
elif hm_data.size()[1] == 26: self.eval_joints = [*range(0, 26)] pose_coords = [] pose_scores = [] for i in range(hm_data.shape[0]): bbox = cropped_boxes[i].tolist() pose_coord, pose_score = self.heatmap_to_coord( hm_data[i][self.eval_joints], bbox, hm_shape=hm_size, norm_type=norm_type) pose_coords.append( torch.from_numpy(pose_coord).unsqueeze(0)) pose_scores.append( torch.from_numpy(pose_score).unsqueeze(0)) preds_img = torch.cat(pose_coords) preds_scores = torch.cat(pose_scores) if not self.opt.pose_track: boxes, scores, ids, preds_img, preds_scores, pick_ids = \ pose_nms(boxes, scores, ids, preds_img, preds_scores, self.opt.min_box_area) _result = [] for k in range(len(scores)): _result.append({ 'keypoints': preds_img[k], 'kp_score': preds_scores[k], 'proposal_score': torch.mean(preds_scores[k]) + scores[k] + 1.25 * max(preds_scores[k]), 'idx': ids[k], 'box': [ boxes[k][0], boxes[k][1], boxes[k][2] - boxes[k][0], boxes[k][3] - boxes[k][1] ] }) result = {'imgname': im_name, 'result': _result} if self.opt.pose_flow: poseflow_result = self.pose_flow_wrapper.step( orig_img, result) for i in range(len(poseflow_result)): result['result'][i]['idx'] = poseflow_result[i]['idx'] final_result.append(result) if self.opt.save_img or self.save_video or self.opt.vis: if hm_data.size()[1] == 49: from alphapose.utils.vis import vis_frame_dense as vis_frame elif self.opt.vis_fast: from alphapose.utils.vis import vis_frame_fast as vis_frame else: from alphapose.utils.vis import vis_frame img = vis_frame(orig_img, result, self.opt) ##### frame += 1 if frame <= frame_offset: ground_point, head_point = self.calc_bound_points( result, vis_thres=0.4) if ground_point is not None: ground_points.append(ground_point) x_point = [x for x, _ in ground_points] y_point = [y for _, y in ground_points] ground_point = (int(np.average(x_point)), int(np.average(y_point))) if head_point is not None: head_points.append(head_point) x_point = [x for x, _ in head_points] y_point = [y for _, y in head_points] head_point = (int(np.average(x_point)), int(np.average(y_point))) if ground_point is not None and head_point is not None: dist_height = np.linalg.norm( np.array(head_point) - np.array(ground_point)) height_ratio = person_height / (dist_height + 1e-6) else: height_ratio = 0 distances = self.calc_distances(result, ground_point, head_point, height_ratio, vis_thres=0.4) angles = self.calc_angles(result, vis_thres=0.4) frames.append(frame) final_angles['Frame'].append(frame) final_min_angles['Frame'].append(frame) final_max_angles['Frame'].append(frame) final_distances['Frame'].append(frame) final_min_distances['Frame'].append(frame) final_max_distances['Frame'].append(frame) ## for angle_name, angle in angles.items(): angle = int(angle) if angle < 0 and frame > frame_offset: angle = final_angles[angle_name][frame - 2] ## final_angles[angle_name].append(angle) ## if frame <= frame_offset: if angle >= 0 and angle < min_angle: final_min_angles[angle_name].append(angle) else: final_min_angles[angle_name].append(min_angle) if angle >= 0 and angle > max_angle: final_max_angles[angle_name].append(angle) else: final_max_angles[angle_name].append(max_angle) else: previous_min_angle = final_min_angles[angle_name][ frame - 2] previous_max_angle = final_max_angles[angle_name][ frame - 2] diff_angle = abs( final_angles[angle_name][frame - 1] - final_angles[angle_name][frame - 2]) if angle >= 0 and angle < previous_min_angle and diff_angle < max_diff_angle: final_min_angles[angle_name].append(angle) else: 
final_min_angles[angle_name].append( previous_min_angle) if angle >= 0 and angle > previous_max_angle and diff_angle < max_diff_angle: final_max_angles[angle_name].append(angle) else: final_max_angles[angle_name].append( previous_max_angle) ## plt.figure() plt.plot(frames[frame_offset + 1:], final_angles[angle_name][frame_offset + 1:]) plt.plot(frames[frame_offset + 1:], final_min_angles[angle_name][frame_offset + 1:], linestyle='--', dashes=(5, 3)) plt.plot(frames[frame_offset + 1:], final_max_angles[angle_name][frame_offset + 1:], linestyle='--', dashes=(5, 3)) plt.xlabel('Frames') plt.ylabel('Angle (degree)') plt.title(angle_name) plt.grid(True) plt.savefig( os.path.join(self.opt.outputpath_plot, angle_name + ".jpg")) plt.close() ## for distance_name, distance in distances.items(): distance = round(distance, 2) if distance < 0 and frame > frame_offset: distance = final_distances[distance_name][frame - 2] ## final_distances[distance_name].append(distance) ## if frame <= frame_offset: if distance >= 0 and distance < min_distance: final_min_distances[distance_name].append( distance) else: final_min_distances[distance_name].append( min_distance) if distance >= 0 and distance > max_distance: final_max_distances[distance_name].append( distance) else: final_max_distances[distance_name].append( max_distance) else: previous_min_distance = final_min_distances[ distance_name][frame - 2] previous_max_distance = final_max_distances[ distance_name][frame - 2] diff_distance = abs( final_distances[distance_name][frame - 1] - final_distances[distance_name][frame - 2]) if distance_name is 'Distance_10' or distance_name is 'Distance_11': diff_distance *= 100 if distance >= 0 and distance < previous_min_distance and diff_distance < max_diff_distance: final_min_distances[distance_name].append( distance) else: final_min_distances[distance_name].append( previous_min_distance) if distance >= 0 and distance > previous_max_distance and diff_distance < max_diff_distance: final_max_distances[distance_name].append( distance) else: final_max_distances[distance_name].append( previous_max_distance) ## plt.figure() plt.plot( frames[frame_offset + 1:], final_distances[distance_name][frame_offset + 1:]) plt.plot( frames[frame_offset + 1:], final_min_distances[distance_name][frame_offset + 1:], linestyle='--', dashes=(5, 3)) plt.plot( frames[frame_offset + 1:], final_max_distances[distance_name][frame_offset + 1:], linestyle='--', dashes=(5, 3)) plt.xlabel('Frames') plt.ylabel('Distance (cm)') plt.title(distance_name) plt.grid(True) plt.savefig( os.path.join(self.opt.outputpath_plot, distance_name + ".jpg")) plt.close() ## df_angle = pd.DataFrame.from_dict(final_angles) df_min_angle = pd.DataFrame.from_dict(final_min_angles) df_max_angle = pd.DataFrame.from_dict(final_max_angles) with pd.ExcelWriter( os.path.join(self.opt.outputpath_plot, "Angles.xlsx")) as writer: df_angle.to_excel(writer, sheet_name='Angles', index=False) df_min_angle.to_excel(writer, sheet_name='Min_Angles', index=False) df_max_angle.to_excel(writer, sheet_name='Max_Angles', index=False) ## df_distance = pd.DataFrame.from_dict(final_distances) df_min_distance = pd.DataFrame.from_dict( final_min_distances) df_max_distance = pd.DataFrame.from_dict( final_max_distances) with pd.ExcelWriter( os.path.join(self.opt.outputpath_plot, "Distances.xlsx")) as writer: df_distance.to_excel(writer, sheet_name='Distances', index=False) df_min_distance.to_excel(writer, sheet_name='Min_Distances', index=False) df_max_distance.to_excel(writer, sheet_name='Max_Distances', 
index=False) ######### self.write_image( img, im_name, stream=stream if self.save_video else None, frame=frame) def write_image(self, img, im_name, stream=None, frame=1): img = cv2.putText(img, f'frame: {frame}', (20, 20), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2) if self.opt.vis: cv2.imshow("AlphaPose Demo", img) cv2.waitKey(30) if self.opt.save_img: cv2.imwrite(os.path.join(self.opt.outputpath, 'vis', im_name), img) if self.save_video: stream.write(img) def wait_and_put(self, queue, item): queue.put(item) def wait_and_get(self, queue): return queue.get() def save(self, boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name): # save next frame in the queue self.wait_and_put( self.result_queue, (boxes, scores, ids, hm_data, cropped_boxes, orig_img, im_name)) def running(self): # indicate that the thread is still running return not self.result_queue.empty() def count(self): # indicate the remaining images return self.result_queue.qsize() def stop(self): # indicate that the thread should be stopped self.save(None, None, None, None, None, None, None) self.result_worker.join() def terminate(self): # directly terminate self.result_worker.terminate() def clear_queues(self): self.clear(self.result_queue) def clear(self, queue): while not queue.empty(): queue.get() def results(self): # return final result print(self.final_result) return self.final_result def recognize_video_ext(self, ext=''): if ext == 'mp4': return cv2.VideoWriter_fourcc(*'mp4v'), '.' + ext elif ext == 'avi': return cv2.VideoWriter_fourcc(*'XVID'), '.' + ext elif ext == 'mov': return cv2.VideoWriter_fourcc(*'XVID'), '.' + ext else: print("Unknow video format {}, will use .mp4 instead of it".format( ext)) return cv2.VideoWriter_fourcc(*'mp4v'), '.mp4' def calc_angles(self, im_res, vis_thres=0.4): def find_angle(p1, p2, p3): a = np.array(p1) b = np.array(p2) c = np.array(p3) ba = a - b bc = c - b cosine_angle = np.dot( ba, bc) / (np.linalg.norm(ba) * np.linalg.norm(bc) + 1e-9) angle = np.arccos(cosine_angle) * (180 / np.pi) return angle kp_num = len(im_res['result'][0]['keypoints']) if kp_num == 26: angle_vertices = { 'Angle_1': (17, 18, 19), 'Angle_2': (0, 18, 19), 'Angle_3': (18, 5, 7), 'Angle_4': (18, 6, 8), 'Angle_5': (5, 7, 9), 'Angle_6': (6, 8, 10), 'Angle_7': (10, 18, 9), 'Angle_8': (19, 11, 13), 'Angle_9': (19, 12, 14), 'Angle_10': (12, 19, 11), 'Angle_11': (11, 13, 15), 'Angle_12': (12, 14, 16), 'Angle_13': (16, 19, 15), 'Angle_14': (20, 24, 15), 'Angle_15': (21, 25, 16), 'Angle_16': (5, 11, 13), 'Angle_17': (6, 12, 14), 'Angle_18': (11, 5, 7), 'Angle_19': (12, 6, 8), 'Angle_20': (0, 18, 5), 'Angle_21': (0, 18, 6), 'Angle_22': (18, 19, 24), 'Angle_23': (18, 19, 25), } else: raise NotImplementedError for human in im_res['result']: part_angle = {} kp_preds = human['keypoints'] kp_scores = human['kp_score'] # Valid keypoints for n in range(kp_scores.shape[0]): if kp_scores[n] <= vis_thres: continue cor_x, cor_y = int(kp_preds[n, 0]), int(kp_preds[n, 1]) part_angle[n] = (cor_x, cor_y) # Calc angles angles = {} for angle_name, (start_p, center_p, end_p) in angle_vertices.items(): if start_p in part_angle and end_p in part_angle and center_p in part_angle: start_xy = part_angle[start_p] center_xy = part_angle[center_p] end_xy = part_angle[end_p] angle = find_angle(start_xy, center_xy, end_xy) angles[angle_name] = angle else: angles[angle_name] = -10 return angles def calc_bound_points(self, im_res, vis_thres=0.4): kp_num = len(im_res['result'][0]['keypoints']) if kp_num == 26: ground_vertices = [24, 25, 20, 21] 
head_vertices = [17] else: raise NotImplementedError for human in im_res['result']: ground_points = {} head_points = {} kp_preds = human['keypoints'] kp_scores = human['kp_score'] # Valid keypoints for n in range(kp_scores.shape[0]): if kp_scores[n] <= vis_thres: continue cor_x, cor_y = int(kp_preds[n, 0]), int(kp_preds[n, 1]) if n in head_vertices: head_points[n] = (cor_x, cor_y) elif n in ground_vertices: ground_points[n] = (cor_x, cor_y) # Calc bound points if len(ground_points) == 0: ground_point = None else: x_points = [x for x, _ in ground_points.values()] y_points = [y for _, y in ground_points.values()] ground_point = (np.average(x_points), np.average(y_points)) if len(head_points) == 0: head_point = None else: x_points = [x for x, _ in head_points.values()] y_points = [y for _, y in head_points.values()] head_point = (np.average(x_points), np.average(y_points)) return ground_point, head_point def calc_distances(self, im_res, ground_point, head_point, height_ratio, vis_thres=0.4): if ground_point is None: ground_point = (-10, -10) if head_point is None: head_point = (-10, -10) def find_distance(p1, p2): a = np.array(p1) b = np.array(p2) ba = a - b distance = np.linalg.norm(ba) return distance kp_num = len(im_res['result'][0]['keypoints']) if kp_num == 26: distance_vertices = { 'Distance_1': (20, 26), 'Distance_2': (21, 26), 'Distance_3': (24, 26), 'Distance_4': (25, 26), 'Distance_5': (20, 25), 'Distance_6': (21, 24), 'Distance_7': (24, 25), 'Distance_8': (24, 11), 'Distance_9': (25, 12), 'Distance_10': (17, 26), 'Distance_11': (17, 26), 'Distance_12': (9, 20), 'Distance_13': (10, 21), 'Distance_14': (9, 0), 'Distance_15': (10, 0), 'Distance_16': (9, 10), 'Distance_17': (7, 3), 'Distance_18': (8, 4), 'Distance_19': (26, 17), 'Distance_20': (17, 27), } else: raise NotImplementedError for human in im_res['result']: part_distance = {} kp_preds = human['keypoints'] kp_scores = human['kp_score'] # Valid keypoints for n in range(kp_scores.shape[0]): if kp_scores[n] <= vis_thres: continue cor_x, cor_y = int(kp_preds[n, 0]), int(kp_preds[n, 1]) part_distance[n] = (cor_x, cor_y) part_distance[26] = ground_point part_distance[27] = head_point # Calc distances distances = {} for distance_name, (start_p, end_p) in distance_vertices.items(): if start_p in part_distance and end_p in part_distance: start_xy = part_distance[start_p] end_xy = part_distance[end_p] distance = find_distance(start_xy, end_xy) if end_p == 26: start_y = part_distance[start_p][1] end_y = part_distance[end_p][1] distance = abs(end_y - start_y) distances[distance_name] = distance * height_ratio else: distances[distance_name] = -10 ## distances['Distance_10'] = (2 * distances['Distance_5']) / ( distances['Distance_10'] + 1e-6) distances['Distance_11'] = (2 * distances['Distance_6']) / ( distances['Distance_11'] + 1e-6) return distances
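# --- Illustration (not part of the original file) -----------------------------
# How the pixel-to-centimetre conversion above works: during the first
# `frame_offset` frames the head and ground points are averaged, the pixel
# distance between them is taken as the subject's height, and every later
# joint-to-joint distance is scaled by person_height / pixel_height.  A small
# worked example with made-up numbers:
#
#   person_height = 165                                       # cm, as set in update()
#   pixel_height  = 550.0                                      # |head_point - ground_point| in pixels
#   height_ratio  = person_height / (pixel_height + 1e-6)      # ~0.30 cm per pixel
#   knee_to_ankle_px = 120.0
#   knee_to_ankle_cm = knee_to_ankle_px * height_ratio          # ~36 cm
# ------------------------------------------------------------------------------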