def evaluate(self, cfg, preds, output_dir, all_boxes, img_path, *args, **kwargs):
    """Rescore predicted keypoints, run OKS-NMS and write/evaluate COCO results.

    Args:
        cfg: experiment config; only ``cfg.RANK`` is read here.
        preds: per-detection keypoint arrays (num_joints x 3, x/y/confidence).
        output_dir: directory under which a ``results`` folder is created.
        all_boxes: per-detection rows [center(2), scale(2), area, score].
        img_path: per-detection image paths; the numeric image id is parsed
            from characters [-16:-4] of each path.

    Returns:
        (OrderedDict of metric name -> value, AP) when ground truth is
        available, otherwise ({'Null': 0}, 0) for test sets.
    """
    rank = cfg.RANK
    res_folder = os.path.join(output_dir, 'results')
    try:
        # exist_ok avoids the race between an existence check and creation
        # when several ranks evaluate concurrently.
        os.makedirs(res_folder, exist_ok=True)
    except OSError:
        logger.error('Fail to make {}'.format(res_folder))
    res_file = os.path.join(
        res_folder,
        'keypoints_{}_results_{}.json'.format(self.image_set, rank))

    # person x (keypoints): one flat record per detection
    _kpts = [{
        'keypoints': kpt,
        'center': all_boxes[idx][0:2],
        'scale': all_boxes[idx][2:4],
        'area': all_boxes[idx][4],
        'score': all_boxes[idx][5],
        'image': int(img_path[idx][-16:-4])
    } for idx, kpt in enumerate(preds)]

    # image x person x (keypoints): group detections by image id
    kpts = defaultdict(list)
    for kpt in _kpts:
        kpts[kpt['image']].append(kpt)

    # rescoring and oks nms
    num_joints = self.num_joints
    in_vis_thre = self.in_vis_thre
    oks_thre = self.oks_thre
    oks_nmsed_kpts = []
    for img_kpts in kpts.values():
        for n_p in img_kpts:
            box_score = n_p['score']
            # mean confidence over joints above the visibility threshold
            visible = [n_p['keypoints'][n_jt][2]
                       for n_jt in range(num_joints)
                       if n_p['keypoints'][n_jt][2] > in_vis_thre]
            kpt_score = sum(visible) / len(visible) if visible else 0
            # rescoring: combine detection score and keypoint confidence
            n_p['score'] = kpt_score * box_score

        if self.soft_nms:
            keep = soft_oks_nms(list(img_kpts), oks_thre)
        else:
            keep = oks_nms(list(img_kpts), oks_thre)

        # fall back to all detections when NMS keeps nothing
        if len(keep) == 0:
            oks_nmsed_kpts.append(img_kpts)
        else:
            oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])

    self._write_coco_keypoint_results(oks_nmsed_kpts, res_file)
    if 'test' not in self.image_set:
        info_str = self._do_python_keypoint_eval(res_file, res_folder)
        name_value = OrderedDict(info_str)
        return name_value, name_value['AP']
    else:
        # no ground truth available for the test split; return a placeholder
        return {'Null': 0}, 0
def test_net(tester, dets, det_range, gpu_id, sigmas, vis_kps):
    """Run heatmap-based pose estimation over a slice of detections.

    Groups the detections in ``dets[det_range[0]:det_range[1]]`` by image,
    runs the network batch-wise, decodes keypoints from the output heatmaps,
    applies dataset-specific rescoring and OKS-NMS, and returns result dicts
    in the dataset's submission format.

    Args:
        tester: model wrapper exposing ``predict_one([inputs]) -> [heatmap]``.
        dets: detection dicts ordered so that entries sharing an 'image_id'
            are contiguous (the grouping loop below relies on this).
        det_range: (start, end) index range of ``dets`` for this worker.
        gpu_id: worker index, used only for progress-bar placement/labeling.
        sigmas: per-keypoint OKS sigmas forwarded to ``oks_nms`` (COCO/JTA).
        vis_kps: when True, write per-image visualizations to ``cfg.vis_dir``.

    Returns:
        list of per-person result dicts (image_id, keypoints, score(s), ...).
    """
    dump_results = []
    start_time = time.time()  # NOTE(review): never read afterwards
    img_start = det_range[0]
    img_id = 0   # counter naming per-crop debug images
    img_id2 = 0  # counter naming per-image debug images
    pbar = tqdm(total=det_range[1] - img_start - 1, position=gpu_id)
    pbar.set_description("GPU %s" % str(gpu_id))
    while img_start < det_range[1]:
        # advance img_end past every detection of the current image
        img_end = img_start + 1
        im_info = dets[img_start]
        while img_end < det_range[1] and dets[img_end]['image_id'] == im_info[
                'image_id']:
            img_end += 1

        # all human detection results of a certain image
        cropped_data = dets[img_start:img_end]

        pbar.update(img_end - img_start)
        img_start = img_end

        # one (x, y, confidence) row per keypoint per detection
        kps_result = np.zeros((len(cropped_data), cfg.num_kps, 3))
        area_save = np.zeros(len(cropped_data))

        # cluster human detection results with test_batch_size
        for batch_id in range(0, len(cropped_data), cfg.test_batch_size):
            start_id = batch_id
            end_id = min(len(cropped_data), batch_id + cfg.test_batch_size)

            # crop/normalize each detection into a network input
            imgs = []
            crop_infos = []
            for i in range(start_id, end_id):
                img, crop_info = generate_batch(cropped_data[i], stage='test')
                imgs.append(img)
                crop_infos.append(crop_info)
            imgs = np.array(imgs)
            crop_infos = np.array(crop_infos)

            # forward
            heatmap = tester.predict_one([imgs])[0]

            if cfg.flip_test:
                # average with predictions on horizontally flipped inputs
                flip_imgs = imgs[:, :, ::-1, :]
                flip_heatmap = tester.predict_one([flip_imgs])[0]

                # un-flip the heatmap and swap symmetric keypoint channels
                flip_heatmap = flip_heatmap[:, :, ::-1, :]
                for (q, w) in cfg.kps_symmetry:
                    flip_heatmap_w, flip_heatmap_q = flip_heatmap[:, :, :, w].copy(
                    ), flip_heatmap[:, :, :, q].copy()
                    flip_heatmap[:, :, :, q], flip_heatmap[:, :, :, w] = flip_heatmap_w, flip_heatmap_q
                # one-pixel shift — presumably compensates the half-pixel
                # misalignment introduced by flipping; TODO confirm
                flip_heatmap[:, :, 1:, :] = flip_heatmap.copy()[:, :, 0:-1, :]
                heatmap += flip_heatmap
                heatmap /= 2

            # for each human detection from clustered batch
            for image_id in range(start_id, end_id):
                for j in range(cfg.num_kps):
                    # argmax of the j-th heatmap gives the coarse location
                    hm_j = heatmap[image_id - start_id, :, :, j]
                    idx = hm_j.argmax()
                    y, x = np.unravel_index(idx, hm_j.shape)

                    # quarter-pixel refinement toward the local gradient
                    px = int(math.floor(x + 0.5))
                    py = int(math.floor(y + 0.5))
                    if 1 < px < cfg.output_shape[
                            1] - 1 and 1 < py < cfg.output_shape[0] - 1:
                        diff = np.array([
                            hm_j[py][px + 1] - hm_j[py][px - 1],
                            hm_j[py + 1][px] - hm_j[py - 1][px]
                        ])
                        diff = np.sign(diff)
                        x += diff[0] * .25
                        y += diff[1] * .25
                    # scale heatmap coords up to network-input resolution
                    kps_result[image_id, j, :2] = (
                        x * cfg.input_shape[1] / cfg.output_shape[1],
                        y * cfg.input_shape[0] / cfg.output_shape[0])
                    # peak value as confidence; /255 presumably maps a
                    # 0-255 heatmap range to [0, 1] — TODO confirm
                    kps_result[image_id, j, 2] = hm_j.max() / 255

                # per-crop debug visualization (hard-disabled)
                vis = False
                crop_info = crop_infos[image_id - start_id, :]
                area = (crop_info[2] - crop_info[0]) * (crop_info[3] -
                                                        crop_info[1])
                # NOTE(review): `np.any(...) > 0.9` compares a bool to 0.9
                # and is always False; `np.any(kps_result[...] > 0.9)` was
                # likely intended. Dead code while `vis` is False.
                if vis and np.any(kps_result[image_id, :,
                                             2]) > 0.9 and area > 96**2:
                    tmpimg = imgs[image_id - start_id].copy()
                    tmpimg = cfg.denormalize_input(tmpimg)
                    tmpimg = tmpimg.astype('uint8')
                    tmpkps = np.zeros((3, cfg.num_kps))
                    tmpkps[:2, :] = kps_result[image_id, :, :2].transpose(1, 0)
                    tmpkps[2, :] = kps_result[image_id, :, 2]
                    _tmpimg = tmpimg.copy()
                    _tmpimg = cfg.vis_keypoints(_tmpimg, tmpkps)
                    cv2.imwrite(
                        osp.join(cfg.vis_dir,
                                 str(img_id) + '_output.jpg'), _tmpimg)
                    img_id += 1

                # map back to original images
                for j in range(cfg.num_kps):
                    kps_result[image_id, j, 0] = kps_result[
                        image_id, j, 0] / cfg.input_shape[1] * (
                            crop_infos[image_id - start_id][2] -
                            crop_infos[image_id - start_id][0]
                        ) + crop_infos[image_id - start_id][0]
                    kps_result[image_id, j, 1] = kps_result[
                        image_id, j, 1] / cfg.input_shape[0] * (
                            crop_infos[image_id - start_id][3] -
                            crop_infos[image_id - start_id][1]
                        ) + crop_infos[image_id - start_id][1]

                # original-image crop area, consumed below by oks_nms
                area_save[image_id] = (
                    crop_infos[image_id - start_id][2] -
                    crop_infos[image_id - start_id][0]) * (
                        crop_infos[image_id - start_id][3] -
                        crop_infos[image_id - start_id][1])

        #vis
        if vis_kps and np.any(kps_result[:, :, 2] > 0.8):
            # draw all detections of this image onto the source picture
            tmpimg = cv2.imread(
                os.path.join(cfg.img_path, cropped_data[0]['imgpath']))
            tmpimg = tmpimg.astype('uint8')
            for i in range(len(kps_result)):
                tmpkps = np.zeros((3, cfg.num_kps))
                tmpkps[:2, :] = kps_result[i, :, :2].transpose(1, 0)
                tmpkps[2, :] = kps_result[i, :, 2]
                tmpimg = cfg.vis_keypoints(tmpimg, tmpkps)
            cv2.imwrite(osp.join(cfg.vis_dir, str(img_id2) + '.jpg'), tmpimg)
            img_id2 += 1

        # keep confidences aside; the flattened result carries flag 1
        score_result = np.copy(kps_result[:, :, 2])
        kps_result[:, :, 2] = 1
        kps_result = kps_result.reshape(-1, cfg.num_kps * 3)

        # rescoring and oks nms
        if cfg.dataset == 'COCO' or cfg.dataset == 'JTA':
            # rescore: mean joint confidence above threshold x detector score
            rescored_score = np.zeros((len(score_result)))
            for i in range(len(score_result)):
                score_mask = score_result[i] > cfg.score_thr
                if np.sum(score_mask) > 0:
                    rescored_score[i] = np.mean(
                        score_result[i][score_mask]) * cropped_data[i]['score']
            score_result = rescored_score
            keep = oks_nms(kps_result, score_result, area_save,
                           cfg.oks_nms_thr, sigmas)
            if len(keep) > 0:
                kps_result = kps_result[keep, :]
                score_result = score_result[keep]
                area_save = area_save[keep]
        elif cfg.dataset == 'PoseTrack':
            keep = oks_nms(kps_result, np.mean(score_result, axis=1),
                           area_save, cfg.oks_nms_thr)
            if len(keep) > 0:
                kps_result = kps_result[keep, :]
                score_result = score_result[keep, :]
                area_save = area_save[keep]

        # save result
        for i in range(len(kps_result)):
            if cfg.dataset == 'COCO' or cfg.dataset == 'JTA':
                result = dict(image_id=im_info['image_id'],
                              category_id=1,
                              keypoints=kps_result[i].round(3).tolist(),
                              score=float(round(score_result[i], 4)))
            elif cfg.dataset == 'PoseTrack':
                result = dict(image_id=im_info['image_id'],
                              category_id=1,
                              track_id=0,
                              scores=score_result[i].round(4).tolist(),
                              keypoints=kps_result[i].round(3).tolist())
            elif cfg.dataset == 'MPII':
                result = dict(image_id=im_info['image_id'],
                              scores=score_result[i].round(4).tolist(),
                              keypoints=kps_result[i].round(3).tolist())
            # elif cfg.dataset == 'JTA':
            #     result = dict(image_id=im_info['image_id'],category_id=1, scores=score_result[i].round(4).tolist(),
            #                   keypoints=kps_result[i].round(3).tolist())
            dump_results.append(result)

    pbar.close()
    del pbar
    return dump_results
def rescore_and_save_result(self, output_file, preds, all_boxes, img_path, orig_boxes):
    """Rescore keypoint predictions, apply per-frame OKS-NMS and save them.

    Args:
        output_file: destination path; must end with '.json' or '.npy'.
        preds: per-detection keypoint arrays (num_joints x 3, x/y/confidence).
        all_boxes: per-detection rows [center(2), scale(2), area, score].
        img_path: per-detection image identifiers used as frame keys.
        orig_boxes: original detection boxes stored alongside each record.
    """
    assert output_file.endswith('.json') or output_file.endswith(
        '.npy'), "Only json and numpy output is supported"
    ensuredir(os.path.dirname(output_file))

    # person x (keypoints): one flat record per detection
    _kpts = [{
        'keypoints': kpt,
        'center': all_boxes[idx][0:2],
        'scale': all_boxes[idx][2:4],
        'area': all_boxes[idx][4],
        'score': all_boxes[idx][5],
        'image': img_path[idx],
        'origbox': orig_boxes[idx]
    } for idx, kpt in enumerate(preds)]

    # image x person x (keypoints): group detections by frame key
    kpts = defaultdict(list)
    for kpt in _kpts:
        kpts[kpt['image']].append(kpt)

    # rescoring and oks nms
    num_joints = self.num_joints
    in_vis_thre = self.in_vis_thre
    oks_thre = self.oks_thre
    # NOTE: the original also built an `oks_nmsed_kpts` list that was never
    # used afterwards; it has been removed.
    nmsed_kpts_by_frame = defaultdict(list)
    for img, img_kpts in kpts.items():
        for n_p in img_kpts:
            box_score = n_p['score']
            # mean confidence over joints above the visibility threshold
            visible = [n_p['keypoints'][n_jt][2]
                       for n_jt in range(num_joints)
                       if n_p['keypoints'][n_jt][2] > in_vis_thre]
            kpt_score = sum(visible) / len(visible) if visible else 0
            # rescoring: combine detection score and keypoint confidence
            n_p['score'] = kpt_score * box_score

        if self.soft_nms:
            keep = soft_oks_nms(list(img_kpts), oks_thre)
        else:
            keep = oks_nms(list(img_kpts), oks_thre)

        # fall back to all detections when NMS keeps nothing
        if len(keep) == 0:
            selected_kpts = img_kpts
        else:
            selected_kpts = [img_kpts[_keep] for _keep in keep]
        nmsed_kpts_by_frame[img] = selected_kpts

    self._write_keypoint_results(nmsed_kpts_by_frame, output_file)
def evaluate(self, cfg, preds, output_dir, all_boxes, image_path, file_names, *args):
    """Rescore predicted keypoints, run OKS-NMS and evaluate (PoseTrack).

    Args:
        cfg: experiment config (not read directly here).
        preds: per-detection keypoint arrays (num_joints x 3, x/y/confidence).
        output_dir: directory under which a ``results`` folder is created.
        all_boxes: per-detection rows [center(2), scale(2), area, score].
        image_path: per-detection image paths, e.g.
            ``data/posetrack/images/valid/000522_mpii_test/000080.jpg``.
        file_names: unused; kept for interface compatibility.

    Returns:
        (OrderedDict of metric name -> value, AP) when ground truth is
        available, otherwise ({'Null': 0}, 0) for test sets.
    """
    res_folder = os.path.join(output_dir, 'results')
    # exist_ok avoids the race between an existence check and creation
    os.makedirs(res_folder, exist_ok=True)
    res_file = os.path.join(res_folder,
                            'keypoints_%s_results.json' % self.image_set)

    # person x (keypoints)
    _kpts = []
    for idx, kpt in enumerate(preds):
        # Build a unique numeric image id from the path:
        #   frame number  = last four digits of the file name,
        #   sequence id   = first six characters of the parent folder,
        #   prefix        = '1' for val/valid splits, '0' otherwise.
        image_id = image_path[idx][-8:-4]
        folder_name = image_path[idx].split('/')[-2][0:6]
        # BUG FIX: the original `self.image_set == 'valid' or 'val'` was
        # always truthy ('val' is a non-empty string), so the '0' prefix for
        # test splits was unreachable; test membership instead.
        if 'val' in self.image_set:
            prefix = '1'
        else:
            prefix = '0'
        image_id = int(prefix + folder_name + image_id)
        _kpts.append({
            'keypoints': kpt,
            'center': all_boxes[idx][0:2],
            'scale': all_boxes[idx][2:4],
            'area': all_boxes[idx][4],
            'score': all_boxes[idx][5],
            'image_id': image_id
        })

    # image x person x (keypoints)
    kpts = defaultdict(list)
    for kpt in _kpts:
        kpts[kpt['image_id']].append(kpt)

    # rescoring and oks nms
    logger.info('=>len--kpts.keys()--{}'.format(len(kpts.keys())))
    num_joints = self.num_joints
    in_vis_thre = self.in_vis_thre
    oks_thre = self.oks_thre
    oks_nmsed_kpts = []
    for img_kpts in kpts.values():
        for n_p in img_kpts:
            box_score = n_p['score']
            # mean confidence over joints above the visibility threshold
            visible = [n_p['keypoints'][n_jt][2]
                       for n_jt in range(num_joints)
                       if n_p['keypoints'][n_jt][2] > in_vis_thre]
            kpt_score = sum(visible) / len(visible) if visible else 0
            # rescoring: combine detection score and keypoint confidence
            n_p['score'] = kpt_score * box_score

        keep = oks_nms(list(img_kpts), oks_thre)
        # fall back to all detections when NMS keeps nothing
        if len(keep) == 0:
            oks_nmsed_kpts.append(img_kpts)
        else:
            oks_nmsed_kpts.append([img_kpts[_keep] for _keep in keep])

    self._write_coco_keypoint_results(oks_nmsed_kpts, res_file)

    pred_folder = os.path.join(self.root, 'posetrack_example_results',
                               'test_pred/')
    gt_folder = os.path.join(self.root, 'posetrack_data', 'anns/')
    if 'test' not in self.image_set:
        info_str = self._do_python_keypoint_eval(res_file, res_folder)
        if "val" in self.image_set:
            self._do_posetrack_keypoint_eval(gt_folder, pred_folder)
        name_value = OrderedDict(info_str)
        return name_value, name_value['AP']
    else:
        # no ground truth available for the test split; return a placeholder
        return {'Null': 0}, 0
def test_net(tester, input_pose, det_range, gpu_id):
    """Refine input 2D poses over a slice of detections.

    Groups the pose records in ``input_pose[det_range[0]:det_range[1]]`` by
    image, feeds image crops together with their input pose coordinates and
    validity masks through the network batch-wise, applies OKS-NMS, and
    returns refined results in the dataset's submission format.

    Args:
        tester: model wrapper exposing
            ``predict_one([imgs, coords, valids]) -> [coords]``.
        input_pose: pose/detection dicts ordered so that entries sharing an
            'image_id' are contiguous (the grouping loop relies on this).
        det_range: (start, end) index range of ``input_pose`` for this worker.
        gpu_id: worker index, used only for progress-bar placement/labeling.

    Returns:
        list of per-person result dicts (image_id, keypoints, score(s), ...).
    """
    dump_results = []
    start_time = time.time()  # NOTE(review): never read afterwards
    img_start = det_range[0]
    img_id = 0   # counter naming per-crop debug images
    img_id2 = 0  # counter naming per-image debug images
    pbar = tqdm(total=det_range[1] - img_start - 1, position=gpu_id)
    pbar.set_description("GPU %s" % str(gpu_id))
    while img_start < det_range[1]:
        # advance img_end past every record of the current image
        img_end = img_start + 1
        im_info = input_pose[img_start]
        while img_end < det_range[1] and input_pose[img_end][
                'image_id'] == im_info['image_id']:
            img_end += 1

        # all human detection results of a certain image
        cropped_data = input_pose[img_start:img_end]

        #pbar.set_description("GPU %s" % str(gpu_id))
        pbar.update(img_end - img_start)

        img_start = img_end

        # one (x, y, confidence) row per keypoint per detection
        kps_result = np.zeros((len(cropped_data), cfg.num_kps, 3))
        area_save = np.zeros(len(cropped_data))

        # cluster human detection results with test_batch_size
        for batch_id in range(0, len(cropped_data), cfg.test_batch_size):
            start_id = batch_id
            end_id = min(len(cropped_data), batch_id + cfg.test_batch_size)

            # crop/normalize each detection and collect its input pose
            imgs = []
            input_pose_coords = []
            input_pose_valids = []
            input_pose_scores = []
            crop_infos = []
            for i in range(start_id, end_id):
                img, input_pose_coord, input_pose_valid, input_pose_score, crop_info = generate_batch(
                    cropped_data[i], stage='test')
                imgs.append(img)
                input_pose_coords.append(input_pose_coord)
                input_pose_valids.append(input_pose_valid)
                input_pose_scores.append(input_pose_score)
                crop_infos.append(crop_info)
            imgs = np.array(imgs)
            input_pose_coords = np.array(input_pose_coords)
            input_pose_valids = np.array(input_pose_valids)
            input_pose_scores = np.array(input_pose_scores)
            crop_infos = np.array(crop_infos)

            # forward
            coord = tester.predict_one(
                [imgs, input_pose_coords, input_pose_valids])[0]

            if cfg.flip_test:
                # average with predictions on horizontally flipped inputs;
                # the input pose must be mirrored to match the flipped image
                flip_imgs = imgs[:, :, ::-1, :]
                flip_input_pose_coords = input_pose_coords.copy()
                flip_input_pose_coords[:, :, 0] = cfg.input_shape[
                    1] - 1 - flip_input_pose_coords[:, :, 0]
                flip_input_pose_valids = input_pose_valids.copy()
                for (q, w) in cfg.kps_symmetry:
                    # swap symmetric keypoints (coords and validity masks)
                    flip_input_pose_coords_w, flip_input_pose_coords_q = flip_input_pose_coords[:, w, :].copy(
                    ), flip_input_pose_coords[:, q, :].copy()
                    flip_input_pose_coords[:, q, :], flip_input_pose_coords[:, w, :] = flip_input_pose_coords_w, flip_input_pose_coords_q
                    flip_input_pose_valids_w, flip_input_pose_valids_q = flip_input_pose_valids[:, w].copy(
                    ), flip_input_pose_valids[:, q].copy()
                    flip_input_pose_valids[:, q], flip_input_pose_valids[:, w] = flip_input_pose_valids_w, flip_input_pose_valids_q

                flip_coord = tester.predict_one([
                    flip_imgs, flip_input_pose_coords, flip_input_pose_valids
                ])[0]

                # un-mirror predictions and swap symmetric keypoints back
                flip_coord[:, :, 0] = cfg.input_shape[1] - 1 - flip_coord[:, :, 0]
                for (q, w) in cfg.kps_symmetry:
                    flip_coord_w, flip_coord_q = flip_coord[:, w, :].copy(
                    ), flip_coord[:, q, :].copy()
                    flip_coord[:, q, :], flip_coord[:, w, :] = flip_coord_w, flip_coord_q

                coord += flip_coord
                coord /= 2

            # for each human detection from clustered batch
            for image_id in range(start_id, end_id):
                # refined coordinates; confidence is taken from the input
                # pose scores, not predicted by the network
                kps_result[image_id, :, :2] = coord[image_id - start_id]
                kps_result[image_id, :, 2] = input_pose_scores[image_id -
                                                               start_id]

                # per-crop debug visualization (hard-disabled)
                vis = False
                crop_info = crop_infos[image_id - start_id, :]
                area = (crop_info[2] - crop_info[0]) * (crop_info[3] -
                                                        crop_info[1])
                # NOTE(review): `np.any(...) > 0.9` compares a bool to 0.9
                # and is always False; `np.any(kps_result[...] > 0.9)` was
                # likely intended. Dead code while `vis` is False.
                if vis and np.any(kps_result[image_id, :,
                                             2]) > 0.9 and area > 96**2:
                    tmpimg = imgs[image_id - start_id].copy()
                    tmpimg = cfg.denormalize_input(tmpimg)
                    tmpimg = tmpimg.astype('uint8')
                    tmpkps = np.zeros((3, cfg.num_kps))
                    tmpkps[:2, :] = kps_result[image_id, :, :2].transpose(1, 0)
                    tmpkps[2, :] = kps_result[image_id, :, 2]
                    _tmpimg = tmpimg.copy()
                    _tmpimg = cfg.vis_keypoints(_tmpimg, tmpkps)
                    cv2.imwrite(
                        osp.join(cfg.vis_dir,
                                 str(img_id) + '_output.jpg'), _tmpimg)
                    img_id += 1

                # map back to original images
                for j in range(cfg.num_kps):
                    kps_result[image_id, j, 0] = kps_result[
                        image_id, j, 0] / cfg.input_shape[1] * (
                            crop_infos[image_id - start_id][2] -
                            crop_infos[image_id - start_id][0]
                        ) + crop_infos[image_id - start_id][0]
                    kps_result[image_id, j, 1] = kps_result[
                        image_id, j, 1] / cfg.input_shape[0] * (
                            crop_infos[image_id - start_id][3] -
                            crop_infos[image_id - start_id][1]
                        ) + crop_infos[image_id - start_id][1]

                # original-image crop area, consumed below by oks_nms
                area_save[image_id] = (
                    crop_infos[image_id - start_id][2] -
                    crop_infos[image_id - start_id][0]) * (
                        crop_infos[image_id - start_id][3] -
                        crop_infos[image_id - start_id][1])

        #vis
        # per-image debug visualization (hard-disabled)
        vis = False
        if vis and np.any(kps_result[:, :, 2] > 0.9):
            tmpimg = cv2.imread(
                os.path.join(cfg.img_path, cropped_data[0]['imgpath']))
            tmpimg = tmpimg.astype('uint8')
            for i in range(len(kps_result)):
                tmpkps = np.zeros((3, cfg.num_kps))
                tmpkps[:2, :] = kps_result[i, :, :2].transpose(1, 0)
                tmpkps[2, :] = kps_result[i, :, 2]
                tmpimg = cfg.vis_keypoints(tmpimg, tmpkps)
            cv2.imwrite(osp.join(cfg.vis_dir, str(img_id2) + '.jpg'), tmpimg)
            img_id2 += 1

        # oks nms
        if cfg.dataset in ['COCO', 'PoseTrack']:
            # drop configured keypoints before computing OKS overlap
            nms_kps = np.delete(kps_result, cfg.ignore_kps, 1)
            nms_score = np.mean(nms_kps[:, :, 2], axis=1)
            nms_kps[:, :, 2] = 1
            nms_kps = nms_kps.reshape(len(kps_result), -1)
            nms_sigmas = np.delete(cfg.kps_sigmas, cfg.ignore_kps)
            keep = oks_nms(nms_kps, nms_score, area_save, cfg.oks_nms_thr,
                           nms_sigmas)
            if len(keep) > 0:
                kps_result = kps_result[keep, :, :]
                area_save = area_save[keep]

        # keep confidences aside; the flattened result carries flag 1
        score_result = np.copy(kps_result[:, :, 2])
        kps_result[:, :, 2] = 1
        kps_result = kps_result.reshape(-1, cfg.num_kps * 3)

        # save result
        for i in range(len(kps_result)):
            if cfg.dataset == 'COCO':
                result = dict(image_id=im_info['image_id'],
                              category_id=1,
                              score=float(round(np.mean(score_result[i]), 4)),
                              keypoints=kps_result[i].round(3).tolist())
            elif cfg.dataset == 'PoseTrack':
                result = dict(image_id=im_info['image_id'],
                              category_id=1,
                              track_id=0,
                              scores=score_result[i].round(4).tolist(),
                              keypoints=kps_result[i].round(3).tolist())
            elif cfg.dataset == 'MPII':
                result = dict(image_id=im_info['image_id'],
                              scores=score_result[i].round(4).tolist(),
                              keypoints=kps_result[i].round(3).tolist())

            dump_results.append(result)

    # NOTE(review): unlike the heatmap variant, `pbar` is never closed here
    return dump_results