def test_net(tester, dets, det_range, gpu_id):
    """Run heatmap-based pose estimation over a slice of human detections.

    Args:
        tester: model wrapper exposing ``predict_one([imgs]) -> [heatmaps]``.
        dets: list of per-human detection dicts, grouped consecutively by
            ``'image_id'``; each dict is consumable by
            ``generate_batch(..., stage='test')``.
        det_range: ``(start, end)`` index range of ``dets`` handled by this
            worker.
        gpu_id: worker index, used only for the progress-bar position/label.

    Returns:
        list of result dicts in the dataset-specific dump format
        (COCO / PoseTrack / MPII, selected by ``cfg.dataset``).
    """
    dump_results = []
    img_start = det_range[0]
    img_id = 0   # counter for per-crop visualization filenames
    img_id2 = 0  # counter for per-image visualization filenames
    pbar = tqdm(total=det_range[1] - img_start - 1, position=gpu_id)
    pbar.set_description("GPU %s" % str(gpu_id))

    while img_start < det_range[1]:
        # Gather all consecutive detections belonging to the same image.
        img_end = img_start + 1
        im_info = dets[img_start]
        while img_end < det_range[1] and dets[img_end]['image_id'] == im_info['image_id']:
            img_end += 1
        cropped_data = dets[img_start:img_end]
        pbar.update(img_end - img_start)
        img_start = img_end

        kps_result = np.zeros((len(cropped_data), cfg.num_kps, 3))
        area_save = np.zeros(len(cropped_data))

        # Process this image's detections in chunks of test_batch_size.
        for batch_id in range(0, len(cropped_data), cfg.test_batch_size):
            start_id = batch_id
            end_id = min(len(cropped_data), batch_id + cfg.test_batch_size)

            imgs = []
            crop_infos = []
            for i in range(start_id, end_id):
                img, crop_info = generate_batch(cropped_data[i], stage='test')
                imgs.append(img)
                crop_infos.append(crop_info)
            imgs = np.array(imgs)
            crop_infos = np.array(crop_infos)

            # Forward pass; optionally average with the horizontally
            # flipped input for test-time augmentation.
            heatmap = tester.predict_one([imgs])[0]
            if cfg.flip_test:
                flip_imgs = imgs[:, :, ::-1, :]
                flip_heatmap = tester.predict_one([flip_imgs])[0]
                flip_heatmap = flip_heatmap[:, :, ::-1, :]
                # Swap left/right keypoint channels so they line up again.
                for (q, w) in cfg.kps_symmetry:
                    flip_heatmap_w = flip_heatmap[:, :, :, w].copy()
                    flip_heatmap_q = flip_heatmap[:, :, :, q].copy()
                    flip_heatmap[:, :, :, q] = flip_heatmap_w
                    flip_heatmap[:, :, :, w] = flip_heatmap_q
                # Shift one pixel right to compensate flip misalignment.
                flip_heatmap[:, :, 1:, :] = flip_heatmap.copy()[:, :, 0:-1, :]
                heatmap += flip_heatmap
                heatmap /= 2

            # For each human detection in the clustered batch.
            for image_id in range(start_id, end_id):
                # Per-keypoint argmax with quarter-pixel refinement toward
                # the larger neighboring activation.
                for j in range(cfg.num_kps):
                    hm_j = heatmap[image_id - start_id, :, :, j]
                    idx = hm_j.argmax()
                    y, x = np.unravel_index(idx, hm_j.shape)
                    px = int(math.floor(x + 0.5))
                    py = int(math.floor(y + 0.5))
                    if 1 < px < cfg.output_shape[1] - 1 and 1 < py < cfg.output_shape[0] - 1:
                        diff = np.array([hm_j[py][px + 1] - hm_j[py][px - 1],
                                         hm_j[py + 1][px] - hm_j[py - 1][px]])
                        diff = np.sign(diff)
                        x += diff[0] * .25
                        y += diff[1] * .25
                    # Scale heatmap coordinates up to input-crop resolution.
                    kps_result[image_id, j, :2] = (
                        x * cfg.input_shape[1] / cfg.output_shape[1],
                        y * cfg.input_shape[0] / cfg.output_shape[0])
                    kps_result[image_id, j, 2] = hm_j.max() / 255

                vis = False
                crop_info = crop_infos[image_id - start_id, :]
                area = (crop_info[2] - crop_info[0]) * (crop_info[3] - crop_info[1])
                # BUGFIX: was `np.any(kps_result[...]) > 0.9`, which compares
                # the boolean result of np.any against 0.9; the threshold must
                # be applied inside np.any (cf. the whole-image vis check below).
                if vis and np.any(kps_result[image_id, :, 2] > 0.9) and area > 96**2:
                    tmpimg = imgs[image_id - start_id].copy()
                    tmpimg = cfg.denormalize_input(tmpimg)
                    tmpimg = tmpimg.astype('uint8')
                    tmpkps = np.zeros((3, cfg.num_kps))
                    tmpkps[:2, :] = kps_result[image_id, :, :2].transpose(1, 0)
                    tmpkps[2, :] = kps_result[image_id, :, 2]
                    _tmpimg = tmpimg.copy()
                    _tmpimg = cfg.vis_keypoints(_tmpimg, tmpkps)
                    cv2.imwrite(
                        osp.join(cfg.vis_dir, str(img_id) + '_output.jpg'),
                        _tmpimg)
                    img_id += 1

                # Map keypoints from crop space back to original image space.
                for j in range(cfg.num_kps):
                    kps_result[image_id, j, 0] = kps_result[image_id, j, 0] / cfg.input_shape[1] * (
                        crop_infos[image_id - start_id][2] - crop_infos[image_id - start_id][0]
                    ) + crop_infos[image_id - start_id][0]
                    kps_result[image_id, j, 1] = kps_result[image_id, j, 1] / cfg.input_shape[0] * (
                        crop_infos[image_id - start_id][3] - crop_infos[image_id - start_id][1]
                    ) + crop_infos[image_id - start_id][1]
                area_save[image_id] = (
                    crop_infos[image_id - start_id][2] - crop_infos[image_id - start_id][0]) * (
                    crop_infos[image_id - start_id][3] - crop_infos[image_id - start_id][1])

        # Optional whole-image visualization (disabled by default).
        vis = False
        if vis and np.any(kps_result[:, :, 2] > 0.9):
            tmpimg = cv2.imread(
                os.path.join(cfg.img_path, cropped_data[0]['imgpath']))
            tmpimg = tmpimg.astype('uint8')
            for i in range(len(kps_result)):
                tmpkps = np.zeros((3, cfg.num_kps))
                tmpkps[:2, :] = kps_result[i, :, :2].transpose(1, 0)
                tmpkps[2, :] = kps_result[i, :, 2]
                tmpimg = cfg.vis_keypoints(tmpimg, tmpkps)
            cv2.imwrite(osp.join(cfg.vis_dir, str(img_id2) + '.jpg'), tmpimg)
            img_id2 += 1

        # Separate confidences from coordinates; dumped keypoints carry
        # visibility flag 1 while scores are reported separately.
        score_result = np.copy(kps_result[:, :, 2])
        kps_result[:, :, 2] = 1
        kps_result = kps_result.reshape(-1, cfg.num_kps * 3)

        # Rescoring and OKS-based non-maximum suppression.
        if cfg.dataset == 'COCO':
            rescored_score = np.zeros((len(score_result)))
            for i in range(len(score_result)):
                score_mask = score_result[i] > cfg.score_thr
                if np.sum(score_mask) > 0:
                    # Mean confident-keypoint score weighted by detector score.
                    rescored_score[i] = np.mean(
                        score_result[i][score_mask]) * cropped_data[i]['score']
            score_result = rescored_score
            keep = oks_nms(kps_result, score_result, area_save, cfg.oks_nms_thr)
            if len(keep) > 0:
                kps_result = kps_result[keep, :]
                score_result = score_result[keep]
                area_save = area_save[keep]
        elif cfg.dataset == 'PoseTrack':
            keep = oks_nms(kps_result, np.mean(score_result, axis=1),
                           area_save, cfg.oks_nms_thr)
            if len(keep) > 0:
                kps_result = kps_result[keep, :]
                score_result = score_result[keep, :]
                area_save = area_save[keep]

        # Serialize surviving poses in the dataset-specific dump format.
        for i in range(len(kps_result)):
            if cfg.dataset == 'COCO':
                result = dict(image_id=im_info['image_id'],
                              category_id=1,
                              score=float(round(score_result[i], 4)),
                              keypoints=kps_result[i].round(3).tolist())
            elif cfg.dataset == 'PoseTrack':
                result = dict(image_id=im_info['image_id'],
                              category_id=1,
                              track_id=0,
                              scores=score_result[i].round(4).tolist(),
                              keypoints=kps_result[i].round(3).tolist())
            elif cfg.dataset == 'MPII':
                result = dict(image_id=im_info['image_id'],
                              scores=score_result[i].round(4).tolist(),
                              keypoints=kps_result[i].round(3).tolist())
            dump_results.append(result)

    pbar.close()  # release the progress bar (was leaked before)
    return dump_results
def test_net(tester, input_pose, det_range, gpu_id):
    """Refine input 2D poses over a slice of detections and collect results.

    Unlike the detection-based variant, the model consumes the input pose
    coordinates/valid flags alongside the image crop and directly regresses
    refined keypoint coordinates (plus heatmaps used only for visualization).

    Args:
        tester: model wrapper exposing
            ``predict_one([imgs, pose_coords, pose_valids]) -> (coord, heatmaps)``.
        input_pose: list of per-human pose dicts, grouped consecutively by
            ``'image_id'``; each consumable by ``generate_batch(..., stage='test')``.
        det_range: ``(start, end)`` index range of ``input_pose`` handled by
            this worker.
        gpu_id: worker index, used only for the progress-bar position/label.

    Returns:
        list of result dicts in the dataset-specific dump format
        (COCO / PoseTrack / MPII, selected by ``cfg.dataset``).
    """
    dump_results = []
    img_start = det_range[0]
    img_id = 0   # counter for per-crop visualization filenames
    img_id2 = 0  # counter for per-image visualization filenames
    pbar = tqdm(total=det_range[1] - img_start - 1, position=gpu_id)
    pbar.set_description("GPU %s" % str(gpu_id))

    while img_start < det_range[1]:
        # Gather all consecutive entries belonging to the same image.
        img_end = img_start + 1
        im_info = input_pose[img_start]
        while img_end < det_range[1] and input_pose[img_end]['image_id'] == im_info['image_id']:
            img_end += 1
        cropped_data = input_pose[img_start:img_end]
        pbar.update(img_end - img_start)
        img_start = img_end

        kps_result = np.zeros((len(cropped_data), cfg.num_kps, 3))
        area_save = np.zeros(len(cropped_data))

        # Process this image's poses in chunks of test_batch_size.
        for batch_id in range(0, len(cropped_data), cfg.test_batch_size):
            start_id = batch_id
            end_id = min(len(cropped_data), batch_id + cfg.test_batch_size)

            imgs = []
            input_pose_coords = []
            input_pose_valids = []
            input_pose_scores = []
            crop_infos = []
            for i in range(start_id, end_id):
                img, input_pose_coord, input_pose_valid, input_pose_score, crop_info = generate_batch(
                    cropped_data[i], stage='test')
                imgs.append(img)
                input_pose_coords.append(input_pose_coord)
                input_pose_valids.append(input_pose_valid)
                input_pose_scores.append(input_pose_score)
                crop_infos.append(crop_info)
            imgs = np.array(imgs)
            input_pose_coords = np.array(input_pose_coords)
            input_pose_valids = np.array(input_pose_valids)
            input_pose_scores = np.array(input_pose_scores)
            crop_infos = np.array(crop_infos)

            # Forward pass; heatmaps are only used for visualization below.
            coord, heatmaps = tester.predict_one(
                [imgs, input_pose_coords, input_pose_valids])

            if cfg.flip_test:
                # Mirror the image and the input pose horizontally.
                flip_imgs = imgs[:, :, ::-1, :]
                flip_input_pose_coords = input_pose_coords.copy()
                flip_input_pose_coords[:, :, 0] = cfg.input_shape[1] - 1 - flip_input_pose_coords[:, :, 0]
                flip_input_pose_valids = input_pose_valids.copy()
                # Swap left/right keypoints so semantics match the mirror.
                for (q, w) in cfg.kps_symmetry:
                    coords_w = flip_input_pose_coords[:, w, :].copy()
                    coords_q = flip_input_pose_coords[:, q, :].copy()
                    flip_input_pose_coords[:, q, :] = coords_w
                    flip_input_pose_coords[:, w, :] = coords_q
                    valids_w = flip_input_pose_valids[:, w].copy()
                    valids_q = flip_input_pose_valids[:, q].copy()
                    flip_input_pose_valids[:, q] = valids_w
                    flip_input_pose_valids[:, w] = valids_q

                flip_coord = tester.predict_one([
                    flip_imgs, flip_input_pose_coords, flip_input_pose_valids
                ])[0]
                # Un-mirror the predicted coordinates and swap pairs back.
                flip_coord[:, :, 0] = cfg.input_shape[1] - 1 - flip_coord[:, :, 0]
                for (q, w) in cfg.kps_symmetry:
                    flip_coord_w = flip_coord[:, w, :].copy()
                    flip_coord_q = flip_coord[:, q, :].copy()
                    flip_coord[:, q, :] = flip_coord_w
                    flip_coord[:, w, :] = flip_coord_q

                coord += flip_coord
                coord /= 2

            # For each human pose in the clustered batch.
            for image_id in range(start_id, end_id):
                kps_result[image_id, :, :2] = coord[image_id - start_id]
                # Confidence comes from the input pose scores, not the model.
                kps_result[image_id, :, 2] = input_pose_scores[image_id - start_id]

                vis = True
                crop_info = crop_infos[image_id - start_id, :]
                area = (crop_info[2] - crop_info[0]) * (crop_info[3] - crop_info[1])
                # BUGFIX: was `np.any(kps_result[...]) > 0.9`, which compares
                # the boolean result of np.any against 0.9 (always True when
                # any score is nonzero); the threshold must be applied inside
                # np.any (cf. the whole-image vis check below).
                if vis and np.any(kps_result[image_id, :, 2] > 0.9) and area > 96**2:
                    tmpimg = imgs[image_id - start_id].copy()
                    tmpimg = cfg.denormalize_input(tmpimg)
                    tmpimg = tmpimg.astype('uint8')
                    tmpkps = np.zeros((3, cfg.num_kps))
                    tmpkps[:2, :] = kps_result[image_id, :, :2].transpose(1, 0)
                    tmpkps[2, :] = kps_result[image_id, :, 2]
                    _tmpimg = tmpimg.copy()
                    _tmpimg = cfg.vis_keypoints(_tmpimg, tmpkps, kp_thresh=0.05)
                    cv2.imwrite(
                        osp.join(cfg.vis_dir, str(img_id) + '_output.jpg'),
                        _tmpimg)
                    save_mergedHeatmaps(
                        heatmaps[image_id - start_id],
                        osp.join(cfg.vis_dir, str(img_id) + '_hm.jpg'),
                        softmax=True)
                    img_id += 1

                # Map keypoints from crop space back to original image space.
                for j in range(cfg.num_kps):
                    kps_result[image_id, j, 0] = kps_result[image_id, j, 0] / cfg.input_shape[1] * (
                        crop_infos[image_id - start_id][2] - crop_infos[image_id - start_id][0]
                    ) + crop_infos[image_id - start_id][0]
                    kps_result[image_id, j, 1] = kps_result[image_id, j, 1] / cfg.input_shape[0] * (
                        crop_infos[image_id - start_id][3] - crop_infos[image_id - start_id][1]
                    ) + crop_infos[image_id - start_id][1]
                area_save[image_id] = (
                    crop_infos[image_id - start_id][2] - crop_infos[image_id - start_id][0]) * (
                    crop_infos[image_id - start_id][3] - crop_infos[image_id - start_id][1])

        # Optional whole-image visualization (disabled by default).
        vis = False
        if vis and np.any(kps_result[:, :, 2] > 0.9):
            tmpimg = cv2.imread(
                os.path.join(cfg.img_path, cropped_data[0]['imgpath']))
            tmpimg = tmpimg.astype('uint8')
            for i in range(len(kps_result)):
                tmpkps = np.zeros((3, cfg.num_kps))
                tmpkps[:2, :] = kps_result[i, :, :2].transpose(1, 0)
                tmpkps[2, :] = kps_result[i, :, 2]
                tmpimg = cfg.vis_keypoints(tmpimg, tmpkps)
            cv2.imwrite(osp.join(cfg.vis_dir, str(img_id2) + '.jpg'), tmpimg)
            img_id2 += 1

        # OKS-based non-maximum suppression on the non-ignored keypoints.
        if cfg.dataset in ['COCO', 'PoseTrack']:
            nms_kps = np.delete(kps_result, cfg.ignore_kps, 1)
            nms_score = np.mean(nms_kps[:, :, 2], axis=1)
            nms_kps[:, :, 2] = 1
            nms_kps = nms_kps.reshape(len(kps_result), -1)
            nms_sigmas = np.delete(cfg.kps_sigmas, cfg.ignore_kps)
            keep = oks_nms(nms_kps, nms_score, area_save, cfg.oks_nms_thr,
                           nms_sigmas)
            if len(keep) > 0:
                kps_result = kps_result[keep, :, :]
                area_save = area_save[keep]

        # Separate confidences from coordinates; dumped keypoints carry
        # visibility flag 1 while scores are reported separately.
        score_result = np.copy(kps_result[:, :, 2])
        kps_result[:, :, 2] = 1
        kps_result = kps_result.reshape(-1, cfg.num_kps * 3)

        # Serialize surviving poses in the dataset-specific dump format.
        for i in range(len(kps_result)):
            if cfg.dataset == 'COCO':
                result = dict(image_id=im_info['image_id'],
                              category_id=1,
                              score=float(round(np.mean(score_result[i]), 4)),
                              keypoints=kps_result[i].round(3).tolist())
            elif cfg.dataset == 'PoseTrack':
                result = dict(image_id=im_info['image_id'],
                              category_id=1,
                              track_id=0,
                              scores=score_result[i].round(4).tolist(),
                              keypoints=kps_result[i].round(3).tolist())
            elif cfg.dataset == 'MPII':
                result = dict(image_id=im_info['image_id'],
                              scores=score_result[i].round(4).tolist(),
                              keypoints=kps_result[i].round(3).tolist())
            dump_results.append(result)

    pbar.close()  # release the progress bar (was leaked before)
    return dump_results