def evaluate(self, outs, cur_sample_idx):
    annots = self.datalist
    sample_num = len(outs)
    eval_result = {'joint_out': [], 'mesh_out': []}
    for n in range(sample_num):
        annot = annots[cur_sample_idx + n]
        out = outs[n]

        # x,y: resize to input image space and perform bbox to image affine transform
        mesh_out_img = out['mesh_coord_img']
        mesh_out_img[:, 0] = mesh_out_img[:, 0] / cfg.output_hm_shape[2] * cfg.input_img_shape[1]
        mesh_out_img[:, 1] = mesh_out_img[:, 1] / cfg.output_hm_shape[1] * cfg.input_img_shape[0]
        mesh_out_img_xy1 = np.concatenate((mesh_out_img[:, :2], np.ones_like(mesh_out_img[:, :1])), 1)
        mesh_out_img[:, :2] = np.dot(out['bb2img_trans'], mesh_out_img_xy1.transpose(1, 0)).transpose(1, 0)[:, :2]

        # z: devoxelize and translate to absolute depth
        root_joint_depth = annot['root_joint_depth']
        mesh_out_img[:, 2] = (mesh_out_img[:, 2] / cfg.output_hm_shape[0] * 2. - 1) * (cfg.bbox_3d_size / 2)
        mesh_out_img[:, 2] = mesh_out_img[:, 2] + root_joint_depth

        # camera back-projection
        cam_param = annot['cam_param']
        focal, princpt = cam_param['focal'], cam_param['princpt']
        mesh_out_cam = pixel2cam(mesh_out_img, focal, princpt)

        if cfg.stage == 'param':
            mesh_out_cam = out['mesh_coord_cam']
        joint_out_cam = np.dot(self.joint_regressor, mesh_out_cam)

        eval_result['mesh_out'].append(mesh_out_cam.tolist())
        eval_result['joint_out'].append(joint_out_cam.tolist())

        vis = False
        if vis:
            filename = annot['img_path'].split('/')[-1][:-4]
            img = load_img(annot['img_path'])[:, :, ::-1]
            img = vis_mesh(img, mesh_out_img, 0.5)
            cv2.imwrite(filename + '.jpg', img)
            save_obj(mesh_out_cam, self.mano.face, filename + '.obj')

    return eval_result
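The camera back-projection step above relies on pixel2cam, which lifts (x, y) pixel coordinates plus an absolute depth into camera-centered 3D coordinates with a pinhole model. Below is a minimal sketch, assuming focal = (fx, fy) and princpt = (cx, cy) are given in pixels and depth is in meters; the repository's own utility may differ in minor details.

import numpy as np

def pixel2cam(pixel_coord, f, c):
    # pinhole back-projection: (u, v, Z) in pixel space -> (X, Y, Z) in camera space
    x = (pixel_coord[:, 0] - c[0]) / f[0] * pixel_coord[:, 2]
    y = (pixel_coord[:, 1] - c[1]) / f[1] * pixel_coord[:, 2]
    z = pixel_coord[:, 2]
    return np.stack((x, y, z), 1)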
def evaluate(self, outs, cur_sample_idx):
    annots = self.datalist
    sample_num = len(outs)
    eval_result = {'mpjpe_lixel': [], 'pa_mpjpe_lixel': [], 'mpjpe_param': [], 'pa_mpjpe_param': []}
    for n in range(sample_num):
        annot = annots[cur_sample_idx + n]
        out = outs[n]

        # h36m joint from gt mesh
        mesh_gt_cam = out['mesh_coord_cam_target']
        pose_coord_gt_h36m = np.dot(self.h36m_joint_regressor, mesh_gt_cam)
        depth_gt_h36m = pose_coord_gt_h36m[self.h36m_root_joint_idx, 2]
        pose_coord_gt_h36m = pose_coord_gt_h36m - pose_coord_gt_h36m[self.h36m_root_joint_idx, None]  # root-relative
        pose_coord_gt_h36m = pose_coord_gt_h36m[self.h36m_eval_joint, :]

        # mesh from lixel
        # x,y: resize to input image space and perform bbox to image affine transform
        mesh_out_img = out['mesh_coord_img']
        mesh_out_img[:, 0] = mesh_out_img[:, 0] / cfg.output_hm_shape[2] * cfg.input_img_shape[1]
        mesh_out_img[:, 1] = mesh_out_img[:, 1] / cfg.output_hm_shape[1] * cfg.input_img_shape[0]
        mesh_out_img_xy1 = np.concatenate((mesh_out_img[:, :2], np.ones_like(mesh_out_img[:, :1])), 1)
        mesh_out_img[:, :2] = np.dot(out['bb2img_trans'], mesh_out_img_xy1.transpose(1, 0)).transpose(1, 0)[:, :2]

        # z: devoxelize and translate to absolute depth
        if cfg.use_gt_info:
            root_joint_depth = depth_gt_h36m
        else:
            root_joint_depth = annot['root_joint_depth']
        mesh_out_img[:, 2] = (mesh_out_img[:, 2] / cfg.output_hm_shape[0] * 2. - 1) * (cfg.bbox_3d_size / 2)
        mesh_out_img[:, 2] = mesh_out_img[:, 2] + root_joint_depth

        # camera back-projection
        cam_param = annot['cam_param']
        focal, princpt = cam_param['focal'], cam_param['princpt']
        mesh_out_cam = pixel2cam(mesh_out_img, focal, princpt)

        # h36m joint from lixel mesh
        pose_coord_out_h36m = np.dot(self.h36m_joint_regressor, mesh_out_cam)
        pose_coord_out_h36m = pose_coord_out_h36m - pose_coord_out_h36m[self.h36m_root_joint_idx, None]  # root-relative
        pose_coord_out_h36m = pose_coord_out_h36m[self.h36m_eval_joint, :]
        pose_coord_out_h36m_aligned = rigid_align(pose_coord_out_h36m, pose_coord_gt_h36m)
        eval_result['mpjpe_lixel'].append(np.sqrt(np.sum((pose_coord_out_h36m - pose_coord_gt_h36m)**2, 1)).mean() * 1000)  # meter -> millimeter
        eval_result['pa_mpjpe_lixel'].append(np.sqrt(np.sum((pose_coord_out_h36m_aligned - pose_coord_gt_h36m)**2, 1)).mean() * 1000)  # meter -> millimeter

        # h36m joint from parameter mesh
        if cfg.stage == 'param':
            mesh_out_cam = out['mesh_coord_cam']
            pose_coord_out_h36m = np.dot(self.h36m_joint_regressor, mesh_out_cam)
            pose_coord_out_h36m = pose_coord_out_h36m - pose_coord_out_h36m[self.h36m_root_joint_idx, None]  # root-relative
            pose_coord_out_h36m = pose_coord_out_h36m[self.h36m_eval_joint, :]
            pose_coord_out_h36m_aligned = rigid_align(pose_coord_out_h36m, pose_coord_gt_h36m)
            eval_result['mpjpe_param'].append(np.sqrt(np.sum((pose_coord_out_h36m - pose_coord_gt_h36m)**2, 1)).mean() * 1000)  # meter -> millimeter
            eval_result['pa_mpjpe_param'].append(np.sqrt(np.sum((pose_coord_out_h36m_aligned - pose_coord_gt_h36m)**2, 1)).mean() * 1000)  # meter -> millimeter

        vis = False
        if vis:
            seq_name = annot['img_path'].split('/')[-2]
            img_name = annot['img_path'].split('/')[-1][:-4]
            filename = seq_name + '_' + img_name + '_' + str(n)
            img = load_img(annot['img_path'])[:, :, ::-1]
            img = vis_mesh(img, mesh_out_img, 0.5)
            cv2.imwrite(filename + '.jpg', img)
            save_obj(mesh_out_cam, self.smpl.face, filename + '.obj')

    return eval_result
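The PA-MPJPE metrics depend on rigid_align, which registers the predicted joints to the ground truth with a similarity transform (scale, rotation, translation) before measuring the per-joint error. Below is a minimal Procrustes-style sketch assuming both inputs are (J, 3) arrays in meters; the helper actually shipped with the codebase may be implemented differently.

import numpy as np

def rigid_align(A, B):
    # align point set A (predicted) to B (ground truth) with a similarity transform
    mu_A, mu_B = A.mean(0), B.mean(0)
    A0, B0 = A - mu_A, B - mu_B
    U, S, Vt = np.linalg.svd(A0.T @ B0)
    d = np.sign(np.linalg.det(Vt.T @ U.T))       # guard against reflections
    D = np.diag([1.0, 1.0, d])
    R = Vt.T @ D @ U.T                            # optimal rotation
    scale = np.trace(np.diag(S) @ D) / (A0 ** 2).sum()
    t = mu_B - scale * (R @ mu_A)                 # optimal translation
    return scale * (R @ A.T).T + t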
root_cam = pixel2cam(root_img[None, :], focal, princpt)
mesh_lixel_img[:, 2] += root_depth
mesh_lixel_cam = pixel2cam(mesh_lixel_img, focal, princpt)
mesh_param_cam += root_cam.reshape(1, 3)

# visualize lixel mesh in 2D space
vis_img = original_img.copy()
vis_img = vis_mesh(vis_img, mesh_lixel_img)
cv2.imwrite('output_mesh_lixel.jpg', vis_img)

# visualize param mesh in 2D space
vis_img = original_img.copy()
mesh_param_img = cam2pixel(mesh_param_cam, focal, princpt)
vis_img = vis_mesh(vis_img, mesh_param_img)
cv2.imwrite('output_mesh_param.jpg', vis_img)

# save mesh (obj)
save_obj(mesh_lixel_cam, face, 'output_mesh_lixel.obj')
save_obj(mesh_param_cam, face, 'output_mesh_param.obj')

# render mesh from lixel
vis_img = original_img.copy()
rendered_img = render_mesh(vis_img, mesh_lixel_cam, face, {'focal': focal, 'princpt': princpt})
cv2.imwrite('rendered_mesh_lixel.jpg', rendered_img)

# render mesh from param
vis_img = original_img.copy()
rendered_img = render_mesh(vis_img, mesh_param_cam, face, {'focal': focal, 'princpt': princpt})
cv2.imwrite('rendered_mesh_param.jpg', rendered_img)
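The param-mesh visualization above projects camera-space vertices back onto the image plane with cam2pixel, the inverse of pixel2cam. A minimal sketch under the same pinhole assumptions (focal and princpt in pixels); the repository's utility may differ slightly.

import numpy as np

def cam2pixel(cam_coord, f, c):
    # pinhole projection: (X, Y, Z) in camera space -> (u, v, Z) in pixel space
    x = cam_coord[:, 0] / cam_coord[:, 2] * f[0] + c[0]
    y = cam_coord[:, 1] / cam_coord[:, 2] * f[1] + c[1]
    z = cam_coord[:, 2]
    return np.stack((x, y, z), 1)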
img, img2bb_trans, bb2img_trans = generate_patch_image(img, bbox, 1.0, 0.0, False, cfg.input_img_shape)
img = transform(img.astype(np.float32)) / 255
img = img.cuda()[None, :, :, :]

# forward
inputs = {'img': img}
targets = {}
meta_info = {'bb2img_trans': bb2img_trans}
with torch.no_grad():
    out = model(inputs, targets, meta_info, 'test')
img = img[0].cpu().numpy()
mesh_img = out['mesh_coord_img'][0].cpu().numpy()
mesh_cam = out['mesh_coord_cam'][0].cpu().numpy()

# visualize mesh in 2D space
vis_img = img.copy() * 255
vis_img = vis_img.astype(np.uint8)
vis_img = np.transpose(vis_img, (1, 2, 0)).copy()
mesh_img[:, 0] = mesh_img[:, 0] / cfg.output_hm_shape[2] * cfg.input_img_shape[1]
mesh_img[:, 1] = mesh_img[:, 1] / cfg.output_hm_shape[1] * cfg.input_img_shape[0]
vis_img = vis_mesh(vis_img, mesh_img)
cv2.imwrite('output_mesh.jpg', vis_img)

# save mesh (obj)
save_obj(mesh_cam, face, 'output_mesh.obj')
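The demo above finishes with two small I/O helpers: vis_mesh draws the projected vertices on the image, and save_obj writes the mesh as a Wavefront OBJ file. Minimal sketches are given below, assuming vertices are (N, 3) arrays and faces are 0-based (F, 3) index triplets; the versions in the repository may add alpha blending options or vertex colors.

import cv2
import numpy as np

def vis_mesh(img, mesh_vertex, alpha=1.0):
    # draw each projected vertex as a small dot, blended over the input image
    canvas = img.copy()
    for x, y in mesh_vertex[:, :2]:
        cv2.circle(canvas, (int(x), int(y)), 1, (255, 128, 0), -1)
    return cv2.addWeighted(img, 1.0 - alpha, canvas, alpha, 0)

def save_obj(v, f, file_name='output.obj'):
    # write vertices and (1-based) triangle faces in Wavefront OBJ format
    with open(file_name, 'w') as obj_file:
        for x, y, z in v:
            obj_file.write('v %f %f %f\n' % (x, y, z))
        for i, j, k in f:
            obj_file.write('f %d %d %d\n' % (i + 1, j + 1, k + 1))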