def __init__(self,
             backbone,
             mesh_head,
             smpl,
             disc=None,
             loss_gan=None,
             loss_mesh=None,
             train_cfg=None,
             test_cfg=None,
             pretrained=None):
    """Build the mesh estimator from its sub-module configs.

    Args:
        backbone (dict): Config for the image feature backbone.
        mesh_head (dict): Config for the head regressing SMPL parameters.
        smpl (dict): Config for the SMPL mesh model, built through the
            model registry (``builder.build_mesh_model``).
        disc (dict | None): Config for the SMPL parameter discriminator.
            Default: None.
        loss_gan (dict | None): Config for the adversarial loss.
            Default: None.
        loss_mesh (dict): Config for the mesh loss.
        train_cfg (dict | None): Config for training. Default: None.
        test_cfg (dict | None): Config for testing. Default: None.
        pretrained (str | None): Path to pretrained backbone weights.
    """
    super().__init__()
    self.backbone = builder.build_backbone(backbone)
    self.mesh_head = builder.build_head(mesh_head)
    # The generator maps an input image directly to SMPL parameters.
    self.generator = torch.nn.Sequential(self.backbone, self.mesh_head)
    self.smpl = builder.build_mesh_model(smpl)
    # Adversarial training is enabled only when BOTH the discriminator
    # and the GAN loss are configured.
    self.with_gan = disc is not None and loss_gan is not None
    if self.with_gan:
        self.discriminator = SMPLDiscriminator(**disc)
        self.loss_gan = builder.build_loss(loss_gan)
    # Counts discriminator iterations between generator updates
    # (see the train step's `disc_step` schedule).
    self.disc_step_count = 0
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.loss_mesh = builder.build_loss(loss_mesh)
    self.init_weights(pretrained=pretrained)
def test_mesh_hmr_head():
    """Test hmr mesh head."""
    # Default head: regress SMPL parameters from a (1, 512, 8, 8) feature map.
    head = HMRMeshHead(in_channels=512)
    head.init_weights()
    input_shape = (1, 512, 8, 8)
    inputs = _demo_inputs(input_shape)
    out = head(inputs)
    smpl_rotmat, smpl_shape, camera = out
    # 24 joint rotations as 3x3 matrices, 10 shape coefficients,
    # 3 weak-perspective camera parameters.
    assert smpl_rotmat.shape == torch.Size([1, 24, 3, 3])
    assert smpl_shape.shape == torch.Size([1, 10])
    assert camera.shape == torch.Size([1, 3])

    # Test hmr mesh head with assigned mean parameters and n_iter.
    # (This was a stray triple-quoted string; a comment avoids the
    # no-op expression statement.)
    head = HMRMeshHead(
        in_channels=512,
        smpl_mean_params='tests/data/smpl/smpl_mean_params.npz',
        n_iter=3)
    head.init_weights()
    input_shape = (1, 512, 8, 8)
    inputs = _demo_inputs(input_shape)
    out = head(inputs)
    smpl_rotmat, smpl_shape, camera = out
    assert smpl_rotmat.shape == torch.Size([1, 24, 3, 3])
    assert smpl_shape.shape == torch.Size([1, 10])
    assert camera.shape == torch.Size([1, 3])

    # test discriminator with SMPL pose parameters
    # in rotation matrix representation
    disc = SMPLDiscriminator(
        beta_channel=(10, 10, 5, 1),
        per_joint_channel=(9, 32, 32, 16, 1),
        full_pose_channel=(23 * 16, 256, 1))
    pred_theta = (camera, smpl_rotmat, smpl_shape)
    pred_score = disc(pred_theta)
    # One score per joint (23) + full pose (1) + shape (1) = 25.
    assert pred_score.shape[1] == 25

    # test discriminator with SMPL pose parameters
    # in axis-angle representation
    pred_theta = (camera, camera.new_zeros([1, 72]), smpl_shape)
    pred_score = disc(pred_theta)
    assert pred_score.shape[1] == 25

    # Channel configurations must be tuples: a list raises TypeError.
    with pytest.raises(TypeError):
        _ = SMPLDiscriminator(
            beta_channel=[10, 10, 5, 1],
            per_joint_channel=(9, 32, 32, 16, 1),
            full_pose_channel=(23 * 16, 256, 1))

    # A channel configuration with fewer than two stages raises ValueError.
    with pytest.raises(ValueError):
        _ = SMPLDiscriminator(
            beta_channel=(10, ),
            per_joint_channel=(9, 32, 32, 16, 1),
            full_pose_channel=(23 * 16, 256, 1))
def __init__(self,
             backbone,
             mesh_head,
             smpl,
             disc=None,
             loss_gan=None,
             loss_mesh=None,
             train_cfg=None,
             test_cfg=None,
             pretrained=None):
    """Build the mesh estimator with a direct smplx ``SMPL`` body model.

    Args:
        backbone (dict): Config for the image feature backbone.
        mesh_head (dict): Config for the head regressing SMPL parameters.
        smpl (dict): SMPL settings; must provide 'smpl_path' (model files)
            and 'joints_regressor' (path to a .npy joint regressor).
        disc (dict | None): Config for the SMPL parameter discriminator.
            Default: None.
        loss_gan (dict | None): Config for the adversarial loss.
            Default: None.
        loss_mesh (dict): Config for the mesh loss.
        train_cfg (dict | None): Config for training. Default: None.
        test_cfg (dict | None): Config for testing. Default: None.
        pretrained (str | None): Path to pretrained backbone weights.
    """
    super().__init__()
    # smplx is an optional dependency; fail early with a clear message.
    assert has_smpl, 'Please install smplx to use SMPL.'
    self.backbone = builder.build_backbone(backbone)
    self.mesh_head = builder.build_head(mesh_head)
    # The generator maps an input image directly to SMPL parameters.
    self.generator = torch.nn.Sequential(self.backbone, self.mesh_head)
    # All SMPL parameters are predicted by the network, so the body model
    # is built without its own learnable/default parameter tensors.
    self.smpl = SMPL(
        model_path=smpl['smpl_path'],
        create_betas=False,
        create_global_orient=False,
        create_body_pose=False,
        create_transl=False)
    # Linear regressor from mesh vertices to 3D joints; registered as a
    # buffer so it follows the module across devices without being trained.
    joints_regressor = torch.tensor(
        np.load(smpl['joints_regressor']), dtype=torch.float).unsqueeze(0)
    self.register_buffer('joints_regressor', joints_regressor)
    # Adversarial training is enabled only when BOTH the discriminator
    # and the GAN loss are configured.
    self.with_gan = disc is not None and loss_gan is not None
    if self.with_gan:
        self.discriminator = SMPLDiscriminator(**disc)
        self.loss_gan = builder.build_loss(loss_gan)
    # Counts discriminator iterations between generator updates.
    self.disc_step_count = 0
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.loss_mesh = builder.build_loss(loss_mesh)
    self.init_weights(pretrained=pretrained)
class ParametricMesh(BasePose):
    """Model-based 3D human mesh detector.

    Take a single color image as input and output 3D joints, SMPL
    parameters and camera parameters.

    Args:
        backbone (dict): Backbone modules to extract feature.
        mesh_head (dict): Mesh head to process feature.
        smpl (dict): Config for SMPL model.
        disc (dict): Discriminator for SMPL parameters. Default: None.
        loss_gan (dict): Config for adversarial loss. Default: None.
        loss_mesh (dict): Config for mesh loss. Default: None.
        train_cfg (dict): Config for training. Default: None.
        test_cfg (dict): Config for testing. Default: None.
        pretrained (str): Path to the pretrained models.
    """

    def __init__(self,
                 backbone,
                 mesh_head,
                 smpl,
                 disc=None,
                 loss_gan=None,
                 loss_mesh=None,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None):
        super().__init__()
        # smplx is an optional dependency; fail early with a clear message.
        assert has_smpl, 'Please install smplx to use SMPL.'

        self.backbone = builder.build_backbone(backbone)
        self.mesh_head = builder.build_head(mesh_head)
        # The generator maps an input image directly to SMPL parameters.
        self.generator = torch.nn.Sequential(self.backbone, self.mesh_head)

        # All SMPL parameters come from the network, so the body model is
        # built without its own default parameter tensors.
        self.smpl = SMPL(
            model_path=smpl['smpl_path'],
            create_betas=False,
            create_global_orient=False,
            create_body_pose=False,
            create_transl=False)

        # Vertex-to-joint regressor kept as a buffer (moves with the module,
        # never trained).
        joints_regressor = torch.tensor(
            np.load(smpl['joints_regressor']), dtype=torch.float).unsqueeze(0)
        self.register_buffer('joints_regressor', joints_regressor)

        # Adversarial training needs BOTH a discriminator and a GAN loss.
        self.with_gan = disc is not None and loss_gan is not None
        if self.with_gan:
            self.discriminator = SMPLDiscriminator(**disc)
            self.loss_gan = builder.build_loss(loss_gan)
        # Counts discriminator iterations between generator updates.
        self.disc_step_count = 0

        self.train_cfg = train_cfg
        self.test_cfg = test_cfg

        self.loss_mesh = builder.build_loss(loss_mesh)
        self.init_weights(pretrained=pretrained)

    def init_weights(self, pretrained=None):
        """Weight initialization for model."""
        self.backbone.init_weights(pretrained)
        self.mesh_head.init_weights()
        if self.with_gan:
            self.discriminator.init_weights()

    def train_step(self, data_batch, optimizer, **kwargs):
        """Train step function.

        In this function, the detector will finish the train step following
        the pipeline:

        1. get fake and real SMPL parameters
        2. optimize discriminator (if have)
        3. optimize generator

        If `self.train_cfg.disc_step > 1`, the train step will contain
        multiple iterations for optimizing discriminator with different
        input data and only one iteration for optimizing generator after
        `disc_step` iterations for discriminator.

        Args:
            data_batch (dict): Batch of data as input, containing at least
                'img' (and 'mosh_theta' when adversarial training is on).
            optimizer (dict[torch.optim.Optimizer]): Dict with optimizers
                for generator and discriminator (if have).

        Returns:
            outputs (dict): Dict with loss, information for logger,
            the number of samples.
        """
        img = data_batch['img']
        pred_smpl = self.generator(img)
        pred_pose, pred_beta, pred_camera = pred_smpl

        # optimize discriminator (if have)
        if self.train_cfg['disc_step'] > 0 and self.with_gan:
            set_requires_grad(self.discriminator, True)
            # Detach generator outputs so the discriminator update does not
            # backpropagate into the generator.
            fake_data = (pred_camera.detach(), pred_pose.detach(),
                         pred_beta.detach())
            # Real SMPL parameters come from MoSh: layout is
            # [:3] camera, [3:75] pose (24 joints * 3), [75:] shape.
            mosh_theta = data_batch['mosh_theta']
            real_data = (mosh_theta[:, :3], mosh_theta[:, 3:75],
                         mosh_theta[:, 75:])
            fake_score = self.discriminator(fake_data)
            real_score = self.discriminator(real_data)

            disc_losses = {}
            disc_losses['real_loss'] = self.loss_gan(
                real_score, target_is_real=True, is_disc=True)
            disc_losses['fake_loss'] = self.loss_gan(
                fake_score, target_is_real=False, is_disc=True)
            loss_disc, log_vars_d = self._parse_losses(disc_losses)

            optimizer['discriminator'].zero_grad()
            loss_disc.backward()
            optimizer['discriminator'].step()
            self.disc_step_count = \
                (self.disc_step_count + 1) % self.train_cfg['disc_step']

            # Until `disc_step` discriminator iterations have run, skip the
            # generator update and return the discriminator stats only.
            if self.disc_step_count != 0:
                outputs = dict(
                    loss=loss_disc,
                    log_vars=log_vars_d,
                    num_samples=len(next(iter(data_batch.values()))))
                return outputs

        # optimize generator
        # pose2rot=False: the head predicts rotation matrices, not axis-angle.
        pred_out = self.smpl(
            betas=pred_beta,
            body_pose=pred_pose[:, 1:],
            global_orient=pred_pose[:, :1],
            pose2rot=False)
        pred_vertices = pred_out.vertices
        pred_joints_3d = self.get_3d_joints_from_mesh(pred_vertices)
        gt_beta = data_batch['beta']
        gt_pose = data_batch['pose']
        # Ground-truth pose is in axis-angle form (72 values), hence no
        # pose2rot override here.
        gt_vertices = self.smpl(
            betas=gt_beta,
            body_pose=gt_pose[:, 3:],
            global_orient=gt_pose[:, :3]).vertices

        pred = dict(
            pose=pred_pose,
            beta=pred_beta,
            camera=pred_camera,
            vertices=pred_vertices,
            joints_3d=pred_joints_3d)

        target = {
            key: data_batch[key]
            for key in [
                'pose', 'beta', 'has_smpl', 'joints_3d', 'joints_2d',
                'joints_3d_visible', 'joints_2d_visible'
            ]
        }
        target['vertices'] = gt_vertices

        losses = self.loss_mesh(pred, target)

        if self.with_gan:
            # Freeze the discriminator while computing the adversarial loss
            # for the generator.
            set_requires_grad(self.discriminator, False)
            pred_theta = (pred_camera, pred_pose, pred_beta)
            pred_score = self.discriminator(pred_theta)
            loss_adv = self.loss_gan(
                pred_score, target_is_real=True, is_disc=False)
            losses['adv_loss'] = loss_adv

        loss, log_vars = self._parse_losses(losses)
        optimizer['generator'].zero_grad()
        loss.backward()
        optimizer['generator'].step()

        outputs = dict(
            loss=loss,
            log_vars=log_vars,
            num_samples=len(next(iter(data_batch.values()))))
        return outputs

    def forward_train(self, *args, **kwargs):
        """Forward function for training.

        For ParametricMesh, we do not use this interface.
        """
        raise NotImplementedError('This interface should not be used in '
                                  'current training schedule. Please use '
                                  '`train_step` for training.')

    def val_step(self, data_batch, **kwargs):
        """Forward function for evaluation.

        Args:
            data_batch (dict): Contain data for forward.

        Returns:
            dict: Contain the results from model.
        """
        output = self.forward_test(**data_batch, **kwargs)
        return output

    def forward_dummy(self, img):
        """Used for computing network FLOPs.

        See ``tools/get_flops.py``.

        Args:
            img (torch.Tensor): Input image.

        Returns:
            Tensor: Outputs.
        """
        output = self.generator(img)
        return output

    def forward_test(self, img, img_metas, **kwargs):
        """Defines the computation performed at every call when testing.

        Only single-image batches are supported.
        """
        assert img.size(0) == 1
        assert len(img_metas) == 1
        pred_smpl = self.generator(img)
        pred_pose, pred_beta, pred_camera = pred_smpl
        pred_out = self.smpl(
            betas=pred_beta,
            body_pose=pred_pose[:, 1:],
            global_orient=pred_pose[:, :1],
            pose2rot=False)
        pred_vertices = pred_out.vertices
        pred_joints_3d = self.get_3d_joints_from_mesh(pred_vertices)
        all_preds = (pred_joints_3d.detach().cpu().numpy(),
                     (pred_pose.detach().cpu().numpy(),
                      pred_beta.detach().cpu().numpy()),
                     pred_camera.detach().cpu().numpy())
        all_boxes = np.zeros((1, 6), dtype=np.float32)
        image_path = []
        img_metas = img_metas[0]
        c = img_metas['center'].reshape(1, -1)
        s = img_metas['scale'].reshape(1, -1)
        score = 1.0
        if 'bbox_score' in img_metas:
            score = np.array(img_metas['bbox_score']).reshape(-1)
        # Box layout: [cx, cy, sx, sy, area, score];
        # area uses the 200-pixel scale convention.
        all_boxes[0, 0:2] = c[:, 0:2]
        all_boxes[0, 2:4] = s[:, 0:2]
        all_boxes[0, 4] = np.prod(s * 200.0, axis=1)
        all_boxes[0, 5] = score
        # NOTE(review): if 'image_file' is a single str, `extend` appends it
        # character by character; `append` looks intended — confirm against
        # consumers of `image_path`.
        image_path.extend(img_metas['image_file'])
        return all_preds, all_boxes, image_path

    def get_3d_joints_from_mesh(self, vertices):
        """Get 3D joints from 3D mesh using predefined joints regressor."""
        return torch.matmul(
            self.joints_regressor.to(vertices.device), vertices)

    def forward(self, img, img_metas=None, return_loss=False, **kwargs):
        """Forward function.

        Calls either forward_train or forward_test depending on whether
        return_loss=True.

        Note:
            batch_size: N
            num_img_channel: C (Default: 3)
            img height: imgH
            img width: imgW

        Args:
            img (torch.Tensor[N x C x imgH x imgW]): Input images.
            img_metas (list(dict)): Information about data augmentation
                By default this includes:

                - "image_file: path to the image file
                - "center": center of the bbox
                - "scale": scale of the bbox
                - "rotation": rotation of the bbox
                - "bbox_score": score of bbox
            return_loss (bool): Option to `return loss`. `return loss=True`
                for training, `return loss=False` for validation & test.

        Returns:
            Return predicted 3D joints, SMPL parameters, boxes and image paths.
        """
        if return_loss:
            return self.forward_train(img, img_metas, **kwargs)
        return self.forward_test(img, img_metas, **kwargs)

    def show_result(self, **kwargs):
        """Visualization is not implemented for this detector variant."""
        pass
class ParametricMesh(BasePose):
    """Model-based 3D human mesh detector.

    Take a single color image as input and output 3D joints, SMPL
    parameters and camera parameters.

    Args:
        backbone (dict): Backbone modules to extract feature.
        mesh_head (dict): Mesh head to process feature.
        smpl (dict): Config for SMPL model.
        disc (dict): Discriminator for SMPL parameters. Default: None.
        loss_gan (dict): Config for adversarial loss. Default: None.
        loss_mesh (dict): Config for mesh loss. Default: None.
        train_cfg (dict): Config for training. Default: None.
        test_cfg (dict): Config for testing. Default: None.
        pretrained (str): Path to the pretrained models.
    """

    def __init__(self,
                 backbone,
                 mesh_head,
                 smpl,
                 disc=None,
                 loss_gan=None,
                 loss_mesh=None,
                 train_cfg=None,
                 test_cfg=None,
                 pretrained=None):
        super().__init__()
        self.backbone = builder.build_backbone(backbone)
        self.mesh_head = builder.build_head(mesh_head)
        # The generator maps an input image directly to SMPL parameters.
        self.generator = torch.nn.Sequential(self.backbone, self.mesh_head)
        self.smpl = builder.build_mesh_model(smpl)
        # Adversarial training needs BOTH a discriminator and a GAN loss.
        self.with_gan = disc is not None and loss_gan is not None
        if self.with_gan:
            self.discriminator = SMPLDiscriminator(**disc)
            self.loss_gan = builder.build_loss(loss_gan)
        # Counts discriminator iterations between generator updates.
        self.disc_step_count = 0
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.loss_mesh = builder.build_loss(loss_mesh)
        self.init_weights(pretrained=pretrained)

    def init_weights(self, pretrained=None):
        """Weight initialization for model."""
        self.backbone.init_weights(pretrained)
        self.mesh_head.init_weights()
        if self.with_gan:
            self.discriminator.init_weights()

    def train_step(self, data_batch, optimizer, **kwargs):
        """Train step function.

        In this function, the detector will finish the train step following
        the pipeline:

        1. get fake and real SMPL parameters
        2. optimize discriminator (if have)
        3. optimize generator

        If `self.train_cfg.disc_step > 1`, the train step will contain
        multiple iterations for optimizing discriminator with different
        input data and only one iteration for optimizing generator after
        `disc_step` iterations for discriminator.

        Args:
            data_batch (dict): Batch of data as input, containing at least
                'img' (and 'mosh_theta' when adversarial training is on).
            optimizer (dict[torch.optim.Optimizer]): Dict with optimizers
                for generator and discriminator (if have).

        Returns:
            outputs (dict): Dict with loss, information for logger,
            the number of samples.
        """
        img = data_batch['img']
        pred_smpl = self.generator(img)
        pred_pose, pred_beta, pred_camera = pred_smpl

        # optimize discriminator (if have)
        if self.train_cfg['disc_step'] > 0 and self.with_gan:
            set_requires_grad(self.discriminator, True)
            # Detach generator outputs so the discriminator update does not
            # backpropagate into the generator.
            fake_data = (pred_camera.detach(), pred_pose.detach(),
                         pred_beta.detach())
            # Real SMPL parameters come from MoSh: layout is
            # [:3] camera, [3:75] pose (24 joints * 3), [75:] shape.
            mosh_theta = data_batch['mosh_theta']
            real_data = (mosh_theta[:, :3], mosh_theta[:, 3:75],
                         mosh_theta[:, 75:])
            fake_score = self.discriminator(fake_data)
            real_score = self.discriminator(real_data)

            disc_losses = {}
            disc_losses['real_loss'] = self.loss_gan(
                real_score, target_is_real=True, is_disc=True)
            disc_losses['fake_loss'] = self.loss_gan(
                fake_score, target_is_real=False, is_disc=True)
            loss_disc, log_vars_d = self._parse_losses(disc_losses)

            optimizer['discriminator'].zero_grad()
            loss_disc.backward()
            optimizer['discriminator'].step()
            self.disc_step_count = \
                (self.disc_step_count + 1) % self.train_cfg['disc_step']

            # Until `disc_step` discriminator iterations have run, skip the
            # generator update and return the discriminator stats only.
            if self.disc_step_count != 0:
                outputs = dict(
                    loss=loss_disc,
                    log_vars=log_vars_d,
                    num_samples=len(next(iter(data_batch.values()))))
                return outputs

        # optimize generator
        pred_out = self.smpl(
            betas=pred_beta,
            body_pose=pred_pose[:, 1:],
            global_orient=pred_pose[:, :1])
        pred_vertices, pred_joints_3d = pred_out['vertices'], pred_out[
            'joints']
        gt_beta = data_batch['beta']
        gt_pose = data_batch['pose']
        gt_vertices = self.smpl(
            betas=gt_beta,
            body_pose=gt_pose[:, 3:],
            global_orient=gt_pose[:, :3])['vertices']

        pred = dict(
            pose=pred_pose,
            beta=pred_beta,
            camera=pred_camera,
            vertices=pred_vertices,
            joints_3d=pred_joints_3d)

        target = {
            key: data_batch[key]
            for key in [
                'pose', 'beta', 'has_smpl', 'joints_3d', 'joints_2d',
                'joints_3d_visible', 'joints_2d_visible'
            ]
        }
        target['vertices'] = gt_vertices

        losses = self.loss_mesh(pred, target)

        if self.with_gan:
            # Freeze the discriminator while computing the adversarial loss
            # for the generator.
            set_requires_grad(self.discriminator, False)
            pred_theta = (pred_camera, pred_pose, pred_beta)
            pred_score = self.discriminator(pred_theta)
            loss_adv = self.loss_gan(
                pred_score, target_is_real=True, is_disc=False)
            losses['adv_loss'] = loss_adv

        loss, log_vars = self._parse_losses(losses)
        optimizer['generator'].zero_grad()
        loss.backward()
        optimizer['generator'].step()

        outputs = dict(
            loss=loss,
            log_vars=log_vars,
            num_samples=len(next(iter(data_batch.values()))))
        return outputs

    def forward_train(self, *args, **kwargs):
        """Forward function for training.

        For ParametricMesh, we do not use this interface.
        """
        raise NotImplementedError('This interface should not be used in '
                                  'current training schedule. Please use '
                                  '`train_step` for training.')

    def val_step(self, data_batch, **kwargs):
        """Forward function for evaluation.

        Args:
            data_batch (dict): Contain data for forward.

        Returns:
            dict: Contain the results from model.
        """
        output = self.forward_test(**data_batch, **kwargs)
        return output

    def forward_dummy(self, img):
        """Used for computing network FLOPs.

        See ``tools/get_flops.py``.

        Args:
            img (torch.Tensor): Input image.

        Returns:
            Tensor: Outputs.
        """
        output = self.generator(img)
        return output

    def forward_test(self,
                     img,
                     img_metas,
                     return_vertices=False,
                     return_faces=False,
                     **kwargs):
        """Defines the computation performed at every call when testing."""
        pred_smpl = self.generator(img)
        pred_pose, pred_beta, pred_camera = pred_smpl
        pred_out = self.smpl(
            betas=pred_beta,
            body_pose=pred_pose[:, 1:],
            global_orient=pred_pose[:, :1])
        pred_vertices, pred_joints_3d = pred_out['vertices'], pred_out[
            'joints']

        all_preds = {}
        all_preds['keypoints_3d'] = pred_joints_3d.detach().cpu().numpy()
        all_preds['smpl_pose'] = pred_pose.detach().cpu().numpy()
        all_preds['smpl_beta'] = pred_beta.detach().cpu().numpy()
        all_preds['camera'] = pred_camera.detach().cpu().numpy()

        if return_vertices:
            all_preds['vertices'] = pred_vertices.detach().cpu().numpy()
        if return_faces:
            all_preds['faces'] = self.smpl.get_faces()

        all_boxes = []
        image_path = []
        for img_meta in img_metas:
            # Box layout: [cx, cy, sx, sy, area, score];
            # area uses the 200-pixel scale convention.
            box = np.zeros(6, dtype=np.float32)
            c = img_meta['center']
            s = img_meta['scale']
            # BUG FIX: look up 'bbox_score' in the per-sample meta dict
            # (`img_meta`), not in the meta list (`img_metas`). The old
            # membership test on the list was always False for dict
            # elements, so the real bbox score was silently replaced by 1.0
            # (and, had it matched, indexing the list with a string would
            # have raised TypeError).
            if 'bbox_score' in img_meta:
                score = np.array(img_meta['bbox_score']).reshape(-1)
            else:
                score = 1.0
            box[0:2] = c
            box[2:4] = s
            box[4] = np.prod(s * 200.0, axis=0)
            box[5] = score
            all_boxes.append(box)
            image_path.append(img_meta['image_file'])

        all_preds['bboxes'] = np.stack(all_boxes, axis=0)
        all_preds['image_path'] = image_path
        return all_preds

    def get_3d_joints_from_mesh(self, vertices):
        """Get 3D joints from 3D mesh using predefined joints regressor.

        NOTE(review): this reads ``self.joints_regressor``, which this
        class's ``__init__`` never registers — presumably provided by the
        mesh model or dead code; confirm before relying on it.
        """
        return torch.matmul(
            self.joints_regressor.to(vertices.device), vertices)

    def forward(self, img, img_metas=None, return_loss=False, **kwargs):
        """Forward function.

        Calls either forward_train or forward_test depending on whether
        return_loss=True.

        Note:
            - batch_size: N
            - num_img_channel: C (Default: 3)
            - img height: imgH
            - img width: imgW

        Args:
            img (torch.Tensor[N x C x imgH x imgW]): Input images.
            img_metas (list(dict)): Information about data augmentation
                By default this includes:

                - "image_file: path to the image file
                - "center": center of the bbox
                - "scale": scale of the bbox
                - "rotation": rotation of the bbox
                - "bbox_score": score of bbox
            return_loss (bool): Option to `return loss`. `return loss=True`
                for training, `return loss=False` for validation & test.

        Returns:
            Return predicted 3D joints, SMPL parameters, boxes and image paths.
        """
        if return_loss:
            return self.forward_train(img, img_metas, **kwargs)
        return self.forward_test(img, img_metas, **kwargs)

    def show_result(self,
                    result,
                    img,
                    show=False,
                    out_file=None,
                    win_name='',
                    wait_time=0,
                    bbox_color='green',
                    mesh_color=(76, 76, 204),
                    **kwargs):
        """Visualize 3D mesh estimation results.

        Args:
            result (list[dict]): The mesh estimation results containing:

               - "bbox" (ndarray[4]): instance bounding bbox
               - "center" (ndarray[2]): bbox center
               - "scale" (ndarray[2]): bbox scale
               - "keypoints_3d" (ndarray[K,3]): predicted 3D keypoints
               - "camera" (ndarray[3]): camera parameters
               - "vertices" (ndarray[V, 3]): predicted 3D vertices
               - "faces" (ndarray[F, 3]): mesh faces
            img (str or Tensor): Optional. The image to visualize 2D inputs on.
            win_name (str): The window name.
            show (bool): Whether to show the image. Default: False.
            wait_time (int): Value of waitKey param. Default: 0.
            out_file (str or None): The filename to write the image.
                Default: None.
            bbox_color (str or tuple or :obj:`Color`): Color of bbox lines.
            mesh_color (str or tuple or :obj:`Color`): Color of mesh surface.

        Returns:
            ndarray: Visualized img, only if not `show` or `out_file`.
        """
        if img is not None:
            img = mmcv.imread(img)

        focal_length = self.loss_mesh.focal_length
        H, W, C = img.shape
        img_center = np.array([[0.5 * W], [0.5 * H]])

        # show bounding boxes
        bboxes = [res['bbox'] for res in result]
        bboxes = np.vstack(bboxes)
        mmcv.imshow_bboxes(
            img, bboxes, colors=bbox_color, top_k=-1, thickness=2, show=False)

        vertex_list = []
        face_list = []
        for res in result:
            vertices = res['vertices']
            faces = res['faces']
            camera = res['camera']
            camera_center = res['center']
            scale = res['scale']

            # predicted vertices are in root-relative space,
            # we need to translate them to camera space.
            translation = np.array([
                camera[1], camera[2],
                2 * focal_length / (scale[0] * 200.0 * camera[0] + 1e-9)
            ])
            mean_depth = vertices[:, -1].mean() + translation[-1]
            translation[:2] += (camera_center -
                                img_center[:, 0]) / focal_length * mean_depth
            vertices += translation[None, :]

            vertex_list.append(vertices)
            face_list.append(faces)

        # render from front view
        img_vis = imshow_mesh_3d(
            img,
            vertex_list,
            face_list,
            img_center, [focal_length, focal_length],
            colors=mesh_color)

        # render from side view
        # rotate mesh vertices
        R = cv2.Rodrigues(np.array([0, np.radians(90.), 0]))[0]
        rot_vertex_list = [np.dot(vert, R) for vert in vertex_list]

        # get the 3D bbox containing all meshes
        rot_vertices = np.concatenate(rot_vertex_list, axis=0)
        min_corner = rot_vertices.min(0)
        max_corner = rot_vertices.max(0)

        center_3d = 0.5 * (min_corner + max_corner)
        ratio = 0.8
        bbox3d_size = max_corner - min_corner

        # set appropriate translation to make all meshes appear in the image
        z_x = bbox3d_size[0] * focal_length / (ratio * W) - min_corner[2]
        z_y = bbox3d_size[1] * focal_length / (ratio * H) - min_corner[2]
        z = max(z_x, z_y)
        translation = -center_3d
        translation[2] = z
        translation = translation[None, :]
        rot_vertex_list = [
            rot_vert + translation for rot_vert in rot_vertex_list
        ]

        # render from side view
        img_side = imshow_mesh_3d(
            np.ones_like(img) * 255, rot_vertex_list, face_list, img_center,
            [focal_length, focal_length])

        # merger images from front view and side view
        img_vis = np.concatenate([img_vis, img_side], axis=1)

        if show:
            mmcv.visualization.imshow(img_vis, win_name, wait_time)

        if out_file is not None:
            mmcv.imwrite(img_vis, out_file)

        return img_vis