Пример #1
0
            # Render depth image of the object model in the ground-truth pose.
            depth_gt_large = ren.render_object(gt['obj_id'], gt['cam_R_m2c'],
                                               gt['cam_t_m2c'], fx, fy,
                                               cx + ren_cx_offset,
                                               cy + ren_cy_offset)['depth']
            depth_gt = depth_gt_large[ren_cy_offset:(ren_cy_offset +
                                                     im_height),
                                      ren_cx_offset:(ren_cx_offset + im_width)]

            # Convert depth images to distance images.
            dist_gt = misc.depth_im_to_dist_im(depth_gt, K)
            dist_im = misc.depth_im_to_dist_im(depth, K)

            # Estimation of the visibility mask.
            visib_gt = visibility.estimate_visib_mask_gt(dist_im,
                                                         dist_gt,
                                                         p['delta'],
                                                         visib_mode='bop19')

            # Mask of the object in the GT pose.
            obj_mask_gt_large = depth_gt_large > 0
            obj_mask_gt = dist_gt > 0

            # Number of pixels in the whole object silhouette
            # (even in the truncated part).
            px_count_all = np.sum(obj_mask_gt_large)

            # Number of pixels in the object silhouette with a valid depth measurement
            # (i.e. with a non-zero value in the depth image).
            px_count_valid = np.sum(dist_im[obj_mask_gt] > 0)

            # Number of pixels in the visible part of the object silhouette.
Пример #2
0
def vsd(R_est, t_est, R_gt, t_gt, depth_test, K, delta, taus,
        normalized_by_diameter, diameter, renderer, obj_id, cost_type='step'):
  """Visible Surface Discrepancy -- by Hodan, Michel et al. (ECCV 2018).

  The object model is rendered in both the estimated and the ground-truth
  pose, the visible part of each rendering is estimated against the test
  depth image, and the error is the fraction of pixels in the union of the
  two visibility masks that are either outside their intersection or whose
  rendered distances disagree by at least the tolerance.

  :param R_est: 3x3 ndarray with the estimated rotation matrix.
  :param t_est: 3x1 ndarray with the estimated translation vector.
  :param R_gt: 3x3 ndarray with the ground-truth rotation matrix.
  :param t_gt: 3x1 ndarray with the ground-truth translation vector.
  :param depth_test: hxw ndarray with the test depth image.
  :param K: 3x3 ndarray with an intrinsic camera matrix.
  :param delta: Tolerance used for estimation of the visibility masks.
  :param taus: A list of misalignment tolerance values.
  :param normalized_by_diameter: Whether to divide the pixel-wise distances
      by the object diameter before comparing them with the tolerances.
  :param diameter: Object diameter (only used if normalized_by_diameter).
  :param renderer: Object providing render_object(obj_id, R, t, fx, fy, cx,
      cy), whose result is indexed with 'depth' (see renderer.py).
  :param obj_id: Object identifier passed through to the renderer.
  :param cost_type: Type of the pixel-wise matching cost:
      'tlinear' - Distance divided by the tolerance, truncated at 1. Used in
          the original definition of VSD in:
          Hodan et al., On Evaluation of 6D Object Pose Estimation, ECCVW'16
      'step' - 1 if the distance is >= the tolerance, 0 otherwise. Used for
          SIXD Challenge 2017 onwards.
  :return: List of calculated errors (one for each misalignment tolerance).
      Every error is 1.0 if the union of the visibility masks is empty.
  :raises ValueError: If cost_type is neither 'step' nor 'tlinear'. The check
      happens per tolerance, so it is only reached when the union of the
      visibility masks is non-empty and taus has at least one element.
  """
  # Focal lengths and principal point taken from the intrinsic matrix.
  fx, fy = K[0, 0], K[1, 1]
  cx, cy = K[0, 2], K[1, 2]

  # Depth renderings of the model: estimated pose first, then ground truth.
  rendering_est = renderer.render_object(obj_id, R_est, t_est, fx, fy, cx, cy)
  depth_est = rendering_est['depth']
  rendering_gt = renderer.render_object(obj_id, R_gt, t_gt, fx, fy, cx, cy)
  depth_gt = rendering_gt['depth']

  # Depth images -> distance images.
  dist_test = misc.depth_im_to_dist_im_fast(depth_test, K)
  dist_gt = misc.depth_im_to_dist_im_fast(depth_gt, K)
  dist_est = misc.depth_im_to_dist_im_fast(depth_est, K)

  # Visibility masks. The mask for the estimated pose is computed with the
  # ground-truth mask as an additional input.
  visib_gt = visibility.estimate_visib_mask_gt(
    dist_test, dist_gt, delta, visib_mode='bop19')
  visib_est = visibility.estimate_visib_mask_est(
    dist_test, dist_est, visib_gt, delta, visib_mode='bop19')

  # Pixels visible in both poses, and pixel counts of the union and of the
  # part of the union that lies outside the intersection.
  overlap = np.logical_and(visib_gt, visib_est)
  union_count = np.logical_or(visib_gt, visib_est).sum()
  non_overlap_count = union_count - overlap.sum()

  # Absolute distance differences on the commonly visible pixels, optionally
  # expressed in units of the object diameter.
  dists = np.abs(dist_gt[overlap] - dist_est[overlap])
  if normalized_by_diameter:
    dists /= diameter

  # Nothing is visible in either pose: maximal error for every tolerance.
  if union_count == 0:
    return [1.0] * len(taus)

  denominator = float(union_count)
  errors = []
  for tau in taus:
    # Pixel-wise matching cost on the intersection.
    if cost_type == 'step':
      costs = dists >= tau
    elif cost_type == 'tlinear':
      # Truncated linear function: capped at 1.
      costs = np.minimum(dists / tau, 1.0)
    else:
      raise ValueError('Unknown pixel matching cost.')

    # Pixels outside the intersection each contribute the full cost of 1.
    errors.append((costs.sum() + non_overlap_count) / denominator)

  return errors