Пример #1
0
    depth_im *= scene_camera[im_id]['depth_scale']  # to [mm]
    dist_im = misc.depth_im_to_dist_im(depth_im, K)
    '''
        for gt_id, gt in enumerate(scene_gt[im_id]):

            # Render the depth image.
            depth_gt = ren.render_object(gt['obj_id'], gt['cam_R_m2c'],
                                         gt['cam_t_m2c'], fx, fy, cx,
                                         cy)['depth']

            # Convert depth image to distance image.
            dist_gt = misc.depth_im_to_dist_im(depth_gt, K)

            # Mask of the full object silhouette.
            mask = dist_gt > 0
            '''
      # Mask of the visible part of the object silhouette.
      mask_visib = visibility.estimate_visib_mask_gt(
        dist_im, dist_gt, p['delta'], visib_mode='bop19')
      '''
            # Save the calculated masks.
            mask_path = dp_split['mask_tpath'].format(scene_id=scene_id,
                                                      im_id=im_id,
                                                      gt_id=gt_id)
            inout.save_im(mask_path, 255 * mask.astype(np.uint8))
            '''
      mask_visib_path = dp_split['mask_visib_tpath'].format(
        scene_id=scene_id, im_id=im_id, gt_id=gt_id)
      inout.save_im(mask_visib_path, 255 * mask_visib.astype(np.uint8))
      '''
Пример #2
0
            # Convert depth so it is in the same units as other images in the dataset.
            depth /= float(dp_camera['depth_scale'])

            # The OpenCV function was used for rendering of the training images
            # provided for the SIXD Challenge 2017.
            rgb = cv2.resize(rgb,
                             dp_camera['im_size'],
                             interpolation=cv2.INTER_AREA)
            # rgb = scipy.misc.imresize(rgb, par['cam']['im_size'][::-1], 'bicubic')

            # Save the rendered images.
            out_rgb_path = out_rgb_tpath.format(out_path=out_path,
                                                obj_id=obj_id,
                                                im_id=im_id)
            inout.save_im(out_rgb_path, rgb)
            out_depth_path = out_depth_tpath.format(out_path=out_path,
                                                    obj_id=obj_id,
                                                    im_id=im_id)
            inout.save_depth(out_depth_path, depth)

            # Get 2D bounding box of the object model at the ground truth pose.
            # ys, xs = np.nonzero(depth > 0)
            # obj_bb = misc.calc_2d_bbox(xs, ys, dp_camera['im_size'])

            scene_camera[im_id] = {
                'cam_K': dp_camera['K'],
                'depth_scale': dp_camera['depth_scale'],
                'view_level': int(views_level[view_id])
            }
# For every object ID listed in dp_model['obj_ids'], render the object under
# each of its symmetry transformations as seen from each view in p['views'],
# and save one RGB image per (object, pose, view) combination.
for obj_id in dp_model['obj_ids']:

    # Load object model.
    misc.log('Loading 3D model of object {}...'.format(obj_id))
    model_path = dp_model['model_tpath'].format(obj_id=obj_id)
    ren.add_object(obj_id, model_path)

    # Symmetry transformations of the current object.
    # NOTE(review): p['max_sym_disc_step'] presumably controls how finely
    # continuous symmetries are discretized -- confirm against
    # misc.get_symmetry_transformations.
    poses = misc.get_symmetry_transformations(models_info[obj_id],
                                              p['max_sym_disc_step'])

    for pose_id, pose in enumerate(poses):

        for view_id, view in enumerate(p['views']):

            # Compose the two rigid transformations: the symmetry
            # transformation is applied first, then the view transformation,
            # i.e. x -> view_R * (pose_R * x + pose_t) + view_t.
            R = view['R'].dot(pose['R'])
            t = view['R'].dot(pose['t']) + view['t']

            # Only the RGB channel of the rendering is used here.
            vis_rgb = ren.render_object(obj_id, R, t, fx, fy, cx, cy)['rgb']

            # Path to the output RGB visualization.
            vis_rgb_path = p['vis_rgb_tpath'].format(vis_path=p['vis_path'],
                                                     dataset=p['dataset'],
                                                     obj_id=obj_id,
                                                     view_id=view_id,
                                                     pose_id=pose_id)
            # Make sure the output folder exists before writing the image.
            misc.ensure_dir(os.path.dirname(vis_rgb_path))
            inout.save_im(vis_rgb_path, vis_rgb)

misc.log('Done.')
            })

            # Visualization of the visibility mask.
            if p['vis_visibility_masks']:

                depth_im_vis = visualization.depth_for_vis(depth, 0.2, 1.0)
                depth_im_vis = np.dstack([depth_im_vis] * 3)

                visib_gt_vis = visib_gt.astype(np.float)
                zero_ch = np.zeros(visib_gt_vis.shape)
                visib_gt_vis = np.dstack([zero_ch, visib_gt_vis, zero_ch])

                vis = 0.5 * depth_im_vis + 0.5 * visib_gt_vis
                vis[vis > 1] = 1

                vis_path = p['vis_mask_visib_tpath'].format(
                    delta=p['delta'],
                    dataset=p['dataset'],
                    split=p['dataset_split'],
                    scene_id=scene_id,
                    im_id=im_id,
                    gt_id=gt_id)
                misc.ensure_dir(os.path.dirname(vis_path))
                inout.save_im(vis_path, vis)

    # Save the info for the current scene.
    scene_gt_info_path = dp_split['scene_gt_info_tpath'].format(
        scene_id=scene_id)
    misc.ensure_dir(os.path.dirname(scene_gt_info_path))
    inout.save_json(scene_gt_info_path, scene_gt_info)
Пример #5
0
def vis_object_poses(poses,
                     K,
                     renderer,
                     rgb=None,
                     depth=None,
                     vis_rgb_path=None,
                     vis_depth_diff_path=None,
                     vis_rgb_resolve_visib=False):
    """Visualizes 3D object models in specified poses in a single image.

    Two visualizations are created:
    1. An RGB visualization (if vis_rgb_path is not None).
    2. A depth-difference visualization (if vis_depth_diff_path is not None).

    Every pose is rendered with `renderer` even when neither output path is
    given; in that case nothing is written to disk.

    :param poses: List of dictionaries, each with info about one pose:
      - 'obj_id': Object ID.
      - 'R': 3x3 ndarray with a rotation matrix.
      - 't': 3x1 ndarray with a translation vector.
      - 'text_info': Info to write at the object (see write_text_on_image).
        Optional; the text is only written when the key is present.
    :param K: 3x3 ndarray with an intrinsic camera matrix.
    :param renderer: Instance of the Renderer class (see renderer.py).
    :param rgb: ndarray with the RGB input image.
    :param depth: ndarray with the depth input image.
    :param vis_rgb_path: Path to the output RGB visualization.
    :param vis_depth_diff_path: Path to the output depth-difference
      visualization.
    :param vis_rgb_resolve_visib: Whether to resolve visibility of the objects
      (i.e. only the closest object is visualized at each pixel).
    :raises ValueError: If the RGB visualization is requested without `rgb`,
      if a depth-dependent visualization is requested without `depth`, or if
      `rgb` and `depth` differ in size when both visualizations are requested.
    """
    fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]

    # Indicators of visualization types.
    vis_rgb = vis_rgb_path is not None
    vis_depth_diff = vis_depth_diff_path is not None

    # The rendered depth is needed both for the depth-difference image and
    # for resolving which object is the closest one at each pixel.
    needs_depth = vis_depth_diff or (vis_rgb and vis_rgb_resolve_visib)

    if vis_rgb and rgb is None:
        raise ValueError(
            'RGB visualization triggered but RGB image not provided.')

    if needs_depth and depth is None:
        raise ValueError(
            'Depth visualization triggered but D image not provided.')

    # Prepare images for rendering.
    im_size = None  # (width, height)
    ren_rgb = None  # Accumulated RGB rendering of all poses.
    ren_rgb_info = None  # Overlay with bounding boxes and text.
    ren_depth = None  # Accumulated depth rendering (0 = nothing rendered).

    if vis_rgb:
        im_size = (rgb.shape[1], rgb.shape[0])
        ren_rgb = np.zeros(rgb.shape, np.uint8)
        ren_rgb_info = np.zeros(rgb.shape, np.uint8)

    if vis_depth_diff:
        depth_size = (depth.shape[1], depth.shape[0])
        if im_size is not None and im_size != depth_size:
            raise ValueError('The RGB and D images must have the same size.')
        im_size = depth_size

    if needs_depth:
        ren_depth = np.zeros((im_size[1], im_size[0]), np.float32)

    # Render the pose estimates one by one.
    for pose in poses:

        # Rendering.
        ren_out = renderer.render_object(pose['obj_id'], pose['R'], pose['t'],
                                         fx, fy, cx, cy)

        m_rgb = ren_out['rgb'] if vis_rgb else None

        m_mask = None
        if needs_depth:
            m_depth = ren_out['depth']

            # Get mask of the surface parts that are closer than the
            # surfaces rendered before.
            visible_mask = np.logical_or(ren_depth == 0, m_depth < ren_depth)
            m_mask = np.logical_and(m_depth != 0, visible_mask)

            ren_depth[m_mask] = m_depth[m_mask].astype(ren_depth.dtype)

        if not vis_rgb:
            continue

        # Combine the RGB renderings.
        if vis_rgb_resolve_visib:
            # Overwrite only the pixels where this object is the closest.
            ren_rgb[m_mask] = m_rgb[m_mask].astype(ren_rgb.dtype)
        else:
            # Additive blending, saturated at 255 to avoid uint8 wrap-around.
            ren_rgb_f = ren_rgb.astype(np.float32) + m_rgb.astype(np.float32)
            ren_rgb_f[ren_rgb_f > 255] = 255
            ren_rgb = ren_rgb_f.astype(np.uint8)

        # Draw 2D bounding box and write text info.
        obj_mask = np.sum(m_rgb > 0, axis=2)
        ys, xs = obj_mask.nonzero()
        if len(ys):
            bbox_color = (0.3, 0.3, 0.3)
            text_color = (1.0, 1.0, 1.0)
            text_size = 11

            # `im_size` already equals the size of the rendering here (the
            # array operations above require matching shapes), so it is not
            # re-derived from `obj_mask`.
            bbox = misc.calc_2d_bbox(xs, ys, im_size)
            ren_rgb_info = draw_rect(ren_rgb_info, bbox, bbox_color)

            if 'text_info' in pose:
                text_loc = (bbox[0] + 2, bbox[1])
                ren_rgb_info = write_text_on_image(ren_rgb_info,
                                                   pose['text_info'],
                                                   text_loc,
                                                   color=text_color,
                                                   size=text_size)

    # Blend and save the RGB visualization.
    if vis_rgb:
        misc.ensure_dir(os.path.dirname(vis_rgb_path))

        vis_im_rgb = 0.5 * rgb.astype(np.float32) + \
                     0.5 * ren_rgb.astype(np.float32) + \
                     1.0 * ren_rgb_info.astype(np.float32)
        vis_im_rgb[vis_im_rgb > 255] = 255
        inout.save_im(vis_rgb_path,
                      vis_im_rgb.astype(np.uint8),
                      jpg_quality=95)

    # Save the image of depth differences.
    if vis_depth_diff:
        misc.ensure_dir(os.path.dirname(vis_depth_diff_path))

        # Calculate the depth difference at pixels where both depth maps are
        # valid.
        valid_mask = (depth > 0) * (ren_depth > 0)
        depth_diff = valid_mask * (ren_depth.astype(np.float32) - depth)

        # Pixels whose (signed) difference is below this threshold are
        # highlighted in the first channel of the visualization.
        # NOTE(review): the unit follows the depth images -- presumably mm,
        # confirm against the caller.
        delta = 15
        below_delta = valid_mask * (depth_diff < delta)
        below_delta_vis = (255 * below_delta).astype(np.uint8)

        depth_diff_vis = 255 * depth_for_vis(depth_diff - depth_diff.min())
        depth_diff_vis = np.dstack(
            [below_delta_vis, depth_diff_vis, depth_diff_vis]).astype(np.uint8)
        depth_diff_vis[np.logical_not(valid_mask)] = 0

        # The statistics are undefined when no pixel is valid in both depth
        # maps (np.min/np.max raise on empty arrays), so the text overlay is
        # skipped in that case and the plain image is still saved.
        depth_diff_valid = depth_diff[valid_mask]
        if depth_diff_valid.size:
            depth_info = [
                {
                    'name': 'min diff',
                    'fmt': ':.3f',
                    'val': np.min(depth_diff_valid)
                },
                {
                    'name': 'max diff',
                    'fmt': ':.3f',
                    'val': np.max(depth_diff_valid)
                },
                {
                    'name': 'mean diff',
                    'fmt': ':.3f',
                    'val': np.mean(depth_diff_valid)
                },
            ]
            depth_diff_vis = write_text_on_image(depth_diff_vis, depth_info)
        inout.save_im(vis_depth_diff_path, depth_diff_vis)