Пример #1
0
 def get_depth(self, scene_id, im_id):
     """Load the depth image of one test frame.

     The file is looked up at
     ``<cfg.LM_PATH>/test/<scene_id:06d>/depth/<im_id:06d>.png``.

     Args:
         scene_id: Integer scene identifier (formatted as six digits).
         im_id: Integer image identifier (formatted as six digits).

     Returns:
         Whatever ``bop_inout.load_depth`` returns for the file, or an
         all-zero ``(480, 640)`` float32 array when the file is missing.
     """
     file_path = os.path.join(cfg.LM_PATH, "test", f"{scene_id:06d}",
                              f"depth/{im_id:06d}.png")

     # Missing frames are reported and replaced by an empty depth map
     # instead of raising, so callers always receive an array.
     if not os.path.exists(file_path):
         print(f"missing file: {file_path}")
         return np.zeros((480, 640), dtype=np.float32)

     return bop_inout.load_depth(file_path)
Пример #2
0
def create_patch_pair(depth_path, mask_path, im_cam, gt, save_name,
                      md_pcd_pts):
    """Create and save corresponding image/model local patches.

    The masked depth image is back-projected to a point cloud and
    downsampled. Reference points are picked on it with farthest point
    sampling, mapped into the model frame with the ground-truth pose, and
    local patches are written for both clouds via ``create_local_patches``.

    Args:
        depth_path: Path of the depth image, read with ``io.load_depth``.
        mask_path: Path of the instance mask, read with ``io.load_im``;
            pixels equal to 0 are treated as background.
        im_cam: Camera record providing ``'cam_K'`` and ``'depth_scale'``.
        gt: Ground-truth record providing ``'cam_R_m2c'`` and
            ``'cam_t_m2c'``.
        save_name: Name used for the ``image/...`` and ``model/...`` outputs.
        md_pcd_pts: Model point cloud coordinates; presumably an (N, 3)
            array -- TODO confirm against the caller.

    Returns:
        ``[save_name, number_of_reference_points]`` when patches were
        produced, otherwise an empty list.
    """
    raw_depth = io.load_depth(depth_path)
    mask = io.load_im(mask_path)

    # Zero the depth wherever the mask is 0 so only the masked region is
    # back-projected.
    masked_depth = masked_where(mask == 0.0, raw_depth).filled(0.0)
    img_pcd = PointCloud.create_from_depth_image(
        depth=Image(masked_depth),
        intrinsic=PHCamIntrinsic(*IM_SIZE, *[im_cam['cam_K'][i] for i in K]),
        depth_scale=im_cam['depth_scale'],
        depth_trunc=150000)
    # voxel_down_sample returns a NEW point cloud and leaves the receiver
    # untouched, so the result has to be rebound for the downsampling to
    # take effect.
    img_pcd = img_pcd.voxel_down_sample(VOXEL_SIZE)

    img_pts = np.asarray(img_pcd.points)

    # Skip clouds whose size is outside the accepted range, unless the
    # module is configured with IS_TARGET.
    if not (img_pts.shape[0] in PCD_PTS_RANGE or IS_TARGET):
        return []

    cam_R, cam_t = gt['cam_R_m2c'], gt['cam_t_m2c']

    # Select reference points on image using farthest point sampling
    img_pcd_pts_fps = torch.as_tensor(img_pcd.points).to(DEVICE)
    img_ref_idxs = fps(img_pcd_pts_fps, ratio=FPS_RATIO).to('cpu').numpy()

    # Calculate model reference points by undoing the model-to-camera
    # transform, written for row-vector points.
    # NOTE(review): assumes cam_t is a (3, 1) column so cam_t.T broadcasts
    # over the rows -- confirm.
    img_ref_pts = img_pts[img_ref_idxs]
    md_ref_pts = (img_ref_pts - cam_t.T) @ np.linalg.inv(cam_R).T

    # Recreate model point cloud. The reference points are prepended, so
    # their indices in the combined cloud are simply 0..n_ref-1.
    md_ref_idxs = np.arange(md_ref_pts.shape[0])
    md_pcd_pts = np.concatenate([md_ref_pts, md_pcd_pts], axis=0)
    md_pcd = PointCloud()
    md_pcd.points = Vector3dVector(md_pcd_pts)

    # Calculate and save PPFs
    img_save_path = f'image/{save_name}'
    create_local_patches(img_pcd, img_ref_idxs, img_save_path)

    md_save_path = f'model/{save_name}'
    create_local_patches(md_pcd, md_ref_idxs, md_save_path)

    return [save_name, img_ref_idxs.shape[0]]
Пример #3
0
            if im_ind % 10 == 0:
                misc.log(
                    'Calculating error {} - method: {}, dataset: {}{}, scene: {}, '
                    'im: {}'.format(p['error_type'], method, dataset,
                                    split_type_str, scene_id, im_ind))

            # Intrinsic camera matrix.
            K = scene_camera[im_id]['cam_K']

            # Load the depth image if VSD is selected as the pose error function.
            depth_im = None
            if p['error_type'] == 'vsd':
                depth_path = dp_split['depth_tpath'].format(scene_id=scene_id,
                                                            im_id=im_id)
                depth_im = inout.load_depth(depth_path)
                depth_im *= scene_camera[im_id][
                    'depth_scale']  # Convert to [mm].

            for obj_id, target in im_targets.items():

                # The required number of top estimated poses.
                if p['n_top'] == 0:  # All estimates are considered.
                    n_top_curr = None
                elif p['n_top'] == -1:  # Given by the number of GT poses.
                    # n_top_curr = sum([gt['obj_id'] == obj_id for gt in scene_gt[im_id]])
                    n_top_curr = target['inst_count']
                else:
                    n_top_curr = p['n_top']

                # Get the estimates.
Пример #4
0
                        "/{:06d}.tif".format(im_id)
        image_gray = inout.load_im(rgb_path)
        image_t = np.zeros((image_gray.shape[0], image_gray.shape[1], 3),
                           dtype=np.uint8)
        image_t[:, :, :] = np.expand_dims(image_gray, axis=2)
    else:
        rgb_path = test_dir+"/{:06d}/".format(scene_id)+img_type+\
                        "/{:06d}.png".format(im_id)
        image_t = inout.load_im(rgb_path)
    if (dataset == "itodd"):
        depth_path = test_dir + "/{:06d}/depth/{:06d}.tif".format(
            scene_id, im_id)
    else:
        depth_path = test_dir + "/{:06d}/depth/{:06d}.png".format(
            scene_id, im_id)
    depth_t = inout.load_depth(depth_path) / 1000 * depth_scale
    depth_t_zero_nan = np.nan_to_num(depth_t)
    t1 = time.time()
    inst_count_est = np.zeros((len(inst_counts)))
    inst_count_pred = np.zeros((len(inst_counts)))
    inst_count_good = np.zeros((len(inst_counts)))

    depth_valid = np.logical_and(depth_t > 0.2, depth_t < 2.2)
    rgb_valid = np.logical_or(depth_valid, depth_t_zero_nan == 0)
    image_t = image_t.astype(np.float32)
    image_t[np.invert(rgb_valid)] = 0.1 * image_t[np.invert(rgb_valid)]

    points_tgt = np.zeros((depth_t.shape[0], depth_t.shape[1], 6), np.float32)
    points_tgt[:, :, :3] = getXYZ(depth_t,
                                  fx=cam_K[0, 0],
                                  fy=cam_K[1, 1],
Пример #5
0
    scene_gt_info = {}
    im_ids = sorted(scene_gt.keys())
    for im_counter, im_id in enumerate(im_ids):
        if im_counter % 100 == 0:
            misc.log(
                'Calculating GT info - dataset: {} ({}, {}), scene: {}, im: {}'
                .format(p['dataset'], p['dataset_split'],
                        p['dataset_split_type'], scene_id, im_id))

        # Load depth image.
        depth_fpath = dp_split['depth_tpath'].format(scene_id=scene_id,
                                                     im_id=im_id)
        if not os.path.exists(depth_fpath):
            depth_fpath = depth_fpath.replace('.tif', '.png')
        depth = inout.load_depth(depth_fpath)
        depth *= scene_camera[im_id]['depth_scale']  # Convert to [mm].

        K = scene_camera[im_id]['cam_K']
        fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]
        im_size = (depth.shape[1], depth.shape[0])

        scene_gt_info[im_id] = []
        for gt_id, gt in enumerate(scene_gt[im_id]):

            # Render depth image of the object model in the ground-truth pose.
            depth_gt_large = ren.render_object(gt['obj_id'], gt['cam_R_m2c'],
                                               gt['cam_t_m2c'], fx, fy,
                                               cx + ren_cx_offset,
                                               cy + ren_cy_offset)['depth']
            depth_gt = depth_gt_large[ren_cy_offset:(ren_cy_offset +
Пример #6
0
                rgb = None
                if p['vis_rgb']:
                    if 'rgb' in dp_split['im_modalities']:
                        rgb = inout.load_im(dp_split['rgb_tpath'].format(
                            scene_id=scene_id, im_id=im_id))[:, :, :3]
                    elif 'gray' in dp_split['im_modalities']:
                        gray = inout.load_im(dp_split['gray_tpath'].format(
                            scene_id=scene_id, im_id=im_id))
                        rgb = np.dstack([gray, gray, gray])
                    else:
                        raise ValueError('RGB nor gray images are available.')

                depth = None
                if p['vis_depth_diff'] or (p['vis_rgb']
                                           and p['vis_rgb_resolve_visib']):
                    depth = inout.load_depth(dp_split['depth_tpath'].format(
                        scene_id=scene_id, im_id=im_id))
                    depth *= scene_camera[im_id][
                        'depth_scale']  # Convert to [mm].

                # Visualization name.
                if p['vis_per_obj_id']:
                    vis_name = '{im_id:06d}_{obj_id:06d}'.format(
                        im_id=im_id, obj_id=im_ests_vis_obj_ids[ests_vis_id])
                else:
                    vis_name = '{im_id:06d}'.format(im_id=im_id)

                # Path to the output RGB visualization.
                vis_rgb_path = None
                if p['vis_rgb']:
                    vis_rgb_path = p['vis_rgb_tpath'].format(
                        vis_path=p['vis_path'],
Пример #7
0
        mask_paths = os.path.join(dp_split['base_path'], complete_split,
                                  '{:06d}/mask_visib'.format(scene_id))
        img_path = dp_split['rgb_tpath'].format(scene_id=scene_id, im_id=im_id)
        relative_img_path = os.path.relpath(img_path,
                                            os.path.dirname(coco_gt_path))
        image_info = pycoco_utils.create_image_info(image_id,
                                                    relative_img_path,
                                                    dp_split['im_size'])
        coco_scene_output["images"].append(image_info)

        for idx, inst in enumerate(inst_list):
            category_info = inst['obj_id']

            mask_p = os.path.join(mask_paths,
                                  '{:06d}_{:06d}.png'.format(im_id, idx))
            binary_inst_mask = (inout.load_depth(mask_p) / 255.).astype(
                np.bool)

            annotation_info = pycoco_utils.create_annotation_info(
                segmentation_id,
                image_id,
                category_info,
                binary_inst_mask,
                tolerance=2)

            if annotation_info is not None:
                coco_scene_output["annotations"].append(annotation_info)

            segmentation_id = segmentation_id + 1

        image_id = image_id + 1
Пример #8
0
    def __getitem__(self, frame_id):
        """Load the image, instance mask and annotations of one frame.

        Args:
            frame_id: Positional index of the row in ``self.frame_index``.

        Returns:
            A tuple ``(rgb, mask, obs)``:

            * ``rgb`` -- tensor holding the first three channels of the
              image (a single-channel image is replicated to three).
            * ``mask`` -- tensor of instance ids. It stays all zeros when
              the scene has no ``'scene_gt_info'`` annotations; otherwise
              it is read from ``<view>_all.png`` if that file exists, or
              assembled from the per-object visibility masks with object
              ``n`` labelled ``n + 1``.
            * ``obs`` -- dict with keys ``objects`` (list of per-object
              dicts), ``camera`` (``T0C``, ``K``, ``TWC``, ``resolution``
              and, when ``self.load_depth`` is set, ``depth``) and
              ``frame_info`` (the index row as a dict).
        """
        row = self.frame_index.iloc[frame_id]
        scene_id, view_id = row.scene_id, row.view_id
        view_id = int(view_id)
        view_id_str = f'{view_id:06d}'
        scene_id_str = f'{int(scene_id):06d}'
        scene_dir = self.base_dir / scene_id_str
        scene_annotations = self.annotations[scene_id_str]

        # Locate the image: prefer 'rgb' over 'gray', then try .png, .jpg
        # and finally .tif.
        rgb_dir = scene_dir / 'rgb'
        if not rgb_dir.exists():
            rgb_dir = scene_dir / 'gray'
        rgb_path = rgb_dir / f'{view_id_str}.png'
        if not rgb_path.exists():
            rgb_path = rgb_path.with_suffix('.jpg')
        if not rgb_path.exists():
            rgb_path = rgb_path.with_suffix('.tif')

        rgb = np.array(Image.open(rgb_path))
        if rgb.ndim == 2:
            # Replicate a single-channel image into three channels.
            rgb = np.repeat(rgb[..., None], 3, axis=-1)
        rgb = rgb[..., :3]
        im_h, im_w = rgb.shape[:2]
        rgb = torch.as_tensor(rgb)

        # Camera pose; identity when the annotation has no 'cam_R_w2c'.
        # NOTE(review): the 0.001 factor presumably converts mm to m --
        # confirm against the dataset convention.
        cam_annotation = scene_annotations['scene_camera'][str(view_id)]
        if 'cam_R_w2c' in cam_annotation:
            RC0 = np.array(cam_annotation['cam_R_w2c']).reshape(3, 3)
            tC0 = np.array(cam_annotation['cam_t_w2c']) * 0.001
            TC0 = Transform(RC0, tC0)
        else:
            TC0 = Transform(np.eye(3), np.zeros(3))
        K = np.array(cam_annotation['cam_K']).reshape(3, 3)

        # The inverse is needed both as a Transform (to compose object
        # poses below) and as a homogeneous matrix (for the camera dict).
        T0C = TC0.inverse()
        T0C_mat = T0C.toHomogeneousMatrix()
        camera = dict(T0C=T0C_mat,
                      K=K,
                      TWC=T0C_mat,
                      resolution=rgb.shape[:2])

        objects = []
        mask = np.zeros((im_h, im_w), dtype=np.uint8)
        if 'scene_gt_info' in scene_annotations:
            annotation = scene_annotations['scene_gt'][str(view_id)]
            n_objects = len(annotation)
            visib = scene_annotations['scene_gt_info'][str(view_id)]
            for n in range(n_objects):
                RCO = np.array(annotation[n]['cam_R_m2c']).reshape(3, 3)
                tCO = np.array(annotation[n]['cam_t_m2c']) * 0.001
                TCO = Transform(RCO, tCO)
                T0O = (T0C * TCO).toHomogeneousMatrix()
                obj_id = annotation[n]['obj_id']
                name = f'obj_{int(obj_id):06d}'

                # Convert the (x, y, width, height) box to corner form.
                # Distinct names keep the image height/width intact.
                box_x, box_y, box_w, box_h = np.array(visib[n]['bbox_visib'])
                obj = dict(label=name,
                           name=name,
                           TWO=T0O,
                           T0O=T0O,
                           visib_fract=visib[n]['visib_fract'],
                           id_in_segm=n + 1,
                           bbox=[box_x, box_y, box_x + box_w, box_y + box_h])
                objects.append(obj)

            mask_path = scene_dir / 'mask_visib' / f'{view_id_str}_all.png'
            if mask_path.exists():
                mask = np.array(Image.open(mask_path))
            else:
                # No merged mask on disk: paint each per-object mask into
                # one image using the same ids as 'id_in_segm'.
                for n in range(n_objects):
                    mask_n = np.array(
                        Image.open(scene_dir / 'mask_visib' /
                                   f'{view_id_str}_{n:06d}.png'))
                    mask[mask_n == 255] = n + 1

        mask = torch.as_tensor(mask)

        if self.load_depth:
            depth_path = scene_dir / 'depth' / f'{view_id_str}.png'
            if not depth_path.exists():
                depth_path = depth_path.with_suffix('.tif')
            depth = np.array(inout.load_depth(depth_path))
            # NOTE(review): depth_scale / 1000 presumably yields metres --
            # confirm.
            camera['depth'] = depth * cam_annotation['depth_scale'] / 1000

        obs = dict(
            objects=objects,
            camera=camera,
            frame_info=row.to_dict(),
        )
        return rgb, mask, obs
Пример #9
0
            print((mask_annot[im_id]))
            score = mask_annot[im_id][obj_id][inst]['score']
            if score < 0:
                continue
            obj_bb_est = mask_annot[im_id][obj_id][inst]['obj_bb']
            obj_id_est = mask_annot[im_id][obj_id][inst]['obj_id']
            chan_id = mask_annot[im_id][obj_id][inst]['np_channel_id']
            mask = maskrcnn_scene_masks[im_id]
            inst_mask = mask[:, :, chan_id]
        else:
            obj_bb_est = scene_gt_info[str(im_id)][inst]['bbox_obj']
            obj_id_est = obj_id
            score = 1.0
            mask_p = os.path.join(mask_paths,
                                  '{:06d}_{:06d}.png'.format(im_id, inst))
            inst_mask = inout.load_depth(mask_p) / 255.

        print((img.shape))
        img_masked = img * inst_mask[..., None].astype(np.uint8)

        x, y, w, h = obj_bb_est
        xmin = float(x) / dp_split['im_size'][0]
        ymin = float(y) / dp_split['im_size'][1]
        xmax = float((x + w)) / dp_split['im_size'][0]
        ymax = float((y + h)) / dp_split['im_size'][1]
        det = [
            BoundingBox(xmin, ymin, xmax, ymax, classes={obj_id_est: score})
        ]

        if pose_refiner_method:
            depth_masked = depth_img * inst_mask