Example #1
def make_scene_infos(self, TWO_9d, TCW_9d):
    # Decode 9D pose parametrizations into 4x4 world-frame transforms.
    TWO = compute_transform_from_pose9d(TWO_9d)
    TCW = compute_transform_from_pose9d(TCW_9d)
    TWC = invert_T(TCW)
    objects = tc.PandasTensorCollection(
        infos=self.obj_infos,
        TWO=TWO,
    )
    cameras = tc.PandasTensorCollection(
        infos=self.cam_infos,
        TWC=TWC,
        K=self.K,
    )
    return objects, cameras
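These snippets come from the CosyPose codebase and all revolve around tc.PandasTensorCollection, which pairs a pandas DataFrame of per-row metadata (infos) with batched tensors sharing the same first dimension. A minimal sketch of the shared setup, assuming tc is CosyPose's tensor-collection module (the import path below is a guess):

from collections import defaultdict
import numpy as np
import pandas as pd
import torch
import cosypose.utils.tensor_collection as tc

# Rows of `infos` line up with dim 0 of every registered tensor.
infos = pd.DataFrame(dict(label=['obj_000001', 'obj_000002'], score=[0.9, 0.7]))
coll = tc.PandasTensorCollection(infos=infos, bboxes=torch.rand(2, 4))
# Integer indexing slices the infos and all tensors together (as in Example #9).
keep = np.where(coll.infos['score'] > 0.8)[0]
coll = coll[keep]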
Example #2
def get_best_viewpair_pose_est(TC1C2, seeds, inliers):
    best_hypotheses = inliers['best_hypotheses']
    TC1C2_best = TC1C2[best_hypotheses]
    view1 = seeds['view1'][best_hypotheses]
    view2 = seeds['view2'][best_hypotheses]
    infos = pd.DataFrame(dict(view1=view1, view2=view2))
    return tc.PandasTensorCollection(infos=infos, TC1C2=TC1C2_best)
Example #3
def parse_obs_data(obs):
    data = defaultdict(list)
    frame_info = obs['frame_info']
    TWC = torch.as_tensor(obs['camera']['TWC']).float()
    for n, obj in enumerate(obs['objects']):
        info = dict(frame_obj_id=n,
                    label=obj['name'],
                    visib_fract=obj.get('visib_fract', 1),
                    scene_id=frame_info['scene_id'],
                    view_id=frame_info['view_id'])
        data['infos'].append(info)
        data['TWO'].append(obj['TWO'])
        data['bboxes'].append(obj['bbox'])

    for k, v in data.items():
        if k != 'infos':
            data[k] = torch.stack([torch.as_tensor(x).float() for x in v])

    data['infos'] = pd.DataFrame(data['infos'])
    # Express object poses in the camera frame: TCO = TWC^-1 @ TWO.
    TCO = invert_T(TWC).unsqueeze(0) @ data['TWO']

    data = tc.PandasTensorCollection(
        infos=data['infos'],
        TCO=TCO,
        bboxes=data['bboxes'],
        poses=TCO,
    )
    return data
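Example #3 composes TCO = TWC^-1 @ TWO to express world-frame object poses in the camera frame. invert_T is a CosyPose helper; as a point of reference, a rigid 4x4 transform has the closed-form inverse below (an illustrative stand-in, not the library code):

import torch

def invert_rigid_T(T):
    # Inverse of batched rigid transforms [R | t]: R^-1 = R^T, t^-1 = -R^T t.
    R = T[..., :3, :3]
    t = T[..., :3, [3]]
    T_inv = T.clone()
    T_inv[..., :3, :3] = R.transpose(-2, -1)
    T_inv[..., :3, [3]] = -R.transpose(-2, -1) @ t
    return T_inv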
Example #4
    def collate_fn(self, batch):
        cam_infos, K = [], []
        im_infos = []
        depth = []
        batch_im_id = -1
        for n, data in enumerate(batch):
            assert n == 0  # this collate_fn expects a dataloader batch size of 1
            images, masks, obss = data
            for c, obs in enumerate(obss):
                batch_im_id += 1
                frame_info = obs['frame_info']
                im_info = {k: frame_info[k] for k in ('scene_id', 'view_id', 'group_id')}
                im_info.update(batch_im_id=batch_im_id)
                im_infos.append(im_info)
                cam_info = im_info.copy()

                K.append(obs['camera']['K'])
                cam_infos.append(cam_info)
                if self.load_depth:
                    depth.append(torch.tensor(obs['camera']['depth']))

        cameras = tc.PandasTensorCollection(
            infos=pd.DataFrame(cam_infos),
            K=torch.as_tensor(np.stack(K)),
        )
        data = dict(
            cameras=cameras,
            images=images,
            im_infos=im_infos,
        )
        if self.load_depth:
            data['depth'] = torch.stack(depth)
        return data
Example #5
    def collate_fn(self, batch):
        batch_im_id = -1
        det_infos, bboxes = [], []
        images = []
        im_infos = []
        for n, data in enumerate(batch):
            rgb, masks, obs = data
            batch_im_id += 1
            frame_info = obs['frame_info']
            im_info = {k: frame_info[k] for k in ('scene_id', 'view_id')}
            im_info.update(batch_im_id=batch_im_id)
            im_infos.append(im_info)
            images.append(rgb)

            for o, obj in enumerate(obs['objects']):
                obj_info = dict(
                    label=obj['name'],
                    score=1.0,
                )
                obj_info.update(im_info)
                bboxes.append(obj['bbox'])
                det_infos.append(obj_info)

        gt_detections = tc.PandasTensorCollection(
            infos=pd.DataFrame(det_infos),
            bboxes=torch.as_tensor(np.stack(bboxes)).float(),
        )
        data = dict(
            images=torch.stack(images),
            gt_detections=gt_detections,
            im_infos=im_infos,
        )
        return data
Example #6
def make_empty_predictions():
    # np.int / np.object / np.float were removed in NumPy 1.24; use the builtins.
    infos = dict(view_id=np.empty(0, dtype=int),
                 scene_id=np.empty(0, dtype=int),
                 label=np.empty(0, dtype=object),
                 score=np.empty(0, dtype=float))
    bboxes = torch.empty(0, 4, dtype=torch.float)
    return tc.PandasTensorCollection(infos=pd.DataFrame(infos), bboxes=bboxes)
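Returning an explicitly typed empty collection means callers can concatenate per-image results without special-casing the no-detection case. A hedged usage sketch (predict and batches are hypothetical names; tc.concatenate appears in Example #12):

preds = [make_empty_predictions()]
for batch in batches:             # hypothetical iterable of inputs
    preds.append(predict(batch))  # each a PandasTensorCollection with infos + bboxes
all_preds = tc.concatenate(preds)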
Example #7
def scene_level_matching(candidates, inliers):
    cand1 = inliers['inlier_matches_cand1']
    cand2 = inliers['inlier_matches_cand2']
    edges = np.ones(len(cand1), dtype=int)
    n_cand = len(candidates)
    graph = csr_matrix((edges, (cand1, cand2)), shape=(n_cand, n_cand))
    n_components, ids = connected_components(graph,
                                             directed=True,
                                             connection='strong')

    component_size = defaultdict(lambda: 0)
    for idx in ids:
        component_size[idx] += 1
    obj_n_cand = np.empty(len(ids), dtype=int)
    for n, idx in enumerate(ids):
        obj_n_cand[n] = component_size[idx]

    cand_infos = candidates.infos.copy()
    cand_infos['component_id'] = ids
    keep_cand = obj_n_cand >= 2
    cand_infos = cand_infos[keep_cand].reset_index(drop=True)
    for n, (comp_id, group) in enumerate(cand_infos.groupby('component_id')):
        cand_infos.loc[group.index, 'component_id'] = n
    cand_infos = cand_infos.rename(columns={'component_id': 'obj_id'})

    matched_candidates = tc.PandasTensorCollection(
        infos=cand_infos, poses=candidates.poses[cand_infos['cand_id'].values])
    return matched_candidates
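The matcher treats inlier candidate pairs as edges of a directed graph and takes each strongly connected component as one physical object. A small self-contained demo of the SciPy calls involved:

import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse.csgraph import connected_components

# Toy graph: candidates 0, 1, 2 are mutually matched; candidate 3 is isolated.
cand1 = np.array([0, 1, 1, 2])
cand2 = np.array([1, 0, 2, 1])
graph = csr_matrix((np.ones(len(cand1), dtype=int), (cand1, cand2)), shape=(4, 4))
n_components, ids = connected_components(graph, directed=True, connection='strong')
print(n_components, ids)  # 2 [0 0 0 1]: candidates 0-2 form one object, 3 is a singleton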
Example #8
    def collate_fn(self, batch):
        batch_im_id = -1

        cam_infos, K = [], []
        det_infos, bboxes = [], []
        for n, data in enumerate(batch):
            assert n == 0  # this collate_fn expects a dataloader batch size of 1
            images, masks, obss = data
            for c, obs in enumerate(obss):
                batch_im_id += 1
                frame_info = obs['frame_info']
                im_info = {
                    k: frame_info[k]
                    for k in ('scene_id', 'view_id', 'group_id')
                }
                im_info.update(batch_im_id=batch_im_id)
                cam_info = im_info.copy()

                K.append(obs['camera']['K'])
                cam_infos.append(cam_info)

                for o, obj in enumerate(obs['objects']):
                    obj_info = dict(
                        label=obj['name'],
                        score=1.0,
                    )
                    obj_info.update(im_info)
                    bboxes.append(obj['bbox'])
                    det_infos.append(obj_info)

        gt_detections = tc.PandasTensorCollection(
            infos=pd.DataFrame(det_infos),
            bboxes=torch.as_tensor(np.stack(bboxes)),
        )
        cameras = tc.PandasTensorCollection(
            infos=pd.DataFrame(cam_infos),
            K=torch.as_tensor(np.stack(K)),
        )
        data = dict(
            images=images,
            cameras=cameras,
            gt_detections=gt_detections,
        )
        return data
Example #9
    def get_detections(self, images, detection_th=None,
                       output_masks=False, mask_th=0.8,
                       one_instance_per_class=False):
        images = self.cast(images).float()
        if images.shape[-1] == 3:
            images = images.permute(0, 3, 1, 2)
        if images.max() > 1:
            images = images / 255.
            images = images.float().cuda()
        outputs_ = self.model([image_n for image_n in images])

        infos = []
        bboxes = []
        masks = []
        for n, outputs_n in enumerate(outputs_):
            outputs_n['labels'] = [self.category_id_to_label[category_id.item()] \
                                   for category_id in outputs_n['labels']]
            for obj_id in range(len(outputs_n['boxes'])):
                bbox = outputs_n['boxes'][obj_id]
                info = dict(
                    batch_im_id=n,
                    label=outputs_n['labels'][obj_id],
                    score=outputs_n['scores'][obj_id].item(),
                )
                mask = outputs_n['masks'][obj_id, 0] > mask_th
                bboxes.append(torch.as_tensor(bbox))
                masks.append(torch.as_tensor(mask))
                infos.append(info)

        if len(bboxes) > 0:
            bboxes = torch.stack(bboxes).cuda().float()
            masks = torch.stack(masks).cuda()
        else:
            infos = dict(score=[], label=[], batch_im_id=[])
            bboxes = torch.empty(0, 4).cuda().float()
            # images is (N, C, H, W) after the permute above, so the empty mask
            # tensor should use the spatial dims (H, W), not (C, H).
            masks = torch.empty(0, images.shape[2], images.shape[3], dtype=torch.bool).cuda()

        outputs = tc.PandasTensorCollection(
            infos=pd.DataFrame(infos),
            bboxes=bboxes,
        )
        if output_masks:
            outputs.register_tensor('masks', masks)
        if detection_th is not None:
            keep = np.where(outputs.infos['score'] > detection_th)[0]
            outputs = outputs[keep]

        if one_instance_per_class:
            infos = outputs.infos
            infos['det_idx'] = np.arange(len(infos))
            keep_ids = infos.sort_values('score', ascending=False).drop_duplicates('label')['det_idx'].values
            outputs = outputs[keep_ids]
            outputs.infos = outputs.infos.drop('det_idx', axis=1)
        return outputs
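A hedged call sketch for get_detections, assuming detector is an instance of the wrapping class and images is a uint8 batch in (N, H, W, C) layout (the method permutes and rescales internally):

images = torch.randint(0, 256, (1, 480, 640, 3), dtype=torch.uint8)
detections = detector.get_detections(images, detection_th=0.5,
                                     output_masks=True, one_instance_per_class=True)
print(detections.infos[['batch_im_id', 'label', 'score']])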
Example #10
def read_cameras(json_path, view_ids):
    cameras = json.loads(Path(json_path).read_text())
    all_K = []
    for view_id in view_ids:
        cam_info = cameras[str(view_id)]
        K = np.array(cam_info['cam_K']).reshape(3, 3)
        all_K.append(K)
    K = torch.as_tensor(np.stack(all_K))
    cameras = tc.PandasTensorCollection(K=K,
                                        infos=pd.DataFrame(
                                            dict(view_id=view_ids)))
    return cameras
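In BOP-format datasets, per-scene intrinsics live in scene_camera.json as a flattened 3x3 matrix under cam_K, which is what read_cameras reshapes. A usage sketch with an assumed scene path:

json_path = LOCAL_DATA_DIR / 'bop_datasets/ycbv/test/000048/scene_camera.json'  # hypothetical scene
cameras = read_cameras(json_path, view_ids=[1, 25, 100])
print(cameras.K.shape)  # torch.Size([3, 3, 3]): one K per requested view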
Example #11
def make_TCO_init(self, detections, K):
    K = K[detections.infos['batch_im_id'].values]
    boxes = detections.bboxes
    if self.coarse_model.cfg.init_method == 'z-up+auto-depth':
        meshes = self.coarse_model.mesh_db.select(detections.infos['label'])
        points_3d = meshes.sample_points(2000, deterministic=True)
        TCO_init = TCO_init_from_boxes_zup_autodepth(boxes, points_3d, K)
    else:
        TCO_init = TCO_init_from_boxes(z_range=(1.0, 1.0), boxes=boxes, K=K)
    return tc.PandasTensorCollection(infos=detections.infos, poses=TCO_init)
Example #12
    def batched_model_predictions(self,
                                  model,
                                  images,
                                  K,
                                  obj_data,
                                  n_iterations=1):
        timer = Timer()
        timer.start()

        ids = torch.arange(len(obj_data))

        ds = TensorDataset(ids)
        dl = DataLoader(ds, batch_size=self.bsz_objects)

        preds = defaultdict(list)
        for (batch_ids, ) in dl:
            timer.resume()
            obj_inputs = obj_data[batch_ids.numpy()]
            labels = obj_inputs.infos['label'].values
            im_ids = obj_inputs.infos['batch_im_id'].values
            images_ = images[im_ids]
            K_ = K[im_ids]
            TCO_input = obj_inputs.poses
            outputs = model(images=images_,
                            K=K_,
                            TCO=TCO_input,
                            n_iterations=n_iterations,
                            labels=labels)
            timer.pause()
            for n in range(1, n_iterations + 1):
                iter_outputs = outputs[f'iteration={n}']

                infos = obj_inputs.infos
                batch_preds = tc.PandasTensorCollection(
                    infos,
                    poses=iter_outputs['TCO_output'],
                    poses_input=iter_outputs['TCO_input'],
                    K_crop=iter_outputs['K_crop'],
                    boxes_rend=iter_outputs['boxes_rend'],
                    boxes_crop=iter_outputs['boxes_crop'])
                preds[f'iteration={n}'].append(batch_preds)

        logger.debug(
            f'Pose prediction on {len(obj_data)} detections (n_iterations={n_iterations}): {timer.stop()}'
        )
        preds = dict(preds)
        for k, v in preds.items():
            preds[k] = tc.concatenate(v)
        return preds
Example #13
def load_posecnn_results():
    results_path = LOCAL_DATA_DIR / 'saved_detections' / 'ycbv_posecnn.pkl'
    results = pkl.loads(results_path.read_bytes())
    infos, poses, bboxes = [], [], []

    l_offsets = (LOCAL_DATA_DIR / 'bop_datasets/ycbv' /
                 'offsets.txt').read_text().strip().split('\n')
    ycb_offsets = dict()
    for l_n in l_offsets:
        # Each line reads '<two-digit object id> <JSON list of offsets in mm>'.
        obj_id, offset = l_n[:2], l_n[3:]
        obj_id = int(obj_id)
        offset = np.array(json.loads(offset)) * 0.001
        ycb_offsets[obj_id] = offset

    def mat_from_qt(qt):
        # PoseCNN quaternions are stored (w, x, y, z); reorder to (x, y, z, w) for Transform.
        wxyz = qt[:4].copy().tolist()
        xyzw = [*wxyz[1:], wxyz[0]]
        t = qt[4:].copy()
        return Transform(xyzw, t)

    for scene_view_str, result in results.items():
        scene_id, view_id = scene_view_str.split('/')
        scene_id, view_id = int(scene_id), int(view_id)
        n_dets = result['rois'].shape[0]
        for n in range(n_dets):
            obj_id = result['rois'][:, 1].astype(int)[n]
            label = f'obj_{obj_id:06d}'
            infos.append(
                dict(
                    scene_id=scene_id,
                    view_id=view_id,
                    score=result['rois'][n, 1],
                    label=label,
                ))
            bboxes.append(result['rois'][n, 2:6])
            pose = mat_from_qt(result['poses'][n])
            offset = ycb_offsets[obj_id]
            pose = pose * Transform((0, 0, 0, 1), offset).inverse()
            poses.append(pose.toHomogeneousMatrix())

    data = tc.PandasTensorCollection(
        infos=pd.DataFrame(infos),
        poses=torch.as_tensor(np.stack(poses)).float(),
        bboxes=torch.as_tensor(np.stack(bboxes)).float(),
    ).cpu()
    return data
Example #14
def read_csv_candidates(csv_path):
    df = pd.read_csv(csv_path)
    infos = df.loc[:, ['im_id', 'scene_id', 'score', 'obj_id']]
    infos['obj_id'] = infos['obj_id'].apply(lambda x: f'obj_{x:06d}')
    infos = infos.rename(dict(im_id='view_id', obj_id='label'), axis=1)
    # R: nine space-separated floats per row (row-major); t: three floats in millimetres.
    R = np.stack(df['R'].apply(lambda x: list(map(float, x.split(' '))))).reshape(-1, 3, 3)
    t = np.stack(df['t'].apply(lambda x: list(map(float, x.split(' '))))).reshape(-1, 3) * 1e-3
    R = torch.tensor(R, dtype=torch.float)
    t = torch.tensor(t, dtype=torch.float)
    TCO = torch.eye(4, dtype=torch.float).unsqueeze(0).repeat(len(R), 1, 1)
    TCO[:, :3, :3] = R
    TCO[:, :3, -1] = t
    candidates = tc.PandasTensorCollection(poses=TCO, infos=infos)
    return candidates
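The CSV layout matches the BOP challenge results format: one row per pose candidate, with R as nine space-separated floats (row-major) and t in millimetres, hence the 1e-3 conversion to metres above. A usage sketch with a hypothetical file name:

candidates = read_csv_candidates('ycbv-test_candidates.csv')  # hypothetical path
print(candidates.infos.columns.tolist())  # ['view_id', 'scene_id', 'score', 'label']
print(candidates.poses.shape)             # torch.Size([n_candidates, 4, 4])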
Example #15
def load_pix2pose_results(all_detections=True, remove_incorrect_poses=False):
    if all_detections:
        results_path = LOCAL_DATA_DIR / 'saved_detections' / 'tless_pix2pose_retinanet_vivo_all.pkl'
    else:
        results_path = LOCAL_DATA_DIR / 'saved_detections' / 'tless_pix2pose_retinanet_siso_top1.pkl'
    pix2pose_results = pkl.loads(results_path.read_bytes())
    infos, poses, bboxes = [], [], []
    for key, result in pix2pose_results.items():
        scene_id, view_id = key.split('/')
        scene_id, view_id = int(scene_id), int(view_id)
        boxes = result['rois']
        scores = result['scores']
        poses_ = result['poses']

        labels = result['labels_txt']
        # Swap the box coordinate order (presumably (y1, x1, y2, x2) -> (x1, y1, x2, y2)).
        new_boxes = boxes.copy()
        new_boxes[:, 0] = boxes[:, 1]
        new_boxes[:, 1] = boxes[:, 0]
        new_boxes[:, 2] = boxes[:, 3]
        new_boxes[:, 3] = boxes[:, 2]
        for o, label in enumerate(labels):
            t = poses_[o][:3, -1]
            # Skip degenerate poses: zero translation or implausibly far away.
            if remove_incorrect_poses and (np.sum(t) == 0 or np.max(t) > 100):
                continue
            infos.append(
                dict(
                    scene_id=scene_id,
                    view_id=view_id,
                    score=scores[o],
                    label=label,
                ))
            bboxes.append(new_boxes[o])
            poses.append(poses_[o])

    data = tc.PandasTensorCollection(
        infos=pd.DataFrame(infos),
        poses=torch.as_tensor(np.stack(poses)),
        bboxes=torch.as_tensor(np.stack(bboxes)).float(),
    ).cpu()
    return data
Example #16
def reproject_scene(self, objects, cameras):
    TCO_data = []
    for o in range(len(objects)):
        for v in range(len(cameras)):
            obj = objects[[o]]
            cam = cameras[[v]]
            infos = dict(
                scene_id=cam.infos['scene_id'].values,
                view_id=cam.infos['view_id'].values,
                score=obj.infos['score'].values + 1.0,
                view_group=obj.infos['view_group'].values,
                label=obj.infos['label'].values,
                batch_im_id=cam.infos['batch_im_id'].values,
                obj_id=obj.infos['obj_id'].values,
                from_ba=[True],
            )
            data_ = tc.PandasTensorCollection(
                infos=pd.DataFrame(infos),
                poses=invert_T(cam.TWC) @ obj.TWO,
            )
            TCO_data.append(data_)
    return tc.concatenate(TCO_data)
Example #17
    def predict_scene_state(self,
                            candidates,
                            cameras,
                            score_th=0.3,
                            use_known_camera_poses=False,
                            ransac_n_iter=2000,
                            ransac_dist_threshold=0.02,
                            ba_n_iter=100):

        predictions = dict()
        cand_inputs = candidates

        assert len(np.unique(candidates.infos['scene_id'])) == 1
        scene_id = np.unique(candidates.infos['scene_id']).item()
        group_id = np.unique(candidates.infos['group_id']).item()
        keep = np.where(candidates.infos['score'] >= score_th)[0]
        candidates = candidates[keep]

        predictions['cand_inputs'] = candidates

        logger.debug(f'Num candidates: {len(candidates)}')
        logger.debug(f'Num views: {len(cameras)}')

        matching_outputs = multiview_candidate_matching(
            candidates=candidates,
            mesh_db=self.mesh_db_ransac,
            n_ransac_iter=ransac_n_iter,
            dist_threshold=ransac_dist_threshold,
            cameras=cameras if use_known_camera_poses else None)

        pairs_TC1C2 = matching_outputs['pairs_TC1C2']
        candidates = matching_outputs['filtered_candidates']

        logger.debug(f'Matched candidates: {len(candidates)}')
        for k, v in matching_outputs.items():
            if 'time' in k:
                logger.debug(f'RANSAC {k}: {v}')

        predictions['cand_matched'] = candidates

        group_infos = make_view_groups(pairs_TC1C2)
        candidates = candidates.merge_df(group_infos, on='view_id').cuda()

        pred_objects, pred_cameras, pred_reproj = [], [], []
        pred_reproj_init = []
        view_groups = candidates.infos.groupby('view_group').groups.items()
        for view_group, candidate_ids in view_groups:
            candidates_n = candidates[candidate_ids]
            problem = MultiviewRefinement(candidates=candidates_n,
                                          cameras=cameras,
                                          pairs_TC1C2=pairs_TC1C2,
                                          mesh_db=self.mesh_db_ba)
            ba_outputs = problem.solve(
                n_iterations=ba_n_iter,
                optimize_cameras=not use_known_camera_poses,
            )
            pred_objects_ = ba_outputs['objects']
            pred_cameras_ = ba_outputs['cameras']
            for x in (pred_objects_, pred_cameras_):
                x.infos['view_group'] = view_group
                x.infos['group_id'] = group_id
                x.infos['scene_id'] = scene_id
            pred_reproj.append(
                self.reproject_scene(pred_objects_, pred_cameras_))
            pred_objects.append(pred_objects_)
            pred_cameras.append(pred_cameras_)

            pred_objects_init = ba_outputs['objects_init']
            pred_cameras_init = ba_outputs['cameras_init']
            for x in (pred_objects_init, pred_cameras_init):
                x.infos['view_group'] = view_group
                x.infos['group_id'] = group_id
                x.infos['scene_id'] = scene_id
            pred_reproj_init.append(
                self.reproject_scene(pred_objects_init, pred_cameras_init))

            for k, v in ba_outputs.items():
                if 'time' in k:
                    logger.debug(f'BA {k}: {v}')

        predictions['scene/objects'] = tc.concatenate(pred_objects)
        predictions['scene/cameras'] = tc.concatenate(pred_cameras)

        predictions['ba_output'] = tc.concatenate(pred_reproj)
        predictions['ba_input'] = tc.concatenate(pred_reproj_init)

        cand_inputs = tc.PandasTensorCollection(
            infos=cand_inputs.infos,
            poses=cand_inputs.poses,
        )
        predictions['ba_output+all_cand'] = tc.concatenate(
            [predictions['ba_output'], cand_inputs])
        return predictions
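End to end, predict_scene_state consumes single-view pose candidates plus camera data and returns a dictionary of collections keyed by stage; a hedged call sketch (scene_predictor is a hypothetical instance of the surrounding class):

predictions = scene_predictor.predict_scene_state(candidates, cameras,
                                                  score_th=0.3, ba_n_iter=100)
objects = predictions['scene/objects']  # refined world-frame object poses
reproj = predictions['ba_output']       # per-view TCO reprojections after bundle adjustment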