Example #1
    def make_TCO_init(self, detections, K):
        # Select the intrinsics of the image each detection belongs to.
        K = K[detections.infos['batch_im_id'].values]
        boxes = detections.bboxes
        if self.coarse_model.cfg.init_method == 'z-up+auto-depth':
            # z-up orientation, with the depth chosen so the projected model
            # points roughly fill the detected box.
            meshes = self.coarse_model.mesh_db.select(
                detections.infos['label'])
            points_3d = meshes.sample_points(2000, deterministic=True)
            TCO_init = TCO_init_from_boxes_zup_autodepth(boxes, points_3d, K)
        else:
            # Fallback: center the object on the box at a fixed depth (z = 1).
            TCO_init = TCO_init_from_boxes(z_range=(1.0, 1.0),
                                           boxes=boxes,
                                           K=K)
        return tc.PandasTensorCollection(infos=detections.infos,
                                         poses=TCO_init)
Example #2
    def batched_model_predictions(self,
                                  model,
                                  images,
                                  K,
                                  obj_data,
                                  n_iterations=1):
        timer = Timer()
        timer.start()

        ids = torch.arange(len(obj_data))

        ds = TensorDataset(ids)
        dl = DataLoader(ds, batch_size=self.bsz_objects)

        preds = defaultdict(list)
        for (batch_ids, ) in dl:
            timer.resume()
            obj_inputs = obj_data[batch_ids.numpy()]
            labels = obj_inputs.infos['label'].values
            im_ids = obj_inputs.infos['batch_im_id'].values
            images_ = images[im_ids]
            K_ = K[im_ids]
            TCO_input = obj_inputs.poses
            outputs = model(images=images_,
                            K=K_,
                            TCO=TCO_input,
                            n_iterations=n_iterations,
                            labels=labels)
            # Pause so the per-iteration bookkeeping below is not timed.
            timer.pause()
            for n in range(1, n_iterations + 1):
                iter_outputs = outputs[f'iteration={n}']

                infos = obj_inputs.infos
                batch_preds = tc.PandasTensorCollection(
                    infos,
                    poses=iter_outputs['TCO_output'],
                    poses_input=iter_outputs['TCO_input'],
                    K_crop=iter_outputs['K_crop'],
                    boxes_rend=iter_outputs['boxes_rend'],
                    boxes_crop=iter_outputs['boxes_crop'])
                preds[f'iteration={n}'].append(batch_preds)

        logger.debug(
            f'Pose prediction on {len(obj_data)} detections (n_iterations={n_iterations}): {timer.stop()}'
        )
        preds = dict(preds)
        for k, v in preds.items():
            preds[k] = tc.concatenate(v)
        return preds
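The return value maps `'iteration=n'` keys to concatenated collections. A sketch of reading out the final refinement step; the iteration count and names below are illustrative, with `model` one of the pose networks:

preds = self.batched_model_predictions(model, images, K, obj_data, n_iterations=4)
final = preds['iteration=4']   # tc.PandasTensorCollection for the last step
TCO_pred = final.poses         # (N, 4, 4) refined camera-from-object poses
infos = final.infos            # pandas DataFrame row-aligned with the tensors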
Example #3
    def make_init_obj_data(self,
                           detections,
                           K,
                           resolution=None,
                           joints=None,
                           use_known_joints=False):
        # Joint initialization
        obj_infos = []
        bsz = len(detections)
        if use_known_joints:
            tensor_joints = self.urdf_layer.to_tensor(joints)
            logger.info('Using provided joints for initialization.')
        else:
            tensor_joints = self.urdf_layer.joints_default.unsqueeze(0).repeat(
                bsz, 1)
            logger.info('Using default joints for initialization.')
        tensor_joints = tensor_joints.float().cuda()
        detections.infos['joint_names'] = [
            self.urdf_layer.joint_names.tolist() for _ in range(bsz)
        ]
        for n, row in enumerate(detections.infos.itertuples()):
            obj_infos.append(
                dict(name=row.label,
                     joints=self.urdf_layer.from_tensor(tensor_joints[[n]])))

        # Pose initialization
        boxes = detections.bboxes
        K_ = K[detections.infos.batch_im_id.values]
        meshes = self.model.mesh_db.select(obj_infos)
        # T_offset is the pose of each mesh centroid in the object frame; the
        # points are centered with its inverse, and the pose is shifted back below.
        _, T_offset = meshes.center_meshes()
        t_O_CENTROID = T_offset[:, :3, -1]
        centered_meshes = Meshes(
            meshes.labels, transform_pts(invert_T(T_offset), meshes.points))
        centered_points = centered_meshes.sample_points(2000,
                                                        deterministic=True)
        T_C_CENTROID_init = TCO_init_from_boxes_zup_autodepth(
            boxes, centered_points, K_)
        TCO_init = T_C_CENTROID_init @ invert_T(T_offset)

        data = tc.PandasTensorCollection(
            infos=detections.infos,
            K=K_,
            poses=TCO_init,
            joints=self.urdf_layer.to_tensor(tensor_joints))
        return data
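A sketch of initializing with measured joint angles; the joint names and values below are made up, and the mapping is assumed to match what `urdf_layer.to_tensor` accepts (it is also fed a plain dict of tensors in Example #9):

import torch

joints = {'joint_1': torch.tensor([[0.1]]),
          'joint_2': torch.tensor([[-0.4]])}   # hypothetical readings
obj_data = self.make_init_obj_data(detections, K,
                                   joints=joints, use_known_joints=True)
# obj_data.poses holds TCO_init; obj_data.joints the batched joint tensor.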
Example #4
def parse_obs_data(obs, parse_joints=False):
    data = defaultdict(list)
    frame_info = obs['frame_info']
    TWC = torch.as_tensor(obs['camera']['TWC']).float()
    for n, obj in enumerate(obs['objects']):
        info = dict(frame_obj_id=n,
                    label=obj['name'],
                    visib_fract=obj.get('visib_fract', 1),
                    scene_id=frame_info['scene_id'],
                    view_id=frame_info['view_id'])
        data['infos'].append(info)
        data['TWO'].append(obj['TWO'])
        data['bboxes'].append(obj['bbox'])
        data['keypoints_2d'].append(obj.get('keypoints_2d', []))
        data['TCO_keypoints_3d'].append(obj.get('TCO_keypoints_3d', []))
        data['points_3d'].append(obj.get('points_3d', []))

    joints = None
    if parse_joints:
        objects = obs['objects']
        joint_names = list(objects[0]['joints'].keys())
        joints = torch.stack([
            torch.tensor([obj['joints'][k] for k in joint_names])
            for obj in obs['objects']
        ])

    for k, v in data.items():
        if k != 'infos':
            data[k] = torch.stack([torch.as_tensor(x).float() for x in v])

    data['infos'] = pd.DataFrame(data['infos'])
    TCO = invert_T(TWC).unsqueeze(0) @ data['TWO']

    data = tc.PandasTensorCollection(
        infos=data['infos'],
        TCO=TCO,
        bboxes=data['bboxes'],
        keypoints_2d=data['keypoints_2d'],
        TCO_keypoints_3d=data['TCO_keypoints_3d'],
        poses=TCO,
    )
    if parse_joints:
        data.register_tensor('joints', joints)
        data.infos['joint_names'] = [joint_names for _ in range(len(data))]
    return data
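A fabricated `obs` showing the minimal schema `parse_obs_data` reads (field names are taken from the code above; the values are dummies):

import numpy as np

obs = dict(
    frame_info=dict(scene_id=0, view_id=0),
    camera=dict(TWC=np.eye(4)),
    objects=[dict(name='obj_000001',
                  TWO=np.eye(4),
                  bbox=np.array([10., 20., 100., 200.]))],
)
data = parse_obs_data(obs)
# data.poses is an alias of data.TCO, shape (1, 4, 4).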
Example #5
def load_craves_results(ds_name):
    if 'youtube' in ds_name:
        results_dir = CRAVES_YOUTUBE_RESULTS_DIR
    else:
        results_dir = CRAVES_LAB_RESULTS_DIR

    results_json = Path(results_dir).glob('*.json')
    infos = []
    keypoints = []
    for result_json in results_json:
        result = json.loads(result_json.read_text())
        keypoints.append(torch.tensor(result['d2_key']))
        scene_id, view_id = parse_name(result_json.with_suffix('').name)
        infos.append(dict(scene_id=scene_id, view_id=view_id))
    infos = pd.DataFrame(infos)
    keypoints = torch.stack(keypoints)
    data = tc.PandasTensorCollection(infos, keypoints_2d=keypoints)
    return data
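The loader above implies one JSON file per frame, with 2D keypoints stored under `d2_key` and the scene/view ids recovered from the file name by `parse_name`; the exact naming scheme is not shown here. A sketch:

results = load_craves_results('craves.lab')   # any name without 'youtube' uses the lab dir
print(results.infos.head())
print(results.keypoints_2d.shape)             # one stacked tensor of 2D keypoints per frame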
Example #6
def load_dream_result_id(result_id):
    dream_result_dir = LOCAL_DATA_DIR / 'dream_results' / result_id
    if not dream_result_dir.exists():
        logger.info(f'DREAM {result_id} not found ({dream_result_dir})')
        return None
    pnp_results = pd.read_csv(dream_result_dir / 'pnp_results.csv')
    x, y, z, qx, qy, qz, qw = [
        pnp_results.loc[:, f'pose_{k}'].values
        for k in ('x', 'y', 'z', 'qx', 'qy', 'qz', 'qw')
    ]
    pnp_poses = []
    # DREAM results are keyed by a single image name; reuse it for both ids.
    pnp_results['scene_id'] = pnp_results['name']
    pnp_results['view_id'] = pnp_results['name']
    # Synthetic result sets store translations at a 1/100 scale.
    scale = 1 / 100 if 'synt' in result_id else 1.
    for n in range(len(x)):
        T = Transform(np.array([qx[n], qy[n], qz[n], qw[n]]),
                      np.array([x[n], y[n], z[n]]) * scale)
        pnp_poses.append(T.toHomogeneousMatrix())
    pnp_poses = torch.as_tensor(np.stack(pnp_poses))
    infos = pnp_results.loc[:, ['view_id', 'scene_id', 'pnp_success']]
    results = tc.PandasTensorCollection(infos=infos, pnp_poses=pnp_poses)
    return results
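Reading a result set back and keeping only the successful PnP fits; the `result_id` below is a placeholder:

import torch

results = load_dream_result_id('some_dream_run')   # hypothetical id
if results is not None:
    keep = torch.as_tensor(results.infos['pnp_success'].values.astype(bool))
    poses = results.pnp_poses[keep]                # (M, 4, 4) homogeneous poses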
Example #7
def make_articulated_input_infos(rgb_uint8,
                                 robot_label,
                                 bbox=None,
                                 focal=1000,
                                 resize=(640, 480)):
    rgb_uint8 = np.asarray(rgb_uint8)
    h, w, _ = rgb_uint8.shape
    K = np.array([[focal, 0, w / 2], [0, focal, h / 2], [0, 0, 1]])
    camera = dict(K=K, T0C=np.eye(4), TWC=np.eye(4), resolution=(w, h))
    if bbox is None:
        # No box provided: fall back to a full-image box (margin = 0).
        margin = 0
        keypoints_2d = np.array([[w * margin, h * margin],
                                 [w - w * margin, h - h * margin]])
        bbox = np.concatenate(
            [np.min(keypoints_2d, axis=0),
             np.max(keypoints_2d, axis=0)])
    mask = make_masks_from_det(np.array(bbox)[None], h, w).numpy().astype(
        np.uint8)[0] * 255
    robot = dict(joints=None,
                 name=robot_label,
                 id_in_segm=255,
                 bbox=bbox,
                 TWO=np.eye(4))
    state = dict(objects=[robot], camera=camera)
    augmentation = CropResizeToAspectAugmentation(resize=resize)
    rgb, mask, state = augmentation(rgb_uint8, mask, state)
    det_infos = [dict(label=robot_label, score=1.0, batch_im_id=0)]
    detections = tc.PandasTensorCollection(
        infos=pd.DataFrame(det_infos),
        bboxes=torch.as_tensor(
            state['objects'][0]['bbox']).float().cuda().unsqueeze(0),
    )

    images = torch.tensor(np.array(rgb)).cuda().float().unsqueeze(0).permute(
        0, 3, 1, 2) / 255
    K = torch.tensor(state['camera']['K']).float().cuda().unsqueeze(0)
    return images, K, detections
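A sketch of running the helper on a single image; it assumes a CUDA device (the function moves its outputs to the GPU), and the robot label is a placeholder:

from PIL import Image

rgb = Image.open('image.jpg')   # any RGB image
images, K, detections = make_articulated_input_infos(rgb, 'my_robot')
# images: (1, 3, H, W) float tensor scaled to [0, 1] after crop-resize;
# K: (1, 3, 3); detections: one full-image box with score 1.0.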
Example #8
    def collate_fn(self, batch):
        batch_im_id = -1
        cam_infos, K, TWC = [], [], []
        joints = defaultdict(list)
        orig_K, cropresize_bboxes, orig_wh = [], [], []
        det_infos, bboxes, poses_gt = [], [], []
        images = []
        for n, data in enumerate(batch):
            rgb, masks, obs = data
            batch_im_id += 1
            frame_info = obs['frame_info']
            im_info = {k: frame_info[k] for k in ('scene_id', 'view_id')}
            im_info.update(batch_im_id=batch_im_id)
            cam_info = im_info.copy()

            if 'orig_camera' in obs:
                orig_K_ = obs['orig_camera']['K']
                res = obs['orig_camera']['resolution']
                orig_wh_ = [max(res), min(res)]
                cropresize_bbox = obs['orig_camera']['crop_resize_bbox']
            else:
                orig_K_ = obs['camera']['K']
                orig_wh_ = [rgb.shape[1], rgb.shape[0]]
                cropresize_bbox = (0, 0, orig_wh_[0] - 1, orig_wh_[1] - 1)

            orig_K.append(torch.as_tensor(orig_K_).float())
            cropresize_bboxes.append(torch.as_tensor(cropresize_bbox))
            orig_wh.append(torch.as_tensor(orig_wh_))

            K.append(obs['camera']['K'])
            TWC.append(obs['camera']['TWC'])
            cam_infos.append(cam_info)
            images.append(rgb)

            for o, obj in enumerate(obs['objects']):
                obj_info = dict(
                    label=obj['name'],
                    score=1.0,
                )
                obj_info.update(im_info)
                h, w, _ = rgb.shape
                m = 1 / 5
                bbox = np.array([w * m, h * m, w - w * m, h - h * m])
                bboxes.append(bbox)
                det_infos.append(obj_info)
                assert 'joints' in obj
                for k, v in obj['joints'].items():
                    joints[k].append(torch.as_tensor(v).view(-1).float())

        detections = tc.PandasTensorCollection(
            infos=pd.DataFrame(det_infos),
            bboxes=torch.as_tensor(np.stack(bboxes)).float(),
        )
        cameras = tc.PandasTensorCollection(
            infos=pd.DataFrame(cam_infos),
            K=torch.as_tensor(np.stack(K)),
            orig_K=torch.as_tensor(np.stack(orig_K)),
            orig_wh=torch.as_tensor(np.stack(orig_wh)),
            cropresize_bboxes=torch.as_tensor(np.stack(cropresize_bboxes)),
            TWC=torch.as_tensor(np.stack(TWC)),
        )
        data = dict(
            images=torch.stack(images),
            cameras=cameras,
            detections=detections,
            joints=joints,
        )
        return data
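The collate function is meant to be plugged into a standard `DataLoader`; a sketch, assuming `self.ds` yields the `(rgb, masks, obs)` tuples unpacked above:

from torch.utils.data import DataLoader

dl = DataLoader(self.ds, batch_size=8, collate_fn=self.collate_fn)
for batch in dl:
    images = batch['images']          # (B, H, W, 3), stacked as the dataset yields them
    K = batch['cameras'].K            # (B, 3, 3)
    detections = batch['detections']  # one fixed centered box (20% margin) per object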
Example #9
    def batched_model_predictions(self,
                                  images,
                                  K,
                                  obj_data,
                                  n_iterations=1,
                                  update_obj_infos=True):
        timer = Timer()
        timer.start()

        ids = torch.arange(len(obj_data))

        ds = TensorDataset(ids)
        dl = DataLoader(ds, batch_size=self.bsz_objects)
        preds = defaultdict(list)
        for (batch_ids, ) in dl:
            obj_inputs = obj_data[batch_ids.numpy()]
            im_ids = obj_inputs.infos.batch_im_id.values
            images_ = images[im_ids]
            K_ = K[im_ids]
            TCO_input, obj_infos_input = data_to_pose_model_inputs(obj_inputs)

            outputs = self.model(images=images_,
                                 K=K_,
                                 TCO=TCO_input,
                                 obj_infos=obj_infos_input,
                                 n_iterations=n_iterations,
                                 update_obj_infos=update_obj_infos,
                                 deterministic=True)

            for n in range(1, n_iterations + 1):
                iter_outputs = outputs[f'iteration={n}']
                bsz = len(images_)
                obj_outputs = iter_outputs['obj_infos_output']
                obj_inputs_ = iter_outputs['obj_infos_input']

                # Concatenate the per-object joint dicts into batched
                # tensors, keyed by joint name.
                q_input = {
                    k: torch.cat(
                        [obj_inputs_[i]['joints'][k] for i in range(bsz)],
                        dim=0)
                    for k in obj_inputs_[0]['joints'].keys()
                }
                q_pred = {
                    k: torch.cat(
                        [obj_outputs[i]['joints'][k] for i in range(bsz)],
                        dim=0)
                    for k in obj_outputs[0]['joints'].keys()
                }
                q_pred = self.urdf_layer.to_tensor(q_pred)

                infos = obj_inputs.infos

                data = tc.PandasTensorCollection(
                    infos,
                    poses=iter_outputs['TCO_output'],
                    K=iter_outputs['K_input'],
                    joints=q_pred,
                    K_crop=iter_outputs['K_crop'])
                preds[f'iteration={n}'].append(data)

        for k, v in preds.items():
            preds[k] = tc.concatenate(v)
        logger.debug(
            f'Pose prediction on {len(obj_data)} detections (n_iterations={n_iterations}) (joint_update={update_obj_infos}): {timer.stop()}'
        )
        return preds
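Reading out the final iteration, including the predicted joint angles; the iteration count below is illustrative:

preds = self.batched_model_predictions(images, K, obj_data, n_iterations=3)
final = preds['iteration=3']
TCO = final.poses                              # (N, 4, 4) refined poses
q = self.urdf_layer.from_tensor(final.joints)  # back to a named joint mapping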