Exemplo n.º 1
0
class Camera:
    def __init__(self, resolution=(320, 240), near=0.01, far=10, client_id=0):
        assert client_id >= 0, 'Please provide a client id (0 by default)'
        h, w = min(resolution), max(resolution)

        self._client_id = client_id
        self._near = near
        self._far = far
        self._shape = (h, w)
        self._rgba = np.zeros(self._shape + (4, ), dtype=np.uint8)
        self._mask = np.zeros(self._shape, dtype=np.uint8)
        self._depth = np.zeros(self._shape, dtype=np.float32)
        self._render_options = dict()
        self._render_flags = 0

        self.mask_link_index(True)
        self.casts_shadow(True)

        # Transform between standard camera coordinate (z forward) and OPENGL camera coordinate system
        wxyz = transforms3d.euler.euler2quat(np.pi / 2, 0, 0, axes='rxyz')
        xyzw = [*wxyz[1:], wxyz[0]]
        self.TCCGL = Transform(xyzw, (0, 0, 0))

        # Set some default parameters
        self.set_extrinsic_bullet(target=(0, 0, 0),
                                  distance=1.6,
                                  yaw=90,
                                  pitch=-35,
                                  roll=0)
        self.set_intrinsic_fov(90)

    def set_extrinsic_bullet(self, target, distance, yaw, pitch, roll):
        """
        Angles in *degrees*.
        """
        up = 'z'
        self._view_params = dict(yaw=yaw,
                                 pitch=pitch,
                                 roll=roll,
                                 target=target,
                                 distance=distance)
        self._view_mat = pb.computeViewMatrixFromYawPitchRoll(
            target, distance, yaw, pitch, roll, 'xyz'.index(up))

    def set_extrinsic_T(self, TWC):
        TWC = Transform(TWC)
        TWCGL = TWC * self.TCCGL
        xyzw = TWCGL.quaternion.coeffs()
        wxyz = [xyzw[-1], *xyzw[:-1]]
        pitch, roll, yaw = transforms3d.euler.quat2euler(wxyz, axes='sxyz')
        yaw = yaw * 180 / np.pi
        pitch = pitch * 180 / np.pi
        roll = roll * 180 / np.pi
        yaw = (yaw % 360 + 360) % 360
        distance = 0.0001
        self.set_extrinsic_bullet(target=TWCGL.translation,
                                  distance=distance,
                                  pitch=pitch,
                                  roll=roll,
                                  yaw=yaw)

    def set_extrinsic_spherical(self,
                                target=(0, 0, 0),
                                rho=0.6,
                                theta=np.pi / 4,
                                phi=0,
                                roll=0):
        """
        Angles in *radians*.
        https://fr.wikipedia.org/wiki/Coordonn%C3%A9es_sph%C3%A9riques#/media/Fichier:Spherical_Coordinates_(Colatitude,_Longitude)_(b).svg
        """
        x = rho * np.sin(theta) * np.cos(phi)
        y = rho * np.sin(theta) * np.sin(phi)
        z = rho * np.cos(theta)
        t = np.array([x, y, z])
        R = transforms3d.euler.euler2mat(np.pi, theta, phi, axes='sxyz')
        R = R @ transforms3d.euler.euler2mat(
            0, 0, -np.pi / 2 + roll, axes='sxyz')
        t += np.array(target)
        TWC = Transform(R, t)
        self.set_extrinsic_T(TWC)

    def set_intrinsic_K(self, K):
        h, w = self._shape
        proj_mat = proj_from_K(K, near=self._near, far=self._far, h=h,
                               w=w).flatten()
        assert np.allclose(proj_mat[11], -1)
        self._proj_mat = proj_mat
        self._K = K
        self._proj_params = None

    def set_intrinsic_fov(self, fov):
        h, w = self._shape
        self._proj_params = dict(fov=fov)
        self._proj_mat = pb.computeProjectionMatrixFOV(fov=fov,
                                                       aspect=w / h,
                                                       nearVal=self._near,
                                                       farVal=self._far)
        self._K = None

    def set_intrinsic_f(self, *args):
        if len(args) == 2:
            fx, fy = args
            raise NotImplementedError
        else:
            assert len(args) == 1
            fy = args[0]
        h, w = self._shape
        fov_y = np.arctan(h * 0.5 / fy) * 180 / np.pi
        fov = fov_y * 2
        self.set_intrinsic_fov(fov)

    def get_state(self):
        obs = dict()
        # Get images
        rgba, mask, depth = self._shot()
        rgb = rgba[..., :3]
        obs.update(rgb=rgb, mask=mask, depth=depth)

        # Get intrinsic, extrinsic parameters with standard conventions.
        h, w = self._shape
        if self._K is not None:
            K = self._K
        else:
            K = K_from_fov(self._proj_params['fov'])

        trans = self._view_params['target']
        orn = euler2quat([
            self._view_params[k] * np.pi / 180
            for k in ('pitch', 'roll', 'yaw')
        ],
                         axes='sxyz')
        TWCGL = Transform(orn, trans)
        TWC = TWCGL * self.TCCGL.inverse()
        obs.update(TWC=TWC.toHomogeneousMatrix(),
                   K=K,
                   resolution=(self._shape[1], self._shape[0]),
                   proj_mat=self._proj_mat,
                   near=self._near,
                   far=self._far)
        return obs

    def _shot(self):
        """ Computes a RGB image, a depth buffer and a segmentation mask buffer
        with body unique ids of visible objects for each pixel.
        """
        h, w = self._shape
        renderer = pb.ER_BULLET_HARDWARE_OPENGL

        w, h, rgba, depth, mask = pb.getCameraImage(
            width=w,
            height=h,
            projectionMatrix=self._proj_mat,
            viewMatrix=self._view_mat,
            renderer=renderer,
            flags=self._render_flags,
            **self._render_options,
            physicsClientId=self._client_id)

        rgba = np.asarray(rgba, dtype=np.uint8).reshape((h, w, 4))
        depth = np.asarray(depth, dtype=np.float32).reshape((h, w))
        mask = np.asarray(mask, dtype=np.uint8).reshape((h, w))
        return rgba, mask, depth

    def _project(self, fov, near, far):
        """ Apply camera projection matrix.
            Args:
             fov (float): Field of view.
             near float): Near plane distance.
             far (float): Far plane distance.
        """
        self.near = near
        self.far = far
        h, w = self._shape
        self._proj_mat = pb.computeProjectionMatrixFOV(fov=fov,
                                                       aspect=w / h,
                                                       nearVal=near,
                                                       farVal=far)

    def mask_link_index(self, flag):
        """ If is enabled, the mask combines the object unique id and link index
        as follows: value = objectUniqueId + ((linkIndex+1)<<24).
        """
        if flag:
            self._render_flags |= pb.ER_SEGMENTATION_MASK_OBJECT_AND_LINKINDEX
        else:
            self._render_flags &= ~pb.ER_SEGMENTATION_MASK_OBJECT_AND_LINKINDEX

    def casts_shadow(self, flag):
        """ 1 for shadows, 0 for no shadows. """
        self._render_options['shadow'] = 1 if flag else 0
Exemplo n.º 2
0
    def __getitem__(self, frame_id):
        row = self.frame_index.iloc[frame_id]
        scene_id, view_id = row.scene_id, row.view_id
        view_id = int(view_id)
        view_id_str = f'{view_id:06d}'
        scene_id_str = f'{int(scene_id):06d}'
        scene_dir = self.base_dir / scene_id_str

        rgb_dir = scene_dir / 'rgb'
        if not rgb_dir.exists():
            rgb_dir = scene_dir / 'gray'
        rgb_path = rgb_dir / f'{view_id_str}.png'
        if not rgb_path.exists():
            rgb_path = rgb_path.with_suffix('.jpg')
        if not rgb_path.exists():
            rgb_path = rgb_path.with_suffix('.tif')

        rgb = np.array(Image.open(rgb_path))
        if rgb.ndim == 2:
            rgb = np.repeat(rgb[..., None], 3, axis=-1)
        rgb = rgb[..., :3]
        h, w = rgb.shape[:2]
        rgb = torch.as_tensor(rgb)

        cam_annotation = self.annotations[scene_id_str]['scene_camera'][str(
            view_id)]
        if 'cam_R_w2c' in cam_annotation:
            RC0 = np.array(cam_annotation['cam_R_w2c']).reshape(3, 3)
            tC0 = np.array(cam_annotation['cam_t_w2c']) * 0.001
            TC0 = Transform(RC0, tC0)
        else:
            TC0 = Transform(np.eye(3), np.zeros(3))
        K = np.array(cam_annotation['cam_K']).reshape(3, 3)
        T0C = TC0.inverse()
        T0C = T0C.toHomogeneousMatrix()
        camera = dict(T0C=T0C, K=K, TWC=T0C, resolution=rgb.shape[:2])

        T0C = TC0.inverse()

        objects = []
        mask = np.zeros((h, w), dtype=np.uint8)
        if 'scene_gt_info' in self.annotations[scene_id_str]:
            annotation = self.annotations[scene_id_str]['scene_gt'][str(
                view_id)]
            n_objects = len(annotation)
            visib = self.annotations[scene_id_str]['scene_gt_info'][str(
                view_id)]
            for n in range(n_objects):
                RCO = np.array(annotation[n]['cam_R_m2c']).reshape(3, 3)
                tCO = np.array(annotation[n]['cam_t_m2c']) * 0.001
                TCO = Transform(RCO, tCO)
                T0O = T0C * TCO
                T0O = T0O.toHomogeneousMatrix()
                obj_id = annotation[n]['obj_id']
                name = f'obj_{int(obj_id):06d}'
                bbox_visib = np.array(visib[n]['bbox_visib'])
                x, y, w, h = bbox_visib
                x1 = x
                y1 = y
                x2 = x + w
                y2 = y + h
                obj = dict(label=name,
                           name=name,
                           TWO=T0O,
                           T0O=T0O,
                           visib_fract=visib[n]['visib_fract'],
                           id_in_segm=n + 1,
                           bbox=[x1, y1, x2, y2])
                objects.append(obj)

            mask_path = scene_dir / 'mask_visib' / f'{view_id_str}_all.png'
            if mask_path.exists():
                mask = np.array(Image.open(mask_path))
            else:
                for n in range(n_objects):
                    mask_n = np.array(
                        Image.open(scene_dir / 'mask_visib' /
                                   f'{view_id_str}_{n:06d}.png'))
                    mask[mask_n == 255] = n + 1

        mask = torch.as_tensor(mask)

        if self.load_depth:
            depth_path = scene_dir / 'depth' / f'{view_id_str}.png'
            if not depth_path.exists():
                depth_path = depth_path.with_suffix('.tif')
            depth = np.array(inout.load_depth(depth_path))
            camera['depth'] = depth * cam_annotation['depth_scale'] / 1000

        obs = dict(
            objects=objects,
            camera=camera,
            frame_info=row.to_dict(),
        )
        return rgb, mask, obs