Example 1
    def test_rotate_on_spot_yaw(self):
        N = 14
        eye = torch.rand(N, 3)
        at = torch.rand(N, 3)
        up = torch.rand(N, 3)

        R, T = look_at_view_transform(eye=eye, at=at, up=up)

        # Moving around the y axis looks left.
        angles = torch.FloatTensor([0, -radians(10), 0])
        rotation = axis_angle_to_matrix(angles)
        R_rot, T_rot = rotate_on_spot(R, T, rotation)

        eye_rot, at_rot, up_rot = camera_to_eye_at_up(
            get_world_to_view_transform(R=R_rot, T=T_rot))
        self.assertClose(eye, eye_rot, atol=1e-5)

        # Make vectors pointing exactly left and up
        left = torch.cross(up, at - eye, dim=-1)
        left_rot = torch.cross(up_rot, at_rot - eye_rot, dim=-1)
        fully_up = torch.cross(at - eye, left, dim=-1)
        fully_up_rot = torch.cross(at_rot - eye_rot, left_rot, dim=-1)

        # The up direction is unchanged
        self.assertClose(normalize(fully_up),
                         normalize(fully_up_rot),
                         atol=1e-5)

        # The camera has moved left
        agree = _batched_dotprod(torch.cross(left, left_rot, dim=1), fully_up)
        self.assertGreater(agree.min(), 0)

        # Batch dimension for rotation
        R_rot2, T_rot2 = rotate_on_spot(R, T, rotation.expand(N, 3, 3))
        self.assertClose(R_rot, R_rot2)
        self.assertClose(T_rot, T_rot2)

        # No batch dimension for either
        R_rot3, T_rot3 = rotate_on_spot(R[0], T[0], rotation)
        self.assertClose(R_rot[:1], R_rot3)
        self.assertClose(T_rot[:1], T_rot3)

        # No batch dimension for R, T
        R_rot4, T_rot4 = rotate_on_spot(R[0], T[0], rotation.expand(N, 3, 3))
        self.assertClose(R_rot[:1].expand(N, 3, 3), R_rot4)
        self.assertClose(T_rot[:1].expand(N, 3), T_rot4)
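
Both rotate_on_spot tests in this listing assume the setup sketched below; this is not part of the original snippet. The import paths follow the current PyTorch3D layout, `normalize` comes from torch.nn.functional, `assertClose` is provided by PyTorch3D's internal test base class, and `_batched_dotprod` is a test-local helper reproduced here as a row-wise dot product.

# Sketch of the imports and the test-local helper the snippets assume.
from math import radians

import torch
from torch.nn.functional import normalize

from pytorch3d.renderer.camera_utils import camera_to_eye_at_up, rotate_on_spot
from pytorch3d.renderer.cameras import (
    get_world_to_view_transform,
    look_at_view_transform,
)
from pytorch3d.transforms import axis_angle_to_matrix


def _batched_dotprod(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
    # Row-wise dot product of two (N, 3) tensors, returning an (N,) tensor.
    return torch.einsum("ij,ij->i", x, y)
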
Example 2
    def test_rotate_on_spot_roll(self):
        N = 14
        eye = torch.rand(N, 3)
        at = torch.rand(N, 3)
        up = torch.rand(N, 3)

        R, T = look_at_view_transform(eye=eye, at=at, up=up)

        # Moving around the z axis rotates the image.
        angles = torch.FloatTensor([0, 0, -radians(10)])
        rotation = axis_angle_to_matrix(angles)
        R_rot, T_rot = rotate_on_spot(R, T, rotation)
        eye_rot, at_rot, up_rot = camera_to_eye_at_up(
            get_world_to_view_transform(R=R_rot, T=T_rot))
        self.assertClose(eye, eye_rot, atol=1e-5)
        self.assertClose(normalize(at - eye),
                         normalize(at_rot - eye),
                         atol=1e-5)

        # The camera has moved clockwise
        agree = _batched_dotprod(torch.cross(up, up_rot, dim=1), at - eye)
        self.assertGreater(agree.min(), 0)
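
As a quick check of the rotation convention used in both tests, the axis-angle vector [0, 0, theta] should map to the standard rotation matrix about the z axis. A standalone sketch (independent of the test class):

# Sketch: axis_angle_to_matrix([0, 0, theta]) is the standard z rotation.
from math import cos, radians, sin

import torch
from pytorch3d.transforms import axis_angle_to_matrix

theta = -radians(10)
R_z = axis_angle_to_matrix(torch.tensor([0.0, 0.0, theta]))
expected = torch.tensor([
    [cos(theta), -sin(theta), 0.0],
    [sin(theta), cos(theta), 0.0],
    [0.0, 0.0, 1.0],
])
assert torch.allclose(R_z, expected, atol=1e-6)
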
Example 3
    def _transform_cam_params(
        cam_params: torch.Tensor,
        width: int,
        height: int,
        orthogonal: bool,
        right_handed: bool,
        first_R_then_T: bool = False,
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor,
               torch.Tensor, torch.Tensor, ]:
        """
        Transform 8-component camera parameter vector(s) to the internal camera
        representation.

        The input vectors consist of:
            * 3 components for camera position,
            * 3 components for camera rotation (three rotation angles) or
              6 components as described in "On the Continuity of Rotation
              Representations in Neural Networks" (Zhou et al.),
            * focal length,
            * the sensor width in world coordinates,
            * [optional] the principal point offset in x and y.

        The sensor height is inferred from the pixel size and sensor width so
        that pixels are square.

        Args:
            * cam_params: [Bx]{8, 10, 11, 13}, input tensors as described above.
            * width: number of pixels in x direction.
            * height: number of pixels in y direction.
            * orthogonal: bool, whether an orthogonal projection is used
                  (does not use focal length).
            * right_handed: bool, whether to use a right handed system
                  (negative z in camera direction).
            * first_R_then_T: bool, whether to first rotate, then translate
                  the camera (PyTorch3D convention).

        Returns:
            * pos_vec: the position vector in 3D,
            * pixel_0_0_center: the center of the upper left pixel in world coordinates,
            * pixel_vec_x: the step to move one pixel on the image x axis
                   in world coordinates,
            * pixel_vec_y: the step to move one pixel on the image y axis
                   in world coordinates,
            * focal_length: the focal lengths,
            * principal_point_offsets: the principal point offsets in x, y.
        """
        global AXANGLE_WARNING_EMITTED
        # Set up all direction vectors, i.e., the sensor direction of all axes.
        assert width > 0
        assert height > 0
        batch_processing = True
        if cam_params.ndimension() == 1:
            batch_processing = False
            cam_params = cam_params[None, :]
        batch_size = cam_params.size(0)
        continuous_rep = True
        if cam_params.shape[1] in [8, 10]:
            if cam_params.requires_grad and not AXANGLE_WARNING_EMITTED:
                warnings.warn(
                    "Using an axis angle representation for camera rotations. "
                    "This has discontinuities and should not be used for optimization. "
                    "Alternatively, use a six-component representation as described in "
                    "'On the Continuity of Rotation Representations in Neural Networks'"
                    " (Zhou et al.). "
                    "The `pytorch3d.transforms` module provides "
                    "facilities for using this representation.")
                AXANGLE_WARNING_EMITTED = True
            continuous_rep = False
        else:
            assert cam_params.shape[1] in [11, 13]
        pos_vec: torch.Tensor = cam_params[:, :3]
        principal_point_offsets: torch.Tensor = torch.zeros(
            (cam_params.shape[0], 2),
            dtype=torch.int32,
            device=cam_params.device)
        if continuous_rep:
            rot_vec = cam_params[:, 3:9]
            focal_length: torch.Tensor = cam_params[:, 9:10]
            sensor_size_x = cam_params[:, 10:11]
            if cam_params.shape[1] == 13:
                principal_point_offsets = cam_params[:, 11:13].to(torch.int32)
        else:
            rot_vec = cam_params[:, 3:6]
            focal_length: torch.Tensor = cam_params[:, 6:7]
            sensor_size_x = cam_params[:, 7:8]
            if cam_params.shape[1] == 10:
                principal_point_offsets = cam_params[:, 8:10].to(torch.int32)
        # Always get quadratic pixels.
        pixel_size_x = sensor_size_x / float(width)
        sensor_size_y = height * pixel_size_x
        if continuous_rep:
            rot_mat = rotation_6d_to_matrix(rot_vec)
        else:
            rot_mat = axis_angle_to_matrix(rot_vec)
        if first_R_then_T:
            pos_vec = torch.matmul(rot_mat, pos_vec[..., None])[:, :, 0]
        LOGGER.debug(
            "Camera position: %s, rotation: %s. Focal length: %s.",
            str(pos_vec),
            str(rot_vec),
            str(focal_length),
        )
        sensor_dir_x = torch.matmul(
            rot_mat,
            torch.tensor(
                [1.0, 0.0, 0.0], dtype=torch.float32, device=rot_mat.device
            ).repeat(batch_size, 1)[:, :, None],
        )[:, :, 0]
        sensor_dir_y = torch.matmul(
            rot_mat,
            torch.tensor(
                [0.0, -1.0, 0.0], dtype=torch.float32, device=rot_mat.device
            ).repeat(batch_size, 1)[:, :, None],
        )[:, :, 0]
        sensor_dir_z = torch.matmul(
            rot_mat,
            torch.tensor(
                [0.0, 0.0, 1.0], dtype=torch.float32, device=rot_mat.device
            ).repeat(batch_size, 1)[:, :, None],
        )[:, :, 0]
        if right_handed:
            sensor_dir_z *= -1
        LOGGER.debug(
            "Sensor direction vectors: %s, %s, %s.",
            str(sensor_dir_x),
            str(sensor_dir_y),
            str(sensor_dir_z),
        )
        if orthogonal:
            sensor_center = pos_vec
        else:
            sensor_center = pos_vec + focal_length * sensor_dir_z
        LOGGER.debug("Sensor center: %s.", str(sensor_center))
        sensor_luc = (  # Sensor left upper corner.
            sensor_center - sensor_dir_x * (sensor_size_x / 2.0) -
            sensor_dir_y * (sensor_size_y / 2.0))
        LOGGER.debug("Sensor luc: %s.", str(sensor_luc))
        pixel_size_x = sensor_size_x / float(width)
        pixel_size_y = sensor_size_y / float(height)
        LOGGER.debug("Pixel sizes (x): %s, (y) %s.", str(pixel_size_x),
                     str(pixel_size_y))
        pixel_vec_x: torch.Tensor = sensor_dir_x * pixel_size_x
        pixel_vec_y: torch.Tensor = sensor_dir_y * pixel_size_y
        pixel_0_0_center = sensor_luc + 0.5 * pixel_vec_x + 0.5 * pixel_vec_y
        LOGGER.debug(
            "Pixel 0 centers: %s, vec x: %s, vec y: %s.",
            str(pixel_0_0_center),
            str(pixel_vec_x),
            str(pixel_vec_y),
        )
        if not orthogonal:
            LOGGER.debug(
                "Camera horizontal fovs: %s deg.",
                str(2.0 * torch.atan(0.5 * sensor_size_x / focal_length) /
                    math.pi * 180.0),
            )
            LOGGER.debug(
                "Camera vertical fovs: %s deg.",
                str(2.0 * torch.atan(0.5 * sensor_size_y / focal_length) /
                    math.pi * 180.0),
            )
        # Reduce dimension.
        focal_length: torch.Tensor = focal_length[:, 0]
        if batch_processing:
            return (
                pos_vec,
                pixel_0_0_center,
                pixel_vec_x,
                pixel_vec_y,
                focal_length,
                principal_point_offsets,
            )
        else:
            return (
                pos_vec[0],
                pixel_0_0_center[0],
                pixel_vec_x[0],
                pixel_vec_y[0],
                focal_length[0],
                principal_point_offsets[0],
            )
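
For orientation, the docstring above describes the flat per-camera parameter layout used by the pulsar renderer. The sketch below builds the 8-component variant (3 position, 3 axis-angle rotation, focal length, sensor width) and passes it to the low-level pulsar Renderer; the import path and the positional forward signature follow the pulsar examples, and the sizes and values are illustrative only.

# Sketch: an 8-component cam_params vector for the low-level pulsar renderer.
import torch
from pytorch3d.renderer.points.pulsar import Renderer

device = torch.device("cuda")  # the pulsar examples run on a CUDA device
n_points = 10
width, height = 1000, 1000
vert_pos = torch.rand(n_points, 3, device=device) * 10.0
vert_pos[:, 2] += 25.0  # keep the points in front of the camera
vert_pos[:, :2] -= 5.0
vert_col = torch.rand(n_points, 3, device=device)
vert_rad = torch.rand(n_points, device=device)
# Layout: 3 position, 3 axis-angle rotation, focal length, sensor width.
cam_params = torch.tensor(
    [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 2.0], device=device
)
renderer = Renderer(width, height, n_points).to(device)
# Positional arguments after cam_params: gamma and max_depth.
image = renderer(vert_pos, vert_col, vert_rad, cam_params, 1.0e-1, 45.0)

The next snippet, the __init__ of a SceneModel, sets up the same kind of camera parameters through the unified PulsarPointsRenderer interface.
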
    def __init__(self):
        super(SceneModel, self).__init__()
        self.gamma = 0.1
        # Points.
        torch.manual_seed(1)
        vert_pos = torch.rand(N_POINTS, 3, dtype=torch.float32) * 10.0
        vert_pos[:, 2] += 25.0
        vert_pos[:, :2] -= 5.0
        self.register_parameter("vert_pos",
                                nn.Parameter(vert_pos, requires_grad=False))
        self.register_parameter(
            "vert_col",
            nn.Parameter(
                torch.rand(N_POINTS, 3, dtype=torch.float32),
                requires_grad=False,
            ),
        )
        self.register_parameter(
            "vert_rad",
            nn.Parameter(
                torch.rand(N_POINTS, dtype=torch.float32),
                requires_grad=False,
            ),
        )
        self.register_parameter(
            "cam_pos",
            nn.Parameter(
                torch.tensor([0.1, 0.1, 0.0], dtype=torch.float32),
                requires_grad=True,
            ),
        )
        self.register_parameter(
            "cam_rot",
            # We're using the 6D rot. representation for better gradients.
            nn.Parameter(
                axis_angle_to_matrix(
                    torch.tensor(
                        [
                            [0.02, 0.02, 0.01],
                        ],
                        dtype=torch.float32,
                    )
                )[0],
                requires_grad=True,
            ),
        )
        self.register_parameter(
            "focal_length",
            nn.Parameter(
                torch.tensor(
                    [
                        4.8 * 2.0 / 2.0,
                    ],
                    dtype=torch.float32,
                ),
                requires_grad=True,
            ),
        )
        self.cameras = PerspectiveCameras(
            # The focal length must be doubled for PyTorch3D because its NDC
            # coordinates span a range of two, and it must be normalized by the
            # sensor width (see the pulsar example). This means we need
            # 5.0 * 2.0 / 2.0 here to get results equivalent to pulsar.
            #
            # R, T and f are provided here, but will be provided again
            # at every call to the forward method. The reason is a PyTorch
            # issue that makes device placement of gradients problematic for
            # tensors which are themselves on a 'gradient path' but are not
            # leaves of the computation graph. This will be addressed by an
            # architectural change in PyTorch3D in the future. Until then,
            # this workaround is recommended.
            focal_length=self.focal_length,
            R=self.cam_rot[None, ...],
            T=self.cam_pos[None, ...],
            image_size=((HEIGHT, WIDTH),),
            device=DEVICE,
        )
        raster_settings = PointsRasterizationSettings(
            image_size=(HEIGHT, WIDTH),
            radius=self.vert_rad,
        )
        rasterizer = PointsRasterizer(cameras=self.cameras,
                                      raster_settings=raster_settings)
        self.renderer = PulsarPointsRenderer(rasterizer=rasterizer)
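
A matching forward pass is not shown in the snippet; a rough sketch following the pulsar "unified interface" examples is given below. `Pointclouds` (from pytorch3d.structures) wraps the registered points and colors, and R, T and focal_length are passed again at call time, which is the device-placement workaround mentioned in the comment above. The exact keyword arguments (gamma, znear, zfar) should be checked against the installed PyTorch3D version.

    def forward(self):
        # Assumed sketch of a forward pass; not part of the original snippet.
        pt_cloud = Pointclouds(
            points=self.vert_pos[None, ...], features=self.vert_col[None, ...]
        )
        return self.renderer(
            pt_cloud,
            gamma=(self.gamma,),  # blending parameter, one value per batch element
            znear=(1.0,),
            zfar=(45.0,),
            # Workaround: provide camera parameters again at call time (see above).
            focal_length=self.focal_length,
            R=self.cam_rot[None, ...],
            T=self.cam_pos[None, ...],
        )[0]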