Example #1
def _generate_vertices_and_view_matrices():
    camera_origin = ((0.0, 0.0, 0.0), (0.0, 0.0, 0.0))
    camera_up = ((0.0, 1.0, 0.0), (0.0, 1.0, 0.0))
    look_at_point = ((0.0, 0.0, 1.0), (0.0, 0.0, -1.0))
    field_of_view = ((60 * np.pi / 180, ), (60 * np.pi / 180, ))
    near_plane = ((0.01, ), (0.01, ))
    far_plane = ((400.0, ), (400.0, ))
    aspect_ratio = ((float(_IMAGE_WIDTH) / float(_IMAGE_HEIGHT), ),
                    (float(_IMAGE_WIDTH) / float(_IMAGE_HEIGHT), ))
    # Construct the view projection matrix.
    world_to_camera = look_at.right_handed(camera_origin, look_at_point,
                                           camera_up)
    perspective_matrix = perspective.right_handed(field_of_view, aspect_ratio,
                                                  near_plane, far_plane)
    # Shape [2, 4, 4]
    view_projection_matrix = tf.linalg.matmul(perspective_matrix,
                                              world_to_camera)
    depth = 1.0
    # Shape [2, 3, 3]
    vertices = (((-10.0 * _TRIANGLE_SIZE, 10.0 * _TRIANGLE_SIZE, depth),
                 (10.0 * _TRIANGLE_SIZE, 10.0 * _TRIANGLE_SIZE,
                  depth), (0.0, -10.0 * _TRIANGLE_SIZE, depth)),
                ((-_TRIANGLE_SIZE, 0.0, depth), (0.0, _TRIANGLE_SIZE, depth),
                 (0.0, 0.0, depth)))
    return vertices, view_projection_matrix
Example #2
  def test_perspective_correct_barycentrics_jacobian_random(self):
    """Tests the Jacobian of perspective_correct_barycentrics."""
    tensor_size = np.random.randint(1, 3)
    tensor_shape = np.random.randint(1, 5, size=(tensor_size)).tolist()
    vertices_init = np.random.uniform(size=tensor_shape + [3, 3])
    pixel_position_init = np.random.uniform(size=tensor_shape + [2])
    camera_position_init = np.random.uniform(size=tensor_shape + [3, 3])
    look_at_init = np.random.uniform(size=tensor_shape + [3, 3])
    up_vector_init = np.random.uniform(size=tensor_shape + [3, 3])
    vertical_field_of_view_init = np.random.uniform(
        0.1, 1.0, size=tensor_shape + [3, 1])
    screen_dimensions_init = np.random.uniform(
        1.0, 10.0, size=tensor_shape + [3, 2])
    near_init = np.random.uniform(1.0, 10.0, size=tensor_shape + [3, 1])
    far_init = near_init + np.random.uniform(
        0.1, 1.0, size=tensor_shape + [3, 1])
    lower_left_corner_init = np.random.uniform(size=tensor_shape + [3, 2])

    # Build matrices.
    model_to_eye_matrix_init = look_at.right_handed(camera_position_init,
                                                    look_at_init,
                                                    up_vector_init)
    perspective_matrix_init = perspective.right_handed(
        vertical_field_of_view_init,
        screen_dimensions_init[..., 0:1] / screen_dimensions_init[..., 1:2],
        near_init, far_init)

    self.assert_jacobian_is_correct_fn(
        glm.perspective_correct_barycentrics, [
            vertices_init, pixel_position_init, model_to_eye_matrix_init,
            perspective_matrix_init, screen_dimensions_init,
            lower_left_corner_init
        ],
        atol=1e-4)
Example #3
  def test_model_to_screen_jacobian_random(self):
    """Tests the Jacobian of model_to_screen."""
    tensor_size = np.random.randint(1, 3)
    tensor_shape = np.random.randint(1, 5, size=(tensor_size)).tolist()
    point_world_space_init = np.random.uniform(size=tensor_shape + [3])
    camera_position_init = np.random.uniform(size=tensor_shape + [3])
    camera_up_init = np.random.uniform(size=tensor_shape + [3])
    look_at_init = np.random.uniform(size=tensor_shape + [3])
    vertical_field_of_view_init = np.random.uniform(
        0.1, 1.0, size=tensor_shape + [1])
    lower_left_corner_init = np.random.uniform(size=tensor_shape + [2])
    screen_dimensions_init = np.random.uniform(
        0.1, 1.0, size=tensor_shape + [2])
    near_init = np.random.uniform(0.1, 1.0, size=tensor_shape + [1])
    far_init = near_init + np.random.uniform(0.1, 1.0, size=tensor_shape + [1])

    # Build matrices.
    model_to_eye_matrix = look_at.right_handed(camera_position_init,
                                               look_at_init, camera_up_init)
    perspective_matrix = perspective.right_handed(
        vertical_field_of_view_init,
        screen_dimensions_init[..., 0:1] / screen_dimensions_init[..., 1:2],
        near_init, far_init)

    args = [
        point_world_space_init, model_to_eye_matrix, perspective_matrix,
        screen_dimensions_init, lower_left_corner_init
    ]

    with self.subTest(name="jacobian_y_projection"):
      self.assert_jacobian_is_correct_fn(
          lambda *args: glm.model_to_screen(*args)[0], args, atol=1e-4)
Example #4
  def test_perspective_correct_interpolation_jacobian_preset(self):
    """Tests the Jacobian of perspective_correct_interpolation."""
    vertices_init = np.tile(
        ((-0.2857143, 0.2857143, 5.0), (0.2857143, 0.2857143, 0.5),
         (0.0, -0.2857143, 1.0)), (2, 1, 1))
    attributes_init = np.tile(
        (((1.0, 0.0, 0.0), (0.0, 1.0, 0.0), (0.0, 0.0, 1.0))), (2, 1, 1))
    pixel_position_init = np.array(((125.5, 375.5), (250.5, 250.5)))
    camera_position_init = np.tile((0.0, 0.0, 0.0), (2, 3, 1))
    look_at_init = np.tile((0.0, 0.0, 1.0), (2, 3, 1))
    up_vector_init = np.tile((0.0, 1.0, 0.0), (2, 3, 1))
    vertical_field_of_view_init = np.tile((1.0471975511965976,), (2, 3, 1))
    screen_dimensions_init = np.tile((501.0, 501.0), (2, 3, 1))
    near_init = np.tile((0.01,), (2, 3, 1))
    far_init = np.tile((10.0,), (2, 3, 1))
    lower_left_corner_init = np.tile((0.0, 0.0), (2, 3, 1))

    # Build matrices.
    model_to_eye_matrix_init = look_at.right_handed(camera_position_init,
                                                    look_at_init,
                                                    up_vector_init)
    perspective_matrix_init = perspective.right_handed(
        vertical_field_of_view_init,
        screen_dimensions_init[..., 0:1] / screen_dimensions_init[..., 1:2],
        near_init, far_init)

    self.assert_jacobian_is_correct_fn(glm.perspective_correct_interpolation, [
        vertices_init, attributes_init, pixel_position_init,
        model_to_eye_matrix_init, perspective_matrix_init,
        screen_dimensions_init, lower_left_corner_init
    ])
Example #5
  def test_model_to_screen_jacobian_preset(self):
    """Tests the Jacobian of model_to_screen."""
    point_world_space_init = np.array(((3.1, 4.1, 5.1), (-1.1, 2.2, -3.1)))
    camera_position_init = np.array(((0.0, 0.0, 0.0), (0.4, -0.8, 0.1)))
    camera_up_init = np.array(((0.0, 1.0, 0.0), (0.0, 0.0, 1.0)))
    look_at_init = np.array(((0.0, 0.0, 1.0), (0.0, 1.0, 0.0)))
    vertical_field_of_view_init = np.array(
        ((60.0 * math.pi / 180.0,), (65 * math.pi / 180,)))
    lower_left_corner_init = np.array(((0.0, 0.0), (10.0, 20.0)))
    screen_dimensions_init = np.array(((501.0, 501.0), (400.0, 600.0)))
    near_init = np.array(((0.01,), (1.0,)))
    far_init = np.array(((4.0,), (3.0,)))

    # Build matrices.
    model_to_eye_matrix = look_at.right_handed(camera_position_init,
                                               look_at_init, camera_up_init)
    perspective_matrix = perspective.right_handed(
        vertical_field_of_view_init,
        screen_dimensions_init[..., 0:1] / screen_dimensions_init[..., 1:2],
        near_init, far_init)

    args = [
        point_world_space_init, model_to_eye_matrix, perspective_matrix,
        screen_dimensions_init, lower_left_corner_init
    ]

    with self.subTest(name="jacobian_y_projection"):
      self.assert_jacobian_is_correct_fn(
          lambda *args: glm.model_to_screen(*args)[0], args, atol=1e-4)
Example #6
  def test_model_to_screen_preset(self):
    """Tests that model_to_screen generates expected results."""
    point_world_space = np.array(((3.1, 4.1, 5.1), (-1.1, 2.2, -3.1)))
    camera_position = np.array(((0.0, 0.0, 0.0), (0.4, -0.8, 0.1)))
    camera_up = np.array(((0.0, 1.0, 0.0), (0.0, 0.0, 1.0)))
    look_at_point = np.array(((0.0, 0.0, 1.0), (0.0, 1.0, 0.0)))
    vertical_field_of_view = np.array(
        ((60.0 * math.pi / 180.0,), (65 * math.pi / 180,)))
    lower_left_corner = np.array(((0.0, 0.0), (10.0, 20.0)))
    screen_dimensions = np.array(((501.0, 501.0), (400.0, 600.0)))
    near = np.array(((0.01,), (1.0,)))
    far = np.array(((4.0,), (3.0,)))

    # Build matrices.
    model_to_eye_matrix = look_at.right_handed(camera_position, look_at_point,
                                               camera_up)
    perspective_matrix = perspective.right_handed(
        vertical_field_of_view,
        screen_dimensions[..., 0:1] / screen_dimensions[..., 1:2], near, far)

    pred_screen, pred_w = glm.model_to_screen(point_world_space,
                                              model_to_eye_matrix,
                                              perspective_matrix,
                                              screen_dimensions,
                                              lower_left_corner)

    gt_screen = ((-13.23016357, 599.30444336, 4.00215721),
                 (98.07017517, -95.40383911, 3.1234405))
    gt_w = ((5.1,), (3.42247,))
    self.assertAllClose(pred_screen, gt_screen, atol=1e-5, rtol=1e-5)
    self.assertAllClose(pred_w, gt_w)
Example #7
def model_to_eye(point_model_space,
                 camera_position,
                 look_at_point,
                 up_vector,
                 name=None):
    """Transforms points from model to eye coordinates.

  Note:
    In the following, A1 to An are optional batch dimensions which must be
    broadcast compatible.

  Args:
    point_model_space: A tensor of shape `[A1, ..., An, 3]`, where the last
      dimension represents the 3D points in model space.
    camera_position: A tensor of shape `[A1, ..., An, 3]`, where the last
      dimension represents the 3D position of the camera.
    look_at_point: A tensor of shape `[A1, ..., An, 3]`, with the last dimension
      storing the position the camera is looking at.
    up_vector: A tensor of shape `[A1, ..., An, 3]`, where the last dimension
      defines the up vector of the camera.
    name: A name for this op. Defaults to 'model_to_eye'.

  Raises:
    ValueError: if all the inputs are not of the same shape, or if any input is
      of an unsupported shape.

  Returns:
    A tensor of shape `[A1, ..., An, 3]`, containing `point_model_space` in eye
    coordinates.
  """
    with tf.compat.v1.name_scope(
            name, "model_to_eye",
        [point_model_space, camera_position, look_at_point, up_vector]):
        point_model_space = tf.convert_to_tensor(value=point_model_space)
        camera_position = tf.convert_to_tensor(value=camera_position)
        look_at_point = tf.convert_to_tensor(value=look_at_point)
        up_vector = tf.convert_to_tensor(value=up_vector)

        shape.check_static(tensor=point_model_space,
                           tensor_name="point_model_space",
                           has_dim_equals=(-1, 3))
        shape.compare_batch_dimensions(tensors=(point_model_space,
                                                camera_position),
                                       last_axes=-2,
                                       tensor_names=("point_model_space",
                                                     "camera_position"),
                                       broadcast_compatible=True)

        model_to_eye_matrix = look_at.right_handed(camera_position,
                                                   look_at_point, up_vector)
        batch_shape = tf.shape(input=point_model_space)[:-1]
        one = tf.ones(shape=tf.concat((batch_shape, (1, )), axis=-1),
                      dtype=point_model_space.dtype)
        point_model_space = tf.concat((point_model_space, one), axis=-1)
        point_model_space = tf.expand_dims(point_model_space, axis=-1)
        res = tf.squeeze(tf.matmul(model_to_eye_matrix, point_model_space),
                         axis=-1)
        return res[..., :-1]
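
For reference, a minimal usage sketch of the function above (the values are illustrative only; the expected result follows from the look_at preset in Example #8, where a camera at the origin looking down +z yields a view matrix that negates x and z):

import numpy as np

point_model_space = np.array(((2.0, 3.0, 4.0),))  # Shape [1, 3].
camera_position = np.array(((0.0, 0.0, 0.0),))
look_at_point = np.array(((0.0, 0.0, 1.0),))
up_vector = np.array(((0.0, 1.0, 0.0),))
# The rotation block of this camera's view matrix is diag(-1, 1, -1)
# (see Example #8), so the result is ((-2.0, 3.0, -4.0),).
point_eye_space = model_to_eye(point_model_space, camera_position,
                               look_at_point, up_vector)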
Example #8
    def test_look_at_right_handed_preset(self):
        """Tests that look_at_right_handed generates expected results."""
        camera_position = ((0.0, 0.0, 0.0), (0.1, 0.2, 0.3))
        look_at_point = ((0.0, 0.0, 1.0), (0.4, 0.5, 0.6))
        up_vector = ((0.0, 1.0, 0.0), (0.7, 0.8, 0.9))

        pred = look_at.right_handed(camera_position, look_at_point, up_vector)

        gt = (((-1.0, 0.0, 0.0, 0.0), (0.0, 1.0, 0.0, 0.0),
               (0.0, 0.0, -1.0, 0.0), (0.0, 0.0, 0.0, 1.0)),
              ((4.08248186e-01, -8.16496551e-01, 4.08248395e-01,
                -2.98023224e-08), (-7.07106888e-01, 1.19209290e-07,
                                   7.07106769e-01, -1.41421378e-01),
               (-5.77350318e-01, -5.77350318e-01, -5.77350318e-01,
                3.46410215e-01), (0.0, 0.0, 0.0, 1.0)))
        self.assertAllClose(pred, gt)
Example #9
  def test_perspective_correct_interpolation_preset(self):
    """Tests that perspective_correct_interpolation generates expected results."""
    camera_origin = np.array((0.0, 0.0, 0.0))
    camera_up = np.array((0.0, 1.0, 0.0))
    look_at_point = np.array((0.0, 0.0, 1.0))
    fov = np.array((90.0 * np.pi / 180.0,))
    bottom_left = np.array((0.0, 0.0))
    image_size = np.array((501.0, 501.0))
    near_plane = np.array((0.01,))
    far_plane = np.array((10.0,))
    batch_size = np.random.randint(1, 5)
    triangle_x_y = np.random.uniform(-10.0, 10.0, (batch_size, 3, 2))
    triangle_z = np.random.uniform(2.0, 10.0, (batch_size, 3, 1))
    triangles = np.concatenate((triangle_x_y, triangle_z), axis=-1)
    # Builds barycentric weights.
    barycentric_weights = np.random.uniform(size=(batch_size, 3))
    barycentric_weights = barycentric_weights / np.sum(
        barycentric_weights, axis=-1, keepdims=True)
    # Barycentric interpolation of vertex positions.
    convex_combination = np.einsum("ba, bac -> bc", barycentric_weights,
                                   triangles)
    # Build matrices.
    model_to_eye_matrix = look_at.right_handed(camera_origin, look_at_point,
                                               camera_up)
    perspective_matrix = perspective.right_handed(
        fov, (image_size[0:1] / image_size[1:2]), near_plane, far_plane)

    # Computes where those points project in screen coordinates.
    pixel_position, _ = glm.model_to_screen(convex_combination,
                                            model_to_eye_matrix,
                                            perspective_matrix, image_size,
                                            bottom_left)

    # Builds attributes.
    num_pixels = pixel_position.shape[0]
    attribute_size = np.random.randint(10)
    attributes = np.random.uniform(size=(num_pixels, 3, attribute_size))

    prediction = glm.perspective_correct_interpolation(triangles, attributes,
                                                       pixel_position[..., 0:2],
                                                       model_to_eye_matrix,
                                                       perspective_matrix,
                                                       image_size, bottom_left)

    groundtruth = np.einsum("ba, bac -> bc", barycentric_weights, attributes)
    self.assertAllClose(prediction, groundtruth)
Example #10
    def test_rasterize_preset(self):
        camera_origin = (0.0, 0.0, 0.0)
        camera_up = (0.0, 1.0, 0.0)
        look_at_point = (0.0, 0.0, 1.0)
        field_of_view = (60 * np.pi / 180, )
        near_plane = (0.01, )
        far_plane = (400.0, )

        # Construct the view projection matrix.
        model_to_eye_matrix = look_at.right_handed(camera_origin,
                                                   look_at_point, camera_up)
        perspective_matrix = perspective.right_handed(
            field_of_view, (float(_IMAGE_WIDTH) / float(_IMAGE_HEIGHT), ),
            near_plane, far_plane)
        view_projection_matrix = tf.linalg.matmul(perspective_matrix,
                                                  model_to_eye_matrix)
        view_projection_matrix = tf.expand_dims(view_projection_matrix, axis=0)

        depth = 1.0
        vertices = np.array([[(-2.0 * _TRIANGLE_SIZE, 0.0, depth),
                              (0.0, _TRIANGLE_SIZE, depth), (0.0, 0.0, depth),
                              (0.0, -_TRIANGLE_SIZE, depth)]],
                            dtype=np.float32)
        triangles = np.array(((1, 2, 0), (0, 2, 3)), np.int32)

        predicted_fb = rasterization_backend.rasterize(
            vertices, triangles, view_projection_matrix,
            (_IMAGE_WIDTH, _IMAGE_HEIGHT))

        with self.subTest(name="triangle_index"):
            groundtruth_triangle_index = np.zeros(
                (1, _IMAGE_HEIGHT, _IMAGE_WIDTH, 1), dtype=np.int32)
            groundtruth_triangle_index[..., :_IMAGE_WIDTH // 2, 0] = 0
            groundtruth_triangle_index[..., :_IMAGE_HEIGHT // 2,
                                       _IMAGE_WIDTH // 2:, 0] = 1
            self.assertAllEqual(groundtruth_triangle_index,
                                predicted_fb.triangle_id)

        with self.subTest(name="mask"):
            groundtruth_mask = np.ones((1, _IMAGE_HEIGHT, _IMAGE_WIDTH, 1),
                                       dtype=np.int32)
            groundtruth_mask[..., :_IMAGE_WIDTH // 2, 0] = 0
            self.assertAllEqual(groundtruth_mask, predicted_fb.foreground_mask)

        attributes = np.array(((1.0, 0.0, 0.0), (0.0, 1.0, 0.0),
                               (0.0, 0.0, 1.0))).astype(np.float32)
        perspective_correct_interpolation = lambda geometry, pixels: glm.perspective_correct_interpolation(  # pylint: disable=g-long-lambda,line-too-long
            geometry, attributes, pixels, model_to_eye_matrix,
            perspective_matrix,
            np.array((_IMAGE_WIDTH, _IMAGE_HEIGHT)).astype(np.float32),
            np.array((0.0, 0.0)).astype(np.float32))
        with self.subTest(name="barycentric_coordinates_triangle_0"):
            geometry_0 = tf.gather(vertices, triangles[0, :], axis=1)
            pixels_0 = tf.transpose(grid.generate((3.5, 2.5), (6.5, 4.5),
                                                  (4, 3)),
                                    perm=(1, 0, 2))
            barycentrics_gt_0 = perspective_correct_interpolation(
                geometry_0, pixels_0)
            self.assertAllClose(barycentrics_gt_0,
                                predicted_fb.barycentrics.value[0, 2:, 3:, :],
                                atol=1e-3)

        with self.subTest(name="barycentric_coordinates_triangle_1"):
            geometry_1 = tf.gather(vertices, triangles[1, :], axis=1)
            pixels_1 = tf.transpose(grid.generate((3.5, 0.5), (6.5, 1.5),
                                                  (4, 2)),
                                    perm=(1, 0, 2))
            barycentrics_gt_1 = perspective_correct_interpolation(
                geometry_1, pixels_1)
            self.assertAllClose(barycentrics_gt_1,
                                predicted_fb.barycentrics.value[0, 0:2, 3:, :],
                                atol=1e-3)
Example #11
    def test_rasterize(self):
        max_depth = 10
        min_depth = 2
        height = 480
        width = 640
        camera_origin = (0.0, 0.0, 0.0)
        camera_up = (0.0, 1.0, 0.0)
        look_at_point = (0.0, 0.0, 1.0)
        fov = (60.0 * np.pi / 180, )
        near_plane = (1.0, )
        far_plane = (10.0, )
        batch_shape = tf.convert_to_tensor(value=(2, (max_depth - min_depth) //
                                                  2),
                                           dtype=tf.int32)

        world_to_camera = look_at.right_handed(camera_origin, look_at_point,
                                               camera_up)
        perspective_matrix = perspective.right_handed(
            fov, (float(width) / float(height), ), near_plane, far_plane)
        view_projection_matrix = tf.matmul(perspective_matrix, world_to_camera)
        view_projection_matrix = tf.squeeze(view_projection_matrix)

        # Generate triangles at different depths and associated ground truth.
        tris = np.zeros((max_depth - min_depth, 9), dtype=np.float32)
        gt = np.zeros((max_depth - min_depth, height, width, 2),
                      dtype=np.float32)
        for idx in range(max_depth - min_depth):
            tris[idx, :] = (-100.0, 100.0, idx + min_depth, 100.0, 100.0,
                            idx + min_depth, 0.0, -100.0, idx + min_depth)
            gt[idx, :, :, :] = (0, idx + min_depth)

        # Broadcast the variables.
        render_parameters = {
            "view_projection_matrix":
            ("mat",
             tf.broadcast_to(
                 input=view_projection_matrix,
                 shape=tf.concat(
                     values=(batch_shape,
                             tf.shape(input=view_projection_matrix)[-2:]),
                     axis=0))),
            "triangular_mesh":
            ("buffer",
             tf.reshape(tris,
                        shape=tf.concat(values=(batch_shape, (9, )), axis=0)))
        }
        # Reshape the ground truth.
        gt = tf.reshape(gt,
                        shape=tf.concat(values=(batch_shape, (height, width,
                                                              2)),
                                        axis=0))

        render_parameters = list(six.iteritems(render_parameters))
        variable_names = [v[0] for v in render_parameters]
        variable_kinds = [v[1][0] for v in render_parameters]
        variable_values = [v[1][1] for v in render_parameters]

        def rasterize():
            return rasterization_backend.render_ops.rasterize(
                num_points=3,
                variable_names=variable_names,
                variable_kinds=variable_kinds,
                variable_values=variable_values,
                output_resolution=(width, height),
                vertex_shader=test_vertex_shader,
                geometry_shader=test_geometry_shader,
                fragment_shader=test_fragment_shader,
            )

        result = rasterize()
        self.assertAllClose(result[..., 2:4], gt)

        @tf.function
        def check_lazy_shape():
            # Within @tf.function, the tensor shape is determined by SetShapeFn
            # callback. Ensure that the shape of non-batch axes matches that of
            # the actual tensor evaluated in eager mode above.
            lazy_shape = rasterize().shape
            self.assertEqual(lazy_shape[-3:], list(result.shape)[-3:])

        check_lazy_shape()
Example #12
def make_look_at_matrix(camera_origin=(0.0, 0.0, 0.0),
                        look_at_point=(0.0, 0.0, 0.0)):
    """Shortcut util function to creat model-to-eye matrix for tests."""
    camera_up = (0.0, 1.0, 0.0)
    return look_at.right_handed(camera_origin, look_at_point, camera_up)
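
A quick sanity check of this helper against the look_at preset in Example #8: with the camera at the origin looking down +z, the resulting view matrix negates the x and z axes.

matrix = make_look_at_matrix(camera_origin=(0.0, 0.0, 0.0),
                             look_at_point=(0.0, 0.0, 1.0))
# Per Example #8: ((-1.0, 0.0, 0.0, 0.0), (0.0, 1.0, 0.0, 0.0),
#                  (0.0, 0.0, -1.0, 0.0), (0.0, 0.0, 0.0, 1.0)).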
Example #13
def model_to_screen(point_model_space,
                    camera_position,
                    look_at_point,
                    up_vector,
                    vertical_field_of_view,
                    screen_dimensions,
                    near,
                    far,
                    lower_left_corner,
                    name=None):
    """Transforms points from model to screen coordinates.

  Note:
    Please refer to http://www.songho.ca/opengl/gl_transform.html for an
    in-depth review of this pipeline.

  Note:
    In the following, A1 to An are optional batch dimensions which must be
    broadcast compatible.

  Args:
    point_model_space: A tensor of shape `[A1, ..., An, 3]`, where the last
      dimension represents the 3D points in model space.
    camera_position: A tensor of shape `[A1, ..., An, 3]`, where the last
      dimension represents the 3D position of the camera.
    look_at_point: A tensor of shape `[A1, ..., An, 3]`, with the last dimension
      storing the position the camera is looking at.
    up_vector: A tensor of shape `[A1, ..., An, 3]`, where the last dimension
      defines the up vector of the camera.
    vertical_field_of_view: A tensor of shape `[A1, ..., An, 1]`, where the last
      dimension represents the vertical field of view of the frustum. Note that
      values for `vertical_field_of_view` must be in the range (0, pi).
    screen_dimensions: A tensor of shape `[A1, ..., An, 2]`, where the last
      dimension captures the width and the height of the screen in pixels.
    near: A tensor of shape `[A1, ..., An, 1]`, where the last dimension
      captures the distance between the viewer and the near clipping plane. Note
      that values for `near` must be non-negative.
    far: A tensor of shape `[A1, ..., An, 1]`, where the last dimension
      captures the distance between the viewer and the far clipping plane. Note
      that values for `far` must be greater than those of `near`.
    lower_left_corner: A tensor of shape `[A1, ..., An, 2]`, where the last
      dimension captures the position (in pixels) of the lower left corner of
      the screen.
    name: A name for this op. Defaults to 'model_to_screen'.

  Raises:
    InvalidArgumentError: if any input contains data not in the specified range
      of valid values.
    ValueError: If any input is of an unsupported shape.

  Returns:
    A tuple of two tensors, respectively of shape `[A1, ..., An, 3]` and
    `[A1, ..., An, 1]`, where the first tensor contains the projection of
    `point_model_space` in screen coordinates, and the second contains the 'w'
    component of `point_model_space` in clip space.
  """
    with tf.compat.v1.name_scope(name, "model_to_screen", [
            point_model_space, camera_position, look_at_point, up_vector,
            vertical_field_of_view, screen_dimensions, near, far,
            lower_left_corner
    ]):
        point_model_space = tf.convert_to_tensor(value=point_model_space)
        camera_position = tf.convert_to_tensor(value=camera_position)
        look_at_point = tf.convert_to_tensor(value=look_at_point)
        up_vector = tf.convert_to_tensor(value=up_vector)
        vertical_field_of_view = tf.convert_to_tensor(
            value=vertical_field_of_view)
        near = tf.convert_to_tensor(value=near)
        far = tf.convert_to_tensor(value=far)
        screen_dimensions = tf.convert_to_tensor(value=screen_dimensions)

        shape.check_static(tensor=point_model_space,
                           tensor_name="point_model_space",
                           has_dim_equals=(-1, 3))
        shape.check_static(tensor=screen_dimensions,
                           tensor_name="screen_dimensions",
                           has_dim_equals=(-1, 2))
        shape.compare_batch_dimensions(
            tensors=(point_model_space, camera_position,
                     vertical_field_of_view, near, far),
            last_axes=-2,
            tensor_names=("point_model_space", "camera_position",
                          "vertical_field_of_view", "aspect_ratio", "near",
                          "far"),
            broadcast_compatible=True)

        batch_shape = tf.shape(input=point_model_space)[:-1]
        one = tf.ones(shape=tf.concat((batch_shape, (1, )), axis=-1),
                      dtype=point_model_space.dtype)
        point_model_space = tf.concat((point_model_space, one), axis=-1)
        point_model_space = tf.expand_dims(point_model_space, axis=-1)

        # The following block performs the equivalent of model_to_eye followed by
        # eye_to_clip.
        model_to_eye_matrix = look_at.right_handed(camera_position,
                                                   look_at_point, up_vector)
        perspective_matrix = perspective.right_handed(
            vertical_field_of_view,
            screen_dimensions[..., 0:1] / screen_dimensions[..., 1:2], near,
            far)
        view_projection_matrix = tf.linalg.matmul(perspective_matrix,
                                                  model_to_eye_matrix)
        point_clip_space = tf.squeeze(tf.matmul(view_projection_matrix,
                                                point_model_space),
                                      axis=-1)

        point_ndc_space = clip_to_ndc(point_clip_space)
        point_screen_space = ndc_to_screen(point_ndc_space, lower_left_corner,
                                           screen_dimensions, near, far)
        return point_screen_space, point_clip_space[..., 3:4]
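
A minimal call sketch for the signature above, reusing the first batch entry of the preset test in Example #6 so the expected output is known:

import math
import numpy as np

point_screen, w = model_to_screen(
    point_model_space=np.array(((3.1, 4.1, 5.1),)),
    camera_position=np.array(((0.0, 0.0, 0.0),)),
    look_at_point=np.array(((0.0, 0.0, 1.0),)),
    up_vector=np.array(((0.0, 1.0, 0.0),)),
    vertical_field_of_view=np.array(((60.0 * math.pi / 180.0,),)),
    screen_dimensions=np.array(((501.0, 501.0),)),
    near=np.array(((0.01,),)),
    far=np.array(((4.0,),)),
    lower_left_corner=np.array(((0.0, 0.0),)))
# Per Example #6: point_screen ≈ ((-13.2302, 599.3044, 4.0022),)
# and w ≈ ((5.1,),).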
Example #14
    def __init__(self,
                 background_vertices,
                 background_attributes,
                 background_triangles,
                 camera_origin,
                 look_at_point,
                 camera_up,
                 field_of_view,
                 image_size,
                 near_plane,
                 far_plane,
                 bottom_left=(0.0, 0.0),
                 name=None):
        """Initializes TriangleRasterizer with OpenGL parameters and the background.

    Note:
      In the following, A1 to An are optional batch dimensions.

    Args:
      background_vertices: A tensor of shape `[V, 3]` containing `V` 3D
        vertices. Note that these background vertices will be used in every
        rasterized image.
      background_attributes: A tensor of shape `[V, K]` containing `V` vertices
        associated with K-dimensional attributes. Pixels for which the first
        visible surface is in the background geometry will make use of
        `background_attributes` for estimating their own attribute. Note that
        these background attributes will be used in every rasterized image.
      background_triangles: An integer tensor of shape `[T, 3]` containing `T`
        triangles, each associated with 3 vertices from `background_vertices`.
        Note that these background triangles will be used in every rasterized
        image.
      camera_origin: A Tensor of shape `[A1, ..., An, 3]`, where the last axis
        represents the 3D position of the camera.
      look_at_point: A Tensor of shape `[A1, ..., An, 3]`, with the last axis
        storing the position the camera is looking at.
      camera_up: A Tensor of shape `[A1, ..., An, 3]`, where the last axis
        defines the up vector of the camera.
      field_of_view:  A Tensor of shape `[A1, ..., An, 1]`, where the last axis
        represents the vertical field of view of the frustum expressed in
        radians. Note that values for `field_of_view` must be in the range (0,
        pi).
      image_size: A tuple (height, width) containing the dimensions in pixels of
        the rasterized image".
      near_plane: A Tensor of shape `[A1, ..., An, 1]`, where the last axis
        captures the distance between the viewer and the near clipping plane.
        Note that values for `near_plane` must be non-negative.
      far_plane: A Tensor of shape `[A1, ..., An, 1]`, where the last axis
        captures the distance between the viewer and the far clipping plane.
        Note that values for `far_plane` must be greater than those of
        `near_plane`.
      bottom_left: A Tensor of shape `[A1, ..., An, 2]`, where the last axis
        captures the position (in pixels) of the lower left corner of the
        screen. Defaults to (0.0, 0.0).
      name: A name for this op. Defaults to 'triangle_rasterizer_init'.
    """
        with tf.compat.v1.name_scope(
                name, "triangle_rasterizer_init",
            (background_vertices, background_attributes, background_triangles,
             camera_origin, look_at_point, camera_up, field_of_view,
             near_plane, far_plane, bottom_left)):

            background_vertices = tf.convert_to_tensor(
                value=background_vertices)
            background_attributes = tf.convert_to_tensor(
                value=background_attributes)
            background_triangles = tf.convert_to_tensor(
                value=background_triangles)

            shape.check_static(tensor=background_vertices,
                               tensor_name="background_vertices",
                               has_rank=2,
                               has_dim_equals=(-1, 3))
            shape.check_static(tensor=background_attributes,
                               tensor_name="background_attributes",
                               has_rank=2)
            shape.check_static(
                tensor=background_triangles,
                tensor_name="background_triangles",
                # has_rank=2,
                has_dim_equals=(-1, 3))
            shape.compare_batch_dimensions(
                tensors=(background_vertices, background_attributes),
                last_axes=-2,
                tensor_names=("background_geometry", "background_attribute"),
                broadcast_compatible=False)

            background_vertices = tf.expand_dims(background_vertices, axis=0)
            background_attributes = tf.expand_dims(background_attributes,
                                                   axis=0)

            height = float(image_size[0])
            width = float(image_size[1])

            self._background_geometry = tf.gather(background_vertices,
                                                  background_triangles,
                                                  axis=-2)
            self._background_attribute = tf.gather(background_attributes,
                                                   background_triangles,
                                                   axis=-2)

            self._camera_origin = tf.convert_to_tensor(value=camera_origin)
            self._look_at_point = tf.convert_to_tensor(value=look_at_point)
            self._camera_up = tf.convert_to_tensor(value=camera_up)
            self._field_of_view = tf.convert_to_tensor(value=field_of_view)
            self._image_size_glm = tf.convert_to_tensor(value=(width, height))
            self._image_size_int = (int(width), int(height))
            self._near_plane = tf.convert_to_tensor(value=near_plane)
            self._far_plane = tf.convert_to_tensor(value=far_plane)
            self._bottom_left = tf.convert_to_tensor(value=bottom_left)

            # Construct the pixel grid. Note that OpenGL uses half-integer pixel
            # centers.
            px = tf.linspace(0.5, width - 0.5, num=int(width))
            py = tf.linspace(0.5, height - 0.5, num=int(height))
            xv, yv = tf.meshgrid(px, py)
            self._pixel_position = tf.stack((xv, yv), axis=-1)

            # Construct the view projection matrix.
            world_to_camera = look_at.right_handed(camera_origin,
                                                   look_at_point, camera_up)
            perspective_matrix = perspective.right_handed(
                field_of_view, (width / height, ), near_plane, far_plane)
            perspective_matrix = tf.squeeze(perspective_matrix)
            self._view_projection_matrix = tf.linalg.matmul(
                perspective_matrix, world_to_camera)
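
A minimal construction sketch for the rasterizer above. The camera settings mirror the rasterization tests earlier on this page; the single background triangle, its zero attributes, and the image size are illustrative placeholders only:

import numpy as np

rasterizer = TriangleRasterizer(
    background_vertices=np.array(((-1.0, -1.0, 100.0), (1.0, -1.0, 100.0),
                                  (0.0, 1.0, 100.0)), dtype=np.float32),
    background_attributes=np.zeros((3, 3), dtype=np.float32),
    background_triangles=np.array(((0, 1, 2),), dtype=np.int32),
    camera_origin=(0.0, 0.0, 0.0),
    look_at_point=(0.0, 0.0, 1.0),
    camera_up=(0.0, 1.0, 0.0),
    field_of_view=(60.0 * np.pi / 180.0,),
    image_size=(480, 640),  # (height, width) in pixels.
    near_plane=(0.01,),
    far_plane=(400.0,))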