def _generate_vertices_and_view_matrices():
  camera_origin = ((0.0, 0.0, 0.0), (0.0, 0.0, 0.0))
  camera_up = ((0.0, 1.0, 0.0), (0.0, 1.0, 0.0))
  look_at_point = ((0.0, 0.0, 1.0), (0.0, 0.0, -1.0))
  field_of_view = ((60 * np.pi / 180,), (60 * np.pi / 180,))
  near_plane = ((0.01,), (0.01,))
  far_plane = ((400.0,), (400.0,))
  aspect_ratio = ((float(_IMAGE_WIDTH) / float(_IMAGE_HEIGHT),),
                  (float(_IMAGE_WIDTH) / float(_IMAGE_HEIGHT),))
  # Construct the view projection matrix.
  world_to_camera = look_at.right_handed(camera_origin, look_at_point,
                                         camera_up)
  perspective_matrix = perspective.right_handed(field_of_view, aspect_ratio,
                                                near_plane, far_plane)
  # Shape [2, 4, 4].
  view_projection_matrix = tf.linalg.matmul(perspective_matrix,
                                            world_to_camera)
  depth = 1.0
  # Shape [2, 3, 3].
  vertices = (((-10.0 * _TRIANGLE_SIZE, 10.0 * _TRIANGLE_SIZE, depth),
               (10.0 * _TRIANGLE_SIZE, 10.0 * _TRIANGLE_SIZE, depth),
               (0.0, -10.0 * _TRIANGLE_SIZE, depth)),
              ((-_TRIANGLE_SIZE, 0.0, depth), (0.0, _TRIANGLE_SIZE, depth),
               (0.0, 0.0, depth)))
  return vertices, view_projection_matrix
def test_perspective_correct_barycentrics_jacobian_random(self):
  """Tests the Jacobian of perspective_correct_barycentrics."""
  tensor_size = np.random.randint(1, 3)
  tensor_shape = np.random.randint(1, 5, size=(tensor_size)).tolist()
  vertices_init = np.random.uniform(size=tensor_shape + [3, 3])
  pixel_position_init = np.random.uniform(size=tensor_shape + [2])
  camera_position_init = np.random.uniform(size=tensor_shape + [3, 3])
  look_at_init = np.random.uniform(size=tensor_shape + [3, 3])
  up_vector_init = np.random.uniform(size=tensor_shape + [3, 3])
  vertical_field_of_view_init = np.random.uniform(
      0.1, 1.0, size=tensor_shape + [3, 1])
  screen_dimensions_init = np.random.uniform(
      1.0, 10.0, size=tensor_shape + [3, 2])
  near_init = np.random.uniform(1.0, 10.0, size=tensor_shape + [3, 1])
  far_init = near_init + np.random.uniform(
      0.1, 1.0, size=tensor_shape + [3, 1])
  lower_left_corner_init = np.random.uniform(size=tensor_shape + [3, 2])

  # Build matrices.
  model_to_eye_matrix_init = look_at.right_handed(camera_position_init,
                                                  look_at_init,
                                                  up_vector_init)
  perspective_matrix_init = perspective.right_handed(
      vertical_field_of_view_init,
      screen_dimensions_init[..., 0:1] / screen_dimensions_init[..., 1:2],
      near_init, far_init)

  self.assert_jacobian_is_correct_fn(
      glm.perspective_correct_barycentrics, [
          vertices_init, pixel_position_init, model_to_eye_matrix_init,
          perspective_matrix_init, screen_dimensions_init,
          lower_left_corner_init
      ],
      atol=1e-4)
def test_model_to_screen_jacobian_random(self):
  """Tests the Jacobian of model_to_screen."""
  tensor_size = np.random.randint(1, 3)
  tensor_shape = np.random.randint(1, 5, size=(tensor_size)).tolist()
  point_world_space_init = np.random.uniform(size=tensor_shape + [3])
  camera_position_init = np.random.uniform(size=tensor_shape + [3])
  camera_up_init = np.random.uniform(size=tensor_shape + [3])
  look_at_init = np.random.uniform(size=tensor_shape + [3])
  vertical_field_of_view_init = np.random.uniform(
      0.1, 1.0, size=tensor_shape + [1])
  lower_left_corner_init = np.random.uniform(size=tensor_shape + [2])
  screen_dimensions_init = np.random.uniform(
      0.1, 1.0, size=tensor_shape + [2])
  near_init = np.random.uniform(0.1, 1.0, size=tensor_shape + [1])
  far_init = near_init + np.random.uniform(0.1, 1.0, size=tensor_shape + [1])

  # Build matrices.
  model_to_eye_matrix = look_at.right_handed(camera_position_init,
                                             look_at_init, camera_up_init)
  perspective_matrix = perspective.right_handed(
      vertical_field_of_view_init,
      screen_dimensions_init[..., 0:1] / screen_dimensions_init[..., 1:2],
      near_init, far_init)

  args = [
      point_world_space_init, model_to_eye_matrix, perspective_matrix,
      screen_dimensions_init, lower_left_corner_init
  ]
  with self.subTest(name="jacobian_y_projection"):
    self.assert_jacobian_is_correct_fn(
        lambda *args: glm.model_to_screen(*args)[0], args, atol=1e-4)
def test_perspective_correct_interpolation_jacobian_preset(self):
  """Tests the Jacobian of perspective_correct_interpolation."""
  vertices_init = np.tile(
      ((-0.2857143, 0.2857143, 5.0), (0.2857143, 0.2857143, 0.5),
       (0.0, -0.2857143, 1.0)), (2, 1, 1))
  attributes_init = np.tile(
      ((1.0, 0.0, 0.0), (0.0, 1.0, 0.0), (0.0, 0.0, 1.0)), (2, 1, 1))
  pixel_position_init = np.array(((125.5, 375.5), (250.5, 250.5)))
  camera_position_init = np.tile((0.0, 0.0, 0.0), (2, 3, 1))
  look_at_init = np.tile((0.0, 0.0, 1.0), (2, 3, 1))
  up_vector_init = np.tile((0.0, 1.0, 0.0), (2, 3, 1))
  vertical_field_of_view_init = np.tile((1.0471975511965976,), (2, 3, 1))
  screen_dimensions_init = np.tile((501.0, 501.0), (2, 3, 1))
  near_init = np.tile((0.01,), (2, 3, 1))
  far_init = np.tile((10.0,), (2, 3, 1))
  lower_left_corner_init = np.tile((0.0, 0.0), (2, 3, 1))

  # Build matrices.
  model_to_eye_matrix_init = look_at.right_handed(camera_position_init,
                                                  look_at_init,
                                                  up_vector_init)
  perspective_matrix_init = perspective.right_handed(
      vertical_field_of_view_init,
      screen_dimensions_init[..., 0:1] / screen_dimensions_init[..., 1:2],
      near_init, far_init)

  self.assert_jacobian_is_correct_fn(glm.perspective_correct_interpolation, [
      vertices_init, attributes_init, pixel_position_init,
      model_to_eye_matrix_init, perspective_matrix_init,
      screen_dimensions_init, lower_left_corner_init
  ])
def test_model_to_screen_jacobian_preset(self):
  """Tests the Jacobian of model_to_screen."""
  point_world_space_init = np.array(((3.1, 4.1, 5.1), (-1.1, 2.2, -3.1)))
  camera_position_init = np.array(((0.0, 0.0, 0.0), (0.4, -0.8, 0.1)))
  camera_up_init = np.array(((0.0, 1.0, 0.0), (0.0, 0.0, 1.0)))
  look_at_init = np.array(((0.0, 0.0, 1.0), (0.0, 1.0, 0.0)))
  vertical_field_of_view_init = np.array(
      ((60.0 * math.pi / 180.0,), (65 * math.pi / 180,)))
  lower_left_corner_init = np.array(((0.0, 0.0), (10.0, 20.0)))
  screen_dimensions_init = np.array(((501.0, 501.0), (400.0, 600.0)))
  near_init = np.array(((0.01,), (1.0,)))
  far_init = np.array(((4.0,), (3.0,)))

  # Build matrices.
  model_to_eye_matrix = look_at.right_handed(camera_position_init,
                                             look_at_init, camera_up_init)
  perspective_matrix = perspective.right_handed(
      vertical_field_of_view_init,
      screen_dimensions_init[..., 0:1] / screen_dimensions_init[..., 1:2],
      near_init, far_init)

  args = [
      point_world_space_init, model_to_eye_matrix, perspective_matrix,
      screen_dimensions_init, lower_left_corner_init
  ]
  with self.subTest(name="jacobian_y_projection"):
    self.assert_jacobian_is_correct_fn(
        lambda *args: glm.model_to_screen(*args)[0], args, atol=1e-4)
def test_model_to_screen_preset(self):
  """Tests that model_to_screen generates expected results."""
  point_world_space = np.array(((3.1, 4.1, 5.1), (-1.1, 2.2, -3.1)))
  camera_position = np.array(((0.0, 0.0, 0.0), (0.4, -0.8, 0.1)))
  camera_up = np.array(((0.0, 1.0, 0.0), (0.0, 0.0, 1.0)))
  look_at_point = np.array(((0.0, 0.0, 1.0), (0.0, 1.0, 0.0)))
  vertical_field_of_view = np.array(
      ((60.0 * math.pi / 180.0,), (65 * math.pi / 180,)))
  lower_left_corner = np.array(((0.0, 0.0), (10.0, 20.0)))
  screen_dimensions = np.array(((501.0, 501.0), (400.0, 600.0)))
  near = np.array(((0.01,), (1.0,)))
  far = np.array(((4.0,), (3.0,)))

  # Build matrices.
  model_to_eye_matrix = look_at.right_handed(camera_position, look_at_point,
                                             camera_up)
  perspective_matrix = perspective.right_handed(
      vertical_field_of_view,
      screen_dimensions[..., 0:1] / screen_dimensions[..., 1:2], near, far)

  pred_screen, pred_w = glm.model_to_screen(point_world_space,
                                            model_to_eye_matrix,
                                            perspective_matrix,
                                            screen_dimensions,
                                            lower_left_corner)
  gt_screen = ((-13.23016357, 599.30444336, 4.00215721),
               (98.07017517, -95.40383911, 3.1234405))
  gt_w = ((5.1,), (3.42247,))
  self.assertAllClose(pred_screen, gt_screen, atol=1e-5, rtol=1e-5)
  self.assertAllClose(pred_w, gt_w)
def model_to_eye(point_model_space,
                 camera_position,
                 look_at_point,
                 up_vector,
                 name=None):
  """Transforms points from model to eye coordinates.

  Note: In the following, A1 to An are optional batch dimensions which must be
  broadcast compatible.

  Args:
    point_model_space: A tensor of shape `[A1, ..., An, 3]`, where the last
      dimension represents the 3D points in model space.
    camera_position: A tensor of shape `[A1, ..., An, 3]`, where the last
      dimension represents the 3D position of the camera.
    look_at_point: A tensor of shape `[A1, ..., An, 3]`, with the last
      dimension storing the point the camera is looking at.
    up_vector: A tensor of shape `[A1, ..., An, 3]`, where the last dimension
      defines the up vector of the camera.
    name: A name for this op. Defaults to 'model_to_eye'.

  Raises:
    ValueError: if all the inputs are not of the same shape, or if any input
      is of an unsupported shape.

  Returns:
    A tensor of shape `[A1, ..., An, 3]`, containing `point_model_space` in
    eye coordinates.
  """
  with tf.compat.v1.name_scope(
      name, "model_to_eye",
      [point_model_space, camera_position, look_at_point, up_vector]):
    point_model_space = tf.convert_to_tensor(value=point_model_space)
    camera_position = tf.convert_to_tensor(value=camera_position)
    look_at_point = tf.convert_to_tensor(value=look_at_point)
    up_vector = tf.convert_to_tensor(value=up_vector)

    shape.check_static(
        tensor=point_model_space,
        tensor_name="point_model_space",
        has_dim_equals=(-1, 3))
    shape.compare_batch_dimensions(
        tensors=(point_model_space, camera_position),
        last_axes=-2,
        tensor_names=("point_model_space", "camera_position"),
        broadcast_compatible=True)

    model_to_eye_matrix = look_at.right_handed(camera_position, look_at_point,
                                               up_vector)

    # Append a homogeneous coordinate, apply the 4x4 look-at matrix, and drop
    # the homogeneous coordinate of the result.
    batch_shape = tf.shape(input=point_model_space)[:-1]
    one = tf.ones(
        shape=tf.concat((batch_shape, (1,)), axis=-1),
        dtype=point_model_space.dtype)
    point_model_space = tf.concat((point_model_space, one), axis=-1)
    point_model_space = tf.expand_dims(point_model_space, axis=-1)
    res = tf.squeeze(
        tf.matmul(model_to_eye_matrix, point_model_space), axis=-1)
    return res[..., :-1]
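# Usage sketch for model_to_eye (illustrative only, not from the source;
# assumes this module is imported as `glm` and numpy as `np`). With a camera
# at the origin looking down +z, eye space is right-handed and looks down -z,
# so a point 5 units in front of the camera lands at z = -5:
#
#   point_eye = glm.model_to_eye(
#       point_model_space=np.array((0.0, 0.0, 5.0)),
#       camera_position=np.array((0.0, 0.0, 0.0)),
#       look_at_point=np.array((0.0, 0.0, 1.0)),
#       up_vector=np.array((0.0, 1.0, 0.0)))
#   # point_eye is approximately (0.0, 0.0, -5.0).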
def test_look_at_right_handed_preset(self):
  """Tests that look_at_right_handed generates expected results."""
  camera_position = ((0.0, 0.0, 0.0), (0.1, 0.2, 0.3))
  look_at_point = ((0.0, 0.0, 1.0), (0.4, 0.5, 0.6))
  up_vector = ((0.0, 1.0, 0.0), (0.7, 0.8, 0.9))
  pred = look_at.right_handed(camera_position, look_at_point, up_vector)
  gt = (((-1.0, 0.0, 0.0, 0.0), (0.0, 1.0, 0.0, 0.0), (0.0, 0.0, -1.0, 0.0),
         (0.0, 0.0, 0.0, 1.0)),
        ((4.08248186e-01, -8.16496551e-01, 4.08248395e-01, -2.98023224e-08),
         (-7.07106888e-01, 1.19209290e-07, 7.07106769e-01, -1.41421378e-01),
         (-5.77350318e-01, -5.77350318e-01, -5.77350318e-01, 3.46410215e-01),
         (0.0, 0.0, 0.0, 1.0)))
  self.assertAllClose(pred, gt)
def test_perspective_correct_interpolation_preset(self):
  """Tests that perspective_correct_interpolation generates expected results."""
  camera_origin = np.array((0.0, 0.0, 0.0))
  camera_up = np.array((0.0, 1.0, 0.0))
  look_at_point = np.array((0.0, 0.0, 1.0))
  fov = np.array((90.0 * np.pi / 180.0,))
  bottom_left = np.array((0.0, 0.0))
  image_size = np.array((501.0, 501.0))
  near_plane = np.array((0.01,))
  far_plane = np.array((10.0,))
  batch_size = np.random.randint(1, 5)
  triangle_x_y = np.random.uniform(-10.0, 10.0, (batch_size, 3, 2))
  triangle_z = np.random.uniform(2.0, 10.0, (batch_size, 3, 1))
  triangles = np.concatenate((triangle_x_y, triangle_z), axis=-1)

  # Builds barycentric weights.
  barycentric_weights = np.random.uniform(size=(batch_size, 3))
  barycentric_weights = barycentric_weights / np.sum(
      barycentric_weights, axis=-1, keepdims=True)
  # Barycentric interpolation of vertex positions.
  convex_combination = np.einsum("ba, bac -> bc", barycentric_weights,
                                 triangles)

  # Build matrices.
  model_to_eye_matrix = look_at.right_handed(camera_origin, look_at_point,
                                             camera_up)
  perspective_matrix = perspective.right_handed(
      fov, (image_size[0:1] / image_size[1:2]), near_plane, far_plane)

  # Computes where those points project in screen coordinates.
  pixel_position, _ = glm.model_to_screen(convex_combination,
                                          model_to_eye_matrix,
                                          perspective_matrix, image_size,
                                          bottom_left)

  # Builds attributes.
  num_pixels = pixel_position.shape[0]
  attribute_size = np.random.randint(10)
  attributes = np.random.uniform(size=(num_pixels, 3, attribute_size))

  prediction = glm.perspective_correct_interpolation(triangles, attributes,
                                                     pixel_position[..., 0:2],
                                                     model_to_eye_matrix,
                                                     perspective_matrix,
                                                     image_size, bottom_left)

  groundtruth = np.einsum("ba, bac -> bc", barycentric_weights, attributes)
  self.assertAllClose(prediction, groundtruth)
def test_rasterize_preset(self):
  camera_origin = (0.0, 0.0, 0.0)
  camera_up = (0.0, 1.0, 0.0)
  look_at_point = (0.0, 0.0, 1.0)
  field_of_view = (60 * np.pi / 180,)
  near_plane = (0.01,)
  far_plane = (400.0,)

  # Construct the view projection matrix.
  model_to_eye_matrix = look_at.right_handed(camera_origin, look_at_point,
                                             camera_up)
  perspective_matrix = perspective.right_handed(
      field_of_view, (float(_IMAGE_WIDTH) / float(_IMAGE_HEIGHT),),
      near_plane, far_plane)
  view_projection_matrix = tf.linalg.matmul(perspective_matrix,
                                            model_to_eye_matrix)
  view_projection_matrix = tf.expand_dims(view_projection_matrix, axis=0)

  depth = 1.0
  vertices = np.array(
      [[(-2.0 * _TRIANGLE_SIZE, 0.0, depth), (0.0, _TRIANGLE_SIZE, depth),
        (0.0, 0.0, depth), (0.0, -_TRIANGLE_SIZE, depth)]],
      dtype=np.float32)
  triangles = np.array(((1, 2, 0), (0, 2, 3)), np.int32)

  predicted_fb = rasterization_backend.rasterize(
      vertices, triangles, view_projection_matrix,
      (_IMAGE_WIDTH, _IMAGE_HEIGHT))

  with self.subTest(name="triangle_index"):
    groundtruth_triangle_index = np.zeros(
        (1, _IMAGE_HEIGHT, _IMAGE_WIDTH, 1), dtype=np.int32)
    groundtruth_triangle_index[..., :_IMAGE_WIDTH // 2, 0] = 0
    groundtruth_triangle_index[..., :_IMAGE_HEIGHT // 2,
                               _IMAGE_WIDTH // 2:, 0] = 1
    self.assertAllEqual(groundtruth_triangle_index, predicted_fb.triangle_id)

  with self.subTest(name="mask"):
    groundtruth_mask = np.ones(
        (1, _IMAGE_HEIGHT, _IMAGE_WIDTH, 1), dtype=np.int32)
    groundtruth_mask[..., :_IMAGE_WIDTH // 2, 0] = 0
    self.assertAllEqual(groundtruth_mask, predicted_fb.foreground_mask)

  attributes = np.array(
      ((1.0, 0.0, 0.0), (0.0, 1.0, 0.0), (0.0, 0.0, 1.0))).astype(np.float32)
  perspective_correct_interpolation = lambda geometry, pixels: glm.perspective_correct_interpolation(  # pylint: disable=g-long-lambda,line-too-long
      geometry, attributes, pixels, model_to_eye_matrix, perspective_matrix,
      np.array((_IMAGE_WIDTH, _IMAGE_HEIGHT)).astype(np.float32),
      np.array((0.0, 0.0)).astype(np.float32))

  with self.subTest(name="barycentric_coordinates_triangle_0"):
    geometry_0 = tf.gather(vertices, triangles[0, :], axis=1)
    pixels_0 = tf.transpose(
        grid.generate((3.5, 2.5), (6.5, 4.5), (4, 3)), perm=(1, 0, 2))
    barycentrics_gt_0 = perspective_correct_interpolation(geometry_0, pixels_0)
    self.assertAllClose(
        barycentrics_gt_0,
        predicted_fb.barycentrics.value[0, 2:, 3:, :],
        atol=1e-3)

  with self.subTest(name="barycentric_coordinates_triangle_1"):
    geometry_1 = tf.gather(vertices, triangles[1, :], axis=1)
    pixels_1 = tf.transpose(
        grid.generate((3.5, 0.5), (6.5, 1.5), (4, 2)), perm=(1, 0, 2))
    barycentrics_gt_1 = perspective_correct_interpolation(geometry_1, pixels_1)
    self.assertAllClose(
        barycentrics_gt_1,
        predicted_fb.barycentrics.value[0, 0:2, 3:, :],
        atol=1e-3)
def test_rasterize(self):
  max_depth = 10
  min_depth = 2
  height = 480
  width = 640
  camera_origin = (0.0, 0.0, 0.0)
  camera_up = (0.0, 1.0, 0.0)
  look_at_point = (0.0, 0.0, 1.0)
  fov = (60.0 * np.pi / 180,)
  near_plane = (1.0,)
  far_plane = (10.0,)
  batch_shape = tf.convert_to_tensor(
      value=(2, (max_depth - min_depth) // 2), dtype=tf.int32)

  world_to_camera = look_at.right_handed(camera_origin, look_at_point,
                                         camera_up)
  perspective_matrix = perspective.right_handed(
      fov, (float(width) / float(height),), near_plane, far_plane)
  view_projection_matrix = tf.matmul(perspective_matrix, world_to_camera)
  view_projection_matrix = tf.squeeze(view_projection_matrix)

  # Generate triangles at different depths and associated ground truth.
  tris = np.zeros((max_depth - min_depth, 9), dtype=np.float32)
  gt = np.zeros((max_depth - min_depth, height, width, 2), dtype=np.float32)
  for idx in range(max_depth - min_depth):
    tris[idx, :] = (-100.0, 100.0, idx + min_depth, 100.0, 100.0,
                    idx + min_depth, 0.0, -100.0, idx + min_depth)
    gt[idx, :, :, :] = (0, idx + min_depth)

  # Broadcast the variables.
  render_parameters = {
      "view_projection_matrix":
          ("mat",
           tf.broadcast_to(
               input=view_projection_matrix,
               shape=tf.concat(
                   values=(batch_shape,
                           tf.shape(input=view_projection_matrix)[-2:]),
                   axis=0))),
      "triangular_mesh":
          ("buffer",
           tf.reshape(
               tris, shape=tf.concat(values=(batch_shape, (9,)), axis=0)))
  }
  # Reshape the ground truth.
  gt = tf.reshape(
      gt, shape=tf.concat(values=(batch_shape, (height, width, 2)), axis=0))

  render_parameters = list(six.iteritems(render_parameters))
  variable_names = [v[0] for v in render_parameters]
  variable_kinds = [v[1][0] for v in render_parameters]
  variable_values = [v[1][1] for v in render_parameters]

  def rasterize():
    return rasterization_backend.render_ops.rasterize(
        num_points=3,
        variable_names=variable_names,
        variable_kinds=variable_kinds,
        variable_values=variable_values,
        output_resolution=(width, height),
        vertex_shader=test_vertex_shader,
        geometry_shader=test_geometry_shader,
        fragment_shader=test_fragment_shader,
    )

  result = rasterize()
  self.assertAllClose(result[..., 2:4], gt)

  @tf.function
  def check_lazy_shape():
    # Within @tf.function, the tensor shape is determined by the SetShapeFn
    # callback. Ensure that the shape of the non-batch axes matches that of
    # the actual tensor evaluated in eager mode above.
    lazy_shape = rasterize().shape
    self.assertEqual(lazy_shape[-3:], list(result.shape)[-3:])

  check_lazy_shape()
def make_look_at_matrix(camera_origin=(0.0, 0.0, 0.0),
                        look_at_point=(0.0, 0.0, 0.0)):
  """Shortcut util function to create a model-to-eye matrix for tests."""
  camera_up = (0.0, 1.0, 0.0)
  return look_at.right_handed(camera_origin, look_at_point, camera_up)
def model_to_screen(point_model_space,
                    camera_position,
                    look_at_point,
                    up_vector,
                    vertical_field_of_view,
                    screen_dimensions,
                    near,
                    far,
                    lower_left_corner,
                    name=None):
  """Transforms points from model to screen coordinates.

  Note: Please refer to http://www.songho.ca/opengl/gl_transform.html for an
  in-depth review of this pipeline.

  Note: In the following, A1 to An are optional batch dimensions which must be
  broadcast compatible.

  Args:
    point_model_space: A tensor of shape `[A1, ..., An, 3]`, where the last
      dimension represents the 3D points in model space.
    camera_position: A tensor of shape `[A1, ..., An, 3]`, where the last
      dimension represents the 3D position of the camera.
    look_at_point: A tensor of shape `[A1, ..., An, 3]`, with the last
      dimension storing the point the camera is looking at.
    up_vector: A tensor of shape `[A1, ..., An, 3]`, where the last dimension
      defines the up vector of the camera.
    vertical_field_of_view: A tensor of shape `[A1, ..., An, 1]`, where the
      last dimension represents the vertical field of view of the frustum.
      Note that values for `vertical_field_of_view` must be in the range
      (0, pi).
    screen_dimensions: A tensor of shape `[A1, ..., An, 2]`, where the last
      dimension captures the width and the height of the screen, both
      expressed in pixels.
    near: A tensor of shape `[A1, ..., An, 1]`, where the last dimension
      captures the distance between the viewer and the near clipping plane.
      Note that values for `near` must be non-negative.
    far: A tensor of shape `[A1, ..., An, 1]`, where the last dimension
      captures the distance between the viewer and the far clipping plane.
      Note that values for `far` must be greater than those of `near`.
    lower_left_corner: A tensor of shape `[A1, ..., An, 2]`, where the last
      dimension captures the position (in pixels) of the lower left corner of
      the screen.
    name: A name for this op. Defaults to 'model_to_screen'.

  Raises:
    InvalidArgumentError: if any input contains data not in the specified
      range of valid values.
    ValueError: if any input is of an unsupported shape.

  Returns:
    A tuple of two tensors, respectively of shape `[A1, ..., An, 3]` and
    `[A1, ..., An, 1]`, where the first tensor contains the projection of
    `point_model_space` in screen coordinates, and the second contains the
    'w' component of `point_model_space` in clip space.
  """
  with tf.compat.v1.name_scope(name, "model_to_screen", [
      point_model_space, camera_position, look_at_point, up_vector,
      vertical_field_of_view, screen_dimensions, near, far, lower_left_corner
  ]):
    point_model_space = tf.convert_to_tensor(value=point_model_space)
    camera_position = tf.convert_to_tensor(value=camera_position)
    look_at_point = tf.convert_to_tensor(value=look_at_point)
    up_vector = tf.convert_to_tensor(value=up_vector)
    vertical_field_of_view = tf.convert_to_tensor(
        value=vertical_field_of_view)
    near = tf.convert_to_tensor(value=near)
    far = tf.convert_to_tensor(value=far)
    screen_dimensions = tf.convert_to_tensor(value=screen_dimensions)

    shape.check_static(
        tensor=point_model_space,
        tensor_name="point_model_space",
        has_dim_equals=(-1, 3))
    shape.check_static(
        tensor=screen_dimensions,
        tensor_name="screen_dimensions",
        has_dim_equals=(-1, 2))
    shape.compare_batch_dimensions(
        tensors=(point_model_space, camera_position, vertical_field_of_view,
                 near, far),
        last_axes=-2,
        tensor_names=("point_model_space", "camera_position",
                      "vertical_field_of_view", "near", "far"),
        broadcast_compatible=True)

    # Append a homogeneous coordinate to the input points.
    batch_shape = tf.shape(input=point_model_space)[:-1]
    one = tf.ones(
        shape=tf.concat((batch_shape, (1,)), axis=-1),
        dtype=point_model_space.dtype)
    point_model_space = tf.concat((point_model_space, one), axis=-1)
    point_model_space = tf.expand_dims(point_model_space, axis=-1)

    # The following block performs the equivalent of model_to_eye followed by
    # eye_to_clip.
    model_to_eye_matrix = look_at.right_handed(camera_position, look_at_point,
                                               up_vector)
    perspective_matrix = perspective.right_handed(
        vertical_field_of_view,
        screen_dimensions[..., 0:1] / screen_dimensions[..., 1:2], near, far)
    view_projection_matrix = tf.linalg.matmul(perspective_matrix,
                                              model_to_eye_matrix)
    point_clip_space = tf.squeeze(
        tf.matmul(view_projection_matrix, point_model_space), axis=-1)
    point_ndc_space = clip_to_ndc(point_clip_space)
    point_screen_space = ndc_to_screen(point_ndc_space, lower_left_corner,
                                       screen_dimensions, near, far)
    return point_screen_space, point_clip_space[..., 3:4]
def __init__(self,
             background_vertices,
             background_attributes,
             background_triangles,
             camera_origin,
             look_at_point,
             camera_up,
             field_of_view,
             image_size,
             near_plane,
             far_plane,
             bottom_left=(0.0, 0.0),
             name=None):
  """Initializes TriangleRasterizer with OpenGL parameters and the background.

  Note: In the following, A1 to An are optional batch dimensions.

  Args:
    background_vertices: A tensor of shape `[V, 3]` containing `V` 3D
      vertices. Note that these background vertices will be used in every
      rasterized image.
    background_attributes: A tensor of shape `[V, K]` containing `V` vertices
      associated with K-dimensional attributes. Pixels for which the first
      visible surface is in the background geometry will make use of
      `background_attributes` for estimating their own attribute. Note that
      these background attributes will be used in every rasterized image.
    background_triangles: An integer tensor of shape `[T, 3]` containing `T`
      triangles, each associated with 3 vertices from `background_vertices`.
      Note that these background triangles will be used in every rasterized
      image.
    camera_origin: A tensor of shape `[A1, ..., An, 3]`, where the last axis
      represents the 3D position of the camera.
    look_at_point: A tensor of shape `[A1, ..., An, 3]`, with the last axis
      storing the point the camera is looking at.
    camera_up: A tensor of shape `[A1, ..., An, 3]`, where the last axis
      defines the up vector of the camera.
    field_of_view: A tensor of shape `[A1, ..., An, 1]`, where the last axis
      represents the vertical field of view of the frustum expressed in
      radians. Note that values for `field_of_view` must be in the range
      (0, pi).
    image_size: A tuple (height, width) containing the dimensions in pixels
      of the rasterized image.
    near_plane: A tensor of shape `[A1, ..., An, 1]`, where the last axis
      captures the distance between the viewer and the near clipping plane.
      Note that values for `near_plane` must be non-negative.
    far_plane: A tensor of shape `[A1, ..., An, 1]`, where the last axis
      captures the distance between the viewer and the far clipping plane.
      Note that values for `far_plane` must be non-negative.
    bottom_left: A tensor of shape `[A1, ..., An, 2]`, where the last axis
      captures the position (in pixels) of the lower left corner of the
      screen. Defaults to (0.0, 0.0).
    name: A name for this op. Defaults to 'triangle_rasterizer_init'.
  """
  with tf.compat.v1.name_scope(
      name, "triangle_rasterizer_init",
      (background_vertices, background_attributes, background_triangles,
       camera_origin, look_at_point, camera_up, field_of_view, near_plane,
       far_plane, bottom_left)):
    background_vertices = tf.convert_to_tensor(value=background_vertices)
    background_attributes = tf.convert_to_tensor(value=background_attributes)
    background_triangles = tf.convert_to_tensor(value=background_triangles)

    shape.check_static(
        tensor=background_vertices,
        tensor_name="background_vertices",
        has_rank=2,
        has_dim_equals=(-1, 3))
    shape.check_static(
        tensor=background_attributes,
        tensor_name="background_attributes",
        has_rank=2)
    shape.check_static(
        tensor=background_triangles,
        tensor_name="background_triangles",
        # has_rank=2,
        has_dim_equals=(-1, 3))
    shape.compare_batch_dimensions(
        tensors=(background_vertices, background_attributes),
        last_axes=-2,
        tensor_names=("background_vertices", "background_attributes"),
        broadcast_compatible=False)

    background_vertices = tf.expand_dims(background_vertices, axis=0)
    background_attributes = tf.expand_dims(background_attributes, axis=0)

    height = float(image_size[0])
    width = float(image_size[1])

    self._background_geometry = tf.gather(
        background_vertices, background_triangles, axis=-2)
    self._background_attribute = tf.gather(
        background_attributes, background_triangles, axis=-2)

    self._camera_origin = tf.convert_to_tensor(value=camera_origin)
    self._look_at_point = tf.convert_to_tensor(value=look_at_point)
    self._camera_up = tf.convert_to_tensor(value=camera_up)
    self._field_of_view = tf.convert_to_tensor(value=field_of_view)
    self._image_size_glm = tf.convert_to_tensor(value=(width, height))
    self._image_size_int = (int(width), int(height))
    self._near_plane = tf.convert_to_tensor(value=near_plane)
    self._far_plane = tf.convert_to_tensor(value=far_plane)
    self._bottom_left = tf.convert_to_tensor(value=bottom_left)

    # Construct the pixel grid. Note that OpenGL uses half-integer pixel
    # centers.
    px = tf.linspace(0.5, width - 0.5, num=int(width))
    py = tf.linspace(0.5, height - 0.5, num=int(height))
    xv, yv = tf.meshgrid(px, py)
    self._pixel_position = tf.stack((xv, yv), axis=-1)

    # Construct the view projection matrix.
    world_to_camera = look_at.right_handed(camera_origin, look_at_point,
                                           camera_up)
    perspective_matrix = perspective.right_handed(
        field_of_view, (width / height,), near_plane, far_plane)
    perspective_matrix = tf.squeeze(perspective_matrix)
    self._view_projection_matrix = tf.linalg.matmul(perspective_matrix,
                                                    world_to_camera)