def rotate_image_on_pano(images, rotations, fov, output_shape):
  """Transform perspective images to equirectangular images after rotations.

  Returns equirectangular panoramic images in which the input perspective
  images are embedded after applying the rotation R from the input images'
  frame to the target frame. The image, with field of view `fov` and centered
  on the camera's look-at -Z axis, is projected onto the pano. The -Z axis
  corresponds to the spherical coordinates (pi/2, pi/2), which is
  (HEIGHT/2, WIDTH/4) on the pano.

  Args:
    images: [BATCH, HEIGHT, WIDTH, CHANNEL] perspective view images.
    rotations: [BATCH, 3, 3] rotation matrices.
    fov: (float) images' field of view in degrees.
    output_shape: a 2-D list of output dimension [height, width].

  Returns:
    equirectangular images [BATCH, height, width, CHANNELS].
  """
  with tf.name_scope(None, 'rotate_image_on_pano',
                     [images, rotations, fov, output_shape]):
    if len(images.shape) != 4:
      raise ValueError("'images' has the wrong dimensions.")
    if rotations.shape[-2:] != [3, 3]:
      raise ValueError("'rotations' must have 3x3 dimensions.")

    shape = images.shape.as_list()
    batch, height, width = shape[0], shape[1], shape[2]

    # Generate a mesh grid on a sphere.
    spherical = geometry.generate_equirectangular_grid(output_shape)
    cartesian = geometry.spherical_to_cartesian(spherical)
    cartesian = tf.tile(
        cartesian[tf.newaxis, :, :, :, tf.newaxis], [batch, 1, 1, 1, 1])
    axis_convert = tf.constant([[0., -1., 0.], [0., 0., 1.], [1., 0., 0.]])
    cartesian = tf.matmul(axis_convert, cartesian)
    cartesian = tf.squeeze(
        tf.matmul(rotations[:, tf.newaxis, tf.newaxis], cartesian), -1)
    # Only take one hemisphere (the camera look-at direction).
    hemisphere_mask = tf.cast(cartesian[:, :, :, -1:] < 0, tf.float32)
    image_coordinates = cartesian[:, :, :, :2] / cartesian[:, :, :, -1:]
    x, y = tf.split(image_coordinates, [1, 1], -1)
    # Map pixels on the equirectangular pano to the perspective image.
    nx = -x * width / (2 * tf.tan(
        math_utils.degrees_to_radians(fov / 2))) + width / 2 - 0.5
    ny = y * height / (2 * tf.tan(
        math_utils.degrees_to_radians(fov / 2))) + height / 2 - 0.5
    transformed = hemisphere_mask * tfa.image.resampler(
        images, tf.concat([nx, ny], -1))
    return transformed
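# A minimal usage sketch for rotate_image_on_pano (illustrative assumption,
# not part of the module): embed one 256x256 perspective crop with a
# 90-degree field of view into a 512x1024 equirectangular canvas, using the
# identity rotation so the crop stays centered on the -Z axis.
#
#   images = tf.zeros([1, 256, 256, 3], tf.float32)
#   rotations = tf.eye(3, batch_shape=[1])
#   pano = rotate_image_on_pano(images, rotations, 90., [512, 1024])
#   # pano: [1, 512, 1024, 3]; pixels outside the projected cone are zero.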
def rotate_image_in_3d(images, input_rotations, input_fov, output_fov,
                       output_shape):
  """Return reprojected perspective view images given a rotated camera.

  This function applies a homography H = K_output * R^T * K_input^-1, where
  K_output and K_input are the output and input camera intrinsics and R is
  the rotation from the input images' frame to the target frame.

  Args:
    images: [BATCH, HEIGHT, WIDTH, CHANNEL] perspective view images.
    input_rotations: [BATCH, 3, 3] rotation matrices from the current camera
      frame to the target camera frame.
    input_fov: [BATCH] a 1-D tensor (float32) of input field of view in
      degrees.
    output_fov: (float) output field of view in degrees.
    output_shape: a 2-D list of output dimension [height, width].

  Returns:
    reprojected images [BATCH, height, width, CHANNELS].
  """
  with tf.name_scope(
      None, 'rotate_image_in_3d',
      [images, input_rotations, input_fov, output_fov, output_shape]):
    if len(images.shape) != 4:
      raise ValueError("'images' has the wrong dimensions.")
    if input_rotations.shape[-2:] != [3, 3]:
      raise ValueError("'input_rotations' must have 3x3 dimensions.")

    shape = images.shape.as_list()
    batch, height, width = shape[0], shape[1], shape[2]

    cartesian = geometry.generate_cartesian_grid(output_shape, output_fov)
    cartesian = tf.tile(
        cartesian[tf.newaxis, :, :, :, tf.newaxis], [batch, 1, 1, 1, 1])
    input_rotations = tf.tile(
        input_rotations[:, tf.newaxis, tf.newaxis, :],
        [1] + output_shape + [1, 1])
    cartesian = tf.squeeze(
        tf.matmul(input_rotations, cartesian, transpose_a=True), -1)
    image_coordinates = -cartesian[:, :, :, :2] / cartesian[:, :, :, -1:]
    x, y = tf.split(image_coordinates, [1, 1], -1)
    w = 2 * tf.tan(math_utils.degrees_to_radians(input_fov / 2))
    h = 2 * tf.tan(math_utils.degrees_to_radians(input_fov / 2))
    w = w[:, tf.newaxis, tf.newaxis, tf.newaxis]
    h = h[:, tf.newaxis, tf.newaxis, tf.newaxis]
    nx = x * width / w + width / 2 - 0.5
    ny = -y * height / h + height / 2 - 0.5
    return tfa.image.resampler(images, tf.concat([nx, ny], -1))
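# A minimal usage sketch for rotate_image_in_3d (illustrative assumption):
# re-render a batch of two 90-degree-FoV images as if the camera had been
# rotated by the given matrices, keeping a 90-degree output field of view.
#
#   images = tf.zeros([2, 256, 256, 3], tf.float32)
#   input_rotations = tf.eye(3, batch_shape=[2])
#   input_fov = tf.constant([90., 90.])
#   warped = rotate_image_in_3d(images, input_rotations, input_fov, 90.,
#                               [256, 256])
#   # warped: [2, 256, 256, 3]; with identity rotations the inputs are
#   # resampled at (nearly) their original pixel locations.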
def generate_random_views(pano1_rgb,
                          pano2_rgb,
                          r1,
                          t1,
                          r2,
                          t2,
                          max_rotation=90.,
                          max_tilt=5.,
                          output_fov=90.,
                          output_height=512,
                          output_width=512,
                          pano1_depth=None,
                          pano2_depth=None):
  """Generate stereo image pairs by randomly sampling the panoramic images.

  We randomly sample camera look-at directions and project the panoramas to
  perspective images. If depth maps are provided, we also compute the
  overlapping area between the pair. The overlap is measured by the minimum
  of the ratio between the area seen by both cameras and the image size. This
  function returns a ViewPair object containing the perspective images, the
  masks that show the common area seen by both cameras, the cameras' field of
  view (FoV), the relative rotation from camera 2 to camera 1, and the
  relative translation direction in the frame of camera 1.

  Args:
    pano1_rgb: [HEIGHT, WIDTH, 3] the input RGB panoramic image.
    pano2_rgb: [HEIGHT, WIDTH, 3] the input RGB panoramic image.
    r1: [3, 3] the camera-to-world rotation of camera 1.
    t1: [3] the world location of camera 1.
    r2: [3, 3] the camera-to-world rotation of camera 2.
    t2: [3] the world location of camera 2.
    max_rotation: (float) maximum relative rotation between the output image
      pair in degrees.
    max_tilt: (float) maximum tilt angle of the up vector in degrees.
    output_fov: (float) output images' horizontal field of view in degrees.
    output_height: (int) the height dimension of the output perspective
      images.
    output_width: (int) the width dimension of the output perspective images.
    pano1_depth: [HEIGHT, WIDTH, 1] the panoramic depth map of pano1_rgb.
    pano2_depth: [HEIGHT, WIDTH, 1] the panoramic depth map of pano2_rgb.

  Returns:
    ViewPair
  """
  ViewPair = collections.namedtuple(
      'ViewPair', ['img1', 'img2', 'mask1', 'mask2', 'fov', 'r', 't'])

  swap_yz = tf.constant([[1., 0., 0.], [0., 0., 1.], [0., -1., 0.]],
                        shape=[1, 3, 3])
  lookat_direction1 = math_utils.random_vector_on_sphere(
      1, [[-math.sin(math.pi / 3), math.sin(math.pi / 3)], [0., 2 * math.pi]])
  lookat_direction1 = tf.squeeze(
      tf.matmul(swap_yz, tf.expand_dims(lookat_direction1, -1)), -1)
  lookat_direction2 = math_utils.uniform_sampled_vector_within_cone(
      lookat_direction1, math_utils.degrees_to_radians(max_rotation))
  lookat_directions = tf.concat([lookat_direction1, lookat_direction2], 0)
  up1 = math_utils.uniform_sampled_vector_within_cone(
      tf.constant([[0., 0., 1.]]), math_utils.degrees_to_radians(max_tilt))
  up2 = math_utils.uniform_sampled_vector_within_cone(
      tf.constant([[0., 0., 1.]]), math_utils.degrees_to_radians(max_tilt))
  lookat_rotations = math_utils.lookat_matrix(
      tf.concat([up1, up2], 0), lookat_directions)
  sample_rotations = tf.matmul(
      tf.concat([r1, r2], 0), lookat_rotations, transpose_a=True)
  sampled_views = transformation.rectilinear_projection(
      tf.stack([pano1_rgb, pano2_rgb], 0),
      [output_height, output_width],
      output_fov,
      sample_rotations)
  r_c2_to_c1 = tf.matmul(
      lookat_rotations[0], lookat_rotations[1], transpose_a=True)
  t_c1 = tf.squeeze(
      tf.matmul(lookat_rotations[0],
                tf.expand_dims(tf.nn.l2_normalize(t2 - t1), -1),
                transpose_a=True))

  if pano1_depth is not None and pano2_depth is not None:
    sampled_depth = transformation.rectilinear_projection(
        tf.stack([pano1_depth, pano2_depth], 0),
        [output_height, output_width],
        output_fov,
        sample_rotations)
    fx = output_width * 0.5 / math.tan(
        math_utils.degrees_to_radians(output_fov) / 2)
    intrinsics = tf.constant([[fx, 0., output_width * 0.5],
                              [0., -fx, output_height * 0.5],
                              [0., 0., 1.]])
    pose1_c2w = tf.concat([lookat_rotations[0], tf.expand_dims(t1, -1)], 1)
    pose2_c2w = tf.concat([lookat_rotations[1], tf.expand_dims(t2, -1)], 1)
    mask1, mask2 = overlap_mask(sampled_depth[0], pose1_c2w,
                                sampled_depth[1], pose2_c2w, intrinsics)
  else:
    mask1 = None
    mask2 = None
  return ViewPair(sampled_views[0], sampled_views[1], mask1, mask2,
                  output_fov, r_c2_to_c1, t_c1)
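# A minimal usage sketch for generate_random_views (illustrative assumption;
# shapes follow the docstring, the panoramas and poses are placeholders, and
# math_utils/transformation/overlap_mask must be importable): sample a random
# overlapping pair from two 512x1024 panoramas without depth maps, so the
# returned masks are None.
#
#   pano1 = tf.zeros([512, 1024, 3], tf.float32)
#   pano2 = tf.zeros([512, 1024, 3], tf.float32)
#   r1 = r2 = tf.eye(3)
#   t1 = tf.constant([0., 0., 0.])
#   t2 = tf.constant([1., 0., 0.])
#   pair = generate_random_views(pano1, pano2, r1, t1, r2, t2,
#                                max_rotation=45., output_height=256,
#                                output_width=256)
#   # pair.img1, pair.img2: [256, 256, 3] perspective crops; pair.r is the
#   # rotation from camera 2 to camera 1 and pair.t is the unit translation
#   # direction expressed in camera 1's frame.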