Example #1
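These snippets are excerpts from a larger module, so the surrounding imports are not shown. A minimal set that would make them run, assuming the helper modules geometry, math_utils, and transformation (and the overlap_mask function used in Example #3) live alongside these functions, might look like the sketch below; the exact import paths are an assumption.

import collections
import math

import tensorflow.compat.v1 as tf  # the three-argument tf.name_scope is TF1-style
import tensorflow_addons as tfa

# Local helpers referenced by the examples; their package path is assumed here.
import geometry
import math_utils
import transformation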
def rotate_image_on_pano(images, rotations, fov, output_shape):
    """Transform perspective images to equirectangular images after rotations.

  Returns equirectangular panoramic images in which the input perspective
  images are embedded after applying the rotation R from the input images'
  frame to the target frame. The image with field of view "fov", centered at
  the camera's look-at direction (the -Z axis), is projected onto the pano.
  The -Z axis corresponds to the spherical coordinates (pi/2, pi/2), which is
  (HEIGHT/2, WIDTH/4) on the pano.

  Args:
    images: [BATCH, HEIGHT, WIDTH, CHANNEL] perspective view images.
    rotations: [BATCH, 3, 3] rotation matrices.
    fov: (float) images' field of view in degrees.
    output_shape: a 2-D list of output dimension [height, width].

  Returns:
    equirectangular images [BATCH, height, width, CHANNELS].
  """
    with tf.name_scope(None, 'rotate_image_on_pano',
                       [images, rotations, fov, output_shape]):
        if len(images.shape) != 4:
            raise ValueError("'images' has the wrong dimensions.")
        if rotations.shape[-2:] != [3, 3]:
            raise ValueError("'rotations' must have 3x3 dimensions.")

        shape = images.shape.as_list()
        batch, height, width = shape[0], shape[1], shape[2]
        # Generate a mesh grid on a sphere.
        spherical = geometry.generate_equirectangular_grid(output_shape)
        cartesian = geometry.spherical_to_cartesian(spherical)
        cartesian = tf.tile(cartesian[tf.newaxis, :, :, :, tf.newaxis],
                            [batch, 1, 1, 1, 1])
        axis_convert = tf.constant([[0., -1., 0.], [0., 0., 1.], [1., 0., 0.]])
        cartesian = tf.matmul(axis_convert, cartesian)
        cartesian = tf.squeeze(
            tf.matmul(rotations[:, tf.newaxis, tf.newaxis], cartesian), -1)
        # Keep only the hemisphere facing the camera's look-at direction.
        hemisphere_mask = tf.cast(cartesian[:, :, :, -1:] < 0, tf.float32)
        image_coordinates = cartesian[:, :, :, :2] / cartesian[:, :, :, -1:]
        x, y = tf.split(image_coordinates, [1, 1], -1)
        # Map pixels on equirectangular pano to perspective image.
        nx = -x * width / (2 * tf.tan(math_utils.degrees_to_radians(
            fov / 2))) + width / 2 - 0.5
        ny = y * height / (2 * tf.tan(math_utils.degrees_to_radians(
            fov / 2))) + height / 2 - 0.5
        transformed = hemisphere_mask * tfa.image.resampler(
            images, tf.concat([nx, ny], -1))
        return transformed
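A minimal call sketch for the function above, assuming a batch of square perspective crops and identity rotations (the batch size, resolutions, and the 90-degree FoV are placeholder values):

# Hypothetical inputs: two 256x256 RGB perspective images.
images = tf.random.uniform([2, 256, 256, 3])
# Identity rotation for each image (no change of viewing direction).
rotations = tf.eye(3, batch_shape=[2])
# Embed each 90-degree-FoV image into a 512x1024 equirectangular pano.
pano = rotate_image_on_pano(images, rotations, 90.0, [512, 1024])
# pano has shape [2, 512, 1024, 3]; pixels outside the projected
# hemisphere are zeroed out by the hemisphere mask.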
Example #2
def rotate_image_in_3d(images, input_rotations, input_fov, output_fov,
                       output_shape):
    """Return reprojected perspective view images given a rotated camera.

  This function applies a homography H = K_output * R^T * K_input^-1, where
  K_output and K_input are the output and input camera intrinsics and R is
  the rotation from the input images' frame to the target frame.

  Args:
    images: [BATCH, HEIGHT, WIDTH, CHANNEL] perspective view images.
    input_rotations: [BATCH, 3, 3] rotation matrices from current camera frame
      to target camera frame.
    input_fov: [BATCH] a 1-D tensor (float32) of input field of view in degrees.
    output_fov: (float) output field of view in degrees.
    output_shape: a 2-D list of output dimension [height, width].

  Returns:
    reprojected images [BATCH, height, width, CHANNELS].
  """
    with tf.name_scope(
            None, 'rotate_image_in_3d',
        [images, input_rotations, input_fov, output_fov, output_shape]):
        if len(images.shape) != 4:
            raise ValueError("'images' has the wrong dimensions.")
        if input_rotations.shape[-2:] != [3, 3]:
            raise ValueError("'input_rotations' must have 3x3 dimensions.")

        shape = images.shape.as_list()
        batch, height, width = shape[0], shape[1], shape[2]
        cartesian = geometry.generate_cartesian_grid(output_shape, output_fov)
        cartesian = tf.tile(cartesian[tf.newaxis, :, :, :, tf.newaxis],
                            [batch, 1, 1, 1, 1])
        input_rotations = tf.tile(
            input_rotations[:, tf.newaxis, tf.newaxis, :],
            [1] + output_shape + [1, 1])
        cartesian = tf.squeeze(
            tf.matmul(input_rotations, cartesian, transpose_a=True), -1)
        image_coordinates = -cartesian[:, :, :, :2] / cartesian[:, :, :, -1:]
        x, y = tf.split(image_coordinates, [1, 1], -1)
        w = 2 * tf.tan(math_utils.degrees_to_radians(input_fov / 2))
        h = 2 * tf.tan(math_utils.degrees_to_radians(input_fov / 2))
        w = w[:, tf.newaxis, tf.newaxis, tf.newaxis]
        h = h[:, tf.newaxis, tf.newaxis, tf.newaxis]
        nx = x * width / w + width / 2 - 0.5
        ny = -y * height / h + height / 2 - 0.5
        return tfa.image.resampler(images, tf.concat([nx, ny], -1))
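A sketch of one call, assuming each input image has a 90-degree field of view and is re-rendered after a small yaw rotation (the 10-degree angle, batch size, and resolutions are placeholders):

# Hypothetical inputs: two 256x256 RGB perspective images.
images = tf.random.uniform([2, 256, 256, 3])
input_fov = tf.constant([90.0, 90.0])
# Rotate both cameras by 10 degrees about the y-axis.
a = math.radians(10.0)
rot = tf.constant([[math.cos(a), 0., math.sin(a)],
                   [0., 1., 0.],
                   [-math.sin(a), 0., math.cos(a)]])
input_rotations = tf.tile(rot[tf.newaxis], [2, 1, 1])
# Reproject into a 100-degree-FoV output view of the same size.
reprojected = rotate_image_in_3d(images, input_rotations, input_fov,
                                 100.0, [256, 256])
# reprojected has shape [2, 256, 256, 3].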
Example #3
def generate_random_views(pano1_rgb,
                          pano2_rgb,
                          r1,
                          t1,
                          r2,
                          t2,
                          max_rotation=90.,
                          max_tilt=5.,
                          output_fov=90.,
                          output_height=512,
                          output_width=512,
                          pano1_depth=None,
                          pano2_depth=None):
    """Generate stereo image pairs by randomly sampling the panoramic images.

  We randomly sample camera look-at directions and project the panoramas to
  perspective images. If depth maps are provided, we also compute the
  overlapping area between the pair; the overlap is measured by the minimum of
  the ratio between the area seen by both cameras and the image size. This
  function returns a ViewPair object containing the perspective images, the
  masks that show the common area seen by both cameras, the cameras' field of
  view (FoV), the relative rotation from camera 2 to camera 1, and the
  relative translation direction in the frame of camera 1.


  Args:
    pano1_rgb: [HEIGHT, WIDTH, 3] the input RGB panoramic image.
    pano2_rgb: [HEIGHT, WIDTH, 3] the input RGB panoramic image.
    r1: [3, 3] the camera to world rotation of camera 1.
    t1: [3] the world location of camera 1.
    r2: [3, 3] the camera to world rotation of camera 2.
    t2: [3] the world location of camera 2.
    max_rotation: (float) maximum relative rotation between the output image
      pair in degrees.
    max_tilt: (float) maximum tilt angle of the up vector in degrees.
    output_fov: (float) output images' horizontal field of view in degrees.
    output_height: (int) the height dimension of the output perspective images.
    output_width: (int) the width dimension of the output perspective images.
    pano1_depth: [HEIGHT, WIDTH, 1] the panoramic depth map of pano1_rgb.
    pano2_depth: [HEIGHT, WIDTH, 1] the panoramic depth map of pano2_rgb.

  Returns:
    ViewPair
  """
    ViewPair = collections.namedtuple(
        'ViewPair', ['img1', 'img2', 'mask1', 'mask2', 'fov', 'r', 't'])

    swap_yz = tf.constant([[1., 0., 0.], [0., 0., 1.], [0., -1., 0.]],
                          shape=[1, 3, 3])
    lookat_direction1 = math_utils.random_vector_on_sphere(
        1, [[-math.sin(math.pi / 3),
             math.sin(math.pi / 3)], [0., 2 * math.pi]])
    lookat_direction1 = tf.squeeze(
        tf.matmul(swap_yz, tf.expand_dims(lookat_direction1, -1)), -1)

    lookat_direction2 = math_utils.uniform_sampled_vector_within_cone(
        lookat_direction1, math_utils.degrees_to_radians(max_rotation))
    lookat_directions = tf.concat([lookat_direction1, lookat_direction2], 0)
    up1 = math_utils.uniform_sampled_vector_within_cone(
        tf.constant([[0., 0., 1.]]), math_utils.degrees_to_radians(max_tilt))
    up2 = math_utils.uniform_sampled_vector_within_cone(
        tf.constant([[0., 0., 1.]]), math_utils.degrees_to_radians(max_tilt))
    lookat_rotations = math_utils.lookat_matrix(tf.concat([up1, up2], 0),
                                                lookat_directions)
    sample_rotations = tf.matmul(tf.concat([r1, r2], 0),
                                 lookat_rotations,
                                 transpose_a=True)

    sampled_views = transformation.rectilinear_projection(
        tf.stack([pano1_rgb, pano2_rgb], 0), [output_height, output_width],
        output_fov, sample_rotations)

    r_c2_to_c1 = tf.matmul(lookat_rotations[0],
                           lookat_rotations[1],
                           transpose_a=True)
    t_c1 = tf.squeeze(
        tf.matmul(lookat_rotations[0],
                  tf.expand_dims(tf.nn.l2_normalize(t2 - t1), -1),
                  transpose_a=True))

    if pano1_depth is not None and pano2_depth is not None:
        sampled_depth = transformation.rectilinear_projection(
            tf.stack([pano1_depth, pano2_depth], 0),
            [output_height, output_width], output_fov, sample_rotations)

        fx = output_width * 0.5 / math.tan(
            math_utils.degrees_to_radians(output_fov) / 2)
        intrinsics = tf.constant([[fx, 0., output_width * 0.5],
                                  [0., -fx, output_height * 0.5], [0., 0.,
                                                                   1.]])
        pose1_c2w = tf.concat([lookat_rotations[0], tf.expand_dims(t1, -1)], 1)
        pose2_c2w = tf.concat([lookat_rotations[1], tf.expand_dims(t2, -1)], 1)
        mask1, mask2 = overlap_mask(sampled_depth[0], pose1_c2w,
                                    sampled_depth[1], pose2_c2w, intrinsics)
    else:
        mask1 = None
        mask2 = None

    return ViewPair(sampled_views[0], sampled_views[1], mask1, mask2,
                    output_fov, r_c2_to_c1, t_c1)
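A sketch of how the sampler might be driven for one panorama pair, with identity camera-to-world rotations and made-up camera locations (all values are placeholders). Note that tf.concat([r1, r2], 0) inside the function expects the rotations to carry a leading batch axis, i.e. shape [1, 3, 3]:

pano1 = tf.random.uniform([512, 1024, 3])
pano2 = tf.random.uniform([512, 1024, 3])
# Identity camera-to-world rotations with a leading batch axis.
r1 = tf.eye(3, batch_shape=[1])
r2 = tf.eye(3, batch_shape=[1])
t1 = tf.constant([0.0, 0.0, 0.0])
t2 = tf.constant([1.0, 0.0, 0.0])  # camera 2 one unit away along x
pair = generate_random_views(pano1, pano2, r1, t1, r2, t2,
                             max_rotation=45.0,
                             output_height=256, output_width=256)
# pair.img1 / pair.img2: [256, 256, 3] perspective crops of each pano.
# pair.r: relative rotation from camera 2 to camera 1.
# pair.t: unit translation direction expressed in camera 1's frame.
# pair.mask1 / pair.mask2 are None because no depth maps were passed.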