Example #1
def homography_shift_mult(corner_shift1, w1, h1, corner_shift2, w2, h2, w, h):
    """Multiplies two homographies.

  Args:
    corner_shift1: a homography transformation parameterized as the displacement
      of four corner points. It is of data type float32 and of shape [8]
    w1: the width of the image from which corner_shift1 was computed
    h1: the height of the image from which corner_shift1 was computed
    corner_shift2: a homography transformation parameterized as the displacement
      of four corner points, with the same data type and shape as corner_shift1
    w2: the width of the image from which corner_shift2 was computed
    h2: the height of the image from which corner_shift2 was computed
    w: the width of the image for which the output corner_shift is computed
    h: the height of the image for which the output corner_shift is computed
  Returns:
    the product of the two homographies of the same shape and data type as
      corner_shift1
  """
    hmg1 = shifts_to_homography(w1,
                                h1,
                                corner_shift1,
                                is_forward=False,
                                is_matrix=True)
    mat_scale1 = tf.reshape(
        tf.stack([
            tf.to_float(w1) / tf.to_float(w), 0, 0, 0,
            tf.to_float(h1) / tf.to_float(h), 0, 0, 0, 1
        ]), [3, 3])
    mat1 = tf.matmul(tf.matrix_inverse(mat_scale1),
                     tf.matmul(hmg1, mat_scale1))

    hmg2 = shifts_to_homography(w2,
                                h2,
                                corner_shift2,
                                is_forward=False,
                                is_matrix=True)
    mat_scale2 = tf.reshape(
        tf.stack([
            tf.to_float(w2) / tf.to_float(w), 0, 0, 0,
            tf.to_float(h2) / tf.to_float(h), 0, 0, 0, 1
        ]), [3, 3])
    mat2 = tf.matmul(tf.matrix_inverse(mat_scale2),
                     tf.matmul(hmg2, mat_scale2))

    hmg = tf.matrix_inverse(tf.matmul(mat1, mat2))
    return homography_to_shifts(hmg, w, h, is_matrix=True)
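The scaling step above can be read as a change of coordinates: each homography, estimated at its own resolution (w_i, h_i), is conjugated by a diagonal scaling matrix so that it acts in the target image's pixel coordinates before the two are composed and inverted. In the notation of the code (S_i is mat_scale_i, M_i is mat_i, H_i the matrix form of corner_shift_i):

$$S_i = \operatorname{diag}\!\left(\frac{w_i}{w},\ \frac{h_i}{h},\ 1\right), \qquad M_i = S_i^{-1} H_i S_i, \qquad H = (M_1 M_2)^{-1}.$$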
Example #2
    def unprocess(self, image):
        with tf.name_scope(None, 'unprocess'):
            image.shape.assert_is_compatible_with([None, None, 3])

            # Randomly creates image metadata.
            rgb2cam = unprocess.random_ccm()
            rgb_gain, red_gain, blue_gain = unprocess.random_gains()

            rgb2cam = tf.cond(
                self.is_train,
                true_fn=lambda: rgb2cam,
                false_fn=lambda: tf.where(tf.math.is_nan(self._ccm), rgb2cam,
                                          self._ccm))
            rgb_gain = tf.cond(
                self.is_train,
                true_fn=lambda: rgb_gain,
                false_fn=lambda: tf.where(tf.math.is_nan(self._rgb_gain),
                                          rgb_gain, self._rgb_gain))
            red_gain = tf.cond(
                self.is_train,
                true_fn=lambda: red_gain,
                false_fn=lambda: tf.where(tf.math.is_nan(self._red_gain),
                                          red_gain, self._red_gain))
            blue_gain = tf.cond(
                self.is_train,
                true_fn=lambda: blue_gain,
                false_fn=lambda: tf.where(tf.math.is_nan(self._blue_gain),
                                          blue_gain, self._blue_gain))

            cam2rgb = tf.matrix_inverse(rgb2cam)

            if self.simple_unprocessing:
                # Inverts gamma compression.
                image = unprocess.gamma_expansion(image)
                # Inverts color correction.
                image = unprocess.apply_ccm(image, rgb2cam)
            else:
                # Approximately inverts global tone mapping.
                image = unprocess.inverse_smoothstep(image)
                # Inverts gamma compression.
                image = unprocess.gamma_expansion(image)
                # Inverts color correction.
                image = unprocess.apply_ccm(image, rgb2cam)
                # Approximately inverts white balance and brightening.
                image = unprocess.safe_invert_gains(image, rgb_gain, red_gain,
                                                    blue_gain)
            # Clips saturated pixels.
            image = tf.clip_by_value(image, 0.0, 1.0)
            # Applies a Bayer mosaic.
            bayer_image = unprocess.mosaic(image)

            metadata = {
                'cam2rgb': cam2rgb,
                'rgb_gain': rgb_gain,
                'red_gain': red_gain,
                'blue_gain': blue_gain,
            }
            return image, bayer_image, metadata
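The four tf.cond branches above all use the same NaN-sentinel pattern: during training the freshly sampled value is kept, while at evaluation time a stored value overrides it wherever that stored value is not NaN. A minimal, self-contained sketch of the pattern, assuming TF 1.x-style APIs as in the snippet (the names is_train, stored and random_value are illustrative placeholders, not part of the original class):

import tensorflow.compat.v1 as tf  # TF 1.x-style API, as used throughout these examples

# NaN acts as the "no stored value" sentinel.
is_train = tf.constant(False)
stored = tf.constant(float('nan'))
random_value = tf.random_uniform([], minval=0.8, maxval=1.2)

# Keep the random draw while training; at eval time use the stored value
# unless it is NaN, in which case fall back to the random draw.
value = tf.cond(
    is_train,
    true_fn=lambda: random_value,
    false_fn=lambda: tf.where(tf.math.is_nan(stored), random_value, stored))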
Example #3
def get_constraint(nll, params):
    hessian = [
        tf.gradients(g, params)
        for g in tf.unstack(tf.gradients(nll, params))
    ]
    inverse = tf.matrix_inverse(hessian)
    covariance_poi = inverse[0][0]
    constraint = tf.sqrt(covariance_poi)
    return constraint
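The returned constraint is the usual asymptotic estimate: the parameter covariance is approximated by the inverse Hessian of the negative log-likelihood, and the uncertainty on the parameter of interest (the first entry of params) is the square root of the corresponding diagonal element:

$$H_{ij} = \frac{\partial^2\,\mathrm{NLL}}{\partial\theta_i\,\partial\theta_j}, \qquad \hat\Sigma \approx H^{-1}, \qquad \sigma_{\theta_0} \approx \sqrt{(H^{-1})_{00}}.$$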
Example #4
def get_metrics(links, idx):
    v_rot_idx = tf.gather(links.v_rot, idx)
    # all possible seg pairs
    V = v_rot_idx[newaxis] + v_rot_idx[:, newaxis]
    eps = 1e-9
    eye_eps = -eps * tf.eye(
        links.points0.shape[-1], batch_shape=(1, 1), dtype=tf.float64)
    ginv = tf.to_double(tf.matmul(V, V, transpose_b=True)) + eye_eps
    gij = tf.to_float(tf.matrix_inverse(ginv))
    return gij, ginv
Example #5
def subpixel_homography(image, height, width, dy1, dx1, dy2, dx2, dy3, dx3,
                        dy4, dx4):
    """Applies a homography to an image.

  Args:
    image: input image of shape [input_height, input_width, channels] and of
      data type uint8 or float32
    height: the output image height
    width: the output image width
    dy1: the vertical shift of the top left corner
    dx1: the horizontal shift of the top left corner
    dy2: the vertical shift of the bottom left corner
    dx2: the horizontal shift of the bottom left corner
    dy3: the vertical shift of the top right corner
    dx3: the horizontal shift of the top right corner
    dy4: the vertical shift of the bottom right corner
    dx4: the horizontal shift of the bottom right corner
  Returns:
    the warping result of shape [height, width, channels] with the same data
    type as image
  """
    rx1 = tf.cast(tf.stack([0, 0, 1, 0, 0, 0, 0, 0]), tf.float32)
    ry1 = tf.cast(tf.stack([0, 0, 0, 0, 0, 1, 0, 0]), tf.float32)
    rx2 = tf.cast(
        tf.stack([0, height - 1, 1, 0, 0, 0, 0, -(height - 1) * dx2]),
        tf.float32)
    ry2 = tf.cast(
        tf.stack([0, 0, 0, 0, height - 1, 1, 0, -(height - 1) * dy2]),
        tf.float32)
    rx3 = tf.cast(tf.stack([width - 1, 0, 1, 0, 0, 0, -(width - 1) * dx3, 0]),
                  tf.float32)
    ry3 = tf.cast(tf.stack([0, 0, 0, width - 1, 0, 1, -(width - 1) * dy3, 0]),
                  tf.float32)
    rx4 = tf.cast(
        tf.stack([
            width - 1, height - 1, 1, 0, 0, 0, -(width - 1) * dx4,
            -(height - 1) * dx4
        ]), tf.float32)
    ry4 = tf.cast(
        tf.stack([
            0, 0, 0, width - 1, height - 1, 1, -(width - 1) * dy4,
            -(height - 1) * dy4
        ]), tf.float32)
    mat = tf.stack([rx1, ry1, rx2, ry2, rx3, ry3, rx4, ry4])
    b = tf.reshape(
        tf.cast(tf.stack([dx1, dy1, dx2, dy2, dx3, dy3, dx4, dy4]),
                tf.float32), [8, 1])
    inv_mat = tf.matrix_inverse(mat)
    transformation = tf.reshape(tf.matmul(inv_mat, b), [8])
    warped = contrib_image.transform(image, transformation, 'bilinear')
    cropped = tf.image.crop_to_bounding_box(warped, 0, 0, height, width)
    return cropped
Example #6
def image_overlap(depth1, pose1_c2w, depth2, pose2_c2w, intrinsics):
    """Determines the overlap of two images."""

    pose1_w2c = tf.matrix_inverse(
        tf.concat([pose1_c2w, tf.constant([[0., 0., 0., 1.]])], 0))[:3]
    pose2_w2c = tf.matrix_inverse(
        tf.concat([pose2_c2w, tf.constant([[0., 0., 0., 1.]])], 0))[:3]

    p_world1 = camera_to_world_projection(depth1, intrinsics, pose1_c2w)
    p_image1_in_2, z1_c2 = world_to_camera_projection(p_world1, intrinsics,
                                                      pose2_w2c)

    p_world2 = camera_to_world_projection(depth2, intrinsics, pose2_c2w)
    p_image2_in_1, z2_c1 = world_to_camera_projection(p_world2, intrinsics,
                                                      pose1_w2c)

    shape = depth1.shape.as_list()
    height, width = shape[0], shape[1]
    height = tf.cast(height, tf.float32)
    width = tf.cast(width, tf.float32)
    mask_h2_in_1 = tf.logical_and(
        tf.less_equal(p_image2_in_1[:, :, 1], height),
        tf.greater_equal(p_image2_in_1[:, :, 1], 0.))
    mask_w2_in_1 = tf.logical_and(tf.less_equal(p_image2_in_1[:, :, 0], width),
                                  tf.greater_equal(p_image2_in_1[:, :, 0], 0.))
    mask2_in_1 = tf.logical_and(tf.logical_and(mask_h2_in_1, mask_w2_in_1),
                                z2_c1 > 0)

    mask_h1_in_2 = tf.logical_and(
        tf.less_equal(p_image1_in_2[:, :, 1], height),
        tf.greater_equal(p_image1_in_2[:, :, 1], 0.))
    mask_w1_in_2 = tf.logical_and(tf.less_equal(p_image1_in_2[:, :, 0], width),
                                  tf.greater_equal(p_image1_in_2[:, :, 0], 0.))
    mask1_in_2 = tf.logical_and(tf.logical_and(mask_h1_in_2, mask_w1_in_2),
                                z1_c2 > 0)

    return mask1_in_2, mask2_in_1
Example #7
def orthonorm_op(x, epsilon=1e-7):
    '''
    Computes a matrix that orthogonalizes the input matrix x

    x:        an n x d input matrix
    epsilon:  small value added to the diagonal of x^T x to keep the Cholesky
              factorization numerically stable

    returns:    a d x d matrix, ortho_weights, which orthogonalizes x by
                right multiplication
    '''
    x_2 = K.dot(K.transpose(x), x)
    x_2 += K.eye(K.int_shape(x)[1]) * epsilon
    L = tf.cholesky(x_2)
    ortho_weights = tf.transpose(tf.matrix_inverse(L)) * tf.sqrt(
        tf.cast(tf.shape(x)[0], dtype=K.floatx()))
    return ortho_weights
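Ignoring the small epsilon jitter, the weights come from a Cholesky factorization of the Gram matrix: if $x^\top x = LL^\top$, then right-multiplying $x$ by $W = (L^{-1})^\top\sqrt{n}$ whitens its columns,

$$(xW)^\top(xW) = n\,L^{-1}\,(LL^\top)\,L^{-\top} = n\,I_d,$$

which is exactly the orthogonalizing property stated in the docstring.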
Example #8
def _forward(length, angle_x, angle_y, T):
    """ Given a articulations it calculates the update to the coord matrix and the location of the end point in global coords. """
    # update current transformation from local -> new local
    T_this = tf.matmul(
        _get_trans_mat_hom(-length),
        tf.matmul(_get_rot_mat_x_hom(-angle_x), _get_rot_mat_y_hom(-angle_y)))

    # trafo from global -> new local
    T = tf.matmul(T_this, T)

    # calculate global location of this point
    # x0 = tf.constant([[0.0], [0.0], [0.0], [1.0]])
    s = length.get_shape().as_list()
    x0 = _to_hom(tf.zeros((s[0], 3, 1)))
    x = tf.matmul(tf.matrix_inverse(T), x0)
    return x, T
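Because T accumulates the transform from global coordinates into the new local frame, the global position of the articulation's end point is obtained by pushing the local origin back through the inverse:

$$x_{\text{global}} = T^{-1} x_0, \qquad x_0 = (0, 0, 0, 1)^\top.$$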
Example #9
    def format_network_input(self, ref_image, psv_src_images, ref_pose,
                             psv_src_poses, planes, intrinsics):
        """Format the network input.

    Args:
      ref_image: reference source image [batch, height, width, 3]
      psv_src_images: stack of source images (excluding the ref image)
                      [batch, height, width, 3*(num_source -1)]
      ref_pose: reference world-to-camera pose (where PSV is constructed)
                [batch, 4, 4]
      psv_src_poses: input poses (world to camera) [batch, num_source-1, 4, 4]
      planes: list of scalar depth values for each plane
      intrinsics: camera intrinsics [batch, 3, 3]
    Returns:
      net_input: [batch, height, width, #planes, num_source*3]
    """
        _, num_psv_source, _, _ = psv_src_poses.get_shape().as_list()
        num_planes = tf.shape(planes)[0]

        net_input = []
        for i in range(num_psv_source):
            curr_pose = tf.matmul(psv_src_poses[:, i],
                                  tf.matrix_inverse(ref_pose))
            curr_image = psv_src_images[:, :, :, i * 3:(i + 1) * 3]
            curr_psv = pj.plane_sweep(curr_image, planes, curr_pose,
                                      intrinsics)
            net_input.append(curr_psv)

        net_input = tf.concat(net_input, axis=4)
        ref_img_stack = tf.tile(tf.expand_dims(ref_image, 3),
                                [1, 1, 1, num_planes, 1])
        net_input = tf.concat([net_input, ref_img_stack], axis=4)

        # Append normalized plane indices
        normalized_disp_inds = tf.reshape(tf.linspace(0.0, 1.0, num_planes),
                                          [1, 1, 1, num_planes, 1])
        sh = tf.shape(net_input)
        normalized_disp_inds_stack = tf.tile(normalized_disp_inds,
                                             [1, sh[1], sh[2], 1, 1])
        net_input = tf.concat([net_input, normalized_disp_inds_stack], axis=4)

        return net_input
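The pose fed to the plane sweep is the relative transform from the reference camera to the i-th source camera, composed from the two world-to-camera poses:

$$P_{\text{ref}\to i} = P_i\,P_{\text{ref}}^{-1},$$

where both $P_i$ and $P_{\text{ref}}$ are 4x4 world-to-camera matrices.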
Example #10
def inv_homography_dmat(k_t, rot, t, n_hat, a):
  """Computes M where M*(u,v,1) = d_t.

  Args:
      k_t: intrinsics for target cameras, are [...] X 3 X 3 matrices
      rot: relative rotation, are [...] X 3 X 3 matrices
      t: [...] X 3 X 1, translations from source to target camera
      n_hat: [...] X 1 X 3, plane normal w.r.t source camera frame
      a: [...] X 1 X 1, plane equation displacement
  Returns:
      d_mat: [...] X 1 X 3 matrices
  """
  with tf.name_scope('inv_homography'):
    rot_t = _transpose(rot)
    k_t_inv = tf.matrix_inverse(k_t, name='k_t_inv')

    denom = a - tf.matmul(tf.matmul(n_hat, rot_t), t)
    d_mat = divide_safe(
        -1 * tf.matmul(tf.matmul(n_hat, rot_t), k_t_inv), denom, name='dmat')
    return d_mat
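Written out, the row vector assembled above is

$$d = \frac{-\,\hat n^\top R^\top K_t^{-1}}{a - \hat n^\top R^\top t},$$

so that, per the docstring, multiplying it with homogeneous pixel coordinates $(u, v, 1)^\top$ yields $d_t$; divide_safe guards against a vanishing denominator.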
Example #11
def pixel2cam(depth, pixel_coords, intrinsics, is_homogeneous=True):
    """Transforms coordinates in the pixel frame to the camera frame.

  Args:
    depth: [batch, height, width]
    pixel_coords: homogeneous pixel coordinates [batch, 3, height, width]
    intrinsics: camera intrinsics [batch, 3, 3]
    is_homogeneous: return in homogeneous coordinates
  Returns:
    Coords in the camera frame [batch, 3 (4 if homogeneous), height, width]
  """
    batch, height, width = depth.get_shape().as_list()
    depth = tf.reshape(depth, [batch, 1, -1])
    pixel_coords = tf.reshape(pixel_coords, [batch, 3, -1])
    cam_coords = tf.matmul(tf.matrix_inverse(intrinsics), pixel_coords) * depth
    if is_homogeneous:
        ones = tf.ones([batch, 1, height * width])
        cam_coords = tf.concat([cam_coords, ones], axis=1)
    cam_coords = tf.reshape(cam_coords, [batch, -1, height, width])
    return cam_coords
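The core operation is the standard pinhole unprojection, applied per pixel with the depth value as the scale factor:

$$X_{\text{cam}} = D(u, v)\,K^{-1}\begin{pmatrix}u\\ v\\ 1\end{pmatrix},$$

optionally followed by appending a row of ones to return homogeneous coordinates.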
Example #12
    def latent_loss(self, prior):
        """
        Analytic expression for latent loss which can be used when posterior and prior are
        Gaussian

        https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Kullback%E2%80%93Leibler_divergence

        :param prior: Vertexwise Prior instance which defines the ``mean`` and ``cov`` vertices
                      attributes
        """
        prior_cov_inv = tf.matrix_inverse(prior.cov)
        mean_diff = tf.subtract(self.mean, prior.mean)

        term1 = tf.trace(tf.matmul(prior_cov_inv, self.cov))
        term2 = tf.matmul(tf.reshape(mean_diff, (self.nvertices, 1, -1)), prior_cov_inv)
        term3 = tf.reshape(tf.matmul(term2, tf.reshape(mean_diff, (self.nvertices, -1, 1))), [self.nvertices])
        term4 = prior.log_det_cov()
        term5 = self.log_det_cov()

        return self.log_tf(tf.identity(0.5*(term1 + term3 - self.nparams + term4 - term5), name="%s_latent_loss" % self.name))
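The five terms implement the closed-form KL divergence between two multivariate Gaussians, with posterior $q = \mathcal{N}(\mu_q, \Sigma_q)$, prior $p = \mathcal{N}(\mu_p, \Sigma_p)$ and $k$ = nparams, computed independently for each vertex:

$$\mathrm{KL}(q\,\|\,p) = \tfrac{1}{2}\Bigl[\operatorname{tr}(\Sigma_p^{-1}\Sigma_q) + (\mu_q-\mu_p)^\top\Sigma_p^{-1}(\mu_q-\mu_p) - k + \log\det\Sigma_p - \log\det\Sigma_q\Bigr].$$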
Example #13
def calc_homography_from_points(src_points, dst_points, is_matrix=True):
    """Computes a homography from four pairs of corresponding points.

  Args:
    src_points: source points of shape [4, 2] and of data type float32 or int32
    dst_points: target points of shape [4, 2] and of data type float32 or int32
    is_matrix: whether to represent the final homography as a matrix or a vector
  Returns:
    the output homography of data type float32. If is_matrix is True, it is of
      shape [3, 3]; otherwise [8]
  """
    mat_elements = []
    r_vec_elements = []
    for i in range(0, 4):
        rx = tf.to_float(
            tf.stack([
                src_points[i, 0], src_points[i, 1], 1, 0, 0, 0,
                -dst_points[i, 0] * src_points[i, 0],
                -dst_points[i, 0] * src_points[i, 1]
            ]))
        ry = tf.to_float(
            tf.stack([
                0, 0, 0, src_points[i, 0], src_points[i, 1], 1,
                -dst_points[i, 1] * src_points[i, 0],
                -dst_points[i, 1] * src_points[i, 1]
            ]))
        mat_elements.append(rx)
        mat_elements.append(ry)
        r_vec_elements.append(dst_points[i, 0])
        r_vec_elements.append(dst_points[i, 1])
    mat = tf.stack(mat_elements)
    r_vec = tf.reshape(tf.to_float(tf.stack(r_vec_elements)), [8, 1])
    inv_mat = tf.matrix_inverse(mat)
    transform = tf.reshape(tf.matmul(inv_mat, r_vec), [8])
    if is_matrix:
        hmg = tf.reshape(tf.concat([transform, [1.0]], 0), [3, 3])
    else:
        hmg = transform
    return hmg
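Each correspondence $(x, y) \mapsto (x', y')$ contributes the two rows built in the loop, and the four pairs give the 8x8 direct linear transform (DLT) system that the matrix inverse solves, with the ninth homography entry fixed to 1:

$$\begin{pmatrix} x & y & 1 & 0 & 0 & 0 & -x'x & -x'y \\ 0 & 0 & 0 & x & y & 1 & -y'x & -y'y \end{pmatrix}\begin{pmatrix} h_1 \\ \vdots \\ h_8 \end{pmatrix} = \begin{pmatrix} x' \\ y' \end{pmatrix}, \qquad h_9 = 1.$$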
Example #14
def camera_to_world_projection(depth, intrinsics, camera_to_world):
    """Project camera coordinates to world coordinates."""
    # depth: [height, width, 1] radial distance from the camera eye to each point.
    # intrinsics: [3, 3] camera intrinsic matrix.
    # camera_to_world: [3, 4] camera-to-world pose (rotation and translation).
    shape = depth.shape.as_list()
    height, width = shape[0], shape[1]
    xx, yy = tf.meshgrid(tf.lin_space(0., width - 1., width),
                         tf.lin_space(0., height - 1., height))
    p_pixel = tf.stack([xx, yy], axis=-1)
    p_pixel_homogeneous = tf.concat([p_pixel, tf.ones([height, width, 1])], -1)

    camera_to_world = tf.tile(camera_to_world[tf.newaxis, tf.newaxis, :],
                              [height, width, 1, 1])
    intrinsics = tf.tile(intrinsics[tf.newaxis, tf.newaxis, :],
                         [height, width, 1, 1])
    # Convert pixel coordinates (u, v, 1) to camera coordinates (x_c, y_c, f)
    # on the image plane.
    p_image = tf.squeeze(
        tf.matmul(tf.matrix_inverse(intrinsics),
                  tf.expand_dims(p_pixel_homogeneous, -1)), -1)

    lookat_axis = tf.tile(tf.constant([0., 0., 1.], shape=[1, 1, 3]),
                          [height, width, 1])
    z = depth * tf.reduce_sum(
        tf.math.l2_normalize(p_image, axis=-1) * lookat_axis,
        axis=-1,
        keepdims=True)
    p_camera = z * p_image
    # convert from OpenCV convention to OpenGL
    p_camera = p_camera * tf.constant([1., 1., -1.], shape=[1, 1, 3])
    p_camera_homogeneous = tf.concat(
        [p_camera, tf.ones(shape=[height, width, 1])], -1)
    # Convert camera coordinates to world coordinates.
    p_world = tf.squeeze(
        tf.matmul(camera_to_world, tf.expand_dims(p_camera_homogeneous, -1)),
        -1)
    return p_world
Example #15
def image_to_world_projection(depth, intrinsics, pose_c2w):
    """Project points on the image to the world frame.

  Args:
    depth: [HEIGHT, WIDTH, 1] the depth map contains the radial distance from
      the camera eye to each point corresponding to each pixel.
    intrinsics: [3, 3] camera's intrinsic matrix.
    pose_c2w: [3, 4] camera pose matrix (camera to world).

  Returns:
    [HEIGHT, WIDTH, 3] points in the world's coordinate frame.
  """
    shape = depth.shape.as_list()
    height, width = shape[0], shape[1]
    xx, yy = tf.meshgrid(tf.lin_space(0., width - 1., width),
                         tf.lin_space(0., height - 1., height))
    p_pixel_homogeneous = tf.concat(
        [tf.stack([xx, yy], axis=-1),
         tf.ones([height, width, 1])], -1)

    p_image = tf.squeeze(
        tf.matmul(tf.matrix_inverse(intrinsics[tf.newaxis, tf.newaxis, :]),
                  tf.expand_dims(p_pixel_homogeneous, -1)), -1)

    z = depth * tf.reduce_sum(
        tf.math.l2_normalize(p_image, axis=-1) * tf.constant([[[0., 0., 1.]]]),
        axis=-1,
        keepdims=True)
    p_camera = z * p_image
    # convert to OpenGL coordinate system.
    p_camera = p_camera * tf.constant([1., 1., -1.], shape=[1, 1, 3])
    p_camera_homogeneous = tf.concat(
        [p_camera, tf.ones(shape=[height, width, 1])], -1)
    # Convert camera coordinates to world coordinates.
    p_world = tf.squeeze(
        tf.matmul(pose_c2w[tf.newaxis, tf.newaxis, :],
                  tf.expand_dims(p_camera_homogeneous, -1)), -1)
    return p_world
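Because the depth map stores the radial distance from the camera eye rather than the distance along the optical axis, the z coordinate is recovered by projecting the normalized ray direction onto the look-at axis before scaling:

$$z = d\,(\hat p \cdot \hat z), \qquad \hat p = \frac{K^{-1}(u, v, 1)^\top}{\lVert K^{-1}(u, v, 1)^\top\rVert}, \qquad \hat z = (0, 0, 1)^\top,$$

after which the camera-space point is $z\,K^{-1}(u, v, 1)^\top$, flipped to the OpenGL convention and lifted to homogeneous coordinates.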
Example #16
def inv_homography(k_s, k_t, rot, t, n_hat, a):
  """Computes inverse homography matrix.

  Args:
      k_s: intrinsics for source cameras, are [...] X 3 X 3 matrices
      k_t: intrinsics for target cameras, are [...] X 3 X 3 matrices
      rot: relative rotation, are [...] X 3 X 3 matrices
      t: [...] X 3 X 1, translations from source to target camera
      n_hat: [...] X 1 X 3, plane normal w.r.t source camera frame
      a: [...] X 1 X 1, plane equation displacement
  Returns:
      homography: [...] X 3 X 3 inverse homography matrices
  """
  with tf.name_scope('inv_homography'):
    rot_t = _transpose(rot)
    k_t_inv = tf.matrix_inverse(k_t, name='k_t_inv')

    denom = a - tf.matmul(tf.matmul(n_hat, rot_t), t)
    numerator = tf.matmul(tf.matmul(tf.matmul(rot_t, t), n_hat), rot_t)
    inv_hom = tf.matmul(
        tf.matmul(k_s, rot_t + divide_safe(numerator, denom)),
        k_t_inv, name='inv_hom')
    return inv_hom
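Written as a single expression, the inverse homography assembled above is

$$H^{-1} = K_s\left(R^\top + \frac{R^\top t\,\hat n^\top R^\top}{a - \hat n^\top R^\top t}\right)K_t^{-1},$$

with divide_safe guarding the case where the denominator $a - \hat n^\top R^\top t$ approaches zero.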
Example #17
def tf_rotation_resampling(voxel_array, transformation_matrix, params, Scale_matrix = None, size=64, new_size=128):
    """
    Batch transformation and resampling function
    :param voxel_array: batch of voxels. Shape = [batch_size, height, width, depth, features]
    :param transformation_matrix: Rotation matrix in homogeneous coordinates. Shape = [batch_size, 4, 4]
    :param size: original size of the voxel array
    :param new_size: size of the resampled array
    :return: transformed voxel array
    """
    batch_size = tf.shape(voxel_array)[0]
    n_channels = voxel_array.get_shape()[4].value
    target = tf.zeros([ batch_size, new_size, new_size, new_size])
    #Aligning the centroid of the object (voxel grid) to origin for rotation,
    #then move the centroid back to the original position of the grid centroid
    T = tf.constant([[1,0,0, -size * 0.5],
                  [0,1,0, -size * 0.5],
                  [0,0,1, -size * 0.5],
                  [0,0,0,1]])
    # add one more dimension to T and then tile
    T = tf.tile(tf.reshape(T, (1, 4, 4)), [batch_size, 1, 1])

    # However, since the rotated grid might be out of bound for the original grid size,
    # move the rotated grid to a new bigger grid
    T_new_inv = tf.constant([[1, 0, 0, new_size * 0.5],
                             [0, 1, 0, new_size * 0.5],
                             [0, 0, 1, new_size * 0.5],
                             [0, 0, 0, 1]])
    T_new_inv = tf.tile(tf.reshape(T_new_inv, (1, 4, 4)), [batch_size, 1, 1])


    # Add the actual shifts in the x, y and z dimensions according to the input params
    x_shift = tf.reshape(params[:, 3], (batch_size, 1, 1))
    y_shift = tf.reshape(params[:, 4], (batch_size, 1, 1))
    z_shift = tf.reshape(params[:, 5], (batch_size, 1, 1))
    # ========================================================
    # Because tensorflow does not allow tensor item replacement
    # A new matrix needs to be created from scratch by concatenating different vectors into rows and stacking them up
    ones = tf.ones_like(x_shift)
    zeros = tf.zeros_like(x_shift)

    T_translate = tf.concat([
        tf.concat([ones, zeros, zeros, x_shift], axis=2),
        tf.concat([zeros, ones, zeros, y_shift], axis=2),
        tf.concat([zeros, zeros, ones, z_shift], axis=2),
        tf.concat([zeros, zeros, zeros, ones], axis=2)], axis=1)
    total_M = tf.matmul(tf.matmul(tf.matmul(tf.matmul(T_new_inv, T_translate), Scale_matrix), transformation_matrix), T)


    try:
        total_M = tf.matrix_inverse(total_M)

        total_M = total_M[:, 0:3, :]  # Ignore the homogeneous coordinate so the results are 3D vectors. shape: (batch * 3 * 4)
        grid = tf_voxel_meshgrid(new_size, new_size, new_size, homogeneous=True)
        # here a new_size^3 grid is created, but the T matrix only translates it by size * 0.5, which will not align the grid with the origin
        # shape: (4 * new_size^3), where 4 is 3 + 1 for homogeneous=True
        grid = tf.tile(tf.reshape(grid, (1, tf.to_int32(grid.get_shape()[0]), tf.to_int32(grid.get_shape()[1]))), [batch_size, 1, 1])
        grid_transform = tf.matmul(total_M, grid)  # (batch * 3 * 4) matmul (batch * 4 * new_size^3) is (3 * 4) matmul (4 * new_size^3) along batch
        x_s_flat = tf.reshape(grid_transform[:, 0, :], [-1])
        y_s_flat = tf.reshape(grid_transform[:, 1, :], [-1])
        z_s_flat = tf.reshape(grid_transform[:, 2, :], [-1])
        input_transformed = tf_interpolate(voxel_array, x_s_flat, y_s_flat, z_s_flat, [batch_size, new_size, new_size, new_size, n_channels])
        target = tf.reshape(input_transformed, [batch_size, new_size, new_size, new_size, n_channels])

        return target, grid_transform
    except tf.InvalidArgumentError:
        return None
Example #18
def multihead_invertible_1x1_conv_np(name, x, x_mask, multihead_split, inverse,
                                     dtype):
    """Multi-head 1X1 convolution on x."""
    batch_size, length, n_channels_all = common_layers.shape_list(x)
    assert n_channels_all % 32 == 0
    n_channels = 32
    n_1x1_heads = n_channels_all // n_channels

    def get_init_np():
        """Initializer function for multihead 1x1 parameters using numpy."""
        results = []
        for _ in range(n_1x1_heads):
            random_matrix = np.random.rand(n_channels, n_channels)
            np_w = scipy.linalg.qr(random_matrix)[0].astype("float32")
            np_p, np_l, np_u = scipy.linalg.lu(np_w)
            np_s = np.diag(np_u)
            np_sign_s = np.sign(np_s)[np.newaxis, :]
            np_log_s = np.log(np.abs(np_s))[np.newaxis, :]
            np_u = np.triu(np_u, k=1)
            results.append(
                np.concatenate([np_p, np_l, np_u, np_sign_s, np_log_s],
                               axis=0))
        return tf.convert_to_tensor(np.stack(results, axis=0))

    def get_mask_init():
        ones = tf.ones([n_1x1_heads, n_channels, n_channels], dtype=dtype)
        l_mask = tf.matrix_band_part(ones, -1, 0) - tf.matrix_band_part(
            ones, 0, 0)
        u_mask = tf.matrix_band_part(ones, 0, -1) - tf.matrix_band_part(
            ones, 0, 0)
        return tf.stack([l_mask, u_mask], axis=0)

    with tf.variable_scope(name, reuse=tf.AUTO_REUSE):
        params = tf.get_variable("params",
                                 initializer=get_init_np,
                                 dtype=dtype)
        mask_params = tf.get_variable("mask_params",
                                      initializer=get_mask_init,
                                      dtype=dtype,
                                      trainable=False)

        p = tf.stop_gradient(params[:, :n_channels, :])
        l = params[:, n_channels:2 * n_channels, :]
        u = params[:, 2 * n_channels:3 * n_channels, :]
        sign_s = tf.stop_gradient(params[:, 3 * n_channels, :])
        log_s = params[:, 3 * n_channels + 1, :]

        l_mask = mask_params[0]
        u_mask = mask_params[1]

        l_diag = l * l_mask + (tf.eye(
            n_channels, n_channels, [n_1x1_heads], dtype=dtype))
        u_diag = u * u_mask + (tf.matrix_diag(sign_s * tf.exp(log_s)))
        w = tf.matmul(p, tf.matmul(l_diag, u_diag))

        if multihead_split == "a":
            x = tf.reshape(x, [batch_size, length, n_channels, n_1x1_heads])
            x = tf.transpose(x, [3, 0, 1, 2])
        elif multihead_split == "c":
            x = tf.reshape(x, [batch_size, length, n_1x1_heads, n_channels])
            x = tf.transpose(x, [2, 0, 1, 3])
        else:
            raise ValueError("Multihead split not supported.")
        # [n_1x1_heads, batch_size, length, n_channels]

        if not inverse:
            # [n_1x1_heads, 1, n_channels, n_channels]
            x = tf.matmul(x, w[:, tf.newaxis, :, :])
        else:
            w_inv = tf.matrix_inverse(w)
            x = tf.matmul(x, w_inv[:, tf.newaxis, :, :])

        if multihead_split == "a":
            x = tf.transpose(x, [1, 2, 3, 0])
            x = tf.reshape(x, [batch_size, length, n_channels * n_1x1_heads])
        elif multihead_split == "c":
            x = tf.transpose(x, [1, 2, 0, 3])
            x = tf.reshape(x, [batch_size, length, n_1x1_heads * n_channels])
        else:
            raise ValueError("Multihead split not supported.")

        x_length = tf.reduce_sum(x_mask, -1)
        logabsdet = x_length * tf.reduce_sum(log_s)
        if inverse:
            logabsdet *= -1
    return x, logabsdet
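This is the LU parameterization of an invertible 1x1 convolution (as in Glow): per head the weight is assembled as

$$W = P\,L\,\bigl(U + \operatorname{diag}(\operatorname{sign}(s)\,e^{\log|s|})\bigr),$$

with $P$ and $\operatorname{sign}(s)$ frozen, $L$ unit lower-triangular and $U$ strictly upper-triangular, so the log-determinant of the Jacobian reduces to $\sum_i \log|s_i|$ per position; the code multiplies that sum by the unpadded sequence length and negates it on the inverse pass.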
Example #19
"""
dataset_without_bias = pd.read_csv(FILE_PATH, header=None)
num_examples = dataset_without_bias.shape[0]
dataset = np.c_[np.ones((num_examples, 1)), dataset_without_bias]
# The two dataset without different labels
data_label_0, data_label_1 = split_data(dataset)

# Define the formulas (note: building the graph this way can drastically drag down performance)
# m is the number of attributes
m = dataset.shape[1] - 1  # don't count the label itself

X = tf.placeholder(tf.float64, shape=(None, m), name='X')
t = tf.placeholder(tf.float64, shape=(None, 1), name='t')
n = tf.placeholder(tf.float64, name='n')
XT = tf.transpose(X)
w = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), t)

y = tf.matmul(X, w)

MSE = tf.div(tf.matmul(tf.transpose(y - t), y - t), n)

w_star = tf.placeholder(tf.float64, shape=(m, 1), name="w_star")
y_test = tf.matmul(X, w_star)
y_test_predicted = tf.round(y_test)

MSE_test = tf.abs(y_test_predicted - t)

for sample_size in range(40, 201, 40):
    accuracy_rate = []
    for exp in range(NUM_EXP):
        training_attr, training_label, test_attr, test_label = generate_training_and_test_dataset(
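The weight computation in the graph above is the closed-form (normal-equations) solution to linear least squares, followed by the mean squared error of the fit:

$$w = (X^\top X)^{-1} X^\top t, \qquad \mathrm{MSE} = \frac{1}{n}(Xw - t)^\top(Xw - t).$$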
Example #20
    def get(self):
        """ Provides input data to the graph. """
        # calculate size of each record (this lists what is contained in the db and how many bytes are occupied)
        record_bytes = 0

        encoding_bytes = 4
        kp_xyz_entries = 3 * self.num_kp
        record_bytes += encoding_bytes*kp_xyz_entries

        encoding_bytes = 4
        kp_uv_entries = 2 * self.num_kp
        record_bytes += encoding_bytes*kp_uv_entries

        kp_vis_entries = self.num_kp
        record_bytes += encoding_bytes*kp_vis_entries

        image_bytes = self.image_size[0] * self.image_size[1] * 3
        record_bytes += image_bytes

        """ READ DATA ITEMS"""
        # Start reader
        reader = tf.FixedLengthRecordReader(header_bytes=0, record_bytes=record_bytes)
        _, value = reader.read(tf.train.string_input_producer([self.path_to_db]))

        # decode to floats
        bytes_read = 0
        data_dict = dict()
        record_bytes_float32 = tf.decode_raw(value, tf.float32)

        # 1. Read keypoint xyz
        keypoint_xyz21 = tf.reshape(tf.slice(record_bytes_float32, [bytes_read//4], [kp_xyz_entries]), [self.num_kp, 3])
        bytes_read += encoding_bytes*kp_xyz_entries
        keypoint_xyz21 /= 1000.0  # scale to meters
        keypoint_xyz21 = self.convert_kp(keypoint_xyz21)

        # calculate wrist coord
        if self.use_wrist_coord:
            wrist_xyz = keypoint_xyz21[16, :] + 2.0*(keypoint_xyz21[0, :] - keypoint_xyz21[16, :])
            keypoint_xyz21 = tf.concat([tf.expand_dims(wrist_xyz, 0),
                                        keypoint_xyz21[1:, :]], 0)

        data_dict['keypoint_xyz21'] = keypoint_xyz21

        # 2. Read keypoint uv AND VIS
        keypoint_uv_vis21 = tf.reshape(tf.slice(record_bytes_float32, [bytes_read//4], [kp_uv_entries+kp_vis_entries]), [self.num_kp, 3])
        bytes_read += encoding_bytes*(kp_uv_entries+kp_vis_entries)
        keypoint_uv_vis21 = self.convert_kp(keypoint_uv_vis21)
        keypoint_uv21 = keypoint_uv_vis21[:, :2]
        keypoint_vis21 = tf.equal(keypoint_uv_vis21[:, 2], 1.0)

        # calculate wrist vis
        if self.use_wrist_coord:
            wrist_vis = tf.logical_or(keypoint_vis21[16], keypoint_vis21[0])
            keypoint_vis21 = tf.concat([tf.expand_dims(wrist_vis, 0),
                                        keypoint_vis21[1:]], 0)

            wrist_uv = keypoint_uv21[16, :] + 2.0*(keypoint_uv21[0, :] - keypoint_uv21[16, :])
            keypoint_uv21 = tf.concat([tf.expand_dims(wrist_uv, 0),
                                       keypoint_uv21[1:, :]], 0)

        data_dict['keypoint_vis21'] = keypoint_vis21

        if self.coord_uv_noise:
            noise = tf.truncated_normal([42, 2], mean=0.0, stddev=self.coord_uv_noise_sigma)
            keypoint_uv21 += noise

        data_dict['keypoint_uv21'] = keypoint_uv21

        # decode to uint8
        record_bytes_uint8 = tf.decode_raw(value, tf.uint8)

        # 4. Read image
        image = tf.reshape(tf.slice(record_bytes_uint8, [bytes_read], [image_bytes]),
                               [self.image_size[0], self.image_size[1], 3])
        image = tf.cast(image, tf.float32)
        bytes_read += image_bytes

        # subtract mean
        image = image / 255.0 - 0.5
        if self.hue_aug:
            image = tf.image.random_hue(image, self.hue_aug_max)
        data_dict['image'] = image

        """ CONSTANTS """
        # Camera intrinsics
        sx = 822.79041
        sy = 822.79041
        tx = 318.47345
        ty = 250.31296
        data_dict['cam_mat'] = tf.constant([[sx, 0.0, tx], [0.0, sy, ty], [0.0, 0.0, 1.0]])

        # Hand side: this dataset only contains left hands
        data_dict['hand_side'] = tf.one_hot(tf.constant(0, dtype=tf.int32), depth=2, on_value=1.0, off_value=0.0, dtype=tf.float32)

        assert bytes_read == record_bytes, "Doesn't add up."

        """ DEPENDENT DATA ITEMS: XYZ represenations. """
        # make coords relative to root joint
        kp_coord_xyz_root = keypoint_xyz21[0, :] # this is the palm coord
        kp_coord_xyz21_rel = keypoint_xyz21 - kp_coord_xyz_root  # relative coords in metric coords
        index_root_bone_length = tf.sqrt(tf.reduce_sum(tf.square(kp_coord_xyz21_rel[12, :] - kp_coord_xyz21_rel[11, :])))
        data_dict['keypoint_scale'] = index_root_bone_length
        data_dict['keypoint_xyz21_normed'] = kp_coord_xyz21_rel / index_root_bone_length  # normalized by length of 12->11

        # calculate local coordinates
        kp_coord_xyz21_local = bone_rel_trafo(data_dict['keypoint_xyz21_normed'])
        kp_coord_xyz21_local = tf.squeeze(kp_coord_xyz21_local)
        data_dict['keypoint_xyz21_local'] = kp_coord_xyz21_local

        # calculate viewpoint and coords in canonical coordinates
        kp_coord_xyz21_rel_can, rot_mat = canonical_trafo(data_dict['keypoint_xyz21_normed'])
        kp_coord_xyz21_rel_can, rot_mat = tf.squeeze(kp_coord_xyz21_rel_can), tf.squeeze(rot_mat)
        data_dict['keypoint_xyz21_can'] = kp_coord_xyz21_rel_can
        data_dict['rot_mat'] = tf.matrix_inverse(rot_mat)

        """ DEPENDENT DATA ITEMS: HAND CROP """
        if self.hand_crop:
            crop_center = keypoint_uv21[12, ::-1]

            # catch problem, when no valid kp available (happens almost never)
            crop_center = tf.cond(tf.reduce_all(tf.is_finite(crop_center)), lambda: crop_center,
                                  lambda: tf.constant([0.0, 0.0]))
            crop_center.set_shape([2, ])

            if self.crop_center_noise:
                noise = tf.truncated_normal([2], mean=0.0, stddev=self.crop_center_noise_sigma)
                crop_center += noise

            crop_scale_noise = tf.constant(1.0)
            if self.crop_scale_noise:
                    crop_scale_noise = tf.squeeze(tf.random_uniform([1], minval=1.0, maxval=1.2))

            if not self.use_wrist_coord:
                wrist_uv = keypoint_uv21[16, :] + 2.0*(keypoint_uv21[0, :] - keypoint_uv21[16, :])
                keypoint_uv21 = tf.concat([tf.expand_dims(wrist_uv, 0),
                                           keypoint_uv21[1:, :]], 0)

            # select visible coords only
            kp_coord_h = tf.boolean_mask(keypoint_uv21[:, 1], keypoint_vis21)
            kp_coord_w = tf.boolean_mask(keypoint_uv21[:, 0], keypoint_vis21)
            kp_coord_hw = tf.stack([kp_coord_h, kp_coord_w], 1)

            # determine size of crop (measure spatial extend of hw coords first)
            min_coord = tf.maximum(tf.reduce_min(kp_coord_hw, 0), 0.0)
            max_coord = tf.minimum(tf.reduce_max(kp_coord_hw, 0), self.image_size)

            # find out larger distance wrt the center of crop
            crop_size_best = 2*tf.maximum(max_coord - crop_center, crop_center - min_coord)
            crop_size_best = tf.reduce_max(crop_size_best)
            crop_size_best = tf.minimum(tf.maximum(crop_size_best, 50.0), 500.0)

            # catch problem, when no valid kp available
            crop_size_best = tf.cond(tf.reduce_all(tf.is_finite(crop_size_best)), lambda: crop_size_best,
                                  lambda: tf.constant(200.0))
            crop_size_best.set_shape([])

            # calculate necessary scaling
            scale = tf.cast(self.crop_size, tf.float32) / crop_size_best
            scale = tf.minimum(tf.maximum(scale, 1.0), 10.0)
            scale *= crop_scale_noise
            data_dict['crop_scale'] = scale

            if self.crop_offset_noise:
                noise = tf.truncated_normal([2], mean=0.0, stddev=self.crop_offset_noise_sigma)
                crop_center += noise

            # Crop image
            img_crop = crop_image_from_xy(tf.expand_dims(image, 0), crop_center, self.crop_size, scale)
            data_dict['image_crop'] = tf.squeeze(img_crop)

            # Modify uv21 coordinates
            crop_center_float = tf.cast(crop_center, tf.float32)
            keypoint_uv21_u = (data_dict['keypoint_uv21'][:, 0] - crop_center_float[1]) * scale + self.crop_size // 2
            keypoint_uv21_v = (data_dict['keypoint_uv21'][:, 1] - crop_center_float[0]) * scale + self.crop_size // 2
            keypoint_uv21 = tf.stack([keypoint_uv21_u, keypoint_uv21_v], 1)
            data_dict['keypoint_uv21'] = keypoint_uv21

            # Modify camera intrinsics
            scale = tf.reshape(scale, [1, ])
            scale_matrix = tf.dynamic_stitch([[0], [1], [2],
                                              [3], [4], [5],
                                              [6], [7], [8]], [scale, [0.0], [0.0],
                                                               [0.0], scale, [0.0],
                                                               [0.0], [0.0], [1.0]])
            scale_matrix = tf.reshape(scale_matrix, [3, 3])

            crop_center_float = tf.cast(crop_center, tf.float32)
            trans1 = crop_center_float[0] * scale - self.crop_size // 2
            trans2 = crop_center_float[1] * scale - self.crop_size // 2
            trans1 = tf.reshape(trans1, [1, ])
            trans2 = tf.reshape(trans2, [1, ])
            trans_matrix = tf.dynamic_stitch([[0], [1], [2],
                                              [3], [4], [5],
                                              [6], [7], [8]], [[1.0], [0.0], -trans2,
                                                               [0.0], [1.0], -trans1,
                                                               [0.0], [0.0], [1.0]])
            trans_matrix = tf.reshape(trans_matrix, [3, 3])

            data_dict['cam_mat'] = tf.matmul(trans_matrix, tf.matmul(scale_matrix, data_dict['cam_mat']))

        """ DEPENDENT DATA ITEMS: Scoremap from the SUBSET of 21 keypoints"""
        # create scoremaps from the subset of 2D annotations
        keypoint_hw21 = tf.stack([keypoint_uv21[:, 1], keypoint_uv21[:, 0]], -1)

        scoremap_size = self.image_size
        
        if self.hand_crop:
            scoremap_size = (self.crop_size, self.crop_size)

        scoremap = self.create_multiple_gaussian_map(keypoint_hw21,
                                                     scoremap_size,
                                                     self.sigma,
                                                     valid_vec=keypoint_vis21)
        
        if self.scoremap_dropout:
            scoremap = tf.nn.dropout(scoremap, self.scoremap_dropout_prob,
                                        noise_shape=[1, 1, 21])
            scoremap *= self.scoremap_dropout_prob

        data_dict['scoremap'] = scoremap

        if self.random_crop_to_size:
            tensor_stack = tf.concat([data_dict['image'],
                                      tf.expand_dims(tf.cast(data_dict['hand_parts'], tf.float32), -1),
                                      tf.cast(data_dict['hand_mask'], tf.float32)], 2)
            s = tensor_stack.get_shape().as_list()
            tensor_stack_cropped = tf.random_crop(tensor_stack,
                                                  [self.random_crop_size, self.random_crop_size, s[2]])
            data_dict = dict()  # delete everything else because the random cropping makes the data invalid anyway
            data_dict['image'], data_dict['hand_parts'], data_dict['hand_mask'] = tensor_stack_cropped[:, :, :3],\
                                                                                  tf.cast(tensor_stack_cropped[:, :, 3], tf.int32),\
                                                                                  tf.cast(tensor_stack_cropped[:, :, 4:], tf.int32)

        names, tensors = zip(*data_dict.items())

        if self.shuffle:
            tensors = tf.train.shuffle_batch_join([tensors],
                                                  batch_size=self.batch_size,
                                                  capacity=100,
                                                  min_after_dequeue=50,
                                                  enqueue_many=False)
        else:
            tensors = tf.train.batch_join([tensors],
                                          batch_size=self.batch_size,
                                          capacity=100,
                                          enqueue_many=False)

        return dict(zip(names, tensors))
Example #21
  def read_data(self):
    """Provides images and camera intrinsics."""
    with tf.name_scope('data_loading'):
      with tf.name_scope('enqueue_paths'):
        seed = random.randint(0, 2**31 - 1)
        self.file_lists = self.compile_file_list(self.data_dir, self.input_file)
        image_paths_queue = tf.train.string_input_producer(
            self.file_lists['image_file_list'], seed=seed,
            shuffle=self.shuffle,
            num_epochs=(1 if not self.shuffle else None)
        )
        seg_paths_queue = tf.train.string_input_producer(
            self.file_lists['segment_file_list'], seed=seed,
            shuffle=self.shuffle,
            num_epochs=(1 if not self.shuffle else None))
        cam_paths_queue = tf.train.string_input_producer(
            self.file_lists['cam_file_list'], seed=seed,
            shuffle=self.shuffle,
            num_epochs=(1 if not self.shuffle else None))
        img_reader = tf.WholeFileReader()
        _, image_contents = img_reader.read(image_paths_queue)
        seg_reader = tf.WholeFileReader()
        _, seg_contents = seg_reader.read(seg_paths_queue)
        if self.file_extension == 'jpg':
          image_seq = tf.image.decode_jpeg(image_contents)
          seg_seq = tf.image.decode_jpeg(seg_contents, channels=3)
        elif self.file_extension == 'png':
          image_seq = tf.image.decode_png(image_contents, channels=3)
          seg_seq = tf.image.decode_png(seg_contents, channels=3)

      with tf.name_scope('load_intrinsics'):
        cam_reader = tf.TextLineReader()
        _, raw_cam_contents = cam_reader.read(cam_paths_queue)
        rec_def = []
        for _ in range(9):
          rec_def.append([1.0])
        raw_cam_vec = tf.decode_csv(raw_cam_contents, record_defaults=rec_def)
        raw_cam_vec = tf.stack(raw_cam_vec)
        intrinsics = tf.reshape(raw_cam_vec, [3, 3])

      with tf.name_scope('convert_image'):
        image_seq = self.preprocess_image(image_seq)  # Converts to float.

      if self.random_color:
        with tf.name_scope('image_augmentation'):
          image_seq = self.augment_image_colorspace(image_seq)

      image_stack = self.unpack_images(image_seq)
      seg_stack = self.unpack_images(seg_seq)

      if self.flipping_mode != FLIP_NONE:
        random_flipping = (self.flipping_mode == FLIP_RANDOM)
        with tf.name_scope('image_augmentation_flip'):
          image_stack, seg_stack, intrinsics = self.augment_images_flip(
              image_stack, seg_stack, intrinsics,
              randomized=random_flipping)

      if self.random_scale_crop:
        with tf.name_scope('image_augmentation_scale_crop'):
          image_stack, seg_stack, intrinsics = self.augment_images_scale_crop(
              image_stack, seg_stack, intrinsics, self.img_height,
              self.img_width)

      with tf.name_scope('multi_scale_intrinsics'):
        intrinsic_mat = self.get_multi_scale_intrinsics(intrinsics,
                                                        self.num_scales)
        intrinsic_mat.set_shape([self.num_scales, 3, 3])
        intrinsic_mat_inv = tf.matrix_inverse(intrinsic_mat)
        intrinsic_mat_inv.set_shape([self.num_scales, 3, 3])

      if self.imagenet_norm:
        im_mean = tf.tile(
            tf.constant(IMAGENET_MEAN), multiples=[self.seq_length])
        im_sd = tf.tile(
            tf.constant(IMAGENET_SD), multiples=[self.seq_length])
        image_stack_norm = (image_stack - im_mean) / im_sd
      else:
        image_stack_norm = image_stack

      with tf.name_scope('batching'):
        if self.shuffle:
          (image_stack, image_stack_norm, seg_stack, intrinsic_mat,
           intrinsic_mat_inv) = tf.train.shuffle_batch(
               [image_stack, image_stack_norm, seg_stack, intrinsic_mat,
                intrinsic_mat_inv],
               batch_size=self.batch_size,
               num_threads=self.threads,
               capacity=self.queue_size + QUEUE_BUFFER * self.batch_size,
               min_after_dequeue=self.queue_size)
        else:
          (image_stack, image_stack_norm, seg_stack, intrinsic_mat,
           intrinsic_mat_inv) = tf.train.batch(
               [image_stack, image_stack_norm, seg_stack, intrinsic_mat,
                intrinsic_mat_inv],
               batch_size=self.batch_size,
               num_threads=1,
               capacity=self.queue_size + QUEUE_BUFFER * self.batch_size)
    return (image_stack, image_stack_norm, seg_stack, intrinsic_mat,
            intrinsic_mat_inv)
Example #22
    def render_envmap(self, cubes, cube_centers, cube_side_lengths,
                      cube_rel_shapes, cube_nest_inds, ref_pose, env_pose,
                      theta_res, phi_res, r_res):
        """Render environment map from volumetric lights.

    Args:
      cubes: input list of cubes in multiscale volume
      cube_centers: position of cube centers
      cube_side_lengths: side lengths of cubes
      cube_rel_shapes: size of "footprint" of each cube within next coarser cube
      cube_nest_inds: indices for cube "footprints"
      ref_pose: c2w pose of ref camera
      env_pose: c2w pose of environment map camera
      theta_res: resolution of theta (width) for environment map
      phi_res: resolution of phi (height) for environment map
      r_res: number of spherical shells to sample for environment map rendering

    Returns:
      An environment map at the input pose
    """
        num_scales = len(cubes)

        env_c2w = env_pose
        env2ref = tf.matmul(tf.matrix_inverse(ref_pose), env_c2w)

        # cube-->sphere resampling
        all_shells_list = []
        all_rad_list = []
        for i in range(num_scales):
            if i == num_scales - 1:
                # "finest" resolution cube, don't zero out
                cube_removed = cubes[i]
            else:
                # zero out areas covered by finer resolution cubes
                cube_shape = cubes[i].get_shape().as_list()[1]

                zm_y, zm_x, zm_z = tf.meshgrid(
                    tf.range(cube_nest_inds[i][0],
                             cube_nest_inds[i][0] + cube_rel_shapes[i]),
                    tf.range(cube_nest_inds[i][1],
                             cube_nest_inds[i][1] + cube_rel_shapes[i]),
                    tf.range(cube_nest_inds[i][2],
                             cube_nest_inds[i][2] + cube_rel_shapes[i]),
                    indexing='ij')
                inds = tf.stack([zm_y, zm_x, zm_z], axis=-1)
                updates = tf.to_float(tf.ones_like(zm_x))
                zero_mask = 1.0 - tf.scatter_nd(
                    inds, updates, shape=[cube_shape, cube_shape, cube_shape])
                cube_removed = zero_mask[tf.newaxis, :, :, :,
                                         tf.newaxis] * cubes[i]

            spheres_i, rad_i = pj.spherical_cubevol_resample(
                cube_removed, env2ref, cube_centers[i], cube_side_lengths[i],
                phi_res, theta_res, r_res)
            all_shells_list.append(spheres_i)
            all_rad_list.append(rad_i)

        all_shells = tf.concat(all_shells_list, axis=3)
        all_rad = tf.concat(all_rad_list, axis=0)
        all_shells = pj.interleave_shells(all_shells, all_rad)
        all_shells_envmap = pj.over_composite(all_shells)

        return all_shells_envmap, all_shells_list
Example #23
def augment_seqs_ava(raw_frames, num_frame, max_shift, batch_size=2,
                     queue_size=60, num_threads=3, train_height=128,
                     train_width=128, pixel_noise=0.0, mix=True, screen=False,
                     mode='train', to_gray=True):
  """Prepares training sequence batches from AVA dataset.

  Args:
    raw_frames: input video frames from AVA dataset
    num_frame: the number of frames in a sequence
    max_shift: the range each image corner point can move
    batch_size: the size of training or testing batches
    queue_size: the queue size of the shuffle buffer
    num_threads: the number of threads of the shuffle buffer
    train_height: the height of the training/testing images
    train_width: the width of the training/testing images
    pixel_noise: the magnitude of additive noises
    mix: whether to mix the magnitude of corner point shifts
    screen: whether to remove highly distorted homographies
    mode: 'train' or 'eval', specifying whether to prepare images for training
      or testing
    to_gray: whether to prepare grayscale or color training images
  Returns:
    a batch of training images and the corresponding ground-truth homographies
  """
  if to_gray:
    output_frames = tf.image.rgb_to_grayscale(raw_frames)
    num_channel = 1
  else:
    output_frames = raw_frames
    num_channel = 3

  frame_height = tf.to_float(tf.shape(output_frames)[1])
  frame_width = tf.to_float(tf.shape(output_frames)[2])

  if mix:
    p = tf.random_uniform([], minval=0, maxval=1, dtype=tf.float32)
    scale = (tf.to_float(tf.greater(p, 0.1)) + tf.to_float(tf.greater(p, 0.2))
             + tf.to_float(tf.greater(p, 0.3))) / 3
  else:
    scale = 1.0
  new_max_shift = max_shift * scale
  rand_shift_base = tf.random_uniform([num_frame, 8], minval=-new_max_shift,
                                      maxval=new_max_shift, dtype=tf.float32)
  crop_width = frame_width - 2 * new_max_shift - 1
  crop_height = frame_height - 2 * new_max_shift - 1
  ref_window = tf.to_float(tf.stack([0, 0, 0, crop_height - 1, crop_width - 1,
                                     0, crop_width - 1, crop_height - 1]))
  if screen:
    new_shift_list = []
    flag_list = []
    hmg_list = []
    src_points = tf.reshape(ref_window, [4, 2])
    for i in range(num_frame):
      dst_points = tf.reshape(rand_shift_base[i] + ref_window + new_max_shift,
                              [4, 2])
      hmg = calc_homography_from_points(src_points, dst_points)
      hmg_list.append(hmg)
    for i in range(num_frame - 1):
      hmg = tf.matmul(tf.matrix_inverse(hmg_list[i + 1]), hmg_list[i])
      shift = homography_to_shifts(hmg, crop_width, crop_height)
      angles = calc_homography_distortion(crop_width, crop_height, shift)
      max_angle = tf.reduce_min(angles)
      flag = tf.to_float(max_angle >= -0.707)
      flag_list.append(flag)
      if i > 0:
        new_shift = rand_shift_base[i] * flag * flag_list[i - 1]
      else:
        new_shift = rand_shift_base[i] * flag
      new_shift_list.append(new_shift)
    new_shift_list.append(rand_shift_base[num_frame - 1]
                          * flag_list[num_frame - 2])
    rand_shift = tf.stack(new_shift_list)
  else:
    rand_shift = rand_shift_base

  mat_scale = tf.diag(tf.stack([crop_width / train_width,
                                crop_height / train_height, 1.0]))
  inv_mat_scale = tf.matrix_inverse(mat_scale)
  hmg_list = []
  frame_list = []
  for i in range(num_frame):
    src_points = tf.reshape(ref_window, [4, 2])
    dst_points = tf.reshape(rand_shift[i] + ref_window + new_max_shift, [4, 2])
    hmg = calc_homography_from_points(src_points, dst_points)
    hmg_list.append(hmg)
    transform = tf.reshape(hmg, [9]) / hmg[2, 2]
    warped = contrib_image.transform(output_frames[i], transform[:8],
                                     'bilinear')
    crop_window = tf.expand_dims(tf.stack(
        [0, 0, (crop_height - 1) / (frame_height - 1),
         (crop_width - 1) / (frame_width - 1)]), 0)
    resized_base = tf.image.crop_and_resize(
        tf.expand_dims(warped, 0), crop_window, [0],
        [train_height, train_width])
    resized = tf.squeeze(resized_base, [0])

    noise_im = tf.truncated_normal(shape=tf.shape(resized), mean=0.0,
                                   stddev=pixel_noise, dtype=tf.float32)
    noise_frame = normalize_image(tf.to_float(resized) + noise_im)
    frame_list.append(noise_frame)
  noise_frames = tf.reshape(
      tf.stack(frame_list, 2),
      (train_height, train_width, num_frame * num_channel))

  label_list = []
  for i in range(num_frame - 1):
    hmg_combine = tf.matmul(tf.matrix_inverse(hmg_list[i + 1]), hmg_list[i])
    hmg_final = tf.matmul(inv_mat_scale, tf.matmul(hmg_combine, mat_scale))
    label = homography_to_shifts(hmg_final, train_width, train_height)
    label_list.append(label)
  labels = tf.reshape(tf.stack(label_list, 0), [(num_frame - 1) * 8])

  if mode == 'train':
    min_after_dequeue = int(queue_size / 3)
  else:
    min_after_dequeue = batch_size * 3
  batch_frames, batch_labels = tf.train.shuffle_batch(
      [noise_frames, labels], batch_size=batch_size,
      num_threads=num_threads, capacity=queue_size,
      min_after_dequeue=min_after_dequeue, enqueue_many=False)

  return tf.cast(batch_frames, tf.float32), tf.cast(batch_labels, tf.float32)
Example #24
    def read_data(self):
        """Provides images and camera intrinsics."""
        with tf.name_scope("data_loading"):
            with tf.name_scope("enqueue_paths"):
                seed = random.randint(0, 2**31 - 1)
                self.file_lists = self.compile_file_list(
                    self.data_dir, self.input_file)
                image_paths_queue = tf.train.string_input_producer(
                    self.file_lists["image_file_list"],
                    seed=seed,
                    shuffle=self.shuffle,
                    num_epochs=(1 if not self.shuffle else None),
                )
                seg_paths_queue = tf.train.string_input_producer(
                    self.file_lists["segment_file_list"],
                    seed=seed,
                    shuffle=self.shuffle,
                    num_epochs=(1 if not self.shuffle else None),
                )
                img_reader = tf.WholeFileReader()
                _, image_contents = img_reader.read(image_paths_queue)
                seg_reader = tf.WholeFileReader()
                _, seg_contents = seg_reader.read(seg_paths_queue)
                if self.file_extension == "jpg":
                    image_seq = tf.image.decode_jpeg(image_contents)
                    seg_seq = tf.image.decode_jpeg(seg_contents, channels=3)
                elif self.file_extension == "png":
                    image_seq = tf.image.decode_png(image_contents, channels=3)
                    seg_seq = tf.image.decode_png(seg_contents, channels=3)

            with tf.name_scope("load_intrinsics"):
                intrinsics = tf.random.uniform(shape=(3, 3))

            with tf.name_scope("convert_image"):
                image_seq = self.preprocess_image(
                    image_seq)  # Converts to float.

            if self.random_color:
                with tf.name_scope("image_augmentation"):
                    image_seq = self.augment_image_colorspace(image_seq)

            image_stack = self.unpack_images(image_seq)
            seg_stack = self.unpack_images(seg_seq)

            if self.flipping_mode != FLIP_NONE:
                random_flipping = self.flipping_mode == FLIP_RANDOM
                with tf.name_scope("image_augmentation_flip"):
                    image_stack, seg_stack, intrinsics = self.augment_images_flip(
                        image_stack,
                        seg_stack,
                        intrinsics,
                        randomized=random_flipping)

            if self.random_scale_crop:
                with tf.name_scope("image_augmentation_scale_crop"):
                    image_stack, seg_stack, intrinsics = self.augment_images_scale_crop(
                        image_stack,
                        seg_stack,
                        intrinsics,
                        self.img_height,
                        self.img_width,
                    )

            with tf.name_scope("multi_scale_intrinsics"):
                intrinsic_mat = self.get_multi_scale_intrinsics(
                    intrinsics, self.num_scales)
                intrinsic_mat.set_shape([self.num_scales, 3, 3])
                intrinsic_mat_inv = tf.matrix_inverse(intrinsic_mat)
                intrinsic_mat_inv.set_shape([self.num_scales, 3, 3])

            if self.imagenet_norm:
                im_mean = tf.tile(tf.constant(IMAGENET_MEAN),
                                  multiples=[self.seq_length])
                im_sd = tf.tile(tf.constant(IMAGENET_SD),
                                multiples=[self.seq_length])
                image_stack_norm = (image_stack - im_mean) / im_sd
            else:
                image_stack_norm = image_stack

            with tf.name_scope("batching"):
                if self.shuffle:
                    (
                        image_stack,
                        image_stack_norm,
                        seg_stack,
                        intrinsic_mat,
                        intrinsic_mat_inv,
                    ) = tf.train.shuffle_batch(
                        [
                            image_stack,
                            image_stack_norm,
                            seg_stack,
                            intrinsic_mat,
                            intrinsic_mat_inv,
                        ],
                        batch_size=self.batch_size,
                        num_threads=self.threads,
                        capacity=self.queue_size +
                        QUEUE_BUFFER * self.batch_size,
                        min_after_dequeue=self.queue_size,
                    )
                else:
                    (
                        image_stack,
                        image_stack_norm,
                        seg_stack,
                        intrinsic_mat,
                        intrinsic_mat_inv,
                    ) = tf.train.batch(
                        [
                            image_stack,
                            image_stack_norm,
                            seg_stack,
                            intrinsic_mat,
                            intrinsic_mat_inv,
                        ],
                        batch_size=self.batch_size,
                        num_threads=1,
                        capacity=self.queue_size +
                        QUEUE_BUFFER * self.batch_size,
                    )
        return (
            image_stack,
            image_stack_norm,
            seg_stack,
            intrinsic_mat,
            intrinsic_mat_inv,
        )
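The helper get_multi_scale_intrinsics is not shown here. A common convention (assumed; the real helper may differ) is to halve fx, fy, cx and cy per pyramid level, which keeps tf.matrix_inverse applicable to all scales at once:

import tensorflow.compat.v1 as tf

def make_multi_scale_intrinsics(intrinsics, num_scales):
    """Sketch of one common multi-scale convention: divide fx, fy, cx, cy by 2**s.
    `intrinsics` is a [3, 3] float tensor, `num_scales` a Python int."""
    per_scale = []
    for s in range(num_scales):
        fx = intrinsics[0, 0] / (2 ** s)
        fy = intrinsics[1, 1] / (2 ** s)
        cx = intrinsics[0, 2] / (2 ** s)
        cy = intrinsics[1, 2] / (2 ** s)
        per_scale.append(tf.stack([tf.stack([fx, 0.0, cx]),
                                   tf.stack([0.0, fy, cy]),
                                   tf.constant([0.0, 0.0, 1.0])]))
    intrinsic_mat = tf.stack(per_scale)  # [num_scales, 3, 3]
    return intrinsic_mat, tf.matrix_inverse(intrinsic_mat)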
Exemplo n.º 25
0
    def get(self):
        """ Provides input data to the graph. """
        # calculate the size of each record (listing what the db contains and how many bytes each field occupies)
        record_bytes = 2

        encoding_bytes = 4
        kp_xyz_entries = 3 * self.num_kp
        record_bytes += encoding_bytes * kp_xyz_entries

        encoding_bytes = 4
        kp_uv_entries = 2 * self.num_kp
        record_bytes += encoding_bytes * kp_uv_entries

        cam_matrix_entries = 9
        record_bytes += encoding_bytes * cam_matrix_entries

        image_bytes = self.image_size[0] * self.image_size[1] * 3
        record_bytes += image_bytes

        hand_parts_bytes = self.image_size[0] * self.image_size[1]
        record_bytes += hand_parts_bytes

        kp_vis_bytes = self.num_kp
        record_bytes += kp_vis_bytes
        """ READ DATA ITEMS"""
        # Start reader
        reader = tf.FixedLengthRecordReader(header_bytes=0,
                                            record_bytes=record_bytes)
        _, value = reader.read(
            tf.train.string_input_producer([self.path_to_db]))

        # decode to floats
        bytes_read = 0
        data_dict = dict()
        record_bytes_float32 = tf.decode_raw(value, tf.float32)

        # 1. Read keypoint xyz
        keypoint_xyz = tf.reshape(
            tf.slice(record_bytes_float32, [bytes_read // 4],
                     [kp_xyz_entries]), [self.num_kp, 3])
        bytes_read += encoding_bytes * kp_xyz_entries

        # calculate palm coord
        if not self.use_wrist_coord:
            palm_coord_l = tf.expand_dims(
                0.5 * (keypoint_xyz[0, :] + keypoint_xyz[12, :]), 0)
            palm_coord_r = tf.expand_dims(
                0.5 * (keypoint_xyz[21, :] + keypoint_xyz[33, :]), 0)
            keypoint_xyz = tf.concat([
                palm_coord_l, keypoint_xyz[1:21, :], palm_coord_r,
                keypoint_xyz[-20:, :]
            ], 0)

        data_dict['keypoint_xyz'] = keypoint_xyz

        # 2. Read keypoint uv
        keypoint_uv = tf.cast(
            tf.reshape(
                tf.slice(record_bytes_float32, [bytes_read // 4],
                         [kp_uv_entries]), [self.num_kp, 2]), tf.int32)
        bytes_read += encoding_bytes * kp_uv_entries

        keypoint_uv = tf.cast(keypoint_uv, tf.float32)

        # calculate palm coord
        if not self.use_wrist_coord:
            palm_coord_uv_l = tf.expand_dims(
                0.5 * (keypoint_uv[0, :] + keypoint_uv[12, :]), 0)
            palm_coord_uv_r = tf.expand_dims(
                0.5 * (keypoint_uv[21, :] + keypoint_uv[33, :]), 0)
            keypoint_uv = tf.concat([
                palm_coord_uv_l, keypoint_uv[1:21, :], palm_coord_uv_r,
                keypoint_uv[-20:, :]
            ], 0)

        if self.coord_uv_noise:
            noise = tf.truncated_normal([42, 2],
                                        mean=0.0,
                                        stddev=self.coord_uv_noise_sigma)
            keypoint_uv += noise

        data_dict['keypoint_uv'] = keypoint_uv

        # 3. Camera intrinsics
        cam_mat = tf.reshape(
            tf.slice(record_bytes_float32, [bytes_read // 4],
                     [cam_matrix_entries]), [3, 3])
        bytes_read += encoding_bytes * cam_matrix_entries
        data_dict['cam_mat'] = cam_mat

        # decode to uint8
        bytes_read += 2
        record_bytes_uint8 = tf.decode_raw(value, tf.uint8)

        # 4. Read image
        image = tf.reshape(
            tf.slice(record_bytes_uint8, [bytes_read], [image_bytes]),
            [self.image_size[0], self.image_size[1], 3])
        image = tf.cast(image, tf.float32)
        bytes_read += image_bytes

        # subtract mean
        image = image / 255.0 - 0.5
        if self.hue_aug:
            image = tf.image.random_hue(image, self.hue_aug_max)
        data_dict['image'] = image

        # 5. Read mask
        hand_parts_mask = tf.reshape(
            tf.slice(record_bytes_uint8, [bytes_read], [hand_parts_bytes]),
            [self.image_size[0], self.image_size[1]])
        hand_parts_mask = tf.cast(hand_parts_mask, tf.int32)
        bytes_read += hand_parts_bytes
        data_dict['hand_parts'] = hand_parts_mask
        hand_mask = tf.greater(hand_parts_mask, 1)
        bg_mask = tf.logical_not(hand_mask)
        data_dict['hand_mask'] = tf.cast(tf.stack([bg_mask, hand_mask], 2),
                                         tf.int32)

        # 6. Read visibility
        keypoint_vis = tf.reshape(
            tf.slice(record_bytes_uint8, [bytes_read], [kp_vis_bytes]),
            [self.num_kp])
        keypoint_vis = tf.cast(keypoint_vis, tf.bool)
        bytes_read += kp_vis_bytes

        # calculate palm visibility
        if not self.use_wrist_coord:
            palm_vis_l = tf.expand_dims(
                tf.logical_or(keypoint_vis[0], keypoint_vis[12]), 0)
            palm_vis_r = tf.expand_dims(
                tf.logical_or(keypoint_vis[21], keypoint_vis[33]), 0)
            keypoint_vis = tf.concat([
                palm_vis_l, keypoint_vis[1:21], palm_vis_r, keypoint_vis[-20:]
            ], 0)
        data_dict['keypoint_vis'] = keypoint_vis

        assert bytes_read == record_bytes, "Doesn't add up."
        """ DEPENDENT DATA ITEMS: SUBSET of 21 keypoints"""
        # figure out dominant hand by analysis of the segmentation mask
        one_map, zero_map = tf.ones_like(hand_parts_mask), tf.zeros_like(
            hand_parts_mask)
        cond_l = tf.logical_and(tf.greater(hand_parts_mask, one_map),
                                tf.less(hand_parts_mask, one_map * 18))
        cond_r = tf.greater(hand_parts_mask, one_map * 17)
        hand_map_l = tf.where(cond_l, one_map, zero_map)
        hand_map_r = tf.where(cond_r, one_map, zero_map)
        num_px_left_hand = tf.reduce_sum(hand_map_l)
        num_px_right_hand = tf.reduce_sum(hand_map_r)

        # Produce the 21-keypoint subset using the segmentation masks.
        # We only keep the more prominent hand in each frame and discard the other set of keypoints.
        kp_coord_xyz_left = keypoint_xyz[:21, :]
        kp_coord_xyz_right = keypoint_xyz[-21:, :]

        cond_left = tf.logical_and(
            tf.cast(tf.ones_like(kp_coord_xyz_left), tf.bool),
            tf.greater(num_px_left_hand, num_px_right_hand))
        kp_coord_xyz21 = tf.where(cond_left, kp_coord_xyz_left,
                                  kp_coord_xyz_right)

        hand_side = tf.where(
            tf.greater(num_px_left_hand,
                       num_px_right_hand), tf.constant(0, dtype=tf.int32),
            tf.constant(1, dtype=tf.int32))  # left hand = 0; right hand = 1
        data_dict['hand_side'] = tf.one_hot(hand_side,
                                            depth=2,
                                            on_value=1.0,
                                            off_value=0.0,
                                            dtype=tf.float32)

        data_dict['keypoint_xyz21'] = kp_coord_xyz21

        # make coords relative to root joint
        kp_coord_xyz_root = kp_coord_xyz21[0, :]  # this is the palm coord
        kp_coord_xyz21_rel = kp_coord_xyz21 - kp_coord_xyz_root  # relative coords in metric coords
        index_root_bone_length = tf.sqrt(
            tf.reduce_sum(
                tf.square(kp_coord_xyz21_rel[12, :] -
                          kp_coord_xyz21_rel[11, :])))
        data_dict['keypoint_scale'] = index_root_bone_length
        data_dict[
            'keypoint_xyz21_normed'] = kp_coord_xyz21_rel / index_root_bone_length  # normalized by length of 12->11

        # calculate local coordinates
        kp_coord_xyz21_local = bone_rel_trafo(
            data_dict['keypoint_xyz21_normed'])
        kp_coord_xyz21_local = tf.squeeze(kp_coord_xyz21_local)
        data_dict['keypoint_xyz21_local'] = kp_coord_xyz21_local

        # calculate viewpoint and coords in canonical coordinates
        kp_coord_xyz21_rel_can, rot_mat = canonical_trafo(
            data_dict['keypoint_xyz21_normed'])
        kp_coord_xyz21_rel_can, rot_mat = tf.squeeze(
            kp_coord_xyz21_rel_can), tf.squeeze(rot_mat)
        kp_coord_xyz21_rel_can = flip_right_hand(kp_coord_xyz21_rel_can,
                                                 tf.logical_not(cond_left))
        data_dict['keypoint_xyz21_can'] = kp_coord_xyz21_rel_can
        data_dict['rot_mat'] = tf.matrix_inverse(rot_mat)

        # Set of 21 for visibility
        keypoint_vis_left = keypoint_vis[:21]
        keypoint_vis_right = keypoint_vis[-21:]
        keypoint_vis21 = tf.where(cond_left[:, 0], keypoint_vis_left,
                                  keypoint_vis_right)
        data_dict['keypoint_vis21'] = keypoint_vis21

        # Set of 21 for UV coordinates
        keypoint_uv_left = keypoint_uv[:21, :]
        keypoint_uv_right = keypoint_uv[-21:, :]
        keypoint_uv21 = tf.where(cond_left[:, :2], keypoint_uv_left,
                                 keypoint_uv_right)
        data_dict['keypoint_uv21'] = keypoint_uv21
        """ DEPENDENT DATA ITEMS: HAND CROP """
        if self.hand_crop:
            crop_center = keypoint_uv21[12, ::-1]

            # catch the case when no valid keypoints are available (happens almost never)
            crop_center = tf.cond(tf.reduce_all(tf.is_finite(crop_center)),
                                  lambda: crop_center,
                                  lambda: tf.constant([0.0, 0.0]))
            crop_center.set_shape([
                2,
            ])

            if self.crop_center_noise:
                noise = tf.truncated_normal(
                    [2], mean=0.0, stddev=self.crop_center_noise_sigma)
                crop_center += noise

            crop_scale_noise = tf.constant(1.0)
            if self.crop_scale_noise:
                crop_scale_noise = tf.squeeze(
                    tf.random_uniform([1], minval=1.0, maxval=1.2))

            # select visible coords only
            kp_coord_h = tf.boolean_mask(keypoint_uv21[:, 1], keypoint_vis21)
            kp_coord_w = tf.boolean_mask(keypoint_uv21[:, 0], keypoint_vis21)
            kp_coord_hw = tf.stack([kp_coord_h, kp_coord_w], 1)

            # determine the crop size (measure the spatial extent of the hw coords first)
            min_coord = tf.maximum(tf.reduce_min(kp_coord_hw, 0), 0.0)
            max_coord = tf.minimum(tf.reduce_max(kp_coord_hw, 0),
                                   self.image_size)

            # find the larger distance w.r.t. the crop center
            crop_size_best = 2 * tf.maximum(max_coord - crop_center,
                                            crop_center - min_coord)
            crop_size_best = tf.reduce_max(crop_size_best)
            crop_size_best = tf.minimum(tf.maximum(crop_size_best, 50.0),
                                        500.0)

            # catch the case when no valid keypoints are available
            crop_size_best = tf.cond(
                tf.reduce_all(tf.is_finite(crop_size_best)),
                lambda: crop_size_best, lambda: tf.constant(200.0))
            crop_size_best.set_shape([])

            # calculate necessary scaling
            scale = tf.cast(self.crop_size, tf.float32) / crop_size_best
            scale = tf.minimum(tf.maximum(scale, 1.0), 10.0)
            scale *= crop_scale_noise
            data_dict['crop_scale'] = scale

            if self.crop_offset_noise:
                noise = tf.truncated_normal(
                    [2], mean=0.0, stddev=self.crop_offset_noise_sigma)
                crop_center += noise

            # Crop image
            img_crop = crop_image_from_xy(tf.expand_dims(image, 0),
                                          crop_center, self.crop_size, scale)
            data_dict['image_crop'] = tf.squeeze(img_crop)

            # Modify uv21 coordinates
            crop_center_float = tf.cast(crop_center, tf.float32)
            keypoint_uv21_u = (keypoint_uv21[:, 0] - crop_center_float[1]
                               ) * scale + self.crop_size // 2
            keypoint_uv21_v = (keypoint_uv21[:, 1] - crop_center_float[0]
                               ) * scale + self.crop_size // 2
            keypoint_uv21 = tf.stack([keypoint_uv21_u, keypoint_uv21_v], 1)
            data_dict['keypoint_uv21'] = keypoint_uv21

            # Modify camera intrinsics
            scale = tf.reshape(scale, [
                1,
            ])
            scale_matrix = tf.dynamic_stitch([
                [0], [1], [2], [3], [4], [5], [6], [7], [8]
            ], [scale, [0.0], [0.0], [0.0], scale, [0.0], [0.0], [0.0], [1.0]])
            scale_matrix = tf.reshape(scale_matrix, [3, 3])

            crop_center_float = tf.cast(crop_center, tf.float32)
            trans1 = crop_center_float[0] * scale - self.crop_size // 2
            trans2 = crop_center_float[1] * scale - self.crop_size // 2
            trans1 = tf.reshape(trans1, [
                1,
            ])
            trans2 = tf.reshape(trans2, [
                1,
            ])
            trans_matrix = tf.dynamic_stitch(
                [[0], [1], [2], [3], [4], [5], [6], [7], [8]],
                [[1.0], [0.0], -trans2, [0.0], [1.0], -trans1, [0.0], [0.0],
                 [1.0]])
            trans_matrix = tf.reshape(trans_matrix, [3, 3])

            data_dict['cam_mat'] = tf.matmul(trans_matrix,
                                             tf.matmul(scale_matrix, cam_mat))
        """ DEPENDENT DATA ITEMS: Scoremap from the SUBSET of 21 keypoints"""
        # create scoremaps from the subset of 2D annotations
        keypoint_hw21 = tf.stack([keypoint_uv21[:, 1], keypoint_uv21[:, 0]],
                                 -1)

        scoremap_size = self.image_size

        if self.hand_crop:
            scoremap_size = (self.crop_size, self.crop_size)

        scoremap = self.create_multiple_gaussian_map(keypoint_hw21,
                                                     scoremap_size,
                                                     self.sigma,
                                                     valid_vec=keypoint_vis21)

        if self.scoremap_dropout:
            scoremap = tf.nn.dropout(scoremap,
                                     self.scoremap_dropout_prob,
                                     noise_shape=[1, 1, 21])
            scoremap *= self.scoremap_dropout_prob

        data_dict['scoremap'] = scoremap

        if self.scale_to_size:
            image, keypoint_uv21, keypoint_vis21 = data_dict[
                'image'], data_dict['keypoint_uv21'], data_dict[
                    'keypoint_vis21']
            s = image.get_shape().as_list()
            image = tf.image.resize_images(image, self.scale_target_size)
            scale = (self.scale_target_size[0] / float(s[0]),
                     self.scale_target_size[1] / float(s[1]))
            keypoint_uv21 = tf.stack([
                keypoint_uv21[:, 0] * scale[1], keypoint_uv21[:, 1] * scale[0]
            ], 1)

            data_dict = dict(
            )  # delete everything else because the scaling makes the data invalid anyway
            data_dict['image'] = image
            data_dict['keypoint_uv21'] = keypoint_uv21
            data_dict['keypoint_vis21'] = keypoint_vis21

        elif self.random_crop_to_size:
            tensor_stack = tf.concat([
                data_dict['image'],
                tf.expand_dims(tf.cast(data_dict['hand_parts'], tf.float32),
                               -1),
                tf.cast(data_dict['hand_mask'], tf.float32)
            ], 2)
            s = tensor_stack.get_shape().as_list()
            tensor_stack_cropped = tf.random_crop(
                tensor_stack,
                [self.random_crop_size, self.random_crop_size, s[2]])
            data_dict = dict(
            )  # delete everything else because the random cropping makes the data invalid anyway
            data_dict['image'], data_dict['hand_parts'], data_dict['hand_mask'] = tensor_stack_cropped[:, :, :3],\
                                                                                  tf.cast(tensor_stack_cropped[:, :, 3], tf.int32),\
                                                                                  tf.cast(tensor_stack_cropped[:, :, 4:], tf.int32)

        names, tensors = zip(*data_dict.items())

        if self.shuffle:
            tensors = tf.train.shuffle_batch_join([tensors],
                                                  batch_size=self.batch_size,
                                                  capacity=100,
                                                  min_after_dequeue=50,
                                                  enqueue_many=False)
        else:
            tensors = tf.train.batch_join([tensors],
                                          batch_size=self.batch_size,
                                          capacity=100,
                                          enqueue_many=False)

        return dict(zip(names, tensors))
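The 3x3 matrices built with tf.dynamic_stitch at the end of the hand_crop branch can also be written with tf.stack. A sketch of that equivalent construction, assuming scale is a scalar float tensor and crop_center a (y, x) float tensor of shape [2]:

import tensorflow.compat.v1 as tf

def crop_adjusted_cam_mat(cam_mat, crop_center, crop_size, scale):
    """Equivalent construction of the cropped-camera intrinsics using tf.stack
    instead of tf.dynamic_stitch (a sketch; assumes scale is a scalar tensor and
    crop_center is a (y, x) float tensor of shape [2])."""
    half = tf.cast(crop_size // 2, tf.float32)
    scale_matrix = tf.stack([tf.stack([scale, 0.0, 0.0]),
                             tf.stack([0.0, scale, 0.0]),
                             tf.constant([0.0, 0.0, 1.0])])
    trans_matrix = tf.stack([tf.stack([1.0, 0.0, -(crop_center[1] * scale - half)]),
                             tf.stack([0.0, 1.0, -(crop_center[0] * scale - half)]),
                             tf.constant([0.0, 0.0, 1.0])])
    return tf.matmul(trans_matrix, tf.matmul(scale_matrix, cam_mat))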
Exemplo n.º 26
0
        # create the target vectors so we can iterate over them when checking predictions
        t_train = X_train[['Outcome']]  # training targets, kept 2-D to match the placeholder t below
        t_test_list = X_test['Outcome']

        del X_test['Outcome']  # remove the target column from the features
        del X_train['Outcome']  # remove the target column from the features

        n_train,m = X_train.shape
        n_test,m = X_test.shape

        # define the tensors
        X = tf.placeholder(tf.float64, shape=(None, m), name='X') # input features vector
        t = tf.placeholder(tf.float64, shape=(None, 1), name='t') # target values 
        n = tf.placeholder(tf.float64, name='n') # number of samples
        XT = tf.transpose(X)
        w = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT,X)), XT), t) # w = inv(X'*X)*X'*t

        # predicted value
        y = tf.matmul(X,w)

        # mean squared error of the prediction training set
        MSE = tf.div(tf.matmul(tf.transpose(y-t), y-t), n)

        w_star = tf.placeholder(tf.float64, shape=(m, 1), name='w_star')
        y_test = tf.matmul(X, w_star)

        with tf.Session() as sess:
            # running tensorflow sessions
            MSE_train_val, w_val = \
            sess.run([MSE, w], feed_dict={X : X_train, t : t_train, n : n_train})
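The graph above forms w = inv(X'X) X't with an explicit matrix inverse. A self-contained check on synthetic data, alongside the usually more stable tf.matrix_solve variant that solves the normal equations directly:

import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

# Synthetic data standing in for the (not shown here) X_train / t_train values.
X_np = np.random.rand(100, 3)
t_np = X_np @ np.array([[2.0], [-1.0], [0.5]]) + 0.01 * np.random.randn(100, 1)

X = tf.placeholder(tf.float64, shape=(None, 3), name='X')
t = tf.placeholder(tf.float64, shape=(None, 1), name='t')
XT = tf.transpose(X)
w_inv = tf.matmul(tf.matmul(tf.matrix_inverse(tf.matmul(XT, X)), XT), t)
w_solve = tf.matrix_solve(tf.matmul(XT, X), tf.matmul(XT, t))  # avoids the explicit inverse

with tf.Session() as sess:
    w1, w2 = sess.run([w_inv, w_solve], feed_dict={X: X_np, t: t_np})
    print(np.allclose(w1, w2))  # both recover roughly [2, -1, 0.5]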
Exemplo n.º 27
0
def posdef_inv_matrix_inverse(tensor, identity, damping):
  """Computes inverse(tensor + damping * identity) directly."""
  return tf.matrix_inverse(tensor + damping * identity)
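Since the argument is assumed positive definite, the same quantity can be computed from a Cholesky factorization, which is typically cheaper and better conditioned than an explicit inverse; a sketch with the same signature:

import tensorflow.compat.v1 as tf

def posdef_inv_cholesky(tensor, identity, damping):
  """Sketch of a Cholesky-based alternative for positive-definite matrices:
  solve (tensor + damping * identity) X = identity rather than forming the inverse."""
  chol = tf.cholesky(tensor + damping * identity)
  return tf.cholesky_solve(chol, identity)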
Exemplo n.º 28
0
  def build_train_graph(self,
                        inputs,
                        min_depth,
                        max_depth,
                        num_mpi_planes,
                        learning_rate=0.0002,
                        beta1=0.9,
                        vgg_model_file=None,
                        global_step=0):
    """Construct the training computation graph.

    Args:
      inputs: dictionary of tensors (see 'input_data' below) needed for training
      min_depth: minimum depth for the PSV and MPI planes
      max_depth: maximum depth for the PSV and MPI planes
      num_mpi_planes: number of MPI planes to infer
      learning_rate: learning rate
      beta1: hyperparameter for Adam
      vgg_model_file: path to vgg weights (needed when vgg loss is used)
      global_step: current optimization step
    Returns:
      A train_op to be used for training.
    """
    print("starting to build graph")
    with tf.name_scope("input_size_randomization"):
      dim_choices = tf.constant([[1, 16], [2, 32], [4, 32], [4, 64], [4, 128],
                                 [8, 32], [8, 64], [8, 128]],
                                dtype=tf.int32)
      rand_dim = tf.random_shuffle(dim_choices)[0, :]
      height_div = rand_dim[0]
      width_div = rand_dim[0]
      num_mpi_planes = rand_dim[1]
      tf.summary.scalar("num_mpi_planes", num_mpi_planes)

    with tf.name_scope("setup"):
      mpi_planes = self.inv_depths(min_depth, max_depth, num_mpi_planes)

    with tf.name_scope("input_data"):
      raw_tgt_image = inputs["tgt_image"]
      raw_ref_image = inputs["ref_image"]
      raw_src_images = inputs["src_images"]

      _, img_height, img_width, _ = raw_src_images.get_shape().as_list()
      img_height = img_height // height_div
      img_width = img_width // width_div

      raw_tgt_image = tf.image.convert_image_dtype(
          raw_tgt_image, dtype=tf.float32)
      raw_ref_image = tf.image.convert_image_dtype(
          raw_ref_image, dtype=tf.float32)
      raw_src_images = tf.image.convert_image_dtype(
          raw_src_images, dtype=tf.float32)
      raw_tgt_image = tf.image.resize_area(raw_tgt_image,
                                           [img_height, img_width])
      raw_ref_image = tf.image.resize_area(raw_ref_image,
                                           [img_height, img_width])
      raw_src_images = tf.image.resize_area(raw_src_images,
                                            [img_height, img_width])

      tgt_pose = inputs["tgt_pose"]
      ref_pose = inputs["ref_pose"]
      src_poses = inputs["src_poses"]
      intrinsics = inputs["intrinsics"]

      # Scale intrinsics based on size randomization
      intrinsics = tf.concat([
          intrinsics[:, 0:1, :] / tf.to_float(width_div),
          intrinsics[:, 1:2, :] / tf.to_float(height_div), intrinsics[:, 2:3, :]
      ],
                             axis=1)
      inputs["intrinsics"] = intrinsics

      _, num_source, _, _ = src_poses.get_shape().as_list()

    with tf.name_scope("inference"):
      print("setting up MPI inference")
      num_mpi_planes = tf.shape(mpi_planes)[0]
      pred = self.infer_mpi(raw_src_images, raw_ref_image, ref_pose, src_poses,
                            intrinsics, num_mpi_planes,
                            mpi_planes)
      rgba_layers = pred["rgba_layers"]
      rgba_layers_refine = pred["rgba_layers_refine"]
      stuff_behind = pred["stuff_behind"]
      refine_input_mpi = pred["refine_input_mpi"]
      psv = pred["psv"]

    with tf.name_scope("synthesis"):
      print("setting up rendering")
      rel_pose = tf.matmul(tgt_pose, tf.matrix_inverse(ref_pose))
      output_image, output_layers = self.mpi_render_view(
          rgba_layers, rel_pose, mpi_planes, intrinsics)
      output_alpha = output_layers[Ellipsis, -1]
      output_image_refine, _ = self.mpi_render_view(
          rgba_layers_refine, rel_pose, mpi_planes, intrinsics)

    with tf.name_scope("loss"):
      print("computing losses")
      # Mask loss for pixels outside reference frustum
      loss_mask = tf.where(
          tf.equal(
              tf.reduce_min(
                  tf.abs(tf.reduce_sum(output_layers, axis=-1)),
                  axis=3,
                  keep_dims=True), 0.0),
          tf.zeros_like(output_alpha[:, :, :, 0:1]),
          tf.ones_like(output_alpha[:, :, :, 0:1]))
      loss_mask = tf.stop_gradient(loss_mask)
      tf.summary.image("loss_mask", loss_mask)

      # Helper functions for loss
      def compute_error(real, fake, mask):
        return tf.reduce_mean(mask * tf.abs(fake - real))

      # Normalized VGG loss (from
      # https://github.com/CQFIO/PhotographicImageSynthesis)

      downsample = lambda tensor, ds: tf.nn.avg_pool(tensor, [1, ds, ds, 1],
                                                     [1, ds, ds, 1], "SAME")

      def vgg_loss(raw_tgt_image, output_image, loss_mask):
        """Compute VGG loss."""

        vgg_real = build_vgg19(raw_tgt_image * 255.0, vgg_model_file)
        rescaled_output_image = (output_image + 1.)/2. * 255.0
        vgg_fake = build_vgg19(
            rescaled_output_image, vgg_model_file, reuse=True)
        p0 = compute_error(vgg_real["input"], vgg_fake["input"], loss_mask)
        p1 = compute_error(vgg_real["conv1_2"],
                           vgg_fake["conv1_2"],
                           loss_mask)/2.6
        p2 = compute_error(vgg_real["conv2_2"],
                           vgg_fake["conv2_2"],
                           downsample(loss_mask, 2))/4.8
        p3 = compute_error(vgg_real["conv3_2"],
                           vgg_fake["conv3_2"],
                           downsample(loss_mask, 4))/3.7
        p4 = compute_error(vgg_real["conv4_2"],
                           vgg_fake["conv4_2"],
                           downsample(loss_mask, 8))/5.6
        p5 = compute_error(vgg_real["conv5_2"],
                           vgg_fake["conv5_2"],
                           downsample(loss_mask, 16))*10/1.5
        total_loss = p0+p1+p2+p3+p4+p5
        return total_loss, vgg_real, vgg_fake

      vgg_loss_initial, _, _ = vgg_loss(raw_tgt_image, output_image, loss_mask)
      tf.summary.scalar("vgg_loss_initial", vgg_loss_initial)
      total_loss = vgg_loss_initial

      vgg_loss_refine, _, _ = vgg_loss(raw_tgt_image, output_image_refine,
                                       loss_mask)
      tf.summary.scalar("vgg_loss_refine", vgg_loss_refine)
      total_loss += vgg_loss_refine

    with tf.name_scope("train_op"):
      print("setting up train op")
      train_vars = [var for var in tf.trainable_variables()]
      optim = tf.train.AdamOptimizer(learning_rate, beta1)
      grads_and_vars = optim.compute_gradients(total_loss, var_list=train_vars)
      train_op = [optim.apply_gradients(grads_and_vars)]

    # Summaries
    tf.summary.scalar("total_loss", total_loss)
    # Source images
    for i in range(num_source):
      src_image = raw_src_images[:, :, :, i*3:(i+1)*3]
      tf.summary.image("src_image_%d" % i, src_image)
    # Output image
    tf.summary.image("output_image", self.deprocess_image(output_image))
    # Refined output image
    tf.summary.image("output_image_refine",
                     self.deprocess_image(output_image_refine))
    # Target image
    tf.summary.image("tgt_image", raw_tgt_image)
    # Ref image
    tf.summary.image("ref_image", raw_ref_image)
    # Predicted color and alpha layers, and PSV
    num_summ = 16  # Number of plane summaries to show in tensorboard
    for i in range(num_summ):
      ind = tf.to_int32(i * num_mpi_planes/num_summ)
      rgb = rgba_layers[:, :, :, ind, :3]
      alpha = rgba_layers[:, :, :, ind, -1:]
      ref_plane = psv[:, :, :, ind, 3:6]
      source_plane = psv[:, :, :, ind, :3]
      output_rgb = output_layers[:, :, :, ind, :3]
      tf.summary.image("rgb_layer_%d" % i, self.deprocess_image(rgb))
      tf.summary.image("alpha_layer_%d" % i, alpha)
      tf.summary.image("rgba_layer_%d" % i, self.deprocess_image(rgb * alpha))
      tf.summary.image("psv_avg_%d" % i,
                       (self.deprocess_image(0.5*ref_plane + 0.5*source_plane)))
      tf.summary.image("output_rgb_%d" % i,
                       self.deprocess_image(output_rgb))
      tf.summary.image("psv_ref_%d" % i, self.deprocess_image(ref_plane))
      tf.summary.image("psv_source_%d" % i, self.deprocess_image(source_plane))

    # Cumulative rendered images and refined MPI
    for i in range(num_summ):
      ind = tf.to_int32(i * num_mpi_planes/num_summ)
      rgb = rgba_layers_refine[:, :, :, ind, :3]
      alpha = rgba_layers_refine[:, :, :, ind, 3:]
      render = stuff_behind[:, :, :, ind, :3]
      input_colors = refine_input_mpi[:, :, :, ind, :3]
      tf.summary.image("rgb_layer_refine_%d" % i, self.deprocess_image(rgb))
      tf.summary.image("alpha_layer_refine_%d" % i, alpha)
      tf.summary.image("rgba_layer_refine_%d" % i,
                       self.deprocess_image(rgb * alpha))
      tf.summary.image("cumulative_render_%d" % i, self.deprocess_image(render))
      tf.summary.image("input_colors_refine_%d" % i,
                       self.deprocess_image(input_colors))

    return train_op
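The synthesis block composes the poses as rel_pose = tgt_pose · inv(ref_pose), i.e. the transform from the reference camera's frame into the target camera's frame (assuming 4x4 pose matrices as above). A toy check of that composition:

import numpy as np
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()

ref_pose = tf.constant(np.eye(4)[None], dtype=tf.float32)      # reference camera at the origin
tgt_pose = tf.constant([[[1., 0., 0., 0.5],
                         [0., 1., 0., 0.0],
                         [0., 0., 1., 0.0],
                         [0., 0., 0., 1.0]]], dtype=tf.float32)  # target shifted 0.5 along x
rel_pose = tf.matmul(tgt_pose, tf.matrix_inverse(ref_pose))

with tf.Session() as sess:
    print(sess.run(rel_pose))  # equals tgt_pose here, since ref_pose is the identity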
Exemplo n.º 29
0
def overlap_mask(depth1, pose1_c2w, depth2, pose2_c2w, intrinsics):
    """Compute the overlap masks of two views using triangulation.

  The masks have the same spatial shape as the input images. A pixel is true if
  the corresponding point can be seen by both cameras.

  Args:
    depth1: [HEIGHT, WIDTH, 1] the depth map of the first view.
    pose1_c2w: [3, 4] camera pose matrix (camera to world) of the first view.
      pose1_c2w[:, :3] is the rotation and pose1_c2w[:, -1] is the translation.
    depth2: [HEIGHT, WIDTH, 1] the depth map of the second view.
    pose2_c2w: [3, 4] camera pose matrix (camera to world) of the second view.
      pose2_c2w[:, :3] is the rotation and pose2_c2w[:, -1] is the translation.
    intrinsics: [3, 3] camera's intrinsic matrix.

  Returns:
    Two [HEIGHT, WIDTH] boolean masks, the overlap masks of the two views respectively.
  """

    pose1_w2c = tf.matrix_inverse(
        tf.concat([pose1_c2w, tf.constant([[0., 0., 0., 1.]])], 0))[:3]
    pose2_w2c = tf.matrix_inverse(
        tf.concat([pose2_c2w, tf.constant([[0., 0., 0., 1.]])], 0))[:3]

    p_world1 = image_to_world_projection(depth1, intrinsics, pose1_c2w)
    p_image1_in_2, z1_c2 = world_to_image_projection(p_world1, intrinsics,
                                                     pose2_w2c)

    p_world2 = image_to_world_projection(depth2, intrinsics, pose2_c2w)
    p_image2_in_1, z2_c1 = world_to_image_projection(p_world2, intrinsics,
                                                     pose1_w2c)

    shape = depth1.shape.as_list()
    height, width = shape[0], shape[1]
    height = tf.cast(height, tf.float32)
    width = tf.cast(width, tf.float32)
    # Error tolerance.
    eps = 1e-4
    # check that points seen by camera 2 also project onto camera 1's image
    # plane and lie in front of camera 1.
    mask_h2_in_1 = tf.logical_and(
        tf.less_equal(p_image2_in_1[:, :, 1], height + eps),
        tf.greater_equal(p_image2_in_1[:, :, 1], 0. - eps))
    mask_w2_in_1 = tf.logical_and(
        tf.less_equal(p_image2_in_1[:, :, 0], width + eps),
        tf.greater_equal(p_image2_in_1[:, :, 0], 0. - eps))
    # check the projected points are within the image boundaries and in front of
    # the camera.
    mask2_in_1 = tf.logical_and(tf.logical_and(mask_h2_in_1, mask_w2_in_1),
                                tf.squeeze(z2_c1, -1) > 0)

    # check that points seen by camera 1 also project onto camera 2's image
    # plane and lie in front of camera 2.
    mask_h1_in_2 = tf.logical_and(
        tf.less_equal(p_image1_in_2[:, :, 1], height + eps),
        tf.greater_equal(p_image1_in_2[:, :, 1], 0. - eps))
    mask_w1_in_2 = tf.logical_and(
        tf.less_equal(p_image1_in_2[:, :, 0], width + eps),
        tf.greater_equal(p_image1_in_2[:, :, 0], 0. - eps))
    # check the projected points are within the image boundaries and in front of
    # the camera.
    mask1_in_2 = tf.logical_and(tf.logical_and(mask_h1_in_2, mask_w1_in_2),
                                tf.squeeze(z1_c2, -1) > 0)

    return mask1_in_2, mask2_in_1
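A minimal call sketch for overlap_mask; image_to_world_projection and world_to_image_projection must be importable from the same module, and every numeric value below (image size, focal length, baseline) is invented for illustration:

import numpy as np
import tensorflow.compat.v1 as tf

depth1 = tf.ones([240, 320, 1])
depth2 = tf.ones([240, 320, 1])
intrinsics = tf.constant([[200., 0., 160.],
                          [0., 200., 120.],
                          [0., 0., 1.]])
pose1_c2w = tf.constant(np.hstack([np.eye(3), np.zeros((3, 1))]), dtype=tf.float32)
pose2_c2w = tf.constant(np.hstack([np.eye(3), np.array([[0.1], [0.], [0.]])]),
                        dtype=tf.float32)
mask1_in_2, mask2_in_1 = overlap_mask(depth1, pose1_c2w, depth2, pose2_c2w, intrinsics)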
Exemplo n.º 30
0
import tensorflow.compat.v1 as tf
import numpy as np
from tensorflow.python.framework.ops import disable_eager_execution
disable_eager_execution()

A = tf.placeholder(dtype=tf.float64, shape=[2, 2])
b = tf.placeholder(dtype=tf.float64, shape=[2])
# Using matrix functions
A_pow = tf.sin(A)
A_relu = tf.nn.relu(A)

A_inverse = tf.matrix_inverse(A)

A_T = tf.transpose(A)
b_diag = tf.diag(b)
I = tf.eye(6)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

print('\n-------------A_pow-----------------------')
print(sess.run(A_pow, 
               feed_dict={A: [[1, 2], [-1, 1]], 
                          b: [1, 1]}))

print(sess.run(tf.sin(tf.constant([[1, 2], [-1, 1]],dtype=tf.float64))))

print('\n------------------------------------')
print(sess.run(A_relu, 
               feed_dict={A: [[1, 2], [-1, 1]],