def _using_motion_vector(depth, translation, rotation_angles, intrinsic_mat,
                         intrinsic_mat_inv=None):
  """A helper for using_motion_vector. See docstring therein."""
  if translation.shape.ndims not in (2, 4):
    raise ValueError('\'translation\' should have rank 2 or 4, not %d' %
                     translation.shape.ndims)
  if translation.shape[-1] != 3:
    raise ValueError('translation\'s last dimension should be 3, not %d' %
                     translation.shape[-1])
  if translation.shape.ndims == 2:
    translation = tf.expand_dims(tf.expand_dims(translation, 1), 1)

  _, height, width = tf.unstack(tf.shape(depth))
  grid = tf.squeeze(
      tf.stack(tf.meshgrid(tf.range(width), tf.range(height), (1,))), axis=3)
  grid = tf.to_float(grid)
  if intrinsic_mat_inv is None:
    intrinsic_mat_inv = tf.linalg.inv(intrinsic_mat)
  # Use the depth map and the inverse intrinsic matrix to generate a point
  # cloud xyz.
  xyz = tf.einsum('bij,jhw,bhw->bihw', intrinsic_mat_inv, grid, depth)

  # TPU aggressively pads tensors that have small dimensions. Therefore a
  # rotation of shape [..., 3, 3] would overflow HBM memory. To address this,
  # we represent the rotation as a 3x3 nested python tuple of tf.Tensors
  # (that is, we unroll the rotation matrix at the small dimensions). The 3x3
  # matrix multiplication is then done in a python loop, and tensors with
  # small dimensions are avoided.
  unstacked_xyz = tf.unstack(xyz, axis=1)
  unstacked_rotation_matrix = transform_utils.unstacked_matrix_from_angles(
      *tf.unstack(rotation_angles, axis=-1))
  rank_diff = (
      unstacked_xyz[0].shape.ndims -
      unstacked_rotation_matrix[0][0].shape.ndims)

  def expand_to_needed_rank(t):
    for _ in range(rank_diff):
      t = tf.expand_dims(t, -1)
    return t

  unstacked_rotated_xyz = [0.0] * 3
  for i in range(3):
    for j in range(3):
      unstacked_rotated_xyz[i] += expand_to_needed_rank(
          unstacked_rotation_matrix[i][j]) * unstacked_xyz[j]
  rotated_xyz = tf.stack(unstacked_rotated_xyz, axis=1)

  # Project the transformed point cloud back to the camera plane.
  pcoords = tf.einsum('bij,bjhw->bihw', intrinsic_mat, rotated_xyz)
  projected_translation = tf.einsum('bij,bhwj->bihw', intrinsic_mat,
                                    translation)
  pcoords += projected_translation
  x, y, z = tf.unstack(pcoords, axis=1)
  return x / z, y / z, z
def _using_transform_matrix(depth, transform, intrinsic_mat,
                            intrinsic_mat_inv=None):
  """A helper for using_transform_matrix. See docstring therein."""
  with tf.name_scope('Transform', values=[depth, transform, intrinsic_mat]):
    _, height, width = tf.unstack(tf.shape(depth))
    grid = tf.squeeze(
        tf.stack(tf.meshgrid(tf.range(width), tf.range(height), (1,))),
        axis=3)
    grid = tf.to_float(grid)
    if intrinsic_mat_inv is None:
      intrinsic_mat_inv = tf.linalg.inv(intrinsic_mat)
    cam_coords = tf.einsum('bij,jhw,bhw->bihw', intrinsic_mat_inv, grid,
                           depth)
    rotation = transform[:, :3, :3]
    translation = transform[:, :3, 3]
    xyz = (
        tf.einsum('bij,bjk,bkhw->bihw', intrinsic_mat, rotation, cam_coords) +
        _expand_last_dim_twice(
            tf.einsum('bij,bj->bi', intrinsic_mat, translation)))
    x, y, z = tf.unstack(xyz, axis=1)
    pixel_x = x / z
    pixel_y = y / z
    return pixel_x, pixel_y, z
def cutout(x, toss, ratio=(1, 2)):
  batch_size = tf.shape(x)[0]
  image_size = tf.shape(x)[1:3]
  cutout_size = image_size * ratio[0] // ratio[1]
  offset_x = tf.random.uniform(
      [batch_size, 1, 1],
      maxval=image_size[0] + (1 - cutout_size[0] % 2),
      dtype=tf.int32)
  offset_y = tf.random.uniform(
      [batch_size, 1, 1],
      maxval=image_size[1] + (1 - cutout_size[1] % 2),
      dtype=tf.int32)
  grid_batch, grid_x, grid_y = tf.meshgrid(
      tf.range(batch_size, dtype=tf.int32),
      tf.range(cutout_size[0], dtype=tf.int32),
      tf.range(cutout_size[1], dtype=tf.int32),
      indexing='ij')
  cutout_grid = tf.stack([
      grid_batch, grid_x + offset_x - cutout_size[0] // 2,
      grid_y + offset_y - cutout_size[1] // 2
  ], axis=-1)
  mask_shape = tf.stack([batch_size, image_size[0], image_size[1]])
  cutout_grid = tf.maximum(cutout_grid, 0)
  cutout_grid = tf.minimum(cutout_grid,
                           tf.reshape(mask_shape - 1, [1, 1, 1, 3]))
  mask = tf.maximum(
      1 - tf.reshape(toss, [-1, 1, 1]) * tf.scatter_nd(
          cutout_grid,
          tf.ones([batch_size, cutout_size[0], cutout_size[1]],
                  dtype=tf.float32), mask_shape), 0)
  x = x * tf.expand_dims(mask, axis=3)
  return x
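# A minimal usage sketch for `cutout` (illustrative values; assumes TF 1.x,
# e.g. `import tensorflow.compat.v1 as tf`): two 8x8 RGB images, with the
# per-example `toss` flag set to 1.0 so the cutout mask is applied to both.
images = tf.random.uniform([2, 8, 8, 3])
toss = tf.ones([2], dtype=tf.float32)
augmented = cutout(images, toss)  # default ratio (1, 2): masks up to a 4x4 region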
def _batch_slice(self, ary, start_ijk, w, batch_size):
  """Batched slicing of original grid.

  Args:
    ary: tensor, rank = 3.
    start_ijk: [batch_size, 3] tensor, starting index.
    w: width of cube to extract.
    batch_size: int, batch size. Unused; re-derived from `start_ijk` below.

  Returns:
    batched_slices: [batch_size, w, w, w] tensor, batched slices of ary.
  """
  batch_size = start_ijk.shape[0]
  ijk = tf.range(w, dtype=tf.int32)
  slice_idx = tf.meshgrid(ijk, ijk, ijk, indexing='ij')
  slice_idx = tf.stack(
      slice_idx, axis=-1)  # [in_grid_res, in_grid_res, in_grid_res, 3]
  slice_idx = tf.broadcast_to(slice_idx[tf.newaxis],
                              [batch_size, w, w, w, 3])
  offset = tf.broadcast_to(
      start_ijk[:, tf.newaxis, tf.newaxis, tf.newaxis, :],
      [batch_size, w, w, w, 3])
  # [batch_size, in_grid_res, in_grid_res, in_grid_res, 3]
  slice_idx += offset
  # [batch_size, in_grid_res, in_grid_res, in_grid_res]
  batched_slices = tf.gather_nd(ary, slice_idx)
  return batched_slices
def flow_gather(source_images, flows):
  """Gather from a tensor of images.

  Args:
    source_images: 5D tensor of images [B, H, W, D, 3]
    flows: 5D tensor of x/y offsets to gather for each slice (pixel offsets)

  Returns:
    warped_imgs_reshape: 5D tensor of gathered (warped) images [B, H, W, D, 3]
  """
  batchsize = tf.shape(source_images)[0]
  height = tf.shape(source_images)[1]
  width = tf.shape(source_images)[2]
  num_depths = tf.shape(source_images)[3]
  source_images_reshape = tf.reshape(
      tf.transpose(source_images, [0, 3, 1, 2, 4]),
      [batchsize * num_depths, height, width, 3])
  flows_reshape = tf.reshape(
      tf.transpose(flows, [0, 3, 1, 2, 4]),
      [batchsize * num_depths, height, width, 2])
  _, h, w = tf.meshgrid(
      tf.range(tf.to_float(batchsize * num_depths), dtype=tf.float32),
      tf.range(tf.to_float(height), dtype=tf.float32),
      tf.range(tf.to_float(width), dtype=tf.float32),
      indexing='ij')
  coords_y = tf.clip_by_value(h + flows_reshape[Ellipsis, 0], 0.0,
                              tf.to_float(height))
  coords_x = tf.clip_by_value(w + flows_reshape[Ellipsis, 1], 0.0,
                              tf.to_float(width))
  sampling_coords = tf.stack([coords_x, coords_y], axis=-1)
  warped_imgs = contrib_resampler.resampler(source_images_reshape,
                                            sampling_coords)
  warped_imgs_reshape = tf.transpose(
      tf.reshape(warped_imgs, [batchsize, num_depths, height, width, 3]),
      [0, 2, 3, 1, 4])
  return warped_imgs_reshape
def __init__(self, config, name='decoder'):
  super(Decoder, self).__init__(name=name)
  assert len(config['dec_channel']) == len(config['dec_kernel'])
  assert len(config['dec_channel']) == len(config['dec_shape'])
  with self._enter_variable_scope(check_same_graph=False):
    dec_shape_list = [(n, n) if isinstance(n, int) else n
                      for n in config['dec_shape']]
    plane_ht, plane_wd = dec_shape_list[0]
    with tf.name_scope('grid'):
      rows = tf.linspace(-1.0, 1.0, plane_ht)
      cols = tf.linspace(-1.0, 1.0, plane_wd)
      grid_rows, grid_cols = tf.meshgrid(rows, cols)
      self._grid = tf.expand_dims(
          tf.stack([grid_cols, grid_rows], axis=-1), axis=0)
    self._layers = []
    for idx, (channel, kernel, shape) in enumerate(
        zip(config['dec_channel'], config['dec_kernel'], dec_shape_list)):
      if (plane_ht, plane_wd) != shape:
        self._layers.append(
            partial(tf.image.resize_bilinear, size=shape,
                    name='resize_{}'.format(idx)))
      self._layers += [
          snt.Conv2D(channel, kernel, padding='VALID',
                     name='conv_{}'.format(idx)),
          partial(tf.nn.relu, name='relu_{}'.format(idx)),
      ]
      plane_ht -= kernel - 1
      plane_wd -= kernel - 1
    if [plane_ht, plane_wd] != config['image_shape'][:2]:
      self._layers.append(
          partial(tf.image.resize_bilinear, size=config['image_shape'][:2],
                  name='resize_out'))
    self._image_ch = config['image_shape'][-1]
    self._layers.append(snt.Conv2D(self._image_ch + 1, 1, name='conv_out'))
def img2mpi(self, img, depth, planedepths):
  """Compute ground truth MPI of visible content using depth map."""
  height = tf.shape(img)[1]
  width = tf.shape(img)[2]
  num_depths = planedepths.shape[0]
  depth_inds = (tf.to_float(num_depths) - 1) * (
      (1.0 / depth) - (1.0 / planedepths[0])) / (
          (1.0 / planedepths[-1]) - (1.0 / planedepths[0]))
  depth_inds = tf.round(depth_inds)
  depth_inds_tile = tf.to_int32(
      tf.tile(depth_inds[:, :, :, tf.newaxis], [1, 1, 1, num_depths]))
  _, _, d = tf.meshgrid(
      tf.range(height), tf.range(width), tf.range(num_depths), indexing='ij')
  mpi_colors = tf.to_float(
      tf.tile(img[:, :, :, tf.newaxis, :], [1, 1, 1, num_depths, 1]))
  mpi_alphas = tf.to_float(
      tf.where(
          tf.equal(depth_inds_tile, d), tf.ones_like(depth_inds_tile),
          tf.zeros_like(depth_inds_tile)))
  mpi = tf.concat([mpi_colors, mpi_alphas[Ellipsis, tf.newaxis]], axis=4)
  return mpi
def affine_grid_generator(height, width, theta):
  """
  This function returns a sampling grid, which when used with the bilinear
  sampler on the input feature map, will create an output feature map that
  is an affine transformation [1] of the input feature map.

  Input
  -----
  - height: desired height of grid/output. Used to downsample or upsample.
  - width: desired width of grid/output. Used to downsample or upsample.
  - theta: affine transform matrices of shape (num_batch, 2, 3). For each
    image in the batch, we have 6 theta parameters of the form (2x3) that
    define the affine transformation T.

  Returns
  -------
  - normalized grid (-1, 1) of shape (num_batch, 2, H, W). The 2nd dimension
    has 2 components: (x, y), which are the sampling points of the original
    image for each point in the target image.

  Note
  ----
  [1]: the affine transformation allows cropping, translation, and isotropic
  scaling.
  """
  # grab batch size
  num_batch = tf.shape(theta)[0]

  # create normalized 2D grid
  x = tf.linspace(-1.0, 1.0, width)
  y = tf.linspace(-1.0, 1.0, height)
  x_t, y_t = tf.meshgrid(x, y)

  # flatten
  x_t_flat = tf.reshape(x_t, [-1])
  y_t_flat = tf.reshape(y_t, [-1])

  # reshape to (x_t, y_t, 1)
  ones = tf.ones_like(x_t_flat)
  sampling_grid = tf.stack([x_t_flat, y_t_flat, ones])

  # repeat grid num_batch times
  sampling_grid = tf.expand_dims(sampling_grid, axis=0)
  sampling_grid = tf.tile(sampling_grid, tf.stack([num_batch, 1, 1]))

  # cast to float32 (required for matmul)
  theta = tf.cast(theta, 'float32')
  sampling_grid = tf.cast(sampling_grid, 'float32')

  # transform the sampling grid - batch multiply
  batch_grids = tf.matmul(theta, sampling_grid)
  # batch grid has shape (num_batch, 2, H*W)

  # reshape to (num_batch, 2, H, W)
  batch_grids = tf.reshape(batch_grids, [num_batch, 2, height, width])

  return batch_grids
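# A hedged sanity check for `affine_grid_generator`: with an identity affine
# matrix, the returned grid is exactly the normalized meshgrid, so sampling
# with it reproduces the input. Values below are illustrative only.
theta_identity = tf.constant([[[1.0, 0.0, 0.0],
                               [0.0, 1.0, 0.0]]])  # shape (1, 2, 3)
grids = affine_grid_generator(height=4, width=4, theta=theta_identity)
# grids has shape (1, 2, 4, 4); grids[:, 0] holds x, grids[:, 1] holds y,
# both in [-1, 1].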
def get_offset(self, cell_size: int, num_anchors: int):
  x = tf.range(cell_size, dtype=tf.float32)
  y = tf.range(cell_size, dtype=tf.float32)
  xx, yy = tf.meshgrid(x, y)
  offset = tf.stack([xx, yy], axis=-1)
  offset = tf.expand_dims(offset, axis=2)
  offset = tf.tile(offset, [1, 1, num_anchors, 1])
  return offset
def grid_coord(h, w, d):
  xl = tf.linspace(-1.0, 1.0, w)
  yl = tf.linspace(-1.0, 1.0, h)
  zl = tf.linspace(-1.0, 1.0, d)
  xs, ys, zs = tf.meshgrid(xl, yl, zl, indexing='ij')
  # `flatten` is an external helper assumed to reshape a tensor into a row.
  # Note: the original used the pre-TF-1.0 argument order tf.concat(0, [...]);
  # the axis is now passed as a keyword.
  g = tf.concat([flatten(xs), flatten(ys), flatten(zs)], axis=0)
  return g
def get_grid(shape, name='grid'):
  with tf.name_scope(name):
    rows = tf.linspace(-1.0, 1.0, shape[0])
    cols = tf.linspace(-1.0, 1.0, shape[1])
    grid_cols, grid_rows = tf.meshgrid(cols, rows)
    grid = tf.expand_dims(tf.stack([grid_cols, grid_rows], axis=-1), axis=0)
    return grid
def generate_heatmap_target_sigmas_rotation(heatmap_size, landmarks, sigmas,
                                            rotation, scale=1.0,
                                            normalize=False,
                                            data_format='channels_first'):
  """
  Generates heatmap images for the given parameters.
  :param heatmap_size: The image size of a single heatmap.
  :param landmarks: The list of landmarks. For each landmark, a heatmap on the
    given coordinate will be generated. If landmark.is_valid is False, then the
    heatmap will be empty.
  :param sigmas: The sigmas for the individual heatmaps. May be either fixed,
    or trainable.
  :param rotation: The rotation of the heatmap. May be either fixed, or
    trainable.
  :param scale: The scale factor for each heatmap. Each pixel value will be
    multiplied by this value.
  :param normalize: If true, each heatmap value will be multiplied by the
    normalization factor of the gaussian.
  :param data_format: The data format of the resulting tensor of heatmap
    images.
  :return: The tensor of heatmap images.
  """
  landmarks_shape = landmarks.get_shape().as_list()
  sigmas_shape = sigmas.get_shape().as_list()
  batch_size = landmarks_shape[0]
  num_landmarks = landmarks_shape[1]
  dim = landmarks_shape[2] - 1
  assert dim == 2, 'Currently only dim == 2 is supported.'
  assert len(heatmap_size) == dim, 'Dimensions do not match.'
  assert sigmas_shape[0] == num_landmarks, 'Number of sigmas does not match.'

  rotation_matrix = tf.stack(
      [tf.stack([tf.cos(rotation), -tf.sin(rotation)], axis=-1),
       tf.stack([tf.sin(rotation), tf.cos(rotation)], axis=-1)], axis=-1)
  rotation_matrix_t = tf.stack(
      [tf.stack([tf.cos(rotation), tf.sin(rotation)], axis=-1),
       tf.stack([-tf.sin(rotation), tf.cos(rotation)], axis=-1)], axis=-1)
  det_covariances = tf.reduce_prod(sigmas, axis=-1)
  sigmas_inv_eye = (tf.eye(dim, dim, batch_shape=[num_landmarks]) *
                    tf.expand_dims(1.0 / sigmas, -1))
  inv_covariances = tf.matmul(tf.matmul(rotation_matrix, sigmas_inv_eye),
                              rotation_matrix_t)

  if data_format == 'channels_first':
    heatmap_axis = 1
    landmarks_reshaped = tf.reshape(
        landmarks[..., 1:], [batch_size, num_landmarks] + [1] * dim + [dim])
    is_valid_reshaped = tf.reshape(
        landmarks[..., 0], [batch_size, num_landmarks] + [1] * dim)
    det_covariances_reshaped = tf.reshape(
        det_covariances, [1, num_landmarks] + [1] * dim)
    inv_covariances_reshaped = tf.reshape(
        inv_covariances, [1, num_landmarks] + [1] * dim + [dim, dim])
  else:
    heatmap_axis = dim + 1
    landmarks_reshaped = tf.reshape(
        landmarks[..., 1:], [batch_size] + [1] * dim + [num_landmarks, dim])
    is_valid_reshaped = tf.reshape(
        landmarks[..., 0], [batch_size] + [1] * dim + [num_landmarks])
    det_covariances_reshaped = tf.reshape(
        det_covariances, [1] + [1] * dim + [num_landmarks])
    inv_covariances_reshaped = tf.reshape(
        inv_covariances, [1] + [1] * dim + [num_landmarks, dim, dim])

  aranges = [np.arange(s) for s in heatmap_size]
  grid = tf.meshgrid(*aranges, indexing='ij')
  grid_stacked = tf.stack(grid, axis=dim)
  grid_stacked = tf.cast(grid_stacked, tf.float32)
  grid_stacked = tf.stack([grid_stacked] * batch_size, axis=0)
  grid_stacked = tf.stack([grid_stacked] * num_landmarks, axis=heatmap_axis)

  if normalize:
    scale /= tf.sqrt(tf.pow(2 * np.pi, dim) * det_covariances_reshaped)

  x_minus_mu = grid_stacked - landmarks_reshaped
  exp_factor = tf.reduce_sum(
      tf.reduce_sum(
          tf.expand_dims(x_minus_mu, -1) * inv_covariances_reshaped *
          tf.expand_dims(x_minus_mu, -2), axis=-1), axis=-1)
  heatmap = scale * tf.exp(-0.5 * exp_factor)
  heatmap_or_zeros = tf.where(
      (is_valid_reshaped + tf.zeros_like(heatmap)) > 0, heatmap,
      tf.zeros_like(heatmap))
  return heatmap_or_zeros
def yolo_layer(inputs, n_classes, anchors, img_size, data_format):
  """Creates Yolo final detection layer.

  Detects boxes with respect to anchors.

  Args:
    inputs: Tensor input.
    n_classes: Number of labels.
    anchors: A list of anchor sizes.
    img_size: The input size of the model.
    data_format: The input format.

  Returns:
    Tensor output.
  """
  n_anchors = len(anchors)

  inputs = tf.layers.conv2d(
      inputs,
      filters=n_anchors * (5 + n_classes),
      kernel_size=1,
      strides=1,
      use_bias=True,
      data_format=data_format)

  shape = inputs.get_shape().as_list()
  grid_shape = shape[2:4] if data_format == 'channels_first' else shape[1:3]
  if data_format == 'channels_first':
    inputs = tf.transpose(inputs, [0, 2, 3, 1])
  inputs = tf.reshape(
      inputs, [-1, n_anchors * grid_shape[0] * grid_shape[1], 5 + n_classes])

  strides = (img_size[0] // grid_shape[0], img_size[1] // grid_shape[1])

  box_centers, box_shapes, confidence, classes = tf.split(
      inputs, [2, 2, 1, n_classes], axis=-1)

  x = tf.range(grid_shape[0], dtype=tf.float32)
  y = tf.range(grid_shape[1], dtype=tf.float32)
  x_offset, y_offset = tf.meshgrid(x, y)
  x_offset = tf.reshape(x_offset, (-1, 1))
  y_offset = tf.reshape(y_offset, (-1, 1))
  x_y_offset = tf.concat([x_offset, y_offset], axis=-1)
  x_y_offset = tf.tile(x_y_offset, [1, n_anchors])
  x_y_offset = tf.reshape(x_y_offset, [1, -1, 2])

  box_centers = tf.nn.sigmoid(box_centers)
  box_centers = (box_centers + x_y_offset) * strides

  anchors = tf.tile(anchors, [grid_shape[0] * grid_shape[1], 1])
  box_shapes = tf.exp(box_shapes) * tf.to_float(anchors)

  confidence = tf.nn.sigmoid(confidence)
  classes = tf.nn.sigmoid(classes)

  inputs = tf.concat([box_centers, box_shapes, confidence, classes], axis=-1)
  return inputs
def graph_fn():
  y, x = tf.meshgrid(
      tf.range(32, dtype=tf.float32), tf.range(32, dtype=tf.float32))
  blist = box_list.BoxList(
      tf.constant([[0., 0., 32., 32.], [0., 0., 16., 16.],
                   [0.0, 0.0, 4.0, 4.0]]))
  classes = tf.constant([[0., 1., 0.], [1., 0., 0.], [0., 0., 1.]])
  result = ta_utils.coordinates_to_iou(y, x, blist, classes)
  return result
def _get_xy_ctr(self, score_size, score_offset, total_stride):
  fm_height, fm_width = score_size, score_size
  y_list = tf.linspace(0., fm_height - 1., fm_height)
  x_list = tf.linspace(0., fm_width - 1., fm_width)
  X, Y = tf.meshgrid(x_list, y_list)
  XY = score_offset + tf.stack([X, Y], axis=-1) * total_stride
  XY = tf.reshape(XY, (1, fm_height * fm_width, 2))
  return XY
def apply_line_prediction(inputs, features, blur_steps, learn_alpha=True,
                          name=None):
  """Applies "Line Prediction" layer to input images."""
  inputs.shape.assert_is_compatible_with([None, None, None, 6])

  with tf.name_scope(name, 'blur_prediction', values=[inputs, features]):
    with tf.name_scope(None, 'input_frames', values=[inputs]):
      frames = [inputs[:, :, :, :3], inputs[:, :, :, 3:]]

    with tf.name_scope(None, 'frame_size', values=[inputs, features]):
      shape = tf.shape(inputs)
      height = shape[1]
      width = shape[2]

    with tf.name_scope(None, 'identity_warp', values=[]):
      x_idx, y_idx = tf.meshgrid(tf.range(width), tf.range(height))
      identity_warp = tf.to_float(tf.stack([x_idx, y_idx], axis=-1))
      identity_warp = identity_warp[tf.newaxis, :, :, tf.newaxis, :]

      warp_steps = tf.to_float(tf.range(blur_steps - 1) + 1) / (
          blur_steps - 1)
      warp_steps = warp_steps[tf.newaxis, tf.newaxis, tf.newaxis, :,
                              tf.newaxis]

      max_warps = tf.to_float(tf.stack([width - 1, height - 1]))
      max_warps = max_warps[tf.newaxis, tf.newaxis, tf.newaxis, tf.newaxis, :]

    output_frames = []
    for frame in frames:
      with tf.name_scope(None, 'predict_blurs', values=[features]):
        flow = tf.layers.conv2d(features, 2, 1, padding='same')

        if learn_alpha:
          alpha = tf.layers.conv2d(
              features, blur_steps, 1, padding='same',
              activation=tf.nn.softmax)

      with tf.name_scope(None, 'apply_blurs', values=[]):
        with tf.name_scope(None, 'warp', values=[frame, flow]):
          warps = identity_warp + flow[:, :, :, tf.newaxis, :] * warp_steps
          warps = tf.clip_by_value(warps, 0.0, max_warps)
          warped = contrib_resampler.resampler(frame, warps)
          warped = tf.concat([frame[:, :, :, tf.newaxis, :], warped], axis=3)

        with tf.name_scope(None, 'apply_alpha', values=[frame, flow]):
          if learn_alpha:
            mask = alpha[:, :, :, :, tf.newaxis]
          else:
            mask = 1.0 / blur_steps
          output_frames.append(tf.reduce_sum(warped * mask, axis=3))

    with tf.name_scope(None, 'outputs', values=[output_frames]):
      output = tf.add_n(output_frames) / len(frames)
      return output
def tile_anchors(grid_height, grid_width, scales, aspect_ratios,
                 anchor_stride, anchor_offset):
  """
  It returns boxes in absolute coordinates.

  Arguments:
    grid_height: a scalar int tensor, size of the grid in the y direction.
    grid_width: a scalar int tensor, size of the grid in the x direction.
    scales: a float tensor with shape [N], it represents the scale of each
      box in the basis set.
    aspect_ratios: a float tensor with shape [N], it represents the aspect
      ratio of each box in the basis set.
    anchor_stride: a tuple of float scalar tensors, difference in centers
      between anchors for adjacent grid positions.
    anchor_offset: a tuple of float scalar tensors, center of the anchor on
      upper left element of the grid ((0, 0)-th anchor).

  Returns:
    a float tensor with shape [grid_height * grid_width * N, 4].
  """
  N = tf.size(scales)
  ratio_sqrts = tf.sqrt(aspect_ratios)
  heights = scales / ratio_sqrts
  widths = scales * ratio_sqrts
  # widths/heights = aspect_ratios,
  # and scales = sqrt(heights * widths)

  # get a grid of box centers
  y_centers = tf.to_float(
      tf.range(grid_height)) * anchor_stride[0] + anchor_offset[0]
  x_centers = tf.to_float(
      tf.range(grid_width)) * anchor_stride[1] + anchor_offset[1]
  x_centers, y_centers = tf.meshgrid(x_centers, y_centers)
  # they have shape [grid_height, grid_width]

  centers = tf.stack([y_centers, x_centers], axis=2)
  centers = tf.expand_dims(centers, 2)
  centers = tf.tile(centers, [1, 1, N, 1])
  # shape [grid_height, grid_width, N, 2]

  sizes = tf.stack([heights, widths], axis=1)
  sizes = tf.expand_dims(tf.expand_dims(sizes, 0), 0)
  sizes = tf.tile(sizes, [grid_height, grid_width, 1, 1])
  # shape [grid_height, grid_width, N, 2]

  boxes = tf.concat([centers - 0.5 * sizes, centers + 0.5 * sizes], axis=3)
  # it has shape [grid_height, grid_width, N, 4]
  boxes = tf.reshape(boxes, [-1, 4])
  return boxes
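# A small, hedged example of `tile_anchors` (illustrative numbers; TF 1.x
# assumed): a 2x3 grid with two boxes per location yields 2 * 3 * 2 = 12
# anchors; per the concat above, rows are in (ymin, xmin, ymax, xmax) order.
anchors = tile_anchors(
    grid_height=2, grid_width=3,
    scales=tf.constant([1.0, 2.0]),
    aspect_ratios=tf.constant([1.0, 0.5]),
    anchor_stride=(16.0, 16.0),
    anchor_offset=(8.0, 8.0))  # anchors has shape [12, 4]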
def _using_motion_vector(depth, translation, rotation_angles, intrinsic_mat):
  """A helper for using_motion_vector. See docstring therein."""
  if translation.shape.ndims not in (2, 4):
    raise ValueError(
        "'translation' should have rank 2 or 4, not %d"
        % translation.shape.ndims
    )
  if translation.shape[-1] != 3:
    raise ValueError(
        "translation's last dimension should be 3, not %d"
        % translation.shape[-1]
    )
  if translation.shape.ndims == 2:
    translation = tf.expand_dims(tf.expand_dims(translation, 1), 1)

  _, height, width = tf.unstack(tf.shape(depth))
  grid = tf.squeeze(
      tf.stack(tf.meshgrid(tf.range(width), tf.range(height), (1,))), axis=3
  )
  grid = tf.to_float(grid)
  intrinsic_mat_inv = tf.linalg.inv(intrinsic_mat)

  rot_mat = transform_utils.matrix_from_angles(rotation_angles)
  # We have to treat separately the case of a per-image rotation vector and a
  # per-pixel rotation field, because the broadcasting capabilities of einsum
  # are limited.
  if rotation_angles.shape.ndims == 2:
    # The calculation here is identical to the one in inverse_warp above.
    # However, we use einsum for better clarity. Under the hood, einsum
    # performs the reshaping and invocation of BatchMatMul, instead of doing
    # it manually, as in inverse_warp.
    projected_rotation = tf.einsum(
        "bij,bjk,bkl->bil", intrinsic_mat, rot_mat, intrinsic_mat_inv
    )
    pcoords = tf.einsum("bij,jhw,bhw->bihw", projected_rotation, grid, depth)
  elif rotation_angles.shape.ndims == 4:
    # We push the H and W dimensions to the end, and transpose the rotation
    # matrix elements (as noted above).
    rot_mat = tf.transpose(rot_mat, [0, 3, 4, 1, 2])
    projected_rotation = tf.einsum(
        "bij,bjkhw,bkl->bilhw", intrinsic_mat, rot_mat, intrinsic_mat_inv
    )
    pcoords = tf.einsum("bijhw,jhw,bhw->bihw", projected_rotation, grid, depth)

  projected_translation = tf.einsum("bij,bhwj->bihw", intrinsic_mat,
                                    translation)
  pcoords += projected_translation
  x, y, z = tf.unstack(pcoords, axis=1)
  return x / z, y / z, z
def generate_heatmap_target(heatmap_size, landmarks, sigmas, scale=1.0,
                            normalize=False, data_format='channels_first'):
  """
  Generates heatmap images for the given parameters.
  :param heatmap_size: The image size of a single heatmap.
  :param landmarks: The list of landmarks. For each landmark, a heatmap on the
    given coordinate will be generated. If landmark.is_valid is False, then the
    heatmap will be empty.
  :param sigmas: The sigmas for the individual heatmaps. May be either fixed,
    or trainable.
  :param scale: The scale factor for each heatmap. Each pixel value will be
    multiplied by this value.
  :param normalize: If true, each heatmap value will be multiplied by the
    normalization factor of the gaussian.
  :param data_format: The data format of the resulting tensor of heatmap
    images.
  :return: The tensor of heatmap images.
  """
  landmarks_shape = landmarks.get_shape().as_list()
  sigmas_shape = sigmas.get_shape().as_list()
  batch_size = landmarks_shape[0]
  num_landmarks = landmarks_shape[1]
  dim = landmarks_shape[2] - 1
  assert len(heatmap_size) == dim, 'Dimensions do not match.'
  assert sigmas_shape[0] == num_landmarks, 'Number of sigmas does not match.'

  if data_format == 'channels_first':
    heatmap_axis = 1
    landmarks_reshaped = tf.reshape(
        landmarks[..., 1:], [batch_size, num_landmarks] + [1] * dim + [dim])
    is_valid_reshaped = tf.reshape(
        landmarks[..., 0], [batch_size, num_landmarks] + [1] * dim)
    sigmas_reshaped = tf.reshape(sigmas, [1, num_landmarks] + [1] * dim)
  else:
    heatmap_axis = dim + 1
    landmarks_reshaped = tf.reshape(
        landmarks[..., 1:], [batch_size] + [1] * dim + [num_landmarks, dim])
    is_valid_reshaped = tf.reshape(
        landmarks[..., 0], [batch_size] + [1] * dim + [num_landmarks])
    sigmas_reshaped = tf.reshape(sigmas, [1] + [1] * dim + [num_landmarks])

  aranges = [np.arange(s) for s in heatmap_size]
  grid = tf.meshgrid(*aranges, indexing='ij')
  grid_stacked = tf.stack(grid, axis=dim)
  grid_stacked = tf.cast(grid_stacked, tf.float32)
  grid_stacked = tf.stack([grid_stacked] * batch_size, axis=0)
  grid_stacked = tf.stack([grid_stacked] * num_landmarks, axis=heatmap_axis)

  if normalize:
    scale /= tf.pow(np.sqrt(2 * np.pi) * sigmas_reshaped, dim)

  squared_distances = tf.reduce_sum(
      tf.pow(grid_stacked - landmarks_reshaped, 2.0), axis=-1)
  heatmap = scale * tf.exp(
      -squared_distances / (2 * tf.pow(sigmas_reshaped, 2)))
  heatmap_or_zeros = tf.where(
      (is_valid_reshaped + tf.zeros_like(heatmap)) > 0, heatmap,
      tf.zeros_like(heatmap))
  return heatmap_or_zeros
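# A hedged usage sketch for `generate_heatmap_target` (illustrative values;
# TF 1.x graph mode assumed): one image with two landmarks in a 64x64
# heatmap. Channel 0 of each landmark is the is_valid flag, followed by the
# two spatial coordinates.
landmarks = tf.constant([[[1.0, 20.0, 30.0],
                          [1.0, 40.0, 10.0]]])  # [batch=1, landmarks=2, 3]
sigmas = tf.constant([2.0, 3.0])                # one sigma per landmark
heatmaps = generate_heatmap_target([64, 64], landmarks, sigmas,
                                   data_format='channels_first')
# heatmaps has shape [1, 2, 64, 64].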
def make_density_summary(log_density_fn, num_bins=100):
  """Plot density."""
  if FLAGS.target in (dists.NINE_GAUSSIANS_DIST, dists.TWO_RINGS_DIST):
    bounds = (-2, 2)
  elif FLAGS.target == dists.CHECKERBOARD_DIST:
    bounds = (0, 1)
  else:
    raise ValueError('Unknown target: %s' % FLAGS.target)

  x = tf.range(
      bounds[0], bounds[1], delta=(bounds[1] - bounds[0]) / float(num_bins))
  grid_x, grid_y = tf.meshgrid(x, x, indexing="ij")
  grid_xy = tf.stack([grid_x, grid_y], axis=-1)

  log_z = log_density_fn(grid_xy)
  log_bigz = reduce_logavgexp(log_z)
  z = tf.exp(log_z - log_bigz)

  plot = tf.reshape(z, [num_bins, num_bins])
  return plot
def tf_voxel_meshgrid(height, width, depth, homogeneous=False):
  with tf.variable_scope('voxel_meshgrid'):
    # With 'ij' indexing, the first meshgrid argument varies along the first
    # output axis, so passing (depth, height, width) yields (z_t, y_t, x_t).
    # 'xy' indexing would swap the first two axes instead.
    z_t, y_t, x_t = tf.meshgrid(
        tf.range(depth, dtype=tf.float32),
        tf.range(height, dtype=tf.float32),
        tf.range(width, dtype=tf.float32),
        indexing='ij')
    # Reshape into a big list of slices one after another along the X, Y, Z
    # direction.
    x_t_flat = tf.reshape(x_t, (1, -1))
    y_t_flat = tf.reshape(y_t, (1, -1))
    z_t_flat = tf.reshape(z_t, (1, -1))
    # Vertical stack to create a (3, N) matrix of X, Y, Z coordinates.
    grid = tf.concat([x_t_flat, y_t_flat, z_t_flat], axis=0)
    if homogeneous:
      ones = tf.ones_like(x_t_flat)
      grid = tf.concat([grid, ones], axis=0)
    return grid
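# A quick, hedged sanity check for `tf_voxel_meshgrid` (TF 1.x assumed): a
# 2x2x2 voxel grid flattens to 8 points, giving a (3, 8) coordinate matrix,
# or (4, 8) when the homogeneous row of ones is appended.
grid = tf_voxel_meshgrid(2, 2, 2, homogeneous=True)  # shape (4, 8)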
def yolo(inputs, n_classes, anchors, img_size, data_format):
  n_anchors = len(anchors)

  inputs = tf.layers.conv2d(
      inputs,
      filters=n_anchors * (5 + n_classes),
      kernel_size=1,
      strides=1,
      use_bias=True,
      data_format=data_format)

  shape = inputs.get_shape().as_list()
  grid_shape = shape[2:4] if data_format == 'channels_first' else shape[1:3]
  if data_format == 'channels_first':
    inputs = tf.transpose(inputs, [0, 2, 3, 1])
  inputs = tf.reshape(
      inputs, [-1, n_anchors * grid_shape[0] * grid_shape[1], 5 + n_classes])

  strides = (img_size[0] // grid_shape[0], img_size[1] // grid_shape[1])

  box_centers, box_shapes, confidence, classes = tf.split(
      inputs, [2, 2, 1, n_classes], axis=-1)

  x = tf.range(grid_shape[0], dtype=tf.float32)
  y = tf.range(grid_shape[1], dtype=tf.float32)
  x_offset, y_offset = tf.meshgrid(x, y)
  x_offset = tf.reshape(x_offset, (-1, 1))
  y_offset = tf.reshape(y_offset, (-1, 1))
  x_y_offset = tf.concat([x_offset, y_offset], axis=-1)
  x_y_offset = tf.tile(x_y_offset, [1, n_anchors])
  x_y_offset = tf.reshape(x_y_offset, [1, -1, 2])

  box_centers = tf.nn.sigmoid(box_centers)
  box_centers = (box_centers + x_y_offset) * strides

  anchors = tf.tile(anchors, [grid_shape[0] * grid_shape[1], 1])
  box_shapes = tf.exp(box_shapes) * tf.to_float(anchors)

  confidence = tf.nn.sigmoid(confidence)
  classes = tf.nn.sigmoid(classes)

  inputs = tf.concat([box_centers, box_shapes, confidence, classes], axis=-1)
  return inputs
def create_centered_identity_transformation_field(shape, spacings):
  """Create 2D or 3D centered identity transformation field.

  Args:
    shape: 2- or 3-element list. The shape of the transformation field.
    spacings: 2- or 3-element list. The spacings of the transformation field.

  Returns:
    2D case: 3-D Tensor (x0, x1, comp) describing a 2D vector field.
    3D case: 4-D Tensor (x0, x1, x2, comp) describing a 3D vector field.
  """
  coords = []
  for i, size in enumerate(shape):
    spacing = spacings[i]
    coords.append(
        tf.linspace(-(size - 1) / 2 * spacing, (size - 1) / 2 * spacing,
                    size))
  permutation = np.roll(np.arange(len(coords) + 1), -1)
  return tf.transpose(tf.meshgrid(*coords, indexing="ij"), permutation)
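# A hedged sanity check for create_centered_identity_transformation_field:
# a 3x3 field with unit spacing spans [-1, 1] along each axis and is
# centered at zero.
field = create_centered_identity_transformation_field([3, 3], [1.0, 1.0])
# field has shape [3, 3, 2]; field[1, 1] == [0., 0.] (the center position).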
def image_shape_to_grids(height, width):
  """Computes xy-grids given the shape of the image.

  Args:
    height: The height of the image.
    width: The width of the image.

  Returns:
    A tuple of two tensors:
      y_grid: A float tensor with shape [height, width] representing the
        y-coordinate of each pixel grid.
      x_grid: A float tensor with shape [height, width] representing the
        x-coordinate of each pixel grid.
  """
  out_height = tf.cast(height, tf.float32)
  out_width = tf.cast(width, tf.float32)
  x_range = tf.range(out_width, dtype=tf.float32)
  y_range = tf.range(out_height, dtype=tf.float32)
  x_grid, y_grid = tf.meshgrid(x_range, y_range, indexing='xy')
  return (y_grid, x_grid)
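# A hedged example of `image_shape_to_grids`: for a 2x3 image, rows of
# y_grid are constant and columns of x_grid are constant.
y_grid, x_grid = image_shape_to_grids(2, 3)
# y_grid == [[0., 0., 0.], [1., 1., 1.]]
# x_grid == [[0., 1., 2.], [0., 1., 2.]]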
def warp(teninput, tenflow):
  """Warps an image with a dense flow field to obtain the motion-compensated frame."""
  batch_size, height, width, channels = (
      tf.shape(teninput)[0],
      tf.shape(teninput)[1],
      tf.shape(teninput)[2],
      tf.shape(teninput)[3],
  )
  grid_x, grid_y = tf.meshgrid(tf.range(width), tf.range(height))
  stacked_grid = tf.cast(tf.stack([grid_y, grid_x], axis=2), tenflow.dtype)
  batched_grid = tf.expand_dims(stacked_grid, axis=0)
  query_points_on_grid = batched_grid - tenflow
  query_points_flattened = tf.reshape(query_points_on_grid,
                                      [batch_size, height * width, 2])
  interpolated = interpolate_bilinear(teninput, query_points_flattened)
  interpolated = tf.reshape(interpolated,
                            [batch_size, height, width, channels])
  return interpolated
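# A hedged smoke test for `warp` (illustrative shapes; assumes the companion
# `interpolate_bilinear` helper is in scope): a zero flow field makes the
# query points coincide with the pixel grid, so the output equals the input
# up to interpolation error.
frame = tf.random.uniform([1, 4, 4, 3])
zero_flow = tf.zeros([1, 4, 4, 2])
same_frame = warp(frame, zero_flow)  # approximately equal to `frame`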
def _generate_anchors(self, feature_map_shape):
  """Generate anchors for an image.

  Using the feature map (the output of the pretrained network for an image)
  and the anchor_reference generated from the anchor config values, we
  generate a list of anchors.

  Anchors are just fixed bounding boxes of different ratios and sizes that
  are uniformly generated throughout the image.

  Args:
    feature_map_shape: Shape of the convolutional feature map used as input
      for the RPN. Should be (batch, height, width, depth).

  Returns:
    all_anchors: A flattened Tensor with all the anchors of shape
      `(num_anchors_per_points * feature_width * feature_height, 4)` using
      the (x1, y1, x2, y2) convention.
  """
  with tf.variable_scope('generate_anchors'):
    grid_width = feature_map_shape[2]  # width
    grid_height = feature_map_shape[1]  # height
    shift_x = tf.range(grid_width) * self._anchor_stride
    shift_y = tf.range(grid_height) * self._anchor_stride
    shift_x, shift_y = tf.meshgrid(shift_x, shift_y)

    shift_x = tf.reshape(shift_x, [-1])
    shift_y = tf.reshape(shift_y, [-1])

    shifts = tf.stack([shift_x, shift_y, shift_x, shift_y], axis=0)
    shifts = tf.transpose(shifts)
    # Shifts is now a (H x W, 4) Tensor.

    # Expand dims to use broadcasting sum.
    all_anchors = (
        np.expand_dims(self._anchor_reference, axis=0) +
        tf.expand_dims(shifts, axis=1))
    # Flatten
    all_anchors = tf.reshape(all_anchors, (-1, 4))
    return all_anchors
def basis(sample_paths):
  """Computes polynomial basis expansion at the given sample points.

  Args:
    sample_paths: A `Tensor` of either `float32` or `float64` dtype and of
      shape `[num_samples, dim]` where `dim` has to be statically known.

  Returns:
    A `Tensor` of shape `[(degree + 1)**dim, num_samples]`.
  """
  samples = tf.convert_to_tensor(sample_paths)
  dim = samples.shape.as_list()[-1]
  # `degree` is a free variable supplied by the enclosing scope.
  grid = tf.range(0, degree + 1, dtype=samples.dtype)

  samples_centered = samples - tf.math.reduce_mean(samples, axis=0)
  samples_centered = tf.expand_dims(samples_centered, -2)
  grid = tf.meshgrid(*(dim * [grid]))
  grid = tf.reshape(tf.stack(grid, -1), [-1, dim])
  # Shape [num_samples, (degree + 1)**dim]
  basis_expansion = tf.reduce_prod(samples_centered**grid, -1)
  return tf.transpose(basis_expansion)
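# A hedged usage sketch for `basis`: `degree` must be bound in the enclosing
# scope (set explicitly here for illustration only). With degree = 1 and
# dim = 2, the exponent grid is {0, 1}^2, so the expansion has 4 rows.
degree = 1
samples = tf.constant([[0.0, 1.0], [2.0, 3.0]])  # [num_samples=2, dim=2]
expansion = basis(samples)  # shape [(degree + 1)**dim, num_samples] = [4, 2]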
def _CreateRampTestImages(self, batch_size, height, width):
  """Creates a batch of test images of given size.

  Args:
    batch_size: Number of images to stack into a batch.
    height: Height of the image.
    width: Width of the image.

  Returns:
    images: Tensor of shape [batch_size, height, width, 3]. In each image
      the R-channel values are equal to the x coordinate of the pixel, and
      the G- and B-channel values are equal to the y coordinate.
  """
  mesh_x, mesh_y = tf.meshgrid(
      np.arange(width, dtype=np.float32), np.arange(height, dtype=np.float32))
  mesh_x = tf.expand_dims(mesh_x, 2)
  mesh_y = tf.expand_dims(mesh_y, 2)
  image = tf.concat([mesh_x, mesh_y, mesh_y], 2)
  image = tf.expand_dims(image, 0)
  images = tf.tile(image, [batch_size, 1, 1, 1])
  return images
def image_to_world_projection(depth, intrinsics, pose_c2w):
  """Project points on the image to the world frame.

  Args:
    depth: [HEIGHT, WIDTH, 1] the depth map contains the radial distance from
      the camera eye to each point corresponding to each pixel.
    intrinsics: [3, 3] camera's intrinsic matrix.
    pose_c2w: [3, 4] camera pose matrix (camera to world).

  Returns:
    [HEIGHT, WIDTH, 3] points in the world's coordinate frame.
  """
  shape = depth.shape.as_list()
  height, width = shape[0], shape[1]
  xx, yy = tf.meshgrid(
      tf.lin_space(0., width - 1., width),
      tf.lin_space(0., height - 1., height))
  p_pixel_homogeneous = tf.concat(
      [tf.stack([xx, yy], axis=-1), tf.ones([height, width, 1])], -1)
  # Convert pixel coordinates (u, v, 1) to camera coordinates on the image
  # plane.
  p_image = tf.squeeze(
      tf.matmul(
          tf.matrix_inverse(intrinsics[tf.newaxis, tf.newaxis, :]),
          tf.expand_dims(p_pixel_homogeneous, -1)), -1)
  # The depth is a radial distance, so project it onto the look-at axis to
  # obtain the z coordinate.
  z = depth * tf.reduce_sum(
      tf.math.l2_normalize(p_image, axis=-1) * tf.constant([[[0., 0., 1.]]]),
      axis=-1,
      keepdims=True)
  p_camera = z * p_image
  # Convert to the OpenGL coordinate system.
  p_camera = p_camera * tf.constant([1., 1., -1.], shape=[1, 1, 3])
  p_camera_homogeneous = tf.concat(
      [p_camera, tf.ones(shape=[height, width, 1])], -1)
  # Convert camera coordinates to world coordinates.
  p_world = tf.squeeze(
      tf.matmul(pose_c2w[tf.newaxis, tf.newaxis, :],
                tf.expand_dims(p_camera_homogeneous, -1)), -1)
  return p_world
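# A hedged example of `image_to_world_projection` (illustrative values):
# identity intrinsics and an identity camera-to-world pose, with unit radial
# depth everywhere. Note the function relies on tf.matmul broadcasting over
# the leading spatial dimensions.
depth = tf.ones([4, 4, 1])
intrinsics = tf.eye(3)
pose_c2w = tf.concat([tf.eye(3), tf.zeros([3, 1])], axis=1)  # [3, 4]
p_world = image_to_world_projection(depth, intrinsics, pose_c2w)  # [4, 4, 3]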
def camera_to_world_projection(depth, intrinsics, camera_to_world):
  """Project camera coordinates to world coordinates."""
  # depth: [height, width, 1] depth map.
  # intrinsics: [3, 3] camera intrinsic matrix.
  # camera_to_world: [3, 4] camera-to-world pose (rotation and translation).
  shape = depth.shape.as_list()
  height, width = shape[0], shape[1]
  xx, yy = tf.meshgrid(
      tf.lin_space(0., width - 1., width),
      tf.lin_space(0., height - 1., height))
  p_pixel = tf.stack([xx, yy], axis=-1)
  p_pixel_homogeneous = tf.concat([p_pixel, tf.ones([height, width, 1])], -1)
  camera_to_world = tf.tile(camera_to_world[tf.newaxis, tf.newaxis, :],
                            [height, width, 1, 1])
  intrinsics = tf.tile(intrinsics[tf.newaxis, tf.newaxis, :],
                       [height, width, 1, 1])
  # Convert pixel coordinates (u, v, 1) to camera coordinates (x_c, y_c, f)
  # on the image plane.
  p_image = tf.squeeze(
      tf.matmul(
          tf.matrix_inverse(intrinsics),
          tf.expand_dims(p_pixel_homogeneous, -1)), -1)
  lookat_axis = tf.tile(
      tf.constant([0., 0., 1.], shape=[1, 1, 3]), [height, width, 1])
  z = depth * tf.reduce_sum(
      tf.math.l2_normalize(p_image, axis=-1) * lookat_axis,
      axis=-1,
      keepdims=True)
  p_camera = z * p_image
  # Convert from the OpenCV convention to OpenGL.
  p_camera = p_camera * tf.constant([1., 1., -1.], shape=[1, 1, 3])
  p_camera_homogeneous = tf.concat(
      [p_camera, tf.ones(shape=[height, width, 1])], -1)
  # Convert camera coordinates to world coordinates.
  p_world = tf.squeeze(
      tf.matmul(camera_to_world, tf.expand_dims(p_camera_homogeneous, -1)),
      -1)
  return p_world