def _get_groundtruth_mask_output(self, boxes, masks): """Get the expected mask output for each box. Args: boxes: A [num_instances, 4] float tensor containing bounding boxes in normalized coordinates. masks: A [num_instances, height, width] float tensor containing binary ground truth masks. Returns: masks: If predict_full_resolution_masks is set, masks are not resized and the size of this tensor is [num_instances, input_height, input_width]. Otherwise, returns a tensor of size [num_instances, mask_size, mask_size]. """ mask_size = self._deepmac_params.mask_size if self._deepmac_params.predict_full_resolution_masks: return masks else: cropped_masks = spatial_transform_ops.matmul_crop_and_resize( masks[:, :, :, tf.newaxis], boxes[:, tf.newaxis, :], [mask_size, mask_size]) cropped_masks = tf.stop_gradient(cropped_masks) cropped_masks = tf.squeeze(cropped_masks, axis=[1, 4]) # TODO(vighneshb) should we discretize masks? return cropped_masks
def crop_and_resize_feature_map(features, boxes, size):
    """Extract one fixed-size feature crop per box from a single feature map.

    Args:
      features: A [H, W, C] float tensor.
      boxes: A [N, 4] tensor of normalized boxes.
      size: int, the height and width of every output crop.

    Returns:
      per_box_features: A [N, size, size, C] tensor of cropped and resized
        features.
    """
    # The crop op is called with a leading axis of size 1 on both inputs, so
    # add it here and strip it from the result.
    batched_features = features[tf.newaxis]
    batched_boxes = boxes[tf.newaxis]
    batched_crops = spatial_transform_ops.matmul_crop_and_resize(
        batched_features, batched_boxes, [size, size])
    return batched_crops[0]
def _get_mask_head_input(self, boxes, pixel_embedding):
    """Get the input to the mask network, given bounding boxes.

    Args:
      boxes: A [num_instances, 4] float tensor containing bounding boxes in
        normalized coordinates.
      pixel_embedding: A [height, width, embedding_size] float tensor
        containing spatial pixel embeddings.

    Returns:
      embedding: A [num_instances, mask_height, mask_width, channels] float
        tensor containing the inputs to the mask network. When use_xy is set,
        a (y, x) coordinate grid spanning [-1, 1] along each spatial axis is
        concatenated in front of the per-instance pixel embeddings and
        channels is embedding_size + 2; otherwise channels is embedding_size.
        If predict_full_resolution_masks is set, mask_height and mask_width
        are the same as height and width of pixel_embedding. If not,
        mask_height and mask_width are the same as mask_size.
    """
    params = self._deepmac_params
    num_instances = tf.shape(boxes)[0]

    if params.predict_full_resolution_masks:
        # Every instance sees the same, uncropped embedding map.
        pixel_embedding = pixel_embedding[tf.newaxis, :, :, :]
        pixel_embeddings_processed = tf.tile(pixel_embedding,
                                             [num_instances, 1, 1, 1])
    else:
        mask_size = params.mask_size
        # TODO(vighneshb) Explore multilevel_roi_align and align_corners=False.
        pixel_embeddings_cropped = (
            spatial_transform_ops.matmul_crop_and_resize(
                pixel_embedding[tf.newaxis], boxes[tf.newaxis],
                [mask_size, mask_size]))
        pixel_embeddings_processed = pixel_embeddings_cropped[0]

    # Only build the coordinate grid when it is actually part of the output.
    if not params.use_xy:
        return pixel_embeddings_processed

    mask_shape = tf.shape(pixel_embeddings_processed)
    mask_height, mask_width = mask_shape[1], mask_shape[2]
    y_grid, x_grid = tf.meshgrid(tf.linspace(-1.0, 1.0, mask_height),
                                 tf.linspace(-1.0, 1.0, mask_width),
                                 indexing='ij')
    # [mask_height, mask_width, 2] -> [num_instances, mask_height, mask_width, 2]
    coords = tf.stack([y_grid, x_grid], axis=2)
    coords = coords[tf.newaxis, :, :, :]
    coords = tf.tile(coords, [num_instances, 1, 1, 1])
    return tf.concat([coords, pixel_embeddings_processed], axis=3)
def crop_masks_within_boxes(masks, boxes, output_size):
    """Crop every mask to its own box and resize it to a square.

    Args:
      masks: A [num_instances, height, width] float tensor of masks.
      boxes: A [num_instances, 4] sized tensor of normalized bounding boxes.
      output_size: The height and width of the output masks.

    Returns:
      masks: A [num_instances, output_size, output_size] tensor of masks which
        are cropped to be tightly within the given boxes and resized.
    """
    # Add a trailing channel axis to the masks and a size-1 "boxes per mask"
    # axis to the boxes before calling the crop op.
    masks_with_channel = masks[:, :, :, tf.newaxis]
    one_box_per_mask = boxes[:, tf.newaxis, :]
    target_shape = [output_size, output_size]
    crops = spatial_transform_ops.matmul_crop_and_resize(
        masks_with_channel, one_box_per_mask, target_shape)
    # Select index 0 on the two helper axes to get back to rank 3.
    return crops[:, 0, :, :, 0]
def crop_and_resize_instance_masks(masks, boxes, mask_size):
    """Cut each instance mask down to its box and resize it.

    Args:
      masks: A [N, H, W] float tensor.
      boxes: A [N, 4] float tensor of normalized boxes.
      mask_size: int, the size of the output masks.

    Returns:
      masks: A [N, mask_size, mask_size] float tensor of cropped and resized
        instance masks.
    """
    # Masks gain a trailing channel axis; boxes gain a size-1 axis at
    # position 1 so that each mask is paired with exactly one box.
    masks_with_channel = masks[:, :, :, tf.newaxis]
    one_box_per_mask = boxes[:, tf.newaxis, :]
    crops = spatial_transform_ops.matmul_crop_and_resize(
        masks_with_channel, one_box_per_mask, [mask_size, mask_size])
    # Remove the two singleton axes introduced above.
    return tf.squeeze(crops, axis=[1, 4])
def testInvalidInputShape(self):
    """matmul_crop_and_resize should raise ValueError for these input shapes.

    The image literal has shape [2, 2, 1] and the boxes literal has shape
    [1, 4]; presumably the op rejects them because the leading batch axis is
    missing (verify against the op's contract).
    """
    bad_image = tf.constant([[[1], [2]], [[3], [4]]], dtype=tf.float32)
    bad_boxes = tf.constant([[-1, -1, 1, 1]], dtype=tf.float32)
    target_size = [4, 4]
    with self.assertRaises(ValueError):
        spatial_ops.matmul_crop_and_resize(bad_image, bad_boxes, target_size)
def graph_fn(image, boxes):
    """Crop `boxes` out of `image` and resize every crop to 2x2."""
    output_size = [2, 2]
    crops = spatial_ops.matmul_crop_and_resize(
        image, boxes, crop_size=output_size)
    return crops