def clip_boxes(self, boxes):
  """Clip boxes to fit in an image."""
  boxes = tf.where(tf.less(boxes, 0), tf.zeros_like(boxes), boxes)
  is_height_short_side = tf.less(self._scaled_height, self._scaled_width)
  # The bound is ordered [max_y, max_x, max_y, max_x] so that it lines up
  # with boxes in [y1, x1, y2, x2] format; which output dimension is the
  # height depends on the orientation of the padded output image.
  bound = tf.where(
      is_height_short_side,
      tf.convert_to_tensor(
          [self._output_size[0] - 1, self._output_size[1] - 1] * 2,
          dtype=tf.float32),
      tf.convert_to_tensor(
          [self._output_size[1] - 1, self._output_size[0] - 1] * 2,
          dtype=tf.float32))
  boxes = tf.where(
      tf.greater(boxes, bound), bound * tf.ones_like(boxes), boxes)
  return boxes
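# Illustrative sketch (not from the original code): the clipping above is
# equivalent to clamping each [y1, x1, y2, x2] box against a
# [max_y, max_x, max_y, max_x] bound. The 832x1408 output size below is an
# assumed example value, not a parameter taken from this codebase.
import numpy as np

example_boxes = np.array([[-3.0, 10.0, 900.0, 1500.0]])   # [y1, x1, y2, x2]
example_bound = np.array([832 - 1, 1408 - 1] * 2, np.float32)  # [y, x, y, x]
clipped = np.clip(example_boxes, 0.0, example_bound)
# clipped == [[0., 10., 831., 1407.]]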
def resize_and_crop_image(self, method=tf.image.ResizeMethod.BILINEAR):
  """Resize input image and crop it to the self._output_size dimension."""
  scaled_image = tf.image.resize_images(
      self._image, [self._scaled_height, self._scaled_width], method=method)
  is_height_short_side = tf.less(self._scaled_height, self._scaled_width)
  output_image = tf.cond(
      is_height_short_side,
      lambda: tf.image.pad_to_bounding_box(
          scaled_image, 0, 0, self._output_size[0], self._output_size[1]),
      lambda: tf.image.pad_to_bounding_box(
          scaled_image, 0, 0, self._output_size[1], self._output_size[0]))
  return output_image
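# Illustrative sketch (assumed sizes): tf.image.pad_to_bounding_box places
# the resized image at the top-left corner of the fixed-size canvas and
# zero-pads the remainder, so no pixels are lost when the scaled image fits
# inside the output size.
import tensorflow as tf

scaled = tf.zeros([800, 1199, 3])   # height is the short side
padded = tf.image.pad_to_bounding_box(scaled, 0, 0, 832, 1408)
# padded has shape (832, 1408, 3); the original pixels occupy the top-left.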
def _transform_images(self, params, features, labels=None):
  """Transforms images."""
  images = features['images']
  batch_size, _, _, c = images.get_shape().as_list()
  if params['conv0_space_to_depth_block_size'] != 0:
    # Transforms (space-to-depth) images for TPU performance.

    def _fused_transform(images, image_size):
      return spatial_transform.fused_transpose_and_space_to_depth(
          images, image_size, params['conv0_space_to_depth_block_size'],
          params['transpose_input'])

    images = tf.cond(
        tf.less(features['image_info'][0, 3], features['image_info'][0, 4]),
        lambda: _fused_transform(images, params['image_size']),
        lambda: _fused_transform(images, params['image_size'][::-1]))
  else:
    # Transposes images for TPU performance.
    image_area = params['image_size'][0] * params['image_size'][1]
    if params['transpose_input']:
      images = tf.transpose(images, [1, 2, 0, 3])
      # Flattens spatial dimensions so that the image tensor has a static
      # shape.
      images = tf.reshape(images, [image_area, batch_size, c])
    else:
      images = tf.reshape(images, [batch_size, image_area, c])

  if params['use_bfloat16']:
    images = tf.cast(images, dtype=tf.bfloat16)

  features['images'] = images

  if labels is not None:
    return features, labels
  else:
    return features, tf.zeros([batch_size])
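# Illustrative sketch (an assumption about what
# spatial_transform.fused_transpose_and_space_to_depth achieves, not its
# exact implementation): a space-to-depth with block size 2 folds each 2x2
# spatial patch into the channel dimension, and a transpose can then move
# the batch dimension last for a TPU-friendly layout.
import tensorflow as tf

batch = tf.zeros([8, 832, 1408, 3])                  # [N, H, W, C]
folded = tf.nn.space_to_depth(batch, block_size=2)   # [8, 416, 704, 12]
transposed = tf.transpose(folded, [1, 2, 3, 0])      # [416, 704, 12, 8]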
def horizontal_image(*args):
  """Returns True when the image's height is less than its width."""
  image_info = args[0]['image_info']
  return tf.less(image_info[3], image_info[4])
def _dataset_parser(value):
  """Parse data to a fixed dimension input image and learning targets.

  Args:
    value: A dictionary that contains an image and groundtruth annotations.

  Returns:
    features: A dictionary that contains the image and auxiliary
      information. The following describes {key: value} pairs in the
      dictionary.
      image: An image tensor that is preprocessed to have normalized value
        and fixed dimension [image_size, image_size, 3].
      image_info: Image information that includes the original height and
        width, the scale of the processed image to the original image, and
        the scaled height and width.
      source_ids: Source image id. Default value -1 if the source id is
        empty in the groundtruth annotation.
    labels: (only for training) A dictionary that contains groundtruth
      labels. The following describes {key: value} pairs in the dictionary.
      score_targets_dict: An ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensors
        with shape [height_l, width_l, num_anchors]. The height_l and
        width_l represent the dimension of the objectness score at the
        l-th level.
      box_targets_dict: An ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensors
        with shape [height_l, width_l, num_anchors * 4]. The height_l and
        width_l represent the dimension of bounding box regression output
        at the l-th level.
      gt_boxes: Groundtruth bounding box annotations. The box is
        represented in [y1, x1, y2, x2] format. The tensor is padded with
        -1 to the fixed dimension [self._max_num_instances, 4].
      gt_classes: Groundtruth classes annotations. The tensor is padded
        with -1 to the fixed dimension [self._max_num_instances].
      cropped_gt_masks: Groundtruth masks cropped by the bounding box and
        resized to a fixed size determined by params['gt_mask_size'].
  """
  with tf.name_scope('parser'):
    data = example_decoder.decode(value)

    image = data['image']
    source_id = data['source_id']
    source_id = tf.where(tf.equal(source_id, tf.constant('')), '-1',
                         source_id)
    source_id = tf.string_to_number(source_id)

    if self._mode == tf.estimator.ModeKeys.PREDICT:
      input_processor = InstanceSegmentationInputProcessor(
          image, image_size, params['short_side_image_size'],
          params['long_side_max_image_size'])
      input_processor.normalize_image()
      input_processor.set_scale_factors_to_mlperf_reference_size()
      image = input_processor.resize_and_crop_image()
      if params['use_bfloat16']:
        image = tf.cast(image, dtype=tf.bfloat16)
      image_info = input_processor.get_image_info()
      return {
          'images': image,
          'image_info': image_info,
          'source_ids': source_id
      }

    # The following part is for training.
    instance_masks = data['groundtruth_instance_masks']
    boxes = data['groundtruth_boxes']
    classes = data['groundtruth_classes']
    classes = tf.reshape(tf.cast(classes, dtype=tf.float32), [-1, 1])
    if not params['use_category']:
      classes = tf.cast(tf.greater(classes, 0), dtype=tf.float32)

    if (params['skip_crowd_during_training'] and
        self._mode == tf.estimator.ModeKeys.TRAIN):
      indices = tf.where(tf.logical_not(data['groundtruth_is_crowd']))
      classes = tf.gather_nd(classes, indices)
      boxes = tf.gather_nd(boxes, indices)
      instance_masks = tf.gather_nd(instance_masks, indices)

    input_processor = InstanceSegmentationInputProcessor(
        image, image_size, params['short_side_image_size'],
        params['long_side_max_image_size'], boxes, classes, instance_masks)
    input_processor.normalize_image()
    if params['input_rand_hflip']:
      input_processor.random_horizontal_flip()

    input_processor.set_scale_factors_to_mlperf_reference_size()
    image = input_processor.resize_and_crop_image()

    boxes, classes = input_processor.resize_and_crop_boxes()
    cropped_gt_masks = input_processor.crop_gt_masks(params['gt_mask_size'])
    image_info = input_processor.get_image_info()

    # Assign anchors.
    is_height_short_side = tf.less(image_info[3], image_info[4])
    score_targets, box_targets = tf.cond(
        is_height_short_side,
        lambda: anchor_labeler.label_anchors(boxes, classes),
        lambda: height_long_side_anchor_labeler.label_anchors(boxes, classes))

    # Pad groundtruth data.
    boxes *= image_info[2]
    boxes = pad_to_fixed_size(boxes, -1, [self._max_num_instances, 4])
    classes = pad_to_fixed_size(classes, -1, [self._max_num_instances, 1])

    # Pads cropped_gt_masks.
    cropped_gt_masks = tf.reshape(
        cropped_gt_masks, [-1, (params['gt_mask_size'] + 4)**2])
    cropped_gt_masks = pad_to_fixed_size(
        cropped_gt_masks, -1,
        [self._max_num_instances, (params['gt_mask_size'] + 4)**2])
    cropped_gt_masks = tf.reshape(cropped_gt_masks, [
        self._max_num_instances, params['gt_mask_size'] + 4,
        params['gt_mask_size'] + 4
    ])

    if params['use_bfloat16']:
      image = tf.cast(image, dtype=tf.bfloat16)

    features = {}
    features['images'] = image
    features['image_info'] = image_info
    features['source_ids'] = source_id

    labels = {}
    for level in range(params['min_level'], params['max_level'] + 1):
      labels['score_targets_%d' % level] = score_targets[level]
      labels['box_targets_%d' % level] = box_targets[level]
    labels['gt_boxes'] = boxes
    labels['gt_classes'] = classes
    labels['cropped_gt_masks'] = cropped_gt_masks
    return features, labels
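# Illustrative sketch (assumption: pad_to_fixed_size behaves roughly like
# the helper below; the real implementation lives elsewhere in the
# codebase): pad the first dimension of `data` with `pad_value` rows so the
# result always has the static shape `output_shape`.
import tensorflow as tf

def pad_to_fixed_size_sketch(data, pad_value, output_shape):
  num_instances = tf.shape(data)[0]
  pad_rows = output_shape[0] - num_instances
  padding = pad_value * tf.ones([pad_rows, output_shape[1]], dtype=data.dtype)
  return tf.reshape(tf.concat([data, padding], axis=0), output_shape)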
def clip_boxes(self, boxes):
  """Clip boxes to fit in an image."""
  boxes = tf.where(tf.less(boxes, 0), tf.zeros_like(boxes), boxes)
  boxes = tf.where(tf.greater(boxes, self._output_size - 1),
                   (self._output_size - 1) * tf.ones_like(boxes), boxes)
  return boxes