示例#1
0
  def _common_image_process(self, image, classes, boxes, data, params):
    """Apply shared training-time preprocessing to an image and its labels.

    Args:
      image: image tensor.
      classes: groundtruth classes tensor.
      boxes: groundtruth boxes tensor.
      data: decoded example dict; only 'groundtruth_is_crowd' is read here.
      params: dict of preprocessing options ('skip_crowd_during_training',
        'grid_mask', 'autoaugment_policy').

    Returns:
      A (image, boxes, classes) tuple after optional crowd filtering,
      grid-mask, and autoaugment/randaugment distortion.
    """
    if params['skip_crowd_during_training']:
      # Keep only the annotations that are NOT marked as crowds.
      keep = tf.where(tf.logical_not(data['groundtruth_is_crowd']))
      classes = tf.gather_nd(classes, keep)
      boxes = tf.gather_nd(boxes, keep)

    if params.get('grid_mask', None):
      from aug import gridmask  # pylint: disable=g-import-not-at-top
      image, boxes = gridmask.gridmask(image, boxes)

    policy = params.get('autoaugment_policy', None)
    if policy:
      from aug import autoaugment  # pylint: disable=g-import-not-at-top
      if policy == 'randaug':
        image, boxes = autoaugment.distort_image_with_randaugment(
            image, boxes, num_layers=1, magnitude=15)
      else:
        image, boxes = autoaugment.distort_image_with_autoaugment(
            image, boxes, policy)
    return image, boxes, classes
示例#2
0
  def dataset_parser(self, value, example_decoder, anchor_labeler, params):
    """Parse data to a fixed dimension input image and learning targets.

    Args:
      value: a single serialized tf.Example string.
      example_decoder: TF example decoder.
      anchor_labeler: anchor box labeler.
      params: a dict of extra parameters.

    Returns:
      image: Image tensor that is preprocessed to have normalized value and
        fixed dimension [image_height, image_width, 3]
      cls_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors]. The height_l and width_l
        represent the dimension of class logits at l-th level.
      box_targets_dict: ordered dictionary with keys
        [min_level, min_level+1, ..., max_level]. The values are tensor with
        shape [height_l, width_l, num_anchors * 4]. The height_l and
        width_l represent the dimension of bounding box regression output at
        l-th level.
      num_positives: Number of positive anchors in the image.
      source_id: Source image id. Default value -1 if the source id is empty
        in the groundtruth annotation.
      image_scale: Scale of the processed image to the original image.
      boxes: Groundtruth bounding box annotations. The box is represented in
        [y1, x1, y2, x2] format. The tensor is padded with -1 to the fixed
        dimension [self._max_instances_per_image, 4].
      is_crowds: Groundtruth annotations to indicate if an annotation
        represents a group of instances by value {0, 1}. The tensor is
        padded with 0 to the fixed dimension [self._max_instances_per_image].
      areas: Groundtruth areas annotations. The tensor is padded with -1
        to the fixed dimension [self._max_instances_per_image].
      classes: Groundtruth classes annotations. The tensor is padded with -1
        to the fixed dimension [self._max_instances_per_image].
    """
    with tf.name_scope('parser'):
      data = example_decoder.decode(value)
      source_id = data['source_id']
      image = data['image']
      boxes = data['groundtruth_boxes']
      classes = data['groundtruth_classes']
      # Cast class ids to float and reshape to a column vector once.
      # (A duplicate of this statement was removed; reshaping twice was
      # redundant.)
      classes = tf.reshape(tf.cast(classes, dtype=tf.float32), [-1, 1])
      areas = data['groundtruth_area']
      is_crowds = data['groundtruth_is_crowd']
      image_masks = data.get('groundtruth_instance_masks', [])

      if self._is_training:
        # Training time preprocessing.
        if params['skip_crowd_during_training']:
          # Keep only annotations that are not marked as crowds.
          indices = tf.where(tf.logical_not(data['groundtruth_is_crowd']))
          classes = tf.gather_nd(classes, indices)
          boxes = tf.gather_nd(boxes, indices)

        if params.get('grid_mask', None):
          from aug import gridmask  # pylint: disable=g-import-not-at-top
          image, boxes = gridmask.gridmask(image, boxes)

        if params.get('autoaugment_policy', None):
          from aug import autoaugment  # pylint: disable=g-import-not-at-top
          if params['autoaugment_policy'] == 'randaug':
            image, boxes = autoaugment.distort_image_with_randaugment(
                image, boxes, num_layers=1, magnitude=15)
          else:
            image, boxes = autoaugment.distort_image_with_autoaugment(
                image, boxes, params['autoaugment_policy'],
                params['use_augmix'], *params['augmix_params'])

      input_processor = DetectionInputProcessor(image, params['image_size'],
                                                boxes, classes)
      input_processor.normalize_image()
      if self._is_training:
        if params['input_rand_hflip']:
          input_processor.random_horizontal_flip()

        # Random scale jitter at train time; fixed output scale at eval.
        input_processor.set_training_random_scale_factors(
            params['jitter_min'], params['jitter_max'],
            params.get('target_size', None))
      else:
        input_processor.set_scale_factors_to_output_size()
      image = input_processor.resize_and_crop_image()
      boxes, classes = input_processor.resize_and_crop_boxes()

      # Assign anchors.
      (cls_targets, box_targets,
       num_positives) = anchor_labeler.label_anchors(boxes, classes)

      # Empty source ids become -1 so they can be carried as numbers.
      source_id = tf.where(
          tf.equal(source_id, tf.constant('')), '-1', source_id)
      source_id = tf.strings.to_number(source_id)

      # Pad groundtruth data for evaluation.
      image_scale = input_processor.image_scale_to_original
      boxes *= image_scale
      is_crowds = tf.cast(is_crowds, dtype=tf.float32)
      boxes = pad_to_fixed_size(boxes, -1, [self._max_instances_per_image, 4])
      is_crowds = pad_to_fixed_size(is_crowds, 0,
                                    [self._max_instances_per_image, 1])
      areas = pad_to_fixed_size(areas, -1, [self._max_instances_per_image, 1])
      classes = pad_to_fixed_size(classes, -1,
                                  [self._max_instances_per_image, 1])
      return (image, cls_targets, box_targets, num_positives, source_id,
              image_scale, boxes, is_crowds, areas, classes, image_masks)
 def test_randaugment_policy(self):
     """Smoke test: randaugment graph construction succeeds on placeholders."""
     img_ph = tf.placeholder(tf.uint8, shape=[320, 320, 3])
     box_ph = tf.placeholder(tf.float32, shape=[4, 4])
     # Building the distortion op is enough; no session run is needed.
     autoaugment.distort_image_with_randaugment(img_ph, box_ph, 1, 15)