示例#1
0
  def _build_label(self,
                   image,
                   gt_boxes,
                   gt_classes,
                   info,
                   inds,
                   data,
                   is_training=True):
    """Builds the label dictionary used by both the train and eval pipelines.

    Args:
      image: image tensor; its static shape is updated in place so that the
        last dimension is 3.
      gt_boxes: groundtruth boxes fed to the label builder.
      gt_classes: groundtruth classes fed to the label builder.
      info: image info, stored unchanged in the eval groundtruths.
      inds: indices used to gather the `groundtruth_area` and
        `groundtruth_is_crowd` entries of `data` (eval only).
      data: decoded example dictionary. `source_id` is always read;
        `groundtruth_area` and `groundtruth_is_crowd` are read for eval.
      is_training: if False, a `groundtruths` entry is added to the labels.

    Returns:
      A tuple `(image, labels)`.
    """
    target_w = self._image_w
    target_h = self._image_h

    # Force the last static dimension of the image to 3.
    static_shape = image.get_shape().as_list()
    static_shape[-1] = 3
    image.set_shape(static_shape)

    # The label builder yields exactly three values.
    label_inds, label_upds, label_conf = self._label_builder(
        gt_boxes, gt_classes, target_w, target_h)

    # Fix the leading dimension of boxes/classes (padding with 0 and -1).
    fixed_boxes = self.set_shape(gt_boxes, pad_axis=0, pad_value=0)
    fixed_classes = self.set_shape(gt_classes, pad_axis=0, pad_value=-1)

    labels = {
        'inds': label_inds,
        'upds': label_upds,
        'true_conf': label_conf,
        'source_id': utils.process_source_id(data['source_id']),
        'bbox': tf.cast(fixed_boxes, dtype=self._dtype),
        'classes': tf.cast(fixed_classes, dtype=self._dtype),
    }

    if is_training:
      return image, labels

    # Eval only: attach the groundtruth data consumed by the evaluator.
    is_crowds = tf.cast(
        tf.gather(data['groundtruth_is_crowd'], inds), tf.int32)
    groundtruths = {
        'source_id': labels['source_id'],
        'height': target_h,
        'width': target_w,
        'num_detections': tf.shape(gt_boxes)[0],
        'image_info': info,
        'boxes': gt_boxes,
        'classes': gt_classes,
        'areas': tf.gather(data['groundtruth_area'], inds),
        'is_crowds': is_crowds,
    }
    # NOTE(review): the source id was already processed above; the second
    # pass is preserved as-is.
    groundtruths['source_id'] = utils.process_source_id(
        groundtruths['source_id'])
    labels['groundtruths'] = utils.pad_groundtruths_to_fixed_size(
        groundtruths, self._max_num_instances)
    return image, labels
示例#2
0
    def test_pad_groundtruths_to_fixed_size(self, boxes, area, classes, size):
        """Checks that every padded groundtruth entry has `size` rows."""
        inputs = {
            'boxes': tf.constant(boxes),
            'is_crowds': tf.constant([[0]]),
            'areas': tf.constant(area),
            'classes': tf.constant(classes),
        }

        padded = utils.pad_groundtruths_to_fixed_size(
            groundtruths=inputs, size=size)

        # Only the leading dimension of each entry is verified.
        for name in padded:
            leading_dim = padded[name].shape[0]
            self.assertEqual(size, leading_dim)
示例#3
0
    def _build_label(self, boxes, classes, image_info, unpad_image_shape,
                     data):
        """Packs the model labels together with padded eval groundtruths.

        Args:
          boxes: boxes to store under `labels['boxes']` after clipping or
            padding to `self._max_num_instances` (pad value -1).
          classes: classes to store under `labels['classes']`, clipped or
            padded the same way.
          image_info: stored unchanged under `labels['image_info']`.
          unpad_image_shape: stored unchanged under
            `labels['unpad_image_shapes']`.
          data: decoded example dictionary; reads `source_id`, `height`,
            `width`, `image`, `groundtruth_boxes`, `groundtruth_classes`,
            `groundtruth_area` and `groundtruth_is_crowd`.

        Returns:
          The labels dictionary, including a `groundtruths` sub-dictionary.
        """
        # Groundtruths are taken from `data`, with boxes converted to pixel
        # coordinates using the shape of `data['image']`.
        groundtruths = dict(
            source_id=data['source_id'],
            height=data['height'],
            width=data['width'],
            num_detections=tf.shape(data['groundtruth_classes'])[0],
            boxes=box_ops.denormalize_boxes(
                data['groundtruth_boxes'],
                tf.shape(input=data['image'])[0:2]),
            classes=data['groundtruth_classes'],
            areas=data['groundtruth_area'],
            is_crowds=tf.cast(data['groundtruth_is_crowd'], tf.int32),
        )
        groundtruths['source_id'] = utils.process_source_id(
            groundtruths['source_id'])
        groundtruths = utils.pad_groundtruths_to_fixed_size(
            groundtruths, self._max_num_instances)

        padded_boxes = preprocess_ops.clip_or_pad_to_fixed_size(
            boxes, self._max_num_instances, -1)
        padded_classes = preprocess_ops.clip_or_pad_to_fixed_size(
            classes, self._max_num_instances, -1)

        return {
            'boxes': padded_boxes,
            'classes': padded_classes,
            'image_info': image_info,
            'unpad_image_shapes': unpad_image_shape,
            'groundtruths': groundtruths,
        }
示例#4
0
  def test_pad_groundtruths_to_fixed_size(self, boxes, area, classes, size,
                                          attributes):
    """Checks that every padded groundtruth tensor has `size` rows.

    When `attributes` is truthy it is added as a single `depth` attribute and
    each padded attribute tensor is checked as well.
    """
    inputs = {
        'boxes': tf.constant(boxes),
        'is_crowds': tf.constant([[0]]),
        'areas': tf.constant(area),
        'classes': tf.constant(classes),
    }
    if attributes:
      inputs['attributes'] = {'depth': tf.constant(attributes)}

    padded = utils.pad_groundtruths_to_fixed_size(
        groundtruths=inputs, size=size)

    # `attributes` holds a nested dict of tensors; every other entry is a
    # tensor. Only the leading dimension is verified.
    for name in padded:
      entry = padded[name]
      tensors = entry.values() if name == 'attributes' else [entry]
      for tensor in tensors:
        leading_dim = tensor.shape[0]
        self.assertEqual(size, leading_dim)
示例#5
0
    def _parse_eval_data(self, data):
        """Parses a decoded example into an image and labels for evaluation.

        Args:
          data: the decoded tensor dictionary from TfExampleDecoder. Reads
            `image`, `source_id`, `height`, `width`, `groundtruth_boxes`,
            `groundtruth_classes`, `groundtruth_area` and
            `groundtruth_is_crowd`.

        Returns:
          A tuple `(image, labels)` where
            image: image tensor that is normalized, resized/cropped towards
              `self._output_size` and cast to `self._dtype`.
            labels: a dictionary with the following {key: value} pairs.
              image_info: a 2D `Tensor` that encodes the information of the
                image and the applied preprocessing, as returned by
                `preprocess_ops.resize_and_crop_image`.
              anchor_boxes: ordered dictionary with keys
                [min_level, min_level+1, ..., max_level]. The values are
                tensor with shape [height_l, width_l, 4] representing anchor
                boxes at each level.
              groundtruths: dictionary of groundtruth tensors taken from
                `data` (boxes in pixel coordinates of the original image),
                passed through `utils.pad_groundtruths_to_fixed_size` with
                `self._max_num_instances`.
        """
        # Keep the original image size; groundtruth boxes are expressed in it.
        raw_image = data['image']
        original_hw = tf.shape(raw_image)[0:2]

        # Normalize with mean/std pixel values, then resize without any scale
        # augmentation (both scale bounds are 1.0).
        normalized = preprocess_ops.normalize_image(raw_image)
        padded_size = preprocess_ops.compute_padded_size(
            self._output_size, 2**self._max_level)
        resized, image_info = preprocess_ops.resize_and_crop_image(
            normalized,
            self._output_size,
            padded_size=padded_size,
            aug_scale_min=1.0,
            aug_scale_max=1.0)
        out_height, out_width, _ = resized.get_shape().as_list()

        image = tf.cast(resized, dtype=self._dtype)

        # Normalized box coordinates -> pixel coordinates of the original image.
        pixel_boxes = box_ops.denormalize_boxes(data['groundtruth_boxes'],
                                                original_hw)

        # Anchors are generated for the processed image size.
        anchor_generator = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = anchor_generator(image_size=(out_height, out_width))

        groundtruths = dict(
            source_id=data['source_id'],
            height=data['height'],
            width=data['width'],
            num_detections=tf.shape(data['groundtruth_classes'])[0],
            boxes=pixel_boxes,
            classes=data['groundtruth_classes'],
            areas=data['groundtruth_area'],
            is_crowds=tf.cast(data['groundtruth_is_crowd'], tf.int32),
        )
        groundtruths['source_id'] = utils.process_source_id(
            groundtruths['source_id'])
        groundtruths = utils.pad_groundtruths_to_fixed_size(
            groundtruths, self._max_num_instances)

        labels = {
            'image_info': image_info,
            'anchor_boxes': anchor_boxes,
            'groundtruths': groundtruths,
        }
        return image, labels
示例#6
0
    def _parse_eval_data(self, data):
        """Parses a decoded example into an image and labels for evaluation.

        Args:
          data: the decoded tensor dictionary. Reads `image`, `source_id`,
            `height`, `width`, `groundtruth_boxes`, `groundtruth_classes`,
            `groundtruth_area`, `groundtruth_is_crowd` and, when present,
            `groundtruth_attributes` (a dict of name -> tensor).

        Returns:
          A tuple `(image, labels)` where
            image: the normalized, resized/cropped image cast to
              `self._dtype`.
            labels: a dictionary with keys `cls_targets`, `box_targets`,
              `anchor_boxes`, `cls_weights`, `box_weights`, `image_info`,
              `groundtruths` and, when the anchor labeler produced attribute
              targets, `attribute_targets`. `groundtruths` holds the
              groundtruth entries of `data` (boxes in pixel coordinates of
              the original image), passed through
              `utils.pad_groundtruths_to_fixed_size`.
        """
        classes = data['groundtruth_classes']
        # If not empty, `attributes` is a dict of (name, ground_truth) pairs.
        # `ground_truth` of attributes is assumed in shape [N, attribute_size].
        # TODO(xianzhi): support parsing attributes weights.
        attributes = data.get('groundtruth_attributes', {})

        # Gets original image and its size.
        image = data['image']
        image_shape = tf.shape(input=image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image)

        # Converts boxes from normalized coordinates to pixel coordinates.
        # Kept under its own name: the same tensor is reported below as the
        # evaluation groundtruth, so it is computed only once.
        gt_boxes = box_ops.denormalize_boxes(data['groundtruth_boxes'],
                                             image_shape)

        # Resizes and crops image (no scale augmentation: both bounds are 1.0).
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=1.0,
            aug_scale_max=1.0)
        image_height, image_width, _ = image.get_shape().as_list()

        # Resizes and crops boxes.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = preprocess_ops.resize_and_crop_boxes(gt_boxes, image_scale,
                                                     image_info[1, :], offset)
        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)
        # Build a NEW dict for the filtered attributes. Writing the gathered
        # tensors back into `attributes` would overwrite the entries of
        # `data['groundtruth_attributes']` (it is the same dict object), and
        # the groundtruths assembled below would then pair filtered attributes
        # with unfiltered boxes and classes.
        attributes = {
            name: tf.gather(value, indices)
            for name, value in attributes.items()
        }

        # Assigns anchors.
        input_anchor = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = input_anchor(image_size=(image_height, image_width))
        anchor_labeler = anchor.AnchorLabeler(self._match_threshold,
                                              self._unmatched_threshold)
        (cls_targets, box_targets, att_targets, cls_weights,
         box_weights) = anchor_labeler.label_anchors(
             anchor_boxes, boxes, tf.expand_dims(classes, axis=1), attributes)

        # Casts input image to desired data type.
        image = tf.cast(image, dtype=self._dtype)

        # Sets up groundtruth data for evaluation. These are the unfiltered
        # entries of `data`.
        groundtruths = {
            'source_id':
            data['source_id'],
            'height':
            data['height'],
            'width':
            data['width'],
            # NOTE(review): this is the full shape tensor of the classes, not
            # its first element; kept unchanged because consumers may rely on
            # that shape. Confirm before switching to `[0]`.
            'num_detections':
            tf.shape(data['groundtruth_classes']),
            'image_info':
            image_info,
            'boxes':
            gt_boxes,
            'classes':
            data['groundtruth_classes'],
            'areas':
            data['groundtruth_area'],
            'is_crowds':
            tf.cast(data['groundtruth_is_crowd'], tf.int32),
        }
        if 'groundtruth_attributes' in data:
            groundtruths['attributes'] = data['groundtruth_attributes']
        groundtruths['source_id'] = utils.process_source_id(
            groundtruths['source_id'])
        groundtruths = utils.pad_groundtruths_to_fixed_size(
            groundtruths, self._max_num_instances)

        # Packs labels for model_fn outputs.
        labels = {
            'cls_targets': cls_targets,
            'box_targets': box_targets,
            'anchor_boxes': anchor_boxes,
            'cls_weights': cls_weights,
            'box_weights': box_weights,
            'image_info': image_info,
            'groundtruths': groundtruths,
        }
        if att_targets:
            labels['attribute_targets'] = att_targets
        return image, labels