Example No. 1
    def _build_label(self, boxes, classes, image_info, unpad_image_shape,
                     data):

        # Sets up groundtruth data for evaluation.
        groundtruths = {
            'source_id': data['source_id'],
            'height': data['height'],
            'width': data['width'],
            'num_detections': tf.shape(data['groundtruth_classes'])[0],
            'boxes': box_ops.denormalize_boxes(
                data['groundtruth_boxes'], tf.shape(input=data['image'])[0:2]),
            'classes': data['groundtruth_classes'],
            'areas': data['groundtruth_area'],
            'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
        }

        groundtruths['source_id'] = utils.process_source_id(
            groundtruths['source_id'])
        groundtruths = utils.pad_groundtruths_to_fixed_size(
            groundtruths, self._max_num_instances)

        labels = {
            'boxes': preprocess_ops.clip_or_pad_to_fixed_size(
                boxes, self._max_num_instances, -1),
            'classes': preprocess_ops.clip_or_pad_to_fixed_size(
                classes, self._max_num_instances, -1),
            'image_info': image_info,
            'unpad_image_shapes': unpad_image_shape,
            'groundtruths': groundtruths,
        }

        return labels
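
Both utilities used above (`utils.pad_groundtruths_to_fixed_size` and `preprocess_ops.clip_or_pad_to_fixed_size`) exist so every per-image tensor ends up with a fixed first dimension and can be batched. Below is a minimal sketch of that clip-or-pad behavior, assuming a single-tensor helper with the same argument order as the call sites above; the real Model Garden implementation handles more cases and may differ in detail.

import tensorflow as tf

def clip_or_pad_to_fixed_size(value, size, constant_value=0):
  # Sketch only: clip extra rows, then pad the end of axis 0 with
  # `constant_value` until exactly `size` rows remain.
  value = tf.convert_to_tensor(value)[:size]
  pad_rows = size - tf.shape(value)[0]
  paddings = tf.concat(
      [tf.stack([0, pad_rows])[tf.newaxis, :],                   # axis 0: pad at the end
       tf.zeros(tf.stack([tf.rank(value) - 1, 2]), tf.int32)],   # other axes: no padding
      axis=0)
  return tf.pad(value, paddings, constant_values=constant_value)

# Example: one box padded up to max_num_instances=3 with the fill value -1.
boxes = tf.constant([[0., 0., 10., 10.]])
print(clip_or_pad_to_fixed_size(boxes, 3, -1))  # shape [3, 4]
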
Example No. 2
 def test_process_source_id(self, source_id, expected_result):
     source_id = tf.constant(source_id, dtype=tf.int64)
     source_id = tf.strings.as_string(source_id)
     self.assertSequenceAlmostEqual(
         expected_result, utils.process_source_id(source_id=source_id))
Example No. 3
 def test_process_empty_source_id(self):
     source_id = tf.constant([], dtype=tf.int64)
     source_id = tf.strings.as_string(source_id)
     self.assertEqual(-1, utils.process_source_id(source_id=source_id))
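
Taken together, these two tests pin down the contract of `utils.process_source_id`: numeric id strings are parsed back to numbers, and an empty id tensor falls back to -1. The following is a minimal sketch that satisfies both tests; the real implementation in `utils` may handle additional cases (for example non-numeric ids) differently.

import tensorflow as tf

def process_source_id(source_id):
  # Sketch only: parse numeric id strings and fall back to a scalar -1
  # when the input tensor is empty, matching the two tests above.
  parsed = tf.strings.to_number(source_id, out_type=tf.int64)
  return tf.cond(
      tf.equal(tf.size(parsed), 0),
      lambda: tf.constant(-1, dtype=tf.int64),
      lambda: tf.identity(parsed))
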
Example No. 4
  def _parse_eval_data(self, data):
    """Parses data for evaluation.

    Args:
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      A dictionary of {'images': image, 'labels': labels} where
        image: image tensor that is preprocessed to have normalized value and
          dimension [output_size[0], output_size[1], 3]
        labels: a dictionary of tensors used for training. The following
          describes {key: value} pairs in the dictionary.
          source_ids: Source image id. Default value -1 if the source id is
            empty in the groundtruth annotation.
          image_info: a 2D `Tensor` that encodes the information of the image
            and the applied preprocessing. It is in the format of
            [[original_height, original_width], [scaled_height, scaled_width],
            [y_scale, x_scale], [y_offset, x_offset]].
          anchor_boxes: ordered dictionary with keys
            [min_level, min_level+1, ..., max_level]. The values are tensors
            with shape [height_l, width_l, 4] representing anchor boxes at
            each level.
    """
    # Gets original image and its size.
    image = data['image']
    image_shape = tf.shape(image)[0:2]

    # Normalizes image with mean and std pixel values.
    image = preprocess_ops.normalize_image(image)

    # Resizes and crops image.
    image, image_info = preprocess_ops.resize_and_crop_image(
        image,
        self._output_size,
        padded_size=preprocess_ops.compute_padded_size(
            self._output_size, 2 ** self._max_level),
        aug_scale_min=1.0,
        aug_scale_max=1.0)
    image_height, image_width, _ = image.get_shape().as_list()

    # Casts input image to self._dtype
    image = tf.cast(image, dtype=self._dtype)

    # Converts boxes from normalized coordinates to pixel coordinates.
    boxes = box_ops.denormalize_boxes(data['groundtruth_boxes'], image_shape)

    # Computes anchor boxes.
    input_anchor = anchor.build_anchor_generator(
        min_level=self._min_level,
        max_level=self._max_level,
        num_scales=self._num_scales,
        aspect_ratios=self._aspect_ratios,
        anchor_size=self._anchor_size)
    anchor_boxes = input_anchor(image_size=(image_height, image_width))

    labels = {
        'image_info': image_info,
        'anchor_boxes': anchor_boxes,
    }

    groundtruths = {
        'source_id': data['source_id'],
        'height': data['height'],
        'width': data['width'],
        'num_detections': tf.shape(data['groundtruth_classes'])[0],
        'boxes': boxes,
        'classes': data['groundtruth_classes'],
        'areas': data['groundtruth_area'],
        'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
    }
    groundtruths['source_id'] = utils.process_source_id(
        groundtruths['source_id'])
    groundtruths = utils.pad_groundtruths_to_fixed_size(
        groundtruths, self._max_num_instances)
    labels['groundtruths'] = groundtruths
    return image, labels
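
Both eval parsers call `preprocess_ops.compute_padded_size(output_size, 2 ** max_level)` before resizing. The intent is that the padded image size is divisible by the coarsest feature stride, so every FPN level gets an integer spatial size. A plain-Python sketch of that rounding rule follows; the real helper operates on tensors and may differ in detail.

import math

def compute_padded_size(desired_size, stride):
  # Sketch only: round each dimension up to the nearest multiple of `stride`.
  return [int(math.ceil(d / stride) * stride) for d in desired_size]

compute_padded_size([640, 640], 2 ** 7)  # -> [640, 640]
compute_padded_size([600, 800], 2 ** 7)  # -> [640, 896]
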
Example No. 5
    def _parse_eval_data(self, data):
        """Parses data for evaluation."""
        groundtruths = {}
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        # If not empty, `attributes` is a dict of (name, ground_truth) pairs.
        # `ground_truth` of attributes is assumed to be in shape
        # [N, attribute_size].
        # TODO(xianzhi): support parsing attribute weights.
        attributes = data.get('groundtruth_attributes', {})

        # Gets original image and its size.
        image = data['image']
        image_shape = tf.shape(input=image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image)

        # Converts boxes from normalized coordinates to pixel coordinates.
        boxes = box_ops.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=1.0,
            aug_scale_max=1.0)
        image_height, image_width, _ = image.get_shape().as_list()

        # Resizes and crops boxes.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
                                                     image_info[1, :], offset)
        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)
        for k, v in attributes.items():
            attributes[k] = tf.gather(v, indices)

        # Assigns anchors.
        input_anchor = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = input_anchor(image_size=(image_height, image_width))
        anchor_labeler = anchor.AnchorLabeler(self._match_threshold,
                                              self._unmatched_threshold)
        (cls_targets, box_targets, att_targets, cls_weights,
         box_weights) = anchor_labeler.label_anchors(
             anchor_boxes, boxes, tf.expand_dims(classes, axis=1), attributes)

        # Casts input image to desired data type.
        image = tf.cast(image, dtype=self._dtype)

        # Sets up groundtruth data for evaluation.
        groundtruths = {
            'source_id': data['source_id'],
            'height': data['height'],
            'width': data['width'],
            'num_detections': tf.shape(data['groundtruth_classes'])[0],
            'image_info': image_info,
            'boxes': box_ops.denormalize_boxes(
                data['groundtruth_boxes'], image_shape),
            'classes': data['groundtruth_classes'],
            'areas': data['groundtruth_area'],
            'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
        }
        if 'groundtruth_attributes' in data:
            groundtruths['attributes'] = data['groundtruth_attributes']
        groundtruths['source_id'] = utils.process_source_id(
            groundtruths['source_id'])
        groundtruths = utils.pad_groundtruths_to_fixed_size(
            groundtruths, self._max_num_instances)

        # Packs labels for model_fn outputs.
        labels = {
            'cls_targets': cls_targets,
            'box_targets': box_targets,
            'anchor_boxes': anchor_boxes,
            'cls_weights': cls_weights,
            'box_weights': box_weights,
            'image_info': image_info,
            'groundtruths': groundtruths,
        }
        if att_targets:
            labels['attribute_targets'] = att_targets
        return image, labels
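
This example also shows how `image_info` is consumed: row 1 is the scaled image size, row 2 the scale factors, and row 3 the crop offset, and `preprocess_ops.resize_and_crop_boxes` applies the same transform to the boxes that was applied to the image. Below is a minimal sketch of that box transform under those assumptions; the real op additionally handles edge cases not shown here.

import tensorflow as tf

def resize_and_crop_boxes(boxes, image_scale, output_size, offset):
  # Sketch only: boxes are [N, 4] as (ymin, xmin, ymax, xmax) in pixels.
  # Scale both corners, shift by the crop offset, then clip to the image.
  boxes = boxes * tf.tile(image_scale, [2]) - tf.tile(offset, [2])
  max_vals = tf.cast(tf.tile(output_size, [2]), boxes.dtype)
  return tf.clip_by_value(boxes, 0.0, max_vals)
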
Example No. 6
    def _build_label(self,
                     image,
                     gt_boxes,
                     gt_classes,
                     info,
                     inds,
                     data,
                     is_training=True):
        """Label construction for both the train and eval data."""
        width = self._image_w
        height = self._image_h

        # Set the image shape.
        imshape = image.get_shape().as_list()
        imshape[-1] = 3
        image.set_shape(imshape)

        labels = dict()
        (labels['inds'], labels['upds'],
         labels['true_conf']) = self._label_builder(gt_boxes, gt_classes,
                                                    width, height)

        # Set/fix the boxes shape.
        boxes = self.set_shape(gt_boxes, pad_axis=0, pad_value=0)
        classes = self.set_shape(gt_classes, pad_axis=0, pad_value=-1)

        # Build the dictionary set.
        labels.update({
            'source_id': utils.process_source_id(data['source_id']),
            'bbox': tf.cast(boxes, dtype=self._dtype),
            'classes': tf.cast(classes, dtype=self._dtype),
        })

        # Update the labels dictionary.
        if not is_training:
            # Sets up groundtruth data for evaluation.
            groundtruths = {
                'source_id': labels['source_id'],
                'height': data['height'],
                'width': data['width'],
                'num_detections': tf.shape(data['groundtruth_boxes'])[0],
                'image_info': info,
                'boxes': bbox_ops.denormalize_boxes(
                    data['groundtruth_boxes'],
                    tf.cast([data['height'], data['width']], gt_boxes.dtype)),
                'classes': data['groundtruth_classes'],
                'areas': data['groundtruth_area'],
                'is_crowds': tf.cast(
                    tf.gather(data['groundtruth_is_crowd'], inds), tf.int32),
            }
            groundtruths['source_id'] = utils.process_source_id(
                groundtruths['source_id'])
            groundtruths = utils.pad_groundtruths_to_fixed_size(
                groundtruths, self._max_num_instances)
            labels['groundtruths'] = groundtruths
        return image, labels
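
Every example above runs `denormalize_boxes` on the groundtruth boxes before padding, because the decoded boxes are normalized to [0, 1] while COCO-style evaluation expects pixel coordinates. The following is a minimal sketch of what the call sites assume the op does (mirroring `box_ops`/`bbox_ops.denormalize_boxes`, whose actual implementation may differ).

import tensorflow as tf

def denormalize_boxes(boxes, image_shape):
  # Sketch only: scale normalized (ymin, xmin, ymax, xmax) boxes to pixels.
  image_shape = tf.cast(image_shape, boxes.dtype)
  height, width = image_shape[0], image_shape[1]
  ymin, xmin, ymax, xmax = tf.split(boxes, 4, axis=-1)
  return tf.concat(
      [ymin * height, xmin * width, ymax * height, xmax * width], axis=-1)
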