def _parse_eval_data(self, decoded_tensors):
        """Parses data for evaluation."""
        label = tf.cast(decoded_tensors['image/class/label'], dtype=tf.int32)
        image_bytes = decoded_tensors['image/encoded']
        image_shape = tf.image.extract_jpeg_shape(image_bytes)

        # Center crops and resizes image.
        image = preprocess_ops.center_crop_image_v2(image_bytes, image_shape)

        image = tf.image.resize(image,
                                self._output_size,
                                method=tf.image.ResizeMethod.BILINEAR)

        image = tf.reshape(image,
                           [self._output_size[0], self._output_size[1], 3])

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)

        # Convert image to self._dtype.
        image = tf.image.convert_image_dtype(image, self._dtype)

        return image, label

    def _parse_train_data(self, decoded_tensors):
        """Parses data for training."""
        label = tf.cast(decoded_tensors['image/class/label'], dtype=tf.int32)

        image_bytes = decoded_tensors['image/encoded']
        image_shape = tf.image.extract_jpeg_shape(image_bytes)

        # Crops image.
        # TODO(pengchong): support image format other than JPEG.
        cropped_image = preprocess_ops.random_crop_image_v2(
            image_bytes, image_shape)
        image = tf.cond(
            tf.reduce_all(tf.equal(tf.shape(cropped_image), image_shape)),
            lambda: preprocess_ops.center_crop_image_v2(
                image_bytes, image_shape), lambda: cropped_image)

        if self._aug_rand_hflip:
            image = tf.image.random_flip_left_right(image)

        # Resizes image.
        image = tf.image.resize(image,
                                self._output_size,
                                method=tf.image.ResizeMethod.BILINEAR)

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)

        # Convert image to self._dtype.
        image = tf.image.convert_image_dtype(image, self._dtype)

        return image, label
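The two parsers above are meant to be mapped over a tf.data pipeline. Below is a minimal wiring sketch, assuming a TFRecord file whose examples carry the 'image/encoded' and 'image/class/label' features read above; the feature spec, file name, and parser instance are illustrative assumptions, not part of the original code.

import tensorflow as tf

def decode_fn(serialized):
  # Hypothetical feature spec matching the keys the parsers read.
  features = {
      'image/encoded': tf.io.FixedLenFeature([], tf.string),
      'image/class/label': tf.io.FixedLenFeature([], tf.int64),
  }
  return tf.io.parse_single_example(serialized, features)

# parser = ...  # an instance of the class that defines the methods above
# dataset = (tf.data.TFRecordDataset('train.tfrecord')
#            .map(decode_fn, num_parallel_calls=tf.data.AUTOTUNE)
#            .map(parser._parse_train_data, num_parallel_calls=tf.data.AUTOTUNE)
#            .batch(32)
#            .prefetch(tf.data.AUTOTUNE))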
Example #3
    def _parse_eval_image(self, decoded_tensors):
        """Parses image data for evaluation."""
        image = tf.io.decode_image(decoded_tensors[self._image_field_key],
                                   channels=3)

        image = tf.reshape(image, (decoded_tensors['image/height'],
                                   decoded_tensors['image/width'], 3))

        # TODO: Add option to center crop and resize image.
        image, _ = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            self._output_size,
            preserve_aspect_ratio=self._preserve_aspect_ratio)

        image = tf.reshape(image,
                           [self._output_size[0], self._output_size[1], 3])

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)

        # Convert image to self._dtype.
        image = tf.image.convert_image_dtype(image, self._dtype)

        return image
Example #4
  def _parse_train_image(self, decoded_tensors):
    """Parses image data for training."""
    image_bytes = decoded_tensors[self._image_field_key]
    image_shape = tf.image.extract_jpeg_shape(image_bytes)

    # Crops image.
    # TODO(pengchong): support image format other than JPEG.
    cropped_image = preprocess_ops.random_crop_image_v2(
        image_bytes, image_shape)
    image = tf.cond(
        tf.reduce_all(tf.equal(tf.shape(cropped_image), image_shape)),
        lambda: preprocess_ops.center_crop_image_v2(image_bytes, image_shape),
        lambda: cropped_image)

    if self._aug_rand_hflip:
      image = tf.image.random_flip_left_right(image)

    # Resizes image.
    image = tf.image.resize(
        image, self._output_size, method=tf.image.ResizeMethod.BILINEAR)

    # Apply autoaug or randaug.
    if self._augmenter is not None:
      image = self._augmenter.distort(image)

    # Normalizes image with mean and std pixel values.
    image = preprocess_ops.normalize_image(image,
                                           offset=MEAN_RGB,
                                           scale=STDDEV_RGB)

    # Convert image to self._dtype.
    image = tf.image.convert_image_dtype(image, self._dtype)

    return image
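Note that self._augmenter.distort is applied after resizing but before normalization, since AutoAugment/RandAugment ops expect pixel values in the 0-255 range. A hedged sketch of constructing such an augmenter with the TF Model Garden augment module (the import path and constructor arguments are assumptions):

# from official.vision.ops import augment
# augmenter = augment.RandAugment(num_layers=2, magnitude=10)
# image = augmenter.distort(image)  # `distort` is the method called above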
Example #5
  def _parse_eval_data(self, decoded_tensors):
    """Parses data for evaluation."""
    label = tf.cast(decoded_tensors[self._label_field_key], dtype=tf.int32)
    image_bytes = decoded_tensors[self._image_field_key]
    image_shape = tf.image.extract_jpeg_shape(image_bytes)

    # Center crops and resizes image.
    image = preprocess_ops.center_crop_image_v2(image_bytes, image_shape)

    image = tf.image.resize(
        image, self._output_size, method=tf.image.ResizeMethod.BILINEAR)

    image = tf.reshape(image, [self._output_size[0], self._output_size[1], 3])

    # Normalizes image with mean and std pixel values.
    image = preprocess_ops.normalize_image(image,
                                           offset=MEAN_RGB,
                                           scale=STDDEV_RGB)

    # Convert image to self._dtype.
    image = tf.image.convert_image_dtype(image, self._dtype)

    if self._is_multilabel:
      if isinstance(label, tf.sparse.SparseTensor):
        label = tf.sparse.to_dense(label)
      label = tf.reduce_sum(tf.one_hot(label, self._num_classes), axis=0)

    return image, label
Example #6
    def _build_inputs(self, image):
        """Builds classification model inputs for serving."""
        model_params = self._params.task.model
        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)

        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._input_image_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._input_image_size, 2**model_params.max_level),
            aug_scale_min=1.0,
            aug_scale_max=1.0)

        image_shape = image_info[1, :]  # Scaled image size (image_info[0, :] is the original size).

        input_anchor = anchor.build_anchor_generator(
            min_level=model_params.min_level,
            max_level=model_params.max_level,
            num_scales=model_params.anchor.num_scales,
            aspect_ratios=model_params.anchor.aspect_ratios,
            anchor_size=model_params.anchor.anchor_size)
        anchor_boxes = input_anchor(image_size=(self._input_image_size[0],
                                                self._input_image_size[1]))

        return image, anchor_boxes, image_shape
Example #7
    def _parse_eval_image(self, decoded_tensors):
        """Parses image data for evaluation."""
        image_bytes = decoded_tensors[self._image_field_key]

        if self._decode_jpeg_only:
            image_shape = tf.image.extract_jpeg_shape(image_bytes)

            # Center crops.
            image = preprocess_ops.center_crop_image_v2(
                image_bytes, image_shape)
        else:
            # Decodes image.
            image = tf.io.decode_image(image_bytes, channels=3)
            image.set_shape([None, None, 3])

            # Center crops.
            image = preprocess_ops.center_crop_image(image)

        image = tf.image.resize(image,
                                self._output_size,
                                method=tf.image.ResizeMethod.BILINEAR)
        image.set_shape([self._output_size[0], self._output_size[1], 3])

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)

        # Convert image to self._dtype.
        image = tf.image.convert_image_dtype(image, self._dtype)

        return image
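The _decode_jpeg_only branch above never decodes the full image: tf.image.extract_jpeg_shape reads only the JPEG header, and the *_v2 crop ops can decode just the crop window. A minimal sketch of the same idea with TF's public API; the 0.875 crop fraction is an assumption, not taken from the original code.

import tensorflow as tf

def center_crop_jpeg_sketch(image_bytes, crop_fraction=0.875):
  # Reads [height, width, channels] from the JPEG header without decoding.
  shape = tf.image.extract_jpeg_shape(image_bytes)
  h, w = shape[0], shape[1]
  side = tf.cast(
      crop_fraction * tf.cast(tf.minimum(h, w), tf.float32), tf.int32)
  top, left = (h - side) // 2, (w - side) // 2
  # Decodes only the pixels inside the crop window.
  return tf.image.decode_and_crop_jpeg(
      image_bytes, [top, left, side, side], channels=3)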
Example #8
    def _parse_train_image(self, decoded_tensors):
        """Parses image data for training."""
        image_bytes = decoded_tensors[self._image_field_key]

        if self._decode_jpeg_only:
            image_shape = tf.image.extract_jpeg_shape(image_bytes)

            # Crops image.
            cropped_image = preprocess_ops.random_crop_image_v2(
                image_bytes, image_shape)
            image = tf.cond(
                tf.reduce_all(tf.equal(tf.shape(cropped_image), image_shape)),
                lambda: preprocess_ops.center_crop_image_v2(
                    image_bytes, image_shape), lambda: cropped_image)
        else:
            # Decodes image.
            image = tf.io.decode_image(image_bytes, channels=3)
            image.set_shape([None, None, 3])

            # Crops image.
            cropped_image = preprocess_ops.random_crop_image(image)

            image = tf.cond(
                tf.reduce_all(
                    tf.equal(tf.shape(cropped_image), tf.shape(image))),
                lambda: preprocess_ops.center_crop_image(image),
                lambda: cropped_image)

        if self._aug_rand_hflip:
            image = tf.image.random_flip_left_right(image)

        # Color jitter.
        if self._color_jitter > 0:
            image = preprocess_ops.color_jitter(image, self._color_jitter,
                                                self._color_jitter,
                                                self._color_jitter)

        # Resizes image.
        image = tf.image.resize(image,
                                self._output_size,
                                method=tf.image.ResizeMethod.BILINEAR)
        image.set_shape([self._output_size[0], self._output_size[1], 3])

        # Apply autoaug or randaug.
        if self._augmenter is not None:
            image = self._augmenter.distort(image)

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)

        # Random erasing after the image has been normalized
        if self._random_erasing is not None:
            image = self._random_erasing.distort(image)

        # Convert image to self._dtype.
        image = tf.image.convert_image_dtype(image, self._dtype)

        return image
Example #9
    def _parse_eval_data(self, data):
        """Generates images and labels that are usable for model evaluation.

    Args:
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      images: the image tensor.
      labels: a dict of Tensors that contains labels.
    """
        image = tf.cast(data['image'], dtype=tf.float32)
        boxes = data['groundtruth_boxes']
        classes = data['groundtruth_classes']

        image_shape = tf.shape(input=image)[0:2]
        # Converts boxes from normalized coordinates to pixel coordinates.
        boxes = box_ops.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image, [self._output_height, self._output_width],
            padded_size=[self._output_height, self._output_width],
            aug_scale_min=1.0,
            aug_scale_max=1.0)
        unpad_image_shape = tf.cast(tf.shape(image), tf.float32)

        # Resizes and crops boxes.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
                                                     image_info[1, :], offset)

        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)

        labels = self._build_label(unpad_image_shape=unpad_image_shape,
                                   boxes=boxes,
                                   classes=classes,
                                   image_info=image_info,
                                   data=data)

        if self._bgr_ordering:
            red, green, blue = tf.unstack(image, num=3, axis=2)
            image = tf.stack([blue, green, red], axis=2)

        image = preprocess_ops.normalize_image(image=image,
                                               offset=self._channel_means,
                                               scale=self._channel_stds)

        image = tf.cast(image, self._dtype)

        return image, labels
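These parsers index image_info rows repeatedly (rows 1-3 above). Inferred from that usage and from resize_and_crop_image's conventions, the layout reads as follows; this is a summary, not code from the original snippet.

# image_info is a 4x2 tensor:
# image_info[0, :]  -> [original_height, original_width]
# image_info[1, :]  -> [scaled_height, scaled_width] (before padding)
# image_info[2, :]  -> [y_scale, x_scale]
# image_info[3, :]  -> [y_offset, x_offset]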
Example #10
    def _prepare_image_and_label(self, data):
        """Prepare normalized image and label."""
        image = tf.io.decode_image(data['image/encoded'], channels=3)
        label = tf.io.decode_image(data['image/segmentation/class/encoded'],
                                   channels=1)
        height = data['image/height']
        width = data['image/width']
        image = tf.reshape(image, (height, width, 3))

        label = tf.reshape(label, (1, height, width))
        label = tf.cast(label, tf.float32)
        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image)
        return image, label
Example #11
    def _build_inputs(self, image):
        """Builds classification model inputs for serving."""
        # Resizes image. (The center crop below is intentionally disabled.)
        # image = preprocess_ops.center_crop_image(image)

        image = tf.image.resize(image,
                                self._input_image_size,
                                method=tf.image.ResizeMethod.BILINEAR)

        image = tf.reshape(
            image, [self._input_image_size[0], self._input_image_size[1], 3])
        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)
        return image
Example #12
  def _prepare_image_and_label(self, data):
    """Prepare normalized image and label."""
    image = tf.io.decode_image(data['image/encoded'], channels=3)
    label = tf.io.decode_image(data['image/segmentation/class/encoded'],
                               channels=1)
    height = data['image/height']
    width = data['image/width']
    image = tf.reshape(image, (height, width, 3))
    
    label = tf.reshape(label, (height, width, 1))
    label = tf.image.convert_image_dtype(label, dtype=tf.float32)

    image = preprocess_ops.normalize_image(image)

    return image, label
Example #13
    def _build_inputs(self, image):
        """Builds classification model inputs for serving."""

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)

        image, _ = preprocess_ops.resize_and_crop_image(
            image,
            self._input_image_size,
            padded_size=self._input_image_size,
            aug_scale_min=1.0,
            aug_scale_max=1.0)
        return image
Example #14
    def _parse_eval_data(self, data):
        """Parses data for training and evaluation."""
        image, label = self._prepare_image_and_label(data)
        # The label is first offset by +1 and then padded with 0.
        label += 1
        label = tf.expand_dims(label, axis=3)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            self._output_size,
            preserve_aspect_ratio=self._preserve_aspect_ratio)

        if self._resize_eval_groundtruth:
            # Resizes eval masks to match the input image size. In that case,
            # mean IoU is computed at output_size, not at the original image size.
            image_scale = image_info[2, :]
            offset = image_info[3, :]
            label = preprocess_ops.resize_and_crop_masks(
                label, image_scale, self._output_size, offset)
        else:
            label = tf.image.pad_to_bounding_box(
                label, 0, 0, self._groundtruth_padded_size[0],
                self._groundtruth_padded_size[1])

        label -= 1
        label = tf.where(tf.equal(label, -1),
                         self._ignore_label * tf.ones_like(label), label)
        label = tf.squeeze(label, axis=0)

        valid_mask = tf.not_equal(label, self._ignore_label)
        labels = {
            'masks': label,
            'valid_masks': valid_mask,
            'image_info': image_info
        }

        # Normalizes image with mean and std pixel values.
        # Must be done after the augmenter, since certain ops rely on uint8 inputs.
        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)

        # Cast image as self._dtype
        image = tf.cast(image, dtype=self._dtype)

        return image, labels
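The +1 / pad / -1 sequence above exists so that zero-padded pixels can be distinguished from a genuine class 0 and remapped to the ignore label. A tiny worked example with illustrative values (ignore_label=255 is an assumption):

import tensorflow as tf

label = tf.constant([[1, 2]])            # valid classes
label += 1                               # -> [[2, 3]]
label = tf.pad(label, [[0, 0], [0, 2]])  # pads with 0 -> [[2, 3, 0, 0]]
label -= 1                               # -> [[1, 2, -1, -1]]
label = tf.where(tf.equal(label, -1), 255 * tf.ones_like(label), label)
# -> [[1, 2, 255, 255]]: only the padded pixels become the ignore label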
Example #15
  def _parse_data(
      self, decoded_tensors: Mapping[str,
                                     tf.Tensor]) -> Tuple[tf.Tensor, tf.Tensor]:
    label = tf.cast(decoded_tensors['image/class/label'], dtype=tf.int32)
    image_bytes = decoded_tensors['image/encoded']
    image = tf.io.decode_jpeg(image_bytes, channels=3)
    image = tf.image.resize(
        image, self._output_size, method=tf.image.ResizeMethod.BILINEAR)
    image = tf.ensure_shape(image, self._output_size + [3])

    # Normalizes image with mean and std pixel values.
    image = preprocess_ops.normalize_image(
        image, offset=MEAN_RGB, scale=STDDEV_RGB)

    image = tf.image.convert_image_dtype(image, self._dtype)
    return image, label
Example #16
  def inference_fn(cls,
                   image: tf.Tensor,
                   input_image_size: List[int],
                   num_channels: int = 3) -> tf.Tensor:
    """Builds image model inputs for serving."""

    image = tf.cast(image, dtype=tf.float32)
    image = preprocess_ops.center_crop_image(image)
    image = tf.image.resize(
        image, input_image_size, method=tf.image.ResizeMethod.BILINEAR)

    # Normalizes image with mean and std pixel values.
    image = preprocess_ops.normalize_image(
        image, offset=MEAN_RGB, scale=STDDEV_RGB)
    image.set_shape(input_image_size + [num_channels])
    return image
Example #17
    def _build_inputs(self, image):
        """Builds segmentation model inputs for serving."""

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(
            image,
            offset=run_lib.IMAGENET_MEAN_RGB,
            scale=run_lib.IMAGENET_STDDEV_RGB)

        image, _ = preprocess_ops.resize_and_crop_image(
            image,
            self._input_image_size,
            padded_size=self._input_image_size,
            aug_scale_min=1.0,
            aug_scale_max=1.0,
            preserve_aspect_ratio=False)
        return image
Example #18
    def _build_inputs(self, image):
        """Builds detection model inputs for serving."""
        model_params = self.params.task.model
        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)

        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._input_image_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._input_image_size, 2**model_params.max_level),
            aug_scale_min=1.0,
            aug_scale_max=1.0)
        anchor_boxes = self._build_anchor_boxes()

        return image, anchor_boxes, image_info
Example #19
    def _prepare_image_and_label(self, data):
        """Prepare normalized image and label."""
        image = tf.io.decode_image(data['image/encoded'], channels=3)
        label = tf.io.decode_image(data['image/segmentation/class/encoded'],
                                   channels=1)
        height = data['image/height']
        width = data['image/width']
        image = tf.reshape(image, (height, width, 3))

        label = tf.reshape(label, (1, height, width))
        label = tf.where(tf.math.greater(label, self.max_class),
                         tf.zeros_like(label), label)
        label = tf.where(tf.math.equal(label, 0),
                         tf.ones_like(label) * 255, label)
        label = tf.cast(label, tf.float32)
        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image,
                                               offset=[0.5, 0.5, 0.5],
                                               scale=[0.5, 0.5, 0.5])
        return image, label
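The two tf.where calls above first zero out-of-range classes, then remap 0 (background as well as anything just zeroed) to 255 as an ignore value. A tiny worked example, assuming max_class=20:

import tensorflow as tf

label = tf.constant([[0, 5, 200]])
label = tf.where(tf.math.greater(label, 20), tf.zeros_like(label), label)
# -> [[0, 5, 0]]
label = tf.where(tf.math.equal(label, 0), tf.ones_like(label) * 255, label)
# -> [[255, 5, 255]]: background and out-of-range classes are both ignored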
Example #20
    def _parse_eval_data(self, data):
        """Parses data for evaluation.
    Note: all augmentations and transformations operate on bboxes in the
      (ymin, xmin, ymax, xmax) format, which is required to apply the
      transformations correctly. Images are expected to be in RGB format.
    """
        image, boxes = data['image'], data['boxes']

        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._input_size[:2],
            self._input_size[:2],
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max,
            preserve_aspect_ratio=self._preserve_aspect_ratio)
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_info[2, :],
                                                     image_info[1, :],
                                                     image_info[3, :])

        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)
        image = tf.cast(image, dtype=self._dtype)

        boxes = tf.clip_by_value(boxes, 0, self._input_size[0] - 1)
        bbox_labels = yolo_box_ops.yxyx_to_xcycwh(boxes)
        bbox_labels = tf.concat([bbox_labels, data['classes'][:, tf.newaxis]],
                                axis=-1)

        labels, bbox_labels = yolo_ops.preprocess_true_boxes(
            bboxes=bbox_labels,
            train_output_sizes=self.train_output_sizes,
            anchor_per_scale=self.anchor_per_scale,
            num_classes=self.num_classes,
            max_bbox_per_scale=self.max_bbox_per_scale,
            strides=self.strides,
            anchors=self.anchors)

        targets = {'labels': labels, 'bboxes': bbox_labels}

        return image, targets
Example #21
  def _parse_train_data(self, decoded_tensors):
    """Parses data for training."""
    label = tf.cast(decoded_tensors[self._label_field_key], dtype=tf.int32)
    image_bytes = decoded_tensors[self._image_field_key]
    image_shape = tf.image.extract_jpeg_shape(image_bytes)

    # Crops image.
    # TODO(pengchong): support image format other than JPEG.
    cropped_image = preprocess_ops.random_crop_image_v2(
        image_bytes, image_shape)
    image = tf.cond(
        tf.reduce_all(tf.equal(tf.shape(cropped_image), image_shape)),
        lambda: preprocess_ops.center_crop_image_v2(image_bytes, image_shape),
        lambda: cropped_image)

    if self._aug_rand_hflip:
      image = tf.image.random_flip_left_right(image)

    # Resizes image.
    image = tf.image.resize(
        image, self._output_size, method=tf.image.ResizeMethod.BILINEAR)

    # Apply autoaug or randaug.
    if self._augmenter is not None:
      image = self._augmenter.distort(image)

    # Normalizes image with mean and std pixel values.
    image = preprocess_ops.normalize_image(image,
                                           offset=MEAN_RGB,
                                           scale=STDDEV_RGB)

    # Convert image to self._dtype.
    image = tf.image.convert_image_dtype(image, self._dtype)

    if self._is_multilabel:
      if isinstance(label, tf.sparse.SparseTensor):
        label = tf.sparse.to_dense(label)
      label = tf.reduce_sum(tf.one_hot(label, self._num_classes), axis=0)

    return image, label
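When _is_multilabel is set, the one_hot/reduce_sum pair above turns a list of class ids into a single multi-hot vector. A tiny worked example (num_classes=5 is an assumption):

import tensorflow as tf

label = tf.constant([1, 3])
multi_hot = tf.reduce_sum(tf.one_hot(label, 5), axis=0)
# -> [0., 1., 0., 1., 0.]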
Example #22
    def _parse_train_image(self, decoded_tensors):
        """Parses image data for training."""
        # TODO: add option to crop images
        image_bytes = decoded_tensors[self._image_field_key]
        if 'image/height' in decoded_tensors and 'image/width' in decoded_tensors:
            image_shape = (decoded_tensors['image/height'],
                           decoded_tensors['image/width'], 3)
        else:
            image_shape = tf.image.extract_jpeg_shape(image_bytes)

        image = tf.io.decode_image(image_bytes, channels=3)
        image = tf.reshape(image, image_shape)

        if self._aug_rand_hflip:
            image = tf.image.random_flip_left_right(image)

        image, _ = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            self._output_size,
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max,
            preserve_aspect_ratio=self._preserve_aspect_ratio)

        # Apply autoaug or randaug.
        if self._augmenter is not None:
            image = self._augmenter.distort(image)

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)

        # Convert image to self._dtype.
        image = tf.image.convert_image_dtype(image, self._dtype)

        return image
Example #23
    def _prepare_image_and_label(self, data):
        """Prepare normalized image and label."""
        image = tf.io.decode_image(data['image/encoded'], channels=3)
        label = tf.io.decode_image(data['image/segmentation/class/encoded'],
                                   channels=1)
        height = data['image/height']
        width = data['image/width']
        image = tf.reshape(image, (height, width, 3))

        label = tf.reshape(label, (1, height, width))
        label = tf.cast(label, tf.float32)
        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image)

        if not self._preserve_aspect_ratio:
            label = tf.reshape(label,
                               [data['image/height'], data['image/width'], 1])
            image = tf.image.resize(image,
                                    self._output_size,
                                    method='bilinear')
            label = tf.image.resize(label, self._output_size, method='nearest')
            label = tf.reshape(label[:, :, -1], [1] + self._output_size)

        return image, label
Example #24
    def _parse_eval_data(self, data):
        """Parses data for training and evaluation."""
        groundtruths = {}
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        # If not empty, `attributes` is a dict of (name, ground_truth) pairs.
        # The `ground_truth` of attributes is assumed to have shape [N, attribute_size].
        # TODO(xianzhi): support parsing attributes weights.
        attributes = data.get('groundtruth_attributes', {})

        # Gets original image and its size.
        image = data['image']
        image_shape = tf.shape(input=image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image)

        # Converts boxes from normalized coordinates to pixel coordinates.
        boxes = box_ops.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=1.0,
            aug_scale_max=1.0)
        image_height, image_width, _ = image.get_shape().as_list()

        # Resizes and crops boxes.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
                                                     image_info[1, :], offset)
        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)
        for k, v in attributes.items():
            attributes[k] = tf.gather(v, indices)

        # Assigns anchors.
        input_anchor = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = input_anchor(image_size=(image_height, image_width))
        anchor_labeler = anchor.AnchorLabeler(self._match_threshold,
                                              self._unmatched_threshold)
        (cls_targets, box_targets, att_targets, cls_weights,
         box_weights) = anchor_labeler.label_anchors(
             anchor_boxes, boxes, tf.expand_dims(classes, axis=1), attributes)

        # Casts input image to desired data type.
        image = tf.cast(image, dtype=self._dtype)

        # Sets up groundtruth data for evaluation.
        groundtruths = {
            'source_id': data['source_id'],
            'height': data['height'],
            'width': data['width'],
            'num_detections': tf.shape(data['groundtruth_classes']),
            'image_info': image_info,
            'boxes': box_ops.denormalize_boxes(data['groundtruth_boxes'],
                                               image_shape),
            'classes': data['groundtruth_classes'],
            'areas': data['groundtruth_area'],
            'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
        }
        if 'groundtruth_attributes' in data:
            groundtruths['attributes'] = data['groundtruth_attributes']
        groundtruths['source_id'] = utils.process_source_id(
            groundtruths['source_id'])
        groundtruths = utils.pad_groundtruths_to_fixed_size(
            groundtruths, self._max_num_instances)

        # Packs labels for model_fn outputs.
        labels = {
            'cls_targets': cls_targets,
            'box_targets': box_targets,
            'anchor_boxes': anchor_boxes,
            'cls_weights': cls_weights,
            'box_weights': box_weights,
            'image_info': image_info,
            'groundtruths': groundtruths,
        }
        if att_targets:
            labels['attribute_targets'] = att_targets
        return image, labels
Example #25
    def _parse_train_data(self, data):
        """Parses data for training and evaluation.
    Note: all augmentations and transformations operate on bboxes in the
      (ymin, xmin, ymax, xmax) format, which is required to apply the
      transformations correctly. Images are expected to be in RGB format.
    """
        image, boxes = data['image'], data['boxes']

        # Execute RandAugment first as some ops require uint8 colors
        if self._augmenter is not None:
            image = self._augmenter.distort(image)

        if self._aug_rand_hflip:
            image, boxes = yolo_ops.random_horizontal_flip(image, boxes)

        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._input_size[:2],
            self._input_size[:2],
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max,
            preserve_aspect_ratio=self._preserve_aspect_ratio)
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_info[2, :],
                                                     image_info[1, :],
                                                     image_info[3, :])

        if self._aug_jitter_im != 0.0:
            image, boxes = yolo_ops.random_translate(image, boxes,
                                                     self._aug_jitter_im)

        if self._aug_jitter_boxes != 0.0:
            boxes = box_ops.jitter_boxes(boxes, self._aug_jitter_boxes)

        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)
        image = tf.cast(image, dtype=self._dtype)

        boxes = tf.clip_by_value(boxes, 0, self._input_size[0] - 1)
        bbox_labels = yolo_box_ops.yxyx_to_xcycwh(boxes)
        bbox_labels = tf.concat([bbox_labels, data['classes'][:, tf.newaxis]],
                                axis=-1)

        labels, bbox_labels = yolo_ops.preprocess_true_boxes(
            bboxes=bbox_labels,
            train_output_sizes=self.train_output_sizes,
            anchor_per_scale=self.anchor_per_scale,
            num_classes=self.num_classes,
            max_bbox_per_scale=self.max_bbox_per_scale,
            strides=self.strides,
            anchors=self.anchors)

        # TODO: Figure out why the number of bboxes must be fixed; otherwise an
        # error occurs. See https://github.com/whizzmobility/models/pull/61
        # Pads or truncates to MAX_DISPLAY_BBOX boxes for a constant size.
        raw_bboxes = boxes
        num_bboxes = tf.shape(raw_bboxes)[0]
        if num_bboxes > MAX_DISPLAY_BBOX:
            # Truncates along the box axis; boxes are shaped [num_bboxes, 4].
            raw_bboxes = raw_bboxes[:MAX_DISPLAY_BBOX, :]
        else:
            paddings = tf.stack([0, MAX_DISPLAY_BBOX - num_bboxes], axis=-1)
            paddings = tf.stack([paddings, [0, 0]], axis=0)
            raw_bboxes = tf.pad(raw_bboxes, paddings)

        targets = {
            'labels': labels,
            'bboxes': bbox_labels,
            'raw_bboxes': raw_bboxes
        }

        return image, targets
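For reference, the pad-or-truncate block above could arguably be replaced with the clip_or_pad_to_fixed_size helper that other examples on this page use for gt_boxes; a sketch under that assumption, not the original code:

# raw_bboxes = preprocess_ops.clip_or_pad_to_fixed_size(
#     boxes, MAX_DISPLAY_BBOX, 0)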
Example #26
    def _parse_train_data(self, data):
        """Generates images and labels that are usable for model training.

    We use random flip, random scaling (between 0.6 and 1.3), cropping,
    and color jittering as data augmentation.

    Args:
        data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
        images: the image tensor.
        labels: a dict of Tensors that contains labels.
    """

        image = tf.cast(data['image'], dtype=tf.float32)
        boxes = data['groundtruth_boxes']
        classes = data['groundtruth_classes']

        image_shape = tf.shape(input=image)[0:2]

        if self._aug_rand_hflip:
            image, boxes, _ = preprocess_ops.random_horizontal_flip(
                image, boxes)

        # Image augmentation
        if not self._odapi_augmentation:
            # Color and lighting jittering
            if self._aug_rand_hue:
                image = tf.image.random_hue(image=image, max_delta=.02)
            if self._aug_rand_contrast:
                image = tf.image.random_contrast(image=image,
                                                 lower=0.8,
                                                 upper=1.25)
            if self._aug_rand_saturation:
                image = tf.image.random_saturation(image=image,
                                                   lower=0.8,
                                                   upper=1.25)
            if self._aug_rand_brightness:
                image = tf.image.random_brightness(image=image, max_delta=.2)
            image = tf.clip_by_value(image,
                                     clip_value_min=0.0,
                                     clip_value_max=255.0)
            # Converts boxes from normalized coordinates to pixel coordinates.
            boxes = box_ops.denormalize_boxes(boxes, image_shape)

            # Resizes and crops image.
            image, image_info = preprocess_ops.resize_and_crop_image(
                image, [self._output_height, self._output_width],
                padded_size=[self._output_height, self._output_width],
                aug_scale_min=self._aug_scale_min,
                aug_scale_max=self._aug_scale_max)
            unpad_image_shape = tf.cast(tf.shape(image), tf.float32)

            # Resizes and crops boxes.
            image_scale = image_info[2, :]
            offset = image_info[3, :]
            boxes = preprocess_ops.resize_and_crop_boxes(
                boxes, image_scale, image_info[1, :], offset)

        else:
            # Color and lighting jittering
            if self._aug_rand_hue:
                image = cn_prep_ops.random_adjust_hue(image=image,
                                                      max_delta=.02)
            if self._aug_rand_contrast:
                image = cn_prep_ops.random_adjust_contrast(image=image,
                                                           min_delta=0.8,
                                                           max_delta=1.25)
            if self._aug_rand_saturation:
                image = cn_prep_ops.random_adjust_saturation(image=image,
                                                             min_delta=0.8,
                                                             max_delta=1.25)
            if self._aug_rand_brightness:
                image = cn_prep_ops.random_adjust_brightness(image=image,
                                                             max_delta=.2)

            sc_image, sc_boxes, classes = cn_prep_ops.random_square_crop_by_scale(
                image=image,
                boxes=boxes,
                labels=classes,
                scale_min=self._aug_scale_min,
                scale_max=self._aug_scale_max)

            image, unpad_image_shape = cn_prep_ops.resize_to_range(
                image=sc_image,
                min_dimension=self._output_width,
                max_dimension=self._output_width,
                pad_to_max_dimension=True)
            preprocessed_shape = tf.cast(tf.shape(image), tf.float32)
            unpad_image_shape = tf.cast(unpad_image_shape, tf.float32)

            im_box = tf.stack([
                0.0, 0.0, preprocessed_shape[0] / unpad_image_shape[0],
                preprocessed_shape[1] / unpad_image_shape[1]
            ])
            realigned_bboxes = box_list_ops.change_coordinate_frame(
                boxlist=box_list.BoxList(sc_boxes), window=im_box)

            valid_boxes = box_list_ops.assert_or_prune_invalid_boxes(
                realigned_bboxes.get())

            boxes = box_list_ops.to_absolute_coordinates(
                boxlist=box_list.BoxList(valid_boxes),
                height=self._output_height,
                width=self._output_width).get()

            image_info = tf.stack([
                tf.cast(image_shape, dtype=tf.float32),
                tf.constant([self._output_height, self._output_width],
                            dtype=tf.float32),
                tf.cast(tf.shape(sc_image)[0:2] / image_shape,
                        dtype=tf.float32),
                tf.constant([0., 0.])
            ])

        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)

        labels = self._build_label(unpad_image_shape=unpad_image_shape,
                                   boxes=boxes,
                                   classes=classes,
                                   image_info=image_info,
                                   data=data)

        if self._bgr_ordering:
            red, green, blue = tf.unstack(image, num=3, axis=2)
            image = tf.stack([blue, green, red], axis=2)

        image = preprocess_ops.normalize_image(image=image,
                                               offset=self._channel_means,
                                               scale=self._channel_stds)

        image = tf.cast(image, self._dtype)

        return image, labels
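The unstack/stack pair used for _bgr_ordering above is equivalent to reversing the channel axis; a one-line alternative with the same behavior, noted only for clarity:

# image = tf.reverse(image, axis=[-1])  # RGB -> BGR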
Example #27
    def _parse_train_data(self, data):
        """Parses data for training and evaluation."""
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        is_crowds = data['groundtruth_is_crowd']
        # Skips annotations with `is_crowd` = True.
        if self._skip_crowd_during_training:
            num_groundtruths = tf.shape(input=classes)[0]
            with tf.control_dependencies([num_groundtruths, is_crowds]):
                indices = tf.cond(
                    pred=tf.greater(tf.size(input=is_crowds), 0),
                    true_fn=lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                    false_fn=lambda: tf.cast(tf.range(num_groundtruths),
                                             tf.int64))
            classes = tf.gather(classes, indices)
            boxes = tf.gather(boxes, indices)

        # Gets original image and its size.
        image = data['image']

        image_shape = tf.shape(input=image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            image, boxes, _ = preprocess_ops.random_horizontal_flip(
                image, boxes)

        # Converts boxes from normalized coordinates to pixel coordinates.
        boxes = box_ops.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        image_height, image_width, _ = image.get_shape().as_list()

        # Resizes and crops boxes.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
                                                     image_info[1, :], offset)
        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)

        # Assigns anchors.
        input_anchor = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = input_anchor(image_size=(image_height, image_width))
        anchor_labeler = anchor.AnchorLabeler(self._match_threshold,
                                              self._unmatched_threshold)
        (cls_targets, box_targets, cls_weights,
         box_weights) = anchor_labeler.label_anchors(
             anchor_boxes, boxes, tf.expand_dims(classes, axis=1))

        # If bfloat16 is used, casts input image to tf.bfloat16.
        if self._use_bfloat16:
            image = tf.cast(image, dtype=tf.bfloat16)

        # Packs labels for model_fn outputs.
        labels = {
            'cls_targets': cls_targets,
            'box_targets': box_targets,
            'anchor_boxes': anchor_boxes,
            'cls_weights': cls_weights,
            'box_weights': box_weights,
            'image_info': image_info,
        }
        return image, labels
Example #28
    def _parse_eval_data(self, data):
        """Parses data for evaluation.

    Args:
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      A tuple of (image, labels) where
        image: image tensor that is preprocessed to have normalized values and
          dimension [output_size[0], output_size[1], 3]
        labels: a dictionary of tensors used for training. The following
          describes {key: value} pairs in the dictionary.
          source_ids: Source image id. Default value -1 if the source id is
            empty in the groundtruth annotation.
          image_info: a 2D `Tensor` that encodes the information of the image
            and the applied preprocessing. It is in the format of
            [[original_height, original_width], [scaled_height, scaled_width],
            [y_scale, x_scale], [y_offset, x_offset]].
          anchor_boxes: ordered dictionary with keys
            [min_level, min_level+1, ..., max_level]. The values are tensors with
            shape [height_l, width_l, 4] representing anchor boxes at each
            level.
    """
        # Gets original image and its size.
        image = data['image']
        image_shape = tf.shape(image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=1.0,
            aug_scale_max=1.0)
        image_height, image_width, _ = image.get_shape().as_list()

        # Casts input image to self._dtype
        image = tf.cast(image, dtype=self._dtype)

        # Converts boxes from normalized coordinates to pixel coordinates.
        boxes = box_ops.denormalize_boxes(data['groundtruth_boxes'],
                                          image_shape)

        # Compute Anchor boxes.
        input_anchor = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = input_anchor(image_size=(image_height, image_width))

        labels = {
            'image_info': image_info,
            'anchor_boxes': anchor_boxes,
        }

        groundtruths = {
            'source_id': data['source_id'],
            'height': data['height'],
            'width': data['width'],
            'num_detections': tf.shape(data['groundtruth_classes'])[0],
            'boxes': boxes,
            'classes': data['groundtruth_classes'],
            'areas': data['groundtruth_area'],
            'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
        }
        groundtruths['source_id'] = utils.process_source_id(
            groundtruths['source_id'])
        groundtruths = utils.pad_groundtruths_to_fixed_size(
            groundtruths, self._max_num_instances)
        labels['groundtruths'] = groundtruths
        return image, labels
Example #29
    def _parse_train_data(self, data):
        """Parses data for training.

    Args:
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      image: image tensor that is preprocessed to have normalized values and
        dimension [output_size[0], output_size[1], 3]
      labels: a dictionary of tensors used for training. The following describes
        {key: value} pairs in the dictionary.
        image_info: a 2D `Tensor` that encodes the information of the image and
          the applied preprocessing. It is in the format of
          [[original_height, original_width], [scaled_height, scaled_width],
          [y_scale, x_scale], [y_offset, x_offset]].
        anchor_boxes: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensors with
          shape [height_l, width_l, 4] representing anchor boxes at each level.
        rpn_score_targets: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensors with
          shape [height_l, width_l, anchors_per_location]. The height_l and
          width_l represent the dimension of class logits at the l-th level.
        rpn_box_targets: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensors with
          shape [height_l, width_l, anchors_per_location * 4]. The height_l and
          width_l represent the dimension of bounding box regression output at
          the l-th level.
        gt_boxes: Groundtruth bounding box annotations. The box is represented
          in [y1, x1, y2, x2] format. The coordinates are w.r.t. the scaled
          image that is fed to the network. The tensor is padded with -1 to
          the fixed dimension [self._max_num_instances, 4].
        gt_classes: Groundtruth classes annotations. The tensor is padded
          with -1 to the fixed dimension [self._max_num_instances].
        gt_masks: Groundtruth masks cropped by the bounding box and
          resized to a fixed size determined by mask_crop_size.
    """
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        if self._include_mask:
            masks = data['groundtruth_instance_masks']

        is_crowds = data['groundtruth_is_crowd']
        # Skips annotations with `is_crowd` = True.
        if self._skip_crowd_during_training:
            num_groundtruths = tf.shape(classes)[0]
            with tf.control_dependencies([num_groundtruths, is_crowds]):
                indices = tf.cond(
                    tf.greater(tf.size(is_crowds), 0),
                    lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                    lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
            classes = tf.gather(classes, indices)
            boxes = tf.gather(boxes, indices)
            if self._include_mask:
                masks = tf.gather(masks, indices)

        # Gets original image and its size.
        image = data['image']
        image_shape = tf.shape(image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            if self._include_mask:
                image, boxes, masks = preprocess_ops.random_horizontal_flip(
                    image, boxes, masks)
            else:
                image, boxes, _ = preprocess_ops.random_horizontal_flip(
                    image, boxes)

        # Converts boxes from normalized coordinates to pixel coordinates.
        # Now the coordinates of boxes are w.r.t. the original image.
        boxes = box_ops.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        image_height, image_width, _ = image.get_shape().as_list()

        # Resizes and crops boxes.
        # Now the coordinates of boxes are w.r.t the scaled image.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
                                                     image_info[1, :], offset)

        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)
        if self._include_mask:
            masks = tf.gather(masks, indices)
            # Transfer boxes to the original image space and do normalization.
            cropped_boxes = boxes + tf.tile(tf.expand_dims(offset, axis=0),
                                            [1, 2])
            cropped_boxes /= tf.tile(tf.expand_dims(image_scale, axis=0),
                                     [1, 2])
            cropped_boxes = box_ops.normalize_boxes(cropped_boxes, image_shape)
            num_masks = tf.shape(masks)[0]
            masks = tf.image.crop_and_resize(
                tf.expand_dims(masks, axis=-1),
                cropped_boxes,
                box_indices=tf.range(num_masks, dtype=tf.int32),
                crop_size=[self._mask_crop_size, self._mask_crop_size],
                method='bilinear')
            masks = tf.squeeze(masks, axis=-1)

        # Assigns anchor targets.
        # Note that after the target assignment, box targets are absolute pixel
        # offsets w.r.t. the scaled image.
        input_anchor = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = input_anchor(image_size=(image_height, image_width))
        anchor_labeler = anchor.RpnAnchorLabeler(self._rpn_match_threshold,
                                                 self._rpn_unmatched_threshold,
                                                 self._rpn_batch_size_per_im,
                                                 self._rpn_fg_fraction)
        rpn_score_targets, rpn_box_targets = anchor_labeler.label_anchors(
            anchor_boxes, boxes,
            tf.cast(tf.expand_dims(classes, axis=-1), dtype=tf.float32))

        # Casts input image to self._dtype
        image = tf.cast(image, dtype=self._dtype)

        # Packs labels for model_fn outputs.
        labels = {
            'anchor_boxes': anchor_boxes,
            'image_info': image_info,
            'rpn_score_targets': rpn_score_targets,
            'rpn_box_targets': rpn_box_targets,
            'gt_boxes': preprocess_ops.clip_or_pad_to_fixed_size(
                boxes, self._max_num_instances, -1),
            'gt_classes': preprocess_ops.clip_or_pad_to_fixed_size(
                classes, self._max_num_instances, -1),
        }
        if self._include_mask:
            labels['gt_masks'] = preprocess_ops.clip_or_pad_to_fixed_size(
                masks, self._max_num_instances, -1)

        return image, labels
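In the mask branch above, boxes are mapped back to normalized original-image coordinates so tf.image.crop_and_resize can cut one fixed-size patch per instance. A self-contained sketch of that call with illustrative shapes (the sizes are assumptions):

import tensorflow as tf

masks = tf.random.uniform([2, 8, 8])  # [num_masks, height, width]
boxes_norm = tf.constant([[0.00, 0.00, 0.5, 0.5],
                          [0.25, 0.25, 1.0, 1.0]])  # normalized [y1, x1, y2, x2]
patches = tf.image.crop_and_resize(
    masks[..., tf.newaxis],   # crop_and_resize needs a channel axis
    boxes_norm,
    box_indices=tf.range(2),  # one box per mask
    crop_size=[4, 4],
    method='bilinear')
patches = tf.squeeze(patches, axis=-1)  # [2, 4, 4], one patch per mask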
Example #30
    def _parse_train_data(self, data):
        """Parses data for training and evaluation."""
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        # If not empty, `attributes` is a dict of (name, ground_truth) pairs.
        # The `ground_truth` of attributes is assumed to have shape [N, attribute_size].
        # TODO(xianzhi): support parsing attributes weights.
        attributes = data.get('groundtruth_attributes', {})
        is_crowds = data['groundtruth_is_crowd']

        # Skips annotations with `is_crowd` = True.
        if self._skip_crowd_during_training:
            num_groundtruths = tf.shape(input=classes)[0]
            with tf.control_dependencies([num_groundtruths, is_crowds]):
                indices = tf.cond(
                    pred=tf.greater(tf.size(input=is_crowds), 0),
                    true_fn=lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                    false_fn=lambda: tf.cast(tf.range(num_groundtruths),
                                             tf.int64))
            classes = tf.gather(classes, indices)
            boxes = tf.gather(boxes, indices)
            for k, v in attributes.items():
                attributes[k] = tf.gather(v, indices)

        # Gets original image.
        image = data['image']

        # Apply autoaug or randaug.
        if self._augmenter is not None:
            image, boxes = self._augmenter.distort_with_boxes(image, boxes)

        image_shape = tf.shape(input=image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            image, boxes, _ = preprocess_ops.random_horizontal_flip(
                image, boxes)

        # Converts boxes from normalized coordinates to pixel coordinates.
        boxes = box_ops.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        image_height, image_width, _ = image.get_shape().as_list()

        # Resizes and crops boxes.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
                                                     image_info[1, :], offset)
        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)
        for k, v in attributes.items():
            attributes[k] = tf.gather(v, indices)

        # Assigns anchors.
        input_anchor = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = input_anchor(image_size=(image_height, image_width))
        anchor_labeler = anchor.AnchorLabeler(self._match_threshold,
                                              self._unmatched_threshold)
        (cls_targets, box_targets, att_targets, cls_weights,
         box_weights) = anchor_labeler.label_anchors(
             anchor_boxes, boxes, tf.expand_dims(classes, axis=1), attributes)

        # Casts input image to desired data type.
        image = tf.cast(image, dtype=self._dtype)

        # Packs labels for model_fn outputs.
        labels = {
            'cls_targets': cls_targets,
            'box_targets': box_targets,
            'anchor_boxes': anchor_boxes,
            'cls_weights': cls_weights,
            'box_weights': box_weights,
            'image_info': image_info,
        }
        if att_targets:
            labels['attribute_targets'] = att_targets
        return image, labels