Пример #1
0
    def _parse_eval_image(self, decoded_tensors):
        """Produces the preprocessed image used at evaluation time.

        Steps: read the encoded bytes stored under `self._image_field_key`,
        optionally center-crop (only when `self._aug_crop` is set), resize
        bilinearly to `self._output_size`, normalize with `MEAN_RGB` /
        `STDDEV_RGB`, and convert to `self._dtype`.

        Args:
            decoded_tensors: mapping of parsed features; it must contain the
                key `self._image_field_key`.

        Returns:
            The image tensor, with static shape
            `[self._output_size[0], self._output_size[1], 3]` set before
            normalization.
        """
        encoded = decoded_tensors[self._image_field_key]

        crop_from_jpeg_bytes = self._decode_jpeg_only and self._aug_crop
        if crop_from_jpeg_bytes:
            # The v2 crop helper is handed the encoded bytes plus the JPEG
            # shape, so no separate decode call is issued on this path.
            jpeg_shape = tf.image.extract_jpeg_shape(encoded)
            pixels = preprocess_ops.center_crop_image_v2(encoded, jpeg_shape)
        else:
            pixels = tf.io.decode_image(encoded, channels=3)
            pixels.set_shape([None, None, 3])
            if self._aug_crop:
                pixels = preprocess_ops.center_crop_image(pixels)

        pixels = tf.image.resize(
            pixels, self._output_size, method=tf.image.ResizeMethod.BILINEAR)
        pixels.set_shape([self._output_size[0], self._output_size[1], 3])

        # Per-channel mean/std normalization, then the final dtype conversion.
        pixels = preprocess_ops.normalize_image(
            pixels, offset=MEAN_RGB, scale=STDDEV_RGB)
        return tf.image.convert_image_dtype(pixels, self._dtype)
Пример #2
0
  def _parse_train_image(self, decoded_tensors):
    """Produces the augmented, normalized image used for training.

    Steps, in order: random crop (replaced by a center crop whenever the
    random crop has the same shape as the full image), optional horizontal
    flip, optional color jitter, bilinear resize to `self._output_size`,
    optional `self._augmenter` distortion, mean/std normalization, optional
    random erasing, and conversion to `self._dtype`.

    Args:
      decoded_tensors: mapping of parsed features; it must contain the key
        `self._image_field_key`.

    Returns:
      The preprocessed image tensor.
    """
    encoded = decoded_tensors[self._image_field_key]

    if self._decode_jpeg_only:
      # The v2 crop helpers are given the encoded bytes and the JPEG shape.
      jpeg_shape = tf.image.extract_jpeg_shape(encoded)
      random_crop = preprocess_ops.random_crop_image_v2(encoded, jpeg_shape)
      crop_is_whole_image = tf.reduce_all(
          tf.equal(tf.shape(random_crop), jpeg_shape))
      image = tf.cond(
          crop_is_whole_image,
          lambda: preprocess_ops.center_crop_image_v2(encoded, jpeg_shape),
          lambda: random_crop)
    else:
      decoded = tf.io.decode_image(encoded, channels=3)
      decoded.set_shape([None, None, 3])

      random_crop = preprocess_ops.random_crop_image(decoded)
      crop_is_whole_image = tf.reduce_all(
          tf.equal(tf.shape(random_crop), tf.shape(decoded)))
      image = tf.cond(
          crop_is_whole_image,
          lambda: preprocess_ops.center_crop_image(decoded),
          lambda: random_crop)

    if self._aug_rand_hflip:
      image = tf.image.random_flip_left_right(image)

    # The same strength is passed for all three color-jitter arguments.
    jitter = self._color_jitter
    if jitter > 0:
      image = preprocess_ops.color_jitter(image, jitter, jitter, jitter)

    image = tf.image.resize(
        image, self._output_size, method=tf.image.ResizeMethod.BILINEAR)
    image.set_shape([self._output_size[0], self._output_size[1], 3])

    # AutoAugment / RandAugment style policy, when one is configured.
    if self._augmenter is not None:
      image = self._augmenter.distort(image)

    image = preprocess_ops.normalize_image(
        image, offset=MEAN_RGB, scale=STDDEV_RGB)

    # Random erasing is applied to the already-normalized image.
    if self._random_erasing is not None:
      image = self._random_erasing.distort(image)

    return tf.image.convert_image_dtype(image, self._dtype)
Пример #3
0
    def _parse_eval_data(self, data):
        """Builds the evaluation image and fixed-size label dict.

        The image is normalized, resized using the last entry of
        `self._resize_scales` as the short side (capped by
        `max(self._output_size)`), and padded to `self._output_size`. Boxes
        follow the same resize, are re-normalized, stripped of empty entries
        and converted with `box_ops.yxyx_to_cycxhw`.

        Args:
            data: decoded example dict providing `image`, `source_id`,
                `groundtruth_classes`, `groundtruth_boxes` and
                `groundtruth_is_crowd`.

        Returns:
            image: the padded image tensor.
            labels: dict with keys `classes`, `boxes`, `id`, `image_info`,
                `is_crowd` and `gt_boxes`; the per-box entries are clipped or
                padded to `self._max_num_boxes`.
        """

        def _fixed_size(tensor):
            # Every per-box label is clipped/padded to the same length.
            return preprocess_ops.clip_or_pad_to_fixed_size(
                tensor, self._max_num_boxes)

        gt_classes = data['groundtruth_classes']
        norm_boxes = data['groundtruth_boxes']
        crowd_flags = data['groundtruth_is_crowd']

        image = preprocess_ops.normalize_image(data['image'])

        eval_scales = tf.constant([self._resize_scales[-1]], tf.float32)

        # Pixel-space boxes of the original image; also exported as `gt_boxes`.
        original_hw = tf.shape(image)[:2]
        pixel_boxes = box_ops.denormalize_boxes(norm_boxes, original_hw)
        original_pixel_boxes = pixel_boxes

        image, image_info = preprocess_ops.resize_image(
            image, eval_scales[0], max(self._output_size))
        pixel_boxes = preprocess_ops.resize_and_crop_boxes(
            pixel_boxes, image_info[2, :], image_info[1, :], image_info[3, :])
        boxes = box_ops.normalize_boxes(pixel_boxes, image_info[1, :])

        # Filters out ground truth boxes that are all zeros.
        keep = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, keep)
        gt_classes = tf.gather(gt_classes, keep)
        crowd_flags = tf.gather(crowd_flags, keep)
        boxes = box_ops.yxyx_to_cycxhw(boxes)

        image = tf.image.pad_to_bounding_box(
            image, 0, 0, self._output_size[0], self._output_size[1])

        labels = {
            'classes': _fixed_size(gt_classes),
            'boxes': _fixed_size(boxes),
            'id': int(data['source_id']),
            'image_info': image_info,
            'is_crowd': _fixed_size(crowd_flags),
            'gt_boxes': _fixed_size(original_pixel_boxes),
        }
        return image, labels
Пример #4
0
    def _parse_eval_data(self, data):
        """Builds the image and labels consumed during model evaluation.

        Args:
            data: the decoded tensor dictionary from TfExampleDecoder.

        Returns:
            image: the resized, channel-normalized image cast to
                `self._dtype` (channels reversed first when
                `self._bgr_ordering` is set).
            labels: the value returned by `self._build_label` for this
                example.
        """
        pixels = tf.cast(data['image'], dtype=tf.float32)
        gt_boxes = data['groundtruth_boxes']
        gt_classes = data['groundtruth_classes']

        # Converts boxes from normalized coordinates to pixel coordinates of
        # the original image.
        original_hw = tf.shape(input=pixels)[0:2]
        gt_boxes = box_ops.denormalize_boxes(gt_boxes, original_hw)

        # Fixed scale (min == max == 1.0): no random scaling at eval time.
        pixels, image_info = preprocess_ops.resize_and_crop_image(
            pixels,
            [self._output_height, self._output_width],
            padded_size=[self._output_height, self._output_width],
            aug_scale_min=1.0,
            aug_scale_max=1.0)
        unpad_image_shape = tf.cast(tf.shape(pixels), tf.float32)

        # Apply the same scale/offset recorded in `image_info` to the boxes.
        scale_row = image_info[2, :]
        offset_row = image_info[3, :]
        gt_boxes = preprocess_ops.resize_and_crop_boxes(
            gt_boxes, scale_row, image_info[1, :], offset_row)

        # Filters out ground truth boxes that are all zeros.
        keep = box_ops.get_non_empty_box_indices(gt_boxes)
        gt_boxes = tf.gather(gt_boxes, keep)
        gt_classes = tf.gather(gt_classes, keep)

        labels = self._build_label(
            unpad_image_shape=unpad_image_shape,
            boxes=gt_boxes,
            classes=gt_classes,
            image_info=image_info,
            data=data)

        if self._bgr_ordering:
            # Reverse the channel axis: RGB -> BGR.
            channels = tf.unstack(pixels, num=3, axis=2)
            pixels = tf.stack(channels[::-1], axis=2)

        pixels = preprocess_ops.normalize_image(
            image=pixels,
            offset=self._channel_means,
            scale=self._channel_stds)

        return tf.cast(pixels, self._dtype), labels
Пример #5
0
    def _build_inputs(self, image):
        """Prepares one image for the embedding model at serving time.

        Args:
            image: the input image tensor.

        Returns:
            The image resized bilinearly to `self._input_image_size`,
            reshaped to `[height, width, 3]` and normalized with
            `MEAN_RGB` / `STDDEV_RGB`.
        """
        resized = tf.image.resize(
            image,
            self._input_image_size,
            method=tf.image.ResizeMethod.BILINEAR)

        # Pin the shape to the configured size with three channels.
        target_shape = [
            self._input_image_size[0], self._input_image_size[1], 3
        ]
        resized = tf.reshape(resized, target_shape)

        return preprocess_ops.normalize_image(
            resized, offset=MEAN_RGB, scale=STDDEV_RGB)
Пример #6
0
    def _build_inputs(self, image):
        """Normalizes and resizes one image for serving.

        Args:
            image: the input image tensor.

        Returns:
            image: the image normalized with `MEAN_RGB` / `STDDEV_RGB`, then
                passed through `preprocess_ops.resize_and_crop_image` with
                `self._input_image_size` as both target and padded size.
            image_info: the second value returned by
                `resize_and_crop_image`.
        """
        normalized = preprocess_ops.normalize_image(
            image, offset=MEAN_RGB, scale=STDDEV_RGB)

        # min == max == 1.0 disables random scale augmentation.
        resized, image_info = preprocess_ops.resize_and_crop_image(
            normalized,
            self._input_image_size,
            padded_size=self._input_image_size,
            aug_scale_min=1.0,
            aug_scale_max=1.0)
        return resized, image_info
Пример #7
0
  def _parse_data(
      self, decoded_tensors: Mapping[str, tf.Tensor]
  ) -> Tuple[tf.Tensor, tf.Tensor]:
    """Turns one decoded example into an `(image, label)` pair.

    Args:
      decoded_tensors: parsed features; reads `image/encoded` and
        `image/class/label`.

    Returns:
      image: the JPEG decoded to 3 channels, resized bilinearly to
        `self._output_size`, normalized with `MEAN_RGB` / `STDDEV_RGB` and
        converted to `self._dtype`.
      label: the class label cast to int32.
    """
    label = tf.cast(decoded_tensors['image/class/label'], dtype=tf.int32)

    pixels = tf.io.decode_jpeg(decoded_tensors['image/encoded'], channels=3)
    pixels = tf.image.resize(
        pixels, self._output_size, method=tf.image.ResizeMethod.BILINEAR)
    # Checks the resized tensor against [height, width, 3].
    expected_shape = self._output_size + [3]
    pixels = tf.ensure_shape(pixels, expected_shape)

    pixels = preprocess_ops.normalize_image(
        pixels, offset=MEAN_RGB, scale=STDDEV_RGB)
    pixels = tf.image.convert_image_dtype(pixels, self._dtype)

    return pixels, label
Пример #8
0
  def inference_fn(cls,
                   image: tf.Tensor,
                   input_image_size: List[int],
                   num_channels: int = 3) -> tf.Tensor:
    """Prepares one image as model input for serving.

    Args:
      image: the input image tensor.
      input_image_size: target `[height, width]` as a list.
      num_channels: channel count used for the static output shape.

    Returns:
      The image cast to float32, center-cropped, resized bilinearly to
      `input_image_size` and normalized with `MEAN_RGB` / `STDDEV_RGB`, with
      its static shape set to `input_image_size + [num_channels]`.
    """
    pixels = tf.cast(image, dtype=tf.float32)
    pixels = preprocess_ops.center_crop_image(pixels)
    pixels = tf.image.resize(
        pixels, input_image_size, method=tf.image.ResizeMethod.BILINEAR)

    pixels = preprocess_ops.normalize_image(
        pixels, offset=MEAN_RGB, scale=STDDEV_RGB)

    # Record the fully-defined static shape on the result.
    static_shape = input_image_size + [num_channels]
    pixels.set_shape(static_shape)
    return pixels
Пример #9
0
    def _build_inputs(self, image):
        """Prepares one image for the classification model at serving time.

        Args:
            image: the input image tensor.

        Returns:
            The image, center-cropped when
            `self.params.task.train_data.aug_crop` is set, resized bilinearly
            to `self._input_image_size`, reshaped to `[height, width, 3]` and
            normalized with `MEAN_RGB` / `STDDEV_RGB`.
        """
        # Serving mirrors the crop setting of the training data config.
        use_center_crop = self.params.task.train_data.aug_crop
        if use_center_crop:
            image = preprocess_ops.center_crop_image(image)

        resized = tf.image.resize(
            image,
            self._input_image_size,
            method=tf.image.ResizeMethod.BILINEAR)

        target_shape = [
            self._input_image_size[0], self._input_image_size[1], 3
        ]
        resized = tf.reshape(resized, target_shape)

        return preprocess_ops.normalize_image(
            resized, offset=MEAN_RGB, scale=STDDEV_RGB)
Пример #10
0
  def _prepare_image_and_label(self, data):
    """Decodes one example into a normalized image and a float label map.

    Label values greater than `self.max_class` are first set to 0, and then
    every 0 in the label map is replaced by 255.

    Args:
      data: parsed features; reads `image/encoded`,
        `image/segmentation/class/encoded`, `image/height` and `image/width`.

    Returns:
      image: the image reshaped to `(height, width, 3)` and normalized with
        offset and scale of 0.5 per channel.
      label: the remapped label map, float32, reshaped to
        `(1, height, width)`.
    """
    pixels = tf.io.decode_image(data['image/encoded'], channels=3)
    mask = tf.io.decode_image(
        data['image/segmentation/class/encoded'], channels=1)

    rows = data['image/height']
    cols = data['image/width']
    pixels = tf.reshape(pixels, (rows, cols, 3))
    mask = tf.reshape(mask, (1, rows, cols))

    # Step 1: out-of-range class ids collapse to 0.
    above_max = tf.math.greater(mask, self.max_class)
    mask = tf.where(above_max, tf.zeros_like(mask), mask)
    # Step 2: every 0 (including those produced in step 1) becomes 255.
    is_zero = tf.math.equal(mask, 0)
    mask = tf.where(is_zero, tf.ones_like(mask)*255, mask)
    mask = tf.cast(mask, tf.float32)

    pixels = preprocess_ops.normalize_image(
        pixels, offset=[0.5, 0.5, 0.5], scale=[0.5, 0.5, 0.5])
    return pixels, mask
Пример #11
0
    def _build_inputs(self, image):
        """Prepares one image and its anchors for the detection model.

        Args:
            image: the input image tensor.

        Returns:
            image: the image normalized with `MEAN_RGB` / `STDDEV_RGB` and
                passed through `preprocess_ops.resize_and_crop_image`.
            anchor_boxes: the value returned by `self._build_anchor_boxes()`.
            image_info: the second value returned by
                `resize_and_crop_image`.
        """
        max_level = self.params.task.model.max_level

        normalized = preprocess_ops.normalize_image(
            image, offset=MEAN_RGB, scale=STDDEV_RGB)

        # The padded size is derived from the input size and a stride of
        # 2**max_level.
        padded_size = preprocess_ops.compute_padded_size(
            self._input_image_size, 2**max_level)
        resized, image_info = preprocess_ops.resize_and_crop_image(
            normalized,
            self._input_image_size,
            padded_size=padded_size,
            aug_scale_min=1.0,
            aug_scale_max=1.0)

        return resized, self._build_anchor_boxes(), image_info
    def _prepare_image_and_label(self, data):
        """Decodes one example into a normalized image and a float label map.

        When `self._preserve_aspect_ratio` is false, both outputs are also
        resized to `self._output_size` (bilinear for the image, nearest for
        the label).

        Args:
            data: parsed features; reads `image/encoded`,
                `image/segmentation/class/encoded`, `image/height` and
                `image/width`.

        Returns:
            image: the normalized image tensor.
            label: the float32 label map with a leading dimension of 1.
        """
        pixels = tf.io.decode_image(data['image/encoded'], channels=3)
        mask = tf.io.decode_image(
            data['image/segmentation/class/encoded'], channels=1)

        rows = data['image/height']
        cols = data['image/width']
        pixels = tf.reshape(pixels, (rows, cols, 3))
        mask = tf.cast(tf.reshape(mask, (1, rows, cols)), tf.float32)

        pixels = preprocess_ops.normalize_image(pixels)

        if self._preserve_aspect_ratio:
            return pixels, mask

        # tf.image.resize is fed a channels-last tensor, so move the label to
        # (height, width, 1) for the resize and back to (1, H, W) afterwards.
        mask = tf.reshape(mask, [rows, cols, 1])
        pixels = tf.image.resize(pixels, self._output_size, method='bilinear')
        mask = tf.image.resize(mask, self._output_size, method='nearest')
        mask = tf.reshape(mask[:, :, -1], [1] + self._output_size)

        return pixels, mask
Пример #13
0
    def _parse_train_data(self, data):
        """Generates images and labels that are usable for model training.

        Augmentation consists of an optional random horizontal flip, optional
        hue / contrast / saturation / brightness jitter, and random scaling
        with cropping. The scaling range is taken from `self._aug_scale_min`
        and `self._aug_scale_max`. Two implementations of the jitter and
        scale/crop steps exist; `self._odapi_augmentation` selects between
        them.

        Args:
            data: the decoded tensor dictionary from TfExampleDecoder.

        Returns:
            images: the image tensor, normalized with `self._channel_means` /
                `self._channel_stds` and cast to `self._dtype`.
            labels: the value returned by `self._build_label`.
        """

        image = tf.cast(data['image'], dtype=tf.float32)
        boxes = data['groundtruth_boxes']
        classes = data['groundtruth_classes']

        # Height and width of the input image, captured before any resizing.
        image_shape = tf.shape(input=image)[0:2]

        if self._aug_rand_hflip:
            image, boxes, _ = preprocess_ops.random_horizontal_flip(
                image, boxes)

        # Image augmentation
        if not self._odapi_augmentation:
            # Color and lighting jittering
            if self._aug_rand_hue:
                image = tf.image.random_hue(image=image, max_delta=.02)
            if self._aug_rand_contrast:
                image = tf.image.random_contrast(image=image,
                                                 lower=0.8,
                                                 upper=1.25)
            if self._aug_rand_saturation:
                image = tf.image.random_saturation(image=image,
                                                   lower=0.8,
                                                   upper=1.25)
            if self._aug_rand_brightness:
                image = tf.image.random_brightness(image=image, max_delta=.2)
            # Keep pixel values inside [0, 255] after the jitter ops.
            image = tf.clip_by_value(image,
                                     clip_value_min=0.0,
                                     clip_value_max=255.0)
            # Converts boxes from normalized coordinates to pixel coordinates.
            boxes = box_ops.denormalize_boxes(boxes, image_shape)

            # Resizes and crops image.
            image, image_info = preprocess_ops.resize_and_crop_image(
                image, [self._output_height, self._output_width],
                padded_size=[self._output_height, self._output_width],
                aug_scale_min=self._aug_scale_min,
                aug_scale_max=self._aug_scale_max)
            unpad_image_shape = tf.cast(tf.shape(image), tf.float32)

            # Resizes and crops boxes.
            image_scale = image_info[2, :]
            offset = image_info[3, :]
            boxes = preprocess_ops.resize_and_crop_boxes(
                boxes, image_scale, image_info[1, :], offset)

        else:
            # Color and lighting jittering
            # NOTE(review): unlike the branch above, no clip to [0, 255] is
            # applied here after jittering -- confirm whether the cn_prep_ops
            # helpers clip internally.
            if self._aug_rand_hue:
                image = cn_prep_ops.random_adjust_hue(image=image,
                                                      max_delta=.02)
            if self._aug_rand_contrast:
                image = cn_prep_ops.random_adjust_contrast(image=image,
                                                           min_delta=0.8,
                                                           max_delta=1.25)
            if self._aug_rand_saturation:
                image = cn_prep_ops.random_adjust_saturation(image=image,
                                                             min_delta=0.8,
                                                             max_delta=1.25)
            if self._aug_rand_brightness:
                image = cn_prep_ops.random_adjust_brightness(image=image,
                                                             max_delta=.2)

            # Random square crop; boxes and classes are updated by the helper.
            sc_image, sc_boxes, classes = cn_prep_ops.random_square_crop_by_scale(
                image=image,
                boxes=boxes,
                labels=classes,
                scale_min=self._aug_scale_min,
                scale_max=self._aug_scale_max)

            # Note: `self._output_width` is used for both the min and the max
            # dimension here.
            image, unpad_image_shape = cn_prep_ops.resize_to_range(
                image=sc_image,
                min_dimension=self._output_width,
                max_dimension=self._output_width,
                pad_to_max_dimension=True)
            preprocessed_shape = tf.cast(tf.shape(image), tf.float32)
            unpad_image_shape = tf.cast(unpad_image_shape, tf.float32)

            # Window whose extent is the ratio of the padded shape to the
            # unpadded shape; the boxes are re-expressed relative to it.
            im_box = tf.stack([
                0.0, 0.0, preprocessed_shape[0] / unpad_image_shape[0],
                preprocessed_shape[1] / unpad_image_shape[1]
            ])
            realigned_bboxes = box_list_ops.change_coordinate_frame(
                boxlist=box_list.BoxList(sc_boxes), window=im_box)

            valid_boxes = box_list_ops.assert_or_prune_invalid_boxes(
                realigned_bboxes.get())

            boxes = box_list_ops.to_absolute_coordinates(
                boxlist=box_list.BoxList(valid_boxes),
                height=self._output_height,
                width=self._output_width).get()

            # Rows, in order: original image shape, output shape, ratio of the
            # square-crop shape to the original shape, and a zero offset.
            image_info = tf.stack([
                tf.cast(image_shape, dtype=tf.float32),
                tf.constant([self._output_height, self._output_width],
                            dtype=tf.float32),
                tf.cast(tf.shape(sc_image)[0:2] / image_shape,
                        dtype=tf.float32),
                tf.constant([0., 0.])
            ])

        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)

        labels = self._build_label(unpad_image_shape=unpad_image_shape,
                                   boxes=boxes,
                                   classes=classes,
                                   image_info=image_info,
                                   data=data)

        # Reverse the channel axis (RGB -> BGR) when requested.
        if self._bgr_ordering:
            red, green, blue = tf.unstack(image, num=3, axis=2)
            image = tf.stack([blue, green, red], axis=2)

        image = preprocess_ops.normalize_image(image=image,
                                               offset=self._channel_means,
                                               scale=self._channel_stds)

        image = tf.cast(image, self._dtype)

        return image, labels
Пример #14
0
    def _parse_data(self, data, is_training):
        """Builds the image and panoptic labels for one example.

        Args:
            data: decoded example dict; reads `image`,
                `groundtruth_panoptic_category_mask` and
                `groundtruth_panoptic_instance_mask`.
            is_training: when true, the configured augmenter, random
                horizontal flip and random scale range are applied; otherwise
                the scale range is fixed at 1.0.

        Returns:
            image: the processed image cast to `self._dtype`.
            labels: dict with keys `category_mask`, `instance_mask`,
                `instance_centers_heatmap`, `instance_centers_offset`,
                `semantic_weights`, `valid_mask`, `things_mask` and
                `image_info`.
        """
        image = data['image']

        use_augmenter = self._augmenter is not None and is_training
        if use_augmenter:
            image = self._augmenter.distort(image)

        image = preprocess_ops.normalize_image(image)

        def _first_channel_as_float(key):
            # Only channel 0 of each ground-truth mask is used.
            return tf.cast(data[key][:, :, 0], dtype=tf.float32)

        category_mask = _first_channel_as_float(
            'groundtruth_panoptic_category_mask')
        instance_mask = _first_channel_as_float(
            'groundtruth_panoptic_instance_mask')

        # Flip image and both masks together so they stay aligned.
        if self._aug_rand_hflip and is_training:
            stacked_masks = tf.stack([category_mask, instance_mask], axis=0)
            image, _, stacked_masks = preprocess_ops.random_horizontal_flip(
                image=image, masks=stacked_masks)
            category_mask, instance_mask = stacked_masks[0], stacked_masks[1]

        if is_training:
            scale_min, scale_max = self._aug_scale_min, self._aug_scale_max
        else:
            scale_min, scale_max = 1.0, 1.0

        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            self._output_size,
            aug_scale_min=scale_min,
            aug_scale_max=scale_max)

        # The masks follow the transform recorded in `image_info`.
        category_mask = self._resize_and_crop_mask(
            category_mask, image_info, is_training=is_training)
        instance_mask = self._resize_and_crop_mask(
            instance_mask, image_info, is_training=is_training)

        encoded_centers = self._encode_centers_and_offets(
            instance_mask=instance_mask[:, :, 0])
        (instance_centers_heatmap, instance_centers_offset,
         semantic_weights) = encoded_centers

        # Image and label tensors are cast to self._dtype; semantic_weights
        # is passed through unchanged.
        out_dtype = self._dtype
        image = tf.cast(image, dtype=out_dtype)
        category_mask = tf.cast(category_mask, dtype=out_dtype)
        instance_mask = tf.cast(instance_mask, dtype=out_dtype)
        instance_centers_heatmap = tf.cast(
            instance_centers_heatmap, dtype=out_dtype)
        instance_centers_offset = tf.cast(
            instance_centers_offset, dtype=out_dtype)

        labels = {
            'category_mask': category_mask,
            'instance_mask': instance_mask,
            'instance_centers_heatmap': instance_centers_heatmap,
            'instance_centers_offset': instance_centers_offset,
            'semantic_weights': semantic_weights,
            'valid_mask': tf.not_equal(category_mask, self._ignore_label),
            'things_mask': tf.not_equal(instance_mask, self._ignore_label),
            'image_info': image_info
        }
        return image, labels
Пример #15
0
    def _parse_train_data(self, data):
        """Parses one decoded example into a training image and anchor labels.

        Steps: optionally drop crowd annotations, optionally apply
        `self._augmenter`, normalize, optionally flip, resize/crop the image
        and boxes, drop empty boxes, then assign the remaining boxes to
        anchors.

        The input `data` dict is not modified: attribute tensors are filtered
        into a private dict rather than written back into
        `data['groundtruth_attributes']`.

        Args:
            data: decoded example dict; reads `image`, `groundtruth_classes`,
                `groundtruth_boxes`, `groundtruth_is_crowd` and, when present,
                `groundtruth_attributes`.

        Returns:
            image: the processed image cast to `self._dtype`.
            labels: dict with keys `cls_targets`, `box_targets`,
                `anchor_boxes`, `cls_weights`, `box_weights`, `image_info`
                and, when the labeler produced any, `attribute_targets`.
        """
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        # If not empty, `attributes` is a dict of (name, ground_truth) pairs.
        # `ground_truth` of attributes is assumed in shape [N, attribute_size].
        # A shallow copy is taken so the filtering below never writes into the
        # caller's `data['groundtruth_attributes']`.
        # TODO(xianzhi): support parsing attributes weights.
        attributes = dict(data.get('groundtruth_attributes', {}))
        is_crowds = data['groundtruth_is_crowd']

        # Skips annotations with `is_crowd` = True.
        if self._skip_crowd_during_training:
            num_groundtruths = tf.shape(input=classes)[0]
            with tf.control_dependencies([num_groundtruths, is_crowds]):
                # When `is_crowds` is empty, keep every annotation.
                indices = tf.cond(
                    pred=tf.greater(tf.size(input=is_crowds), 0),
                    true_fn=lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                    false_fn=lambda: tf.cast(tf.range(num_groundtruths),
                                             tf.int64))
            classes = tf.gather(classes, indices)
            boxes = tf.gather(boxes, indices)
            attributes = {
                k: tf.gather(v, indices) for k, v in attributes.items()
            }

        # Gets original image.
        image = data['image']

        # Apply autoaug or randaug.
        if self._augmenter is not None:
            image, boxes = self._augmenter.distort_with_boxes(image, boxes)
        image_shape = tf.shape(input=image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            image, boxes, _ = preprocess_ops.random_horizontal_flip(
                image, boxes)

        # Converts boxes from normalized coordinates to pixel coordinates.
        boxes = box_ops.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        image_height, image_width, _ = image.get_shape().as_list()

        # Resizes and crops boxes.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
                                                     image_info[1, :], offset)
        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)
        attributes = {k: tf.gather(v, indices) for k, v in attributes.items()}

        # Assigns anchors.
        input_anchor = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = input_anchor(image_size=(image_height, image_width))
        anchor_labeler = anchor.AnchorLabeler(self._match_threshold,
                                              self._unmatched_threshold)
        (cls_targets, box_targets, att_targets, cls_weights,
         box_weights) = anchor_labeler.label_anchors(
             anchor_boxes, boxes, tf.expand_dims(classes, axis=1), attributes)

        # Casts input image to desired data type.
        image = tf.cast(image, dtype=self._dtype)

        # Packs labels for model_fn outputs.
        labels = {
            'cls_targets': cls_targets,
            'box_targets': box_targets,
            'anchor_boxes': anchor_boxes,
            'cls_weights': cls_weights,
            'box_weights': box_weights,
            'image_info': image_info,
        }
        if att_targets:
            labels['attribute_targets'] = att_targets
        return image, labels
Пример #16
0
    def _parse_eval_data(self, data):
        """Parses one decoded example into an eval image, labels and groundtruths.

        The image is normalized and resized without random scaling; boxes are
        resized accordingly, empty boxes are dropped, and the remainder are
        assigned to anchors. In addition, the unfiltered annotations from
        `data` are packed into `labels['groundtruths']` for the evaluator.

        The input `data` dict is not modified. Attribute tensors used for
        anchor labeling are filtered in a private dict, so
        `groundtruths['attributes']` keeps the original, unfiltered attributes
        and stays aligned with the unfiltered `boxes` / `classes` it sits
        next to.

        Args:
            data: decoded example dict; reads `image`, `source_id`, `height`,
                `width`, `groundtruth_classes`, `groundtruth_boxes`,
                `groundtruth_area`, `groundtruth_is_crowd` and, when present,
                `groundtruth_attributes`.

        Returns:
            image: the processed image cast to `self._dtype`.
            labels: dict with keys `cls_targets`, `box_targets`,
                `anchor_boxes`, `cls_weights`, `box_weights`, `image_info`,
                `groundtruths` and, when the labeler produced any,
                `attribute_targets`.
        """
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        # If not empty, `attributes` is a dict of (name, ground_truth) pairs.
        # `ground_truth` of attributes is assumed in shape [N, attribute_size].
        # A shallow copy is taken: `data['groundtruth_attributes']` is read
        # again below for the groundtruths and must not be filtered in place.
        # TODO(xianzhi): support parsing attributes weights.
        attributes = dict(data.get('groundtruth_attributes', {}))

        # Gets original image and its size.
        image = data['image']
        image_shape = tf.shape(input=image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image)

        # Converts boxes from normalized coordinates to pixel coordinates.
        boxes = box_ops.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=1.0,
            aug_scale_max=1.0)
        image_height, image_width, _ = image.get_shape().as_list()

        # Resizes and crops boxes.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
                                                     image_info[1, :], offset)
        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)
        attributes = {k: tf.gather(v, indices) for k, v in attributes.items()}

        # Assigns anchors.
        input_anchor = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = input_anchor(image_size=(image_height, image_width))
        anchor_labeler = anchor.AnchorLabeler(self._match_threshold,
                                              self._unmatched_threshold)
        (cls_targets, box_targets, att_targets, cls_weights,
         box_weights) = anchor_labeler.label_anchors(
             anchor_boxes, boxes, tf.expand_dims(classes, axis=1), attributes)

        # Casts input image to desired data type.
        image = tf.cast(image, dtype=self._dtype)

        # Sets up groundtruth data for evaluation. These entries come straight
        # from `data` (unfiltered), in pixel coordinates of the original image.
        groundtruths = {
            'source_id':
            data['source_id'],
            'height':
            data['height'],
            'width':
            data['width'],
            'num_detections':
            tf.shape(data['groundtruth_classes']),
            'image_info':
            image_info,
            'boxes':
            box_ops.denormalize_boxes(data['groundtruth_boxes'], image_shape),
            'classes':
            data['groundtruth_classes'],
            'areas':
            data['groundtruth_area'],
            'is_crowds':
            tf.cast(data['groundtruth_is_crowd'], tf.int32),
        }
        if 'groundtruth_attributes' in data:
            groundtruths['attributes'] = data['groundtruth_attributes']
        groundtruths['source_id'] = utils.process_source_id(
            groundtruths['source_id'])
        groundtruths = utils.pad_groundtruths_to_fixed_size(
            groundtruths, self._max_num_instances)

        # Packs labels for model_fn outputs.
        labels = {
            'cls_targets': cls_targets,
            'box_targets': box_targets,
            'anchor_boxes': anchor_boxes,
            'cls_weights': cls_weights,
            'box_weights': box_weights,
            'image_info': image_info,
            'groundtruths': groundtruths,
        }
        if att_targets:
            labels['attribute_targets'] = att_targets
        return image, labels
Пример #17
0
    def _parse_train_data(self, data):
        """Parses data for training.

        The image is normalized, randomly flipped horizontally and, with
        probability 0.5, rescaled and randomly cropped. It is then resized so
        that its short side matches a scale drawn uniformly from
        `self._resize_scales`, and finally padded to `self._output_size`.

        Args:
          data: the decoded tensor dictionary. This method reads the keys
            'image', 'groundtruth_classes' and 'groundtruth_boxes'.

        Returns:
          image: the augmented image tensor, padded at the bottom/right to
            [output_size[0], output_size[1]].
          labels: a dictionary with two entries, each passed through
            `preprocess_ops.clip_or_pad_to_fixed_size` with
            `self._max_num_boxes`:
            classes: groundtruth classes shifted by `self._class_offset`.
            boxes: the boxes of the resized image, normalized and then
              converted with `box_ops.yxyx_to_cycxhw`.
        """
        classes = data['groundtruth_classes'] + self._class_offset
        boxes = data['groundtruth_boxes']

        # Gets original image.
        image = data['image']

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image)
        image, boxes, _ = preprocess_ops.random_horizontal_flip(image, boxes)

        # NOTE(review): `do_crop` is a tensor, so the `if` below presumably
        # relies on AutoGraph (e.g. when run inside `tf.data.Dataset.map`) --
        # confirm against the caller.
        do_crop = tf.greater(tf.random.uniform([]), 0.5)
        if do_crop:
            # Rescales the short side to one of three fixed sizes.
            boxes = box_ops.denormalize_boxes(boxes, tf.shape(image)[:2])
            index = tf.random.categorical(tf.zeros([1, 3]), 1)[0]
            scales = tf.gather([400.0, 500.0, 600.0], index, axis=0)
            short_side = scales[0]
            image, image_info = preprocess_ops.resize_image(image, short_side)
            boxes = preprocess_ops.resize_and_crop_boxes(
                boxes, image_info[2, :], image_info[1, :], image_info[3, :])
            boxes = box_ops.normalize_boxes(boxes, image_info[1, :])

            # Does cropping: the crop height/width are drawn from
            # [384, min(side, 600)) and the top-left corner is drawn so that
            # the crop stays inside the rescaled image.
            shape = tf.cast(image_info[1], dtype=tf.int32)
            h = tf.random.uniform([],
                                  384,
                                  tf.math.minimum(shape[0], 600),
                                  dtype=tf.int32)
            w = tf.random.uniform([],
                                  384,
                                  tf.math.minimum(shape[1], 600),
                                  dtype=tf.int32)
            i = tf.random.uniform([], 0, shape[0] - h + 1, dtype=tf.int32)
            j = tf.random.uniform([], 0, shape[1] - w + 1, dtype=tf.int32)
            image = tf.image.crop_to_bounding_box(image, i, j, h, w)
            # Maps the normalized boxes to pixels of the rescaled image,
            # shifts them into the crop window, renormalizes them by the crop
            # size and clips them to [0, 1].
            boxes = tf.clip_by_value(
                (boxes[..., :] *
                 tf.cast(tf.stack([shape[0], shape[1], shape[0], shape[1]]),
                         dtype=tf.float32) -
                 tf.cast(tf.stack([i, j, i, j]), dtype=tf.float32)) /
                tf.cast(tf.stack([h, w, h, w]), dtype=tf.float32), 0.0, 1.0)

        # Draws one resize scale uniformly. The number of logits follows the
        # number of configured scales so that every scale can be selected and
        # the sampled index is always in range.
        scales = tf.constant(self._resize_scales, dtype=tf.float32)
        index = tf.random.categorical(
            tf.zeros([1, len(self._resize_scales)]), 1)[0]
        scales = tf.gather(scales, index, axis=0)

        image_shape = tf.shape(image)[:2]
        boxes = box_ops.denormalize_boxes(boxes, image_shape)
        short_side = scales[0]
        image, image_info = preprocess_ops.resize_image(
            image, short_side, max(self._output_size))
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_info[2, :],
                                                     image_info[1, :],
                                                     image_info[3, :])
        boxes = box_ops.normalize_boxes(boxes, image_info[1, :])

        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)
        boxes = box_ops.yxyx_to_cycxhw(boxes)

        image = tf.image.pad_to_bounding_box(image, 0, 0, self._output_size[0],
                                             self._output_size[1])
        labels = {
            'classes':
            preprocess_ops.clip_or_pad_to_fixed_size(classes,
                                                     self._max_num_boxes),
            'boxes':
            preprocess_ops.clip_or_pad_to_fixed_size(boxes,
                                                     self._max_num_boxes)
        }

        return image, labels
Пример #18
0
  def _parse_train_data(self, data):
    """Builds the training image and its labels from one decoded example.

    Args:
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      image: image tensor that is preprocessed to have normalized values. It
        is resized and cropped by `preprocess_ops.resize_and_crop_image` using
        `self._output_size` and a padded size computed from
        `self._output_size` and `2 ** self._max_level`.
      labels: a dictionary of tensors used for training. The following
        describes the {key: value} pairs in the dictionary.
        image_info: a 2D `Tensor` that encodes the information of the image
          and the applied preprocessing. It is in the format of
          [[original_height, original_width], [scaled_height, scaled_width],
           [y_scale, x_scale], [y_offset, x_offset]].
        anchor_boxes: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensors
          with shape [height_l, width_l, 4] representing anchor boxes at each
          level.
        rpn_score_targets: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensors
          with shape [height_l, width_l, anchors_per_location]. The height_l
          and width_l represent the dimension of class logits at l-th level.
        rpn_box_targets: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensors
          with shape [height_l, width_l, anchors_per_location * 4]. The
          height_l and width_l represent the dimension of bounding box
          regression output at l-th level.
        gt_boxes: groundtruth bounding box annotations. The box is
          represented in [y1, x1, y2, x2] format. The coordinates are w.r.t.
          the scaled image that is fed to the network. The tensor is padded
          with -1 to the fixed dimension [self._max_num_instances, 4].
        gt_classes: groundtruth class annotations. The tensor is padded with
          -1 to the fixed dimension [self._max_num_instances].
        gt_masks: groundtruth masks cropped by the bounding box and resized
          to a fixed size determined by mask_crop_size. Only present when
          `self._include_mask` is set.
    """
    gt_classes = data['groundtruth_classes']
    gt_boxes = data['groundtruth_boxes']
    if self._include_mask:
      gt_masks = data['groundtruth_instance_masks']

    crowd_flags = data['groundtruth_is_crowd']
    # Drops annotations whose `is_crowd` flag is True.
    if self._skip_crowd_during_training:
      num_annotations = tf.shape(gt_classes)[0]

      def _non_crowd_indices():
        return tf.where(tf.logical_not(crowd_flags))[:, 0]

      def _all_indices():
        return tf.cast(tf.range(num_annotations), tf.int64)

      with tf.control_dependencies([num_annotations, crowd_flags]):
        # When no crowd flags are present, every annotation is kept.
        keep = tf.cond(
            tf.greater(tf.size(crowd_flags), 0),
            _non_crowd_indices,
            _all_indices)
      gt_classes = tf.gather(gt_classes, keep)
      gt_boxes = tf.gather(gt_boxes, keep)
      if self._include_mask:
        gt_masks = tf.gather(gt_masks, keep)

    # Fetches the image and applies the optional augmenter before its size is
    # recorded.
    image = data['image']
    if self._augmenter is not None:
      image = self._augmenter.distort(image)

    original_size = tf.shape(image)[0:2]

    # Mean/std pixel normalization.
    image = preprocess_ops.normalize_image(image)

    # Random horizontal flip (training-only augmentation).
    if self._aug_rand_hflip:
      if self._include_mask:
        image, gt_boxes, gt_masks = preprocess_ops.random_horizontal_flip(
            image, gt_boxes, gt_masks)
      else:
        image, gt_boxes, _ = preprocess_ops.random_horizontal_flip(
            image, gt_boxes)

    # Normalized coordinates -> pixel coordinates of the original image.
    gt_boxes = box_ops.denormalize_boxes(gt_boxes, original_size)

    # Resizes and crops the image.
    padded_size = preprocess_ops.compute_padded_size(
        self._output_size, 2 ** self._max_level)
    image, image_info = preprocess_ops.resize_and_crop_image(
        image,
        self._output_size,
        padded_size=padded_size,
        aug_scale_min=self._aug_scale_min,
        aug_scale_max=self._aug_scale_max)
    padded_height, padded_width, _ = image.get_shape().as_list()

    # Applies the same resize/crop to the boxes, so that they are expressed
    # w.r.t. the scaled image.
    scale = image_info[2, :]
    offset = image_info[3, :]
    scaled_size = image_info[1, :]
    gt_boxes = preprocess_ops.resize_and_crop_boxes(
        gt_boxes, scale, scaled_size, offset)

    # Removes groundtruth boxes that are all zeros.
    valid = box_ops.get_non_empty_box_indices(gt_boxes)
    gt_boxes = tf.gather(gt_boxes, valid)
    gt_classes = tf.gather(gt_classes, valid)
    if self._include_mask:
      gt_masks = tf.gather(gt_masks, valid)
      # Moves the boxes back to the original image space and normalizes them
      # so they can be used to crop the (unscaled) masks.
      offset_yxyx = tf.tile(tf.expand_dims(offset, axis=0), [1, 2])
      scale_yxyx = tf.tile(tf.expand_dims(scale, axis=0), [1, 2])
      mask_boxes = (gt_boxes + offset_yxyx) / scale_yxyx
      mask_boxes = box_ops.normalize_boxes(mask_boxes, original_size)
      mask_count = tf.shape(gt_masks)[0]
      gt_masks = tf.image.crop_and_resize(
          tf.expand_dims(gt_masks, axis=-1),
          mask_boxes,
          box_indices=tf.range(mask_count, dtype=tf.int32),
          crop_size=[self._mask_crop_size] * 2,
          method='bilinear')
      gt_masks = tf.squeeze(gt_masks, axis=-1)

    # Assigns anchor targets.
    # After the target assignment, box targets are absolute pixel offsets
    # w.r.t. the scaled image.
    anchor_generator = anchor.build_anchor_generator(
        min_level=self._min_level,
        max_level=self._max_level,
        num_scales=self._num_scales,
        aspect_ratios=self._aspect_ratios,
        anchor_size=self._anchor_size)
    anchor_boxes = anchor_generator(image_size=(padded_height, padded_width))
    rpn_labeler = anchor.RpnAnchorLabeler(
        self._rpn_match_threshold,
        self._rpn_unmatched_threshold,
        self._rpn_batch_size_per_im,
        self._rpn_fg_fraction)
    class_column = tf.cast(
        tf.expand_dims(gt_classes, axis=-1), dtype=tf.float32)
    rpn_score_targets, rpn_box_targets = rpn_labeler.label_anchors(
        anchor_boxes, gt_boxes, class_column)

    # Casts the input image to self._dtype.
    image = tf.cast(image, dtype=self._dtype)

    # Packs labels for model_fn outputs; per-instance tensors are clipped or
    # padded with -1 to a fixed number of instances.
    max_instances = self._max_num_instances
    labels = {
        'anchor_boxes': anchor_boxes,
        'image_info': image_info,
        'rpn_score_targets': rpn_score_targets,
        'rpn_box_targets': rpn_box_targets,
        'gt_boxes': preprocess_ops.clip_or_pad_to_fixed_size(
            gt_boxes, max_instances, -1),
        'gt_classes': preprocess_ops.clip_or_pad_to_fixed_size(
            gt_classes, max_instances, -1),
    }
    if self._include_mask:
      labels['gt_masks'] = preprocess_ops.clip_or_pad_to_fixed_size(
          gt_masks, max_instances, -1)

    return image, labels
Пример #19
0
  def _parse_eval_data(self, data):
    """Builds the evaluation image and its labels from one decoded example.

    Args:
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      A tuple `(image, labels)` where
        image: image tensor that is preprocessed to have normalized values. It
          is resized and cropped by `preprocess_ops.resize_and_crop_image`
          (without scale augmentation) using `self._output_size` and a padded
          size computed from `self._output_size` and `2 ** self._max_level`.
        labels: a dictionary of tensors. The following describes the
          {key: value} pairs in the dictionary.
          image_info: a 2D `Tensor` returned by
            `preprocess_ops.resize_and_crop_image` that encodes the
            information of the image and the applied preprocessing.
          anchor_boxes: ordered dictionary with keys
            [min_level, min_level+1, ..., max_level]. The values are tensors
            with shape [height_l, width_l, 4] representing anchor boxes at
            each level.
          groundtruths: a dictionary with the keys 'source_id', 'height',
            'width', 'num_detections', 'boxes' (in pixel coordinates of the
            original image), 'classes', 'areas' and 'is_crowds', padded by
            `utils.pad_groundtruths_to_fixed_size` using
            `self._max_num_instances`.
    """
    # Fetches the original image and remembers its size.
    image = data['image']
    original_size = tf.shape(image)[0:2]

    # Mean/std pixel normalization.
    image = preprocess_ops.normalize_image(image)

    # Resizes and crops the image; both scale bounds are 1.0, so no random
    # scale augmentation is applied at evaluation time.
    padded_size = preprocess_ops.compute_padded_size(
        self._output_size, 2 ** self._max_level)
    image, image_info = preprocess_ops.resize_and_crop_image(
        image,
        self._output_size,
        padded_size=padded_size,
        aug_scale_min=1.0,
        aug_scale_max=1.0)
    padded_height, padded_width, _ = image.get_shape().as_list()

    # Casts the input image to self._dtype.
    image = tf.cast(image, dtype=self._dtype)

    # Normalized coordinates -> pixel coordinates of the original image.
    gt_boxes = box_ops.denormalize_boxes(
        data['groundtruth_boxes'], original_size)

    # Computes the anchor boxes for the padded image size.
    anchor_generator = anchor.build_anchor_generator(
        min_level=self._min_level,
        max_level=self._max_level,
        num_scales=self._num_scales,
        aspect_ratios=self._aspect_ratios,
        anchor_size=self._anchor_size)
    anchor_boxes = anchor_generator(image_size=(padded_height, padded_width))

    # Collects the groundtruth annotations that accompany the labels.
    gt_classes = data['groundtruth_classes']
    groundtruths = {
        'source_id': data['source_id'],
        'height': data['height'],
        'width': data['width'],
        'num_detections': tf.shape(gt_classes)[0],
        'boxes': gt_boxes,
        'classes': data['groundtruth_classes'],
        'areas': data['groundtruth_area'],
        'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
    }
    groundtruths['source_id'] = utils.process_source_id(
        groundtruths['source_id'])
    groundtruths = utils.pad_groundtruths_to_fixed_size(
        groundtruths, self._max_num_instances)

    labels = {
        'image_info': image_info,
        'anchor_boxes': anchor_boxes,
        'groundtruths': groundtruths,
    }
    return image, labels
Пример #20
0
  def preprocess(self, inputs):
    """Preprocesses one COCO example for DETR.

    The image is normalized. In training mode it is additionally flipped at
    random, rescaled and randomly cropped with probability 0.5, and resized to
    a short side drawn uniformly from `self._params.resize_scales`. In
    evaluation mode the last entry of `self._params.resize_scales` is used as
    the short side. The image is finally padded to
    `self._params.output_size`.

    Args:
      inputs: a dictionary of tensors. This method reads 'image',
        inputs['objects']['bbox'], inputs['objects']['label'],
        inputs['objects']['is_crowd'] and, in evaluation mode only,
        'image/id'.

    Returns:
      image: the processed image tensor, padded at the bottom/right to
        [output_size[0], output_size[1]].
      labels: a dictionary whose per-box entries are passed through
        `preprocess_ops.clip_or_pad_to_fixed_size` with
        `self._params.max_num_boxes`:
        classes: the object labels shifted by one.
        boxes: the boxes of the resized image, normalized and then converted
          with `box_ops.yxyx_to_cycxhw`.
        In evaluation mode the dictionary additionally contains:
        id: the value of inputs['image/id'].
        image_info: the tensor returned by `preprocess_ops.resize_image`.
        is_crowd: the crowd flags of the kept boxes.
        gt_boxes: the boxes in pixel coordinates of the image before the
          final resize (these are not filtered for empty boxes).
    """
    image = inputs['image']
    boxes = inputs['objects']['bbox']
    classes = inputs['objects']['label'] + 1
    is_crowd = inputs['objects']['is_crowd']

    image = preprocess_ops.normalize_image(image)
    if self._params.is_training:
      image, boxes, _ = preprocess_ops.random_horizontal_flip(image, boxes)

      # NOTE(review): `do_crop` is a tensor, so the `if` below presumably
      # relies on AutoGraph (e.g. when run inside `tf.data.Dataset.map`) --
      # confirm against the caller.
      do_crop = tf.greater(tf.random.uniform([]), 0.5)
      if do_crop:
        # Rescales the short side to one of three fixed sizes.
        boxes = box_ops.denormalize_boxes(boxes, tf.shape(image)[:2])
        index = tf.random.categorical(tf.zeros([1, 3]), 1)[0]
        scales = tf.gather([400.0, 500.0, 600.0], index, axis=0)
        short_side = scales[0]
        image, image_info = preprocess_ops.resize_image(image, short_side)
        boxes = preprocess_ops.resize_and_crop_boxes(boxes,
                                                     image_info[2, :],
                                                     image_info[1, :],
                                                     image_info[3, :])
        boxes = box_ops.normalize_boxes(boxes, image_info[1, :])

        # Does cropping: the crop height/width are drawn from
        # [384, min(side, 600)) and the top-left corner is drawn so that the
        # crop stays inside the rescaled image.
        shape = tf.cast(image_info[1], dtype=tf.int32)
        h = tf.random.uniform(
            [], 384, tf.math.minimum(shape[0], 600), dtype=tf.int32)
        w = tf.random.uniform(
            [], 384, tf.math.minimum(shape[1], 600), dtype=tf.int32)
        i = tf.random.uniform([], 0, shape[0] - h + 1, dtype=tf.int32)
        j = tf.random.uniform([], 0, shape[1] - w + 1, dtype=tf.int32)
        image = tf.image.crop_to_bounding_box(image, i, j, h, w)
        # Maps the normalized boxes to pixels of the rescaled image, shifts
        # them into the crop window, renormalizes them by the crop size and
        # clips them to [0, 1].
        boxes = tf.clip_by_value(
            (boxes[..., :] * tf.cast(
                tf.stack([shape[0], shape[1], shape[0], shape[1]]),
                dtype=tf.float32) -
             tf.cast(tf.stack([i, j, i, j]), dtype=tf.float32)) /
            tf.cast(tf.stack([h, w, h, w]), dtype=tf.float32), 0.0, 1.0)

      # Draws one resize scale uniformly. The number of logits follows the
      # number of configured scales so that every scale can be selected and
      # the sampled index is always in range.
      scales = tf.constant(
          self._params.resize_scales,
          dtype=tf.float32)
      index = tf.random.categorical(
          tf.zeros([1, len(self._params.resize_scales)]), 1)[0]
      scales = tf.gather(scales, index, axis=0)
    else:
      # Evaluation always uses the last configured scale.
      scales = tf.constant([self._params.resize_scales[-1]], tf.float32)

    image_shape = tf.shape(image)[:2]
    boxes = box_ops.denormalize_boxes(boxes, image_shape)
    # Keeps the pixel-space boxes of the pre-resize image for evaluation.
    gt_boxes = boxes
    short_side = scales[0]
    image, image_info = preprocess_ops.resize_image(
        image,
        short_side,
        max(self._params.output_size))
    boxes = preprocess_ops.resize_and_crop_boxes(boxes,
                                                 image_info[2, :],
                                                 image_info[1, :],
                                                 image_info[3, :])
    boxes = box_ops.normalize_boxes(boxes, image_info[1, :])

    # Filters out ground truth boxes that are all zeros.
    indices = box_ops.get_non_empty_box_indices(boxes)
    boxes = tf.gather(boxes, indices)
    classes = tf.gather(classes, indices)
    is_crowd = tf.gather(is_crowd, indices)
    boxes = box_ops.yxyx_to_cycxhw(boxes)

    image = tf.image.pad_to_bounding_box(
        image, 0, 0, self._params.output_size[0], self._params.output_size[1])
    labels = {
        'classes':
            preprocess_ops.clip_or_pad_to_fixed_size(
                classes, self._params.max_num_boxes),
        'boxes':
            preprocess_ops.clip_or_pad_to_fixed_size(
                boxes, self._params.max_num_boxes)
    }
    if not self._params.is_training:
      labels.update({
          'id':
              inputs['image/id'],
          'image_info':
              image_info,
          'is_crowd':
              preprocess_ops.clip_or_pad_to_fixed_size(
                  is_crowd, self._params.max_num_boxes),
          'gt_boxes':
              preprocess_ops.clip_or_pad_to_fixed_size(
                  gt_boxes, self._params.max_num_boxes),
      })

    return image, labels