Example #1
    def _parse_eval_image(self, decoded_tensors):
        """Parses image data for evaluation."""
        image = tf.io.decode_image(decoded_tensors[self._image_field_key],
                                   channels=3)

        image = tf.reshape(image, (decoded_tensors['image/height'],
                                   decoded_tensors['image/width'], 3))

        # TODO: Add option to center crop and resize image.
        image, _ = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            self._output_size,
            preserve_aspect_ratio=self._preserve_aspect_ratio)

        image = tf.reshape(image,
                           [self._output_size[0], self._output_size[1], 3])

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)

        # Convert image to self._dtype.
        image = tf.image.convert_image_dtype(image, self._dtype)

        return image
Example #2
  def test_resize_and_crop_image_rectangular_case(self, input_height,
                                                  input_width, desired_height,
                                                  desired_width, stride,
                                                  scale_y, scale_x,
                                                  output_height, output_width):
    image = tf.convert_to_tensor(
        np.random.rand(input_height, input_width, 3))

    desired_size = (desired_height, desired_width)
    resized_image, image_info = preprocess_ops.resize_and_crop_image(
        image,
        desired_size=desired_size,
        padded_size=preprocess_ops.compute_padded_size(desired_size, stride))
    resized_image_shape = tf.shape(resized_image)

    self.assertAllEqual(
        [output_height, output_width, 3],
        resized_image_shape.numpy())
    self.assertNDArrayNear(
        [[input_height, input_width],
         [desired_height, desired_width],
         [scale_y, scale_x],
         [0.0, 0.0]],
        image_info.numpy(),
        1e-5)
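
Note: the assertion above pins down the row layout of the `image_info` tensor returned by preprocess_ops.resize_and_crop_image: row 0 is the original size, row 1 the desired (scaled) size, row 2 the scale, row 3 the crop offset. A minimal sketch of how downstream code reads those rows (the helper name unpack_image_info is hypothetical, not part of the library):

import tensorflow as tf

# Hypothetical helper illustrating the four rows of `image_info`
# asserted in the test above.
def unpack_image_info(image_info: tf.Tensor):
    original_size = image_info[0, :]  # [original_height, original_width]
    desired_size = image_info[1, :]   # [desired_height, desired_width]
    scale = image_info[2, :]          # [y_scale, x_scale]
    offset = image_info[3, :]         # [y_offset, x_offset]
    return original_size, desired_size, scale, offset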
Example #3
    def _build_inputs(self, image):
        """Builds classification model inputs for serving."""
        model_params = self._params.task.model
        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)

        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._input_image_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._input_image_size, 2**model_params.max_level),
            aug_scale_min=1.0,
            aug_scale_max=1.0)

        image_shape = image_info[1, :]  # Shape of the scaled image.

        input_anchor = anchor.build_anchor_generator(
            min_level=model_params.min_level,
            max_level=model_params.max_level,
            num_scales=model_params.anchor.num_scales,
            aspect_ratios=model_params.anchor.aspect_ratios,
            anchor_size=model_params.anchor.anchor_size)
        anchor_boxes = input_anchor(image_size=(self._input_image_size[0],
                                                self._input_image_size[1]))

        return image, anchor_boxes, image_shape
Example #4
    def _parse_train_data(self, data):
        """Parses data for training and evaluation."""
        image, label = self._prepare_image_and_label(data)

        if self._train_on_crops:
            if (data['image/height'] < self._output_size[0] or
                    data['image/width'] < self._output_size[1]):
                raise ValueError(
                    'Image size has to be larger than crop size (output_size)')
            label = tf.reshape(label,
                               [data['image/height'], data['image/width'], 1])
            image_mask = tf.concat([image, label], axis=2)
            image_mask_crop = tf.image.random_crop(image_mask,
                                                   self._output_size + [4])
            image = image_mask_crop[:, :, :-1]
            label = tf.reshape(image_mask_crop[:, :, -1],
                               [1] + self._output_size)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            image, _, label = preprocess_ops.random_horizontal_flip(
                image, masks=label)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            self._output_size,
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)

        # Gets the scale and offset applied to the image.
        image_scale = image_info[2, :]
        offset = image_info[3, :]

        # Pads label and makes sure the padded region is assigned the ignore label.
        # The label is first offset by +1 and then padded with 0.
        label += 1
        label = tf.expand_dims(label, axis=3)
        label = preprocess_ops.resize_and_crop_masks(label, image_scale,
                                                     self._output_size, offset)
        label -= 1
        label = tf.where(tf.equal(label, -1),
                         self._ignore_label * tf.ones_like(label), label)
        label = tf.squeeze(label, axis=0)
        valid_mask = tf.not_equal(label, self._ignore_label)
        labels = {
            'masks': label,
            'valid_masks': valid_mask,
            'image_info': image_info,
        }

        # Casts image to self._dtype.
        image = tf.cast(image, dtype=self._dtype)

        return image, labels
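
Note: the +1 / -1 offsets around resize_and_crop_masks in the example above are what route padded pixels to the ignore label: shifting valid class ids up by one frees the value 0, so the zeros written by padding can later be mapped back to -1 and then to the ignore label. A standalone sketch with toy values (tf.pad stands in for the padding that resize_and_crop_masks performs):

import tensorflow as tf

label = tf.constant([[0, 1], [2, 0]])       # valid class ids start at 0
shifted = label + 1                         # ids become 1..N; 0 is now unused
padded = tf.pad(shifted, [[0, 1], [0, 1]])  # padding writes zeros
restored = padded - 1                       # padded pixels become -1
ignore_label = 255
restored = tf.where(tf.equal(restored, -1),
                    ignore_label * tf.ones_like(restored), restored)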
Example #5
    def _parse_eval_data(self, data):
        """Generates images and labels that are usable for model evaluation.

    Args:
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      images: the image tensor.
      labels: a dict of Tensors that contains labels.
    """
        image = tf.cast(data['image'], dtype=tf.float32)
        boxes = data['groundtruth_boxes']
        classes = data['groundtruth_classes']

        image_shape = tf.shape(input=image)[0:2]
        # Converts boxes from normalized coordinates to pixel coordinates.
        boxes = box_ops.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image, [self._output_height, self._output_width],
            padded_size=[self._output_height, self._output_width],
            aug_scale_min=1.0,
            aug_scale_max=1.0)
        unpad_image_shape = tf.cast(tf.shape(image), tf.float32)

        # Resizes and crops boxes.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
                                                     image_info[1, :], offset)

        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)

        labels = self._build_label(unpad_image_shape=unpad_image_shape,
                                   boxes=boxes,
                                   classes=classes,
                                   image_info=image_info,
                                   data=data)

        if self._bgr_ordering:
            red, green, blue = tf.unstack(image, num=3, axis=2)
            image = tf.stack([blue, green, red], axis=2)

        image = preprocess_ops.normalize_image(image=image,
                                               offset=self._channel_means,
                                               scale=self._channel_stds)

        image = tf.cast(image, self._dtype)

        return image, labels
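
Note: box_ops.denormalize_boxes, as used above, converts [ymin, xmin, ymax, xmax] boxes from the normalized [0, 1] range into pixel coordinates. A sketch of the arithmetic (an illustration, not the library implementation):

import tensorflow as tf

def denormalize_boxes_sketch(boxes, image_shape):
    # Scales normalized [ymin, xmin, ymax, xmax] by image height and width.
    height = tf.cast(image_shape[0], boxes.dtype)
    width = tf.cast(image_shape[1], boxes.dtype)
    return boxes * tf.stack([height, width, height, width])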
Example #6
    def _build_inputs(self, image):
        """Builds classification model inputs for serving."""

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)

        image, _ = preprocess_ops.resize_and_crop_image(
            image,
            self._input_image_size,
            padded_size=self._input_image_size,
            aug_scale_min=1.0,
            aug_scale_max=1.0)
        return image
Example #7
    def _parse_eval_data(self, data):
        """Parses data for training and evaluation."""
        image, label = self._prepare_image_and_label(data)
        # The label is first offset by +1 and then padded with 0.
        label += 1
        label = tf.expand_dims(label, axis=3)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            self._output_size,
            preserve_aspect_ratio=self._preserve_aspect_ratio)

        if self._resize_eval_groundtruth:
            # Resizes eval masks to match input image sizes. In that case, mean
            # IoU is computed on output_size, not the original image size.
            image_scale = image_info[2, :]
            offset = image_info[3, :]
            label = preprocess_ops.resize_and_crop_masks(
                label, image_scale, self._output_size, offset)
        else:
            label = tf.image.pad_to_bounding_box(
                label, 0, 0, self._groundtruth_padded_size[0],
                self._groundtruth_padded_size[1])

        label -= 1
        label = tf.where(tf.equal(label, -1),
                         self._ignore_label * tf.ones_like(label), label)
        label = tf.squeeze(label, axis=0)

        valid_mask = tf.not_equal(label, self._ignore_label)
        labels = {
            'masks': label,
            'valid_masks': valid_mask,
            'image_info': image_info
        }

        # Normalizes image with mean and std pixel values.
        # Must be done after augmenter since certain ops rely on uint8
        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)

        # Casts image to self._dtype.
        image = tf.cast(image, dtype=self._dtype)

        return image, labels
Example #8
    def _build_inputs(self, image):
        """Builds segmentation model inputs for serving."""

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(
            image,
            offset=run_lib.IMAGENET_MEAN_RGB,
            scale=run_lib.IMAGENET_STDDEV_RGB)

        image, _ = preprocess_ops.resize_and_crop_image(
            image,
            self._input_image_size,
            padded_size=self._input_image_size,
            aug_scale_min=1.0,
            aug_scale_max=1.0,
            preserve_aspect_ratio=False)
        return image
Example #9
    def _build_inputs(self, image):
        """Builds detection model inputs for serving."""
        model_params = self.params.task.model
        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)

        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._input_image_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._input_image_size, 2**model_params.max_level),
            aug_scale_min=1.0,
            aug_scale_max=1.0)
        anchor_boxes = self._build_anchor_boxes()

        return image, anchor_boxes, image_info
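
Note: passing padded_size=preprocess_ops.compute_padded_size(input_size, 2**max_level) above pads the image so its dimensions divide evenly at every feature level up to max_level. Assuming the helper simply rounds each dimension up to the next stride multiple, the computation looks like this sketch:

import math

def compute_padded_size_sketch(desired_size, stride):
    # Rounds each dimension up to the nearest multiple of `stride`.
    return [int(math.ceil(side / stride)) * stride for side in desired_size]

# e.g. a [640, 360] input with max_level = 7 (stride 128) pads to [640, 384].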
Example #10
    def _parse_train_data(self, data):
        """Parses data for training and evaluation."""
        image, label = self._prepare_image_and_label(data)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            image, label = preprocess_ops.random_horizontal_flip(image,
                                                                 masks=label)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            self._output_size,
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)

        # Gets the scale and offset applied to the image.
        image_scale = image_info[2, :]
        offset = image_info[3, :]

        # Pads label and makes sure the padded region is assigned the ignore label.
        # The label is first offset by +1 and then padded with 0.
        label += 1
        label = tf.expand_dims(label, axis=3)
        label = preprocess_ops.resize_and_crop_masks(label, image_scale,
                                                     self._output_size, offset)
        label -= 1
        label = tf.where(tf.equal(label, -1),
                         self._ignore_label * tf.ones_like(label), label)
        label = tf.squeeze(label, axis=0)
        valid_mask = tf.not_equal(label, self._ignore_label)
        labels = {
            'masks': label,
            'valid_masks': valid_mask,
            'image_info': image_info,
        }

        # Casts image to self._dtype.
        image = tf.cast(image, dtype=self._dtype)

        return image, labels
Example #11
    def _parse_eval_data(self, data):
        """Parses data for evaluation.
    !!! All augmentations and transformations are on bboxes with format
      (ymin, xmin, ymax, xmax). Required to do the appropriate transformations.
    !!! Images are supposed to be in RGB format
    """
        image, boxes = data['image'], data['boxes']

        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._input_size[:2],
            self._input_size[:2],
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max,
            preserve_aspect_ratio=self._preserve_aspect_ratio)
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_info[2, :],
                                                     image_info[1, :],
                                                     image_info[3, :])

        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)
        image = tf.cast(image, dtype=self._dtype)

        boxes = tf.clip_by_value(boxes, 0, self._input_size[0] - 1)
        bbox_labels = yolo_box_ops.yxyx_to_xcycwh(boxes)
        bbox_labels = tf.concat([bbox_labels, data['classes'][:, tf.newaxis]],
                                axis=-1)

        labels, bbox_labels = yolo_ops.preprocess_true_boxes(
            bboxes=bbox_labels,
            train_output_sizes=self.train_output_sizes,
            anchor_per_scale=self.anchor_per_scale,
            num_classes=self.num_classes,
            max_bbox_per_scale=self.max_bbox_per_scale,
            strides=self.strides,
            anchors=self.anchors)

        targets = {'labels': labels, 'bboxes': bbox_labels}

        return image, targets
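
Note: yolo_box_ops.yxyx_to_xcycwh above converts corner-format boxes to the center/size format YOLO targets expect. The implied arithmetic, as a sketch (names are illustrative):

import tensorflow as tf

def yxyx_to_xcycwh_sketch(boxes):
    # Centers are corner midpoints; width/height are side lengths.
    ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=-1)
    x_center = (xmin + xmax) / 2.0
    y_center = (ymin + ymax) / 2.0
    width = xmax - xmin
    height = ymax - ymin
    return tf.stack([x_center, y_center, width, height], axis=-1)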
Example #12
    def _parse_eval_data(self, data):
        """Parses data for training and evaluation."""
        image, label = self._prepare_image_and_label(data)
        # The label is first offset by +1 and then padded with 0.
        label += 1
        label = tf.expand_dims(label, axis=3)

        if self._resize_eval:
            # Resizes and crops image.
            image, image_info = preprocess_ops.resize_and_crop_image(
                image, self._output_size, self._output_size)

            # Resizes and crops mask.
            image_scale = image_info[2, :]
            offset = image_info[3, :]

            label = preprocess_ops.resize_and_crop_masks(
                label, image_scale, self._output_size, offset)
        else:
            # Pads image and mask to output size.
            image = tf.image.pad_to_bounding_box(image, 0, 0,
                                                 self._output_size[0],
                                                 self._output_size[1])
            label = tf.image.pad_to_bounding_box(label, 0, 0,
                                                 self._output_size[0],
                                                 self._output_size[1])

        label -= 1
        label = tf.where(tf.equal(label, -1),
                         self._ignore_label * tf.ones_like(label), label)
        label = tf.squeeze(label, axis=0)

        valid_mask = tf.not_equal(label, self._ignore_label)
        labels = {'masks': label, 'valid_masks': valid_mask}

        # Casts image to self._dtype.
        image = tf.cast(image, dtype=self._dtype)

        return image, labels
Example #13
    def _parse_train_image(self, decoded_tensors):
        """Parses image data for training."""
        # TODO: add option to crop images
        image_bytes = decoded_tensors[self._image_field_key]
        if 'image/height' in decoded_tensors and 'image/width' in decoded_tensors:
            image_shape = (decoded_tensors['image/height'],
                           decoded_tensors['image/width'], 3)
        else:
            image_shape = tf.image.extract_jpeg_shape(image_bytes)

        image = tf.io.decode_image(image_bytes, channels=3)
        image = tf.reshape(image, image_shape)

        if self._aug_rand_hflip:
            image = tf.image.random_flip_left_right(image)

        image, _ = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            self._output_size,
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max,
            preserve_aspect_ratio=self._preserve_aspect_ratio)

        # Apply autoaug or randaug.
        if self._augmenter is not None:
            image = self._augmenter.distort(image)

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)

        # Convert image to self._dtype.
        image = tf.image.convert_image_dtype(image, self._dtype)

        return image
Example #14
    def _parse_train_data(self, data):
        """Parses data for training and evaluation."""
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        is_crowds = data['groundtruth_is_crowd']
        # Skips annotations with `is_crowd` = True.
        if self._skip_crowd_during_training:
            num_groundtruths = tf.shape(input=classes)[0]
            with tf.control_dependencies([num_groundtruths, is_crowds]):
                indices = tf.cond(
                    pred=tf.greater(tf.size(input=is_crowds), 0),
                    true_fn=lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                    false_fn=lambda: tf.cast(tf.range(num_groundtruths),
                                             tf.int64))
            classes = tf.gather(classes, indices)
            boxes = tf.gather(boxes, indices)

        # Gets original image and its size.
        image = data['image']

        image_shape = tf.shape(input=image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            image, boxes, _ = preprocess_ops.random_horizontal_flip(
                image, boxes)

        # Converts boxes from normalized coordinates to pixel coordinates.
        boxes = box_ops.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        image_height, image_width, _ = image.get_shape().as_list()

        # Resizes and crops boxes.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
                                                     image_info[1, :], offset)
        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)

        # Assigns anchors.
        input_anchor = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = input_anchor(image_size=(image_height, image_width))
        anchor_labeler = anchor.AnchorLabeler(self._match_threshold,
                                              self._unmatched_threshold)
        (cls_targets, box_targets, cls_weights,
         box_weights) = anchor_labeler.label_anchors(
             anchor_boxes, boxes, tf.expand_dims(classes, axis=1))

        # If bfloat16 is used, casts input image to tf.bfloat16.
        if self._use_bfloat16:
            image = tf.cast(image, dtype=tf.bfloat16)

        # Packs labels for model_fn outputs.
        labels = {
            'cls_targets': cls_targets,
            'box_targets': box_targets,
            'anchor_boxes': anchor_boxes,
            'cls_weights': cls_weights,
            'box_weights': box_weights,
            'image_info': image_info,
        }
        return image, labels
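
Note: preprocess_ops.resize_and_crop_boxes, as used above, moves pixel-space boxes into the coordinate frame of the resized image: scale by image_info[2, :], shift by the crop offset image_info[3, :], and clip to the scaled size image_info[1, :]. A sketch of that transform (assumed core behavior, not the exact library code):

import tensorflow as tf

def resize_and_crop_boxes_sketch(boxes, image_scale, output_size, offset):
    # boxes: [N, 4] pixel-space [ymin, xmin, ymax, xmax];
    # image_scale, output_size, offset: length-2 tensors in (y, x) order.
    boxes *= tf.tile(tf.expand_dims(image_scale, axis=0), [1, 2])
    boxes -= tf.tile(tf.expand_dims(offset, axis=0), [1, 2])
    max_yx = tf.tile(tf.cast(output_size, boxes.dtype)[tf.newaxis, :], [1, 2])
    return tf.clip_by_value(boxes, 0.0, max_yx)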
Example #15
    def _parse_train_data(self, data):
        """Parses data for training and evaluation."""
        image, label = self._prepare_image_and_label(data)

        if self._crop_size:
            label = tf.reshape(label,
                               [data['image/height'], data['image/width'], 1])
            # If output_size is specified, resizes image and label to the
            # desired output_size.
            if self._output_size:
                image = tf.image.resize(image,
                                        self._output_size,
                                        method='bilinear')
                label = tf.image.resize(label,
                                        self._output_size,
                                        method='nearest')

            image_mask = tf.concat([image, label], axis=2)
            image_mask_crop = tf.image.random_crop(image_mask,
                                                   self._crop_size + [4])
            image = image_mask_crop[:, :, :-1]
            label = tf.reshape(image_mask_crop[:, :, -1],
                               [1] + self._crop_size)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            image, _, label = preprocess_ops.random_horizontal_flip(
                image, masks=label)

        train_image_size = self._crop_size if self._crop_size else self._output_size
        # Rotates image randomly during training.
        if (self._rotate_min != 0.0 and self._rotate_max != 0.0 and
                self._rotate_min < self._rotate_max):
            image, label = preprocess_ops.random_rotation(
                image,
                masks=label,
                rotate_max=self._rotate_max,
                rotate_min=self._rotate_min,
                ignore_label=self._ignore_label)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            train_image_size,
            train_image_size,
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max,
            preserve_aspect_ratio=self._preserve_aspect_ratio)

        # Modifies brightness randomly during training.
        if (self._bright_min != 1.0 and self._bright_max != 1.0 and
                self._bright_min < self._bright_max):
            image = preprocess_ops.random_brightness(
                image,
                bright_min=self._bright_min,
                bright_max=self._bright_max)

        # Gets the scale and offset applied to the image.
        image_scale = image_info[2, :]
        offset = image_info[3, :]

        # Pads label and makes sure the padded region is assigned the ignore label.
        # The label is first offset by +1 and then padded with 0.
        label += 1
        label = tf.expand_dims(label, axis=3)
        label = preprocess_ops.resize_and_crop_masks(label, image_scale,
                                                     train_image_size, offset)
        label -= 1
        label = tf.where(tf.equal(label, -1),
                         self._ignore_label * tf.ones_like(label), label)
        label = tf.squeeze(label, axis=0)

        # Apply randaug
        if self._augmenter is not None:
            image, label = self._augmenter.distort_image_and_mask(
                image, label, self._ignore_label)

        valid_mask = tf.not_equal(label, self._ignore_label)
        labels = {
            'masks': label,
            'valid_masks': valid_mask,
            'image_info': image_info,
        }

        # Normalizes image with mean and std pixel values.
        # Must be done after augmenter since certain ops rely on uint8
        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)

        # Casts image to self._dtype.
        image = tf.cast(image, dtype=self._dtype)

        return image, labels
Example #16
    def _parse_train_data(self, data):
        """Parses data for training and evaluation.
    !!! All augmentations and transformations are on bboxes with format
      (ymin, xmin, ymax, xmax). Required to do the appropriate transformations.
    !!! Images are supposed to be in RGB format
    """
        image, boxes = data['image'], data['boxes']

        # Execute RandAugment first as some ops require uint8 colors
        if self._augmenter is not None:
            image = self._augmenter.distort(image)

        if self._aug_rand_hflip:
            image, boxes = yolo_ops.random_horizontal_flip(image, boxes)

        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._input_size[:2],
            self._input_size[:2],
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max,
            preserve_aspect_ratio=self._preserve_aspect_ratio)
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_info[2, :],
                                                     image_info[1, :],
                                                     image_info[3, :])

        if self._aug_jitter_im != 0.0:
            image, boxes = yolo_ops.random_translate(image, boxes,
                                                     self._aug_jitter_im)

        if self._aug_jitter_boxes != 0.0:
            boxes = box_ops.jitter_boxes(boxes, self._aug_jitter_boxes)

        image = preprocess_ops.normalize_image(image,
                                               offset=MEAN_RGB,
                                               scale=STDDEV_RGB)
        image = tf.cast(image, dtype=self._dtype)

        boxes = tf.clip_by_value(boxes, 0, self._input_size[0] - 1)
        bbox_labels = yolo_box_ops.yxyx_to_xcycwh(boxes)
        bbox_labels = tf.concat([bbox_labels, data['classes'][:, tf.newaxis]],
                                axis=-1)

        labels, bbox_labels = yolo_ops.preprocess_true_boxes(
            bboxes=bbox_labels,
            train_output_sizes=self.train_output_sizes,
            anchor_per_scale=self.anchor_per_scale,
            num_classes=self.num_classes,
            max_bbox_per_scale=self.max_bbox_per_scale,
            strides=self.strides,
            anchors=self.anchors)

        # TODO: Figure out why the number of boxes must be fixed; without this
        # an error occurs.
        # https://github.com/whizzmobility/models/pull/61
        # Pads / truncates to MAX_DISPLAY_BBOX boxes for a constant size.
        raw_bboxes = boxes
        num_bboxes = tf.shape(raw_bboxes)[0]
        if num_bboxes > MAX_DISPLAY_BBOX:
            raw_bboxes = raw_bboxes[:MAX_DISPLAY_BBOX, :]
        else:
            paddings = tf.stack([0, MAX_DISPLAY_BBOX - num_bboxes], axis=-1)
            paddings = tf.stack([paddings, [0, 0]], axis=0)
            raw_bboxes = tf.pad(raw_bboxes, paddings)

        targets = {
            'labels': labels,
            'bboxes': bbox_labels,
            'raw_bboxes': raw_bboxes
        }

        return image, targets
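
Note: the TODO above hints at the fragility of this block: num_bboxes is a tensor, so the Python `if` only works eagerly. A graph-safe sketch that pads unconditionally and then slices, avoiding the branch entirely (the MAX_DISPLAY_BBOX value is assumed for illustration):

import tensorflow as tf

MAX_DISPLAY_BBOX = 100  # assumed value, for illustration only

def pad_or_clip_boxes_sketch(boxes):
    # Pads with zeros up to MAX_DISPLAY_BBOX rows (tf.maximum keeps the pad
    # amount non-negative), then slices, so no Python branch on a tensor.
    num_boxes = tf.shape(boxes)[0]
    pad_amount = tf.maximum(MAX_DISPLAY_BBOX - num_boxes, 0)
    boxes = tf.pad(boxes, [[0, pad_amount], [0, 0]])
    return boxes[:MAX_DISPLAY_BBOX, :]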
Example #17
    def _parse_eval_data(self, data):
        """Parses data for evaluation.

    Args:
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      A dictionary of {'images': image, 'labels': labels} where
        image: image tensor that is preprocessed to have normalized value and
          dimension [output_size[0], output_size[1], 3]
        labels: a dictionary of tensors used for training. The following
          describes {key: value} pairs in the dictionary.
          source_ids: Source image id. Default value -1 if the source id is
            empty in the groundtruth annotation.
          image_info: a 2D `Tensor` that encodes the information of the image
            and the applied preprocessing. It is in the format of
            [[original_height, original_width], [scaled_height, scaled_width],
            [y_scale, x_scale], [y_offset, x_offset]].
          anchor_boxes: ordered dictionary with keys
            [min_level, min_level+1, ..., max_level]. The values are tensor with
            shape [height_l, width_l, 4] representing anchor boxes at each
            level.
    """
        # Gets original image and its size.
        image = data['image']
        image_shape = tf.shape(image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=1.0,
            aug_scale_max=1.0)
        image_height, image_width, _ = image.get_shape().as_list()

        # Casts input image to self._dtype
        image = tf.cast(image, dtype=self._dtype)

        # Converts boxes from normalized coordinates to pixel coordinates.
        boxes = box_ops.denormalize_boxes(data['groundtruth_boxes'],
                                          image_shape)

        # Compute Anchor boxes.
        input_anchor = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = input_anchor(image_size=(image_height, image_width))

        labels = {
            'image_info': image_info,
            'anchor_boxes': anchor_boxes,
        }

        groundtruths = {
            'source_id': data['source_id'],
            'height': data['height'],
            'width': data['width'],
            'num_detections': tf.shape(data['groundtruth_classes'])[0],
            'boxes': boxes,
            'classes': data['groundtruth_classes'],
            'areas': data['groundtruth_area'],
            'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
        }
        groundtruths['source_id'] = utils.process_source_id(
            groundtruths['source_id'])
        groundtruths = utils.pad_groundtruths_to_fixed_size(
            groundtruths, self._max_num_instances)
        labels['groundtruths'] = groundtruths
        return image, labels
Example #18
    def _parse_train_data(self, data):
        """Parses data for training.

    Args:
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      image: image tensor that is preprocessed to have normalized value and
        dimension [output_size[0], output_size[1], 3]
      labels: a dictionary of tensors used for training. The following describes
        {key: value} pairs in the dictionary.
        image_info: a 2D `Tensor` that encodes the information of the image and
          the applied preprocessing. It is in the format of
          [[original_height, original_width], [scaled_height, scaled_width],
          [y_scale, x_scale], [y_offset, x_offset]].
        anchor_boxes: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, 4] representing anchor boxes at each level.
        rpn_score_targets: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, anchors_per_location]. The height_l and
          width_l represent the dimension of class logits at l-th level.
        rpn_box_targets: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, anchors_per_location * 4]. The height_l and
          width_l represent the dimension of bounding box regression output at
          l-th level.
        gt_boxes: Groundtruth bounding box annotations. The box is represented
           in [y1, x1, y2, x2] format. The coordinates are w.r.t the scaled
           image that is fed to the network. The tensor is padded with -1 to
           the fixed dimension [self._max_num_instances, 4].
        gt_classes: Groundtruth classes annotations. The tensor is padded
          with -1 to the fixed dimension [self._max_num_instances].
        gt_masks: Groundtruth masks cropped by the bounding box and
          resized to a fixed size determined by mask_crop_size.
    """
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        if self._include_mask:
            masks = data['groundtruth_instance_masks']

        is_crowds = data['groundtruth_is_crowd']
        # Skips annotations with `is_crowd` = True.
        if self._skip_crowd_during_training:
            num_groundtruths = tf.shape(classes)[0]
            with tf.control_dependencies([num_groundtruths, is_crowds]):
                indices = tf.cond(
                    tf.greater(tf.size(is_crowds), 0),
                    lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                    lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
            classes = tf.gather(classes, indices)
            boxes = tf.gather(boxes, indices)
            if self._include_mask:
                masks = tf.gather(masks, indices)

        # Gets original image and its size.
        image = data['image']
        image_shape = tf.shape(image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            if self._include_mask:
                image, boxes, masks = preprocess_ops.random_horizontal_flip(
                    image, boxes, masks)
            else:
                image, boxes, _ = preprocess_ops.random_horizontal_flip(
                    image, boxes)

        # Converts boxes from normalized coordinates to pixel coordinates.
        # Now the coordinates of boxes are w.r.t. the original image.
        boxes = box_ops.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        image_height, image_width, _ = image.get_shape().as_list()

        # Resizes and crops boxes.
        # Now the coordinates of boxes are w.r.t the scaled image.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
                                                     image_info[1, :], offset)

        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)
        if self._include_mask:
            masks = tf.gather(masks, indices)
            # Transfer boxes to the original image space and do normalization.
            cropped_boxes = boxes + tf.tile(tf.expand_dims(offset, axis=0),
                                            [1, 2])
            cropped_boxes /= tf.tile(tf.expand_dims(image_scale, axis=0),
                                     [1, 2])
            cropped_boxes = box_ops.normalize_boxes(cropped_boxes, image_shape)
            num_masks = tf.shape(masks)[0]
            masks = tf.image.crop_and_resize(
                tf.expand_dims(masks, axis=-1),
                cropped_boxes,
                box_indices=tf.range(num_masks, dtype=tf.int32),
                crop_size=[self._mask_crop_size, self._mask_crop_size],
                method='bilinear')
            masks = tf.squeeze(masks, axis=-1)

        # Assigns anchor targets.
        # Note that after the target assignment, box targets are absolute pixel
        # offsets w.r.t. the scaled image.
        input_anchor = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = input_anchor(image_size=(image_height, image_width))
        anchor_labeler = anchor.RpnAnchorLabeler(self._rpn_match_threshold,
                                                 self._rpn_unmatched_threshold,
                                                 self._rpn_batch_size_per_im,
                                                 self._rpn_fg_fraction)
        rpn_score_targets, rpn_box_targets = anchor_labeler.label_anchors(
            anchor_boxes, boxes,
            tf.cast(tf.expand_dims(classes, axis=-1), dtype=tf.float32))

        # Casts input image to self._dtype
        image = tf.cast(image, dtype=self._dtype)

        # Packs labels for model_fn outputs.
        labels = {
            'anchor_boxes': anchor_boxes,
            'image_info': image_info,
            'rpn_score_targets': rpn_score_targets,
            'rpn_box_targets': rpn_box_targets,
            'gt_boxes': preprocess_ops.clip_or_pad_to_fixed_size(
                boxes, self._max_num_instances, -1),
            'gt_classes': preprocess_ops.clip_or_pad_to_fixed_size(
                classes, self._max_num_instances, -1),
        }
        if self._include_mask:
            labels['gt_masks'] = preprocess_ops.clip_or_pad_to_fixed_size(
                masks, self._max_num_instances, -1)

        return image, labels
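
Note: the cropped_boxes computation above inverts resize_and_crop_boxes: the forward transform maps a box b to b * scale - offset, so adding the offset back and dividing by the scale recovers original-image pixel coordinates before normalize_boxes is applied. A toy check of that inversion:

import tensorflow as tf

scale = tf.constant([0.5, 0.5])                    # [y_scale, x_scale]
offset = tf.constant([10.0, 20.0])                 # [y_offset, x_offset]
box = tf.constant([[40.0, 60.0, 140.0, 160.0]])    # original pixel coords
tiled_scale = tf.tile(scale[tf.newaxis, :], [1, 2])
tiled_offset = tf.tile(offset[tf.newaxis, :], [1, 2])
scaled = box * tiled_scale - tiled_offset          # network input frame
recovered = (scaled + tiled_offset) / tiled_scale  # back to original frame
# recovered == box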
Example #19
    def _parse_eval_data(self, data):
        """Parses data for training and evaluation."""
        groundtruths = {}
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        # If not empty, `attributes` is a dict of (name, ground_truth) pairs.
        # `ground_truth` of attributes is assumed in shape [N, attribute_size].
        # TODO(xianzhi): support parsing attributes weights.
        attributes = data.get('groundtruth_attributes', {})

        # Gets original image and its size.
        image = data['image']
        image_shape = tf.shape(input=image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image)

        # Converts boxes from normalized coordinates to pixel coordinates.
        boxes = box_ops.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=1.0,
            aug_scale_max=1.0)
        image_height, image_width, _ = image.get_shape().as_list()

        # Resizes and crops boxes.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
                                                     image_info[1, :], offset)
        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)
        for k, v in attributes.items():
            attributes[k] = tf.gather(v, indices)

        # Assigns anchors.
        input_anchor = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = input_anchor(image_size=(image_height, image_width))
        anchor_labeler = anchor.AnchorLabeler(self._match_threshold,
                                              self._unmatched_threshold)
        (cls_targets, box_targets, att_targets, cls_weights,
         box_weights) = anchor_labeler.label_anchors(
             anchor_boxes, boxes, tf.expand_dims(classes, axis=1), attributes)

        # Casts input image to desired data type.
        image = tf.cast(image, dtype=self._dtype)

        # Sets up groundtruth data for evaluation.
        groundtruths = {
            'source_id': data['source_id'],
            'height': data['height'],
            'width': data['width'],
            'num_detections': tf.shape(data['groundtruth_classes']),
            'image_info': image_info,
            'boxes': box_ops.denormalize_boxes(data['groundtruth_boxes'],
                                               image_shape),
            'classes': data['groundtruth_classes'],
            'areas': data['groundtruth_area'],
            'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
        }
        if 'groundtruth_attributes' in data:
            groundtruths['attributes'] = data['groundtruth_attributes']
        groundtruths['source_id'] = utils.process_source_id(
            groundtruths['source_id'])
        groundtruths = utils.pad_groundtruths_to_fixed_size(
            groundtruths, self._max_num_instances)

        # Packs labels for model_fn outputs.
        labels = {
            'cls_targets': cls_targets,
            'box_targets': box_targets,
            'anchor_boxes': anchor_boxes,
            'cls_weights': cls_weights,
            'box_weights': box_weights,
            'image_info': image_info,
            'groundtruths': groundtruths,
        }
        if att_targets:
            labels['attribute_targets'] = att_targets
        return image, labels
Example #20
    def _parse_train_data(self, data):
        """Parses data for training and evaluation."""
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        # If not empty, `attributes` is a dict of (name, ground_truth) pairs.
        # `ground_truth` of attributes is assumed in shape [N, attribute_size].
        # TODO(xianzhi): support parsing attributes weights.
        attributes = data.get('groundtruth_attributes', {})
        is_crowds = data['groundtruth_is_crowd']

        # Skips annotations with `is_crowd` = True.
        if self._skip_crowd_during_training:
            num_groundtruths = tf.shape(input=classes)[0]
            with tf.control_dependencies([num_groundtruths, is_crowds]):
                indices = tf.cond(
                    pred=tf.greater(tf.size(input=is_crowds), 0),
                    true_fn=lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                    false_fn=lambda: tf.cast(tf.range(num_groundtruths),
                                             tf.int64))
            classes = tf.gather(classes, indices)
            boxes = tf.gather(boxes, indices)
            for k, v in attributes.items():
                attributes[k] = tf.gather(v, indices)

        # Gets original image.
        image = data['image']

        # Apply autoaug or randaug.
        if self._augmenter is not None:
            image, boxes = self._augmenter.distort_with_boxes(image, boxes)

        image_shape = tf.shape(input=image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            image, boxes, _ = preprocess_ops.random_horizontal_flip(
                image, boxes)

        # Converts boxes from normalized coordinates to pixel coordinates.
        boxes = box_ops.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        image_height, image_width, _ = image.get_shape().as_list()

        # Resizes and crops boxes.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
                                                     image_info[1, :], offset)
        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)
        for k, v in attributes.items():
            attributes[k] = tf.gather(v, indices)

        # Assigns anchors.
        input_anchor = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = input_anchor(image_size=(image_height, image_width))
        anchor_labeler = anchor.AnchorLabeler(self._match_threshold,
                                              self._unmatched_threshold)
        (cls_targets, box_targets, att_targets, cls_weights,
         box_weights) = anchor_labeler.label_anchors(
             anchor_boxes, boxes, tf.expand_dims(classes, axis=1), attributes)

        # Casts input image to desired data type.
        image = tf.cast(image, dtype=self._dtype)

        # Packs labels for model_fn outputs.
        labels = {
            'cls_targets': cls_targets,
            'box_targets': box_targets,
            'anchor_boxes': anchor_boxes,
            'cls_weights': cls_weights,
            'box_weights': box_weights,
            'image_info': image_info,
        }
        if att_targets:
            labels['attribute_targets'] = att_targets
        return image, labels
Example #21
    def _parse_train_data(self, data):
        """Generates images and labels that are usable for model training.

    We use random flip, random scaling (between 0.6 and 1.3), cropping,
    and color jittering as data augmentation.

    Args:
        data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
        images: the image tensor.
        labels: a dict of Tensors that contains labels.
    """

        image = tf.cast(data['image'], dtype=tf.float32)
        boxes = data['groundtruth_boxes']
        classes = data['groundtruth_classes']

        image_shape = tf.shape(input=image)[0:2]

        if self._aug_rand_hflip:
            image, boxes, _ = preprocess_ops.random_horizontal_flip(
                image, boxes)

        # Image augmentation
        if not self._odapi_augmentation:
            # Color and lighting jittering
            if self._aug_rand_hue:
                image = tf.image.random_hue(image=image, max_delta=.02)
            if self._aug_rand_contrast:
                image = tf.image.random_contrast(image=image,
                                                 lower=0.8,
                                                 upper=1.25)
            if self._aug_rand_saturation:
                image = tf.image.random_saturation(image=image,
                                                   lower=0.8,
                                                   upper=1.25)
            if self._aug_rand_brightness:
                image = tf.image.random_brightness(image=image, max_delta=.2)
            image = tf.clip_by_value(image,
                                     clip_value_min=0.0,
                                     clip_value_max=255.0)
            # Converts boxes from normalized coordinates to pixel coordinates.
            boxes = box_ops.denormalize_boxes(boxes, image_shape)

            # Resizes and crops image.
            image, image_info = preprocess_ops.resize_and_crop_image(
                image, [self._output_height, self._output_width],
                padded_size=[self._output_height, self._output_width],
                aug_scale_min=self._aug_scale_min,
                aug_scale_max=self._aug_scale_max)
            unpad_image_shape = tf.cast(tf.shape(image), tf.float32)

            # Resizes and crops boxes.
            image_scale = image_info[2, :]
            offset = image_info[3, :]
            boxes = preprocess_ops.resize_and_crop_boxes(
                boxes, image_scale, image_info[1, :], offset)

        else:
            # Color and lighting jittering
            if self._aug_rand_hue:
                image = cn_prep_ops.random_adjust_hue(image=image,
                                                      max_delta=.02)
            if self._aug_rand_contrast:
                image = cn_prep_ops.random_adjust_contrast(image=image,
                                                           min_delta=0.8,
                                                           max_delta=1.25)
            if self._aug_rand_saturation:
                image = cn_prep_ops.random_adjust_saturation(image=image,
                                                             min_delta=0.8,
                                                             max_delta=1.25)
            if self._aug_rand_brightness:
                image = cn_prep_ops.random_adjust_brightness(image=image,
                                                             max_delta=.2)

            sc_image, sc_boxes, classes = cn_prep_ops.random_square_crop_by_scale(
                image=image,
                boxes=boxes,
                labels=classes,
                scale_min=self._aug_scale_min,
                scale_max=self._aug_scale_max)

            image, unpad_image_shape = cn_prep_ops.resize_to_range(
                image=sc_image,
                min_dimension=self._output_width,
                max_dimension=self._output_width,
                pad_to_max_dimension=True)
            preprocessed_shape = tf.cast(tf.shape(image), tf.float32)
            unpad_image_shape = tf.cast(unpad_image_shape, tf.float32)

            im_box = tf.stack([
                0.0, 0.0, preprocessed_shape[0] / unpad_image_shape[0],
                preprocessed_shape[1] / unpad_image_shape[1]
            ])
            realigned_bboxes = box_list_ops.change_coordinate_frame(
                boxlist=box_list.BoxList(sc_boxes), window=im_box)

            valid_boxes = box_list_ops.assert_or_prune_invalid_boxes(
                realigned_bboxes.get())

            boxes = box_list_ops.to_absolute_coordinates(
                boxlist=box_list.BoxList(valid_boxes),
                height=self._output_height,
                width=self._output_width).get()

            image_info = tf.stack([
                tf.cast(image_shape, dtype=tf.float32),
                tf.constant([self._output_height, self._output_width],
                            dtype=tf.float32),
                tf.cast(tf.shape(sc_image)[0:2] / image_shape,
                        dtype=tf.float32),
                tf.constant([0., 0.])
            ])

        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)

        labels = self._build_label(unpad_image_shape=unpad_image_shape,
                                   boxes=boxes,
                                   classes=classes,
                                   image_info=image_info,
                                   data=data)

        if self._bgr_ordering:
            red, green, blue = tf.unstack(image, num=3, axis=2)
            image = tf.stack([blue, green, red], axis=2)

        image = preprocess_ops.normalize_image(image=image,
                                               offset=self._channel_means,
                                               scale=self._channel_stds)

        image = tf.cast(image, self._dtype)

        return image, labels