Пример #1
0
  def _parse_train_data(self, data):
    """Builds one augmented (image, label) pair for training.

    The pair is optionally flipped, resized to 256x256, cropped with a single
    shared random window, and the image is cast to `self._dtype`.

    Args:
      data: decoded example, forwarded to `self._prepare_image_and_label`.

    Returns:
      A tuple `(image, label)`, where `image` is the cropped image cast to
      `self._dtype` and `label` is the last channel of the joint crop.
    """
    img, mask = self._prepare_image_and_label(data)

    # Optional flip augmentation, applied to image and mask in one call.
    if self._aug_rand_hflip:
      img, mask = preprocess_ops.random_horizontal_flip(img, masks=mask)

    # Both tensors are brought to a fixed 256x256 size before cropping.
    # NOTE(review): the label is resized with the default interpolation of
    # tf.image.resize -- confirm that is acceptable for these labels.
    img = tf.image.resize(img, tf.cast([256, 256], tf.int32))
    mask = tf.image.resize(mask, tf.cast([256, 256], tf.int32))

    # Concatenate along the channel axis so that one random window is used for
    # both tensors; the crop size asks for 4 channels in total.
    stacked = tf.concat([img, mask], axis=2)
    crop_size = self._output_size + [4]
    patch = tf.image.random_crop(stacked, crop_size)

    # Split the crop back: everything but the last channel is the image.
    img = patch[:, :, :-1]
    mask = patch[:, :, -1]

    return tf.cast(img, dtype=self._dtype), mask
Пример #2
0
    def _parse_train_data(self, data):
        """Parses one decoded example into a training image and labels.

        Non-mosaic examples are processed by `self._jitter_scale` and their
        boxes are transformed and clipped with the resulting infos. Mosaic
        examples are only resized to `(self._image_h, self._image_w)` and have
        their empty boxes dropped. Both paths then receive the random HSV
        augmentation before the labels are built.

        Args:
          data: decoded example dict. Reads the keys `image`,
            `groundtruth_boxes`, `groundtruth_classes` and `is_mosaic`; the
            whole dict is also forwarded to `self._build_label`.

        Returns:
          The `(image, labels)` pair produced by `self._build_label`.
        """
        image = data['image']
        boxes = data['groundtruth_boxes']
        classes = data['groundtruth_classes']

        if self._random_flip:
            # Randomly flip the image horizontally.
            image, boxes, _ = preprocess_ops.random_horizontal_flip(
                image, boxes, seed=self._seed)

        if not data['is_mosaic']:
            image, infos, affine = self._jitter_scale(
                image, [self._image_h, self._image_w], self._letter_box,
                self._jitter, self._random_pad, self._aug_scale_min,
                self._aug_scale_max, self._aug_rand_translate,
                self._aug_rand_angle, self._aug_rand_perspective)

            # Clip and clean boxes.
            boxes, inds = preprocessing_ops.transform_and_clip_boxes(
                boxes,
                infos,
                affine=affine,
                shuffle_boxes=False,
                area_thresh=self._area_thresh,
                augment=True,
                seed=self._seed)
            classes = tf.gather(classes, inds)
            info = infos[-1]
        else:
            image = tf.image.resize(image, (self._image_h, self._image_w),
                                    method='nearest')
            # Denormalize against the size the image was just resized to
            # (previously a hard-coded 640x640) before dropping empty boxes.
            output_size = tf.cast([self._image_h, self._image_w], tf.float32)
            boxes_ = bbox_ops.denormalize_boxes(boxes, output_size)
            inds = bbox_ops.get_non_empty_box_indices(boxes_)
            boxes = tf.gather(boxes, inds)
            classes = tf.gather(classes, inds)
            info = self._pad_infos_object(image)

        # Cast to the configured dtype and rescale by 1/255 before applying
        # the random hue/saturation/brightness augmentation.
        image = tf.cast(image, dtype=self._dtype)
        image = image / 255.0
        image = preprocessing_ops.image_rand_hsv(image,
                                                 self._aug_rand_hue,
                                                 self._aug_rand_saturation,
                                                 self._aug_rand_brightness,
                                                 seed=self._seed,
                                                 darknet=self._darknet)

        # Build the training labels from the surviving boxes and classes.
        image, labels = self._build_label(image,
                                          boxes,
                                          classes,
                                          info,
                                          inds,
                                          data,
                                          is_training=True)
        return image, labels
    def _parse_train_data(self, data):
        """Builds the training image and its segmentation labels.

        Args:
          data: decoded example. `image/height` and `image/width` are read
            when `self._train_on_crops` is set; the dict is also forwarded to
            `self._prepare_image_and_label`.

        Returns:
          A tuple `(image, labels)`. `image` is cast to `self._dtype`;
          `labels` is a dict with keys `masks`, `valid_masks` and
          `image_info`.

        Raises:
          ValueError: if cropping is enabled and the image height or width is
            smaller than the corresponding entry of `self._output_size`.
        """
        image, mask = self._prepare_image_and_label(data)

        if self._train_on_crops:
            if (data['image/height'] < self._output_size[0]
                    or data['image/width'] < self._output_size[1]):
                raise ValueError(
                    'Image size has to be larger than crop size (output_size)')
            # Give the mask an explicit channel axis, then crop image and mask
            # with one shared random window (4 channels in total).
            mask = tf.reshape(
                mask, [data['image/height'], data['image/width'], 1])
            stacked = tf.concat([image, mask], axis=2)
            patch = tf.image.random_crop(stacked, self._output_size + [4])
            image = patch[:, :, :-1]
            mask = tf.reshape(patch[:, :, -1], [1] + self._output_size)

        # Optional flip augmentation; the boxes slot of the result is unused.
        if self._aug_rand_hflip:
            image, _, mask = preprocess_ops.random_horizontal_flip(
                image, masks=mask)

        # Resize and crop the image; rows 2 and 3 of `info` are reused below
        # as the scale and offset applied to the mask.
        image, info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            self._output_size,
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        scale = info[2, :]
        shift = info[3, :]

        # Labels are shifted by +1 so that the 0-padding introduced by the
        # mask resize/crop ends up as -1 after shifting back; those pixels
        # are then replaced by the ignore label.
        mask += 1
        mask = tf.expand_dims(mask, axis=3)
        mask = preprocess_ops.resize_and_crop_masks(
            mask, scale, self._output_size, shift)
        mask -= 1
        padded = tf.equal(mask, -1)
        ignore_fill = self._ignore_label * tf.ones_like(mask)
        mask = tf.where(padded, ignore_fill, mask)
        mask = tf.squeeze(mask, axis=0)
        valid = tf.not_equal(mask, self._ignore_label)

        image = tf.cast(image, dtype=self._dtype)
        return image, {
            'masks': mask,
            'valid_masks': valid,
            'image_info': info,
        }
Пример #4
0
  def _parse_eval_data(self, data):
    """Builds one (image, label) pair for evaluation.

    The pair is optionally flipped and resized to 256x256; the image is then
    cast to `self._dtype`. No random crop is applied.

    Args:
      data: decoded example, forwarded to `self._prepare_image_and_label`.

    Returns:
      A tuple `(image, label)` with the resized image cast to `self._dtype`
      and the resized label.
    """
    img, mask = self._prepare_image_and_label(data)

    # NOTE(review): this flip is random and gated by the augmentation flag
    # `_aug_rand_hflip` -- confirm it is really wanted during evaluation.
    if self._aug_rand_hflip:
      img, mask = preprocess_ops.random_horizontal_flip(img, masks=mask)

    # Bring both tensors to the fixed 256x256 evaluation size.
    img = tf.image.resize(img, tf.cast([256, 256], tf.int32))
    mask = tf.image.resize(mask, tf.cast([256, 256], tf.int32))

    return tf.cast(img, dtype=self._dtype), mask
Пример #5
0
    def _parse_train_data(self, data):
        """Builds the training image and its segmentation labels.

        Args:
          data: decoded example, forwarded to
            `self._prepare_image_and_label`.

        Returns:
          A tuple `(image, labels)`. `image` is cast to `self._dtype`;
          `labels` is a dict with keys `masks`, `valid_masks` and
          `image_info`.
        """
        image, mask = self._prepare_image_and_label(data)

        # Optional flip augmentation, applied to image and mask in one call.
        if self._aug_rand_hflip:
            image, mask = preprocess_ops.random_horizontal_flip(
                image, masks=mask)

        # Resize and crop the image; rows 2 and 3 of `info` are reused below
        # as the scale and offset applied to the mask.
        image, info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            self._output_size,
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        scale = info[2, :]
        shift = info[3, :]

        # Labels are shifted by +1 so that the 0-padding introduced by the
        # mask resize/crop ends up as -1 after shifting back; those pixels
        # are then replaced by the ignore label.
        mask += 1
        mask = tf.expand_dims(mask, axis=3)
        mask = preprocess_ops.resize_and_crop_masks(
            mask, scale, self._output_size, shift)
        mask -= 1
        padded = tf.equal(mask, -1)
        ignore_fill = self._ignore_label * tf.ones_like(mask)
        mask = tf.where(padded, ignore_fill, mask)
        mask = tf.squeeze(mask, axis=0)
        valid = tf.not_equal(mask, self._ignore_label)

        image = tf.cast(image, dtype=self._dtype)
        return image, {
            'masks': mask,
            'valid_masks': valid,
            'image_info': info,
        }
Пример #6
0
    def _augment_image(self,
                       image,
                       boxes,
                       classes,
                       is_crowd,
                       area,
                       xs=0.0,
                       ys=0.0,
                       cut=None):
        """Augments a single image and its annotations before patching.

        Args:
          image: image to augment.
          boxes: boxes belonging to `image`.
          classes: per-box values, filtered together with `boxes`.
          is_crowd: per-box values, filtered together with `boxes`.
          area: per-box values, filtered together with `boxes`.
          xs: forwarded as `shiftx` to `resize_and_jitter_image`.
          ys: forwarded as `shifty` to `resize_and_jitter_image`.
          cut: forwarded as `cut` to `resize_and_jitter_image`.

        Returns:
          A tuple `(image, boxes, classes, is_crowd, area, crop_points)`.
        """
        # Optional horizontal flip of the image and its boxes.
        if self._random_flip:
            image, boxes, _ = preprocess_ops.random_horizontal_flip(
                image, boxes, seed=self._seed)

        # Jitter the image towards the configured output size.
        target_size = [self._output_size[0], self._output_size[1]]
        jittered = preprocessing_ops.resize_and_jitter_image(
            image,
            target_size,
            random_pad=False,
            letter_box=self._letter_box,
            jitter=self._random_crop,
            shiftx=xs,
            shifty=ys,
            cut=cut,
            seed=self._seed)
        image, transform_infos, crop_points = jittered

        # Apply the same transform to the boxes, then keep only the
        # annotations whose boxes survived.
        boxes, kept = preprocessing_ops.transform_and_clip_boxes(
            boxes,
            transform_infos,
            area_thresh=self._area_thresh,
            shuffle_boxes=False,
            filter_and_clip_boxes=True,
            seed=self._seed)
        selected = self._select_ind(kept, classes, is_crowd, area)
        classes, is_crowd, area = selected  # pylint:disable=unbalanced-tuple-unpacking

        return image, boxes, classes, is_crowd, area, crop_points
Пример #7
0
    def _parse_train_data(self, data):
        """Generates images and labels that are usable for model training.

        Args:
          data: a dict of Tensors produced by the decoder. This method reads
            the keys `image`, `groundtruth_boxes`, `groundtruth_classes`,
            `groundtruth_area`, `groundtruth_is_crowd` and `source_id`.

        Returns:
          images: the image tensor, resized to 416x416 and clipped to [0, 1].
          labels: a dict of Tensors with the keys `source_id`, `bbox`,
            `classes`, `area`, `is_crowd`, `best_anchors`, `width`, `height`
            and `num_detections`, plus `grid_form` when `self._fixed_size`
            is set.
        """

        shape = tf.shape(data['image'])
        # Scale pixel values by 1/255.
        image = data['image'] / 255
        boxes = data['groundtruth_boxes']
        # NOTE(review): if the image is laid out as [height, width, channels],
        # shape[0] is the height and these two names are swapped. Confirm the
        # convention expected by fit_preserve_aspect_ratio before changing it;
        # both values are also exported in `labels` below.
        width = shape[0]
        height = shape[1]

        image, boxes = yolo_preprocess_ops.fit_preserve_aspect_ratio(
            image,
            boxes,
            width=width,
            height=height,
            target_dim=self._max_process_size)

        # Spatial size of the image after the aspect-ratio fit; used to
        # (de)normalize boxes for the jitter step below.
        image_shape = tf.shape(image)[:2]

        if self._random_flip:
            image, boxes, _ = preprocess_ops.random_horizontal_flip(
                image, boxes, seed=self._seed)

        # Default target scale.
        # NOTE(review): this default is image_w // net_down_scale, while the
        # random branch below multiplies back by net_down_scale -- confirm the
        # two are meant to be in the same units.
        randscale = self._image_w // self._net_down_scale

        if not self._fixed_size:
            # Draw a uniform sample and rescale only when it exceeds 0.5.
            do_scale = tf.greater(
                tf.random.uniform([], minval=0, maxval=1, seed=self._seed),
                0.5)
            if do_scale:
                # This scales the image to a random multiple of net_down_scale
                # drawn from the range given by min_process_size and
                # max_process_size.
                randscale = tf.random.uniform(
                    [],
                    minval=self._min_process_size // self._net_down_scale,
                    maxval=self._max_process_size // self._net_down_scale,
                    seed=self._seed,
                    dtype=tf.int32) * self._net_down_scale

        if self._jitter_boxes != 0.0:
            # NOTE(review): self._jitter_boxes only gates this step; the
            # amount passed to jitter_boxes is the constant 0.025.
            boxes = box_ops.denormalize_boxes(boxes, image_shape)
            boxes = box_ops.jitter_boxes(boxes, 0.025)
            boxes = box_ops.normalize_boxes(boxes, image_shape)

        # YOLO loss function uses x-center, y-center format
        boxes = yolo_box_ops.yxyx_to_xcycwh(boxes)

        if self._jitter_im != 0.0:
            image, boxes = yolo_preprocess_ops.random_translate(
                image, boxes, self._jitter_im, seed=self._seed)

        if self._aug_rand_zoom:
            image, boxes = yolo_preprocess_ops.resize_crop_filter(
                image,
                boxes,
                default_width=self._image_w,
                default_height=self._image_h,
                target_width=randscale,
                target_height=randscale)
        # NOTE(review): the final size is the constant 416x416 rather than
        # (self._image_h, self._image_w) -- confirm this is intended.
        image = tf.image.resize(image, (416, 416), preserve_aspect_ratio=False)

        # Optional color augmentations, each gated by its own flag.
        if self._aug_rand_brightness:
            image = tf.image.random_brightness(image=image,
                                               max_delta=.1)  # Brightness
        if self._aug_rand_saturation:
            image = tf.image.random_saturation(image=image,
                                               lower=0.75,
                                               upper=1.25)  # Saturation
        if self._aug_rand_hue:
            image = tf.image.random_hue(image=image, max_delta=.3)  # Hue
        # Keep pixel values inside [0, 1] after the color augmentations.
        image = tf.clip_by_value(image, 0.0, 1.0)
        # Find the best anchor for the ground truth labels to maximize the iou
        best_anchors = yolo_preprocess_ops.get_best_anchor(
            boxes, self._anchors, width=self._image_w, height=self._image_h)

        # Padding: every per-instance tensor is clipped or padded to
        # self._max_num_instances entries (classes are padded with -1, the
        # others with 0).
        # NOTE(review): classes, area and is_crowd are taken straight from
        # `data`; if resize_crop_filter drops boxes they may no longer line up
        # with `boxes` -- confirm.
        boxes = preprocess_ops.clip_or_pad_to_fixed_size(
            boxes, self._max_num_instances, 0)
        classes = preprocess_ops.clip_or_pad_to_fixed_size(
            data['groundtruth_classes'], self._max_num_instances, -1)
        best_anchors = preprocess_ops.clip_or_pad_to_fixed_size(
            best_anchors, self._max_num_instances, 0)
        area = preprocess_ops.clip_or_pad_to_fixed_size(
            data['groundtruth_area'], self._max_num_instances, 0)
        is_crowd = preprocess_ops.clip_or_pad_to_fixed_size(
            tf.cast(data['groundtruth_is_crowd'], tf.int32),
            self._max_num_instances, 0)

        labels = {
            'source_id': data['source_id'],
            'bbox': tf.cast(boxes, self._dtype),
            'classes': tf.cast(classes, self._dtype),
            'area': tf.cast(area, self._dtype),
            'is_crowd': is_crowd,
            'best_anchors': tf.cast(best_anchors, self._dtype),
            'width': width,
            'height': height,
            'num_detections': tf.shape(data['groundtruth_classes'])[0],
        }

        # The grid-form label is only built when the image size is fixed.
        if self._fixed_size:
            grid = self._build_grid(labels,
                                    self._image_w,
                                    use_tie_breaker=self._use_tie_breaker)
            labels.update({'grid_form': grid})

        return image, labels
Пример #8
0
    def _parse_train_data(self, data):
        """Parses data for training.

        Args:
          data: the decoded tensor dictionary from TfExampleDecoder. This
            method reads the keys `image`, `groundtruth_classes`,
            `groundtruth_boxes`, `groundtruth_is_crowd` and, when
            `self._include_mask` is set, `groundtruth_instance_masks`.

        Returns:
          image: image tensor that is preprocessed to have normalized value
            and dimension [output_size[0], output_size[1], 3]
          labels: a dictionary of tensors used for training. The following
            describes {key: value} pairs in the dictionary.
            image_info: a 2D `Tensor` that encodes the information of the
              image and the applied preprocessing. Its first two rows are
              [original_height, original_width] and
              [scaled_height, scaled_width]; rows 2 and 3 are read by this
              method as the image scale and the offset.
            anchor_boxes: ordered dictionary with keys
              [min_level, min_level+1, ..., max_level]. The values are tensor
              with shape [height_l, width_l, 4] representing anchor boxes at
              each level.
            rpn_score_targets: ordered dictionary with keys
              [min_level, min_level+1, ..., max_level]. The values are tensor
              with shape [height_l, width_l, anchors_per_location]. The
              height_l and width_l represent the dimension of class logits at
              l-th level.
            rpn_box_targets: ordered dictionary with keys
              [min_level, min_level+1, ..., max_level]. The values are tensor
              with shape [height_l, width_l, anchors_per_location * 4]. The
              height_l and width_l represent the dimension of bounding box
              regression output at l-th level.
            gt_boxes: Groundtruth bounding box annotations. The box is
              represented in [y1, x1, y2, x2] format. The coordinates are
              w.r.t the scaled image that is fed to the network. The tensor
              is padded with -1 to the fixed dimension
              [self._max_num_instances, 4].
            gt_classes: Groundtruth classes annotations. The tensor is padded
              with -1 to the fixed dimension [self._max_num_instances].
            gt_masks: groundtruth masks cropped by the bounding box and
              resized to a fixed size determined by mask_crop_size. Only
              present when `self._include_mask` is set.
        """
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        if self._include_mask:
            masks = data['groundtruth_instance_masks']

        is_crowds = data['groundtruth_is_crowd']
        # Skips annotations with `is_crowd` = True.
        if self._skip_crowd_during_training:
            num_groundtruths = tf.shape(classes)[0]
            with tf.control_dependencies([num_groundtruths, is_crowds]):
                # Keep the non-crowd indices; when `is_crowds` is empty, keep
                # every groundtruth index instead.
                indices = tf.cond(
                    tf.greater(tf.size(is_crowds), 0),
                    lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                    lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
            classes = tf.gather(classes, indices)
            boxes = tf.gather(boxes, indices)
            if self._include_mask:
                masks = tf.gather(masks, indices)

        # Gets original image and its size.
        image = data['image']
        image_shape = tf.shape(image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            if self._include_mask:
                image, boxes, masks = preprocess_ops.random_horizontal_flip(
                    image, boxes, masks)
            else:
                image, boxes, _ = preprocess_ops.random_horizontal_flip(
                    image, boxes)

        # Converts boxes from normalized coordinates to pixel coordinates.
        # Now the coordinates of boxes are w.r.t. the original image.
        boxes = box_ops.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        # Static (Python-level) size of the processed image, needed to build
        # the anchors below.
        image_height, image_width, _ = image.get_shape().as_list()

        # Resizes and crops boxes.
        # Now the coordinates of boxes are w.r.t the scaled image.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
                                                     image_info[1, :], offset)

        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)
        if self._include_mask:
            masks = tf.gather(masks, indices)
            # Transfer boxes to the original image space and do normalization.
            # This undoes the offset and scale applied above so the boxes can
            # be used to crop the masks.
            cropped_boxes = boxes + tf.tile(tf.expand_dims(offset, axis=0),
                                            [1, 2])
            cropped_boxes /= tf.tile(tf.expand_dims(image_scale, axis=0),
                                     [1, 2])
            cropped_boxes = box_ops.normalize_boxes(cropped_boxes, image_shape)
            num_masks = tf.shape(masks)[0]
            # Crop each mask with its own box (box i is applied to mask i) and
            # resize the crop to mask_crop_size x mask_crop_size.
            masks = tf.image.crop_and_resize(
                tf.expand_dims(masks, axis=-1),
                cropped_boxes,
                box_indices=tf.range(num_masks, dtype=tf.int32),
                crop_size=[self._mask_crop_size, self._mask_crop_size],
                method='bilinear')
            masks = tf.squeeze(masks, axis=-1)

        # Assigns anchor targets.
        # Note that after the target assignment, box targets are absolute pixel
        # offsets w.r.t. the scaled image.
        input_anchor = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = input_anchor(image_size=(image_height, image_width))
        anchor_labeler = anchor.RpnAnchorLabeler(self._rpn_match_threshold,
                                                 self._rpn_unmatched_threshold,
                                                 self._rpn_batch_size_per_im,
                                                 self._rpn_fg_fraction)
        rpn_score_targets, rpn_box_targets = anchor_labeler.label_anchors(
            anchor_boxes, boxes,
            tf.cast(tf.expand_dims(classes, axis=-1), dtype=tf.float32))

        # Casts input image to self._dtype
        image = tf.cast(image, dtype=self._dtype)

        # Packs labels for model_fn outputs.
        labels = {
            'anchor_boxes':
            anchor_boxes,
            'image_info':
            image_info,
            'rpn_score_targets':
            rpn_score_targets,
            'rpn_box_targets':
            rpn_box_targets,
            'gt_boxes':
            preprocess_ops.clip_or_pad_to_fixed_size(boxes,
                                                     self._max_num_instances,
                                                     -1),
            'gt_classes':
            preprocess_ops.clip_or_pad_to_fixed_size(classes,
                                                     self._max_num_instances,
                                                     -1),
        }
        # Masks are only exported when mask support is enabled.
        if self._include_mask:
            labels['gt_masks'] = preprocess_ops.clip_or_pad_to_fixed_size(
                masks, self._max_num_instances, -1)

        return image, labels
Пример #9
0
    def _parse_train_data(self, data):
        """Parses one decoded example into an image and anchor targets.

        Args:
          data: decoded example dict. Reads the keys `image`,
            `groundtruth_classes`, `groundtruth_boxes`,
            `groundtruth_is_crowd` and, optionally, `groundtruth_attributes`
            (a dict of (name, ground_truth) pairs). The dict and its nested
            attribute dict are not modified.

        Returns:
          A tuple `(image, labels)`. `image` is cast to `self._dtype`.
          `labels` is a dict with the keys `cls_targets`, `box_targets`,
          `anchor_boxes`, `cls_weights`, `box_weights` and `image_info`, plus
          `attribute_targets` when the labeler returns attribute targets.
        """
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        # If not empty, `attributes` is a dict of (name, ground_truth) pairs.
        # `ground_truth` of attributes is assumed in shape [N, attribute_size].
        # TODO(xianzhi): support parsing attributes weights.
        attributes = data.get('groundtruth_attributes', {})
        is_crowds = data['groundtruth_is_crowd']

        # Skips annotations with `is_crowd` = True.
        if self._skip_crowd_during_training:
            num_groundtruths = tf.shape(input=classes)[0]
            with tf.control_dependencies([num_groundtruths, is_crowds]):
                # Keep the non-crowd indices; when `is_crowds` is empty, keep
                # every groundtruth index instead.
                indices = tf.cond(
                    pred=tf.greater(tf.size(input=is_crowds), 0),
                    true_fn=lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                    false_fn=lambda: tf.cast(tf.range(num_groundtruths),
                                             tf.int64))
            classes = tf.gather(classes, indices)
            boxes = tf.gather(boxes, indices)
            # Build a new dict instead of assigning into the one that lives
            # inside `data`, so the caller's example is left untouched.
            attributes = {
                name: tf.gather(value, indices)
                for name, value in attributes.items()
            }

        # Gets original image.
        image = data['image']

        # Apply autoaug or randaug.
        if self._augmenter is not None:
            image, boxes = self._augmenter.distort_with_boxes(image, boxes)

        image_shape = tf.shape(input=image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            image, boxes, _ = preprocess_ops.random_horizontal_flip(
                image, boxes)

        # Converts boxes from normalized coordinates to pixel coordinates.
        boxes = box_ops.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        # Static (Python-level) size of the processed image, needed to build
        # the anchors below.
        image_height, image_width, _ = image.get_shape().as_list()

        # Resizes and crops boxes with the scale (row 2) and offset (row 3)
        # recorded in `image_info`.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
                                                     image_info[1, :], offset)
        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)
        attributes = {
            name: tf.gather(value, indices)
            for name, value in attributes.items()
        }

        # Assigns anchors.
        input_anchor = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = input_anchor(image_size=(image_height, image_width))
        anchor_labeler = anchor.AnchorLabeler(self._match_threshold,
                                              self._unmatched_threshold)
        (cls_targets, box_targets, att_targets, cls_weights,
         box_weights) = anchor_labeler.label_anchors(
             anchor_boxes, boxes, tf.expand_dims(classes, axis=1), attributes)

        # Casts input image to desired data type.
        image = tf.cast(image, dtype=self._dtype)

        # Packs labels for model_fn outputs.
        labels = {
            'cls_targets': cls_targets,
            'box_targets': box_targets,
            'anchor_boxes': anchor_boxes,
            'cls_weights': cls_weights,
            'box_weights': box_weights,
            'image_info': image_info,
        }
        # Attribute targets are only exported when there are any.
        if att_targets:
            labels['attribute_targets'] = att_targets
        return image, labels
Пример #10
0
    def _parse_train_data(self, data):
        """Turns one decoded example into an image and anchor targets.

        Args:
          data: decoded example dict. Reads the keys `image`,
            `groundtruth_classes`, `groundtruth_boxes` and
            `groundtruth_is_crowd`.

        Returns:
          A tuple `(image, labels)`. `image` is cast to bfloat16 when
          `self._use_bfloat16` is set. `labels` is a dict with the keys
          `cls_targets`, `box_targets`, `anchor_boxes`, `cls_weights`,
          `box_weights` and `image_info`.
        """
        gt_classes = data['groundtruth_classes']
        gt_boxes = data['groundtruth_boxes']
        crowd_flags = data['groundtruth_is_crowd']

        # Drop annotations flagged as crowd; when the flag tensor is empty,
        # every groundtruth index is kept instead.
        if self._skip_crowd_during_training:
            num_gt = tf.shape(input=gt_classes)[0]
            with tf.control_dependencies([num_gt, crowd_flags]):
                keep = tf.cond(
                    pred=tf.greater(tf.size(input=crowd_flags), 0),
                    true_fn=lambda: tf.where(
                        tf.logical_not(crowd_flags))[:, 0],
                    false_fn=lambda: tf.cast(tf.range(num_gt), tf.int64))
            gt_classes = tf.gather(gt_classes, keep)
            gt_boxes = tf.gather(gt_boxes, keep)

        # Remember the original spatial size before any resizing.
        image = data['image']
        original_hw = tf.shape(input=image)[0:2]

        # Mean/std pixel normalization.
        image = preprocess_ops.normalize_image(image)

        # Optional horizontal flip of the image and its boxes.
        if self._aug_rand_hflip:
            image, gt_boxes, _ = preprocess_ops.random_horizontal_flip(
                image, gt_boxes)

        # Normalized box coordinates -> pixel coordinates of the original
        # image.
        gt_boxes = box_ops.denormalize_boxes(gt_boxes, original_hw)

        # Resize and crop the image; the padded size is computed from the
        # output size and 2**max_level.
        padded_size = preprocess_ops.compute_padded_size(
            self._output_size, 2**self._max_level)
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=padded_size,
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        image_height, image_width, _ = image.get_shape().as_list()

        # Apply the recorded scale (row 2) and offset (row 3) to the boxes.
        scale = image_info[2, :]
        shift = image_info[3, :]
        gt_boxes = preprocess_ops.resize_and_crop_boxes(
            gt_boxes, scale, image_info[1, :], shift)

        # Discard boxes that ended up empty, together with their classes.
        non_empty = box_ops.get_non_empty_box_indices(gt_boxes)
        gt_boxes = tf.gather(gt_boxes, non_empty)
        gt_classes = tf.gather(gt_classes, non_empty)

        # Generate anchors for the processed image size and label them.
        anchor_generator = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = anchor_generator(
            image_size=(image_height, image_width))
        labeler = anchor.AnchorLabeler(self._match_threshold,
                                       self._unmatched_threshold)
        targets = labeler.label_anchors(
            anchor_boxes, gt_boxes, tf.expand_dims(gt_classes, axis=1))
        cls_targets, box_targets, cls_weights, box_weights = targets

        # The image is only cast when bfloat16 is requested.
        if self._use_bfloat16:
            image = tf.cast(image, dtype=tf.bfloat16)

        return image, {
            'cls_targets': cls_targets,
            'box_targets': box_targets,
            'anchor_boxes': anchor_boxes,
            'cls_weights': cls_weights,
            'box_weights': box_weights,
            'image_info': image_info,
        }
Пример #11
0
    def _parse_train_data(self, data):
        """Parses data for training.

        Args:
          data: the decoded tensor dictionary from TfExampleDecoder. This
            method reads `groundtruth_segmentation_mask`, `height` and
            `width`, and, when flipping is enabled, `image`,
            `groundtruth_boxes` and `groundtruth_instance_masks`. When
            flipping is enabled the flipped image, boxes and masks are
            written back into `data` before it is handed to the parent
            parser.

        Returns:
          image: image tensor that is preprocessed to have normalized value
            and dimension [output_size[0], output_size[1], 3]
          labels: a dictionary of tensors used for training. The following
            describes {key: value} pairs in the dictionary.
            image_info: a 2D `Tensor` that encodes the information of the
              image and the applied preprocessing. Its first two rows are
              [original_height, original_width] and
              [scaled_height, scaled_width]; rows 2 and 3 are read by this
              method as the image scale and the offset.
            anchor_boxes: ordered dictionary with keys
              [min_level, min_level+1, ..., max_level]. The values are tensor
              with shape [height_l, width_l, 4] representing anchor boxes at
              each level.
            rpn_score_targets: ordered dictionary with keys
              [min_level, min_level+1, ..., max_level]. The values are tensor
              with shape [height_l, width_l, anchors_per_location]. The
              height_l and width_l represent the dimension of class logits at
              l-th level.
            rpn_box_targets: ordered dictionary with keys
              [min_level, min_level+1, ..., max_level]. The values are tensor
              with shape [height_l, width_l, anchors_per_location * 4]. The
              height_l and width_l represent the dimension of bounding box
              regression output at l-th level.
            gt_boxes: Groundtruth bounding box annotations. The box is
              represented in [y1, x1, y2, x2] format. The coordinates are
              w.r.t the scaled image that is fed to the network. The tensor
              is padded with -1 to the fixed dimension
              [self._max_num_instances, 4].
            gt_classes: Groundtruth classes annotations. The tensor is padded
              with -1 to the fixed dimension [self._max_num_instances].
            gt_masks: Groundtruth masks cropped by the bounding box and
              resized to a fixed size determined by mask_crop_size.
            gt_segmentation_mask: Groundtruth mask for segmentation head,
              this is resized to a fixed size determined by output_size.
            gt_segmentation_valid_mask: Binary mask that marks the pixels
              that are supposed to be used in computing the segmentation loss
              while training.
        """
        segmentation_mask = data['groundtruth_segmentation_mask']

        # Flips image randomly during training.
        if self.aug_rand_hflip:
            masks = data['groundtruth_instance_masks']
            # Stack the segmentation mask behind the image channels so both
            # receive the same flip decision.
            image_mask = tf.concat([data['image'], segmentation_mask], axis=2)

            image_mask, boxes, masks = preprocess_ops.random_horizontal_flip(
                image_mask, data['groundtruth_boxes'], masks)

            segmentation_mask = image_mask[:, :, -1:]
            image = image_mask[:, :, :-1]

            # Write the flipped annotations back under the same keys they
            # were read from, so the parent parser sees boxes and masks that
            # match the flipped image.
            data['image'] = image
            data['groundtruth_boxes'] = boxes
            data['groundtruth_instance_masks'] = masks
            # Kept for backward compatibility with any reader of these keys.
            data['boxes'] = boxes
            data['masks'] = masks

        image, labels = super(Parser, self)._parse_train_data(data)

        # Reuse the scale (row 2) and offset (row 3) chosen by the parent
        # parser so the segmentation mask follows the image.
        image_info = labels['image_info']
        image_scale = image_info[2, :]
        offset = image_info[3, :]

        segmentation_mask = tf.reshape(
            segmentation_mask, shape=[1, data['height'], data['width']])
        segmentation_mask = tf.cast(segmentation_mask, tf.float32)

        # Pad label and make sure the padded region assigned to the ignore label.
        # The label is first offset by +1 and then padded with 0.
        segmentation_mask += 1
        segmentation_mask = tf.expand_dims(segmentation_mask, axis=3)
        segmentation_mask = preprocess_ops.resize_and_crop_masks(
            segmentation_mask, image_scale, self._output_size, offset)
        segmentation_mask -= 1
        segmentation_mask = tf.where(
            tf.equal(segmentation_mask, -1),
            self._segmentation_ignore_label * tf.ones_like(segmentation_mask),
            segmentation_mask)
        segmentation_mask = tf.squeeze(segmentation_mask, axis=0)
        segmentation_valid_mask = tf.not_equal(segmentation_mask,
                                               self._segmentation_ignore_label)

        labels.update({
            'gt_segmentation_mask': segmentation_mask,
            'gt_segmentation_valid_mask': segmentation_valid_mask
        })

        return image, labels
Пример #12
0
    def _parse_train_data(self, data):
        """Generates images and labels that are usable for model training.

        An optional random horizontal flip is applied first. The image then
        goes through optional colour jittering (hue, contrast, saturation,
        brightness) followed by one of two scale-and-crop pipelines, selected
        by `self._odapi_augmentation`.

        Args:
            data: the decoded tensor dictionary from TfExampleDecoder. The
                keys 'image', 'groundtruth_boxes' and 'groundtruth_classes'
                are read here; the whole dictionary is also handed to
                `self._build_label`.

        Returns:
            images: the image tensor, normalized with `self._channel_means`
                and `self._channel_stds` and cast to `self._dtype`.
            labels: a dict of Tensors that contains labels, as produced by
                `self._build_label`.
        """
        img = tf.cast(data['image'], dtype=tf.float32)
        gt_boxes = data['groundtruth_boxes']
        gt_classes = data['groundtruth_classes']

        # Height/width of the input, captured before any augmentation.
        source_hw = tf.shape(input=img)[0:2]

        if self._aug_rand_hflip:
            img, gt_boxes, _ = preprocess_ops.random_horizontal_flip(
                img, gt_boxes)

        if self._odapi_augmentation:
            # Colour and lighting jitter through the cn_prep_ops helpers.
            if self._aug_rand_hue:
                img = cn_prep_ops.random_adjust_hue(image=img, max_delta=.02)
            if self._aug_rand_contrast:
                img = cn_prep_ops.random_adjust_contrast(
                    image=img, min_delta=0.8, max_delta=1.25)
            if self._aug_rand_saturation:
                img = cn_prep_ops.random_adjust_saturation(
                    image=img, min_delta=0.8, max_delta=1.25)
            if self._aug_rand_brightness:
                img = cn_prep_ops.random_adjust_brightness(
                    image=img, max_delta=.2)

            # Random square crop; boxes and classes are cropped alongside.
            square_img, square_boxes, gt_classes = (
                cn_prep_ops.random_square_crop_by_scale(
                    image=img,
                    boxes=gt_boxes,
                    labels=gt_classes,
                    scale_min=self._aug_scale_min,
                    scale_max=self._aug_scale_max))

            img, unpad_image_shape = cn_prep_ops.resize_to_range(
                image=square_img,
                min_dimension=self._output_width,
                max_dimension=self._output_width,
                pad_to_max_dimension=True)
            padded_shape = tf.cast(tf.shape(img), tf.float32)
            unpad_image_shape = tf.cast(unpad_image_shape, tf.float32)

            # Window whose extent is the padded/unpadded size ratio; the
            # cropped boxes are re-expressed relative to it.
            window = tf.stack([
                0.0, 0.0, padded_shape[0] / unpad_image_shape[0],
                padded_shape[1] / unpad_image_shape[1]
            ])
            realigned = box_list_ops.change_coordinate_frame(
                boxlist=box_list.BoxList(square_boxes), window=window)

            pruned = box_list_ops.assert_or_prune_invalid_boxes(
                realigned.get())

            gt_boxes = box_list_ops.to_absolute_coordinates(
                boxlist=box_list.BoxList(pruned),
                height=self._output_height,
                width=self._output_width).get()

            # Rows: input size, output size, crop-size / input-size ratio,
            # and a zero offset.
            image_info = tf.stack([
                tf.cast(source_hw, dtype=tf.float32),
                tf.constant([self._output_height, self._output_width],
                            dtype=tf.float32),
                tf.cast(tf.shape(square_img)[0:2] / source_hw,
                        dtype=tf.float32),
                tf.constant([0., 0.])
            ])
        else:
            # Colour and lighting jitter through tf.image.
            if self._aug_rand_hue:
                img = tf.image.random_hue(image=img, max_delta=.02)
            if self._aug_rand_contrast:
                img = tf.image.random_contrast(
                    image=img, lower=0.8, upper=1.25)
            if self._aug_rand_saturation:
                img = tf.image.random_saturation(
                    image=img, lower=0.8, upper=1.25)
            if self._aug_rand_brightness:
                img = tf.image.random_brightness(image=img, max_delta=.2)
            img = tf.clip_by_value(
                img, clip_value_min=0.0, clip_value_max=255.0)

            # Boxes go from normalized to pixel coordinates.
            gt_boxes = box_ops.denormalize_boxes(gt_boxes, source_hw)

            # Resize and crop the image to the configured output size.
            img, image_info = preprocess_ops.resize_and_crop_image(
                img, [self._output_height, self._output_width],
                padded_size=[self._output_height, self._output_width],
                aug_scale_min=self._aug_scale_min,
                aug_scale_max=self._aug_scale_max)
            unpad_image_shape = tf.cast(tf.shape(img), tf.float32)

            # Apply the same scale and offset to the boxes.
            scale = image_info[2, :]
            crop_offset = image_info[3, :]
            gt_boxes = preprocess_ops.resize_and_crop_boxes(
                gt_boxes, scale, image_info[1, :], crop_offset)

        # Drop ground-truth boxes that are all zeros, with their classes.
        keep = box_ops.get_non_empty_box_indices(gt_boxes)
        gt_boxes = tf.gather(gt_boxes, keep)
        gt_classes = tf.gather(gt_classes, keep)

        labels = self._build_label(unpad_image_shape=unpad_image_shape,
                                   boxes=gt_boxes,
                                   classes=gt_classes,
                                   image_info=image_info,
                                   data=data)

        if self._bgr_ordering:
            # Swap the first and last channel (RGB -> BGR).
            red, green, blue = tf.unstack(img, num=3, axis=2)
            img = tf.stack([blue, green, red], axis=2)

        img = preprocess_ops.normalize_image(image=img,
                                             offset=self._channel_means,
                                             scale=self._channel_stds)

        return tf.cast(img, self._dtype), labels
Пример #13
0
    def _parse_train_data(self, data):
        """Parses data for training and evaluation.

        The image/label pair from `self._prepare_image_and_label` is
        optionally resized and randomly cropped, flipped, rotated, resized to
        the training size, brightness-adjusted and passed through
        `self._augmenter`. The image is finally normalized with
        MEAN_RGB / STDDEV_RGB and cast to `self._dtype`.

        Args:
            data: decoded example. 'image/height' and 'image/width' are read
                here when `self._crop_size` is set; the whole object is
                passed to `self._prepare_image_and_label`.

        Returns:
            A tuple `(image, labels)` where `labels` is a dict with the keys
            'masks', 'valid_masks' and 'image_info'.
        """
        img, mask = self._prepare_image_and_label(data)

        crop_hw = self._crop_size
        if crop_hw:
            mask = tf.reshape(
                mask, [data['image/height'], data['image/width'], 1])
            # When an output size is configured, bring image and label to it
            # before cropping.
            if self._output_size:
                img = tf.image.resize(
                    img, self._output_size, method='bilinear')
                mask = tf.image.resize(
                    mask, self._output_size, method='nearest')

            # Crop image and label together so they stay aligned.
            # NOTE(review): the hard-coded 4 looks like 3 image channels plus
            # 1 label channel - confirm the image is always 3-channel.
            stacked = tf.concat([img, mask], axis=2)
            cropped = tf.image.random_crop(stacked, crop_hw + [4])
            img = cropped[:, :, :-1]
            mask = tf.reshape(cropped[:, :, -1], [1] + crop_hw)

        # Random horizontal flip.
        if self._aug_rand_hflip:
            img, _, mask = preprocess_ops.random_horizontal_flip(
                img, masks=mask)

        # The crop size, when set, takes precedence over the output size.
        target_size = crop_hw or self._output_size

        # Random rotation.
        # NOTE(review): a range with either endpoint equal to 0.0 is treated
        # as "rotation disabled" by this condition - confirm that is intended.
        rotate_lo, rotate_hi = self._rotate_min, self._rotate_max
        if rotate_lo != 0.0 and rotate_hi != 0.0 and rotate_lo < rotate_hi:
            img, mask = preprocess_ops.random_rotation(
                img,
                masks=mask,
                rotate_max=self._rotate_max,
                rotate_min=self._rotate_min,
                ignore_label=self._ignore_label)

        # Resize and crop the image to the training size.
        img, image_info = preprocess_ops.resize_and_crop_image(
            img,
            target_size,
            target_size,
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max,
            preserve_aspect_ratio=self._preserve_aspect_ratio)

        # Random brightness.
        # NOTE(review): as with rotation, a range with either endpoint equal
        # to 1.0 disables the adjustment.
        bright_lo, bright_hi = self._bright_min, self._bright_max
        if bright_lo != 1.0 and bright_hi != 1.0 and bright_lo < bright_hi:
            img = preprocess_ops.random_brightness(
                img,
                bright_min=self._bright_min,
                bright_max=self._bright_max)

        # Scale and offset that were applied to the image; reused below so
        # the label gets the same resize/crop.
        image_scale = image_info[2, :]
        offset = image_info[3, :]

        # Shift the label by +1 so the zero padding introduced by the
        # resize/crop becomes -1 after shifting back; those pixels are then
        # mapped to the ignore label.
        mask += 1
        mask = tf.expand_dims(mask, axis=3)
        mask = preprocess_ops.resize_and_crop_masks(
            mask, image_scale, target_size, offset)
        mask -= 1
        mask = tf.where(
            tf.equal(mask, -1),
            self._ignore_label * tf.ones_like(mask),
            mask)
        mask = tf.squeeze(mask, axis=0)

        # Optional extra augmentation applied jointly to image and label.
        if self._augmenter is not None:
            img, mask = self._augmenter.distort_image_and_mask(
                img, mask, self._ignore_label)

        labels = {
            'masks': mask,
            'valid_masks': tf.not_equal(mask, self._ignore_label),
            'image_info': image_info,
        }

        # Normalize with mean and std pixel values. This must come after the
        # augmenter since certain ops rely on uint8.
        img = preprocess_ops.normalize_image(
            img, offset=MEAN_RGB, scale=STDDEV_RGB)

        return tf.cast(img, dtype=self._dtype), labels
Пример #14
0
  def _parse_train_data(self, data):
    """Generates images and labels that are usable for model training.

    The image is divided by 255, randomly blurred, jittered in HSV space and
    perturbed with additive noise. It then goes through the geometric
    augmentations (flip, box jitter, image jitter, zoom, optional
    letter-boxing) and is resized to `self._image_w` x `self._image_h`.

    Args:
      data: a dict of Tensors produced by the decoder. The keys 'image',
        'groundtruth_boxes', 'groundtruth_classes' and 'source_id' are read.

    Returns:
      image: the augmented image tensor, clipped to [0, 1].
      labels: a dict of Tensors with the keys 'source_id', 'bbox', 'classes',
        'width', 'height' and 'num_detections'. When `self._fixed_size` is
        set and `self._cutmix` is not, it also holds 'best_anchors' and
        'grid_form'.
    """
    image = data['image'] / 255
    boxes = data['groundtruth_boxes']
    classes = data['groundtruth_classes']

    # Random Gaussian blur: one draw picks the strength; draws <= 0.4 leave
    # the image untouched.
    do_blur = tf.random.uniform([],
                                minval=0,
                                maxval=1,
                                seed=self._seed,
                                dtype=tf.float32)
    if do_blur > 0.9:
      image = tfa.image.gaussian_filter2d(image, filter_shape=7, sigma=15)
    elif do_blur > 0.7:
      image = tfa.image.gaussian_filter2d(image, filter_shape=5, sigma=6)
    elif do_blur > 0.4:
      image = tfa.image.gaussian_filter2d(image, filter_shape=5, sigma=3)

    # Colour jitter, applied per channel in HSV space.
    image = tf.image.rgb_to_hsv(image)
    i_h, i_s, i_v = tf.split(image, 3, axis=-1)
    if self._aug_rand_hue:
      delta = preprocessing_ops.rand_uniform_strong(-0.1, 0.1)
      i_h = i_h + delta  # Hue
      i_h = tf.clip_by_value(i_h, 0.0, 1.0)
    if self._aug_rand_saturation:
      delta = preprocessing_ops.rand_scale(0.75)
      i_s = i_s * delta
    if self._aug_rand_brightness:
      delta = preprocessing_ops.rand_scale(0.75)
      i_v = i_v * delta
    image = tf.concat([i_h, i_s, i_v], axis=-1)
    image = tf.image.hsv_to_rgb(image)

    # Additive noise with a randomly drawn standard deviation. The samples
    # are clamped to [0, 0.5], so negative samples contribute nothing and the
    # noise only ever adds intensity.
    stddev = tf.random.uniform([],
                               minval=0,
                               maxval=40 / 255,
                               seed=self._seed,
                               dtype=tf.float32)
    noise = tf.random.normal(
        shape=tf.shape(image), mean=0.0, stddev=stddev, seed=self._seed)
    noise = tf.math.minimum(noise, 0.5)
    noise = tf.math.maximum(noise, 0)
    image += noise
    image = tf.clip_by_value(image, 0.0, 1.0)

    image_shape = tf.shape(image)[:2]

    if self._random_flip:
      image, boxes, _ = preprocess_ops.random_horizontal_flip(
          image, boxes, seed=self._seed)

    if self._jitter_boxes != 0.0:
      # NOTE(review): `self._jitter_boxes` only gates this step; the jitter
      # amount passed on is the fixed 0.025 - confirm that is intended.
      boxes = box_ops.denormalize_boxes(boxes, image_shape)
      boxes = box_ops.jitter_boxes(boxes, 0.025)
      boxes = box_ops.normalize_boxes(boxes, image_shape)

    if self._jitter_im != 0.0:
      image, boxes, classes = preprocessing_ops.random_jitter(
          image, boxes, classes, self._jitter_im, seed=self._seed)

    if self._aug_rand_zoom:
      image, boxes, classes = preprocessing_ops.random_zoom_crop(
          image, boxes, classes, self._jitter_im)

    shape = tf.shape(image)
    width = shape[1]
    height = shape[0]

    # Letter-box target size, counted in units of the network stride. With
    # `self._fixed_size` set it is sometimes replaced by a random value.
    randscale = self._image_w // self._net_down_scale

    if self._fixed_size:
      do_scale = tf.greater(
          tf.random.uniform([], minval=0, maxval=1, seed=self._seed),
          1 - self._pct_rand)
      if do_scale:
        randscale = tf.random.uniform([],
                                      minval=10,
                                      maxval=15,
                                      seed=self._seed,
                                      dtype=tf.int32)

    if self._letter_box:
      image, boxes = preprocessing_ops.fit_preserve_aspect_ratio(
          image,
          boxes,
          width=width,
          height=height,
          target_dim=randscale * self._net_down_scale)

    # Re-read the size from the (possibly letter-boxed) image; these values
    # feed the resize below and are reported in the labels.
    shape = tf.shape(image)
    width = shape[1]
    height = shape[0]
    image, boxes, classes = preprocessing_ops.resize_crop_filter(
        image,
        boxes,
        classes,
        default_width=width,
        default_height=height,
        target_width=self._image_w,
        target_height=self._image_h,
        randomize=False)

    boxes = box_utils.yxyx_to_xcycwh(boxes)
    image = tf.clip_by_value(image, 0.0, 1.0)
    # Number of instances before padding to the fixed size.
    num_dets = tf.shape(classes)[0]

    # Pad classes to a fixed length, filling with -1.
    classes = preprocess_ops.clip_or_pad_to_fixed_size(classes,
                                                       self._max_num_instances,
                                                       -1)

    if self._fixed_size and not self._cutmix:
      # Anchors are matched on the unpadded boxes, then everything is padded.
      best_anchors = preprocessing_ops.get_best_anchor(
          boxes, self._anchors, width=self._image_w, height=self._image_h)
      best_anchors = preprocess_ops.clip_or_pad_to_fixed_size(
          best_anchors, self._max_num_instances, 0)
      boxes = preprocess_ops.clip_or_pad_to_fixed_size(boxes,
                                                       self._max_num_instances,
                                                       0)
      labels = {
          'source_id': data['source_id'],
          'bbox': tf.cast(boxes, self._dtype),
          'classes': tf.cast(classes, self._dtype),
          'best_anchors': tf.cast(best_anchors, self._dtype),
          'width': width,
          'height': height,
          'num_detections': num_dets
      }
      grid = self._build_grid(
          labels, self._image_w, use_tie_breaker=self._use_tie_breaker)
      labels.update({'grid_form': grid})
      # The grid is built from xcycwh boxes; the returned boxes are converted
      # back to yxyx.
      labels['bbox'] = box_utils.xcycwh_to_yxyx(labels['bbox'])
    else:
      boxes = preprocess_ops.clip_or_pad_to_fixed_size(boxes,
                                                       self._max_num_instances,
                                                       0)
      labels = {
          'source_id': data['source_id'],
          'bbox': tf.cast(boxes, self._dtype),
          'classes': tf.cast(classes, self._dtype),
          'width': width,
          'height': height,
          'num_detections': num_dets
      }
    return image, labels
Пример #15
0
  def preprocess(self, inputs):
    """Preprocess COCO for DETR.

    In training mode the image is randomly flipped and, with probability 0.5,
    rescaled and randomly cropped; the final short-side length is then drawn
    uniformly from `self._params.resize_scales`. In evaluation mode the last
    entry of `self._params.resize_scales` is used. The image is finally
    padded to `self._params.output_size`.

    Args:
      inputs: a nested dict with 'image' and 'objects' ('bbox', 'label',
        'is_crowd'). 'image/id' is additionally read when not training.

    Returns:
      image: the normalized, resized and padded image tensor.
      labels: a dict with 'classes' and 'boxes', each clipped or padded to
        `self._params.max_num_boxes`. When not training it also contains
        'id', 'image_info', 'is_crowd' and 'gt_boxes'.
    """
    image = inputs['image']
    boxes = inputs['objects']['bbox']
    # Class ids are shifted up by one.
    # NOTE(review): presumably this reserves 0 for background/padding -
    # confirm against the model's class mapping.
    classes = inputs['objects']['label'] + 1
    is_crowd = inputs['objects']['is_crowd']

    image = preprocess_ops.normalize_image(image)
    if self._params.is_training:
      image, boxes, _ = preprocess_ops.random_horizontal_flip(image, boxes)

      do_crop = tf.greater(tf.random.uniform([]), 0.5)
      if do_crop:
        # Rescale the short side to one of three sizes, picked uniformly.
        boxes = box_ops.denormalize_boxes(boxes, tf.shape(image)[:2])
        index = tf.random.categorical(tf.zeros([1, 3]), 1)[0]
        scales = tf.gather([400.0, 500.0, 600.0], index, axis=0)
        short_side = scales[0]
        image, image_info = preprocess_ops.resize_image(image, short_side)
        boxes = preprocess_ops.resize_and_crop_boxes(boxes,
                                                     image_info[2, :],
                                                     image_info[1, :],
                                                     image_info[3, :])
        boxes = box_ops.normalize_boxes(boxes, image_info[1, :])

        # Random crop: each side is drawn from [384, min(side, 600)) and the
        # window is placed uniformly inside the image.
        shape = tf.cast(image_info[1], dtype=tf.int32)
        h = tf.random.uniform(
            [], 384, tf.math.minimum(shape[0], 600), dtype=tf.int32)
        w = tf.random.uniform(
            [], 384, tf.math.minimum(shape[1], 600), dtype=tf.int32)
        i = tf.random.uniform([], 0, shape[0] - h + 1, dtype=tf.int32)
        j = tf.random.uniform([], 0, shape[1] - w + 1, dtype=tf.int32)
        image = tf.image.crop_to_bounding_box(image, i, j, h, w)
        # Move the boxes into the crop window: to pixels, shift by the crop
        # origin, renormalize by the crop size and clip to [0, 1].
        boxes = tf.clip_by_value(
            (boxes[..., :] * tf.cast(
                tf.stack([shape[0], shape[1], shape[0], shape[1]]),
                dtype=tf.float32) -
             tf.cast(tf.stack([i, j, i, j]), dtype=tf.float32)) /
            tf.cast(tf.stack([h, w, h, w]), dtype=tf.float32), 0.0, 1.0)

      # Pick the final short-side length uniformly from the configured
      # scales. The number of logits follows the configured list, so every
      # entry can be sampled and the gather index is always in range.
      scales = tf.constant(
          self._params.resize_scales,
          dtype=tf.float32)
      num_scales = len(self._params.resize_scales)
      index = tf.random.categorical(tf.zeros([1, num_scales]), 1)[0]
      scales = tf.gather(scales, index, axis=0)
    else:
      scales = tf.constant([self._params.resize_scales[-1]], tf.float32)

    image_shape = tf.shape(image)[:2]
    boxes = box_ops.denormalize_boxes(boxes, image_shape)
    # Pixel-coordinate boxes before the final resize; only returned when not
    # training.
    gt_boxes = boxes
    short_side = scales[0]
    image, image_info = preprocess_ops.resize_image(
        image,
        short_side,
        max(self._params.output_size))
    boxes = preprocess_ops.resize_and_crop_boxes(boxes,
                                                 image_info[2, :],
                                                 image_info[1, :],
                                                 image_info[3, :])
    boxes = box_ops.normalize_boxes(boxes, image_info[1, :])

    # Filters out ground truth boxes that are all zeros.
    indices = box_ops.get_non_empty_box_indices(boxes)
    boxes = tf.gather(boxes, indices)
    classes = tf.gather(classes, indices)
    is_crowd = tf.gather(is_crowd, indices)
    boxes = box_ops.yxyx_to_cycxhw(boxes)

    # Pad on the bottom/right up to the fixed output size.
    image = tf.image.pad_to_bounding_box(
        image, 0, 0, self._params.output_size[0], self._params.output_size[1])
    labels = {
        'classes':
            preprocess_ops.clip_or_pad_to_fixed_size(
                classes, self._params.max_num_boxes),
        'boxes':
            preprocess_ops.clip_or_pad_to_fixed_size(
                boxes, self._params.max_num_boxes)
    }
    if not self._params.is_training:
      labels.update({
          'id':
              inputs['image/id'],
          'image_info':
              image_info,
          'is_crowd':
              preprocess_ops.clip_or_pad_to_fixed_size(
                  is_crowd, self._params.max_num_boxes),
          'gt_boxes':
              preprocess_ops.clip_or_pad_to_fixed_size(
                  gt_boxes, self._params.max_num_boxes),
      })

    return image, labels