Example #1
    def serve(self, images: tf.Tensor):
        """Cast image to float and run inference.

    Args:
      images: uint8 Tensor of shape [batch_size, None, None, 3]
    Returns:
      A dictionary holding the detection outputs.
    """

        images, anchor_boxes, image_info = self.preprocess(images)
        input_image_shape = image_info[:, 1, :]

        # To work around a keras.Model limitation when saving a model whose
        # layers take multiple inputs, we call `model.call` directly to trigger
        # the forward pass. Note that this bypasses some of the Keras magic
        # that happens in `__call__`.
        detections = self.model.call(images=images,
                                     image_shape=input_image_shape,
                                     anchor_boxes=anchor_boxes,
                                     training=False)

        if self.params.task.model.detection_generator.apply_nms:
            # For RetinaNet model, apply export_config.
            # TODO(huizhongc): Add export_config to fasterrcnn and maskrcnn as needed.
            if isinstance(self.params.task.model, configs.retinanet.RetinaNet):
                export_config = self.params.task.export_config
                # Normalize detection box coordinates to [0, 1].
                if export_config.output_normalized_coordinates:
                    detection_boxes = (
                        detections['detection_boxes'] /
                        tf.tile(image_info[:, 2:3, :], [1, 1, 2]))
                    detections['detection_boxes'] = box_ops.normalize_boxes(
                        detection_boxes, image_info[:, 0:1, :])

                # Cast num_detections and detection_classes to float. This allows the
                # model inference to work on chain (go/chain) as chain requires floating
                # point outputs.
                if export_config.cast_num_detections_to_float:
                    detections['num_detections'] = tf.cast(
                        detections['num_detections'], dtype=tf.float32)
                if export_config.cast_detection_classes_to_float:
                    detections['detection_classes'] = tf.cast(
                        detections['detection_classes'], dtype=tf.float32)

            final_outputs = {
                'detection_boxes': detections['detection_boxes'],
                'detection_scores': detections['detection_scores'],
                'detection_classes': detections['detection_classes'],
                'num_detections': detections['num_detections']
            }
        else:
            final_outputs = {
                'decoded_boxes': detections['decoded_boxes'],
                'decoded_box_scores': detections['decoded_box_scores']
            }

        if 'detection_masks' in detections.keys():
            final_outputs['detection_masks'] = detections['detection_masks']

        final_outputs.update({'image_info': image_info})
        return final_outputs
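A note on usage: a `serve` method like this is typically wrapped in a `tf.function` and exported as a SavedModel signature. A minimal sketch, assuming `export_module` is an instance of the class that defines `serve` above (the name and output path are hypothetical):

import tensorflow as tf

# Trace `serve` with a concrete uint8 image signature and export it as the
# default serving signature of a SavedModel.
signatures = {
    'serving_default':
        tf.function(export_module.serve).get_concrete_function(
            tf.TensorSpec(shape=[1, None, None, 3], dtype=tf.uint8,
                          name='images'))
}
tf.saved_model.save(export_module, '/tmp/detection_saved_model',
                    signatures=signatures)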
Example #2
    def _mosaic_crop_image(self, image, boxes, classes, is_crowd, area):
        """Process a patched image in preperation for final output."""
        if self._mosaic_crop_mode != 'crop':
            shape = tf.cast(preprocessing_ops.get_image_shape(image),
                            tf.float32)
            center = shape * self._mosaic_center

            # shift the center of the image by applying a translation to the whole
            # image
            ch = tf.math.round(
                preprocessing_ops.random_uniform_strong(-center[0],
                                                        center[0],
                                                        seed=self._seed))
            cw = tf.math.round(
                preprocessing_ops.random_uniform_strong(-center[1],
                                                        center[1],
                                                        seed=self._seed))

            # translate the image and boxes, then clip the boxes to those
            # within the image
            image = tfa.image.translate(image, [cw, ch],
                                        fill_value=self._pad_value)
            boxes = box_ops.denormalize_boxes(boxes, shape[:2])
            boxes = boxes + tf.cast([ch, cw, ch, cw], boxes.dtype)
            boxes = box_ops.clip_boxes(boxes, shape[:2])
            inds = box_ops.get_non_empty_box_indices(boxes)

            boxes = box_ops.normalize_boxes(boxes, shape[:2])
            boxes, classes, is_crowd, area = self._select_ind(
                inds,
                boxes,
                classes,  # pylint:disable=unbalanced-tuple-unpacking
                is_crowd,
                area)

        # warp and scale the fully stitched sample
        image, _, affine = preprocessing_ops.affine_warp_image(
            image, [self._output_size[0], self._output_size[1]],
            scale_min=self._aug_scale_min,
            scale_max=self._aug_scale_max,
            translate=self._aug_rand_translate,
            degrees=self._aug_rand_angle,
            perspective=self._aug_rand_perspective,
            random_pad=self._random_pad,
            seed=self._seed)
        height, width = self._output_size[0], self._output_size[1]
        image = tf.image.resize(image, (height, width))

        # clip and clean boxes
        boxes, inds = preprocessing_ops.transform_and_clip_boxes(
            boxes,
            None,
            affine=affine,
            area_thresh=self._area_thresh,
            seed=self._seed)
        classes, is_crowd, area = self._select_ind(inds, classes, is_crowd,
                                                   area)  # pylint:disable=unbalanced-tuple-unpacking
        return image, boxes, classes, is_crowd, area, area
Example #3
  def scale_boxes(self, patch, ishape, boxes, classes, xs, ys):
    """Scale and translate the boxes for each image prior to patching."""
    xs = tf.cast(xs, boxes.dtype)
    ys = tf.cast(ys, boxes.dtype)
    pshape = tf.cast(tf.shape(patch), boxes.dtype)
    ishape = tf.cast(ishape, boxes.dtype)
    translate = tf.cast((ishape - pshape), boxes.dtype)

    boxes = box_ops.denormalize_boxes(boxes, pshape[:2])
    boxes = boxes + tf.cast([
        translate[0] * ys, translate[1] * xs, translate[0] * ys,
        translate[1] * xs
    ], boxes.dtype)
    boxes = box_ops.normalize_boxes(boxes, ishape[:2])
    return boxes, classes
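The denormalize -> shift -> normalize sequence above is the core pattern for moving boxes between coordinate frames. A small self-contained sketch of that pattern with made-up shapes, assuming the TF Model Garden `box_ops` module (the import path may differ by release):

import tensorflow as tf
from official.vision.ops import box_ops

boxes = tf.constant([[0.0, 0.0, 0.5, 0.5]])   # normalized [y1, x1, y2, x2] in a patch
patch_hw = tf.constant([320.0, 320.0])        # patch height and width
image_hw = tf.constant([640.0, 640.0])        # stitched image height and width

# Convert to pixels in the patch, shift into the bottom-right quadrant of the
# stitched image, then renormalize against the full image size.
pixel_boxes = box_ops.denormalize_boxes(boxes, patch_hw)
pixel_boxes += tf.constant([320.0, 320.0, 320.0, 320.0])
normalized = box_ops.normalize_boxes(pixel_boxes, image_hw)  # [[0.5, 0.5, 0.75, 0.75]]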
Example #4
    def serve(self, images):
        """Cast image to float and run inference.

    Args:
      images: uint8 Tensor of shape [batch_size, None, None, 3]
    Returns:
      A dictionary holding the detection boxes, classes, and scores.
    """
        with tf.device('cpu:0'):
            images = tf.cast(images, dtype=tf.float32)

            images = tf.nest.map_structure(
                tf.identity,
                tf.map_fn(self._build_inputs,
                          elems=images,
                          fn_output_signature=tf.TensorSpec(
                              shape=self._input_image_size + [3],
                              dtype=tf.float32),
                          parallel_iterations=32))

        outputs = self.inference_step(
            images)  # tf.keras.Model's __call__ method

        num_classes = outputs['predictions']['0'].shape[-1] - 5
        bbox_tensors, _, prob_tensors = yolo_ops.concat_tensor_dict(
            tensor_dict=outputs['predictions'], num_classes=num_classes)

        boxes = tf.concat(bbox_tensors, axis=1)
        boxes = tf.squeeze(yolo_box_ops.xcycwh_to_yxyx(boxes))
        scores = tf.concat(prob_tensors, axis=1)
        classes = tf.argmax(scores, axis=-1)
        scores = tf.squeeze(tf.math.reduce_max(scores, axis=-1))

        indices = tf.image.non_max_suppression(boxes=boxes,
                                               scores=scores,
                                               max_output_size=20,
                                               iou_threshold=0.5,
                                               score_threshold=0.25)

        boxes = tf.expand_dims(tf.gather(boxes, indices), axis=0)
        boxes = box_ops.normalize_boxes(boxes, self._input_image_size)
        scores = tf.expand_dims(tf.gather(scores, indices), axis=0)
        classes = tf.gather(classes, indices, axis=1)

        return {'boxes': boxes, 'classes': classes, 'scores': scores}
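A toy check of the NMS step used above, with made-up boxes and scores (coordinates are [y1, x1, y2, x2]):

import tensorflow as tf

boxes = tf.constant([[0.0, 0.0, 1.0, 1.0],
                     [0.0, 0.0, 0.9, 0.9],   # IoU 0.81 with the first box
                     [0.5, 0.5, 1.0, 1.0]])  # IoU 0.25 with the first box
scores = tf.constant([0.9, 0.8, 0.6])
keep = tf.image.non_max_suppression(
    boxes, scores, max_output_size=20, iou_threshold=0.5, score_threshold=0.25)
# keep -> [0, 2]; the second box is suppressed by the higher-scoring first box.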
Example #5
def visualize_images_with_bounding_boxes(images, box_outputs, step,
                                         summary_writer):
  """Records subset of evaluation images with bounding boxes."""
  if not isinstance(images, list):
    logging.warning(
        'visualize_images_with_bounding_boxes expects list of '
        'images but received type: %s and value: %s', type(images), images)
    return

  image_shape = tf.shape(images[0])
  image_height = tf.cast(image_shape[0], tf.float32)
  image_width = tf.cast(image_shape[1], tf.float32)
  normalized_boxes = box_ops.normalize_boxes(box_outputs,
                                             [image_height, image_width])

  bounding_box_color = tf.constant([[1.0, 1.0, 0.0, 1.0]])
  image_summary = tf.image.draw_bounding_boxes(
      tf.cast(images, tf.float32), normalized_boxes, bounding_box_color)
  with summary_writer.as_default():
    tf.summary.image('bounding_box_summary', image_summary, step=step)
    summary_writer.flush()
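A hedged usage sketch with made-up data; real callers pass evaluation images and predicted boxes in pixel coordinates:

import tensorflow as tf

images = [tf.zeros([256, 256, 3]) for _ in range(2)]       # two eval images
box_outputs = tf.constant([[[10.0, 10.0, 100.0, 120.0]],
                           [[50.0, 40.0, 200.0, 220.0]]])  # [2, 1, 4] pixel boxes
writer = tf.summary.create_file_writer('/tmp/box_summaries')
visualize_images_with_bounding_boxes(
    images, box_outputs, step=0, summary_writer=writer)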
Example #6
    def serve(self, images: tf.Tensor):
        """Cast image to float and run inference.

    Args:
      images: uint8 Tensor of shape [batch_size, None, None, 3]
    Returns:
      A dictionary holding the detection outputs.
    """

        # Skip image preprocessing when input_type is tflite so it is compatible
        # with TFLite quantization.
        if self._input_type != 'tflite':
            images, anchor_boxes, image_info = self.preprocess(images)
        else:
            with tf.device('cpu:0'):
                anchor_boxes = self._build_anchor_boxes()
                # image_info is a 3D tensor of shape [batch_size, 4, 2]. It is in the
                # format of [[original_height, original_width],
                # [desired_height, desired_width], [y_scale, x_scale],
                # [y_offset, x_offset]]. When input_type is tflite, the input
                # image is expected to be preprocessed already.
                image_info = tf.convert_to_tensor([[
                    self._input_image_size, self._input_image_size, [1.0, 1.0],
                    [0, 0]
                ]],
                                                  dtype=tf.float32)
        input_image_shape = image_info[:, 1, :]

        # To work around a keras.Model limitation when saving a model whose
        # layers take multiple inputs, we call `model.call` directly to trigger
        # the forward pass. Note that this bypasses some of the Keras magic
        # that happens in `__call__`.
        detections = self.model.call(images=images,
                                     image_shape=input_image_shape,
                                     anchor_boxes=anchor_boxes,
                                     training=False)

        if self.params.task.model.detection_generator.apply_nms:
            # For RetinaNet model, apply export_config.
            # TODO(huizhongc): Add export_config to fasterrcnn and maskrcnn as needed.
            if isinstance(self.params.task.model, configs.retinanet.RetinaNet):
                export_config = self.params.task.export_config
                # Normalize detection box coordinates to [0, 1].
                if export_config.output_normalized_coordinates:
                    detection_boxes = (
                        detections['detection_boxes'] /
                        tf.tile(image_info[:, 2:3, :], [1, 1, 2]))
                    detections['detection_boxes'] = box_ops.normalize_boxes(
                        detection_boxes, image_info[:, 0:1, :])

                # Cast num_detections and detection_classes to float. This allows the
                # model inference to work on chain (go/chain) as chain requires floating
                # point outputs.
                if export_config.cast_num_detections_to_float:
                    detections['num_detections'] = tf.cast(
                        detections['num_detections'], dtype=tf.float32)
                if export_config.cast_detection_classes_to_float:
                    detections['detection_classes'] = tf.cast(
                        detections['detection_classes'], dtype=tf.float32)

            final_outputs = {
                'detection_boxes': detections['detection_boxes'],
                'detection_scores': detections['detection_scores'],
                'detection_classes': detections['detection_classes'],
                'num_detections': detections['num_detections']
            }
        else:
            final_outputs = {
                'decoded_boxes': detections['decoded_boxes'],
                'decoded_box_scores': detections['decoded_box_scores']
            }

        if 'detection_masks' in detections.keys():
            final_outputs['detection_masks'] = detections['detection_masks']

        final_outputs.update({'image_info': image_info})
        return final_outputs
Example #7
def transform_and_clip_boxes(boxes,
                             infos,
                             affine=None,
                             shuffle_boxes=False,
                             area_thresh=0.1,
                             seed=None,
                             augment=True):
    """Clips and cleans the boxes.

  Args:
    boxes: A `Tensor` for the boxes.
    infos: A `list` that contains the image infos.
    affine: A `list` that contains parameters for resize and crop.
    shuffle_boxes: A `bool` for shuffling the boxes.
    area_thresh: A `float` for the area threshold.
    seed: seed for random number generation.
    augment: A `bool` for clipping the boxes to [0, 1].

  Returns:
    boxes: A `Tensor` representing the augmented boxes.
    inds: A `Tensor` of valid box indices.
  """

    # Clip and clean boxes.
    def get_valid_boxes(boxes):
        """Get indices for non-empty boxes."""
        # Keep only boxes with positive height and width.
        height = boxes[:, 2] - boxes[:, 0]
        width = boxes[:, 3] - boxes[:, 1]
        base = tf.logical_and(tf.greater(height, 0), tf.greater(width, 0))
        return base

    # Initialize history to track operation applied to boxes
    box_history = boxes

    # Make sure all boxes are valid to start, clip to [0, 1] and get only the
    # valid boxes.
    output_size = tf.cast([640, 640], tf.float32)
    if augment:
        boxes = tf.math.maximum(tf.math.minimum(boxes, 1.0), 0.0)
    cond = get_valid_boxes(boxes)

    if infos is None:
        infos = []

    for info in infos:
        # Denormalize the boxes.
        boxes = bbox_ops.denormalize_boxes(boxes, info[0])
        box_history = bbox_ops.denormalize_boxes(box_history, info[0])

        # Shift and scale all boxes, tracking an unclipped box history; the
        # history is used to remove boxes that become too small or that leave
        # the image area.
        (boxes, box_history) = resize_and_crop_boxes(boxes,
                                                     info[2, :],
                                                     info[1, :],
                                                     info[3, :],
                                                     box_history=box_history)

        # Get all the boxes that still remain in the image and store
        # in a bit vector for later use.
        cond = tf.logical_and(get_valid_boxes(boxes), cond)

        # Normalize the boxes to [0, 1].
        output_size = info[1]
        boxes = bbox_ops.normalize_boxes(boxes, output_size)
        box_history = bbox_ops.normalize_boxes(box_history, output_size)

    if affine is not None:
        # Denormalize the boxes.
        boxes = bbox_ops.denormalize_boxes(boxes, affine[0])
        box_history = bbox_ops.denormalize_boxes(box_history, affine[0])

        # Warp the boxes with the affine transform.
        (boxes, box_history) = affine_warp_boxes(affine[2],
                                                 boxes,
                                                 affine[1],
                                                 box_history=box_history)

        # Get all the boxes that still remain in the image and store
        # in a bit vector for later use.
        cond = tf.logical_and(get_valid_boxes(boxes), cond)

        # Normalize the boxes to [0, 1].
        output_size = affine[1]
        boxes = bbox_ops.normalize_boxes(boxes, output_size)
        box_history = bbox_ops.normalize_boxes(box_history, output_size)

    # Remove the bad boxes.
    boxes *= tf.cast(tf.expand_dims(cond, axis=-1), boxes.dtype)

    # Threshold the existing boxes.
    if augment:
        boxes_ = bbox_ops.denormalize_boxes(boxes, output_size)
        box_history_ = bbox_ops.denormalize_boxes(box_history, output_size)
        inds = boxes_candidates(boxes_, box_history_, area_thr=area_thresh)
        # Select and gather the good boxes.
        if shuffle_boxes:
            inds = tf.random.shuffle(inds, seed=seed)
    else:
        boxes = box_history
        boxes_ = bbox_ops.denormalize_boxes(boxes, output_size)
        inds = bbox_ops.get_non_empty_box_indices(boxes_)
    boxes = tf.gather(boxes, inds)
    return boxes, inds
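A quick sketch of the simplest call path, assuming this module's helpers are importable: with `infos=None` and no affine, the function just clips the normalized boxes to [0, 1] and drops boxes that collapse to zero area.

import tensorflow as tf

boxes = tf.constant([[0.1, 0.1, 0.4, 0.5],     # valid, kept as-is
                     [0.2, 0.3, 0.2, 0.9],     # zero height, removed
                     [-0.2, -0.2, 0.3, 0.3]])  # clipped into the image
clipped_boxes, keep_inds = transform_and_clip_boxes(boxes, None)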
Example #8
    def _parse_train_data(self, data):
        """Generates images and labels that are usable for model training.

    Args:
      data: a dict of Tensors produced by the decoder.
    Returns:
      images: the image tensor.
      labels: a dict of Tensors that contains labels.
    """

        shape = tf.shape(data['image'])
        image = data['image'] / 255
        boxes = data['groundtruth_boxes']
        height = shape[0]
        width = shape[1]

        image, boxes = yolo_preprocess_ops.fit_preserve_aspect_ratio(
            image,
            boxes,
            width=width,
            height=height,
            target_dim=self._max_process_size)

        image_shape = tf.shape(image)[:2]

        if self._random_flip:
            image, boxes, _ = preprocess_ops.random_horizontal_flip(
                image, boxes, seed=self._seed)

        randscale = self._image_w // self._net_down_scale

        if not self._fixed_size:
            do_scale = tf.greater(
                tf.random.uniform([], minval=0, maxval=1, seed=self._seed),
                0.5)
            if do_scale:
                # This scales the image to a random multiple of net_down_scale
                # between 320 and 608.
                randscale = tf.random.uniform(
                    [],
                    minval=self._min_process_size // self._net_down_scale,
                    maxval=self._max_process_size // self._net_down_scale,
                    seed=self._seed,
                    dtype=tf.int32) * self._net_down_scale

        if self._jitter_boxes != 0.0:
            boxes = box_ops.denormalize_boxes(boxes, image_shape)
            boxes = box_ops.jitter_boxes(boxes, 0.025)
            boxes = box_ops.normalize_boxes(boxes, image_shape)

        # The YOLO loss function uses the (x-center, y-center, width, height)
        # box format.
        boxes = yolo_box_ops.yxyx_to_xcycwh(boxes)

        if self._jitter_im != 0.0:
            image, boxes = yolo_preprocess_ops.random_translate(
                image, boxes, self._jitter_im, seed=self._seed)

        if self._aug_rand_zoom:
            image, boxes = yolo_preprocess_ops.resize_crop_filter(
                image,
                boxes,
                default_width=self._image_w,
                default_height=self._image_h,
                target_width=randscale,
                target_height=randscale)
        image = tf.image.resize(image, (416, 416), preserve_aspect_ratio=False)

        if self._aug_rand_brightness:
            image = tf.image.random_brightness(image=image,
                                               max_delta=.1)  # Brightness
        if self._aug_rand_saturation:
            image = tf.image.random_saturation(image=image,
                                               lower=0.75,
                                               upper=1.25)  # Saturation
        if self._aug_rand_hue:
            image = tf.image.random_hue(image=image, max_delta=.3)  # Hue
        image = tf.clip_by_value(image, 0.0, 1.0)
        # Find the best anchor for the ground truth labels to maximize the iou
        best_anchors = yolo_preprocess_ops.get_best_anchor(
            boxes, self._anchors, width=self._image_w, height=self._image_h)

        # Padding
        boxes = preprocess_ops.clip_or_pad_to_fixed_size(
            boxes, self._max_num_instances, 0)
        classes = preprocess_ops.clip_or_pad_to_fixed_size(
            data['groundtruth_classes'], self._max_num_instances, -1)
        best_anchors = preprocess_ops.clip_or_pad_to_fixed_size(
            best_anchors, self._max_num_instances, 0)
        area = preprocess_ops.clip_or_pad_to_fixed_size(
            data['groundtruth_area'], self._max_num_instances, 0)
        is_crowd = preprocess_ops.clip_or_pad_to_fixed_size(
            tf.cast(data['groundtruth_is_crowd'], tf.int32),
            self._max_num_instances, 0)

        labels = {
            'source_id': data['source_id'],
            'bbox': tf.cast(boxes, self._dtype),
            'classes': tf.cast(classes, self._dtype),
            'area': tf.cast(area, self._dtype),
            'is_crowd': is_crowd,
            'best_anchors': tf.cast(best_anchors, self._dtype),
            'width': width,
            'height': height,
            'num_detections': tf.shape(data['groundtruth_classes'])[0],
        }

        if self._fixed_size:
            grid = self._build_grid(labels,
                                    self._image_w,
                                    use_tie_breaker=self._use_tie_breaker)
            labels.update({'grid_form': grid})

        return image, labels
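The `clip_or_pad_to_fixed_size` calls above give every label tensor a fixed leading dimension so that examples batch cleanly. A minimal sketch, assuming the TF Model Garden `preprocess_ops` module (the import path may vary by release):

import tensorflow as tf
from official.vision.ops import preprocess_ops

boxes = tf.random.uniform([5, 4])
# Pad the leading dimension with zeros up to 200 rows (or truncate past 200).
padded = preprocess_ops.clip_or_pad_to_fixed_size(boxes, 200, 0)
print(padded.shape)  # (200, 4); rows 5..199 are zeros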
Example #9
    def serve(self, images: tf.Tensor):
        """Cast image to float and run inference.

    Args:
      images: uint8 Tensor of shape [batch_size, None, None, 3]
    Returns:
      A dictionary holding the detection outputs.
    """
        model_params = self.params.task.model
        with tf.device('cpu:0'):
            images = tf.cast(images, dtype=tf.float32)

            # Tensor Specs for map_fn outputs (images, anchor_boxes, and image_info).
            images_spec = tf.TensorSpec(shape=self._input_image_size + [3],
                                        dtype=tf.float32)

            num_anchors = model_params.anchor.num_scales * len(
                model_params.anchor.aspect_ratios) * 4
            anchor_shapes = []
            for level in range(model_params.min_level,
                               model_params.max_level + 1):
                anchor_level_spec = tf.TensorSpec(shape=[
                    self._input_image_size[0] // 2**level,
                    self._input_image_size[1] // 2**level, num_anchors
                ],
                                                  dtype=tf.float32)
                anchor_shapes.append((str(level), anchor_level_spec))

            image_info_spec = tf.TensorSpec(shape=[4, 2], dtype=tf.float32)

            images, anchor_boxes, image_info = tf.nest.map_structure(
                tf.identity,
                tf.map_fn(self._build_inputs,
                          elems=images,
                          fn_output_signature=(images_spec,
                                               dict(anchor_shapes),
                                               image_info_spec),
                          parallel_iterations=32))

        input_image_shape = image_info[:, 1, :]

        # To work around a keras.Model limitation when saving a model whose
        # layers take multiple inputs, we call `model.call` directly to trigger
        # the forward pass. Note that this bypasses some of the Keras magic
        # that happens in `__call__`.
        detections = self.model.call(images=images,
                                     image_shape=input_image_shape,
                                     anchor_boxes=anchor_boxes,
                                     training=False)

        if self.params.task.model.detection_generator.apply_nms:
            # For RetinaNet model, apply export_config.
            # TODO(huizhongc): Add export_config to fasterrcnn and maskrcnn as needed.
            if isinstance(self.params.task.model, configs.retinanet.RetinaNet):
                export_config = self.params.task.export_config
                # Normalize detection box coordinates to [0, 1].
                if export_config.output_normalized_coordinates:
                    detection_boxes = (
                        detections['detection_boxes'] /
                        tf.tile(image_info[:, 2:3, :], [1, 1, 2]))
                    detections['detection_boxes'] = box_ops.normalize_boxes(
                        detection_boxes, image_info[:, 0:1, :])

                # Cast num_detections and detection_classes to float. This allows the
                # model inference to work on chain (go/chain) as chain requires floating
                # point outputs.
                if export_config.cast_num_detections_to_float:
                    detections['num_detections'] = tf.cast(
                        detections['num_detections'], dtype=tf.float32)
                if export_config.cast_detection_classes_to_float:
                    detections['detection_classes'] = tf.cast(
                        detections['detection_classes'], dtype=tf.float32)

            final_outputs = {
                'detection_boxes': detections['detection_boxes'],
                'detection_scores': detections['detection_scores'],
                'detection_classes': detections['detection_classes'],
                'num_detections': detections['num_detections']
            }
        else:
            final_outputs = {
                'decoded_boxes': detections['decoded_boxes'],
                'decoded_box_scores': detections['decoded_box_scores']
            }

        if 'detection_masks' in detections.keys():
            final_outputs['detection_masks'] = detections['detection_masks']

        final_outputs.update({'image_info': image_info})
        return final_outputs
Example #10
    def _parse_train_data(self, data):
        """Parses data for training.

    Args:
      data: the decoded tensor dictionary from TfExampleDecoder.

    Returns:
      image: image tensor that is preprocessed to have normalized values and
        dimension [output_size[0], output_size[1], 3]
      labels: a dictionary of tensors used for training. The following describes
        {key: value} pairs in the dictionary.
        image_info: a 2D `Tensor` that encodes the information of the image and
          the applied preprocessing. It is in the format of
          [[original_height, original_width], [scaled_height, scaled_width],
          [y_scale, x_scale], [y_offset, x_offset]].
        anchor_boxes: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, 4] representing anchor boxes at each level.
        rpn_score_targets: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, anchors_per_location]. The height_l and
          width_l represent the dimension of class logits at l-th level.
        rpn_box_targets: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, anchors_per_location * 4]. The height_l and
          width_l represent the dimension of bounding box regression output at
          l-th level.
        gt_boxes: Groundtruth bounding box annotations. The box is represented
           in [y1, x1, y2, x2] format. The coordinates are w.r.t the scaled
           image that is fed to the network. The tensor is padded with -1 to
           the fixed dimension [self._max_num_instances, 4].
        gt_classes: Groundtruth classes annotations. The tensor is padded
          with -1 to the fixed dimension [self._max_num_instances].
        gt_masks: groundtruth masks cropped by the bounding box and
          resized to a fixed size determined by mask_crop_size.
    """
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        if self._include_mask:
            masks = data['groundtruth_instance_masks']

        is_crowds = data['groundtruth_is_crowd']
        # Skips annotations with `is_crowd` = True.
        if self._skip_crowd_during_training:
            num_groundtruths = tf.shape(classes)[0]
            with tf.control_dependencies([num_groundtruths, is_crowds]):
                indices = tf.cond(
                    tf.greater(tf.size(is_crowds), 0),
                    lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                    lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
            classes = tf.gather(classes, indices)
            boxes = tf.gather(boxes, indices)
            if self._include_mask:
                masks = tf.gather(masks, indices)

        # Gets original image and its size.
        image = data['image']
        image_shape = tf.shape(image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            if self._include_mask:
                image, boxes, masks = preprocess_ops.random_horizontal_flip(
                    image, boxes, masks)
            else:
                image, boxes, _ = preprocess_ops.random_horizontal_flip(
                    image, boxes)

        # Converts boxes from normalized coordinates to pixel coordinates.
        # Now the coordinates of boxes are w.r.t. the original image.
        boxes = box_ops.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        image_height, image_width, _ = image.get_shape().as_list()

        # Resizes and crops boxes.
        # Now the coordinates of boxes are w.r.t the scaled image.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
                                                     image_info[1, :], offset)

        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)
        if self._include_mask:
            masks = tf.gather(masks, indices)
            # Transfer boxes to the original image space and do normalization.
            cropped_boxes = boxes + tf.tile(tf.expand_dims(offset, axis=0),
                                            [1, 2])
            cropped_boxes /= tf.tile(tf.expand_dims(image_scale, axis=0),
                                     [1, 2])
            cropped_boxes = box_ops.normalize_boxes(cropped_boxes, image_shape)
            num_masks = tf.shape(masks)[0]
            masks = tf.image.crop_and_resize(
                tf.expand_dims(masks, axis=-1),
                cropped_boxes,
                box_indices=tf.range(num_masks, dtype=tf.int32),
                crop_size=[self._mask_crop_size, self._mask_crop_size],
                method='bilinear')
            masks = tf.squeeze(masks, axis=-1)

        # Assigns anchor targets.
        # Note that after the target assignment, box targets are absolute pixel
        # offsets w.r.t. the scaled image.
        input_anchor = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = input_anchor(image_size=(image_height, image_width))
        anchor_labeler = anchor.RpnAnchorLabeler(self._rpn_match_threshold,
                                                 self._rpn_unmatched_threshold,
                                                 self._rpn_batch_size_per_im,
                                                 self._rpn_fg_fraction)
        rpn_score_targets, rpn_box_targets = anchor_labeler.label_anchors(
            anchor_boxes, boxes,
            tf.cast(tf.expand_dims(classes, axis=-1), dtype=tf.float32))

        # Casts input image to self._dtype
        image = tf.cast(image, dtype=self._dtype)

        # Packs labels for model_fn outputs.
        labels = {
            'anchor_boxes':
            anchor_boxes,
            'image_info':
            image_info,
            'rpn_score_targets':
            rpn_score_targets,
            'rpn_box_targets':
            rpn_box_targets,
            'gt_boxes':
            preprocess_ops.clip_or_pad_to_fixed_size(boxes,
                                                     self._max_num_instances,
                                                     -1),
            'gt_classes':
            preprocess_ops.clip_or_pad_to_fixed_size(classes,
                                                     self._max_num_instances,
                                                     -1),
        }
        if self._include_mask:
            labels['gt_masks'] = preprocess_ops.clip_or_pad_to_fixed_size(
                masks, self._max_num_instances, -1)

        return image, labels
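The mask-cropping step above can be isolated for illustration: each instance mask is cropped by its normalized box and resized to a fixed mask size. A sketch with made-up shapes:

import tensorflow as tf

masks = tf.random.uniform([3, 480, 640])          # [num_instances, H, W]
norm_boxes = tf.constant([[0.1, 0.1, 0.6, 0.5],
                          [0.2, 0.3, 0.9, 0.8],
                          [0.0, 0.0, 1.0, 1.0]])  # [y1, x1, y2, x2] in [0, 1]
# Crop mask i by box i (box_indices maps each box to its own mask) and
# resize each crop to a fixed 112x112 grid.
crops = tf.image.crop_and_resize(
    tf.expand_dims(masks, axis=-1), norm_boxes,
    box_indices=tf.range(3), crop_size=[112, 112], method='bilinear')
crops = tf.squeeze(crops, axis=-1)                # [3, 112, 112]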
Example #11
    def run_on_image_dir(self,
                         image_path_glob: str,
                         output_dir: str,
                         preprocess_fn: Callable[[tf.Tensor], tf.Tensor],
                         inference_fn: Callable[[tf.Tensor], tf.Tensor],
                         class_names_path: str,
                         save_logits_bin: bool = False,
                         *args,
                         **kwargs):
        """Runs inference graph for the model, for given directory of images
    
    Args:
      image_path_glob: `str`, path pattern for images
      output_dir: `str`, path to output logs
      preprocess_fn: `Callable`, takes image tensor of shape (1, height, 
        width, channels), produces altered image tensor of same shape
      inference_fn: `Callable`, takes image tensor of shape (1, height,
        width, channels), outputs a tuple of model output tensors
      class_names_path: `str`, path to txt file containing classes. Text file
        should contain one class name per line.
      save_logits_bin: `bool`, flag to save output tensors as binary files
    """
        cmap = get_colormap(cmap_type='cityscapes').numpy()
        dataset = run_lib.inference_dataset(image_path_glob=image_path_glob,
                                            output_dir=output_dir,
                                            preprocess_fn=preprocess_fn)

        class_names = run_lib.load_class_names(
            class_names_paths=class_names_path)
        if len(class_names) != 2:
            raise ValueError(
                'Class name paths found: %s, please specify only 2 '
                '(cls, yolo).' % class_names)

        for image, img_filename, save_basename in dataset:

            logits = inference_fn(image)
            if len(logits) != 7:
                raise NotImplementedError(
                    'Inference for multitask is only implemented for '
                    'argmax_outputs=True, visualise_outputs=True, '
                    'class_present_outputs=True.')

            (cls_env, seg_mask, seg_visualised, is_classes_present,
             yolo_boxes, yolo_classes, yolo_scores) = logits
            if yolo_classes.dtype == 'float32':
                yolo_classes, yolo_scores = yolo_scores, yolo_classes

            if save_logits_bin:
                run_lib.write_tensor_as_bin(tensor=image,
                                            output_path=save_basename +
                                            '_input')
                run_lib.write_tensor_as_bin(tensor=seg_mask,
                                            output_path=save_basename +
                                            '_mask')
                run_lib.write_tensor_as_bin(tensor=seg_visualised,
                                            output_path=save_basename +
                                            '_visualised_mask')
                run_lib.write_tensor_as_bin(tensor=yolo_boxes,
                                            output_path=save_basename +
                                            '_boxes')
                run_lib.write_tensor_as_bin(tensor=yolo_scores,
                                            output_path=save_basename +
                                            '_scores')
                run_lib.write_tensor_as_bin(tensor=yolo_classes,
                                            output_path=save_basename +
                                            '_classes')

            image = tf.image.resize(image, self._input_image_size)
            image = tf.cast(image, tf.uint8)
            yolo_boxes = box_ops.normalize_boxes(yolo_boxes,
                                                 self._input_image_size)

            output_image = run_lib.draw_bbox(
                image=run_lib.tensor_to_numpy(image).squeeze(),
                bboxes=run_lib.tensor_to_numpy(yolo_boxes),
                scores=run_lib.tensor_to_numpy(yolo_scores),
                classes=run_lib.tensor_to_numpy(yolo_classes),
                num_bboxes=tf.constant(yolo_classes.shape[0]).numpy(),
                class_names=class_names[1])
            env_val = run_lib.tensor_to_numpy(cls_env)[0]
            output_image = run_lib.draw_text(
                image=output_image,
                text_list=[class_names[0][env_val]],
                spacing=20)

            seg_mask = tf.squeeze(seg_mask).numpy()
            if seg_mask.ndim > 2:
                seg_mask = np.argmax(seg_mask, axis=-1).astype(np.uint8)
            seg_mask = cmap[seg_mask]
            output_image = np.hstack((output_image, seg_mask))

            output_image = tf.image.encode_png(output_image)
            tf.io.write_file(save_basename + '.png', output_image)
            print("Visualised %s, saving result at %s" %
                  (img_filename, save_basename + '.png'))
Example #12
  def train_step(self,
                 inputs: Tuple,
                 model: tf.keras.Model,
                 optimizer: tf.keras.optimizers.Optimizer,
                 metrics: Optional[List[Any]] = None):
    """Does forward and backward.

    Args:
      inputs: a tuple of (features, labels) input tensors.
      model: the model, forward pass definition.
      optimizer: the optimizer for this training step.
      metrics: a nested structure of metrics objects.

    Returns:
      A dictionary of logs.
    """
    features, labels = inputs

    input_partition_dims = self.task_config.train_input_partition_dims
    if input_partition_dims:
      strategy = tf.distribute.get_strategy()
      features = strategy.experimental_split_to_logical_devices(
          features, input_partition_dims)
    
    input_shape = self.task_config.model.input_size[:2]
    normalized_boxes = box_ops.normalize_boxes(labels['raw_bboxes'], input_shape)
    bbox_color = tf.constant([[1.0, 1.0, 0.0, 1.0]])
    self.image_summary_manager.write_summaries({
      'input_images': features,
      'bbox': tf.image.draw_bounding_boxes(features, normalized_boxes, bbox_color)
    })
    
    num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
    with tf.GradientTape() as tape:
      outputs = model(features, training=True)
      # Casting the outputs to float32 is necessary when the mixed-precision
      # policy is mixed_float16 or mixed_bfloat16, so the loss is computed in
      # float32.
      outputs = tf.nest.map_structure(
          lambda x: tf.cast(x, tf.float32), outputs)

      # Computes per-replica loss.
      loss, giou_loss, conf_loss, prob_loss = self.build_losses(
          model_outputs=outputs, labels=labels, aux_losses=model.losses)
      # Scales the loss, since the default gradient allreduce performs a sum
      # inside the optimizer.
      scaled_loss = loss / num_replicas

      # For mixed_precision policy, when LossScaleOptimizer is used, loss is
      # scaled for numerical stability.
      if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
        scaled_loss = optimizer.get_scaled_loss(scaled_loss)
    
    tvars = model.trainable_variables
    grads = tape.gradient(scaled_loss, tvars)
    # Scales back gradient before apply_gradients when LossScaleOptimizer is
    # used.
    if isinstance(optimizer, tf.keras.mixed_precision.LossScaleOptimizer):
      grads = optimizer.get_unscaled_gradients(grads)
    optimizer.apply_gradients(list(zip(grads, tvars)))

    logs = {self.loss: loss}
    all_losses = {
      'giou_loss': giou_loss,
      'conf_loss': conf_loss,
      'prob_loss': prob_loss
    }
    if metrics:
      # Process metrics use labels and outputs; Mean metrics use values only.
      for m in metrics:
        m.update_state(all_losses[m.name])
        logs.update({m.name: m.result()})

    return logs
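The loss-scaling logic in this step follows the standard Keras mixed-precision pattern: scale the loss before computing gradients, then unscale the gradients before applying them. A minimal, self-contained sketch (the tiny model and data are made up for illustration):

import tensorflow as tf

model = tf.keras.Sequential([tf.keras.layers.Dense(1)])
optimizer = tf.keras.mixed_precision.LossScaleOptimizer(
    tf.keras.optimizers.SGD(learning_rate=0.1))
x, y = tf.ones([4, 8]), tf.zeros([4, 1])

with tf.GradientTape() as tape:
  loss = tf.reduce_mean(tf.square(model(x, training=True) - y))
  # Scale the loss up so small gradients survive float16 underflow.
  scaled_loss = optimizer.get_scaled_loss(loss)

grads = tape.gradient(scaled_loss, model.trainable_variables)
# Undo the scaling before applying the gradients.
grads = optimizer.get_unscaled_gradients(grads)
optimizer.apply_gradients(zip(grads, model.trainable_variables))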
Example #13
  def _parse_train_data(self, data):
    """Generates images and labels that are usable for model training.
        Args:
          data: a dict of Tensors produced by the decoder.
        Returns:
          images: the image tensor.
          labels: a dict of Tensors that contains labels.
        """

    # Normalize the image to [0, 1].
    image = data['image'] / 255

    boxes = data['groundtruth_boxes']
    classes = data['groundtruth_classes']

    do_blur = tf.random.uniform([],
                                minval=0,
                                maxval=1,
                                seed=self._seed,
                                dtype=tf.float32)
    if do_blur > 0.9:
      image = tfa.image.gaussian_filter2d(image, filter_shape=7, sigma=15)
    elif do_blur > 0.7:
      image = tfa.image.gaussian_filter2d(image, filter_shape=5, sigma=6)
    elif do_blur > 0.4:
      image = tfa.image.gaussian_filter2d(image, filter_shape=5, sigma=3)

    image = tf.image.rgb_to_hsv(image)
    i_h, i_s, i_v = tf.split(image, 3, axis=-1)
    if self._aug_rand_hue:
      # Randomly shift the hue.
      delta = preprocessing_ops.rand_uniform_strong(-0.1, 0.1)
      i_h = tf.clip_by_value(i_h + delta, 0.0, 1.0)
    if self._aug_rand_saturation:
      # Randomly scale the saturation.
      delta = preprocessing_ops.rand_scale(0.75)
      i_s = i_s * delta
    if self._aug_rand_brightness:
      # Randomly scale the brightness (the value channel).
      delta = preprocessing_ops.rand_scale(0.75)
      i_v = i_v * delta
    image = tf.concat([i_h, i_s, i_v], axis=-1)
    image = tf.image.hsv_to_rgb(image)

    stddev = tf.random.uniform([],
                               minval=0,
                               maxval=40 / 255,
                               seed=self._seed,
                               dtype=tf.float32)
    noise = tf.random.normal(
        shape=tf.shape(image), mean=0.0, stddev=stddev, seed=self._seed)
    noise = tf.math.minimum(noise, 0.5)
    noise = tf.math.maximum(noise, 0)
    image += noise
    image = tf.clip_by_value(image, 0.0, 1.0)

    image_shape = tf.shape(image)[:2]

    if self._random_flip:
      image, boxes, _ = preprocess_ops.random_horizontal_flip(
          image, boxes, seed=self._seed)

    if self._jitter_boxes != 0.0:
      boxes = box_ops.denormalize_boxes(boxes, image_shape)
      boxes = box_ops.jitter_boxes(boxes, 0.025)
      boxes = box_ops.normalize_boxes(boxes, image_shape)

    if self._jitter_im != 0.0:
      image, boxes, classes = preprocessing_ops.random_jitter(
          image, boxes, classes, self._jitter_im, seed=self._seed)

    if self._aug_rand_zoom:
      image, boxes, classes = preprocessing_ops.random_zoom_crop(
          image, boxes, classes, self._jitter_im)

    shape = tf.shape(image)
    width = shape[1]
    height = shape[0]
    randscale = self._image_w // self._net_down_scale

    if self._fixed_size:
      do_scale = tf.greater(
          tf.random.uniform([], minval=0, maxval=1, seed=self._seed),
          1 - self._pct_rand)
      if do_scale:
        randscale = tf.random.uniform([],
                                      minval=10,
                                      maxval=15,
                                      seed=self._seed,
                                      dtype=tf.int32)

    if self._letter_box:
      image, boxes = preprocessing_ops.fit_preserve_aspect_ratio(
          image,
          boxes,
          width=width,
          height=height,
          target_dim=randscale * self._net_down_scale)
      width = randscale * self._net_down_scale
      height = randscale * self._net_down_scale

    shape = tf.shape(image)
    width = shape[1]
    height = shape[0]
    image, boxes, classes = preprocessing_ops.resize_crop_filter(
        image,
        boxes,
        classes,
        default_width=width,
        default_height=height,
        target_width=self._image_w,
        target_height=self._image_h,
        randomize=False)

    boxes = box_utils.yxyx_to_xcycwh(boxes)
    image = tf.clip_by_value(image, 0.0, 1.0)
    num_dets = tf.shape(classes)[0]

    # padding
    classes = preprocess_ops.clip_or_pad_to_fixed_size(classes,
                                                       self._max_num_instances,
                                                       -1)

    if self._fixed_size and not self._cutmix:
      best_anchors = preprocessing_ops.get_best_anchor(
          boxes, self._anchors, width=self._image_w, height=self._image_h)
      best_anchors = preprocess_ops.clip_or_pad_to_fixed_size(
          best_anchors, self._max_num_instances, 0)
      boxes = preprocess_ops.clip_or_pad_to_fixed_size(boxes,
                                                       self._max_num_instances,
                                                       0)
      labels = {
          'source_id': data['source_id'],
          'bbox': tf.cast(boxes, self._dtype),
          'classes': tf.cast(classes, self._dtype),
          'best_anchors': tf.cast(best_anchors, self._dtype),
          'width': width,
          'height': height,
          'num_detections': num_dets
      }
      grid = self._build_grid(
          labels, self._image_w, use_tie_breaker=self._use_tie_breaker)
      labels.update({'grid_form': grid})
      labels['bbox'] = box_utils.xcycwh_to_yxyx(labels['bbox'])
    else:
      boxes = preprocess_ops.clip_or_pad_to_fixed_size(boxes,
                                                       self._max_num_instances,
                                                       0)
      labels = {
          'source_id': data['source_id'],
          'bbox': tf.cast(boxes, self._dtype),
          'classes': tf.cast(classes, self._dtype),
          'width': width,
          'height': height,
          'num_detections': num_dets
      }
    return image, labels
Example #14
  def preprocess(self, inputs):
    """Preprocess COCO for DETR."""
    image = inputs['image']
    boxes = inputs['objects']['bbox']
    classes = inputs['objects']['label'] + 1
    is_crowd = inputs['objects']['is_crowd']

    image = preprocess_ops.normalize_image(image)
    if self._params.is_training:
      image, boxes, _ = preprocess_ops.random_horizontal_flip(image, boxes)

      do_crop = tf.greater(tf.random.uniform([]), 0.5)
      if do_crop:
        # Rescale
        boxes = box_ops.denormalize_boxes(boxes, tf.shape(image)[:2])
        index = tf.random.categorical(tf.zeros([1, 3]), 1)[0]
        scales = tf.gather([400.0, 500.0, 600.0], index, axis=0)
        short_side = scales[0]
        image, image_info = preprocess_ops.resize_image(image, short_side)
        boxes = preprocess_ops.resize_and_crop_boxes(boxes,
                                                     image_info[2, :],
                                                     image_info[1, :],
                                                     image_info[3, :])
        boxes = box_ops.normalize_boxes(boxes, image_info[1, :])

        # Do the cropping.
        shape = tf.cast(image_info[1], dtype=tf.int32)
        h = tf.random.uniform(
            [], 384, tf.math.minimum(shape[0], 600), dtype=tf.int32)
        w = tf.random.uniform(
            [], 384, tf.math.minimum(shape[1], 600), dtype=tf.int32)
        i = tf.random.uniform([], 0, shape[0] - h + 1, dtype=tf.int32)
        j = tf.random.uniform([], 0, shape[1] - w + 1, dtype=tf.int32)
        image = tf.image.crop_to_bounding_box(image, i, j, h, w)
        boxes = tf.clip_by_value(
            (boxes[..., :] * tf.cast(
                tf.stack([shape[0], shape[1], shape[0], shape[1]]),
                dtype=tf.float32) -
             tf.cast(tf.stack([i, j, i, j]), dtype=tf.float32)) /
            tf.cast(tf.stack([h, w, h, w]), dtype=tf.float32), 0.0, 1.0)
      scales = tf.constant(
          self._params.resize_scales,
          dtype=tf.float32)
      index = tf.random.categorical(tf.zeros([1, 11]), 1)[0]
      scales = tf.gather(scales, index, axis=0)
    else:
      scales = tf.constant([self._params.resize_scales[-1]], tf.float32)

    image_shape = tf.shape(image)[:2]
    boxes = box_ops.denormalize_boxes(boxes, image_shape)
    gt_boxes = boxes
    short_side = scales[0]
    image, image_info = preprocess_ops.resize_image(
        image,
        short_side,
        max(self._params.output_size))
    boxes = preprocess_ops.resize_and_crop_boxes(boxes,
                                                 image_info[2, :],
                                                 image_info[1, :],
                                                 image_info[3, :])
    boxes = box_ops.normalize_boxes(boxes, image_info[1, :])

    # Filters out ground truth boxes that are all zeros.
    indices = box_ops.get_non_empty_box_indices(boxes)
    boxes = tf.gather(boxes, indices)
    classes = tf.gather(classes, indices)
    is_crowd = tf.gather(is_crowd, indices)
    boxes = box_ops.yxyx_to_cycxhw(boxes)

    image = tf.image.pad_to_bounding_box(
        image, 0, 0, self._params.output_size[0], self._params.output_size[1])
    labels = {
        'classes':
            preprocess_ops.clip_or_pad_to_fixed_size(
                classes, self._params.max_num_boxes),
        'boxes':
            preprocess_ops.clip_or_pad_to_fixed_size(
                boxes, self._params.max_num_boxes)
    }
    if not self._params.is_training:
      labels.update({
          'id':
              inputs['image/id'],
          'image_info':
              image_info,
          'is_crowd':
              preprocess_ops.clip_or_pad_to_fixed_size(
                  is_crowd, self._params.max_num_boxes),
          'gt_boxes':
              preprocess_ops.clip_or_pad_to_fixed_size(
                  gt_boxes, self._params.max_num_boxes),
      })

    return image, labels