Example #1
 def test_box_conversions(self, num_boxes):
     boxes = tf.convert_to_tensor(np.random.rand(num_boxes, 4))
     expected_shape = np.array([num_boxes, 4])
     xywh_box = box_ops.yxyx_to_xcycwh(boxes)
     yxyx_box = box_ops.xcycwh_to_yxyx(boxes)
     self.assertAllEqual(tf.shape(xywh_box).numpy(), expected_shape)
     self.assertAllEqual(tf.shape(yxyx_box).numpy(), expected_shape)
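A minimal sketch of the conversion these tests exercise, assuming the yolo (x_center, y_center, width, height) and tensorflow (ymin, xmin, ymax, xmax) layouts named in the comments of Example #2; xcycwh_to_yxyx_sketch below is a hypothetical stand-in, not the box_ops implementation:

import tensorflow as tf

def xcycwh_to_yxyx_sketch(boxes):
  # split the last axis into centers and sizes: [x_center, y_center, w, h]
  xc, yc, w, h = tf.split(boxes, 4, axis=-1)
  # assemble the tensorflow-style corners [ymin, xmin, ymax, xmax]
  return tf.concat(
      [yc - h / 2.0, xc - w / 2.0, yc + h / 2.0, xc + w / 2.0], axis=-1)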
Example #2
  def parse_prediction_path(self, key, inputs):
    shape_ = tf.shape(inputs)
    shape = inputs.get_shape().as_list()
    batchsize, height, width = shape_[0], shape[1], shape[2]

    if height is None or width is None:
      height, width = shape_[1], shape_[2]

    generator = self._generator[key]
    len_mask = self._len_mask[key]
    scale_xy = self._scale_xy[key]

    # reshape the yolo output to (batch_size,
    #                             height,
    #                             width,
    #                             number_anchors,
    #                             remaining_points)
    data = tf.reshape(inputs, [-1, height, width, len_mask, self._classes + 5])

    # use the grid generator to get the formatted anchor boxes and grid points
    # in shape [1, height, width, 2]
    centers, anchors = generator(height, width, batchsize, dtype=data.dtype)

    # split the yolo detections into boxes, object score map, classes
    boxes, obns_scores, class_scores = tf.split(
        data, [4, 1, self._classes], axis=-1)

    # determine the number of classes
    classes = class_scores.get_shape().as_list()[-1]

    # decode the predicted boxes; use of the scaled-YOLOv4 coordinate scheme is configurable
    _, _, boxes = loss_utils.get_predicted_box(
        tf.cast(height, data.dtype),
        tf.cast(width, data.dtype),
        boxes,
        anchors,
        centers,
        scale_xy,
        stride=self._path_scale[key],
        darknet=False,
        box_type=self._box_type[key])

    # convert boxes from yolo (x, y, w, h) to tensorflow (ymin, xmin, ymax, xmax)
    boxes = box_ops.xcycwh_to_yxyx(boxes)

    # apply sigmoid activation to the objectness detection map
    obns_scores = tf.math.sigmoid(obns_scores)

    # convert detection map to class detection probabilities
    class_scores = tf.math.sigmoid(class_scores) * obns_scores

    # flatten predictions to [batch_size, N, -1] for non-max suppression
    fill = height * width * len_mask
    boxes = tf.reshape(boxes, [-1, fill, 4])
    class_scores = tf.reshape(class_scores, [-1, fill, classes])
    obns_scores = tf.reshape(obns_scores, [-1, fill])
    return obns_scores, boxes, class_scores
Example #3
def fit_preserve_aspect_ratio(image,
                              boxes,
                              width=None,
                              height=None,
                              target_dim=None):
    """Resizes the image while peserving the image aspect ratio.

  Args:
      image: a `Tensor` representing the image.
      boxes: a `Tensor` representing the boxes.
      width: int for the image width.
      height: int for the image height.
      target_dim: list or a Tensor of height and width.
  Returns:
      image: a `Tensor` representing the image.
      box: a `Tensor` representing the boxes.
  """
    if width is None or height is None:
        shape = tf.shape(image)
        if tf.shape(shape)[0] == 4:
            width = shape[1]
            height = shape[2]
        else:
            width = shape[0]
            height = shape[1]

    clipper = tf.math.maximum(width, height)
    if target_dim is None:
        target_dim = clipper

    pad_width = clipper - width
    pad_height = clipper - height
    image = tf.image.pad_to_bounding_box(image, pad_width // 2,
                                         pad_height // 2, clipper, clipper)

    boxes = box_ops.yxyx_to_xcycwh(boxes)
    x, y, w, h = tf.split(boxes, 4, axis=-1)

    y *= tf.cast(width / clipper, tf.float32)
    x *= tf.cast(height / clipper, tf.float32)

    y += tf.cast((pad_width / clipper) / 2, tf.float32)
    x += tf.cast((pad_height / clipper) / 2, tf.float32)

    h *= tf.cast(width / clipper, tf.float32)
    w *= tf.cast(height / clipper, tf.float32)

    boxes = tf.concat([x, y, w, h], axis=-1)

    boxes = box_ops.xcycwh_to_yxyx(boxes)
    image = tf.image.resize(image, (target_dim, target_dim))
    return image, boxes
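A hedged usage sketch for the function above: letterbox a non-square image to a square and resize it to target_dim, carrying the normalized (ymin, xmin, ymax, xmax) boxes along; the image shape and box values are illustrative only:

image = tf.random.uniform((480, 640, 3))
boxes = tf.constant([[0.1, 0.2, 0.6, 0.8]], dtype=tf.float32)
padded_image, padded_boxes = fit_preserve_aspect_ratio(
    image, boxes, target_dim=512)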
Example #4
    def testResizeImageBoxes(self, bbox, bbox_result, height, width,
                             target_dim, preserve_aspect_ratio):
        image = tf.random.uniform((height, width, 3))
        bbox = [
            bbox[0] * width, bbox[1] * height, bbox[2] * width,
            bbox[3] * height
        ]
        bbox = box_ops.xcycwh_to_yxyx(bbox)
        new_image, new_bbox = yolo_ops.resize_image_and_bboxes(
            image=image,
            bboxes=bbox,
            target_size=target_dim,
            preserve_aspect_ratio=preserve_aspect_ratio)

        self.assertAllClose(new_bbox, bbox_result)
        self.assertAllEqual(new_image.shape[:2], target_dim)
Example #5
    def serve(self, images):
        """Cast image to float and run inference.

    Args:
      images: uint8 Tensor of shape [batch_size, None, None, 3]
    Returns:
      Tensor holding classification output logits.
    """
        with tf.device('cpu:0'):
            images = tf.cast(images, dtype=tf.float32)

            images = tf.nest.map_structure(
                tf.identity,
                tf.map_fn(self._build_inputs,
                          elems=images,
                          fn_output_signature=tf.TensorSpec(
                              shape=self._input_image_size + [3],
                              dtype=tf.float32),
                          parallel_iterations=32))

        outputs = self.inference_step(
            images)  # tf.keras.Model's __call__ method

        num_classes = outputs['predictions']['0'].shape[-1] - 5
        bbox_tensors, _, prob_tensors = yolo_ops.concat_tensor_dict(
            tensor_dict=outputs['predictions'], num_classes=num_classes)

        boxes = tf.concat(bbox_tensors, axis=1)
        boxes = tf.squeeze(yolo_box_ops.xcycwh_to_yxyx(boxes))
        scores = tf.concat(prob_tensors, axis=1)
        scores = tf.squeeze(tf.math.reduce_max(scores, axis=-1))
        classes = tf.argmax(prob_tensors, axis=-1)

        indices = tf.image.non_max_suppression(boxes=boxes,
                                               scores=scores,
                                               max_output_size=20,
                                               iou_threshold=0.5,
                                               score_threshold=0.25)

        boxes = tf.expand_dims(tf.gather(boxes, indices), axis=0)
        boxes = box_ops.normalize_boxes(boxes, self._input_image_size)
        scores = tf.expand_dims(tf.gather(scores, indices), axis=0)
        classes = tf.gather(classes, indices, axis=1)

        return {'boxes': boxes, 'classes': classes, 'scores': scores}
Example #6
    def _decode_boxes(self, parsed_tensors):
        """Concat box coordinates in the format of [x, y, width, height]."""
        x = parsed_tensors['bbox/x']
        y = parsed_tensors['bbox/y']
        w = parsed_tensors['bbox/w']
        h = parsed_tensors['bbox/h']

        if not self.is_bbox_in_pixels:
            x = x * tf.cast(parsed_tensors['image/width'], tf.float32)
            y = y * tf.cast(parsed_tensors['image/height'], tf.float32)
            w = w * tf.cast(parsed_tensors['image/width'], tf.float32)
            h = h * tf.cast(parsed_tensors['image/height'], tf.float32)

        bbox = tf.stack([x, y, w, h], axis=-1)
        if self.is_xywh:
            bbox = yolo_box_ops.xcycwh_to_yxyx(bbox)

        return bbox
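For reference, the decoder above expects a dictionary of already-parsed per-box fields under the keys used in the code; a hypothetical example of that structure (the values are illustrative, and producing the dictionary is outside this snippet):

parsed_tensors = {
    'bbox/x': tf.constant([0.50, 0.25]),  # normalized box centers
    'bbox/y': tf.constant([0.50, 0.75]),
    'bbox/w': tf.constant([0.20, 0.10]),  # normalized box sizes
    'bbox/h': tf.constant([0.30, 0.10]),
    'image/width': tf.constant(640, tf.int64),
    'image/height': tf.constant(480, tf.int64),
}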
Example #7
    def parse_prediction_path(self, key, inputs):
        shape = inputs.get_shape().as_list()
        height, width = shape[1], shape[2]

        len_mask = self._len_mask[key]

        # reshape the yolo output to (batch_size,
        #                             height,
        #                             width,
        #                             number_anchors,
        #                             remaining_points)

        data = tf.reshape(inputs,
                          [-1, height, width, len_mask, self._classes + 5])

        # split the yolo detections into boxes, object score map, classes
        boxes, obns_scores, class_scores = tf.split(data,
                                                    [4, 1, self._classes],
                                                    axis=-1)

        # determine the number of classes
        classes = class_scores.get_shape().as_list()[-1]

        # convert boxes from yolo (x, y, w, h) to tensorflow (ymin, xmin, ymax, xmax)
        boxes = box_ops.xcycwh_to_yxyx(boxes)

        # apply sigmoid activation to the objectness detection map
        obns_scores = tf.math.sigmoid(obns_scores)

        # threshold the detection map
        obns_mask = tf.cast(obns_scores > self._thresh, obns_scores.dtype)

        # convert detection map to class detection probabilities
        class_scores = tf.math.sigmoid(class_scores) * obns_mask * obns_scores
        class_scores *= tf.cast(class_scores > self._thresh,
                                class_scores.dtype)

        fill = height * width * len_mask
        # flatten predictions to [batch_size, N, -1] for non-max suppression
        boxes = tf.reshape(boxes, [-1, fill, 4])
        class_scores = tf.reshape(class_scores, [-1, fill, classes])
        obns_scores = tf.reshape(obns_scores, [-1, fill])

        return obns_scores, boxes, class_scores
Example #8
    def serve(self, images: tf.Tensor) -> Mapping[str, tf.Tensor]:
        """Cast image to float and run inference.

    Args:
      images: uint8 Tensor of shape [batch_size, None, None, 3]
    Returns:
      Tensor holding classification output logits.
    """
        # Removing nest.map_structure, as it adds a while node that is not static
        if images.shape[0] > 1:
            with tf.device('cpu:0'):
                images = tf.cast(images, dtype=tf.float32)

                images = tf.nest.map_structure(
                    tf.identity,
                    tf.map_fn(self._build_inputs,
                              elems=images,
                              fn_output_signature=tf.TensorSpec(
                                  shape=self._input_image_size + [3],
                                  dtype=tf.float32),
                              parallel_iterations=32))
        else:
            images = tf.cast(images, dtype=tf.float32)
            images = tf.squeeze(images)
            images = self._build_inputs(images)
            images = tf.expand_dims(images, axis=0)

        outputs = self.inference_step(images)
        processed_outputs = {}

        for name, output in outputs.items():

            if 'classification' in name:
                if self._argmax_outputs:
                    output = tf.math.argmax(output, -1)
                else:
                    output = tf.nn.softmax(output)
                processed_outputs[name] = output

            elif 'segmentation' in name:
                num_classes = output.shape[-1]

                if self._class_present_outputs:
                    flattened_output = tf.math.argmax(
                        tf.reshape(output, [-1, num_classes]), -1)
                    one_hotted = tf.one_hot(flattened_output, 19, axis=0)
                    class_counts = tf.reduce_sum(one_hotted, axis=-1)
                    processed_outputs[name + '_class_count'] = class_counts

                output = tf.image.resize(output,
                                         self._input_image_size,
                                         method='bilinear')

                if self._argmax_outputs:
                    output = tf.math.argmax(output, -1)
                processed_outputs[name] = output

                if self._visualise_outputs and len(output.shape) == 3:
                    colormap = get_colormap(cmap_type='cityscapes_int')
                    processed_outputs[name + '_visualised'] = tf.gather(
                        colormap, tf.cast(tf.squeeze(output), tf.int32))

            elif 'yolo' in name:
                num_classes = output['predictions']['0'].shape[-1] - 5
                bbox_tensors, _, prob_tensors = yolo_ops.concat_tensor_dict(
                    tensor_dict=output['predictions'], num_classes=num_classes)

                boxes = tf.concat(bbox_tensors, axis=1)
                boxes = tf.squeeze(yolo_box_ops.xcycwh_to_yxyx(boxes))
                scores = tf.concat(prob_tensors, axis=1)
                scores = tf.squeeze(tf.math.reduce_max(scores, axis=-1))
                classes = tf.squeeze(tf.math.argmax(prob_tensors, axis=-1))

                indices = tf.image.non_max_suppression(boxes=boxes,
                                                       scores=scores,
                                                       max_output_size=20,
                                                       iou_threshold=0.25,
                                                       score_threshold=0.25)

                boxes = tf.gather(boxes, indices)
                scores = tf.gather(scores, indices)
                classes = tf.gather(classes, indices)

                processed_outputs[name + 'boxes'] = boxes
                processed_outputs[name + 'classes'] = classes
                processed_outputs[name + 'scores'] = scores

            else:
                raise NotImplementedError(
                    'Task type %s is not implemented. '
                    'Try renaming the task routine.' % name)

        return processed_outputs
Example #9
    def _get_anchor_free(self, key, boxes, classes, height, width, stride,
                         center_radius):
        """Find the box assignements in an anchor free paradigm."""
        level_limits = self.anchor_free_level_limits[key]
        gen = loss_utils.GridGenerator(anchors=[[1, 1]], scale_anchors=stride)
        grid_points = gen(width, height, 1, boxes.dtype)[0]
        grid_points = tf.squeeze(grid_points, axis=0)
        box_list = boxes
        class_list = classes

        grid_points = (grid_points + 0.5) * stride
        x_centers, y_centers = grid_points[..., 0], grid_points[..., 1]
        boxes *= (tf.convert_to_tensor([width, height, width, height]) *
                  stride)

        tlbr_boxes = box_ops.xcycwh_to_yxyx(boxes)

        boxes = tf.reshape(boxes, [1, 1, -1, 4])
        tlbr_boxes = tf.reshape(tlbr_boxes, [1, 1, -1, 4])
        if self.use_tie_breaker:
            area = tf.reduce_prod(boxes[..., 2:], axis=-1)

        # check if the box is in the receptive field of this FPN level
        b_t = y_centers - tlbr_boxes[..., 0]
        b_l = x_centers - tlbr_boxes[..., 1]
        b_b = tlbr_boxes[..., 2] - y_centers
        b_r = tlbr_boxes[..., 3] - x_centers
        box_delta = tf.stack([b_t, b_l, b_b, b_r], axis=-1)
        if level_limits is not None:
            max_reg_targets_per_im = tf.reduce_max(box_delta, axis=-1)
            gt_min = max_reg_targets_per_im >= level_limits[0]
            gt_max = max_reg_targets_per_im <= level_limits[1]
            is_in_boxes = tf.logical_and(gt_min, gt_max)
        else:
            is_in_boxes = tf.reduce_min(box_delta, axis=-1) > 0.0
        is_in_boxes_all = tf.reduce_any(is_in_boxes,
                                        axis=(0, 1),
                                        keepdims=True)

        # check if the center is in the receptive field of this FPN level
        c_t = y_centers - (boxes[..., 1] - center_radius * stride)
        c_l = x_centers - (boxes[..., 0] - center_radius * stride)
        c_b = (boxes[..., 1] + center_radius * stride) - y_centers
        c_r = (boxes[..., 0] + center_radius * stride) - x_centers
        centers_delta = tf.stack([c_t, c_l, c_b, c_r], axis=-1)
        is_in_centers = tf.reduce_min(centers_delta, axis=-1) > 0.0
        is_in_centers_all = tf.reduce_any(is_in_centers,
                                          axis=(0, 1),
                                          keepdims=True)

        # collate all masks to get the final locations
        is_in_index = tf.logical_or(is_in_boxes_all, is_in_centers_all)
        is_in_boxes_and_center = tf.logical_and(is_in_boxes, is_in_centers)
        is_in_boxes_and_center = tf.logical_and(is_in_index,
                                                is_in_boxes_and_center)

        if self.use_tie_breaker:
            boxes_all = tf.cast(is_in_boxes_and_center, area.dtype)
            boxes_all = ((boxes_all * area) + ((1 - boxes_all) * INF))
            boxes_min = tf.reduce_min(boxes_all, axis=-1, keepdims=True)
            boxes_min = tf.where(boxes_min == INF, -1.0, boxes_min)
            is_in_boxes_and_center = boxes_all == boxes_min

        # construct the index update grid
        reps = tf.reduce_sum(tf.cast(is_in_boxes_and_center, tf.int16),
                             axis=-1)
        indexes = tf.cast(tf.where(is_in_boxes_and_center), tf.int32)
        y, x, t = tf.split(indexes, 3, axis=-1)

        boxes = tf.gather_nd(box_list, t)
        classes = tf.cast(tf.gather_nd(class_list, t), boxes.dtype)
        reps = tf.gather_nd(reps, tf.concat([y, x], axis=-1))
        reps = tf.cast(tf.expand_dims(reps, axis=-1), boxes.dtype)
        classes = tf.cast(tf.expand_dims(classes, axis=-1), boxes.dtype)
        conf = tf.ones_like(classes)

        # return the samples and the indexes
        samples = tf.concat([boxes, conf, classes], axis=-1)
        indexes = tf.concat([y, x, tf.zeros_like(t)], axis=-1)
        return indexes, samples
Example #10
def get_best_anchor(y_true,
                    anchors,
                    stride,
                    width=1,
                    height=1,
                    iou_thresh=0.25,
                    best_match_only=False,
                    use_tie_breaker=True):
    """Get the correct anchor that is assoiciated with each box using IOU.

  Args:
    y_true: tf.Tensor[] for the list of bounding boxes in the yolo format.
    anchors: list or tensor for the anchor boxes to be used in prediction found
      via Kmeans.
    stride: `int` stride for the anchors.
    width: int for the image width.
    height: int for the image height.
    iou_thresh: `float` the minimum iou threshold to use for selecting boxes for
      each level.
    best_match_only: `bool` if the box only has one match and it is less than
      the iou threshold, when set to True, this match will be dropped as no
      anchors can be linked to it.
    use_tie_breaker: `bool` if there is many anchors for a given box, then
      attempt to use all of them, if False, only the first matching box will be
      used.
  Returns:
    tf.Tensor: y_true with the anchor associated with each ground truth box
      known
  """
    with tf.name_scope('get_best_anchor'):
        width = tf.cast(width, dtype=tf.float32)
        height = tf.cast(height, dtype=tf.float32)
        scaler = tf.convert_to_tensor([width, height])

        # scale the box widths and heights to this level's output width and height
        true_wh = tf.cast(y_true[..., 2:4], dtype=tf.float32) * scaler

        # scale down from large anchor to small anchor type
        anchors = tf.cast(anchors, dtype=tf.float32) / stride

        k = tf.shape(anchors)[0]

        anchors = tf.concat([tf.zeros_like(anchors), anchors], axis=-1)
        truth_comp = tf.concat([tf.zeros_like(true_wh), true_wh], axis=-1)

        if iou_thresh >= 1.0:
            anchors = tf.expand_dims(anchors, axis=-2)
            truth_comp = tf.expand_dims(truth_comp, axis=-3)

            aspect = truth_comp[..., 2:4] / anchors[..., 2:4]
            aspect = tf.where(tf.math.is_nan(aspect), tf.zeros_like(aspect),
                              aspect)
            aspect = tf.maximum(aspect, 1 / aspect)
            aspect = tf.where(tf.math.is_nan(aspect), tf.zeros_like(aspect),
                              aspect)
            aspect = tf.reduce_max(aspect, axis=-1)

            values, indexes = tf.math.top_k(tf.transpose(-aspect, perm=[1, 0]),
                                            k=tf.cast(k, dtype=tf.int32),
                                            sorted=True)
            values = -values
            ind_mask = tf.cast(values < iou_thresh, dtype=indexes.dtype)
        else:
            truth_comp = box_ops.xcycwh_to_yxyx(truth_comp)
            anchors = box_ops.xcycwh_to_yxyx(anchors)
            iou_raw = box_ops.aggregated_comparitive_iou(
                truth_comp,
                anchors,
                iou_type=3,
            )
            values, indexes = tf.math.top_k(iou_raw,
                                            k=tf.cast(k, dtype=tf.int32),
                                            sorted=True)
            ind_mask = tf.cast(values >= iou_thresh, dtype=indexes.dtype)

        # pad the indexes such that all values below the threshold are -1:
        # add one, multiply by the mask to zero out the bad locations,
        # then subtract 1, making all the bad locations -1.
        if best_match_only:
            iou_index = ((indexes[..., 0:] + 1) * ind_mask[..., 0:]) - 1
        elif use_tie_breaker:
            iou_index = tf.concat([
                tf.expand_dims(indexes[..., 0], axis=-1),
                ((indexes[..., 1:] + 1) * ind_mask[..., 1:]) - 1
            ],
                                  axis=-1)
        else:
            iou_index = tf.concat([
                tf.expand_dims(indexes[..., 0], axis=-1),
                tf.zeros_like(indexes[..., 1:]) - 1
            ],
                                  axis=-1)

    return tf.cast(iou_index, dtype=tf.float32), tf.cast(values,
                                                         dtype=tf.float32)
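A hedged usage sketch for get_best_anchor: match two normalized (x, y, w, h) ground-truth boxes against three pixel-space anchors at a stride of 16; the anchor sizes and box values are illustrative only, not taken from any real configuration:

y_true = tf.constant([[0.5, 0.5, 0.2, 0.3],
                      [0.3, 0.7, 0.1, 0.1]], dtype=tf.float32)
anchors = [[10.0, 13.0], [33.0, 23.0], [62.0, 45.0]]
iou_index, iou_values = get_best_anchor(
    y_true, anchors, stride=16, width=640, height=640, iou_thresh=0.25)
# iou_index lists, per box, the matched anchor indexes sorted by IOU
# (-1 below the threshold); iou_values holds the matching scores.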