示例#1
0
def compute_giou(box1, box2, yxyx=False):
    """Computes the generalized intersection over union of box1 and box2.

    Args:
      box1: any `Tensor` whose last dimension is 4 representing the
        coordinates of boxes.
      box2: any `Tensor` whose last dimension is 4 representing the
        coordinates of boxes.
      yxyx: a `bool`. When True the inputs are used as-is as
        (y_min, x_min, y_max, x_max) boxes; when False they are first
        converted with `xcycwh_to_yxyx`, i.e. they are taken to be
        (x_center, y_center, width, height).

    Returns:
      iou: a `Tensor` holding the plain intersection over union.
      giou: a `Tensor` holding the generalized intersection over union,
        i.e. `iou` minus the share of the enclosing box that is not covered
        by the union.
    """
    with tf.name_scope('giou'):
        # Work in corner format from here on.
        corners1 = box1 if yxyx else xcycwh_to_yxyx(box1)
        corners2 = box2 if yxyx else xcycwh_to_yxyx(box2)

        enclose_min, enclose_max, _ = smallest_encompassing_box(
            corners1, corners2, yxyx=True)
        overlap, combined = intersect_and_union(corners1, corners2, yxyx=True)
        iou = math_ops.divide_no_nan(overlap, combined)

        # Product of the enclosing box extents along the last axis.
        enclose_extent = enclose_max - enclose_min
        enclose_area = tf.math.reduce_prod(enclose_extent, axis=-1)

        # Penalty: part of the enclosing box left uncovered by the union.
        penalty = math_ops.divide_no_nan((enclose_area - combined),
                                         enclose_area)
        giou = iou - penalty
    return iou, giou
示例#2
0
def compute_ciou(box1, box2, yxyx=False, darknet=False):
    """Computes the complete intersection over union of box1 and box2.

    Args:
      box1: any `Tensor` whose last dimension is 4 representing the
        coordinates of boxes.
      box2: any `Tensor` whose last dimension is 4 representing the
        coordinates of boxes.
      yxyx: a `bool`. When True the inputs are (y_min, x_min, y_max, x_max)
        boxes; when False they are (x_center, y_center, width, height).
      darknet: a `bool` indicating whether the calling function is the YOLO
        darknet loss. When True the aspect-ratio term is additionally scaled
        by the gradient-stopped `w**2 + h**2` of box2.

    Returns:
      iou: a `Tensor` holding the plain intersection over union.
      ciou: a `Tensor` holding the complete intersection over union.
    """
    with tf.name_scope('ciou'):
        # Both representations are needed: corners for the areas, centers
        # and sizes for the distance and aspect-ratio penalties.
        if yxyx:
            corners1, corners2 = box1, box2
            centered1 = yxyx_to_xcycwh(box1)
            centered2 = yxyx_to_xcycwh(box2)
        else:
            centered1, centered2 = box1, box2
            corners1 = xcycwh_to_yxyx(box1)
            corners2 = xcycwh_to_yxyx(box2)

        # Smallest box enclosing both inputs, plus the plain IoU.
        enclose_min, enclose_max, _ = smallest_encompassing_box(
            corners1, corners2, yxyx=True)
        overlap, combined = intersect_and_union(corners1, corners2, yxyx=True)
        iou = math_ops.divide_no_nan(overlap, combined)

        xy1, w1, h1 = tf.split(centered1, [2, 1, 1], axis=-1)
        xy2, w2, h2 = tf.split(centered2, [2, 1, 1], axis=-1)
        enclose_extent = enclose_max - enclose_min

        # Center penalty: squared center distance over the squared diagonal
        # of the enclosing box.
        center_sq = tf.reduce_sum((xy1 - xy2)**2, axis=-1)
        diag_sq = tf.reduce_sum(enclose_extent**2, axis=-1)
        center_penalty = math_ops.divide_no_nan(center_sq, diag_sq)

        # Aspect-ratio consistency between the two boxes.
        ratio1 = math_ops.divide_no_nan(w1, h1)
        ratio2 = math_ops.divide_no_nan(w2, h2)
        angle_gap = tf.math.atan(ratio2) - tf.math.atan(ratio1)
        angle_gap_sq = tf.squeeze(tf.math.pow(angle_gap, 2), axis=-1)
        v = (4 / math.pi**2) * angle_gap_sq

        # Trade-off weight; treated as a constant during back-propagation.
        alpha = tf.stop_gradient(math_ops.divide_no_nan(v, 1 - iou + v))

        if darknet:
            # Scale applied after alpha is computed, so alpha is unaffected.
            size_scale = tf.stop_gradient(tf.square(w2) + tf.square(h2))
            v *= tf.squeeze(size_scale, axis=-1)

        ciou = iou - center_penalty - (v * alpha)
    return iou, ciou
示例#3
0
def compute_diou(box1, box2, beta=1.0, yxyx=False):
    """Computes the distance intersection over union of box1 and box2.

    Args:
      box1: any `Tensor` whose last dimension is 4 representing the
        coordinates of boxes.
      box2: any `Tensor` whose last dimension is 4 representing the
        coordinates of boxes.
      beta: a `float` exponent applied to the center-distance regularization
        term before it is subtracted from the iou.
      yxyx: a `bool`. When True the inputs are (y_min, x_min, y_max, x_max)
        boxes; when False they are (x_center, y_center, width, height).

    Returns:
      iou: a `Tensor` holding the plain intersection over union.
      diou: a `Tensor` holding the distance intersection over union.
    """
    with tf.name_scope('diou'):
        # Keep a corner view (for areas) and a centered view (for distance).
        if yxyx:
            corners1, corners2 = box1, box2
            centered1 = yxyx_to_xcycwh(box1)
            centered2 = yxyx_to_xcycwh(box2)
        else:
            centered1, centered2 = box1, box2
            corners1 = xcycwh_to_yxyx(box1)
            corners2 = xcycwh_to_yxyx(box2)

        enclose_min, enclose_max, _ = smallest_encompassing_box(
            corners1, corners2, yxyx=True)
        overlap, combined = intersect_and_union(corners1, corners2, yxyx=True)
        iou = math_ops.divide_no_nan(overlap, combined)

        # First half of the last axis holds the centers; sizes are unused.
        xy1, _ = tf.split(centered1, 2, axis=-1)
        xy2, _ = tf.split(centered2, 2, axis=-1)
        enclose_extent = enclose_max - enclose_min

        # Squared center distance over the squared enclosing-box diagonal.
        center_sq = tf.reduce_sum((xy1 - xy2)**2, axis=-1)
        diag_sq = tf.reduce_sum(enclose_extent**2, axis=-1)
        penalty = math_ops.divide_no_nan(center_sq, diag_sq)

        diou = iou - penalty**beta
    return iou, diou
示例#4
0
def compute_iou(box1, box2, yxyx=False):
    """Computes the intersection over union of box1 and box2.

    Args:
      box1: any `Tensor` whose last dimension is 4 representing the
        coordinates of boxes.
      box2: any `Tensor` whose last dimension is 4 representing the
        coordinates of boxes.
      yxyx: a `bool` forwarded to `intersect_and_union`; True for
        (y_min, x_min, y_max, x_max) inputs, False for
        (x_center, y_center, width, height) inputs.

    Returns:
      iou: a `Tensor` holding intersection / union, with 0 wherever the
        union is 0.
    """
    with tf.name_scope('iou'):
        overlap, combined = intersect_and_union(box1, box2, yxyx=yxyx)
        return math_ops.divide_no_nan(overlap, combined)
示例#5
0
def average_iou(iou):
    """Averages the iou while ignoring locations where the iou is zero.

    For every entry along the leading axis the iou values are summed over
    all remaining axes and divided by the number of non-zero values there;
    the result is then averaged over the leading axis.

    Args:
      iou: A `Tensor` representing the iou values.

    Returns:
      A scalar `Tensor` with the average intersection over union, wrapped in
      `tf.stop_gradient`.
    """
    # Every axis except the first one; the rank is read dynamically.
    rank = tf.shape(tf.shape(iou))[0]
    inner_axes = tf.range(1, rank)

    total = tf.reduce_sum(iou, axis=inner_axes)
    nonzero = tf.math.count_nonzero(iou, axis=inner_axes)
    nonzero = tf.cast(nonzero, iou.dtype)

    per_item = math_ops.divide_no_nan(total, nonzero)
    return tf.stop_gradient(tf.reduce_mean(per_item))
示例#6
0
    def _compute_loss(self, true_counts, inds, y_true, boxes, classes, y_pred):
        """Per FPN path loss logic for Yolov4-csp, Yolov4-Large, and Yolov5.

        Args:
          true_counts: `Tensor` whose first four dimensions are read as
            (batch_size, width, height, num). It is clipped to [0, 1] and its
            last axis is squeezed to form the grid mask.
          inds: index `Tensor` used with `tf.gather_nd(..., batch_dims=1)` to
            select the grid entries associated with the ground-truth boxes.
          y_true: `Tensor` whose last dimension is split as [4, 1, 1] into
            box, index mask and class id.
          boxes: ground-truth boxes, forwarded to
            `self._tiled_global_box_search` when the ignore threshold is
            non-zero.
          classes: ground-truth classes, forwarded to the same search.
          y_pred: raw predictions, reshaped to
            [batch_size, width, height, num, -1] and split on the last axis
            as [4, 1, -1] into box, confidence and class predictions.

        Returns:
          A tuple `(loss, box_loss, conf_loss, class_loss, mean_loss, iou,
          pred_conf, ind_mask, grid_mask)`. `mean_loss` is the sum of the
          three weighted losses and `loss` is `mean_loss` multiplied by the
          batch size. `iou` is gradient-stopped and clipped below at 0.
        """
        # Generate shape constants.
        shape = tf.shape(true_counts)
        batch_size, width, height, num = shape[0], shape[1], shape[2], shape[3]
        fwidth = tf.cast(width, tf.float32)
        fheight = tf.cast(height, tf.float32)

        # Cast the ground-truth inputs to float32, clip the counts to [0, 1]
        # to get the confidence map, and build the anchor/grid tensors.
        y_true = tf.cast(y_true, tf.float32)
        true_counts = tf.cast(true_counts, tf.float32)
        true_conf = tf.clip_by_value(true_counts, 0.0, 1.0)
        grid_points, anchor_grid = self._anchor_generator(width,
                                                          height,
                                                          batch_size,
                                                          dtype=tf.float32)

        # Split the y_true list.
        (true_box, ind_mask, true_class) = tf.split(y_true, [4, 1, 1], axis=-1)
        # grid_mask keeps the clipped count map; true_conf is rebuilt later
        # from the smoothed IOU.
        grid_mask = true_conf = tf.squeeze(true_conf, axis=-1)
        true_class = tf.squeeze(true_class, axis=-1)
        # Uses the dtype of y_pred as passed in, i.e. before the float32 cast
        # below.
        num_objs = tf.cast(tf.reduce_sum(ind_mask), dtype=y_pred.dtype)

        # Split up the predictions.
        y_pred = tf.cast(
            tf.reshape(y_pred, [batch_size, width, height, num, -1]),
            tf.float32)
        pred_box, pred_conf, pred_class = tf.split(y_pred, [4, 1, -1], axis=-1)

        # Decode the boxes for loss compute. The third return value (pbg) is
        # only used by the global box search below.
        scale, pred_box, pbg = self._decode_boxes(fwidth,
                                                  fheight,
                                                  pred_box,
                                                  anchor_grid,
                                                  grid_points,
                                                  darknet=False)

        # If the ignore threshold is enabled, search all boxes and ignore all
        # IOU values larger than the ignore threshold that are not in the
        # noted ground truth list. obj_mask is only defined on this path and
        # is only read under the same condition further down.
        if self._ignore_thresh != 0.0:
            (_, obj_mask) = self._tiled_global_box_search(
                pbg,
                tf.stop_gradient(tf.sigmoid(pred_class)),
                boxes,
                classes,
                true_conf,
                smoothed=False,
                scale=None)

        # Scale and shift and select the ground truth boxes
        # and predictions to the prediction domain.
        if self._box_type == 'anchor_free':
            true_box = loss_utils.apply_mask(
                ind_mask, (scale * self._path_stride * true_box))
        else:
            # Gather the grid point of each ground-truth entry and pad it with
            # zeros so that only the first half of the box vector is shifted.
            offset = tf.cast(tf.gather_nd(grid_points, inds, batch_dims=1),
                             true_box.dtype)
            offset = tf.concat([offset, tf.zeros_like(offset)], axis=-1)
            true_box = loss_utils.apply_mask(ind_mask,
                                             (scale * true_box) - offset)
        pred_box = loss_utils.apply_mask(
            ind_mask, tf.gather_nd(pred_box, inds, batch_dims=1))

        # Select the correct/used prediction classes.
        true_class = tf.one_hot(tf.cast(true_class, tf.int32),
                                depth=tf.shape(pred_class)[-1],
                                dtype=pred_class.dtype)
        true_class = loss_utils.apply_mask(ind_mask, true_class)
        pred_class = loss_utils.apply_mask(
            ind_mask, tf.gather_nd(pred_class, inds, batch_dims=1))

        # Compute the box loss, averaged over the number of objects.
        _, iou, box_loss = self.box_loss(true_box, pred_box, darknet=False)
        box_loss = loss_utils.apply_mask(tf.squeeze(ind_mask, axis=-1),
                                         box_loss)
        box_loss = math_ops.divide_no_nan(tf.reduce_sum(box_loss), num_objs)

        # Use the box IOU to build the map for confidence loss computation.
        # The target blends the hard mask with the (non-negative,
        # gradient-stopped) IOU according to self._objectness_smooth.
        iou = tf.maximum(tf.stop_gradient(iou), 0.0)
        smoothed_iou = ((
            (1 - self._objectness_smooth) * tf.cast(ind_mask, iou.dtype)) +
                        self._objectness_smooth * tf.expand_dims(iou, axis=-1))
        smoothed_iou = loss_utils.apply_mask(ind_mask, smoothed_iou)
        true_conf = loss_utils.build_grid(inds,
                                          smoothed_iou,
                                          pred_conf,
                                          ind_mask,
                                          update=self._update_on_repeat)
        true_conf = tf.squeeze(true_conf, axis=-1)

        # Compute the cross entropy loss for the confidence map.
        bce = tf.keras.losses.binary_crossentropy(tf.expand_dims(true_conf,
                                                                 axis=-1),
                                                  pred_conf,
                                                  from_logits=True)
        if self._ignore_thresh != 0.0:
            # Average only over the locations kept by obj_mask.
            # NOTE(review): plain division here, unlike the divide_no_nan
            # used for the box and class losses - confirm obj_mask can never
            # sum to zero.
            bce = loss_utils.apply_mask(obj_mask, bce)
            conf_loss = tf.reduce_sum(bce) / tf.reduce_sum(obj_mask)
        else:
            conf_loss = tf.reduce_mean(bce)

        # Compute the cross entropy loss for the class maps, averaged over
        # the number of objects.
        class_loss = tf.keras.losses.binary_crossentropy(
            true_class,
            pred_class,
            label_smoothing=self._label_smoothing,
            from_logits=True)
        class_loss = loss_utils.apply_mask(tf.squeeze(ind_mask, axis=-1),
                                           class_loss)
        class_loss = math_ops.divide_no_nan(tf.reduce_sum(class_loss),
                                            num_objs)

        # Apply the weights to each loss.
        box_loss *= self._iou_normalizer
        class_loss *= self._cls_normalizer
        conf_loss *= self._obj_normalizer

        # Add all the losses together, then scale the mean by the batch size
        # to obtain a per-batch sum.
        mean_loss = box_loss + class_loss + conf_loss
        loss = mean_loss * tf.cast(batch_size, mean_loss.dtype)

        return (loss, box_loss, conf_loss, class_loss, mean_loss, iou,
                pred_conf, ind_mask, grid_mask)
示例#7
0
    def _compute_loss(self, true_counts, inds, y_true, boxes, classes, y_pred):
        """Per FPN path loss logic used for Yolov3, Yolov4, and Yolo-Tiny.

        Args:
          true_counts: `Tensor` whose first four dimensions are read as
            (batch_size, width, height, num). It is clipped to [0, 1] and its
            last axis is squeezed to form the confidence target / grid mask.
          inds: index `Tensor` used with `tf.gather_nd(..., batch_dims=1)` to
            select the grid entries associated with the ground-truth boxes.
          y_true: `Tensor` whose last dimension is split as [4, 1, 1] into
            box, index mask and class id.
          boxes: ground-truth boxes, forwarded to
            `self._tiled_global_box_search` when the ignore threshold is
            non-zero.
          classes: ground-truth classes, forwarded to the same search.
          y_pred: raw predictions, reshaped to
            [batch_size, width, height, num, -1] and split on the last axis
            as [4, 1, -1] into box, confidence and class predictions.

        Returns:
          A tuple `(loss, box_loss, conf_loss, class_loss, loss, iou,
          pred_conf, ind_mask, grid_mask)`. `loss` is the batch mean of the
          summed per-sample losses and appears twice (positions 0 and 4); the
          individual losses are batch means intended as metrics.
        """
        if self._box_type == 'scaled':
            # Darknet Model Propagates a sigmoid once in back prop so we replicate
            # that behaviour
            y_pred = grad_sigmoid(y_pred)

        # Generate and store constants and format output.
        shape = tf.shape(true_counts)
        batch_size, width, height, num = shape[0], shape[1], shape[2], shape[3]
        fwidth = tf.cast(width, tf.float32)
        fheight = tf.cast(height, tf.float32)
        grid_points, anchor_grid = self._anchor_generator(width,
                                                          height,
                                                          batch_size,
                                                          dtype=tf.float32)

        # Cast all input components to float32 and stop gradient to save memory.
        boxes = tf.stop_gradient(tf.cast(boxes, tf.float32))
        classes = tf.stop_gradient(tf.cast(classes, tf.float32))
        y_true = tf.stop_gradient(tf.cast(y_true, tf.float32))
        true_counts = tf.stop_gradient(tf.cast(true_counts, tf.float32))
        true_conf = tf.stop_gradient(tf.clip_by_value(true_counts, 0.0, 1.0))
        grid_points = tf.stop_gradient(grid_points)
        anchor_grid = tf.stop_gradient(anchor_grid)

        # Split all the ground truths to use as separate items in loss computation.
        (true_box, ind_mask, true_class) = tf.split(y_true, [4, 1, 1], axis=-1)
        true_conf = tf.squeeze(true_conf, axis=-1)
        true_class = tf.squeeze(true_class, axis=-1)
        # Keep the clipped count map before true_conf is possibly replaced by
        # the global box search below.
        grid_mask = true_conf

        # Splits all predictions.
        y_pred = tf.cast(
            tf.reshape(y_pred, [batch_size, width, height, num, -1]),
            tf.float32)
        pred_box, pred_conf, pred_class = tf.split(y_pred, [4, 1, -1], axis=-1)

        # Decode the boxes to be used for loss compute. Only the third return
        # value of the decoder is used here.
        _, _, pred_box = self._decode_boxes(fwidth,
                                            fheight,
                                            pred_box,
                                            anchor_grid,
                                            grid_points,
                                            darknet=True)

        # If the ignore threshold is enabled, search all boxes and ignore all
        # IOU values larger than the ignore threshold that are not in the
        # noted ground truth list. obj_mask is only defined on this path and
        # is only read under the same condition further down.
        if self._ignore_thresh != 0.0:
            (true_conf, obj_mask) = self._tiled_global_box_search(
                pred_box,
                tf.stop_gradient(tf.sigmoid(pred_class)),
                boxes,
                classes,
                true_conf,
                smoothed=self._objectness_smooth > 0)

        # Build the one hot class list that are used for class loss.
        true_class = tf.one_hot(tf.cast(true_class, tf.int32),
                                depth=tf.shape(pred_class)[-1],
                                dtype=pred_class.dtype)
        true_classes = tf.stop_gradient(
            loss_utils.apply_mask(ind_mask, true_class))

        # Reorganize the one hot class list as a grid.
        true_class = loss_utils.build_grid(inds,
                                           true_classes,
                                           pred_class,
                                           ind_mask,
                                           update=False)
        true_class = tf.stop_gradient(true_class)

        # Use the class mask to find the number of objects located in
        # each predicted grid cell/pixel. Zero counts are replaced by one so
        # the division of the box loss below leaves those entries unchanged.
        counts = true_class
        counts = tf.reduce_sum(counts, axis=-1, keepdims=True)
        reps = tf.gather_nd(counts, inds, batch_dims=1)
        reps = tf.squeeze(reps, axis=-1)
        reps = tf.stop_gradient(tf.where(reps == 0.0, tf.ones_like(reps),
                                         reps))

        # Compute the loss for only the cells in which the boxes are located.
        # Each box loss is divided by the object count gathered for its cell,
        # then summed over axis 1 to give one value per batch element.
        pred_box = loss_utils.apply_mask(
            ind_mask, tf.gather_nd(pred_box, inds, batch_dims=1))
        iou, _, box_loss = self.box_loss(true_box, pred_box, darknet=True)
        box_loss = loss_utils.apply_mask(tf.squeeze(ind_mask, axis=-1),
                                         box_loss)
        box_loss = math_ops.divide_no_nan(box_loss, reps)
        box_loss = tf.cast(tf.reduce_sum(box_loss, axis=1), dtype=y_pred.dtype)

        # Compute the sigmoid binary cross entropy for the class maps. A
        # trailing axis is added to both inputs and averaged away again.
        class_loss = tf.reduce_mean(loss_utils.sigmoid_bce(
            tf.expand_dims(true_class, axis=-1),
            tf.expand_dims(pred_class, axis=-1), self._label_smoothing),
                                    axis=-1)

        # Apply normalization to the class losses. Unlike the box and
        # confidence weights, the class weight is applied per entry and only
        # when it is below 1.0.
        if self._cls_normalizer < 1.0:
            # Build a mask based on the true class locations.
            cls_norm_mask = true_class
            # Apply the classes weight to class indexes where one_hot is one.
            class_loss *= ((1 - cls_norm_mask) +
                           cls_norm_mask * self._cls_normalizer)

        # Mask the class loss and compute the sum over all the objects.
        class_loss = tf.reduce_sum(class_loss, axis=-1)
        class_loss = loss_utils.apply_mask(grid_mask, class_loss)
        class_loss = math_ops.rm_nan_inf(class_loss, val=0.0)
        class_loss = tf.cast(tf.reduce_sum(class_loss, axis=(1, 2, 3)),
                             dtype=y_pred.dtype)

        # Compute the sigmoid binary cross entropy for the confidence maps.
        bce = tf.reduce_mean(loss_utils.sigmoid_bce(
            tf.expand_dims(true_conf, axis=-1), pred_conf, 0.0),
                             axis=-1)

        # Mask the confidence loss and take the sum across all the grid cells.
        if self._ignore_thresh != 0.0:
            bce = loss_utils.apply_mask(obj_mask, bce)
        conf_loss = tf.cast(tf.reduce_sum(bce, axis=(1, 2, 3)),
                            dtype=y_pred.dtype)

        # Apply the weights to each loss. The class weight was already
        # handled above, so class_loss is not scaled here.
        box_loss *= self._iou_normalizer
        conf_loss *= self._obj_normalizer

        # Add all the losses together then take the mean over the batches.
        loss = box_loss + class_loss + conf_loss
        loss = tf.reduce_mean(loss)

        # Reduce the mean of the losses to use as a metric.
        box_loss = tf.reduce_mean(box_loss)
        conf_loss = tf.reduce_mean(conf_loss)
        class_loss = tf.reduce_mean(class_loss)

        return (loss, box_loss, conf_loss, class_loss, loss, iou, pred_conf,
                ind_mask, grid_mask)