Exemplo n.º 1
0
def build_grid(indexes, truths, preds, ind_mask, update=False, grid=None):
    """Scatter ground truth rows into a grid shaped like the predictions.

    A batch coordinate is prepended to every entry of `indexes`, the result is
    masked with `ind_mask`, and the flattened coordinates are used to write the
    flattened `truths` into the grid. This is used for the ground truth map
    construction in the scaled loss and the classification map in the darknet
    loss.

    Args:
      indexes: A `Tensor` of grid indexes without the batch coordinate. After
        the batch coordinate is prepended, each entry must hold 4 values.
      truths: A `Tensor` of ground truth values; it is reshaped to
        `[-1, tf.shape(preds)[-1]]` before being scattered.
      preds: A `Tensor` of predictions. Its last dimension sets the width of
        the flattened truths and, when `grid` is None, it supplies the shape
        and dtype of the zero grid.
      ind_mask: A `Tensor` mask for the indexes; it is cast to the dtype of
        `indexes`. Where the mask is 0 the masked index is shifted by -1
        (presumably producing an out-of-range index; depends on `apply_mask`).
      update: A `bool`. When True the values overwrite the grid entries
        (`tf.tensor_scatter_nd_update`), otherwise the element-wise maximum of
        the grid and the values is kept (`tf.tensor_scatter_nd_max`).
      grid: An optional `Tensor` to scatter into. When None a zero tensor
        shaped like `preds` is created.

    Returns:
      grid: A `Tensor` holding the grid after the scatter. No stop-gradient is
        applied here; callers that need one must apply it themselves.
    """
    # width of the last prediction axis, used to flatten the truths
    depth = tf.shape(preds)[-1]

    # the mask must share the dtype of the indexes so it can be added to them
    mask = tf.cast(ind_mask, indexes.dtype)

    # zero based batch ids: collapse a ones tensor along the last axis and
    # take the running sum along the batch axis, minus one
    batch_ids = tf.ones_like(indexes)
    batch_ids = tf.reduce_max(batch_ids, axis=-1, keepdims=True)
    batch_ids = tf.math.cumsum(batch_ids, axis=0) - 1

    # prepend the batch id to each index, mask it, and shift the entries
    # where the mask is 0 by -1
    coords = tf.concat([batch_ids, indexes], axis=-1)
    coords = apply_mask(mask, coords)
    coords = coords + (mask - 1)

    # flatten everything but the last axis: one 4 value coordinate per row
    coords = tf.reshape(coords, [-1, 4])

    # flatten the truths the same way and drop nan and inf values that may
    # have come up from earlier computation
    values = tf.reshape(truths, [-1, depth])
    values = math_ops.rm_nan_inf(values)

    # start from a zero grid in the same shape as the predictions unless the
    # caller handed one in
    if grid is None:
        grid = tf.zeros_like(preds)

    # overwrite the cells or keep the element-wise maximum
    scatter_fn = (tf.tensor_scatter_nd_update
                  if update else tf.tensor_scatter_nd_max)
    return scatter_fn(grid, coords, values)
Exemplo n.º 2
0
    def delta(unused_dy_scaler, dy_scaled, dy):
        """Merge the incoming box gradients and clip them.

        Args:
          unused_dy_scaler: Incoming gradient that is ignored.
          dy_scaled: Incoming gradient; split in half along the last axis
            into an xy part and a wh part.
          dy: Incoming gradient; split the same way as `dy_scaled`.

        Returns:
          A 7-tuple. The first item is the summed gradient with nan and inf
          values removed and clipped to `[-max_delta, max_delta]`; the other
          six items are the constant `0.0`.
        """
        grad_xy, grad_wh = tf.split(dy, 2, axis=-1)
        scaled_xy, scaled_wh = tf.split(dy_scaled, 2, axis=-1)

        # sum the gradients that arrived through both paths, half by half
        grad_wh = grad_wh + scaled_wh
        grad_xy = grad_xy + scaled_xy
        box_grad = tf.concat([grad_xy, grad_wh], axis=-1)

        # sanitize first, then clip xy and wh to the symmetric limit
        box_grad = math_ops.rm_nan_inf(box_grad)
        limit = tf.cast(max_delta, box_grad.dtype)
        box_grad = tf.clip_by_value(box_grad, -limit, limit)
        return (box_grad,) + (0.0,) * 6
Exemplo n.º 3
0
    def delta(unused_dy_scaler, dy_scaled, dy):
        """Merge the incoming box gradients, rescale wh, and clip them.

        Args:
          unused_dy_scaler: Incoming gradient that is ignored.
          dy_scaled: Incoming gradient; split in half along the last axis
            into an xy part and a wh part.
          dy: Incoming gradient; split the same way as `dy_scaled`.

        Returns:
          A 7-tuple. The first item is the summed gradient, with its wh half
          multiplied by `exp(encoded_boxes[..., 2:4])`, nan and inf values
          removed, and clipped to `[-max_delta, max_delta]`; the other six
          items are the constant `0.0`.
        """
        grad_xy, grad_wh = tf.split(dy, 2, axis=-1)
        scaled_xy, scaled_wh = tf.split(dy_scaled, 2, axis=-1)

        # sum the gradients that arrived through both paths, half by half
        grad_wh = grad_wh + scaled_wh
        grad_xy = grad_xy + scaled_xy

        # carry the exponential applied to the width and height through the
        # gradient so that it has the correct magnitude
        wh_factor = tf.math.exp(encoded_boxes[..., 2:4])
        grad_wh = grad_wh * wh_factor

        box_grad = tf.concat([grad_xy, grad_wh], axis=-1)

        # sanitize first, then clip xy and wh to the symmetric limit
        box_grad = math_ops.rm_nan_inf(box_grad)
        limit = tf.cast(max_delta, box_grad.dtype)
        box_grad = tf.clip_by_value(box_grad, -limit, limit)
        return (box_grad,) + (0.0,) * 6
Exemplo n.º 4
0
    def _compute_loss(self, true_counts, inds, y_true, boxes, classes, y_pred):
        """Per FPN path loss logic used for Yolov3, Yolov4, and Yolo-Tiny.

        Computes a box loss, a class loss and a confidence loss for one
        prediction path and combines them into a single scalar.

        Args:
          true_counts: A `Tensor` whose first four dimensions are read as
            (batch_size, width, height, num). It is clipped to [0, 1] to form
            the confidence target and its last axis is squeezed away
            (presumably shaped (batch, width, height, num, 1) -- TODO confirm).
          inds: A `Tensor` of indexes used with
            `tf.gather_nd(..., batch_dims=1)` and `loss_utils.build_grid` to
            address the grid cells that hold ground truth objects.
          y_true: A `Tensor` whose last axis is split into 4 box values,
            1 index mask value and 1 class id.
          boxes: A `Tensor` of ground truth boxes, only used by
            `self._tiled_global_box_search` when the ignore threshold is
            non-zero.
          classes: A `Tensor` of ground truth classes, only used by
            `self._tiled_global_box_search` when the ignore threshold is
            non-zero.
          y_pred: A `Tensor` of predictions. It is reshaped to
            [batch_size, width, height, num, -1] and its last axis is split
            into 4 box values, 1 confidence value and the class values.

        Returns:
          A 9-tuple
          `(loss, box_loss, conf_loss, class_loss, loss, iou, pred_conf,
          ind_mask, grid_mask)` where `loss` is the batch mean of the summed
          losses (it appears twice), `box_loss`, `conf_loss` and `class_loss`
          are the batch means of the individual terms, `iou` is the first
          value returned by `self.box_loss`, `pred_conf` is the confidence
          slice of the reshaped predictions, `ind_mask` is the mask slice of
          `y_true`, and `grid_mask` is the clipped, squeezed `true_counts`.
        """
        if self._box_type == 'scaled':
            # Darknet Model Propagates a sigmoid once in back prop so we replicate
            # that behaviour
            y_pred = grad_sigmoid(y_pred)

        # Generate and store constants and format output.
        shape = tf.shape(true_counts)
        batch_size, width, height, num = shape[0], shape[1], shape[2], shape[3]
        fwidth = tf.cast(width, tf.float32)
        fheight = tf.cast(height, tf.float32)
        grid_points, anchor_grid = self._anchor_generator(width,
                                                          height,
                                                          batch_size,
                                                          dtype=tf.float32)

        # Cast all input components to float32 and stop gradient to save memory.
        boxes = tf.stop_gradient(tf.cast(boxes, tf.float32))
        classes = tf.stop_gradient(tf.cast(classes, tf.float32))
        y_true = tf.stop_gradient(tf.cast(y_true, tf.float32))
        true_counts = tf.stop_gradient(tf.cast(true_counts, tf.float32))
        # The confidence target is the object count clipped to [0, 1].
        true_conf = tf.stop_gradient(tf.clip_by_value(true_counts, 0.0, 1.0))
        grid_points = tf.stop_gradient(grid_points)
        anchor_grid = tf.stop_gradient(anchor_grid)

        # Split all the ground truths to use as separate items in loss computation.
        (true_box, ind_mask, true_class) = tf.split(y_true, [4, 1, 1], axis=-1)
        true_conf = tf.squeeze(true_conf, axis=-1)
        true_class = tf.squeeze(true_class, axis=-1)
        # Keep the confidence target as the class loss mask before the box
        # search below gets a chance to replace `true_conf`.
        grid_mask = true_conf

        # Splits all predictions.
        y_pred = tf.cast(
            tf.reshape(y_pred, [batch_size, width, height, num, -1]),
            tf.float32)
        pred_box, pred_conf, pred_class = tf.split(y_pred, [4, 1, -1], axis=-1)

        # Decode the boxes to be used for loss compute.
        _, _, pred_box = self._decode_boxes(fwidth,
                                            fheight,
                                            pred_box,
                                            anchor_grid,
                                            grid_points,
                                            darknet=True)

        # If the ignore threshold is enabled, search all boxes ignore all
        # IOU values larger than the ignore threshold that are not in the
        # noted ground truth list.
        # NOTE: `obj_mask` only exists when this branch runs; it is read
        # further down under the same condition.
        if self._ignore_thresh != 0.0:
            (true_conf, obj_mask) = self._tiled_global_box_search(
                pred_box,
                tf.stop_gradient(tf.sigmoid(pred_class)),
                boxes,
                classes,
                true_conf,
                smoothed=self._objectness_smooth > 0)

        # Build the one hot class list that are used for class loss.
        true_class = tf.one_hot(tf.cast(true_class, tf.int32),
                                depth=tf.shape(pred_class)[-1],
                                dtype=pred_class.dtype)
        true_classes = tf.stop_gradient(
            loss_utils.apply_mask(ind_mask, true_class))

        # Reorganize the one hot class list as a grid.
        true_class = loss_utils.build_grid(inds,
                                           true_classes,
                                           pred_class,
                                           ind_mask,
                                           update=False)
        true_class = tf.stop_gradient(true_class)

        # Use the class mask to find the number of objects located in
        # each predicted grid cell/pixel.
        counts = true_class
        counts = tf.reduce_sum(counts, axis=-1, keepdims=True)
        reps = tf.gather_nd(counts, inds, batch_dims=1)
        reps = tf.squeeze(reps, axis=-1)
        # Replace zero counts with one so the division below leaves those
        # entries unchanged.
        reps = tf.stop_gradient(tf.where(reps == 0.0, tf.ones_like(reps),
                                         reps))

        # Compute the loss for only the cells in which the boxes are located.
        pred_box = loss_utils.apply_mask(
            ind_mask, tf.gather_nd(pred_box, inds, batch_dims=1))
        iou, _, box_loss = self.box_loss(true_box, pred_box, darknet=True)
        box_loss = loss_utils.apply_mask(tf.squeeze(ind_mask, axis=-1),
                                         box_loss)
        # Divide each box loss by the object count gathered for its cell.
        box_loss = math_ops.divide_no_nan(box_loss, reps)
        box_loss = tf.cast(tf.reduce_sum(box_loss, axis=1), dtype=y_pred.dtype)

        # Compute the sigmoid binary cross entropy for the class maps.
        # A trailing axis is added to both inputs and averaged away again.
        class_loss = tf.reduce_mean(loss_utils.sigmoid_bce(
            tf.expand_dims(true_class, axis=-1),
            tf.expand_dims(pred_class, axis=-1), self._label_smoothing),
                                    axis=-1)

        # Apply normalization to the class losses.
        if self._cls_normalizer < 1.0:
            # Build a mask based on the true class locations.
            cls_norm_mask = true_class
            # Apply the classes weight to class indexes where one_hot is one;
            # entries where the mask is zero keep a weight of one.
            class_loss *= ((1 - cls_norm_mask) +
                           cls_norm_mask * self._cls_normalizer)

        # Mask to the class loss and compute the sum over all the objects.
        class_loss = tf.reduce_sum(class_loss, axis=-1)
        class_loss = loss_utils.apply_mask(grid_mask, class_loss)
        class_loss = math_ops.rm_nan_inf(class_loss, val=0.0)
        class_loss = tf.cast(tf.reduce_sum(class_loss, axis=(1, 2, 3)),
                             dtype=y_pred.dtype)

        # Compute the sigmoid binary cross entropy for the confidence maps.
        bce = tf.reduce_mean(loss_utils.sigmoid_bce(
            tf.expand_dims(true_conf, axis=-1), pred_conf, 0.0),
                             axis=-1)

        # Mask the confidence loss and take the sum across all the grid cells.
        if self._ignore_thresh != 0.0:
            bce = loss_utils.apply_mask(obj_mask, bce)
        conf_loss = tf.cast(tf.reduce_sum(bce, axis=(1, 2, 3)),
                            dtype=y_pred.dtype)

        # Apply the weights to each loss.
        box_loss *= self._iou_normalizer
        conf_loss *= self._obj_normalizer

        # Add all the losses together then take the mean over the batches.
        loss = box_loss + class_loss + conf_loss
        loss = tf.reduce_mean(loss)

        # Reduce the mean of the losses to use as a metric.
        box_loss = tf.reduce_mean(box_loss)
        conf_loss = tf.reduce_mean(conf_loss)
        class_loss = tf.reduce_mean(class_loss)

        # `loss` is intentionally listed twice in the returned tuple.
        return (loss, box_loss, conf_loss, class_loss, loss, iou, pred_conf,
                ind_mask, grid_mask)