Example No. 1
def call(self, y_true, box_outputs):
    # Tile the per-image anchors to match the flattened batch dimension of
    # box_outputs.
    anchor_boxes = tf.tile(
        self.input_anchors.boxes,
        [box_outputs.shape[0] // self.input_anchors.boxes.shape[0], 1])
    num_positives, box_targets = y_true
    # Decode both predictions and targets into absolute box coordinates.
    box_outputs = anchors.decode_box_outputs(box_outputs, anchor_boxes)
    box_targets = anchors.decode_box_outputs(box_targets, anchor_boxes)
    # Normalize by the number of positive anchors times the 4 box coordinates.
    normalizer = num_positives * 4.0
    box_iou_loss = iou_utils.iou_loss(box_outputs, box_targets,
                                      self.iou_loss_type)
    box_iou_loss = tf.reduce_sum(box_iou_loss) / normalizer
    return box_iou_loss
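The key idea above is that the summed per-box IoU loss is divided by num_positives * 4.0. The following is a minimal standalone sketch of that normalization using a plain 1 - IoU loss; simple_iou_loss is a hypothetical stand-in for iou_utils.iou_loss, which in the real code supports several variants selected by self.iou_loss_type.

import tensorflow as tf

def simple_iou_loss(pred_boxes, target_boxes):
    # Plain 1 - IoU for [ymin, xmin, ymax, xmax] boxes (hypothetical helper).
    ymin = tf.maximum(pred_boxes[:, 0], target_boxes[:, 0])
    xmin = tf.maximum(pred_boxes[:, 1], target_boxes[:, 1])
    ymax = tf.minimum(pred_boxes[:, 2], target_boxes[:, 2])
    xmax = tf.minimum(pred_boxes[:, 3], target_boxes[:, 3])
    intersection = tf.maximum(ymax - ymin, 0.0) * tf.maximum(xmax - xmin, 0.0)
    pred_area = ((pred_boxes[:, 2] - pred_boxes[:, 0]) *
                 (pred_boxes[:, 3] - pred_boxes[:, 1]))
    target_area = ((target_boxes[:, 2] - target_boxes[:, 0]) *
                   (target_boxes[:, 3] - target_boxes[:, 1]))
    union = pred_area + target_area - intersection
    return 1.0 - intersection / (union + 1e-8)

pred = tf.constant([[0.0, 0.0, 10.0, 10.0], [5.0, 5.0, 15.0, 15.0]])
target = tf.constant([[0.0, 0.0, 10.0, 10.0], [6.0, 6.0, 15.0, 15.0]])
num_positives = tf.constant(2.0)
# Same normalization as call() above: summed loss / (num_positives * 4.0).
box_iou_loss = tf.reduce_sum(simple_iou_loss(pred, target)) / (num_positives * 4.0)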
Example No. 2
def pre_nms(params, cls_outputs, box_outputs, topk=True):
    """Detection post processing before nms.

  It takes the multi-level class and box predictions from network, merge them
  into unified tensors, and compute boxes, scores, and classes.

  Args:
    params: a dict of parameters.
    cls_outputs: a list of tensors for classes, each tensor denotes a level of
      logits with shape [N, H, W, num_class * num_anchors].
    box_outputs: a list of tensors for boxes, each tensor ddenotes a level of
      boxes with shape [N, H, W, 4 * num_anchors].
    topk: if True, select topk before nms (mainly to speed up nms).

  Returns:
    A tuple of (boxes, scores, classes).
  """
    # Get boxes by applying bounding box regression to anchors.
    eval_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                   params['num_scales'],
                                   params['aspect_ratios'],
                                   params['anchor_scale'],
                                   params['image_size'])

    cls_outputs, box_outputs = merge_class_box_level_outputs(
        params, cls_outputs, box_outputs)

    if topk:
        # Select top-k purely based on scores before NMS, in order to speed it up.
        cls_outputs, box_outputs, classes, indices = topk_class_boxes(
            params, cls_outputs, box_outputs)
        anchor_boxes = tf.gather(eval_anchors.boxes, indices)
    else:
        anchor_boxes = eval_anchors.boxes
        classes = None

    boxes = anchors.decode_box_outputs(box_outputs, anchor_boxes)
    # convert logits to scores.
    scores = tf.math.sigmoid(cls_outputs)
    return boxes, scores, classes
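To illustrate the last two steps (sigmoid scores plus score-based top-k selection), here is a small self-contained sketch with made-up shapes. It mirrors the idea behind topk_class_boxes but is not its exact implementation; all names and sizes below are illustrative assumptions.

import tensorflow as tf

num_anchors, num_classes, k = 1000, 90, 100
cls_logits = tf.random.normal([num_anchors, num_classes])  # flattened single-image logits
scores = tf.math.sigmoid(cls_logits)                       # logits -> per-class scores
flat_scores = tf.reshape(scores, [-1])                     # flatten anchors x classes
top_scores, top_idx = tf.math.top_k(flat_scores, k=k)
anchor_indices = top_idx // num_classes                    # which anchor each hit came from
class_indices = top_idx % num_classes                      # which class each hit is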
Example No. 3
def detection_loss(cls_outputs, box_outputs, labels, params):
    """Computes total detection loss.

  Computes total detection loss including box and class loss from all levels.
  Args:
    cls_outputs: an OrderDict with keys representing levels and values
      representing logits in [batch_size, height, width, num_anchors].
    box_outputs: an OrderDict with keys representing levels and values
      representing box regression targets in [batch_size, height, width,
      num_anchors * 4].
    labels: the dictionary that returned from dataloader that includes
      groundtruth targets.
    params: the dictionary including training parameters specified in
      default_haprams function in this file.

  Returns:
    total_loss: an integer tensor representing total loss reducing from
      class and box losses from all levels.
    cls_loss: an integer tensor representing total class loss.
    box_loss: an integer tensor representing total box regression loss.
    box_iou_loss: an integer tensor representing total box iou loss.
  """
    # Sum all positives in a batch for normalization and avoid a zero
    # num_positives_sum, which would lead to an inf loss during training.
    num_positives_sum = tf.reduce_sum(labels['mean_num_positives']) + 1.0
    positives_momentum = params.get('positives_momentum', None) or 0
    if positives_momentum > 0:
        # normalize the num_positive_examples for training stability.
        moving_normalizer_var = tf.Variable(
            0.0,
            name='moving_normalizer',
            dtype=tf.float32,
            synchronization=tf.VariableSynchronization.ON_READ,
            trainable=False,
            aggregation=tf.VariableAggregation.MEAN)
        num_positives_sum = tf.keras.backend.moving_average_update(
            moving_normalizer_var,
            num_positives_sum,
            momentum=params['positives_momentum'])
    elif positives_momentum < 0:
        num_positives_sum = utils.cross_replica_mean(num_positives_sum)

    levels = cls_outputs.keys()
    cls_losses = []
    box_losses = []
    for level in levels:
        # Onehot encoding for classification labels.
        cls_targets_at_level = tf.one_hot(labels['cls_targets_%d' % level],
                                          params['num_classes'])

        if params['data_format'] == 'channels_first':
            bs, _, width, height, _ = cls_targets_at_level.get_shape().as_list(
            )
            cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                              [bs, -1, width, height])
        else:
            bs, width, height, _, _ = cls_targets_at_level.get_shape().as_list(
            )
            cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                              [bs, width, height, -1])
        box_targets_at_level = labels['box_targets_%d' % level]

        cls_loss = focal_loss(cls_outputs[level],
                              cls_targets_at_level,
                              params['alpha'],
                              params['gamma'],
                              normalizer=num_positives_sum,
                              label_smoothing=params['label_smoothing'])

        if params['data_format'] == 'channels_first':
            cls_loss = tf.reshape(
                cls_loss, [bs, -1, width, height, params['num_classes']])
        else:
            cls_loss = tf.reshape(
                cls_loss, [bs, width, height, -1, params['num_classes']])
        cls_loss *= tf.cast(
            tf.expand_dims(tf.not_equal(labels['cls_targets_%d' % level], -2),
                           -1), tf.float32)
        cls_losses.append(tf.clip_by_value(tf.reduce_sum(cls_loss), 0.0, 2.0))

        if params['box_loss_weight']:
            box_losses.append(
                _box_loss(box_outputs[level],
                          box_targets_at_level,
                          num_positives_sum,
                          delta=params['delta']))

    if params['iou_loss_type']:
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'],
                                        params['image_size'])
        box_output_list = [tf.reshape(box_outputs[i], [-1, 4]) for i in levels]
        box_outputs = tf.concat(box_output_list, axis=0)
        box_target_list = [
            tf.reshape(labels['box_targets_%d' % level], [-1, 4])
            for level in levels
        ]
        box_targets = tf.concat(box_target_list, axis=0)
        anchor_boxes = tf.tile(input_anchors.boxes, [params['batch_size'], 1])
        box_outputs = anchors.decode_box_outputs(box_outputs, anchor_boxes)
        box_targets = anchors.decode_box_outputs(box_targets, anchor_boxes)
        box_iou_loss = _box_iou_loss(box_outputs, box_targets,
                                     num_positives_sum,
                                     params['iou_loss_type'])

    else:
        box_iou_loss = 0

    # Sum per level losses to total loss.
    cls_loss = tf.add_n(cls_losses)
    box_loss = tf.add_n(box_losses) if box_losses else 0

    total_loss = (cls_loss + params['box_loss_weight'] * box_loss +
                  params['iou_loss_weight'] * box_iou_loss)

    return total_loss, cls_loss, box_loss, box_iou_loss
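For reference, the sketch below collects the params keys that detection_loss reads into one illustrative dict. The values are assumed EfficientDet-style defaults rather than values taken from this file, so treat every number as an assumption; the real values come from default_hparams.

# Illustrative values only (assumed); the real values come from default_hparams.
params = {
    'num_classes': 90,
    'data_format': 'channels_last',
    'alpha': 0.25,                     # focal loss alpha
    'gamma': 1.5,                      # focal loss gamma
    'label_smoothing': 0.0,
    'delta': 0.1,                      # Huber delta used by _box_loss
    'box_loss_weight': 50.0,
    'iou_loss_type': None,             # e.g. 'giou' to enable the IoU branch
    'iou_loss_weight': 1.0,
    'min_level': 3,
    'max_level': 7,
    'num_scales': 3,
    'aspect_ratios': [1.0, 2.0, 0.5],
    'anchor_scale': 4.0,
    'image_size': 512,
    'batch_size': 8,
    'positives_momentum': None,
}
# total_loss, cls_loss, box_loss, box_iou_loss = detection_loss(
#     cls_outputs, box_outputs, labels, params)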
Example No. 4
def detection_loss(cls_outputs, box_outputs, labels, params):
    """Computes total detection loss.

  Computes total detection loss including box and class loss from all levels.
  Args:
    cls_outputs: an OrderDict with keys representing levels and values
      representing logits in [batch_size, height, width, num_anchors].
    box_outputs: an OrderDict with keys representing levels and values
      representing box regression targets in [batch_size, height, width,
      num_anchors * 4].
    labels: the dictionary that returned from dataloader that includes
      groundtruth targets.
    params: the dictionary including training parameters specified in
      default_haprams function in this file.

  Returns:
    total_loss: an integer tensor representing total loss reducing from
      class and box losses from all levels.
    cls_loss: an integer tensor representing total class loss.
    box_loss: an integer tensor representing total box regression loss.
    box_iou_loss: an integer tensor representing total box iou loss.
  """
    # Sum all positives in a batch for normalization and avoid a zero
    # num_positives_sum, which would lead to an inf loss during training.
    num_positives_sum = tf.reduce_sum(labels['mean_num_positives']) + 1.0
    levels = cls_outputs.keys()

    cls_losses = []
    box_losses = []
    sumrule = {}
    if params.get('sumrule'):
        sumrule = params['sumrule']
        # Because cls_targets was shifted by -1 (so the background class becomes
        # -1 and real classes start from 0), subtract 1 from the sumrule entries
        # as well.
        _sumrule = {}
        for k, v in sumrule.items():
            _sumrule[k - 1] = [vv - 1 for vv in v]
        sumrule = _sumrule

    def table_lookup(values, old_onehot, cls_targets_at_level):
        for val in values:
            if sumrule.get(val):
                new_val = sumrule[val]
                # Sigmoid cross entropy is tried first, so each child gets 0.5;
                # with softmax this should be 1.0 / len(new_val) instead.
                prob = 0.5
                if len(new_val) == 1:
                    # Leaf node: keep the full probability.
                    prob = 1.0
                _matching_onehot = old_onehot[np.where(
                    cls_targets_at_level == val)]
                _matching_onehot[:, new_val] = prob
                _matching_onehot[:, val] = 0
                old_onehot[np.where(
                    cls_targets_at_level == val)] = _matching_onehot
        return old_onehot

    for level in levels:
        # Onehot encoding for classification labels.
        _cls_targets_at_level = tf.one_hot(labels['cls_targets_%d' % level],
                                           params['num_classes'])
        if params.get('sumrule'):
            unique_labels, _ = tf.unique(
                tf.reshape(labels['cls_targets_%d' % level], [-1]))
            # Refine one-hot labels so that each label is mapped to its finest leaves.
            cls_targets_at_level = tf.numpy_function(
                table_lookup, [
                    unique_labels, _cls_targets_at_level,
                    labels['cls_targets_%d' % level]
                ], _cls_targets_at_level.dtype)
            cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                              _cls_targets_at_level.shape)
        else:
            cls_targets_at_level = _cls_targets_at_level

        if params['data_format'] == 'channels_first':
            bs, _, width, height, _ = cls_targets_at_level.get_shape().as_list(
            )
            cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                              [bs, -1, width, height])
        else:
            bs, width, height, _, _ = cls_targets_at_level.get_shape().as_list(
            )
            cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                              [bs, width, height, -1])
        box_targets_at_level = labels['box_targets_%d' % level]

        cls_loss = focal_loss(cls_outputs[level],
                              cls_targets_at_level,
                              params['alpha'],
                              params['gamma'],
                              normalizer=num_positives_sum,
                              label_smoothing=params['label_smoothing'])

        if params['data_format'] == 'channels_first':
            cls_loss = tf.reshape(
                cls_loss, [bs, -1, width, height, params['num_classes']])
        else:
            cls_loss = tf.reshape(
                cls_loss, [bs, width, height, -1, params['num_classes']])
        cls_loss *= tf.cast(
            tf.expand_dims(tf.not_equal(labels['cls_targets_%d' % level], -2),
                           -1), tf.float32)
        cls_losses.append(tf.reduce_sum(cls_loss))

        if params['box_loss_weight']:
            box_losses.append(
                _box_loss(box_outputs[level],
                          box_targets_at_level,
                          num_positives_sum,
                          delta=params['delta']))

    if params['iou_loss_type']:
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'],
                                        params['image_size'])
        box_output_list = [tf.reshape(box_outputs[i], [-1, 4]) for i in levels]
        box_outputs = tf.concat(box_output_list, axis=0)
        box_target_list = [
            tf.reshape(labels['box_targets_%d' % level], [-1, 4])
            for level in levels
        ]
        box_targets = tf.concat(box_target_list, axis=0)
        anchor_boxes = tf.tile(input_anchors.boxes, [params['batch_size'], 1])
        box_outputs = anchors.decode_box_outputs(box_outputs, anchor_boxes)
        box_targets = anchors.decode_box_outputs(box_targets, anchor_boxes)
        box_iou_loss = _box_iou_loss(box_outputs, box_targets,
                                     num_positives_sum,
                                     params['iou_loss_type'])

    else:
        box_iou_loss = 0

    # Sum per level losses to total loss.
    cls_loss = tf.add_n(cls_losses)
    box_loss = tf.add_n(box_losses) if box_losses else 0

    total_loss = (cls_loss + params['box_loss_weight'] * box_loss +
                  params['iou_loss_weight'] * box_iou_loss)

    return total_loss, cls_loss, box_loss, box_iou_loss
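To make the sumrule/table_lookup step concrete, here is a small NumPy sketch with a hypothetical sumrule and class ids. It shows how a parent label's one-hot mass is moved onto its leaf classes, mirroring what table_lookup does inside tf.numpy_function.

import numpy as np

# Hypothetical hierarchy (already shifted by -1, as in the loop above):
# parent class 3 expands into leaf classes 7 and 8.
sumrule = {3: [7, 8]}
cls_targets = np.array([3, 1, 3])                  # per-anchor integer labels
onehot = np.eye(10, dtype=np.float32)[cls_targets]

for val, leaves in sumrule.items():
    prob = 1.0 if len(leaves) == 1 else 0.5        # same choice as table_lookup
    rows = np.where(cls_targets == val)
    patch = onehot[rows]
    patch[:, leaves] = prob                        # put mass on the leaf classes
    patch[:, val] = 0.0                            # clear the parent class
    onehot[rows] = patch
# onehot rows 0 and 2 now carry 0.5 at indices 7 and 8, and 0 at index 3.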