def call(self, y_true, box_outputs):
    """Compute the normalized IoU loss between predicted and target boxes.

    Args:
      y_true: a pair `(num_positives, box_targets)`.
      box_outputs: predicted box regression values. Its leading dimension is
        divided by the number of anchor boxes to obtain the tiling factor
        (presumably flattened to [batch * num_anchors, 4] -- confirm against
        the caller).

    Returns:
      The summed IoU loss divided by `num_positives * 4.0`.
    """
    base_anchors = self.input_anchors.boxes
    # Repeat the anchor set so it has as many rows as `box_outputs`.
    repeats = box_outputs.shape[0] // base_anchors.shape[0]
    tiled_anchors = tf.tile(base_anchors, [repeats, 1])

    num_positives, box_targets = y_true

    # Both predictions and targets are decoded against the same anchors
    # before the IoU loss is evaluated.
    decoded_outputs = anchors.decode_box_outputs(box_outputs, tiled_anchors)
    decoded_targets = anchors.decode_box_outputs(box_targets, tiled_anchors)

    normalizer = num_positives * 4.0
    per_box_loss = iou_utils.iou_loss(
        decoded_outputs, decoded_targets, self.iou_loss_type)
    return tf.reduce_sum(per_box_loss) / normalizer
def pre_nms(params, cls_outputs, box_outputs, topk=True):
    """Detection post processing before nms.

    Takes the multi-level class and box predictions from the network, merges
    them into unified tensors, and computes boxes, scores, and classes.

    Args:
      params: a dict of parameters.
      cls_outputs: a list of tensors for classes, each tensor denotes a level
        of logits with shape [N, H, W, num_class * num_anchors].
      box_outputs: a list of tensors for boxes, each tensor denotes a level of
        boxes with shape [N, H, W, 4 * num_anchors].
      topk: if True, select topk before nms (mainly to speed up nms).

    Returns:
      A tuple of (boxes, scores, classes). `classes` is None when `topk` is
      False.
    """
    # Anchors that the box regression outputs are decoded against.
    anchor_grid = anchors.Anchors(
        params['min_level'],
        params['max_level'],
        params['num_scales'],
        params['aspect_ratios'],
        params['anchor_scale'],
        params['image_size'])

    merged_cls, merged_box = merge_class_box_level_outputs(
        params, cls_outputs, box_outputs)

    if not topk:
        classes = None
        selected_anchors = anchor_grid.boxes
    else:
        # Keep only the top-scoring candidates (chosen purely by score) so
        # that NMS has less work to do; gather the matching anchors.
        merged_cls, merged_box, classes, indices = topk_class_boxes(
            params, merged_cls, merged_box)
        selected_anchors = tf.gather(anchor_grid.boxes, indices)

    boxes = anchors.decode_box_outputs(merged_box, selected_anchors)
    # Convert logits to scores.
    scores = tf.math.sigmoid(merged_cls)
    return boxes, scores, classes
def detection_loss(cls_outputs, box_outputs, labels, params):
    """Computes total detection loss.

    Computes total detection loss including box and class loss from all
    levels.

    Args:
      cls_outputs: an OrderedDict with keys representing levels and values
        representing logits in [batch_size, height, width, num_anchors].
      box_outputs: an OrderedDict with keys representing levels and values
        representing box regression targets in [batch_size, height, width,
        num_anchors * 4].
      labels: the dictionary returned from the dataloader that includes
        groundtruth targets. Read keys: 'mean_num_positives',
        'cls_targets_<level>' and 'box_targets_<level>'.
      params: the dictionary of training parameters. Read keys:
        'positives_momentum' (optional), 'num_classes', 'data_format',
        'alpha', 'gamma', 'label_smoothing', 'box_loss_weight', 'delta',
        'iou_loss_type', 'iou_loss_weight', and, when 'iou_loss_type' is set,
        the anchor configuration plus 'batch_size'.

    Returns:
      total_loss: class loss plus weighted box and box-IoU losses.
      cls_loss: a float tensor, the sum over levels of the per-level class
        loss, each clipped to [0.0, 2.0].
      box_loss: the sum over levels of the box regression loss, or 0 when
        'box_loss_weight' is falsy.
      box_iou_loss: the box IoU loss, or 0 when 'iou_loss_type' is falsy.
    """
    # Sum all positives in a batch for normalization; the +1.0 avoids a zero
    # normalizer, which would lead to inf loss during training.
    num_positives_sum = tf.reduce_sum(labels['mean_num_positives']) + 1.0

    positives_momentum = params.get('positives_momentum', None) or 0
    if positives_momentum > 0:
        # Smooth the normalizer with a moving average for training stability.
        moving_normalizer_var = tf.Variable(
            0.0,
            name='moving_normalizer',
            dtype=tf.float32,
            synchronization=tf.VariableSynchronization.ON_READ,
            trainable=False,
            aggregation=tf.VariableAggregation.MEAN)
        num_positives_sum = tf.keras.backend.moving_average_update(
            moving_normalizer_var,
            num_positives_sum,
            momentum=params['positives_momentum'])
    elif positives_momentum < 0:
        num_positives_sum = utils.cross_replica_mean(num_positives_sum)

    levels = cls_outputs.keys()
    cls_losses = []
    box_losses = []

    for level in levels:
        cls_key = 'cls_targets_%d' % level

        # One-hot encoding for classification labels.
        one_hot_targets = tf.one_hot(labels[cls_key], params['num_classes'])

        # The one-hot tensor is rank 5. Which axes are spatial depends on the
        # data format; the anchor axis is merged with the class axis for the
        # focal loss and split apart again afterwards.
        dims = one_hot_targets.get_shape().as_list()
        if params['data_format'] == 'channels_first':
            bs, _, width, height, _ = dims
            merged_shape = [bs, -1, width, height]
            split_shape = [bs, -1, width, height, params['num_classes']]
        else:
            bs, width, height, _, _ = dims
            merged_shape = [bs, width, height, -1]
            split_shape = [bs, width, height, -1, params['num_classes']]

        cls_targets_at_level = tf.reshape(one_hot_targets, merged_shape)
        box_targets_at_level = labels['box_targets_%d' % level]

        cls_loss = focal_loss(
            cls_outputs[level],
            cls_targets_at_level,
            params['alpha'],
            params['gamma'],
            normalizer=num_positives_sum,
            label_smoothing=params['label_smoothing'])
        cls_loss = tf.reshape(cls_loss, split_shape)

        # Entries whose class target equals -2 contribute nothing to the loss.
        keep_mask = tf.cast(
            tf.expand_dims(tf.not_equal(labels[cls_key], -2), -1), tf.float32)
        cls_loss = cls_loss * keep_mask
        cls_losses.append(
            tf.clip_by_value(tf.reduce_sum(cls_loss), 0.0, 2.0))

        if params['box_loss_weight']:
            box_losses.append(
                _box_loss(
                    box_outputs[level],
                    box_targets_at_level,
                    num_positives_sum,
                    delta=params['delta']))

    if not params['iou_loss_type']:
        box_iou_loss = 0
    else:
        input_anchors = anchors.Anchors(
            params['min_level'],
            params['max_level'],
            params['num_scales'],
            params['aspect_ratios'],
            params['anchor_scale'],
            params['image_size'])
        # Flatten every level to rows of 4 values and stack the levels.
        flat_outputs = tf.concat(
            [tf.reshape(box_outputs[lvl], [-1, 4]) for lvl in levels], axis=0)
        flat_targets = tf.concat(
            [tf.reshape(labels['box_targets_%d' % lvl], [-1, 4])
             for lvl in levels],
            axis=0)
        # NOTE(review): the rows above are grouped by level, while the anchors
        # below repeat the whole anchor set `batch_size` times -- confirm the
        # two orderings line up when batch_size > 1.
        anchor_boxes = tf.tile(input_anchors.boxes, [params['batch_size'], 1])
        decoded_outputs = anchors.decode_box_outputs(
            flat_outputs, anchor_boxes)
        decoded_targets = anchors.decode_box_outputs(
            flat_targets, anchor_boxes)
        box_iou_loss = _box_iou_loss(
            decoded_outputs,
            decoded_targets,
            num_positives_sum,
            params['iou_loss_type'])

    # Sum per level losses to total loss.
    cls_loss = tf.add_n(cls_losses)
    box_loss = tf.add_n(box_losses) if box_losses else 0
    total_loss = (
        cls_loss
        + params['box_loss_weight'] * box_loss
        + params['iou_loss_weight'] * box_iou_loss)
    return total_loss, cls_loss, box_loss, box_iou_loss
def detection_loss(cls_outputs, box_outputs, labels, params):
    """Computes total detection loss.

    Computes total detection loss including box and class loss from all
    levels. When `params['sumrule']` is set, the one-hot class targets are
    rewritten so that every label listed in the rule is redistributed onto
    the classes it maps to.

    Args:
      cls_outputs: an OrderedDict with keys representing levels and values
        representing logits in [batch_size, height, width, num_anchors].
      box_outputs: an OrderedDict with keys representing levels and values
        representing box regression targets in [batch_size, height, width,
        num_anchors * 4].
      labels: the dictionary returned from the dataloader that includes
        groundtruth targets. Read keys: 'mean_num_positives',
        'cls_targets_<level>' and 'box_targets_<level>'.
      params: the dictionary of training parameters. Read keys: 'sumrule'
        (optional dict mapping a class id to a list of class ids, using ids
        that are one greater than the values stored in the class targets),
        'num_classes', 'data_format', 'alpha', 'gamma', 'label_smoothing',
        'box_loss_weight', 'delta', 'iou_loss_type', 'iou_loss_weight', and,
        when 'iou_loss_type' is set, the anchor configuration plus
        'batch_size'.

    Returns:
      total_loss: class loss plus weighted box and box-IoU losses.
      cls_loss: a float tensor, the sum over levels of the class loss.
      box_loss: the sum over levels of the box regression loss, or 0 when
        'box_loss_weight' is falsy.
      box_iou_loss: the box IoU loss, or 0 when 'iou_loss_type' is falsy.
    """
    # Sum all positives in a batch for normalization; the +1.0 avoids a zero
    # normalizer, which would lead to inf loss during training.
    num_positives_sum = tf.reduce_sum(labels['mean_num_positives']) + 1.0

    levels = cls_outputs.keys()
    cls_losses = []
    box_losses = []

    # Class targets have already been shifted by -1 (background becomes -1 and
    # real classes start at 0), so shift the rule's keys and values to match.
    raw_sumrule = params.get('sumrule') or {}
    sumrule = {
        src - 1: [dst - 1 for dst in dsts] for src, dsts in raw_sumrule.items()
    }

    def table_lookup(values, old_onehot, cls_targets_at_level):
        """Redistribute one-hot rows according to `sumrule` (NumPy side)."""
        # Work on a copy so the buffer handed over by tf.numpy_function is
        # never mutated in place.
        refined = np.array(old_onehot)
        for val in values:
            new_val = sumrule.get(val)
            if not new_val:
                continue
            # 0.5 suits sigmoid cross entropy; a softmax loss would use
            # 1.0 / len(new_val) instead. A single target keeps full weight.
            prob = 1.0 if len(new_val) == 1 else 0.5
            matching = np.where(cls_targets_at_level == val)
            rows = refined[matching]
            # Clear the source class BEFORE writing the targets: when a label
            # maps to itself, clearing afterwards would erase the value that
            # was just written and leave the row without any positive class.
            rows[:, val] = 0
            rows[:, new_val] = prob
            refined[matching] = rows
        return refined

    for level in levels:
        cls_key = 'cls_targets_%d' % level

        # One-hot encoding for classification labels.
        _cls_targets_at_level = tf.one_hot(labels[cls_key],
                                           params['num_classes'])
        if sumrule:
            unique_labels, _ = tf.unique(tf.reshape(labels[cls_key], [-1]))
            # Refine the one-hot labels so that each label is mapped to its
            # finest leaves.
            cls_targets_at_level = tf.numpy_function(
                table_lookup,
                [unique_labels, _cls_targets_at_level, labels[cls_key]],
                _cls_targets_at_level.dtype)
            # Restore the static shape, which tf.numpy_function does not
            # propagate to its output.
            cls_targets_at_level = tf.reshape(cls_targets_at_level,
                                              _cls_targets_at_level.shape)
        else:
            cls_targets_at_level = _cls_targets_at_level

        # The one-hot tensor is rank 5. Which axes are spatial depends on the
        # data format; the anchor axis is merged with the class axis for the
        # focal loss and split apart again afterwards.
        dims = cls_targets_at_level.get_shape().as_list()
        if params['data_format'] == 'channels_first':
            bs, _, width, height, _ = dims
            merged_shape = [bs, -1, width, height]
            split_shape = [bs, -1, width, height, params['num_classes']]
        else:
            bs, width, height, _, _ = dims
            merged_shape = [bs, width, height, -1]
            split_shape = [bs, width, height, -1, params['num_classes']]

        cls_targets_at_level = tf.reshape(cls_targets_at_level, merged_shape)
        box_targets_at_level = labels['box_targets_%d' % level]

        cls_loss = focal_loss(
            cls_outputs[level],
            cls_targets_at_level,
            params['alpha'],
            params['gamma'],
            normalizer=num_positives_sum,
            label_smoothing=params['label_smoothing'])
        cls_loss = tf.reshape(cls_loss, split_shape)

        # Entries whose class target equals -2 contribute nothing to the loss.
        cls_loss *= tf.cast(
            tf.expand_dims(tf.not_equal(labels[cls_key], -2), -1), tf.float32)
        cls_losses.append(tf.reduce_sum(cls_loss))

        if params['box_loss_weight']:
            box_losses.append(
                _box_loss(
                    box_outputs[level],
                    box_targets_at_level,
                    num_positives_sum,
                    delta=params['delta']))

    if params['iou_loss_type']:
        input_anchors = anchors.Anchors(
            params['min_level'],
            params['max_level'],
            params['num_scales'],
            params['aspect_ratios'],
            params['anchor_scale'],
            params['image_size'])
        # Flatten every level to rows of 4 values and stack the levels.
        flat_outputs = tf.concat(
            [tf.reshape(box_outputs[lvl], [-1, 4]) for lvl in levels], axis=0)
        flat_targets = tf.concat(
            [tf.reshape(labels['box_targets_%d' % lvl], [-1, 4])
             for lvl in levels],
            axis=0)
        # NOTE(review): the rows above are grouped by level, while the anchors
        # below repeat the whole anchor set `batch_size` times -- confirm the
        # two orderings line up when batch_size > 1.
        anchor_boxes = tf.tile(input_anchors.boxes, [params['batch_size'], 1])
        decoded_outputs = anchors.decode_box_outputs(
            flat_outputs, anchor_boxes)
        decoded_targets = anchors.decode_box_outputs(
            flat_targets, anchor_boxes)
        box_iou_loss = _box_iou_loss(
            decoded_outputs,
            decoded_targets,
            num_positives_sum,
            params['iou_loss_type'])
    else:
        box_iou_loss = 0

    # Sum per level losses to total loss.
    cls_loss = tf.add_n(cls_losses)
    box_loss = tf.add_n(box_losses) if box_losses else 0
    total_loss = (
        cls_loss
        + params['box_loss_weight'] * box_loss
        + params['iou_loss_weight'] * box_iou_loss)
    return total_loss, cls_loss, box_loss, box_iou_loss