def det_post_process_combined(params, cls_outputs, box_outputs, scales,
                              min_score_thresh, max_boxes_to_draw):
    """A combined version of det_post_process with dynamic batch size support.

    Merges the per-level class and box predictions, decodes the boxes against
    the anchors, runs combined (batched, per-class) non-max suppression and
    packs the surviving detections into a single tensor.

    The input dicts are NOT modified: when `params['data_format']` is
    'channels_first' the transposed tensors are kept in local variables, so
    calling this function repeatedly with the same dicts is safe.

    Args:
      params: a dict of parameters. Reads 'min_level', 'max_level',
        'data_format', 'num_classes', 'num_scales', 'aspect_ratios',
        'anchor_scale' and 'image_size'.
      cls_outputs: a dict mapping level -> class logits tensor.
      box_outputs: a dict mapping level -> box regression tensor.
      scales: per-image scale factors applied to the box coordinates;
        presumably of shape [batch_size] -- TODO confirm against callers.
      min_score_thresh: score threshold passed to NMS.
      max_boxes_to_draw: max number of boxes kept per class and in total per
        image.

    Returns:
      A float32 tensor named 'detections' of shape
      [batch_size, max_boxes_to_draw, 7] whose last axis is
      (image_id, y, x, height, width, score, class), with class ids shifted
      by +1 relative to the NMS output.
    """
    # Dynamic batch size, read from the first level's class output.
    batch_size = tf.shape(list(cls_outputs.values())[0])[0]
    channels_first = params['data_format'] == 'channels_first'

    cls_outputs_all = []
    box_outputs_all = []
    # Concatenates class and box of all levels into one tensor.
    for level in range(params['min_level'], params['max_level'] + 1):
        # Use locals instead of writing back into the caller's dicts.
        level_cls = cls_outputs[level]
        level_box = box_outputs[level]
        if channels_first:
            level_cls = tf.transpose(level_cls, [0, 2, 3, 1])
            level_box = tf.transpose(level_box, [0, 2, 3, 1])

        cls_outputs_all.append(
            tf.reshape(level_cls, [batch_size, -1, params['num_classes']]))
        box_outputs_all.append(tf.reshape(level_box, [batch_size, -1, 4]))
    cls_outputs_all = tf.concat(cls_outputs_all, 1)
    box_outputs_all = tf.concat(box_outputs_all, 1)

    # Create anchor_label for picking top-k predictions.
    eval_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                   params['num_scales'],
                                   params['aspect_ratios'],
                                   params['anchor_scale'],
                                   params['image_size'])
    anchor_boxes = eval_anchors.boxes
    scores = tf.math.sigmoid(cls_outputs_all)

    # Apply bounding box regression to anchors.
    boxes = anchors.decode_box_outputs_tf(box_outputs_all, anchor_boxes)
    # combined_non_max_suppression expects a per-class axis; size 1 means the
    # same boxes are shared by all classes.
    boxes = tf.expand_dims(boxes, axis=2)
    # Trailing axis lets the scales broadcast over the boxes of each image.
    scales = tf.expand_dims(scales, axis=-1)

    # The fourth output (valid_detections) is unused for now; it may be used
    # in the future.
    nmsed_boxes, nmsed_scores, nmsed_classes, _ = (
        tf.image.combined_non_max_suppression(
            boxes,
            scores,
            max_boxes_to_draw,
            max_boxes_to_draw,
            score_threshold=min_score_thresh,
            clip_boxes=False))

    # One image index per detection slot: [batch_size, max_boxes_to_draw].
    image_ids = tf.cast(
        tf.tile(
            tf.expand_dims(tf.range(batch_size), axis=1),
            [1, max_boxes_to_draw]),
        dtype=tf.float32)

    # Box components 0..3 are treated as (y_min, x_min, y_max, x_max).
    y = nmsed_boxes[..., 0] * scales
    x = nmsed_boxes[..., 1] * scales
    height = nmsed_boxes[..., 2] * scales - y
    width = nmsed_boxes[..., 3] * scales - x

    detection_list = [
        # Format: (image_ids, y, x, height, width, score, class)
        image_ids,
        y,
        x,
        height,
        width,
        nmsed_scores,
        tf.cast(nmsed_classes + 1, tf.float32),
    ]
    detections = tf.stack(detection_list, axis=2, name='detections')
    return detections
def pre_nms(params, cls_outputs, box_outputs) -> Tuple[T, T, T]:
    """Detection post processing that runs before NMS.

    Merges the multi-level class and box predictions from the network into
    unified tensors, keeps the top-k candidates, decodes their boxes against
    the matching anchors and converts the class logits to scores.

    Args:
      params: a dict of parameters.
      cls_outputs: a list of tensors for classes, each tensor denotes a level
        of logits with shape [N, H, W, num_class * num_anchors].
      box_outputs: a list of tensors for boxes, each tensor denotes a level of
        boxes with shape [N, H, W, 4 * num_anchors].

    Returns:
      A tuple of (boxes, scores, classes).
    """
    merged_cls, merged_box = merge_class_box_level_outputs(
        params, cls_outputs, box_outputs)
    topk_logits, topk_box_deltas, classes, indices = topk_class_boxes(
        params, merged_cls, merged_box)

    # Build the anchors and keep only those matching the top-k selections.
    anchor_arg_keys = ('min_level', 'max_level', 'num_scales',
                       'aspect_ratios', 'anchor_scale', 'image_size')
    anchor_grid = anchors.Anchors(*[params[key] for key in anchor_arg_keys])
    selected_anchors = tf.gather(anchor_grid.boxes, indices)

    # Apply the bounding box regression to the selected anchors.
    boxes = anchors.decode_box_outputs_tf(topk_box_deltas, selected_anchors)

    # Convert logits to scores.
    scores = tf.math.sigmoid(topk_logits)
    return boxes, scores, classes