def _build_per_path_attributes(self):
    """Create the grid generator and, when enabled, the pair-wise search.

    The objects built here are used for box decoding and dynamic ground
    truth association.

    `self._anchor_generator` is always created from `self._anchors` and
    `self._path_stride`. `self._search_pairs` is only created when
    `self._ignore_thresh` is strictly positive.
    """
    self._anchor_generator = loss_utils.GridGenerator(
        anchors=self._anchors, scale_anchors=self._path_stride)

    # Without a positive ignore threshold no pair-wise search is built.
    if not self._ignore_thresh > 0.0:
        return

    self._search_pairs = loss_utils.PairWiseSearch(
        iou_type='iou', any_match=True, min_conf=0.25)
def get_generators(self, anchors, path_scale, path_key):
    """Build the grid generator for a single output path.

    Args:
      anchors: anchors handed to `loss_utils.GridGenerator`.
      path_scale: value forwarded to the generator as `scale_anchors`.
      path_key: identifier of the path; not used by this implementation.

    Returns:
      The newly constructed `loss_utils.GridGenerator`.
    """
    return loss_utils.GridGenerator(anchors, scale_anchors=path_scale)
def __init__(self,
             anchors,
             classes,
             iou_thresh=0.0,
             ignore_thresh=0.7,
             truth_thresh=1.0,
             nms_thresh=0.6,
             max_delta=10.0,
             loss_type='ciou',
             iou_normalizer=1.0,
             cls_normalizer=1.0,
             object_normalizer=1.0,
             use_scaled_loss=False,
             update_on_repeat=False,
             pre_nms_points=5000,
             label_smoothing=0.0,
             max_boxes=200,
             box_type='original',
             path_scale=None,
             scale_xy=None,
             nms_type='greedy',
             objectness_smooth=False,
             **kwargs):
    """Store the parameters used at each detection head output.

    Args:
      anchors: `dict` mapping every output level key to the
        `List[List[int]]` of anchor boxes used at that level. When
        `path_scale` is not given, each key must be convertible with
        `int()`.
      classes: `int` for the number of classes.
      iou_thresh: `float` to use many anchors per object if
        IoU(Obj, Anchor) > iou_thresh.
      ignore_thresh: `float` for the IOU value over which the loss is not
        propagated, and a detection is assumed to have been made.
      truth_thresh: `float` for the IOU value over which the loss is
        propagated despite a detection being made.
      nms_thresh: `float` for the minimum IOU value for an overlap.
      max_delta: gradient clipping to apply to the box loss.
      loss_type: `str` for the type of IOU loss to use, one of
        {ciou, diou, giou, iou}.
      iou_normalizer: `float` for how much to scale the loss on the IOU or
        the boxes.
      cls_normalizer: `float` for how much to scale the loss on the
        classes.
      object_normalizer: `float` for how much to scale the loss on the
        detection map.
      use_scaled_loss: `bool` for whether to use the scaled loss or the
        traditional loss.
      update_on_repeat: `bool` indicating how repeated indexes in a given
        [j, i] index are handled. Setting this to True will give more
        consistent MAP, setting it to False will improve recall by 1-2%
        but will sacrifice some MAP.
      pre_nms_points: `int` number of top candidate detections per class
        before NMS.
      label_smoothing: `float` for how much to smooth the loss on the
        classes.
      max_boxes: `int` for the maximum number of boxes retained over all
        classes.
      box_type: `str`, one of {original, scaled, anchor_free}; each
        affects training differently. The original method decodes the
        boxes by applying an exponential to the model width and height
        maps, then scaling the maps by the anchor boxes. This method is
        used in Yolo-v4, Yolo-v3, and all its counterparts. The scaled
        method squares the width and height and scales both by a fixed
        factor of 4. This method is used in the Scaled Yolo models, as
        well as Yolov4-CSP. Finally, anchor_free is like the original
        method but will not apply an activation function to the boxes;
        this is used for some of the newer anchor free versions of YOLO.
      path_scale: `dict` for the size of the input tensors, keyed like
        `anchors`. When empty or None it defaults to `2**int(key)` for
        every key of `anchors`.
      scale_xy: `dict` of `float` values indicating how far each pixel can
        see outside of its containment of 1.0. A value of 1.2 indicates
        there is a 20% extended radius around each pixel that this
        specific pixel can predict values for a center at. The center can
        range from 0 - value/2 to 1 + value/2. This value is set in the
        yolo filter and reused here. There should be one value for each
        level from min_level to max_level. When empty or None it defaults
        to 1.0 for every key of `anchors`.
      nms_type: `str` for which non max suppression to use.
      objectness_smooth: how much to smooth the loss on the detection map.
        Defaults to `False`.
      **kwargs: Additional keyword arguments forwarded to the parent
        constructor.
    """
    super().__init__(**kwargs)
    self._anchors = anchors

    # Thresholds and loss weights.
    self._thresh = iou_thresh
    self._ignore_thresh = ignore_thresh
    self._truth_thresh = truth_thresh
    self._iou_normalizer = iou_normalizer
    self._cls_normalizer = cls_normalizer
    self._object_normalizer = object_normalizer
    self._objectness_smooth = objectness_smooth

    # Post-processing limits.
    self._nms_thresh = nms_thresh
    self._max_boxes = max_boxes
    self._max_delta = max_delta

    # Loss configuration.
    self._classes = classes
    self._loss_type = loss_type
    self._use_scaled_loss = use_scaled_loss
    self._update_on_repeat = update_on_repeat
    self._pre_nms_points = pre_nms_points
    self._label_smoothing = label_smoothing

    # Per-level bookkeeping; the level keys come straight from `anchors`.
    self._keys = list(anchors.keys())
    self._len_keys = len(self._keys)
    self._box_type = box_type

    if not path_scale:
        # No explicit scale: derive one from the numeric value of each key.
        path_scale = {key: 2**int(key) for key in self._keys}
    self._path_scale = path_scale

    self._nms_type = nms_type

    if not scale_xy:
        scale_xy = dict.fromkeys(anchors, 1.0)
    self._scale_xy = scale_xy

    # One grid generator and one anchor count per level.
    self._generator = {}
    self._len_mask = {}
    for key in self._keys:
        level_anchors = self._anchors[key]
        self._generator[key] = loss_utils.GridGenerator(
            level_anchors, scale_anchors=self._path_scale[key])
        self._len_mask[key] = len(level_anchors)
def _get_anchor_free(self, key, boxes, classes, height, width, stride,
                     center_radius):
    """Find the box assignments in an anchor free paradigm.

    A grid cell is assigned to a ground truth box when it passes both the
    box test (inside the box, or inside the regression range of this level
    when level limits are configured) and the center test (within
    `center_radius` cells of the box center). With
    `self.use_tie_breaker` set, a cell matched by several boxes keeps only
    the box(es) with the smallest area.

    Args:
      key: key used to look up `self.anchor_free_level_limits`.
      boxes: tensor of ground truth boxes whose last dimension holds
        (x center, y center, width, height). The values are multiplied by
        the grid size times `stride`, so they are presumably normalized
        to the image -- TODO confirm against the caller.
      classes: tensor of class ids, indexed the same way as `boxes`.
      height: height of the grid for this level.
      width: width of the grid for this level.
      stride: size of one grid cell in pixels.
      center_radius: radius, in grid cells, around a box center inside
        which a cell counts as being at the center.

    Returns:
      A tuple `(indexes, samples)`. `indexes` is an int32 tensor whose rows
      are `[y, x, 0]` grid positions (assumes the match mask is laid out as
      (height, width, boxes) -- TODO confirm). `samples` concatenates, along
      the last axis, the matched boxes as passed in, a confidence of one
      and the matched class cast to the dtype of the boxes.
    """
    level_limits = self.anchor_free_level_limits[key]

    # Pixel-space center of every cell of this level's grid.
    gen = loss_utils.GridGenerator(anchors=[[1, 1]], scale_anchors=stride)
    grid_points = gen(width, height, 1, boxes.dtype)[0]
    grid_points = tf.squeeze(grid_points, axis=0)
    grid_points = (grid_points + 0.5) * stride
    x_centers, y_centers = grid_points[..., 0], grid_points[..., 1]

    # Work on a pixel-space copy; `boxes` itself is kept untouched because
    # the returned samples are gathered from the boxes as passed in.
    pixel_boxes = boxes * (
        tf.convert_to_tensor([width, height, width, height]) * stride)
    tlbr_boxes = box_ops.xcycwh_to_yxyx(pixel_boxes)
    pixel_boxes = tf.reshape(pixel_boxes, [1, 1, -1, 4])
    tlbr_boxes = tf.reshape(tlbr_boxes, [1, 1, -1, 4])
    if self.use_tie_breaker:
        area = tf.reduce_prod(pixel_boxes[..., 2:], axis=-1)

    # Check if the box is in the receptive field of this FPN level.
    b_t = y_centers - tlbr_boxes[..., 0]
    b_l = x_centers - tlbr_boxes[..., 1]
    b_b = tlbr_boxes[..., 2] - y_centers
    b_r = tlbr_boxes[..., 3] - x_centers
    box_delta = tf.stack([b_t, b_l, b_b, b_r], axis=-1)
    if level_limits is not None:
        # The largest regression target must fall inside the level's range.
        max_reg_targets_per_im = tf.reduce_max(box_delta, axis=-1)
        gt_min = max_reg_targets_per_im >= level_limits[0]
        gt_max = max_reg_targets_per_im <= level_limits[1]
        is_in_boxes = tf.logical_and(gt_min, gt_max)
    else:
        # All four distances positive means the cell center is in the box.
        is_in_boxes = tf.reduce_min(box_delta, axis=-1) > 0.0
    is_in_boxes_all = tf.reduce_any(is_in_boxes, axis=(0, 1), keepdims=True)

    # Check if the center is in the receptive field of this FPN level.
    radius = center_radius * stride
    c_t = y_centers - (pixel_boxes[..., 1] - radius)
    c_l = x_centers - (pixel_boxes[..., 0] - radius)
    c_b = (pixel_boxes[..., 1] + radius) - y_centers
    c_r = (pixel_boxes[..., 0] + radius) - x_centers
    centers_delta = tf.stack([c_t, c_l, c_b, c_r], axis=-1)
    is_in_centers = tf.reduce_min(centers_delta, axis=-1) > 0.0
    is_in_centers_all = tf.reduce_any(
        is_in_centers, axis=(0, 1), keepdims=True)

    # Collate all masks to get the final locations.
    is_in_index = tf.logical_or(is_in_boxes_all, is_in_centers_all)
    is_in_boxes_and_center = tf.logical_and(is_in_boxes, is_in_centers)
    is_in_boxes_and_center = tf.logical_and(is_in_index,
                                            is_in_boxes_and_center)

    if self.use_tie_breaker:
        # Matched entries carry the box area, unmatched ones carry INF, so
        # the minimum over the box axis picks the smallest matched box.
        boxes_all = tf.cast(is_in_boxes_and_center, area.dtype)
        boxes_all = ((boxes_all * area) + ((1 - boxes_all) * INF))
        boxes_min = tf.reduce_min(boxes_all, axis=-1, keepdims=True)
        # A minimum of INF means nothing matched; -1.0 can never equal an
        # entry of `boxes_all`, so such cells end up with no assignment.
        boxes_min = tf.where(boxes_min == INF, -1.0, boxes_min)
        is_in_boxes_and_center = boxes_all == boxes_min

    # Coordinates of every selected (cell, box) pair.
    indexes = tf.cast(tf.where(is_in_boxes_and_center), tf.int32)
    y, x, t = tf.split(indexes, 3, axis=-1)

    matched_boxes = tf.gather_nd(boxes, t)
    matched_classes = tf.cast(tf.gather_nd(classes, t), matched_boxes.dtype)
    matched_classes = tf.expand_dims(matched_classes, axis=-1)
    conf = tf.ones_like(matched_classes)

    # Return the samples and the indexes.
    samples = tf.concat([matched_boxes, conf, matched_classes], axis=-1)
    indexes = tf.concat([y, x, tf.zeros_like(t)], axis=-1)
    return indexes, samples