def __call__(self, loc, gt_loc, gt_label):
    """Smooth-L1 localization loss averaged over positive samples.

    Only rows whose ground-truth label is positive (``gt_label > 0``)
    contribute to the loss; the sum is normalized by the number of such
    rows (plus ``self._eps`` to avoid division by zero when there are
    no positives).
    """
    xp = cuda.get_array_module(loc.array)
    # Indices of foreground (positive-label) samples.
    fg_index = xp.where(gt_label > 0)[0]
    fg_loc = loc[fg_index]
    fg_gt_loc = gt_loc[fg_index]
    denom = fg_loc.shape[0] + self._eps
    return F.sum(smooth_l1(fg_loc, fg_gt_loc, self._beta)) / denom
def bbox_head_loss_post(locs, confs, roi_indices, gt_locs, gt_labels, batchsize):
    """Loss function for Head (post).

    Args:
        locs (array): An array whose shape is :math:`(R, n\\_class, 4)`,
            where :math:`R` is the total number of RoIs in the given batch.
        confs (array): An iterable of arrays whose shape is
            :math:`(R, n\\_class)`.
        roi_indices (list of arrays): A list of arrays returned by
            :func:`bbox_head_locs_pre`.
        gt_locs (list of arrays): A list of arrays returned by
            :func:`bbox_head_locs_pre`.
        gt_labels (list of arrays): A list of arrays returned by
            :func:`bbox_head_locs_pre`.
        batchsize (int): The size of batch.

    Returns:
        tuple of two variables:
        :obj:`loc_loss` and :obj:`conf_loss`.
    """
    xp = cuda.get_array_module(locs.array, confs.array)

    # Flatten the per-image lists into single batch-wide arrays.
    roi_indices = xp.hstack(roi_indices).astype(np.int32)
    gt_locs = xp.vstack(gt_locs).astype(np.float32)
    gt_labels = xp.hstack(gt_labels).astype(np.int32)

    loc_loss = 0
    conf_loss = 0
    # Accumulate losses image by image; each image's loc loss is
    # normalized by the number of RoIs sampled for that image.
    for image_index in np.unique(cuda.to_cpu(roi_indices)):
        select = roi_indices == image_index
        gt_loc = gt_locs[select]
        gt_label = gt_labels[select]
        n_sample = select.sum()

        # Foreground rows, and the predicted loc for each row's gt class.
        fg = xp.where(gt_label > 0)[0]
        fg_loc = locs[select][fg, gt_label[fg]]
        loc_loss += F.sum(smooth_l1(fg_loc, gt_loc[fg], 1)) / n_sample
        conf_loss += F.softmax_cross_entropy(confs[select], gt_label)

    loc_loss /= batchsize
    conf_loss /= batchsize
    return loc_loss, conf_loss
def rpn_loss(locs, confs, anchors, sizes, bboxes):
    """Loss function for RPN.

    Args:
        locs (iterable of arrays): An iterable of arrays whose shape is
            :math:`(N, K_l, 4)`, where :math:`K_l` is the number of
            the anchor boxes of the :math:`l`-th level.
        confs (iterable of arrays): An iterable of arrays whose shape is
            :math:`(N, K_l)`.
        anchors (list of arrays): A list of arrays returned by
            :meth:`anchors`.
        sizes (list of tuples of two ints): A list of
            :math:`(H_n, W_n)`, where :math:`H_n` and :math:`W_n`
            are height and width of the :math:`n`-th image.
        bboxes (list of arrays): A list of arrays whose shape is
            :math:`(R_n, 4)`, where :math:`R_n` is the number of
            ground truth bounding boxes.

    Returns:
        tuple of two variables:
        :obj:`loc_loss` and :obj:`conf_loss`.
    """
    # Anchor-assignment thresholds and per-image sampling budget.
    fg_thresh = 0.7
    bg_thresh = 0.3
    batchsize_per_image = 256
    fg_ratio = 0.25

    # Concatenate predictions from all pyramid levels.
    locs = F.concat(locs)
    confs = F.concat(confs)

    xp = cuda.get_array_module(locs.array, confs.array)

    anchors = xp.vstack(anchors)
    # Anchor centers and sizes (anchors are stored as tlbr corners).
    anchors_yx = (anchors[:, 2:] + anchors[:, :2]) / 2
    anchors_hw = anchors[:, 2:] - anchors[:, :2]

    loc_loss = 0
    conf_loss = 0
    for i in range(len(sizes)):
        if len(bboxes[i]) > 0:
            iou = utils.bbox_iou(anchors, bboxes[i])

            # Regression target: each anchor's best-overlapping gt box.
            gt_loc = bboxes[i][iou.argmax(axis=1)].copy()
            # tlbr -> yxhw
            gt_loc[:, 2:] -= gt_loc[:, :2]
            gt_loc[:, :2] += gt_loc[:, 2:] / 2
            # offset
            gt_loc[:, :2] = (gt_loc[:, :2] - anchors_yx) / anchors_hw
            gt_loc[:, 2:] = xp.log(gt_loc[:, 2:] / anchors_hw)
        else:
            # No gt boxes: gt_loc is never read (no anchor gets label 1).
            gt_loc = xp.empty_like(anchors)

        # Labels: 1 = foreground, 0 = background, -1 = ignored.
        gt_label = xp.empty(len(anchors), dtype=np.int32)
        gt_label[:] = -1

        # Only anchors fully inside the i-th image are considered.
        mask = xp.logical_and(
            anchors[:, :2] >= 0,
            anchors[:, 2:] < xp.array(sizes[i])).all(axis=1)

        if len(bboxes[i]) > 0:
            # Foreground: anchors with the highest IoU for some gt box,
            # plus any anchor whose best IoU exceeds fg_thresh.
            gt_label[xp.where(mask)[0][
                (iou[mask] == iou[mask].max(axis=0)).any(axis=1)]] = 1
            gt_label[xp.logical_and(mask, iou.max(axis=1) >= fg_thresh)] = 1

        # Subsample foreground down to fg_ratio of the per-image budget.
        fg_index = xp.where(gt_label == 1)[0]
        n_fg = int(batchsize_per_image * fg_ratio)
        if len(fg_index) > n_fg:
            gt_label[choice(fg_index, size=len(fg_index) - n_fg)] = -1

        # Background candidates: in-image anchors with low best IoU
        # (all in-image anchors when there are no gt boxes).
        if len(bboxes[i]) > 0:
            bg_index = xp.where(xp.logical_and(
                mask, iou.max(axis=1) < bg_thresh))[0]
        else:
            bg_index = xp.where(mask)[0]
        n_bg = batchsize_per_image - int((gt_label == 1).sum())
        if len(bg_index) > n_bg:
            # NOTE(review): randint samples WITH replacement, so fewer
            # than n_bg unique backgrounds may be labeled — presumably a
            # deliberate speed trade-off; confirm against upstream.
            gt_label[bg_index[
                xp.random.randint(len(bg_index), size=n_bg)]] = 0

        # Both losses are normalized by the number of sampled anchors.
        n_sample = (gt_label >= 0).sum()
        loc_loss += F.sum(smooth_l1(
            locs[i][gt_label == 1], gt_loc[gt_label == 1], 1 / 9)) / n_sample
        conf_loss += F.sum(F.sigmoid_cross_entropy(
            confs[i][gt_label >= 0], gt_label[gt_label >= 0], reduce='no')) \
            / n_sample

    loc_loss /= len(sizes)
    conf_loss /= len(sizes)
    return loc_loss, conf_loss