示例#1
0
 def __call__(self, loc, gt_loc, gt_label):
     """Compute the localization loss over positively labelled rows.

     Rows whose ``gt_label`` entry is greater than zero are selected from
     both ``loc`` and ``gt_loc``. The summed ``smooth_l1`` loss of those
     rows is divided by the number of selected rows plus ``self._eps``.

     Args:
         loc: Predicted localization values. Must expose ``.array``
             (presumably a :class:`chainer.Variable` -- TODO confirm).
         gt_loc: Target localization values, indexed by the same rows
             as ``loc``.
         gt_label: Label array; entries ``> 0`` mark the rows that
             contribute to the loss (assumed to be 1-D -- TODO confirm).

     Returns:
         The summed smooth L1 loss divided by the adjusted row count.
     """
     xp = cuda.get_array_module(loc.array)

     # Row indices of the positive samples, shared by both operands.
     positive_rows = xp.where(gt_label > 0)[0]
     predicted = loc[positive_rows]
     target = gt_loc[positive_rows]

     # ``self._eps`` is added to the row count; presumably this keeps the
     # division well-defined when no row is positive.
     denominator = predicted.shape[0] + self._eps
     return F.sum(smooth_l1(predicted, target, self._beta)) / denominator
示例#2
0
def bbox_head_loss_post(locs, confs, roi_indices, gt_locs, gt_labels,
                        batchsize):
    r"""Loss function for Head (post).

    RoIs are grouped by the image index stored in ``roi_indices``. For each
    image, a smooth L1 localization loss is computed on the RoIs whose
    label is positive (normalized by the number of RoIs of that image) and
    a softmax cross entropy loss is computed on all of its RoIs. Both
    accumulated losses are finally divided by ``batchsize``.

    Args:
        locs (array): An array whose shape is :math:`(R, n\_class, 4)`,
            where :math:`R` is the total number of RoIs in the given batch.
        confs (array): An array whose shape is :math:`(R, n\_class)`.
        roi_indices (list of arrays): A list of arrays returned by
            :func:`bbox_head_locs_pre`.
        gt_locs (list of arrays): A list of arrays returned by
            :func:`bbox_head_locs_pre`.
        gt_labels (list of arrays): A list of arrays returned by
            :func:`bbox_head_locs_pre`.
        batchsize (int): The size of batch.

    Returns:
        tuple of two variables:
        :obj:`loc_loss` and :obj:`conf_loss`.
    """

    xp = cuda.get_array_module(locs.array, confs.array)

    # Flatten the per-level lists so that row ``r`` of every array
    # describes the same RoI as row ``r`` of ``locs`` / ``confs``.
    roi_indices = xp.hstack(roi_indices).astype(np.int32)
    gt_locs = xp.vstack(gt_locs).astype(np.float32)
    gt_labels = xp.hstack(gt_labels).astype(np.int32)

    loc_loss = 0
    conf_loss = 0
    for i in np.unique(cuda.to_cpu(roi_indices)):
        mask = roi_indices == i
        gt_loc = gt_locs[mask]
        gt_label = gt_labels[mask]

        # Only RoIs with a positive label take part in the regression
        # loss; the label also selects which class-specific box is used.
        positive = gt_label > 0

        n_sample = mask.sum()
        loc_loss += F.sum(
            smooth_l1(
                locs[mask][xp.where(positive)[0], gt_label[positive]],
                gt_loc[positive], 1)) / n_sample
        conf_loss += F.softmax_cross_entropy(confs[mask], gt_label)

    loc_loss /= batchsize
    conf_loss /= batchsize

    return loc_loss, conf_loss
示例#3
0
def rpn_loss(locs, confs, anchors, sizes, bboxes):
    """Loss function for RPN.

    For every image, each anchor is labelled as foreground (``1``),
    background (``0``) or ignored (``-1``) from its IoU with the ground
    truth boxes, and a subset of the labelled anchors is sampled. A smooth
    L1 localization loss over the foreground anchors and a sigmoid cross
    entropy confidence loss over all sampled anchors are accumulated, each
    normalized by the number of sampled anchors, and finally averaged over
    the images.

    Args:
        locs (iterable of arrays): An iterable of arrays whose shape is
            :math:`(N, K_l, 4)`, where :math:`K_l` is the number of
            the anchor boxes of the :math:`l`-th level.
        confs (iterable of arrays): An iterable of arrays whose shape is
            :math:`(N, K_l)`.
        anchors (list of arrays): A list of arrays returned by
            :meth:`anchors`.
        sizes (list of tuples of two ints): A list of
            :math:`(H_n, W_n)`, where :math:`H_n` and :math:`W_n`
            are height and width of the :math:`n`-th image.
        bboxes (list of arrays): A list of arrays whose shape is
            :math:`(R_n, 4)`, where :math:`R_n` is the number of
            ground truth bounding boxes.

    Returns:
        tuple of two variables:
        :obj:`loc_loss` and :obj:`conf_loss`.
    """
    # IoU at or above which an anchor becomes foreground.
    fg_thresh = 0.7
    # IoU below which an anchor is a background candidate.
    bg_thresh = 0.3
    # Sampling budget per image and the foreground share of that budget.
    batchsize_per_image = 256
    fg_ratio = 0.25

    locs = F.concat(locs)
    confs = F.concat(confs)

    xp = cuda.get_array_module(locs.array, confs.array)

    anchors = xp.vstack(anchors)
    anchors_yx = (anchors[:, 2:] + anchors[:, :2]) / 2
    anchors_hw = anchors[:, 2:] - anchors[:, :2]

    n_anchor = len(anchors)
    n_image = len(sizes)
    max_fg = int(batchsize_per_image * fg_ratio)

    loc_loss = 0
    conf_loss = 0
    for i, size in enumerate(sizes):
        bbox = bboxes[i]
        has_gt = len(bbox) > 0

        if has_gt:
            iou = utils.bbox_iou(anchors, bbox)
            max_iou = iou.max(axis=1)

            # Regression target: the ground truth box that overlaps each
            # anchor the most, encoded relative to that anchor.
            gt_loc = bbox[iou.argmax(axis=1)].copy()
            # (y_min, x_min, y_max, x_max) -> (center_y, center_x, h, w)
            gt_loc[:, 2:] -= gt_loc[:, :2]
            gt_loc[:, :2] += gt_loc[:, 2:] / 2
            # Center offset in anchor units and log-scale size ratio.
            gt_loc[:, :2] = (gt_loc[:, :2] - anchors_yx) / anchors_hw
            gt_loc[:, 2:] = xp.log(gt_loc[:, 2:] / anchors_hw)
        else:
            # Never read: without ground truth no anchor is foreground.
            gt_loc = xp.empty_like(anchors)

        # Every anchor starts out as "ignored".
        gt_label = xp.full(n_anchor, -1, dtype=np.int32)

        # Anchors lying completely inside the image.
        inside = xp.logical_and(
            anchors[:, :2] >= 0,
            anchors[:, 2:] < xp.array(size)).all(axis=1)

        if has_gt:
            # Foreground (a): inside anchors that reach the best IoU of
            # at least one ground truth box.
            inside_index = xp.where(inside)[0]
            inside_iou = iou[inside]
            is_best = (inside_iou == inside_iou.max(axis=0)).any(axis=1)
            gt_label[inside_index[is_best]] = 1
            # Foreground (b): inside anchors above the IoU threshold.
            gt_label[xp.logical_and(inside, max_iou >= fg_thresh)] = 1

        # Drop surplus foreground anchors back to "ignored".
        fg_index = xp.where(gt_label == 1)[0]
        n_surplus = len(fg_index) - max_fg
        if n_surplus > 0:
            gt_label[choice(fg_index, size=n_surplus)] = -1

        if has_gt:
            bg_index = xp.where(
                xp.logical_and(inside, max_iou < bg_thresh))[0]
        else:
            bg_index = xp.where(inside)[0]

        # Background fills whatever the foreground left of the budget.
        # NOTE: ``randint`` draws with replacement, so fewer than ``n_bg``
        # distinct anchors may be selected, and nothing is labelled as
        # background when the candidates do not exceed ``n_bg``.
        n_bg = batchsize_per_image - int((gt_label == 1).sum())
        if len(bg_index) > n_bg:
            drawn = xp.random.randint(len(bg_index), size=n_bg)
            gt_label[bg_index[drawn]] = 0

        # Masks are taken after sampling because the background
        # assignment above may overwrite foreground labels.
        is_fg = gt_label == 1
        is_sampled = gt_label >= 0
        n_sample = is_sampled.sum()

        loc_loss += F.sum(
            smooth_l1(locs[i][is_fg], gt_loc[is_fg], 1 / 9)) / n_sample
        conf_loss += F.sum(F.sigmoid_cross_entropy(
            confs[i][is_sampled], gt_label[is_sampled], reduce='no')) \
            / n_sample

    loc_loss /= n_image
    conf_loss /= n_image

    return loc_loss, conf_loss