Example No. 1
    def get_ground_truth(self, anchors_list, batched_gt_boxes, batched_num_gts):
        anchors = F.concat(anchors_list, axis=0)
        labels_list = []
        offsets_list = []

        for bid in range(batched_gt_boxes.shape[0]):
            gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]

            overlaps = layers.get_iou(gt_boxes[:, :4], anchors)
            matched_indices, labels = self.matcher(overlaps)

            offsets = self.box_coder.encode(anchors, gt_boxes[matched_indices, :4])

            # sample positive labels
            num_positive = int(self.cfg.num_sample_anchors * self.cfg.positive_anchor_ratio)
            labels = layers.sample_labels(labels, num_positive, 1, -1)
            # sample negative labels
            num_positive = (labels == 1).sum().astype(np.int32)
            num_negative = self.cfg.num_sample_anchors - num_positive
            labels = layers.sample_labels(labels, num_negative, 0, -1)

            labels_list.append(labels)
            offsets_list.append(offsets)

        return (
            F.concat(labels_list, axis=0).detach(),
            F.concat(offsets_list, axis=0).detach(),
        )
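The positive/negative sampling above goes through `layers.sample_labels` from the repository's detection layers. As an illustrative sketch only (an assumption about its behavior, not the repository's implementation), a helper with that call pattern would randomly keep at most `num` entries of a given value and overwrite the surplus with the ignore value:

import numpy as np

def sample_labels_sketch(labels, num, value, ignore_value):
    # Illustrative only: keep at most `num` entries equal to `value` and
    # overwrite a randomly chosen surplus with `ignore_value` (-1 = ignored).
    labels = labels.copy()
    idx = np.flatnonzero(labels == value)
    if len(idx) > num:
        drop = np.random.choice(idx, size=len(idx) - num, replace=False)
        labels[drop] = ignore_value
    return labels

labels = np.array([1, 1, 1, 0, 0, 0, 0, -1])
print(sample_labels_sketch(labels, 2, 1, -1))  # at most two anchors stay positive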
Example No. 2
    def get_ground_truth(self, rpn_rois, im_info, gt_boxes):
        if not self.training:
            return rpn_rois, None, None

        return_rois = []
        return_labels = []
        return_bbox_targets = []

        # get per image proposals and gt_boxes
        for bid in range(gt_boxes.shape[0]):
            num_valid_boxes = im_info[bid, 4].astype("int32")
            gt_boxes_per_img = gt_boxes[bid, :num_valid_boxes, :]
            batch_inds = F.full((gt_boxes_per_img.shape[0], 1), bid)
            gt_rois = F.concat([batch_inds, gt_boxes_per_img[:, :4]], axis=1)
            batch_roi_mask = rpn_rois[:, 0] == bid
            # all_rois : [batch_id, x1, y1, x2, y2]
            all_rois = F.concat([rpn_rois[batch_roi_mask], gt_rois])

            overlaps = layers.get_iou(all_rois[:, 1:], gt_boxes_per_img)

            max_overlaps = overlaps.max(axis=1)
            gt_assignment = F.argmax(overlaps, axis=1).astype("int32")
            labels = gt_boxes_per_img[gt_assignment, 4]

            # ---------------- get the fg/bg labels for each roi ---------------#
            fg_mask = (max_overlaps >= self.cfg.fg_threshold) & (labels >= 0)
            bg_mask = ((max_overlaps >= self.cfg.bg_threshold_low)
                       & (max_overlaps < self.cfg.bg_threshold_high))

            num_fg_rois = int(self.cfg.num_rois * self.cfg.fg_ratio)
            fg_inds_mask = layers.sample_labels(fg_mask, num_fg_rois, True,
                                                False)
            num_bg_rois = int(self.cfg.num_rois - fg_inds_mask.sum())
            bg_inds_mask = layers.sample_labels(bg_mask, num_bg_rois, True,
                                                False)

            labels[bg_inds_mask] = 0

            keep_mask = fg_inds_mask | bg_inds_mask
            labels = labels[keep_mask].astype("int32")
            rois = all_rois[keep_mask]
            target_boxes = gt_boxes_per_img[gt_assignment[keep_mask], :4]
            bbox_targets = self.box_coder.encode(rois[:, 1:], target_boxes)
            bbox_targets = bbox_targets.reshape(-1, 4)

            return_rois.append(rois)
            return_labels.append(labels)
            return_bbox_targets.append(bbox_targets)

        return (
            F.concat(return_rois, axis=0).detach(),
            F.concat(return_labels, axis=0).detach(),
            F.concat(return_bbox_targets, axis=0).detach(),
        )
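Here `self.box_coder.encode(rois[:, 1:], target_boxes)` turns absolute box coordinates into regression targets. The exact coder is configured elsewhere in the model; the sketch below assumes the conventional Faster R-CNN (dx, dy, dw, dh) parameterization, which the repository's coder may extend with normalization constants:

import numpy as np

def encode_boxes_sketch(anchors, targets):
    # Conventional (dx, dy, dw, dh) box encoding relative to each anchor.
    aw = anchors[:, 2] - anchors[:, 0]
    ah = anchors[:, 3] - anchors[:, 1]
    ax = anchors[:, 0] + 0.5 * aw
    ay = anchors[:, 1] + 0.5 * ah
    tw = targets[:, 2] - targets[:, 0]
    th = targets[:, 3] - targets[:, 1]
    tx = targets[:, 0] + 0.5 * tw
    ty = targets[:, 1] + 0.5 * th
    return np.stack([(tx - ax) / aw, (ty - ay) / ah,
                     np.log(tw / aw), np.log(th / ah)], axis=1)

print(encode_boxes_sketch(np.array([[0., 0., 10., 10.]]),
                          np.array([[1., 1., 11., 11.]])))  # -> [0.1, 0.1, 0., 0.]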
Example No. 3
    def get_ground_truth(self, anchors, batched_gt_boxes, batched_num_gts):
        labels_list = []
        offsets_list = []

        for bid in range(batched_gt_boxes.shape[0]):
            gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]

            overlaps = layers.get_iou(gt_boxes[:, :4], anchors)
            match_indices, labels = self.matcher(overlaps)
            gt_boxes_matched = gt_boxes[match_indices]

            fg_mask = labels == 1
            labels[fg_mask] = gt_boxes_matched[fg_mask, 4].astype(np.int32)
            offsets = self.box_coder.encode(anchors, gt_boxes_matched[:, :4])

            labels_list.append(labels)
            offsets_list.append(offsets)

        return (
            F.stack(labels_list, axis=0).detach(),
            F.stack(offsets_list, axis=0).detach(),
        )
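`self.matcher` is built elsewhere in the model and, per anchor, returns the index of its best-overlapping ground-truth box together with a 1/0/-1 (foreground/background/ignore) label. A minimal max-IoU matcher sketch under that assumption, with hypothetical threshold values:

import numpy as np

def max_iou_matcher_sketch(overlaps, high=0.5, low=0.4):
    # overlaps: (num_gt, num_anchors) IoU matrix, as produced by layers.get_iou.
    matched_indices = overlaps.argmax(axis=0)                 # best gt per anchor
    max_iou = overlaps.max(axis=0)
    labels = np.full(overlaps.shape[1], -1, dtype=np.int32)   # ignored by default
    labels[max_iou >= high] = 1                               # foreground
    labels[max_iou < low] = 0                                 # background
    return matched_indices, labels

overlaps = np.array([[0.6, 0.3, 0.45],
                     [0.2, 0.1, 0.55]])
print(max_iou_matcher_sketch(overlaps))  # anchors 0 and 2 -> fg, anchor 1 -> bg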
Example No. 4
    def get_losses(self, anchors, pred_logits, pred_offsets, gt_boxes,
                   im_info):
        # pylint: disable=too-many-statements
        def positive_bag_loss(logits, axis=1):
            weight = 1.0 / (1.0 - logits)
            weight /= weight.sum(axis=axis, keepdims=True)
            bag_prob = (weight * logits).sum(axis=axis)
            return -layers.safelog(bag_prob)

        def negative_bag_loss(logits, gamma):
            return (logits**gamma) * (-layers.safelog(1.0 - logits))

        pred_scores = F.sigmoid(pred_logits)
        box_prob_list = []
        positive_losses = []
        clamp_eps = 1e-7
        bucket_size = self.cfg.bucket_size

        for bid in range(im_info.shape[0]):
            boxes_info = gt_boxes[bid, :im_info[bid, 4].astype("int32")]
            # id 0 is used for background classes, so -1 first
            labels = boxes_info[:, 4].astype("int32") - 1

            pred_box = self.box_coder.decode(anchors,
                                             pred_offsets[bid]).detach()
            overlaps = layers.get_iou(boxes_info[:, :4], pred_box).detach()
            thresh1 = self.cfg.box_iou_threshold
            thresh2 = F.clip(overlaps.max(axis=1, keepdims=True),
                             lower=thresh1 + clamp_eps,
                             upper=1.0)
            gt_pred_prob = F.clip((overlaps - thresh1) / (thresh2 - thresh1),
                                  lower=0,
                                  upper=1.0)

            image_boxes_prob = F.zeros(pred_logits.shape[1:]).detach()
            # guarantee that nonzero_idx is not empty
            if gt_pred_prob.max() > clamp_eps:
                _, nonzero_idx = F.cond_take(gt_pred_prob != 0, gt_pred_prob)
                # since nonzeros is only 1 dim, use num_anchor to get real indices
                num_anchors = gt_pred_prob.shape[1]
                anchors_idx = nonzero_idx % num_anchors
                gt_idx = nonzero_idx // num_anchors
                image_boxes_prob[anchors_idx,
                                 labels[gt_idx]] = gt_pred_prob[gt_idx,
                                                                anchors_idx]

            box_prob_list.append(image_boxes_prob)

            # construct bags for objects
            match_quality_matrix = layers.get_iou(boxes_info[:, :4],
                                                  anchors).detach()
            num_gt = match_quality_matrix.shape[0]
            _, matched_idx = F.topk(
                match_quality_matrix,
                k=bucket_size,
                descending=True,
                no_sort=True,
            )

            matched_idx = matched_idx.detach()
            matched_idx_flatten = matched_idx.reshape(-1)
            gather_idx = labels.reshape(-1, 1)
            gather_idx = F.broadcast_to(gather_idx, (num_gt, bucket_size))

            gather_src = pred_scores[bid, matched_idx_flatten]
            gather_src = gather_src.reshape(num_gt, bucket_size, -1)
            matched_score = F.indexing_one_hot(gather_src, gather_idx, axis=2)

            topk_anchors = anchors[matched_idx_flatten]
            boxes_broad_cast = F.broadcast_to(
                F.expand_dims(boxes_info[:, :4], axis=1),
                (num_gt, bucket_size, 4)).reshape(-1, 4)

            matched_offsets = self.box_coder.encode(topk_anchors,
                                                    boxes_broad_cast)

            reg_loss = layers.smooth_l1_loss(
                pred_offsets[bid, matched_idx_flatten],
                matched_offsets,
                beta=self.cfg.smooth_l1_beta).sum(
                    axis=-1) * self.cfg.reg_loss_weight
            matched_reg_scores = F.exp(-reg_loss)

            positive_losses.append(
                positive_bag_loss(matched_score *
                                  matched_reg_scores.reshape(-1, bucket_size),
                                  axis=1))

        num_foreground = im_info[:, 4].sum()
        pos_loss = F.concat(positive_losses).sum() / F.maximum(
            1.0, num_foreground)
        box_probs = F.stack(box_prob_list, axis=0)

        neg_loss = negative_bag_loss(
            pred_scores *
            (1 - box_probs), self.cfg.focal_loss_gamma).sum() / F.maximum(
                1.0, num_foreground * bucket_size)

        alpha = self.cfg.focal_loss_alpha
        pos_loss = pos_loss * alpha
        neg_loss = neg_loss * (1 - alpha)
        loss_dict = {
            "total_loss": pos_loss + neg_loss,
            "pos_loss": pos_loss,
            "neg_loss": neg_loss,
        }
        return loss_dict
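The two inner functions implement the FreeAnchor bag losses: each object's positive bag is a weighted average of its top-k anchor scores (weighted by 1 / (1 - p), so the best anchor dominates), and negatives are down-weighted by a focal term. A standalone NumPy restatement of the same arithmetic, useful for sanity-checking shapes and values:

import numpy as np

def positive_bag_loss_np(probs, eps=1e-10):
    # probs: (num_gt, bucket_size) per-object anchor scores in (0, 1).
    weight = 1.0 / (1.0 - probs)
    weight /= weight.sum(axis=1, keepdims=True)
    bag_prob = (weight * probs).sum(axis=1)
    return -np.log(np.clip(bag_prob, eps, None))

def negative_bag_loss_np(probs, gamma, eps=1e-10):
    # Focal-style weighting: confident false positives are penalized more.
    return probs**gamma * -np.log(np.clip(1.0 - probs, eps, None))

probs = np.array([[0.9, 0.2, 0.1], [0.5, 0.5, 0.5]])
print(positive_bag_loss_np(probs))        # lower loss when one anchor dominates the bag
print(negative_bag_loss_np(probs, 2.0))   # per-anchor negative penalties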
Example No. 5
    def get_ground_truth(self, anchors_list, batched_gt_boxes,
                         batched_num_gts):
        labels_list = []
        offsets_list = []
        ctrness_list = []

        all_level_anchors = F.concat(anchors_list, axis=0)
        for bid in range(batched_gt_boxes.shape[0]):
            gt_boxes = batched_gt_boxes[bid, :batched_num_gts[bid]]

            ious = []
            candidate_idxs = []
            base = 0
            for stride, anchors_i in zip(self.cfg.stride, anchors_list):
                ious.append(
                    layers.get_iou(
                        gt_boxes[:, :4],
                        F.concat([
                            anchors_i - stride * self.cfg.anchor_scale / 2,
                            anchors_i + stride * self.cfg.anchor_scale / 2,
                        ],
                                 axis=1)))
                gt_centers = (gt_boxes[:, :2] + gt_boxes[:, 2:4]) / 2
                distances = F.sqrt(
                    F.sum((F.expand_dims(gt_centers, axis=1) - anchors_i)**2,
                          axis=2))
                _, topk_idxs = F.topk(distances, self.cfg.anchor_topk)
                candidate_idxs.append(base + topk_idxs)
                base += anchors_i.shape[0]
            ious = F.concat(ious, axis=1)
            candidate_idxs = F.concat(candidate_idxs, axis=1)

            candidate_ious = F.gather(ious, 1, candidate_idxs)
            ious_thr = (F.mean(candidate_ious, axis=1, keepdims=True) +
                        F.std(candidate_ious, axis=1, keepdims=True))
            is_foreground = F.scatter(
                F.zeros(ious.shape), 1, candidate_idxs,
                F.ones(candidate_idxs.shape)).astype(bool) & (ious >= ious_thr)

            is_in_boxes = F.min(self.point_coder.encode(
                all_level_anchors, F.expand_dims(gt_boxes[:, :4], axis=1)),
                                axis=2) > 0

            ious[~is_foreground] = -1
            ious[~is_in_boxes] = -1

            match_indices = F.argmax(ious, axis=0)
            gt_boxes_matched = gt_boxes[match_indices]
            anchor_max_iou = F.indexing_one_hot(ious, match_indices, axis=0)

            labels = gt_boxes_matched[:, 4].astype(np.int32)
            labels[anchor_max_iou == -1] = 0
            offsets = self.point_coder.encode(all_level_anchors,
                                              gt_boxes_matched[:, :4])

            left_right = offsets[:, [0, 2]]
            top_bottom = offsets[:, [1, 3]]
            ctrness = F.sqrt(
                F.clip(F.min(left_right, axis=1) / F.max(left_right, axis=1),
                       lower=0) *
                F.clip(F.min(top_bottom, axis=1) / F.max(top_bottom, axis=1),
                       lower=0))

            labels_list.append(labels)
            offsets_list.append(offsets)
            ctrness_list.append(ctrness)

        return (
            F.stack(labels_list, axis=0).detach(),
            F.stack(offsets_list, axis=0).detach(),
            F.stack(ctrness_list, axis=0).detach(),
        )
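The `ctrness` target computed above is the FCOS-style centerness, taken over the (left, top, right, bottom) distances produced by `self.point_coder.encode`. The NumPy restatement below mirrors that computation on a plain (N, 4) offsets array:

import numpy as np

def centerness_np(offsets):
    # offsets: (N, 4) = (left, top, right, bottom) distances from each anchor
    # point to the sides of its matched box; mirrors the ctrness target above.
    lr = offsets[:, [0, 2]]
    tb = offsets[:, [1, 3]]
    lr_ratio = np.clip(lr.min(axis=1) / lr.max(axis=1), 0, None)
    tb_ratio = np.clip(tb.min(axis=1) / tb.max(axis=1), 0, None)
    return np.sqrt(lr_ratio * tb_ratio)

print(centerness_np(np.array([[5., 5., 5., 5.],     # centered point -> 1.0
                              [1., 5., 9., 5.]])))  # off-center point -> ~0.33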