Example #1
    def add_gt_proposals(self, proposals, targets):
        """
        Arguments:
            proposals: list[BoxList]
            targets: list[BoxList]
        """
        # Get the device we're operating on
        device = proposals[0].bbox.device
        # new_targets = []
        ############ change width & height ############

        new_targets = [target.set2rboxes() for target in targets]

        ###############################################

        gt_boxes = [target.copy_with_fields([]) for target in new_targets]

        # later cat of bbox requires all fields to be present for all bbox
        # so we need to add a dummy for objectness that's missing
        for gt_box in gt_boxes:
            gt_box.add_field("objectness",
                             torch.ones(len(gt_box), device=device))

        proposals = [
            cat_boxlist((proposal, gt_box))
            for proposal, gt_box in zip(proposals, gt_boxes)
        ]

        return proposals
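A minimal sketch of what the concatenation above amounts to, using plain tensors instead of BoxList (all names here are illustrative, not from the repo): appending ground truth means concatenating the box tensors per image and giving the GT rows a dummy objectness of 1 so every field lines up before the cat.

import torch

proposals = torch.rand(100, 5)                 # hypothetical proposals (xywha)
proposal_obj = torch.rand(100)                 # their objectness scores
gt = torch.rand(3, 5)                          # hypothetical ground-truth boxes
all_boxes = torch.cat([proposals, gt], dim=0)  # what cat_boxlist does to .bbox
all_obj = torch.cat([proposal_obj, torch.ones(len(gt))], dim=0)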
Example #2
    def forward(self, locations, box_cls, box_regression, centerness,
                image_sizes):
        """
        Arguments:
            anchors: list[list[BoxList]]
            box_cls: list[tensor]
            box_regression: list[tensor]
            image_sizes: list[(h, w)]
        Returns:
            boxlists (list[BoxList]): the post-processed anchors, after
                applying box decoding and NMS
        """
        sampled_boxes = []
        normal_factor = [16, 32, 64, 128, 256]
        # iterate over the five feature scales (one normalization factor per level)
        for layer, (l, o, b, c) in enumerate(
                zip(locations, box_cls, box_regression, centerness)):
            sampled_boxes.append(
                self.forward_for_single_feature_map(l, o, b, c, image_sizes,
                                                    normal_factor[layer]))

        boxlists = list(zip(*sampled_boxes))
        boxlists = [cat_boxlist(boxlist) for boxlist in boxlists]
        # print("boxlists", boxlists)
        # boxlists = self.select_over_all_levels(boxlists)

        return boxlists
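The zip(*sampled_boxes) step transposes a per-level list of per-image results into a per-image list of per-level results, which cat_boxlist then merges per image. A toy illustration of the idiom:

per_level = [["im0_lvl0", "im1_lvl0"], ["im0_lvl1", "im1_lvl1"]]
per_image = list(zip(*per_level))
print(per_image)  # [('im0_lvl0', 'im0_lvl1'), ('im1_lvl0', 'im1_lvl1')]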
Example #3
    def forward(self, anchors, objectness, box_regression, targets=None):
        """
        Arguments:
            anchors: list[list[BoxList]]
            objectness: list[tensor]
            box_regression: list[tensor]

        Returns:
            boxlists (list[BoxList]): the post-processed anchors, after
                applying box decoding and NMS
        """
        sampled_boxes = []
        num_levels = len(objectness)
        anchors = list(zip(*anchors))
        for a, o, b in zip(anchors, objectness, box_regression):
            sampled_boxes.append(self.forward_for_single_feature_map(a, o, b))

        boxlists = list(zip(*sampled_boxes))
        boxlists = [cat_boxlist(boxlist) for boxlist in boxlists]

        if num_levels > 1:
            boxlists = self.select_over_all_levels(boxlists)

        # append ground-truth bboxes to proposals
        if self.training and targets is not None:
            boxlists = self.add_gt_proposals(boxlists, targets)

        return boxlists
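A minimal sketch of the training-only branch, assuming nothing beyond torch: nn.Module carries a training flag toggled by .train()/.eval(), so the ground-truth append runs during training only.

import torch

class Head(torch.nn.Module):
    def forward(self, proposals, targets=None):
        if self.training and targets is not None:
            proposals = proposals + targets  # stand-in for add_gt_proposals
        return proposals

head = Head().eval()
print(head([1, 2], [3]))  # [1, 2] -- targets are ignored at inference
head.train()
print(head([1, 2], [3]))  # [1, 2, 3]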
Example #4
    def filter_results(self, boxlist, num_classes, num_of_fwd_left):
        """Returns bounding-box detection results by thresholding on scores and
        applying non-maximum suppression (NMS).
        """
        # unwrap the boxlist to avoid additional overhead.
        # if we had multi-class NMS, we could perform this directly on the boxlist
        boxes = boxlist.bbox.reshape(-1, num_classes * 5)
        scores = boxlist.get_field("scores").reshape(-1, num_classes)

        device = scores.device
        result = []
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        inds_all = scores > self.score_thresh
        for j in range(1, num_classes):
            inds = inds_all[:, j].nonzero().squeeze(1)
            scores_j = scores[inds, j]

            # print("scores_j:", np.unique(scores_j.data.cpu().numpy())[-10:])

            boxes_j = boxes[inds, j * 5 : (j + 1) * 5]
            boxlist_for_class = RBoxList(boxes_j, boxlist.size, mode="xywha")
            boxlist_for_class.add_field("scores", scores_j)

            if num_of_fwd_left == 0:
                boxlist_for_class.rescale(1. / self.shrink_margin)
                boxlist_for_class = self.nms_fn(
                    boxlist_for_class, self.nms, score_field="scores"
                )
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels", torch.full((num_labels,), j, dtype=torch.int64, device=device)
            )
            result.append(boxlist_for_class)

        result = cat_boxlist(result)
        number_of_detections = len(result)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.detections_per_img > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(), number_of_detections - self.detections_per_img + 1
            )
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
        return result
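The kthvalue call implements the top-k cutoff: with n detections, the (n - top_n + 1)-th smallest score is exactly the threshold that keeps the top_n highest scores (ties survive the >=). A standalone sketch:

import torch

scores = torch.tensor([0.9, 0.1, 0.8, 0.3, 0.7])
top_n = 3
thresh, _ = torch.kthvalue(scores, scores.numel() - top_n + 1)
keep = torch.nonzero(scores >= thresh).squeeze(1)
print(keep)  # tensor([0, 2, 4]) -- the three highest-scoring detections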
Example #5
    def __call__(self, image_mix_list, target_mix_list):
        crop_imgs = []
        crop_tars = []

        maxH, calW = 0, 0

        for i in range(len(image_mix_list)):
            img = image_mix_list[i]
            tar = target_mix_list[i]

            # img, tar = self.crop_tool(img, tar)
            crop_imgs.append(img)
            crop_tars.append(tar)

            np_img = np.array(img)
            H, W = np_img.shape[:2]

            if H > maxH:
                maxH = H
            calW += W

        mix_img = np.zeros((maxH, calW, 3))
        mix_tar = []

        shift = 0

        for i in range(len(crop_imgs)):
            crop_im = crop_imgs[i]
            crop_tar = crop_tars[i]

            np_img = np.array(crop_im)
            H, W = np_img.shape[:2]
            mix_img[:H, shift:W + shift] = np_img

            if crop_tar is not None:
                crop_tar = crop_tar.shift(shift, 0, (calW, maxH))
                mix_tar.append(crop_tar)

            shift += W

        # print("mix_img:", mix_img.shape, type(mix_img), mix_tar)
        if len(mix_tar) > 0:
            cat_boxes = cat_boxlist(mix_tar)
        else:
            cat_boxes = None

        return Image.fromarray(mix_img.astype(np.uint8)), cat_boxes
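A standalone sketch of the mixing step with synthetic images and no targets: paste each image onto a zero canvas sized (max height, total width), advancing a horizontal shift, then convert back to PIL.

import numpy as np
from PIL import Image

imgs = [np.full((40, 60, 3), 200, np.uint8), np.full((30, 50, 3), 50, np.uint8)]
max_h = max(im.shape[0] for im in imgs)
total_w = sum(im.shape[1] for im in imgs)
canvas = np.zeros((max_h, total_w, 3), np.uint8)
shift = 0
for im in imgs:
    h, w = im.shape[:2]
    canvas[:h, shift:shift + w] = im
    shift += w
mixed = Image.fromarray(canvas)  # a 110x40 side-by-side composite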
Example #6
    def select_over_all_levels(self, boxlists):
        num_images = len(boxlists)
        results = []
        for i in range(num_images):
            scores = boxlists[i].get_field("scores")
            labels = boxlists[i].get_field("labels")
            boxes = boxlists[i].bbox
            boxlist = boxlists[i]
            result = []
            # skip the background
            for j in range(1, self.num_classes):
                inds = (labels == j).nonzero().view(-1)

                scores_j = scores[inds]
                boxes_j = boxes[inds, :].view(-1, 4)
                boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
                boxlist_for_class.add_field("scores", scores_j)
                boxlist_for_class = boxlist_nms(
                    boxlist_for_class, self.nms_thresh,
                    score_field="scores"
                )
                num_labels = len(boxlist_for_class)
                boxlist_for_class.add_field(
                    "labels", torch.full((num_labels,), j,
                                         dtype=torch.int64,
                                         device=scores.device)
                )
                result.append(boxlist_for_class)

            result = cat_boxlist(result)
            number_of_detections = len(result)

            # Limit to max_per_image detections **over all classes**
            if number_of_detections > self.fpn_post_nms_top_n > 0:
                cls_scores = result.get_field("scores")
                image_thresh, _ = torch.kthvalue(
                    cls_scores.cpu(),
                    number_of_detections - self.fpn_post_nms_top_n + 1
                )
                keep = cls_scores >= image_thresh.item()
                keep = torch.nonzero(keep).squeeze(1)
                result = result[keep]
            results.append(result)
        return results
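The loop above is standard per-class NMS over axis-aligned xyxy boxes. For a single class, torchvision ships an equivalent kernel; a small sketch with made-up boxes:

import torch
from torchvision.ops import nms

boxes = torch.tensor([[0., 0., 10., 10.],
                      [1., 1., 11., 11.],
                      [50., 50., 60., 60.]])
scores = torch.tensor([0.9, 0.8, 0.7])
keep = nms(boxes, scores, iou_threshold=0.5)
print(keep)  # tensor([0, 2]) -- the overlapping lower-scoring box is dropped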
Example #7
def sampling_boxes(boxes, max_num=128):
    """
    Given a set of BoxList containing the `labels` field,
    return a set of BoxList for which `labels > 0`.

    Arguments:
        boxes (list of BoxList)
    """
    assert isinstance(boxes, (list, tuple))
    assert isinstance(boxes[0], RBoxList)
    assert boxes[0].has_field("labels")

    all_boxes = []

    positive_boxes = []
    positive_inds = []

    negative_boxes = []
    negative_inds = []

    for boxes_per_image in boxes:
        labels = boxes_per_image.get_field("labels")
        inds_mask = labels > 0
        inds = inds_mask.nonzero().squeeze(1)
        positive_boxes.append(boxes_per_image[inds][:max_num])
        positive_inds.append(inds_mask)

        neg_mask = labels == 0
        neg_inds = neg_mask.nonzero().squeeze(1)

        negative_box = boxes_per_image[neg_inds][-int(max_num / 4):]

        negative_boxes.append(negative_box)
        negative_inds.append(neg_mask)

        all_boxes.append(
            cat_boxlist([boxes_per_image[inds][:max_num], negative_box]))

    return positive_boxes, positive_inds, negative_boxes, negative_inds, all_boxes
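The sampling reduces to boolean masks over the labels field. A plain-tensor sketch with illustrative values:

import torch

labels = torch.tensor([0, 2, 0, 1, 3, 0])
pos_inds = (labels > 0).nonzero().squeeze(1)   # tensor([1, 3, 4])
neg_inds = (labels == 0).nonzero().squeeze(1)  # tensor([0, 2, 5])
max_num = 128
pos_sample = pos_inds[:max_num]          # cap the positives at max_num
neg_sample = neg_inds[-(max_num // 4):]  # keep a tail of negatives, 1/4 as many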
Example #8
    def add_gt_proposals(self, proposals, targets):
        """
        Arguments:
            proposals: list[BoxList]
            targets: list[BoxList]
        """
        # Get the device we're operating on
        device = proposals[0].bbox.device

        gt_boxes = [target.copy_with_fields([]) for target in targets]

        # later cat of bbox requires all fields to be present for all bbox
        # so we need to add a dummy for objectness that's missing
        for gt_box in gt_boxes:
            gt_box.add_field("objectness", torch.ones(len(gt_box), device=device))

        proposals = [
            cat_boxlist((proposal, gt_box))
            for proposal, gt_box in zip(proposals, gt_boxes)
        ]
        # print('rrpn_proposal:', proposals[0].bbox.size(), proposals[0].bbox[:, 2:4])
        return proposals
Example #9
    def __call__(self, anchors, objectness, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
        """
        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]
        labels, regression_targets = self.prepare_targets(anchors, targets)
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds,
                                                   dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds,
                                                   dim=0)).squeeze(1)

        # print("pos and neg:", sampled_pos_inds.shape, sampled_neg_inds.shape)

        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        objectness_flattened = []
        box_regression_flattened = []
        # for each feature level, permute the outputs to make them be in the
        # same format as the labels. Note that the labels are computed for
        # all feature levels concatenated, so we keep the same representation
        # for the objectness and the box_regression
        for objectness_per_level, box_regression_per_level in zip(
                objectness, box_regression):

            N, A, H, W = objectness_per_level.shape
            objectness_per_level = objectness_per_level.permute(0, 2, 3, 1).reshape(N, -1)
            box_regression_per_level = box_regression_per_level.view(N, -1, 5, H, W)
            box_regression_per_level = box_regression_per_level.permute(0, 3, 4, 1, 2)
            box_regression_per_level = box_regression_per_level.reshape(N, -1, 5)

            objectness_flattened.append(objectness_per_level)
            box_regression_flattened.append(box_regression_per_level)
        # concatenate on the first dimension (representing the feature levels), to
        # take into account the way the labels were generated (with all feature maps
        # being concatenated as well)
        objectness = cat(objectness_flattened, dim=1).reshape(-1)
        box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 5)

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)

        box_regression_pos = box_regression[sampled_pos_inds]
        regression_targets_pos = regression_targets[sampled_pos_inds]
        if self.edge_punished:
            anchors_cat = torch.cat([anchor.bbox for anchor in anchors], 0)
            pos_anchors_w = anchors_cat[:, 2:3][sampled_pos_inds]
            pos_anchors_w_norm = pos_anchors_w / (torch.mean(pos_anchors_w) +
                                                  1e-10)
            # print('box_regression_pos:', pos_anchors_w_norm.size(), box_regression_pos.size())
            box_regression_pos = pos_anchors_w_norm * box_regression_pos
            regression_targets_pos = pos_anchors_w_norm * regression_targets_pos

        plabels = labels[sampled_inds]

        if self.OHEM:
            cls_logits = objectness[sampled_inds]
            score_sig = torch.sigmoid(cls_logits)

            # pick hard positive which takes 1/4
            pos_score_sig = score_sig[plabels == 1]
            pos_num = pos_score_sig.shape[0]
            hard_pos_num = int(pos_num / 4) + 1
            hp_vals, hp_indices = torch.topk(-pos_score_sig,
                                             hard_pos_num,
                                             dim=0)
            # hard_pos_sig = pos_score_sig[hp_indices]

            pos_label = plabels[plabels == 1]
            pos_label = pos_label[hp_indices]
            pos_logits = cls_logits[plabels == 1]
            pos_logits = pos_logits[hp_indices]

            pos_box_reg = box_regression_pos[hp_indices]
            pos_box_target = regression_targets_pos[hp_indices]
            # print("box_regression_pos:", box_regression_pos.shape, pos_score_sig.shape, pos_box_reg)

            # print("pos_score_sig:", hard_pos_sig, pos_score_sig)
            # pick hard negative which takes 1/4
            neg_score_sig = score_sig[plabels != 1]
            neg_num = neg_score_sig.shape[0]
            hard_neg_num = int(neg_num / 4) + 1
            hn_vals, hn_indices = torch.topk(neg_score_sig,
                                             hard_neg_num,
                                             dim=0)
            # hard_neg_sig = neg_score_sig[hn_indices]

            neg_label = plabels[plabels != 1]
            neg_label = neg_label[hn_indices]
            neg_logits = cls_logits[plabels != 1]
            neg_logits = neg_logits[hn_indices]

            hard_labels = torch.cat([pos_label, neg_label], dim=0)
            hard_logits = torch.cat([pos_logits, neg_logits], dim=0)

            ohem_box_loss = smooth_l1_loss(
                pos_box_reg,
                pos_box_target,
                beta=1.0 / 9,
                size_average=False,
            ) / float(hard_pos_num + hard_neg_num)

            ohem_objectness_loss = F.binary_cross_entropy_with_logits(
                hard_logits, hard_labels.to(hard_logits.device))

            return ohem_objectness_loss, ohem_box_loss

        else:
            box_loss = smooth_l1_loss(
                box_regression_pos,
                regression_targets_pos,
                beta=1.0 / 9,
                size_average=False,
            ) / (sampled_inds.numel())

            score = objectness[sampled_inds]
            objectness_loss = F.binary_cross_entropy_with_logits(
                score, plabels.to(score.device))

            return objectness_loss, box_loss
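A sketch of the OHEM selection used above, with illustrative scores: the hardest positives are the lowest-scoring ones, picked by topk on the negated scores; the hardest negatives are the highest-scoring ones, picked by plain topk.

import torch

pos_scores = torch.tensor([0.9, 0.2, 0.6, 0.4])
k_pos = pos_scores.numel() // 4 + 1
_, hard_pos = torch.topk(-pos_scores, k_pos)  # tensor([1, 3]) -- scores 0.2, 0.4

neg_scores = torch.tensor([0.1, 0.8, 0.3])
k_neg = neg_scores.numel() // 4 + 1
_, hard_neg = torch.topk(neg_scores, k_neg)   # tensor([1]) -- score 0.8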
Example #10
    def forward(self, features, proposals, targets=None):
        """
        Arguments:
            features (list[Tensor]): feature-maps from possibly several levels
            proposals (list[BoxList]): proposal boxes
            targets (list[BoxList], optional): the ground-truth targets.

        Returns:
            x (Tensor): the result of the feature extractor
            proposals (list[BoxList]): during training, the original proposals
                are returned. During testing, the predicted boxlists are returned
                with the `mask` field set
            losses (dict[Tensor]): During training, returns the losses for the
                head. During testing, returns an empty dict.
        """

        if self.training:
            # during training, only focus on positive boxes
            all_proposals = proposals
            positive_boxes, positive_inds, negative_boxes, negative_inds, proposals = sampling_boxes(
                proposals, self.max_num_positive)
            if self.cfg.MODEL.ROI_REC_HEAD.POS_ONLY:
                proposals = positive_boxes

        if self.training and self.cfg.MODEL.ROI_REC_HEAD.SHARE_BOX_FEATURE_EXTRACTOR:
            x = features
            pos_x = x[torch.cat(positive_inds, dim=0)][:self.max_num_positive]

            all_proposals = cat_boxlist(all_proposals)
            pos_proposals = all_proposals[torch.cat(
                positive_inds, dim=0)][:self.max_num_positive]

            if self.cfg.MODEL.ROI_REC_HEAD.POS_ONLY:
                x = pos_x
                proposals = pos_proposals
            else:
                neg_x = x[torch.cat(negative_inds,
                                    dim=0)][:self.max_num_positive]
                x = torch.cat([pos_x, neg_x], dim=0)
                neg_proposals = all_proposals[torch.cat(
                    negative_inds, dim=0)][:self.max_num_positive]
                proposals = cat_boxlist([pos_proposals, neg_proposals])

        else:

            if not self.training:
                proposals = [
                    proposal.rescale(self.cfg.MODEL.ROI_REC_HEAD.BOXES_MARGIN)
                    for proposal in proposals
                ]

            x = self.feature_extractor(features, proposals)

            if self.training and self.cfg.MODEL.ROI_REC_HEAD.REC_DETACH:
                x = x.detach()

        rec_logits = self.predictor(x)

        if not self.training:
            if self.cfg.MODEL.ROI_REC_HEAD.STRUCT == "REF_TRANSFORMER":
                result = self.post_processor(rec_logits, proposals,
                                             self.transformer)
            else:
                result = self.post_processor(rec_logits, proposals)
            return x, result, {}

        if self.cfg.MODEL.ROI_REC_HEAD.STRUCT == "REF_TRANSFORMER":
            loss_rec = self.loss_evaluator(proposals, rec_logits, targets,
                                           self.transformer)
        else:
            loss_rec = self.loss_evaluator(proposals, rec_logits, targets)

        return x, proposals, dict(loss_rec=loss_rec)
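A toy sketch of the REC_DETACH branch: detaching the pooled features stops the recognition loss from back-propagating into the detector, while the recognition head itself still gets gradients.

import torch

w = torch.randn(8, requires_grad=True)         # recognition-head weight
feat = torch.randn(4, 8, requires_grad=True)   # pooled detector features
rec_loss = (feat.detach() * w).sum()
rec_loss.backward()
print(feat.grad is None, w.grad.shape)  # True torch.Size([8])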
Example #11
    def __call__(self, anchors, objectness, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
        """
        anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
        labels, regression_targets = self.prepare_targets(anchors, targets)
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)

        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        objectness_flattened = []
        box_regression_flattened = []
        # for each feature level, permute the outputs to make them be in the
        # same format as the labels. Note that the labels are computed for
        # all feature levels concatenated, so we keep the same representation
        # for the objectness and the box_regression
        for objectness_per_level, box_regression_per_level in zip(
            objectness, box_regression
        ):
            
            N, A, H, W = objectness_per_level.shape
            objectness_per_level = objectness_per_level.permute(0, 2, 3, 1).reshape(
                N, -1
            )
            box_regression_per_level = box_regression_per_level.view(N, -1, 5, H, W)
            box_regression_per_level = box_regression_per_level.permute(0, 3, 4, 1, 2)
            box_regression_per_level = box_regression_per_level.reshape(N, -1, 5)

            objectness_flattened.append(objectness_per_level)
            box_regression_flattened.append(box_regression_per_level)
        # concatenate on the first dimension (representing the feature levels), to
        # take into account the way the labels were generated (with all feature maps
        # being concatenated as well)
        objectness = cat(objectness_flattened, dim=1).reshape(-1)
        box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 5)

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)

        box_regression_pos = box_regression[sampled_pos_inds]
        regression_targets_pos = regression_targets[sampled_pos_inds]
        if self.edge_punished:
            anchors_cat = torch.cat([anchor.bbox for anchor in anchors], 0)
            pos_anchors_w = anchors_cat[:, 2:3][sampled_pos_inds]
            pos_anchors_w_norm = pos_anchors_w / (torch.mean(pos_anchors_w) + 1e-10)
            # print('box_regression_pos:', pos_anchors_w_norm.size(), box_regression_pos.size())
            box_regression_pos = pos_anchors_w_norm * box_regression_pos
            regression_targets_pos = pos_anchors_w_norm * regression_targets_pos

        box_loss = smooth_l1_loss(
            box_regression_pos,
            regression_targets_pos,
            beta=1.0 / 9,
            size_average=False,
        ) / (sampled_inds.numel())

        # print('type:', objectness[sampled_inds], labels[sampled_inds])

        score = objectness[sampled_inds]
        plabels = labels[sampled_inds]
        objectness_loss = F.binary_cross_entropy_with_logits(
            score, plabels.to(score.device)
        )

        return objectness_loss, box_loss
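The permute/reshape dance aligns the per-level head outputs with the flattened labels: an (N, A*5, H, W) regression map becomes (N, H*W*A, 5) so that concatenating levels on dim 1 matches the anchor ordering. A shape-only sketch:

import torch

N, A, H, W = 2, 3, 8, 8
objectness = torch.randn(N, A, H, W)
box_regression = torch.randn(N, A * 5, H, W)
obj_flat = objectness.permute(0, 2, 3, 1).reshape(N, -1)  # (2, 192)
reg_flat = (box_regression.view(N, -1, 5, H, W)
            .permute(0, 3, 4, 1, 2)
            .reshape(N, -1, 5))                           # (2, 192, 5)
print(obj_flat.shape, reg_flat.shape)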