Example #1
    def merge_maskrcnn_benchmark_result(self,
                                        corners,
                                        results,
                                        im_scales=None,
                                        image_size=None):
        import numpy as np
        import torch
        from maskrcnn_benchmark.structures.boxlist_ops import BoxList

        def result_fmt(result):
            bbox = result.bbox
            labels = result.extra_fields["labels"].reshape(-1, 1).float()
            scores = result.extra_fields["scores"].reshape(-1, 1)
            det_result = torch.cat([bbox, labels, scores],
                                   dim=1).detach().cpu().numpy()
            return det_result

        input_BoxList = isinstance(results[0], BoxList)

        if input_BoxList:
            assert im_scales is not None and image_size is not None, \
                'im_scales and image_size are required for BoxList inputs'
            det_results = []
            for result, im_scale in zip(results, im_scales):
                det_result = result_fmt(result)
                det_result[:, :4] = det_result[:, :4] / np.array(
                    [im_scale[1], im_scale[0], im_scale[1], im_scale[0]])
                det_results.append(det_result)
        else:
            det_results = results
        det_results = self.translate_bboxes(corners, det_results)
        if len(det_results) == 0:
            return []
        _, keep = self.nms(det_results[:, :4], det_results[:, 5])
        det_results = det_results[keep]

        if input_BoxList:
            merge_result = BoxList(torch.Tensor(det_results[:, :4]),
                                   image_size, 'xyxy')
            merge_result.add_field("labels", torch.Tensor(det_results[:, 4]))
            merge_result.add_field("scores", torch.Tensor(det_results[:, 5]))
        else:
            merge_result = det_results
        return merge_result
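
The tiling helpers `self.translate_bboxes` and `self.nms` are not part of the excerpt. As a point of reference, a minimal greedy IoU-based NMS that matches the call signature above (xyxy boxes plus a score column, returning the kept rows and their indices) could look like the sketch below; the `iou_threshold` default and the exact return convention are assumptions:

    import numpy as np

    def nms(boxes, scores, iou_threshold=0.5):
        # sort detections by descending score
        order = scores.argsort()[::-1]
        keep = []
        while order.size > 0:
            i = order[0]
            keep.append(i)
            # intersection of the top box with the remaining candidates
            xx1 = np.maximum(boxes[i, 0], boxes[order[1:], 0])
            yy1 = np.maximum(boxes[i, 1], boxes[order[1:], 1])
            xx2 = np.minimum(boxes[i, 2], boxes[order[1:], 2])
            yy2 = np.minimum(boxes[i, 3], boxes[order[1:], 3])
            inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
            area_i = (boxes[i, 2] - boxes[i, 0]) * (boxes[i, 3] - boxes[i, 1])
            areas = ((boxes[order[1:], 2] - boxes[order[1:], 0]) *
                     (boxes[order[1:], 3] - boxes[order[1:], 1]))
            iou = inter / (area_i + areas - inter)
            # drop candidates that overlap the kept box too much
            order = order[1:][iou <= iou_threshold]
        keep = np.asarray(keep)
        return boxes[keep], keep
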
Example #2
    def __call__(self, anchors, box_cls, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor
        """
        anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
        box_cls_flattened = []
        box_regression_flattened = []
        # for each feature level, permute the outputs so that they are in the
        # same format as the labels. Note that the labels are computed for
        # all feature levels concatenated, so we keep the same representation
        # for box_cls and box_regression
        for box_cls_per_level, box_regression_per_level in zip(
            box_cls, box_regression
        ):
            N, A, H, W = box_cls_per_level.shape
            C = self.num_classes
            box_cls_per_level = box_cls_per_level.view(N, -1, C, H, W)
            box_cls_per_level = box_cls_per_level.permute(0, 3, 4, 1, 2)
            box_cls_per_level = box_cls_per_level.reshape(N, -1, C)
            box_regression_per_level = box_regression_per_level.view(N, -1, 4, H, W)
            box_regression_per_level = box_regression_per_level.permute(0, 3, 4, 1, 2)
            box_regression_per_level = box_regression_per_level.reshape(N, -1, 4)
            box_cls_flattened.append(box_cls_per_level)
            box_regression_flattened.append(box_regression_per_level)
        # concatenate on the first dimension (representing the feature levels), to
        # take into account the way the labels were generated (with all feature maps
        # being concatenated as well)
        box_cls = cat(box_cls_flattened, dim=1)
        box_regression = cat(box_regression_flattened, dim=1)

        cls_prob = torch.sigmoid(box_cls)
        box_prob = []
        positive_numels = 0
        positive_losses = []
        for img, (anchors_, targets_, cls_prob_, box_regression_) in enumerate(
                zip(anchors, targets, cls_prob, box_regression)
        ):
            labels_ = targets_.get_field("labels").to(dtype=torch.long) - 1

            with torch.set_grad_enabled(False):
                # box_localization: a_{j}^{loc}, shape: [j, 4]
                box_localization = self.box_coder.decode(box_regression_, anchors_.bbox)

                # object_box_iou: IoU_{ij}^{loc}, shape: [i, j]
                object_box_iou = boxlist_iou(
                    targets_,
                    BoxList(box_localization, anchors_.size, mode='xyxy')
                )

                t1 = self.bbox_threshold
                t2 = object_box_iou.max(dim=1, keepdim=True).values.clamp(min=t1 + 1e-12)

                # object_box_prob: P{a_{j} -> b_{i}}, shape: [i, j]
                object_box_prob = (
                    (object_box_iou - t1) / (t2 - t1)
                ).clamp(min=0, max=1)

                indices = torch.stack([torch.arange(len(labels_)).type_as(labels_), labels_], dim=0)

                # object_cls_box_prob: P{a_{j} -> b_{i}}, shape: [i, c, j]
                object_cls_box_prob = torch.sparse_coo_tensor(indices, object_box_prob)

                # image_box_prob: P{a_{j} \in A_{+}}, shape: [j, c]
                """
                the block from "start" to "end" implements:

                image_box_prob = torch.sparse.max(object_cls_box_prob, dim=0).t()

                (torch.sparse has no max reduction, hence the workaround below)
                """
                # start
                indices = torch.nonzero(torch.sparse.sum(
                    object_cls_box_prob, dim=0
                ).to_dense()).t_()

                if indices.numel() == 0:
                    image_box_prob = torch.zeros(anchors_.bbox.size(0), self.num_classes).type_as(object_box_prob)
                else:
                    nonzero_box_prob = torch.where(
                        (labels_.unsqueeze(dim=-1) == indices[0]),
                        object_box_prob[:, indices[1]],
                        torch.tensor([0.0]).type_as(object_box_prob)
                    ).max(dim=0).values

                    image_box_prob = torch.sparse_coo_tensor(
                        indices.flip([0]), nonzero_box_prob,
                        size=(anchors_.bbox.size(0), self.num_classes)
                    ).to_dense()
                # end

                box_prob.append(image_box_prob)

            # construct bags for objects
            match_quality_matrix = boxlist_iou(targets_, anchors_)
            _, matched = torch.topk(match_quality_matrix, self.pre_anchor_topk, dim=1, sorted=False)
            del match_quality_matrix

            # matched_cls_prob: P_{ij}^{cls}
            matched_cls_prob = torch.gather(
                cls_prob_[matched], 2, labels_.view(-1, 1, 1).repeat(1, self.pre_anchor_topk, 1)
            ).squeeze(2)

            # matched_box_prob: P_{ij}^{loc}
            matched_object_targets = self.box_coder.encode(targets_.bbox.unsqueeze(dim=1), anchors_.bbox[matched])
            retinanet_regression_loss = smooth_l1_loss(
                box_regression_[matched], matched_object_targets, *self.smooth_l1_loss_param
            )
            matched_box_prob = torch.exp(-retinanet_regression_loss)

            # positive_losses: { -log( Mean-max(P_{ij}^{cls} * P_{ij}^{loc}) ) }
            positive_numels += len(targets_)
            positive_losses.append(self.positive_bag_loss_func(matched_cls_prob * matched_box_prob, dim=1))

        # positive_loss: \sum_{i}{ -log( Mean-max(P_{ij}^{cls} * P_{ij}^{loc}) ) } / ||B||
        positive_loss = torch.cat(positive_losses).sum() / max(1, positive_numels)

        # box_prob: P{a_{j} \in A_{+}}
        box_prob = torch.stack(box_prob, dim=0)

        # negative_loss: \sum_{j}{ FL( (1 - P{a_{j} \in A_{+}}) * (1 - P_{j}^{bg}) ) } / n||B||
        negative_loss = self.negative_bag_loss_func(
            cls_prob * (1 - box_prob), self.focal_loss_gamma
        ) / max(1, positive_numels * self.pre_anchor_topk)

        return (positive_loss * self.focal_loss_alpha,
                negative_loss * (1 - self.focal_loss_alpha))
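
Neither `positive_bag_loss_func` nor `negative_bag_loss_func` is shown in the excerpt. A plausible sketch, assuming the Mean-max bag likelihood and the focal-style negative term from the FreeAnchor paper (the reference implementation defines them essentially this way, but treat the exact clamping constants as assumptions):

    import torch
    import torch.nn.functional as F

    def positive_bag_loss(logits, dim):
        # Mean-max over each bag: weights 1 / (1 - p) let the most confident
        # anchors dominate as training converges
        weight = 1.0 / (1.0 - logits).clamp(min=1e-12)
        weight = weight / weight.sum(dim=dim, keepdim=True)
        bag_prob = (weight * logits).sum(dim=dim)
        # -log(Mean-max(p)), written as BCE against an all-ones target
        return F.binary_cross_entropy(
            bag_prob, torch.ones_like(bag_prob), reduction="none"
        )

    def focal_loss(logits, gamma):
        # negative term: sum_j p_j^gamma * (-log(1 - p_j)), written as BCE
        # against an all-zeros target
        return torch.sum(
            logits ** gamma
            * F.binary_cross_entropy(logits, torch.zeros_like(logits),
                                     reduction="none")
        )
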
Example #3
    def __call__(self, anchors, box_cls, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor
        """
        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]
        box_cls_flattened = []
        box_regression_flattened = []
        # for each feature level, permute the outputs so that they are in the
        # same format as the labels. Note that the labels are computed for
        # all feature levels concatenated, so we keep the same representation
        # for box_cls and box_regression
        for box_cls_per_level, box_regression_per_level in zip(
                box_cls, box_regression):
            N, A, H, W = box_cls_per_level.shape
            C = self.num_classes
            box_cls_per_level = box_cls_per_level.view(N, -1, C, H, W)
            box_cls_per_level = box_cls_per_level.permute(0, 3, 4, 1, 2)
            box_cls_per_level = box_cls_per_level.reshape(N, -1, C)
            box_regression_per_level = box_regression_per_level.view(
                N, -1, 4, H, W)
            box_regression_per_level = box_regression_per_level.permute(
                0, 3, 4, 1, 2)
            box_regression_per_level = box_regression_per_level.reshape(
                N, -1, 4)
            box_cls_flattened.append(box_cls_per_level)
            box_regression_flattened.append(box_regression_per_level)
        # concatenate on the first dimension (representing the feature levels), to
        # take into account the way the labels were generated (with all feature maps
        # being concatenated as well)
        box_cls = cat(box_cls_flattened, dim=1)
        box_regression = cat(box_regression_flattened, dim=1)

        cls_prob = torch.sigmoid(box_cls)
        box_prob = []
        positive_numels = 0
        positive_losses = []
        for img, (anchors_, targets_, cls_prob_, box_regression_) in enumerate(
                zip(anchors, targets, cls_prob, box_regression)):
            labels_ = targets_.get_field("labels") - 1

            with torch.set_grad_enabled(False):
                box_localization = self.box_coder.decode(
                    box_regression_, anchors_.bbox)
                object_box_iou = boxlist_iou(
                    targets_,
                    BoxList(box_localization, anchors_.size, mode='xyxy'))
                H = object_box_iou.max(
                    dim=1,
                    keepdim=True).values.clamp(min=self.bbox_threshold + 1e-12)
                object_box_prob = ((object_box_iou - self.bbox_threshold) /
                                   (H - self.bbox_threshold)).clamp(min=0,
                                                                    max=1)

                indices = torch.stack(
                    [torch.arange(len(labels_)).type_as(labels_), labels_],
                    dim=0)
                """
                to implement image_box_iou = torch.sparse.max(
                                  torch.sparse_coo_tensor(indices, object_box_iou), dim=0
                             )
                """
                # start
                indices = torch.nonzero(
                    torch.sparse.sum(torch.sparse_coo_tensor(
                        indices, object_box_prob),
                                     dim=0).to_dense()).t_()

                if indices.numel() == 0:
                    image_box_prob = torch.zeros(
                        anchors_.bbox.size(0),
                        self.num_classes).type_as(object_box_prob)
                else:
                    nonzero_box_prob = torch.where(
                        (labels_.unsqueeze(dim=-1) == indices[0]),
                        object_box_prob[:, indices[1]],
                        torch.tensor(
                            [0]).type_as(object_box_prob)).max(dim=0).values

                    image_box_prob = torch.sparse_coo_tensor(
                        indices.flip([0]),
                        nonzero_box_prob,
                        size=(anchors_.bbox.size(0),
                              self.num_classes)).to_dense()
                # end

                box_prob.append(image_box_prob)

            match_quality_matrix = boxlist_iou(targets_, anchors_)
            _, matched = torch.topk(match_quality_matrix,
                                    self.pre_anchor_topk,
                                    dim=1,
                                    sorted=False)
            del match_quality_matrix

            matched_cls_prob = torch.gather(
                cls_prob_[matched], 2,
                labels_.view(-1, 1, 1).repeat(1, self.pre_anchor_topk,
                                              1)).squeeze(2)

            matched_object_targets = self.box_coder.encode(
                targets_.bbox.unsqueeze(dim=1), anchors_.bbox[matched])
            retinanet_regression_loss = smooth_l1_loss(
                box_regression_[matched], matched_object_targets,
                *self.smooth_l1_loss_param)
            matched_box_prob = torch.exp(-retinanet_regression_loss)

            positive_numels += len(targets_)
            positive_losses.append(
                self.positive_bag_loss_func(matched_cls_prob *
                                            matched_box_prob,
                                            dim=1))

        positive_loss = torch.cat(positive_losses).sum() / max(
            1, positive_numels)

        box_prob = torch.stack(box_prob, dim=0)

        negative_loss = self.negative_bag_loss_func(
            cls_prob * (1 - box_prob), self.focal_loss_gamma) / max(
                1, positive_numels * self.pre_anchor_topk)

        losses = {
            "loss_retina_positive": positive_loss * self.focal_loss_alpha,
            "loss_retina_negative":
            negative_loss * (1 - self.focal_loss_alpha),
        }
        return losses
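
Both variants also rely on a `smooth_l1_loss` helper invoked as `smooth_l1_loss(pred, target, *self.smooth_l1_loss_param)`, i.e. with two extra parameters, presumably a regression weight and a beta threshold. A minimal sketch under that assumption, keeping one loss value per (target, matched anchor) pair so that `torch.exp(-loss)` yields `matched_box_prob`:

    import torch

    def smooth_l1_loss(pred, target, weight, beta):
        # element-wise smooth L1, summed over the 4 box coordinates so the
        # output shape is [num_targets, pre_anchor_topk]
        diff = target - pred
        abs_diff = diff.abs()
        loss = torch.where(
            abs_diff < beta,
            0.5 * diff ** 2 / beta,
            abs_diff - 0.5 * beta,
        )
        return weight * loss.sum(dim=-1)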