Example #1
0
    def __call__(self, anchors, objectness, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
        """
        anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
        labels, regression_targets = self.prepare_targets(anchors, targets)
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)

        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        objectness_flattened = []
        box_regression_flattened = []
        # for each feature level, permute the outputs to make them be in the
        # same format as the labels. Note that the labels are computed for
        # all feature levels concatenated, so we keep the same representation
        # for the objectness and the box_regression
        for objectness_per_level, box_regression_per_level in zip(
            objectness, box_regression
        ):
            N, A, H, W = objectness_per_level.shape
            objectness_per_level = objectness_per_level.permute(0, 2, 3, 1).reshape(
                N, -1
            )
            box_regression_per_level = box_regression_per_level.view(N, -1, 4, H, W)
            box_regression_per_level = box_regression_per_level.permute(0, 3, 4, 1, 2)
            box_regression_per_level = box_regression_per_level.reshape(N, -1, 4)
            objectness_flattened.append(objectness_per_level)
            box_regression_flattened.append(box_regression_per_level)
        # concatenate on the first dimension (representing the feature levels), to
        # take into account the way the labels were generated (with all feature maps
        # being concatenated as well)
        objectness = cat(objectness_flattened, dim=1).reshape(-1)
        box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4)

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)

        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            beta=1.0 / 9,
            size_average=False,
        ) / (sampled_inds.numel())

        objectness_loss = F.binary_cross_entropy_with_logits(
            objectness[sampled_inds], labels[sampled_inds]
        )

        return objectness_loss, box_loss
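
# Minimal standalone sketch (not part of the example above; N, A, H, W are
# made-up values) of the per-level permute/reshape used in the loop above;
# only the shape bookkeeping matters here.
import torch

N, A, H, W = 2, 3, 4, 5
objectness_per_level = torch.randn(N, A, H, W)
box_regression_per_level = torch.randn(N, A * 4, H, W)

obj = objectness_per_level.permute(0, 2, 3, 1).reshape(N, -1)
reg = (box_regression_per_level.view(N, -1, 4, H, W)
       .permute(0, 3, 4, 1, 2).reshape(N, -1, 4))

assert obj.shape == (N, H * W * A)     # one score per anchor, spatial-major order
assert reg.shape == (N, H * W * A, 4)  # matching 4-vector per anchor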
Example #2
0
    def __call__(self, anchors, objectness, box_regression, targets):
        """
        Arguments:
            anchors (list[list[BoxList]]): the first dimension is the image batch, the second is the level;
                each level's anchors are one BoxList object
            objectness (list[Tensor]): the first dimension is the level
            box_regression (list[Tensor]): the first dimension is the level
            targets (list[BoxList]): the first dimension is the image batch

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
        """

        # anchors: [num_imgs, num_levels (BoxLists each)] --> [num_imgs (one BoxList each)]
        # i.e. merge each image's per-level BoxList objects into a single BoxList
        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]

        # labels: fg,bg,discard  [img_batch, num_anchors]
        # regression_targets: t_x,t_y,t_w,t_h  [img_batch, num_anchors, 4]
        labels, regression_targets = self.prepare_targets(anchors, targets)

        # randomly sample a batch of positive/negative samples from all predictions  [img_batch, num_anchors]
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)

        # Before this step, sampled_pos_inds and sampled_neg_inds are [img_batch, num_anchors] and can
        #   be viewed as 2-D matrices containing only 0s and 1s; the 1s mark the sampled anchors, and
        #   per row the 1s of the two variables add up to batch_size_per_image, i.e.
        #   batch_size_per_image positive/negative samples are drawn from each image.
        # After this step, sampled_pos_inds and sampled_neg_inds are [all_sampled_inds]: the img_batch
        #   dimension is flattened into img_batch*num_anchors entries and the indices of the non-zero
        #   entries are taken (range 0 ~ img_batch*num_anchors-1). labels and regression_targets are
        #   flattened over img_batch the same way below, so these index tensors can index them directly.
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds,
                                                   dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds,
                                                   dim=0)).squeeze(1)

        # [img_batch*batch_size_per_image]
        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        # objectness: [[num_img, num_anchors, H, W], ...] --> [img_batch*num_anchors, 1]
        # box_regression: [[num_img, 4*num_anchors, H, W], ...] --> [img_batch*num_anchors, 4]
        objectness, box_regression = \
            concat_box_prediction_layers(objectness, box_regression)

        # [img_batch*num_anchors]
        objectness = objectness.squeeze()

        # [img_batch, num_anchors] --> [img_batch*num_anchors]
        labels = torch.cat(labels, dim=0)
        # [img_batch, num_anchors, 4] --> [img_batch*num_anchors, 4]
        regression_targets = torch.cat(regression_targets, dim=0)

        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            beta=1.0 / 9,
            size_average=False,
        ) / (sampled_inds.numel())

        # sigmoid combined with cross-entropy as the fg/bg binary classification loss
        objectness_loss = F.binary_cross_entropy_with_logits(
            objectness[sampled_inds], labels[sampled_inds])

        return objectness_loss, box_loss
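
# Minimal standalone sketch (made-up values) of how the per-image 0/1 sampling
# masks above are flattened into global indices with torch.nonzero:
import torch

sampled_pos_inds = [torch.tensor([0, 1, 0, 0]), torch.tensor([1, 0, 0, 1])]  # per image
flat = torch.cat(sampled_pos_inds, dim=0)        # [0, 1, 0, 0, 1, 0, 0, 1]
pos_idx = torch.nonzero(flat).squeeze(1)         # tensor([1, 4, 7])
# labels / regression_targets are concatenated over the image batch in the same
# order, so these flat indices can be used to index them directly.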
Example #3
0
    def __call__(self, anchors, box_cls, box_regression, coeffs, prototypes, targets):
        coeffs = concat_coeffs_prediction_layers(coeffs, self.num_prototypes)

        anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
        labels, regression_targets, mask_targets, mask_pred, gt_boxes_area = \
            self.prepare_targets_and_assemble(anchors, targets, coeffs, prototypes)

        N = len(labels)
        box_cls, box_regression = \
                concat_box_prediction_layers(box_cls, box_regression)

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)
        pos_inds = torch.nonzero(labels > 0).squeeze(1)

        mask_pred = torch.cat(mask_pred, dim=0)
        device = mask_pred.device
        mask_targets = torch.cat(mask_targets, dim=0).to(device, dtype=torch.float32)
        gt_boxes_area = torch.cat(gt_boxes_area, dim=0)

        if mask_pred.size(0) > self.mask_to_train:
            perm = torch.randperm(mask_pred.size(0))
            select = perm[:self.mask_to_train]
            mask_pred = mask_pred[select]
            mask_targets = mask_targets[select]
            gt_boxes_area = gt_boxes_area[select]

        # only positive boxes contribute to regression loss and mask loss
        retinanet_regression_loss = smooth_l1_loss(
            box_regression[pos_inds],
            regression_targets[pos_inds],
            beta=self.bbox_reg_beta,
            size_average=False,
        ) / (max(1, pos_inds.numel() * self.regress_norm))

        # if DEBUG:
        #     print('retinanet_regression_loss', box_regression[pos_inds].shape)

        # torch.mean (in binary_cross_entropy_with_logits) doesn't
        # accept empty tensors, so handle it separately
        if mask_targets.numel() == 0:
            yolact_mask_loss = mask_pred.sum() * 0
        else:
            if self.mask_with_logits:
                yolact_mask_loss = F.binary_cross_entropy_with_logits(mask_pred, mask_targets, reduction='none')
            else:
                yolact_mask_loss = F.binary_cross_entropy(mask_pred, mask_targets, reduction='none')
                
        # if DEBUG:
        #     print("gt_boxes_area:", gt_boxes_area)
        # if DEBUG:
        #     print('yolact_mask_loss', mask_pred.shape)

        # reweight mask loss by dividing the area of ground-truth boxes
        yolact_mask_loss = yolact_mask_loss.sum(dim=(1, 2)) / gt_boxes_area
        yolact_mask_loss = yolact_mask_loss.sum() / (max(1, pos_inds.numel() * self.mask_norm))
        
        if DEBUG:
            print('pos_inds.numel():', pos_inds.numel())
            print('gt_boxes_area.shape:', gt_boxes_area.shape)

        labels = labels.int()
        retinanet_cls_loss = self.box_cls_loss_func(
            box_cls,
            labels
        ) / (pos_inds.numel() + N)

        # if DEBUG:
        #     print('retinanet_cls_loss', box_cls.shape)

        return retinanet_cls_loss, retinanet_regression_loss, yolact_mask_loss
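
# Minimal standalone sketch (assumed shapes, made-up areas) of the mask-loss
# reweighting above: the per-mask BCE is summed over the spatial dims and
# divided by each ground-truth box area before the final normalization.
import torch
import torch.nn.functional as F

num_masks, h, w = 3, 8, 8
mask_pred = torch.randn(num_masks, h, w)
mask_targets = torch.randint(0, 2, (num_masks, h, w)).float()
gt_boxes_area = torch.tensor([40.0, 12.0, 25.0])

loss = F.binary_cross_entropy_with_logits(mask_pred, mask_targets, reduction='none')
loss = loss.sum(dim=(1, 2)) / gt_boxes_area   # larger boxes contribute less per pixel
loss = loss.sum() / num_masks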
Example #4
0
    def forward(self, images, iteration=None, targets=None):
        """
        Arguments:
            images (list[Tensor] or ImageList): images to be processed
            targets (list[BoxList]): ground-truth boxes present in the image (optional)

        Returns:
            result (list[BoxList] or dict[Tensor]): the output from the model.
                During training, it returns a dict[Tensor] which contains the losses.
                During testing, it returns list[BoxList] contains additional fields
                like `scores`, `labels` and `mask` (for Mask R-CNN models).

        """
        if self.training and targets is None:
            raise ValueError("In training mode, targets should be passed")
        images = to_image_list(images)
        features = self.backbone(images.tensors)

        # Retina RPN Output
        rpn_features = features
        mask_all = []
        for rpn_feat in features:
            num_batch = rpn_feat.shape[0]
            num_channel = rpn_feat.shape[1]
            num_height = rpn_feat.shape[2]
            num_width = rpn_feat.shape[3]

            # compute cam with conv feat
            feat_channel_mean = torch.mean(rpn_feat.view(
                num_batch, num_channel, -1),
                                           dim=2)
            feat_channel_mean = feat_channel_mean.view(num_batch, num_channel,
                                                       1, 1)
            cam = torch.sum(rpn_feat * feat_channel_mean, 1)  # [B 1 H W]
            mask_all.append(cam)

        # Inverted Attention
        if self.cfg.FREEANCHOR.IA_ON and self.training and iteration is not None:
            rpn_features_tmp = []
            for feat_idx, rpn_feat in enumerate(rpn_features):
                rpn_features_tmp.append(rpn_feat.clone().detach())
            rpn_features_tmp = tuple(rpn_features_tmp)

            # the ratio of IA
            max_iteration = self.cfg.SOLVER.MAX_ITER
            ratio = self.ratio_function(self.cfg.FREEANCHOR.IA_TYPE,
                                        max_iteration, iteration)

            if self.cfg.FREEANCHOR.IA_FEAT:
                if self.cfg.FREEANCHOR.IA_FEAT_TYPE == 0:
                    mask = self.IA_feat(rpn_features_tmp, ratio)
                else:
                    mask = self.IA_feat2(rpn_features_tmp, ratio)
            else:
                mask = self.IA_grad(images, rpn_features_tmp, targets, ratio)

        if self.cfg.RETINANET.BACKBONE == "p2p7":
            rpn_features = features[1:]
        if self.cfg.FREEANCHOR.IA_ON and self.training:
            # print('images.size(): ', images.size(), targets)
            (anchors, detections), detector_losses = self.rpn(images,
                                                              rpn_features,
                                                              mask,
                                                              targets=targets)
        else:
            (anchors, detections), detector_losses = self.rpn(images,
                                                              rpn_features,
                                                              targets=targets)
        # print('anchors: ', anchors)
        # print('detections: ', detections)
        # print('detector_losses: ', detector_losses)
        # print('size 1: ', images.size())
        # print('size 2: ', len(rpn_features))
        # for idx in range(len(rpn_features)):
        #     print('size 2: ', rpn_features[idx].size())
        # print('size 3: ', len(targets))
        # print('size 3: ', targets[0])

        if self.training:
            losses = {}
            losses.update(detector_losses)
            if self.mask:
                if self.cfg.MODEL.MASK_ON:
                    # Padding the GT
                    proposals = []
                    for (image_detections,
                         image_targets) in zip(detections, targets):
                        merge_list = []
                        if not isinstance(image_detections, list):
                            merge_list.append(
                                image_detections.copy_with_fields('labels'))

                        if not isinstance(image_targets, list):
                            merge_list.append(
                                image_targets.copy_with_fields('labels'))

                        if len(merge_list) == 1:
                            proposals.append(merge_list[0])
                        else:
                            proposals.append(cat_boxlist(merge_list))
                    x, result, mask_losses = self.mask(features, proposals,
                                                       targets)
                    # print('x: ', x)
                    # print('result: ', result)
                    # print('mask_losses: ', mask_losses)
                elif self.cfg.MODEL.SPARSE_MASK_ON:
                    x, result, mask_losses = self.mask(features, anchors,
                                                       targets)
                    # print('x: ', x)
                    # print('result: ', result)
                    # print('mask_losses: ', mask_losses)

                losses.update(mask_losses)
            return losses
        else:
            if self.mask:
                proposals = []
                for image_detections in detections:
                    num_of_detections = image_detections.bbox.shape[0]
                    if num_of_detections > self.cfg.RETINANET.NUM_MASKS_TEST > 0:
                        cls_scores = image_detections.get_field("scores")
                        image_thresh, _ = torch.kthvalue(
                            cls_scores.cpu(),
                            num_of_detections - self.cfg.RETINANET.NUM_MASKS_TEST + 1
                        )
                        keep = cls_scores >= image_thresh.item()
                        keep = torch.nonzero(keep).squeeze(1)
                        image_detections = image_detections[keep]

                    proposals.append(image_detections)

                if self.cfg.MODEL.SPARSE_MASK_ON:
                    x, detections, mask_losses = self.mask(
                        features, proposals, targets)
                else:
                    x, detections, mask_losses = self.mask(
                        features, proposals, targets)
            return detections
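
# Minimal standalone sketch (made-up scores) of the kthvalue-based cap used in
# the testing branch above to keep at most NUM_MASKS_TEST detections per image.
import torch

cls_scores = torch.tensor([0.9, 0.2, 0.75, 0.6, 0.95])
top_n = 3
if cls_scores.numel() > top_n:
    # the (numel - top_n + 1)-th smallest score is the lowest score kept
    image_thresh, _ = torch.kthvalue(cls_scores.cpu(), cls_scores.numel() - top_n + 1)
    keep = torch.nonzero(cls_scores >= image_thresh.item()).squeeze(1)
    cls_scores = cls_scores[keep]   # -> tensor([0.9000, 0.7500, 0.9500])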
        """
Example #5
0
    def __call__(self, anchors, objectness, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
        """
        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]
        labels, regression_targets = self.prepare_targets(anchors, targets)
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds,
                                                   dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds,
                                                   dim=0)).squeeze(1)

        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        objectness_flattened = []
        box_regression_flattened = []
        # for each feature level, permute the outputs to make them be in the
        # same format as the labels. Note that the labels are computed for
        # all feature levels concatenated, so we keep the same representation
        # for the objectness and the box_regression
        for objectness_per_level, box_regression_per_level in zip(
                objectness, box_regression):
            N, A, H, W = objectness_per_level.shape
            objectness_per_level = objectness_per_level.permute(
                0, 2, 3, 1).reshape(N, -1)
            box_regression_per_level = box_regression_per_level.view(
                N, -1, 4, H, W)
            box_regression_per_level = box_regression_per_level.permute(
                0, 3, 4, 1, 2)
            box_regression_per_level = box_regression_per_level.reshape(
                N, -1, 4)
            objectness_flattened.append(objectness_per_level)
            box_regression_flattened.append(box_regression_per_level)
        # concatenate on the first dimension (representing the feature levels), to
        # take into account the way the labels were generated (with all feature maps
        # being concatenated as well)
        objectness = cat(objectness_flattened, dim=1).reshape(-1)
        box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4)

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)

        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            beta=1.0 / 9,
            size_average=False,
        ) / (sampled_inds.numel())

        objectness_loss = F.binary_cross_entropy_with_logits(
            objectness[sampled_inds], labels[sampled_inds])

        return objectness_loss, box_loss
    def select_over_all_levels(self, boxlists):
        num_images = len(boxlists)
        results = []
        has_offsets = boxlists[0].has_field("offsets")
        for i in range(num_images):
            scores = boxlists[i].get_field("scores")
            labels = boxlists[i].get_field("labels")
            if has_offsets:
                offsets = boxlists[i].get_field("offsets")
                locations = boxlists[i].get_field("locations")
                rec_masks = boxlists[i].get_field("rec_masks")
            beziers = boxlists[i].get_field("beziers")
            boxes = boxlists[i].bbox
            boxlist = boxlists[i]
            result = []
            # skip the background
            for j in range(1, self.num_classes):
                inds = (labels == j).nonzero().view(-1)

                scores_j = scores[inds]
                boxes_j = boxes[inds, :].view(-1, 4)
                beziers_j = beziers[inds, :].view(-1, 16)

                boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
                boxlist_for_class.add_field("scores", scores_j)
                boxlist_for_class.add_field("beziers", beziers_j)

                if has_offsets:
                    boxlist_for_class.add_field(
                        "offsets", offsets[inds])
                    boxlist_for_class.add_field(
                        "locations", locations[inds])
                    boxlist_for_class.add_field(
                        "rec_masks", rec_masks[inds])

                boxlist_for_class = boxlist_nms(
                    boxlist_for_class, self.nms_thresh,
                    score_field="scores"
                )
                num_labels = len(boxlist_for_class)
                boxlist_for_class.add_field(
                    "labels", torch.full((num_labels,), j,
                                         dtype=torch.int64,
                                         device=scores.device)
                )
                result.append(boxlist_for_class)

            result = cat_boxlist(result)
            number_of_detections = len(result)

            # Limit to max_per_image detections **over all classes**
            if number_of_detections > self.fpn_post_nms_top_n > 0:
                cls_scores = result.get_field("scores")
                image_thresh, _ = torch.kthvalue(
                    cls_scores.cpu(),
                    number_of_detections - self.fpn_post_nms_top_n + 1
                )
                keep = cls_scores >= image_thresh.item()
                keep = torch.nonzero(keep).squeeze(1)
                result = result[keep]
            results.append(result)
        return results
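
# Minimal standalone sketch of the per-class NMS loop above, using
# torchvision.ops.nms in place of the project's boxlist_nms/BoxList; the
# boxes, scores and labels are made-up values.
import torch
from torchvision.ops import nms

boxes = torch.tensor([[0., 0., 10., 10.], [1., 1., 11., 11.], [20., 20., 30., 30.]])
scores = torch.tensor([0.9, 0.8, 0.7])
labels = torch.tensor([1, 1, 2])
nms_thresh = 0.5

kept_boxes, kept_labels = [], []
for j in labels.unique():
    inds = (labels == j).nonzero().view(-1)
    keep = nms(boxes[inds], scores[inds], nms_thresh)   # suppress within the class
    kept_boxes.append(boxes[inds][keep])
    kept_labels.append(torch.full((keep.numel(),), int(j), dtype=torch.int64))
kept_boxes = torch.cat(kept_boxes)   # the second box is suppressed by the first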
Example #7
0
    def __call__(self, anchors, objectness, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
        """
        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]
        labels, regression_targets = self.prepare_targets(anchors, targets)
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds,
                                                   dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds,
                                                   dim=0)).squeeze(1)

        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        objectness_flattened = []
        box_regression_flattened = []
        # for each feature level, permute the outputs to make them be in the
        # same format as the labels. Note that the labels are computed for
        # all feature levels concatenated, so we keep the same representation
        # for the objectness and the box_regression
        for objectness_per_level, box_regression_per_level in zip(
                objectness, box_regression):
            N, A, H, W = objectness_per_level.shape
            objectness_per_level = objectness_per_level.permute(
                0, 2, 3, 1).reshape(N, -1)
            box_regression_per_level = box_regression_per_level.view(
                N, -1, 4, H, W)
            box_regression_per_level = box_regression_per_level.permute(
                0, 3, 4, 1, 2)
            box_regression_per_level = box_regression_per_level.reshape(
                N, -1, 4)
            objectness_flattened.append(objectness_per_level)
            box_regression_flattened.append(box_regression_per_level)
        # concatenate on the first dimension (representing the feature levels), to
        # take into account the way the labels were generated (with all feature maps
        # being concatenated as well)

        # keep bbox_regression
        box_regression_reploss = cat(box_regression_flattened, dim=1)
        batches = box_regression_reploss.shape[0]
        num_anchors = box_regression_reploss.shape[1]

        objectness = cat(objectness_flattened, dim=1).reshape(-1)
        box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4)

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)

        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            beta=1.0 / 9,
            size_average=False,
        ) / (sampled_inds.numel())

        #import pdb
        #pdb.set_trace()
        box_loss_tmp = smooth_l1(
            box_regression[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            beta=1.0 / 9,
        )

        objectness_loss = F.binary_cross_entropy_with_logits(
            objectness[sampled_inds], labels[sampled_inds])

        ######################################################
        anchor_flattened = []
        for anchor_per in anchors:
            anchor_flattened.append(anchor_per.bbox)
        #assert len(anchor_flattened) <2,"Multi level anchor!"
        #anchors_bbox = cat(anchor_flattened, dim=0).reshape(-1,4)
        anchors_bbox = anchor_flattened

        targets_bbox_flattened = []
        for targets_bbox_per in targets:
            targets_bbox_flattened.append(targets_bbox_per.bbox)
        #import pdb; pdb.set_trace()
        #targets_box = cat(targets_bbox_flattened, dim=0).reshape(-1,4)
        targets_box = targets_bbox_flattened

        RepGT_losses = 0
        RepBox_losses = 0
        tmp_index = 0
        for batch in range(batches):
            box_regression_dx = box_regression_reploss[batch, :, 0]
            box_regression_dy = box_regression_reploss[batch, :, 1]
            box_regression_dw = box_regression_reploss[batch, :, 2]
            box_regression_dh = box_regression_reploss[batch, :, 3]
            #assert box_regression.shape[0] == anchors_bbox.shape[0],"Invalid shape with bbox_regression && anchors!"

            targets_box_batch = targets_box[batch]
            anchors_bbox_batch = anchors_bbox[batch]

            inds_ge = sampled_pos_inds.ge(batch * num_anchors)
            inds_le = sampled_pos_inds.le(batch * num_anchors + num_anchors -
                                          1)
            inds_bet = inds_ge * inds_le
            sampled_pos_inds_batch = sampled_pos_inds[inds_bet] % num_anchors

            if len(sampled_pos_inds_batch) != 0:

                anchors_bbox_cx = (anchors_bbox_batch[:, 0] +
                                   anchors_bbox_batch[:, 2]) / 2.0
                anchors_bbox_cy = (anchors_bbox_batch[:, 1] +
                                   anchors_bbox_batch[:, 3]) / 2.0
                anchors_bbox_w = anchors_bbox_batch[:, 2] - anchors_bbox_batch[:, 0] + 1
                anchors_bbox_h = anchors_bbox_batch[:, 3] - anchors_bbox_batch[:, 1] + 1
                predict_w = torch.exp(box_regression_dw) * anchors_bbox_w
                predict_h = torch.exp(box_regression_dh) * anchors_bbox_h
                predict_x = box_regression_dx * anchors_bbox_w + anchors_bbox_cx
                predict_y = box_regression_dy * anchors_bbox_h + anchors_bbox_cy

                predict_x1 = predict_x - 0.5 * predict_w
                predict_y1 = predict_y - 0.5 * predict_h
                predict_x2 = predict_x + 0.5 * predict_w
                predict_y2 = predict_y + 0.5 * predict_h

                predict_boxes = torch.stack(
                    (predict_x1, predict_y1, predict_x2, predict_y2)).t()
                predict_boxes_pos = predict_boxes[sampled_pos_inds_batch, :]
                IoU = calc_iou(
                    anchors_bbox_batch,
                    targets_box_batch[:, :4])  # num_anchors x num_annotations
                IoU_max, IoU_argmax = torch.max(IoU, dim=1)  # num_anchors x 1

                #add RepGT losses
                IoU_pos = IoU[sampled_pos_inds_batch, :]
                IoU_max_keep, IoU_argmax_keep = torch.max(
                    IoU_pos, dim=1, keepdim=True)  # num_anchors x 1
                for idx in range(IoU_argmax_keep.shape[0]):
                    IoU_pos[idx, IoU_argmax_keep[idx]] = -1
                IoU_sec, IoU_argsec = torch.max(IoU_pos, dim=1)
                assigned_annotations_sec = targets_box_batch[IoU_argsec, :]

                box_loss_tmp_batch = box_loss_tmp[
                    tmp_index:tmp_index + sampled_pos_inds_batch.shape[0]]
                box_loss_tmp_batch = torch.sum(box_loss_tmp_batch, dim=1)
                IoG_to_minimize = IoG(assigned_annotations_sec,
                                      predict_boxes_pos)
                RepGT_loss = smooth_ln(IoG_to_minimize, 0.5)
                RepGT_loss = RepGT_loss * torch.lt(0.1 * RepGT_loss,
                                                   box_loss_tmp_batch).float()
                RepGT_loss = RepGT_loss.mean() / sampled_pos_inds.numel()
                RepGT_losses += RepGT_loss

                #add RepBox losses
                IoU_argmax_pos = IoU_argmax[sampled_pos_inds_batch].float()
                IoU_argmax_pos = IoU_argmax_pos.unsqueeze(0).t()
                predict_boxes_pos = torch.cat(
                    [predict_boxes_pos, IoU_argmax_pos], dim=1)

                predict_boxes_pos_np = predict_boxes_pos.detach().cpu().numpy()
                num_gt = targets_box_batch.shape[0]
                predict_boxes_pos_sampled = []
                box_loss_tmp_batch_sampled = []
                for id in range(num_gt):
                    index = np.where(predict_boxes_pos_np[:, 4] == id)[0]
                    if index.shape[0]:
                        idx = random.choice(range(index.shape[0]))
                        predict_boxes_pos_sampled.append(
                            predict_boxes_pos[index[idx], :4])
                        box_loss_tmp_batch_sampled.append(
                            box_loss_tmp_batch[index[idx]])
                predict_boxes_pos_sampled = torch.stack(
                    predict_boxes_pos_sampled)
                box_loss_tmp_batch_sampled = torch.stack(
                    box_loss_tmp_batch_sampled)
                iou_repbox = calc_iou(predict_boxes_pos_sampled,
                                      predict_boxes_pos_sampled)
                mask = torch.lt(iou_repbox, 1.).float()
                iou_repbox = iou_repbox * mask
                RepBox_loss = smooth_ln(iou_repbox, 0.5)
                RepBox_loss = RepBox_loss * torch.lt(
                    0.85 * RepBox_loss, box_loss_tmp_batch_sampled).float()
                RepBox_loss = RepBox_loss.sum() / sampled_pos_inds.numel()
                RepBox_losses += RepBox_loss

                tmp_index += sampled_pos_inds_batch.shape[0]
                if RepBox_losses != RepBox_losses or RepGT_losses != RepGT_losses or box_loss != box_loss:
                    import pdb
                    pdb.set_trace()

        RepGT_losses /= batches
        RepBox_losses /= batches
        reg_loss = box_loss + 0.1 * RepGT_losses + 0.7 * RepBox_losses

        return objectness_loss, reg_loss
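
# Minimal standalone sketch (made-up anchors and deltas) of the delta-to-box
# decoding done inside the batch loop above: (dx, dy, dw, dh) are applied to
# anchor centers and sizes to recover predicted corners.
import torch

anchors = torch.tensor([[0., 0., 15., 15.], [10., 10., 29., 29.]])    # x1, y1, x2, y2
deltas = torch.tensor([[0.1, -0.1, 0.2, 0.0], [0.0, 0.0, 0.0, 0.0]])  # dx, dy, dw, dh

w = anchors[:, 2] - anchors[:, 0] + 1
h = anchors[:, 3] - anchors[:, 1] + 1
cx = (anchors[:, 0] + anchors[:, 2]) / 2.0
cy = (anchors[:, 1] + anchors[:, 3]) / 2.0

pred_cx = deltas[:, 0] * w + cx
pred_cy = deltas[:, 1] * h + cy
pred_w = torch.exp(deltas[:, 2]) * w
pred_h = torch.exp(deltas[:, 3]) * h

pred_boxes = torch.stack((pred_cx - 0.5 * pred_w, pred_cy - 0.5 * pred_h,
                          pred_cx + 0.5 * pred_w, pred_cy + 0.5 * pred_h)).t()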
Example #8
0
def mine_boxes(p_trainval, ov_th, score_th, mined_class_label=1, visualize=False):
    mined_images = 0
    mined_boxes = 0
    # lines = []
    # has_mined = []
    annos = []
    id = 0

    for img_id, (t, p) in tqdm(p_trainval.items(), mininterval=20):
    # for img_id, (t, p) in p_trainval.items():
        p = p.resize(t.size)
        p.add_field('labels', (p.get_field('labels') > 0).to(torch.long) * mined_class_label)
        p = boxlist_nms(p, 0.4)
        s = p.get_field('scores')
        # Strategy 1: keep at least one box per image even if the score is low
        # p = p[s >= min(score_th, s.max())]
        # Strategy 2: keep only high-score ones
        p = p[s >= score_th]

        if len(p) and len(t):
            #ious = boxlist_iou(p, anno)
            ious = boxlist_overlap1(p, t)
            # try:
            ious = ious.max(1)[0]
            # except:
            #     print (p,t,ious)
            p = p[ious < ov_th]

        if len(p):
            mined_images += 1
            mined_boxes += len(p)

            # pn = [{'class': '_mined_', 'rect': p.bbox[i].tolist()} for i in range(len(p))]
            # l[3] = l[3] + pn
            # has_mined.append(True)
            del p.extra_fields['scores']
            t = cat_boxlist((t, p))
            # print (t.bbox, t.get_field('labels'))

            if visualize:
                #img = d.get_img(img_id)
                path = datasets[0].coco.loadImgs(img_id)[0]['file_name']
                img = Image.open(os.path.join(datasets[0].root, path)).convert('RGB')
                plt.imshow(img)
                for i in range(len(t)):
                    x0, y0, x1, y1 = t.bbox[i]
                    w, h = x1-x0+1, y1-y0+1
                    plt.gca().add_patch(Rectangle((x0, y0), w, h, alpha=0.9,
                                                  facecolor='none', edgecolor='green', linewidth=1.5))
                for i in range(len(p)):
                    x0, y0, x1, y1 = p.bbox[i]
                    w, h = x1-x0+1, y1-y0+1
                    plt.gca().add_patch(Rectangle((x0, y0), w, h, alpha=0.9,
                                                  facecolor='none', edgecolor='red', linewidth=1))
                #plt.title(str(d.lines[id]))
                print (img_id, t, p, mined_images)
                plt.show()
        # else:
        #     has_mined.append(False)

        # lines.append(l)
        boxes = t.bbox.cpu().numpy().copy()
        labels = t.get_field('labels').tolist()
        for i in range(len(boxes)):
            bbox = boxes[i] #.copy()
            bbox[2:] -= bbox[:2] - 1
            bbox = bbox.tolist()
            id += 1
            anno = {'area': bbox[2]*bbox[3], 'iscrowd': 0, 'image_id': int(img_id), 
                    'bbox': bbox, 'category_id': labels[i], 'id': id, 'ignore': 0}
            annos.append(anno)

    print ('mined_images', mined_images, 'mined_boxes', mined_boxes)
    return annos
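
# Minimal standalone sketch of the xyxy -> xywh conversion used when building
# the COCO-style annotation dicts above (inclusive-pixel convention, hence the
# +1); the box values are made up.
import numpy as np

box_xyxy = np.array([10., 20., 49., 59.])
box_xywh = box_xyxy.copy()
box_xywh[2:] -= box_xywh[:2] - 1     # w = x2 - x1 + 1, h = y2 - y1 + 1
anno = {'bbox': box_xywh.tolist(), 'area': box_xywh[2] * box_xywh[3], 'iscrowd': 0}
# box_xywh is now [10., 20., 40., 40.]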
Example #9
0
    def filter_results(self, objectlist, num_classes):
        boxlist_left = objectlist.get_field("left_box")
        boxlist_right = objectlist.get_field("right_box")
        boxes_left = boxlist_left.bbox.reshape(-1, num_classes * 4)
        boxes_right = boxlist_right.bbox.reshape(-1, num_classes * 4)

        centers_left = objectlist.get_field("left_centers").reshape(
            -1, num_classes * 2)
        centers_right = objectlist.get_field("right_centers").reshape(
            -1, num_classes * 2)
        dimemsions = objectlist.get_field("dimensions").reshape(
            -1, num_classes * 3)
        rotations = objectlist.get_field("rotations").reshape(-1, num_classes)
        scores = objectlist.get_field("scores").reshape(-1, num_classes)

        device = scores.device
        result_box_left = []
        result_box_right = []
        result_center_left = []
        result_center_right = []
        result_dimensions = []
        result_rotations = []

        inds_all = scores > self.score_thresh

        for j in range(1, num_classes):
            inds = inds_all[:, j].nonzero().squeeze(1)

            scores_j = scores[inds, j]
            boxes_left_j = boxes_left[inds, j * 4:(j + 1) * 4]
            boxes_right_j = boxes_right[inds, j * 4:(j + 1) * 4]
            centers_left_j = centers_left[inds, j * 2:(j + 1) * 2]
            centers_right_j = centers_right[inds, j * 2:(j + 1) * 2]
            dimemsions_j = dimemsions[inds, j * 3:(j + 1) * 3]
            rotations_j = rotations[inds, j]

            boxlist_left_for_class = BoxList(boxes_left_j,
                                             boxlist_left.size,
                                             mode="xyxy")
            boxlist_right_for_class = BoxList(boxes_right_j,
                                              boxlist_right.size,
                                              mode="xyxy")

            boxlist_left_for_class.add_field("scores", scores_j)
            boxlist_right_for_class.add_field("scores", scores_j)

            keep, mode = boxlist_nms_stereo_td(boxlist_left_for_class,
                                               boxlist_right_for_class,
                                               self.nms)
            boxlist_left_for_class = boxlist_left_for_class[keep].convert(mode)
            boxlist_right_for_class = boxlist_right_for_class[keep].convert(
                mode)
            centers_left_for_class = centers_left_j[keep]
            centers_right_for_class = centers_right_j[keep]
            dimemsions_for_class = dimemsions_j[keep]
            rotations_for_class = rotations_j[keep]

            num_labels = len(boxlist_left_for_class)
            labels = torch.full((num_labels, ),
                                j,
                                dtype=torch.int64,
                                device=device)

            boxlist_left_for_class.add_field("labels", labels)
            boxlist_right_for_class.add_field("labels", labels)

            result_box_left.append(boxlist_left_for_class)
            result_box_right.append(boxlist_right_for_class)
            result_center_left.append(centers_left_for_class)
            result_center_right.append(centers_right_for_class)
            result_dimensions.append(dimemsions_for_class)
            result_rotations.append(rotations_for_class)

        result_box_left = cat_boxlist(result_box_left)
        result_box_right = cat_boxlist(result_box_right)
        result_center_left = torch.cat(result_center_left)
        result_center_right = torch.cat(result_center_right)
        result_dimensions = torch.cat(result_dimensions)
        result_rotations = torch.cat(result_rotations)

        number_of_detections = len(result_box_left)

        result = ObjectList()
        result.add_field("left_box", result_box_left)
        result.add_field("right_box", result_box_right)
        result.add_field("left_centers", result_center_left)
        result.add_field("right_centers", result_center_right)
        result.add_field("dimensions", result_dimensions)
        result.add_field("rotations", result_rotations)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.detections_per_img > 0:
            cls_scores = result_box_left.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(),
                number_of_detections - self.detections_per_img + 1)
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]

        return result
    def __init__(self, cfg, bias, arch="RetinaNet"):
        device = torch.device(cfg.MODEL.DEVICE)
        if arch == "RetinaNet":
            anchor_generator = make_anchor_generator_retinanet(cfg)
            fg_iou, bg_iou = cfg.MODEL.RETINANET.FG_IOU_THRESHOLD, cfg.MODEL.RETINANET.BG_IOU_THRESHOLD
            num_classes = cfg.MODEL.RETINANET.NUM_CLASSES - 1
            num_anchors = len(cfg.MODEL.RETINANET.ASPECT_RATIOS) \
                * cfg.MODEL.RETINANET.SCALES_PER_OCTAVE
        else:
            assert arch == "RPN"
            anchor_generator = make_anchor_generator(cfg)
            fg_iou, bg_iou = cfg.MODEL.RPN.FG_IOU_THRESHOLD, cfg.MODEL.RPN.BG_IOU_THRESHOLD
            num_classes = 1
            num_anchors = anchor_generator.num_anchors_per_location()[0]

        prior = load_prior(cfg, arch)

        if prior is not None:
            nn.init.constant_(bias, -log((1 - prior) / prior))
            return

        data_loader = make_init_data_loader(
            cfg,
            is_distributed=True,
            images_per_batch=cfg.SOLVER.IMS_PER_BATCH)

        proposal_matcher = Matcher(
            fg_iou,
            bg_iou,
            allow_low_quality_matches=True,
        )

        backbone = build_backbone(cfg).to(device)
        num_fg, num_all = 0, 0
        num_gpus = get_num_gpus()

        for images, targets, _ in tqdm(data_loader):
            images = images.to(device)
            targets = [target.to(device) for target in targets]
            h, w = images.tensors.shape[-2:]

            if num_all == 0:
                features = backbone(images.tensors)
                n, c = features[0].shape[:2]
                levels = len(features)
                stride = int(h / features[0].shape[2])

            features = [
                torch.zeros(n,
                            c,
                            int(ceil(h / (stride * 2**i))),
                            int(ceil(w / (stride * 2**i))),
                            device=device) for i in range(levels)
            ]

            anchors = anchor_generator(images, features)
            anchors = [
                cat_boxlist(anchors_per_image).to(device)
                for anchors_per_image in anchors
            ]

            for anchor, target in zip(anchors, targets):
                match_quality_matrix = boxlist_iou(target, anchor)
                matched_idxs = proposal_matcher(match_quality_matrix)
                num_fg_per_image, num_bg_per_image = (
                    matched_idxs >= 0).sum(), (
                        matched_idxs == Matcher.BELOW_LOW_THRESHOLD).sum()
                num_fg += num_fg_per_image
                num_all += num_fg_per_image + num_bg_per_image

        fg_all_ratio = reduce_div(num_fg.float(), num_all.float(),
                                  num_gpus).item()
        prior = fg_all_ratio / num_classes
        nn.init.constant_(bias, -log((1 - prior) / prior))
        if torch.cuda.current_device() == 0:
            save_prior(cfg, prior, arch)
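
# Minimal standalone sketch of the prior-probability bias initialization above:
# setting the bias to -log((1 - prior) / prior) makes the initial sigmoid
# output equal to `prior` (0.01 here is only an example value).
import torch
from math import log

prior = 0.01
bias = torch.zeros(1)
torch.nn.init.constant_(bias, -log((1 - prior) / prior))
assert torch.allclose(torch.sigmoid(bias), torch.tensor([prior]), atol=1e-5)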
    def __call__(self, anchors, objectness, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
        """

        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]

        labels, regression_targets, matched_gt_ids, \
            matched_gt_ious = self.prepare_targets(anchors, targets)

        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        labels = torch.cat(labels, dim=0)

        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds,
                                                   dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds,
                                                   dim=0)).squeeze(1)

        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        total_pos = sampled_pos_inds.numel()
        total_neg = sampled_neg_inds.numel()
        total_samples = total_pos + total_neg

        objectness, box_regression = concat_box_prediction_layers(
            objectness, box_regression)
        objectness = objectness.squeeze()

        if total_pos == 0:
            return objectness.sum() * 0, objectness.sum() * 0

        regression_targets = torch.cat(regression_targets, dim=0)

        with torch.no_grad():
            start_gt_idx = 0
            for ix, t in enumerate(targets):
                matched_gt_ids[ix] += start_gt_idx
                start_gt_idx += len(t)

            matched_gt_ids = torch.cat(matched_gt_ids)
            pos_matched_gt_ids = matched_gt_ids[sampled_pos_inds]

            pos_label_weights = torch.zeros_like(pos_matched_gt_ids,
                                                 dtype=torch.float32)

            label_idxs = [
                torch.nonzero(pos_matched_gt_ids == x).squeeze()
                for x in range(start_gt_idx)
            ]

            # """OLD"""
            label_cnts = [li.numel() for li in label_idxs]
            # label_weights = total_pos / label_cnts.to(dtype=torch.float32)
            # label_weights /= start_gt_idx  # equal class weighting
            for x in range(start_gt_idx):
                if label_cnts[x] > 0:
                    pos_label_weights[label_idxs[x]] = (
                        total_pos / label_cnts[x] / start_gt_idx)  # equal class weighting
        #
        #     # # """NEW"""
        #     # MAX_GT_NUM = 6  # TODO: CONFIG
        #     # matched_gt_ious = torch.cat(matched_gt_ious)
        #     # pos_matched_gt_ious = matched_gt_ious[sampled_pos_inds]
        #     #
        #     # label_cnts = [min(MAX_GT_NUM, nz.numel()) for nz in label_idxs]
        #     # total_pos = sum(label_cnts)
        #     # for x in range(start_gt_idx):
        #     #     nz = label_idxs[x]
        #     #     nnn = nz.numel()
        #     #     if nnn <= MAX_GT_NUM:
        #     #         if nnn > 0:
        #     #             pos_label_weights[nz] = total_pos / nnn
        #     #         continue
        #     #     top_iou_ids = torch.sort(pos_matched_gt_ious[nz], descending=True)[1][:MAX_GT_NUM]
        #     #     inds = nz[top_iou_ids]
        #     #     pos_label_weights[inds] = total_pos / MAX_GT_NUM
        #     #
        #     # pos_label_weights = pos_label_weights / start_gt_idx
        #
        # pos_regression = box_regression[sampled_pos_inds]
        # pos_regression_targets = regression_targets[sampled_pos_inds]
        # # normalize_reg_targets(pos_regression_targets)
        # box_loss = smooth_l1_loss(
        #     pos_regression,#[:, :-1],
        #     pos_regression_targets,#[:, :-1],
        #     beta=1.0 / 9,
        # )
        # box_loss = (box_loss * pos_label_weights.unsqueeze(1)).sum() / total_pos
        #
        # # angle_loss = 0 #torch.abs(torch.sin(pos_regression[:, -1] - pos_regression_targets[:, -1])).mean()
        #
        # # balance negative and positive weights
        sampled_labels = labels[sampled_inds]
        objectness_weights = torch.ones_like(sampled_labels,
                                             dtype=torch.float32)
        objectness_weights[sampled_labels == 1] = pos_label_weights
        objectness_weights[sampled_labels != 1] = min(pos_label_weights.min(),
                                                      0.5)

        # criterion = torch.nn.BCELoss(reduce=False)
        # entropy_loss = criterion(objectness[sampled_inds].sigmoid(), sampled_labels)
        # objectness_loss = torch.mul(entropy_loss, objectness_weights).sum()

        objectness_loss = F.binary_cross_entropy_with_logits(
            objectness[sampled_inds],
            sampled_labels,
            weight=objectness_weights)
        # gamma = 2.0
        # alpha = 0.25
        # p = torch.sigmoid(objectness[sampled_inds])
        # t = sampled_labels
        # term1 = (1 - p) ** gamma * torch.log(p)
        # term2 = p ** gamma * torch.log(1 - p)
        # objectness_loss = -(t == 1).float() * term1 * alpha - ((t != 1) * (t >= 0)).float() * term2 * (1 - alpha)
        # objectness_loss = torch.mul(objectness_weights, objectness_loss).mean()

        box_reg = box_regression[sampled_pos_inds]
        box_reg_targets = regression_targets[sampled_pos_inds]
        box_loss = smooth_l1_loss(
            box_reg[:, :-1],
            box_reg_targets[:, :-1],
            beta=1.0 / 9,
            # size_average=False,
        ).sum() / (total_samples)
        angle_loss = smooth_angle_loss(
            box_reg[:, -1], box_reg_targets[:, -1]).sum() / (total_samples)
        box_loss = (box_loss + angle_loss)

        # with torch.no_grad():
        #     base_anchors = torch.cat([a.get_field("rrects") for a in anchors])[sampled_pos_inds]
        #     gt_box = self.box_coder.decode(box_reg_targets, base_anchors)
        # pred_box = self.box_coder.decode(box_reg, base_anchors)
        # ious = compute_iou_rotate_loss(pred_box, gt_box) + 1e-5
        # iou_loss = -torch.log(ious**2)
        # box_loss = iou_loss.sum() / total_samples

        # objectness_loss = F.binary_cross_entropy_with_logits(
        #     objectness[sampled_inds], labels[sampled_inds]
        # )

        return objectness_loss, box_loss  #, angle_loss
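
# Minimal standalone sketch (made-up match ids) of the per-ground-truth
# weighting above: positives matched to the same GT share weight so that every
# GT contributes roughly equally to the objectness loss.
import torch

pos_matched_gt_ids = torch.tensor([0, 0, 0, 1, 2, 2])   # 3 GTs, 6 positives
total_pos = pos_matched_gt_ids.numel()
num_gt = int(pos_matched_gt_ids.max()) + 1

weights = torch.zeros(total_pos)
for g in range(num_gt):
    idx = torch.nonzero(pos_matched_gt_ids == g).squeeze(1)
    if idx.numel() > 0:
        weights[idx] = total_pos / idx.numel() / num_gt
# weights -> [0.667, 0.667, 0.667, 2.0, 1.0, 1.0]; each GT's group sums to 2.0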
Example #12
0
    def __call__(self, batch):
        transposed_batch = list(zip(*batch))
        if self.mode == 0:
            images = to_image_list(transposed_batch[0], self.size_divisible)
            targets = transposed_batch[1]
            img_ids = transposed_batch[2]
            if self.special_deal:
                if self.post_branch == "retina":
                    grid_sizes = [(math.ceil(self.crop_size / r),
                                math.ceil(self.crop_size / r))
                                for r in (8, 16, 32, 64, 128)]
                    mini_batch_size = len(targets)
                    anchors = self.anchor_generator.get_anchors(mini_batch_size, self.crop_size, grid_sizes)
                    anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
                    labels, regression_targets = self.loss_evaluator.prepare_targets(anchors, targets)
                    # concatenate labels (list) and regression_targets (list) into single tensors separately
                    labels = torch.cat(labels, dim=0)
                    regression_targets = torch.cat(regression_targets, dim=0)
                    targets = { 'labels': labels, 'regression_targets': regression_targets }
                else:
                    strides = [8, 16, 32, 64, 128]
                    feature_sizes = [(math.ceil(self.crop_size / r),
                                    math.ceil(self.crop_size / r))
                                    for r in (8, 16, 32, 64, 128)]
                    points = []
                    for level, size in enumerate(feature_sizes):
                        h, w = size
                        points_per_level = self.generate_points_per_level(
                            h, w, strides[level],
                            torch.device("cpu")
                        )
                        points.append(points_per_level)
                    cls_targets, reg_targets = self.loss_evaluator.prepare_targets(points, targets)
                    cls_targets_flatten = []
                    reg_targets_flatten = []
                    for l in range(len(cls_targets)):
                        cls_targets_flatten.append(cls_targets[l].reshape(-1))
                        reg_targets_flatten.append(reg_targets[l].reshape(-1, 4))
                    cls_targets_flatten = torch.cat(cls_targets_flatten, dim=0)
                    reg_targets_flatten = torch.cat(reg_targets_flatten, dim=0)
                    targets = { 
                        'cls_targets_flatten': cls_targets_flatten,
                        'reg_targets_flatten': reg_targets_flatten
                    }
                    
        elif self.mode == 1:
            feature_list = transposed_batch[0]
            feature_list_zip = zip(*(feature_list))
            feature_list_flatten = []
            for feature_per_level in feature_list_zip:
                feature_per_level = [torch.unsqueeze(xaf, dim=0) for xaf in feature_per_level]
                feature_per_level_batch = torch.cat(feature_per_level, dim=0)
                feature_list_flatten.append(feature_per_level_batch)
            images = feature_list_flatten

            if self.special_deal:
                if self.post_branch == "retina":
                    labels = transposed_batch[1]
                    regression_targets = transposed_batch[2]
                    # concatenate labels (list) and regression_targets (list) into single tensors separately
                    labels = torch.cat(labels, dim=0)
                    regression_targets = torch.cat(regression_targets, dim=0)
                    targets = { 'labels': labels, 'regression_targets': regression_targets }
                else:
                    densebox_labels = list(transposed_batch[1])
                    densebox_regs = list(transposed_batch[2])
                    #TODO: Automatically change num_points_per_level according to crop_size
                    # num_points_per_level = [4096, 1024, 256, 64, 16]
                    num_points_per_level = [2304, 576, 144, 36, 9]
                    for xi in range(len(densebox_labels)):
                        densebox_labels[xi] = torch.split(
                            densebox_labels[xi],
                            num_points_per_level,
                            dim = 0
                        )
                        densebox_regs[xi] = torch.split(
                            densebox_regs[xi],
                            num_points_per_level,
                            dim = 0
                        )
                    densebox_labels_level_first = []
                    densebox_regs_level_first = []
                    for level in range(len(num_points_per_level)):
                        densebox_labels_level_first.append(
                            torch.cat([densebox_labels_per_im[level] for densebox_labels_per_im in densebox_labels]
                                    , dim = 0)
                        )
                        densebox_regs_level_first.append(
                            torch.cat([densebox_regs_per_im[level] for densebox_regs_per_im in densebox_regs]
                                    , dim = 0)
                        )
                    cls_targets_flatten = []
                    reg_targets_flatten = []
                    for xl in range(len(densebox_labels_level_first)):
                        cls_targets_flatten.append(densebox_labels_level_first[xl].reshape(-1))
                        reg_targets_flatten.append(densebox_regs_level_first[xl].reshape(-1, 4))
                    cls_targets_flatten = torch.cat(cls_targets_flatten, dim=0)
                    reg_targets_flatten = torch.cat(reg_targets_flatten, dim=0)
                    targets = {
                        "cls_targets_flatten": cls_targets_flatten,
                        "reg_targets_flatten": reg_targets_flatten 
                    }
                img_ids = transposed_batch[3]
            else:
                targets = transposed_batch[1]
                img_ids = transposed_batch[2]            
        else:
            raise ValueError("No mode {} for data batch collect_fn".format(self.mode))
        return images, targets, img_ids
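
# Minimal standalone sketch (toy level sizes) of the split-then-regroup done in
# mode 1 above: per-image targets are split by the number of points per level,
# then re-concatenated level-first across the batch.
import torch

num_points_per_level = [4, 2]
per_image = [torch.arange(6), torch.arange(6, 12)]   # two images, 6 points each

split = [torch.split(t, num_points_per_level, dim=0) for t in per_image]
level_first = [torch.cat([s[level] for s in split], dim=0)
               for level in range(len(num_points_per_level))]
# level_first[0] has 8 entries (4 per image), level_first[1] has 4 (2 per image)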
Example #13
0
    def forward(self,
                anchors,
                objectness,
                box_regression,
                targets=None,
                centerness=None,
                rpn_center_box_regression=None,
                centerness_pack=None):
        """
        Arguments:
            anchors: list[list[BoxList]]
            objectness: list[tensor]
            box_regression: list[tensor]

        Returns:
            boxlists (list[BoxList]): the post-processed anchors, after
                applying box decoding and NMS
        """
        sampled_boxes = []
        num_levels = len(objectness)
        anchors = list(zip(*anchors))
        for a, o, b in zip(anchors, objectness, box_regression):
            sampled_boxes.append(self.forward_for_single_feature_map(a, o, b))

        boxlists = list(zip(*sampled_boxes))
        boxlists = [cat_boxlist(boxlist) for boxlist in boxlists]

        if num_levels > 1:
            boxlists = self.select_over_all_levels(boxlists)

        # append ground-truth bboxes to proposals
        if self.training and targets is not None:
            boxlists = self.add_gt_proposals(boxlists, targets)

        if self.pred_targets:
            pred_targets = []
            if True:
                for img_centerness, center_box_reg in zip(
                        centerness, rpn_center_box_regression):
                    # gt_centerness, gt_bbox, anchor_bbox = center_target
                    # print(rpn_center_box_regression, anchor_bbox)
                    # gt_mask = gt_centerness.detach().cpu().numpy() > 0.0
                    img_centerness = img_centerness[0, :, :]

                    center_box_reg = center_box_reg[:, :, :].permute(1, 2, 0)

                    anchor_bbox = np.zeros(shape=(center_box_reg.shape[0],
                                                  center_box_reg.shape[1], 4))
                    for xx in range(anchor_bbox.shape[1]):
                        for yy in range(anchor_bbox.shape[0]):
                            anchor_bbox[yy, xx, :] = [
                                max(0.0, xx * 4 - 16),
                                max(0.0, yy * 4 - 16),
                                min(xx * 4 + 16, boxlists[0].size[0]),
                                min(yy * 4 + 16, boxlists[0].size[1])
                            ]
                    anchor_bbox = torch.as_tensor(anchor_bbox,
                                                  device=center_box_reg.device)

                    # print(center_box_reg.shape, anchor_bbox.shape)
                    boxes = self.box_coder.decode(
                        center_box_reg.reshape(-1, 4), anchor_bbox.view(-1, 4))

                    pred_target = None
                    pred_score = torch.sigmoid(
                        img_centerness.detach()).cpu().numpy()
                    pred_mask = pred_score > 0.95
                    # print(gt_mask.shape, pred_mask.shape)
                    imllabel, numlabel = scipy.ndimage.label(pred_mask)
                    if numlabel > 0:
                        valid = np.zeros(shape=(numlabel, ), dtype=bool)
                        box_inds = []
                        for ano in range(1, numlabel + 1):
                            mask = imllabel == ano
                            valid[ano - 1] = True  #  gt_mask[mask].sum() == 0
                            box_inds.append(np.argmax(pred_score * mask))
                        if np.any(valid):
                            boxes = boxes[box_inds, :]
                            # print(box_inds, boxes, anchor_bbox.view(-1, 4)[box_inds, :], gt_bbox.view(-1, 4)[box_inds, :])
                            pred_target = BoxList(torch.as_tensor(boxes),
                                                  boxlists[0].size,
                                                  mode="xyxy")
                            pred_target.clip_to_image()
                            pred_target = pred_target.to(img_centerness.device)
                            # print(img_centerness.device, pred_target.bbox.device)
                    pred_targets.append(pred_target)
            else:
                for img_centerness in centerness:
                    pred_target = None
                    pred_mask = torch.sigmoid(
                        img_centerness[0, :, :].detach()).cpu().numpy() > 0.95
                    # print(gt_mask.shape, pred_mask.shape)
                    imllabel, numlabel = scipy.ndimage.label(pred_mask)
                    if numlabel > 0:
                        masks = np.zeros(shape=(pred_mask.shape[0],
                                                pred_mask.shape[1], numlabel),
                                         dtype=np.uint8)
                        valid = np.zeros(shape=(numlabel, ), dtype=bool)
                        for ano in range(1, numlabel + 1):
                            mask = imllabel == ano
                            valid[ano - 1] = True
                            masks[:, :, ano - 1] = mask
                        if np.any(valid):
                            masks = masks[:, :, valid]
                            boxes = extract_bboxes(masks)
                            pred_target = BoxList(torch.as_tensor(boxes),
                                                  boxlists[0].size,
                                                  mode="xyxy")
                            pred_target.clip_to_image()
                            pred_target = pred_target.to(img_centerness.device)
                            # print(img_centerness.device, pred_target.bbox.device)
                    pred_targets.append(pred_target)

            if True:
                if not self.training:
                    print('add', [
                        len(pred_target)
                        for pred_target in pred_targets if pred_target
                    ], 'proposals')
                boxlists = self.add_pred_proposals(boxlists, pred_targets)
        else:
            pred_targets = None

        return boxlists, pred_targets
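The pred_targets branch above turns a centerness heatmap into one proposal per connected component by keeping the highest-scoring location inside each component. A standalone sketch of that selection step (the threshold and names are illustrative):

import numpy as np
import scipy.ndimage

def peaks_from_centerness(pred_score, threshold=0.95):
    """Return, for each connected component of pred_score > threshold, the
    flat index of its highest-scoring pixel."""
    pred_mask = pred_score > threshold
    imllabel, numlabel = scipy.ndimage.label(pred_mask)
    box_inds = []
    for ano in range(1, numlabel + 1):
        mask = imllabel == ano
        # argmax over the full map with scores outside the component zeroed out
        box_inds.append(np.argmax(pred_score * mask))
    return box_inds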
示例#14
0
    def __call__(self, anchors, box_cls, box_regression, objectness_cls,
                 targets):
        """
        Arguments:
            anchors (list[BoxList])
            box_cls (list[Tensor])
            box_regression (list[Tensor])
            objectness_cls (list[Tensor])
            targets (list[BoxList])

        Returns:
            retinanet_cls_loss (Tensor)
            retinanet_regression_loss (Tensor)
            retinanet_objectness_loss (Tensor)
        """
        device = box_cls[0].device
        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]
        labels, regression_targets = self.prepare_targets(anchors, targets)
        if self.classify_objectness_image:
            sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
            sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds,
                                                       dim=0)).squeeze(1)
            sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds,
                                                       dim=0)).squeeze(1)
            sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds],
                                     dim=0)
        N = len(labels)
        box_cls, box_regression, objectness_cls = \
                concat_box_prediction_layers(box_cls, box_regression, objectness_cls)

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)
        pos_inds = torch.nonzero(labels > 0).squeeze(1)

        if pos_inds.numel() > 0:
            retinanet_regression_loss = smooth_l1_loss(
                box_regression[pos_inds],
                regression_targets[pos_inds],
                beta=self.bbox_reg_beta,
                size_average=False,
            ) / (max(1,
                     pos_inds.numel() * self.regress_norm))
        else:
            retinanet_regression_loss = torch.tensor(0.0, device=device)
            self.logger.info(
                "This batch has none positive anchors for bbox regression")

        if self.use_ignored_bbox:
            labels = labels.int()
            retinanet_cls_loss = self.box_cls_loss_func(
                box_cls, labels) / (pos_inds.numel() + N)
        else:
            valid_inds1 = torch.nonzero(labels >= 0).squeeze(1)
            valid_inds2 = torch.nonzero(labels < -1).squeeze(1)
            valid_inds = torch.cat([valid_inds1, valid_inds2], dim=0)
            labels = labels.int()
            if valid_inds.numel() > 0:
                retinanet_cls_loss = self.box_cls_loss_func(
                    box_cls[valid_inds], labels[valid_inds]) * 1000 / (max(
                        1, valid_inds.numel()))
            else:
                retinanet_cls_loss = torch.tensor(0.0, device=device)
                self.logger.info(
                    "This batch has none valid anchors for bbox classification"
                )

        if self.classify_objectness_image:
            objectness_labels = labels >= 1
            objectness_labels = objectness_labels.view(-1, 1)
            objectness_labels = objectness_labels.float()
            retinanet_objectness_loss = F.binary_cross_entropy_with_logits(
                objectness_cls[sampled_inds],
                objectness_labels[sampled_inds],
                reduction='sum') / (sampled_inds.numel() *
                                    self.objectness_norm)
        else:
            retinanet_objectness_loss = torch.tensor(0.0, device=device)

        return retinanet_cls_loss, retinanet_regression_loss, retinanet_objectness_loss
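These loss examples rely on a smooth_l1_loss helper taking beta and size_average. A minimal sketch consistent with that usage (an assumed implementation; the project's own helper, e.g. the apparently element-wise variant used later in 示例#18, may differ):

import torch

def smooth_l1_loss(input, target, beta=1.0 / 9, size_average=True):
    # quadratic below |x| = beta, linear above, matching the call sites above
    n = torch.abs(input - target)
    loss = torch.where(n < beta, 0.5 * n ** 2 / beta, n - 0.5 * beta)
    return loss.mean() if size_average else loss.sum()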
示例#15
0
    def __call__(self, anchors, objectness, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
        """
        # HxWxSxA
        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]
        objectness, box_regression = \
                concat_box_prediction_layers(objectness, box_regression)
        objectness = objectness.squeeze()

        # add by hui ###############################################
        # _box_regression = box_regression.reshape((len(targets), -1, box_regression.shape[-1]))
        # for box_regression_per_img, anchors_per_image, targets_per_img in zip(_box_regression, anchors, targets):
        #     assert len(anchors_per_image) == len(box_regression_per_img)
        #     pred_boxes = self.box_coder.decode(box_regression_per_img, anchors_per_image.bbox)
        #     pred_boxes = BoxList(pred_boxes, targets_per_img.size, mode='xyxy')
        #     ious = boxlist_iou(targets_per_img, pred_boxes)
        #     ious
        # #########################################################

        labels, regression_targets = self.prepare_targets(anchors, targets)

        # show_label(anchors[0].size, labels, regression_targets, objectness)

        # sample
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds,
                                                   dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds,
                                                   dim=0)).squeeze(1)
        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)

        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            beta=1.0 / 9,
            size_average=False,
        )
        # raise ValueError("(sampled_inds.numel()) divide twice, another time is in line 156")

        # ################################# add by hui ###################################################
        if self.ohem_loss is None:
            objectness_loss = F.binary_cross_entropy_with_logits(
                objectness[sampled_inds], labels[sampled_inds])
            box_loss = box_loss / (sampled_inds.numel())
        #             print('rpnx', sampled_inds.numel())
        else:
            objectness_loss = self.ohem_loss(objectness[sampled_inds],
                                             labels[sampled_inds])
            box_loss = box_loss / self.ohem_loss.sample_count
        #             print('rpn', self.ohem_loss.sample_count)
        # #################################################################################################

        return objectness_loss, box_loss
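In the OHEM branch above, the box loss is normalized by self.ohem_loss.sample_count rather than by the number of sampled anchors. A hypothetical top-k OHEM objectness loss exposing such a sample_count attribute might look like this (not the project's implementation):

import torch
import torch.nn.functional as F

class TopKOhemBCE(torch.nn.Module):
    """Keep only the hardest (largest-loss) samples and average over them;
    sample_count records how many samples contributed so a paired regression
    loss can be normalized consistently."""

    def __init__(self, top_k=256):
        super().__init__()
        self.top_k = top_k
        self.sample_count = 0

    def forward(self, logits, labels):
        per_sample = F.binary_cross_entropy_with_logits(
            logits, labels, reduction='none')
        k = min(self.top_k, per_sample.numel())
        hardest, _ = torch.topk(per_sample, k)
        self.sample_count = k
        return hardest.mean()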
示例#16
0
    def filter_results_parallel(self, boxlist, num_classes):
        """Returns bounding-box detection results by thresholding on scores and
        applying non-maximum suppression (NMS).
        """
        # unwrap the boxlist to avoid additional overhead.
        # if we had multi-class NMS, we could perform this directly on the boxlist

        # The CPU version is faster than the GPU one here; switch back to GPU only after verifying.

        boxlist = boxlist.to('cpu')

        boxes = boxlist.bbox.reshape(-1, num_classes * 4)
        scores = boxlist.get_field("scores").reshape(-1, num_classes)

        device = scores.device
        result = []
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        inds_all = scores > self.score_thresh
        all_cls_boxlist_for_class = []
        for j in range(self.cls_start_idx, num_classes):
            inds = inds_all[:, j].nonzero().squeeze(1)
            if len(inds) == 0:
                continue
            scores_j = scores[inds, j]
            boxes_j = boxes[inds, j * 4:(j + 1) * 4]
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            all_cls_boxlist_for_class.append((j, boxlist_for_class))

        all_boxlist_for_class = [
            boxlist_for_class
            for _, boxlist_for_class in all_cls_boxlist_for_class
        ]
        from qd.qd_common import parallel_map

        all_boxlist_for_class = parallel_map(self.nms_func,
                                             all_boxlist_for_class)

        for i, boxlist_for_class in enumerate(all_boxlist_for_class):
            j = all_cls_boxlist_for_class[i][0]
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels, ), j, dtype=torch.int64,
                           device=device))
            result.append(boxlist_for_class)

        if len(result) > 0:
            result = cat_boxlist(result)
        else:
            return self.prepare_empty_boxlist(boxlist)

        number_of_detections = len(result)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.detections_per_img > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(),
                number_of_detections - self.detections_per_img + 1)
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
        return result
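The "limit to max_per_image detections" blocks that recur in these examples use torch.kthvalue to find a score threshold keeping roughly detections_per_img boxes. A compact standalone version of that trick:

import torch

def cap_detections(scores, max_per_image):
    """Indices of (approximately) the top max_per_image scores; ties at the
    threshold may keep a few extra boxes, exactly as in the original code."""
    n = scores.numel()
    if n <= max_per_image:
        return torch.arange(n)
    image_thresh, _ = torch.kthvalue(scores.cpu(), n - max_per_image + 1)
    keep = torch.nonzero(scores >= image_thresh.item()).squeeze(1)
    return keep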
示例#17
0
    def filter_results(self, boxlist, num_classes, return_idx=False):
        """Returns bounding-box detection results by thresholding on scores and
        applying non-maximum suppression (NMS).
        """
        # unwrap the boxlist to avoid additional overhead.
        # if we had multi-class NMS, we could perform this directly on the boxlist
        boxes = boxlist.bbox.reshape(-1, num_classes * 4)
        scores = boxlist.get_field("scores").reshape(-1, num_classes)

        device = scores.device
        result = []
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        inds_all = scores > self.score_thresh

        # save the kept indexes
        keep_inds = []
        for j in range(1, num_classes):
            inds = inds_all[:, j].nonzero().squeeze(1)
            scores_j = scores[inds, j]
            boxes_j = boxes[inds, j * 4:(j + 1) * 4]
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("scores", scores_j)
            boxlist_for_class, keep_from_nms = boxlist_nms(boxlist_for_class,
                                                           self.nms,
                                                           return_idx=True)

            # find which boxes are kept after NMS
            keep_from_nms = inds[keep_from_nms]
            if len(keep_inds) == 0:
                keep_inds = keep_from_nms
            else:
                keep_inds = torch.cat((keep_inds, keep_from_nms))

            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "labels",
                torch.full((num_labels, ), j, dtype=torch.int64,
                           device=device))
            result.append(boxlist_for_class)
            # print("class: {}; big_score_index: {}; keep_from_nms: {}; new_keep_from_nms:{}".format(j, inds, keep_from_nms, new_keep_from_nms))

        # print("keep index after nms: ", keep_inds)
        result = cat_boxlist(result)

        # NOTE: Nov 20, add a cross-class nms to further get rid of bad detections.
        result, keep_inds = boxlist_nms(result, 0.8, return_idx=True)

        number_of_detections = len(result)

        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.detections_per_img > 0:
            cls_scores = result.get_field("scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(),
                number_of_detections - self.detections_per_img + 1)
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
            keep_inds = keep_inds[keep]

        if len(result) != len(keep_inds):
            print(result)
            print(keep_inds)
            raise ValueError(
                "The number of kept indices differs from the length of the kept boxlist"
            )
        if return_idx:
            return result, keep_inds
        else:
            return result, None
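filter_results above tracks which original rows survive per-class thresholding and NMS by re-indexing the local NMS keep indices with inds. A small sketch of that bookkeeping, using torchvision's nms as a stand-in for boxlist_nms:

import torch
from torchvision.ops import nms

def per_class_keep_indices(boxes, scores, score_thresh, iou_thresh):
    """Return the original row indices that survive score thresholding
    followed by NMS for a single class."""
    inds = torch.nonzero(scores > score_thresh).squeeze(1)
    keep_from_nms = nms(boxes[inds], scores[inds], iou_thresh)
    # map indices local to the thresholded subset back to original rows
    return inds[keep_from_nms]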
示例#18
0
    def __call__(self, anchors, box_cls, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            box_cls (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            losses (dict[str, Tensor]): the positive and negative bag losses
                ("loss_retina_positive", "loss_retina_negative")
        """
        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]
        box_cls_flattened = []
        box_regression_flattened = []
        # for each feature level, permute the outputs to make them be in the
        # same format as the labels. Note that the labels are computed for
        # all feature levels concatenated, so we keep the same representation
        # for the objectness and the box_regression
        for box_cls_per_level, box_regression_per_level in zip(
                box_cls, box_regression):
            N, A, H, W = box_cls_per_level.shape
            C = self.num_classes
            box_cls_per_level = box_cls_per_level.view(N, -1, C, H, W)
            box_cls_per_level = box_cls_per_level.permute(0, 3, 4, 1, 2)
            box_cls_per_level = box_cls_per_level.reshape(N, -1, C)
            box_regression_per_level = box_regression_per_level.view(
                N, -1, 4, H, W)
            box_regression_per_level = box_regression_per_level.permute(
                0, 3, 4, 1, 2)
            box_regression_per_level = box_regression_per_level.reshape(
                N, -1, 4)
            box_cls_flattened.append(box_cls_per_level)
            box_regression_flattened.append(box_regression_per_level)
        # concatenate on the first dimension (representing the feature levels), to
        # take into account the way the labels were generated (with all feature maps
        # being concatenated as well)
        box_cls = cat(box_cls_flattened, dim=1)
        box_regression = cat(box_regression_flattened, dim=1)

        cls_prob = torch.sigmoid(box_cls)
        box_prob = []
        positive_numels = 0
        positive_losses = []
        for img, (anchors_, targets_, cls_prob_, box_regression_) in enumerate(
                zip(anchors, targets, cls_prob, box_regression)):
            labels_ = targets_.get_field("labels") - 1

            with torch.set_grad_enabled(False):
                # box_localization: a_{j}^{loc}, shape: [j, 4]
                box_localization = self.box_coder.decode(
                    box_regression_, anchors_.bbox)

                # object_box_iou: IoU_{ij}^{loc}, shape: [i, j]
                object_box_iou = boxlist_iou(
                    targets_,
                    BoxList(box_localization, anchors_.size, mode='xyxy'))

                t1 = self.bbox_threshold
                t2 = object_box_iou.max(
                    dim=1, keepdim=True).values.clamp(min=t1 + 1e-12)

                # object_box_prob: P{a_{j} -> b_{i}}, shape: [i, j]
                object_box_prob = ((object_box_iou - t1) / (t2 - t1)).clamp(
                    min=0, max=1)

                indices = torch.stack(
                    [torch.arange(len(labels_)).type_as(labels_), labels_],
                    dim=0)

                # object_cls_box_prob: P{a_{j} -> b_{i}}, shape: [i, c, j]
                object_cls_box_prob = torch.sparse_coo_tensor(
                    indices, object_box_prob)

                # image_box_prob: P{a_{j} \in A_{+}}, shape: [j, c]
                """
                from "start" to "end" implement:
                
                image_box_prob = torch.sparse.max(object_cls_box_prob, dim=0).t()
                
                """
                # start
                indices = torch.nonzero(
                    torch.sparse.sum(object_cls_box_prob,
                                     dim=0).to_dense()).t_()

                if indices.numel() == 0:
                    image_box_prob = torch.zeros(
                        anchors_.bbox.size(0),
                        self.num_classes).type_as(object_box_prob)
                else:
                    nonzero_box_prob = torch.where(
                        (labels_.unsqueeze(dim=-1) == indices[0]),
                        object_box_prob[:, indices[1]],
                        torch.tensor(
                            [0]).type_as(object_box_prob)).max(dim=0).values

                    image_box_prob = torch.sparse_coo_tensor(
                        indices.flip([0]),
                        nonzero_box_prob,
                        size=(anchors_.bbox.size(0),
                              self.num_classes)).to_dense()
                # end

                box_prob.append(image_box_prob)

            # construct bags for objects
            match_quality_matrix = boxlist_iou(targets_, anchors_)
            _, matched = torch.topk(match_quality_matrix,
                                    self.pre_anchor_topk,
                                    dim=1,
                                    sorted=False)
            del match_quality_matrix

            # matched_cls_prob: P_{ij}^{cls}
            matched_cls_prob = torch.gather(
                cls_prob_[matched], 2,
                labels_.view(-1, 1, 1).repeat(1, self.pre_anchor_topk,
                                              1)).squeeze(2)

            # matched_box_prob: P_{ij}^{loc}
            matched_object_targets = self.box_coder.encode(
                targets_.bbox.unsqueeze(dim=1), anchors_.bbox[matched])
            retinanet_regression_loss = smooth_l1_loss(
                box_regression_[matched], matched_object_targets,
                *self.smooth_l1_loss_param)
            matched_box_prob = torch.exp(-retinanet_regression_loss)

            # positive_losses: { -log( Mean-max(P_{ij}^{cls} * P_{ij}^{loc}) ) }
            positive_numels += len(targets_)
            positive_losses.append(
                self.positive_bag_loss_func(matched_cls_prob *
                                            matched_box_prob,
                                            dim=1))

        # positive_loss: \sum_{i}{ -log( Mean-max(P_{ij}^{cls} * P_{ij}^{loc}) ) } / ||B||
        positive_loss = torch.cat(positive_losses).sum() / max(
            1, positive_numels)

        # box_prob: P{a_{j} \in A_{+}}
        box_prob = torch.stack(box_prob, dim=0)

        # negative_loss: \sum_{j}{ FL( (1 - P{a_{j} \in A_{+}}) * (1 - P_{j}^{bg}) ) } / n||B||
        negative_loss = self.negative_bag_loss_func(
            cls_prob * (1 - box_prob), self.focal_loss_gamma) / max(
                1, positive_numels * self.pre_anchor_topk)

        losses = {
            "loss_retina_positive": positive_loss * self.focal_loss_alpha,
            "loss_retina_negative":
            negative_loss * (1 - self.focal_loss_alpha),
        }
        return losses
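The positive term above is a "mean-max" bag loss over the top-k anchors matched to each object. One common FreeAnchor-style way to implement positive_bag_loss_func (an assumption; the actual member is not shown in this example):

import torch

def mean_max_positive_bag_loss(matched_prob, dim=1, eps=1e-12):
    """Weights concentrate on the highest-probability anchors in the bag, then
    the negative log of the weighted mean probability is returned."""
    weight = 1.0 / (1.0 - matched_prob).clamp(min=eps)
    weight = weight / weight.sum(dim=dim, keepdim=True)
    bag_prob = (weight * matched_prob).sum(dim=dim)
    return -bag_prob.clamp(min=eps).log()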
示例#19
0
    def filter_results(self, boxlist, num_classes):
        """Returns bounding-box detection results by thresholding on scores and
        applying non-maximum suppression (NMS).
        """
        # unwrap the boxlist to avoid additional overhead.
        # if we had multi-class NMS, we could perform this directly on the boxlist
        boxes = boxlist.bbox.reshape(-1, num_classes * 4)
        boxes_per_cls = boxlist.bbox.reshape(-1, num_classes, 4)
        scores = boxlist.get_field("pred_scores").reshape(-1, num_classes)

        device = scores.device
        result = []
        orig_inds = []
        # Apply threshold on detection probabilities and apply NMS
        # Skip j = 0, because it's the background class
        inds_all = scores > self.score_thresh
        for j in range(1, num_classes):
            inds = inds_all[:, j].nonzero().squeeze(1)
            scores_j = scores[inds, j]
            boxes_j = boxes[inds, j * 4:(j + 1) * 4]
            boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
            boxlist_for_class.add_field("pred_scores", scores_j)
            boxlist_for_class, keep = boxlist_nms(
                boxlist_for_class,
                self.nms,
                max_proposals=self.post_nms_per_cls_topn,
                score_field='pred_scores')
            inds = inds[keep]
            num_labels = len(boxlist_for_class)
            boxlist_for_class.add_field(
                "pred_labels",
                torch.full((num_labels, ), j, dtype=torch.int64,
                           device=device))
            result.append(boxlist_for_class)
            orig_inds.append(inds)

        # NOTE: kaihua, according to Neural-MOTIFS (and my experiments), we need to remove duplicate bboxes
        if self.nms_filter_duplicates or self.save_proposals:
            assert len(orig_inds) == (num_classes - 1)
            # set all bg to zero
            inds_all[:, 0] = 0
            for j in range(1, num_classes):
                inds_all[:, j] = 0
                orig_idx = orig_inds[j - 1]
                inds_all[orig_idx, j] = 1
            dist_scores = scores * inds_all.float()
            scores_pre, labels_pre = dist_scores.max(1)
            final_inds = scores_pre.nonzero()
            assert final_inds.dim() != 0
            final_inds = final_inds.squeeze(1)

            scores_pre = scores_pre[final_inds]
            labels_pre = labels_pre[final_inds]

            result = BoxList(boxes_per_cls[final_inds, labels_pre],
                             boxlist.size,
                             mode="xyxy")
            result.add_field("pred_scores", scores_pre)
            result.add_field("pred_labels", labels_pre)
            orig_inds = final_inds
        else:
            result = cat_boxlist(result)
            orig_inds = torch.cat(orig_inds, dim=0)

        number_of_detections = len(result)
        # Limit to max_per_image detections **over all classes**
        if number_of_detections > self.detections_per_img > 0:
            cls_scores = result.get_field("pred_scores")
            image_thresh, _ = torch.kthvalue(
                cls_scores.cpu(),
                number_of_detections - self.detections_per_img + 1)
            keep = cls_scores >= image_thresh.item()
            keep = torch.nonzero(keep).squeeze(1)
            result = result[keep]
            orig_inds = orig_inds[keep]
        return result, orig_inds, boxes_per_cls[orig_inds]
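When nms_filter_duplicates is enabled above, each surviving row is reduced to a single (box, class) pair by taking the highest-scoring class among those kept for that row. A standalone sketch of that reduction:

import torch

def resolve_duplicate_classes(scores, keep_mask):
    """scores: [N, C] class scores; keep_mask: [N, C] 0/1 mask of (row, class)
    pairs that survived per-class NMS (background column 0 included). Returns
    the selected rows, their labels, and their scores."""
    keep_mask = keep_mask.clone()
    keep_mask[:, 0] = 0                      # never select the background class
    dist_scores = scores * keep_mask.float()
    scores_pre, labels_pre = dist_scores.max(dim=1)
    final_inds = scores_pre.nonzero().squeeze(1)
    return final_inds, labels_pre[final_inds], scores_pre[final_inds]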
示例#20
0
    def __call__(self, anchors, objectness, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
        """
        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]
        labels, regression_targets = self.prepare_targets(anchors, targets)
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds,
                                                   dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds,
                                                   dim=0)).squeeze(1)

        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        objectness_flattened = []
        box_regression_flattened = []
        # for each feature level, permute the outputs to make them be in the
        # same format as the labels. Note that the labels are computed for
        # all feature levels concatenated, so we keep the same representation
        # for the objectness and the box_regression
        for objectness_per_level, box_regression_per_level in zip(
                objectness, box_regression):
            N, A, H, W = objectness_per_level.shape
            objectness_per_level = objectness_per_level.permute(0, 2, 3,
                                                                1).reshape(
                                                                    N, -1)
            box_regression_per_level = box_regression_per_level.view(
                N, -1, 4, H, W)
            box_regression_per_level = box_regression_per_level.permute(
                0, 3, 4, 1, 2)
            box_regression_per_level = box_regression_per_level.reshape(
                N, -1, 4)
            objectness_flattened.append(objectness_per_level)
            box_regression_flattened.append(box_regression_per_level)
        # concatenate on the first dimension (representing the feature levels), to
        # take into account the way the labels were generated (with all feature maps
        # being concatenated as well)
        objectness = cat(objectness_flattened, dim=1).reshape(-1)
        box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4)

        # concatenate anchor boxes so their first dim matches the flattened regression dim
        anchor_flattened = []
        for anchor_per in anchors:
            anchor_flattened.append(anchor_per.bbox)
        anchors_bbox = torch.cat(anchor_flattened, dim=0)

        box_regression_dx = box_regression[:, 0]
        box_regression_dy = box_regression[:, 1]
        box_regression_dw = box_regression[:, 2]
        box_regression_dh = box_regression[:, 3]

        anchors_bbox_cx = (anchors_bbox[:, 0] + anchors_bbox[:, 2]) / 2.0
        anchors_bbox_cy = (anchors_bbox[:, 1] + anchors_bbox[:, 3]) / 2.0
        anchors_bbox_w = anchors_bbox[:, 2] - anchors_bbox[:, 0] + 1
        anchors_bbox_h = anchors_bbox[:, 3] - anchors_bbox[:, 1] + 1
        predict_w = torch.exp(box_regression_dw) * anchors_bbox_w
        predict_h = torch.exp(box_regression_dh) * anchors_bbox_h
        predict_x = box_regression_dx * anchors_bbox_w + anchors_bbox_cx
        predict_y = box_regression_dy * anchors_bbox_h + anchors_bbox_cy

        predict_x1 = predict_x - 0.5 * predict_w
        predict_y1 = predict_y - 0.5 * predict_h
        predict_x2 = predict_x + 0.5 * predict_w
        predict_y2 = predict_y + 0.5 * predict_h

        predict_boxes = torch.stack(
            (predict_x1, predict_y1, predict_x2, predict_y2)).t()
        predict_iou = onehot_iou(anchors_bbox, predict_boxes)

        labels = torch.cat(labels, dim=0) * predict_iou
        regression_targets = torch.cat(regression_targets, dim=0)

        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            beta=1.0 / 9,
            size_average=False,
        ) / (sampled_inds.numel())

        objectness_loss = F.binary_cross_entropy_with_logits(
            objectness[sampled_inds], labels[sampled_inds])
        return objectness_loss, box_loss
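The manual decode above follows the classic R-CNN box parameterization with the legacy "+1" width/height convention. A small numeric check on a single illustrative anchor with zero deltas:

import torch

anchors_bbox = torch.tensor([[0.0, 0.0, 15.0, 15.0]])  # one 16x16 anchor
deltas = torch.zeros(1, 4)                              # dx, dy, dw, dh = 0

cx = (anchors_bbox[:, 0] + anchors_bbox[:, 2]) / 2.0    # 7.5
cy = (anchors_bbox[:, 1] + anchors_bbox[:, 3]) / 2.0
w = anchors_bbox[:, 2] - anchors_bbox[:, 0] + 1         # 16 under the +1 convention
h = anchors_bbox[:, 3] - anchors_bbox[:, 1] + 1

pred_w = torch.exp(deltas[:, 2]) * w                    # 16
pred_h = torch.exp(deltas[:, 3]) * h
pred_x = deltas[:, 0] * w + cx                          # 7.5
pred_y = deltas[:, 1] * h + cy

# zero deltas reproduce the anchor re-expressed around its center:
# [-0.5, -0.5, 15.5, 15.5]
boxes = torch.stack((pred_x - 0.5 * pred_w, pred_y - 0.5 * pred_h,
                     pred_x + 0.5 * pred_w, pred_y + 0.5 * pred_h), dim=1)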
示例#21
0
    def forward(self, images, targets=None, adapt=False):
        """
        Arguments:
            images (list[Tensor] or ImageList): images to be processed
            targets (list[BoxList]): ground-truth boxes present in the image (optional)

        Returns:
            result (list[BoxList] or dict[Tensor]): the output from the model.
                During training, it returns a dict[Tensor] which contains the losses.
                During testing, it returns list[BoxList] contains additional fields
                like `scores`, `labels` and `mask` (for Mask R-CNN models).

        """
        if self.training and targets is None:
            raise ValueError("In training mode, targets should be passed")
        images = to_image_list(images)
        features = self.backbone(images.tensors)

        # Retina RPN Output
        rpn_features = features
        if self.cfg.RETINANET.BACKBONE == "p2p7":
            rpn_features = features[1:]

        if adapt:
            return rpn_features

        (anchors,
         detections), detector_losses = self.rpn(images, rpn_features, targets)
        if self.training:
            losses = {}
            losses.update(detector_losses)
            if self.mask:
                if self.cfg.MODEL.MASK_ON:
                    # Padding the GT
                    proposals = []
                    for (image_detections,
                         image_targets) in zip(detections, targets):
                        merge_list = []
                        if not isinstance(image_detections, list):
                            merge_list.append(
                                image_detections.copy_with_fields('labels'))

                        if not isinstance(image_targets, list):
                            merge_list.append(
                                image_targets.copy_with_fields('labels'))

                        if len(merge_list) == 1:
                            proposals.append(merge_list[0])
                        else:
                            proposals.append(cat_boxlist(merge_list))
                    x, result, mask_losses = self.mask(features, proposals,
                                                       targets)
                elif self.cfg.MODEL.SPARSE_MASK_ON:
                    x, result, mask_losses = self.mask(features, anchors,
                                                       targets)

                losses.update(mask_losses)
            return losses
        else:
            if self.mask:
                proposals = []
                for image_detections in detections:
                    num_of_detections = image_detections.bbox.shape[0]
                    if num_of_detections > self.cfg.RETINANET.NUM_MASKS_TEST > 0:
                        cls_scores = image_detections.get_field("scores")
                        image_thresh, _ = torch.kthvalue(
                            cls_scores.cpu(), num_of_detections - \
                            self.cfg.RETINANET.NUM_MASKS_TEST + 1
                        )
                        keep = cls_scores >= image_thresh.item()
                        keep = torch.nonzero(keep).squeeze(1)
                        image_detections = image_detections[keep]

                    proposals.append(image_detections)

                if self.cfg.MODEL.SPARSE_MASK_ON:
                    x, detections, mask_losses = self.mask(
                        features, proposals, targets)
                else:
                    x, detections, mask_losses = self.mask(
                        features, proposals, targets)
            return detections
示例#22
0
    def select_over_all_levels(self, boxlists):
        # pdb.set_trace()
        num_images = len(boxlists)
        results = []
        for i in range(num_images):
            scores = boxlists[i].get_field("scores")
            labels = boxlists[i].get_field("labels")
            boxes = boxlists[i].bbox
            boxlist = boxlists[i]
            result = []
            # pdb.set_trace()
            # (Pdb) self.num_classes
            # 81
            # (Pdb) labels.dtype
            # torch.int64
            # (Pdb) scores.dtype
            # torch.float32
            # (Pdb) boxes.dtype
            # torch.float32

            # skip the background
            for j in range(1, self.num_classes):
                inds = (labels == j).nonzero().view(-1)
                scores_j = scores[inds]
                boxes_j = boxes[inds, :].view(-1, 4)
                boxlist_for_class = BoxList(boxes_j, boxlist.size, mode="xyxy")
                boxlist_for_class.add_field("scores", scores_j)

                ############################## softNMS ##############################
                if self.nms_method == "nms":
                    # pdb.set_trace()
                    # (Pdb) boxlist_for_class.bbox.shape
                    # torch.Size([291, 4])
                    # (Pdb) boxlist_for_class.bbox[0]
                    # tensor([1422.0798,  192.1235, 1482.6444,  257.5991], device='cuda:0')
                    # (Pdb) boxlist_for_class.bbox[0].dtype
                    # torch.float32
                    # (Pdb) boxlist_for_class.get_field('scores').shape
                    # torch.Size([291])
                    # (Pdb) boxlist_for_class.get_field('scores')[0]
                    # tensor(0.0988, device='cuda:0')
                    # (Pdb) boxlist_for_class.get_field('scores')[0].dtype
                    # torch.float32
                    # (Pdb) self.nms_thresh
                    # 0.6
                    boxlist_for_class = boxlist_nms(boxlist_for_class,
                                                    self.nms_thresh,
                                                    score_field="scores")
                elif self.nms_method == "soft_nms":
                    boxlist_for_class = boxlist_soft_nms(boxlist_for_class,
                                                         self.nms_thresh,
                                                         score_field="scores")
                else:
                    print('unknown nms method: {}'.format(self.nms_method))
                ############################## softNMS ##############################

                num_labels = len(boxlist_for_class)

                # pdb.set_trace()
                boxlist_for_class.add_field(
                    "labels",
                    torch.full((num_labels, ),
                               j,
                               dtype=torch.int64,
                               device=scores.device))
                result.append(boxlist_for_class)

            # pdb.set_trace()
            # (Pdb) len(result)
            # 80
            # (Pdb) result[0]
            # BoxList(num_boxes=185, image_width=1777, image_height=1000, mode=xyxy)

            result = cat_boxlist(result)

            # pdb.set_trace()
            # (Pdb) result
            # BoxList(num_boxes=529, image_width=1777, image_height=1000, mode=xyxy)

            number_of_detections = len(result)

            # pdb.set_trace()

            # Limit to max_per_image detections **over all classes**
            if number_of_detections > self.fpn_post_nms_top_n > 0:
                cls_scores = result.get_field("scores")
                image_thresh, _ = torch.kthvalue(
                    cls_scores.cpu(),
                    number_of_detections - self.fpn_post_nms_top_n + 1)
                keep = cls_scores >= image_thresh.item()
                keep = torch.nonzero(keep).squeeze(1)
                result = result[keep]
            results.append(result)
        return results
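select_over_all_levels above switches between hard NMS and boxlist_soft_nms. A minimal sketch of the linear soft-NMS score decay on raw tensors (illustrative thresholds; not the project's boxlist_soft_nms API):

import torch
from torchvision.ops import box_iou

def soft_nms_linear(boxes, scores, iou_thresh=0.6, score_thresh=0.001):
    """Instead of discarding boxes that overlap the current best box, decay
    their scores by (1 - IoU); returns the kept indices in processing order."""
    scores = scores.clone()
    idxs = torch.arange(scores.numel())
    keep = []
    while idxs.numel() > 0:
        best = torch.argmax(scores[idxs])
        cur = idxs[best]
        keep.append(cur.item())
        idxs = torch.cat([idxs[:best], idxs[best + 1:]])
        if idxs.numel() == 0:
            break
        ious = box_iou(boxes[cur].unsqueeze(0), boxes[idxs]).squeeze(0)
        decay = torch.where(ious > iou_thresh, 1.0 - ious, torch.ones_like(ious))
        scores[idxs] *= decay
        idxs = idxs[scores[idxs] > score_thresh]
    return torch.tensor(keep, dtype=torch.long)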
示例#23
0
    def __call__(self, anchors, objectness, box_regression, box_orien,
                 targets):
        """
        Arguments:
            anchors (list[BoxList])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            box_orien (list[Tensor])
            targets (list[BoxList])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor)
            orien_loss (Tensor)
        """
        # print(targets,'===================================')
        # # TODO  square anchor box expand tragets in xyxy
        # for i, boxlist in enumerate(targets):
        #     boxes = boxlist.bbox
        #     for j, box in enumerate(boxes):
        #         # print(box, '=====')
        #         top_left, bottom_right = box[:2], box[2:]
        #         l = abs(top_left[1] - bottom_right[1])
        #         w = abs(top_left[0] - bottom_right[0])
        #         xc = (top_left[0] + bottom_right[0]) / 2
        #         yc = (top_left[1] + bottom_right[1]) / 2
        #         if l > w:
        #             f = 1.2 * l
        #         else:
        #             f = 1.2 * w
        #         # print(f, xc, yc, '=============')
        #         box = bBox_2D(f, f, xc, yc, 0)
        #         box.xcyc2topleft()
        #         box.xcyc2bottomright()
        #
        #         boxlist.bbox[j] = torch.Tensor([box.xtl, box.ytl, box.xbr, box.ybr])
        # print(box.xtl, box.ytl, box.xbr, box.ybr,'=================')
        # square_targets = []
        # for j, target in enumerate(targets):
        #     wh1 = target.bbox[:, 2:] - target.bbox[:, :2]  # wh of target box1 by their br - tl
        #     maxedge1 = torch.max(wh1[:, 0], wh1[:, 1])
        #     maxedge11 = torch.cat((maxedge1[:, None], maxedge1[:, None]), -1)
        #     xcyc1 = (target.bbox[:, 2:] + target.bbox[:, :2]) * 0.5
        #
        #     box3 = torch.cat((xcyc1 - maxedge11 * 0.5, xcyc1 + maxedge11 * 0.5), -1)
        #     # square box3 correspond to targets
        #     targets[j].bbox = box3

        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]
        labels, regression_targets, orien_targets = self.prepare_targets(
            anchors, targets)
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds,
                                                   dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds,
                                                   dim=0)).squeeze(1)

        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        objectness_flattened = []
        box_regression_flattened = []
        box_orien_flattened = []
        # for each feature level, permute the outputs to make them be in the
        # same format as the labels. Note that the labels are computed for
        # all feature levels concatenated, so we keep the same representation
        # for the objectness and the box_regression
        for objectness_per_level, box_regression_per_level, box_orien_per_level in zip(
                objectness, box_regression, box_orien):
            N, A, H, W = objectness_per_level.shape
            # print(box_orien_per_level.shape)
            objectness_per_level = objectness_per_level.permute(0, 2, 3,
                                                                1).reshape(
                                                                    N, -1)

            box_regression_per_level = box_regression_per_level.view(
                N, -1, 4, H, W)
            box_regression_per_level = box_regression_per_level.permute(
                0, 3, 4, 1, 2)
            box_regression_per_level = box_regression_per_level.reshape(
                N, -1, 4)
            box_orien_per_level = box_orien_per_level.view(N, -1, 2, H, W)
            box_orien_per_level = box_orien_per_level.permute(0, 3, 4, 1, 2)
            box_orien_per_level = box_orien_per_level.reshape(N, -1, 2)
            # print(box_regression_per_level.shape)
            # print(box_orien_per_level.shape,'========================')
            objectness_flattened.append(objectness_per_level)
            box_regression_flattened.append(box_regression_per_level)
            box_orien_flattened.append(box_orien_per_level)
        # concatenate on the first dimension (representing the feature levels), to
        # take into account the way the labels were generated (with all feature maps
        # being concatenated as well)
        objectness = cat(objectness_flattened, dim=1).reshape(-1)
        box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4)
        orien_regression = cat(box_orien_flattened, dim=1).reshape(-1, 2)

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)
        orien_targets = torch.cat(orien_targets, dim=0)

        # to_rotated_boxes(regression_targets[sampled_pos_inds],
        #                  orien_targets[sampled_pos_inds].type(torch.cuda.FloatTensor))

        # print('\noriens:',oriens.size(),'boxes:',boxes.size(),'==========\n')

        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            beta=1.0 / 9,
            size_average=False,
        ) / sampled_inds.numel()

        objectness_loss = F.binary_cross_entropy_with_logits(
            objectness[sampled_inds], labels[sampled_inds])
        # print(orien_targets[sampled_pos_inds], '=========orien===========')
        # print(orien_regression[sampled_pos_inds], '=========regression===========\n')

        orien_loss = F.mse_loss(
            orien_regression[sampled_pos_inds],
            # orien_targets[sampled_pos_inds].type(torch.cuda.FloatTensor),
            orien_targets[sampled_pos_inds],
            reduction='sum',
            # size_average=False,
            # beta=1,
        ) / sampled_inds.numel()

        # orien_loss = smooth_l1_loss(
        #     orien_regression[sampled_pos_inds],
        #     orien_targets[sampled_pos_inds].type(torch.cuda.FloatTensor),
        #     size_average=False,
        #     beta=1.0 / 9,
        # ) / sampled_inds.numel()    #  NO Orientation Loss During RPN Stage

        # print(orien_loss)

        return objectness_loss, box_loss, orien_loss
示例#24
0
    def forward(self, x):
        appearance_feature, proposals, cls_score, box_reg, targets = x
        self.device = appearance_feature.device

        with torch.no_grad():
            sorted_boxlists = self.prepare_ranking(cls_score,
                                                   box_reg,
                                                   proposals,
                                                   targets,
                                                   reg_iou=self.reg_iou)
        # concatenate values from different images
        boxes_per_image = [len(f) for f in proposals]
        idxs = [f.get_field('sorted_idx') for f in sorted_boxlists]
        scores = torch.cat([f.get_field('scores') for f in sorted_boxlists])
        bboxes = torch.cat(
            [f.bbox.reshape(-1, self.fg_class, 4) for f in sorted_boxlists])
        objectness = torch.cat([
            f.get_field('objectness').reshape(-1, self.fg_class)
            for f in sorted_boxlists
        ])
        all_scores = torch.cat(
            [f.get_field('all_scores') for f in sorted_boxlists])

        # add iou information
        image_sizes = [f.size for f in sorted_boxlists]
        sorted_boxes_per_image = [f.shape[0] for f in idxs]
        appearance_feature = self.roi_feat_embedding_fc(appearance_feature)
        appearance_feature = appearance_feature.split(boxes_per_image, dim=0)
        sorted_features = []
        nms_rank_embedding = []
        for id, feature, box_per_image in zip(idxs, appearance_feature,
                                              boxes_per_image):
            feature = feature[id]
            size = feature.size()
            if size[0] <= self.first_n:
                first_n = size[0]
            else:
                first_n = self.first_n
            sorted_features.append(feature)
            #[rank_dim * batch , feat_dim]
            nms_rank_embedding.append(
                extract_rank_embedding(
                    first_n,
                    self.cfg.MODEL.RELATION_NMS.ROI_FEAT_DIM,
                    device=feature.device))
        #  [first_n * batchsize, num_fg_classes, 128]
        sorted_features = torch.cat(sorted_features, dim=0)
        nms_rank_embedding = torch.cat(nms_rank_embedding, dim=0)
        nms_rank_embedding = self.nms_rank_fc(nms_rank_embedding)
        sorted_features = sorted_features + nms_rank_embedding[:, None, :]

        boxes_cls_1 = BoxList(bboxes[:, 0, :], image_sizes[0])
        boxes_cls_2 = BoxList(bboxes[:, 1, :], image_sizes[0])
        iou_1 = boxlist_iou(boxes_cls_1, boxes_cls_1)
        iou_2 = boxlist_iou(boxes_cls_2, boxes_cls_2)
        if self.cfg.MODEL.RELATION_NMS.USE_IOU:
            iou = [iou_1, iou_2]
        else:
            iou = None
        nms_position_matrix = extract_multi_position_matrix(
            bboxes,
            None,
            self.geo_feature_dim,
            1000,
            clswise=self.cfg.MODEL.RELATION_NMS.CLS_WISE_RELATION,
        )
        nms_attention_1 = self.relation_module(sorted_features,
                                               nms_position_matrix, iou)
        sorted_features = sorted_features + nms_attention_1
        sorted_features = self.relu1(sorted_features)
        # [first_n * num_fg_classes, 128]
        sorted_features = sorted_features.view(
            -1, self.cfg.MODEL.RELATION_NMS.APPEARANCE_FEAT_DIM)
        sorted_features = self.classifier(sorted_features)
        # logit_reshape, [first_n, num_fg_classes, num_thread]
        sorted_features = sorted_features.view(-1, self.fg_class,
                                               len(self.target_thresh))
        if not self.reg_iou:
            sorted_features = torch.sigmoid(sorted_features)
        scores = torch.cat([scores[:, :, None]] * len(self.target_thresh),
                           dim=-1)
        loss_dict = {}
        if self.training:
            if self.reg_iou:
                # when using regression, do not do sorted_features = scores * sorted_features
                reg_label = torch.cat(
                    [f.get_field('labels_iou_reg') for f in sorted_boxlists])
                reg_label = reg_label.to(scores.device)
                reg_label = reg_label.type(torch.cuda.FloatTensor)
                sorted_features = sorted_features.to(scores.device)
                sorted_features = sorted_features.type(torch.cuda.FloatTensor)
                if reg_label.shape is not None:
                    reg_iou_loss = F.mse_loss(reg_label, sorted_features)
                else:
                    reg_iou_loss = torch.tensor(0.).to(scores.device)
                loss_dict['nms_loss'] = reg_iou_loss
            else:
                sorted_features = scores * sorted_features
                labels = torch.cat(
                    [f.get_field('labels') for f in sorted_boxlists])

                labels = labels.to(scores.device)
                labels = labels.type(torch.cuda.FloatTensor)

                # WEIGHTED NMS
                nms_loss = F.binary_cross_entropy(scores * sorted_features,
                                                  labels)
                loss_dict['nms_loss'] = nms_loss
            return None, loss_dict
        else:
            input_scores = scores
            if self.reg_iou:
                scores = sorted_features * (scores > self.fg_thread).float()
            else:
                scores = sorted_features * scores
            scores = self.merge_multi_thread_score_test(scores)
            scores = scores.split(sorted_boxes_per_image, dim=0)
            bboxes = bboxes.split(sorted_boxes_per_image, dim=0)
            input_scores = input_scores.split(sorted_boxes_per_image, dim=0)
            objectness = objectness.split(sorted_boxes_per_image, dim=0)
            all_scores = all_scores.split(sorted_boxes_per_image, dim=0)
            result = []
            for i_score, score, bbox, obj, image_size, prob_boxhead in zip(
                    input_scores, scores, bboxes, objectness, image_sizes,
                    all_scores):
                result_per_image = []
                # for nuclei
                index = (score[:, 1] >= self.fg_thread).nonzero()[:, 0]
                # cls_scores = i_score[index, i,0]
                cls_scores = score[index, 1]
                cls_scores_all = prob_boxhead[index, 1]
                cls_boxes = bbox[index, 1, :]
                cls_obj = obj[index, 1]

                boxlist_for_class = BoxList(cls_boxes, image_size, mode='xyxy')

                boxlist_for_class.add_field('scores', cls_scores)
                boxlist_for_class.add_field('objectness', cls_obj)
                boxlist_for_class.add_field('all_scores', cls_scores_all)
                boxlist_for_class = boxlist_nms(boxlist_for_class,
                                                0.5,
                                                score_field="scores")
                num_labels = len(boxlist_for_class)
                boxlist_for_class.add_field(
                    "labels",
                    torch.full((num_labels, ), 2,
                               dtype=torch.int64).to(self.device))
                result_per_image.append(boxlist_for_class)
                index = (score[:, 0] >= self.fg_thread).nonzero()[:, 0]
                # cls_scores = i_score[index, i,0]
                cls_scores = score[index, 0]
                # pdb.set_trace()

                cls_scores_all = prob_boxhead[index, 0]
                cls_boxes = bbox[index, 0, :]
                cls_obj = obj[index, 0]

                boxlist_for_class = BoxList(cls_boxes, image_size, mode='xyxy')
                # Pos greedy NMS if POS_NMS!=-1
                # boxlist_for_class.add_field('idx', index)
                boxlist_for_class.add_field('scores', cls_scores)
                boxlist_for_class.add_field('objectness', cls_obj)
                boxlist_for_class.add_field('all_scores', cls_scores_all)
                # pdb.set_trace()
                if self.nms:
                    # for nuclei
                    boxlist_for_class = boxlist_nms(boxlist_for_class,
                                                    self.nms,
                                                    score_field="scores")
                # pdb.set_trace()
                num_labels = len(boxlist_for_class)
                boxlist_for_class.add_field(
                    "labels",
                    torch.full((num_labels, ), 1,
                               dtype=torch.int64).to(self.device))
                result_per_image.append(boxlist_for_class)
                result_per_image = cat_boxlist(result_per_image)
                number_of_detections = len(result_per_image)

                # Limit to max_per_image detections **over all classes**
                if number_of_detections > self.detections_per_img > 0:
                    cls_scores = result_per_image.get_field("scores")
                    image_thresh, _ = torch.kthvalue(
                        cls_scores.cpu(),
                        number_of_detections - self.detections_per_img + 1)
                    keep = cls_scores >= image_thresh.item()
                    keep = torch.nonzero(keep).squeeze(1)
                    result_per_image = result_per_image[keep]
                result.append(result_per_image)

            return result, {}
    def forward(self,
                images,
                features,
                gt_bbox=None,
                img_size=None,
                compute_average_recall_RPN=False,
                is_train=None,
                result_dir=None):

        if self.negatives_to_pick is None:
            self.negatives_to_pick = math.ceil(
                (self.batch_size * self.iterations) / self.cfg.NUM_IMAGES)

        features = self.head(features)
        if self.anchors is None:
            features = features[0][0]
            features_map_size = features.size()
            # Extract feature map info
            self.feat_size = features_map_size[0]
            self.height = features_map_size[1]
            self.width = features_map_size[2]

            # Generate anchors
            self.anchors = self.anchor_generator(images, features)[0][0]

            self.feature_ids = torch.empty((0, 2),
                                           dtype=torch.long,
                                           device='cuda')
            self.classifiers = torch.empty(0, dtype=torch.uint8, device='cuda')
            # Associate each feature tensor with an id corresponding to its position, and a classifier id corresponding to an anchor value
            for ind in range(0, int(self.anchors.bbox.size()[0])):
                feat_ij = [[
                    int(int(ind / self.num_classes) / self.width),
                    int(int(ind / self.num_classes) % self.width)
                ]]
                self.feature_ids = torch.cat((self.feature_ids,
                                              torch.tensor(feat_ij,
                                                           dtype=torch.long,
                                                           device='cuda')))
                cls = [ind % self.num_classes]
                self.classifiers = torch.cat((self.classifiers,
                                              torch.tensor(cls,
                                                           dtype=torch.uint8,
                                                           device='cuda')))
            self.anchors.add_field('feature_id', self.feature_ids)
            self.anchors.add_field('classifier', self.classifiers)
            # Remove features with borders external to the image
            self.visible_anchors = self.anchors.get_field('visibility')
            self.anchors = self.anchors[self.visible_anchors]
            # Skip classifiers whose anchors have no visible regions
            self.still_to_complete = list(range(self.num_classes))
            for i in list(self.still_to_complete):  # iterate over a copy since items may be removed
                if self.anchors[self.anchors.get_field('classifier') ==
                                i].bbox.size()[0] == 0:
                    self.still_to_complete.remove(i)
                    print('Anchor %i does not have visible regions.' % i,
                          'Removed from the list.')
                    if self.save_features:
                        # Saving empty tensors
                        path_to_save = os.path.join(
                            result_dir, 'features_RPN',
                            'negatives_cl_{}_batch_{}'.format(i, 0))
                        torch.save(
                            torch.empty((0, self.feat_size),
                                        device=self.training_device),
                            path_to_save)

                        path_to_save = os.path.join(
                            result_dir, 'features_RPN',
                            'positives_cl_{}_batch_{}'.format(i, 0))
                        torch.save(
                            torch.empty((0, self.feat_size),
                                        device=self.training_device),
                            path_to_save)
            self.anchors_ids = copy.deepcopy(self.still_to_complete)

            # Initialize batches for minibootstrap
            for i in range(self.num_classes):
                self.negatives.append([])
                self.current_batch.append(0)
                self.current_batch_size.append(0)
                self.positives.append([
                    torch.empty((0, self.feat_size),
                                device=self.training_device)
                ])
                for j in range(self.iterations):
                    self.negatives[i].append(
                        torch.empty((0, self.feat_size),
                                    device=self.training_device))

            # Initialize tensors for box regression
            # Regressor features
            self.X = [
                torch.empty((0, self.feat_size),
                            dtype=torch.float32,
                            device=self.training_device)
            ]
            # Regressor target values
            self.Y = [
                torch.empty((0, 4),
                            dtype=torch.float32,
                            device=self.training_device)
            ]
            # Regressor overlap amounts
            self.O = None
            # Regressor classes
            self.C = [
                torch.empty((0),
                            dtype=torch.float32,
                            device=self.training_device)
            ]

        else:
            features = features[0][0]

        anchors_to_return = self.anchors.copy_with_fields(
            self.anchors.fields())
        # Resize ground truth boxes to anchors dimensions
        gt_bbox = gt_bbox.resize(anchors_to_return.size)
        # Compute anchors-gts ious
        ious = torch.squeeze(boxlist_iou(gt_bbox, anchors_to_return))
        # Associate each anchor with the gt with max iou
        if gt_bbox.bbox.size()[0] > 1:
            ious, ious_index = torch.max(ious, dim=0)
            anchors_to_return.add_field('gt_bbox', gt_bbox.bbox[ious_index])
        else:
            gts = torch.ones(
                (ious.size()[0], 4), device='cuda') * gt_bbox.bbox[0]
            anchors_to_return.add_field('gt_bbox', gts)
        anchors_to_return.add_field('overlap', ious)

        # Keep all the negatives, i.e. anchors whose IoU with the gts is below self.neg_iou_thresh
        negative_anchors_total = anchors_to_return[ious < self.neg_iou_thresh]

        indices_to_remove = []
        for i in self.still_to_complete:
            # Filter negatives for the i-th anchor
            anchors_i = negative_anchors_total[
                negative_anchors_total.get_field('classifier') == i]
            # Sample negatives, according to minibootstrap parameters
            if anchors_i.bbox.size()[0] > self.negatives_to_pick:
                anchors_i = anchors_i[torch.randint(
                    anchors_i.bbox.size()[0], (self.negatives_to_pick, ))]
            # Get their ids, i.e. their positions in the feature map
            ids = anchors_i.get_field('feature_id')
            ids_size = ids.size()[0]
            # Compute at most how many negatives to add to each batch
            reg_to_add = math.ceil(self.negatives_to_pick / self.iterations)
            # Initialize index of chosen negatives among all the negatives to pick
            ind_to_add = 0
            for b in range(self.current_batch[i], self.iterations):
                # If this batch is full, move on to the next one
                if self.negatives[i][b].size()[0] >= self.batch_size:
                    # If features must be saved, save the full batch and replace it on the GPU with an empty tensor
                    if self.save_features:
                        path_to_save = os.path.join(
                            result_dir, 'features_RPN',
                            'negatives_cl_{}_batch_{}'.format(i, b))
                        torch.save(self.negatives[i][b], path_to_save)
                        self.negatives[i][b] = torch.empty(
                            (0, self.feat_size), device=self.training_device)
                    self.current_batch[i] += 1
                    if self.current_batch[i] >= self.iterations:
                        indices_to_remove.append(i)
                    continue

                else:
                    # Compute the end index of negatives to add to the batch
                    end_interval = int(
                        ind_to_add +
                        min(reg_to_add, self.batch_size - self.negatives[i]
                            [b].size()[0], self.negatives_to_pick -
                            ind_to_add, ids_size - ind_to_add))
                    # Extract the features corresponding to the ids and add them to the batch
                    # Diagonal choice done for computational efficiency
                    feat = torch.index_select(features, 1,
                                              ids[ind_to_add:end_interval, 0])
                    feat = torch.index_select(
                        feat, 2,
                        ids[ind_to_add:end_interval, 1]).permute(1, 2, 0).view(
                            (end_interval - ind_to_add)**2, self.feat_size)
                    try:
                        feat = feat[self.diag_list[end_interval - ind_to_add]]
                    except Exception:
                        # Fall back to computing the diagonal indices directly
                        feat = feat[list(
                            range(0, (end_interval - ind_to_add)**2 +
                                  (end_interval - ind_to_add) - 1,
                                  (end_interval - ind_to_add) + 1))]
                    if self.training_device == 'cpu':
                        self.negatives[i][b] = torch.cat(
                            (self.negatives[i][b], feat.cpu()))
                    else:
                        self.negatives[i][b] = torch.cat(
                            (self.negatives[i][b], feat))
                    # Update indices
                    ind_to_add = end_interval
                    if ind_to_add == self.negatives_to_pick:
                        break
        # Remove completed classifiers to avoid unnecessary computation
        for index in indices_to_remove:
            self.still_to_complete.remove(index)

        # Select all the positives, i.e. anchors whose IoU with the gts exceeds self.pos_iou_thresh
        positive_anchors = anchors_to_return[
            anchors_to_return.get_field('overlap') > self.pos_iou_thresh]

        # For each gt without associated anchors above the threshold, add the anchors with the max IoU with that gt to the positives
        for elem in gt_bbox.bbox:
            if elem in positive_anchors.get_field('gt_bbox'):
                continue
            else:
                elem = elem.unsqueeze(0)
                # Find indices where there are anchors associated to this gt_bbox
                indices, _ = torch.min(torch.eq(
                    anchors_to_return.get_field('gt_bbox'),
                    elem.repeat(anchors_to_return.bbox.size()[0], 1)),
                                       dim=1,
                                       keepdim=True)
                # Additional check to avoid max on an empty tensor
                if True in indices:
                    # Find max overlap with this gt_bbox
                    values, _ = torch.max(
                        anchors_to_return[indices.squeeze()].get_field(
                            'overlap'), 0)
                    positives_i = anchors_to_return[indices.squeeze()]
                    positives_i = positives_i[positives_i.get_field('overlap')
                                              == values.item()]
                    positive_anchors = cat_boxlist(
                        [positive_anchors, positives_i])

        # Find the classifiers associated with positives, to avoid unnecessary computation
        pos_inds = torch.unique(positive_anchors.get_field('classifier'))
        for i in pos_inds:
            anchors_i = positive_anchors[positive_anchors.get_field(
                'classifier') == i]
            ids = anchors_i.get_field('feature_id')
            ids_size = ids.size()[0]
            feat = torch.index_select(features, 1, ids[:, 0])
            feat = torch.index_select(feat, 2,
                                      ids[:, 1]).permute(1, 2, 0).view(
                                          ids_size**2, self.feat_size)
            try:
                feat = feat[self.diag_list[ids_size]]
            except Exception:
                # Fall back to computing the diagonal indices directly
                feat = feat[list(
                    range(0, ids_size**2 + ids_size - 1, ids_size + 1))]
            # Add positive features for the i-th anchor to the i-th positives list
            if self.training_device == 'cpu':
                self.positives[i][len(self.positives[i]) - 1] = torch.cat(
                    (self.positives[i][len(self.positives[i]) - 1],
                     feat.cpu()))
            else:
                self.positives[i][len(self.positives[i]) - 1] = torch.cat(
                    (self.positives[i][len(self.positives[i]) - 1], feat))
            if self.positives[i][len(self.positives[i]) -
                                 1].size()[0] >= self.batch_size:
                if self.save_features:
                    path_to_save = os.path.join(
                        result_dir, 'features_RPN',
                        'positives_cl_{}_batch_{}'.format(
                            i,
                            len(self.positives[i]) - 1))
                    torch.save(self.positives[i][len(self.positives[i]) - 1],
                               path_to_save)
                    self.positives[i][len(self.positives[i]) -
                                      1] = torch.empty(
                                          (0, self.feat_size),
                                          device=self.training_device)
                self.positives[i].append(
                    torch.empty((0, self.feat_size),
                                device=self.training_device))

            # COXY computation for regressors
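            # (C = classes, O = overlaps, X = features, Y = regression targets)
            # The targets computed below are the standard R-CNN box-regression
            # offsets (t_x, t_y, t_w, t_h): center deltas normalized by the
            # anchor size plus log width/height ratios between gt and anchor.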
            ex_boxes = anchors_i.bbox
            gt_boxes = anchors_i.get_field('gt_bbox')

            src_w = ex_boxes[:, 2] - ex_boxes[:, 0] + 1
            src_h = ex_boxes[:, 3] - ex_boxes[:, 1] + 1
            src_ctr_x = ex_boxes[:, 0] + 0.5 * src_w
            src_ctr_y = ex_boxes[:, 1] + 0.5 * src_h

            gt_w = gt_boxes[:, 2] - gt_boxes[:, 0] + 1
            gt_h = gt_boxes[:, 3] - gt_boxes[:, 1] + 1
            gt_ctr_x = gt_boxes[:, 0] + 0.5 * gt_w
            gt_ctr_y = gt_boxes[:, 1] + 0.5 * gt_h

            dst_ctr_x = (gt_ctr_x - src_ctr_x) / src_w
            dst_ctr_y = (gt_ctr_y - src_ctr_y) / src_h
            dst_scl_w = torch.log(gt_w / src_w)
            dst_scl_h = torch.log(gt_h / src_h)

            target = torch.stack((dst_ctr_x, dst_ctr_y, dst_scl_w, dst_scl_h),
                                 dim=1)
            if self.training_device == 'cpu':
                self.Y[len(self.Y) - 1] = torch.cat(
                    (self.Y[len(self.Y) - 1], target.cpu()), dim=0)
                # Add class and features to C and X
                self.C[len(self.C) - 1] = torch.cat((self.C[len(self.C) - 1],
                                                     torch.full(
                                                         (ids_size, 1),
                                                         i,
                                                         dtype=torch.float32)))
                self.X[len(self.X) - 1] = torch.cat(
                    (self.X[len(self.X) - 1], feat.cpu()))
            else:
                self.Y[len(self.Y) - 1] = torch.cat(
                    (self.Y[len(self.Y) - 1], target), dim=0)
                # Add class and features to C and X
                self.C[len(self.C) - 1] = torch.cat((self.C[len(self.C) - 1],
                                                     torch.full(
                                                         (ids_size, 1),
                                                         i,
                                                         dtype=torch.float32,
                                                         device='cuda')))
                self.X[len(self.X) - 1] = torch.cat(
                    (self.X[len(self.X) - 1], feat))
            if self.X[len(self.X) - 1].size()[0] >= self.batch_size:
                if self.save_features:
                    path_to_save = os.path.join(
                        result_dir, 'features_RPN',
                        'reg_x_batch_{}'.format(len(self.X) - 1))
                    torch.save(self.X[len(self.X) - 1], path_to_save)
                    self.X[len(self.X) - 1] = torch.empty(
                        (0, self.feat_size),
                        dtype=torch.float32,
                        device=self.training_device)

                    path_to_save = os.path.join(
                        result_dir, 'features_RPN',
                        'reg_c_batch_{}'.format(len(self.C) - 1))
                    torch.save(self.C[len(self.C) - 1], path_to_save)
                    self.C[len(self.C) - 1] = torch.empty(
                        (0), dtype=torch.float32, device=self.training_device)

                    path_to_save = os.path.join(
                        result_dir, 'features_RPN',
                        'reg_y_batch_{}'.format(len(self.Y) - 1))
                    torch.save(self.Y[len(self.Y) - 1], path_to_save)
                    self.Y[len(self.Y) - 1] = torch.empty(
                        (0, 4),
                        dtype=torch.float32,
                        device=self.training_device)

                self.X.append(
                    torch.empty((0, self.feat_size),
                                dtype=torch.float32,
                                device=self.training_device))
                self.C.append(
                    torch.empty((0),
                                dtype=torch.float32,
                                device=self.training_device))
                self.Y.append(
                    torch.empty((0, 4),
                                dtype=torch.float32,
                                device=self.training_device))

        return {}, {}, 0
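
The two chained index_select calls in forward gather an (n x n) grid of features for n (row, col) positions and then keep only the grid's diagonal, i.e. the feature at each requested position. A minimal sketch of that trick on a dummy feature map (shapes and names below are illustrative):

import torch

# Dummy feature map (channels, height, width) and n = 3 (row, col) positions to read
feat_map = torch.arange(2 * 4 * 5, dtype=torch.float32).view(2, 4, 5)
ids = torch.tensor([[0, 1], [2, 3], [3, 0]])
n, feat_size = ids.size(0), feat_map.size(0)

# Select the requested rows and columns: (channels, n, n), one entry per (row_i, col_j) pair
feat = torch.index_select(feat_map, 1, ids[:, 0])
feat = torch.index_select(feat, 2, ids[:, 1]).permute(1, 2, 0).view(n ** 2, feat_size)

# Keep only the diagonal entries (row_i, col_i), i.e. the n requested positions
diag = feat[list(range(0, n ** 2 + n - 1, n + 1))]

# Reference: gather the same features directly with advanced indexing
direct = feat_map[:, ids[:, 0], ids[:, 1]].t()
assert torch.equal(diag, direct)
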
Example #26
0
    def __call__(self, square_anchors, guided_anchors, loc_masks,
                 approx_anchors, objectness, box_regression, shapes, locs,
                 targets):
        """
        Arguments:
            anchors (list[BoxList])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor
        """
        featmap_sizes = [feat.shape[2:] for feat in objectness]
        loc_targets, loc_weights, loc_avg_factors = self.ga_loc_target(
            targets, featmap_sizes)
        locs, loc_targets, loc_weights = concat_locs(locs, loc_targets,
                                                     loc_weights)
        loc_loss = self.loss_loc_fn.forward_weights(
            locs, loc_targets, loc_weights) / loc_avg_factors

        square_anchors = [
            cat_boxlist(anchors_per_image)
            for anchors_per_image in square_anchors
        ]
        approx_anchors = [
            cat_boxlist(anchors_per_image)
            for anchors_per_image in approx_anchors
        ]

        shape_targets, shape_weights = self.ga_shape_target(
            square_anchors, approx_anchors, targets)

        shapes = concat_shapes(shapes)

        shape_pos_inds, shape_neg_inds = self.fg_bg_sampler(shape_weights)
        shape_pos_inds = torch.nonzero(torch.cat(shape_pos_inds,
                                                 dim=0)).squeeze(1)
        shape_neg_inds = torch.nonzero(torch.cat(shape_neg_inds,
                                                 dim=0)).squeeze(1)
        anchor_total_num = shape_pos_inds.shape[0] + shape_neg_inds.shape[0]

        shape_targets = torch.cat(shape_targets, dim=0)
        square_anchors = cat_boxlist_broad(square_anchors)

        shapes = shapes[shape_pos_inds]
        shape_targets = shape_targets[shape_pos_inds]
        square_anchors = square_anchors[shape_pos_inds]

        shapes = self.anchor_box_coder.decode(shapes, square_anchors.bbox)
        shape_loss = bounded_iou_loss(
            shapes, shape_targets, beta=0.2,
            size_average=False) / anchor_total_num

        anchors = [
            cat_boxlist(anchors_per_image)
            for anchors_per_image in guided_anchors
        ]
        labels, regression_targets = self.prepare_targets(anchors, targets)
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds,
                                                   dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds,
                                                   dim=0)).squeeze(1)

        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        objectness, box_regression = \
                concat_box_prediction_layers(objectness, box_regression)

        objectness = objectness.squeeze()

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)

        box_loss = smooth_l1_loss(
            box_regression[sampled_pos_inds],
            regression_targets[sampled_pos_inds],
            beta=1.0 / 9,
            size_average=False,
        ) / (sampled_inds.numel())

        objectness_loss = F.binary_cross_entropy_with_logits(
            objectness[sampled_inds], labels[sampled_inds])

        return objectness_loss, box_loss, shape_loss, loc_loss
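
The fg/bg sampler used above returns, for each image, binary masks over its anchors; concatenating the masks and calling torch.nonzero turns them into flat indices into the concatenated label and prediction tensors. A small sketch of that pattern with toy sampler output (values are illustrative):

import torch

# Per-image binary masks over anchors, as a fg/bg sampler would return them
sampled_pos_inds = [torch.tensor([1, 0, 0]), torch.tensor([0, 1, 0])]
sampled_neg_inds = [torch.tensor([0, 0, 1]), torch.tensor([1, 0, 0])]

# Concatenate masks across images, then convert them into flat indices
pos = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
neg = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)
sampled_inds = torch.cat([pos, neg], dim=0)

print(pos)           # tensor([0, 4])
print(neg)           # tensor([2, 3])
print(sampled_inds)  # tensor([0, 4, 2, 3])
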
    def add_gt_proposals(self, proposals, targets):
        """
        Arguments:
            proposals: list[BoxList]
            targets: list[BoxList]
        """
        # Get the device we're operating on
        device = proposals[0].bbox.device
        if cfg.ROTATE:
            if cfg.RECT_POLY_BALANCE == "Rot":
                gt_boxes = [
                    target.copy_with_fields(["xywht", "xyxy"])
                    for target in targets
                ]
            elif cfg.RECT_POLY_BALANCE == "Rect":
                gt_boxes = [
                    target.copy_with_fields(["xyxy"]) for target in targets
                ]
                for gt_box in gt_boxes:
                    xyxy = gt_box.get_field("xyxy")
                    xmin, ymin, xmax, ymax = xyxy.split(1, dim=-1)
                    gt_box.bbox = torch.cat(
                        (xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax),
                        dim=1)
                    xywht = torch.cat(
                        ((xmin + xmax) / 2.,
                         (ymin + ymax) / 2., xmax - xmin + 1, ymax - ymin + 1,
                         torch.ones_like(xmin) * (-3.14 / 2)),
                        dim=1)
                    gt_box.add_field("xywht", xywht)

            elif cfg.RECT_POLY_BALANCE == "Rect+Rot":
                gt_boxes_rects = [
                    target.copy_with_fields(["xyxy"]) for target in targets
                ]
                gt_boxes_rots = [
                    target.copy_with_fields(["xywht", "xyxy"])
                    for target in targets
                ]

                for gt_box in gt_boxes_rects:
                    xyxy = gt_box.get_field("xyxy")
                    xmin, ymin, xmax, ymax = xyxy.split(1, dim=-1)
                    gt_box.bbox = torch.cat(
                        (xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax),
                        dim=1)
                    xywht = torch.cat(
                        ((xmin + xmax) / 2.,
                         (ymin + ymax) / 2., xmax - xmin + 1, ymax - ymin + 1,
                         torch.ones_like(xmin) * (-3.14 / 2)),
                        dim=1)
                    gt_box.add_field("xywht", xywht)

                gt_boxes = [
                    cat_boxlist((gt_boxes_rect, gt_boxes_rot))
                    for gt_boxes_rect, gt_boxes_rot in zip(
                        gt_boxes_rects, gt_boxes_rots)
                ]

        else:
            gt_boxes = [target.copy_with_fields([]) for target in targets]
        # later cat of bbox requires all fields to be present for all bbox
        # so we need to add a dummy for objectness that's missing

        for gt_box in gt_boxes:
            gt_box.add_field("objectness",
                             torch.ones(len(gt_box), device=device))

        proposals = [
            cat_boxlist((proposal, gt_box))
            for proposal, gt_box in zip(proposals, gt_boxes)
        ]

        return proposals
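
A minimal usage sketch for the non-rotated branch, assuming the maskrcnn-benchmark BoxList API (constructor BoxList(bbox, image_size, mode)); the import paths and toy boxes are illustrative. The key point is that the gt boxes receive a dummy "objectness" field so cat_boxlist can merge them with the proposals, which already carry that field:

import torch
from maskrcnn_benchmark.structures.bounding_box import BoxList
from maskrcnn_benchmark.structures.boxlist_ops import cat_boxlist

image_size = (640, 480)  # (width, height)
proposals = BoxList(torch.tensor([[10., 10., 50., 50.]]), image_size, mode="xyxy")
proposals.add_field("objectness", torch.tensor([0.9]))

target = BoxList(torch.tensor([[100., 100., 200., 180.]]), image_size, mode="xyxy")
gt_box = target.copy_with_fields([])                        # drop gt-only fields
gt_box.add_field("objectness", torch.ones(len(gt_box)))     # dummy score for the cat

merged = cat_boxlist([proposals, gt_box])
print(len(merged))  # 2: the original proposal plus the ground-truth box
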
Example #28
0
    def __call__(self, anchors, box_cls, box_regression, targets, embeddings = None):
        """
        Arguments:
            anchors (list[BoxList])
            box_cls (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            retinanet_cls_loss (Tensor)
            retinanet_regression_loss (Tensor
        """
        anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
        labels, regression_targets = self.prepare_targets(anchors, targets)
        
        sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)

        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)

        N = len(labels)
        if embeddings is not None:
            box_cls, box_regression, embeddings = \
                concat_box_prediction_embeddings_layers(box_cls, box_regression, embeddings)
        else:
            box_cls, box_regression = \
                    concat_box_prediction_layers(box_cls, box_regression)

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)
        pos_inds = torch.nonzero(labels > 0).squeeze(1)

        retinanet_regression_loss = smooth_l1_loss(
            box_regression[pos_inds],
            regression_targets[pos_inds],
            beta=self.bbox_reg_beta,
            size_average=False,
        ) / (max(1, pos_inds.numel() * self.regress_norm))

        labels = labels.int()

        retinanet_cls_loss = self.box_cls_loss_func(
            box_cls,
            labels
        ) / (pos_inds.numel() + N)
        
        # triplet loss
        if embeddings is not None and self.embedding_loss == 2:
            margin = self.embed_margin
#             print('triplet margin:', margin)
            T_Loss = TripletLoss(margin)
            # hard negative mining version
            anchor_embeddings, positive_embeddings, negative_embeddings = triplet_embeddings(embeddings[sampled_inds], labels[sampled_inds])
#             anchor_embeddings, positive_embeddings, negative_embeddings = triplet_embeddings(embeddings, labels)
            triplet_loss = T_Loss(anchor_embeddings, positive_embeddings, negative_embeddings, size_average=True)
            # dynamic incremental margin
#             if triplet_loss == 0 and np.random.random() > 0.5:
#                 RetinaNetLossComputation.TRIPLET_MARGIN += 1
            return retinanet_cls_loss, retinanet_regression_loss, triplet_loss

        # pair loss
        elif embeddings is not None and self.embedding_loss == 1:
            # print('pair loss ===============================')
            margin = self.embed_margin
            C_loss = ContrastiveLoss(margin)
            embeddings1, embeddings2, targets = pair_embeddings(embeddings[sampled_inds], labels[sampled_inds])
            pair_loss = C_loss(embeddings1, embeddings2, targets)
            return retinanet_cls_loss, retinanet_regression_loss, pair_loss
        
        else:
            return retinanet_cls_loss, retinanet_regression_loss
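
The TripletLoss and ContrastiveLoss modules used above are not shown in this snippet. As a rough reference, a standard margin-based triplet loss of the kind TripletLoss plausibly implements is sketched below; this is an assumption for illustration, not this repository's code:

import torch
import torch.nn.functional as F

def triplet_margin_loss(anchor, positive, negative, margin=1.0, size_average=True):
    # Pull anchor-positive pairs together and push anchor-negative pairs
    # apart by at least `margin` in Euclidean distance.
    d_ap = F.pairwise_distance(anchor, positive)
    d_an = F.pairwise_distance(anchor, negative)
    losses = F.relu(d_ap - d_an + margin)
    return losses.mean() if size_average else losses.sum()

# Toy embeddings: 2 triplets of 4-dim vectors
a = torch.randn(2, 4)
p = a + 0.05 * torch.randn(2, 4)   # positives close to the anchors
n = torch.randn(2, 4) + 3.0        # negatives far from the anchors
print(triplet_margin_loss(a, p, n, margin=0.5))
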
Example #29
0
    def __call__(self, anchors, box_cls, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            objectness (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            objectness_loss (Tensor)
            box_loss (Tensor
        """
        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]
        labels, regression_targets = self.prepare_targets(anchors, targets)

        # sampled_pos_inds, sampled_neg_inds = self.fg_bg_sampler(labels)
        # sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds, dim=0)).squeeze(1)
        # sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds, dim=0)).squeeze(1)

        # sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)
        num_layers = len(box_cls)
        box_cls_flattened = []
        box_regression_flattened = []
        # for each feature level, permute the outputs to make them be in the
        # same format as the labels. Note that the labels are computed for
        # all feature levels concatenated, so we keep the same representation
        # for the objectness and the box_regression
        for box_cls_per_level, box_regression_per_level in zip(
                box_cls, box_regression):
            N, A, H, W = box_cls_per_level.shape
            C = self.num_classes
            box_cls_per_level = box_cls_per_level.view(N, -1, C, H, W)
            box_cls_per_level = box_cls_per_level.permute(0, 3, 4, 1, 2)
            box_cls_per_level = box_cls_per_level.reshape(N, -1, C)
            box_regression_per_level = box_regression_per_level.view(
                N, -1, 4, H, W)
            box_regression_per_level = box_regression_per_level.permute(
                0, 3, 4, 1, 2)
            box_regression_per_level = box_regression_per_level.reshape(
                N, -1, 4)
            box_cls_flattened.append(box_cls_per_level)
            box_regression_flattened.append(box_regression_per_level)
        # concatenate on the first dimension (representing the feature levels), to
        # take into account the way the labels were generated (with all feature maps
        # being concatenated as well)
        box_cls = cat(box_cls_flattened, dim=1).reshape(-1, C)
        box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4)

        labels = torch.cat(labels, dim=0)
        regression_targets = torch.cat(regression_targets, dim=0)
        pos_inds = labels > 0

        retinanet_regression_loss = self.regression_loss(
            box_regression[pos_inds],
            regression_targets[pos_inds],
            size_average=False,
        ) / (pos_inds.sum() * 4)
        labels = labels.int()

        retinanet_cls_loss = self.box_cls_loss_func(box_cls, labels) / (
            (labels > 0).sum() + N)

        losses = {
            "loss_retina_cls": retinanet_cls_loss,
            "loss_retina_reg": retinanet_regression_loss,
        }

        return losses
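
The per-level permute/reshape above turns a map of shape (N, A*C, H, W) into (N, H*W*A, C), so that after concatenating the levels on dim=1 and flattening, the row order matches the order in which the labels were generated. A minimal sketch with dummy shapes (all names are illustrative):

import torch

N, A, C, H, W = 2, 3, 4, 5, 6            # batch, anchors per location, classes, map size
box_cls_per_level = torch.randn(N, A * C, H, W)

x = box_cls_per_level.view(N, -1, C, H, W)   # (N, A, C, H, W)
x = x.permute(0, 3, 4, 1, 2)                 # (N, H, W, A, C)
x = x.reshape(N, -1, C)                      # (N, H*W*A, C)

print(x.shape)  # torch.Size([2, 90, 4])
# After cat over levels on dim=1 and a final reshape(-1, C), row k lines up with
# label k: same image, spatial position, and anchor ordering as prepare_targets.
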
Example #30
0
File: loss.py  Project: zhwzhong/NAS-FCOS
    def __call__(self, anchors, box_cls, box_regression, targets):
        """
        Arguments:
            anchors (list[BoxList])
            box_cls (list[Tensor])
            box_regression (list[Tensor])
            targets (list[BoxList])

        Returns:
            retinanet_cls_loss (Tensor)
            retinanet_regression_loss (Tensor)
        """
        if isinstance(targets, dict):
            labels = targets['labels']
            regression_targets = targets['regression_targets']

        else:
            anchors = [cat_boxlist(anchors_per_image) for anchors_per_image in anchors]
            labels, regression_targets = self.prepare_targets(anchors, targets)

        num_layers = len(box_cls)
        box_cls_flattened = []
        box_regression_flattened = []
        # for each feature level, permute the outputs to make them be in the
        # same format as the labels. Note that the labels are computed for
        # all feature levels concatenated, so we keep the same representation
        # for the box_cls and the box_regression
        for box_cls_per_level, box_regression_per_level in zip(
            box_cls, box_regression
        ):
            N, A, H, W = box_cls_per_level.shape
            C = self.num_classes
            box_cls_per_level = box_cls_per_level.view(N, -1, C, H, W)
            box_cls_per_level = box_cls_per_level.permute(0, 3, 4, 1, 2)
            box_cls_per_level = box_cls_per_level.reshape(N, -1, C)
            box_regression_per_level = box_regression_per_level.view(N, -1, 4, H, W)
            box_regression_per_level = box_regression_per_level.permute(0, 3, 4, 1, 2)
            box_regression_per_level = box_regression_per_level.reshape(N, -1, 4)
            box_cls_flattened.append(box_cls_per_level)
            box_regression_flattened.append(box_regression_per_level)
        # concatenate on the first dimension (representing the feature levels), to
        # take into account the way the labels were generated (with all feature maps
        # being concatenated as well)
        box_cls = cat(box_cls_flattened, dim=1).reshape(-1, C)
        box_regression = cat(box_regression_flattened, dim=1).reshape(-1, 4)

        if not isinstance(targets, dict):
            labels = torch.cat(labels, dim=0)
            regression_targets = torch.cat(regression_targets, dim=0)
        
        pos_inds = labels > 0

        retinanet_regression_loss = smooth_l1_loss(
            box_regression[pos_inds],
            regression_targets[pos_inds],
            beta=self.bbox_reg_beta,
            size_average=False,
        ) / (pos_inds.sum() * 4)
        retinanet_regression_loss *= self.bbox_reg_weight

        labels = labels.int()

        retinanet_cls_loss = self.box_cls_loss_func(
            box_cls,
            labels
        ) / ((labels > 0).sum() + N)

        return retinanet_cls_loss * self.weight, retinanet_regression_loss * self.weight
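
smooth_l1_loss with a beta parameter appears throughout these examples. A minimal re-implementation matching the usual maskrcnn-benchmark definition (quadratic below beta, linear above) is sketched here for reference; treat it as an approximation of the helper these snippets import:

import torch

def smooth_l1_loss(input, target, beta=1.0 / 9, size_average=True):
    # Huber-style loss: 0.5 * x ** 2 / beta for |x| < beta, |x| - 0.5 * beta otherwise
    n = torch.abs(input - target)
    loss = torch.where(n < beta, 0.5 * n ** 2 / beta, n - 0.5 * beta)
    return loss.mean() if size_average else loss.sum()

# Small residuals fall in the quadratic region, large ones in the linear region
pred = torch.tensor([0.0, 1.0])
gt = torch.tensor([0.05, 3.0])
print(smooth_l1_loss(pred, gt, size_average=False))  # ~ 0.0113 + 1.9444
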
Example #31
0
    box_coder = BoxCoder(weights=None)  # cfg.MODEL.RPN.BBOX_REG_WEIGHTS
    fg_bg_sampler = BalancedPositiveNegativeSampler(
        cfg.MODEL.RPN.BATCH_SIZE_PER_IMAGE, cfg.MODEL.RPN.POSITIVE_FRACTION)

    loss_evaluator = make_rpn_loss_evaluator(cfg, box_coder)

    start_iter = 0
    for iteration, (images, targets, _) in enumerate(data_loader, start_iter):
        images = images.to(device)
        targets = [target.to(device) for target in targets]
        feature_maps = get_feature_maps(images.tensors,
                                        cfg.MODEL.RPN.ANCHOR_STRIDE)

        anchors = anchor_generator.forward(images, feature_maps)
        anchors = [
            cat_boxlist(anchors_per_image) for anchors_per_image in anchors
        ]
        anchors_cnt = [len(a) for a in anchors]

        labels, regression_targets, matched_gt_ids, _ \
            = loss_evaluator.prepare_targets(anchors, targets)

        sampled_pos_inds, sampled_neg_inds = fg_bg_sampler(labels)

        sampled_pos_inds = torch.nonzero(torch.cat(sampled_pos_inds,
                                                   dim=0)).squeeze(1)
        sampled_neg_inds = torch.nonzero(torch.cat(sampled_neg_inds,
                                                   dim=0)).squeeze(1)

        sampled_inds = torch.cat([sampled_pos_inds, sampled_neg_inds], dim=0)