Exemplo n.º 1
0
class RegressLoss(nn.Module):
    def __init__(self, func='smooth'):
        super(RegressLoss, self).__init__()
        self.box_coder = BoxCoder()
        if func == 'smooth':
            self.criteron = smooth_l1_loss
        elif func == 'mse':
            self.criteron = F.mse_loss
        elif func == 'balanced':
            self.criteron = balanced_l1_loss
        else:
            raise NotImplementedError
            
    def forward(self, regressions, anchors, annotations, iou_thres=0.5):
        losses = []
        batch_size = regressions.shape[0]
        all_pred_boxes = self.box_coder.decode(anchors, regressions, mode='xywht')
        for j in range(batch_size):
            regression = regressions[j, :, :]
            bbox_annotation = annotations[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, -1] != -1]
            pred_boxes = all_pred_boxes[j, :, :]
            if bbox_annotation.shape[0] == 0:
                losses.append(torch.tensor(0).float().cuda())
                continue
            indicator = bbox_overlaps(
                min_area_square(anchors[j, :, :]),
                min_area_square(bbox_annotation[:, :-1])
            )
            overlaps = rbox_overlaps(
                anchors[j, :, :].cpu().numpy(),
                bbox_annotation[:, :-1].cpu().numpy(),
                indicator.cpu().numpy(),
                thresh=1e-1
            )
            if not torch.is_tensor(overlaps):
                overlaps = torch.from_numpy(overlaps).cuda()

            iou_max, iou_argmax = torch.max(overlaps, dim=1)
            positive_indices = torch.ge(iou_max, iou_thres)
            # MaxIoU assigner
            max_gt, argmax_gt = overlaps.max(0) 
            if (max_gt < iou_thres).any():
                positive_indices[argmax_gt[max_gt < iou_thres]]=1

            assigned_annotations = bbox_annotation[iou_argmax, :]
            if positive_indices.sum() > 0:
                all_rois = anchors[j, positive_indices, :]
                gt_boxes = assigned_annotations[positive_indices, :]
                targets = self.box_coder.encode(all_rois, gt_boxes)
                loss = self.criteron(regression[positive_indices, :], targets)
                losses.append(loss)
            else:
                losses.append(torch.tensor(0).float().cuda())
        return torch.stack(losses).mean(dim=0, keepdim=True)
Exemplo n.º 2
0
class RetinaNet(nn.Module):
    def __init__(self, backbone='res50', hyps=None):
        super(RetinaNet, self).__init__()
        self.num_classes = int(hyps['num_classes']) + 1
        self.anchor_generator = Anchors(ratios=np.array([0.5, 1, 2]), )
        self.num_anchors = self.anchor_generator.num_anchors
        self.init_backbone(backbone)

        self.fpn = FPN(in_channels_list=self.fpn_in_channels,
                       out_channels=256,
                       top_blocks=LastLevelP6P7(self.fpn_in_channels[-1], 256),
                       use_asff=False)
        self.cls_head = CLSHead(in_channels=256,
                                feat_channels=256,
                                num_stacked=4,
                                num_anchors=self.num_anchors,
                                num_classes=self.num_classes)
        self.reg_head = REGHead(
            in_channels=256,
            feat_channels=256,
            num_stacked=4,
            num_anchors=self.num_anchors,
            num_regress=5  # xywha
        )
        self.loss = IntegratedLoss(func='smooth')
        # self.loss_var = KLLoss()
        self.box_coder = BoxCoder()

    def init_backbone(self, backbone):
        if backbone == 'res34':
            self.backbone = models.resnet34(pretrained=True)
            self.fpn_in_channels = [128, 256, 512]
        elif backbone == 'res50':
            self.backbone = models.resnet50(pretrained=True)
            self.fpn_in_channels = [512, 1024, 2048]
        elif backbone == 'res101':
            self.backbone = models.resnet101(pretrained=True)
            self.fpn_in_channels = [512, 1024, 2048]
        elif backbone == 'res152':
            self.backbone = models.resnet152(pretrained=True)
            self.fpn_in_channels = [512, 1024, 2048]
        elif backbone == 'resnext50':
            self.backbone = models.resnext50_32x4d(pretrained=True)
            self.fpn_in_channels = [512, 1024, 2048]
        else:
            raise NotImplementedError
        del self.backbone.avgpool
        del self.backbone.fc

    def ims_2_features(self, ims):
        c1 = self.backbone.relu(self.backbone.bn1(self.backbone.conv1(ims)))
        c2 = self.backbone.layer1(self.backbone.maxpool(c1))
        c3 = self.backbone.layer2(c2)
        c4 = self.backbone.layer3(c3)
        c5 = self.backbone.layer4(c4)
        #c_i shape: bs,C,H,W
        return [c3, c4, c5]

    def forward(self, ims, gt_boxes=None, test_conf=None, process=None):
        anchors_list, offsets_list, cls_list, var_list = [], [], [], []
        original_anchors = self.anchor_generator(
            ims)  # (bs, num_all_achors, 5)
        anchors_list.append(original_anchors)
        features = self.fpn(self.ims_2_features(ims))
        cls_score = torch.cat([self.cls_head(feature) for feature in features],
                              dim=1)
        bbox_pred = torch.cat([self.reg_head(feature) for feature in features],
                              dim=1)
        bboxes = self.box_coder.decode(anchors_list[-1],
                                       bbox_pred,
                                       mode='xywht').detach()

        if self.training:
            losses = dict()
            bf_weight = self.calc_mining_param(process, 0.3)
            losses['loss_cls'], losses['loss_reg'] = self.loss(cls_score, bbox_pred, anchors_list[-1], bboxes, gt_boxes, \
                                                               md_thres=0.6,
                                                               mining_param=(bf_weight, 1-bf_weight, 5)
                                                              )
            return losses

        else:  # eval() mode
            return self.decoder(ims,
                                anchors_list[-1],
                                cls_score,
                                bbox_pred,
                                test_conf=test_conf)

    def decoder(self,
                ims,
                anchors,
                cls_score,
                bbox_pred,
                thresh=0.6,
                nms_thresh=0.2,
                test_conf=None):
        if test_conf is not None:
            thresh = test_conf
        bboxes = self.box_coder.decode(anchors, bbox_pred, mode='xywht')
        bboxes = clip_boxes(bboxes, ims)
        scores = torch.max(cls_score, dim=2, keepdim=True)[0]
        keep = (scores >= thresh)[0, :, 0]
        if keep.sum() == 0:
            return [torch.zeros(1), torch.zeros(1), torch.zeros(1, 5)]
        scores = scores[:, keep, :]
        anchors = anchors[:, keep, :]
        cls_score = cls_score[:, keep, :]
        bboxes = bboxes[:, keep, :]
        # NMS
        anchors_nms_idx = nms(
            torch.cat([bboxes, scores], dim=2)[0, :, :], nms_thresh)
        nms_scores, nms_class = cls_score[0, anchors_nms_idx, :].max(dim=1)
        output_boxes = torch.cat(
            [bboxes[0, anchors_nms_idx, :], anchors[0, anchors_nms_idx, :]],
            dim=1)
        return [nms_scores, nms_class, output_boxes]

    def freeze_bn(self):
        for layer in self.modules():
            if isinstance(layer, nn.BatchNorm2d):
                layer.eval()

    def calc_mining_param(self, process, alpha):
        if process < 0.1:
            bf_weight = 1.0
        elif process > 0.3:
            bf_weight = alpha
        else:
            bf_weight = 5 * (alpha - 1) * process + 1.5 - 0.5 * alpha
        return bf_weight
Exemplo n.º 3
0
class IntegratedLoss(nn.Module):
    def __init__(self, alpha=0.25, gamma=2.0, func = 'smooth'):
        super(IntegratedLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.box_coder = BoxCoder()
        if func == 'smooth':
            self.criteron = smooth_l1_loss
        elif func == 'mse':
            self.criteron = F.mse_loss
        elif func == 'balanced':
            self.criteron = balanced_l1_loss
            
    def forward(self, classifications, regressions, anchors, annotations,iou_thres=0.5):
        
        cls_losses = []
        reg_losses = []
        batch_size = classifications.shape[0]
        all_pred_boxes = self.box_coder.decode(anchors, regressions, mode='xywht')
        for j in range(batch_size):
            classification = classifications[j, :, :]
            regression = regressions[j, :, :]
            bbox_annotation = annotations[j, :, :]
            bbox_annotation = bbox_annotation[bbox_annotation[:, -1] != -1]
            pred_boxes = all_pred_boxes[j, :, :]
            if bbox_annotation.shape[0] == 0:
                cls_losses.append(torch.tensor(0).float().cuda())
                reg_losses.append(torch.tensor(0).float().cuda())
                continue
            classification = torch.clamp(classification, 1e-4, 1.0 - 1e-4)
            indicator = bbox_overlaps(
                min_area_square(anchors[j, :, :]),
                min_area_square(bbox_annotation[:, :-1])
            )
            ious = rbox_overlaps(
                anchors[j, :, :].cpu().numpy(),
                bbox_annotation[:, :-1].cpu().numpy(),
                indicator.cpu().numpy(),
                thresh=1e-1
            )
            if not torch.is_tensor(ious):
                ious = torch.from_numpy(ious).cuda()
            
            iou_max, iou_argmax = torch.max(ious, dim=1)
           
            positive_indices = torch.ge(iou_max, iou_thres)

            max_gt, argmax_gt = ious.max(0) 
            if (max_gt < iou_thres).any():
                positive_indices[argmax_gt[max_gt < iou_thres]]=1
              
            # cls loss
            cls_targets = (torch.ones(classification.shape) * -1).cuda()
            cls_targets[torch.lt(iou_max, iou_thres - 0.1), :] = 0
            num_positive_anchors = positive_indices.sum()
            assigned_annotations = bbox_annotation[iou_argmax, :]
            cls_targets[positive_indices, :] = 0
            cls_targets[positive_indices, assigned_annotations[positive_indices, -1].long()] = 1
            alpha_factor = torch.ones(cls_targets.shape).cuda() * self.alpha
            alpha_factor = torch.where(torch.eq(cls_targets, 1.), alpha_factor, 1. - alpha_factor)
            focal_weight = torch.where(torch.eq(cls_targets, 1.), 1. - classification, classification)
            focal_weight = alpha_factor * torch.pow(focal_weight, self.gamma)
            bin_cross_entropy = -(cls_targets * torch.log(classification+1e-6) + (1.0 - cls_targets) * torch.log(1.0 - classification+1e-6))
            cls_loss = focal_weight * bin_cross_entropy 
            cls_loss = torch.where(torch.ne(cls_targets, -1.0), cls_loss, torch.zeros(cls_loss.shape).cuda())
            cls_losses.append(cls_loss.sum() / torch.clamp(num_positive_anchors.float(), min=1.0))
            # reg loss
            if positive_indices.sum() > 0:
                all_rois = anchors[j, positive_indices, :]
                gt_boxes = assigned_annotations[positive_indices, :]
                reg_targets = self.box_coder.encode(all_rois, gt_boxes)
                reg_loss = self.criteron(regression[positive_indices, :], reg_targets)
                reg_losses.append(reg_loss)

                if not torch.isfinite(reg_loss) :
                    import ipdb; ipdb.set_trace()
            else:
                reg_losses.append(torch.tensor(0).float().cuda())
        loss_cls = torch.stack(cls_losses).mean(dim=0, keepdim=True)
        loss_reg = torch.stack(reg_losses).mean(dim=0, keepdim=True)
        return loss_cls, loss_reg
Exemplo n.º 4
0
class RetinaNet(nn.Module):
    def __init__(self, backbone='res50', hyps=None):
        super(RetinaNet, self).__init__()
        self.num_classes = int(
            hyps['num_classes']
        ) + 1  #这里的num-class由分类级别决定,hrsc_dataset.py文件中分三个级别,所以训练时的level也需要改
        self.anchor_generator = Anchors(ratios=np.array([0.5, 1, 2]), )
        self.num_anchors = self.anchor_generator.num_anchors
        self.init_backbone(backbone)

        self.fpn = FPN(in_channels_list=self.fpn_in_channels,
                       out_channels=256,
                       top_blocks=LastLevelP6P7(self.fpn_in_channels[-1], 256),
                       use_asff=False)
        self.cls_head = CLSHead(in_channels=256,
                                feat_channels=256,
                                num_stacked=4,
                                num_anchors=self.num_anchors,
                                num_classes=self.num_classes)
        self.reg_head = REGHead(
            in_channels=256,
            feat_channels=256,
            num_stacked=4,
            num_anchors=self.num_anchors,
            num_regress=5  # xywha
        )
        self.loss = IntegratedLoss(func='smooth')  #计算损失函数
        # self.loss_var = KLLoss()
        self.box_coder = BoxCoder()  #计算回归值

    def init_backbone(self, backbone):
        if backbone == 'res34':
            self.backbone = models.resnet34(pretrained=True)
            self.fpn_in_channels = [128, 256, 512]
        elif backbone == 'res50':
            self.backbone = models.resnet50(pretrained=True)
            self.fpn_in_channels = [512, 1024, 2048]
        elif backbone == 'res101':
            self.backbone = models.resnet101(pretrained=True)
            self.fpn_in_channels = [512, 1024, 2048]
        elif backbone == 'res152':
            self.backbone = models.resnet152(pretrained=True)
            self.fpn_in_channels = [512, 1024, 2048]
        elif backbone == 'resnext50':
            self.backbone = models.resnext50_32x4d(pretrained=True)
            self.fpn_in_channels = [512, 1024, 2048]
        else:
            raise NotImplementedError
        del self.backbone.avgpool
        del self.backbone.fc

    def ims_2_features(self, ims):
        c1 = self.backbone.relu(self.backbone.bn1(self.backbone.conv1(ims)))
        c2 = self.backbone.layer1(self.backbone.maxpool(c1))
        c3 = self.backbone.layer2(c2)
        c4 = self.backbone.layer3(c3)
        c5 = self.backbone.layer4(c4)
        #c_i shape: bs,C,H,W
        return [c3, c4, c5]

    def forward(self, ims, gt_boxes=None, test_conf=None, process=None):
        anchors_list, offsets_list, cls_list, var_list = [], [], [], []
        original_anchors = self.anchor_generator(
            ims)  # 尺度=(batchsize, num_all_achors, 5)
        anchors_list.append(original_anchors)

        # 经过网络计算的特征图---两个---一个用于分类一个用于回归
        features = self.fpn(self.ims_2_features(ims))
        cls_score = torch.cat([self.cls_head(feature) for feature in features],
                              dim=1)
        bbox_pred = torch.cat([self.reg_head(feature) for feature in features],
                              dim=1)

        # 获取回归的box
        bboxes = self.box_coder.decode(anchors_list[-1],
                                       bbox_pred,
                                       mode='xywht').detach()

        if self.training:  # 如果是训练,则返回损失---分类损失和回归损失---通过loss函数计算
            losses = dict()
            bf_weight = self.calc_mining_param(
                process, 0.3)  # 逐步调整输入iou对匹配度的影响,也就是跟随训练进度来调整alpha
            # 所有框分类得分特征图(类别)、预测框特征图(偏移)、原始anchor、预测框回归值(真实坐标角度)、标签框(最后一位应该代表的是类别,只有1,否则-1)
            losses['loss_cls'], losses['loss_reg'] = self.loss(cls_score, bbox_pred, anchors_list[-1], bboxes, gt_boxes, \
                                                               md_thres=0.6,
                                                               mining_param=(bf_weight, 1-bf_weight, 5)
                                                              )
            return losses

        else:  # eval() mode 如果不是训练---则返回的是[nms_scores, nms_class, output_boxes]
            return self.decoder(ims,
                                anchors_list[-1],
                                cls_score,
                                bbox_pred,
                                test_conf=test_conf)

    # decode解码是返回检测框(两个坐标)以及角度---encoder编码是计算偏移-用于计算损失
    def decoder(self,
                ims,
                anchors,
                cls_score,
                bbox_pred,
                thresh=0.6,
                nms_thresh=0.2,
                test_conf=None):
        if test_conf is not None:
            thresh = test_conf
        bboxes = self.box_coder.decode(anchors, bbox_pred,
                                       mode='xywht')  #返回真实的预测框,两个坐标以及角度
        bboxes = clip_boxes(bboxes, ims)
        scores = torch.max(cls_score, dim=2, keepdim=True)[0]
        keep = (scores >= thresh)[0, :, 0]
        if keep.sum() == 0:
            return [torch.zeros(1), torch.zeros(1), torch.zeros(1, 5)]
        scores = scores[:, keep, :]
        anchors = anchors[:, keep, :]
        cls_score = cls_score[:, keep, :]
        bboxes = bboxes[:, keep, :]
        # NMS
        anchors_nms_idx = nms(
            torch.cat([bboxes, scores], dim=2)[0, :, :], nms_thresh)
        nms_scores, nms_class = cls_score[0, anchors_nms_idx, :].max(dim=1)
        output_boxes = torch.cat(
            [bboxes[0, anchors_nms_idx, :], anchors[0, anchors_nms_idx, :]],
            dim=1)
        return [nms_scores, nms_class, output_boxes]

    def freeze_bn(self):
        for layer in self.modules():
            if isinstance(layer, nn.BatchNorm2d):
                layer.eval()

    def calc_mining_param(self, process, alpha):  # 逐步调整输入iou对匹配度的影响
        if process < 0.1:
            bf_weight = 1.0
        elif process > 0.3:
            bf_weight = alpha
        else:
            bf_weight = 5 * (alpha - 1) * process + 1.5 - 0.5 * alpha
        return bf_weight
class STELA(nn.Module):
    def __init__(self, backbone='res50', num_classes=2, num_refining=1):
        super(STELA, self).__init__()
        self.anchor_generator = Anchors()
        self.num_anchors = self.anchor_generator.num_anchors
        self.init_backbone(backbone)
        self.fpn = FPN(in_channels_list=self.fpn_in_channels,
                       out_channels=256,
                       top_blocks=LastLevelP6P7(self.fpn_in_channels[-1], 256))
        self.cls_head = CLSHead(in_channels=256,
                                feat_channels=256,
                                num_stacked=1,
                                num_anchors=self.num_anchors,
                                num_classes=num_classes)
        self.reg_head = REGHead(in_channels=256,
                                feat_channels=256,
                                num_stacked=1,
                                num_anchors=self.num_anchors,
                                num_regress=5)
        self.num_refining = num_refining
        if self.num_refining > 0:
            self.ref_heads = nn.ModuleList([
                REGHead(in_channels=256,
                        feat_channels=256,
                        num_stacked=1,
                        num_anchors=self.num_anchors,
                        num_regress=5) for _ in range(self.num_refining)
            ])
            self.loss_ref = RegressLoss(func='smooth')
        self.loss_cls = FocalLoss()
        self.loss_reg = RegressLoss(func='smooth')
        self.box_coder = BoxCoder()

    def init_backbone(self, backbone):
        if backbone == 'res34':
            self.backbone = models.resnet34(pretrained=True)
            self.fpn_in_channels = [128, 256, 512]
        elif backbone == 'res50':
            self.backbone = models.resnet50(pretrained=True)
            self.fpn_in_channels = [512, 1024, 2048]
        elif backbone == 'resnext50':
            self.backbone = models.resnext50_32x4d(pretrained=True)
            self.fpn_in_channels = [512, 1024, 2048]
        else:
            raise NotImplementedError
        del self.backbone.avgpool
        del self.backbone.fc

    def ims_2_features(self, ims):
        c1 = self.backbone.relu(self.backbone.bn1(self.backbone.conv1(ims)))
        c2 = self.backbone.layer1(self.backbone.maxpool(c1))
        c3 = self.backbone.layer2(c2)
        c4 = self.backbone.layer3(c3)
        c5 = self.backbone.layer4(c4)
        return [c3, c4, c5]

    def forward(self, ims, gt_boxes=None):
        anchors_list, offsets_list = [], []
        original_anchors = self.anchor_generator(ims)
        anchors_list.append(original_anchors)
        features = self.fpn(self.ims_2_features(ims))

        # anchor refining
        if self.num_refining > 0:
            for i in range(self.num_refining):
                bbox_pred = torch.cat(
                    [self.ref_heads[i](feature) for feature in features],
                    dim=1)
                refined_anchors = self.box_coder.decode(anchors_list[-1],
                                                        bbox_pred,
                                                        mode='wht').detach()
                anchors_list.append(refined_anchors)
                offsets_list.append(bbox_pred)

        cls_score = torch.cat([self.cls_head(feature) for feature in features],
                              dim=1)
        bbox_pred = torch.cat([self.reg_head(feature) for feature in features],
                              dim=1)
        if self.training:
            losses = dict()
            if self.num_refining > 0:
                ref_losses = []
                for i in range(self.num_refining):
                    ref_losses.append(
                        self.loss_ref(offsets_list[i],
                                      anchors_list[i],
                                      gt_boxes,
                                      iou_thresh=(0.3 + i * 0.1)))
                losses['loss_ref'] = torch.stack(ref_losses).mean(dim=0,
                                                                  keepdim=True)
            losses['loss_cls'] = self.loss_cls(cls_score,
                                               anchors_list[-1],
                                               gt_boxes,
                                               iou_thresh=0.5)
            losses['loss_reg'] = self.loss_reg(bbox_pred,
                                               anchors_list[-1],
                                               gt_boxes,
                                               iou_thresh=0.5)
            return losses
        else:
            return self.decoder(ims, anchors_list[-1], cls_score, bbox_pred)

    def decoder(self,
                ims,
                anchors,
                cls_score,
                bbox_pred,
                thresh=0.3,
                nms_thresh=0.3):
        bboxes = self.box_coder.decode(anchors, bbox_pred, mode='xywht')
        bboxes = clip_boxes(bboxes, ims)
        scores = torch.max(cls_score, dim=2, keepdim=True)[0]
        keep = (scores >= thresh)[0, :, 0]
        if keep.sum() == 0:
            return [torch.zeros(1), torch.zeros(1), torch.zeros(1, 5)]
        scores = scores[:, keep, :]
        anchors = anchors[:, keep, :]
        cls_score = cls_score[:, keep, :]
        bboxes = bboxes[:, keep, :]
        anchors_nms_idx = nms(
            torch.cat([bboxes, scores], dim=2)[0, :, :], nms_thresh)
        nms_scores, nms_class = cls_score[0, anchors_nms_idx, :].max(dim=1)
        output_boxes = torch.cat(
            [bboxes[0, anchors_nms_idx, :], anchors[0, anchors_nms_idx, :]],
            dim=1)
        return [nms_scores, nms_class, output_boxes]

    def freeze_bn(self):
        for layer in self.modules():
            if isinstance(layer, nn.BatchNorm2d):
                layer.eval()