def init_param(self, model_config):
        """Copy the second-stage hyper-parameters from ``model_config`` onto self.

        Every entry except ``use_self_attention`` is read with
        ``model_config[key]``, so a missing key raises ``KeyError``;
        ``use_self_attention`` is read with ``.get`` and is ``None`` when
        absent.  Also builds a ``TargetAssigner`` from
        ``target_assigner_config`` and a ``DetectionSampler`` configured with
        ``fg_fraction`` equal to 1.

        Args:
            model_config: mapping holding the keys read below.
        """
        class_names = model_config['classes']
        self.classes = class_names
        self.n_classes = len(class_names)

        # Settings stored under the same attribute name as their config key.
        # The key order is kept so a missing key fails at the same point.
        for key in ('class_agnostic', 'pooling_size', 'pooling_mode',
                    'crop_resize_with_max_pool', 'truncated'):
            setattr(self, key, model_config[key])

        # Optional flag: None when the config does not provide it.
        self.use_self_attention = model_config.get('use_self_attention')

        # Remaining scalar settings followed by the sub-module configs.
        for key in ('use_focal_loss', 'subsample_twice', 'rcnn_batch_size',
                    'feature_extractor_config', 'rpn_config'):
            setattr(self, key, model_config[key])

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # sampler
        self.sampler = DetectionSampler({'fg_fraction': 1})
# Example #2
# 0
    def init_param(self, model_config):
        """Copy the RPN hyper-parameters from ``model_config`` onto self.

        All entries are read with ``model_config[key]``, so a missing key
        raises ``KeyError``.  Besides the plain settings this builds a
        ``DetectionSampler`` (``fg_fraction`` 1), an ``AnchorGenerator`` and a
        ``TargetAssigner``, and derives the output channel counts from the
        generator's ``num_anchors``.

        Args:
            model_config: mapping holding the keys read below.
        """
        # The input channel count is stored under a different name than its key.
        self.in_channels = model_config['din']
        for key in ('post_nms_topN', 'pre_nms_topN', 'nms_thresh',
                    'use_score', 'rpn_batch_size', 'use_focal_loss'):
            setattr(self, key, model_config[key])

        # sampler
        self.sampler = DetectionSampler({'fg_fraction': 1})

        # anchor generator
        generator = AnchorGenerator(model_config['anchor_generator_config'])
        self.anchor_generator = generator
        anchor_count = generator.num_anchors
        self.num_anchors = anchor_count
        # 4 box values and 2 scores are predicted for every anchor.
        self.nc_bbox_out = 4 * anchor_count
        self.nc_score_out = anchor_count * 2

        # target assigner
        assigner = TargetAssigner(model_config['target_assigner_config'])
        self.target_assigner = assigner

        # bbox coder is shared with the target assigner
        self.bbox_coder = assigner.bbox_coder
class OrgOHEMThreeIoUFasterRCNN(Model):
    """Faster R-CNN variant with a convolutional box head.

    The model chains a ResNet feature extractor, an RPN, RoI pooling, a
    linear classification head and a 4-channel convolutional box head.
    ``loss`` returns the RPN losses plus a box-regression loss computed on
    proposals chosen by ``self.sampler`` using the per-proposal loss as the
    selection criterion; no second-stage classification loss is produced.
    """

    def forward(self, feed_dict):
        """Run the backbone, the RPN and both second-stage heads.

        Args:
            feed_dict: mapping with an ``'img'`` entry.  It is updated in
                place with the first-stage feature map under ``'base_feat'``.

        Returns:
            dict with everything the RPN returned plus ``rcnn_bbox_preds``,
            ``rcnn_cls_scores`` and ``second_rpn_anchors``.
        """
        outputs = {}

        # base model
        first_stage = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': first_stage})

        # rpn model
        outputs.update(self.rpn_model.forward(feed_dict))

        # Proposals are flattened to rows of 5 values before pooling.
        proposals = outputs['rois_batch']
        roi_feat = self.rcnn_pooling(first_stage, proposals.view(-1, 5))
        roi_feat = self.feature_extractor.second_stage_feature(roi_feat)

        # The classification head sees the spatially averaged feature.
        cls_scores = self.rcnn_cls_pred(roi_feat.mean(3).mean(2))

        # The box head is convolutional: flatten so that every row holds one
        # of the 4 output channels of one proposal, average that row, then
        # regroup the averages into rows of 4.
        bbox_map = self.rcnn_bbox_pred(roi_feat)
        bbox_rows = bbox_map.view(bbox_map.shape[0] * 4, -1)
        bbox_preds = bbox_rows.mean(-1).view(-1, 4)

        outputs['rcnn_bbox_preds'] = bbox_preds
        outputs['rcnn_cls_scores'] = cls_scores

        # used for track
        order = outputs['proposals_order']
        outputs['second_rpn_anchors'] = outputs['anchors'][0][order]

        return outputs

    def generate_channel_attention(self, feat):
        """Return ``feat`` averaged over dims 3 and 2, keeping both dims."""
        averaged_last = feat.mean(3, keepdim=True)
        return averaged_last.mean(2, keepdim=True)

    def generate_spatial_attention(self, feat):
        """Return the output of the ``spatial_attention`` conv applied to ``feat``."""
        attention = self.spatial_attention(feat)
        return attention

    def init_weights(self):
        """Initialise the sub-modules, then both prediction heads.

        The heads are filled through ``Filler.normal_init`` with mean 0 and a
        standard deviation of 0.01 (classification) or 0.001 (box).
        """
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        for head_name, std in (('rcnn_cls_pred', 0.01),
                               ('rcnn_bbox_pred', 0.001)):
            Filler.normal_init(getattr(self, head_name), 0, std,
                               self.truncated)

    def init_modules(self):
        """Create the sub-networks, prediction heads and loss modules."""
        feat_channels = 2048

        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)

        pool = self.pooling_size
        self.rcnn_pooling = RoIAlignAvg(pool, pool, 1.0 / 16.0)

        # Linear classifier, but a 3x3 convolutional box regressor.
        self.rcnn_cls_pred = nn.Linear(feat_channels, self.n_classes)
        self.rcnn_bbox_pred = nn.Conv2d(
            feat_channels, 4, kernel_size=3, stride=1, padding=1)

        # loss modules, both left unreduced
        self.rcnn_cls_loss = nn.MSELoss(reduce=False)
        self.rcnn_bbox_loss = nn.SmoothL1Loss(reduce=False)

        # attention
        self.spatial_attention = nn.Conv2d(
            feat_channels, 1, kernel_size=3, stride=1, padding=1)

    def init_param(self, model_config):
        """Copy the hyper-parameters from ``model_config`` onto self.

        Every entry except ``use_self_attention`` is read with
        ``model_config[key]`` (``KeyError`` when missing);
        ``use_self_attention`` is read with ``.get``.  Several of the stored
        values are not read by any method defined in this class.
        """
        class_names = model_config['classes']
        self.classes = class_names
        self.n_classes = len(class_names)

        # Attribute name equals config key; order kept from the original.
        for key in ('class_agnostic', 'pooling_size', 'pooling_mode',
                    'crop_resize_with_max_pool', 'truncated'):
            setattr(self, key, model_config[key])

        self.use_self_attention = model_config.get('use_self_attention')

        for key in ('use_focal_loss', 'subsample_twice', 'rcnn_batch_size',
                    'feature_extractor_config', 'rpn_config'):
            setattr(self, key, model_config[key])

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # sampler
        self.sampler = DetectionSampler({'fg_fraction': 1})

    def loss(self, prediction_dict, feed_dict):
        """Assign targets to the proposals, subsample them and compute the loss.

        Args:
            prediction_dict: output of ``forward``.  ``'rcnn_reg_weights'``
                is written into it as a side effect.
            feed_dict: mapping with ``'gt_boxes'`` and ``'gt_labels'``.

        Returns:
            dict with the RPN losses plus ``'rcnn_bbox_loss'``.
        """
        losses = {}

        # submodule loss
        losses.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # assigner; the first value of every proposal row is dropped
        proposals = prediction_dict['rois_batch']
        assigned = self.target_assigner.assign(
            proposals[:, :, 1:], feed_dict['gt_boxes'],
            feed_dict['gt_labels'])
        _, reg_targets, _, reg_weights = assigned

        # Per-proposal smooth L1 loss.  Only index 0 of the targets is used.
        # NOTE(review): this looks like it assumes a batch size of 1 - confirm.
        per_roi_loss = self.rcnn_bbox_loss(
            prediction_dict['rcnn_bbox_preds'], reg_targets[0]).sum(dim=-1)

        # bbox subsample, with the weighted loss as selection criterion
        criterion = reg_weights * per_roi_loss
        sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size, reg_weights > 0, criterion=criterion)

        # In-place on purpose: the masked weights are also what gets stored
        # in prediction_dict below.
        reg_weights *= sampled_mask.type_as(reg_weights)
        num_sampled = (reg_weights > 0).sum(dim=-1)
        assert num_sampled, 'bug happens'

        per_roi_loss *= reg_weights[0]
        losses['rcnn_bbox_loss'] = (
            per_roi_loss.sum(dim=-1) / num_sampled.float())

        prediction_dict['rcnn_reg_weights'] = reg_weights

        return losses
class ThreeIoUFasterRCNN(Model):
    """Faster R-CNN variant with a convolutional score map and saliency-weighted box head.

    During training the proposals are subsampled before RoI pooling
    (``pre_subsample``); ``loss`` then consumes the targets and weights that
    ``pre_subsample`` stored in the prediction dict.
    """

    def forward(self, feed_dict):
        """Run the backbone, the RPN and both second-stage heads.

        Args:
            feed_dict: mapping with an ``'img'`` entry (plus whatever
                ``pre_subsample`` reads when training).  It is updated in
                place with ``'base_feat'``.

        Returns:
            dict with the RPN outputs plus ``rcnn_cls_probs``,
            ``rcnn_bbox_preds``, ``rcnn_cls_scores`` and
            ``second_rpn_anchors``.
        """
        outputs = {}

        # base model
        first_stage = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': first_stage})

        # rpn model
        outputs.update(self.rpn_model.forward(feed_dict))

        # Subsample before pooling; this replaces outputs['rois_batch'].
        if self.training:
            self.pre_subsample(outputs, feed_dict)
        proposals = outputs['rois_batch']

        # Proposals are flattened to rows of 5 values before pooling.
        roi_feat = self.rcnn_pooling(first_stage, proposals.view(-1, 5))
        roi_feat = self.feature_extractor.second_stage_feature(roi_feat)

        # The conv head gives a score map; its spatial mean is the score.
        score_map = self.rcnn_cls_pred(roi_feat)
        cls_scores = score_map.mean(3).mean(2)
        saliency = F.softmax(score_map, dim=1)
        cls_probs = F.softmax(cls_scores, dim=1)

        # Weight the features by the saliency of every channel but the first.
        # NOTE(review): the product relies on broadcasting over dim 1, which
        # presumably requires exactly two classes - confirm.
        weighted_feat = roi_feat * saliency[:, 1:, :, :]
        bbox_preds = self.rcnn_bbox_pred(weighted_feat.mean(3).mean(2))

        outputs['rcnn_cls_probs'] = cls_probs
        outputs['rcnn_bbox_preds'] = bbox_preds
        outputs['rcnn_cls_scores'] = cls_scores

        # used for track
        order = outputs['proposals_order']
        outputs['second_rpn_anchors'] = outputs['anchors'][0][order]

        return outputs

    def init_weights(self):
        """Initialise the sub-modules, then both prediction heads.

        The heads are filled through ``Filler.normal_init`` with mean 0 and a
        standard deviation of 0.01 (classification) or 0.001 (box).
        """
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        for head_name, std in (('rcnn_cls_pred', 0.01),
                               ('rcnn_bbox_pred', 0.001)):
            Filler.normal_init(getattr(self, head_name), 0, std,
                               self.truncated)

    def init_modules(self):
        """Create the sub-networks, prediction heads and loss modules."""
        feat_channels = 2048

        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)

        pool = self.pooling_size
        self.rcnn_pooling = RoIAlignAvg(pool, pool, 1.0 / 16.0)

        # Convolutional classifier so that a per-position score map exists.
        self.rcnn_cls_pred = nn.Conv2d(
            feat_channels, self.n_classes, kernel_size=3, stride=1, padding=1)

        # One box per proposal when class agnostic, else one box per class.
        bbox_outputs = 4 if self.class_agnostic else 4 * self.n_classes
        self.rcnn_bbox_pred = nn.Linear(feat_channels, bbox_outputs)

        # loss modules, both left unreduced
        self.rcnn_cls_loss = nn.MSELoss(reduce=False)
        self.rcnn_bbox_loss = nn.SmoothL1Loss(reduce=False)

    def init_param(self, model_config):
        """Copy the hyper-parameters from ``model_config`` onto self.

        All entries are read with ``model_config[key]`` (``KeyError`` when
        missing).  Builds the target assigner, a ``HardNegativeSampler`` for
        boxes and a ``BalancedSampler`` (from ``sampler_config``) for IoU.
        """
        class_names = model_config['classes']
        self.classes = class_names
        self.n_classes = len(class_names)

        # Attribute name equals config key; order kept from the original.
        for key in ('class_agnostic', 'pooling_size', 'pooling_mode',
                    'crop_resize_with_max_pool', 'truncated',
                    'use_focal_loss', 'subsample_twice', 'rcnn_batch_size',
                    'feature_extractor_config', 'rpn_config'):
            setattr(self, key, model_config[key])

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # samplers
        self.bbox_sampler = HardNegativeSampler({'fg_fraction': 1})
        self.iou_sampler = BalancedSampler(model_config['sampler_config'])

    def pre_subsample(self, prediction_dict, feed_dict):
        """Assign targets and subsample the proposals before RoI pooling.

        Two masks are drawn: one for box regression (``bbox_sampler``) and one
        for the IoU/classification target (``iou_sampler``).  The second mask
        is widened to include the first, so every proposal used for regression
        is also used for classification.

        Side effects on ``prediction_dict``: writes ``rcnn_cls_weights``,
        ``rcnn_reg_weights``, ``rcnn_cls_targets``, ``rcnn_reg_targets`` and
        ``bbox_batch_sampled_mask``, and replaces ``rois_batch`` with the
        sampled proposals.
        """
        proposals = prediction_dict['rois_batch']
        gt_boxes = feed_dict['gt_boxes']
        gt_labels = feed_dict['gt_labels']

        # assigner; the first value of every proposal row is dropped
        assigned = self.target_assigner.assign(
            proposals[:, :, 1:], gt_boxes, gt_labels)
        cls_targets, reg_targets, cls_weights, reg_weights = assigned

        # bbox subsample, using the matcher's assigned overlaps as criterion
        overlaps = self.target_assigner.matcher.assigned_overlaps_batch
        bbox_mask = self.bbox_sampler.subsample_batch(
            self.rcnn_batch_size, reg_weights > 0, criterion=overlaps)
        # ignore when bbox loss is not necessary
        bbox_mask[reg_weights == 0] = 0
        reg_weights = reg_weights[bbox_mask]
        num_reg = (reg_weights > 0).sum(dim=-1)
        assert num_reg, 'bug happens'

        # iou subsample (balanced)
        iou_mask = self.iou_sampler.subsample_batch(
            self.rcnn_batch_size, cls_targets > 0)
        # make sure iou optimized when bbox optimized
        iou_mask |= bbox_mask
        cls_weights = cls_weights[iou_mask]
        num_cls = (cls_weights > 0).sum(dim=-1)
        assert num_cls, 'bug happens'

        # Weights are stored already divided by their non-zero count.
        prediction_dict['rcnn_cls_weights'] = cls_weights / num_cls.float()
        prediction_dict['rcnn_reg_weights'] = reg_weights / num_reg.float()
        prediction_dict['rcnn_cls_targets'] = cls_targets[iou_mask]
        prediction_dict['rcnn_reg_targets'] = reg_targets[bbox_mask]

        # update rois_batch: keep everything selected by the widened mask
        prediction_dict['rois_batch'] = proposals[iou_mask].view(
            proposals.shape[0], -1, 5)

        # NOTE(review): forward() only calls this method while training, so
        # on that call path this branch does not run.
        if not self.training:
            # used for track
            order = prediction_dict['proposals_order']
            prediction_dict['proposals_order'] = order[iou_mask]

        # Box mask expressed relative to the proposals that were kept.
        prediction_dict['bbox_batch_sampled_mask'] = bbox_mask[iou_mask]

    def loss(self, prediction_dict, feed_dict):
        """Compute the losses from the targets stored by ``pre_subsample``.

        Args:
            prediction_dict: output of ``forward`` in training mode.
            feed_dict: passed through to the RPN loss.

        Returns:
            dict with the RPN losses plus ``'rcnn_cls_loss'`` and
            ``'rcnn_bbox_loss'``.
        """
        losses = {}

        # submodule loss
        losses.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # targets, weights and mask prepared by pre_subsample
        cls_weights = prediction_dict['rcnn_cls_weights']
        reg_weights = prediction_dict['rcnn_reg_weights']
        cls_targets = prediction_dict['rcnn_cls_targets']
        reg_targets = prediction_dict['rcnn_reg_targets']
        bbox_mask = prediction_dict['bbox_batch_sampled_mask']

        # classification loss: MSE between the exponentials of the column-1
        # probability and of the target
        scores = torch.exp(prediction_dict['rcnn_cls_probs'][:, 1])
        exp_targets = torch.exp(cls_targets)
        cls_loss = self.rcnn_cls_loss(scores, exp_targets)
        cls_loss *= cls_weights
        cls_loss = cls_loss.sum(dim=-1)

        # bounding box regression L1 loss on the masked predictions
        bbox_preds = prediction_dict['rcnn_bbox_preds'][bbox_mask]
        bbox_loss = self.rcnn_bbox_loss(bbox_preds, reg_targets).sum(dim=-1)
        bbox_loss *= reg_weights
        bbox_loss = bbox_loss.sum(dim=-1)

        # loss weights has no gradients
        losses['rcnn_cls_loss'] = cls_loss
        losses['rcnn_bbox_loss'] = bbox_loss

        return losses
class OrgOHEMThreeIoUSecondStageFasterRCNN(Model):
    """Faster R-CNN variant with separate second- and third-stage features.

    The box head reads the ``second_stage_feature`` output and the
    classification head reads the ``third_stage_feature`` output.  After
    initialisation ``freeze_modules`` is called and only the third-stage
    feature block is unfrozen again.  ``loss`` returns the RPN losses plus
    a classification loss only.
    """

    def forward(self, feed_dict):
        """Run the backbone, the RPN and both second-stage heads.

        Args:
            feed_dict: mapping with an ``'img'`` entry.  It is updated in
                place with ``'base_feat'``.

        Returns:
            dict with the RPN outputs plus ``rcnn_cls_probs``,
            ``rcnn_bbox_preds``, ``rcnn_cls_scores`` and
            ``second_rpn_anchors``.
        """
        outputs = {}

        # base model
        first_stage = self.feature_extractor.first_stage_feature(
            feed_dict['img'])
        feed_dict.update({'base_feat': first_stage})

        # rpn model
        outputs.update(self.rpn_model.forward(feed_dict))

        # Proposals are flattened to rows of 5 values before pooling.
        proposals = outputs['rois_batch']
        roi_feat = self.rcnn_pooling(first_stage, proposals.view(-1, 5))

        # Both branches start from the same pooled feature.
        reg_feat = self.feature_extractor.second_stage_feature(roi_feat)
        cls_feat = self.feature_extractor.third_stage_feature(roi_feat)

        # classification branch on the spatially averaged feature
        cls_scores = self.rcnn_cls_pred(cls_feat.mean(3).mean(2))
        cls_probs = F.softmax(cls_scores, dim=1)

        # regression branch on the spatially averaged feature
        bbox_preds = self.rcnn_bbox_pred(reg_feat.mean(3).mean(2))

        outputs['rcnn_cls_probs'] = cls_probs
        outputs['rcnn_bbox_preds'] = bbox_preds
        outputs['rcnn_cls_scores'] = cls_scores

        # used for track
        order = outputs['proposals_order']
        outputs['second_rpn_anchors'] = outputs['anchors'][0][order]

        return outputs

    def unfreeze_part_modules(self):
        """Set ``requires_grad`` on every parameter of the third-stage block."""
        third_stage = self.feature_extractor.third_stage_feature
        for weight in third_stage.parameters():
            weight.requires_grad = True

    def init_weights(self):
        """Initialise sub-modules and heads, then freeze all but the third stage.

        ``freeze_modules`` is not defined in this class; presumably it is
        inherited from ``Model``.
        """
        # submodule init weights
        self.feature_extractor.init_weights()
        self.rpn_model.init_weights()

        for head_name, std in (('rcnn_cls_pred', 0.01),
                               ('rcnn_bbox_pred', 0.001)):
            Filler.normal_init(getattr(self, head_name), 0, std,
                               self.truncated)

        # Freeze first, then re-enable the part that should keep training.
        self.freeze_modules()
        self.unfreeze_part_modules()

    def init_modules(self):
        """Create the sub-networks, prediction heads and loss modules."""
        feat_channels = 2048

        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)

        pool = self.pooling_size
        self.rcnn_pooling = RoIAlignAvg(pool, pool, 1.0 / 16.0)

        # Both heads are linear; the box head always predicts 4 values.
        self.rcnn_cls_pred = nn.Linear(feat_channels, self.n_classes)
        self.rcnn_bbox_pred = nn.Linear(feat_channels, 4)

        # loss modules, both left unreduced
        self.rcnn_cls_loss = nn.MSELoss(reduce=False)
        self.rcnn_bbox_loss = nn.SmoothL1Loss(reduce=False)

        # attention
        self.spatial_attention = nn.Conv2d(
            feat_channels, 1, kernel_size=3, stride=1, padding=1)

    def init_param(self, model_config):
        """Copy the hyper-parameters from ``model_config`` onto self.

        Every entry except ``use_self_attention`` is read with
        ``model_config[key]`` (``KeyError`` when missing);
        ``use_self_attention`` is read with ``.get``.
        """
        class_names = model_config['classes']
        self.classes = class_names
        self.n_classes = len(class_names)

        # Attribute name equals config key; order kept from the original.
        for key in ('class_agnostic', 'pooling_size', 'pooling_mode',
                    'crop_resize_with_max_pool', 'truncated'):
            setattr(self, key, model_config[key])

        self.use_self_attention = model_config.get('use_self_attention')

        for key in ('use_focal_loss', 'subsample_twice', 'rcnn_batch_size',
                    'feature_extractor_config', 'rpn_config'):
            setattr(self, key, model_config[key])

        # assigner
        self.target_assigner = TargetAssigner(
            model_config['target_assigner_config'])

        # sampler
        self.sampler = DetectionSampler({'fg_fraction': 1})

    def loss(self, prediction_dict, feed_dict):
        """Assign targets to the proposals, subsample them and compute the loss.

        Args:
            prediction_dict: output of ``forward``.  ``'rcnn_reg_weights'``
                is written into it as a side effect.
            feed_dict: mapping with ``'gt_boxes'`` and ``'gt_labels'``.

        Returns:
            dict with the RPN losses plus ``'rcnn_cls_loss'``.
        """
        losses = {}

        # submodule loss
        losses.update(self.rpn_model.loss(prediction_dict, feed_dict))

        # assigner; the first value of every proposal row is dropped
        proposals = prediction_dict['rois_batch']
        assigned = self.target_assigner.assign(
            proposals[:, :, 1:], feed_dict['gt_boxes'],
            feed_dict['gt_labels'])
        cls_targets, _, cls_weights, reg_weights = assigned

        # Per-proposal MSE between the column-1 probability and the target.
        # NOTE(review): only index 0 of the targets is used, which looks like
        # an assumed batch size of 1 - confirm.
        scores = prediction_dict['rcnn_cls_probs'][:, 1]
        per_roi_loss = self.rcnn_cls_loss(scores, cls_targets[0])

        # cls subsample: the same mask is passed as both the positive
        # indicator and the general indicator
        criterion = per_roi_loss * cls_weights
        valid = cls_weights > 0
        sampled_mask = self.sampler.subsample_batch(
            self.rcnn_batch_size,
            valid,
            criterion=criterion,
            indicator=valid)

        # In-place so the assigner's weight tensor itself is masked.
        cls_weights *= sampled_mask.type_as(cls_weights)
        num_sampled = (cls_weights > 0).sum(dim=-1)
        assert num_sampled, 'bug happens'

        per_roi_loss *= cls_weights[0]
        losses['rcnn_cls_loss'] = (
            per_roi_loss.sum(dim=-1) / num_sampled.float())

        # analysis precision
        analyzer = self.target_assigner.analyzer
        analyzer.analyze_ap(analyzer.match,
                            prediction_dict['rcnn_cls_probs'][:, 1],
                            feed_dict['gt_labels'].numel(),
                            thresh=0.5)

        prediction_dict['rcnn_reg_weights'] = reg_weights

        return losses
# Example #6
# 0
class RPNModel(Model):
    def init_param(self, model_config):
        """Copy RPN hyper-parameters from ``model_config`` and build helpers.

        Args:
            model_config: mapping that must provide the keys ``din``,
                ``post_nms_topN``, ``pre_nms_topN``, ``nms_thresh``,
                ``use_score``, ``rpn_batch_size``, ``use_focal_loss``,
                ``anchor_generator_config`` and ``target_assigner_config``.
        """
        # (attribute name, config key) pairs copied verbatim onto ``self``.
        plain_params = (
            ('in_channels', 'din'),
            ('post_nms_topN', 'post_nms_topN'),
            ('pre_nms_topN', 'pre_nms_topN'),
            ('nms_thresh', 'nms_thresh'),
            ('use_score', 'use_score'),
            ('rpn_batch_size', 'rpn_batch_size'),
            ('use_focal_loss', 'use_focal_loss'),
        )
        for attr_name, config_key in plain_params:
            setattr(self, attr_name, model_config[config_key])

        # A single sampler is shared by the regression and the
        # classification subsampling steps of ``loss``.
        self.sampler = DetectionSampler({'fg_fraction': 1})

        # Anchor generator, plus the channel counts of the two prediction
        # heads derived from the number of anchors it reports.
        anchor_generator = AnchorGenerator(
            model_config['anchor_generator_config'])
        anchor_count = anchor_generator.num_anchors
        self.anchor_generator = anchor_generator
        self.num_anchors = anchor_count
        self.nc_bbox_out = 4 * anchor_count
        self.nc_score_out = anchor_count * 2

        # Target assigner; its bbox coder is reused to decode proposals.
        target_assigner = TargetAssigner(
            model_config['target_assigner_config'])
        self.target_assigner = target_assigner
        self.bbox_coder = target_assigner.bbox_coder

    def init_weights(self):
        """Initialise the three RPN conv layers with ``Filler.normal_init``.

        ``self.truncated`` is forced to ``False`` and forwarded as the last
        argument of every call; the two numeric arguments (0 and 0.01) are
        presumably the mean and std of the normal distribution -- confirm
        against ``Filler``.
        """
        self.truncated = False

        for layer_name in ('rpn_conv', 'rpn_cls_score', 'rpn_bbox_pred'):
            Filler.normal_init(
                getattr(self, layer_name), 0, 0.01, self.truncated)

    def init_modules(self):
        """Create the RPN conv layers and the element-wise loss modules.

        Reads ``in_channels``, ``nc_score_out``, ``nc_bbox_out``,
        ``num_anchors`` and ``use_score`` from ``self``.  When ``use_score``
        is set, ``self.nc_bbox_out`` is divided by ``self.num_anchors`` in
        place, because the bbox head is then applied once per anchor (see
        ``forward``).
        """
        # 3x3 conv (stride 1, padding 1) applied to the input feature map
        self.rpn_conv = nn.Conv2d(self.in_channels, 512, 3, 1, 1, bias=True)

        # 1x1 conv producing the bg/fg classification scores
        self.rpn_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0)

        # 1x1 conv producing the anchor box offsets
        if self.use_score:
            # the two class scores of one anchor are concatenated to the
            # 512 conv channels before the bbox head is applied
            bbox_feat_channels = 512 + 2
            # Floor division keeps the channel count an ``int``; true
            # division yields a float on Python 3, which nn.Conv2d rejects.
            self.nc_bbox_out //= self.num_anchors
        else:
            bbox_feat_channels = 512
        self.rpn_bbox_pred = nn.Conv2d(bbox_feat_channels, self.nc_bbox_out, 1,
                                       1, 0)

        # bbox: per-element loss so that ``loss`` can weight and subsample it
        self.rpn_bbox_loss = nn.modules.loss.SmoothL1Loss(reduce=False)

        # cls: per-element MSE is always used here; ``self.use_focal_loss``
        # is not consulted by this method.
        self.rpn_cls_loss = nn.MSELoss(reduce=False)

    def generate_proposal(self, rpn_cls_probs, anchors, rpn_bbox_preds,
                          im_info):
        """Decode, clip, rank and NMS-filter the RPN box predictions.

        Args:
            rpn_cls_probs: FloatTensor, shape(N,2*num_anchors,H,W); the
                channels from ``num_anchors`` onwards are used as the
                foreground probabilities.
            anchors: indexable container; only ``anchors[0]`` is used
                (a single feature map is supported).
            rpn_bbox_preds: FloatTensor, shape(N,num_anchors*4,H,W)
            im_info: forwarded unchanged to ``box_ops.clip_boxes``.

        Returns:
            proposals_batch: shape(N,post_nms_topN,4), unused rows are 0
            proposals_order: shape(N,post_nms_topN), index of every kept
                proposal in the flattened prediction list, unused slots
                are -1
        """
        # TODO create a new Function
        first_map_anchors = anchors[0]

        # proposals must not back-propagate into the RPN heads
        cls_probs = rpn_cls_probs.detach()
        bbox_deltas = rpn_bbox_preds.detach()

        batch_size = bbox_deltas.shape[0]
        # shape(N,H*W*num_anchors,4)
        bbox_deltas = bbox_deltas.permute(0, 2, 3, 1).contiguous().view(
            batch_size, -1, 4)

        # apply the deltas to the anchors, then clip the decoded boxes
        decoded = self.bbox_coder.decode_batch(bbox_deltas, first_map_anchors)
        decoded = box_ops.clip_boxes(decoded, im_info)

        # foreground probability of every anchor, flattened like the boxes
        fg_scores = cls_probs[:, self.num_anchors:, :, :]
        fg_scores = fg_scores.permute(0, 2, 3, 1).contiguous()
        fg_scores = fg_scores.view(batch_size, -1)

        # indices sorted by descending foreground probability
        ranking = torch.sort(fg_scores, dim=1, descending=True)[1]

        # output buffers: zero-padded boxes and -1-padded source indices
        proposals_batch = torch.zeros(batch_size, self.post_nms_topN,
                                      4).type_as(bbox_deltas)
        proposals_order = torch.zeros(batch_size, self.post_nms_topN)
        proposals_order = proposals_order.fill_(-1).type_as(ranking)

        for img_idx in range(batch_size):
            order_i = ranking[img_idx]
            # keep only the best-scoring candidates before NMS
            if self.pre_nms_topN > 0:
                order_i = order_i[:self.pre_nms_topN]
            boxes_i = decoded[img_idx][order_i]
            scores_i = fg_scores[img_idx][order_i]

            # nms receives rows of (box coordinates, score)
            dets_i = torch.cat((boxes_i, scores_i.unsqueeze(1)), 1)
            kept = nms(dets_i, self.nms_thresh).long().view(-1)

            # truncate the survivors after NMS
            if self.post_nms_topN > 0:
                kept = kept[:self.post_nms_topN]

            # write the survivors at the front; the tail keeps its padding
            kept_count = kept.numel()
            proposals_batch[img_idx, :kept_count, :] = boxes_i[kept, :]
            proposals_order[img_idx, :kept_count] = order_i[kept]
        return proposals_batch, proposals_order

    def forward(self, bottom_blobs):
        """Run the RPN heads and turn their outputs into proposals.

        Args:
            bottom_blobs: dict providing ``base_feat`` (feature map,
                batch on dim 0, spatial size on the last two dims) and
                ``im_info``; ``gt_boxes`` is additionally required while
                ``self.training`` is true.

        Returns:
            dict with the keys ``proposals_batch``, ``rois_batch``
            (proposals with the image index prepended as column 0, plus
            the ground-truth boxes in training), ``anchors``,
            ``rpn_bbox_preds``, ``rpn_cls_probs`` (reshaped to
            (N, H*W*num_anchors, 2)) and ``proposals_order``.
        """
        base_feat = bottom_blobs['base_feat']
        batch_size = base_feat.shape[0]
        im_info = bottom_blobs['im_info']

        # rpn conv
        rpn_conv = F.relu(self.rpn_conv(base_feat), inplace=True)

        # rpn cls score
        # shape(N,2*num_anchors,H,W)
        rpn_cls_scores = self.rpn_cls_score(rpn_conv)

        # rpn cls prob shape(N,2*num_anchors,H,W)
        # Viewing as (N, 2, -1) pairs channel ``a`` with channel
        # ``num_anchors + a``, so the softmax runs over the two classes of
        # each anchor.
        rpn_cls_score_reshape = rpn_cls_scores.view(batch_size, 2, -1)
        rpn_cls_probs = F.softmax(rpn_cls_score_reshape, dim=1)
        rpn_cls_probs = rpn_cls_probs.view_as(rpn_cls_scores)

        # rpn bbox pred
        # shape(N,4*num_anchors,H,W)
        if self.use_score:
            # The bbox head is applied once per anchor on the conv
            # features concatenated with that anchor's two class scores.
            # The 4-D score map is sliced with ``i::num_anchors`` to pick
            # channels ``i`` and ``num_anchors + i`` -- shape (N,2,H,W).
            # (Previously a 3-D permuted tensor was indexed with four
            # indices and the loop variable was ignored.)
            rpn_bbox_preds = []
            for i in range(self.num_anchors):
                anchor_scores = rpn_cls_scores[:, i::self.num_anchors, :, :]
                rpn_bbox_feat = torch.cat([rpn_conv, anchor_scores], dim=1)
                rpn_bbox_preds.append(self.rpn_bbox_pred(rpn_bbox_feat))
            rpn_bbox_preds = torch.cat(rpn_bbox_preds, dim=1)
        else:
            # get rpn offsets to the anchor boxes
            rpn_bbox_preds = self.rpn_bbox_pred(rpn_conv)

        # generate anchors for the single feature map
        feature_map_list = [base_feat.size()[-2:]]
        anchors = self.anchor_generator.generate(feature_map_list)

        ###############################
        # Proposal
        ###############################
        # note that proposals_order is used to track which prediction each
        # proposal came from
        proposals_batch, proposals_order = self.generate_proposal(
            rpn_cls_probs, anchors, rpn_bbox_preds, im_info)
        # prepend the image index as column 0 of every roi
        batch_idx = torch.arange(batch_size).view(batch_size, 1).expand(
            -1, proposals_batch.shape[1]).type_as(proposals_batch)
        rois_batch = torch.cat((batch_idx.unsqueeze(-1), proposals_batch),
                               dim=2)

        if self.training:
            # ground truth is only needed (and only read) while training
            gt_boxes = bottom_blobs['gt_boxes']
            rois_batch = self.append_gt(rois_batch, gt_boxes)

        # postprocess: (N,2*A,H,W) -> (N,2,A,H,W) -> (N,H,W,A,2) -> (N,-1,2)
        rpn_cls_probs = rpn_cls_probs.view(batch_size, 2, -1,
                                           rpn_cls_probs.shape[2],
                                           rpn_cls_probs.shape[3])
        rpn_cls_probs = rpn_cls_probs.permute(0, 3, 4, 2, 1).contiguous().view(
            batch_size, -1, 2)
        predict_dict = {
            'proposals_batch': proposals_batch,
            'rois_batch': rois_batch,
            'anchors': anchors,

            # used for loss
            'rpn_bbox_preds': rpn_bbox_preds,
            'rpn_cls_probs': rpn_cls_probs,
            'proposals_order': proposals_order,
        }

        return predict_dict

    def append_gt(self, rois_batch, gt_boxes):
        """Append the ground-truth boxes to the rois of every image.

        Args:
            rois_batch: tensor of shape (N, R, 5); column 0 holds the image
                index, columns 1:5 the box.
            gt_boxes: tensor of shape (N, G, >=4); only the first four
                values of the last dimension are used.

        Returns:
            Tensor of shape (N, R + G, 5): ``rois_batch`` followed, along
            dim 1, by the ground-truth boxes converted to roi layout.
        """
        num_images, num_gt = gt_boxes.shape[0], gt_boxes.shape[1]
        gt_rois = torch.zeros(num_images, num_gt, 5).type_as(gt_boxes)

        # Column 0 is the image index, as for the proposals built in
        # ``forward``.  Leaving it at zero would attribute every
        # ground-truth roi to image 0 when the batch holds several images.
        image_idx = torch.arange(num_images).type_as(gt_boxes).view(-1, 1)
        gt_rois[:, :, 0] = image_idx
        gt_rois[:, :, 1:5] = gt_boxes[:, :, :4]

        # cat gt_boxes to rois_batch
        return torch.cat([rois_batch, gt_rois], dim=1)

    def loss(self, prediction_dict, feed_dict):
        """Compute the subsampled RPN classification and regression losses.

        Args:
            prediction_dict: output of ``forward``; the keys ``anchors``,
                ``rpn_bbox_preds`` and ``rpn_cls_probs`` are read.
            feed_dict: dict providing ``gt_boxes``.

        Returns:
            dict with ``rpn_cls_loss`` and ``rpn_bbox_loss``, each holding
            one value per image: the weighted loss summed over the sampled
            anchors and divided by the number of sampled anchors (at
            least 1).
        """
        loss_dict = {}

        gt_boxes = feed_dict['gt_boxes']

        anchors = prediction_dict['anchors']

        assert len(anchors) == 1, 'just one feature maps is supported now'
        anchors = anchors[0]

        #################################
        # target assigner
        ################################
        # no gt labels are passed: it is just a binary classification
        rpn_cls_targets, rpn_reg_targets, \
            rpn_cls_weights, rpn_reg_weights = \
            self.target_assigner.assign(anchors, gt_boxes, gt_labels=None)

        ################################
        # double subsample
        ################################

        # reg subsample (ohem): the per-anchor bbox loss is the criterion
        rpn_bbox_preds = prediction_dict['rpn_bbox_preds']
        rpn_bbox_preds = rpn_bbox_preds.permute(0, 2, 3, 1).contiguous()
        # shape(N,H*W*num_anchors,4)
        rpn_bbox_preds = rpn_bbox_preds.view(rpn_bbox_preds.shape[0], -1, 4)
        rpn_reg_loss = self.rpn_bbox_loss(rpn_bbox_preds, rpn_reg_targets)
        bbox_criterion = rpn_reg_loss.sum(dim=-1)

        # subsample from the anchors that carry a regression weight
        pos_indicator = rpn_reg_weights > 0
        reg_batch_sampled_mask = self.sampler.subsample_batch(
            self.rpn_batch_size, pos_indicator, criterion=bbox_criterion)
        rpn_reg_weights = rpn_reg_weights * reg_batch_sampled_mask.type_as(
            rpn_reg_weights)
        # Per-image sample count, clamped so that an image without any
        # sample divides by 1.  (Testing the count tensor with ``if``
        # raises as soon as the batch holds more than one image.)
        num_reg_coeff = (rpn_reg_weights > 0).sum(dim=1).float().clamp(min=1)

        # loss weight; out-of-place so no tensor in the graph is overwritten
        rpn_reg_loss = rpn_reg_loss * rpn_reg_weights.unsqueeze(-1)
        rpn_reg_loss = rpn_reg_loss.view(
            rpn_reg_loss.shape[0], -1).sum(dim=1) / num_reg_coeff

        # cls loss on the probability stored at index 1 of the last dim
        rpn_cls_probs = prediction_dict['rpn_cls_probs'][:, :, 1]
        rpn_cls_loss = self.rpn_cls_loss(rpn_cls_probs.view(-1),
                                         rpn_cls_targets.view(-1))
        rpn_cls_loss = rpn_cls_loss.view_as(rpn_cls_weights)

        # iou subsample (ohem): sample from all anchors with a cls weight
        indicator = rpn_cls_weights > 0
        iou_batch_sampled_mask = self.sampler.subsample_batch(
            self.rpn_batch_size,
            indicator,
            criterion=rpn_cls_loss,
            indicator=indicator)

        # make sure the cls samples include the reg samples; compare with 0
        # first so that fractional weights are not truncated by the cast
        iou_batch_sampled_mask |= (rpn_reg_weights > 0).type_as(
            iou_batch_sampled_mask)
        rpn_cls_weights = rpn_cls_weights * iou_batch_sampled_mask.type_as(
            rpn_cls_weights)
        num_cls_coeff = (rpn_cls_weights > 0).sum(dim=1).float().clamp(min=1)

        # loss weight
        rpn_cls_loss = (rpn_cls_loss * rpn_cls_weights).sum(
            dim=1) / num_cls_coeff

        loss_dict['rpn_cls_loss'] = rpn_cls_loss
        loss_dict['rpn_bbox_loss'] = rpn_reg_loss
        return loss_dict