Example No. 1
    def test_fast_rcnn(self):
        torch.manual_seed(132)

        box_head_output_size = 8

        box_predictor = FastRCNNOutputLayers(
            ShapeSpec(channels=box_head_output_size),
            box2box_transform=Box2BoxTransform(weights=(10, 10, 5, 5)),
            num_classes=5,
        )
        feature_pooled = torch.rand(2, box_head_output_size)
        predictions = box_predictor(feature_pooled)

        proposal_boxes = torch.tensor([[0.8, 1.1, 3.2, 2.8], [2.3, 2.5, 7, 8]],
                                      dtype=torch.float32)
        gt_boxes = torch.tensor([[1, 1, 3, 3], [2, 2, 6, 6]],
                                dtype=torch.float32)
        proposal = Instances((10, 10))
        proposal.proposal_boxes = Boxes(proposal_boxes)
        proposal.gt_boxes = Boxes(gt_boxes)
        proposal.gt_classes = torch.tensor([1, 2])

        with EventStorage():  # capture events in a new storage to discard them
            losses = box_predictor.losses(predictions, [proposal])

        expected_losses = {
            "loss_cls": torch.tensor(1.7951188087),
            "loss_box_reg": torch.tensor(4.0357131958),
        }
        for name in expected_losses.keys():
            assert torch.allclose(losses[name], expected_losses[name])
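For reference, a minimal sketch of the matching inference call, reusing the predictor and proposal built above (a sketch that assumes the detectron2 defaults for the test-time score/NMS thresholds):

with torch.no_grad():
    pred_instances, kept_indices = box_predictor.inference(predictions, [proposal])
# pred_instances: list[Instances] carrying pred_boxes, scores and pred_classes per image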
Example No. 2
    def test_fast_rcnn_empty_batch(self):
        box_predictor = FastRCNNOutputLayers(
            ShapeSpec(channels=10), Box2BoxTransform(weights=(10, 10, 5, 5)),
            8)

        logits = torch.randn(0, 100, requires_grad=True)
        deltas = torch.randn(0, 4, requires_grad=True)
        losses = box_predictor.losses([logits, deltas], [])
        for value in losses.values():
            self.assertTrue(torch.allclose(value, torch.zeros_like(value)))
        sum(losses.values()).backward()
        self.assertTrue(logits.grad is not None)
        self.assertTrue(deltas.grad is not None)

        predictions, _ = box_predictor.inference([logits, deltas], [])
        self.assertEqual(len(predictions), 0)
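The empty batch works because autograd flows through zero-element tensors; a standalone sketch of that behavior:

x = torch.randn(0, 4, requires_grad=True)
x.sum().backward()                      # sum over zero elements is tensor(0.)
assert x.grad is not None and x.grad.shape == (0, 4)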
Example No. 3
class RelationROIHeads(Res5ROIHeads):
    def __init__(self, cfg, input_shape):
        """
        Args:
            num_relation (int): the number of relation modules used; each has
                separate parameters.
        """
        super().__init__(cfg, input_shape)
        ############################### parameters #################################
        if self.training:
            self.pre_nms_dim = cfg.MODEL.RPN.PRE_NMS_TOPK_TRAIN
        else:
            self.pre_nms_dim = cfg.MODEL.RPN.PRE_NMS_TOPK_TEST
        self.num_relation = cfg.MODEL.RELATIONNET.NUM_RELATION
        self.pos_emb_dim = cfg.MODEL.RELATIONNET.POS_EMB_DIM
        self.feat_dim = cfg.MODEL.RELATIONNET.FEAT_DIM
        self.att_fc_dim = cfg.MODEL.RELATIONNET.ATT_FC_DIM
        self.att_groups = cfg.MODEL.RELATIONNET.ATT_GROUPS
        self.att_dim = cfg.MODEL.RELATIONNET.ATT_DIM
        self.pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
        self.num_reg_classes = self.num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES
        if cfg.MODEL.ROI_BOX_HEAD.CLS_AGNOSTIC_BBOX_REG:
            self.num_reg_classes = 2
        self.device = torch.device(cfg.MODEL.DEVICE)
        self.learn_nms_train = cfg.MODEL.RELATIONNET.LEARN_NMS_TRAIN
        self.learn_nms_test = cfg.MODEL.RELATIONNET.LEARN_NMS_TEST
        self.first_n = cfg.MODEL.RELATIONNET.FIRST_N_TEST
        self.num_boxes = self.batch_size_per_image
        if self.training:
            self.first_n = cfg.MODEL.RELATIONNET.FIRST_N_TRAIN
        ############################### modules ####################################
        self.res5, self.res5_out_channels = self._build_res5_block(cfg)
        self.box_predictor = FastRCNNOutputLayers(
            cfg, ShapeSpec(channels=self.res5_out_channels, height=1, width=1))
        self.fc_feat = nn.Linear(self.res5_out_channels,
                                 self.feat_dim).to(self.device)
        self.fc = nn.ModuleList([
            nn.Linear(self.feat_dim, self.feat_dim).to(self.device)
            for i in range(2)
        ])
        self.nms_module = LearnNMSModule(cfg)
        ########################## freeze parameters ###############################
        # for block in self.res5:
        #     block.freeze()
        # for p in self.box_predictor.parameters():
        #     p.requires_grad = False
        ############################# initialization ###############################
        mean, std = 0.0, 0.01
        nn.init.normal_(self.fc_feat.weight, mean, std)
        nn.init.constant_(self.fc_feat.bias, mean)
        for i in range(2):
            nn.init.normal_(self.fc[i].weight, mean, std)
            nn.init.constant_(self.fc[i].bias, mean)

    def _build_attention_module_multi_head(self):
        attention_module_multi_head = AttentionModule(
            self.att_fc_dim, self.pos_emb_dim, self.feat_dim, self.att_dim,
            self.att_groups, self.num_reg_classes, self.device)
        return attention_module_multi_head

    @torch.no_grad()
    def label_proposals(self, proposals, targets):
        proposals_with_gt = []
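        # truncate every image to its top-scoring proposals, using the smallest
        # per-image proposal count so the batch stays rectangular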
        self.num_boxes = np.min([len(x.proposal_boxes) for x in proposals])
        for proposals_per_image, targets_per_image in zip(proposals, targets):
            has_gt = len(targets_per_image) > 0
            _, indices = torch.sort(proposals_per_image.objectness_logits,
                                    descending=True)
            sampled_idxs = indices[:self.num_boxes]
            proposals_per_image = proposals_per_image[sampled_idxs]
            match_quality_matrix = pairwise_iou(
                targets_per_image.gt_boxes, proposals_per_image.proposal_boxes)
            matched_idxs, matched_labels = self.proposal_matcher(
                match_quality_matrix)
            gt_classes = self._label_proposals(matched_idxs, matched_labels,
                                               targets_per_image.gt_classes)
            proposals_per_image.gt_classes = gt_classes
            if has_gt:
                for (trg_name,
                     trg_value) in targets_per_image.get_fields().items():
                    if trg_name.startswith(
                            "gt_") and not proposals_per_image.has(trg_name):
                        proposals_per_image.set(trg_name,
                                                trg_value[matched_idxs])
            else:
                gt_boxes = Boxes(
                    targets_per_image.gt_boxes.tensor.new_zeros(
                        (len(sampled_idxs), 4)))
                proposals_per_image.gt_boxes = gt_boxes
            proposals_with_gt.append(proposals_per_image)
        return proposals_with_gt

    def _label_proposals(self, matched_idxs, matched_labels, gt_classes):
        has_gt = gt_classes.numel() > 0
        if has_gt:
            gt_classes = gt_classes[matched_idxs]
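            # Matcher convention: matched_labels is 1 (fg), 0 (bg), -1 (ignore);
            # bg and ignore first get the background class, then ignore is reset to -1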
            gt_classes[matched_labels <= 0] = self.num_classes
            gt_classes[matched_labels == -1] = -1
        else:
            gt_classes = torch.zeros_like(matched_idxs) + self.num_classes
        return gt_classes

    def forward(self, images, features, proposals, targets=None):
        """
        Args:
            images (ImageList)
            features (dict[str,Tensor]):
                key: str like ["p2", "p3", "p4", "p5"] or ["res4"]
                value: Tensor.shape = (N, C, H, W)
            proposals (list[Instances]):
                Each Instances contains the bboxes/masks/keypoints of an image. We focus on
                    - proposal_boxes: proposed bboxes in `Boxes` format
                    - objectness_logits: list[np.ndarray], each an N-sized array of
                      objectness scores corresponding to the boxes

            targets (list[Instances], optional): length `N` list of `Instances`. The i-th
                `Instances` contains the ground-truth per-instance annotations
                for the i-th input image.  Specify `targets` during training only.
                It may have the following fields:

                - gt_boxes: the bounding box of each instance.
                - gt_classes: the label for each instance with a category ranging in [0, #class].

        Returns:
            pred_instances (list[Instances]): length `N` list of `Instances` containing the
            detected instances. Returned during inference only; may be [] during training.

            loss (dict[str->Tensor]):
            mapping from a named loss to a tensor storing the loss. Used during training only.
        """
        # TODO: index the nms_multi_target to get the corresponding "first_n"
        # complete the binary cross_entropy loss
        del images
        if self.training:
            assert targets
            proposals = self.label_proposals(proposals, targets)
        # proposal_boxes: List[Boxes]
        proposal_boxes = [x.proposal_boxes for x in proposals]
        # (all_valid_boxes, channels, outshape1, outshape2)
        box_features = self._shared_roi_transform(
            [features[f] for f in self.in_features], proposal_boxes)
        # (all_valid_boxes, channels * outshape1 * outshape2)
        # box_features = box_features.view(box_features.shape[0], -1)

        ################################ 2fc+RM Head ###############################
        # Input:
        #     box_features (Tensor): (batch_images*num_boxes, channels, outshape1, outshape2)
        #     proposal_boxes (List[Boxes]): has batch_images instances
        # Output:
        #     rois:
        #     cls_prob:
        #     bbox_pred:
        # TODO: add ground truth boxes in query

        fc_out = self.fc_feat(box_features.mean(dim=[2, 3]))

        ############################### learn nms ##################################
        #
        # Input is a set of detected objects:
        #     Each object has its final 1024-d feature, classification score s0 and bounding boxes.
        #
        # The network has three steps.
        # 1. The 1024-d feature and classification score is fused to generate the appearance feature.
        # 2. A relation module transforms such appearance features of all objects.
        # 3. The transformed features of each object pass a linear classifier and sigmoid to output
        #    the probability ∈ [0, 1].

        # predictions: (cls_score, bbox_pred)
        #   - scores (Tensor): (all_valid_boxes, num_classes + 1), [0, num_classes]
        #     => num_classes indicates background
        #   - proposal_deltas (Tensor): (all_valid_boxes, num_reg_classes * 4)
        predictions = self.box_predictor(box_features.mean(dim=[2, 3]))
        # do not use learn_nms
        if self.training and (not self.learn_nms_train):
            raise NotImplementedError(
                "training should set learn_nms == True!")
        elif (not self.training) and (not self.learn_nms_test):
            pred_instances, _ = self.box_predictor.inference(
                predictions, proposals)
            pred_instances = self.forward_with_given_boxes(
                features, pred_instances)
            return pred_instances, {}

        # nms_multi_score: (batch_images, first_n, num_classes, num_thresh)
        # sorted_boxes: (batch_images, first_n, num_classes, 4)
        # sorted_score: (batch_images, first_n, num_classes)
        nms_multi_score, sorted_boxes, sorted_score = self.nms_module(
            fc_out, predictions, proposal_boxes, self.num_boxes)
        # (batch_images, first_n, num_classes, num_thresh)
        nms_multi_target = self.nms_module.get_multi_target(
            sorted_boxes, targets, sorted_score)
        nms_multi_target = nms_multi_target.detach()
        del targets
        ############################# construct losses ################################
        if self.training:
            del features
            losses = self.box_predictor.losses(predictions, proposals)
            losses["loss_relation"] = self.nms_module.nms_relation_loss(
                nms_multi_score, nms_multi_target)
            return [], losses
        else:
            pred_instances = self.nms_module.relationnet_inference(
                sorted_boxes,
                nms_multi_score,
                nms_multi_target,
                image_shapes=[x.image_size for x in proposals],
            )
            pred_instances = self.forward_with_given_boxes(
                features, pred_instances)
            return pred_instances, {}
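For orientation, the core of a relation module as a minimal sketch (single-head, appearance-only attention; the AttentionModule built above additionally embeds box geometry and runs several heads in parallel):

import torch
import torch.nn as nn
import torch.nn.functional as F

class RelationSketch(nn.Module):
    def __init__(self, dim=1024):
        super().__init__()
        self.q = nn.Linear(dim, dim)   # query projection
        self.k = nn.Linear(dim, dim)   # key projection
        self.v = nn.Linear(dim, dim)   # value projection
        self.scale = dim ** -0.5

    def forward(self, roi_feats):
        # roi_feats: (num_boxes, dim); every box attends to every other box
        att = F.softmax(self.q(roi_feats) @ self.k(roi_feats).t() * self.scale, dim=-1)
        return roi_feats + att @ self.v(roi_feats)   # residual relation feature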
Example No. 4
class GraphConnection(nn.Module):
    def __init__(
        self,
        cfg,
        input_shape,
    ):
        super(GraphConnection, self).__init__()
        self.cfg = cfg.clone()
        self.graph_channel = cfg.GRAPH.CHANNEL
        self.ignore_value = cfg.MODEL.SEM_SEG_HEAD.IGNORE_VALUE
        self.heads = cfg.GRAPH.HEADS
        self.stuff_out_channel = cfg.GRAPH.STUFF_OUT_CHANNEL
        self.loss_weight_stuff = cfg.MODEL.SEM_SEG_HEAD.LOSS_WEIGHT
        self.num_classes = cfg.MODEL.ROI_HEADS.NUM_CLASSES

        self.region_in_proj = nn.Linear(cfg.MODEL.ROI_BOX_HEAD.FC_DIM,
                                        self.graph_channel)
        self.stuff_in_proj = nn.Linear(cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM * 4,
                                       self.graph_channel)
        weight_init.c2_xavier_fill(self.region_in_proj)
        weight_init.c2_xavier_fill(self.stuff_in_proj)

        self.graph = GAT(nfeat=self.graph_channel,
                         nhid=self.graph_channel // self.heads,
                         nclass=self.graph_channel,
                         dropout=0.1,
                         alpha=0.4,
                         nheads=self.heads)
        '''New box head'''
        self.region_out_proj = nn.Linear(self.graph_channel,
                                         self.graph_channel)
        weight_init.c2_xavier_fill(self.region_out_proj)

        # in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
        # pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
        # box_head = build_box_head(
        #     cfg, ShapeSpec(channels=256, height=pooler_resolution, width=pooler_resolution)
        # )  # TODO: hard code in the channels
        # print(box_head.output_shape)
        box_output_shape = ShapeSpec(channels=cfg.MODEL.ROI_BOX_HEAD.FC_DIM +
                                     self.graph_channel)

        self.new_box_predictor = FastRCNNOutputLayers(cfg, box_output_shape)
        '''New mask head'''
        ret_dict = self._init_mask_head(cfg, input_shape)
        self.mask_in_features = ret_dict["mask_in_features"]
        self.new_mask_pooler = ret_dict["mask_pooler"]
        self.new_mask_head = ret_dict["mask_head"]
        # weight_init.c2_xavier_fill(self.new_mask_head)
        '''New segment head'''
        self.stuff_out_proj = nn.Linear(self.graph_channel,
                                        self.stuff_out_channel)
        self.seg_score = nn.Conv2d(
            cfg.MODEL.SEM_SEG_HEAD.CONVS_DIM * 4 + self.stuff_out_channel,
            cfg.MODEL.SEM_SEG_HEAD.NUM_CLASSES, 1)
        self.upsample_rate = 4
        weight_init.c2_xavier_fill(self.stuff_out_proj)
        weight_init.c2_xavier_fill(self.seg_score)

    @classmethod
    def _init_mask_head(cls, cfg, input_shape):
        if not cfg.MODEL.MASK_ON:
            return {}
        # fmt: off
        in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
        pooler_resolution = cfg.MODEL.ROI_MASK_HEAD.POOLER_RESOLUTION
        pooler_scales = tuple(1.0 / input_shape[k].stride for k in in_features)
        sampling_ratio = cfg.MODEL.ROI_MASK_HEAD.POOLER_SAMPLING_RATIO
        pooler_type = cfg.MODEL.ROI_MASK_HEAD.POOLER_TYPE
        # fmt: on

        in_channels = [input_shape[f].channels for f in in_features][0]

        ret = {"mask_in_features": in_features}
        ret["mask_pooler"] = (ROIPooler(
            output_size=pooler_resolution,
            scales=pooler_scales,
            sampling_ratio=sampling_ratio,
            pooler_type=pooler_type,
        ) if pooler_type else None)
        if pooler_type:
            shape = ShapeSpec(channels=in_channels,
                              width=pooler_resolution,
                              height=pooler_resolution)
        else:
            shape = {f: input_shape[f] for f in in_features}
        ret["mask_head"] = build_mask_head(cfg, shape)
        return ret

    # def _forward_mask(self, features: Dict[str, torch.Tensor], instances: List[Instances]):
    #     """
    #     Forward logic of the mask prediction branch.
    #
    #     Args:
    #         features (dict[str, Tensor]): mapping from feature map names to tensor.
    #             Same as in :meth:`ROIHeads.forward`.
    #         instances (list[Instances]): the per-image instances to train/predict masks.
    #             In training, they can be the proposals.
    #             In inference, they can be the boxes predicted by R-CNN box head.
    #
    #     Returns:
    #         In training, a dict of losses.
    #         In inference, update `instances` with new fields "pred_masks" and return it.
    #     """
    #     if not self.mask_on:
    #         return {} if self.training else instances
    #
    #     if self.training:
    #         # head is only trained on positive proposals.
    #         instances, _ = select_foreground_proposals(instances, self.num_classes)
    #
    #     if self.mask_pooler is not None:
    #         features = [features[f] for f in self.mask_in_features]
    #         boxes = [x.proposal_boxes if self.training else x.pred_boxes for x in instances]
    #         features = self.new_mask_pooler(features, boxes)
    #     else:
    #         features = {f: features[f] for f in self.mask_in_features}
    #     return self.new_mask_head(features, instances)

    def forward(
        self,
        region_f,
        proposals,
        features,
        stuff_f,
        semseg_score,
        semseg_targets,
        images=None,
        seg_result=None,
        c2d=None,
        ori_sizes=None,
        img_ids=None,
    ):
        '''

        Args:
            region_f: region features
            proposals: predicted proposals, containing the gt
            features: fpn features
            stuff_f: stuff features
            semseg_score: predicted semantic scores
            semseg_targets: semantic segmentation gt
            images: original images
            seg_result:
            c2d:
            ori_sizes:
            img_ids:

        Returns:

        '''
        assert len(proposals) == len(stuff_f)
        bs, _, h, w = semseg_score.shape
        proposals_num = [len(p) for p in proposals]

        assert sum(proposals_num) == len(region_f)

        region_nodes = self.region_in_proj(region_f)
        class_center = torch.matmul(
            F.softmax(semseg_score.flatten(start_dim=2),
                      dim=-1),  # softmax along hw
            stuff_f.flatten(start_dim=2).transpose(1, 2))  # bs x cls x 512
        class_nodes = self.stuff_in_proj(class_center)
        region_nodes_split = region_nodes.split(proposals_num)

        new_region_nodes, new_class_nodes = [], []
        for i in range(bs):
            region_node_per_img = region_nodes_split[i]
            stuff_node_per_img = class_nodes[i]
            nodes_num = len(region_node_per_img) + len(stuff_node_per_img)
            adj = torch.ones(
                nodes_num, nodes_num).cuda().detach()  # fully connected graph
            graph_nodes = self.graph(
                torch.cat([region_node_per_img, stuff_node_per_img]), adj)

            new_region_f_per_img, new_stuff_f_per_img = graph_nodes.split(
                [len(region_node_per_img),
                 len(stuff_node_per_img)])
            new_region_nodes.append(new_region_f_per_img)
            new_class_nodes.append(new_stuff_f_per_img)

        new_region_f = torch.cat(
            [region_f,
             self.region_out_proj(torch.cat(new_region_nodes))],
            dim=-1)
        new_prediction = self.new_box_predictor(new_region_f)

        # box post-process
        if self.training:
            losses_box = self.new_box_predictor.losses(new_prediction,
                                                       proposals)
            # losses_mask
            instances, _ = select_foreground_proposals(proposals,
                                                       self.num_classes)
            features = [features[f] for f in self.mask_in_features]
            boxes = [
                x.proposal_boxes if self.training else x.pred_boxes
                for x in instances
            ]
            features = self.new_mask_pooler(features, boxes)
            losses_mask = self.new_mask_head(features, instances)
        else:
            # testing
            # box
            pred_instances, _ = self.new_box_predictor.inference(
                new_prediction, proposals)
            # mask
            assert pred_instances[0].has(
                "pred_boxes") and pred_instances[0].has("pred_classes")
            features = [features[f] for f in self.mask_in_features]
            boxes = [
                x.proposal_boxes if self.training else x.pred_boxes
                for x in pred_instances
            ]
            features = self.new_mask_pooler(features, boxes)
            instances = self.new_mask_head(features, pred_instances)

        # stuff
        new_class_nodes = self.stuff_out_proj(torch.stack(new_class_nodes))
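        # scatter the graph-refined class nodes back onto the spatial map,
        # weighting each pixel by its softmax class probability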
        new_stuff_f = torch.matmul(
            F.softmax(semseg_score.flatten(start_dim=2),
                      dim=1).permute(0, 2, 1),  # softmax along nodes
            new_class_nodes).permute(0, 2, 1).view(bs, self.stuff_out_channel,
                                                   h, w)
        semseg_score = self.seg_score(torch.cat([stuff_f, new_stuff_f], dim=1))
        semseg_score = semseg_score.float()
        segments = F.interpolate(semseg_score,
                                 None,
                                 self.upsample_rate,
                                 mode='bilinear',
                                 align_corners=False)
        #
        del semseg_score
        if self.training:
            loss = F.cross_entropy(segments,
                                   semseg_targets,
                                   reduction="mean",
                                   ignore_index=self.ignore_value)
            losses_sem = {"new_loss_sem_seg": loss * self.loss_weight_stuff}

            # update loss weight name
            # pdb.set_trace()
            losses_box.update(losses_mask)
            losses_box.update(losses_sem)
            key_list = list(losses_box.keys())
            for key in key_list:
                if 'new' not in key:
                    losses_box["new_" + key] = losses_box.pop(key)

            return None, None, losses_box
        else:
            return instances, segments, None
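The per-image concat -> graph -> split pattern from the forward pass above, as a standalone sketch (graph_layer is a stand-in for the GAT; any layer taking (nodes, adjacency) fits):

import torch

def fuse_nodes(region_nodes, class_nodes, graph_layer):
    # region_nodes: (num_boxes, C); class_nodes: (num_stuff_classes, C)
    nodes = torch.cat([region_nodes, class_nodes])                 # (N, C)
    adj = torch.ones(len(nodes), len(nodes), device=nodes.device)  # fully connected
    out = graph_layer(nodes, adj)
    return out.split([len(region_nodes), len(class_nodes)])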
Example No. 5
class AttributeStandardROIHeads(AttributeROIHeads, StandardROIHeads):
    """
    An extension of StandardROIHeads to include attribute prediction.
    """

    def __init__(self, cfg, input_shape):
        super(StandardROIHeads, self).__init__(cfg, input_shape)
        self._init_box_head(cfg, input_shape)
        self._init_mask_head(cfg, input_shape)
        self._init_keypoint_head(cfg, input_shape)

    def _init_box_head(self, cfg, input_shape):
        # fmt: off
        pooler_resolution        = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
        pooler_scales            = tuple(1.0 / input_shape[k].stride for k in self.in_features)
        sampling_ratio           = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
        pooler_type              = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE
        self.train_on_pred_boxes = cfg.MODEL.ROI_BOX_HEAD.TRAIN_ON_PRED_BOXES
        self.attribute_on        = cfg.MODEL.ATTRIBUTE_ON
        # fmt: on

        in_channels = [input_shape[f].channels for f in self.in_features]
        assert len(set(in_channels)) == 1, in_channels
        in_channels = in_channels[0]

        self.box_pooler = ROIPooler(
            output_size=pooler_resolution,
            scales=pooler_scales,
            sampling_ratio=sampling_ratio,
            pooler_type=pooler_type,
        )
        self.box_head = build_box_head(
            cfg,
            ShapeSpec(
                channels=in_channels, height=pooler_resolution, width=pooler_resolution
            ),
        )
        self.box_predictor = FastRCNNOutputLayers(cfg, self.box_head.output_shape)

        if self.attribute_on:
            self.attribute_predictor = AttributePredictor(
                cfg, self.box_head.output_shape.channels
            )

    def _forward_box(self, features, proposals):
        features = [features[f] for f in self.in_features]
        box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals])
        box_features, _ = self.box_head(box_features)
        predictions = self.box_predictor(box_features)

        if self.training:
            if self.train_on_pred_boxes:
                with torch.no_grad():
                    pred_boxes = self.box_predictor.predict_boxes_for_gt_classes(
                        predictions, proposals
                    )
                    for proposals_per_image, pred_boxes_per_image in zip(
                        proposals, pred_boxes
                    ):
                        proposals_per_image.proposal_boxes = Boxes(pred_boxes_per_image)
            losses = self.box_predictor.losses(predictions, proposals)
            if self.attribute_on:
                losses.update(self.forward_attribute_loss(proposals, box_features))
                del box_features

            return losses
        else:
            pred_instances, r_indices = self.box_predictor.inference(
                predictions, proposals
            )
            return pred_instances[0], r_indices[0]

    def get_conv5_features(self, features):
        assert len(self.in_features) == 1

        features = [features[f] for f in self.in_features]
        return features[0]

    def get_roi_features(self, features, proposals):
        features = [features[f] for f in self.in_features]
        box_features = self.box_pooler(features, [x.proposal_boxes for x in proposals])
        fc7, fc6 = self.box_head(box_features)
        return box_features, fc7, fc6
Example No. 6
class AttributeRes5ROIHeads(AttributeROIHeads, Res5ROIHeads):
    """
    An extension of Res5ROIHeads to include attribute prediction.
    """
    def __init__(self, cfg, input_shape):
        # super(Res5ROIHeads, self).__init__(cfg, input_shape) # d2 0.1.1
        super(Res5ROIHeads, self).__init__(cfg)   # d2 0.2.1
        # added to fit d2 0.2.1
        self.in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
        
        assert len(self.in_features) == 1

        # fmt: off
        pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
        pooler_type       = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE
        pooler_scales     = (1.0 / input_shape[self.in_features[0]].stride, )
        sampling_ratio    = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
        self.mask_on      = cfg.MODEL.MASK_ON
        self.attribute_on = cfg.MODEL.BUA.ATTRIBUTE_ON
        self.extract_on = cfg.MODEL.BUA.EXTRACT_FEATS
        self.extractor_mode  = cfg.MODEL.BUA.EXTRACTOR.MODE
        # fmt: on
        assert not cfg.MODEL.KEYPOINT_ON

        self.pooler = ROIPooler(
            output_size=pooler_resolution,
            scales=pooler_scales,
            sampling_ratio=sampling_ratio,
            pooler_type=pooler_type,
        )

        self.box2box_transform = BUABox2BoxTransform(weights=cfg.MODEL.ROI_BOX_HEAD.BBOX_REG_WEIGHTS)

        self.res5, out_channels = self._build_res5_block(cfg)
        self.box_predictor = FastRCNNOutputLayers(
            cfg, ShapeSpec(channels=out_channels, height=1, width=1)
        )

        if self.mask_on:
            self.mask_head = build_mask_head(
                cfg,
                ShapeSpec(channels=out_channels, width=pooler_resolution, height=pooler_resolution),
            )

        if self.attribute_on:
            self.attribute_predictor = AttributePredictor(cfg, out_channels)

    def forward(self, images, features, proposals, targets=None):
        del images

        if self.training:
            assert targets
            proposals = self.label_and_sample_proposals(proposals, targets)
        del targets

        proposal_boxes = [x.proposal_boxes for x in proposals]
        box_features = self._shared_roi_transform(
            [features[f] for f in self.in_features], proposal_boxes
        )
        feature_pooled = box_features.mean(dim=[2, 3])
        predictions = self.box_predictor(feature_pooled)

        if self.training:
            del features
            losses = self.box_predictor.losses(predictions, proposals)
            if self.mask_on:
                proposals, fg_selection_masks = select_foreground_proposals(
                    proposals, self.num_classes
                )
                mask_features = box_features[torch.cat(fg_selection_masks, dim=0)]
                del box_features
                losses.update(self.mask_head(mask_features, proposals))
            if self.attribute_on:
                losses.update(self.forward_attribute_loss(proposals, feature_pooled))
            return [], losses
        elif self.extract_on:
            pred_class_logits, pred_proposal_deltas = predictions
            # pred_class_logits = pred_class_logits[:, :-1]  # background is last
            cls_labels = torch.argmax(pred_class_logits, dim=1)
            num_preds_per_image = [len(p) for p in proposals]
            if self.extractor_mode == 1 or self.extractor_mode == 3:
                if self.attribute_on:
                    attr_scores = self.forward_attribute_score(feature_pooled, cls_labels)
                    return (proposal_boxes,
                            self.predict_probs(pred_class_logits, num_preds_per_image),
                            feature_pooled.split(num_preds_per_image, dim=0),
                            attr_scores.split(num_preds_per_image, dim=0))
                else:
                    return (proposal_boxes,
                            self.predict_probs(pred_class_logits, num_preds_per_image),
                            feature_pooled.split(num_preds_per_image, dim=0))
            elif self.extractor_mode == 2:
                return (self.predict_boxes(proposals, pred_proposal_deltas,
                                           num_preds_per_image),
                        self.predict_probs(pred_class_logits, num_preds_per_image))
            else:
                raise ValueError('BUA.EXTRACTOR.MODE ERROR')
        else:
            pred_instances, _ = self.box_predictor.inference(predictions, proposals)
            pred_instances = self.forward_with_given_boxes(features, pred_instances)
            return pred_instances, {}

    def get_conv5_features(self, features):
        features = [features[f] for f in self.in_features]
        return self.res5(features[0])

    def get_roi_features(self, features, proposals):
        assert len(self.in_features) == 1

        features = [features[f] for f in self.in_features]
        box_features = self._shared_roi_transform(
            features, [x.proposal_boxes for x in proposals]
        )
        pooled_features = box_features.mean(dim=[2, 3])
        return box_features, pooled_features, None
    
    def predict_boxes(self, proposals, pred_proposal_deltas, num_preds_per_image):
        """
        Returns:
            list[Tensor]: A list of Tensors of predicted class-specific or class-agnostic boxes
                for each image. Element i has shape (Ri, K * B) or (Ri, B), where Ri is
                the number of predicted objects for image i and B is the box dimension (4 or 5)
        """
        # Always use 1 image per worker during inference since this is the
        # standard when reporting inference time in papers.
        box_type = type(proposals[0].proposal_boxes)
        # cat(..., dim=0) concatenates over all images in the batch
        proposals = box_type.cat([p.proposal_boxes for p in proposals])
        num_pred = len(proposals)
        B = proposals.tensor.shape[1]
        K = pred_proposal_deltas.shape[1] // B
        boxes = self.box2box_transform.apply_deltas(
            pred_proposal_deltas,
            proposals.tensor,
        )
        return boxes.view(num_pred, K * B).split(num_preds_per_image, dim=0)

    def predict_probs(self, pred_class_logits, num_preds_per_image):
        """
        Returns:
            list[Tensor]: A list of Tensors of predicted class probabilities for each image.
                Element i has shape (Ri, K + 1), where Ri is the number of predicted objects
                for image i.
        """
        probs = F.softmax(pred_class_logits, dim=-1)
        probs = probs[:, :-1]  # background is last
        return probs.split(num_preds_per_image, dim=0)
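predict_boxes defers the decoding to box2box_transform.apply_deltas; a minimal single-class sketch of that standard R-CNN decoding for reference (assumes XYXY boxes; the real implementation also clamps dw/dh before the exp to avoid overflow):

import torch

def apply_deltas_sketch(deltas, boxes, weights=(10.0, 10.0, 5.0, 5.0)):
    # boxes: (N, 4) proposals as (x1, y1, x2, y2); deltas: (N, 4) as (dx, dy, dw, dh)
    wx, wy, ww, wh = weights
    w = boxes[:, 2] - boxes[:, 0]
    h = boxes[:, 3] - boxes[:, 1]
    ctr_x = boxes[:, 0] + 0.5 * w
    ctr_y = boxes[:, 1] + 0.5 * h
    pred_ctr_x = (deltas[:, 0] / wx) * w + ctr_x     # shift center by scaled dx
    pred_ctr_y = (deltas[:, 1] / wy) * h + ctr_y
    pred_w = torch.exp(deltas[:, 2] / ww) * w        # rescale width by exp(dw)
    pred_h = torch.exp(deltas[:, 3] / wh) * h
    return torch.stack([pred_ctr_x - 0.5 * pred_w, pred_ctr_y - 0.5 * pred_h,
                        pred_ctr_x + 0.5 * pred_w, pred_ctr_y + 0.5 * pred_h], dim=1)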
Example No. 7
class AttributeRes5ROIHeads(AttributeROIHeads, Res5ROIHeads):
    """
    An extension of Res5ROIHeads to include attribute prediction.
    """
    def __init__(self, cfg, input_shape):
        super(Res5ROIHeads, self).__init__(cfg, input_shape)

        assert len(self.in_features) == 1

        # fmt: off
        pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
        pooler_type       = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE
        pooler_scales     = (1.0 / input_shape[self.in_features[0]].stride, )
        sampling_ratio    = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
        self.mask_on      = cfg.MODEL.MASK_ON
        self.attribute_on = cfg.MODEL.ATTRIBUTE_ON
        if self.attribute_on:
            self.attribute_thre = cfg.MODEL.ATTRIBUTE_THRE
        # fmt: on
        assert not cfg.MODEL.KEYPOINT_ON

        self.pooler = ROIPooler(
            output_size=pooler_resolution,
            scales=pooler_scales,
            sampling_ratio=sampling_ratio,
            pooler_type=pooler_type,
        )

        self.res5, out_channels = self._build_res5_block(cfg)
        self.box_predictor = FastRCNNOutputLayers(
            cfg, ShapeSpec(channels=out_channels, height=1, width=1)
        )

        if self.mask_on:
            self.mask_head = build_mask_head(
                cfg,
                ShapeSpec(channels=out_channels, width=pooler_resolution, height=pooler_resolution),
            )

        if self.attribute_on:
            self.attribute_predictor = AttributePredictor(cfg, out_channels)

    def forward(self, images, features, proposals, targets=None):
        del images

        if self.training:
            assert targets
            proposals = self.label_and_sample_proposals(proposals, targets)
        del targets

        proposal_boxes = [x.proposal_boxes for x in proposals]
        box_features = self._shared_roi_transform(
            [features[f] for f in self.in_features], proposal_boxes
        )
        feature_pooled = box_features.mean(dim=[2, 3])
        predictions = self.box_predictor(feature_pooled)

        if self.training:
            del features
            losses = self.box_predictor.losses(predictions, proposals)
            if self.mask_on:
                proposals, fg_selection_masks = select_foreground_proposals(
                    proposals, self.num_classes
                )
                mask_features = box_features[torch.cat(fg_selection_masks, dim=0)]
                del box_features
                losses.update(self.mask_head(mask_features, proposals))
            if self.attribute_on:
                losses.update(self.forward_attribute_loss(proposals, feature_pooled))
            return [], losses
        else:
            pred_instances, chose_indices = self.box_predictor.inference(predictions, proposals)
            pred_instances = self.forward_with_given_boxes(features, pred_instances)
            pred_instances[0].pred_attributes = [[] for _ in range(chose_indices[0].size(0))]
            pred_instances[0].attr_scores = [[] for _ in range(chose_indices[0].size(0))]

            if self.attribute_on and chose_indices[0].size(0) != 0:
                attr_labels, attr_scores = self.predict_attrs(
                    feature_pooled[chose_indices], 
                    predictions[0][chose_indices],
                    self.attribute_thre
                )
                pred_instances[0].pred_attributes = attr_labels
                pred_instances[0].attr_scores = attr_scores

            return pred_instances, {}

    def predict_attrs(self, features, obj_probs, score_thresh=0.5):
        obj_labels = torch.argmax(obj_probs, dim=1)

        attribute_scores = self.attribute_predictor(features, obj_labels)
        attr_labels = torch.argmax(attribute_scores, dim=1) 
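        # gather picks, for each box, the score of its arg-max attribute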
        attr_scores = attribute_scores.gather(1, attr_labels.unsqueeze(1))

        return attr_labels, attr_scores

    def get_conv5_features(self, features):
        features = [features[f] for f in self.in_features]
        return self.res5(features[0])
Example No. 8
class WSRes5ROIHeads(ROIHeads):
    """
    The ROIHeads in a typical "C4" R-CNN model, where
    the box and mask head share the cropping and
    the per-region feature computation by a Res5 block.
    """
    def __init__(self, cfg, input_shape):
        super().__init__(cfg)

        # fmt: off
        self.in_features = cfg.MODEL.ROI_HEADS.IN_FEATURES
        pooler_resolution = cfg.MODEL.ROI_BOX_HEAD.POOLER_RESOLUTION
        pooler_type = cfg.MODEL.ROI_BOX_HEAD.POOLER_TYPE
        pooler_scales = (1.0 / input_shape[self.in_features[0]].stride, )
        sampling_ratio = cfg.MODEL.ROI_BOX_HEAD.POOLER_SAMPLING_RATIO
        self.mask_on = cfg.MODEL.MASK_ON
        # fmt: on
        assert not cfg.MODEL.KEYPOINT_ON
        assert len(self.in_features) == 1

        self.pooler = ROIPooler(
            output_size=pooler_resolution,
            scales=pooler_scales,
            sampling_ratio=sampling_ratio,
            pooler_type=pooler_type,
        )

        self.res5, out_channels = self._build_res5_block(cfg)
        self.box_predictor = FastRCNNOutputLayers(
            cfg, ShapeSpec(channels=out_channels, height=1, width=1))

        if self.mask_on:
            self.mask_head = build_mask_head(
                cfg,
                ShapeSpec(channels=out_channels,
                          width=pooler_resolution,
                          height=pooler_resolution),
            )

    def _build_res5_block(self, cfg):
        # fmt: off
        stage_channel_factor = 2**3  # res5 is 8x res2
        num_groups = cfg.MODEL.RESNETS.NUM_GROUPS
        width_per_group = cfg.MODEL.RESNETS.WIDTH_PER_GROUP
        bottleneck_channels = num_groups * width_per_group * stage_channel_factor
        out_channels = cfg.MODEL.RESNETS.RES2_OUT_CHANNELS * stage_channel_factor
        stride_in_1x1 = cfg.MODEL.RESNETS.STRIDE_IN_1X1
        norm = cfg.MODEL.RESNETS.NORM
        assert not cfg.MODEL.RESNETS.DEFORM_ON_PER_STAGE[-1], \
            "Deformable conv is not yet supported in res5 head."
        # fmt: on

        blocks = make_stage(
            BottleneckBlock,
            3,
            stride_per_block=[1, 1, 2],
            has_pool_per_block=[False, False, True],
            in_channels=out_channels // 2,
            bottleneck_channels=bottleneck_channels,
            out_channels=out_channels,
            num_groups=num_groups,
            norm=norm,
            stride_in_1x1=stride_in_1x1,
        )
        return nn.Sequential(*blocks), out_channels

    def _shared_roi_transform(self, features, boxes):
        x = self.pooler(features, boxes)
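        # x: (total_boxes, C, pooler_resolution, pooler_resolution); res5 then
        # halves the spatial size and doubles the channel count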
        return self.res5(x)

    def forward(self, images, features, proposals, targets=None):
        """
        See :meth:`ROIHeads.forward`.
        """
        del images

        if self.training:
            assert targets
            proposals = self.label_and_sample_proposals(proposals, targets)
        del targets

        proposal_boxes = [x.proposal_boxes for x in proposals]
        box_features = self._shared_roi_transform(
            [features[f] for f in self.in_features], proposal_boxes)
        predictions = self.box_predictor(box_features.mean(dim=[2, 3]))

        if self.training:
            del features
            losses = self.box_predictor.losses(predictions, proposals)
            if self.mask_on:
                proposals, fg_selection_masks = select_foreground_proposals(
                    proposals, self.num_classes)
                # Since the ROI feature transform is shared between boxes and masks,
                # we don't need to recompute features. The mask loss is only defined
                # on foreground proposals, so we need to select out the foreground
                # features.
                mask_features = box_features[torch.cat(fg_selection_masks,
                                                       dim=0)]
                del box_features
                losses.update(self.mask_head(mask_features, proposals))
            return [], losses
        else:
            pred_instances, _ = self.box_predictor.inference(
                predictions, proposals)
            pred_instances = self.forward_with_given_boxes(
                features, pred_instances)
            return pred_instances, {}

    def forward_with_given_boxes(self, features, instances):
        """
        Use the given boxes in `instances` to produce other (non-box) per-ROI outputs.

        Args:
            features: same as in `forward()`
            instances (list[Instances]): instances to predict other outputs. Expect the keys
                "pred_boxes" and "pred_classes" to exist.

        Returns:
            instances (Instances):
                the same `Instances` object, with extra
                fields such as `pred_masks` or `pred_keypoints`.
        """
        assert not self.training
        assert instances[0].has("pred_boxes") and instances[0].has(
            "pred_classes")

        if self.mask_on:
            features = [features[f] for f in self.in_features]
            x = self._shared_roi_transform(features,
                                           [x.pred_boxes for x in instances])
            return self.mask_head(x, instances)
        else:
            return instances
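For completeness, a minimal construction sketch (assuming a default detectron2 config and the usual C4 'res4' feature map; the channel/stride values are typical ResNet-50 ones, not taken from this snippet):

from detectron2.config import get_cfg
from detectron2.layers import ShapeSpec

cfg = get_cfg()
cfg.MODEL.ROI_HEADS.IN_FEATURES = ["res4"]
input_shape = {"res4": ShapeSpec(channels=1024, stride=16)}
head = WSRes5ROIHeads(cfg, input_shape)   # then: head(images, features, proposals)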