Exemplo n.º 1
0
    def forward(self, inputs):
        """Run the detector on one batch and return losses or detections.

        Args:
            inputs: mapping read with the keys ``"image"`` and ``"im_info"``,
                plus ``"gt_boxes"`` when ``self.training`` is true.
                NOTE(review): judging by how the columns are used below, an
                ``im_info`` row looks like (resized_h, resized_w, original_h,
                original_w, num_gt_boxes) -- confirm against the data
                pipeline.

        Returns:
            In training mode the tuple ``(total, rpn_cls_loss, rpn_bbox_loss)``
            where ``total`` is the sum of the two losses.  Otherwise the tuple
            ``(all_level_box_cls[0], clipped_box)``: the sigmoid class scores
            of the first image and its decoded boxes after rescaling and
            clipping, reshaped to ``(-1, 4)``.

        Raises:
            AssertionError: in inference mode when ``self.batch_size != 1``.
        """
        image = self.preprocess_image(inputs["image"])
        features = self.backbone(image)
        features = [features[f] for f in self.in_features]

        box_cls, box_delta = self.head(features)

        # Move axis 1 to the end, then flatten every level to
        # (batch, positions, num_classes) / (batch, positions, 4).
        box_cls_list = [
            level_cls.dimshuffle(0, 2, 3, 1).reshape(
                self.batch_size, -1, self.cfg.num_classes
            )
            for level_cls in box_cls
        ]
        box_delta_list = [
            level_delta.dimshuffle(0, 2, 3, 1).reshape(self.batch_size, -1, 4)
            for level_delta in box_delta
        ]

        # One anchor set per selected feature level.  Pairing the feature maps
        # with their strides (instead of assuming exactly five levels) keeps
        # the anchors aligned with the predictions for any ``in_features``.
        anchors_list = [
            self.anchor_gen(feature, stride)
            for feature, stride in zip(features, self.stride_list)
        ]

        all_level_box_cls = F.sigmoid(F.concat(box_cls_list, axis=1))
        all_level_box_delta = F.concat(box_delta_list, axis=1)
        all_level_anchors = F.concat(anchors_list, axis=0)

        if self.training:
            box_gt_cls, box_gt_delta = self.get_ground_truth(
                all_level_anchors,
                inputs["gt_boxes"],
                # column 4 of im_info, cast to int32 for get_ground_truth
                inputs["im_info"][:, 4].astype(np.int32),
            )
            rpn_cls_loss = layers.get_focal_loss(
                all_level_box_cls,
                box_gt_cls,
                alpha=self.cfg.focal_loss_alpha,
                gamma=self.cfg.focal_loss_gamma,
            )
            rpn_bbox_loss = (
                layers.get_smooth_l1_loss(all_level_box_delta, box_gt_delta, box_gt_cls)
                * self.cfg.reg_loss_weight
            )

            total = rpn_cls_loss + rpn_bbox_loss
            return total, rpn_cls_loss, rpn_bbox_loss
        else:
            # currently not support multi-batch testing
            assert self.batch_size == 1

            transformed_box = self.box_coder.decode(
                all_level_anchors, all_level_box_delta[0],
            )
            transformed_box = transformed_box.reshape(-1, 4)

            # Per-axis ratios im_info[0, 1] / im_info[0, 3] and
            # im_info[0, 0] / im_info[0, 2]; dividing the boxes by them
            # presumably maps coordinates back to the original image size.
            scale_w = inputs["im_info"][0, 1] / inputs["im_info"][0, 3]
            scale_h = inputs["im_info"][0, 0] / inputs["im_info"][0, 2]
            transformed_box = transformed_box / F.concat(
                [scale_w, scale_h, scale_w, scale_h], axis=0
            )
            clipped_box = layers.get_clipped_box(
                transformed_box, inputs["im_info"][0, 2:4]
            ).reshape(-1, 4)
            return all_level_box_cls[0], clipped_box
Exemplo n.º 2
0
    def forward(self, features, im_info, boxes=None):
        """Produce RPN proposals and, in training mode, the RPN losses.

        Args:
            features: container indexed by the names in ``self.in_features``.
            im_info: forwarded unchanged to ``find_top_rpn_proposals`` and
                ``get_ground_truth``.
            boxes: ground-truth boxes; only read when ``self.training`` is
                true.

        Returns:
            ``rpn_rois`` in inference mode; ``(rpn_rois, loss_dict)`` in
            training mode, where ``loss_dict`` holds ``"loss_rpn_cls"`` and
            ``"loss_rpn_loc"``.
        """
        # keep only the feature maps this head consumes
        selected = [features[name] for name in self.in_features]

        # anchors for every level, generated before any prediction is made
        all_anchors_list = []
        for feature_map, stride in zip(selected, self.stride_list):
            all_anchors_list.append(self.anchors_generator(feature_map, stride))

        # per-level raw predictions
        pred_cls_logit_list = []
        pred_bbox_offset_list = []
        for feature_map in selected:
            hidden = F.relu(self.rpn_conv(feature_map))

            scores = self.rpn_cls_score(hidden)
            # split axis 1 into (2, num_cell_anchors)
            cls_shape = (
                scores.shape[0],
                2,
                self.num_cell_anchors,
                scores.shape[2],
                scores.shape[3],
            )
            pred_cls_logit_list.append(scores.reshape(*cls_shape))

            bbox_offsets = self.rpn_bbox_offsets(hidden)
            # split axis 1 into (num_cell_anchors, 4)
            offset_shape = (
                bbox_offsets.shape[0],
                self.num_cell_anchors,
                4,
                bbox_offsets.shape[2],
                bbox_offsets.shape[3],
            )
            pred_bbox_offset_list.append(bbox_offsets.reshape(*offset_shape))

        # turn the raw predictions into proposals
        rpn_rois = self.find_top_rpn_proposals(
            pred_bbox_offset_list, pred_cls_logit_list, all_anchors_list, im_info
        )

        if not self.training:
            return rpn_rois

        rpn_labels, rpn_bbox_targets = self.get_ground_truth(
            boxes, im_info, all_anchors_list
        )
        pred_cls_logits, pred_bbox_offsets = self.merge_rpn_score_box(
            pred_cls_logit_list, pred_bbox_offset_list
        )

        # classification and localisation losses
        cls_loss = layers.softmax_loss(pred_cls_logits, rpn_labels)
        loc_loss = layers.get_smooth_l1_loss(
            pred_bbox_offsets,
            rpn_bbox_targets,
            rpn_labels,
            self.cfg.rpn_smooth_l1_beta,
            norm_type="all",
        )
        return rpn_rois, {"loss_rpn_cls": cls_loss, "loss_rpn_loc": loc_loss}
Exemplo n.º 3
0
    def forward(self, fpn_fms, rcnn_rois, im_info=None, gt_boxes=None):
        """Classify and refine RoIs; return losses or per-class boxes.

        Args:
            fpn_fms: container indexed by the names in ``self.in_features``.
            rcnn_rois: proposals; columns 1..4 are used as box coordinates
                (presumably column 0 is the batch index -- confirm).
            im_info: forwarded to ``get_ground_truth``.
            gt_boxes: forwarded to ``get_ground_truth``.

        Returns:
            In training mode a dict with ``'loss_rcnn_cls'`` and
            ``'loss_rcnn_loc'``.  Otherwise ``(pred_bbox, pred_scores)``: the
            decoded boxes and the softmax scores with class 0 removed.
        """
        # Called in both modes; labels and bbox_targets are only consumed by
        # the training branch below.
        rcnn_rois, labels, bbox_targets = self.get_ground_truth(
            rcnn_rois, im_info, gt_boxes)

        selected_fms = [fpn_fms[name] for name in self.in_features]
        pool_features = layers.roi_pool(
            selected_fms,
            rcnn_rois,
            self.stride,
            self.pooling_size,
            self.pooling_method,
        )

        # two fully connected layers followed by the two prediction heads
        hidden = F.flatten(pool_features, start_axis=1)
        hidden = F.relu(self.fc1(hidden))
        hidden = F.relu(self.fc2(hidden))
        pred_cls = self.pred_cls(hidden)
        pred_delta = self.pred_delta(hidden)

        if not self.training:
            # drop column 0 of the scores (background) ...
            class_probs = F.softmax(pred_cls, axis=1)
            pred_scores = class_probs[:, 1:]
            # ... and the matching first 4 regression outputs
            foreground_delta = pred_delta[:, 4:]
            pred_delta = foreground_delta.reshape(-1, 4)

            target_shape = (rcnn_rois.shapeof(0), self.cfg.num_classes, 4)
            # rois (N, 4) -> (N, 1, 4) -> (N, num_classes, 4)
            #            -> (N * num_classes, 4)
            expanded_rois = F.add_axis(rcnn_rois[:, 1:5], 1)
            tiled_rois = expanded_rois.broadcast(target_shape)
            base_rois = tiled_rois.reshape(-1, 4)

            pred_bbox = self.box_coder.decode(base_rois, pred_delta)
            return pred_bbox, pred_scores

        # classification loss
        loss_rcnn_cls = layers.softmax_loss(pred_cls, labels)

        # regression loss: pick, for every RoI, the 4 deltas of its label
        per_class_delta = pred_delta.reshape(-1, self.cfg.num_classes + 1, 4)
        label_column = labels.reshape(-1, 1)
        vlabels = label_column.broadcast((labels.shapeof(0), 4))
        selected_delta = F.indexing_one_hot(per_class_delta, vlabels, axis=1)

        loss_rcnn_loc = layers.get_smooth_l1_loss(
            selected_delta,
            bbox_targets,
            labels,
            self.cfg.rcnn_smooth_l1_beta,
            norm_type="all",
        )
        return {
            "loss_rcnn_cls": loss_rcnn_cls,
            "loss_rcnn_loc": loss_rcnn_loc,
        }
Exemplo n.º 4
0
    def forward(self, inputs):
        """Run the detector on one batch and return losses or detections.

        Args:
            inputs: mapping read with the keys ``"image"`` and ``"im_info"``,
                plus ``"gt_boxes"`` when ``self.training`` is true.

        Returns:
            In training mode a dict with the keys ``"total_loss"``,
            ``"loss_cls"`` and ``"loss_loc"`` (the same keys are also stored
            in ``self.cfg.losses_keys``).  Otherwise the tuple of sigmoid
            class scores for the first image and its decoded, rescaled and
            clipped boxes reshaped to ``(-1, 4)``.

        Raises:
            AssertionError: in inference mode when ``self.batch_size != 1``.
        """
        image = self.preprocess_image(inputs["image"])
        backbone_out = self.backbone(image)
        features = [backbone_out[name] for name in self.in_features]

        box_logits, box_offsets = self.head(features)

        # Move axis 1 to the end and flatten every level to
        # (batch, positions, num_classes) / (batch, positions, 4).
        logits_shape = (self.batch_size, -1, self.cfg.num_classes)
        box_logits_list = [
            level.dimshuffle(0, 2, 3, 1).reshape(*logits_shape)
            for level in box_logits
        ]
        offsets_shape = (self.batch_size, -1, 4)
        box_offsets_list = [
            level.dimshuffle(0, 2, 3, 1).reshape(*offsets_shape)
            for level in box_offsets
        ]

        anchors_list = [
            self.anchor_gen(feature, self.stride_list[level])
            for level, feature in enumerate(features)
        ]

        all_level_box_logits = F.concat(box_logits_list, axis=1)
        all_level_box_offsets = F.concat(box_offsets_list, axis=1)
        all_level_anchors = F.concat(anchors_list, axis=0)

        im_info = inputs["im_info"]

        if not self.training:
            # currently not support multi-batch testing
            assert self.batch_size == 1

            decoded = self.box_coder.decode(
                all_level_anchors,
                all_level_box_offsets[0],
            )
            decoded = decoded.reshape(-1, 4)

            # per-axis ratios between im_info columns (1, 3) and (0, 2)
            scale_w = im_info[0, 1] / im_info[0, 3]
            scale_h = im_info[0, 0] / im_info[0, 2]
            scales = F.concat([scale_w, scale_h, scale_w, scale_h], axis=0)
            rescaled = decoded / scales

            clipped_box = layers.get_clipped_box(rescaled, im_info[0, 2:4])
            clipped_box = clipped_box.reshape(-1, 4)
            all_level_box_scores = F.sigmoid(all_level_box_logits)
            return all_level_box_scores[0], clipped_box

        box_gt_scores, box_gt_offsets = self.get_ground_truth(
            all_level_anchors,
            inputs["gt_boxes"],
            im_info[:, 4].astype(np.int32),
        )

        # With a positive momentum the losses are computed un-normalised and
        # divided by the running normaliser afterwards.
        momentum = self.cfg.loss_normalizer_momentum
        use_running_normalizer = momentum > 0.0
        norm_type = "none" if use_running_normalizer else "fg"

        rpn_cls_loss = layers.get_focal_loss(
            all_level_box_logits,
            box_gt_scores,
            alpha=self.cfg.focal_loss_alpha,
            gamma=self.cfg.focal_loss_gamma,
            norm_type=norm_type,
        )
        smooth_l1 = layers.get_smooth_l1_loss(
            all_level_box_offsets,
            box_gt_offsets,
            box_gt_scores,
            self.cfg.smooth_l1_beta,
            norm_type=norm_type,
        )
        rpn_bbox_loss = smooth_l1 * self.cfg.reg_loss_weight

        if use_running_normalizer:
            # update the running normaliser with the count of entries > 0
            F.add_update(
                self.loss_normalizer,
                (box_gt_scores > 0).sum(),
                alpha=momentum,
                beta=1 - momentum,
            )
            rpn_cls_loss = rpn_cls_loss / F.maximum(self.loss_normalizer, 1)
            rpn_bbox_loss = rpn_bbox_loss / F.maximum(self.loss_normalizer, 1)

        loss_dict = {
            "total_loss": rpn_cls_loss + rpn_bbox_loss,
            "loss_cls": rpn_cls_loss,
            "loss_loc": rpn_bbox_loss,
        }
        self.cfg.losses_keys = list(loss_dict.keys())
        return loss_dict