Exemplo n.º 1
0
    def _init_test_roi_heads_faster_rcnn(self):
        """Build a box-only ``RoIHeads`` from fixed test settings.

        Returns:
            The ``RoIHeads`` instance assembled from an
            ``ops.MultiScaleRoIAlign`` pooler over four named feature maps,
            a ``TwoMLPHead`` and a ``FastRCNNPredictor`` with 91 classes.
        """
        feature_channels = 256
        n_classes = 91
        hidden_dim = 1024

        pooler = ops.MultiScaleRoIAlign(
            featmap_names=['0', '1', '2', '3'],
            output_size=7,
            sampling_ratio=2)

        # The head's input size is derived from the pooler's output side.
        side = pooler.output_size[0]
        head = TwoMLPHead(feature_channels * side**2, hidden_dim)
        predictor = FastRCNNPredictor(hidden_dim, n_classes)

        return RoIHeads(
            pooler, head, predictor,
            0.5,    # box_fg_iou_thresh
            0.5,    # box_bg_iou_thresh
            512,    # box_batch_size_per_image
            0.25,   # box_positive_fraction
            None,   # bbox_reg_weights
            0.05,   # box_score_thresh
            0.5,    # box_nms_thresh
            100)    # box_detections_per_img
Exemplo n.º 2
0
    def __init__(self, num_thing_classes, backbone_out_channels=256,
                 roi_out_res=14,
                 feat_maps_names=None,
                 representation_size=1024,
                 box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
                 box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,
                 box_batch_size_per_image=512, box_positive_fraction=0.25,
                 bbox_reg_weights=None):
        """Create the box and mask RoI heads and store them on ``self.heads``.

        Args:
            num_thing_classes: Class count passed to the box and mask
                predictors.
            backbone_out_channels: Channel count used to size the box head,
                the mask head and the mask predictor.
            roi_out_res: Output resolution passed to both RoI poolers.
            feat_maps_names: Names of the feature maps to pool from. ``None``
                selects ``['P4', 'P8', 'P16', 'P32']``.
            representation_size: Width shared by the box head output and the
                box predictor input.
            box_score_thresh, box_nms_thresh, box_detections_per_img,
            box_fg_iou_thresh, box_bg_iou_thresh, box_batch_size_per_image,
            box_positive_fraction, bbox_reg_weights: Forwarded unchanged to
                ``RoIHeads``.
        """
        super().__init__()

        # A ``None`` sentinel replaces the former list default so that no list
        # object is shared between separate constructions of this class.
        if feat_maps_names is None:
            feat_maps_names = ['P4', 'P8', 'P16', 'P32']

        ## Boxes
        # Each pooler receives its own copy of the names, so the two poolers
        # (and the caller) never alias one list.
        # NOTE(review): the trailing ``2`` is presumably the sampling ratio;
        # confirm against the RoiAlign signature.
        box_roi_pool = RoiAlign(list(feat_maps_names), roi_out_res, 2)
        bbox_head = box_head(backbone_out_channels * roi_out_res ** 2,
                             representation_size)
        bbox_predictor = box_predictor(representation_size, num_thing_classes)

        # Masks
        mask_roi_pool = RoiAlign(list(feat_maps_names), roi_out_res, 2)
        m_head = mask_head(backbone_out_channels)
        m_predictor = mask_predictor(backbone_out_channels, num_thing_classes)

        self.heads = RoIHeads(box_roi_pool, bbox_head, bbox_predictor,
                              box_fg_iou_thresh, box_bg_iou_thresh,
                              box_batch_size_per_image, box_positive_fraction,
                              bbox_reg_weights,
                              box_score_thresh, box_nms_thresh, box_detections_per_img,
                              mask_roi_pool, m_head, m_predictor)
    def test_assign_targets_to_proposals(self):
        """Check ``assign_targets_to_proposals`` with an empty GT box tensor.

        For the single image in the batch, both the matched indices and the
        labels are asserted to sum to zero, to have one entry per proposal and
        to be ``int64``.
        """
        proposals = [torch.randint(-50, 50, (20, 4), dtype=torch.float32)]
        gt_boxes = [torch.zeros((0, 4), dtype=torch.float32)]
        gt_labels = [torch.tensor([[0]], dtype=torch.int64)]

        hidden_dim = 1024
        pooler = MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'],
                                    output_size=7,
                                    sampling_ratio=2)
        side = pooler.output_size[0]
        head = TwoMLPHead(4 * side**2, hidden_dim)
        predictor = FastRCNNPredictor(hidden_dim, 2)

        # Box-only heads; the eight trailing values are passed positionally.
        box_settings = (0.5, 0.5, 512, 0.25, None, 0.05, 0.5, 100)
        roi_heads = RoIHeads(pooler, head, predictor, *box_settings)

        matched_idxs, labels = roi_heads.assign_targets_to_proposals(
            proposals, gt_boxes, gt_labels)

        expected_shape = torch.Size([proposals[0].shape[0]])
        for per_image_results in (matched_idxs, labels):
            first_image = per_image_results[0]
            self.assertEqual(first_image.sum(), 0)
            self.assertEqual(first_image.shape, expected_shape)
            self.assertEqual(first_image.dtype, torch.int64)
Exemplo n.º 4
0
 def __init__(
     self,
     box_roi_pool,
     box_head,
     box_predictor,
     # Faster R-CNN training
     fg_iou_thresh,
     bg_iou_thresh,
     batch_size_per_image,
     positive_fraction,
     bbox_reg_weights,
     # Faster R-CNN inference
     score_thresh,
     nms_thresh,
     detections_per_img,
     # Mask
     mask_roi_pool=None,
     mask_head=None,
     mask_predictor=None,
     keypoint_roi_pool=None,
     keypoint_head=None,
     keypoint_predictor=None,
 ):
     """Initialise the box branch by delegating to ``RoIHeads.__init__``.

     Only the box modules and the eight box training/inference settings are
     forwarded. The mask and keypoint arguments are accepted but not passed
     on by this body.
     NOTE(review): confirm that dropping them is intended.
     """
     modules = (box_roi_pool, box_head, box_predictor)
     training_settings = (
         fg_iou_thresh,
         bg_iou_thresh,
         batch_size_per_image,
         positive_fraction,
         bbox_reg_weights,
     )
     inference_settings = (score_thresh, nms_thresh, detections_per_img)
     RoIHeads.__init__(
         self, *modules, *training_settings, *inference_settings)
    def __init__(self, backbone, num_classes=None,
                 min_size=800, max_size=1333,
                 image_mean=None, image_std=None,
                 rpn_anchor_generator=None, rpn_head=None,
                 rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,
                 rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,
                 rpn_nms_thresh=0.7,
                 rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
                 rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
                 box_roi_pool=None, box_head=None, box_predictor=None,
                 box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
                 box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,
                 box_batch_size_per_image=512, box_positive_fraction=0.25,
                 bbox_reg_weights=None):
        """Assemble the RPN, RoI heads and input transform for the detector.

        Any component left as ``None`` is built with the defaults below, and
        the result is handed to the parent constructor together with
        ``backbone``.

        Raises:
            ValueError: If ``backbone`` has no ``out_channels`` attribute, or
                if ``num_classes`` and ``box_predictor`` are both given or
                both omitted.
            AssertionError: If ``rpn_anchor_generator`` or ``box_roi_pool``
                is neither ``None`` nor of the expected type.
        """
        if not hasattr(backbone, "out_channels"):
            raise ValueError("backbone should contain an attribute out_channels")

        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        # Exactly one of num_classes / box_predictor must be supplied.
        if num_classes is not None and box_predictor is not None:
            raise ValueError("num_classes should be None when box_predictor is specified")
        if num_classes is None and box_predictor is None:
            raise ValueError("num_classes should not be None when box_predictor is not specified")

        feat_channels = backbone.out_channels
        hidden_dim = 1024

        # ---- region proposal network ----
        if rpn_anchor_generator is None:
            sizes = ((32,), (64,), (128,), (256,), (512,))
            ratios = ((0.5, 1.0, 2.0),) * len(sizes)
            rpn_anchor_generator = AnchorGenerator(sizes, ratios)

        if rpn_head is None:
            anchors_per_location = rpn_anchor_generator.num_anchors_per_location()[0]
            rpn_head = RPNHead(feat_channels, anchors_per_location)

        pre_nms_top_n = {"training": rpn_pre_nms_top_n_train,
                         "testing": rpn_pre_nms_top_n_test}
        post_nms_top_n = {"training": rpn_post_nms_top_n_train,
                          "testing": rpn_post_nms_top_n_test}
        rpn = RegionProposalNetwork(
            rpn_anchor_generator, rpn_head,
            rpn_fg_iou_thresh, rpn_bg_iou_thresh,
            rpn_batch_size_per_image, rpn_positive_fraction,
            pre_nms_top_n, post_nms_top_n, rpn_nms_thresh)

        # ---- box branch ----
        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=['0', '1', '2', '3'],
                output_size=7,
                sampling_ratio=2)

        if box_head is None:
            side = box_roi_pool.output_size[0]
            box_head = TwoMLPHead(feat_channels * side ** 2, hidden_dim)

        if box_predictor is None:
            box_predictor = FastRCNNPredictor(hidden_dim, num_classes)

        roi_heads = RoIHeads(
            box_roi_pool, box_head, box_predictor,
            box_fg_iou_thresh, box_bg_iou_thresh,
            box_batch_size_per_image, box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh, box_nms_thresh, box_detections_per_img)

        # ---- input transform ----
        transform = GeneralizedRCNNTransform(
            min_size, max_size,
            [0.485, 0.456, 0.406] if image_mean is None else image_mean,
            [0.229, 0.224, 0.225] if image_std is None else image_std)

        super(FasterRCNN, self).__init__(backbone, rpn, roi_heads, transform)
Exemplo n.º 6
0
    def __init__(
            self,
            backbone,
            num_classes=2,
            num_pids=5532,
            num_cq_size=5000,
            # transform parameters
            min_size=900,
            max_size=1500,
            image_mean=None,
            image_std=None,
            # Anchor settings:
            anchor_scales=None,
            anchor_ratios=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=12000,
            rpn_pre_nms_top_n_test=6000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=300,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.7,
            rpn_bg_iou_thresh=0.3,
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            # Box parameters
            rcnn_bbox_bn=True,
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.0,
            box_nms_thresh=0.4,
            box_detections_per_img=300,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.1,
            box_batch_size_per_image=128,
            box_positive_fraction=0.5,
            bbox_reg_weights=None,
            # ReID parameters
            feat_head=None,
            reid_head=None,
            reid_loss=None):
        """Build the RPN, RoI heads and transform, then initialise the parent.

        Components passed as ``None`` are created with the defaults below.
        A caller-supplied ``rpn_head`` is now used as given; previously it was
        always replaced by a freshly built ``RPNHead``.

        ``num_pids``, ``num_cq_size``, ``anchor_scales``, ``anchor_ratios``,
        ``rcnn_bbox_bn``, ``feat_head``, ``reid_head`` and ``reid_loss`` are
        not referenced in this constructor body.
        NOTE(review): confirm whether they are consumed elsewhere.
        """
        if rpn_anchor_generator is None:
            anchor_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
            aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        # Build the default head only when the caller did not supply one.
        if rpn_head is None:
            rpn_head = RPNHead(
                backbone.out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)
        rpn = RegionProposalNetwork(rpn_anchor_generator, rpn_head,
                                    rpn_fg_iou_thresh, rpn_bg_iou_thresh,
                                    rpn_batch_size_per_image,
                                    rpn_positive_fraction, rpn_pre_nms_top_n,
                                    rpn_post_nms_top_n, rpn_nms_thresh)

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(featmap_names=['feat2rpn'],
                                              output_size=[7, 7],
                                              sampling_ratio=2)
        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 2048
            box_head = TwoMLPHead(backbone.out_channels * resolution**2,
                                  representation_size)
        if box_predictor is None:
            representation_size = 2048
            box_predictor = FastRCNNPredictor(representation_size, num_classes)
        roi_heads = RoIHeads(  # box
            box_roi_pool, box_head, box_predictor, box_fg_iou_thresh,
            box_bg_iou_thresh, box_batch_size_per_image, box_positive_fraction,
            bbox_reg_weights, box_score_thresh, box_nms_thresh,
            box_detections_per_img)
        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean,
                                             image_std)
        # super(FasterRCNN, self) resolves __init__ on the class that follows
        # FasterRCNN in the MRO, so FasterRCNN's own constructor is not run.
        super(FasterRCNN, self).__init__(backbone, rpn, roi_heads, transform)
Exemplo n.º 7
0
    def __init__(
            self,
            backbone,
            num_classes=None,
            # transform parameters
            min_size=800,
            max_size=1333,
            image_mean=None,
            image_std=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=2000,
            rpn_pre_nms_top_n_test=1000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=1000,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.7,
            rpn_bg_iou_thresh=0.3,
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            # Box parameters
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.3,
            box_detections_per_img=128,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.5,
            box_batch_size_per_image=64,
            box_positive_fraction=0.25,
            bbox_reg_weights=None):
        """Build the RoI heads and transform, passing ``rpn=None`` upward.

        No region proposal network is created here: the parent constructor
        receives ``None`` for it, and none of the ``rpn_*`` arguments are
        referenced in this body.

        Raises:
            ValueError: If ``backbone`` lacks ``out_channels``; if
                ``num_classes`` and ``box_predictor`` are both given or both
                omitted; or if ``box_head`` must be built by default but
                ``box_roi_pool`` is ``None``.
        """
        print("Using modified Faster RCNN....")
        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)")

        if num_classes is not None:
            if box_predictor is not None:
                raise ValueError(
                    "num_classes should be None when box_predictor is specified"
                )
        else:
            if box_predictor is None:
                raise ValueError(
                    "num_classes should not be None when box_predictor "
                    "is not specified")

        out_channels = backbone.out_channels

        if box_head is None:
            # The default head is sized from the pooler's output resolution.
            # No default pooler is built here, so fail with a clear message
            # instead of an AttributeError on None.
            if box_roi_pool is None:
                raise ValueError(
                    "box_roi_pool should not be None when box_head "
                    "is not specified")
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(out_channels * resolution**2,
                                  representation_size)

        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(representation_size, num_classes)

        rpn = None

        roi_heads = RoIHeads(
            # Box
            box_roi_pool,
            box_head,
            box_predictor,
            box_fg_iou_thresh,
            box_bg_iou_thresh,
            box_batch_size_per_image,
            box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh,
            box_nms_thresh,
            box_detections_per_img)

        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]

        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean,
                                             image_std)

        super().__init__(backbone, rpn, roi_heads, transform)
Exemplo n.º 8
0
    def __init__(
            self,
            backbone,
            num_classes=None,
            # transform parameters
            min_size=800,
            max_size=1333,
            image_mean=None,
            image_std=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=2000,
            rpn_pre_nms_top_n_test=1000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=1000,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.7,
            rpn_bg_iou_thresh=0.3,
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            # Box parameters
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.1,
            box_detections_per_img=100,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.5,
            box_batch_size_per_image=512,
            box_positive_fraction=0.25,
            bbox_reg_weights=None,
            depth_estimator_path='_depth_net.pth'):
        """Assemble the RPN, RoI heads, mask net and transform for the model.

        Components passed as ``None`` are created with the defaults below.
        ``backbone`` is forwarded to the parent constructor without being
        inspected: channel counts are fixed constants rather than read from
        ``backbone.out_channels``.

        Raises:
            ValueError: If ``num_classes`` and ``box_predictor`` are both
                given or both omitted.
            AssertionError: If ``rpn_anchor_generator`` or ``box_roi_pool``
                is neither ``None`` nor of the expected type.
        """
        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        # Exactly one of num_classes / box_predictor must be supplied.
        if num_classes is not None and box_predictor is not None:
            raise ValueError(
                "num_classes should be None when box_predictor is specified"
            )
        if num_classes is None and box_predictor is None:
            raise ValueError(
                "num_classes should not be None when box_predictor "
                "is not specified")

        # Hard-coded channel count used for the RPN head and the mask net.
        fixed_channels = 6
        hidden_dim = 1024

        # ---- region proposal network ----
        if rpn_anchor_generator is None:
            sizes = ((16, ), (32, ), (64, ))
            ratios = ((0.5, 0.7, 1.0), ) * len(sizes)
            rpn_anchor_generator = AnchorGenerator(sizes, ratios)

        if rpn_head is None:
            anchors_per_location = (
                rpn_anchor_generator.num_anchors_per_location()[0])
            rpn_head = RPNHead(fixed_channels, anchors_per_location)

        pre_nms_top_n = {"training": rpn_pre_nms_top_n_train,
                         "testing": rpn_pre_nms_top_n_test}
        post_nms_top_n = {"training": rpn_post_nms_top_n_train,
                          "testing": rpn_post_nms_top_n_test}
        rpn = RegionProposalNetwork(
            rpn_anchor_generator, rpn_head,
            rpn_fg_iou_thresh, rpn_bg_iou_thresh,
            rpn_batch_size_per_image, rpn_positive_fraction,
            pre_nms_top_n, post_nms_top_n, rpn_nms_thresh)

        # ---- box branch ----
        if box_roi_pool is None:
            # Pools from the single feature map named '0'.
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=['0'],
                output_size=7,
                sampling_ratio=2)

        if box_head is None:
            side = box_roi_pool.output_size[0]
            # The head input uses a fixed 64 channels, not fixed_channels.
            box_head = TwoMLPHead(64 * side**2, hidden_dim)

        if box_predictor is None:
            box_predictor = FastRCNNPredictor(hidden_dim, num_classes)

        roi_heads = RoIHeads(
            box_roi_pool, box_head, box_predictor,
            box_fg_iou_thresh, box_bg_iou_thresh,
            box_batch_size_per_image, box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh, box_nms_thresh, box_detections_per_img)

        mask_net = MaskNet(fixed_channels)

        # ---- input transform ----
        transform = GeneralizedRCNNTransform(
            min_size, max_size,
            [0.485, 0.456, 0.406] if image_mean is None else image_mean,
            [0.229, 0.224, 0.225] if image_std is None else image_std)

        super(ModifiedFasterRCNN, self).__init__(
            backbone,
            rpn,
            roi_heads,
            mask_net,
            transform,
            depth_estimator_path=depth_estimator_path)