def get_box_head(self):
    # Rebuild the box head with the same flattened input size the detector's
    # RoI pooling produces, then restore the stored weights (device,
    # self.obj_detect and self.bbox_head_weights are defined elsewhere).
    box_head = TwoMLPHead(
        self.obj_detect.backbone.out_channels *
        self.obj_detect.roi_heads.box_roi_pool.output_size[0]**2,
        representation_size=1024).to(device)
    box_head.load_state_dict(self.bbox_head_weights)
    return box_head
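In every snippet below, the first argument to TwoMLPHead is backbone.out_channels times the square of the RoI pooling resolution, because the pooled (N, C, output_size, output_size) feature map is flattened before the first fully connected layer. A minimal standalone sketch of that sizing rule (the values 256 and 7 are the usual FPN defaults, assumed here):

import torch
from torchvision.models.detection.faster_rcnn import TwoMLPHead

out_channels = 256   # FPN channel count (assumed)
resolution = 7       # MultiScaleRoIAlign output_size (assumed)
box_head = TwoMLPHead(out_channels * resolution ** 2, representation_size=1024)

pooled = torch.randn(8, out_channels, resolution, resolution)  # 8 pooled RoIs
assert box_head(pooled).shape == (8, 1024)  # flattened internally, then fc6/fc7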
Example no. 3
    def _init_test_roi_heads_faster_rcnn(self):
        out_channels = 256
        num_classes = 91

        box_fg_iou_thresh = 0.5
        box_bg_iou_thresh = 0.5
        box_batch_size_per_image = 512
        box_positive_fraction = 0.25
        bbox_reg_weights = None
        box_score_thresh = 0.05
        box_nms_thresh = 0.5
        box_detections_per_img = 100

        box_roi_pool = ops.MultiScaleRoIAlign(
            featmap_names=['0', '1', '2', '3'],
            output_size=7,
            sampling_ratio=2)

        resolution = box_roi_pool.output_size[0]
        representation_size = 1024
        box_head = TwoMLPHead(out_channels * resolution**2,
                              representation_size)

        representation_size = 1024
        box_predictor = FastRCNNPredictor(representation_size, num_classes)

        roi_heads = RoIHeads(box_roi_pool, box_head, box_predictor,
                             box_fg_iou_thresh, box_bg_iou_thresh,
                             box_batch_size_per_image, box_positive_fraction,
                             bbox_reg_weights, box_score_thresh,
                             box_nms_thresh, box_detections_per_img)
        return roi_heads
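For reference, a self-contained sketch that builds the same heads and pushes dummy FPN features through them in eval mode; the 224x224 image size and the per-level feature shapes (strides 4 through 32) are assumptions, not part of the test above:

import torch
from torchvision import ops
from torchvision.models.detection.roi_heads import RoIHeads
from torchvision.models.detection.faster_rcnn import TwoMLPHead, FastRCNNPredictor

box_roi_pool = ops.MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'],
                                      output_size=7, sampling_ratio=2)
box_head = TwoMLPHead(256 * 7 ** 2, 1024)
box_predictor = FastRCNNPredictor(1024, 91)
roi_heads = RoIHeads(box_roi_pool, box_head, box_predictor,
                     0.5, 0.5, 512, 0.25, None, 0.05, 0.5, 100)
roi_heads.eval()

features = {name: torch.randn(1, 256, 224 // s, 224 // s)
            for name, s in zip(['0', '1', '2', '3'], (4, 8, 16, 32))}
proposals = [torch.tensor([[10.0, 10.0, 120.0, 120.0]])]
detections, _ = roi_heads(features, proposals, image_shapes=[(224, 224)])
print(detections[0]['boxes'].shape)  # (num_kept, 4); often empty at random init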
Example no. 4
    def __call__(self,
                 classes=3,
                 sizes=((32, 64, 128, 256, 512), ),
                 aspect_ratios=((0.5, 1.0, 2.0), )):
        from torchvision.models.detection.rpn import AnchorGenerator
        import torchvision
        from torchvision.models.detection import FasterRCNN
        from torchvision.models.detection.faster_rcnn import TwoMLPHead, FastRCNNPredictor
        # load a pre-trained model for classification and return
        # only the features
        backbone = torchvision.models.squeezenet1_1(pretrained=True).features
        # FasterRCNN needs to know the number of
        # output channels in a backbone. For squeezenet1_1, it's 512
        # so we need to add it here
        backbone.out_channels = 512
        anchor_generator = AnchorGenerator(sizes=sizes,
                                           aspect_ratios=aspect_ratios)
        roi_out_size = 7
        roi_pooler = torchvision.ops.MultiScaleRoIAlign(
            featmap_names=['0'], output_size=roi_out_size, sampling_ratio=2)

        representation_size = 256  # Scaled down from 1024 in original implementation.
        # allows to reduce considerably the number of parameters
        box_head = TwoMLPHead(backbone.out_channels * roi_out_size**2,
                              representation_size)

        box_predictor = FastRCNNPredictor(representation_size, classes)

        model = FasterRCNN(backbone,
                           rpn_anchor_generator=anchor_generator,
                           box_roi_pool=roi_pooler,
                           box_head=box_head,
                           box_predictor=box_predictor)

        return model
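A usage sketch; 'builder' is a hypothetical instance of the surrounding class, whose name is not shown in the snippet:

import torch

model = builder()  # hypothetical instance; uses classes=3 and default anchors
model.eval()
with torch.no_grad():
    detections = model([torch.rand(3, 300, 400)])
print(detections[0]['boxes'].shape, detections[0]['labels'].shape)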
Example no. 5
    def __init__(self):
        super(FasterRCNN, self).__init__()
        # Define FPN
        self.fpn = resnet_fpn_backbone(backbone_name='resnet101',
                                       pretrained=True)
        self.rpn = RPN()

        # transform parameters
        min_size = 800
        max_size = 1333
        image_mean = [0.485, 0.456, 0.406]
        image_std = [0.229, 0.224, 0.225]
        self.transform = GeneralizedRCNNTransform(min_size, max_size,
                                                  image_mean, image_std)

        # Box parameters
        box_roi_pool = None
        box_head = None
        box_predictor = None
        box_score_thresh = 0.5
        box_nms_thresh = 0.5
        box_detections_per_img = 100
        box_fg_iou_thresh = 0.5
        box_bg_iou_thresh = 0.5
        box_batch_size_per_image = 512
        box_positive_fraction = 0.25
        bbox_reg_weights = None
        num_classes = 101

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=['0', '1', '2', '3'],
                output_size=7,
                sampling_ratio=2)

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(256 * resolution**2, representation_size)

        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(representation_size, num_classes)

        self.roi_heads = RoIHeads(
            # Box
            box_roi_pool,
            box_head,
            box_predictor,
            box_fg_iou_thresh,
            box_bg_iou_thresh,
            box_batch_size_per_image,
            box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh,
            box_nms_thresh,
            box_detections_per_img)
Example no. 6
    def test_assign_targets_to_proposals(self):

        proposals = [torch.randint(-50, 50, (20, 4), dtype=torch.float32)]
        gt_boxes = [torch.zeros((0, 4), dtype=torch.float32)]
        gt_labels = [torch.tensor([[0]], dtype=torch.int64)]

        box_roi_pool = MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'],
                                          output_size=7,
                                          sampling_ratio=2)

        resolution = box_roi_pool.output_size[0]
        representation_size = 1024
        box_head = TwoMLPHead(4 * resolution**2, representation_size)

        representation_size = 1024
        box_predictor = FastRCNNPredictor(representation_size, 2)

        roi_heads = RoIHeads(
            # Box
            box_roi_pool,
            box_head,
            box_predictor,
            0.5,
            0.5,
            512,
            0.25,
            None,
            0.05,
            0.5,
            100)

        matched_idxs, labels = roi_heads.assign_targets_to_proposals(
            proposals, gt_boxes, gt_labels)

        self.assertEqual(matched_idxs[0].sum(), 0)
        self.assertEqual(matched_idxs[0].shape,
                         torch.Size([proposals[0].shape[0]]))
        self.assertEqual(matched_idxs[0].dtype, torch.int64)

        self.assertEqual(labels[0].sum(), 0)
        self.assertEqual(labels[0].shape, torch.Size([proposals[0].shape[0]]))
        self.assertEqual(labels[0].dtype, torch.int64)
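Because the ground-truth set above is empty, every proposal is matched to background; that is why both matched_idxs and labels come back as all-zero tensors of length 20 in the assertions.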
Example no. 7
def mask_rcnn(pretrained=False,
              num_classes=1 + 90,
              representation=1024,
              backbone=None,
              with_mask=True,
              **kwargs):
    if backbone is None:
        model = maskrcnn_resnet50_fpn(pretrained,
                                      pretrained_backbone=not pretrained,
                                      progress=True,
                                      **kwargs)
    else:
        model = maskrcnn_resnet50_fpn(pretrained,
                                      pretrained_backbone=False,
                                      progress=True,
                                      **kwargs)
        model.backbone = backbone

    in_features = model.roi_heads.box_predictor.cls_score.in_features
    out_features = model.roi_heads.box_predictor.cls_score.out_features
    if representation != in_features:
        logging.info(
            f"Replaced box_head with representation size of {representation}")
        out_channels = model.backbone.out_channels
        resolution = model.roi_heads.box_roi_pool.output_size[0]
        model.roi_heads.box_head = TwoMLPHead(out_channels * resolution**2,
                                              representation)

    if representation != in_features or num_classes != out_features:
        logging.info(
            f"Replaced box_predictor with (representation, num_classes) = ({representation}, {num_classes})"
        )
        model.roi_heads.box_predictor = FastRCNNPredictor(
            representation, num_classes)

    if not with_mask:
        model.roi_heads.mask_roi_pool = None
        model.roi_heads.mask_head = None
        model.roi_heads.mask_predictor = None

    return THDetector(model)
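The same head swap works directly on a stock torchvision model; a standalone sketch (the 512/91 values are illustrative, not from the code above):

import torchvision
from torchvision.models.detection.faster_rcnn import TwoMLPHead, FastRCNNPredictor

model = torchvision.models.detection.maskrcnn_resnet50_fpn(
    pretrained=False, pretrained_backbone=False)

representation, num_classes = 512, 91                      # illustrative values
out_channels = model.backbone.out_channels                 # 256 for the FPN
resolution = model.roi_heads.box_roi_pool.output_size[0]   # 7
model.roi_heads.box_head = TwoMLPHead(out_channels * resolution ** 2,
                                      representation)
model.roi_heads.box_predictor = FastRCNNPredictor(representation, num_classes)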
Example no. 8
    def __init__(self):
        super(RoIHeads, self).__init__()

        self.box_roi_pool = MultiScaleRoIAlign(
            featmap_names=['0', '1', '2', '3'],
            output_size=7,
            sampling_ratio=2)

        resolution = self.box_roi_pool.output_size[0]
        representation_size = 1024
        self.box_head = TwoMLPHead(256 * resolution**2, representation_size)
        self.rlp_head = copy.deepcopy(self.box_head)

        representation_size = 1024
        self.box_predictor = FastRCNNPredictor(representation_size,
                                               cfg.BOX.NUM_CLASSES)

        self.RelDN = reldn_heads.reldn_head(self.box_head.fc7.out_features *
                                            3)  # concat of SPO

        self.box_similarity = box_ops.box_iou
        # assign ground-truth boxes for each proposal
        self.proposal_matcher = det_utils.Matcher(
            cfg.BOX.FG_IOU_THRESH,
            cfg.BOX.BG_IOU_THRESH,
            allow_low_quality_matches=False)

        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
            cfg.BOX.BATCH_SIZE_PER_IMAGE, cfg.BOX.POSITIVE_FRACTION)

        self.fg_bg_sampler_so = det_utils.BalancedPositiveNegativeSampler(
            cfg.MODEL.BATCH_SIZE_PER_IMAGE_SO, cfg.MODEL.POSITIVE_FRACTION_SO)

        self.fg_bg_sampler_rlp = det_utils.BalancedPositiveNegativeSampler(
            cfg.MODEL.BATCH_SIZE_PER_IMAGE_REL,
            cfg.MODEL.POSITIVE_FRACTION_REL)

        bbox_reg_weights = (10., 10., 5., 5.)
        self.box_coder = det_utils.BoxCoder(bbox_reg_weights)
Example no. 9
class RoIFeatureExtractor_new(nn.Module):
    def __init__(self, in_features, num_classes, pretrained=False):
        super(RoIFeatureExtractor_new, self).__init__()
        self.fc_head = TwoMLPHead(in_channels=1280 * 7 * 7,
                                  representation_size=in_features)
        layers = [
            BasicBlock(256 * 5, 1024 * 5),
            Bottleneck(1024 * 5, 1024 * 5),
            BasicBlock(256 * 5, 1024 * 5),
            Bottleneck(1024 * 5, 1024 * 5),
            BasicBlock(256 * 5, 1024 * 5),
            Bottleneck(1024 * 5, 1024 * 5)
        ]
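        # Note: as written, these residual blocks change the channel count
        # (e.g. 1280 -> 5120) without a downsample projection, so the skip
        # connection inside BasicBlock/Bottleneck would fail at runtime; each
        # block would need a 1x1-conv downsample for this head to run.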
        self.conv_head = nn.Sequential(*layers)
        # self.avgpool = nn.AvgPool2d(kernel_size=7, stride=7)

    def forward(self, features):  # N, 1280, 7, 7
        print(features.shape)
        fc_feature = self.fc_head.forward(features)
        conv_feature = self.conv_head(features)
        avgPool = nn.AvgPool2d((conv_feature.shape[2], conv_feature.shape[3]))
        conv_feature = avgPool(conv_feature)

        return (fc_feature, conv_feature)
Example no. 10
inference_args = {
    'min_size': 512,
    'max_size': 1024,
    'box_detections_per_img': 128,
    'box_nms_thresh': 0.25,
    'box_score_thresh': .75,
    'rpn_nms_thresh': 0.25
}
print(inference_args)
# many small anchors
anchor_generator = AnchorGenerator(sizes=tuple([(2, 4, 8, 16, 32)
                                                for r in range(5)]),
                                   aspect_ratios=tuple([(0.1, 0.25, 0.5, 1,
                                                         1.5, 2)
                                                        for rh in range(5)]))

box_head = TwoMLPHead(in_channels=7 * 7 * 256, representation_size=128)
box_predictor = FastRCNNPredictor(in_channels=128, num_classes=3)
mask_roi_pool = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3],
                                                   output_size=14,
                                                   sampling_ratio=2)
mask_predictor = MaskRCNNPredictor(in_channels=256,
                                   dim_reduced=256,
                                   num_classes=3)

inference_args['box_head'] = box_head
inference_args['rpn_anchor_generator'] = anchor_generator
inference_args['mask_roi_pool'] = mask_roi_pool
inference_args['mask_predictor'] = mask_predictor
inference_args['box_predictor'] = box_predictor

maskrcnn_model = maskrcnn_resnet50_fpn(pretrained=False, **inference_args)
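A smoke-test sketch for the model built above. Note this snippet targets an older torchvision where featmap_names were ints, and with box_score_thresh=0.75 a randomly initialized model will usually return no detections:

import torch

maskrcnn_model.eval()
with torch.no_grad():
    out = maskrcnn_model([torch.rand(3, 512, 512)])
print(out[0].keys())  # dict_keys(['boxes', 'labels', 'scores', 'masks'])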
Example no. 11
    def __init__(
            self,
            num_classes=2,
            # transform parameters
            backbone_name='resnet50',
            min_size=256,
            max_size=512,
            image_mean=None,
            image_std=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=2000,
            rpn_pre_nms_top_n_test=1000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=1000,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.7,
            rpn_bg_iou_thresh=0.3,
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            rpn_score_thresh=0.0,
            # Box parameters
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.5,
            box_detections_per_img=100,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.5,
            box_batch_size_per_image=512,
            box_positive_fraction=0.25,
            bbox_reg_weights=None,
            # Ellipse regressor
            ellipse_roi_pool=None,
            ellipse_head=None,
            ellipse_predictor=None,
            ellipse_loss_metric="gaussian-angle"):

        backbone = resnet_fpn_backbone(backbone_name,
                                       pretrained=True,
                                       trainable_layers=5)

        # Input image is grayscale -> in_channels = 1 instead of 3 (COCO)
        backbone.body.conv1 = Conv2d(1,
                                     64,
                                     kernel_size=(7, 7),
                                     stride=(2, 2),
                                     padding=(3, 3),
                                     bias=False)

        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)")

        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        if num_classes is not None:
            if box_predictor is not None:
                raise ValueError(
                    "num_classes should be None when box_predictor is specified"
                )
        else:
            if box_predictor is None:
                raise ValueError(
                    "num_classes should not be None when box_predictor "
                    "is not specified")

        out_channels = backbone.out_channels

        if rpn_anchor_generator is None:
            anchor_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
            aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        rpn = RegionProposalNetwork(rpn_anchor_generator,
                                    rpn_head,
                                    rpn_fg_iou_thresh,
                                    rpn_bg_iou_thresh,
                                    rpn_batch_size_per_image,
                                    rpn_positive_fraction,
                                    rpn_pre_nms_top_n,
                                    rpn_post_nms_top_n,
                                    rpn_nms_thresh,
                                    score_thresh=rpn_score_thresh)

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=['0', '1', '2', '3'],
                output_size=7,
                sampling_ratio=2)

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(out_channels * resolution**2,
                                  representation_size)

        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(representation_size, num_classes)

        if ellipse_roi_pool is None:
            ellipse_roi_pool = MultiScaleRoIAlign(
                featmap_names=['0', '1', '2', '3'],
                output_size=7,
                sampling_ratio=2)

        if ellipse_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            ellipse_head = TwoMLPHead(out_channels * resolution**2,
                                      representation_size)

        if ellipse_predictor is None:
            representation_size = 1024
            ellipse_predictor = EllipseRegressor(representation_size,
                                                 num_classes)

        roi_heads = EllipseRoIHeads(
            # Box
            box_roi_pool,
            box_head,
            box_predictor,
            box_fg_iou_thresh,
            box_bg_iou_thresh,
            box_batch_size_per_image,
            box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh,
            box_nms_thresh,
            box_detections_per_img,
            # Ellipse
            ellipse_roi_pool=ellipse_roi_pool,
            ellipse_head=ellipse_head,
            ellipse_predictor=ellipse_predictor,
            ellipse_loss_metric=ellipse_loss_metric)

        if image_mean is None:
            image_mean = [0.156]
        if image_std is None:
            image_std = [0.272]
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean,
                                             image_std)

        super().__init__(backbone, rpn, roi_heads, transform)
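The single-channel stem swap above can be checked in isolation; a sketch using the same positional resnet_fpn_backbone arguments as the constructor:

import torch
from torch.nn import Conv2d
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone

backbone = resnet_fpn_backbone('resnet50', pretrained=False, trainable_layers=5)
backbone.body.conv1 = Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2),
                             padding=(3, 3), bias=False)  # grayscale input
feats = backbone(torch.rand(1, 1, 256, 256))
print({k: v.shape for k, v in feats.items()})  # levels '0'..'3' plus 'pool'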
Example no. 12
def faster_rcnn_resnet_fpn(backbone_name,
                           image_size,
                           num_classes,
                           max_objs_per_image,
                           backbone_pretrained=False,
                           logger=None,
                           obj_thresh=0.1):
    resnet = models.resnet.__dict__[backbone_name](
        pretrained=backbone_pretrained)
    return_layers = {
        'layer1': 'c2',
        'layer2': 'c3',
        'layer3': 'c4',
        'layer4': 'c5'
    }
    in_channels_stage2 = resnet.inplanes // 8
    in_channels_list = [
        in_channels_stage2,
        in_channels_stage2 * 2,
        in_channels_stage2 * 4,
        in_channels_stage2 * 8,
    ]
    out_channels = 256
    from torchvision.models.detection.backbone_utils import BackboneWithFPN
    backbone = BackboneWithFPN(resnet, return_layers, in_channels_list,
                               out_channels)

    rpn_in_channels = out_channels
    roi_pooling_output_size = 7
    dim_roi_features = 1024  # length of the RoI feature vector

    from torchvision.models.detection.faster_rcnn import TwoMLPHead
    roi_head = TwoMLPHead(out_channels * roi_pooling_output_size**2,
                          dim_roi_features)
    # roi_head = nn.Sequential()
    # roi_head.add_module("0", nn.Conv2d(out_channels, out_channels, 3, 2, padding=1))
    # roi_head.add_module("1", nn.BatchNorm2d(out_channels))
    # roi_head.add_module("2", nn.ReLU())
    # roi_head.add_module("3", TwoMLPHead(out_channels * floor(roi_pooling_output_size / 2) ** 2, dim_roi_features))

    strides = (2**2, 2**3, 2**4, 2**5, 2**6)  # strides of the P* feature levels
    sizes = [(ceil(image_size[0] / i), ceil(image_size[1] / i))
             for i in strides]
    sizes = tuple(sizes)
    scales = ((32**2, ), (64**2, ), (128**2, ), (256**2, ), (512**2, ))
    ratios = ((0.5, 1, 2), ) * len(scales)

    return FasterRCNN(
        backbone=backbone,
        roi_head=roi_head,
        dim_roi_features=dim_roi_features,
        image_size=image_size,
        num_classes=num_classes,
        strides=strides,
        sizes=sizes,
        scales=scales,
        ratios=ratios,
        rpn_in_channels=rpn_in_channels,
        max_objs_per_image=max_objs_per_image,
        roi_pooling="roi_align",
        roi_pooling_output_size=roi_pooling_output_size,
        obj_thresh=obj_thresh,
        logger=logger,
    )
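A quick check of the in_channels_list arithmetic above: resnet.inplanes ends up as 2048 for resnet50 and 512 for resnet18 after construction, so the list covers C2 through C5:

from torchvision import models

for name in ('resnet18', 'resnet50'):
    net = models.resnet.__dict__[name](pretrained=False)
    c2 = net.inplanes // 8
    print(name, [c2, c2 * 2, c2 * 4, c2 * 8])
# resnet18 [64, 128, 256, 512]; resnet50 [256, 512, 1024, 2048]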
Example no. 13
box_detections_per_img = 100
box_fg_iou_thresh = 0.5
box_bg_iou_thresh = 0.5
box_batch_size_per_image = 256
box_positive_fraction = 0.25
bbox_reg_weights = None

if box_roi_pool is None:
    box_roi_pool = MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'],
                                      output_size=7,
                                      sampling_ratio=2)

if box_head is None:
    resolution = box_roi_pool.output_size[0]
    representation_size = 1024
    box_head = TwoMLPHead(256 * resolution**2, representation_size)

if box_predictor is None:
    representation_size = 1024
    box_predictor = FastRCNNPredictor(representation_size, num_classes=21)

rpn = RPN()
roi_heads = RoIHeads(
    # Box
    box_roi_pool,
    box_head,
    box_predictor,
    box_fg_iou_thresh,
    box_bg_iou_thresh,
    box_batch_size_per_image,
    box_positive_fraction,
Example no. 14
    def __init__(self,
                 train_data,
                 mode='sgcls',
                 require_overlap_det=True,
                 use_bias=False,
                 test_bias=False,
                 backbone='vgg16',
                 RELS_PER_IMG=1024,
                 min_size=None,
                 max_size=None,
                 edge_model='motifs'):
        """
        Base class for an SGG model
        :param mode: (sgcls, predcls, or sgdet)
        :param require_overlap_det: Whether two objects must intersect
        """
        super(RelModelBase, self).__init__()
        self.classes = train_data.ind_to_classes
        self.rel_classes = train_data.ind_to_predicates
        self.mode = mode
        self.backbone = backbone
        self.RELS_PER_IMG = RELS_PER_IMG
        self.pool_sz = 7
        self.stride = 16

        self.use_bias = use_bias
        self.test_bias = test_bias

        self.require_overlap = require_overlap_det and self.mode == 'sgdet'

        if self.backbone == 'resnet50':
            self.obj_dim = 1024
            self.fmap_sz = 21

            if min_size is None:
                min_size = 1333
            if max_size is None:
                max_size = 1333

            print('\nLoading COCO pretrained model maskrcnn_resnet50_fpn...\n')
            # See https://pytorch.org/tutorials/intermediate/torchvision_tutorial.html
            self.detector = torchvision.models.detection.maskrcnn_resnet50_fpn(
                pretrained=True,
                min_size=min_size,
                max_size=max_size,
                box_detections_per_img=50,
                box_score_thresh=0.2)
            in_features = self.detector.roi_heads.box_predictor.cls_score.in_features
            # replace the pre-trained head with a new one
            self.detector.roi_heads.box_predictor = FastRCNNPredictor(
                in_features, len(self.classes))
            self.detector.roi_heads.mask_predictor = None

            layers = list(self.detector.roi_heads.children())[:2]
            self.roi_fmap_obj = copy.deepcopy(layers[1])
            self.roi_fmap = copy.deepcopy(layers[1])
            self.roi_pool = copy.deepcopy(layers[0])

        elif self.backbone == 'vgg16':
            self.obj_dim = 4096
            self.fmap_sz = 38

            if min_size is None:
                min_size = IM_SCALE
            if max_size is None:
                max_size = IM_SCALE

            vgg = load_vgg(use_dropout=False,
                           use_relu=False,
                           use_linear=True,
                           pretrained=False)
            vgg.features.out_channels = 512
            anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256,
                                                       512), ),
                                               aspect_ratios=((0.5, 1.0,
                                                               2.0), ))

            roi_pooler = torchvision.ops.MultiScaleRoIAlign(
                featmap_names=['0'],
                output_size=self.pool_sz,
                sampling_ratio=2)

            self.detector = FasterRCNN(vgg.features,
                                       min_size=min_size,
                                       max_size=max_size,
                                       rpn_anchor_generator=anchor_generator,
                                       box_head=TwoMLPHead(
                                           vgg.features.out_channels *
                                           self.pool_sz**2, self.obj_dim),
                                       box_predictor=FastRCNNPredictor(
                                           self.obj_dim,
                                           len(train_data.ind_to_classes)),
                                       box_roi_pool=roi_pooler,
                                       box_detections_per_img=50,
                                       box_score_thresh=0.2)

            self.roi_fmap = nn.Sequential(nn.Flatten(), vgg.classifier)
            self.roi_fmap_obj = load_vgg(pretrained=False).classifier
            self.roi_pool = copy.deepcopy(
                list(self.detector.roi_heads.children())[0])

        else:
            raise NotImplementedError(self.backbone)

        self.edge_dim = self.detector.backbone.out_channels

        self.union_boxes = UnionBoxesAndFeats(pooling_size=self.pool_sz,
                                              stride=self.stride,
                                              dim=self.edge_dim,
                                              edge_model=edge_model)
        if self.use_bias:
            self.freq_bias = FrequencyBias(train_data)
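The list(...children())[:2] trick above works because RoIHeads registers its submodules in the order box_roi_pool, box_head, box_predictor (the matcher and sampler are plain Python objects, not modules); a sketch confirming the ordering:

import torchvision

det = torchvision.models.detection.maskrcnn_resnet50_fpn(
    pretrained=False, pretrained_backbone=False)
kids = list(det.roi_heads.children())
print([type(m).__name__ for m in kids[:2]])
# ['MultiScaleRoIAlign', 'TwoMLPHead']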
Example no. 15
    def __init__(
        self,
        box_roi_pool,
        box_head,
        box_predictor,
        # Faster R-CNN training
        fg_iou_thresh,
        bg_iou_thresh,
        batch_size_per_image,
        positive_fraction,
        bbox_reg_weights,
        # Faster R-CNN inference
        score_thresh,
        nms_thresh,
        detections_per_img,
        out_channels,
        # Mask
        mask_roi_pool=None,
        mask_head=None,
        mask_predictor=None,
        keypoint_roi_pool=None,
        keypoint_head=None,
        keypoint_predictor=None,
        pose_mean=None,
        pose_stddev=None,
        threed_68_points=None,
        threed_5_points=None,
        bbox_x_factor=1.1,
        bbox_y_factor=1.1,
        expand_forehead=0.3,
    ):
        super(RoIHeads, self).__init__()

        self.box_similarity = box_ops.box_iou
        # assign ground-truth boxes for each proposal
        self.proposal_matcher = det_utils.Matcher(
            fg_iou_thresh, bg_iou_thresh, allow_low_quality_matches=False)

        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image, positive_fraction)

        if bbox_reg_weights is None:
            bbox_reg_weights = (10.0, 10.0, 5.0, 5.0)
        self.box_coder = det_utils.BoxCoder(bbox_reg_weights)

        self.box_roi_pool = box_roi_pool
        self.box_head = box_head
        self.box_predictor = box_predictor

        num_classes = 2
        self.class_roi_pool = MultiScaleRoIAlign(
            featmap_names=["0", "1", "2", "3"],
            output_size=7,
            sampling_ratio=2)
        resolution = box_roi_pool.output_size[0]
        representation_size = 1024
        self.class_head = TwoMLPHead(out_channels * resolution**2,
                                     representation_size)
        self.class_predictor = FastRCNNClassPredictor(representation_size,
                                                      num_classes)
        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.detections_per_img = detections_per_img
        self.mask_roi_pool = mask_roi_pool
        self.mask_head = mask_head
        self.mask_predictor = mask_predictor

        self.keypoint_roi_pool = keypoint_roi_pool
        self.keypoint_head = keypoint_head
        self.keypoint_predictor = keypoint_predictor

        self.pose_mean = pose_mean
        self.pose_stddev = pose_stddev
        self.threed_68_points = threed_68_points
        self.threed_5_points = threed_5_points

        self.bbox_x_factor = bbox_x_factor
        self.bbox_y_factor = bbox_y_factor
        self.expand_forehead = expand_forehead
Example no. 16
    def __init__(self,
                 out_channels,
                 num_classes,
                 input_mode,
                 acf_head,
                 fg_iou_thresh=0.5,
                 bg_iou_thresh=0.5,
                 batch_size_per_image=512,
                 positive_fraction=0.25,
                 bbox_reg_weights=None,
                 box_score_thresh=0.05,
                 box_nms_thresh=0.5,
                 box_detections_per_img=100):
        super(RoIHeadsExtend, self).__init__()

        self.in_channels = out_channels
        self.input_mode = input_mode
        self.score_thresh = box_score_thresh
        self.nms_thresh = box_nms_thresh
        self.detections_per_img = box_detections_per_img
        self.fg_iou_thresh = fg_iou_thresh
        self.bg_iou_thresh = bg_iou_thresh
        self.batch_size_per_image = batch_size_per_image
        self.positive_fraction = positive_fraction
        self.num_classes = num_classes

        # Detection
        self.box_similarity = box_ops.box_iou
        # assign ground-truth boxes for each proposal
        self.proposal_matcher = det_utils.Matcher(
            fg_iou_thresh, bg_iou_thresh, allow_low_quality_matches=False)

        self.fg_bg_sampler = det_utils.BalancedPositiveNegativeSampler(
            batch_size_per_image, positive_fraction)

        if bbox_reg_weights is None:
            bbox_reg_weights = (10., 10., 5., 5.)
        self.box_coder = det_utils.BoxCoder(bbox_reg_weights)

        self.box_roi_pool = MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3],
                                               output_size=7,
                                               sampling_ratio=2)

        representation_size = 1024
        resolution = self.box_roi_pool.output_size[0]
        self.box_head = TwoMLPHead(out_channels * resolution**2,
                                   representation_size)

        self.box_predictor = FastRCNNPredictor(representation_size,
                                               num_classes)

        # Segmentation
        self.shared_roi_pool = MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3],
                                                  output_size=14,
                                                  sampling_ratio=2)
        resolution = self.shared_roi_pool.output_size[0]

        mask_layers = (256, 256, 256, 256, 256, 256, 256, 256)
        mask_dilation = 1
        self.mask_head = MaskRCNNHeads(out_channels, mask_layers,
                                       mask_dilation)

        mask_predictor_in_channels = 256  # == mask_layers[-1]
        mask_dim_reduced = 256
        self.mask_predictor = MaskRCNNPredictor(mask_predictor_in_channels,
                                                mask_dim_reduced, num_classes)

        self.with_paf_branch = True
        if self.with_paf_branch:
            self.paf_head = MaskRCNNHeads(out_channels, mask_layers,
                                          mask_dilation)
            self.paf_predictor = MaskRCNNPredictor(mask_predictor_in_channels,
                                                   mask_dim_reduced,
                                                   2 * (num_classes - 1))

        if self.input_mode == config.INPUT_RGBD:
            self.attention_block = ContextBlock(256, 2)
            self.global_feature_dim = 256
            self.with_3d_keypoints = True
            self.with_axis_keypoints = False
            self.regress_axis = False
            self.estimate_norm_vector = False
            if acf_head == 'endpoints':
                self.with_axis_keypoints = True
            elif acf_head == 'scatters':
                self.regress_axis = True
            elif acf_head == 'norm_vector':
                self.estimate_norm_vector = True
            else:
                print("Don't assign a vaild acf head")
                exit()
            keypoint_layers = (256, ) * 4
            self.keypoint_dim_reduced = keypoint_layers[-1]
            if self.with_3d_keypoints:
                self.vote_keypoint_head = Vote_Kpoints_head(
                    self.global_feature_dim, keypoint_layers, "conv2d")
                self.vote_keypoint_predictor = Vote_Kpoints_Predictor(
                    self.keypoint_dim_reduced, 3 * (num_classes - 1))
            if self.with_axis_keypoints:
                self.orientation_keypoint_head = Vote_Kpoints_head(
                    self.global_feature_dim, keypoint_layers, "conv2d")

                self.orientation_keypoint_predictor = Vote_Kpoints_Predictor(
                    self.keypoint_dim_reduced, 6 * (num_classes - 1))

            if self.regress_axis:
                self.axis_head = Vote_Kpoints_head(self.global_feature_dim,
                                                   keypoint_layers, "conv2d")
                self.axis_predictor = Vote_Kpoints_Predictor(
                    self.keypoint_dim_reduced, 4 * (num_classes - 1))

            if self.estimate_norm_vector:
                self.norm_vector_head = Vote_Kpoints_head(
                    self.global_feature_dim, keypoint_layers, "conv2d")
                self.norm_vector_predictor = Vote_Kpoints_Predictor(
                    self.keypoint_dim_reduced, 3 * (num_classes - 1))
Example no. 17
    def __init__(
            self,
            backbone,
            num_ID,
            num_classes=2,
            len_embeddings=128,
            # transform parameters
            min_size=720,
            max_size=960,
            image_mean=None,
            image_std=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=2000,
            rpn_pre_nms_top_n_test=1000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=1000,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.5,
            rpn_bg_iou_thresh=0.4,
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            # Box parameters
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.5,
            box_detections_per_img=100,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.5,
            box_batch_size_per_image=512,
            box_positive_fraction=0.25,
            bbox_reg_weights=None):

        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)")

        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        out_channels = backbone.out_channels

        if rpn_anchor_generator is None:
            anchor_sizes = ((16, 22), (32, 45), (64, 90), (128, 181), (256,
                                                                       362))
            aspect_ratios = ((1 / 3, ), ) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        rpn = RegionProposalNetwork(rpn_anchor_generator, rpn_head,
                                    rpn_fg_iou_thresh, rpn_bg_iou_thresh,
                                    rpn_batch_size_per_image,
                                    rpn_positive_fraction, rpn_pre_nms_top_n,
                                    rpn_post_nms_top_n, rpn_nms_thresh)

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3],
                                              output_size=7,
                                              sampling_ratio=2)

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(out_channels * resolution**2,
                                  representation_size)

        emb_scale = math.sqrt(2) * math.log(num_ID - 1) if num_ID > 1 else 1

        if box_predictor is None:
            representation_size = 1024
            box_predictor = JDEPredictor(representation_size, num_classes,
                                         len_embeddings, emb_scale)

        roi_heads = JDE_RoIHeads(
            # Box
            box_roi_pool,
            box_head,
            box_predictor,
            box_fg_iou_thresh,
            box_bg_iou_thresh,
            box_batch_size_per_image,
            box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh,
            box_nms_thresh,
            box_detections_per_img,
            len_embeddings,
            num_ID)

        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean,
                                             image_std)

        super(Jde_RCNN, self).__init__(backbone, rpn, roi_heads, transform)
        self.eval_embed = False
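The emb_scale factor above grows logarithmically with the number of identities; a sketch of the arithmetic (the num_ID values are illustrative):

import math

for num_ID in (2, 100, 10000):
    emb_scale = math.sqrt(2) * math.log(num_ID - 1) if num_ID > 1 else 1
    print(num_ID, round(emb_scale, 3))  # 0.0, 6.498, 13.025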
Example no. 18
    def __init__(self):
        super(FasterRCNN, self).__init__()
        # Define FPN
        self.fpn = resnet_fpn_backbone(backbone_name='resnet101',
                                       pretrained=True)
        anchor_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
        aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
        # Generate anchor boxes
        anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        # Define RPN Head
        # rpn_head = RPNHead(256, 9)
        rpn_head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
        # RPN parameters,
        rpn_pre_nms_top_n_train = 2000
        rpn_pre_nms_top_n_test = 1000
        rpn_post_nms_top_n_train = 2000
        rpn_post_nms_top_n_test = 1000
        rpn_nms_thresh = 0.7
        rpn_fg_iou_thresh = 0.7
        rpn_bg_iou_thresh = 0.3
        # rpn_nms_thresh = 0.45
        # rpn_fg_iou_thresh = 0.5
        # rpn_bg_iou_thresh = 0.5
        rpn_batch_size_per_image = 256
        rpn_positive_fraction = 0.5

        # transform parameters
        min_size = 800
        max_size = 1333
        image_mean = [0.485, 0.456, 0.406]
        image_std = [0.229, 0.224, 0.225]
        self.transform = GeneralizedRCNNTransform(min_size, max_size,
                                                  image_mean, image_std)

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        # Create RPN
        self.rpn = RegionProposalNetwork(anchor_generator, rpn_head,
                                         rpn_fg_iou_thresh, rpn_bg_iou_thresh,
                                         rpn_batch_size_per_image,
                                         rpn_positive_fraction,
                                         rpn_pre_nms_top_n, rpn_post_nms_top_n,
                                         rpn_nms_thresh)

        # Box parameters
        box_roi_pool = None
        box_head = None
        box_predictor = None
        box_score_thresh = 0.05
        box_nms_thresh = 0.5
        box_detections_per_img = 100
        box_fg_iou_thresh = 0.5
        box_bg_iou_thresh = 0.5
        box_batch_size_per_image = 512
        box_positive_fraction = 0.25
        bbox_reg_weights = None
        num_classes = 101

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=['0', '1', '2', '3'],
                output_size=7,
                sampling_ratio=2)

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(256 * resolution**2, representation_size)

        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(representation_size, num_classes)

        self.roi_heads = RoIHeads(
            # Box
            box_roi_pool,
            box_head,
            box_predictor,
            box_fg_iou_thresh,
            box_bg_iou_thresh,
            box_batch_size_per_image,
            box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh,
            box_nms_thresh,
            box_detections_per_img)
Example no. 19
    def __init__(self, backbone, num_classes=None,
                 # transform parameters
                 min_size=800, max_size=1333,
                 image_mean=None, image_std=None,
                 # RPN parameters
                 rpn_anchor_generator=None, rpn_head=None,
                 rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,
                 rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,
                 rpn_nms_thresh=0.7,
                 rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
                 rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
                 # Box parameters
                 box_roi_pool=None, box_head=None, box_predictor=None,
                 box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
                 box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,
                 box_batch_size_per_image=512, box_positive_fraction=0.25,
                 bbox_reg_weights=None,
                 # Pose parameters
                 pose_roi_pool=None, pose_head=None, pose_predictor=None,
                 # Translation
                 translation_head=None, translation_predictor=None):

        assert isinstance(pose_roi_pool, (MultiScaleRoIAlign, type(None)))

        if num_classes is not None:
            if pose_predictor is not None:
                raise ValueError("num_classes should be None when mask_predictor is specified")

        out_channels = backbone.out_channels
        # Pose
        if pose_roi_pool is None:
            pose_roi_pool = MultiScaleRoIAlign(
                featmap_names=[0, 1, 2, 3],
                output_size=7,
                sampling_ratio=2)

        if pose_head is None:
            resolution = pose_roi_pool.output_size[0]    # 7
            representation_size = 1024
            pose_head = TwoMLPHead(
                out_channels * resolution ** 2,
                representation_size)

        representation_size = 1024
        pose_predictor = PoseRCNNPredictor(representation_size, num_classes)

        # Translation
        if translation_head is None:
            translation_head = MLPFeatureExtractor()

        if translation_predictor is None:
            translation_predictor = MLPCONCATPredictor(representation_size)

        super(PoseRCNN, self).__init__(
            backbone, num_classes,
            # transform parameters
            min_size, max_size,
            image_mean, image_std,
            # RPN-specific parameters
            rpn_anchor_generator, rpn_head,
            rpn_pre_nms_top_n_train, rpn_pre_nms_top_n_test,
            rpn_post_nms_top_n_train, rpn_post_nms_top_n_test,
            rpn_nms_thresh,
            rpn_fg_iou_thresh, rpn_bg_iou_thresh,
            rpn_batch_size_per_image, rpn_positive_fraction,
            # Box parameters
            box_roi_pool, box_head, box_predictor,
            box_score_thresh, box_nms_thresh, box_detections_per_img,
            box_fg_iou_thresh, box_bg_iou_thresh,
            box_batch_size_per_image, box_positive_fraction,
            bbox_reg_weights)

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=[0, 1, 2, 3],
                output_size=7,
                sampling_ratio=2)

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(
                out_channels * resolution ** 2,
                representation_size)

        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(
                representation_size,
                num_classes)

        self.roi_heads = MyRoIHeads(
            # Box
            box_roi_pool, box_head, box_predictor,
            box_fg_iou_thresh, box_bg_iou_thresh,
            box_batch_size_per_image, box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh, box_nms_thresh, box_detections_per_img,
            pose_roi_pool=pose_roi_pool,
            pose_head=pose_head,
            pose_predictor=pose_predictor,
            translation_head=translation_head,
            translation_predictor=translation_predictor)
Example no. 20
    def build_model(cls, args, task):
        """Build a new model instance."""
        # make sure that all args are properly defaulted (in case there are any new ones)
        base_architecture(args)

        rpn_anchor_generator = task.rpn_anchor_generator
        rpn_head = task.rpn_head
        box_roi_pool = task.box_roi_pool
        box_predictor = task.box_predictor
        box_head = task.box_head

        # setup backbone
        backbone = resnet_fpn_backbone(args.backbone, args.backbone_pretrained)

        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)"
            )

        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        if task.num_classes > 0:
            if box_predictor is not None:
                raise ValueError("num_classes should be -1 when box_predictor is specified")
        else:
            if box_predictor is None:
                raise ValueError("num_classes should be > 0 when box_predictor is not specified")

        out_channels = backbone.out_channels

        if rpn_anchor_generator is None:
            anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
            aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0],
            )

        rpn_pre_nms_top_n = dict(training=args.rpn_pre_nms_top_n_train, testing=args.rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=args.rpn_post_nms_top_n_train, testing=args.rpn_post_nms_top_n_test)

        rpn = RPN(
            rpn_anchor_generator, rpn_head,
            args.rpn_fg_iou_thresh, args.rpn_bg_iou_thresh,
            args.rpn_batch_size_per_image, args.rpn_positive_fraction,
            rpn_pre_nms_top_n, rpn_post_nms_top_n, args.rpn_nms_thresh,
        )

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=[0, 1, 2, 3],
                output_size=7,
                sampling_ratio=2,
            )

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(
                out_channels * resolution ** 2,
                representation_size,
            )

        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(
                representation_size,
                task.num_classes,
            )

        roi_heads = RegionOfInterestHeads(
            # Box
            box_roi_pool, box_head, box_predictor,
            args.box_fg_iou_thresh, args.box_bg_iou_thresh,
            args.box_batch_size_per_image, args.box_positive_fraction,
            args.bbox_reg_weights, args.box_score_thresh,
            args.box_nms_thresh, args.box_detections_per_img,
        )

        if args.image_mean is None:
            args.image_mean = [0.485, 0.456, 0.406]
        if args.image_std is None:
            args.image_std = [0.229, 0.224, 0.225]
        transform = GeneralizedRCNNTransform(
            args.min_size, args.max_size,
            args.image_mean, args.image_std,
        )

        return cls(backbone, rpn, roi_heads, transform)
Example no. 21
	def __init__(self):
		super(FasterRCNN, self).__init__()
		# Define FPN
		self.fpn = resnet_fpn_backbone(backbone_name='resnet101', pretrained=True)
		self.rpn = RPN()

		# transform parameters
		min_size = 800
		max_size = 1333
		image_mean = [0.485, 0.456, 0.406]
		image_std = [0.229, 0.224, 0.225]
		self.transform = GeneralizedRCNNTransform(
			min_size, max_size, image_mean, image_std)

		# Box parameters
		box_roi_pool=None
		box_head=None
		box_predictor=None
		box_score_thresh=0.5
		box_nms_thresh=0.5
		box_detections_per_img=100
		box_fg_iou_thresh=0.5
		box_bg_iou_thresh=0.5
		box_batch_size_per_image=512
		box_positive_fraction=0.25
		bbox_reg_weights=None
		num_classes=101

		if box_roi_pool is None:
			box_roi_pool = MultiScaleRoIAlign(
				featmap_names=['0', '1', '2', '3'],
				output_size=7,
				sampling_ratio=2)

		if box_head is None:
			resolution = box_roi_pool.output_size[0]
			representation_size = 1024
			box_head = TwoMLPHead(
				256 * resolution ** 2,
				representation_size)

		if box_predictor is None:
			representation_size = 1024
			box_predictor = FastRCNNPredictor(
				representation_size,
				num_classes)

		# initialize word vectors
		ds_name =  '/Users/pranoyr/Downloads/GoogleNews-vectors-negative300.bin'
		# ds_name =  '/home/neuroplex/data/GoogleNews-vectors-negative300.bin'
		self.obj_vecs, self.prd_vecs = get_obj_prd_vecs(ds_name, dataset_path)

		self.RelDN = reldn_heads.reldn_head(box_head.fc7.out_features * 3, self.obj_vecs, self.prd_vecs)  # concat of SPO

		self.roi_heads = RoIHeads(
			# Box
			self.RelDN,
			box_roi_pool, box_head, box_predictor,
			box_fg_iou_thresh, box_bg_iou_thresh,
			box_batch_size_per_image, box_positive_fraction,
			bbox_reg_weights,
			box_score_thresh, box_nms_thresh, box_detections_per_img)
Example no. 22
def main(config, main_step):
    devices = ['cpu', 'cuda']

    # use pretrained?
    use_pretrained_model = config.use_pretrained_model
    pretrained_model = config.model
    if use_pretrained_model and pretrained_model is None:
        print("Model not provided, training from scratch")
        use_pretrained_model = False
    if not use_pretrained_model and pretrained_model is not None:
        print("It seems you want to load the weights")
        use_pretrained_model = True
        backbone = False
    #
    if use_pretrained_model:
        model = torch.load(pretrained_model)
    # import arguments from the config file
    start_epoch, model_name, backbone, num_epochs, save_dir, train_data_dir, val_data_dir, imgs_dir, gt_dir, batch_size, device, save_every, lrate = \
        config.start_epoch, config.model_name, config.use_pretrained_resnet_backbone, config.num_epochs, config.save_dir, \
        config.train_data_dir, config.val_data_dir, config.imgs_dir, config.gt_dir, config.batch_size, config.device, config.save_every, config.lrate

    if use_pretrained_model:
        backbone = False

    assert device in devices
    if save_dir not in os.listdir('.'):
        os.mkdir(save_dir)

    if batch_size > 1:
        print("The model was implemented for batch size of one")
    if device == 'cuda' and torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    print(device)
    torch.manual_seed(time.time())
    ##############################################################################################
    # DATASETS+DATALOADERS
    # Alex: could be added in the config file in the future
    # parameters for the dataset
    dataset_covid_pars_train = {
        'stage': 'train',
        'gt': os.path.join(train_data_dir, gt_dir),
        'data': os.path.join(train_data_dir, imgs_dir)
    }
    datapoint_covid_train = dataset.CovidCTData(**dataset_covid_pars_train)

    dataset_covid_pars_eval = {
        'stage': 'eval',
        'gt': os.path.join(val_data_dir, gt_dir),
        'data': os.path.join(val_data_dir, imgs_dir)
    }
    datapoint_covid_eval = dataset.CovidCTData(**dataset_covid_pars_eval)
    ###############################################################################################
    dataloader_covid_pars_train = {'shuffle': True, 'batch_size': batch_size}
    dataloader_covid_train = data.DataLoader(datapoint_covid_train,
                                             **dataloader_covid_pars_train)
    #
    dataloader_covid_pars_eval = {'shuffle': True, 'batch_size': batch_size}
    dataloader_covid_eval = data.DataLoader(datapoint_covid_eval,
                                            **dataloader_covid_pars_eval)
    ###############################################################################################
    # MASK R-CNN model
    # Alex: these settings could also be added to the config
    maskrcnn_args = {
        'min_size': 512,
        'max_size': 1024,
        'rpn_batch_size_per_image': 1024,
        'rpn_positive_fraction': 0.75,
        'box_positive_fraction': 0.75,
        'box_fg_iou_thresh': 0.75,
        'box_bg_iou_thresh': 0.5,
        'num_classes': None,
        'box_batch_size_per_image': 1024,
        'box_nms_thresh': 0.75,
        'rpn_nms_thresh': 0.75
    }

    # Alex: for ground-glass opacity and consolidation segmentation
    # many small anchors
    # use all outputs of FPN
    # IMPORTANT: for the pretrained weights, this determines the size of the
    # anchor layer in the RPN, so the pretrained model must use matching anchors
    if not use_pretrained_model:
        anchor_generator = AnchorGenerator(sizes=tuple([(2, 4, 8, 16, 32)
                                                        for r in range(5)]),
                                           aspect_ratios=tuple([
                                               (0.1, 0.25, 0.5, 1, 1.5, 2)
                                               for rh in range(5)
                                           ]))
    else:
        sizes = model['anchor_generator'].sizes
        aspect_ratios = model['anchor_generator'].aspect_ratios
        anchor_generator = AnchorGenerator(sizes, aspect_ratios)

    # num_classes = 3: background + two lesion classes (GGO and consolidation)
    box_head_input_size = 256 * 7 * 7
    box_head = TwoMLPHead(in_channels=box_head_input_size,
                          representation_size=128)
    box_predictor = FastRCNNPredictor(in_channels=128, num_classes=3)
    mask_roi_pool = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0', '1', '2', '3'], output_size=14, sampling_ratio=2)
    # in_channels=256 matches the FPN output channels feeding the mask head
    mask_predictor = MaskRCNNPredictor(in_channels=256,
                                       dim_reduced=256,
                                       num_classes=3)

    maskrcnn_args['rpn_anchor_generator'] = anchor_generator
    maskrcnn_args['mask_roi_pool'] = mask_roi_pool
    maskrcnn_args['mask_predictor'] = mask_predictor
    maskrcnn_args['box_predictor'] = box_predictor
    maskrcnn_args['box_head'] = box_head
    # Instantiate the segmentation model
    maskrcnn_model = torchvision.models.detection.maskrcnn_resnet50_fpn(
        pretrained=False,
        pretrained_backbone=backbone,
        progress=True,
        **maskrcnn_args)
    # Resume weights, epoch, and model name from the loaded checkpoint if provided
    if use_pretrained_model:
        maskrcnn_model.load_state_dict(model['model_weights'])
        if model['epoch']:
            start_epoch = int(model['epoch'])
        if model['model_name']:
            model_name = model['model_name']

    print(maskrcnn_model)
    # Set to training mode and move to the target device
    maskrcnn_model.train().to(device)

    optimizer_pars = {'lr': lrate, 'weight_decay': 1e-3}
    optimizer = torch.optim.Adam(maskrcnn_model.parameters(), **optimizer_pars)
    if use_pretrained_model and model['optimizer_state']:
        optimizer.load_state_dict(model['optimizer_state'])

    start_time = time.time()

    for e in range(start_epoch, num_epochs):
        train_loss_epoch = main_step("train", e, dataloader_covid_train,
                                     optimizer, device, maskrcnn_model,
                                     save_every, lrate, model_name, None, None)
        eval_loss_epoch = main_step("eval", e, dataloader_covid_eval,
                                    optimizer, device, maskrcnn_model,
                                    save_every, lrate, model_name,
                                    anchor_generator, save_dir)
        print("Epoch {0:d}: train loss = {1:.3f}, validation loss = {2:.3f}".
              format(e, train_loss_epoch, eval_loss_epoch))
    end_time = time.time()
    print("Training took {0:.1f} seconds".format(end_time - start_time))
Example no. 23
    def __init__(
            self,
            backbone,
            num_classes=None,
            # transform parameters
            min_size=800,
            max_size=1333,
            image_mean=None,
            image_std=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=2000,
            rpn_pre_nms_top_n_test=1000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=1000,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.7,
            rpn_bg_iou_thresh=0.3,
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            # Box parameters
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.5,
            box_detections_per_img=100,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.5,
            box_batch_size_per_image=512,
            box_positive_fraction=0.25,
            bbox_reg_weights=None):

        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)")

        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        if num_classes is not None:
            if box_predictor is not None:
                raise ValueError(
                    "num_classes should be None when box_predictor is specified"
                )
        else:
            if box_predictor is None:
                raise ValueError(
                    "num_classes should not be None when box_predictor "
                    "is not specified")

        out_channels = backbone.out_channels

        if rpn_anchor_generator is None:
            anchor_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
            aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        rpn = RegionProposalNetwork(rpn_anchor_generator, rpn_head,
                                    rpn_fg_iou_thresh, rpn_bg_iou_thresh,
                                    rpn_batch_size_per_image,
                                    rpn_positive_fraction, rpn_pre_nms_top_n,
                                    rpn_post_nms_top_n, rpn_nms_thresh)

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=['0', '1', '2', '3'],
                output_size=7,
                sampling_ratio=2)

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(out_channels * resolution**2,
                                  representation_size)

        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(representation_size, num_classes)
        roi_heads = RoIHeads(  # Box
            box_roi_pool, box_head, box_predictor, box_fg_iou_thresh,
            box_bg_iou_thresh, box_batch_size_per_image, box_positive_fraction,
            bbox_reg_weights, box_score_thresh, box_nms_thresh,
            box_detections_per_img)

        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean,
                                             image_std)
        super(FasterRCNN, self).__init__(backbone, rpn, roi_heads, transform)
        self.ssm = False
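
# A minimal usage sketch (an assumption, not part of the original listing):
# build an FPN backbone that exposes out_channels, instantiate the class whose
# __init__ is shown above, and run a forward pass in eval mode.
import torch
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone

backbone = resnet_fpn_backbone('resnet50', pretrained=True)  # out_channels == 256
model = FasterRCNN(backbone, num_classes=91)
model.eval()
with torch.no_grad():
    # returns one dict per image with 'boxes', 'labels', and 'scores'
    predictions = model([torch.rand(3, 512, 512)])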
Example no. 24
    def __init__(
            self,
            backbone,
            num_ID,
            num_classes=2,
            version='v1',
            # transform parameters
            min_size=800,
            max_size=1333,
            image_mean=None,
            image_std=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=2000,
            rpn_pre_nms_top_n_test=1000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=1000,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.5,
            rpn_bg_iou_thresh=0.4,  # FIXME: these two thresholds follow the paper "Towards Real-Time Multi-Object Tracking"
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            # Box parameters
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.5,
            box_detections_per_img=100,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.5,
            box_batch_size_per_image=256,
            box_positive_fraction=0.25,
            bbox_reg_weights=None,
            # Embedding parameters  ## FIXME: newly added parameters
            len_embeddings=128,
            embed_head=None,
            embed_extractor=None):

        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)")

        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        out_channels = backbone.out_channels

        ## FIXME: changed the anchor sizes, and only anchors with aspect ratio
        ## 1/3 are used, following "Towards Real-Time Multi-Object Tracking"
        if rpn_anchor_generator is None:
            anchor_sizes = ((16, 22), (32, 45), (64, 90), (128, 181), (256, 362))
            aspect_ratios = ((1 / 3, ), ) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        rpn = RegionProposalNetwork(rpn_anchor_generator, rpn_head,
                                    rpn_fg_iou_thresh, rpn_bg_iou_thresh,
                                    rpn_batch_size_per_image,
                                    rpn_positive_fraction, rpn_pre_nms_top_n,
                                    rpn_post_nms_top_n, rpn_nms_thresh)

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'],
                                              output_size=11,
                                              sampling_ratio=2)

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(out_channels * resolution**2,
                                  representation_size)

        # Embedding scale for the ReID branch (degenerates to 1 for a single identity)
        emb_scale = math.sqrt(2) * math.log(num_ID - 1) if num_ID > 1 else 1

        ## FIXME: v1 is what is currently used
        if embed_head is None:
            if version == 'v1':
                resolution = box_roi_pool.output_size[0]
                representation_size = 1024
                embed_head = featureHead(out_channels * resolution**2,
                                         representation_size)
            if version == 'v2':
                embed_head = None

        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(representation_size, num_classes)

        if embed_extractor is None:
            representation_size = 1024
            embed_extractor = featureExtractor(representation_size,
                                               len_embeddings, emb_scale)

        roi_heads = JDE_RoIHeads(
            # Box
            box_roi_pool,
            box_head,
            box_predictor,
            box_fg_iou_thresh,
            box_bg_iou_thresh,
            box_batch_size_per_image,
            box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh,
            box_nms_thresh,
            box_detections_per_img,
            len_embeddings,
            num_ID,
            embed_head,
            embed_extractor)
        roi_heads.version = version

        # FIXME: this block is copied verbatim from the Faster R-CNN code ###################
        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean,
                                             image_std)
        ###########################################################

        super(Jde_RCNN, self).__init__(backbone, rpn, roi_heads, transform)
        ## FIXME: attributes used at tracking time; not relevant to training
        self.version = version
        self.original_image_sizes = None
        self.preprocessed_images = None
        self.features = None
        self.box_features = None
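
# A minimal instantiation sketch (an assumption based on the signature above;
# featureHead, featureExtractor, and JDE_RoIHeads are project-local modules):
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone

backbone = resnet_fpn_backbone('resnet50', pretrained=True)
# num_ID is the number of distinct track identities in the training set
tracker = Jde_RCNN(backbone, num_ID=500, num_classes=2, version='v1')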