Example No. 1
    def __init__(self, backbone, num_classes,
        min_size=800, max_size=1333,
        image_mean=None, image_std=None,
        anchor_generator=None, head=None,
        proposal_matcher=None,
        score_thresh=0.05,
        nms_thresh=0.5,
        detections_per_img=300,
        fg_iou_thresh=0.5, bg_iou_thresh=0.4,
        topk_candidates=1000):

        super(RetinaNet, self).__init__()

        if not hasattr(backbone, "out_channels"):
            raise ValueError("backbone should contain an attribute out_channels specifying the number of output channels "
                "assumed be the samefor all the levels")

        self.backbone = backbone

        assert isinstance(anchor_generator, (AnchorGenerator, type(None)))

        if anchor_generator is None:
            anchor_sizes = tuple((x, int(x * 2 ** (1.0 / 3)), int(x * 2 ** (2.0 / 3))) for x in [32, 64, 128, 256, 512])
            aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
            anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)

        self.anchor_generator = anchor_generator

        if head is None:
            head = RetinaNetHead(backbone.out_channels, anchor_generator.num_anchors_per_location()[0], num_classes)
        self.head = head

        if proposal_matcher is None:
            proposal_matcher = det_utils.Matcher(
                fg_iou_thresh,
                bg_iou_thresh,
                allow_low_quality_matches=True,
            )
        self.proposal_matcher = proposal_matcher

        self.box_coder = det_utils.BoxCoder(weights=(1.0, 1.0, 1.0, 1.0))

        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        self.transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)

        self.score_thresh = score_thresh
        self.nms_thresh = nms_thresh
        self.detections_per_img = detections_per_img
        self.topk_candidates = topk_candidates

        self.has_warned = False
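For orientation, here is a minimal usage sketch of the constructor above; the resnet_fpn_backbone helper and the input size are assumptions taken from torchvision's public API, not part of the original example:

import torch
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone

# FPN backbones built this way expose the required `out_channels` attribute.
backbone = resnet_fpn_backbone('resnet50', pretrained=False)

# All optional arguments fall back to the defaults defined in __init__ above.
model = RetinaNet(backbone, num_classes=91)
model.eval()
with torch.no_grad():
    predictions = model([torch.rand(3, 600, 800)])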
Example No. 2
def get_model():
    # load a model pre-trained on COCO
    anchor_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
    aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
    rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)

    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        min_size=500,
        max_size=833,
        pretrained=True,
        progress=True,
        box_detections_per_img=58,
        image_mean=[0.17045, 0.1338, 0.2242],
        image_std=[0.17390, 0.1502, 0.13195],
        rpn_anchor_generator=rpn_anchor_generator)

    # replace the classifier with a new one, that has
    # num_classes which is user-defined
    num_classes = len(OBJ_TYPES) + 1  # object classes + background
    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    model.to(device)

    return model
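A hedged call sketch for get_model; OBJ_TYPES and device are module-level globals read by the function, and the values below are placeholders, not from the original:

import torch

OBJ_TYPES = ['class_a', 'class_b']  # hypothetical label list
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = get_model()
model.eval()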
Example No. 3
    def test_incorrect_anchors(self):
        incorrect_sizes = ((2, 4, 8), (32, 8), )
        incorrect_aspects = (0.5, 1.0)
        anc = AnchorGenerator(incorrect_sizes, incorrect_aspects)
        image1 = torch.randn(3, 800, 800)
        image_list = ImageList(image1, [(800, 800)])
        feature_maps = [torch.randn(1, 50)]
        pytest.raises(ValueError, anc, image_list, feature_maps)
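By contrast, a configuration that passes validation pairs one sizes tuple with one aspect-ratios tuple per feature map; a short sketch with the same AnchorGenerator:

correct_sizes = ((32, 64, 128), )      # one tuple of sizes per feature map
correct_aspects = ((0.5, 1.0, 2.0), )  # one matching tuple of aspect ratios
anc = AnchorGenerator(correct_sizes, correct_aspects)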
Example No. 4
def retinanet_mobilenet(pretrained=False,
                        progress=True,
                        num_classes=91,
                        pretrained_backbone=False,
                        trainable_backbone_layers=None,
                        min_size=320,
                        max_size=640,
                        **kwargs):
    """
    Constructs a RetinaNet model with a MobileNetV3-Large backbone. It works similarly
    to RetinaNet with ResNet-50-FPN backbone. See `retinanet_resnet50_fpn` for more details.
    Example::
        >>> model = torchvision.models.detection.retinanet_mobilenet_v3_large(pretrained=True)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)
    Args:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
        num_classes (int): number of output classes of the model (including the background)
        pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
        trainable_backbone_layers (int): number of trainable (not frozen) backbone layers starting from the final block.
            Valid values are between 0 and 6, with 6 meaning all backbone layers are trainable.
        min_size (int): minimum size of the image to be rescaled before feeding it to the backbone
        max_size (int): maximum size of the image to be rescaled before feeding it to the backbone
    """
    # validate trainable_backbone_layers and default it to 6 if possible
    trainable_backbone_layers = _validate_trainable_layers(
        pretrained or pretrained_backbone, trainable_backbone_layers, 6, 6)

    if pretrained:
        pretrained_backbone = False
    backbone = mobilenet_backbone("retinanet_mobilenet_v3_large",
                                  pretrained_backbone,
                                  trainable_layers=trainable_backbone_layers)

    anchor_sizes = ((
        16,
        32,
        64,
        128,
        256,
    ), )
    aspect_ratios = ((0.5, 1.0, 2.0), )

    model = RetinaNet(backbone,
                      num_classes,
                      anchor_generator=AnchorGenerator(anchor_sizes,
                                                       aspect_ratios),
                      min_size=min_size,
                      max_size=max_size,
                      **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(
            model_urls['retinanet_mobilenet_v3_large_coco'], progress=progress)
        model.load_state_dict(state_dict)
    return model
Example No. 5
    def __init__(self, args):
        super().__init__()
        self.args = args
        anchor_sizes = tuple((x, int(x * 2**(1.0 / 3)), int(x * 2**(2.0 / 3)))
                             for x in [32, 64, 128, 256, 512])
        aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
        anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        self.anchor_generator = anchor_generator
        self.backbone = self.get_backbone(True)
        self.model = RetinaNetEmbedding(self.backbone, num_classes=2378)
        self.save_hyperparameters()
        self.teacher_model = self.get_teacher(args)
        self.tm_full = self.teacher_model.get_model()
        self.data_dir = args.data_dir

        if self.args.loss == 'CrossEntropy':
            self.loss = torch.nn.CrossEntropyLoss()
            self.loss_requires_classifier = True
Example No. 6
    def __init__(self, args):
        super().__init__()
        self.args = args
        anchor_sizes = tuple((x, int(x * 2**(1.0 / 3)), int(x * 2**(2.0 / 3)))
                             for x in [32, 64, 128, 256, 512])
        aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
        anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        self.anchor_generator = anchor_generator
        self.backbone = self.backbone1(False)
        self.head = HeadJDE(
            self.backbone.out_channels,
            self.anchor_generator.num_anchors_per_location()[0], 195,
            self.args)
        self.model = RetinaNetEmbedding(self.backbone,
                                        num_classes=195,
                                        head=self.head)
        self.save_hyperparameters()
        self.teacher_model = self.teacher(args)
        self.tm_full = self.teacher_model.get_model()
        self.tm_extractor = self.teacher_model.get_extractor()
Example No. 7
def _fasterrcnn_mobilenet_v3_large_fpn(weights_name,
                                       pretrained=False,
                                       progress=True,
                                       num_classes=91,
                                       pretrained_backbone=True,
                                       trainable_backbone_layers=None,
                                       **kwargs):
    trainable_backbone_layers = _validate_trainable_layers(
        pretrained or pretrained_backbone, trainable_backbone_layers, 6, 3)

    if pretrained:
        pretrained_backbone = False
    backbone = mobilenet_backbone("mobilenet_v3_large",
                                  pretrained_backbone,
                                  True,
                                  trainable_layers=trainable_backbone_layers)

    anchor_sizes = ((
        32,
        64,
        128,
        256,
        512,
    ), ) * 3
    aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)

    model = FasterRCNN(backbone,
                       num_classes,
                       rpn_anchor_generator=AnchorGenerator(
                           anchor_sizes, aspect_ratios),
                       **kwargs)
    if pretrained:
        if model_urls.get(weights_name, None) is None:
            raise ValueError(
                "No checkpoint is available for model {}".format(weights_name))
        state_dict = load_state_dict_from_url(model_urls[weights_name],
                                              progress=progress)
        model.load_state_dict(state_dict)
    return model
Example No. 8
def keypointrcnn_mobilenet(backbone_name, path, device):
    if backbone_name == "mobilenet_v3_large":
        backbone = torchvision.models.mobilenet_v3_large(
            pretrained=True).features
        backbone.out_channels = 960
    elif backbone_name == "mobilenet_v3_small":
        backbone = torchvision.models.mobilenet_v3_small(
            pretrained=True).features
        backbone.out_channels = 576
    elif backbone_name == "mobilenet_v2":
        backbone = torchvision.models.mobilenet_v2(pretrained=True).features
        backbone.out_channels = 1280
    else:
        raise ValueError('Unsupported backbone name: {}'.format(backbone_name))

    anchor_generator = AnchorGenerator(sizes=((16, 32, 64, 128, 256), ),
                                       aspect_ratios=((0.5, 1.0, 2.0), ))

    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)
    keypoint_roi_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0'], output_size=14, sampling_ratio=2)
    model_keypoints = KeypointRCNN(backbone,
                                   num_classes=6,
                                   num_keypoints=20,
                                   rpn_anchor_generator=anchor_generator,
                                   box_roi_pool=roi_pooler,
                                   keypoint_roi_pool=keypoint_roi_pooler)

    model_keypoints = model_keypoints.to(device)

    model_keypoints.load_state_dict(torch.load(path, map_location=device))
    model_keypoints.eval()

    return model_keypoints
Example No. 9
    def _init_test_anchor_generator(self):
        anchor_sizes = ((10, ), )
        aspect_ratios = ((1, ), )
        anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)

        return anchor_generator
Example No. 10
    def __init__(
            self,
            backbone,
            num_classes=None,
            # transform parameters
            min_size=800,
            max_size=1333,
            image_mean=None,
            image_std=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=2000,
            rpn_pre_nms_top_n_test=1000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=1000,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.7,
            rpn_bg_iou_thresh=0.3,
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            # Box parameters
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.5,
            box_detections_per_img=100,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.5,
            box_batch_size_per_image=512,
            box_positive_fraction=0.25,
            bbox_reg_weights=None):

        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)")

        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        if num_classes is not None:
            if box_predictor is not None:
                raise ValueError(
                    "num_classes should be None when box_predictor is specified"
                )
        else:
            if box_predictor is None:
                raise ValueError(
                    "num_classes should not be None when box_predictor "
                    "is not specified")

        out_channels = backbone.out_channels

        if rpn_anchor_generator is None:
            anchor_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
            aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        rpn = RegionProposalNetwork(rpn_anchor_generator, rpn_head,
                                    rpn_fg_iou_thresh, rpn_bg_iou_thresh,
                                    rpn_batch_size_per_image,
                                    rpn_positive_fraction, rpn_pre_nms_top_n,
                                    rpn_post_nms_top_n, rpn_nms_thresh)

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=['0', '1', '2', '3'],
                output_size=7,
                sampling_ratio=2)

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(out_channels * resolution**2,
                                  representation_size)

        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(representation_size, num_classes)

        roi_heads = RoIHeads(
            # Box
            box_roi_pool,
            box_head,
            box_predictor,
            box_fg_iou_thresh,
            box_bg_iou_thresh,
            box_batch_size_per_image,
            box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh,
            box_nms_thresh,
            box_detections_per_img)

        if image_mean is None:
            image_mean = [0.485, 0.456, 0.406]
        if image_std is None:
            image_std = [0.229, 0.224, 0.225]
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean,
                                             image_std)

        super(FasterRCNN, self).__init__(backbone, rpn, roi_heads, transform)
Example No. 11
# Standalone RPN construction using the same defaults as the FasterRCNN
# constructor above.
rpn_fg_iou_thresh = 0.7
rpn_bg_iou_thresh = 0.3
rpn_batch_size_per_image = 256
rpn_positive_fraction = 0.5
rpn_nms_thresh = 0.7

# Proposal counts before/after NMS, keyed by training/testing mode.
rpn_pre_nms_top_n_train, rpn_pre_nms_top_n_test = 2000, 1000
rpn_post_nms_top_n_train, rpn_post_nms_top_n_test = 2000, 1000
rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)

# One anchor size per feature map level, three aspect ratios everywhere.
anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)

rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
rpn_head = RPNHead(512, rpn_anchor_generator.num_anchors_per_location()[0])

rpn = RegionProposalNetwork(
            rpn_anchor_generator, rpn_head,
            rpn_fg_iou_thresh, rpn_bg_iou_thresh,
            rpn_batch_size_per_image, rpn_positive_fraction,
            rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)

pretrained = True
pretrained_backbone = True
trainable_backbone_layers = None
trainable_backbone_layers = _validate_resnet_trainable_layers(
    pretrained or pretrained_backbone, trainable_backbone_layers)
if pretrained:
    pretrained_backbone = False
Example No. 12
    def __init__(
            self,
            num_classes=2,
            # transform parameters
            backbone_name='resnet50',
            min_size=256,
            max_size=512,
            image_mean=None,
            image_std=None,
            # RPN parameters
            rpn_anchor_generator=None,
            rpn_head=None,
            rpn_pre_nms_top_n_train=2000,
            rpn_pre_nms_top_n_test=1000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=1000,
            rpn_nms_thresh=0.7,
            rpn_fg_iou_thresh=0.7,
            rpn_bg_iou_thresh=0.3,
            rpn_batch_size_per_image=256,
            rpn_positive_fraction=0.5,
            rpn_score_thresh=0.0,
            # Box parameters
            box_roi_pool=None,
            box_head=None,
            box_predictor=None,
            box_score_thresh=0.05,
            box_nms_thresh=0.5,
            box_detections_per_img=100,
            box_fg_iou_thresh=0.5,
            box_bg_iou_thresh=0.5,
            box_batch_size_per_image=512,
            box_positive_fraction=0.25,
            bbox_reg_weights=None,
            # Ellipse regressor
            ellipse_roi_pool=None,
            ellipse_head=None,
            ellipse_predictor=None,
            ellipse_loss_metric="gaussian-angle"):

        backbone = resnet_fpn_backbone(backbone_name,
                                       pretrained=True,
                                       trainable_layers=5)

        # Input image is grayscale -> in_channels = 1 instead of 3 (COCO)
        backbone.body.conv1 = Conv2d(1,
                                     64,
                                     kernel_size=(7, 7),
                                     stride=(2, 2),
                                     padding=(3, 3),
                                     bias=False)

        if not hasattr(backbone, "out_channels"):
            raise ValueError(
                "backbone should contain an attribute out_channels "
                "specifying the number of output channels (assumed to be the "
                "same for all the levels)")

        assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
        assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))

        if num_classes is not None:
            if box_predictor is not None:
                raise ValueError(
                    "num_classes should be None when box_predictor is specified"
                )
        else:
            if box_predictor is None:
                raise ValueError(
                    "num_classes should not be None when box_predictor "
                    "is not specified")

        out_channels = backbone.out_channels

        if rpn_anchor_generator is None:
            anchor_sizes = ((32, ), (64, ), (128, ), (256, ), (512, ))
            aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
            rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        if rpn_head is None:
            rpn_head = RPNHead(
                out_channels,
                rpn_anchor_generator.num_anchors_per_location()[0])

        rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                                 testing=rpn_pre_nms_top_n_test)
        rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                                  testing=rpn_post_nms_top_n_test)

        rpn = RegionProposalNetwork(rpn_anchor_generator,
                                    rpn_head,
                                    rpn_fg_iou_thresh,
                                    rpn_bg_iou_thresh,
                                    rpn_batch_size_per_image,
                                    rpn_positive_fraction,
                                    rpn_pre_nms_top_n,
                                    rpn_post_nms_top_n,
                                    rpn_nms_thresh,
                                    score_thresh=rpn_score_thresh)

        if box_roi_pool is None:
            box_roi_pool = MultiScaleRoIAlign(
                featmap_names=['0', '1', '2', '3'],
                output_size=7,
                sampling_ratio=2)

        if box_head is None:
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            box_head = TwoMLPHead(out_channels * resolution**2,
                                  representation_size)

        if box_predictor is None:
            representation_size = 1024
            box_predictor = FastRCNNPredictor(representation_size, num_classes)

        if ellipse_roi_pool is None:
            ellipse_roi_pool = MultiScaleRoIAlign(
                featmap_names=['0', '1', '2', '3'],
                output_size=7,
                sampling_ratio=2)

        if ellipse_head is None:
            # Resolution must come from the ellipse pooler, not the box pooler.
            resolution = ellipse_roi_pool.output_size[0]
            representation_size = 1024
            ellipse_head = TwoMLPHead(out_channels * resolution**2,
                                      representation_size)

        if ellipse_predictor is None:
            representation_size = 1024
            ellipse_predictor = EllipseRegressor(representation_size,
                                                 num_classes)

        roi_heads = EllipseRoIHeads(
            # Box
            box_roi_pool,
            box_head,
            box_predictor,
            box_fg_iou_thresh,
            box_bg_iou_thresh,
            box_batch_size_per_image,
            box_positive_fraction,
            bbox_reg_weights,
            box_score_thresh,
            box_nms_thresh,
            box_detections_per_img,
            # Ellipse
            ellipse_roi_pool=ellipse_roi_pool,
            ellipse_head=ellipse_head,
            ellipse_predictor=ellipse_predictor,
            ellipse_loss_metric=ellipse_loss_metric)

        if image_mean is None:
            image_mean = [0.156]
        if image_std is None:
            image_std = [0.272]
        transform = GeneralizedRCNNTransform(min_size, max_size, image_mean,
                                             image_std)

        super().__init__(backbone, rpn, roi_heads, transform)
Example No. 13
def get_model(config):

    model = None
    # input_size = 0

    if config.model_name == "resnet":
        """ Resnet18
        """
        model = models.resnet18(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)

        n_features = model.fc.in_features
        model.fc = nn.Linear(n_features, config.n_classes)
        # input_size = 224
    elif config.model_name == "alexnet":
        """ Alexnet
        """
        model = models.alexnet(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)

        n_features = model.classifier[-1].in_features
        model.classifier[-1] = nn.Linear(n_features, config.n_classes)
        # input_size = 224
    elif config.model_name == "vgg":
        """ VGG16_bn
        """
        model = models.vgg16_bn(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)

        n_features = model.classifier[-1].in_features
        model.classifier[-1] = nn.Linear(n_features, config.n_classes)
        # input_size = 224
    elif config.model_name == "densenet":
        """ Densenet
        """
        model = models.densenet121(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)

        n_features = model.classifier.in_features
        model.classifier = nn.Linear(n_features, config.n_classes)
        # input_size = 224
        
    elif config.model_name == 'mobilenet':
        model = models.mobilenet_v2(pretrained=config.use_pretrained)
        set_parameter_requires_grad(model, config.freeze)

        n_features = model.classifier[-1].in_features
        model.classifier[-1] = nn.Linear(n_features, config.n_classes)

    elif config.model_name == "KeypointRCNN":
      backbone = models.mobilenet_v2(pretrained=True).features
      backbone.out_channels = 1280
      roi_pooler = MultiScaleRoIAlign(
          featmap_names=['0'],
          output_size=7,
          sampling_ratio=2
      )
      anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                    aspect_ratios=((0.5, 1.0, 2.0),))
      keypoint_roi_pooler = MultiScaleRoIAlign(
          featmap_names=['0'],
          output_size=14,
          sampling_ratio=2
      )

      model = KeypointRCNN(
          backbone, 
          num_classes=2,
          num_keypoints=24,
          box_roi_pool=roi_pooler,
          keypoint_roi_pool=keypoint_roi_pooler,rpn_anchor_generator=anchor_generator
      )

    elif config.model_name == "keypointrcnn_resnet50":
      model = models.detection.keypointrcnn_resnet50_fpn(pretrained=config.use_pretrained, progress=False)
      model.roi_heads.keypoint_predictor.kps_score_lowres = nn.ConvTranspose2d(512, 24, kernel_size=(4, 4), stride=(2, 2), padding=(1, 1))
    
    elif config.model_name == "keypointrcnn_resnet101":
      pretrained_backbone = True
      pretrained = False
      trainable_backbone_layers = None
      trainable_backbone_layers = _validate_trainable_layers(
              pretrained or pretrained_backbone, trainable_backbone_layers, 5, 3)

      backbone = resnet_fpn_backbone('resnet101', pretrained_backbone, trainable_layers=trainable_backbone_layers)

      model = KeypointRCNN(
          backbone, 
          num_classes=2,
          num_keypoints=24)

    else:
        raise NotImplementedError('You need to specify a valid model name.')

    return model
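A hedged usage sketch for get_model; the config object and its field values below are placeholders (set_parameter_requires_grad is assumed to be defined elsewhere in the project):

from types import SimpleNamespace

# Hypothetical config values for illustration only.
config = SimpleNamespace(model_name='resnet',
                         use_pretrained=True,
                         freeze=False,
                         n_classes=10)
model = get_model(config)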
Example No. 14
def get_fasterrcnn_model(arch_str,
                         num_classes,
                         pretrained=True,
                         pretrained_backbone=True,
                         trainable_layers=5,
                         **kwargs):
    """Creates FasterRCNN model with resnet backbone"""

    #if pretrained == True: pretrained_backbone=False

    backbone = resnet_fpn_backbone(arch_str,
                                   pretrained=pretrained_backbone,
                                   trainable_layers=trainable_layers)

    anchor_sizes = (
        (16, ),
        (32, ),
        (64, ),
        (128, ),
        (256, ),
    )
    aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)

    anchor_generator = AnchorGenerator(sizes=anchor_sizes,
                                       aspect_ratios=aspect_ratios)

    model = FasterRCNN(
        backbone,
        num_classes=num_classes,
        rpn_anchor_generator=anchor_generator,
        box_fg_iou_thresh=0.5,
        box_bg_iou_thresh=0.5,
        image_mean=[0.0, 0.0, 0.0],  # already normalized by fastai
        image_std=[1.0, 1.0, 1.0],
        #min_size = 1,
        #box_score_thresh=0.6,
        **kwargs)

    if pretrained:
        try:
            pretrained_dict = load_state_dict_from_url(
                _model_urls['fasterrcnn_' + arch_str + '_fpn_coco'],
                progress=True)
            model_dict = model.state_dict()

            pretrained_dict = {
                k: v
                for k, v in pretrained_dict.items() if (k in model_dict) and (
                    model_dict[k].shape == pretrained_dict[k].shape)
            }

            model_dict.update(pretrained_dict)
            model.load_state_dict(model_dict)
            #overwrite_eps(model, 0.0)
            for module in model.modules():
                if isinstance(module, FrozenBatchNorm2d):
                    module.eps = 0.0

        except Exception as e:
            #print(e)
            print("No pretrained coco model found for fasterrcnn_" + arch_str)
            print("This does not affect the backbone.")

    return model.train()
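For reference, a hedged call sketch; the 'resnet50' architecture string and class count are placeholders:

model = get_fasterrcnn_model('resnet50', num_classes=3, pretrained=True)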
Example No. 15
def get_fasterrcnn_model_swin(arch_str,
                              num_classes,
                              pretrained=False,
                              pretrained_backbone=True,
                              **kwargs):
    """Creates FasterRCNN model with swin transformer backbone"""
    anchor_sizes = (
        (32, ),
        (64, ),
        (128, ),
        (256, ),
    )
    aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
    anchor_generator = AnchorGenerator(sizes=anchor_sizes,
                                       aspect_ratios=aspect_ratios)
    #roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0','1','2','3'],
    #                                                output_size=7,
    #                                                sampling_ratio=2)

    img_size = 224 if arch_str in "swin_tiny swin_small".split() else 384
    window_size = 7 if arch_str in "swin_tiny swin_small".split() else 12
    depths = [2, 2, 6, 2] if arch_str == "swin_tiny" else [2, 2, 18, 2]

    scale_factors = {
        "swin_tiny": 1.0,
        "swin_small": 1.5,
        "swin_base": 2.0,
        "swin_large": 2.0
    }
    sf = scale_factors[arch_str]
    embed_dim = int(96 * sf)
    fpn_cin = [int(96 * sf * 2**i) for i in range(4)]
    #fpn_cin = [int(i*sf) for i in [96, 192, 384, 768]]

    backbone = SwinTransformerFPN(img_size=img_size,
                                  window_size=window_size,
                                  embed_dim=embed_dim,
                                  depths=depths,
                                  fpn_cin=fpn_cin,
                                  fpn_cout=256)

    if pretrained_backbone:
        sd = load_state_dict_from_url(_model_urls[f'{arch_str}_{img_size}'],
                                      progress=True,
                                      map_location=default_device())['model']
        sd_model = backbone.state_dict()
        sd = {k: v for k, v in sd.items() if k in sd_model.keys()}
        sd_model.update(sd)
        backbone.load_state_dict(sd_model)

    model = FasterRCNN(
        backbone,
        num_classes=num_classes,
        rpn_anchor_generator=anchor_generator,
        #box_roi_pool=roi_pooler,
        box_fg_iou_thresh=0.5,
        box_bg_iou_thresh=0.5,
        image_mean=[0.0, 0.0, 0.0],  # already normalized by fastai
        image_std=[1.0, 1.0, 1.0],
        #min_size=IMG_SIZE,
        #max_size=IMG_SIZE,
        **kwargs)

    return model.train()
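Similarly, a hedged call sketch for the swin variant; arch_str must be one of the keys of scale_factors above:

model = get_fasterrcnn_model_swin('swin_tiny', num_classes=3, pretrained_backbone=False)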
Example No. 16
    def _init_test_anchor_generator(self):
        anchor_sizes = tuple((x, ) for x in [32, 64, 128])
        aspect_ratios = ((0.5, 1.0, 2.0), ) * len(anchor_sizes)
        anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)

        return anchor_generator
Example No. 17
import torch
import torchvision
from torchvision.models.detection import MaskRCNN
from torchvision.models.detection.anchor_utils import AnchorGenerator

backbone = torchvision.models.mobilenet_v2(pretrained=True).features
backbone.out_channels = 1280

anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                   aspect_ratios=((0.5, 1.0, 2.0),))
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=7,
                                                sampling_ratio=2)
mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                     output_size=14,
                                                     sampling_ratio=2)
MaskRCNN_mobile_model = MaskRCNN(backbone,
                                 num_classes=2,
                                 rpn_anchor_generator=anchor_generator,
                                 box_roi_pool=roi_pooler,
                                 mask_roi_pool=mask_roi_pooler)

if __name__ == '__main__':
    model = MaskRCNN_mobile_model
    model.eval()
    x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
    predictions = model(x)