def custom_fasterrcnn_resnet_fpn(backbone, pretrained=True, progress=True, num_classes=91,
                                 pretrained_backbone=True, trainable_backbone_layers=3, **kwargs):
    """Build a Faster R-CNN around a custom ResNet-FPN backbone.

    Args:
        backbone (dict): backbone spec with keys 'name' and 'params'.
        pretrained (bool): load COCO-pretrained detector weights (resnet50 only).
        progress (bool): show a download progress bar.
        num_classes (int): number of output classes including background.
        pretrained_backbone (bool): load ImageNet weights into the backbone.
        trainable_backbone_layers (int): trainable resnet layers, 0..5.

    Returns:
        FasterRCNN model.
    """
    name = backbone['name']
    params = backbone['params']
    assert 0 <= trainable_backbone_layers <= 5
    if not (pretrained or pretrained_backbone):
        # Nothing is pretrained, so there is no reason to freeze any layer.
        params['trainable_backbone_layers'] = 5
    if pretrained:
        # The COCO checkpoint already contains backbone weights; skip the download.
        params['pretrained'] = False
    fpn_backbone = custom_resnet_fpn_backbone(name, params)
    n_maps = len(fpn_backbone.body.return_layers)
    if n_maps == 4:
        # Four feature maps matches torchvision's default RoI pooler.
        box_roi_pool = None
    else:
        box_roi_pool = MultiScaleRoIAlign(
            featmap_names=[str(i) for i in range(n_maps)],
            output_size=7, sampling_ratio=2)
    model = FasterRCNN(fpn_backbone, num_classes, box_roi_pool=box_roi_pool, **kwargs)
    if pretrained and name.endswith('resnet50'):
        state_dict = load_state_dict_from_url(
            fasterrcnn_model_urls['fasterrcnn_resnet50_fpn_coco'], progress=progress)
        # strict=False: custom backbones may not match the checkpoint exactly.
        model.load_state_dict(state_dict, strict=False)
    return model
def custom_fasterrcnn_resnet_fpn(backbone, pretrained=True, progress=True, num_classes=91,
                                 pretrained_backbone=True, trainable_backbone_layers=3, **kwargs):
    """Build a Faster R-CNN from a custom ResNet-FPN backbone spec.

    Args:
        backbone (dict): backbone spec with keys 'name' and 'params'.
        pretrained (bool): load COCO-pretrained detector weights (resnet50 only).
        progress (bool): show a download progress bar.
        num_classes (int): number of output classes including background.
        pretrained_backbone (bool): load ImageNet weights into the backbone.
        trainable_backbone_layers (int): trainable resnet layers, 0..5.

    Returns:
        FasterRCNN model.
    """
    name = backbone['name']
    params = backbone['params']
    assert 0 <= trainable_backbone_layers <= 5
    if not (pretrained or pretrained_backbone):
        # No pretrained weights anywhere -> train every backbone layer.
        params['trainable_backbone_layers'] = 5
    if pretrained:
        # The COCO checkpoint covers the backbone too; skip the backbone download.
        params['pretrained'] = False
    fpn_backbone = custom_resnet_fpn_backbone(name, params)
    model = FasterRCNN(fpn_backbone, num_classes, **kwargs)
    if pretrained and name.endswith('resnet50'):
        state_dict = load_state_dict_from_url(
            fasterrcnn_model_urls['fasterrcnn_resnet50_fpn_coco'], progress=progress)
        # strict=False: tolerate shape mismatches from the customized backbone.
        model.load_state_dict(state_dict, strict=False)
    return model
def test():
    """Smoke-test a mobilenet_v2-backed Faster R-CNN on two random CUDA images."""
    features = torchvision.models.mobilenet_v2(pretrained=False).features
    # FasterRCNN needs to know how many channels the backbone emits.
    features.out_channels = 1280
    anchors = AnchorGenerator(sizes=((32, 64, 128, 256, 512), ),
                              aspect_ratios=((0.5, 1.0, 2.0), ))
    # NOTE: a custom MultiScaleRoIAlign pooler could be passed via box_roi_pool;
    # here we rely on the default.
    model = FasterRCNN(features, num_classes=2, rpn_anchor_generator=anchors)
    model.eval().cuda()
    images = [
        torch.rand(3, 300, 400).float().cuda(),
        torch.rand(3, 500, 400).float().cuda(),
    ]
    predictions = model(images)
    print(len(predictions))
    print(predictions[0].keys())
def get_mobilenet_model(num_classes):
    """Build a Faster R-CNN with a mobilenet_v2 backbone.

    Follows the example in
    https://github.com/pytorch/vision/blob/master/torchvision/models/detection/faster_rcnn.py
    """
    backbone = torchvision.models.mobilenet_v2(pretrained=True).features
    backbone.out_channels = 1280  # required by FasterRCNN
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128), ),
                                       aspect_ratios=((0.5, 1.0, 2.0), ))
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                    output_size=7,
                                                    sampling_ratio=2)
    # Stats for test images:
    #   original width  avg 172.58, std 122.58, min 31, max 1083
    #   original height avg 105.00, std  52.75, min 13, max  516
    # -> min_size/max_size chosen accordingly.
    return FasterRCNN(backbone,
                      num_classes=num_classes,
                      min_size=100,
                      max_size=300,
                      rpn_anchor_generator=anchor_generator,
                      box_roi_pool=roi_pooler)
def model(num_classes: int, backbone: Optional[nn.Module] = None,
          remove_internal_transforms: bool = True,
          **faster_rcnn_kwargs) -> nn.Module:
    """FasterRCNN model given by torchvision.

    Args:
        num_classes (int): Number of classes.
        backbone (nn.Module): Backbone model to use. Defaults to a
            resnet50_fpn model.
        remove_internal_transforms (bool): strip torchvision's internal
            resize/normalize transforms.

    Returns:
        nn.Module
    """
    if backbone is not None:
        model = FasterRCNN(backbone, num_classes=num_classes, **faster_rcnn_kwargs)
        backbone_param_groups = backbone.param_groups()
    else:
        # Default path: COCO-pretrained resnet50-fpn with a replaced head.
        model = fasterrcnn_resnet50_fpn(pretrained=True, **faster_rcnn_kwargs)
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
        backbone_param_groups = resnet_fpn.param_groups(model.backbone)

    patch_param_groups(model=model, backbone_param_groups=backbone_param_groups)

    if remove_internal_transforms:
        remove_internal_model_transforms(model)
    return model
def get_faster_rcnn(
    backbone: torch.nn.Module,
    anchor_generator: AnchorGenerator,
    roi_pooler: MultiScaleRoIAlign,
    num_classes: int,
    image_mean: Optional[List[float]] = None,
    image_std: Optional[List[float]] = None,
    min_size: int = 512,
    max_size: int = 1024,
    **kwargs,
) -> FasterRCNN:
    """Returns the Faster-RCNN model. Default normalization: ImageNet.

    Args:
        backbone: feature extractor with an ``out_channels`` attribute.
        anchor_generator: RPN anchor generator.
        roi_pooler: multi-scale RoI align module.
        num_classes: number of classes including background.
        image_mean / image_std: per-channel normalization; defaults to
            ImageNet statistics when None.
        min_size / max_size: image resize bounds used by the internal transform.

    Returns:
        A configured FasterRCNN model.
    """
    # Mutable defaults ([0.485, ...]) were replaced by a None sentinel so the
    # lists are not shared across calls.
    if image_mean is None:
        image_mean = [0.485, 0.456, 0.406]
    if image_std is None:
        image_std = [0.229, 0.224, 0.225]
    model = FasterRCNN(
        backbone=backbone,
        rpn_anchor_generator=anchor_generator,
        box_roi_pool=roi_pooler,
        num_classes=num_classes,
        image_mean=image_mean,  # ImageNet
        image_std=image_std,  # ImageNet
        min_size=min_size,
        max_size=max_size,
        **kwargs,
    )
    # Mirror the key settings on the model for external inspection.
    model.num_classes = num_classes
    model.image_mean = image_mean
    model.image_std = image_std
    model.min_size = min_size
    model.max_size = max_size
    return model
def faster_rcnn_mobile(self_pretrained, num_classes):
    """Build a Faster R-CNN with a mobilenet_v2 backbone.

    Args:
        self_pretrained: if True, the caller will load their own weights, so
            the ImageNet backbone download is skipped.
        num_classes: number of classes including background.

    Returns:
        FasterRCNN model.
    """
    if not self_pretrained:
        print('load mobilenet_v2 backbone pretrained on ImageNet')
    # Idiomatic form of `False if self_pretrained else True`.
    pretrained = not self_pretrained  # pretrain on ImageNet
    backbone = torchvision.models.mobilenet_v2(pretrained=pretrained).features
    backbone.out_channels = 1280
    # Consider more elongated boxes as well.
    anchor_generator = AnchorGenerator(
        sizes=((32, 64, 128, 256, 512), ),
        aspect_ratios=((0.5, 1.0, 2.0), ))  # TODO: add 0.3, 4.0
    # Needed because mobilenet outputs a single feature level.
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=[0],  # can be multi
        output_size=7,
        sampling_ratio=2)
    # The model normalizes and resizes inputs itself;
    # box_nms_thresh is used during inference.
    model = FasterRCNN(backbone,
                       num_classes=num_classes,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)
    return model
def __init__(self, n_classes, box_size=7, backbone_name='resnet50', pretrained_backbone=True):
    """Wrap a torchvision FasterRCNN with small square anchors.

    Args:
        n_classes: number of foreground classes (background is added internally).
        box_size: side length of the boxes this model works with.
        backbone_name: resnet variant passed to resnet_fpn_backbone.
        pretrained_backbone: load ImageNet weights into the backbone.
    """
    super().__init__()
    # Snapshot of attributes that exist *before* this constructor adds its own.
    _dum = set(dir(self))
    self.n_classes = n_classes
    self.pretrained_backbone = pretrained_backbone
    self.box_size = box_size
    self.half_box_size = box_size // 2
    # The set difference yields exactly the attribute names assigned above,
    # so they can be enumerated/accessed generically later.
    self._input_names = list(
        set(dir(self)) - _dum
    )  # names of the fields above, so they can be accessed if necessary
    # Small, square-only anchors (one size per FPN level).
    anchor_sizes = ((4, ), (8, ), (16, ), (32, ), (64, ))
    aspect_ratios = ((1.0, ), ) * len(anchor_sizes)
    rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    backbone = resnet_fpn_backbone(backbone_name, pretrained_backbone)
    # n_classes + 1 because label 0 is the background in the
    # torchvision Faster R-CNN convention.
    self.fasterrcnn = FasterRCNN(backbone,
                                 n_classes + 1,
                                 rpn_anchor_generator=rpn_anchor_generator)
def fasterrcnn_resnet50_fpn(
    pretrained=False, progress=True, num_classes=91, pretrained_backbone=True, **kwargs
):
    """Faster R-CNN with a resnet50-FPN backbone and a cache-free anchor generator.

    Args:
        pretrained: load COCO-pretrained detector weights.
        progress: show a download progress bar.
        num_classes: number of classes including background.
        pretrained_backbone: load ImageNet backbone weights.

    Returns:
        FasterRCNN model.
    """
    if pretrained:
        # The COCO checkpoint already includes backbone weights.
        pretrained_backbone = False
    backbone = resnet_fpn_backbone("resnet50", pretrained_backbone)

    sizes = ((32,), (64,), (128,), (256,), (512,))
    ratios = ((0.5, 1.0, 2.0),) * len(sizes)
    rpn_anchor_generator = CachelessAnchorGenerator(sizes, ratios)

    model = FasterRCNN(
        backbone, num_classes, rpn_anchor_generator=rpn_anchor_generator, **kwargs
    )

    # Alternative small-object configuration (kept for reference):
    #   min_size=300, max_size=400,
    #   anchor sizes (12,), (24,), (48,), (96,), (192,) with the same ratios.

    if pretrained:
        state_dict = load_state_dict_from_url(
            model_urls["fasterrcnn_resnet50_fpn_coco"], progress=progress
        )
        model.load_state_dict(state_dict)
    return model
def fasterrcnn_resnet_fpn(resnet_name='resnet50', pretrained_backbone=True, progress=True,
                          num_classes=2, **kwargs):
    """Construct a Faster R-CNN model with a ResNet-FPN backbone.

    The input is a list of ``[C, H, W]`` tensors in the ``0-1`` range; images
    may have different sizes.

    In training mode the model expects the input tensors plus targets (list of
    dicts) with:
        - boxes (``FloatTensor[N, 4]``): ground-truth boxes in
          ``[x1, y1, x2, y2]`` format, within ``[0, H]`` and ``[0, W]``
        - labels (``Int64Tensor[N]``): the class label for each box
    and returns a ``Dict[Tensor]`` of RPN and R-CNN losses.

    In eval mode it takes only the input tensors and returns post-processed
    predictions as a ``List[Dict[Tensor]]``, one per image, with keys:
        - boxes (``FloatTensor[N, 4]``), labels (``Int64Tensor[N]``),
          scores (``Tensor[N]``)
    """
    backbone = resnet_fpn_backbone(resnet_name, pretrained_backbone)
    return FasterRCNN(backbone, num_classes, box_detections_per_img=100, **kwargs)
def faster_rcnn_mobile(input_size, num_classes, self_pretrained):
    """Build a Faster R-CNN with a mobilenet_v2 backbone.

    Args:
        input_size: (min_size, max_size) resize bounds for the internal transform.
        num_classes: number of classes including background.
        self_pretrained: if True, skip the ImageNet backbone download because
            the caller will load their own weights.

    Returns:
        FasterRCNN model.
    """
    if not self_pretrained:
        print('load mobilenet_v2 backbone pretrained on ImageNet')
    backbone = torchvision.models.mobilenet_v2(
        pretrained=not self_pretrained).features
    backbone.out_channels = 1280
    # mobilenet exposes a single feature map, so AnchorGenerator must get
    # exactly one sizes tuple paired with one aspect_ratios tuple. The previous
    # code passed five size tuples against a single aspect-ratio tuple (a
    # length mismatch) and left two computed locals unused.
    anchor_generator = AnchorGenerator(
        sizes=((32, 64, 128, 256, 512), ),
        aspect_ratios=((0.5, 1.0, 2.0), ))
    # Single-level RoI pooling to match the single feature map.
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=[0],  # can be multi
        output_size=7,
        sampling_ratio=2)
    # The model normalizes and resizes inputs itself;
    # box_nms_thresh is used during inference.
    model = FasterRCNN(backbone,
                       num_classes=num_classes,
                       min_size=input_size[0],
                       max_size=input_size[1],
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)
    return model
def create_fastercnn(
    num_classes: int = 91,
    backbone: nn.Module = None,
    **kwargs,
):
    """Creates Faster RCNN implementation based on torchvision library.

    Args:
        num_classes (int): number of classes. Do not use class_id "0"; it is
            reserved as background, so num_classes = classes to label + 1.
        backbone: optional custom backbone; when None the default
            COCO-pretrained resnet50-fpn is used with a replaced head.
    """
    if backbone is not None:
        return FasterRCNN(backbone, num_classes=num_classes, **kwargs)

    # Default torchvision detector, trained on the COCO dataset.
    model = fasterrcnn_resnet50_fpn(
        pretrained=True,
        **kwargs,
    )
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model
def __init__(self, f):
    """Load a Faster R-CNN detector from a local checkpoint path or a stream.

    Args:
        f: either a filesystem path (str) to a state_dict checkpoint, or an
           io.BytesIO stream (stream loading is currently a no-op).
    """
    trainable_backbone_layers = 5
    # NOTE: a previously unused `pretrained = True` local was removed; the
    # backbone below is explicitly created with pretrained weights (True).
    backbone = resnet_fpn_backbone(
        'resnet50', True, trainable_layers=trainable_backbone_layers)
    # Very low NMS thresholds keep only strongly non-overlapping detections.
    self.model = FasterRCNN(backbone,
                            num_classes=10,
                            max_size=3840,
                            min_size=2160,
                            rpn_pre_nms_top_n_train=2000,
                            rpn_pre_nms_top_n_test=2000,
                            rpn_post_nms_top_n_train=2000,
                            rpn_post_nms_top_n_test=2000,
                            box_detections_per_img=100,
                            rpn_nms_thresh=0.01,
                            box_nms_thresh=0.01)
    # Alternative (kept for reference): start from the COCO-pretrained
    # fasterrcnn_resnet50_fpn and replace the box predictor for 10 classes.
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')
    self.model.to(device)
    if isinstance(f, str):  # local file
        print("Loading model from local file at {}".format(f))
        self.model.load_state_dict(torch.load(f, map_location=device))
    elif isinstance(f, io.BytesIO):  # stream
        # TODO: stream loading is not implemented yet.
        print("Loading model from stream")
        pass
def fasterrcnn_resnet50_fpn(pretrained=False, progress=True, num_classes=91,
                            pretrained_backbone=True, trainable_backbone_layers=3,
                            model_dir=None, **kwargs):
    """Construct a Faster R-CNN model with a ResNet-50-FPN backbone.

    The input is a list of ``[C, H, W]`` tensors in the ``0-1`` range; images
    may have different sizes.

    In training mode the model expects the input tensors plus targets (list of
    dicts) containing:
        - boxes (``FloatTensor[N, 4]``): ground-truth boxes in
          ``[x1, y1, x2, y2]`` format, with ``x`` in ``[0, W]`` and
          ``y`` in ``[0, H]``
        - labels (``Int64Tensor[N]``): the class label for each box
    and returns a ``Dict[Tensor]`` with RPN and R-CNN losses.

    In eval mode it takes only the input tensors and returns post-processed
    predictions as a ``List[Dict[Tensor]]``, one per image, with keys
    ``boxes``, ``labels`` and ``scores``.

    Faster R-CNN is exportable to ONNX for a fixed batch size with input
    images of fixed size.

    Arguments:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
        pretrained_backbone (bool): If True, returns a model with backbone
            pre-trained on Imagenet
        num_classes (int): number of output classes of the model (including
            the background)
        trainable_backbone_layers (int): number of trainable (not frozen)
            resnet layers starting from final block. Valid values are between
            0 and 5, with 5 meaning all backbone layers are trainable.
        model_dir: directory passed to load_state_dict_from_url.
    """
    assert 0 <= trainable_backbone_layers <= 5
    if not (pretrained or pretrained_backbone):
        # No pretrained weights at all -> don't freeze anything.
        trainable_backbone_layers = 5
    if pretrained:
        # The COCO checkpoint already includes backbone weights.
        pretrained_backbone = False
    backbone = resnet_fpn_backbone('resnet50', pretrained_backbone)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(
            model_urls['fasterrcnn_resnet50_fpn_coco'],
            progress=progress,
            model_dir=model_dir)
        model.load_state_dict(state_dict)
    return model
def rcnn_model():
    """Faster R-CNN (resnet50-fpn, 91 classes) with dataset-specific anchors
    and per-class weights on CUDA.

    NOTE: `cross_weights` is not a stock torchvision FasterRCNN argument; it is
    assumed to be handled by a customized FasterRCNN in this project.
    """
    sizes = ((52,), (95,), (245,), (348,), (457,))
    ratios = ((0.58, 1.0, 2.0),) * len(sizes)
    rpn_anchor_generator = AnchorGenerator(sizes, ratios)
    # Class-imbalance weights (index 0 = background).
    cls_weights = torch.tensor(
        [1, 1.38, 1., 1.84, 6.73, 12.55, 72.34, 56.89]).to(torch.device('cuda'))
    backbone = resnet_fpn_backbone('resnet50', pretrained=False)
    return FasterRCNN(backbone, 91,
                      rpn_anchor_generator=rpn_anchor_generator,
                      box_nms_thresh=0.3,
                      cross_weights=cls_weights)
def get_model(
    model_name,
    num_classes,
    backbone,
    fpn,
    pretrained,
    pretrained_backbone,
    trainable_backbone_layers,
    anchor_generator,
    **kwargs,
):
    """Build a detector ("fasterrcnn" or a RetinaNet-style model) with either
    the default ResNet-50-FPN backbone or a named custom backbone.

    Args:
        model_name: key into the module-level ``_models`` registry;
            "fasterrcnn" selects Faster R-CNN, anything else the RetinaNet path.
        num_classes: number of output classes (including background).
        backbone: None for the default backbone, otherwise a key into
            ``ObjectDetector.backbones``.
        fpn: NOTE(review): accepted but never used in this function — confirm.
        pretrained / pretrained_backbone / trainable_backbone_layers: standard
            torchvision pretraining knobs.
        anchor_generator: optional AnchorGenerator; auto-created for non-FPN
            custom backbones when None.
        **kwargs: forwarded to the head (default path) or to the backbone
            factory (custom path).
    """
    if backbone is None:
        # Constructs a model with a ResNet-50-FPN backbone when no backbone is specified.
        if model_name == "fasterrcnn":
            model = _models[model_name](
                pretrained=pretrained,
                pretrained_backbone=pretrained_backbone,
                trainable_backbone_layers=trainable_backbone_layers,
            )
            # Swap the classification head for the requested class count.
            in_features = model.roi_heads.box_predictor.cls_score.in_features
            head = FastRCNNPredictor(in_features, num_classes)
            model.roi_heads.box_predictor = head
        else:
            model = _models[model_name](
                pretrained=pretrained,
                pretrained_backbone=pretrained_backbone)
            # RetinaNet path: rebuild the head for num_classes.
            model.head = RetinaNetHead(
                in_channels=model.backbone.out_channels,
                num_anchors=model.head.classification_head.num_anchors,
                num_classes=num_classes,
                **kwargs)
    else:
        # Custom backbone: the registry returns (module, out_channels).
        backbone_model, num_features = ObjectDetector.backbones.get(
            backbone)(
                pretrained=pretrained_backbone,
                trainable_layers=trainable_backbone_layers,
                **kwargs,
            )
        backbone_model.out_channels = num_features
        if anchor_generator is None:
            # Only non-FPN backbones need an explicit anchor generator;
            # FPN backbones keep None and use the model's default.
            anchor_generator = AnchorGenerator(
                sizes=((32, 64, 128, 256, 512), ),
                aspect_ratios=((0.5, 1.0, 2.0), )) if not hasattr(
                    backbone_model, "fpn") else None
        if model_name == "fasterrcnn":
            model = FasterRCNN(backbone_model,
                               num_classes=num_classes,
                               rpn_anchor_generator=anchor_generator)
        else:
            model = RetinaNet(backbone_model,
                              num_classes=num_classes,
                              anchor_generator=anchor_generator)
    return model
def __init__(self, backbone_arch, num_labels, img_sz, pretrained=True):
    """Wrap a FasterRCNN with a resnet-FPN backbone at a fixed image size.

    Args:
        backbone_arch: resnet variant name passed to resnet_fpn_backbone.
        num_labels: number of classes including background.
        img_sz: used for both min_size and max_size (fixed-size resize).
        pretrained: load ImageNet backbone weights.
    """
    super().__init__()
    fpn_backbone = resnet_fpn_backbone(backbone_arch, pretrained)
    self.model = FasterRCNN(
        fpn_backbone, num_labels, min_size=img_sz, max_size=img_sz)
    # Loss component names reported during training.
    self.subloss_names = [
        'total_loss',
        'loss_box_reg',
        'loss_classifier',
        'loss_objectness',
        'loss_rpn_box_reg',
    ]
def build_model_vgg16(config, device):
    """Build a Faster R-CNN with a VGG-16 backbone and VGG classifier head.

    Args:
        config: expects ``backbone`` ('vgg16'), ``anchor_sizes`` and
            ``aspect_ratios`` attributes.
        device: NOTE(review): accepted but not used in this function — confirm.

    NOTE(review): only the 'vgg16' branch constructs `vgg`/`backbone`; any
    other config.backbone value leaves them undefined — confirm intent.
    """
    if config.backbone == 'vgg16':
        vgg = vgg16(pretrained=True)
        # All conv layers except the final max-pool.
        backbone = vgg.features[:-1]
        # Freeze the first 10 layers (early conv blocks).
        for layer in backbone[:10]:
            for p in layer.parameters():
                p.requires_grad = False
        backbone.out_channels = 512

    class BoxHead(torch.nn.Module):
        """VGG classifier (minus the final layer) reused as the box head."""

        def __init__(self, vgg, dropout=False):
            super(BoxHead, self).__init__()
            classifier = list(vgg.classifier._modules.values())[:-1]
            if not dropout:
                # Remove the two Dropout layers from the VGG classifier.
                del classifier[5]
                del classifier[2]
            self.classifier = torch.nn.Sequential(*classifier)

        def forward(self, x):
            x = x.flatten(start_dim=1)
            x = self.classifier(x)
            return x

    box_head = BoxHead(vgg)
    anchor_generator = AnchorGenerator(sizes=config.anchor_sizes,
                                       aspect_ratios=config.aspect_ratios)
    # Head - Box RoI pooling
    roi_pooler = MultiScaleRoIAlign(featmap_names=['0'],
                                    output_size=7,
                                    sampling_ratio=2)
    # Faster RCNN - Model (fixed 224x224 input size)
    model = FasterRCNN(backbone=backbone,
                       min_size=224,
                       max_size=224,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler,
                       box_head=box_head,
                       box_predictor=FastRCNNPredictor(4096, num_classes=2))
    # Init weights for the fresh predictor layers.
    torch.nn.init.normal_(model.roi_heads.box_predictor.cls_score.weight,
                          std=0.01)
    torch.nn.init.constant_(model.roi_heads.box_predictor.cls_score.bias, 0)
    torch.nn.init.normal_(model.roi_heads.box_predictor.bbox_pred.weight,
                          std=0.001)
    torch.nn.init.constant_(model.roi_heads.box_predictor.bbox_pred.bias, 0)
    return model
def FasterRCNN_VGG():
    """Faster R-CNN on a VGG-16 backbone with the final max-pool removed.

    Uses the global `opt.num_classes` (+1 for the background class).
    """
    backbone = vgg16(pretrained=True).features
    # Drop the last MaxPool layer.
    backbone._modules.pop('30')
    # (Optionally freeze the layers before conv3 -- left disabled.)
    backbone.out_channels = 512
    # A resnet50-fpn backbone could be substituted here instead.
    return FasterRCNN(backbone, num_classes=opt.num_classes + 1)
def fasterrcnn_resnet101_fpn(pretrained=False, progress=True, num_classes=91,
                             pretrained_backbone=True, model_dir=None, **kwargs):
    """Construct a Faster R-CNN model with a ResNet-101-FPN backbone.

    Note that it is NOT an official model.

    The input is a list of ``[C, H, W]`` tensors in the ``0-1`` range; images
    may have different sizes.

    In training mode the model expects the input tensors plus targets (list of
    dicts) containing:
        - boxes (``FloatTensor[N, 4]``): ground-truth boxes in
          ``[x1, y1, x2, y2]`` format, with ``x`` in ``[0, W]`` and
          ``y`` in ``[0, H]``
        - labels (``Int64Tensor[N]``): the class label for each box
    and returns a ``Dict[Tensor]`` with RPN and R-CNN losses.

    In eval mode it takes only the input tensors and returns post-processed
    predictions as a ``List[Dict[Tensor]]``, one per image, with keys
    ``boxes``, ``labels`` and ``scores``.

    Faster R-CNN is exportable to ONNX for a fixed batch size with input
    images of fixed size.

    Arguments:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
    """
    if pretrained:
        # The COCO checkpoint already includes backbone weights.
        pretrained_backbone = False
    backbone = resnet_fpn_backbone('resnet101', pretrained_backbone)
    model = FasterRCNN(backbone, num_classes, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(
            model_urls['fasterrcnn_resnet101_fpn_coco'],
            progress=progress,
            model_dir=model_dir)
        # This (unofficial) checkpoint nests the weights under a 'model' key.
        model.load_state_dict(state_dict['model'])
    return model
def _fasterrcnn_resnet_fpn(backbone='resnet50', num_classes=91,
                           pretrained_backbone=True, **kwargs):
    """Build a Faster R-CNN with a resnet-FPN backbone of the given name.

    Args:
        backbone (str): resnet variant name for resnet_fpn_backbone.
        num_classes (int): number of classes including background.
        pretrained_backbone (bool): load ImageNet backbone weights.

    Raises:
        The module-level ``import_error`` if torchvision failed to import.
    """
    if import_error is not None:
        raise import_error
    from torchvision.models.detection.faster_rcnn import FasterRCNN, resnet_fpn_backbone
    # Avoid rebinding the `backbone` parameter; keep the name for the module.
    fpn_backbone = resnet_fpn_backbone(backbone, pretrained_backbone)
    model = FasterRCNN(fpn_backbone, num_classes, **kwargs)
    # BUGFIX: removed a stray `detection.fasterrcnn_resnet50_fpn()` call that
    # constructed (and could download) an entire unused model on every call.
    return model
def get_model(num_classes, backbone=None):
    '''
    Model function to output network according to arguments.

    Args:
        num_classes: number of classes (total_classes + 1 for background)
        backbone: to design network with another backbone
            ('mobile_net', 'vgg19' or 'resnet101'); the default backbone of
            Faster R-CNN is resnet50.

    Returns:
        model.

    Raises:
        ValueError: if `backbone` is a string other than the supported ones
            (previously this fell through to a NameError).
    '''
    if backbone is None:
        model = fasterrcnn_resnet50_fpn(pretrained=True)
        # in_features == 1024 for the default head.
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(
            in_features, num_classes)
        return model

    if backbone == 'mobile_net':
        net = mobilenet_v2(pretrained=True)
        backbone_ft = net.features
        backbone_ft.out_channels = 1280
    elif backbone == 'vgg19':
        net = vgg19(pretrained=True)
        backbone_ft = net.features
        backbone_ft.out_channels = 512
    # https://stackoverflow.com/questions/58362892/resnet-18-as-backbone-in-faster-r-cnn
    elif backbone == 'resnet101':
        net = resnet101(pretrained=True)
        modules = list(net.children())[:-1]  # strip the final fc layer
        backbone_ft = nn.Sequential(*modules)
        backbone_ft.out_channels = 2048
    else:
        raise ValueError(
            "unsupported backbone {!r}; expected 'mobile_net', 'vgg19' or "
            "'resnet101'".format(backbone))

    anchor_gen = AnchorGenerator(sizes=((32, 64, 128), ))
    # featmap_names=[0] gives a "list index out of range" error; use ['0'].
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                    output_size=7,
                                                    sampling_ratio=2)
    model = FasterRCNN(backbone_ft,
                       num_classes,
                       rpn_anchor_generator=anchor_gen,
                       box_roi_pool=roi_pooler)
    return model
def get_model():
    """Faster R-CNN on resnet34-FPN with small anchors for 3 foreground classes."""
    fpn_backbone = resnet_fpn_backbone('resnet34', pretrained=True)
    # One size tuple per FPN level, shared aspect ratios.
    anchors = AnchorGenerator(
        sizes=((16, ), (32, ), (64, ), (128, ), (256, )),
        aspect_ratios=((0.5, 1.0, 2.0), ) * 5)
    return FasterRCNN(
        fpn_backbone,
        num_classes=1 + 3,  # background + 3 classes
        min_size=512,
        max_size=512,
        rpn_anchor_generator=anchors,
    )
def fasterrcnn_resnet18_fpn(num_classes=2, pretrained_backbone=True, **kwargs):
    """Construct a Faster R-CNN model with a ResNet-18-FPN backbone.

    The input is a list of ``[C, H, W]`` tensors in the ``0-1`` range; images
    may have different sizes.

    In training mode the model expects the input tensors plus targets (list of
    dicts) containing:
        - boxes (``FloatTensor[N, 4]``): ground-truth boxes in
          ``[x1, y1, x2, y2]`` format, within ``[0, H]`` and ``[0, W]``
        - labels (``Int64Tensor[N]``): the class label for each box
    and returns a ``Dict[Tensor]`` with RPN and R-CNN losses.

    In eval mode it takes only the input tensors and returns post-processed
    predictions as a ``List[Dict[Tensor]]``, one per image, with keys
    ``boxes``, ``labels`` and ``scores``.

    Example::

        >>> model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)
    """
    fpn_backbone = resnet_fpn_backbone('resnet18', pretrained_backbone)
    model = FasterRCNN(fpn_backbone, num_classes, **kwargs)
    # Possible shrinking modifications to lessen overfitting (all disabled):
    # replace backbone.body.layer3/layer4 with nn.Sequential(), narrow the FPN
    # inner/layer blocks to 128 channels, shrink the RPN head convs
    # (conv/cls_logits/bbox_pred) to 128 channels, and reduce the box head to
    # fc6=Linear(6272, 256), fc7=Linear(256, 256) with a matching
    # box_predictor (cls_score: 256->2, bbox_pred: 256->8).
    return model
def model(basic_layers, rpn_anchor_generator, box_roi_pool, box_predictor):
    """Create a Faster R-CNN model with all the required modules.

    :param basic_layers: The backbone (e.g. VGG16).
    :param rpn_anchor_generator: anchor generator for the RPN.
    :param box_roi_pool: The roi pool; must be a (sub-)type of MultiScaleRoIAlign.
    :param box_predictor: the final classification/regression head.
    :return: The faster r-cnn model.
    """
    faster_rcnn = FasterRCNN(
        backbone=basic_layers.backbone,
        min_size=512,
        max_size=512,
        rpn_anchor_generator=rpn_anchor_generator,
        rpn_batch_size_per_image=32,
        box_roi_pool=box_roi_pool,
        box_head=basic_layers.classifier,
        box_predictor=box_predictor,
        box_detections_per_img=32,
    )
    return faster_rcnn
def __init__(self, backbone_arch, num_labels, img_sz, pretrained=True):
    """FasterRCNN wrapper with custom small, near-square anchors.

    Args:
        backbone_arch: resnet variant name for resnet_fpn_backbone.
        num_labels: number of classes including background.
        img_sz: fixed image size (min_size == max_size).
        pretrained: load ImageNet backbone weights.
    """
    super().__init__()
    fpn_backbone = resnet_fpn_backbone(backbone_arch, pretrained)
    # Custom anchors: one small size per FPN level, near-square ratios.
    sizes = ((10,), (20,), (30,), (40,), (50,))
    ratios = ((0.8, 1.0, 1.2),) * len(sizes)
    anchor_gen = AnchorGenerator(sizes, ratios)
    self.model = FasterRCNN(
        fpn_backbone,
        num_labels,
        min_size=img_sz,
        max_size=img_sz,
        rpn_anchor_generator=anchor_gen)
    # Loss component names reported during training.
    self.subloss_names = [
        'total_loss',
        'loss_box_reg',
        'loss_classifier',
        'loss_objectness',
        'loss_rpn_box_reg',
    ]
def model(num_classes: int, backbone: Optional[nn.Module] = None,
          remove_internal_transforms: bool = True,
          pretrained: bool = True,
          **faster_rcnn_kwargs) -> nn.Module:
    """FasterRCNN model implemented by torchvision.

    # Arguments
        num_classes: Number of classes.
        backbone: Backbone model to use. Defaults to a resnet50_fpn model.
        remove_internal_transforms: The torchvision model internally applies
            transforms like resizing and normalization, but we already do this
            at the `Dataset` level, so it's safe to remove those internal
            transforms.
        pretrained: Argument passed to `fastercnn_resnet50_fpn` if
            `backbone is None`. By default it is set to True: this is generally
            used when training a new model (transfer learning).
            `pretrained = False` is used during inference (prediction) for
            cases where the users have their own pretrained weights.
        **faster_rcnn_kwargs: Keyword arguments that internally are going to be
            passed to `torchvision.models.detection.faster_rcnn.FastRCNN`.

    # Returns
        A Pytorch `nn.Module`.
    """
    if backbone is not None:
        model = FasterRCNN(backbone, num_classes=num_classes, **faster_rcnn_kwargs)
        backbone_param_groups = backbone.param_groups()
    else:
        model = fasterrcnn_resnet50_fpn(
            pretrained=pretrained,
            pretrained_backbone=pretrained,
            **faster_rcnn_kwargs)
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
        backbone_param_groups = resnet_fpn.param_groups(model.backbone)

    patch_param_groups(model=model, backbone_param_groups=backbone_param_groups)

    if remove_internal_transforms:
        remove_internal_model_transforms(model)
    return model
def __init__(self, backbone_arch, num_class_ids, img_sz, pretrained=True):
    """FasterRCNN wrapper that reserves an extra "null" class for bogus boxes.

    Args:
        backbone_arch: resnet variant name for resnet_fpn_backbone.
        num_class_ids: number of real class ids (starting at 1).
        img_sz: fixed image size (min_size == max_size).
        pretrained: load ImageNet backbone weights.
    """
    super().__init__()
    fpn_backbone = resnet_fpn_backbone(backbone_arch, pretrained)
    # Extra null class for the bogus boxes.
    self.null_class_id = num_class_ids
    # class_ids start at 1 and there is an extra null class, hence +2.
    self.model = FasterRCNN(
        fpn_backbone, num_class_ids + 2, min_size=img_sz, max_size=img_sz)
    # Loss component names reported during training.
    self.subloss_names = [
        'total_loss',
        'loss_box_reg',
        'loss_classifier',
        'loss_objectness',
        'loss_rpn_box_reg',
    ]
    self.batch_ind = 0
def _make_model(backbone_state_dict, num_classes, anchor_sizes: tuple,
                box_detections_per_img: int, num_trainable_backbone_layers: int):
    """Build a Faster R-CNN on an Inception-v3 backbone (299x299 inputs).

    Args:
        backbone_state_dict: optional path to Inception-v3 weights to load.
        num_classes: number of classes (also used for the backbone classifier
            before it is stripped).
        anchor_sizes: tuple of per-level anchor size tuples.
        box_detections_per_img: max detections kept per image.
        num_trainable_backbone_layers: how many of the *last* backbone layers
            stay trainable; -1 (or >= layer count) leaves everything trainable.
    """
    inception = torchvision.models.inception_v3(pretrained=False,
                                                progress=False,
                                                num_classes=num_classes,
                                                aux_logits=False)
    if backbone_state_dict is not None:
        inception.load_state_dict(torch.load(backbone_state_dict))
    # Drop the final fc layer; the rest becomes the backbone.
    modules = list(inception.children())[:-1]
    backbone = nn.Sequential(*modules)
    #for layer in backbone:
    #    for p in layer.parameters():
    #        p.requires_grad = False  # Freezes the backbone layers
    num_layers = len(backbone)
    if (num_trainable_backbone_layers < num_layers) and (num_trainable_backbone_layers != -1):
        # Indices of the layers to KEEP trainable, counted back from the end.
        # NOTE(review): the `- (3 + i)` offset skips the last two modules
        # (presumably pooling/dropout with no parameters) — confirm.
        trainable_layers = [num_layers - (3 + i) for i in range(num_trainable_backbone_layers)]
        print('Trainable layers: \n')
        for layer_idx, layer in enumerate(backbone):
            if layer_idx not in trainable_layers:
                for p in layer.parameters():
                    p.requires_grad = False  # Freezes the backbone layers
            else:
                # Only the trainable layers are printed.
                print(layer, '\n\n')
        print('=================================\n\n')
    # Inception-v3's final feature map has 2048 channels.
    backbone.out_channels = 2048
    # Use smaller anchor boxes since targets are relatively small
    anchor_generator = AnchorGenerator(
        sizes=anchor_sizes,
        aspect_ratios=((0.25, 0.5, 1.0, 2.0, 4.0),) * len(anchor_sizes)
    )
    model = FasterRCNN(backbone,
                       min_size=299,  # Backbone expects 299x299 inputs
                       max_size=299,  # so you don't need to rescale
                       rpn_anchor_generator=anchor_generator,
                       box_predictor=FastRCNNPredictor(1024, num_classes),
                       box_detections_per_img=box_detections_per_img
                       )
    return model
def __init__(
    self,
    learning_rate: float = 0.0001,
    num_classes: int = 91,
    backbone: str = None,
    fpn: bool = True,
    pretrained_backbone: str = None,
    trainable_backbone_layers: int = 3,
    **kwargs,
):
    """
    Args:
        learning_rate: the learning rate
        num_classes: number of detection classes (including background)
        pretrained: if true, returns a model pre-trained on COCO train2017
        pretrained_backbone (str): if "imagenet", returns a model with
            backbone pre-trained on Imagenet
        trainable_backbone_layers: number of trainable resnet layers starting
            from final block
    """
    super().__init__()
    self.learning_rate = learning_rate
    self.num_classes = num_classes
    self.backbone = backbone
    if backbone is not None:
        # Custom backbone: kwargs are forwarded to both the backbone factory
        # and the FasterRCNN constructor (as in the original implementation).
        backbone_model = create_fastercnn_backbone(
            self.backbone,
            fpn,
            pretrained_backbone,
            trainable_backbone_layers,
            **kwargs,
        )
        self.model = FasterRCNN(backbone_model,
                                num_classes=num_classes,
                                **kwargs)
    else:
        # Default: COCO-pretrained resnet50-fpn with a replaced predictor.
        self.model = fasterrcnn_resnet50_fpn(
            pretrained=True,
            trainable_backbone_layers=trainable_backbone_layers,
        )
        head_in_features = self.model.roi_heads.box_predictor.cls_score.in_features
        self.model.roi_heads.box_predictor = FastRCNNPredictor(
            head_in_features, self.num_classes)