def retinanet_resnet50_fpn(
    pretrained=False,
    progress=True,
    num_classes=91,
    pretrained_backbone=True,
    trainable_backbone_layers=None,
    tfidf=None,
    **kwargs
):
    """
    Build a RetinaNet detector on top of a ResNet-50-FPN backbone.

    The model takes a list of image tensors, each shaped ``[C, H, W]`` with
    values in the ``0-1`` range; images in the list may differ in size.

    Behaviour depends on the mode the model is in.

    Training: besides the images, a list of target dictionaries is required,
    each holding:

        - boxes (``FloatTensor[N, 4]``): ground-truth boxes in
          ``[x1, y1, x2, y2]`` format, with ``0 <= x1 < x2 <= W`` and
          ``0 <= y1 < y2 <= H``.
        - labels (``Int64Tensor[N]``): class label of each ground-truth box

    In this mode the model returns a ``Dict[Tensor]`` with the classification
    and regression losses.

    Inference: only the images are required, and the model returns the
    post-processed predictions as a ``List[Dict[Tensor]]`` (one entry per
    image) with the fields:

        - boxes (``FloatTensor[N, 4]``): predicted boxes in
          ``[x1, y1, x2, y2]`` format, with ``0 <= x1 < x2 <= W`` and
          ``0 <= y1 < y2 <= H``.
        - labels (``Int64Tensor[N]``): predicted labels for each image
        - scores (``Tensor[N]``): score of each prediction

    Example::

        >>> model = torchvision.models.detection.retinanet_resnet50_fpn(pretrained=True)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)

    Args:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
        num_classes (int): number of output classes of the model (including the background)
        pretrained_backbone (bool): If True, returns a model with backbone pre-trained on Imagenet
        trainable_backbone_layers (int): number of trainable (not frozen) resnet layers
            starting from final block. Valid values are between 0 and 5, with 5 meaning
            all backbone layers are trainable.
        tfidf: forwarded unchanged as the third positional argument of ``RetinaNet``;
            its meaning is defined by that class.
        **kwargs: extra keyword arguments forwarded to ``RetinaNet``.
    """
    # Let the shared helper validate/resolve the trainable-layer count; it is
    # told whether any pretrained weights (full model or backbone) are in play.
    trainable_backbone_layers = _validate_trainable_layers(
        pretrained or pretrained_backbone, trainable_backbone_layers, 5, 3)

    # The full-model checkpoint is loaded below when `pretrained` is set, so a
    # separate backbone download would be wasted.
    backbone_weights = False if pretrained else pretrained_backbone

    # skip P2 because it generates too many anchors (according to their paper)
    backbone = resnet_fpn_backbone(
        'resnet50',
        backbone_weights,
        returned_layers=[2, 3, 4],
        extra_blocks=LastLevelP6P7(256, 256),
        trainable_layers=trainable_backbone_layers,
    )
    model = RetinaNet(backbone, num_classes, tfidf, **kwargs)

    if not pretrained:
        return model

    checkpoint = load_state_dict_from_url(
        model_urls['retinanet_resnet50_fpn_coco'], progress=progress)
    model.load_state_dict(checkpoint)
    overwrite_eps(model, 0.0)
    return model
def retinanet_resnet50_fpn(
    pretrained=False, progress=True, num_classes=91, pretrained_backbone=True, **kwargs
):
    """
    Build a RetinaNet detector on top of a ResNet-50-FPN backbone.

    The model takes a list of image tensors, each shaped ``[C, H, W]`` with
    values in the ``0-1`` range; images in the list may differ in size.

    Behaviour depends on the mode the model is in.

    Training: besides the images, a list of target dictionaries is required,
    each holding:

        - boxes (``FloatTensor[N, 4]``): ground-truth boxes in
          ``[x1, y1, x2, y2]`` format, with values between ``0`` and ``H``
          and ``0`` and ``W``
        - labels (``Int64Tensor[N]``): class label of each ground-truth box

    In this mode the model returns a ``Dict[Tensor]`` with the classification
    and regression losses.

    Inference: only the images are required, and the model returns the
    post-processed predictions as a ``List[Dict[Tensor]]`` (one entry per
    image) with the fields:

        - boxes (``FloatTensor[N, 4]``): predicted boxes in
          ``[x1, y1, x2, y2]`` format, with values between ``0`` and ``H``
          and ``0`` and ``W``
        - labels (``Int64Tensor[N]``): predicted labels for each image
        - scores (``Tensor[N]``): score of each prediction

    Example::

        >>> model = torchvision.models.detection.retinanet_resnet50_fpn(pretrained=True)
        >>> model.eval()
        >>> x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
        >>> predictions = model(x)

    Arguments:
        pretrained (bool): If True, returns a model pre-trained on COCO train2017
        progress (bool): If True, displays a progress bar of the download to stderr
        num_classes (int): passed to ``RetinaNet`` as its class count
        pretrained_backbone (bool): passed to ``resnet_fpn_backbone``; forced to
            False when ``pretrained`` is set
        **kwargs: extra keyword arguments forwarded to ``RetinaNet``
    """
    # The full-model checkpoint is loaded below when `pretrained` is set, so a
    # separate backbone download would be wasted.
    backbone_weights = False if pretrained else pretrained_backbone

    # skip P2 because it generates too many anchors (according to their paper)
    backbone = resnet_fpn_backbone(
        "resnet50",
        backbone_weights,
        returned_layers=[2, 3, 4],
        extra_blocks=LastLevelP6P7(256, 256),
    )
    model = RetinaNet(backbone, num_classes, **kwargs)

    if not pretrained:
        return model

    checkpoint = load_state_dict_from_url(
        model_urls["retinanet_resnet50_fpn_coco"], progress=progress
    )
    model.load_state_dict(checkpoint)
    return model
def create_model(backbone_name: str, num_classes: int = 10, **kwargs):
    """
    Build a RetinaNet on a ResNet-family FPN backbone with every layer trainable.

    The backbone is requested with ``False`` as its second positional argument
    (the slot used for the pretrained-backbone flag elsewhere in this file),
    with returned layers 2-4, a ``LastLevelP6P7(256, 256)`` extra block and
    ``trainable_layers=5``.

    Args:
        backbone_name (string): resnet architecture. Possible values are 'ResNet',
            'resnet18', 'resnet34', 'resnet50', 'resnet101', 'resnet152',
            'resnext50_32x4d', 'resnext101_32x8d', 'wide_resnet50_2',
            'wide_resnet101_2'
        num_classes (int): class count handed to ``RetinaNet``
        **kwargs: extra keyword arguments forwarded to ``RetinaNet``

    Returns:
        The constructed ``RetinaNet`` instance.
    """
    p6p7 = LastLevelP6P7(256, 256)
    fpn_backbone = resnet_fpn_backbone(
        backbone_name,
        False,
        returned_layers=[2, 3, 4],
        extra_blocks=p6p7,
        trainable_layers=5,
    )
    detector = RetinaNet(fpn_backbone, num_classes, **kwargs)
    return detector
# Marker for "caller did not pass extra_blocks". A dedicated object (rather
# than None) keeps an explicit ``extra_blocks=None`` forwarded unchanged.
_RESNEXT50_32X4D_EXTRA_BLOCKS_UNSET = object()


def resnext50_32x4d_fpn(
    pretrained: bool = True,
    returned_layers=(2, 3, 4),
    extra_blocks=_RESNEXT50_32X4D_EXTRA_BLOCKS_UNSET,
    **kwargs,
):
    """
    Forward to ``resnet_fpn.resnext50_32x4d_fpn`` with this module's defaults.

    Args:
        pretrained (bool): forwarded as ``pretrained``.
        returned_layers: forwarded as ``returned_layers``; defaults to ``(2, 3, 4)``.
        extra_blocks: forwarded as ``extra_blocks``. When omitted, a new
            ``LastLevelP6P7(256, 256)`` is created for this call.
        **kwargs: extra keyword arguments forwarded unchanged.

    Returns:
        Whatever ``resnet_fpn.resnext50_32x4d_fpn`` returns.
    """
    # Default values are evaluated once, at definition time. Building the
    # block here instead gives every call its own module rather than one
    # instance (and its weights) shared by all backbones using the default.
    if extra_blocks is _RESNEXT50_32X4D_EXTRA_BLOCKS_UNSET:
        extra_blocks = LastLevelP6P7(256, 256)
    return resnet_fpn.resnext50_32x4d_fpn(
        pretrained=pretrained,
        returned_layers=returned_layers,
        extra_blocks=extra_blocks,
        **kwargs,
    )
# Marker for "caller did not pass extra_blocks". A dedicated object (rather
# than None) keeps an explicit ``extra_blocks=None`` forwarded unchanged.
_WIDE_RESNET101_2_EXTRA_BLOCKS_UNSET = object()


def wide_resnet101_2_fpn(
    pretrained: bool = True,
    returned_layers=(2, 3, 4),
    extra_blocks=_WIDE_RESNET101_2_EXTRA_BLOCKS_UNSET,
    **kwargs,
):
    """
    Forward to ``resnet_fpn.wide_resnet101_2_fpn`` with this module's defaults.

    Args:
        pretrained (bool): forwarded as ``pretrained``.
        returned_layers: forwarded as ``returned_layers``; defaults to ``(2, 3, 4)``.
        extra_blocks: forwarded as ``extra_blocks``. When omitted, a new
            ``LastLevelP6P7(256, 256)`` is created for this call.
        **kwargs: extra keyword arguments forwarded unchanged.

    Returns:
        Whatever ``resnet_fpn.wide_resnet101_2_fpn`` returns.
    """
    # Default values are evaluated once, at definition time. Building the
    # block here instead gives every call its own module rather than one
    # instance (and its weights) shared by all backbones using the default.
    if extra_blocks is _WIDE_RESNET101_2_EXTRA_BLOCKS_UNSET:
        extra_blocks = LastLevelP6P7(256, 256)
    return resnet_fpn.wide_resnet101_2_fpn(
        pretrained=pretrained,
        returned_layers=returned_layers,
        extra_blocks=extra_blocks,
        **kwargs,
    )
def __init__(
    self,
    backbone_fn,
    returned_layers=(2, 3, 4),
    extra_blocks=...,
    **backbone_fn_kwargs,
):
    """
    Initialise the parent class with this wrapper's FPN defaults.

    Args:
        backbone_fn: forwarded to the parent initialiser as ``backbone_fn``.
        returned_layers: forwarded as ``returned_layers``; defaults to ``(2, 3, 4)``.
        extra_blocks: forwarded as ``extra_blocks``. When omitted, a new
            ``LastLevelP6P7(256, 256)`` is created for this instance. An
            explicit ``None`` is forwarded unchanged.
        **backbone_fn_kwargs: extra keyword arguments forwarded to the parent
            initialiser.
    """
    # `...` (Ellipsis) is the "not supplied" marker. Default values are
    # evaluated once at definition time, so a module instance in the
    # signature would be shared by every object built with the default;
    # creating it here gives each instance its own block.
    if extra_blocks is ...:
        extra_blocks = LastLevelP6P7(256, 256)
    super().__init__(
        backbone_fn=backbone_fn,
        returned_layers=returned_layers,
        extra_blocks=extra_blocks,
        **backbone_fn_kwargs,
    )
def backbone1(self, pretrained_backbone, pretrained=False, trainable_backbone_layers=None):
    """
    Build the ResNet-18 FPN backbone (levels from layers 2-4 plus P6/P7).

    Args:
        pretrained_backbone: passed to ``resnet_fpn_backbone`` as its second
            positional argument; forced to False when ``pretrained`` is truthy.
        pretrained: whether a full pretrained model is going to be loaded by
            the caller, which makes a backbone download unnecessary.
        trainable_backbone_layers: requested number of trainable layers,
            resolved through ``_validate_trainable_layers``.

    Returns:
        The object returned by ``resnet_fpn_backbone``.
    """
    # Resolve the trainable-layer count before the backbone flag is overridden:
    # the helper needs to know whether any pretrained weights are in play.
    n_trainable = _validate_trainable_layers(
        pretrained or pretrained_backbone, trainable_backbone_layers, 5, 3)

    # no need to download the backbone if pretrained is set
    backbone_weights = False if pretrained else pretrained_backbone

    # skip P2 because it generates too many anchors (according to their paper)
    return resnet_fpn_backbone(
        'resnet18',
        backbone_weights,
        returned_layers=[2, 3, 4],
        extra_blocks=LastLevelP6P7(256, 256),
        trainable_layers=n_trainable,
    )
def __init__(self, backbone_name, pretrained=False, norm_layer=misc_nn_ops.FrozenBatchNorm2d,
             trainable_layers=3, out_channels=256):
    """
    Assemble a ResNet body and an FPN with P6/P7 extra levels.

    Args:
        backbone_name: key looked up in ``resnet.__dict__`` to obtain the
            backbone constructor.
        pretrained: forwarded to the backbone constructor.
        norm_layer: forwarded to the backbone constructor.
        trainable_layers: how many entries, counted from the front of
            ``['layer4', 'layer3', 'layer2', 'layer1', 'conv1']``, keep their
            gradients; must be within ``0..5``.
        out_channels: channel count of the FPN outputs, also stored on
            ``self.out_channels``.
    """
    super().__init__()

    # Get ResNet
    make_backbone = resnet.__dict__[backbone_name]
    backbone = make_backbone(pretrained=pretrained, norm_layer=norm_layer)

    # Select the layers that stay trainable, deepest first.
    assert 0 <= trainable_layers <= 5
    trainable_prefixes = tuple(
        ['layer4', 'layer3', 'layer2', 'layer1', 'conv1'][:trainable_layers])

    # Freeze every parameter whose name does not start with one of the
    # selected prefixes. This happens whether or not `pretrained` is set.
    for name, parameter in backbone.named_parameters():
        if not name.startswith(trainable_prefixes):
            parameter.requires_grad_(False)

    # Expose layer1..layer4 under the keys '0'..'3'.
    return_layers = {f'layer{idx + 1}': str(idx) for idx in range(4)}

    # The leading 0 is a placeholder for the first level; only the remaining
    # three entries size the FPN.
    in_channels_stage2 = backbone.inplanes // 8
    self.in_channels_list = [0] + [in_channels_stage2 * factor for factor in (2, 4, 8)]

    # NOTE(review): the body returns four feature maps while the FPN below is
    # sized for three inputs; presumably the forward pass drops level '0'.
    # Confirm against forward().
    self.body = IntermediateLayerGetter(backbone, return_layers=return_layers)
    self.fpn = FeaturePyramidNetwork(
        in_channels_list=self.in_channels_list[1:],  # nonzero only
        out_channels=out_channels,
        extra_blocks=LastLevelP6P7(out_channels, out_channels),
    )
    self.out_channels = out_channels
def __init__(self, backbone, return_layers, in_channels_list, out_channels, last_level='pool'):
    """
    Chain an intermediate-layer extractor ("body") with an FPN ("fpn").

    Args:
        backbone: network wrapped by ``IntermediateLayerGetter``.
        return_layers: forwarded to ``IntermediateLayerGetter``.
        in_channels_list: input channel counts handed to the FPN; its last
            entry is also the input width of the P6/P7 block.
        out_channels: FPN output channel count, stored on ``self.out_channels``.
        last_level: ``'pool'`` appends ``LastLevelMaxPool``; ``'p6p7'`` appends
            ``LastLevelP6P7(in_channels_list[-1], out_channels)``.

    Raises:
        ValueError: if ``last_level`` is neither ``'pool'`` nor ``'p6p7'``.
    """
    feature_extractor = IntermediateLayerGetter(backbone, return_layers=return_layers)

    # Choose the block that extends the pyramid past its last level.
    if last_level == 'pool':
        top_blocks = LastLevelMaxPool()
    elif last_level == 'p6p7':
        top_blocks = LastLevelP6P7(in_channels_list[-1], out_channels)
    else:
        raise ValueError(f'Not implemented {last_level}.')

    pyramid = FeaturePyramidNetwork(
        in_channels_list=in_channels_list,
        out_channels=out_channels,
        extra_blocks=top_blocks,
    )

    # Registration order matters: "body" runs before "fpn".
    stages = OrderedDict()
    stages["body"] = feature_extractor
    stages["fpn"] = pyramid
    super(BackboneWithFPN, self).__init__(stages)

    self.out_channels = out_channels
def __init__(self, in_channels, out_channels):
    """
    Wrap a ``LastLevelP6P7`` block and keep it on ``self.p6p7``.

    Args:
        in_channels: first argument of ``LastLevelP6P7``.
        out_channels: second argument of ``LastLevelP6P7``.
    """
    super().__init__()
    p6p7_block = LastLevelP6P7(in_channels, out_channels)
    self.p6p7 = p6p7_block
losses = {} detections: List[Dict[str, Tensor]] = [] if self.training: assert targets is not None losses = self.compute_loss(targets, head_outputs, anchors, orig_targets, seg_features) pretrained=True, progress=True, num_classes=91, pretrained_backbone=True if pretrained: pretrained_backbone=False backbone = resnet_fpn_backbone('resnet50', pretrained_backbone, returned_layers=[2,3,4], extra_blocks=LastLevelP6P7(256, 256)) model = RetinaNet(backbone, num_classes) if pretrained: state_dict = load_state_dict_from_url(model_urls['retinanet_resnet50_fpn_coco'], progress = progress) model.load_state_dict(state_dict) overwrite_eps(model, 0.0) print(model)