def __init__(
    self,
    learning_rate: float = 0.0001,
    num_classes: int = 91,
    backbone: str = None,
    fpn: bool = True,
    pretrained_backbone: str = None,
    trainable_backbone_layers: int = 3,
    **kwargs,
):
    """Build the wrapped RetinaNet detector and store it on ``self.model``.

    Args:
        learning_rate: the learning rate, stored on ``self.learning_rate``.
        num_classes: number of detection classes (including background).
        backbone: identifier of a custom backbone. When ``None`` the model
            is built with ``retinanet_resnet50_fpn(pretrained=True)`` and
            only its head is replaced to predict ``num_classes`` classes.
        fpn: forwarded to ``create_retinanet_backbone``; only used when
            ``backbone`` is given.
        pretrained_backbone (str): forwarded to
            ``create_retinanet_backbone``; only used when ``backbone`` is
            given (e.g. "imagenet" for a backbone pre-trained on Imagenet).
        trainable_backbone_layers: number of trainable resnet layers
            starting from final block; only used when ``backbone`` is given.
        **kwargs: extra keyword arguments forwarded to the model
            constructors (``retinanet_resnet50_fpn`` in the default branch;
            ``create_retinanet_backbone`` and ``RetinaNet`` otherwise).
    """
    super().__init__()
    self.learning_rate = learning_rate
    self.num_classes = num_classes
    self.backbone = backbone

    if backbone is None:
        self.model = retinanet_resnet50_fpn(pretrained=True, **kwargs)
        # The replacement head only needs the three shape arguments.
        # Model-level kwargs are deliberately NOT forwarded here:
        # RetinaNetHead does not accept the keywords understood by
        # retinanet_resnet50_fpn, so sharing them raised TypeError
        # whenever kwargs was non-empty.
        self.model.head = RetinaNetHead(
            in_channels=self.model.backbone.out_channels,
            num_anchors=self.model.head.classification_head.num_anchors,
            num_classes=num_classes,
        )
    else:
        # NOTE(review): the same kwargs go to both the backbone factory and
        # RetinaNet; confirm create_retinanet_backbone tolerates that.
        backbone_model = create_retinanet_backbone(
            self.backbone,
            fpn,
            pretrained_backbone,
            trainable_backbone_layers,
            **kwargs,
        )
        self.model = RetinaNet(backbone_model, num_classes=num_classes, **kwargs)
def model(
    num_classes: int,
    backbone: Optional[TorchvisionBackboneConfig] = None,
    remove_internal_transforms: bool = True,
    pretrained: bool = True,
    **retinanet_kwargs,
) -> nn.Module:
    """Assemble a RetinaNet and patch its parameter groups.

    Args:
        num_classes: number of classes the detection head predicts.
        backbone: optional backbone config; its ``.backbone`` attribute is
            handed to ``RetinaNet``. When ``None`` a
            ``retinanet_resnet50_fpn`` model is created and its head is
            swapped for a fresh ``RetinaNetHead``.
        remove_internal_transforms: when true,
            ``remove_internal_model_transforms`` is applied to the model.
        pretrained: used for both ``pretrained`` and ``pretrained_backbone``
            of ``retinanet_resnet50_fpn`` (default-backbone path only).
        **retinanet_kwargs: forwarded to ``retinanet_resnet50_fpn`` or
            ``RetinaNet`` depending on the path taken.

    Returns:
        The constructed model.
    """
    if backbone is not None:
        net = RetinaNet(
            backbone=backbone.backbone,
            num_classes=num_classes,
            **retinanet_kwargs,
        )
    else:
        net = retinanet_resnet50_fpn(
            pretrained=pretrained,
            pretrained_backbone=pretrained,
            **retinanet_kwargs,
        )
        # Reuse the geometry of the stock head, but size it for num_classes.
        channels = net.backbone.out_channels
        anchors_per_location = net.head.classification_head.num_anchors
        net.head = RetinaNetHead(
            in_channels=channels,
            num_anchors=anchors_per_location,
            num_classes=num_classes,
        )
        resnet_fpn.patch_param_groups(net.backbone)

    # Applied on both paths, after the model is fully assembled.
    patch_retinanet_param_groups(net)

    if remove_internal_transforms:
        remove_internal_model_transforms(net)

    return net
def create_retinanet(
    num_classes: int = 91,
    backbone: nn.Module = None,
    **kwargs,
):
    """
    Creates RetinaNet implementation based on torchvision library.

    Args:
        num_classes (int) : number of classes, counting the background.
            Class id "0" is reserved for background, so pass
            (number of classes to label) + 1.
        backbone (nn.Module) : optional backbone handed to ``RetinaNet``.
            When ``None``, ``retinanet_resnet50_fpn(pretrained=True,
            num_classes=91)`` is built and its head is replaced with a
            ``RetinaNetHead`` sized for ``num_classes``.
        **kwargs : forwarded to ``retinanet_resnet50_fpn`` or ``RetinaNet``.

    Returns:
        The constructed model.
    """
    # Custom backbone: build the detector directly around it.
    if backbone is not None:
        return RetinaNet(backbone, num_classes=num_classes, **kwargs)

    # Default path: start from the 91-class pretrained model, then swap the head.
    detector = retinanet_resnet50_fpn(pretrained=True, num_classes=91, **kwargs)
    channels = detector.backbone.out_channels
    anchors_per_location = detector.head.classification_head.num_anchors
    detector.head = RetinaNetHead(
        in_channels=channels,
        num_anchors=anchors_per_location,
        num_classes=num_classes,
    )
    return detector
def create_model(num_classes, nms_thresh, score_thresh):
    """Build a RetinaNet around the backbone returned by ``load_backbone``.

    Args:
        num_classes (int): number of classes in the model
        nms_thresh (float): non-max suppression threshold for
            intersection-over-union [0,1]
        score_thresh (float): minimum prediction score to keep during
            prediction [0,1]

    Returns:
        model: a pytorch nn module
    """
    resnet = load_backbone()
    detector = RetinaNet(resnet.backbone, num_classes=num_classes)

    # Thresholds are assigned as attributes after construction.
    detector.nms_thresh = nms_thresh
    detector.score_thresh = score_thresh

    # Optionally allow anchor generator parameters to be created here
    # https://pytorch.org/vision/stable/_modules/torchvision/models/detection/retinanet.html
    return detector
def create_model(backbone_name: str, num_classes: int = 10, **kwargs):
    """Build a RetinaNet on a ResNet-FPN backbone with P6/P7 extra levels.

    Args:
        backbone_name (string): resnet architecture. Possible values are
            'ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
            'resnet152', 'resnext50_32x4d', 'resnext101_32x8d',
            'wide_resnet50_2', 'wide_resnet101_2'
        num_classes: number of classes passed to ``RetinaNet``.
        **kwargs: forwarded to ``RetinaNet``.

    Returns:
        The ``RetinaNet`` instance.
    """
    # Extra pyramid levels appended after the returned ResNet layers.
    pyramid_tail = LastLevelP6P7(256, 256)

    # ``False`` is passed as the second positional argument, unchanged
    # (presumably the pretrained flag; confirm for the torchvision version).
    fpn_backbone = resnet_fpn_backbone(
        backbone_name,
        False,
        returned_layers=[2, 3, 4],
        extra_blocks=pyramid_tail,
        trainable_layers=5,
    )
    return RetinaNet(fpn_backbone, num_classes, **kwargs)
def get_model(
    model_name,
    num_classes,
    backbone,
    fpn,
    pretrained,
    pretrained_backbone,
    trainable_backbone_layers,
    anchor_generator,
    **kwargs,
):
    """Create a Faster R-CNN or RetinaNet detector.

    Args:
        model_name: ``"fasterrcnn"`` selects the Faster R-CNN paths; any
            other value takes the RetinaNet paths. With no backbone it is
            also the key looked up in ``_models``.
        num_classes: number of classes for the new head / detector.
        backbone: key into ``ObjectDetector.backbones``; ``None`` uses the
            stock model from ``_models`` and replaces only its head.
        fpn: accepted but not used by this function.
        pretrained: forwarded to the ``_models`` constructor.
        pretrained_backbone: forwarded to the ``_models`` constructor, or
            as ``pretrained`` to the backbone factory.
        trainable_backbone_layers: forwarded to the Faster R-CNN
            constructor from ``_models``, or as ``trainable_layers`` to the
            backbone factory.
        anchor_generator: anchor generator for a custom backbone. When
            ``None`` and the backbone has no ``fpn`` attribute, a default
            ``AnchorGenerator`` is created.
        **kwargs: forwarded to ``RetinaNetHead`` (stock RetinaNet path) or
            to the backbone factory (custom backbone path).

    Returns:
        The constructed detection model.
    """
    wants_faster_rcnn = model_name == "fasterrcnn"

    if backbone is None:
        # Constructs a model with a ResNet-50-FPN backbone when no backbone is specified.
        factory = _models[model_name]
        if wants_faster_rcnn:
            detector = factory(
                pretrained=pretrained,
                pretrained_backbone=pretrained_backbone,
                trainable_backbone_layers=trainable_backbone_layers,
            )
            # Swap the box predictor for one sized to num_classes.
            predictor_inputs = detector.roi_heads.box_predictor.cls_score.in_features
            detector.roi_heads.box_predictor = FastRCNNPredictor(predictor_inputs, num_classes)
        else:
            detector = factory(
                pretrained=pretrained,
                pretrained_backbone=pretrained_backbone,
            )
            detector.head = RetinaNetHead(
                in_channels=detector.backbone.out_channels,
                num_anchors=detector.head.classification_head.num_anchors,
                num_classes=num_classes,
                **kwargs,
            )
        return detector

    build_backbone = ObjectDetector.backbones.get(backbone)
    backbone_model, num_features = build_backbone(
        pretrained=pretrained_backbone,
        trainable_layers=trainable_backbone_layers,
        **kwargs,
    )
    backbone_model.out_channels = num_features

    # Only non-FPN backbones get the explicit single-feature-map anchors;
    # otherwise anchor_generator stays None.
    if anchor_generator is None and not hasattr(backbone_model, "fpn"):
        anchor_generator = AnchorGenerator(
            sizes=((32, 64, 128, 256, 512), ),
            aspect_ratios=((0.5, 1.0, 2.0), ),
        )

    if wants_faster_rcnn:
        return FasterRCNN(
            backbone_model,
            num_classes=num_classes,
            rpn_anchor_generator=anchor_generator,
        )
    return RetinaNet(
        backbone_model,
        num_classes=num_classes,
        anchor_generator=anchor_generator,
    )
class lit_retinanet(pl.LightningModule):
    """
    Creates a RetinaNet which can be fine-tuned.

    The wrapped detector lives on ``self.model``. Training sums the loss
    dict returned by the detector; validation reports mean IoU and GIoU
    between targets and predictions.
    """

    def __init__(
        self,
        learning_rate: float = 0.0001,
        num_classes: int = 91,
        backbone: str = None,
        fpn: bool = True,
        pretrained_backbone: str = None,
        trainable_backbone_layers: int = 3,
        **kwargs,
    ):
        """
        Args:
            learning_rate: the learning rate
            num_classes: number of detection classes (including background)
            backbone: identifier of a custom backbone. When ``None`` the
                model is built with ``retinanet_resnet50_fpn(pretrained=True)``
                and only its head is replaced to predict ``num_classes``.
            fpn: forwarded to ``create_retinanet_backbone``; only used when
                ``backbone`` is given.
            pretrained_backbone (str): forwarded to
                ``create_retinanet_backbone``; only used when ``backbone``
                is given (e.g. "imagenet" for a backbone pre-trained on
                Imagenet).
            trainable_backbone_layers: number of trainable resnet layers
                starting from final block; only used when ``backbone`` is
                given.
            **kwargs: extra keyword arguments forwarded to the model
                constructors (``retinanet_resnet50_fpn`` in the default
                branch; ``create_retinanet_backbone`` and ``RetinaNet``
                otherwise).
        """
        super().__init__()
        self.learning_rate = learning_rate
        self.num_classes = num_classes
        self.backbone = backbone

        if backbone is None:
            self.model = retinanet_resnet50_fpn(pretrained=True, **kwargs)
            # Model-level kwargs are deliberately NOT forwarded to the head:
            # RetinaNetHead does not accept the keywords understood by
            # retinanet_resnet50_fpn, so sharing them raised TypeError
            # whenever kwargs was non-empty.
            self.model.head = RetinaNetHead(
                in_channels=self.model.backbone.out_channels,
                num_anchors=self.model.head.classification_head.num_anchors,
                num_classes=num_classes,
            )
        else:
            # NOTE(review): the same kwargs go to both the backbone factory
            # and RetinaNet; confirm create_retinanet_backbone tolerates that.
            backbone_model = create_retinanet_backbone(
                self.backbone,
                fpn,
                pretrained_backbone,
                trainable_backbone_layers,
                **kwargs,
            )
            self.model = RetinaNet(backbone_model, num_classes=num_classes, **kwargs)

    def forward(self, x):
        """Run inference on ``x``.

        Switches the wrapped model to eval mode before the call and does
        not restore the previous mode afterwards.
        """
        self.model.eval()
        return self.model(x)

    def training_step(self, batch, batch_idx):
        """Compute the summed detection loss for one batch.

        Returns:
            dict with ``"loss"`` (sum of all entries of the model's loss
            dict) and ``"log"`` (the loss dict itself).
        """
        images, targets = batch
        # Shallow-copy each target dict before handing it to the model.
        targets = [dict(t) for t in targets]

        # RetinaNet takes both images and targets for training, returns a dict of losses
        loss_dict = self.model(images, targets)
        loss = sum(loss_dict.values())
        return {"loss": loss, "log": loss_dict}

    def validation_step(self, batch, batch_idx):
        """Compute the batch-mean IoU and GIoU between targets and predictions."""
        images, targets = batch
        # Retinanet takes only images for eval() mode
        outs = self.model(images)
        iou = torch.stack([_evaluate_iou(t, o) for t, o in zip(targets, outs)]).mean()
        giou = torch.stack([_evaluate_giou(t, o) for t, o in zip(targets, outs)]).mean()
        return {"val_iou": iou, "val_giou": giou}

    def validation_epoch_end(self, outs):
        """Average the per-batch IoU / GIoU values collected during validation."""
        avg_iou = torch.stack([o["val_iou"] for o in outs]).mean()
        avg_giou = torch.stack([o["val_giou"] for o in outs]).mean()
        logs = {"val_iou": avg_iou, "val_giou": avg_giou}
        return {"avg_val_iou": avg_iou, "avg_val_giou": avg_giou, "log": logs}

    def configure_optimizers(self):
        """Return an SGD optimizer over the wrapped model's parameters."""
        return torch.optim.SGD(
            self.model.parameters(),
            lr=self.learning_rate,
            momentum=0.9,
            weight_decay=0.005,
        )