def custom_maskrcnn_resnet_fpn(backbone, pretrained=True, progress=True,
                               num_classes=91, pretrained_backbone=True,
                               trainable_backbone_layers=3, **kwargs):
    """Build a Mask R-CNN model on a custom ResNet-FPN backbone.

    Args:
        backbone: dict with keys ``'name'`` and ``'params'`` describing the backbone.
        pretrained: load COCO-pretrained Mask R-CNN weights (resnet50 backbones only).
        progress: show a download progress bar for the weights.
        num_classes: number of output classes (COCO default: 91).
        pretrained_backbone: use ImageNet-pretrained backbone weights.
        trainable_backbone_layers: number of backbone layers left unfrozen (0-5).
        **kwargs: forwarded to ``MaskRCNN``.

    Returns:
        A ``MaskRCNN`` instance.
    """
    name = backbone['name']
    params = backbone['params']
    assert 0 <= trainable_backbone_layers <= 5

    # With no pretrained weights at all there is nothing worth freezing:
    # make every backbone layer trainable.
    if not (pretrained or pretrained_backbone):
        params['trainable_backbone_layers'] = 5
    if pretrained:
        # The full-model checkpoint supersedes backbone weights; skip that download.
        params['pretrained'] = False

    fpn_backbone = custom_resnet_fpn_backbone(name, params)
    model = MaskRCNN(fpn_backbone, num_classes, **kwargs)

    # COCO checkpoint only exists for the resnet50 flavour; strict=False
    # tolerates head-size mismatches when num_classes != 91.
    if pretrained and name.endswith('resnet50'):
        state_dict = load_state_dict_from_url(
            maskrcnn_model_urls['maskrcnn_resnet50_fpn_coco'], progress=progress)
        model.load_state_dict(state_dict, strict=False)
    return model
def get_instance_segmentation_model(num_classes, backbone, dropout=False):
    """Instance-segmentation Mask R-CNN with an ImageNet-pretrained backbone.

    Args:
        num_classes: number of classes, including background.
        backbone: resnet backbone name, e.g. ``'resnet50'``.
        dropout: rebuild the box and mask heads (project-local variants).
    """
    fpn = resnet_fpn_backbone(backbone, pretrained=True)
    model = MaskRCNN(fpn, num_classes)

    if dropout:
        # Rebuild the box head and mask head from scratch.
        # NOTE(review): stock torchvision TwoMLPHead / MaskRCNNHeads contain no
        # dropout — presumably these names are project-local variants that do;
        # confirm against the imports.
        resolution = model.roi_heads.box_roi_pool.output_size[0]
        representation_size = 1024
        model.roi_heads.box_head = TwoMLPHead(
            fpn.out_channels * resolution ** 2, representation_size)
        model.roi_heads.mask_head = MaskRCNNHeads(
            fpn.out_channels, (256, 256, 256, 256), 1)

    # Resize the box predictor to the requested class count.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # Resize the mask predictor likewise.
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    model.roi_heads.mask_predictor = MaskRCNNPredictor(
        in_features_mask, 256, num_classes)
    return model
def maskrcnn_mobileNetV3_fpn(num_classes=3, backbone_chkp=False, **kwargs):
    '''
    This function builds torchvision version of MASK RCNN using
    FPN-mobileNetV3 backbone.
    '''
    from torchvision.models.detection.mask_rcnn import MaskRCNN

    fpn_backbone = mobilenetV3_fpn_backbone(backbone_chkp)
    return MaskRCNN(fpn_backbone, num_classes, **kwargs)
class DetectorConfig(AppConfig):
    """Django app config that builds and loads the Mask R-CNN detector once at startup.

    Class-level statements run at import time, so the model is constructed and
    its weights restored exactly once when the app registry loads this config.
    """

    name = 'detector'

    # Load model: a COCO-shaped (91-class) Mask R-CNN with local weights.
    model_path = 'detector/saved_models/maskrcnn_resnet50_fpn_coco-bf2d0c1e.pth'
    # Fixed: the original had a duplicated assignment (`backbone = backbone = ...`).
    backbone = resnet_fpn_backbone('resnet50', False)
    maskRCNNModel = MaskRCNN(backbone, 91)
    checkpoint = torch.load(model_path)
    maskRCNNModel.load_state_dict(checkpoint)
    maskRCNNModel.eval()  # inference mode: disables dropout / BN running-stat updates
    print("Model Loaded")
def fasterrcnn_resnet_fpn(pretrained_path=None, backbone='resnet50',
                          num_classes=91, pretrained_backbone=True,
                          mask=False, hidden_layer=256, **kwargs):
    """
    Based on torchvision.models.detection.faster_rcnn

    Args:
        pretrained_path: optional path to a full-model state dict to restore.
        backbone: resnet backbone name.
        num_classes: number of classes; 91 keeps the COCO-shaped box head.
        pretrained_backbone: use ImageNet weights for the backbone.
        mask: build ``MaskRCNN`` instead of ``FasterRCNN``.
        hidden_layer: hidden width for a replaced mask predictor.
        **kwargs: forwarded to the model constructor.
    """
    if pretrained_path is not None:
        # A full checkpoint will be restored below, so the backbone
        # download would be wasted work.
        pretrained_backbone = False

    fpn = resnet_fpn_backbone(backbone, pretrained_backbone)
    model_cls = MaskRCNN if mask else FasterRCNN
    model = model_cls(fpn, num_classes, **kwargs)

    # === handle non-standard case (different number of classes)
    if num_classes != 91:
        in_features = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # === Mask-RCNN or not
    if mask:
        in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
        model.roi_heads.mask_predictor = MaskRCNNPredictor(
            in_features_mask, hidden_layer, num_classes)

    if pretrained_path is not None:
        model.load_state_dict(torch.load(pretrained_path))
    return model
def get_model_instance_segmentation_efficientnet(model_name, num_classes,
                                                 target_dim, freeze_batch_norm=False):
    """Build a Mask R-CNN whose feature extractor is a pretrained EfficientDet backbone+FPN.

    Args:
        model_name: effdet config name (e.g. 'tf_efficientdet_d5').
        num_classes: number of detection classes.
        target_dim: square input size, used for both min_size and max_size.
        freeze_batch_norm: if True, freeze batch-norm layers in the backbone/BiFPN.

    Returns:
        A MaskRCNN whose box predictor is replaced by an effdet DetBenchTrain head.
    """
    print("Using EffDet detection model")
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0], output_size=7, sampling_ratio=2)
    # ofekp: note that roi_pooler is passed to box_roi_pooler in the MaskRCNN network
    # and is not being used in roi_heads.py
    mask_roi_pool = MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3], output_size=14, sampling_ratio=2)
    # Instantiate a throwaway EfficientDet purely to obtain its pretrained
    # backbone, FPN, and box net.
    config = effdet.get_efficientdet_config(model_name)
    efficientDetModelTemp = EfficientDet(config, pretrained_backbone=False)
    load_pretrained(efficientDetModelTemp, config.url)
    config.num_classes = num_classes
    config.image_size = target_dim
    out_channels = config.fpn_channels  # This is since the config of 'tf_efficientdet_d5' creates fpn outputs with num of channels = 288
    # Wrap the EfficientDet backbone + FPN so MaskRCNN can consume it.
    backbone_fpn = BackboneWithCustomFPN(
        config,
        efficientDetModelTemp.backbone,
        efficientDetModelTemp.fpn,
        out_channels
    )
    # TODO(ofekp): pretrained!
    # from the repo trainable_layers=trainable_backbone_layers=3
    model = MaskRCNN(
        backbone_fpn,
        min_size=target_dim,
        max_size=target_dim,
        num_classes=num_classes,
        mask_roi_pool=mask_roi_pool,
        # rpn_anchor_generator=anchor_generator,
        box_roi_pool=roi_pooler)
    # for training with different number of classes (default is 90) we need to add this line
    # TODO(ofekp): we might want to init weights of the new HeadNet
    class_net = HeadNet(config, num_outputs=config.num_classes, norm_kwargs=dict(eps=.001, momentum=.01))
    efficientDetModel = EfficientDetBB(config, class_net, efficientDetModelTemp.box_net)
    # Replace the standard box predictor with the effdet training bench.
    model.roi_heads.box_predictor = DetBenchTrain(efficientDetModel, config)
    if freeze_batch_norm:
        # we only freeze BN layers in backbone and the BiFPN
        print("Freezing batch normalization weights")
        freeze_bn(model.backbone)
    return model
def custom_maskrcnn_resnet_fpn(backbone, pretrained=True, progress=True,
                               num_classes=91, pretrained_backbone=True,
                               trainable_backbone_layers=3, **kwargs):
    """Mask R-CNN on a custom ResNet-FPN backbone with adaptive RoI poolers.

    When the backbone's body returns a non-standard number of feature maps,
    MultiScaleRoIAlign poolers are built over exactly those maps; with the
    usual four maps, ``None`` is passed so MaskRCNN uses its defaults.

    Args:
        backbone: dict with keys 'name' and 'params' describing the backbone.
        pretrained: load COCO Mask R-CNN weights (resnet50 backbones only).
        progress: show a download progress bar.
        num_classes: number of output classes (COCO default: 91).
        pretrained_backbone: use ImageNet backbone weights.
        trainable_backbone_layers: backbone layers left unfrozen (0-5).
        **kwargs: forwarded to ``MaskRCNN``.
    """
    backbone_name = backbone['name']
    backbone_params_config = backbone['params']
    assert 0 <= trainable_backbone_layers <= 5
    # dont freeze any layers if pretrained model or backbone is not used
    if not (pretrained or pretrained_backbone):
        backbone_params_config['trainable_backbone_layers'] = 5
    if pretrained:
        # no need to download the backbone if pretrained is set
        backbone_params_config['pretrained'] = False
    backbone_model = custom_resnet_fpn_backbone(backbone_name, backbone_params_config)
    num_feature_maps = len(backbone_model.body.return_layers)
    box_roi_pool = None if num_feature_maps == 4 \
        else MultiScaleRoIAlign(featmap_names=[str(i) for i in range(num_feature_maps)],
                                output_size=7, sampling_ratio=2)
    mask_roi_pool = None if num_feature_maps == 4 \
        else MultiScaleRoIAlign(featmap_names=[str(i) for i in range(num_feature_maps)],
                                output_size=14, sampling_ratio=2)
    # Fixed: the original passed `mask_roi_pool=mask_roi_pool**kwargs` (missing
    # comma), which parses as exponentiation — a runtime TypeError — and
    # silently dropped **kwargs.
    model = MaskRCNN(backbone_model, num_classes,
                     box_roi_pool=box_roi_pool,
                     mask_roi_pool=mask_roi_pool,
                     **kwargs)
    if pretrained and backbone_name.endswith('resnet50'):
        state_dict = load_state_dict_from_url(
            maskrcnn_model_urls['maskrcnn_resnet50_fpn_coco'], progress=progress)
        model.load_state_dict(state_dict, strict=False)
    return model
def model(
    num_classes: int,
    backbone: Optional[nn.Module] = None,
    remove_internal_transforms: bool = True,
    pretrained: bool = True,
    **mask_rcnn_kwargs
) -> nn.Module:
    """MaskRCNN model implemented by torchvision.

    # Arguments
        num_classes: Number of classes.
        backbone: Backbone model to use; a resnet50_fpn model is built when None.
        remove_internal_transforms: Strip torchvision's internal resize /
            normalization transforms, which are already applied at the
            `Dataset` level.
        pretrained: Passed to `maskrcnn_resnet50_fpn` when `backbone is None`.
            True (the default) suits transfer learning; False suits inference
            with user-provided weights.
        **mask_rcnn_kwargs: Forwarded to
            `torchvision.models.detection.mask_rcnn.MaskRCNN`.

    # Return
        A Pytorch `nn.Module`.
    """
    if backbone is None:
        model = maskrcnn_resnet50_fpn(
            pretrained=pretrained, pretrained_backbone=pretrained, **mask_rcnn_kwargs
        )

        # Resize the box head for the requested class count.
        box_in = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(box_in, num_classes)

        # Resize the mask head likewise.
        mask_in = model.roi_heads.mask_predictor.conv5_mask.in_channels
        model.roi_heads.mask_predictor = MaskRCNNPredictor(
            in_channels=mask_in, dim_reduced=256, num_classes=num_classes
        )

        backbone_param_groups = resnet_fpn.param_groups(model.backbone)
    else:
        model = MaskRCNN(backbone, num_classes=num_classes, **mask_rcnn_kwargs)
        backbone_param_groups = backbone.param_groups()

    patch_param_groups(model=model, backbone_param_groups=backbone_param_groups)

    if remove_internal_transforms:
        remove_internal_model_transforms(model)
    return model
def maskrcnn_resnet18_fpn(num_classes):
    """Mask R-CNN over a resnet18 body hand-wrapped with a small FPN."""
    src_backbone = torchvision.models.resnet18(pretrained=True)

    # Keep only the conv trunk through layer4; drop avgpool and the FC head.
    return_layers = {'layer1': 0, 'layer2': 1, 'layer3': 2, 'layer4': 3}
    child_names = [name for name, _ in src_backbone.named_children()]
    # Sanity check: every requested layer must exist on the backbone.
    if not set(return_layers).issubset(child_names):
        raise ValueError("return_layers are not present in model")

    orig_return_layers = return_layers
    # Work on a copy so the original mapping survives for backbone_body.
    remaining = {k: v for k, v in return_layers.items()}
    layers = OrderedDict()
    for name, module in src_backbone.named_children():
        layers[name] = module
        if name in remaining:
            del remaining[name]
        if not remaining:
            # Everything after the last requested layer is discarded.
            break

    backbone_module = backbone_body(layers, orig_return_layers)

    # resnet18: layer1..layer4 emit 64/128/256/512 channels; FPN top is 512/8.
    base_channels = 64
    fpn = FeaturePyramidNetwork(
        in_channels_list=[base_channels, base_channels * 2,
                          base_channels * 4, base_channels * 8],
        out_channels=base_channels,
        extra_blocks=LastLevelMaxPool(),
    )
    backbone_fpn = BackboneFPN(backbone_module, fpn, base_channels)
    return MaskRCNN(backbone_fpn, num_classes)
def model(num_classes: int,
          backbone: Optional[TorchvisionBackboneConfig] = None,
          remove_internal_transforms: bool = True,
          **mask_rcnn_kwargs) -> nn.Module:
    """MaskRCNN model implemented by torchvision.

    # Arguments
        num_classes: Number of classes.
        backbone: Backbone config to use; a resnet50_fpn model is built when None.
        remove_internal_transforms: Strip torchvision's internal resize /
            normalization transforms, which are already applied at the
            `Dataset` level.
        **mask_rcnn_kwargs: Forwarded to
            `torchvision.models.detection.mask_rcnn.MaskRCNN`.

    # Return
        A Pytorch `nn.Module`.
    """
    if backbone is None:
        model = maskrcnn_resnet50_fpn(pretrained=True,
                                      pretrained_backbone=True,
                                      **mask_rcnn_kwargs)

        # Resize the box head for the requested class count.
        box_in = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(box_in, num_classes)

        # Resize the mask head likewise.
        mask_in = model.roi_heads.mask_predictor.conv5_mask.in_channels
        model.roi_heads.mask_predictor = MaskRCNNPredictor(
            in_channels=mask_in, dim_reduced=256, num_classes=num_classes)

        resnet_fpn.patch_param_groups(model.backbone)
    else:
        model = MaskRCNN(backbone.backbone, num_classes=num_classes,
                         **mask_rcnn_kwargs)

    patch_rcnn_param_groups(model=model)

    if remove_internal_transforms:
        remove_internal_model_transforms(model)
    return model
def get_model_instance_segmentation(num_classes):
    """Mask R-CNN on a ResNeSt-FPN backbone with heads sized for ``num_classes``.

    Fix: the constructor previously hard-coded ``num_classes=21`` and ignored
    the function parameter; the heads were replaced afterwards so the final
    model was correct, but the hardcode was misleading and wasted an
    allocation at the wrong size. The model is now built with ``num_classes``
    directly.

    Args:
        num_classes: number of classes, including background.
    """
    backbone = ResnestBackboneWithFPN()
    model = MaskRCNN(backbone, num_classes=num_classes, min_size=500, max_size=500)
    # model = torchvision.models.detection.mask_rcnn.maskrcnn_resnet50_fpn(pretrained=True, min_size=500, max_size=600)

    # Replace the box predictor (fresh init at the right size).
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # Replace the mask predictor likewise.
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    hidden_layer = 256
    model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                       hidden_layer,
                                                       num_classes)
    return model
def make_model(cfg):
    """Initializes the model.

    Args:
        cfg (Config): pass in all configurations
    """
    if cfg.model_name == 'maskrcnn_resnet50_fpn':
        if cfg.coco_pretrained:
            model = maskrcnn_resnet50_fpn(pretrained=True)
        else:
            model = maskrcnn_resnet50_fpn(num_classes=cfg.num_classes,
                                          pretrained=False)

        # Class count the loaded predictors were built for.
        pretrained_num_classes = (
            model.roi_heads.mask_predictor.mask_fcn_logits.out_channels)

        if cfg.num_classes != pretrained_num_classes or cfg.swap_model_predictors:
            # Swap the FasterRCNN box head for one sized to cfg.num_classes.
            box_predictor = model.roi_heads.box_predictor
            model.roi_heads.box_predictor = FastRCNNPredictor(
                box_predictor.cls_score.in_features, cfg.num_classes)

            # Swap the MaskRCNN mask head likewise, keeping its hidden width.
            mask_predictor = model.roi_heads.mask_predictor
            model.roi_heads.mask_predictor = MaskRCNNPredictor(
                mask_predictor.conv5_mask.in_channels,
                mask_predictor.conv5_mask.out_channels,
                cfg.num_classes)
    elif cfg.model_name == 'adjust_anchor':
        # Smaller anchors and tighter aspect ratios than the defaults.
        anchor_generator = AnchorGenerator(
            sizes=((16, ), (32, ), (64, ), (128, ), (256, )),
            aspect_ratios=((0.8, 1.0, 1.25), ) * 5)
        backbone = resnet_fpn_backbone('resnet50', pretrained=True)
        model = MaskRCNN(backbone=backbone,
                         num_classes=cfg.num_classes,
                         rpn_anchor_generator=anchor_generator)
    else:
        raise NotImplementedError
    return model
def model(num_classes: int,
          backbone: Optional[nn.Module] = None,
          remove_internal_transforms: bool = True,
          **faster_rcnn_kwargs) -> nn.Module:
    """
    MaskRCNN model given by torchvision. (Note: despite the kwargs name,
    this builds a Mask R-CNN, not a Faster R-CNN.)

    Args:
        num_classes (int): Number of classes.
        backbone (nn.Module): Backbone model to use. Defaults to a resnet50_fpn model.

    Return:
        nn.Module
    """
    if backbone is None:
        model = maskrcnn_resnet50_fpn(pretrained=True, **faster_rcnn_kwargs)

        # Resize the box predictor for num_classes.
        in_features_box = model.roi_heads.box_predictor.cls_score.in_features
        model.roi_heads.box_predictor = FastRCNNPredictor(
            in_features_box, num_classes)

        # Resize the mask predictor likewise.
        in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
        model.roi_heads.mask_predictor = MaskRCNNPredictor(
            in_channels=in_features_mask, dim_reduced=256,
            num_classes=num_classes)

        backbone_param_groups = resnet_fpn.param_groups(model.backbone)
    else:
        model = MaskRCNN(backbone, num_classes=num_classes, **faster_rcnn_kwargs)
        backbone_param_groups = backbone.param_groups()

    patch_param_groups(model=model,
                       backbone_param_groups=backbone_param_groups)

    if remove_internal_transforms:
        remove_internal_model_transforms(model)
    return model
# Resolve the checkpoint path and backbone flavour from the config.
model_path = cfg.model_folder + model_name
bifpn = cfg.bifpn
backbone = backboneNet_efficient()
backboneFPN = backboneWithFPN(backbone)
if bifpn == True:
    # BiFPN variant replaces the plain FPN wrapper.
    backboneFPN = backboneWithBiFPN(backbone)

# One anchor size per pyramid level, three aspect ratios each.
anchor_sizes = (32, 64, 128, 256, 512)
aspect_ratios = ((0.5, 1, 2), ) * len(anchor_sizes)
anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)

model_ft = MaskRCNN(
    backboneFPN,
    num_classes=cfg.num_classes,
    rpn_anchor_generator=anchor_generator,
    min_size=cfg.min_size,
    max_size=cfg.max_size,
)
model_ft.to(device)

optimizer_ft = optim.SGD(
    model_ft.parameters(),
    lr=cfg.learning_rate,
    momentum=cfg.momentum,
    weight_decay=cfg.weight_decay,
)
# NOTE(review): this rebinds the name `lr_scheduler`, shadowing the module it
# is read from (the RHS is evaluated first, so it works once) — a second
# construction via this name would fail; consider renaming the variable.
lr_scheduler = lr_scheduler.CosineAnnealingWarmRestarts(
    optimizer_ft, T_0=cfg.epochs, T_mult=cfg.T_mult, eta_min=cfg.eta_min)
def get_instance_segmentation_model_cum(num_classes=2, model_name="resnet101",
                                        pretrained=False, usize=256,
                                        use_FPN=False):
    """Mask R-CNN built from a torchvision resnet body, with or without an FPN.

    Args:
        num_classes: number of output classes (including background).
        model_name: any supported resnet / resnext / wide-resnet variant.
        pretrained: load ImageNet weights for the body.
        usize: FPN output channels per level (only used when ``use_FPN``);
            not fixed — 64, 512, etc. also work.
        use_FPN: wrap the body in a 4-level FPN; otherwise use a single-level
            backbone with a custom anchor generator and RoI poolers.
    """
    # Final (layer4) channel width per supported architecture.
    model_dict = {
        'resnet18': 512, 'resnet34': 512, 'resnet50': 2048, 'resnet101': 2048,
        'resnet152': 2048, 'resnext50_32x4d': 2048, 'resnext101_32x8d': 2048,
        'wide_resnet50_2': 2048, 'wide_resnet101_2': 2048,
    }
    assert model_name in model_dict, "%s must be in %s" % (model_name,
                                                           model_dict.keys())
    backbone_size = model_dict[model_name]

    _model = torchvision.models.resnet.__dict__[model_name](pretrained=pretrained)
    # Re-package the conv trunk (stem + layer1..layer4), dropping avgpool/fc.
    backbone = nn.Sequential(OrderedDict([
        ('conv1', _model.conv1),
        ('bn1', _model.bn1),
        ('relu1', _model.relu),
        ('maxpool1', _model.maxpool),
        ('layer1', _model.layer1),
        ('layer2', _model.layer2),
        ('layer3', _model.layer3),
        ('layer4', _model.layer4),
    ]))

    if use_FPN:
        # Freeze everything up to and including layer1.
        for name, parameter in backbone.named_parameters():
            if ('layer2' not in name and 'layer3' not in name
                    and 'layer4' not in name):
                parameter.requires_grad_(False)

        return_layers = {'layer1': '0', 'layer2': '1',
                         'layer3': '2', 'layer4': '3'}
        # Output channel widths of layer1..layer4.
        in_channels_list = [
            backbone_size // 8,
            backbone_size // 4,
            backbone_size // 2,
            backbone_size,
        ]
        out_channels = usize  # per-level FPN output width
        fpn_backbone = BackboneWithFPN(backbone, return_layers,
                                       in_channels_list, out_channels)
        model = MaskRCNN(fpn_backbone, num_classes)
    else:
        backbone.out_channels = model_dict[model_name]  # feature width fed to heads
        anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512), ),
                                           aspect_ratios=((0.5, 1.0, 2.0), ))
        roi_pooler = torchvision.ops.MultiScaleRoIAlign(
            featmap_names=[str(0)], output_size=7, sampling_ratio=2)
        mask_roi_pooler = torchvision.ops.MultiScaleRoIAlign(
            featmap_names=[str(0)], output_size=14, sampling_ratio=2)
        model = MaskRCNN(backbone, num_classes,
                         rpn_anchor_generator=anchor_generator,
                         box_roi_pool=roi_pooler,
                         mask_roi_pool=mask_roi_pooler)
    return model
# model_path = "./models/"+'mask_rcnn_effb7_frozen_bifpn_60_v8_60' # model_ft = get_model_instance_segmentation(num_classes=21) device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") backbone = backboneNet_efficient() backboneFPN = backboneWithFPN(backbone) if cfg.bifpn == True: backboneFPN = backboneWithBiFPN(backbone) anchor_sizes = (32, 64, 128, 256, 512) aspect_ratios = ((0.5, 1, 2),) * len(anchor_sizes) anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios) model_ft = MaskRCNN( backboneFPN, num_classes=cfg.num_classes, rpn_anchor_generator=anchor_generator, min_size=cfg.min_size, max_size=cfg.max_size, ) model_path = cfg.model_folder + cfg.model_name model_ft.load_state_dict(torch.load(model_path)) with torch.cuda.device(0): model_ft.eval().to(device) with torch.no_grad(): for iter, imgid in enumerate(cocoGt.imgs): image = Image.open( cfg.test_path + cocoGt.loadImgs(ids=imgid)[0]["file_name"] ) image = image.convert("RGB")
    type=str, default='examples_detection', help='Folder for output images')

if __name__ == '__main__':
    args = parser.parse_args()
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    root = args.root
    annfile = args.annfile

    # Load a maskRCNN finetuned on our birds
    # Resize bounds 800/1333 with zero mean and unit std (i.e. the transform
    # performs no pixel normalisation).
    network_transform = GeneralizedRCNNTransform(800, 1333, (0, 0, 0), (1, 1, 1))
    backbone = resnet_fpn_backbone(backbone_name='resnet101', pretrained=False)
    model = MaskRCNN(backbone, num_classes=2)
    model.transform = network_transform
    model.eval()
    model.load_state_dict(torch.load('models/detector.pth'))
    model.to(device)

    # Load a data split
    # NOTE(review): these look like BGR ImageNet pixel means with unit std —
    # presumably matching the detector's training pipeline; confirm.
    normalize = T.Normalize(mean=[102.9801, 115.9465, 122.7717],
                            std=[1., 1., 1.])
    coco = COCO(annfile)

    # Load an image example
    available_Ids = coco.getImgIds()
    imgfile = coco.loadImgs(available_Ids[args.index])[0]['file_name']
    imgpath = root + '/' + imgfile
def get_DCN_Resnet(num_classes, **kwargs):
    """Mask R-CNN over a deformable-convolution (DCN) resnet50 FPN backbone."""
    dcn_backbone = resnet_fpn_backbone_DCN('resnet50', pretrained=True)
    return MaskRCNN(dcn_backbone, num_classes, **kwargs)
# Synchronize at this stage to take into account all configuration finder.synchronize() # A bit awkward but the only current way to get the model's device device = next(finder.net.parameters()).device # Define custom class, backbone, or model parameters if finder.params["deep"]["model"].value == "": # manually reinstantiate the model with a fully trainable backbone from torchvision.models.detection.backbone_utils import resnet_fpn_backbone # can change the backbone and its retained pre-training here backbone = resnet_fpn_backbone('resnet50', True, trainable_layers=3) if "faster" in finder.params["deep"]["arch"].value: from torchvision.models.detection.faster_rcnn import FasterRCNN finder.net = FasterRCNN(backbone, num_classes=len(dataset.classes)) elif "mask" in finder.params["deep"]["arch"].value: from torchvision.models.detection.mask_rcnn import MaskRCNN finder.net = MaskRCNN(backbone, num_classes=len(dataset.classes)) # TODO: eventually support keypoint R-CNN if it shows to be promising #elif "keypoint" in finder.params["deep"]["arch"].value: # from torchvision.models.detection.keypoint_rcnn import KeypointRCNN # finder.net = KeypointRCNN(backbone, num_classes=len(dataset.classes)) else: raise ValueError( f'Invalid choice of architecture: {finder.params["deep"]["arch"].value}' ) finder.net.to(device) # Train and test the network for epoch in range(1, hyperparams["epochs"] + 1): train(epoch, finder.net, train_loader, device, hyperparams) test(epoch, finder.net, test_loader, device, hyperparams)