def get_model_masck_fpn_new_anchor(num_classes, pretrained, new_AS):
    """Build a Mask R-CNN (ResNet-50 FPN) detector with an optional custom anchor set.

    Args:
        num_classes: number of classes for the box predictor (incl. background).
        pretrained: if True, load the COCO-pretrained checkpoint.
        new_AS: if True, replace the RPN anchor generator (and RPN head) with
            a wider range of anchor sizes and aspect ratios.

    Returns:
        The configured torchvision Mask R-CNN model. Note: the mask branch is
        disabled in both configurations (``mask_roi_pool`` is set to None).
    """
    # `pretrained` is already a bool — pass it through instead of branching
    # on `== True` (the original built the same model in both branches).
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(
        pretrained=pretrained)

    if new_AS:
        # FIX: the original constructed and assigned this identical anchor
        # generator twice in a row; build it once. One sizes/ratios tuple per
        # FPN level (5 levels).
        anchor_generator = AnchorGenerator(
            sizes=tuple([(16, 32, 64, 128, 256, 512) for _ in range(5)]),
            aspect_ratios=tuple([(0.25, 0.5, 1.0, 2.0) for _ in range(5)]))
        model.rpn.anchor_generator = anchor_generator
        # 256 = channel count of each FPN feature map.
        model.rpn.head = RPNHead(
            256, anchor_generator.num_anchors_per_location()[0])

    # The mask head is disabled regardless of the anchor configuration
    # (the original set this in both branches of the if/else).
    model.roi_heads.mask_roi_pool = None

    # Replace the classification head with one sized for `num_classes`.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model
def test_targets_to_anchors(self):
    """assign_targets_to_anchors on an image with no ground-truth boxes
    must label every anchor as background (all-zero labels and boxes)."""
    gt_boxes = torch.zeros((0, 4), dtype=torch.float32)
    target = {
        "boxes": gt_boxes,
        "labels": torch.zeros((1, 1), dtype=torch.int64),
        "image_id": 4,
        "area": (gt_boxes[:, 3] - gt_boxes[:, 1]) * (gt_boxes[:, 2] - gt_boxes[:, 0]),
        "iscrowd": torch.zeros((0,), dtype=torch.int64),
    }
    anchors = [torch.randint(-50, 50, (3, 4), dtype=torch.float32)]

    sizes = ((32,), (64,), (128,), (256,), (512,))
    ratios = ((0.5, 1.0, 2.0),) * len(sizes)
    generator = AnchorGenerator(sizes, ratios)
    rpn = RegionProposalNetwork(
        generator,
        RPNHead(4, generator.num_anchors_per_location()[0]),
        0.5, 0.3, 256, 0.5, 2000, 2000, 0.7)

    labels, matched_gt_boxes = rpn.assign_targets_to_anchors(anchors, [target])

    self.assertEqual(labels[0].sum(), 0)
    self.assertEqual(labels[0].shape, torch.Size([anchors[0].shape[0]]))
    self.assertEqual(labels[0].dtype, torch.float32)
    self.assertEqual(matched_gt_boxes[0].sum(), 0)
    self.assertEqual(matched_gt_boxes[0].shape, anchors[0].shape)
    self.assertEqual(matched_gt_boxes[0].dtype, torch.float32)
def getCustomModel():
    """Faster R-CNN (ResNet-50 FPN, COCO-pretrained) with custom anchors
    and a box predictor re-sized for 2 classes."""
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=True)
    # Historical anchor settings kept for reference:
    #   Aug2016 : sizes 140,150,160,170,180 / ratios 0.8,0.9,1,1.1,1.2
    #   Aerosol : sizes 10,12,14,16        / ratios 3,3.5,4,4.5
    level_sizes = (30, 40, 50, 60, 70)
    level_ratios = (0.25, 0.5, 1, 2, 4)
    anchor_generator = AnchorGenerator(
        sizes=tuple(level_sizes for _ in range(5)),
        aspect_ratios=tuple(level_ratios for _ in range(5)))  # 75 ?
    rpn_head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
    print(anchor_generator.num_anchors_per_location())
    model.rpn.anchor_generator = anchor_generator
    model.rpn.head = rpn_head
    # Swap the pretrained classification head for a 2-class one.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, 2)
    return model
def get_faster_rcnn(n_classes: int):
    """COCO-pretrained Faster R-CNN with a custom RPN (small anchors, tight
    NMS budgets) and a box predictor sized for `n_classes` classes."""
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=True)

    generator = AnchorGenerator(
        sizes=tuple([(16, 32, 64, 128, 256) for _ in range(5)]),
        aspect_ratios=tuple([(0.75, 0.5, 1.25) for _ in range(5)]))
    head = RPNHead(256, generator.num_anchors_per_location()[0])

    # Replace the whole RPN: small per-image sampling batch and aggressively
    # reduced proposal counts.
    model.rpn = RegionProposalNetwork(
        anchor_generator=generator,
        head=head,
        fg_iou_thresh=0.7,
        bg_iou_thresh=0.3,
        batch_size_per_image=48,
        positive_fraction=0.5,
        pre_nms_top_n=dict(training=200, testing=100),
        post_nms_top_n=dict(training=160, testing=80),
        nms_thresh=0.7)

    # Re-head the classifier for our label set.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, n_classes)

    # Shrink the RoI sampling batch for the box head as well.
    model.roi_heads.fg_bg_sampler.batch_size_per_image = 24
    model.roi_heads.fg_bg_sampler.positive_fraction = 0.5
    return model
def get_model_instance_segmentation(num_classes):
    """Mask R-CNN (ResNet-50 FPN, COCO-pretrained) with a dense anchor set
    and fresh box/mask predictors sized for `num_classes`."""
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)

    per_level_sizes = (4, 8, 16, 32, 64, 128, 256, 512)
    per_level_ratios = (0.25, 0.5, 1.0, 2.0)
    anchor_generator = AnchorGenerator(
        sizes=tuple(per_level_sizes for _ in range(5)),
        aspect_ratios=tuple(per_level_ratios for _ in range(5)))
    model.rpn.anchor_generator = anchor_generator
    # FPN emits 256-channel feature maps.
    model.rpn.head = RPNHead(
        256, anchor_generator.num_anchors_per_location()[0])

    # Replace the pretrained box head for our class count.
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)

    # Replace the mask predictor too (hidden layer width 256).
    in_features_mask = model.roi_heads.mask_predictor.conv5_mask.in_channels
    model.roi_heads.mask_predictor = MaskRCNNPredictor(
        in_features_mask, 256, num_classes)
    return model
def test_targets_to_anchors(self):
    """With an empty ground-truth sample, every anchor must be assigned the
    background label and a zero matched box."""
    _, targets = self._make_empty_sample()
    anchors = [torch.randint(-50, 50, (3, 4), dtype=torch.float32)]

    sizes = ((32,), (64,), (128,), (256,), (512,))
    generator = AnchorGenerator(sizes, ((0.5, 1.0, 2.0),) * len(sizes))
    rpn = RegionProposalNetwork(
        generator,
        RPNHead(4, generator.num_anchors_per_location()[0]),
        0.5, 0.3, 256, 0.5, 2000, 2000, 0.7, 0.05)

    labels, matched_gt_boxes = rpn.assign_targets_to_anchors(anchors, targets)

    self.assertEqual(labels[0].sum(), 0)
    self.assertEqual(labels[0].shape, torch.Size([anchors[0].shape[0]]))
    self.assertEqual(labels[0].dtype, torch.float32)
    self.assertEqual(matched_gt_boxes[0].sum(), 0)
    self.assertEqual(matched_gt_boxes[0].shape, anchors[0].shape)
    self.assertEqual(matched_gt_boxes[0].dtype, torch.float32)
def __init__(self):
    """Build a stand-alone torchvision RegionProposalNetwork over a
    5-level FPN-style feature pyramid (256 channels per level)."""
    super(RPN, self).__init__()

    # One anchor size per pyramid level, three aspect ratios each.
    sizes = ((32,), (64,), (128,), (256,), (512,))
    ratios = ((0.5, 1.0, 2.0),) * len(sizes)
    anchor_generator = AnchorGenerator(sizes, ratios)

    # RPN head: 256 input channels, one prediction set per anchor.
    # (Equivalent to RPNHead(256, 3) for this configuration.)
    rpn_head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])

    # Proposal budgets per phase (torchvision defaults).
    pre_nms_top_n = dict(training=2000, testing=1000)
    post_nms_top_n = dict(training=2000, testing=1000)

    self.rpn = RegionProposalNetwork(
        anchor_generator, rpn_head,
        fg_iou_thresh=0.7,
        bg_iou_thresh=0.3,
        batch_size_per_image=256,
        positive_fraction=0.5,
        pre_nms_top_n=pre_nms_top_n,
        post_nms_top_n=post_nms_top_n,
        nms_thresh=0.7)
def _init_test_rpn(self):
    """Construct an RPN with torchvision's default hyper-parameters for tests."""
    sizes = ((32,), (64,), (128,), (256,), (512,))
    generator = AnchorGenerator(sizes, ((0.5, 1.0, 2.0),) * len(sizes))
    head = RPNHead(256, generator.num_anchors_per_location()[0])
    return RegionProposalNetwork(
        generator, head,
        0.7,                                # fg IoU threshold
        0.3,                                # bg IoU threshold
        256,                                # anchors sampled per image
        0.5,                                # positive fraction
        dict(training=2000, testing=1000),  # pre-NMS top-N
        dict(training=2000, testing=1000),  # post-NMS top-N
        0.7,                                # NMS threshold
        score_thresh=0.0)
def __init__(self, backbone, dope_roi_pool, dope_head, dope_predictor,
             # transform parameters
             min_size=800, max_size=1333,
             image_mean=None, image_std=None,
             # RPN parameters
             rpn_anchor_generator=None, rpn_head=None,
             rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,
             rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,
             rpn_nms_thresh=0.7,
             rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
             rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
             # others
             num_anchor_poses=None,
             pose2d_reg_weights=None,
             pose3d_reg_weights=None,
             ):
    """Assemble a DOPE R-CNN: backbone + RPN + DOPE RoI heads + transform.

    Args:
        backbone: feature extractor; must expose an ``out_channels`` attribute.
        dope_roi_pool: MultiScaleRoIAlign (or None) for the pose heads.
        dope_head, dope_predictor: pose-head modules.
        min_size, max_size, image_mean, image_std: image transform settings
            (ImageNet normalization statistics by default).
        rpn_*: standard torchvision RPN hyper-parameters.
        num_anchor_poses: anchor-pose counts per body part
            (default: {'body': 20, 'hand': 10, 'face': 10}).
        pose2d_reg_weights, pose3d_reg_weights: per-part regression weights
            (default: 5.0 for every part in the module-level ``parts``).

    Raises:
        ValueError: if ``backbone`` has no ``out_channels`` attribute.
    """
    # FIX: the dict defaults were mutable default arguments, shared across
    # all calls; resolve them per-call here instead.
    if num_anchor_poses is None:
        num_anchor_poses = {'body': 20, 'hand': 10, 'face': 10}
    if pose2d_reg_weights is None:
        pose2d_reg_weights = {part: 5.0 for part in parts}
    if pose3d_reg_weights is None:
        pose3d_reg_weights = {part: 5.0 for part in parts}

    if not hasattr(backbone, "out_channels"):
        raise ValueError(
            "backbone should contain an attribute out_channels "
            "specifying the number of output channels (assumed to be the "
            "same for all the levels)")
    assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
    assert isinstance(dope_roi_pool, (MultiScaleRoIAlign, type(None)))

    out_channels = backbone.out_channels

    # Default anchors: one size per FPN level, three aspect ratios each.
    if rpn_anchor_generator is None:
        anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
        aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
        rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    if rpn_head is None:
        rpn_head = RPNHead(
            out_channels, rpn_anchor_generator.num_anchors_per_location()[0])

    rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train,
                             testing=rpn_pre_nms_top_n_test)
    rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train,
                              testing=rpn_post_nms_top_n_test)

    rpn = RegionProposalNetwork(
        rpn_anchor_generator, rpn_head,
        rpn_fg_iou_thresh, rpn_bg_iou_thresh,
        rpn_batch_size_per_image, rpn_positive_fraction,
        rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)

    dope_heads = Dope_RoIHeads(dope_roi_pool, dope_head, dope_predictor,
                               num_anchor_poses,
                               pose2d_reg_weights=pose2d_reg_weights,
                               pose3d_reg_weights=pose3d_reg_weights)

    # ImageNet normalization statistics unless overridden.
    if image_mean is None:
        image_mean = [0.485, 0.456, 0.406]
    if image_std is None:
        image_std = [0.229, 0.224, 0.225]
    transform = Dope_Transform(min_size, max_size, image_mean, image_std)

    super(Dope_RCNN, self).__init__(backbone, rpn, dope_heads, transform)
def get_model_detection(num_classes):
    """Pose R-CNN (ResNet-50 FPN backbone, randomly initialised) with an
    extended anchor-size range on the RPN."""
    model = posercnn_resnet50_fpn(pretrained=False, num_classes=num_classes)

    level_sizes = (32, 64, 128, 256, 512, 768)
    level_ratios = (0.5, 1.0, 2.0)
    generator = AnchorGenerator(
        sizes=tuple(level_sizes for _ in range(5)),
        aspect_ratios=tuple(level_ratios for _ in range(5)))
    model.rpn.anchor_generator = generator
    # 256 = channel count of the resnet_fpn_backbone feature maps.
    model.rpn.head = RPNHead(256, generator.num_anchors_per_location()[0])
    return model
def __init__(self, **kwargs):
    """Faster R-CNN detector (9 classes) over a custom ResNet feature
    extractor, with many anchor scales/ratios on a single feature map.

    Extra keyword arguments are forwarded to ``FasterRCNN``.
    """
    super().__init__()
    backbone = ResNet().features
    anchors = AnchorGenerator(
        sizes=((16, 32, 48, 65, 128, 300),),
        aspect_ratios=((0.5, 1.0, 2.0, 5.0, 10.0),))
    pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                output_size=7,
                                                sampling_ratio=2)
    self.detector = FasterRCNN(
        backbone,
        num_classes=9,
        # mean 0 / std 1 => identity normalization; single-element stats
        # suggest single-channel input — TODO confirm with the data pipeline.
        image_mean=[0],
        image_std=[1],
        min_size=300,
        max_size=1500,
        rpn_anchor_generator=anchors,
        box_roi_pool=pooler,
        **kwargs)
def get_model_instance_segmentation_v2(num_classes, architecture: str = 'resnet18'):
    """Build a Mask R-CNN whose backbone comes from ``pretrainedmodels``.

    This allows a wide variety of pretrained backbones, but only the backbone
    is pretrained — the detection/mask heads start from scratch. Loading a
    fully pretrained detector and swapping its box/mask predictors is usually
    a better starting point.
    """
    # The classifier head of the pretrained network (num_classes=1000) is
    # discarded; only its convolutional features feed the backbone wrapper.
    base = pretrainedmodels.__dict__[architecture](num_classes=1000,
                                                   pretrained='imagenet')
    backbone = MyBackbone(base.features, 512)

    anchors = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                              aspect_ratios=((0.5, 1.0, 2.0),))
    box_pool = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                  output_size=7,
                                                  sampling_ratio=2)
    mask_pool = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                   output_size=14,
                                                   sampling_ratio=2)
    return MaskRCNN(backbone,
                    num_classes=num_classes,
                    rpn_anchor_generator=anchors,
                    box_roi_pool=box_pool,
                    mask_roi_pool=mask_pool)
def _init_pretrained_model(self, num_classes):
    """COCO-pretrained Faster R-CNN, re-headed for `num_classes` classes,
    with custom anchors, moved to GPU if available and wrapped in DDP."""
    # Pool over the four FPN levels ("pool" makes a 5th map in the model).
    pooler = MultiScaleRoIAlign(
        featmap_names=[0, 1, 2, 3],
        output_size=7,
        sampling_ratio=2)
    model = fasterrcnn_resnet50_fpn(pretrained=True,
                                    min_size=config.IMAGE_SIZE,
                                    box_nms_thresh=.5,
                                    box_roi_pool=pooler)

    # Fixed seed so every DDP process initialises identical head weights.
    torch.manual_seed(0)
    model.roi_heads.box_predictor = FastRCNNPredictor(
        in_channels=model.roi_heads.box_head.fc7.out_features,
        num_classes=num_classes)

    # One anchor size per level; five aspect ratios shared by all levels
    # (hence num_anchors=5 in the new RPN head).
    model.rpn.anchor_generator = AnchorGenerator(
        sizes=[[16], [32], [64], [128], [256]],
        aspect_ratios=[.25, .5, 1., 2., 4.])
    model.rpn.head = RPNHead(in_channels=256, num_anchors=5)

    dev = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(dev)
    model = DDP(model, find_unused_parameters=True)
    if self.is_master():
        print(model)
    return model
def __init__(self, num_class=10, snap=None, trainable_layers=5):
    """Mask R-CNN over a pretrained ResNet-101 FPN backbone, optionally
    warm-started from a checkpoint.

    Args:
        num_class: number of detection classes.
        snap: optional checkpoint path; entries whose shapes don't match
            this model (e.g. class-count-dependent heads) are dropped.
        trainable_layers: how many backbone stages remain trainable.
    """
    backbone = resnet_fpn_backbone('resnet101', True,
                                   trainable_layers=trainable_layers)

    # Alternative anchor configuration — currently NOT passed to the model
    # (kept as in the original, see the commented kwarg below).
    anchor_sizes = ((8, 16, 32, 64, 128),)
    aspect_ratios = [(0.5, 1.0, 2.0) for _ in range(len(anchor_sizes))]
    rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)

    super(MaskRCNN, self).__init__(
        backbone,
        num_class,
        # rpn_anchor_generator=rpn_anchor_generator,
    )

    if snap is not None:
        # FIX: the original called torch.load(open(self.snap, 'rb')) —
        # `self.snap` is never assigned (AttributeError) and the file handle
        # leaked. torch.load accepts the path directly.
        state_dict = torch.load(snap)
        # Drop checkpoint entries whose shapes don't match this model
        # (typically the class-dependent predictor weights).
        for k in list(state_dict.keys()):
            if k not in self.state_dict():
                continue
            if self.state_dict()[k].shape != state_dict[k].shape:
                print(f'removing key {k}')
                del state_dict[k]
        unused = self.load_state_dict(state_dict, strict=False)
def get_model_instance_segmentation2(num_classes):
    """Faster R-CNN over a randomly initialised MobileNetV2 backbone.

    Despite the name, no mask branch is configured here.
    """
    backbone = torchvision.models.mobilenet_v2(pretrained=False).features
    # MobileNetV2's feature extractor ends with 1280 channels.
    backbone.out_channels = 1280

    anchors = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                              aspect_ratios=((0.5, 1.0, 2.0),))
    # NOTE(review): output_size=1 is unusually small for a box head — confirm.
    pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                output_size=1,
                                                sampling_ratio=2)
    model = FasterRCNN(backbone,
                       num_classes=num_classes,
                       rpn_anchor_generator=anchors,
                       box_roi_pool=pooler)
    print("mobilenet_v2 call2 - out_channels :1280, 19,540,921")
    return model
def get_mobilenet_model(num_classes):
    """Faster R-CNN on a pretrained MobileNetV2 backbone, sized for small
    inputs (min 100 / max 300 px).

    Follows the example in
    https://github.com/pytorch/vision/blob/master/torchvision/models/detection/faster_rcnn.py
    """
    backbone = torchvision.models.mobilenet_v2(pretrained=True).features
    backbone.out_channels = 1280  # MobileNetV2 feature width

    anchors = AnchorGenerator(sizes=((32, 64, 128),),
                              aspect_ratios=((0.5, 1.0, 2.0),))
    pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                output_size=7,
                                                sampling_ratio=2)
    # Test-image statistics motivating the min/max sizes:
    #   width  avg 172.58 std 122.58 min 31 max 1083
    #   height avg 105.00 std  52.75 min 13 max  516
    return FasterRCNN(backbone,
                      num_classes=num_classes,
                      min_size=100,
                      max_size=300,
                      rpn_anchor_generator=anchors,
                      box_roi_pool=pooler)
def __init__(self, pretrained_path):
    """Faster R-CNN whose backbone is a frozen, pretrained autoencoder encoder."""
    super().__init__()
    self.pretrained_path = pretrained_path
    self.output_dim = 800 * 800

    # --- pretrained encoder (frozen) used as the detection backbone ---
    ae = BasicAE.load_from_checkpoint(pretrained_path)
    ae.freeze()
    self.backbone = ae.encoder
    self.backbone.c3_only = True
    self.backbone.out_channels = 32

    # --- Faster R-CNN (9 classes) on top of the encoder features ---
    anchors = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                              aspect_ratios=((0.5, 1.0, 2.0),))
    pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=7,
                                                sampling_ratio=2)
    self.fast_rcnn = FasterRCNN(self.backbone,
                                num_classes=9,
                                rpn_anchor_generator=anchors,
                                box_roi_pool=pooler)

    # Tracks whether the encoder is still frozen (unfrozen later in training).
    self.frozen = True
def maskrcnn_resnet50_fpn(pretrained=False, progress=True, num_classes=91,
                          pretrained_backbone=True, config=None, **kwargs):
    """Mask R-CNN with a ResNet-50 FPN backbone and a custom anchor generator.

    Args:
        pretrained: load the COCO-pretrained detector checkpoint.
        progress: show a download progress bar.
        num_classes: number of classes (including background).
        pretrained_backbone: load ImageNet backbone weights (ignored — forced
            off — when ``pretrained`` is True, since the full checkpoint
            already contains backbone weights).
        config: forwarded to ``resnet_fpn_backbone``.
        **kwargs: forwarded to ``MaskRCNN``.
    """
    if pretrained:
        # No need to download the backbone if the full checkpoint is loaded.
        pretrained_backbone = False
    backbone = resnet_fpn_backbone('resnet50', pretrained_backbone, config)

    # FIX: the original passed `sizes=(sizes)` and
    # `aspect_ratios=((0.5, 1.0, 2.0))` — the extra parentheses do NOT nest
    # the tuples, so the call silently relied on AnchorGenerator's implicit
    # per-level normalization of flat inputs. Spell out one (size,) tuple
    # per FPN level and the shared ratio tuple; behavior is identical.
    anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
    aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
    anchor_generator = AnchorGenerator(sizes=anchor_sizes,
                                       aspect_ratios=aspect_ratios)

    model = MaskRCNN(backbone, num_classes,
                     rpn_anchor_generator=anchor_generator, **kwargs)
    if pretrained:
        state_dict = load_state_dict_from_url(
            model_urls['maskrcnn_resnet50_fpn_coco'], progress=progress)
        model.load_state_dict(state_dict)
    return model
def fasterrcnn_resnetxx_fpnxx(cfg):
    """Build a Faster R-CNN from a config dict: ResNet backbone + FPN with
    custom anchors and RoI pooling, and optional checkpoint loading.

    Expected cfg keys: backbone_name, backbone_pretrained,
    fpn.return_layers, fpn.out_channels, anchor_generator, box_roi_pool,
    num_classes, fasterrcnn_pretrained.
    """
    backbone = resnet.__dict__[cfg['backbone_name']](
        pretrained=cfg['backbone_pretrained'],
        norm_layer=misc_nn_ops.FrozenBatchNorm2d)

    # Freeze everything except layer2/layer3/layer4.
    for name, parameter in backbone.named_parameters():
        if all(stage not in name for stage in ('layer2', 'layer3', 'layer4')):
            parameter.requires_grad_(False)

    # Wrap the backbone with an FPN; channel counts double per stage
    # starting from stage 2.
    return_layers = cfg['fpn']['return_layers']
    stage2_channels = backbone.inplanes // 8
    in_channels_list = [stage2_channels * 2 ** i
                        for i in range(len(return_layers))]
    backbone_fpn = BackboneWithFPN(backbone, return_layers, in_channels_list,
                                   cfg['fpn']['out_channels'])

    model = FasterRCNN(
        backbone_fpn,
        num_classes=cfg['num_classes'],
        rpn_anchor_generator=AnchorGenerator(**cfg['anchor_generator']),
        box_roi_pool=MultiScaleRoIAlign(**cfg['box_roi_pool']))

    # Warm-start from a full-model checkpoint when one is available.
    if os.path.exists(cfg['fasterrcnn_pretrained']):
        model.load_state_dict(torch.load(cfg['fasterrcnn_pretrained']))
    return model
def FasterRCNN_resnext50_32x4d():
    """Faster R-CNN over a pretrained ResNeXt-50 (32x4d) backbone.

    Only the final fc layer is stripped, so the global average pool remains
    in the feature stack. NOTE(review): that leaves a 1x1 spatial feature
    map for the RPN — confirm this is intentional.
    """
    resnext = torchvision.models.resnext50_32x4d(pretrained=True)
    backbone = nn.Sequential(*list(resnext.children())[:-1])  # drop fc head
    backbone.out_channels = 2048

    anchors = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                              aspect_ratios=((0.5, 1.0, 2.0),))
    pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=7,
                                                sampling_ratio=2)
    return FasterRCNN(backbone,
                      num_classes=config.CLASSES,
                      rpn_anchor_generator=anchors,
                      box_roi_pool=pooler)
def demo():
    """Minimal example: build a 2-class Faster R-CNN on a MobileNetV2
    backbone and run inference on two random images."""
    # Feature extractor only. FasterRCNN needs the backbone's output channel
    # count (1280 for MobileNetV2), attached as an attribute.
    backbone = torchvision.models.mobilenet_v2(pretrained=True).features
    backbone.out_channels = 1280

    # 5 sizes x 3 aspect ratios = 15 anchors per spatial location.
    # Tuple[Tuple[int]] form allows per-feature-map settings.
    anchors = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                              aspect_ratios=((0.5, 1.0, 2.0),))

    # RoI cropping: which feature maps to crop from and the crop size after
    # rescaling. A backbone returning a single Tensor maps to name '0'; in
    # general the backbone returns an OrderedDict[Tensor] and featmap_names
    # selects entries.
    pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                output_size=7,
                                                sampling_ratio=2)

    model = FasterRCNN(backbone,
                       num_classes=2,
                       rpn_anchor_generator=anchors,
                       box_roi_pool=pooler)
    model.eval()
    x = [torch.rand(3, 300, 400), torch.rand(3, 500, 400)]
    predictions = model(x)
    print(predictions)
def __init__(self, h_g, h_l, g, k, s, c):
    """Glimpse network: retina sampler + ResNet-50 feature extractor, plus
    an eval-mode MobileNetV2 Faster R-CNN detector, feeding separate
    glimpse/location embedding layers.

    Args:
        h_g: hidden size of the glimpse embedding.
        h_l: hidden size of the location embedding.
        g: glimpse patch size.
        k: number of patches per glimpse.
        s: scale factor between successive patches.
        c: number of image channels (no longer used for the fc1 input size,
            see the FIX note below).
    """
    super(glimpse_network, self).__init__()
    self.retina = retina(g, k, s)
    # ResNet-50 without its final fc layer.
    self.feature_extractor = nn.Sequential(
        *list(resnet50(pretrained=True).children())[:-1])

    # Detection branch, kept in eval mode.
    backbone = torchvision.models.mobilenet_v2(pretrained=True).features
    backbone.out_channels = 1280
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                    output_size=7,
                                                    sampling_ratio=2)
    self.detection_model = FasterRCNN(backbone,
                                      num_classes=2,
                                      rpn_anchor_generator=anchor_generator,
                                      box_roi_pool=roi_pooler)
    self.detection_model.eval()

    # Glimpse layer. FIX: removed the dead `D_in = k * g * g * c`
    # assignment — it was immediately overwritten with 2048 (the ResNet-50
    # feature dimension), so it never had any effect.
    D_in = 2048
    self.fc1 = nn.Linear(D_in, h_g)

    # Location layer: 2-d (x, y) input.
    D_in = 2
    self.fc2 = nn.Linear(D_in, h_l)

    self.fc3 = nn.Linear(h_g, h_g + h_l)
    self.fc4 = nn.Linear(h_l, h_g + h_l)
def fasterrcnn_mobilenetv2_fpn(pretrained=False, progress=True, num_classes=91,
                               pretrained_backbone=True, **kwargs):
    """Construct a Faster R-CNN model with a MobileNetV2 backbone.

    Note: no pretrained detector checkpoint is loaded here — `pretrained`
    only disables downloading the backbone weights.
    """
    if pretrained:
        pretrained_backbone = False
    backbone = torchvision.models.mobilenet_v2(
        pretrained=pretrained_backbone).features
    backbone.out_channels = 1280  # MobileNetV2 feature width

    anchors = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                              aspect_ratios=((0.5, 1.0, 2.0),))
    pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                output_size=7,
                                                sampling_ratio=2)
    return FasterRCNN(backbone,
                      num_classes=num_classes,
                      rpn_anchor_generator=anchors,
                      box_roi_pool=pooler)
def faster_rcnn_alt_backbone():
    """Faster R-CNN (2 classes) on a pretrained MobileNetV2 backbone.

    Returns:
        A configured torchvision FasterRCNN model.
    """
    # FIX: the original called torchvision.models.mobilentet_v2 — a typo
    # that raises AttributeError at runtime; the model is `mobilenet_v2`.
    # Only its feature extractor is used.
    backbone = torchvision.models.mobilenet_v2(pretrained=True).features
    # FasterRCNN needs the backbone's output channel count (1280 for
    # MobileNetV2).
    backbone.out_channels = 1280

    # Anchors per spatial location of the RPN feature map: 5 sizes x 3
    # aspect ratios. The Tuple[Tuple[int]] form allows different settings
    # per feature map.
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))

    # RoI pooling: featmap_names selects the feature maps to crop from and
    # output_size the crop size after rescaling. A backbone returning a
    # plain Tensor corresponds to feature map [0]; in general the backbone
    # returns an OrderedDict[Tensor] and featmap_names selects entries.
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                    output_size=7,
                                                    sampling_ratio=2)

    model = FasterRCNN(backbone,
                       num_classes=2,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)
    return model
def Model(self, model_name, use_pretrained=True, use_gpu=True):
    """Record the model configuration and, for supported names, build the
    detector and move it to the selected device.

    Args:
        model_name: name of form "family_backbone",
            e.g. "faster-rcnn_mobilenet-v2".
        use_pretrained: load pretrained backbone weights.
        use_gpu: prefer GPU placement via ``set_device``.
    """
    cfg = self.system_dict
    cfg["model_name"] = model_name
    cfg["use_pretrained"] = use_pretrained
    cfg["use_gpu"] = use_gpu

    # Guard clauses replace the original nested ifs; unsupported names fall
    # through with only the configuration recorded (same as before).
    if cfg["model_name"] not in cfg["model_set_1"]:
        return
    family, backbone_name = cfg["model_name"].split("_")
    if family != "faster-rcnn" or backbone_name != "mobilenet-v2":
        return

    backbone = torchvision.models.mobilenet_v2(
        pretrained=use_pretrained).features
    backbone.out_channels = 1280  # MobileNetV2 feature width
    anchors = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                              aspect_ratios=((0.5, 1.0, 2.0),))
    pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                output_size=7,
                                                sampling_ratio=2)
    cfg["local"]["model"] = FasterRCNN(
        backbone,
        num_classes=cfg["local"]["num_classes"],
        rpn_anchor_generator=anchors,
        box_roi_pool=pooler)
    self.set_device(use_gpu=cfg["use_gpu"])
    cfg["local"]["model"].to(cfg["local"]["device"])
def __call__(self, classes=3, sizes=((32, 64, 128, 256, 512), ),
             aspect_ratios=((0.5, 1.0, 2.0), )):
    """Build a compact Faster R-CNN on a SqueezeNet 1.1 backbone.

    Args:
        classes: number of output classes for the box predictor.
        sizes, aspect_ratios: per-feature-map anchor settings.
    """
    from torchvision.models.detection.rpn import AnchorGenerator
    import torchvision
    from torchvision.models.detection import FasterRCNN
    from torchvision.models.detection.faster_rcnn import TwoMLPHead, FastRCNNPredictor

    # Feature extractor only; SqueezeNet 1.1 ends with 512 channels,
    # which FasterRCNN reads from this attribute.
    backbone = torchvision.models.squeezenet1_1(pretrained=True).features
    backbone.out_channels = 512

    anchor_generator = AnchorGenerator(sizes=sizes,
                                       aspect_ratios=aspect_ratios)

    roi_out_size = 7
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(
        featmap_names=['0'], output_size=roi_out_size, sampling_ratio=2)

    # 256-wide MLP head — scaled down from 1024 in the reference
    # implementation to cut the parameter count considerably.
    representation_size = 256
    box_head = TwoMLPHead(backbone.out_channels * roi_out_size ** 2,
                          representation_size)
    box_predictor = FastRCNNPredictor(representation_size, classes)

    return FasterRCNN(backbone,
                      rpn_anchor_generator=anchor_generator,
                      box_roi_pool=roi_pooler,
                      box_head=box_head,
                      box_predictor=box_predictor)
def __init__(self, backbone_name: str, pretrained: bool = True,
             finetune: bool = True, num_classes: int = 2):
    """Faster R-CNN wrapper: builds a detector around a named backbone and
    prepares an Adam optimizer with a step LR schedule."""
    self.__pretrained = pretrained
    self.__num_classes = num_classes
    self.__model_name = backbone_name

    net_backbone = build_backbone(backbone_name, pretrained, finetune)
    anchors = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                              aspect_ratios=((0.5, 1.0, 2.0),))
    pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                output_size=7,
                                                sampling_ratio=2)
    self.model = FasterRCNN(backbone=net_backbone,
                            num_classes=num_classes,
                            rpn_anchor_generator=anchors,
                            box_roi_pool=pooler)

    # Only optimise parameters that require gradients.
    self.params = [p for p in self.model.parameters() if p.requires_grad]
    self.optimizer = torch.optim.Adam(params=self.params, lr=0.005,
                                      weight_decay=0.0005)
    self.lr_scheduler = torch.optim.lr_scheduler.StepLR(
        optimizer=self.optimizer, step_size=3, gamma=0.1)
def __init__(self):
    """Mask R-CNN (5 classes) on a frozen MobileNetV2 backbone, with an SGD
    optimizer over the remaining trainable parameters."""
    backbone = torchvision.models.mobilenet_v2(pretrained=True).features
    backbone.out_channels = 1280

    anchors = AnchorGenerator(sizes=((32, 64, 128, 256),),
                              aspect_ratios=((0.5, 1.0, 2.0),))
    box_pool = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                  output_size=7,
                                                  sampling_ratio=2)
    mask_pool = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                   output_size=14,
                                                   sampling_ratio=2)
    self.net = MaskRCNN(backbone, 5,
                        rpn_anchor_generator=anchors,
                        box_roi_pool=box_pool,
                        mask_roi_pool=mask_pool)

    # Freeze the backbone: only head parameters are trained.
    for p in self.net.backbone.parameters():
        p.requires_grad = False
    trainable = [p for p in self.net.parameters() if p.requires_grad]
    self.optim = torch.optim.SGD(trainable, lr=0.001, momentum=0.9,
                                 weight_decay=0.0005)
    # Attribute name keeps the original spelling for caller compatibility.
    self.lr_schuduler = torch.optim.lr_scheduler.StepLR(self.optim,
                                                        step_size=3,
                                                        gamma=0.1)
def resnet(n_classes, backbone_n='resnet50', pretrained_backbone=True):  # FIX doesn't work
    """
    Faster-RCNN with a ResNet FPN backbone and config-driven anchors.

    NOTE(review): the author marked this "doesn't work". Likely causes to
    verify: (1) resnet_fpn_backbone returns an FPN emitting multiple feature
    maps, but the RoI pooler below only references feature map [0];
    (2) cfg.ANCHOR.SCALES / cfg.ANCHOR.RATIOS must provide one entry per FPN
    level for AnchorGenerator to match the pyramid — contents not visible
    here, so cannot confirm.

    :param n_classes: number of classes of Fast-RCNN-Predictor
    :param backbone_n: name of backbone which will extract feature maps
    :param pretrained_backbone: if True, return pretrained backbone of resnet
    :return: instance of FasterRCNN
    """
    # `names` is a module-level collection of supported backbone names.
    if backbone_n not in names:
        raise Exception('Wrong backbone name')
    backbone = torchvision.models.detection.backbone_utils.resnet_fpn_backbone(
        backbone_n, pretrained=pretrained_backbone)
    # set out channels for FasterRCNN
    # (resnet_fpn_backbone already sets out_channels=256; this re-assertion
    # is harmless but redundant — TODO confirm against the torchvision version.)
    backbone.out_channels = 256
    # define custom anchors for RPN — scales/ratios come from the project config
    anchor_generator = AnchorGenerator(sizes=cfg.ANCHOR.SCALES,
                                       aspect_ratios=cfg.ANCHOR.RATIOS)
    # RoI pooling crops 7x7 features; pools only from feature map [0]
    # (see the NOTE above — with an FPN backbone this may be the bug).
    roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=[0],
                                                    output_size=7,
                                                    sampling_ratio=2)
    # define model
    model = FasterRCNN(backbone, num_classes=n_classes,
                       rpn_anchor_generator=anchor_generator,
                       box_roi_pool=roi_pooler)
    return model
def create_model(num_classes=3):
    """Faster R-CNN on a pretrained MobileNetV2 backbone for `num_classes`
    classes. (Stock ResNet-50 FPN variants were considered but not used.)"""
    backbone = torchvision.models.mobilenet_v2(pretrained=True).features
    backbone.out_channels = 1280  # MobileNetV2 feature width

    anchors = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                              aspect_ratios=((0.5, 1.0, 2.0),))
    pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=["0"],
                                                output_size=7,
                                                sampling_ratio=2)
    return FasterRCNN(backbone,
                      num_classes=num_classes,
                      rpn_anchor_generator=anchors,
                      box_roi_pool=pooler)