def _init_test_rpn(self):
    anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
    aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
    rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    out_channels = 256
    rpn_head = RPNHead(out_channels, rpn_anchor_generator.num_anchors_per_location()[0])
    rpn_fg_iou_thresh = 0.7
    rpn_bg_iou_thresh = 0.3
    rpn_batch_size_per_image = 256
    rpn_positive_fraction = 0.5
    rpn_pre_nms_top_n = dict(training=2000, testing=1000)
    rpn_post_nms_top_n = dict(training=2000, testing=1000)
    rpn_nms_thresh = 0.7
    rpn_score_thresh = 0.0
    rpn = RegionProposalNetwork(
        rpn_anchor_generator, rpn_head,
        rpn_fg_iou_thresh, rpn_bg_iou_thresh,
        rpn_batch_size_per_image, rpn_positive_fraction,
        rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh,
        score_thresh=rpn_score_thresh)
    return rpn
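# A minimal smoke test for the RPN built above (a sketch, not from the source:
# the dummy feature-map strides and image size are assumptions). A
# RegionProposalNetwork expects an ImageList plus a dict of FPN features whose
# channel count matches the RPNHead in_channels (256 here) and whose number of
# levels matches the anchor generator (5 here).
import torch
from collections import OrderedDict
from torchvision.models.detection.image_list import ImageList

def _smoke_test_rpn(rpn):
    images = ImageList(torch.rand(1, 3, 256, 256), [(256, 256)])
    features = OrderedDict(
        (str(i), torch.rand(1, 256, 256 // s, 256 // s))
        for i, s in enumerate([4, 8, 16, 32, 64]))  # one map per FPN level
    rpn.eval()  # inference mode: no targets needed, losses come back empty
    boxes, losses = rpn(images, features)
    return boxes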
def get_model_masck_fpn_new_anchor(num_classes, pretrained, new_AS):
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=pretrained)
    if new_AS:
        # CHANGE ANCHOR SIZES: the RPN head must be rebuilt alongside the
        # generator so it matches the new number of anchors per location
        anchor_generator = AnchorGenerator(
            sizes=tuple([(16, 32, 64, 128, 256, 512) for _ in range(5)]),
            aspect_ratios=tuple([(0.25, 0.5, 1.0, 2.0) for _ in range(5)]))
        model.rpn.anchor_generator = anchor_generator
        model.rpn.head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
    # masks are disabled in both branches
    model.roi_heads.mask_roi_pool = None
    # SET CLASS NUMBER
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model
def get_faster_rcnn(n_classes: int):
    faster_rcnn = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    anchor_generator = AnchorGenerator(
        sizes=tuple([(16, 32, 64, 128, 256) for _ in range(5)]),
        aspect_ratios=tuple([(0.75, 0.5, 1.25) for _ in range(5)]))
    rpn_head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
    faster_rcnn.rpn = RegionProposalNetwork(
        anchor_generator=anchor_generator,
        head=rpn_head,
        fg_iou_thresh=0.7,
        bg_iou_thresh=0.3,
        batch_size_per_image=48,
        positive_fraction=0.5,
        pre_nms_top_n=dict(training=200, testing=100),
        post_nms_top_n=dict(training=160, testing=80),
        nms_thresh=0.7)
    in_features = faster_rcnn.roi_heads.box_predictor.cls_score.in_features
    faster_rcnn.roi_heads.box_predictor = FastRCNNPredictor(in_features, n_classes)
    faster_rcnn.roi_heads.fg_bg_sampler.batch_size_per_image = 24
    faster_rcnn.roi_heads.fg_bg_sampler.positive_fraction = 0.5
    return faster_rcnn
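# Hypothetical inference call (not from the source): the customized model
# still follows the standard torchvision detection API, taking a list of
# 3xHxW tensors in eval mode and returning one dict of boxes/labels/scores
# per image.
import torch

detector = get_faster_rcnn(n_classes=3)
detector.eval()
with torch.no_grad():
    predictions = detector([torch.rand(3, 512, 512)])
print(predictions[0]['boxes'].shape, predictions[0]['scores'].shape)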
def test_targets_to_anchors(self):
    _, targets = self._make_empty_sample()
    anchors = [torch.randint(-50, 50, (3, 4), dtype=torch.float32)]
    anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
    aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
    rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    rpn_head = RPNHead(4, rpn_anchor_generator.num_anchors_per_location()[0])
    head = RegionProposalNetwork(
        rpn_anchor_generator, rpn_head,
        0.5, 0.3, 256, 0.5, 2000, 2000, 0.7)
    labels, matched_gt_boxes = head.assign_targets_to_anchors(anchors, targets)
    self.assertEqual(labels[0].sum(), 0)
    self.assertEqual(labels[0].shape, torch.Size([anchors[0].shape[0]]))
    self.assertEqual(labels[0].dtype, torch.float32)
    self.assertEqual(matched_gt_boxes[0].sum(), 0)
    self.assertEqual(matched_gt_boxes[0].shape, anchors[0].shape)
    self.assertEqual(matched_gt_boxes[0].dtype, torch.float32)
def __init__(self, backbone, dope_roi_pool, dope_head, dope_predictor,
             # transform parameters
             min_size=800, max_size=1333,
             image_mean=None, image_std=None,
             # RPN parameters
             rpn_anchor_generator=None, rpn_head=None,
             rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,
             rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,
             rpn_nms_thresh=0.7,
             rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
             rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
             # others
             num_anchor_poses={'body': 20, 'hand': 10, 'face': 10},
             pose2d_reg_weights={part: 5.0 for part in parts},
             pose3d_reg_weights={part: 5.0 for part in parts}):
    if not hasattr(backbone, "out_channels"):
        raise ValueError(
            "backbone should contain an attribute out_channels "
            "specifying the number of output channels (assumed to be the "
            "same for all the levels)")
    assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
    assert isinstance(dope_roi_pool, (MultiScaleRoIAlign, type(None)))
    out_channels = backbone.out_channels
    if rpn_anchor_generator is None:
        anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
        aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
        rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    if rpn_head is None:
        rpn_head = RPNHead(out_channels, rpn_anchor_generator.num_anchors_per_location()[0])
    rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
    rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)
    rpn = RegionProposalNetwork(
        rpn_anchor_generator, rpn_head,
        rpn_fg_iou_thresh, rpn_bg_iou_thresh,
        rpn_batch_size_per_image, rpn_positive_fraction,
        rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)
    dope_heads = Dope_RoIHeads(dope_roi_pool, dope_head, dope_predictor,
                               num_anchor_poses,
                               pose2d_reg_weights=pose2d_reg_weights,
                               pose3d_reg_weights=pose3d_reg_weights)
    if image_mean is None:
        image_mean = [0.485, 0.456, 0.406]
    if image_std is None:
        image_std = [0.229, 0.224, 0.225]
    transform = Dope_Transform(min_size, max_size, image_mean, image_std)
    super(Dope_RCNN, self).__init__(backbone, rpn, dope_heads, transform)
def get_model_detection(num_classes):
    model = posercnn_resnet50_fpn(pretrained=False, num_classes=num_classes)
    # in_features = model.roi_heads.box_predictor.cls_score.in_features
    # model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    anchor_generator = AnchorGenerator(
        sizes=tuple([(32, 64, 128, 256, 512, 768) for _ in range(5)]),
        aspect_ratios=tuple([(0.5, 1.0, 2.0) for _ in range(5)]))
    model.rpn.anchor_generator = anchor_generator
    # 256 because that's the number of features that resnet_fpn_backbone returns
    model.rpn.head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
    return model
def get_model(
    num_classes,
    anchor_sizes,
    anchor_aspect_ratios,
    rpn_nms_threshold,
    box_nms_threshold,
    box_score_threshold,
    num_box_detections,
):
    # load the pre-trained Mask R-CNN model
    model = torchvision.models.detection.maskrcnn_resnet50_fpn(
        pretrained=True,
        rpn_nms_thresh=rpn_nms_threshold,
        box_nms_thresh=box_nms_threshold,
        box_score_thresh=box_score_threshold,
        box_detections_per_img=num_box_detections,
    )
    # get number of input features for the classifier
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    # replace the pre-trained head with a new one
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    anchor_sizes = tuple(float(i) for i in anchor_sizes.split(","))
    anchor_aspect_ratios = tuple(float(i) for i in anchor_aspect_ratios.split(","))
    # create an anchor_generator for the FPN, which by default has 5 outputs
    anchor_generator = AnchorGenerator(
        sizes=tuple(anchor_sizes for _ in range(5)),
        aspect_ratios=tuple(anchor_aspect_ratios for _ in range(5)),
    )
    model.rpn.anchor_generator = anchor_generator
    # get number of input features for the RPN returned by the FPN (256)
    in_channels = model.backbone.out_channels
    # replace the RPN head
    model.rpn.head = RPNHead(in_channels, anchor_generator.num_anchors_per_location()[0])
    # turn off masks since the dataset only has bounding boxes
    model.roi_heads.mask_roi_pool = None
    return model
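# Hypothetical call of get_model above (values are illustrative):
# anchor_sizes and anchor_aspect_ratios arrive as comma-separated strings
# (e.g. from a CLI) and are parsed into float tuples that get replicated
# across the 5 FPN levels.
model = get_model(
    num_classes=2,
    anchor_sizes="16,32,64,128,256",
    anchor_aspect_ratios="0.5,1.0,2.0",
    rpn_nms_threshold=0.7,
    box_nms_threshold=0.5,
    box_score_threshold=0.05,
    num_box_detections=100,
)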
def fr50_Model(pretrained=False):
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(
        pretrained=pretrained)  # True works
    # create an anchor_generator for the FPN, which by default has 5 outputs
    anchor_generator = AnchorGenerator(
        # sizes=tuple([(16, 32, 64, 128, 256, 512) for _ in range(5)]),
        sizes=tuple([(10, 15, 20, 30, 40) for _ in range(5)]),
        aspect_ratios=tuple([(0.25, 0.5, 1.0, 2.0) for _ in range(5)]))
    model.rpn.anchor_generator = anchor_generator
    # 256 because that's the number of features that FPN returns
    model.rpn.head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
    return model
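# Hypothetical sanity check (not from the source): with 5 sizes and 4 aspect
# ratios per level, the new generator yields 20 anchors per location on each
# of the 5 FPN levels, which the rebuilt RPNHead matches.
m = fr50_Model(pretrained=False)
print(m.rpn.anchor_generator.num_anchors_per_location())  # [20, 20, 20, 20, 20]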
def mask_rcnn_custom_anchors(num_classes: int,
                             backbone=None,
                             pretrained: bool = True,
                             sizes: tuple = ((32,), (64,), (128,), (256,), (512,)),
                             aspect_ratios: tuple = (0.5, 1.0, 2.0),
                             min_size: int = 800,
                             max_size: int = 1333) -> nn.Module:
    "Make icevision Mask RCNN with custom anchors. Default values are torchvision defaults"
    if backbone is None:
        backbone = mask_rcnn.backbones.resnet50_fpn(pretrained=pretrained)
    rpn_anchor_generator = AnchorGenerator(sizes=sizes, aspect_ratios=aspect_ratios)
    rpn_head = RPNHead(256, rpn_anchor_generator.num_anchors_per_location()[0])
    in_features = 1024
    box_predictor = FastRCNNPredictor(in_features, num_classes)
    in_features_mask = 256
    hidden_layer = 256
    mask_predictor = MaskRCNNPredictor(in_features_mask, hidden_layer, num_classes)
    mask_rcnn_kwargs = {
        'rpn_anchor_generator': rpn_anchor_generator,
        'rpn_head': rpn_head,
        'box_predictor': box_predictor,
        'mask_predictor': mask_predictor,
        'num_classes': None,
        # This way there is no need to remove model normalization
        'image_mean': [1., 1., 1.],
        'image_std': [1., 1., 1.],
        'min_size': min_size,
        'max_size': max_size
    }
    custom_model = mask_rcnn.model(backbone=backbone,
                                   remove_internal_transforms=False,
                                   **mask_rcnn_kwargs)
    return custom_model
def __init__(self):
    super(RPN, self).__init__()
    # Define FPN
    self.fpn = resnet_fpn_backbone(backbone_name='resnet101', pretrained=True)
    anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
    aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
    # Generate anchor boxes
    anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    # Define RPN Head
    # rpn_head = RPNHead(256, 9)
    rpn_head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
    # RPN parameters
    rpn_pre_nms_top_n_train = 2000
    rpn_pre_nms_top_n_test = 1000
    rpn_post_nms_top_n_train = 2000
    rpn_post_nms_top_n_test = 1000
    rpn_nms_thresh = 0.7
    rpn_fg_iou_thresh = 0.7
    rpn_bg_iou_thresh = 0.3
    rpn_batch_size_per_image = 256
    rpn_positive_fraction = 0.5
    # transform parameters
    min_size = 800
    max_size = 1333
    image_mean = [0.485, 0.456, 0.406]
    image_std = [0.229, 0.224, 0.225]
    self.transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)
    rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
    rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)
    # Create RPN
    self.rpn = RegionProposalNetwork(
        anchor_generator, rpn_head,
        rpn_fg_iou_thresh, rpn_bg_iou_thresh,
        rpn_batch_size_per_image, rpn_positive_fraction,
        rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)
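# A sketch of the forward pass implied by the modules registered in __init__
# above (the original snippet defines only __init__; this method body is an
# assumption about the intended use, written as it would sit on the RPN class).
def forward(self, images, targets=None):
    images, targets = self.transform(images, targets)  # resize + normalize
    features = self.fpn(images.tensors)  # OrderedDict of FPN feature maps
    proposals, proposal_losses = self.rpn(images, features, targets)
    return proposals, proposal_losses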
def get_model_frcnn_test(num_classes, new_as):
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    if new_as:
        # CHANGE ANCHOR SIZES: rebuild the RPN head together with the
        # generator so it matches the new number of anchors per location
        anchor_generator = AnchorGenerator(
            sizes=tuple([(16, 32, 64, 128, 256, 512) for _ in range(5)]),
            aspect_ratios=tuple([(0.25, 0.5, 1.0, 2.0) for _ in range(5)]))
        model.rpn.anchor_generator = anchor_generator
        model.rpn.head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
    return model
def __init__(self):
    super(RPN, self).__init__()
    anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
    aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
    # Generate anchor boxes
    anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    # Define RPN Head
    rpn_head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
    RPN_PRE_NMS_TOP_N = dict(training=cfg.RPN.PRE_NMS_TOP_N_TRAIN,
                             testing=cfg.RPN.PRE_NMS_TOP_N_TEST)
    RPN_POST_NMS_TOP_N = dict(training=cfg.RPN.POST_NMS_TOP_N_TRAIN,
                              testing=cfg.RPN.POST_NMS_TOP_N_TEST)
    # Create RPN
    self.rpn = RegionProposalNetwork(
        anchor_generator, rpn_head,
        cfg.RPN.FG_IOU_THRESH, cfg.RPN.BG_IOU_THRESH,
        cfg.RPN.BATCH_SIZE_PER_IMAGE, cfg.RPN.POSITIVE_FRACTION,
        RPN_PRE_NMS_TOP_N, RPN_POST_NMS_TOP_N, cfg.RPN.NMS_THRESH)
def _init_pretrained_model(self, num_classes):
    box_roi_pool = MultiScaleRoIAlign(
        featmap_names=[0, 1, 2, 3],  # + "pool" -> 5 feature maps
        output_size=7,
        sampling_ratio=2)
    model = fasterrcnn_resnet50_fpn(pretrained=True,
                                    max_size=config.IMAGE_SIZE,
                                    box_nms_thresh=.5,
                                    # rpn_anchor_generator=rpn_anchor_generator,
                                    box_roi_pool=box_roi_pool)
    torch.manual_seed(0)  # Init the same params in all processes
    model.roi_heads.box_predictor = FastRCNNPredictor(
        in_channels=model.roi_heads.box_head.fc7.out_features,
        num_classes=num_classes)
    model.rpn.anchor_generator = AnchorGenerator(
        sizes=[[16], [32], [64], [128], [256]],
        aspect_ratios=[.25, .5, 1., 2., 4.])
    model.rpn.head = RPNHead(in_channels=256, num_anchors=5)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model = DDP(model, find_unused_parameters=True)
    return model
def __init_pretrain_faster_rcnn(cfg):
    """
    cfg -- dict / edict, configuration object
    """
    # load key config values from cfg
    class_n = cfg.CLASS_N
    anchor_scales = tuple(cfg.ANCHOR_SCALES)
    anchor_ratios = tuple(cfg.ANCHOR_RATIOS)
    feature_n = cfg.FEATURE_N
    # setup backbone
    model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
    # setup RPN
    anchor_generator = AnchorGenerator(
        # size refers to the length of one side
        sizes=tuple([anchor_scales for _ in range(feature_n)]),
        aspect_ratios=tuple([anchor_ratios for _ in range(feature_n)]))
    model.rpn.anchor_generator = anchor_generator
    model.rpn.head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
    # setup RCNN
    in_features = model.roi_heads.box_predictor.cls_score.in_features
    model.roi_heads.box_predictor = FastRCNNPredictor(in_features, class_n)
    return model
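# Hypothetical cfg for __init_pretrain_faster_rcnn: any attribute-style dict
# with these fields works; easydict is shown only because the docstring
# mentions edict. All values here are illustrative, not from the source.
from easydict import EasyDict as edict

cfg = edict({
    'CLASS_N': 3,
    'ANCHOR_SCALES': (32, 64, 128, 256, 512),
    'ANCHOR_RATIOS': (0.5, 1.0, 2.0),
    'FEATURE_N': 5,  # the resnet50 FPN exposes 5 feature maps
})
model = __init_pretrain_faster_rcnn(cfg)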
# Fragment: several names used below were never defined in the original
# snippet; they are filled in here with the torchvision defaults so the code
# runs, and the backbone is built first so the RPN head can match its channel
# count (resnet_fpn_backbone outputs 256 channels, not 512).
rpn_fg_iou_thresh = 0.7
rpn_bg_iou_thresh = 0.3
rpn_batch_size_per_image = 256
rpn_positive_fraction = 0.5
rpn_nms_thresh = 0.7
rpn_pre_nms_top_n_train, rpn_pre_nms_top_n_test = 2000, 1000
rpn_post_nms_top_n_train, rpn_post_nms_top_n_test = 2000, 1000
rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)

trainable_backbone_layers = None
pretrained = True
pretrained_backbone = True
trainable_backbone_layers = _validate_resnet_trainable_layers(
    pretrained or pretrained_backbone, trainable_backbone_layers)
if pretrained:
    pretrained_backbone = False
backbone = resnet_fpn_backbone('resnet50', pretrained_backbone,
                               trainable_layers=trainable_backbone_layers)

anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
rpn_head = RPNHead(backbone.out_channels,
                   rpn_anchor_generator.num_anchors_per_location()[0])
rpn = RegionProposalNetwork(
    rpn_anchor_generator, rpn_head,
    rpn_fg_iou_thresh, rpn_bg_iou_thresh,
    rpn_batch_size_per_image, rpn_positive_fraction,
    rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)
def __init__(self,
             backbone,
             num_classes=None,
             # transform parameters
             scale_factor=2.5,
             scale_factor_jitter=0.25,
             image_mean=None, image_std=None,
             # RPN parameters
             rpn_anchor_generator=None, rpn_head=None,
             rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,
             rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,
             rpn_nms_thresh=0.7,
             rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
             rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
             # Box parameters
             box_roi_pool=None, box_head=None, box_predictor=None,
             box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
             box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,
             box_batch_size_per_image=512, box_positive_fraction=0.25,
             bbox_reg_weights=None,
             # added by Mohamed
             batched_nms=True,
             indep_classif_boxes=False,
             classification_bbox_size=None,
             n_fc_classif_layers=1,
             fc_classif_dropout=0.1,
             cconvhead=None,
             sattention_head=None,
             ignore_label: int = None,
             proposal_augmenter=None,
             n_testtime_augmentations=0):
    if not hasattr(backbone, "out_channels"):
        raise ValueError(
            "backbone should contain an attribute out_channels "
            "specifying the number of output channels (assumed to be the "
            "same for all the levels)")
    assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
    assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))
    if num_classes is not None:
        if box_predictor is not None:
            raise ValueError(
                "num_classes should be None when box_predictor is specified")
    else:
        if box_predictor is None:
            raise ValueError(
                "num_classes should not be None when box_predictor "
                "is not specified")
    out_channels = backbone.out_channels
    if rpn_anchor_generator is None:
        anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
        aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
        rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    if rpn_head is None:
        rpn_head = RPNHead(out_channels, rpn_anchor_generator.num_anchors_per_location()[0])
    rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
    rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)
    rpn = RegionProposalNetwork(
        rpn_anchor_generator, rpn_head,
        rpn_fg_iou_thresh, rpn_bg_iou_thresh,
        rpn_batch_size_per_image, rpn_positive_fraction,
        rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)
    if box_roi_pool is None:
        box_roi_pool = MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3],
                                          output_size=7,
                                          sampling_ratio=2)
    if box_head is None:
        resolution = box_roi_pool.output_size[0]
        representation_size = 1024
        box_head = TwoMLPHead(out_channels * resolution ** 2, representation_size)
    if box_predictor is None:
        representation_size = 1024
        box_predictor = FastRCNNPredictor(
            in_channels=representation_size,
            num_classes=num_classes,
            n_fc_classif_layers=n_fc_classif_layers,
            dropout=fc_classif_dropout,
            batched_nms=batched_nms,
        )
    roi_heads = RoIHeads(
        # Box
        box_roi_pool=box_roi_pool,
        box_head=box_head,
        box_predictor=box_predictor,
        fg_iou_thresh=box_fg_iou_thresh,
        bg_iou_thresh=box_bg_iou_thresh,
        batch_size_per_image=box_batch_size_per_image,
        positive_fraction=box_positive_fraction,
        bbox_reg_weights=bbox_reg_weights,
        score_thresh=box_score_thresh,
        nms_thresh=box_nms_thresh,
        detections_per_img=box_detections_per_img,
        # added by Mohamed
        batched_nms=batched_nms,
        indep_classif_boxes=indep_classif_boxes,
        classification_bbox_size=classification_bbox_size,
        cconvhead=cconvhead,
        sattention_head=sattention_head,
        ignore_label=ignore_label,
    )
    if image_mean is None:
        image_mean = [0.485, 0.456, 0.406]
    if image_std is None:
        image_std = [0.229, 0.224, 0.225]
    # Mohamed: I changed GeneralizedRCNNTransform to take a scale factor
    # as opposed to a fixed size to allow free-size images in inference
    transform = GeneralizedRCNNTransform(
        scale_factor=scale_factor,
        scale_factor_jitter=scale_factor_jitter,
        image_mean=image_mean,
        image_std=image_std)
    super(FasterRCNN, self).__init__(
        backbone=backbone,
        rpn=rpn,
        roi_heads=roi_heads,
        transform=transform,
        # Mohamed: added this
        proposal_augmenter=proposal_augmenter,
        n_testtime_augmentations=n_testtime_augmentations,
    )
def __init__(self,
             num_classes=2,
             # transform parameters
             backbone_name='resnet50',
             min_size=256, max_size=512,
             image_mean=None, image_std=None,
             # RPN parameters
             rpn_anchor_generator=None, rpn_head=None,
             rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,
             rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,
             rpn_nms_thresh=0.7,
             rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
             rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
             rpn_score_thresh=0.0,
             # Box parameters
             box_roi_pool=None, box_head=None, box_predictor=None,
             box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
             box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,
             box_batch_size_per_image=512, box_positive_fraction=0.25,
             bbox_reg_weights=None,
             # Ellipse regressor
             ellipse_roi_pool=None, ellipse_head=None, ellipse_predictor=None,
             ellipse_loss_metric="gaussian-angle"):
    backbone = resnet_fpn_backbone(backbone_name, pretrained=True, trainable_layers=5)
    # Input image is grayscale -> in_channels = 1 instead of 3 (COCO)
    backbone.body.conv1 = Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2),
                                 padding=(3, 3), bias=False)
    if not hasattr(backbone, "out_channels"):
        raise ValueError(
            "backbone should contain an attribute out_channels "
            "specifying the number of output channels (assumed to be the "
            "same for all the levels)")
    assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
    assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))
    if num_classes is not None:
        if box_predictor is not None:
            raise ValueError(
                "num_classes should be None when box_predictor is specified")
    else:
        if box_predictor is None:
            raise ValueError(
                "num_classes should not be None when box_predictor "
                "is not specified")
    out_channels = backbone.out_channels
    if rpn_anchor_generator is None:
        anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
        aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
        rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    if rpn_head is None:
        rpn_head = RPNHead(out_channels, rpn_anchor_generator.num_anchors_per_location()[0])
    rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
    rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)
    rpn = RegionProposalNetwork(
        rpn_anchor_generator, rpn_head,
        rpn_fg_iou_thresh, rpn_bg_iou_thresh,
        rpn_batch_size_per_image, rpn_positive_fraction,
        rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh,
        score_thresh=rpn_score_thresh)
    if box_roi_pool is None:
        box_roi_pool = MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'],
                                          output_size=7,
                                          sampling_ratio=2)
    if box_head is None:
        resolution = box_roi_pool.output_size[0]
        representation_size = 1024
        box_head = TwoMLPHead(out_channels * resolution ** 2, representation_size)
    if box_predictor is None:
        representation_size = 1024
        box_predictor = FastRCNNPredictor(representation_size, num_classes)
    if ellipse_roi_pool is None:
        ellipse_roi_pool = MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'],
                                              output_size=7,
                                              sampling_ratio=2)
    if ellipse_head is None:
        resolution = box_roi_pool.output_size[0]
        representation_size = 1024
        ellipse_head = TwoMLPHead(out_channels * resolution ** 2, representation_size)
    if ellipse_predictor is None:
        representation_size = 1024
        ellipse_predictor = EllipseRegressor(representation_size, num_classes)
    roi_heads = EllipseRoIHeads(
        # Box
        box_roi_pool, box_head, box_predictor,
        box_fg_iou_thresh, box_bg_iou_thresh,
        box_batch_size_per_image, box_positive_fraction,
        bbox_reg_weights,
        box_score_thresh, box_nms_thresh, box_detections_per_img,
        # Ellipse
        ellipse_roi_pool=ellipse_roi_pool,
        ellipse_head=ellipse_head,
        ellipse_predictor=ellipse_predictor,
        ellipse_loss_metric=ellipse_loss_metric)
    if image_mean is None:
        image_mean = [0.156]
    if image_std is None:
        image_std = [0.272]
    transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)
    super().__init__(backbone, rpn, roi_heads, transform)
def __init__(self, cfg):
    super(SeqNet, self).__init__()
    backbone, box_head = build_resnet(name="resnet50", pretrained=True)
    anchor_generator = AnchorGenerator(sizes=((32, 64, 128, 256, 512),),
                                       aspect_ratios=((0.5, 1.0, 2.0),))
    head = RPNHead(
        in_channels=backbone.out_channels,
        num_anchors=anchor_generator.num_anchors_per_location()[0],
    )
    pre_nms_top_n = dict(training=cfg.MODEL.RPN.PRE_NMS_TOPN_TRAIN,
                         testing=cfg.MODEL.RPN.PRE_NMS_TOPN_TEST)
    post_nms_top_n = dict(training=cfg.MODEL.RPN.POST_NMS_TOPN_TRAIN,
                          testing=cfg.MODEL.RPN.POST_NMS_TOPN_TEST)
    rpn = RegionProposalNetwork(
        anchor_generator=anchor_generator,
        head=head,
        fg_iou_thresh=cfg.MODEL.RPN.POS_THRESH_TRAIN,
        bg_iou_thresh=cfg.MODEL.RPN.NEG_THRESH_TRAIN,
        batch_size_per_image=cfg.MODEL.RPN.BATCH_SIZE_TRAIN,
        positive_fraction=cfg.MODEL.RPN.POS_FRAC_TRAIN,
        pre_nms_top_n=pre_nms_top_n,
        post_nms_top_n=post_nms_top_n,
        nms_thresh=cfg.MODEL.RPN.NMS_THRESH,
    )
    faster_rcnn_predictor = FastRCNNPredictor(2048, 2)
    reid_head = deepcopy(box_head)
    box_roi_pool = MultiScaleRoIAlign(featmap_names=["feat_res4"],
                                      output_size=14,
                                      sampling_ratio=2)
    box_predictor = BBoxRegressor(2048, num_classes=2,
                                  bn_neck=cfg.MODEL.ROI_HEAD.BN_NECK)
    roi_heads = SeqRoIHeads(
        # OIM
        num_pids=cfg.MODEL.LOSS.LUT_SIZE,
        num_cq_size=cfg.MODEL.LOSS.CQ_SIZE,
        oim_momentum=cfg.MODEL.LOSS.OIM_MOMENTUM,
        oim_scalar=cfg.MODEL.LOSS.OIM_SCALAR,
        # SeqNet
        faster_rcnn_predictor=faster_rcnn_predictor,
        reid_head=reid_head,
        # parent class
        box_roi_pool=box_roi_pool,
        box_head=box_head,
        box_predictor=box_predictor,
        fg_iou_thresh=cfg.MODEL.ROI_HEAD.POS_THRESH_TRAIN,
        bg_iou_thresh=cfg.MODEL.ROI_HEAD.NEG_THRESH_TRAIN,
        batch_size_per_image=cfg.MODEL.ROI_HEAD.BATCH_SIZE_TRAIN,
        positive_fraction=cfg.MODEL.ROI_HEAD.POS_FRAC_TRAIN,
        bbox_reg_weights=None,
        score_thresh=cfg.MODEL.ROI_HEAD.SCORE_THRESH_TEST,
        nms_thresh=cfg.MODEL.ROI_HEAD.NMS_THRESH_TEST,
        detections_per_img=cfg.MODEL.ROI_HEAD.DETECTIONS_PER_IMAGE_TEST,
    )
    transform = GeneralizedRCNNTransform(
        min_size=cfg.INPUT.MIN_SIZE,
        max_size=cfg.INPUT.MAX_SIZE,
        image_mean=[0.485, 0.456, 0.406],
        image_std=[0.229, 0.224, 0.225],
    )
    self.backbone = backbone
    self.rpn = rpn
    self.roi_heads = roi_heads
    self.transform = transform
    # loss weights
    self.lw_rpn_reg = cfg.SOLVER.LW_RPN_REG
    self.lw_rpn_cls = cfg.SOLVER.LW_RPN_CLS
    self.lw_proposal_reg = cfg.SOLVER.LW_PROPOSAL_REG
    self.lw_proposal_cls = cfg.SOLVER.LW_PROPOSAL_CLS
    self.lw_box_reg = cfg.SOLVER.LW_BOX_REG
    self.lw_box_cls = cfg.SOLVER.LW_BOX_CLS
    self.lw_box_reid = cfg.SOLVER.LW_BOX_REID
    # (fragment: the line below is the tail of a truncated DataLoader(...) call)
    collate_fn=collate_fn)

device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
anchor_generator = AnchorGenerator(
    sizes=((32,), (24,), (24,), (16,), (8,)),
    aspect_ratios=([1.0, 1.0, 1.0, 1.0],
                   [0.8, 1.0, 1.0, 1.0],
                   [1.0, 0.8, 1.0, 1.0],
                   [1.0, 1.0, 1.0, 1.0],
                   [1.0, 1.0, 1.0, 1.0]))
model.rpn.anchor_generator = anchor_generator
model.rpn.head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
# get the number of input features for the classifier
in_features = model.roi_heads.box_predictor.cls_score.in_features
# replace the pre-trained head with a new one
model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
model.to(device)
params = [p for p in model.parameters() if p.requires_grad]
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)
# and a learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)
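# A minimal training-epoch sketch continuing the setup above (an assumption:
# the truncated DataLoader call at the top is taken to have been bound to
# `data_loader`). In train mode a torchvision detection model takes
# (images, targets) and returns a dict of losses.
model.train()
for images, targets in data_loader:
    images = [img.to(device) for img in images]
    targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
    loss_dict = model(images, targets)
    losses = sum(loss_dict.values())
    optimizer.zero_grad()
    losses.backward()
    optimizer.step()
lr_scheduler.step()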
def build_model(cls, args, task):
    """Build a new model instance."""
    # make sure that all args are properly defaulted (in case there are any new ones)
    base_architecture(args)
    rpn_anchor_generator = task.rpn_anchor_generator
    rpn_head = task.rpn_head
    box_roi_pool = task.box_roi_pool
    box_predictor = task.box_predictor
    box_head = task.box_head
    # setup backbone
    backbone = resnet_fpn_backbone(args.backbone, args.backbone_pretrained)
    if not hasattr(backbone, "out_channels"):
        raise ValueError(
            "backbone should contain an attribute out_channels "
            "specifying the number of output channels (assumed to be the "
            "same for all the levels)")
    assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
    assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))
    if task.num_classes > 0:
        if box_predictor is not None:
            raise ValueError("num_classes should be -1 when box_predictor is specified")
    else:
        if box_predictor is None:
            raise ValueError("num_classes should be > 0 when box_predictor is not specified")
    out_channels = backbone.out_channels
    if rpn_anchor_generator is None:
        anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
        aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
        rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    if rpn_head is None:
        rpn_head = RPNHead(out_channels, rpn_anchor_generator.num_anchors_per_location()[0])
    rpn_pre_nms_top_n = dict(training=args.rpn_pre_nms_top_n_train,
                             testing=args.rpn_pre_nms_top_n_test)
    rpn_post_nms_top_n = dict(training=args.rpn_post_nms_top_n_train,
                              testing=args.rpn_post_nms_top_n_test)
    rpn = RPN(
        rpn_anchor_generator,
        rpn_head,
        args.rpn_fg_iou_thresh,
        args.rpn_bg_iou_thresh,
        args.rpn_batch_size_per_image,
        args.rpn_positive_fraction,
        rpn_pre_nms_top_n,
        rpn_post_nms_top_n,
        args.rpn_nms_thresh,
    )
    if box_roi_pool is None:
        box_roi_pool = MultiScaleRoIAlign(
            featmap_names=[0, 1, 2, 3],
            output_size=7,
            sampling_ratio=2,
        )
    if box_head is None:
        resolution = box_roi_pool.output_size[0]
        representation_size = 1024
        box_head = TwoMLPHead(out_channels * resolution ** 2, representation_size)
    if box_predictor is None:
        representation_size = 1024
        box_predictor = FastRCNNPredictor(representation_size, task.num_classes)
    roi_heads = RegionOfInterestHeads(
        # Box
        box_roi_pool,
        box_head,
        box_predictor,
        args.box_fg_iou_thresh,
        args.box_bg_iou_thresh,
        args.box_batch_size_per_image,
        args.box_positive_fraction,
        args.bbox_reg_weights,
        args.box_score_thresh,
        args.box_nms_thresh,
        args.box_detections_per_img,
    )
    if args.image_mean is None:
        args.image_mean = [0.485, 0.456, 0.406]
    if args.image_std is None:
        args.image_std = [0.229, 0.224, 0.225]
    transform = GeneralizedRCNNTransform(
        args.min_size, args.max_size, args.image_mean, args.image_std)
    return cls(backbone, rpn, roi_heads, transform)
def __init__(self,
             backbone,
             num_ID,
             num_classes=2,
             len_embeddings=128,
             # transform parameters
             min_size=720, max_size=960,
             image_mean=None, image_std=None,
             # RPN parameters
             rpn_anchor_generator=None, rpn_head=None,
             rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,
             rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,
             rpn_nms_thresh=0.7,
             rpn_fg_iou_thresh=0.5, rpn_bg_iou_thresh=0.4,
             rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
             # Box parameters
             box_roi_pool=None, box_head=None, box_predictor=None,
             box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
             box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,
             box_batch_size_per_image=512, box_positive_fraction=0.25,
             bbox_reg_weights=None):
    if not hasattr(backbone, "out_channels"):
        raise ValueError(
            "backbone should contain an attribute out_channels "
            "specifying the number of output channels (assumed to be the "
            "same for all the levels)")
    assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
    assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))
    out_channels = backbone.out_channels
    if rpn_anchor_generator is None:
        anchor_sizes = ((16, 22), (32, 45), (64, 90), (128, 181), (256, 362))
        aspect_ratios = ((1 / 3,),) * len(anchor_sizes)
        rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    if rpn_head is None:
        rpn_head = RPNHead(out_channels, rpn_anchor_generator.num_anchors_per_location()[0])
    rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
    rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)
    rpn = RegionProposalNetwork(
        rpn_anchor_generator, rpn_head,
        rpn_fg_iou_thresh, rpn_bg_iou_thresh,
        rpn_batch_size_per_image, rpn_positive_fraction,
        rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)
    if box_roi_pool is None:
        box_roi_pool = MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3],
                                          output_size=7,
                                          sampling_ratio=2)
    if box_head is None:
        resolution = box_roi_pool.output_size[0]
        representation_size = 1024
        box_head = TwoMLPHead(out_channels * resolution ** 2, representation_size)
    emb_scale = math.sqrt(2) * math.log(num_ID - 1) if num_ID > 1 else 1
    if box_predictor is None:
        representation_size = 1024
        box_predictor = JDEPredictor(representation_size, num_classes,
                                     len_embeddings, emb_scale)
    roi_heads = JDE_RoIHeads(
        # Box
        box_roi_pool, box_head, box_predictor,
        box_fg_iou_thresh, box_bg_iou_thresh,
        box_batch_size_per_image, box_positive_fraction,
        bbox_reg_weights,
        box_score_thresh, box_nms_thresh, box_detections_per_img,
        len_embeddings, num_ID)
    if image_mean is None:
        image_mean = [0.485, 0.456, 0.406]
    if image_std is None:
        image_std = [0.229, 0.224, 0.225]
    transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)
    super(Jde_RCNN, self).__init__(backbone, rpn, roi_heads, transform)
    self.eval_embed = False
def get_model(pre_trained, pretrained_backbone, numclasses):
    anchor_generator = AnchorGenerator(
        sizes=tuple([(16, 24, 32, 48, 96) for _ in range(5)]),
        aspect_ratios=tuple([(0.5, 1.0, 2.0) for _ in range(5)]))
    rpnhead = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
    if pre_trained:
        # dl_model = maskrcnn_resnet50_fpn(pretrained=pre_trained, max_size=MAX_SIZE, rpn_head=rpnhead,
        #     rpn_anchor_generator=anchor_generator, rpn_pre_nms_top_n_train=12000,
        #     rpn_pre_nms_top_n_test=6000, rpn_post_nms_top_n_train=2000,
        #     rpn_post_nms_top_n_test=300, rpn_fg_iou_thresh=0.5, rpn_bg_iou_thresh=0.3,
        #     rpn_positive_fraction=0.7, bbox_reg_weights=(1.0, 1.0, 1.0, 1.0),
        #     box_batch_size_per_image=32)
        dl_model = maskrcnn_resnet50_fpn(pretrained=pre_trained,
                                         max_size=MAX_SIZE,
                                         rpn_pre_nms_top_n_train=12000,
                                         rpn_pre_nms_top_n_test=6000,
                                         rpn_post_nms_top_n_train=2000,
                                         rpn_post_nms_top_n_test=300,
                                         rpn_fg_iou_thresh=0.5,
                                         rpn_bg_iou_thresh=0.3,
                                         rpn_positive_fraction=0.7,
                                         bbox_reg_weights=(1.0, 1.0, 1.0, 1.0),
                                         box_batch_size_per_image=32)
        # dl_model = maskrcnn_resnet50_fpn(pretrained=pre_trained, max_size=MAX_SIZE)
        # del dl_model.state_dict()["roi_heads.box_predictor.bbox_pred.weight"]
        # del dl_model.state_dict()["roi_heads.box_predictor.cls_score.weight"]
        # del dl_model.state_dict()["roi_heads.box_predictor.cls_score.bias"]
        # del dl_model.state_dict()["roi_heads.box_predictor.bbox_pred.bias"]
        # Remove incompatible parameters
        # newdict = removekey(dl_model.state_dict(),
        #                     ['roi_heads.box_predictor.cls_score.bias',
        #                      'roi_heads.box_predictor.cls_score.weight',
        #                      'roi_heads.box_predictor.bbox_pred.bias',
        #                      'roi_heads.box_predictor.bbox_pred.weight'])
        # dl_model.load_state_dict(newdict)
        for param in dl_model.parameters():
            param.requires_grad = False
        # replace the classifier with a new one that has a
        # user-defined num_classes
        num_classes = numclasses  # 1 class (lesion) + background
        # get number of input features for the classifier
        in_features = dl_model.roi_heads.box_predictor.cls_score.in_features
        # replace the pre-trained head with a new one
        dl_model.roi_heads.box_predictor = FastRCNNPredictor(in_features, num_classes)
        # now get the number of input features for the mask classifier
        in_features_mask = dl_model.roi_heads.mask_predictor.conv5_mask.in_channels
        hidden_layer = 256
        # and replace the mask predictor with a new one
        dl_model.roi_heads.mask_predictor = MaskRCNNPredictor(in_features_mask,
                                                              hidden_layer,
                                                              num_classes)
    else:
        dl_model = maskrcnn_resnet50_fpn(
            num_classes=numclasses,
            pretrained_backbone=pretrained_backbone,
            max_size=MAX_SIZE,
            rpn_head=rpnhead,
            rpn_anchor_generator=anchor_generator,
            rpn_pre_nms_top_n_train=12000,
            rpn_pre_nms_top_n_test=6000,
            rpn_post_nms_top_n_train=2000,
            rpn_post_nms_top_n_test=300,
            rpn_fg_iou_thresh=0.5,
            rpn_bg_iou_thresh=0.3,
            rpn_positive_fraction=0.7,
            bbox_reg_weights=(1.0, 1.0, 1.0, 1.0),
            box_batch_size_per_image=32)
    return dl_model
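# Hypothetical calls showing the two branches of get_model above: fine-tuning
# a COCO-pretrained model (backbone weights frozen, new box/mask heads) versus
# building from a pretrained backbone with the custom anchors and RPN head.
# MAX_SIZE is assumed to be defined at module level, as in the function itself.
finetune_model = get_model(pre_trained=True, pretrained_backbone=True, numclasses=2)
scratch_model = get_model(pre_trained=False, pretrained_backbone=True, numclasses=2)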
def __init__(self,
             backbone,
             num_classes=2,
             num_pids=5532,
             num_cq_size=5000,
             # transform parameters
             min_size=900, max_size=1500,
             image_mean=None, image_std=None,
             # Anchor settings
             anchor_scales=None, anchor_ratios=None,
             # RPN parameters
             rpn_anchor_generator=None, rpn_head=None,
             rpn_pre_nms_top_n_train=12000, rpn_pre_nms_top_n_test=6000,
             rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=300,
             rpn_nms_thresh=0.7,
             rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
             rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
             # Box parameters
             rcnn_bbox_bn=True,
             box_roi_pool=None, box_head=None, box_predictor=None,
             box_score_thresh=0.05, box_nms_thresh=0.4, box_detections_per_img=300,
             box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.1,
             box_batch_size_per_image=128, box_positive_fraction=0.5,
             bbox_reg_weights=None,
             # ReID parameters
             feat_head=None, reid_head=None, reid_loss=None):
    if rpn_anchor_generator is None:
        anchor_sizes = ((32, 64, 128, 256, 512),)
        aspect_ratios = ((0.5, 1.0, 2.0),)
        rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    # build the RPN head outside the branch above so that a caller-supplied
    # anchor generator does not leave rpn_head undefined
    if rpn_head is None:
        rpn_head = RPNHead(backbone.out_channels,
                           rpn_anchor_generator.num_anchors_per_location()[0])
    rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
    rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)
    rpn = RegionProposalNetwork(
        rpn_anchor_generator, rpn_head,
        rpn_fg_iou_thresh, rpn_bg_iou_thresh,
        rpn_batch_size_per_image, rpn_positive_fraction,
        rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)
    if box_roi_pool is None:
        box_roi_pool = MultiScaleRoIAlign(featmap_names=['feat2rpn'],
                                          output_size=[14, 14],
                                          sampling_ratio=2)
    if box_head is None:
        resolution = box_roi_pool.output_size[0]
        representation_size = 2048
        box_head = GAP_BOX_HEAD(resolution, feat_head, representation_size)
    if box_predictor is None:
        representation_size = 2048
        box_predictor = FastRCNNPredictor(representation_size, num_classes,
                                          RCNN_bbox_bn=False)
    if reid_head is None:
        reid_head = REID_HEAD(box_head.out_dims, 256)
    if reid_loss is None:
        reid_loss = OIMLoss(256, num_pids, num_cq_size, 0.5, 30)
    roi_heads = OIM_ROI_HEAD(
        reid_head, reid_loss,
        # box
        box_roi_pool, box_head, box_predictor,
        box_fg_iou_thresh, box_bg_iou_thresh,
        box_batch_size_per_image, box_positive_fraction,
        bbox_reg_weights,
        box_score_thresh, box_nms_thresh, box_detections_per_img)
    if image_mean is None:
        image_mean = [0.485, 0.456, 0.406]
    if image_std is None:
        image_std = [0.229, 0.224, 0.225]
    transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)
    super(FasterRCNN_OIM, self).__init__(backbone, rpn, roi_heads, transform)
def __init__(self):
    super(FasterRCNN, self).__init__()
    # Define FPN
    self.fpn = resnet_fpn_backbone(backbone_name='resnet101', pretrained=True)
    anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
    aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
    # Generate anchor boxes
    anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    # Define RPN Head
    # rpn_head = RPNHead(256, 9)
    rpn_head = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
    # RPN parameters
    rpn_pre_nms_top_n_train = 2000
    rpn_pre_nms_top_n_test = 1000
    rpn_post_nms_top_n_train = 2000
    rpn_post_nms_top_n_test = 1000
    rpn_nms_thresh = 0.7
    rpn_fg_iou_thresh = 0.7
    rpn_bg_iou_thresh = 0.3
    # rpn_nms_thresh = 0.45
    # rpn_fg_iou_thresh = 0.5
    # rpn_bg_iou_thresh = 0.5
    rpn_batch_size_per_image = 256
    rpn_positive_fraction = 0.5
    # transform parameters
    min_size = 800
    max_size = 1333
    image_mean = [0.485, 0.456, 0.406]
    image_std = [0.229, 0.224, 0.225]
    self.transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)
    rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
    rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)
    # Create RPN
    self.rpn = RegionProposalNetwork(
        anchor_generator, rpn_head,
        rpn_fg_iou_thresh, rpn_bg_iou_thresh,
        rpn_batch_size_per_image, rpn_positive_fraction,
        rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)
    # Box parameters (box_roi_pool/box_head/box_predictor were always None in
    # the original, so they are constructed unconditionally here)
    box_score_thresh = 0.05
    box_nms_thresh = 0.5
    box_detections_per_img = 100
    box_fg_iou_thresh = 0.5
    box_bg_iou_thresh = 0.5
    box_batch_size_per_image = 512
    box_positive_fraction = 0.25
    bbox_reg_weights = None
    num_classes = 101
    box_roi_pool = MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'],
                                      output_size=7,
                                      sampling_ratio=2)
    resolution = box_roi_pool.output_size[0]
    representation_size = 1024
    box_head = TwoMLPHead(256 * resolution ** 2, representation_size)
    box_predictor = FastRCNNPredictor(representation_size, num_classes)
    self.roi_heads = RoIHeads(
        # Box
        box_roi_pool, box_head, box_predictor,
        box_fg_iou_thresh, box_bg_iou_thresh,
        box_batch_size_per_image, box_positive_fraction,
        bbox_reg_weights,
        box_score_thresh, box_nms_thresh, box_detections_per_img)
def define_model(num_classes, net, anchors, up_thres=0.5, low_thres=0.2,
                 box_score=0.3, data='binary'):
    if net == 'mobilenet':
        backbone = torchvision.models.mobilenet_v2(pretrained=True).features
        # FasterRCNN needs to know the number of output channels in a
        # backbone. For mobilenet_v2, it's 1280, so we need to add it here
        backbone.out_channels = 1280
        # let's make the RPN generate 5 x 3 anchors per spatial location,
        # with 5 different sizes and 3 different aspect ratios. We have a
        # Tuple[Tuple[int]] because each feature map could potentially have
        # different sizes and aspect ratios.
        if data == 'tick_bite':
            anchor_generator = AnchorGenerator(sizes=((8, 16, 32, 64, 128),),
                                               aspect_ratios=((0.5, 1.0, 2.0),))
        else:
            anchor_generator = AnchorGenerator(sizes=((16, 32, 64, 128, 256),),
                                               aspect_ratios=((0.5, 1.0, 2.0),))
        # let's define which feature maps we will use to perform the region
        # of interest cropping, as well as the size of the crop after
        # rescaling. If your backbone returns a Tensor, featmap_names is
        # expected to be ['0']. More generally, the backbone should return an
        # OrderedDict[Tensor], and in featmap_names you can choose which
        # feature maps to use.
        roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                        output_size=7,
                                                        sampling_ratio=2)
        # put the pieces together inside a FasterRCNN model
        model = FasterRCNN(backbone,
                           num_classes=num_classes,
                           rpn_anchor_generator=anchor_generator,
                           rpn_fg_iou_thresh=up_thres,
                           rpn_bg_iou_thresh=low_thres,
                           box_roi_pool=roi_pooler,
                           box_score_thresh=box_score)
    elif net == 'resnet50':
        resnet50 = init_model(num_classes=num_classes)
        anchor_sizes = anchors
        aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
        rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
        rpn_head = RPNHead(resnet50.backbone.out_channels,
                           rpn_anchor_generator.num_anchors_per_location()[0])
        roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0'],
                                                        output_size=7,
                                                        sampling_ratio=2)
        model = FasterRCNN(resnet50.backbone,
                           num_classes=num_classes,
                           rpn_anchor_generator=rpn_anchor_generator,
                           rpn_head=rpn_head,
                           rpn_fg_iou_thresh=up_thres,
                           rpn_bg_iou_thresh=low_thres,
                           box_roi_pool=roi_pooler,
                           box_score_thresh=box_score)
    return model
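# Hypothetical usage of define_model (values are illustrative): `anchors` is
# only consulted in the 'resnet50' branch and must be a tuple of per-level
# size tuples, one per FPN feature map.
model = define_model(num_classes=2,
                     net='resnet50',
                     anchors=((16,), (32,), (64,), (128,), (256,)),
                     up_thres=0.5,
                     low_thres=0.2,
                     box_score=0.3)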
def main():
    anchor_generator = AnchorGenerator(
        sizes=tuple([(16, 24, 32, 48, 96) for _ in range(5)]),
        aspect_ratios=tuple([(0.5, 1.0, 2.0) for _ in range(5)]))
    rpnhead = RPNHead(256, anchor_generator.num_anchors_per_location()[0])
    model = maskrcnn_resnet50_fpn(num_classes=2,
                                  pretrained_backbone=True,
                                  max_size=MAX_SIZE,
                                  rpn_head=rpnhead,
                                  rpn_anchor_generator=anchor_generator,
                                  rpn_pre_nms_top_n_train=12000,
                                  rpn_pre_nms_top_n_test=6000,
                                  rpn_post_nms_top_n_train=2000,
                                  rpn_post_nms_top_n_test=300,
                                  rpn_fg_iou_thresh=0.5,
                                  rpn_bg_iou_thresh=0.3,
                                  rpn_positive_fraction=0.7,
                                  bbox_reg_weights=(1.0, 1.0, 1.0, 1.0),
                                  box_batch_size_per_image=32)
    model.load_state_dict(
        torch.load('saved_models' + os.sep + '0_deeplesion.pth', map_location='cpu'))
    # the same transform pipeline is used for all three splits
    data_transforms = {
        x: T.Compose([
            T.ToOriginalHU(INTENSITY_OFFSET),
            T.IntensityWindowing(WINDOWING),
            T.SpacingResize(NORM_SPACING, MAX_SIZE),
            T.ToTensor()
        ])
        for x in ['train', 'val', 'test']
    }
    image_datasets = {
        x: DeepLesion(DIR_IN + os.sep + x, GT_FN_DICT[x], data_transforms[x])
        for x in ['train', 'val', 'test']
    }
    dataloaders = {
        x: DataLoader(image_datasets[x], batch_size=3, shuffle=True,
                      num_workers=0, collate_fn=BatchCollator)
        for x in ['train', 'val', 'test']
    }
    for batch_id, (inputs, targets) in enumerate(dataloaders['test']):
        outputs = test_model(model, inputs)
        outputs = remove_overlapping(outputs, 0.655)
        for image, target, output in zip(inputs, targets, outputs):
            img_copy = image.squeeze().numpy()
            images = [img_copy.astype(float)] * 3
            img_copy = cv2.merge(images)
            for bbox, pseudo_mask in zip(target["boxes"], target["masks"]):
                bbox = np.int16(bbox.squeeze().numpy())
                mask = pseudo_mask.squeeze().numpy()
                cv2.rectangle(img_copy, (bbox[0], bbox[1]), (bbox[2], bbox[3]),
                              (0, 255, 0), 1)
                msk_idx = np.where(mask == 1)
                img_copy[msk_idx[0], msk_idx[1], 0] = 255
            for predbox, predmask, score in zip(output['boxes'], output['masks'],
                                                output['scores']):
                if score < 0.655:
                    break
                predbox = predbox.numpy().astype(int)  # cv2 needs integer pixel coords
                predmask = predmask.squeeze().numpy()
                score = score.numpy()
                predmask = np.where(predmask > 0.5, 1, 0)
                cv2.rectangle(img_copy, (predbox[0], predbox[1]),
                              (predbox[2], predbox[3]), (0, 0, 255), 1)
                pmsk_idx = np.where(predmask == 1)
                img_copy[pmsk_idx[0], pmsk_idx[1], 2] = 255
                cv2.putText(img_copy, str(score),
                            (int(predbox[0]), int(predbox[1] - 5)),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1,
                            cv2.LINE_AA)
            # cv2.imshow(str(target['image_id']), img_copy)
            cv2.imwrite(
                'simple_test' + os.sep +
                str(target['image_id']).replace(os.sep, '_') + '_pred.jpg',
                img_copy * 255)
def __init__(self,
             backbone,
             num_classes=None,
             num_pids=5532,
             num_cq_size=5000,
             # transform parameters
             min_size=900, max_size=1500,
             image_mean=None, image_std=None,
             # Anchor settings
             anchor_scales=None, anchor_ratios=None,
             # RPN parameters
             rpn_anchor_generator=None, rpn_head=None,
             rpn_pre_nms_top_n_train=12000, rpn_pre_nms_top_n_test=6000,
             rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=300,
             rpn_nms_thresh=0.7,
             rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
             rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
             # Box parameters
             box_roi_pool=None, feat_head=None, box_predictor=None,
             box_score_thresh=0.0, box_nms_thresh=0.4, box_detections_per_img=300,
             box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.1,
             box_batch_size_per_image=128, box_positive_fraction=0.5,
             bbox_reg_weights=None,
             # ReID parameters
             embedding_head=None, reid_loss=None):
    if not hasattr(backbone, "out_channels"):
        raise ValueError(
            'backbone should contain an attribute out_channels '
            'specifying the number of output channels (assumed to be the '
            'same for all the levels)')
    assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
    assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))
    if num_classes is not None:
        if box_predictor is not None:
            raise ValueError(
                'num_classes should be None when box_predictor is specified')
    else:
        if box_predictor is None:
            raise ValueError(
                'num_classes should not be None when box_predictor '
                'is not specified')
    out_channels = backbone.out_channels
    if rpn_anchor_generator is None:
        if anchor_scales is None:
            anchor_scales = ((32, 64, 128, 256, 512),)
        if anchor_ratios is None:
            anchor_ratios = ((0.5, 1.0, 2.0),)
        rpn_anchor_generator = AnchorGenerator(anchor_scales, anchor_ratios)
    if rpn_head is None:
        rpn_head = RPNHead(out_channels, rpn_anchor_generator.num_anchors_per_location()[0])
    rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
    rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)
    rpn = self._set_rpn(
        rpn_anchor_generator, rpn_head,
        rpn_fg_iou_thresh, rpn_bg_iou_thresh,
        rpn_batch_size_per_image, rpn_positive_fraction,
        rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)
    if box_roi_pool is None:
        box_roi_pool = MultiScaleRoIAlign(featmap_names=['feat_res4'],
                                          output_size=14,
                                          sampling_ratio=2)
    if feat_head is None:
        raise ValueError('feat_head should be specified manually.')
        # resolution = box_roi_pool.output_size[0]
        # representation_size = 2048
        # ConvHead should be part of the backbone
        # feat_head = TwoMLPHead(out_channels * resolution ** 2, representation_size)
    if box_predictor is None:
        box_predictor = CoordRegressor(2048, num_classes)
    if embedding_head is None:
        embedding_head = ReIDEmbeddingProj(
            featmap_names=['feat_res4', 'feat_res5'],
            in_channels=[1024, 2048],
            dim=256)
    if reid_loss is None:
        reid_loss = HOIMLoss(256, num_pids, num_cq_size, 0.5, 30.0)
    roi_heads = self._set_roi_heads(
        embedding_head, reid_loss,
        box_roi_pool, feat_head, box_predictor,
        box_fg_iou_thresh, box_bg_iou_thresh,
        box_batch_size_per_image, box_positive_fraction,
        bbox_reg_weights,
        box_score_thresh, box_nms_thresh, box_detections_per_img)
    if image_mean is None:
        image_mean = [0.485, 0.456, 0.406]
    if image_std is None:
        image_std = [0.229, 0.224, 0.225]
    transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)
    super(FasterRCNN_HOIM, self).__init__(backbone, rpn, roi_heads, transform)
def __init__(self,
             arch,
             pretrained,
             num_classes,
             input_mode,
             acf_head='endpoints',
             # transform parameters
             min_size=800, max_size=1333,
             image_mean=None, image_std=None,
             # RPN parameters
             rpn_anchor_generator=None, rpn_head=None,
             rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,
             rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,
             rpn_nms_thresh=0.5,
             rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
             rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
             # Box parameters
             box_roi_pool=None, box_head=None, box_predictor=None,
             box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
             box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,
             box_batch_size_per_image=512, box_positive_fraction=0.25,
             bbox_reg_weights=None):
    super(ACFNetwork, self).__init__()
    self.input_mode = input_mode
    self.backbone = resnet_fpn_backbone(arch, pretrained)
    # change the first layer to 4 channels for early fusion with a 1-channel
    # depth map; load pretrained weights on the RGB channels
    conv1_weight_old = nn.Parameter(self.backbone.body.conv1.weight.data)
    conv1_weight = torch.zeros((64, 4, 7, 7))
    conv1_weight[:, 0:3, :, :] = conv1_weight_old
    avg_weight = conv1_weight_old.mean(dim=1, keepdim=False)
    conv1_weight[:, 3, :, :] = avg_weight
    self.backbone.body.conv1.weight = torch.nn.Parameter(conv1_weight)
    # self.backbone.body.conv1.weight.detach()
    # self.backbone.body.conv1 = nn.Conv2d(4, 64, kernel_size=7, stride=2, padding=3, bias=False)
    out_channels = self.backbone.out_channels
    if rpn_anchor_generator is None:
        anchor_sizes = ((16,), (32,), (64,), (128,), (256,), (512,))
        aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
        rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    if rpn_head is None:
        rpn_head = RPNHead(out_channels, rpn_anchor_generator.num_anchors_per_location()[0])
    rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
    rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)
    self.rpn = RegionProposalNetwork(
        rpn_anchor_generator, rpn_head,
        rpn_fg_iou_thresh, rpn_bg_iou_thresh,
        rpn_batch_size_per_image, rpn_positive_fraction,
        rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)
    self.roi_heads = RoIHeadsExtend(out_channels, num_classes, self.input_mode, acf_head)
    # freeze the RGB backbone and RPN when training on poses
    if self.input_mode == config.INPUT_RGBD:
        for param in self.rpn.parameters():
            param.requires_grad = False
        for param in self.backbone.parameters():
            param.requires_grad = False
def __init__(self,
             backbone,
             num_classes=None,
             # transform parameters
             min_size=800, max_size=1333,
             image_mean=None, image_std=None,
             # RPN parameters
             rpn_anchor_generator=None, rpn_head=None,
             rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,
             rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,
             rpn_nms_thresh=0.7,
             rpn_fg_iou_thresh=0.7, rpn_bg_iou_thresh=0.3,
             rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
             # Box parameters
             box_roi_pool=None, box_head=None, box_predictor=None,
             box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
             box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,
             box_batch_size_per_image=512, box_positive_fraction=0.25,
             bbox_reg_weights=None):
    if not hasattr(backbone, "out_channels"):
        raise ValueError(
            "backbone should contain an attribute out_channels "
            "specifying the number of output channels (assumed to be the "
            "same for all the levels)")
    assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
    assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))
    if num_classes is not None:
        if box_predictor is not None:
            raise ValueError(
                "num_classes should be None when box_predictor is specified")
    else:
        if box_predictor is None:
            raise ValueError(
                "num_classes should not be None when box_predictor "
                "is not specified")
    out_channels = backbone.out_channels
    if rpn_anchor_generator is None:
        anchor_sizes = ((32,), (64,), (128,), (256,), (512,))
        aspect_ratios = ((0.5, 1.0, 2.0),) * len(anchor_sizes)
        rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    if rpn_head is None:
        rpn_head = RPNHead(out_channels, rpn_anchor_generator.num_anchors_per_location()[0])
    rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
    rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)
    rpn = RegionProposalNetwork(
        rpn_anchor_generator, rpn_head,
        rpn_fg_iou_thresh, rpn_bg_iou_thresh,
        rpn_batch_size_per_image, rpn_positive_fraction,
        rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)
    if box_roi_pool is None:
        box_roi_pool = MultiScaleRoIAlign(featmap_names=['0', '1', '2', '3'],
                                          output_size=7,
                                          sampling_ratio=2)
    if box_head is None:
        resolution = box_roi_pool.output_size[0]
        representation_size = 1024
        box_head = TwoMLPHead(out_channels * resolution ** 2, representation_size)
    if box_predictor is None:
        representation_size = 1024
        box_predictor = FastRCNNPredictor(representation_size, num_classes)
    roi_heads = RoIHeads(
        # Box
        box_roi_pool, box_head, box_predictor,
        box_fg_iou_thresh, box_bg_iou_thresh,
        box_batch_size_per_image, box_positive_fraction,
        bbox_reg_weights,
        box_score_thresh, box_nms_thresh, box_detections_per_img)
    if image_mean is None:
        image_mean = [0.485, 0.456, 0.406]
    if image_std is None:
        image_std = [0.229, 0.224, 0.225]
    transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)
    super(FasterRCNN, self).__init__(backbone, rpn, roi_heads, transform)
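# Hypothetical instantiation of the FasterRCNN class above, paired with the
# standard torchvision FPN backbone (which exposes the required .out_channels).
from torchvision.models.detection.backbone_utils import resnet_fpn_backbone

backbone = resnet_fpn_backbone('resnet50', pretrained=True)
model = FasterRCNN(backbone, num_classes=91)  # 91 = COCO classes incl. background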
def __init__(self,
             backbone,
             num_ID,
             num_classes=2,
             version='v1',
             # transform parameters
             min_size=800, max_size=1333,
             image_mean=None, image_std=None,
             # RPN parameters
             rpn_anchor_generator=None, rpn_head=None,
             rpn_pre_nms_top_n_train=2000, rpn_pre_nms_top_n_test=1000,
             rpn_post_nms_top_n_train=2000, rpn_post_nms_top_n_test=1000,
             rpn_nms_thresh=0.7,
             # FIXME: these two thresholds follow the paper
             # "Towards Real-Time Multi-Object Tracking"
             rpn_fg_iou_thresh=0.5, rpn_bg_iou_thresh=0.4,
             rpn_batch_size_per_image=256, rpn_positive_fraction=0.5,
             # Box parameters
             box_roi_pool=None, box_head=None, box_predictor=None,
             box_score_thresh=0.05, box_nms_thresh=0.5, box_detections_per_img=100,
             box_fg_iou_thresh=0.5, box_bg_iou_thresh=0.5,
             box_batch_size_per_image=256, box_positive_fraction=0.25,
             bbox_reg_weights=None,
             # Embedding parameters  # FIXME: newly added parameters
             len_embeddings=128,
             embed_head=None,
             embed_extractor=None):
    if not hasattr(backbone, "out_channels"):
        raise ValueError(
            "backbone should contain an attribute out_channels "
            "specifying the number of output channels (assumed to be the "
            "same for all the levels)")
    assert isinstance(rpn_anchor_generator, (AnchorGenerator, type(None)))
    assert isinstance(box_roi_pool, (MultiScaleRoIAlign, type(None)))
    out_channels = backbone.out_channels
    # FIXME: changed the anchor sizes and use only 1/3-aspect-ratio anchors,
    # following "Towards Real-Time Multi-Object Tracking"
    if rpn_anchor_generator is None:
        anchor_sizes = ((16, 22), (32, 45), (64, 90), (128, 181), (256, 362))
        aspect_ratios = ((1 / 3,),) * len(anchor_sizes)
        rpn_anchor_generator = AnchorGenerator(anchor_sizes, aspect_ratios)
    if rpn_head is None:
        rpn_head = RPNHead(out_channels, rpn_anchor_generator.num_anchors_per_location()[0])
    rpn_pre_nms_top_n = dict(training=rpn_pre_nms_top_n_train, testing=rpn_pre_nms_top_n_test)
    rpn_post_nms_top_n = dict(training=rpn_post_nms_top_n_train, testing=rpn_post_nms_top_n_test)
    rpn = RegionProposalNetwork(
        rpn_anchor_generator, rpn_head,
        rpn_fg_iou_thresh, rpn_bg_iou_thresh,
        rpn_batch_size_per_image, rpn_positive_fraction,
        rpn_pre_nms_top_n, rpn_post_nms_top_n, rpn_nms_thresh)
    if box_roi_pool is None:
        box_roi_pool = MultiScaleRoIAlign(featmap_names=[0, 1, 2, 3],
                                          output_size=11,
                                          sampling_ratio=2)
    if box_head is None:
        resolution = box_roi_pool.output_size[0]
        representation_size = 1024
        box_head = TwoMLPHead(out_channels * resolution ** 2, representation_size)
    emb_scale = math.sqrt(2) * math.log(num_ID - 1) if num_ID > 1 else 1
    # FIXME: v1 is what is currently used
    if embed_head is None:
        if version == 'v1':
            resolution = box_roi_pool.output_size[0]
            representation_size = 1024
            embed_head = featureHead(out_channels * resolution ** 2, representation_size)
        if version == 'v2':
            embed_head = None
    if box_predictor is None:
        representation_size = 1024
        box_predictor = FastRCNNPredictor(representation_size, num_classes)
    if embed_extractor is None:
        representation_size = 1024
        embed_extractor = featureExtractor(representation_size, len_embeddings, emb_scale)
    roi_heads = JDE_RoIHeads(
        # Box
        box_roi_pool, box_head, box_predictor,
        box_fg_iou_thresh, box_bg_iou_thresh,
        box_batch_size_per_image, box_positive_fraction,
        bbox_reg_weights,
        box_score_thresh, box_nms_thresh, box_detections_per_img,
        len_embeddings, num_ID, embed_head, embed_extractor)
    roi_heads.version = version
    # FIXME: this part is copied verbatim from the Faster R-CNN code
    if image_mean is None:
        image_mean = [0.485, 0.456, 0.406]
    if image_std is None:
        image_std = [0.229, 0.224, 0.225]
    transform = GeneralizedRCNNTransform(min_size, max_size, image_mean, image_std)
    super(Jde_RCNN, self).__init__(backbone, rpn, roi_heads, transform)
    # FIXME: attributes used during tracking; not relevant to training
    self.version = version
    self.original_image_sizes = None
    self.preprocessed_images = None
    self.features = None
    self.box_features = None