def faster_rcnn_fpn_resnet50_v1b_coco(dataset, pretrained=False, pretrained_base=True, **kwargs):
    r"""Faster RCNN model with FPN from the paper
    "Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster r-cnn: Towards
    real-time object detection with region proposal networks"
    "Lin, T., Dollar, P., Girshick, R., He, K., Hariharan, B., Belongie, S.
    (2016). Feature Pyramid Networks for Object Detection"

    Parameters
    ----------
    dataset : VisionDataset
        Dataset object; its ``classes`` attribute supplies the foreground
        class names.
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights
        for model. String value represents the hashtag for a certain version
        of pretrained weights.
    pretrained_base : bool or str, optional, default is True
        Load pretrained base network, the extra layers are randomized. Note
        that if pretrained is `True`, this has no effect.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.

    Examples
    --------
    >>> model = faster_rcnn_fpn_resnet50_v1b_coco(dataset, pretrained=True)
    >>> print(model)
    """
    from gluoncv.model_zoo.resnetv1b import resnet50_v1b
    classes = dataset.classes
    # Full pretrained detector weights supersede a pretrained base network.
    pretrained_base = False if pretrained else pretrained_base
    base_network = resnet50_v1b(pretrained=pretrained_base, dilated=False,
                                use_global_stats=True, **kwargs)
    # FPN built on the four ResNet stage outputs; use_p6 adds the extra
    # coarsest pyramid level, matching strides=(4, 8, 16, 32, 64) below.
    features = FPNFeatureExpander(
        network=base_network,
        outputs=['layers1_relu8_fwd', 'layers2_relu11_fwd', 'layers3_relu17_fwd',
                 'layers4_relu8_fwd'],
        num_filters=[256, 256, 256, 256], use_1x1=True, use_upsample=True,
        use_elewadd=True, use_p6=True, no_bias=False, pretrained=pretrained_base)
    # With FPN all feature extraction happens in `features`; no extra top stage.
    top_features = None
    # 2 FC layer before RCNN cls and reg
    box_features = nn.HybridSequential()
    for _ in range(2):
        box_features.add(nn.Dense(1024, weight_initializer=mx.init.Normal(0.01)))
        box_features.add(nn.Activation('relu'))
    # Only parameters matching these regex patterns are trained; the rest
    # of the backbone stays frozen.
    train_patterns = '|'.join(
        ['.*dense', '.*rpn', '.*down(2|3|4)_conv', '.*layers(2|3|4)_conv', 'P'])
    return get_faster_rcnn(
        name='fpn_resnet50_v1b', dataset='coco', pretrained=pretrained,
        features=features, top_features=top_features, box_features=box_features,
        classes=classes, short=800, max_size=1333, min_stage=2, max_stage=6,
        train_patterns=train_patterns, nms_thresh=0.5, nms_topk=-1, post_nms=-1,
        roi_mode='align', roi_size=(7, 7), strides=(4, 8, 16, 32, 64), clip=4.14,
        rpn_channel=1024, base_size=16, scales=(2, 4, 8, 16, 32),
        ratios=(0.5, 1, 2), alloc_size=(384, 384), rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000, rpn_train_post_nms=2000, rpn_test_pre_nms=6000,
        rpn_test_post_nms=1000, rpn_min_size=1, num_sample=512,
        pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100, **kwargs)
def __init__(self, num_kernels, scale=1, ctx=mx.cpu(), pretrained=False, num_device=0, **kwargs):
    """Build the PSENet text-detection network.

    Parameters
    ----------
    num_kernels : int
        Number of output kernel (segmentation) maps produced by the decoder.
    scale : int, default 1
        Stored on the instance; not otherwise used in this constructor.
    ctx : mx.Context, default mx.cpu()
        Context used to load pretrained weights and initialize new layers.
    pretrained : bool, default False
        Whether to load pretrained weights for the ResNet base and FPN.
    num_device : int, default 0
        Accepted but not referenced in this constructor — presumably kept for
        interface compatibility; TODO confirm with callers.
    """
    super(PSENet, self).__init__()
    self.num_kernels = num_kernels
    # ResNet-50 backbone with live batch-norm statistics (use_global_stats=False).
    base_network = resnet50_v1b(pretrained=pretrained, dilated=False,
                                use_global_stats=False, norm_layer=nn.BatchNorm,
                                ctx=ctx, **kwargs)
    # FPN over the four ResNet stage outputs; no P6 level for this model.
    self.features = FPNFeatureExpander(
        network=base_network,
        outputs=['layers1_relu8_fwd', 'layers2_relu11_fwd', 'layers3_relu17_fwd',
                 'layers4_relu8_fwd'],
        num_filters=[256, 256, 256, 256], use_1x1=True, use_upsample=True,
        use_elewadd=True, use_p6=False, no_bias=True, pretrained=pretrained,
        ctx=ctx)
    self.scale = scale
    # NOTE(review): 'extrac_convs' is a misspelling of 'extra_convs', but the
    # attribute name is part of the public surface — do not rename silently.
    self.extrac_convs = []
    # One 3x3 conv + ReLU refinement block per pyramid level.
    for i in range(4):
        weight_init = mx.init.Normal(0.001)
        extra_conv = nn.HybridSequential(prefix='extra_conv_{}'.format(i))
        with extra_conv.name_scope():
            extra_conv.add(nn.Conv2D(256, 3, 1, 1))
            # extra_conv.add(nn.BatchNorm())
            extra_conv.add(nn.Activation('relu'))
        extra_conv.initialize(weight_init, ctx=ctx)
        # FPNFeatureExpander children are registered implicitly; these are
        # kept in a plain list, so register them manually.
        self.register_child(extra_conv)
        self.extrac_convs.append(extra_conv)
    # Decoder head: 3x3 conv + ReLU, then 1x1 conv to num_kernels score maps.
    self.decoder_out = nn.HybridSequential(prefix='decoder_out')
    with self.decoder_out.name_scope():
        weight_init = mx.init.Normal(0.001)
        self.decoder_out.add(nn.Conv2D(256, 3, 1, 1))
        # self.decoder_out.add(nn.BatchNorm())
        self.decoder_out.add(nn.Activation('relu'))
        self.decoder_out.add(nn.Conv2D(self.num_kernels, 1, 1))
    self.decoder_out.initialize(weight_init, ctx=ctx)
def faster_rcnn_resnet50_v1b(dataset, pretrained=False, pretrained_base=True, **kwargs):
    r"""Faster RCNN model from the paper
    "Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster r-cnn: Towards
    real-time object detection with region proposal networks"

    Parameters
    ----------
    dataset : VisionDataset
        Dataset object; its ``classes`` attribute supplies the foreground
        class names.
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights
        for model. String value represents the hashtag for a certain version
        of pretrained weights.
    pretrained_base : bool or str, optional, default is True
        Load pretrained base network, the extra layers are randomized. Note
        that if pretrained is `True`, this has no effect.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.

    Examples
    --------
    >>> model = faster_rcnn_resnet50_v1b(dataset, pretrained=True)
    >>> print(model)
    """
    # Bug fix: kwargs["roi_mode"] raised KeyError when the caller did not pass
    # roi_mode at all; .get() keeps the guard while tolerating the omission.
    assert kwargs.get("roi_mode") != "bilinear", \
        "roi_mode='bilinear' is not supported by this variant"
    from gluoncv.model_zoo.resnetv1b import resnet50_v1b
    classes = dataset.classes
    # Full pretrained detector weights supersede a pretrained base network.
    pretrained_base = False if pretrained else pretrained_base
    base_network = resnet50_v1b(pretrained=pretrained_base, dilated=False,
                                use_global_stats=True, **kwargs)
    # Stem + layers1-3 form the shared feature extractor; layer4 is applied
    # per-ROI as the "top" feature stage (classic C4 Faster R-CNN layout).
    features = nn.HybridSequential()
    top_features = nn.HybridSequential()
    for layer in ['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3']:
        features.add(getattr(base_network, layer))
    for layer in ['layer4']:
        top_features.add(getattr(base_network, layer))
    # Only parameters matching these regex patterns are trained.
    train_patterns = '|'.join(
        ['.*dense', '.*rpn', '.*down(2|3|4)_conv', '.*layers(2|3|4)_conv'])
    return get_faster_rcnn(
        name='resnet50_v1b', dataset='coco', pretrained=pretrained,
        features=features, top_features=top_features, classes=classes,
        short=800, max_size=1333, train_patterns=train_patterns,
        nms_thresh=0.5, nms_topk=-1, post_nms=-1, strides=16, clip=4.14,
        rpn_channel=1024, base_size=16, scales=(2, 4, 8, 16, 32),
        ratios=(0.5, 1, 2), alloc_size=(128, 128), rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000, rpn_train_post_nms=2000, rpn_test_pre_nms=6000,
        rpn_test_post_nms=1000, rpn_min_size=1, num_sample=128,
        pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100, **kwargs)
def faster_rcnn_fpn_resnet50_v1b(dataset, pretrained=False, pretrained_base=True, **kwargs):
    r"""Faster RCNN model with FPN and a resnet50_v1b base network.

    Parameters
    ----------
    dataset : VisionDataset
        Dataset object; its ``classes`` attribute supplies the foreground
        class names.
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights
        for model. String value represents the hashtag for a certain version
        of pretrained weights.
    pretrained_base : bool or str, optional, default is True
        Load pretrained base network, the extra layers are randomized. Note
        that if pretrained is `True`, this has no effect.
    roi_mode : str, optional (via kwargs)
        When 'bilinear', a slimmer FPN (100 channels per level) is built;
        any other value (or omission) uses the standard 256-channel FPN.

    Returns
    -------
    mxnet.gluon.HybridBlock
        Hybrid faster RCNN network.
    """
    from gluoncv.model_zoo.resnetv1b import resnet50_v1b
    classes = dataset.classes
    # Full pretrained detector weights supersede a pretrained base network.
    pretrained_base = False if pretrained else pretrained_base
    base_network = resnet50_v1b(pretrained=pretrained_base, dilated=False,
                                use_global_stats=True, **kwargs)
    top_features = None
    # Bug fix: kwargs["roi_mode"] raised KeyError when roi_mode was omitted;
    # .get() defaults to the standard (non-bilinear) configuration.
    if kwargs.get("roi_mode") == "bilinear":
        fpn_filters = [100, 100, 100, 100]
    else:
        fpn_filters = [256, 256, 256, 256]
    features = FPNFeatureExpander(
        network=base_network,
        outputs=['layers1_relu8_fwd', 'layers2_relu11_fwd', 'layers3_relu17_fwd',
                 'layers4_relu8_fwd'],
        num_filters=fpn_filters, use_1x1=True, use_upsample=True,
        use_elewadd=True, use_p6=True, no_bias=False, pretrained=pretrained_base)
    # 2 FC layer before RCNN cls and reg. Hoisted out of the if/else above:
    # both branches built the identical head.
    box_features = nn.HybridSequential()
    for _ in range(2):
        box_features.add(nn.Dense(1024, weight_initializer=mx.init.Normal(0.01)))
        box_features.add(nn.Activation('relu'))
    # Only parameters matching these regex patterns are trained.
    train_patterns = '|'.join(
        ['.*dense', '.*rpn', '.*down(2|3|4)_conv', '.*layers(2|3|4)_conv', 'P'])
    return get_faster_rcnn(
        name='fpn_resnet50_v1b', dataset='coco', pretrained=pretrained,
        features=features, top_features=top_features, box_features=box_features,
        classes=classes, short=800, max_size=1333, min_stage=2, max_stage=6,
        train_patterns=train_patterns, nms_thresh=0.5, nms_topk=-1, post_nms=-1,
        strides=(4, 8, 16, 32, 64), clip=4.14, rpn_channel=1024, base_size=16,
        scales=(2, 4, 8, 16, 32), ratios=(0.5, 1, 2), alloc_size=(384, 384),
        rpn_nms_thresh=0.7, rpn_train_pre_nms=12000, rpn_train_post_nms=2000,
        rpn_test_pre_nms=6000, rpn_test_post_nms=1000, rpn_min_size=1,
        num_sample=512, pos_iou_thresh=0.5, pos_ratio=0.25, max_num_gt=100,
        **kwargs)
def __init__(self, text_scale=1024, ctx=mx.cpu(), pretrained=True, **kwargs):
    """Build the EAST text-detection network.

    Parameters
    ----------
    text_scale : int, default 1024
        Scale value forwarded to the EAST prediction head.
    ctx : mx.Context, default mx.cpu()
        Context used to load pretrained weights and initialize new layers.
    pretrained : bool, default True
        Whether to load pretrained weights for the ResNet base and FPN.
    """
    super(EAST, self).__init__()
    self.text_scale = text_scale
    # ResNet-50 backbone with frozen batch-norm statistics
    # (use_global_stats=True).
    base_network = resnet50_v1b(pretrained=pretrained, dilated=False,
                                use_global_stats=True, ctx=ctx, **kwargs)
    # FPN over the four ResNet stage outputs; no P6 level for this model.
    self.features = FPNFeatureExpander(
        network=base_network,
        outputs=['layers1_relu8_fwd', 'layers2_relu11_fwd', 'layers3_relu17_fwd',
                 'layers4_relu8_fwd'],
        num_filters=[256, 256, 256, 256], use_1x1=True, use_upsample=True,
        use_elewadd=True, use_p6=False, no_bias=False, pretrained=pretrained,
        ctx=ctx)
    weight_init = mx.init.Xavier(rnd_type='gaussian', factor_type='out',
                                 magnitude=2.)
    # Prediction head producing the EAST score/geometry outputs.
    self.head = _EAST_head(text_scale=text_scale, prefix='east_head')
    # Decoder: two conv-BN-ReLU stages narrowing 256 -> 128 -> 64 channels.
    self.decoder_out = nn.HybridSequential(prefix='decoder_out')
    with self.decoder_out.name_scope():
        self.decoder_out.add(nn.Conv2D(128, 3, 1, 1))
        self.decoder_out.add(nn.BatchNorm())
        self.decoder_out.add(nn.Activation('relu'))
        self.decoder_out.add(nn.Conv2D(64, 3, 1, 1))
        self.decoder_out.add(nn.BatchNorm())
        self.decoder_out.add(nn.Activation('relu'))
    self.decoder_out.initialize(weight_init, ctx=ctx)
# NOTE(review): this fragment depends on names defined outside the visible
# span (image_size, root, train_ann_file, val_ann_file, train_transforms,
# batch_size, num_workers, Compose, Resize, Normalize, ToTensor, COCODataset,
# DataLoader, Collator, cpu, SyncBatchNorm, resnet50_v1b, FPNFeatureExpander,
# nn, mx) — confirm they exist in the enclosing scope before reusing it.

# Validation pipeline: resize, center pixels around zero via (x-127)/255,
# convert to tensor.
val_transforms = Compose([
    Resize(image_size, True),
    Normalize(mean=(127, 127, 127), std=(255, 255, 255)),
    ToTensor()
])
train_dataset = COCODataset(root, train_ann_file, train_transforms)
# Shuffle training data; drop the last partial batch so batch shapes stay fixed.
train_data_loader = DataLoader(train_dataset, batch_size, True,
                               last_batch="discard", batchify_fn=Collator(10),
                               num_workers=num_workers)
val_dataset = COCODataset(root, val_ann_file, val_transforms)
val_data_loader = DataLoader(val_dataset, batch_size, False,
                             last_batch="discard", batchify_fn=Collator(10),
                             num_workers=num_workers)
ctx = cpu()
num_devices = 1
# Gluon SyncBatchNorm takes num_devices=...; the symbolic variant below takes
# ndev=... — the two dicts carry the same value under different keyword names.
gluon_norm_kwargs = {"num_devices": num_devices} if num_devices >= 1 else {}
# Live batch-norm statistics (use_global_stats=False), synchronized across devices.
base_network = resnet50_v1b(pretrained=True, dilated=False,
                            use_global_stats=False, norm_layer=SyncBatchNorm,
                            norm_kwargs=gluon_norm_kwargs)
sym_norm_kwargs = {"ndev": num_devices} if num_devices >= 1 else {}
# FPN over the four ResNet stage outputs, with a P6 level and symbolic
# SyncBatchNorm inside the pyramid.
features = FPNFeatureExpander(
    network=base_network,
    outputs=['layers1_relu8_fwd', 'layers2_relu11_fwd', 'layers3_relu17_fwd',
             'layers4_relu8_fwd'],
    num_filters=[256, 256, 256, 256], use_1x1=True, use_upsample=True,
    use_elewadd=True, use_p6=True, no_bias=True, pretrained=True,
    norm_layer=mx.sym.contrib.SyncBatchNorm, norm_kwargs=sym_norm_kwargs
)
# Box head: conv-BN-ReLU then a 1024-unit fully connected layer.
box_features = nn.HybridSequential()
box_features.add(nn.Conv2D(256, 3, padding=1, use_bias=False),
                 SyncBatchNorm(**gluon_norm_kwargs),
                 nn.Activation('relu'),
                 nn.Dense(1024, weight_initializer=mx.init.Normal(0.01)),
                 nn.Activation('relu'))
# resnet50 = vision.resnet50_v1(pretrained=True, ctx=ctx).features
def custom_rcnn_fpn(pretrained_base=True, base_network_name='resnet18_v1b',
                    norm_layer=nn.BatchNorm, norm_kwargs=None, sym_norm_layer=None,
                    sym_norm_kwargs=None, num_fpn_filters=256, num_box_head_conv=4,
                    num_box_head_conv_filters=256, num_box_head_dense_filters=1024):
    r"""Generate custom RCNN model with resnet base network w/FPN.

    Parameters
    ----------
    pretrained_base : bool or str
        Boolean value controls whether to load the default pretrained weights
        for model. String value represents the hashtag for a certain version
        of pretrained weights.
    base_network_name : str, default 'resnet18_v1b'
        base network for mask RCNN. Currently support: 'resnet18_v1b',
        'resnet50_v1b', and 'resnet101_v1d'
    norm_layer : nn.HybridBlock, default nn.BatchNorm
        Gluon normalization layer to use. Default is frozen batch
        normalization layer.
    norm_kwargs : dict
        Keyword arguments for gluon normalization layer
    sym_norm_layer : nn.SymbolBlock, default `None`
        Symbol normalization layer to use in FPN. This is due to FPN being
        implemented using SymbolBlock. Default is `None`, meaning no
        normalization layer will be used in FPN.
    sym_norm_kwargs : dict
        Keyword arguments for symbol normalization layer used in FPN.
    num_fpn_filters : int, default 256
        Number of filters for FPN output layers.
    num_box_head_conv : int, default 4
        Number of convolution layers to use in box head if batch normalization
        is not frozen.
    num_box_head_conv_filters : int, default 256
        Number of filters for convolution layers in box head. Only applicable
        if batch normalization is not frozen.
    num_box_head_dense_filters : int, default 1024
        Number of hidden units for the last fully connected layer in box head.

    Returns
    -------
    SymbolBlock or HybridBlock
        Base feature extractor eg. resnet w/ FPN.
    None or HybridBlock
        R-CNN feature before each task heads.
    HybridBlock
        Box feature extractor
    """
    # Bug fix: norm_kwargs defaults to None but is unpacked with ** in the
    # non-frozen box head below, which raised TypeError. Normalize to a dict.
    if norm_kwargs is None:
        norm_kwargs = {}
    # nn.BatchNorm means "frozen" stats; any other norm layer trains its stats.
    use_global_stats = norm_layer is nn.BatchNorm
    if base_network_name == 'resnet18_v1b':
        from gluoncv.model_zoo.resnetv1b import resnet18_v1b
        base_network = resnet18_v1b(pretrained=pretrained_base, dilated=False,
                                    use_global_stats=use_global_stats,
                                    norm_layer=norm_layer, norm_kwargs=norm_kwargs)
        fpn_inputs_names = ['layers1_relu3_fwd', 'layers2_relu3_fwd',
                            'layers3_relu3_fwd', 'layers4_relu3_fwd']
    elif base_network_name == 'resnet50_v1b':
        from gluoncv.model_zoo.resnetv1b import resnet50_v1b
        base_network = resnet50_v1b(pretrained=pretrained_base, dilated=False,
                                    use_global_stats=use_global_stats,
                                    norm_layer=norm_layer, norm_kwargs=norm_kwargs)
        fpn_inputs_names = ['layers1_relu8_fwd', 'layers2_relu11_fwd',
                            'layers3_relu17_fwd', 'layers4_relu8_fwd']
    elif base_network_name == 'resnet101_v1d':
        from gluoncv.model_zoo.resnetv1b import resnet101_v1d
        base_network = resnet101_v1d(pretrained=pretrained_base, dilated=False,
                                     use_global_stats=use_global_stats,
                                     norm_layer=norm_layer, norm_kwargs=norm_kwargs)
        fpn_inputs_names = ['layers1_relu8_fwd', 'layers2_relu11_fwd',
                            'layers3_relu68_fwd', 'layers4_relu8_fwd']
    else:
        raise NotImplementedError('Unsupported network', base_network_name)
    features = FPNFeatureExpander(
        network=base_network, outputs=fpn_inputs_names,
        num_filters=[num_fpn_filters] * len(fpn_inputs_names), use_1x1=True,
        use_upsample=True, use_elewadd=True, use_p6=True,
        no_bias=not use_global_stats, pretrained=pretrained_base,
        norm_layer=sym_norm_layer, norm_kwargs=sym_norm_kwargs)
    top_features = None
    box_features = nn.HybridSequential()
    box_features.add(nn.AvgPool2D(pool_size=(3, 3), strides=2, padding=1))  # reduce to 7x7
    if use_global_stats:
        # Frozen-BN variant: a single FC head.
        box_features.add(
            nn.Dense(num_box_head_dense_filters,
                     weight_initializer=mx.init.Normal(0.01)),
            nn.Activation('relu'))
    else:
        # Trainable-BN variant: conv tower followed by the FC head.
        for _ in range(num_box_head_conv):
            box_features.add(
                nn.Conv2D(num_box_head_conv_filters, 3, padding=1, use_bias=False),
                norm_layer(**norm_kwargs),
                nn.Activation('relu'))
        box_features.add(
            nn.Dense(num_box_head_dense_filters,
                     weight_initializer=mx.init.Normal(0.01)),
            nn.Activation('relu'))
    return features, top_features, box_features
def __init__(self, depth, nclass, pretrained_base=True, input_channel=3,
             dropout_ratio=0.5, init_std=0.01, feat_dim=2048, num_segments=1,
             num_crop=1, partial_bn=False, **kwargs):
    """Build an action-recognition network on a ResNet-V1b backbone.

    Parameters
    ----------
    depth : int
        Backbone selector. 18/34/50/101/152 load ImageNet-pretrained
        ResNet-V1b; 418/434/450/501/552 load Kinetics400-pretrained variants.
    nclass : int
        Number of output classes.
    pretrained_base : bool, default True
        Load pretrained weights for the ImageNet backbones.
    input_channel : int, default 3
        Number of input channels; a fresh conv1 is created when != 3.
    dropout_ratio : float, default 0.5
        Dropout rate before the classifier.
    init_std : float, default 0.01
        Std of the Normal initializer for the output layer.
    feat_dim : int, default 2048
        NOTE(review): accepted but immediately overwritten by
        512 * expansion below — confirm whether callers rely on it.
    num_segments, num_crop : int
        Stored on the instance for use elsewhere in the class.
    partial_bn : bool, default False
        Accepted but not referenced in this constructor.

    Raises
    ------
    ValueError
        If `depth` is not one of the supported configurations.
    """
    super(ActionRecResNetV1b, self).__init__()

    if depth == 18:
        pretrained_model = resnet18_v1b(pretrained=pretrained_base, **kwargs)
        self.expansion = 1
    elif depth == 34:
        pretrained_model = resnet34_v1b(pretrained=pretrained_base, **kwargs)
        self.expansion = 1
    elif depth == 50:
        pretrained_model = resnet50_v1b(pretrained=pretrained_base, **kwargs)
        self.expansion = 4
    elif depth == 101:
        pretrained_model = resnet101_v1b(pretrained=pretrained_base, **kwargs)
        self.expansion = 4
    elif depth == 152:
        pretrained_model = resnet152_v1b(pretrained=pretrained_base, **kwargs)
        self.expansion = 4
    elif depth == 418:
        pretrained_model = get_model('resnet18_v1b_kinetics400', pretrained=True)
        self.expansion = 1
    elif depth == 434:
        pretrained_model = get_model('resnet34_v1b_kinetics400', pretrained=True)
        self.expansion = 1
    elif depth == 450:
        pretrained_model = get_model('resnet50_v1b_kinetics400', pretrained=True)
        self.expansion = 4
    elif depth == 501:
        pretrained_model = get_model('resnet101_v1b_kinetics400', pretrained=True)
        self.expansion = 4
    elif depth == 552:
        pretrained_model = get_model('resnet152_v1b_kinetics400', pretrained=True)
        self.expansion = 4
    else:
        # Bug fix: previously only printed a warning and fell through, which
        # later crashed with a confusing AttributeError on self.expansion.
        raise ValueError('No such ResNet configuration for depth=%d' % depth)

    self.dropout_ratio = dropout_ratio
    self.init_std = init_std
    self.feat_dim = 512 * self.expansion
    self.num_segments = num_segments
    self.num_crop = num_crop
    self.input_channel = input_channel

    with self.name_scope():
        if self.input_channel == 3:
            self.conv1 = pretrained_model.conv1
        else:
            # Non-RGB input: replace the stem conv with a freshly initialized
            # one matching the requested channel count.
            self.conv1 = nn.Conv2D(
                in_channels=input_channel, channels=64, kernel_size=7,
                strides=2, padding=3, use_bias=False,
                weight_initializer=mx.init.Xavier(magnitude=2))
            self.conv1.initialize()
        # Reuse the remaining backbone stages as-is.
        self.bn1 = pretrained_model.bn1
        self.relu = pretrained_model.relu
        self.maxpool = pretrained_model.maxpool
        self.layer1 = pretrained_model.layer1
        self.layer2 = pretrained_model.layer2
        self.layer3 = pretrained_model.layer3
        self.layer4 = pretrained_model.layer4
        self.avgpool = pretrained_model.avgpool
        self.flat = pretrained_model.flat
        self.drop = nn.Dropout(rate=self.dropout_ratio)
        # New classifier head, initialized here since it is not pretrained.
        self.output = nn.Dense(
            units=nclass, in_units=self.feat_dim,
            weight_initializer=init.Normal(sigma=self.init_std))
        self.output.initialize()
def faster_rcnn_resnet50_v1b_custom(classes, transfer=None, pretrained_base=True, pretrained=False, **kwargs):
    r"""Faster RCNN model with resnet50_v1b base network on custom dataset.

    Parameters
    ----------
    classes : iterable of str
        Names of custom foreground classes. `len(classes)` is the number of
        foreground classes.
    transfer : str or None
        If not `None`, will try to reuse pre-trained weights from faster RCNN
        networks trained on other datasets.
    pretrained_base : boolean
        Whether fetch and load pretrained weights for base network.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.

    Returns
    -------
    mxnet.gluon.HybridBlock
        Hybrid faster RCNN network.
    """
    # Transfer path: reuse a detector trained on another dataset and remap
    # its output classes.
    if transfer is not None:
        from gluoncv.model_zoo import get_model
        net = get_model('faster_rcnn_resnet50_v1b_' + str(transfer),
                        pretrained=True, **kwargs)
        net.reset_class(classes)
        return net

    # From-scratch path: ResNet-50 stem + layers1-3 as shared features,
    # layer4 as the per-ROI top stage.
    from gluoncv.model_zoo.resnetv1b import resnet50_v1b
    backbone = resnet50_v1b(pretrained=pretrained_base, dilated=False,
                            use_global_stats=True)
    shared_stages = ('conv1', 'bn1', 'relu', 'maxpool',
                     'layer1', 'layer2', 'layer3')
    features = nn.HybridSequential()
    for stage in shared_stages:
        features.add(getattr(backbone, stage))
    top_features = nn.HybridSequential()
    top_features.add(getattr(backbone, 'layer4'))
    # Regex selecting the trainable parameters; the rest stay frozen.
    train_patterns = '.*dense|.*rpn|.*down(2|3|4)_conv|.*layers(2|3|4)_conv'
    return get_faster_rcnn(
        name='resnet50_v1b', dataset='custom', pretrained=pretrained,
        features=features, top_features=top_features, classes=classes,
        short=600, max_size=1000, train_patterns=train_patterns,
        nms_thresh=0.3, nms_topk=400, post_nms=100, roi_mode='align',
        roi_size=(14, 14), stride=16, clip=None, rpn_channel=1024,
        base_size=16, scales=(2, 4, 8, 16, 32), ratios=(0.5, 1, 2),
        alloc_size=(128, 128), rpn_nms_thresh=0.7, rpn_train_pre_nms=12000,
        rpn_train_post_nms=2000, rpn_test_pre_nms=6000, rpn_test_post_nms=300,
        rpn_min_size=16, num_sample=128, pos_iou_thresh=0.5, pos_ratio=0.25,
        **kwargs)
def faster_rcnn_resnet50_v1b_voc(pretrained=False, pretrained_base=True, **kwargs):
    r"""Faster RCNN model from the paper
    "Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster r-cnn: Towards
    real-time object detection with region proposal networks"

    Parameters
    ----------
    pretrained : bool, optional, default is False
        Load pretrained weights.
    pretrained_base : bool, optional, default is True
        Load pretrained base network, the extra layers are randomized. Note
        that if pretrained is `True`, this has no effect.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.

    Examples
    --------
    >>> model = get_faster_rcnn_resnet50_v1b_voc(pretrained=True)
    >>> print(model)
    """
    from gluoncv.model_zoo.resnetv1b import resnet50_v1b
    from gluoncv.data import VOCDetection
    classes = VOCDetection.CLASSES
    # Full pretrained detector weights supersede a pretrained base network.
    pretrained_base = False if pretrained else pretrained_base
    base_network = resnet50_v1b(pretrained=pretrained_base, dilated=False,
                                use_global_stats=True)
    # Stem + layers1-3 form the shared feature extractor; layer4 is applied
    # per-ROI as the "top" feature stage (classic C4 Faster R-CNN layout).
    features = nn.HybridSequential()
    top_features = nn.HybridSequential()
    for layer in [
            'conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3'
    ]:
        features.add(getattr(base_network, layer))
    for layer in ['layer4']:
        top_features.add(getattr(base_network, layer))
    # Only parameters matching these regex patterns are trained.
    train_patterns = '|'.join(
        ['.*dense', '.*rpn', '.*down(2|3|4)_conv', '.*layers(2|3|4)_conv'])
    return get_faster_rcnn(
        name='resnet50_v1b', dataset='voc', pretrained=pretrained,
        features=features, top_features=top_features, classes=classes,
        short=600, max_size=1000, train_patterns=train_patterns,
        nms_thresh=0.3, nms_topk=400, post_nms=100, roi_mode='align',
        roi_size=(14, 14), stride=16, clip=None, rpn_channel=1024,
        base_size=16, scales=(2, 4, 8, 16, 32), ratios=(0.5, 1, 2),
        alloc_size=(128, 128), rpn_nms_thresh=0.7, rpn_train_pre_nms=12000,
        rpn_train_post_nms=2000, rpn_test_pre_nms=6000, rpn_test_post_nms=300,
        rpn_min_size=16, num_sample=128, pos_iou_thresh=0.5,
        pos_ratio=0.25, **kwargs)