Exemplo n.º 1
0
def faster_rcnn_fpn_resnet50_v1b_coco(dataset, pretrained=False, pretrained_base=True, **kwargs):
    r"""Build a Faster R-CNN detector with an FPN on a ResNet50-v1b backbone.

    Based on the papers
    "Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster r-cnn: Towards
    real-time object detection with region proposal networks"
    "Lin, T., Dollar, P., Girshick, R., He, K., Hariharan, B., Belongie, S. (2016).
    Feature Pyramid Networks for Object Detection"

    Parameters
    ----------
    dataset : VisionDataset
        Dataset object; only its ``classes`` attribute is read here.
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True
        Load pretrained base network, the extra layers are randomized. Note that
        if pretrained is `True`, this has no effect (it is forced to ``False``).
    ctx : Context, default CPU
        The context in which to load the pretrained weights (passed via ``kwargs``).
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters (passed via ``kwargs``).
    **kwargs
        Forwarded unchanged to both ``resnet50_v1b`` and ``get_faster_rcnn``.

    Returns
    -------
    The value returned by ``get_faster_rcnn``.

    Examples
    --------
    >>> model = faster_rcnn_fpn_resnet50_v1b_coco(dataset, pretrained=True)
    >>> print(model)
    """
    from gluoncv.model_zoo.resnetv1b import resnet50_v1b
    classes = dataset.classes

    # Requesting full-model weights disables the separate backbone weights.
    if pretrained:
        pretrained_base = False
    backbone = resnet50_v1b(pretrained=pretrained_base, dilated=False,
                            use_global_stats=True, **kwargs)

    # Backbone outputs fed into the feature pyramid.
    fpn_inputs = ['layers1_relu8_fwd', 'layers2_relu11_fwd', 'layers3_relu17_fwd',
                  'layers4_relu8_fwd']
    features = FPNFeatureExpander(
        network=backbone, outputs=fpn_inputs, num_filters=[256, 256, 256, 256],
        use_1x1=True, use_upsample=True, use_elewadd=True, use_p6=True, no_bias=False,
        pretrained=pretrained_base)

    # Box head: two (Dense(1024) -> ReLU) stages before RCNN cls and reg.
    box_features = nn.HybridSequential()
    for _ in range(2):
        box_features.add(nn.Dense(1024, weight_initializer=mx.init.Normal(0.01)))
        box_features.add(nn.Activation('relu'))

    trainable = ['.*dense', '.*rpn', '.*down(2|3|4)_conv', '.*layers(2|3|4)_conv', 'P']
    return get_faster_rcnn(
        name='fpn_resnet50_v1b',
        dataset='coco',
        pretrained=pretrained,
        features=features,
        top_features=None,
        box_features=box_features,
        classes=classes,
        short=800,
        max_size=1333,
        min_stage=2,
        max_stage=6,
        train_patterns='|'.join(trainable),
        nms_thresh=0.5,
        nms_topk=-1,
        post_nms=-1,
        roi_mode='align',
        roi_size=(7, 7),
        strides=(4, 8, 16, 32, 64),
        clip=4.14,
        rpn_channel=1024,
        base_size=16,
        scales=(2, 4, 8, 16, 32),
        ratios=(0.5, 1, 2),
        alloc_size=(384, 384),
        rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000,
        rpn_train_post_nms=2000,
        rpn_test_pre_nms=6000,
        rpn_test_post_nms=1000,
        rpn_min_size=1,
        num_sample=512,
        pos_iou_thresh=0.5,
        pos_ratio=0.25,
        max_num_gt=100,
        **kwargs)
Exemplo n.º 2
0
    def __init__(self,
                 num_kernels,
                 scale=1,
                 ctx=mx.cpu(),
                 pretrained=False,
                 num_device=0,
                 **kwargs):
        """Assemble the PSENet backbone, FPN, per-level convs and output decoder.

        Parameters
        ----------
        num_kernels : int
            Number of output channels of the final 1x1 convolution in
            ``decoder_out``; also stored as ``self.num_kernels``.
        scale : int, default 1
            Stored as ``self.scale``; not otherwise used in this method.
        ctx : Context, default ``mx.cpu()``
            Context passed to the backbone, the FPN expander and every
            ``initialize`` call made here.
        pretrained : bool, default False
            Forwarded as ``pretrained`` to both ``resnet50_v1b`` and
            ``FPNFeatureExpander``.
        num_device : int, default 0
            Accepted but not used in this method.
        **kwargs
            Forwarded to ``resnet50_v1b``.
        """
        super(PSENet, self).__init__()
        self.num_kernels = num_kernels

        backbone = resnet50_v1b(pretrained=pretrained, dilated=False,
                                use_global_stats=False, norm_layer=nn.BatchNorm,
                                ctx=ctx, **kwargs)
        stage_outputs = [
            'layers1_relu8_fwd',
            'layers2_relu11_fwd',
            'layers3_relu17_fwd',
            'layers4_relu8_fwd',
        ]
        self.features = FPNFeatureExpander(
            network=backbone, outputs=stage_outputs, num_filters=[256, 256, 256, 256],
            use_1x1=True, use_upsample=True, use_elewadd=True, use_p6=False,
            no_bias=True, pretrained=pretrained, ctx=ctx)

        self.scale = scale
        self.extrac_convs = []

        # One 3x3 conv + ReLU branch per pyramid level. The branches are kept in
        # a plain list, so each is registered as a child explicitly.
        # (A BatchNorm after the convolution was commented out in the original.)
        for level in range(4):
            branch = nn.HybridSequential(prefix='extra_conv_' + str(level))
            with branch.name_scope():
                for layer in (nn.Conv2D(256, 3, 1, 1), nn.Activation('relu')):
                    branch.add(layer)
            branch.initialize(mx.init.Normal(0.001), ctx=ctx)
            self.register_child(branch)
            self.extrac_convs.append(branch)

        # Output decoder: 3x3 conv -> ReLU -> 1x1 conv with `num_kernels` channels.
        self.decoder_out = nn.HybridSequential(prefix='decoder_out')
        with self.decoder_out.name_scope():
            decoder_layers = (
                nn.Conv2D(256, 3, 1, 1),
                nn.Activation('relu'),
                nn.Conv2D(self.num_kernels, 1, 1),
            )
            for layer in decoder_layers:
                self.decoder_out.add(layer)
            self.decoder_out.initialize(mx.init.Normal(0.001), ctx=ctx)
Exemplo n.º 3
0
def faster_rcnn_resnet50_v1b(dataset, pretrained=False, pretrained_base=True, **kwargs):
    r"""Faster RCNN model from the paper
    "Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster r-cnn: Towards
    real-time object detection with region proposal networks"

    Parameters
    ----------
    dataset: VisionDataset
        Dataset object; only its ``classes`` attribute is read here.
    pretrained : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    pretrained_base : bool or str, optional, default is True
        Load pretrained base network, the extra layers are randomized. Note that
        if pretrained is `True`, this has no effect.
    roi_mode : str
        Required keyword argument (read from ``kwargs``). The value
        ``"bilinear"`` is rejected by this builder.
    ctx : Context, default CPU
        The context in which to load the pretrained weights.
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters.
    **kwargs
        Forwarded unchanged to both ``resnet50_v1b`` and ``get_faster_rcnn``.

    Returns
    -------
    The value returned by ``get_faster_rcnn``.

    Raises
    ------
    KeyError
        If ``roi_mode`` is not supplied.
    AssertionError
        If ``roi_mode`` is ``"bilinear"``.

    Examples
    --------
    >>> model = faster_rcnn_resnet50_v1b(dataset, pretrained=True, roi_mode='align')
    >>> print(model)
    """
    # Explicit raise rather than an `assert` statement so the check is not
    # stripped under `python -O`; exception type and message are unchanged.
    if kwargs["roi_mode"] == "bilinear":
        raise AssertionError("not support")

    from gluoncv.model_zoo.resnetv1b import resnet50_v1b
    classes = dataset.classes
    # Requesting full-model weights disables the separate backbone weights.
    pretrained_base = False if pretrained else pretrained_base
    base_network = resnet50_v1b(pretrained=pretrained_base, dilated=False,
                                use_global_stats=True, **kwargs)

    # Stem through layer3 form the shared feature extractor; layer4 is applied
    # afterwards as `top_features`.
    features = nn.HybridSequential()
    top_features = nn.HybridSequential()
    for layer in ['conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3']:
        features.add(getattr(base_network, layer))
    for layer in ['layer4']:
        top_features.add(getattr(base_network, layer))

    train_patterns = '|'.join(['.*dense', '.*rpn', '.*down(2|3|4)_conv', '.*layers(2|3|4)_conv'])
    return get_faster_rcnn(
        name='resnet50_v1b', dataset='coco', pretrained=pretrained,
        features=features, top_features=top_features, classes=classes,
        short=800, max_size=1333, train_patterns=train_patterns,
        nms_thresh=0.5, nms_topk=-1, post_nms=-1,
        strides=16, clip=4.14,
        rpn_channel=1024, base_size=16, scales=(2, 4, 8, 16, 32),
        ratios=(0.5, 1, 2), alloc_size=(128, 128), rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000, rpn_train_post_nms=2000,
        rpn_test_pre_nms=6000, rpn_test_post_nms=1000, rpn_min_size=1,
        num_sample=128, pos_iou_thresh=0.5, pos_ratio=0.25,
        max_num_gt=100, **kwargs)
Exemplo n.º 4
0
def faster_rcnn_fpn_resnet50_v1b(dataset, pretrained=False, pretrained_base=True, **kwargs):
    r"""Build a Faster R-CNN detector with an FPN on a ResNet50-v1b backbone.

    The FPN output width depends on ``kwargs["roi_mode"]``: 100 channels per
    level when it equals ``"bilinear"``, 256 otherwise. Everything else is the
    same for both modes.

    Parameters
    ----------
    dataset : object
        Only its ``classes`` attribute is read here.
    pretrained : bool or str
        Forwarded to ``get_faster_rcnn``. When truthy, `pretrained_base` is
        forced to ``False``.
    pretrained_base : bool or str, optional, default is True
        Forwarded as ``pretrained`` to ``resnet50_v1b`` and ``FPNFeatureExpander``.
    **kwargs
        Must contain ``roi_mode`` (``KeyError`` otherwise). Forwarded unchanged
        to both ``resnet50_v1b`` and ``get_faster_rcnn``.

    Returns
    -------
    The value returned by ``get_faster_rcnn``.
    """
    from gluoncv.model_zoo.resnetv1b import resnet50_v1b
    classes = dataset.classes
    if pretrained:
        pretrained_base = False
    backbone = resnet50_v1b(pretrained=pretrained_base, dilated=False,
                            use_global_stats=True, **kwargs)

    # The two RoI modes differ only in the number of FPN filters per level.
    fpn_channels = 100 if kwargs["roi_mode"] == "bilinear" else 256
    features = FPNFeatureExpander(
        network=backbone,
        outputs=['layers1_relu8_fwd', 'layers2_relu11_fwd', 'layers3_relu17_fwd',
                 'layers4_relu8_fwd'],
        num_filters=[fpn_channels] * 4,
        use_1x1=True, use_upsample=True, use_elewadd=True, use_p6=True, no_bias=False,
        pretrained=pretrained_base)

    # Box head: two (Dense(1024) -> ReLU) stages before RCNN cls and reg.
    box_features = nn.HybridSequential()
    for _ in range(2):
        box_features.add(nn.Dense(1024, weight_initializer=mx.init.Normal(0.01)))
        box_features.add(nn.Activation('relu'))

    trainable = ['.*dense', '.*rpn', '.*down(2|3|4)_conv', '.*layers(2|3|4)_conv', 'P']
    return get_faster_rcnn(
        name='fpn_resnet50_v1b',
        dataset='coco',
        pretrained=pretrained,
        features=features,
        top_features=None,
        box_features=box_features,
        classes=classes,
        short=800,
        max_size=1333,
        min_stage=2,
        max_stage=6,
        train_patterns='|'.join(trainable),
        nms_thresh=0.5,
        nms_topk=-1,
        post_nms=-1,
        strides=(4, 8, 16, 32, 64),
        clip=4.14,
        rpn_channel=1024,
        base_size=16,
        scales=(2, 4, 8, 16, 32),
        ratios=(0.5, 1, 2),
        alloc_size=(384, 384),
        rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000,
        rpn_train_post_nms=2000,
        rpn_test_pre_nms=6000,
        rpn_test_post_nms=1000,
        rpn_min_size=1,
        num_sample=512,
        pos_iou_thresh=0.5,
        pos_ratio=0.25,
        max_num_gt=100,
        **kwargs)
Exemplo n.º 5
0
 def __init__(self,
              text_scale=1024,
              ctx=mx.cpu(),
              pretrained=True,
              **kwargs):
     """Assemble the EAST backbone, FPN, prediction head and decoder.

     Parameters
     ----------
     text_scale : int, default 1024
         Stored as ``self.text_scale`` and passed to ``_EAST_head``.
     ctx : Context, default ``mx.cpu()``
         Context passed to the backbone, the FPN expander and the decoder's
         ``initialize`` call.
     pretrained : bool, default True
         Forwarded as ``pretrained`` to both ``resnet50_v1b`` and
         ``FPNFeatureExpander``.
     **kwargs
         Forwarded to ``resnet50_v1b``.
     """
     super(EAST, self).__init__()
     self.text_scale = text_scale

     backbone = resnet50_v1b(pretrained=pretrained, dilated=False,
                             use_global_stats=True, ctx=ctx, **kwargs)
     stage_outputs = [
         'layers1_relu8_fwd',
         'layers2_relu11_fwd',
         'layers3_relu17_fwd',
         'layers4_relu8_fwd',
     ]
     self.features = FPNFeatureExpander(
         network=backbone, outputs=stage_outputs, num_filters=[256, 256, 256, 256],
         use_1x1=True, use_upsample=True, use_elewadd=True, use_p6=False,
         no_bias=False, pretrained=pretrained, ctx=ctx)

     # The head is only constructed here; this method does not initialize it.
     self.head = _EAST_head(text_scale=text_scale, prefix='east_head')

     # Decoder: two (3x3 conv -> BatchNorm -> ReLU) stages with 128 then 64 channels.
     self.decoder_out = nn.HybridSequential(prefix='decoder_out')
     with self.decoder_out.name_scope():
         for channels in (128, 64):
             self.decoder_out.add(nn.Conv2D(channels, 3, 1, 1))
             self.decoder_out.add(nn.BatchNorm())
             self.decoder_out.add(nn.Activation('relu'))
         xavier = mx.init.Xavier(rnd_type='gaussian', factor_type='out', magnitude=2.)
         self.decoder_out.initialize(xavier, ctx=ctx)
Exemplo n.º 6
0
    # Validation preprocessing: the three transforms are composed in this order.
    val_transforms = Compose([
        Resize(image_size, True),
        Normalize(mean=(127, 127, 127), std=(255, 255, 255)),
        ToTensor()
    ])

    # Training split. The third positional DataLoader argument (True here, False
    # below) is presumably `shuffle` -- TODO confirm against the DataLoader in use.
    train_dataset = COCODataset(root, train_ann_file, train_transforms)
    train_data_loader = DataLoader(train_dataset, batch_size, True, last_batch="discard", batchify_fn=Collator(10), num_workers=num_workers)

    # Validation split, built the same way but with `val_transforms`.
    val_dataset = COCODataset(root, val_ann_file, val_transforms)
    val_data_loader = DataLoader(val_dataset, batch_size, False, last_batch="discard", batchify_fn=Collator(10), num_workers=num_workers)

    ctx = cpu()
    # `num_devices` is fixed to 1, so both `>= 1` conditionals below always
    # select the non-empty dict.
    num_devices = 1
    # Keyword arguments for the Gluon SyncBatchNorm layers.
    gluon_norm_kwargs = {"num_devices": num_devices} if num_devices >= 1 else {}
    base_network = resnet50_v1b(pretrained=True, dilated=False, use_global_stats=False,
                                norm_layer=SyncBatchNorm, norm_kwargs=gluon_norm_kwargs)
    # Keyword arguments for the symbolic SyncBatchNorm passed to the FPN expander
    # (note the different key name: "ndev" instead of "num_devices").
    sym_norm_kwargs = {"ndev": num_devices} if num_devices >= 1 else {}
    features = FPNFeatureExpander(
        network=base_network,
        outputs=['layers1_relu8_fwd', 'layers2_relu11_fwd', 'layers3_relu17_fwd', 'layers4_relu8_fwd'],
        num_filters=[256, 256, 256, 256], use_1x1=True, use_upsample=True, use_elewadd=True, use_p6=True,
        no_bias=True, pretrained=True, norm_layer=mx.sym.contrib.SyncBatchNorm, norm_kwargs=sym_norm_kwargs
    )
    # Box head: 3x3 conv (no bias) -> SyncBatchNorm -> ReLU -> Dense(1024) -> ReLU.
    box_features = nn.HybridSequential()
    box_features.add(nn.Conv2D(256, 3, padding=1, use_bias=False),
                     SyncBatchNorm(**gluon_norm_kwargs),
                     nn.Activation('relu'),
                     nn.Dense(1024, weight_initializer=mx.init.Normal(0.01)),
                     nn.Activation('relu'))

    # Disabled alternative backbone, kept from the original:
    # resnet50 = vision.resnet50_v1(pretrained=True, ctx=ctx).features
Exemplo n.º 7
0
def custom_rcnn_fpn(pretrained_base=True,
                    base_network_name='resnet18_v1b',
                    norm_layer=nn.BatchNorm,
                    norm_kwargs=None,
                    sym_norm_layer=None,
                    sym_norm_kwargs=None,
                    num_fpn_filters=256,
                    num_box_head_conv=4,
                    num_box_head_conv_filters=256,
                    num_box_head_dense_filters=1024):
    r"""Generate custom RCNN model with resnet base network w/FPN.

    Parameters
    ----------
    pretrained_base : bool or str
        Boolean value controls whether to load the default pretrained weights for model.
        String value represents the hashtag for a certain version of pretrained weights.
    base_network_name : str, default 'resnet18_v1b'
        base network for mask RCNN. Currently support: 'resnet18_v1b', 'resnet50_v1b',
        and 'resnet101_v1d'
    norm_layer : nn.HybridBlock, default nn.BatchNorm
        Gluon normalization layer to use. When it is exactly ``nn.BatchNorm`` the
        base network is built with ``use_global_stats=True`` (frozen batch
        normalization) and the dense-only box head is used.
    norm_kwargs : dict
        Keyword arguments for gluon normalization layer. ``None`` is treated as
        an empty dict when building the box head.
    sym_norm_layer : nn.SymbolBlock, default `None`
        Symbol normalization layer to use in FPN. This is due to FPN being implemented using
        SymbolBlock. Default is `None`, meaning no normalization layer will be used in FPN.
    sym_norm_kwargs : dict
        Keyword arguments for symbol normalization layer used in FPN.
    num_fpn_filters : int, default 256
        Number of filters for FPN output layers.
    num_box_head_conv : int, default 4
        Number of convolution layers to use in box head if batch normalization is not frozen.
    num_box_head_conv_filters : int, default 256
        Number of filters for convolution layers in box head.
        Only applicable if batch normalization is not frozen.
    num_box_head_dense_filters : int, default 1024
        Number of hidden units for the fully connected layer(s) in box head.

    Returns
    -------
    SymbolBlock or HybridBlock
        Base feature extractor eg. resnet w/ FPN.
    None or HybridBlock
        R-CNN feature before each task heads (always ``None`` here).
    HybridBlock
        Box feature extractor

    Raises
    ------
    NotImplementedError
        If `base_network_name` is not one of the supported names.
    """
    use_global_stats = norm_layer is nn.BatchNorm

    # Pick the backbone constructor and the layer outputs that feed the FPN.
    # Imports stay lazy so an unsupported name fails before touching gluoncv.
    if base_network_name == 'resnet18_v1b':
        from gluoncv.model_zoo.resnetv1b import resnet18_v1b as build_base_network
        fpn_inputs_names = [
            'layers1_relu3_fwd', 'layers2_relu3_fwd', 'layers3_relu3_fwd',
            'layers4_relu3_fwd'
        ]
    elif base_network_name == 'resnet50_v1b':
        from gluoncv.model_zoo.resnetv1b import resnet50_v1b as build_base_network
        fpn_inputs_names = [
            'layers1_relu8_fwd', 'layers2_relu11_fwd', 'layers3_relu17_fwd',
            'layers4_relu8_fwd'
        ]
    elif base_network_name == 'resnet101_v1d':
        from gluoncv.model_zoo.resnetv1b import resnet101_v1d as build_base_network
        fpn_inputs_names = [
            'layers1_relu8_fwd', 'layers2_relu11_fwd', 'layers3_relu68_fwd',
            'layers4_relu8_fwd'
        ]
    else:
        raise NotImplementedError('Unsupported network', base_network_name)

    base_network = build_base_network(pretrained=pretrained_base,
                                      dilated=False,
                                      use_global_stats=use_global_stats,
                                      norm_layer=norm_layer,
                                      norm_kwargs=norm_kwargs)
    features = FPNFeatureExpander(network=base_network,
                                  outputs=fpn_inputs_names,
                                  num_filters=[num_fpn_filters] *
                                  len(fpn_inputs_names),
                                  use_1x1=True,
                                  use_upsample=True,
                                  use_elewadd=True,
                                  use_p6=True,
                                  no_bias=not use_global_stats,
                                  pretrained=pretrained_base,
                                  norm_layer=sym_norm_layer,
                                  norm_kwargs=sym_norm_kwargs)
    top_features = None

    box_features = nn.HybridSequential()
    # Stride-2 pooling halves the RoI feature map
    # (reduce to 7x7, assuming a 14x14 input -- TODO confirm against roi_size).
    box_features.add(nn.AvgPool2D(pool_size=(3, 3), strides=2,
                                  padding=1))
    if use_global_stats:
        box_features.add(
            nn.Dense(num_box_head_dense_filters,
                     weight_initializer=mx.init.Normal(0.01)),
            nn.Activation('relu'))
    else:
        # `norm_kwargs` defaults to None and `**None` raises TypeError, so fall
        # back to an empty mapping for the box-head normalization layers.
        head_norm_kwargs = norm_kwargs if norm_kwargs is not None else {}
        for _ in range(num_box_head_conv):
            box_features.add(
                nn.Conv2D(num_box_head_conv_filters,
                          3,
                          padding=1,
                          use_bias=False), norm_layer(**head_norm_kwargs),
                nn.Activation('relu'))
    # Final fully connected layer, added for both head variants.
    box_features.add(
        nn.Dense(num_box_head_dense_filters,
                 weight_initializer=mx.init.Normal(0.01)),
        nn.Activation('relu'))
    return features, top_features, box_features
    def __init__(self,
                 depth,
                 nclass,
                 pretrained_base=True,
                 input_channel=3,
                 dropout_ratio=0.5,
                 init_std=0.01,
                 feat_dim=2048,
                 num_segments=1,
                 num_crop=1,
                 partial_bn=False,
                 **kwargs):
        """Build an action-recognition network on top of a ResNet-v1b model.

        Parameters
        ----------
        depth : int
            Selects the source model. 18/34/50/101/152 build the matching
            ``resnetXX_v1b`` with ``pretrained=pretrained_base`` and ``kwargs``.
            418/434/450/501/552 load ``resnet{18,34,50,101,152}_v1b_kinetics400``
            through ``get_model`` with ``pretrained=True`` (``pretrained_base``
            and ``kwargs`` are not used for these).
        nclass : int
            Number of units of the output dense layer.
        pretrained_base : bool, default True
            Passed as ``pretrained`` to the ResNet constructors.
        input_channel : int, default 3
            If 3, the source model's ``conv1`` is reused; otherwise a new 7x7
            stride-2 convolution with 64 channels is created and initialized.
        dropout_ratio : float, default 0.5
            Rate of the dropout layer stored as ``self.drop``.
        init_std : float, default 0.01
            Sigma of the normal initializer of the output layer.
        feat_dim : int, default 2048
            Ignored: ``self.feat_dim`` is always set to ``512 * self.expansion``.
        num_segments : int, default 1
            Stored as ``self.num_segments``.
        num_crop : int, default 1
            Stored as ``self.num_crop``.
        partial_bn : bool, default False
            Accepted but not used in this method.
        **kwargs
            Forwarded to the ResNet constructor for depths 18/34/50/101/152.

        Raises
        ------
        ValueError
            If `depth` is not one of the supported values.
        """
        super(ActionRecResNetV1b, self).__init__()

        if depth == 18:
            pretrained_model = resnet18_v1b(pretrained=pretrained_base,
                                            **kwargs)
            self.expansion = 1
        elif depth == 34:
            pretrained_model = resnet34_v1b(pretrained=pretrained_base,
                                            **kwargs)
            self.expansion = 1
        elif depth == 50:
            pretrained_model = resnet50_v1b(pretrained=pretrained_base,
                                            **kwargs)
            self.expansion = 4
        elif depth == 101:
            pretrained_model = resnet101_v1b(pretrained=pretrained_base,
                                             **kwargs)
            self.expansion = 4
        elif depth == 152:
            pretrained_model = resnet152_v1b(pretrained=pretrained_base,
                                             **kwargs)
            self.expansion = 4
        elif depth == 418:
            pretrained_model = get_model('resnet18_v1b_kinetics400',
                                         pretrained=True)
            self.expansion = 1
        elif depth == 434:
            pretrained_model = get_model('resnet34_v1b_kinetics400',
                                         pretrained=True)
            self.expansion = 1
        elif depth == 450:
            pretrained_model = get_model('resnet50_v1b_kinetics400',
                                         pretrained=True)
            self.expansion = 4
        elif depth == 501:
            pretrained_model = get_model('resnet101_v1b_kinetics400',
                                         pretrained=True)
            self.expansion = 4
        elif depth == 552:
            pretrained_model = get_model('resnet152_v1b_kinetics400',
                                         pretrained=True)
            self.expansion = 4
        else:
            # Fail here with a clear message; previously this only printed and
            # then crashed below on the unset `self.expansion`.
            raise ValueError('No such ResNet configuration for depth=%d' % (depth))

        self.dropout_ratio = dropout_ratio
        self.init_std = init_std
        # Derived from the selected model; the `feat_dim` argument is not used.
        self.feat_dim = 512 * self.expansion
        self.num_segments = num_segments
        self.num_crop = num_crop
        self.input_channel = input_channel

        with self.name_scope():
            if self.input_channel == 3:
                self.conv1 = pretrained_model.conv1
            else:
                # A non-RGB input cannot reuse the source model's first
                # convolution, so a freshly initialized one is created.
                self.conv1 = nn.Conv2D(
                    in_channels=input_channel,
                    channels=64,
                    kernel_size=7,
                    strides=2,
                    padding=3,
                    use_bias=False,
                    weight_initializer=mx.init.Xavier(magnitude=2))
                self.conv1.initialize()
            # All remaining feature layers are taken from the source model.
            self.bn1 = pretrained_model.bn1
            self.relu = pretrained_model.relu
            self.maxpool = pretrained_model.maxpool
            self.layer1 = pretrained_model.layer1
            self.layer2 = pretrained_model.layer2
            self.layer3 = pretrained_model.layer3
            self.layer4 = pretrained_model.layer4
            self.avgpool = pretrained_model.avgpool
            self.flat = pretrained_model.flat
            # New classification head: dropout followed by a dense layer.
            self.drop = nn.Dropout(rate=self.dropout_ratio)
            self.output = nn.Dense(
                units=nclass,
                in_units=self.feat_dim,
                weight_initializer=init.Normal(sigma=self.init_std))
            self.output.initialize()
Exemplo n.º 9
0
def faster_rcnn_resnet50_v1b_custom(classes,
                                    transfer=None,
                                    pretrained_base=True,
                                    pretrained=False,
                                    **kwargs):
    r"""Build a Faster RCNN network with a resnet50_v1b base for a custom class list.

    Parameters
    ----------
    classes : iterable of str
        Names of custom foreground classes. `len(classes)` is the number of foreground classes.
    transfer : str or None
        If `None`, a new network is assembled from a resnet50_v1b base network.
        Otherwise the model ``'faster_rcnn_resnet50_v1b_' + str(transfer)`` is
        loaded with pretrained weights and its classes are reset to `classes`.
    pretrained_base : boolean
        Whether fetch and load pretrained weights for base network
        (only used when `transfer` is `None`).
    pretrained : bool, default False
        Forwarded to ``get_faster_rcnn`` (only used when `transfer` is `None`).
    ctx : Context, default CPU
        The context in which to load the pretrained weights (passed via ``kwargs``).
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters (passed via ``kwargs``).
    **kwargs
        Forwarded to ``get_faster_rcnn`` or, when transferring, to ``get_model``.

    Returns
    -------
    mxnet.gluon.HybridBlock
        Hybrid faster RCNN network.
    """
    # Transfer path: start from an existing detector and swap its class list.
    if transfer is not None:
        from gluoncv.model_zoo import get_model
        net = get_model('faster_rcnn_resnet50_v1b_' + str(transfer),
                        pretrained=True,
                        **kwargs)
        net.reset_class(classes)
        return net

    from gluoncv.model_zoo.resnetv1b import resnet50_v1b
    backbone = resnet50_v1b(pretrained=pretrained_base,
                            dilated=False,
                            use_global_stats=True)

    # Stem through layer3 form the shared feature extractor; layer4 becomes
    # the top (per-RoI) feature block.
    features = nn.HybridSequential()
    for stage in ('conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3'):
        features.add(getattr(backbone, stage))
    top_features = nn.HybridSequential()
    top_features.add(backbone.layer4)

    trainable = ['.*dense', '.*rpn', '.*down(2|3|4)_conv', '.*layers(2|3|4)_conv']
    return get_faster_rcnn(
        name='resnet50_v1b', dataset='custom', pretrained=pretrained,
        features=features, top_features=top_features, classes=classes,
        short=600, max_size=1000, train_patterns='|'.join(trainable),
        nms_thresh=0.3, nms_topk=400, post_nms=100,
        roi_mode='align', roi_size=(14, 14), stride=16, clip=None,
        rpn_channel=1024, base_size=16,
        scales=(2, 4, 8, 16, 32), ratios=(0.5, 1, 2), alloc_size=(128, 128),
        rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000, rpn_train_post_nms=2000,
        rpn_test_pre_nms=6000, rpn_test_post_nms=300,
        rpn_min_size=16, num_sample=128,
        pos_iou_thresh=0.5, pos_ratio=0.25,
        **kwargs)
Exemplo n.º 10
0
def faster_rcnn_resnet50_v1b_voc(pretrained=False,
                                 pretrained_base=True,
                                 **kwargs):
    r"""Build a Faster RCNN network with a resnet50_v1b base for Pascal VOC.

    Model from the paper
    "Ren, S., He, K., Girshick, R., & Sun, J. (2015). Faster r-cnn: Towards
    real-time object detection with region proposal networks"

    Parameters
    ----------
    pretrained : bool, optional, default is False
        Load pretrained weights.
    pretrained_base : bool, optional, default is True
        Load pretrained base network, the extra layers are randomized. Note that
        if pretrained is `True`, this has no effect (it is forced to ``False``).
    ctx : Context, default CPU
        The context in which to load the pretrained weights (passed via ``kwargs``).
    root : str, default '~/.mxnet/models'
        Location for keeping the model parameters (passed via ``kwargs``).
    **kwargs
        Forwarded to ``get_faster_rcnn``.

    Returns
    -------
    The value returned by ``get_faster_rcnn``.

    Examples
    --------
    >>> model = faster_rcnn_resnet50_v1b_voc(pretrained=True)
    >>> print(model)
    """
    from gluoncv.model_zoo.resnetv1b import resnet50_v1b
    from gluoncv.data import VOCDetection
    classes = VOCDetection.CLASSES

    # Requesting full-model weights disables the separate backbone weights.
    if pretrained:
        pretrained_base = False
    backbone = resnet50_v1b(pretrained=pretrained_base,
                            dilated=False,
                            use_global_stats=True)

    # Stem through layer3 form the shared feature extractor; layer4 becomes
    # the top (per-RoI) feature block.
    features = nn.HybridSequential()
    for stage in ('conv1', 'bn1', 'relu', 'maxpool', 'layer1', 'layer2', 'layer3'):
        features.add(getattr(backbone, stage))
    top_features = nn.HybridSequential()
    top_features.add(backbone.layer4)

    trainable = ['.*dense', '.*rpn', '.*down(2|3|4)_conv', '.*layers(2|3|4)_conv']
    return get_faster_rcnn(
        name='resnet50_v1b', dataset='voc', pretrained=pretrained,
        features=features, top_features=top_features, classes=classes,
        short=600, max_size=1000, train_patterns='|'.join(trainable),
        nms_thresh=0.3, nms_topk=400, post_nms=100,
        roi_mode='align', roi_size=(14, 14), stride=16, clip=None,
        rpn_channel=1024, base_size=16,
        scales=(2, 4, 8, 16, 32), ratios=(0.5, 1, 2), alloc_size=(128, 128),
        rpn_nms_thresh=0.7,
        rpn_train_pre_nms=12000, rpn_train_post_nms=2000,
        rpn_test_pre_nms=6000, rpn_test_post_nms=300,
        rpn_min_size=16, num_sample=128,
        pos_iou_thresh=0.5, pos_ratio=0.25,
        **kwargs)