Example #1
0
def _pm_residual_unit(data, idx):
    """Bottleneck residual unit for prediction module ``pm{idx}``.

    The residual branch is 1x1x128 -> 1x1x128 -> 1x1x256; the shortcut is a
    single 1x1x256 conv.  Layer names follow the original
    ``stage4_tb_ext_pm{idx}_*`` scheme so pretrained weights still load.
    """
    prefix = 'stage4_tb_ext_pm{}'.format(idx)
    branch = _conv_block(data, 128, 1, 1, 0, prefix + '_b2a')
    branch = _conv_block(branch, 128, 1, 1, 0, prefix + '_b2b')
    branch = _conv_block(branch, 256, 1, 1, 0, prefix + '_b2c')
    shortcut = _conv_block(data, 256, 1, 1, 0, prefix)
    res = mx.sym.broadcast_add(shortcut, branch)
    return mx.sym.Activation(data=res,
                             act_type='relu',
                             name=prefix + '_res/relu')


def _topdown_fuse(coarse, fine, coarse_idx, fine_idx):
    """Upsample the coarse map (2x deconv) and add it to the finer map.

    FPN-style top-down fusion; returns the relu-activated sum.  Layer names
    follow the original ``stage4/tb/ext/pm*`` scheme so pretrained weights
    still load.
    """
    pre = _conv_block(coarse, 256, 1, 1, 0,
                      'stage4/tb/ext/pm{}/feat/deconv/pre'.format(coarse_idx))
    up = _deconv_block(pre, 256, 2, 2, 0,
                       'stage4/tb/ext/pm{}/feat/deconv'.format(coarse_idx))
    hyper = _conv_block(fine, 256, 1, 1, 0,
                        'stage4_tb/ext/pm{}/res/hyper/relu'.format(fine_idx))
    feat = mx.sym.broadcast_add(hyper, up)
    return mx.sym.Activation(
        data=feat,
        act_type='relu',
        name='stage4/tb/ext/pm{}/res/deconv/pre/relu'.format(fine_idx))


def get_symbol_train(num_classes=20,
                     nms_thresh=0.5,
                     force_suppress=True,
                     nms_topk=400,
                     **kwargs):
    """Build the training symbol of a small DenseNet-backbone SSD detector.

    Parameters
    ----------
    num_classes : int
        number of object classes not including background
    nms_thresh : float
        non-maximum suppression threshold
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections

    Returns
    -------
    mx.Symbol
        group of [cls_prob, loc_loss, cls_label, det]
    """
    # Backbone hyper-parameters (one entry per dense stage).
    block_config = [2, 4, 4]
    bottleneck_width = [1, 2, 4]
    growth_rate = [32, 32, 32]
    total_filters = [128, 256, 448]

    # Normalize scalar hyper-parameters to one value per stage.
    if isinstance(bottleneck_width, list):
        bottleneck_widths = bottleneck_width
    else:
        bottleneck_widths = [bottleneck_width] * 4
    if isinstance(growth_rate, list):
        growth_rates = growth_rate
    else:
        growth_rates = [growth_rate] * 4

    data = mx.sym.var('data')
    label = mx.symbol.Variable(name="label")

    # Stem: two stride-2 convs (4x downsampling) plus one stride-1 conv.
    stem1 = _conv_block(data, 16, 3, 2, 1, 'stem1')
    stem2 = _conv_block(stem1, 32, 3, 2, 1, 'stem2')
    stem3 = _conv_block(stem2, 64, 3, 1, 1, 'stem3')

    # Dense stages, each followed by a transition block.  The last stage
    # keeps its spatial resolution (no pooling in its transition).
    from_layer = stem3
    for idx, num_layers in enumerate(block_config):
        from_layer = _dense_block(from_layer, num_layers, growth_rates[idx],
                                  bottleneck_widths[idx],
                                  'stage{}'.format(idx + 1))
        with_pooling = idx != len(block_config) - 1
        from_layer = _transition_block(from_layer, total_filters[idx],
                                       with_pooling,
                                       'stage{}_tb'.format(idx + 1))

    # Tap intermediate backbone outputs for the prediction modules.
    internals = from_layer.get_internals()
    stage2_tb = internals['stage2_tb/relu_output']
    stage3_tb = internals['stage3_tb/relu_output']

    stage4_tb_ext_pm2_res_relu = _pm_residual_unit(stage2_tb, 2)
    stage4_tb_ext_pm3_res_relu = _pm_residual_unit(stage3_tb, 3)

    # Extra stride-2 path producing the coarsest feature map (pm4).
    ext1_fe1_1 = _conv_block(stage3_tb, 256, 1, 1, 0,
                             'stage4_tb_relu_ext1_fe1_1')
    ext1_fe1_2 = _conv_block(ext1_fe1_1, 256, 3, 2, 1, 'ext1_fe1_2')
    stage4_tb_ext_pm4_res_relu = _pm_residual_unit(ext1_fe1_2, 4)

    # Top-down fusion: pm4 -> pm3 -> pm2.
    stage4_tb_ext_pm3_feat_relu = _topdown_fuse(
        stage4_tb_ext_pm4_res_relu, stage4_tb_ext_pm3_res_relu, 4, 3)
    stage4_tb_ext_pm2_feat_relu = _topdown_fuse(
        stage4_tb_ext_pm3_feat_relu, stage4_tb_ext_pm2_res_relu, 3, 2)

    from_layers = [
        stage4_tb_ext_pm2_feat_relu, stage4_tb_ext_pm3_feat_relu,
        stage4_tb_ext_pm4_res_relu
    ]

    # Anchor configuration, reduced to keep the number of anchor boxes small.
    sizes = [[0.02, 0.04, 0.06], [0.08, 0.10, 0.12], [0.14, 0.16, 0.2]]
    ratios = [[0.8, 1.2, 1.8], [0.8, 1.2, 1.8, 2.1], [0.8, 1.2, 1.8, 2.1]]
    normalizations = [-1, -1, -1]  # no L2 normalization on any source layer
    steps = []  # empty: let MultiBoxPrior infer steps from layer dimensions
    num_channels = [-1, -1, -1]
    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        from_layers, num_classes, sizes=sizes, ratios=ratios,
        normalization=normalizations, num_channels=num_channels, clip=False,
        interm_layer=0, steps=steps)

    # Match anchors to ground truth; hard negative mining at 3:1.
    tmp = mx.symbol.contrib.MultiBoxTarget(
        *[anchor_boxes, label, cls_preds], overlap_threshold=.5,
        ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0,
        negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target = tmp[0]
    loc_target_mask = tmp[1]
    cls_target = tmp[2]

    # Classification loss (softmax) and localization loss (smooth L1).
    cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target,
                                       ignore_label=-1, use_ignore=True,
                                       grad_scale=1., multi_output=True,
                                       normalization='valid', name="cls_prob")
    loc_loss_ = mx.symbol.smooth_l1(
        name="loc_loss_",
        data=loc_target_mask * (loc_preds - loc_target),
        scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1.,
                                  normalization='valid', name="loc_loss")

    # Zero-gradient outputs used only for monitoring / evaluation.
    cls_label = mx.symbol.MakeLoss(data=cls_target,
                                   grad_scale=0,
                                   name="cls_label")
    det = mx.symbol.contrib.MultiBoxDetection(
        *[cls_prob, loc_preds, anchor_boxes], name="detection",
        nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")

    return mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
Example #2
0
def get_symbol_train(network,
                     num_classes,
                     from_layers,
                     num_filters,
                     strides,
                     pads,
                     sizes,
                     ratios,
                     normalizations=-1,
                     steps=None,
                     min_filter=128,
                     square_bb=False,
                     per_cls_reg=False,
                     nms_thresh=0.5,
                     force_suppress=False,
                     nms_topk=400,
                     **kwargs):
    """Build network symbol for training SSD

    Parameters
    ----------
    network : str
        base network symbol name
    num_classes : int
        number of object classes not including background
    from_layers : list of str
        feature extraction layers, use '' for add extra layers
        For example:
        from_layers = ['relu4_3', 'fc7', '', '', '', '']
        which means extract feature from relu4_3 and fc7, adding 4 extra layers
        on top of fc7
    num_filters : list of int
        number of filters for extra layers, you can use -1 for extracted features,
        however, if normalization and scale is applied, the number of filter for
        that layer must be provided.
        For example:
        num_filters = [512, -1, 512, 256, 256, 256]
    strides : list of int
        strides for the 3x3 convolution appended, -1 can be used for extracted
        feature layers
    pads : list of int
        paddings for the 3x3 convolution, -1 can be used for extracted layers
    sizes : list or list of list
        [min_size, max_size] for all layers or [[], [], []...] for specific layers
    ratios : list or list of list
        [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers
    normalizations : int or list of int
        use normalizations value for all layers or [...] for specific layers,
        -1 indicate no normalizations and scales
    steps : list, optional
        specify steps for each MultiBoxPrior layer, leave as None and it will
        calculate according to layer dimensions
    min_filter : int
        minimum number of filters used in 1x1 convolution
    square_bb : bool
        force square boxes in the python target layer
    per_cls_reg : bool
        regress box offsets per class (python target layer only)
    nms_thresh : float
        non-maximum suppression threshold
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections

    Returns
    -------
    mx.Symbol

    """
    # Avoid a shared mutable default argument for steps.
    steps = [] if steps is None else steps
    use_python_layer = True
    use_focal_loss = cfg.train['use_focal_loss']
    use_smooth_ce = cfg.train['use_smooth_ce']

    label = mx.sym.Variable('label')
    # Default to minibatch BN statistics during training.  (A leftover
    # `import ipdb; ipdb.set_trace()` debugging hook that fired whenever the
    # caller omitted 'use_global_stats' was removed from this branch.)
    if 'use_global_stats' not in kwargs:
        kwargs['use_global_stats'] = 0

    # Optional knobs forwarded to multibox_layer / the target layer.
    mimic_fc = kwargs.get('mimic_fc', 0)
    python_anchor = kwargs.get('python_anchor', False)
    dense_vh = kwargs.get('dense_vh', False)
    data_shape = kwargs.get('data_shape', (0, 0))
    if isinstance(data_shape, int):
        data_shape = (data_shape, data_shape)
    ignore_labels = kwargs.get('ignore_labels', [])

    body = import_module(network).get_symbol(num_classes, **kwargs)
    layers = multi_layer_feature(body,
                                 from_layers,
                                 num_filters,
                                 strides,
                                 pads,
                                 min_filter=min_filter)

    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        layers, num_classes, sizes=sizes, ratios=ratios,
        normalization=normalizations, num_channels=num_filters, clip=False,
        interm_layer=0, steps=steps, dense_vh=dense_vh, data_shape=data_shape,
        per_cls_reg=per_cls_reg, mimic_fc=mimic_fc,
        python_anchor=python_anchor)

    if use_python_layer:
        # Focal loss reweights all samples, so disable hard negative mining.
        neg_ratio = -1 if use_focal_loss else 3
        th_small = kwargs.get('th_small', 0.04)
        cls_probs = mx.sym.SoftmaxActivation(cls_preds, mode='channel')
        tmp = mx.sym.Custom(*[anchor_boxes, label, cls_probs],
                            name='multibox_target',
                            op_type='multibox_target',
                            ignore_labels=ignore_labels,
                            per_cls_reg=per_cls_reg,
                            hard_neg_ratio=neg_ratio,
                            th_small=th_small,
                            square_bb=square_bb)
    else:
        # The built-in target operator does not support per-class regression.
        assert not per_cls_reg
        neg_ratio = -1 if use_focal_loss else 3
        tmp = mx.contrib.symbol.MultiBoxTarget(
            *[anchor_boxes, label, cls_preds], overlap_threshold=.5,
            ignore_label=-1, negative_mining_ratio=neg_ratio,
            minimum_negative_samples=0, negative_mining_thresh=.4,
            variances=(0.1, 0.1, 0.2, 0.2), name="multibox_target")
    loc_target = tmp[0]
    loc_target_mask = tmp[1]
    cls_target = tmp[2]
    if not use_python_layer:
        # Built-in operator emits (batch, anchors); custom losses below
        # expect (batch, 1, anchors).
        cls_target = mx.sym.reshape(cls_target, (0, 1, -1))

    if use_focal_loss:
        gamma = cfg.train['focal_loss_gamma']
        alpha = cfg.train['focal_loss_alpha']
        cls_prob = mx.sym.SoftmaxActivation(cls_preds, mode='channel')
        if not use_smooth_ce:
            cls_loss = mx.sym.Custom(cls_preds,
                                     cls_prob,
                                     cls_target,
                                     op_type='reweight_loss',
                                     name='cls_loss',
                                     gamma=gamma,
                                     alpha=alpha,
                                     normalize=True)
        else:
            # Smoothed focal loss with a learnable probability threshold;
            # the threshold is stored in log-space and exponentiated so it
            # stays positive during optimization.
            th_prob = cfg.train['smooth_ce_th']
            w_reg = cfg.train['smooth_ce_lambda'] * float(num_classes)
            var_th_prob = mx.sym.var(name='th_prob_sce', shape=(1,),
                                     dtype=np.float32,
                                     init=mx.init.Constant(np.log(th_prob)))
            var_th_prob = mx.sym.exp(var_th_prob)
            cls_loss = mx.sym.Custom(cls_preds,
                                     cls_prob,
                                     cls_target,
                                     var_th_prob,
                                     op_type='smoothed_focal_loss',
                                     name='cls_loss',
                                     gamma=gamma,
                                     alpha=alpha,
                                     th_prob=th_prob,
                                     w_reg=w_reg,
                                     normalize=True)
    elif use_smooth_ce:
        th_prob = cfg.train['smooth_ce_th']
        cls_prob = mx.sym.SoftmaxActivation(cls_preds, mode='channel')
        cls_loss = mx.sym.Custom(cls_preds,
                                 cls_prob,
                                 cls_target,
                                 op_type='smoothed_softmax_loss',
                                 name='cls_loss',
                                 th_prob=th_prob,
                                 normalization='valid')
    else:
        # Plain softmax cross-entropy over anchors, ignoring unmatched ones.
        cls_loss = mx.symbol.SoftmaxOutput(
            data=cls_preds, label=cls_target, ignore_label=-1,
            use_ignore=True, grad_scale=1., multi_output=True,
            normalization='valid', name="cls_loss")

    # Localization loss: smooth L1 on masked (positive) anchors.
    loc_loss_ = mx.symbol.smooth_l1(
        name="loc_loss_",
        data=loc_target_mask * (loc_preds - loc_target),
        scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_,
                                  grad_scale=cfg.train['smoothl1_weight'],
                                  normalization='valid', name="loc_loss")

    # monitoring training status
    cls_label = mx.sym.BlockGrad(cls_target, name="cls_label")
    loc_label = mx.sym.BlockGrad(loc_target_mask, name='loc_label')

    det = mx.contrib.symbol.MultiBoxDetection(
        *[cls_loss, loc_preds, anchor_boxes], name="detection",
        nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")

    # group output
    out = [cls_loss, loc_loss, cls_label, loc_label, det]
    if use_focal_loss and use_smooth_ce:
        # Expose the learned threshold for monitoring (no gradient).
        out.append(mx.sym.BlockGrad(var_th_prob))
    return mx.sym.Group(out)
Example #3
0
def get_symbol(network,
               num_classes,
               from_layers,
               num_filters,
               sizes,
               ratios,
               strides,
               pads,
               normalizations=-1,
               steps=None,
               min_filter=128,
               per_cls_reg=False,
               nms_thresh=0.5,
               force_suppress=False,
               nms_topk=400,
               **kwargs):
    """Build network for testing SSD

    Parameters
    ----------
    network : str
        base network symbol name
    num_classes : int
        number of object classes not including background
    from_layers : list of str
        feature extraction layers, use '' for add extra layers
        For example:
        from_layers = ['relu4_3', 'fc7', '', '', '', '']
        which means extract feature from relu4_3 and fc7, adding 4 extra layers
        on top of fc7
    num_filters : list of int
        number of filters for extra layers, you can use -1 for extracted features,
        however, if normalization and scale is applied, the number of filter for
        that layer must be provided.
        For example:
        num_filters = [512, -1, 512, 256, 256, 256]
    strides : list of int
        strides for the 3x3 convolution appended, -1 can be used for extracted
        feature layers
    pads : list of int
        paddings for the 3x3 convolution, -1 can be used for extracted layers
    sizes : list or list of list
        [min_size, max_size] for all layers or [[], [], []...] for specific layers
    ratios : list or list of list
        [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers
    normalizations : int or list of int
        use normalizations value for all layers or [...] for specific layers,
        -1 indicate no normalizations and scales
    steps : list, optional
        specify steps for each MultiBoxPrior layer, leave as None and it will
        calculate according to layer dimensions
    min_filter : int
        minimum number of filters used in 1x1 convolution
    per_cls_reg : bool
        regress box offsets per class
    nms_thresh : float
        non-maximum suppression threshold (kept for interface compatibility;
        the python detection layer reads its thresholds from cfg.valid)
    force_suppress : boolean
        whether suppress different class objects (see nms_thresh note)
    nms_topk : int
        apply NMS to top K detections (see nms_thresh note)

    Returns
    -------
    mx.Symbol

    """
    # Avoid a shared mutable default argument for steps.
    steps = [] if steps is None else steps
    data_shape = kwargs.get('data_shape', (0, 0))
    if isinstance(data_shape, int):
        data_shape = (data_shape, data_shape)
    mimic_fc = kwargs.get('mimic_fc', 0)
    python_anchor = kwargs.get('python_anchor', False)
    dense_vh = kwargs.get('dense_vh', False)

    # Inference: freeze batch-norm to its global (moving-average) statistics.
    kwargs['use_global_stats'] = True
    body = import_module(network).get_symbol(num_classes, **kwargs)
    layers = multi_layer_feature(body,
                                 from_layers,
                                 num_filters,
                                 strides,
                                 pads,
                                 min_filter=min_filter)

    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        layers, num_classes, sizes=sizes, ratios=ratios,
        normalization=normalizations, num_channels=num_filters, clip=False,
        interm_layer=0, steps=steps, dense_vh=dense_vh, data_shape=data_shape,
        per_cls_reg=per_cls_reg, mimic_fc=mimic_fc,
        python_anchor=python_anchor)

    cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds,
                                           mode='channel',
                                           name='cls_prob')
    # Drop the background channel before running detection.
    cls_prob = mx.sym.slice_axis(cls_prob, axis=1, begin=1, end=None)
    out = mx.sym.Custom(cls_prob,
                        loc_preds,
                        anchor_boxes,
                        name='detection',
                        op_type='multibox_detection',
                        th_pos=cfg.valid['th_pos'],
                        th_nms=cfg.valid['th_nms'],
                        per_cls_reg=per_cls_reg)
    return out
def _vgg_conv_relu(data, suffix, num_filter, kernel=(3, 3), pad=(1, 1),
                   dilate=None):
    """Convolution + ReLU pair named ``conv{suffix}`` / ``relu{suffix}``.

    Defaults to a 3x3 'same' convolution; pass kernel/pad/dilate to override.
    Names match the original hand-written layers so pretrained VGG16 weights
    still load.
    """
    extra = {'dilate': dilate} if dilate is not None else {}
    conv = mx.symbol.Convolution(data=data, kernel=kernel, pad=pad,
                                 num_filter=num_filter,
                                 name="conv" + suffix, **extra)
    return mx.symbol.Activation(data=conv, act_type="relu",
                                name="relu" + suffix)


def _vgg_stage(data, suffixes, num_filter):
    """Stack of 3x3 conv+ReLU layers sharing one filter count."""
    out = data
    for suffix in suffixes:
        out = _vgg_conv_relu(out, suffix, num_filter)
    return out


def get_symbol_train(num_classes=20, nms_thresh=0.5, force_suppress=False,
                     nms_topk=400, **kwargs):
    """
    Single-shot multi-box detection with VGG 16 layers ConvNet
    This is a modified version, with fc6/fc7 layers replaced by conv layers
    And the network is slightly smaller than original VGG 16 network
    This is a training network with losses

    Parameters:
    ----------
    num_classes: int
        number of object classes not including background
    nms_thresh : float
        non-maximum suppression threshold
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections

    Returns:
    ----------
    mx.Symbol
    """
    data = mx.symbol.Variable(name="data")
    label = mx.symbol.Variable(name="label")

    # VGG16 trunk, groups 1-5.
    relu1_2 = _vgg_stage(data, ('1_1', '1_2'), 64)
    pool1 = mx.symbol.Pooling(data=relu1_2, pool_type="max", kernel=(2, 2),
                              stride=(2, 2), name="pool1")
    relu2_2 = _vgg_stage(pool1, ('2_1', '2_2'), 128)
    pool2 = mx.symbol.Pooling(data=relu2_2, pool_type="max", kernel=(2, 2),
                              stride=(2, 2), name="pool2")
    relu3_3 = _vgg_stage(pool2, ('3_1', '3_2', '3_3'), 256)
    # "full" pooling convention keeps the feature-map sizes SSD expects.
    pool3 = mx.symbol.Pooling(data=relu3_3, pool_type="max", kernel=(2, 2),
                              stride=(2, 2), pooling_convention="full",
                              name="pool3")
    relu4_3 = _vgg_stage(pool3, ('4_1', '4_2', '4_3'), 512)
    pool4 = mx.symbol.Pooling(data=relu4_3, pool_type="max", kernel=(2, 2),
                              stride=(2, 2), name="pool4")
    relu5_3 = _vgg_stage(pool4, ('5_1', '5_2', '5_3'), 512)
    # Stride-1 pool5 keeps resolution for the dilated fc6 replacement below.
    pool5 = mx.symbol.Pooling(data=relu5_3, pool_type="max", kernel=(3, 3),
                              stride=(1, 1), pad=(1, 1), name="pool5")
    # fc6 replaced by a dilated 3x3 conv, fc7 by a 1x1 conv.
    relu6 = _vgg_conv_relu(pool5, '6', 1024, pad=(6, 6), dilate=(6, 6))
    relu7 = _vgg_conv_relu(relu6, '7', 1024, kernel=(1, 1), pad=(0, 0))

    ### ssd extra layers ###
    _, relu8_1 = legacy_conv_act_layer(relu7, "8_1", 256, kernel=(1, 1),
                                       pad=(0, 0), stride=(1, 1),
                                       act_type="relu", use_batchnorm=False)
    _, relu8_2 = legacy_conv_act_layer(relu8_1, "8_2", 512, kernel=(3, 3),
                                       pad=(1, 1), stride=(2, 2),
                                       act_type="relu", use_batchnorm=False)
    _, relu9_1 = legacy_conv_act_layer(relu8_2, "9_1", 128, kernel=(1, 1),
                                       pad=(0, 0), stride=(1, 1),
                                       act_type="relu", use_batchnorm=False)
    _, relu9_2 = legacy_conv_act_layer(relu9_1, "9_2", 256, kernel=(3, 3),
                                       pad=(1, 1), stride=(2, 2),
                                       act_type="relu", use_batchnorm=False)
    _, relu10_1 = legacy_conv_act_layer(relu9_2, "10_1", 128, kernel=(1, 1),
                                        pad=(0, 0), stride=(1, 1),
                                        act_type="relu", use_batchnorm=False)
    _, relu10_2 = legacy_conv_act_layer(relu10_1, "10_2", 256, kernel=(3, 3),
                                        pad=(0, 0), stride=(1, 1),
                                        act_type="relu", use_batchnorm=False)
    _, relu11_1 = legacy_conv_act_layer(relu10_2, "11_1", 128, kernel=(1, 1),
                                        pad=(0, 0), stride=(1, 1),
                                        act_type="relu", use_batchnorm=False)
    _, relu11_2 = legacy_conv_act_layer(relu11_1, "11_2", 256, kernel=(3, 3),
                                        pad=(0, 0), stride=(1, 1),
                                        act_type="relu", use_batchnorm=False)

    # specific parameters for VGG16 network
    from_layers = [relu4_3, relu7, relu8_2, relu9_2, relu10_2, relu11_2]
    sizes = [[.1, .141], [.2, .272], [.37, .447], [.54, .619], [.71, .79],
             [.88, .961]]
    ratios = [[1, 2, .5], [1, 2, .5, 3, 1. / 3], [1, 2, .5, 3, 1. / 3],
              [1, 2, .5, 3, 1. / 3], [1, 2, .5], [1, 2, .5]]
    # Only relu4_3 gets L2 normalization (scale 20), as in the SSD paper.
    normalizations = [20, -1, -1, -1, -1, -1]
    steps = [x / 300.0 for x in [8, 16, 32, 64, 100, 300]]
    num_channels = [512]

    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        from_layers, num_classes, sizes=sizes, ratios=ratios,
        normalization=normalizations, num_channels=num_channels, clip=False,
        interm_layer=0, steps=steps)

    # Match anchors to ground truth; hard negative mining at 3:1.
    tmp = mx.symbol.contrib.MultiBoxTarget(
        *[anchor_boxes, label, cls_preds], overlap_threshold=.5,
        ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0,
        negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target = tmp[0]
    loc_target_mask = tmp[1]
    cls_target = tmp[2]

    # Classification loss (softmax) and localization loss (smooth L1).
    cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target,
                                       ignore_label=-1, use_ignore=True,
                                       grad_scale=1., multi_output=True,
                                       normalization='valid', name="cls_prob")
    loc_loss_ = mx.symbol.smooth_l1(
        name="loc_loss_",
        data=loc_target_mask * (loc_preds - loc_target),
        scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1.,
                                  normalization='valid', name="loc_loss")

    # monitoring training status
    cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0,
                                   name="cls_label")
    det = mx.symbol.contrib.MultiBoxDetection(
        *[cls_prob, loc_preds, anchor_boxes], name="detection",
        nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")

    # group output
    out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
    return out
Example #5
0
def get_symbol_train(num_classes=20, nms_thresh=0.5, force_suppress=False,
                     nms_topk=400, **kwargs):
    """
    Build the SSD training symbol on a reduced VGG-16 trunk.

    The fully-connected fc6/fc7 layers of the original VGG-16 are replaced
    by a dilated 3x3 convolution (conv6) and a 1x1 convolution (conv7).
    Multi-box losses (softmax classification + smooth-L1 localization) are
    attached, together with zero-gradient outputs used for monitoring.

    Parameters:
    ----------
    num_classes: int
        number of object classes not including background
    nms_thresh : float
        non-maximum suppression threshold
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections

    Returns:
    ----------
    mx.Symbol
    """
    data = mx.symbol.Variable(name="data")
    label = mx.symbol.Variable(name="label")

    # VGG-16 trunk description: (group id, number of 3x3 convs, filters).
    trunk = [(1, 2, 64), (2, 2, 128), (3, 3, 256), (4, 3, 512), (5, 3, 512)]
    acts = {}
    net = data
    for gid, n_convs, n_filter in trunk:
        for cid in range(1, n_convs + 1):
            tag = "%d_%d" % (gid, cid)
            net = mx.symbol.Convolution(data=net, kernel=(3, 3), pad=(1, 1),
                                        num_filter=n_filter, name="conv" + tag)
            net = mx.symbol.Activation(data=net, act_type="relu",
                                       name="relu" + tag)
            acts[tag] = net
        if gid == 5:
            # pool5 keeps the spatial resolution (3x3, stride 1, pad 1) so
            # the fc6/fc7 replacements operate at the relu5_3 resolution.
            net = mx.symbol.Pooling(data=net, pool_type="max", kernel=(3, 3),
                                    stride=(1, 1), pad=(1, 1), name="pool5")
        elif gid == 3:
            # "full" pooling convention reproduces the Caffe-SSD map sizes.
            net = mx.symbol.Pooling(data=net, pool_type="max", kernel=(2, 2),
                                    stride=(2, 2), pooling_convention="full",
                                    name="pool3")
        else:
            net = mx.symbol.Pooling(data=net, pool_type="max", kernel=(2, 2),
                                    stride=(2, 2), name="pool%d" % gid)

    # fc6 -> dilated conv6, fc7 -> 1x1 conv7 (dropout layers omitted).
    conv6 = mx.symbol.Convolution(data=net, kernel=(3, 3), pad=(6, 6),
                                  dilate=(6, 6), num_filter=1024, name="conv6")
    relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6")
    conv7 = mx.symbol.Convolution(data=relu6, kernel=(1, 1), pad=(0, 0),
                                  num_filter=1024, name="conv7")
    relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7")

    # SSD extra feature layers: (suffix, filters, kernel, pad, stride).
    extras = [("8_1", 256, (1, 1), (0, 0), (1, 1)),
              ("8_2", 512, (3, 3), (1, 1), (2, 2)),
              ("9_1", 128, (1, 1), (0, 0), (1, 1)),
              ("9_2", 256, (3, 3), (1, 1), (2, 2)),
              ("10_1", 128, (1, 1), (0, 0), (1, 1)),
              ("10_2", 256, (3, 3), (0, 0), (1, 1)),
              ("11_1", 128, (1, 1), (0, 0), (1, 1)),
              ("11_2", 256, (3, 3), (0, 0), (1, 1))]
    feat = relu7
    for tag, n_filter, kernel, pad, stride in extras:
        _, feat = legacy_conv_act_layer(feat, tag, n_filter, kernel=kernel,
                                        pad=pad, stride=stride,
                                        act_type="relu", use_batchnorm=False)
        acts[tag] = feat

    # anchor/prediction configuration specific to VGG16-SSD-300
    from_layers = [acts["4_3"], relu7, acts["8_2"], acts["9_2"],
                   acts["10_2"], acts["11_2"]]
    sizes = [[.1, .141], [.2, .272], [.37, .447], [.54, .619],
             [.71, .79], [.88, .961]]
    ratios = [[1, 2, .5], [1, 2, .5, 3, 1. / 3], [1, 2, .5, 3, 1. / 3],
              [1, 2, .5, 3, 1. / 3], [1, 2, .5], [1, 2, .5]]
    normalizations = [20, -1, -1, -1, -1, -1]
    steps = [x / 300.0 for x in [8, 16, 32, 64, 100, 300]]
    num_channels = [512]

    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        from_layers, num_classes, sizes=sizes, ratios=ratios,
        normalization=normalizations, num_channels=num_channels,
        clip=False, interm_layer=0, steps=steps)

    target = mx.symbol.contrib.MultiBoxTarget(
        anchor_boxes, label, cls_preds, overlap_threshold=.5,
        ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0,
        negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target = target[0]
    loc_target_mask = target[1]
    cls_target = target[2]

    cls_prob = mx.symbol.SoftmaxOutput(
        data=cls_preds, label=cls_target, ignore_label=-1, use_ignore=True,
        grad_scale=1., multi_output=True, normalization='valid',
        name="cls_prob")
    loc_loss_ = mx.symbol.smooth_l1(
        name="loc_loss_", data=loc_target_mask * (loc_preds - loc_target),
        scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1.,
                                  normalization='valid', name="loc_loss")

    # zero-gradient outputs used only for monitoring training status
    cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0,
                                   name="cls_label")
    det = mx.symbol.contrib.MultiBoxDetection(
        cls_prob, loc_preds, anchor_boxes, name="detection",
        nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")

    return mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
def get_symbol_train(num_classes=1,
                     nms_thresh=0.5,
                     force_suppress=False,
                     nms_topk=400,
                     **kwargs):
    """
    Single-shot multi-box detection on a VGG-16 trunk with an FPN-style
    top-down pathway: lateral 1x1 convolutions and 2x-deconvolutions merge
    pool5..pool2 into pyramid features P5..P2, which feed the multibox heads.
    This is a training network with losses.

    Parameters:
    ----------
    num_classes: int
        number of object classes not including background
    nms_thresh : float
        non-maximum suppression threshold
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections
    **kwargs : dict
        extra arguments, ignored; accepted so this builder has the same
        signature as the other get_symbol_train variants in this file

    Returns:
    ----------
    mx.Symbol
    """
    data = mx.symbol.Variable(name="data")
    label = mx.symbol.Variable(name="label")

    # group 1
    conv1_1 = mx.symbol.Convolution(data=data,
                                    kernel=(3, 3),
                                    pad=(1, 1),
                                    num_filter=64,
                                    name="conv1_1")
    relu1_1 = mx.symbol.Activation(data=conv1_1,
                                   act_type="relu",
                                   name="relu1_1")
    conv1_2 = mx.symbol.Convolution(data=relu1_1,
                                    kernel=(3, 3),
                                    pad=(1, 1),
                                    num_filter=64,
                                    name="conv1_2")
    relu1_2 = mx.symbol.Activation(data=conv1_2,
                                   act_type="relu",
                                   name="relu1_2")
    pool1 = mx.symbol.Pooling(data=relu1_2,
                              pool_type="max",
                              kernel=(2, 2),
                              stride=(2, 2),
                              name="pool1")
    # group 2
    conv2_1 = mx.symbol.Convolution(data=pool1,
                                    kernel=(3, 3),
                                    pad=(1, 1),
                                    num_filter=128,
                                    name="conv2_1")
    relu2_1 = mx.symbol.Activation(data=conv2_1,
                                   act_type="relu",
                                   name="relu2_1")
    conv2_2 = mx.symbol.Convolution(data=relu2_1,
                                    kernel=(3, 3),
                                    pad=(1, 1),
                                    num_filter=128,
                                    name="conv2_2")
    relu2_2 = mx.symbol.Activation(data=conv2_2,
                                   act_type="relu",
                                   name="relu2_2")
    pool2 = mx.symbol.Pooling(data=relu2_2,
                              pool_type="max",
                              kernel=(2, 2),
                              stride=(2, 2),
                              name="pool2")
    # group 3
    conv3_1 = mx.symbol.Convolution(data=pool2,
                                    kernel=(3, 3),
                                    pad=(1, 1),
                                    num_filter=256,
                                    name="conv3_1")
    relu3_1 = mx.symbol.Activation(data=conv3_1,
                                   act_type="relu",
                                   name="relu3_1")
    conv3_2 = mx.symbol.Convolution(data=relu3_1,
                                    kernel=(3, 3),
                                    pad=(1, 1),
                                    num_filter=256,
                                    name="conv3_2")
    relu3_2 = mx.symbol.Activation(data=conv3_2,
                                   act_type="relu",
                                   name="relu3_2")
    conv3_3 = mx.symbol.Convolution(data=relu3_2,
                                    kernel=(3, 3),
                                    pad=(1, 1),
                                    num_filter=256,
                                    name="conv3_3")
    relu3_3 = mx.symbol.Activation(data=conv3_3,
                                   act_type="relu",
                                   name="relu3_3")
    # "full" pooling convention to keep Caffe-compatible feature map sizes
    pool3 = mx.symbol.Pooling(
        data=relu3_3, pool_type="max", kernel=(2, 2), stride=(2, 2), \
        pooling_convention="full", name="pool3")
    # group 4
    conv4_1 = mx.symbol.Convolution(data=pool3,
                                    kernel=(3, 3),
                                    pad=(1, 1),
                                    num_filter=512,
                                    name="conv4_1")
    relu4_1 = mx.symbol.Activation(data=conv4_1,
                                   act_type="relu",
                                   name="relu4_1")
    conv4_2 = mx.symbol.Convolution(data=relu4_1,
                                    kernel=(3, 3),
                                    pad=(1, 1),
                                    num_filter=512,
                                    name="conv4_2")
    relu4_2 = mx.symbol.Activation(data=conv4_2,
                                   act_type="relu",
                                   name="relu4_2")
    conv4_3 = mx.symbol.Convolution(data=relu4_2,
                                    kernel=(3, 3),
                                    pad=(1, 1),
                                    num_filter=512,
                                    name="conv4_3")
    relu4_3 = mx.symbol.Activation(data=conv4_3,
                                   act_type="relu",
                                   name="relu4_3")
    pool4 = mx.symbol.Pooling(data=relu4_3,
                              pool_type="max",
                              kernel=(2, 2),
                              stride=(2, 2),
                              name="pool4")
    # group 5
    conv5_1 = mx.symbol.Convolution(data=pool4,
                                    kernel=(3, 3),
                                    pad=(1, 1),
                                    num_filter=512,
                                    name="conv5_1")
    relu5_1 = mx.symbol.Activation(data=conv5_1,
                                   act_type="relu",
                                   name="relu5_1")
    conv5_2 = mx.symbol.Convolution(data=relu5_1,
                                    kernel=(3, 3),
                                    pad=(1, 1),
                                    num_filter=512,
                                    name="conv5_2")
    relu5_2 = mx.symbol.Activation(data=conv5_2,
                                   act_type="relu",
                                   name="relu5_2")
    conv5_3 = mx.symbol.Convolution(data=relu5_2,
                                    kernel=(3, 3),
                                    pad=(1, 1),
                                    num_filter=512,
                                    name="conv5_3")
    relu5_3 = mx.symbol.Activation(data=conv5_3,
                                   act_type="relu",
                                   name="relu5_3")
    # unlike the plain SSD builder, this variant downsamples again at pool5
    pool5 = mx.symbol.Pooling(data=relu5_3,
                              pool_type="max",
                              kernel=(2, 2),
                              stride=(2, 2),
                              name='pool5')

    # FPN-style top-down pathway: 1x1 lateral convolutions reduce every
    # pooled trunk feature to 256 channels; 4x4/stride-2 deconvolutions
    # upsample the coarser level before element-wise addition.
    P5 = mx.symbol.Convolution(data=pool5,
                               num_filter=256,
                               kernel=(1, 1),
                               stride=(1, 1),
                               pad=(0, 0),
                               name='P5')

    P5_topdown = mx.symbol.Deconvolution(data=P5,
                                         num_filter=256,
                                         kernel=(4, 4),
                                         stride=(2, 2),
                                         pad=(1, 1),
                                         name='P5_topdown')
    P4_lateral = mx.symbol.Convolution(data=pool4,
                                       num_filter=256,
                                       kernel=(1, 1),
                                       stride=(1, 1),
                                       pad=(0, 0),
                                       name='P4_lateral')
    P4 = P4_lateral + P5_topdown

    P4_topdown = mx.symbol.Deconvolution(data=P4,
                                         num_filter=256,
                                         kernel=(4, 4),
                                         stride=(2, 2),
                                         pad=(1, 1),
                                         name='P4_topdown')
    P3_lateral = mx.symbol.Convolution(data=pool3,
                                       num_filter=256,
                                       kernel=(1, 1),
                                       stride=(1, 1),
                                       pad=(0, 0),
                                       name='P3_lateral')
    P3 = P3_lateral + P4_topdown

    P3_topdown = mx.symbol.Deconvolution(data=P3,
                                         num_filter=256,
                                         kernel=(4, 4),
                                         stride=(2, 2),
                                         pad=(1, 1),
                                         name='P3_topdown')
    P2_lateral = mx.symbol.Convolution(data=pool2,
                                       num_filter=256,
                                       kernel=(1, 1),
                                       stride=(1, 1),
                                       pad=(0, 0),
                                       name='P2_lateral')
    P2 = P3_topdown + P2_lateral

    # anchor/prediction configuration for the P2..P5 pyramid levels
    # (small single-ratio anchors; steps assume a 640-pixel input)
    from_layers = [P2, P3, P4, P5]
    sizes = [[0.01, .03], [.05, .07], [.09, .11], [.13, .15]]
    ratios = [[1], [1], [1], [1]]
    normalizations = [20, -1, -1, -1]
    steps = [x / 640.0 for x in [4, 8, 16, 32]]
    num_channels = [256]

    loc_preds, cls_preds, anchor_boxes = multibox_layer(from_layers, \
        num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
        num_channels=num_channels, clip=False, interm_layer=0, steps=steps)

    tmp = mx.contrib.symbol.MultiBoxTarget(
        *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \
        ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \
        negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target = tmp[0]
    loc_target_mask = tmp[1]
    cls_target = tmp[2]

    cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \
        ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True, \
        normalization='valid', name="cls_prob")
    loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \
        data=loc_target_mask * (loc_preds - loc_target), scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1., \
        normalization='valid', name="loc_loss")

    # monitoring training status (zero-gradient outputs)
    cls_label = mx.symbol.MakeLoss(data=cls_target,
                                   grad_scale=0,
                                   name="cls_label")
    det = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
        name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")

    # group output
    out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
    return out
# ---- Example 7 (score: 0) ----
def pvanet_multibox(data,
                    num_classes,
                    use_global_stats=True,
                    no_bias=False,
                    lr_mult=1.0):
    ''' Build a PVANet-10.0 trunk and attach SSD multibox prediction heads.

    Parameters
    ----------
    data : mx.Symbol
        input image symbol
    num_classes : int
        number of object classes (forwarded to multibox_layer)
    use_global_stats : bool
        forwarded to every conv/bn helper block (presumably the BatchNorm
        use_global_stats flag to freeze running statistics -- the helpers
        are defined elsewhere in this file; confirm there)
    no_bias : bool
        forwarded to every conv helper block
    lr_mult : float
        learning-rate multiplier forwarded to every helper block

    Returns
    -------
    tuple of mx.Symbol
        (loc_preds, cls_preds, anchor_boxes) from multibox_layer
    '''
    # stem: 4x4/stride-2 C.ReLU convolution
    conv1 = conv_bn_relu(data,
                         group_name='conv1',
                         num_filter=16,
                         kernel=(4, 4),
                         pad=(1, 1),
                         stride=(2, 2),
                         no_bias=no_bias,
                         use_global_stats=use_global_stats,
                         use_crelu=True,
                         lr_mult=lr_mult)
    # conv2
    conv2 = mcrelu(conv1,
                   prefix_group='conv2',
                   filters=(16, 24, 48),
                   no_bias=no_bias,
                   use_global_stats=use_global_stats,
                   lr_mult=lr_mult)
    # conv3
    conv3 = mcrelu(conv2,
                   prefix_group='conv3',
                   filters=(24, 48, 96),
                   no_bias=no_bias,
                   use_global_stats=use_global_stats,
                   lr_mult=lr_mult)
    # inc3a: first inception stage, pooled (do_pool=True)
    inc3a = inception(conv3,
                      prefix_group='inc3a',
                      filters_1=96,
                      filters_3=(16, 64),
                      filters_5=(16, 32, 32),
                      no_bias=no_bias,
                      use_global_stats=use_global_stats,
                      do_pool=True,
                      lr_mult=lr_mult)
    # inc3b
    inc3b = inception(inc3a,
                      prefix_group='inc3b',
                      filters_1=96,
                      filters_3=(16, 64),
                      filters_5=(16, 32, 32),
                      no_bias=no_bias,
                      use_global_stats=use_global_stats,
                      lr_mult=lr_mult)
    # inc3b/residual: merge the conv3 shortcut; inc3b_elt is the raw
    # element-wise sum reused by the next residual merge below
    inc3b, inc3b_elt = residual_inc(conv3,
                                    inc3b,
                                    prefix_lhs='inc3a',
                                    prefix_rhs='inc3b',
                                    num_filter=128,
                                    stride=(2, 2),
                                    no_bias=no_bias,
                                    use_global_stats=use_global_stats,
                                    lr_mult=lr_mult)
    # inc3c
    inc3c = inception(inc3b,
                      prefix_group='inc3c',
                      filters_1=96,
                      filters_3=(16, 64),
                      filters_5=(16, 32, 32),
                      no_bias=no_bias,
                      use_global_stats=use_global_stats,
                      lr_mult=lr_mult)
    # inc3d
    inc3d = inception(inc3c,
                      prefix_group='inc3d',
                      filters_1=96,
                      filters_3=(16, 64),
                      filters_5=(16, 32, 32),
                      no_bias=no_bias,
                      use_global_stats=use_global_stats,
                      lr_mult=lr_mult)
    # inc3e
    inc3e = inception(inc3d,
                      prefix_group='inc3e',
                      filters_1=96,
                      filters_3=(16, 64),
                      filters_5=(16, 32, 32),
                      no_bias=no_bias,
                      use_global_stats=use_global_stats,
                      lr_mult=lr_mult)
    # inc3e/residual
    inc3e, _ = residual_inc(inc3b_elt,
                            inc3e,
                            prefix_lhs='inc3c',
                            prefix_rhs='inc3e',
                            num_filter=128,
                            stride=(1, 1),
                            no_bias=no_bias,
                            use_global_stats=use_global_stats,
                            lr_mult=lr_mult)
    # inc4a: second inception stage (wider filters), pooled
    inc4a = inception(inc3e,
                      prefix_group='inc4a',
                      filters_1=128,
                      filters_3=(32, 96),
                      filters_5=(16, 32, 32),
                      no_bias=no_bias,
                      use_global_stats=use_global_stats,
                      do_pool=True,
                      lr_mult=lr_mult)
    # inc4b
    inc4b = inception(inc4a,
                      prefix_group='inc4b',
                      filters_1=128,
                      filters_3=(32, 96),
                      filters_5=(16, 32, 32),
                      no_bias=no_bias,
                      use_global_stats=use_global_stats,
                      lr_mult=lr_mult)
    # inc4b/residual
    inc4b, inc4b_elt = residual_inc(inc3e,
                                    inc4b,
                                    prefix_lhs='inc4a',
                                    prefix_rhs='inc4b',
                                    num_filter=192,
                                    stride=(2, 2),
                                    no_bias=no_bias,
                                    use_global_stats=use_global_stats,
                                    lr_mult=lr_mult)
    # inc4c
    inc4c = inception(inc4b,
                      prefix_group='inc4c',
                      filters_1=128,
                      filters_3=(32, 96),
                      filters_5=(16, 32, 32),
                      no_bias=no_bias,
                      use_global_stats=use_global_stats,
                      lr_mult=lr_mult)
    # inc4d
    inc4d = inception(inc4c,
                      prefix_group='inc4d',
                      filters_1=128,
                      filters_3=(32, 96),
                      filters_5=(16, 32, 32),
                      no_bias=no_bias,
                      use_global_stats=use_global_stats,
                      lr_mult=lr_mult)
    # inc4e
    inc4e = inception(inc4d,
                      prefix_group='inc4e',
                      filters_1=128,
                      filters_3=(32, 96),
                      filters_5=(16, 32, 32),
                      no_bias=no_bias,
                      use_global_stats=use_global_stats,
                      lr_mult=lr_mult)
    # inc4e/residual
    inc4e, _ = residual_inc(inc4b_elt,
                            inc4e,
                            prefix_lhs='inc4c',
                            prefix_rhs='inc4e',
                            num_filter=384,
                            stride=(1, 1),
                            no_bias=no_bias,
                            use_global_stats=use_global_stats,
                            lr_mult=lr_mult)

    # hyperfeature: concatenate a downsampled conv3, inc3e, and a 2x
    # bilinearly-upsampled inc4e so all three share one spatial size
    downsample = mx.sym.Pooling(conv3,
                                name='downsample',
                                kernel=(3, 3),
                                pad=(0, 0),
                                stride=(2, 2),
                                pool_type='max',
                                pooling_convention='full')
    upsample = mx.sym.UpSampling(inc4e,
                                 name='upsample',
                                 scale=2,
                                 sample_type='bilinear',
                                 num_filter=384,
                                 num_args=2)
    concat = mx.sym.concat(downsample, inc3e, upsample)

    # TODO: feature size tuning
    # For now I will just use 256.
    # feature size would be (n, 256, 32, 32)
    convf = conv_bn_relu(concat,
                         group_name='convf_16',
                         num_filter=256,
                         pad=(0, 0),
                         kernel=(1, 1),
                         stride=(1, 1),
                         no_bias=no_bias,
                         use_global_stats=use_global_stats,
                         lr_mult=lr_mult)

    # multi-scale heads: anchor sizes are fractions of a 512-pixel input
    from_layers = [convf]
    sizes = [(32.0 / 512.0)]
    feat_strides = [16, 32, 64, 128, 256]
    # each extra scale: 1x1 projection to 64ch, then 3x3/stride-2 to 256ch
    for fs in feat_strides[1:]:
        projf = conv_bn_relu(convf,
                             group_name='projf_{}'.format(fs),
                             num_filter=64,
                             pad=(0, 0),
                             kernel=(1, 1),
                             stride=(1, 1),
                             no_bias=no_bias,
                             use_global_stats=use_global_stats,
                             lr_mult=lr_mult)
        convf = conv_bn_relu(projf,
                             group_name='convf_{}'.format(fs),
                             num_filter=256,
                             pad=(1, 1),
                             kernel=(3, 3),
                             stride=(2, 2),
                             no_bias=no_bias,
                             use_global_stats=use_global_stats,
                             lr_mult=lr_mult)
        from_layers.append(convf)
        sizes.append((fs * 2.0 / 512.0))

    ratios = [(1.0, 0.5, 2.0)] * len(from_layers)
    normalizations = [(-1)] * len(from_layers)
    num_channels = [256]

    # NOTE(review): steps here are raw pixel strides (16..256), whereas the
    # other builders in this file pass fractions of the input size (e.g.
    # x / 300.0) -- confirm which convention multibox_layer expects.
    loc_preds, cls_preds, anchor_boxes = multibox_layer(from_layers, \
            num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
            num_channels=num_channels, clip=False, interm_layer=0, steps=feat_strides)

    return loc_preds, cls_preds, anchor_boxes
# ---- Example 8 (score: 0) ----
def get_symbol(network,
               num_classes,
               from_layers,
               num_filters,
               sizes,
               ratios,
               strides,
               pads,
               normalizations=-1,
               steps=None,
               min_filter=128,
               nms_thresh=0.5,
               force_suppress=False,
               nms_topk=400,
               **kwargs):
    """Build network for testing SSD

    Parameters
    ----------
    network : str
        base network symbol name
    num_classes : int
        number of object classes not including background
    from_layers : list of str
        feature extraction layers, use '' for add extra layers
        For example:
        from_layers = ['relu4_3', 'fc7', '', '', '', '']
        which means extract feature from relu4_3 and fc7, adding 4 extra layers
        on top of fc7
    num_filters : list of int
        number of filters for extra layers, you can use -1 for extracted features,
        however, if normalization and scale is applied, the number of filter for
        that layer must be provided.
        For example:
        num_filters = [512, -1, 512, 256, 256, 256]
    sizes : list or list of list
        [min_size, max_size] for all layers or [[], [], []...] for specific layers
    ratios : list or list of list
        [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers
    strides : list of int
        strides for the 3x3 convolution appended, -1 can be used for extracted
        feature layers
    pads : list of int
        paddings for the 3x3 convolution, -1 can be used for extracted layers
    normalizations : int or list of int
        use normalizations value for all layers or [...] for specific layers,
        -1 indicate no normalizations and scales
    steps : list, optional
        specify steps for each MultiBoxPrior layer, leave empty (None), it will
        calculate according to layer dimensions
    min_filter : int
        minimum number of filters used in 1x1 convolution
    nms_thresh : float
        non-maximum suppression threshold
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections

    Returns
    -------
    mx.Symbol

    """
    # avoid a mutable default argument; None means "compute from layer dims"
    steps = [] if steps is None else steps
    body = import_module(network).get_symbol(num_classes, **kwargs)
    # pick / build the multi-scale feature maps from the base network
    layers = multi_layer_feature(body,
                                 from_layers,
                                 num_filters,
                                 strides,
                                 pads,
                                 min_filter=min_filter)

    loc_preds, cls_preds, anchor_boxes = multibox_layer(layers, \
        num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
        num_channels=num_filters, clip=False, interm_layer=0, steps=steps)

    # per-anchor class probabilities, then NMS-filtered detections
    cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel', \
        name='cls_prob')
    out = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
        name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
    return out
# ---- Example 9 (score: 0) ----
def get_symbol_train(network,
                     num_classes,
                     from_layers,
                     num_filters,
                     strides,
                     pads,
                     sizes,
                     ratios,
                     normalizations=-1,
                     steps=None,
                     min_filter=128,
                     nms_thresh=0.5,
                     force_suppress=False,
                     nms_topk=400,
                     **kwargs):
    """Build network symbol for training SSD (focal-loss classification)

    Parameters
    ----------
    network : str
        base network symbol name
    num_classes : int
        number of object classes not including background
    from_layers : list of str
        feature extraction layers, use '' for add extra layers
        For example:
        from_layers = ['relu4_3', 'fc7', '', '', '', '']
        which means extract feature from relu4_3 and fc7, adding 4 extra layers
        on top of fc7
    num_filters : list of int
        number of filters for extra layers, you can use -1 for extracted features,
        however, if normalization and scale is applied, the number of filter for
        that layer must be provided.
        For example:
        num_filters = [512, -1, 512, 256, 256, 256]
    strides : list of int
        strides for the 3x3 convolution appended, -1 can be used for extracted
        feature layers
    pads : list of int
        paddings for the 3x3 convolution, -1 can be used for extracted layers
    sizes : list or list of list
        [min_size, max_size] for all layers or [[], [], []...] for specific layers
    ratios : list or list of list
        [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers
    normalizations : int or list of int
        use normalizations value for all layers or [...] for specific layers,
        -1 indicate no normalizations and scales
    steps : list or None
        specify steps for each MultiBoxPrior layer, leave None (or empty), it
        will calculate according to layer dimensions
    min_filter : int
        minimum number of filters used in 1x1 convolution
    nms_thresh : float
        non-maximum suppression threshold
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections

    Returns
    -------
    mx.Symbol

    """
    # Use a None sentinel instead of a mutable default (`steps=[]`): a list
    # default is a single object shared across all calls to this function.
    if steps is None:
        steps = []
    label = mx.sym.Variable('label')
    body = import_module(network).get_symbol(num_classes, **kwargs)
    layers = multi_layer_feature(body,
                                 from_layers,
                                 num_filters,
                                 strides,
                                 pads,
                                 min_filter=min_filter)

    # Per-anchor localization offsets, class scores and prior (anchor) boxes.
    loc_preds, cls_preds, anchor_boxes = multibox_layer(layers, \
        num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
        num_channels=num_filters, clip=False, interm_layer=0, steps=steps)

    # Match anchors to ground truth; 3:1 hard negative mining.
    tmp = mx.contrib.symbol.MultiBoxTarget(
        *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \
        ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \
        negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target = tmp[0]
    loc_target_mask = tmp[1]
    cls_target = tmp[2]
    ''' Focal loss related '''
    # Channel-wise softmax feeds a custom 'focal_loss' op, replacing the
    # plain SoftmaxOutput classification loss kept commented below.
    cls_prob_ = mx.sym.SoftmaxActivation(cls_preds, mode='channel')
    cls_prob = mx.sym.Custom(cls_preds,
                             cls_prob_,
                             cls_target,
                             op_type='focal_loss',
                             name='cls_prob',
                             gamma=2.0,
                             alpha=0.25,
                             normalize=True)
    # cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \
    #     ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True, \
    #     normalization='valid', name="cls_prob")
    # Smooth-L1 localization loss, masked so only matched anchors contribute.
    loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \
        data=loc_target_mask * (loc_preds - loc_target), scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1., \
        normalization='valid', name="loc_loss")

    # monitoring training status; grad_scale=0 means no gradient contribution
    cls_label = mx.symbol.MakeLoss(data=cls_target,
                                   grad_scale=0,
                                   name="cls_label")
    det = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
        name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")

    # group output
    out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
    return out
示例#10
0
def get_symbol_train(network, num_classes, from_layers, num_filters, strides, pads,
                     sizes, ratios, normalizations=-1, steps=None, min_filter=128,
                     nms_thresh=0.5, force_suppress=False, nms_topk=400, **kwargs):
    """Build network symbol for training SSD

    Parameters
    ----------
    network : str
        base network symbol name
    num_classes : int
        number of object classes not including background
    from_layers : list of str
        feature extraction layers, use '' for add extra layers
        For example:
        from_layers = ['relu4_3', 'fc7', '', '', '', '']
        which means extract feature from relu4_3 and fc7, adding 4 extra layers
        on top of fc7
    num_filters : list of int
        number of filters for extra layers, you can use -1 for extracted features,
        however, if normalization and scale is applied, the number of filter for
        that layer must be provided.
        For example:
        num_filters = [512, -1, 512, 256, 256, 256]
    strides : list of int
        strides for the 3x3 convolution appended, -1 can be used for extracted
        feature layers
    pads : list of int
        paddings for the 3x3 convolution, -1 can be used for extracted layers
    sizes : list or list of list
        [min_size, max_size] for all layers or [[], [], []...] for specific layers
    ratios : list or list of list
        [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers
    normalizations : int or list of int
        use normalizations value for all layers or [...] for specific layers,
        -1 indicate no normalizations and scales
    steps : list or None
        specify steps for each MultiBoxPrior layer, leave None (or empty), it
        will calculate according to layer dimensions
    min_filter : int
        minimum number of filters used in 1x1 convolution
    nms_thresh : float
        non-maximum suppression threshold
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections

    Returns
    -------
    mx.Symbol

    """
    # Use a None sentinel instead of a mutable default (`steps=[]`), which is
    # a single list object shared by every call.
    if steps is None:
        steps = []
    label = mx.sym.Variable('label')
    body = import_module(network).get_symbol(num_classes, **kwargs)
    layers = multi_layer_feature(body, from_layers, num_filters, strides, pads,
        min_filter=min_filter)

    # Per-anchor localization offsets, class scores and prior (anchor) boxes.
    loc_preds, cls_preds, anchor_boxes = multibox_layer(layers, \
        num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
        num_channels=num_filters, clip=False, interm_layer=0, steps=steps)

    # Match anchors to ground truth; 3:1 hard negative mining.
    tmp = mx.contrib.symbol.MultiBoxTarget(
        *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \
        ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \
        negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target = tmp[0]
    loc_target_mask = tmp[1]
    cls_target = tmp[2]

    cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \
        ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True, \
        normalization='valid', name="cls_prob")
    # Smooth-L1 localization loss, masked so only matched anchors contribute.
    loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \
        data=loc_target_mask * (loc_preds - loc_target), scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1., \
        normalization='valid', name="loc_loss")

    # monitoring training status; grad_scale=0 means no gradient contribution
    cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label")
    det = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
        name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")

    # group output
    out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
    return out
示例#11
0
def get_symbol_train(num_classes=20):
    """
    Single-shot multi-box detection with VGG 16 layers ConvNet
    This is a modified version, with fc6/fc7 layers replaced by conv layers
    And the network is slightly smaller than original VGG 16 network
    This is a training network with losses

    Parameters:
    ----------
    num_classes: int
        number of object classes not including background

    Returns:
    ----------
    mx.Symbol
    """
    data = mx.symbol.Variable(name="data")
    label = mx.symbol.Variable(name="label")

    # group 1
    conv1_1 = mx.symbol.Convolution(
        data=data, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_1")
    relu1_1 = mx.symbol.Activation(data=conv1_1, act_type="relu", name="relu1_1")
    conv1_2 = mx.symbol.Convolution(
        data=relu1_1, kernel=(3, 3), pad=(1, 1), num_filter=64, name="conv1_2")
    relu1_2 = mx.symbol.Activation(data=conv1_2, act_type="relu", name="relu1_2")
    pool1 = mx.symbol.Pooling(
        data=relu1_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool1")
    # group 2
    conv2_1 = mx.symbol.Convolution(
        data=pool1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_1")
    relu2_1 = mx.symbol.Activation(data=conv2_1, act_type="relu", name="relu2_1")
    conv2_2 = mx.symbol.Convolution(
        data=relu2_1, kernel=(3, 3), pad=(1, 1), num_filter=128, name="conv2_2")
    relu2_2 = mx.symbol.Activation(data=conv2_2, act_type="relu", name="relu2_2")
    pool2 = mx.symbol.Pooling(
        data=relu2_2, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool2")
    # group 3
    conv3_1 = mx.symbol.Convolution(
        data=pool2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_1")
    relu3_1 = mx.symbol.Activation(data=conv3_1, act_type="relu", name="relu3_1")
    conv3_2 = mx.symbol.Convolution(
        data=relu3_1, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_2")
    relu3_2 = mx.symbol.Activation(data=conv3_2, act_type="relu", name="relu3_2")
    conv3_3 = mx.symbol.Convolution(
        data=relu3_2, kernel=(3, 3), pad=(1, 1), num_filter=256, name="conv3_3")
    relu3_3 = mx.symbol.Activation(data=conv3_3, act_type="relu", name="relu3_3")
    pool3 = mx.symbol.Pooling(
        data=relu3_3, pool_type="max", kernel=(2, 2), stride=(2, 2), \
        pooling_convention="full", name="pool3")
    # group 4 (relu4_3 is the first SSD source layer)
    conv4_1 = mx.symbol.Convolution(
        data=pool3, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_1")
    relu4_1 = mx.symbol.Activation(data=conv4_1, act_type="relu", name="relu4_1")
    conv4_2 = mx.symbol.Convolution(
        data=relu4_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_2")
    relu4_2 = mx.symbol.Activation(data=conv4_2, act_type="relu", name="relu4_2")
    conv4_3 = mx.symbol.Convolution(
        data=relu4_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv4_3")
    relu4_3 = mx.symbol.Activation(data=conv4_3, act_type="relu", name="relu4_3")
    pool4 = mx.symbol.Pooling(
        data=relu4_3, pool_type="max", kernel=(2, 2), stride=(2, 2), name="pool4")
    # group 5 (3x3/stride-1 pooling keeps the spatial resolution)
    conv5_1 = mx.symbol.Convolution(
        data=pool4, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_1")
    relu5_1 = mx.symbol.Activation(data=conv5_1, act_type="relu", name="relu5_1")
    conv5_2 = mx.symbol.Convolution(
        data=relu5_1, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_2")
    relu5_2 = mx.symbol.Activation(data=conv5_2, act_type="relu", name="relu5_2")
    conv5_3 = mx.symbol.Convolution(
        data=relu5_2, kernel=(3, 3), pad=(1, 1), num_filter=512, name="conv5_3")
    relu5_3 = mx.symbol.Activation(data=conv5_3, act_type="relu", name="relu5_3")
    pool5 = mx.symbol.Pooling(
        data=relu5_3, pool_type="max", kernel=(3, 3), stride=(1, 1),
        pad=(1, 1), name="pool5")
    # group 6: dilated 3x3 conv replaces VGG fc6
    conv6 = mx.symbol.Convolution(
        data=pool5, kernel=(3, 3), pad=(6, 6), dilate=(6, 6),
        num_filter=1024, name="conv6")
    relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6")
    # drop6 = mx.symbol.Dropout(data=relu6, p=0.5, name="drop6")
    # group 7: 1x1 conv replaces VGG fc7
    conv7 = mx.symbol.Convolution(
        data=relu6, kernel=(1, 1), pad=(0, 0), num_filter=1024, name="conv7")
    relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7")
    # drop7 = mx.symbol.Dropout(data=relu7, p=0.5, name="drop7")

    ### ssd extra layers ###
    conv8_1, relu8_1 = conv_act_layer(relu7, "8_1", 256, kernel=(1,1), pad=(0,0), \
        stride=(1,1), act_type="relu", use_batchnorm=False)
    conv8_2, relu8_2 = conv_act_layer(relu8_1, "8_2", 512, kernel=(3,3), pad=(1,1), \
        stride=(2,2), act_type="relu", use_batchnorm=False)
    conv9_1, relu9_1 = conv_act_layer(relu8_2, "9_1", 128, kernel=(1,1), pad=(0,0), \
        stride=(1,1), act_type="relu", use_batchnorm=False)
    conv9_2, relu9_2 = conv_act_layer(relu9_1, "9_2", 256, kernel=(3,3), pad=(1,1), \
        stride=(2,2), act_type="relu", use_batchnorm=False)
    conv10_1, relu10_1 = conv_act_layer(relu9_2, "10_1", 128, kernel=(1,1), pad=(0,0), \
        stride=(1,1), act_type="relu", use_batchnorm=False)
    conv10_2, relu10_2 = conv_act_layer(relu10_1, "10_2", 256, kernel=(3,3), pad=(1,1), \
        stride=(2,2), act_type="relu", use_batchnorm=False)
    # global Pooling
    pool10 = mx.symbol.Pooling(data=relu10_2, pool_type="avg",
        global_pool=True, kernel=(1, 1), name='pool10')

    # specific parameters for VGG16 network
    from_layers = [relu4_3, relu7, relu8_2, relu9_2, relu10_2, pool10]
    sizes = [[.1], [.2, .276], [.38, .461], [.56, .644], [.74, .825],
             [.92, 1.01]]
    ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
        [1,2,.5,3,1./3], [1,2,.5,3,1./3]]
    normalizations = [20, -1, -1, -1, -1, -1]
    num_channels = [512]

    loc_preds, cls_preds, anchor_boxes = multibox_layer(from_layers, \
        num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
        num_channels=num_channels, clip=True, interm_layer=0)

    # FIX: MultiBoxTarget lives under mx.contrib.symbol in MXNet 1.x (the
    # plain mx.symbol alias is gone); this also matches every other training
    # symbol in this file. Match anchors to ground truth; 3:1 negative mining.
    tmp = mx.contrib.symbol.MultiBoxTarget(
        *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \
        ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \
        negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target = tmp[0]
    loc_target_mask = tmp[1]
    cls_target = tmp[2]

    cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \
        ignore_label=-1, use_ignore=True, grad_scale=3., multi_output=True, \
        normalization='valid', name="cls_prob")
    # Smooth-L1 localization loss, masked so only matched anchors contribute.
    loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \
        data=loc_target_mask * (loc_preds - loc_target), scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1., \
        normalization='valid', name="loc_loss")

    # monitoring training status; grad_scale=0 means no gradient contribution
    cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0,
                                   name="cls_label")

    # group output
    out = mx.symbol.Group([cls_prob, loc_loss, cls_label])
    return out
示例#12
0
def get_symbol(network, num_classes, from_layers, num_filters, sizes, ratios,
               strides, pads, normalizations=-1, steps=None, min_filter=128,
               nms_thresh=0.5, force_suppress=False, nms_topk=400, **kwargs):
    """Build network for testing SSD

    Parameters
    ----------
    network : str
        base network symbol name
    num_classes : int
        number of object classes not including background
    from_layers : list of str
        feature extraction layers, use '' for add extra layers
        For example:
        from_layers = ['relu4_3', 'fc7', '', '', '', '']
        which means extract feature from relu4_3 and fc7, adding 4 extra layers
        on top of fc7
    num_filters : list of int
        number of filters for extra layers, you can use -1 for extracted features,
        however, if normalization and scale is applied, the number of filter for
        that layer must be provided.
        For example:
        num_filters = [512, -1, 512, 256, 256, 256]
    strides : list of int
        strides for the 3x3 convolution appended, -1 can be used for extracted
        feature layers
    pads : list of int
        paddings for the 3x3 convolution, -1 can be used for extracted layers
    sizes : list or list of list
        [min_size, max_size] for all layers or [[], [], []...] for specific layers
    ratios : list or list of list
        [ratio1, ratio2...] for all layers or [[], [], ...] for specific layers
    normalizations : int or list of int
        use normalizations value for all layers or [...] for specific layers,
        -1 indicate no normalizations and scales
    steps : list or None
        specify steps for each MultiBoxPrior layer, leave None (or empty), it
        will calculate according to layer dimensions
    min_filter : int
        minimum number of filters used in 1x1 convolution
    nms_thresh : float
        non-maximum suppression threshold
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections

    Returns
    -------
    mx.Symbol

    """
    # Use a None sentinel instead of a mutable default (`steps=[]`), which is
    # a single list object shared by every call.
    if steps is None:
        steps = []
    body = import_module(network).get_symbol(num_classes, **kwargs)
    layers = multi_layer_feature(body, from_layers, num_filters, strides, pads,
        min_filter=min_filter)

    # Per-anchor localization offsets, class scores and prior (anchor) boxes.
    loc_preds, cls_preds, anchor_boxes = multibox_layer(layers, \
        num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
        num_channels=num_filters, clip=False, interm_layer=0, steps=steps)

    # Inference path: softmax probabilities straight into NMS-based detection.
    cls_prob = mx.symbol.SoftmaxActivation(data=cls_preds, mode='channel', \
        name='cls_prob')
    out = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
        name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
    return out
示例#13
0
def get_symbol_train(num_classes=20):
    """
    Training symbol for single-shot multi-box detection on a trimmed VGG-16.
    The original fc6/fc7 layers are replaced with (dilated) convolutions, and
    classification, localization and monitoring losses are attached.

    Parameters:
    ----------
    num_classes: int
        number of object classes not including background

    Returns:
    ----------
    mx.Symbol
    """
    data = mx.symbol.Variable(name="data")
    label = mx.symbol.Variable(name="label")

    def conv_relu(inp, suffix, num_filter):
        # 3x3/pad-1 convolution + ReLU pair named conv<suffix>/relu<suffix>
        conv = mx.symbol.Convolution(data=inp, kernel=(3, 3), pad=(1, 1),
                                     num_filter=num_filter,
                                     name="conv" + suffix)
        return mx.symbol.Activation(data=conv, act_type="relu",
                                    name="relu" + suffix)

    def max_pool(inp, suffix, **overrides):
        # 2x2/stride-2 max pooling named pool<suffix>; kwargs override
        pool_args = dict(pool_type="max", kernel=(2, 2), stride=(2, 2))
        pool_args.update(overrides)
        return mx.symbol.Pooling(data=inp, name="pool" + suffix, **pool_args)

    # groups 1-2: two conv/relu pairs each, then downsample
    pool1 = max_pool(conv_relu(conv_relu(data, "1_1", 64), "1_2", 64), "1")
    pool2 = max_pool(conv_relu(conv_relu(pool1, "2_1", 128), "2_2", 128), "2")
    # group 3: three conv/relu pairs; 'full' convention rounds sizes up
    relu3_3 = conv_relu(conv_relu(conv_relu(pool2, "3_1", 256), "3_2", 256),
                        "3_3", 256)
    pool3 = max_pool(relu3_3, "3", pooling_convention="full")
    # group 4: relu4_3 is kept as the first SSD source layer
    relu4_3 = conv_relu(conv_relu(conv_relu(pool3, "4_1", 512), "4_2", 512),
                        "4_3", 512)
    pool4 = max_pool(relu4_3, "4")
    # group 5: 3x3/stride-1 pooling keeps the spatial resolution
    relu5_3 = conv_relu(conv_relu(conv_relu(pool4, "5_1", 512), "5_2", 512),
                        "5_3", 512)
    pool5 = max_pool(relu5_3, "5", kernel=(3, 3), stride=(1, 1), pad=(1, 1))
    # group 6: dilated 3x3 convolution in place of VGG fc6
    conv6 = mx.symbol.Convolution(
        data=pool5, kernel=(3, 3), pad=(6, 6), dilate=(6, 6),
        num_filter=1024, name="conv6")
    relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6")
    # group 7: 1x1 convolution in place of VGG fc7
    conv7 = mx.symbol.Convolution(
        data=relu6, kernel=(1, 1), pad=(0, 0), num_filter=1024, name="conv7")
    relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7")

    ### ssd extra layers (only the activations are consumed) ###
    _, relu8_1 = conv_act_layer(relu7, "8_1", 256, kernel=(1, 1), pad=(0, 0),
                                stride=(1, 1), act_type="relu",
                                use_batchnorm=False)
    _, relu8_2 = conv_act_layer(relu8_1, "8_2", 512, kernel=(3, 3), pad=(1, 1),
                                stride=(2, 2), act_type="relu",
                                use_batchnorm=False)
    _, relu9_1 = conv_act_layer(relu8_2, "9_1", 128, kernel=(1, 1), pad=(0, 0),
                                stride=(1, 1), act_type="relu",
                                use_batchnorm=False)
    _, relu9_2 = conv_act_layer(relu9_1, "9_2", 256, kernel=(3, 3), pad=(1, 1),
                                stride=(2, 2), act_type="relu",
                                use_batchnorm=False)
    _, relu10_1 = conv_act_layer(relu9_2, "10_1", 128, kernel=(1, 1),
                                 pad=(0, 0), stride=(1, 1), act_type="relu",
                                 use_batchnorm=False)
    _, relu10_2 = conv_act_layer(relu10_1, "10_2", 256, kernel=(3, 3),
                                 pad=(1, 1), stride=(2, 2), act_type="relu",
                                 use_batchnorm=False)
    # global average pooling gives the coarsest SSD source layer
    pool10 = mx.symbol.Pooling(data=relu10_2, pool_type="avg",
                               global_pool=True, kernel=(1, 1), name='pool10')

    # SSD head configuration specific to this VGG16 variant
    from_layers = [relu4_3, relu7, relu8_2, relu9_2, relu10_2, pool10]
    sizes = [[.1], [.2, .276], [.38, .461], [.56, .644], [.74, .825],
             [.92, 1.01]]
    ratios = [[1, 2, .5]] + [[1, 2, .5, 3, 1. / 3]] * 5
    normalizations = [20, -1, -1, -1, -1, -1]
    num_channels = [512]

    loc_preds, cls_preds, anchor_boxes = multibox_layer(
        from_layers, num_classes, sizes=sizes, ratios=ratios,
        normalization=normalizations, num_channels=num_channels,
        clip=True, interm_layer=0)

    # anchor/ground-truth matching with 3:1 hard negative mining
    targets = mx.contrib.symbol.MultiBoxTarget(
        *[anchor_boxes, label, cls_preds], overlap_threshold=.5,
        ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0,
        negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target = targets[0]
    loc_target_mask = targets[1]
    cls_target = targets[2]

    # classification loss (softmax) and masked smooth-L1 localization loss
    cls_prob = mx.symbol.SoftmaxOutput(
        data=cls_preds, label=cls_target, ignore_label=-1, use_ignore=True,
        grad_scale=3., multi_output=True, normalization='valid',
        name="cls_prob")
    loc_loss_ = mx.symbol.smooth_l1(
        name="loc_loss_", data=loc_target_mask * (loc_preds - loc_target),
        scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1.,
                                  normalization='valid', name="loc_loss")

    # monitoring output only: grad_scale=0 contributes no gradient
    cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0,
                                   name="cls_label")

    # group output
    return mx.symbol.Group([cls_prob, loc_loss, cls_label])
示例#14
0
def get_symbol_train(num_classes=20,
                     nms_thresh=0.5,
                     force_suppress=False,
                     nms_topk=400):
    """Build a training symbol for an SSD-style detector with a top-down
    deconvolution feature pyramid and residual prediction heads.

    Parameters
    ----------
    num_classes : int
        number of object classes not including background
    nms_thresh : float
        non-maximum suppression threshold
    force_suppress : boolean
        whether suppress different class objects
    nms_topk : int
        apply NMS to top K detections

    Returns
    -------
    mx.Symbol
    """
    label = mx.symbol.Variable(name="label")

    # Only the 2nd and 4th feature maps feed the pyramid; the other two
    # returned by get_feature_layer() are unused here.
    _, feature_net2, _, feature_net4 = get_feature_layer()

    # Bottom-up path: stride-2 convolutions shrink the coarsest map further.
    # conv_act_layer returns (conv, relu); only the activations are consumed.
    _, relu1 = conv_act_layer(feature_net4, "8_1", 512, stride=(2, 2))
    _, relu2 = conv_act_layer(relu1, "9_1", 512, stride=(2, 2))
    _, relu3 = conv_act_layer(relu2, "10_1", 512, stride=(2, 2))
    _, relu4 = conv_act_layer(relu3,
                              "11_1",
                              512,
                              stride=(1, 1),
                              pad=(0, 0),
                              kernel=(3, 3))
    # Top-down path: deconvolutions merge each level back with the
    # corresponding bottom-up feature map.
    deconv1 = deconv_layer(relu4,
                           relu3,
                           deconv_kernel=(3, 3),
                           deconv_pad=(0, 0))
    deconv2 = deconv_layer(deconv1, relu2)
    deconv3 = deconv_layer(deconv2,
                           relu1,
                           deconv_kernel=(2, 2),
                           deconv_pad=(0, 0))
    deconv4 = deconv_layer(deconv3,
                           feature_net4,
                           deconv_kernel=(2, 2),
                           deconv_pad=(0, 0))
    deconv5 = deconv_layer(deconv4,
                           feature_net2,
                           deconv_kernel=(2, 2),
                           deconv_pad=(0, 0))
    # Residual prediction head on every pyramid level.
    layer1 = residual_predict(relu4)
    layer2 = residual_predict(deconv1)
    layer3 = residual_predict(deconv2)
    layer4 = residual_predict(deconv3)
    layer5 = residual_predict(deconv4)
    layer6 = residual_predict(deconv5)
    # Finest to coarsest, matching the anchor size progression below.
    from_layers = [layer6, layer5, layer4, layer3, layer2, layer1]
    sizes = [[.1, .141], [.2, .272], [.37, .447], [.54, .619], [.71, .79],
             [.88, .961]]
    ratios = [[1, 2, .5], [1, 2, .5, 3, 1. / 3], [1, 2, .5, 3, 1. / 3],
              [1, 2, .5, 3, 1. / 3], [1, 2, .5], [1, 2, .5]]
    loc_preds, cls_preds, anchor_boxes = multibox_layer(from_layers,
                                                        num_classes,
                                                        sizes=sizes,
                                                        ratios=ratios,
                                                        clip=False,
                                                        interm_layer=0)

    # Match anchors to ground truth; 3:1 hard negative mining.
    tmp = mx.contrib.symbol.MultiBoxTarget(
        *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \
        ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \
        negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target = tmp[0]
    loc_target_mask = tmp[1]
    cls_target = tmp[2]

    cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \
        ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True, \
        normalization='valid', name="cls_prob")
    # Smooth-L1 localization loss, masked so only matched anchors contribute.
    loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \
        data=loc_target_mask * (loc_preds - loc_target), scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1., \
        normalization='valid', name="loc_loss")

    # monitoring training status; grad_scale=0 means no gradient contribution
    cls_label = mx.symbol.MakeLoss(data=cls_target,
                                   grad_scale=0,
                                   name="cls_label")
    det = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
        name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")

    # group output
    out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
    return out
示例#15
0
def get_symbol_train(seq_len):
    """
    Build the training symbol for an expression-guided FPN-SSD detector.

    An image backbone selected by cfg.NETWORK (vgg16 / vgg16_bn / resnet_* /
    inceptionv3) produces feature maps c5/c4/c3.  An RNN encodes the word
    sequence ('expression' input), and residual attention units modulate each
    feature map with the language feature.  A small FPN (P3..P7) is built on
    top, followed by standard SSD multibox training losses.

    Parameters:
    ----------
    seq_len : int
        length of the expression (word-id) sequence fed to the RNN branch

    Returns:
    ----------
    (mx.Symbol, tuple of str, tuple of str)
        the grouped training symbol, the data names ('expression', 'data'),
        and the label names ('label',)
    """
    network = cfg.NETWORK
    num_classes = cfg.NUM_CLASSES
    data = mx.symbol.Variable(name="data")
    expression = mx.symbol.Variable(name='expression')
    label = mx.symbol.Variable(name="label")
    # Backbone selection.  Each symbol_* helper returns four feature maps;
    # only the three coarsest (c5, c4, c3) are used for the FPN below.
    # NOTE(review): if cfg.NETWORK matches none of these branches, c5/c4/c3
    # are never assigned and the residual_att_unit call below raises
    # NameError — consider a trailing `else: raise ValueError(...)`.
    if network=='vgg16':
        c5,c4,c3,_ = symbol_vgg(data)
    elif network=='vgg16_bn':
        c5,c4,c3,_ = symbol_vgg_bn(data)
    elif network.startswith('resnet'):
        # Depth is encoded in the network name, e.g. 'resnet_50' -> 50.
        num_layers = int(network.split('_')[-1])
        if num_layers >= 50:
            # bottleneck residual units (1x1-3x3-1x1) with wider channels
            filter_list = [64, 256, 512, 1024, 2048]
            bottle_neck = True
        else:
            # basic residual units (3x3-3x3)
            filter_list = [64, 64, 128, 256, 512]
            bottle_neck = False
        # units per stage for each standard ResNet depth
        if num_layers == 18:
            units = [2, 2, 2, 2]
        elif num_layers == 34:
            units = [3, 4, 6, 3]
        elif num_layers == 50:
            units = [3, 4, 6, 3]
        elif num_layers == 101:
            units = [3, 4, 23, 3]
        elif num_layers == 152:
            units = [3, 8, 36, 3]
        elif num_layers == 200:
            units = [3, 24, 36, 3]
        elif num_layers == 269:
            units = [3, 30, 48, 8]
        else:
            raise ValueError("no experiments done on num_layers {}, you can do it yourself".format(num_layers))
        c5,c4,c3,_=symbol_resnet(data,units,filter_list,bottle_neck)
    elif network=='inceptionv3':
        c5,c4,c3,_=symbol_Inception_v3(data)
    # Language branch: encode the expression, then fuse it into each visual
    # feature map via residual attention (ratio presumably controls the
    # attention mixing weight — confirm in residual_att_unit).
    rnn_feat = get_rnn_feat(seq_len,expression)
    c5 = residual_att_unit(data=c5,express=rnn_feat,ratio=0.75,num_filter=512,stride=(1,1),bottle_neck=False,dim_match=False,name='c5',deform=False)
    c4 = residual_att_unit(data=c4,express=rnn_feat,ratio=0.5,num_filter=256,stride=(1,1),bottle_neck=False,dim_match=False,name='c4',deform=False)
    c3 = residual_att_unit(data=c3,express=rnn_feat,ratio=0.25,num_filter=128,stride=(1,1),bottle_neck=False,dim_match=False,name='c3',deform=False)
    # Extra coarse levels: P6/P7 by stride-2 convs on c5.
    P6 = mx.symbol.Convolution(data=c5,num_filter=256,kernel=(3,3),stride=(2,2),pad=(1,1),name='P6')
    p6_relu = mx.symbol.Activation(data=P6,act_type='relu',name='p6_relu')
    P7 = mx.symbol.Convolution(data=p6_relu,num_filter=256,kernel=(3,3),stride=(2,2),pad=(1,1),name='P7')
    P5 = mx.symbol.Convolution(data=c5,num_filter=256,kernel=(1,1),stride=(1,1),pad=(0,0),name='P5')
    
    # FPN top-down pathway: upsample (deconv) and add the 1x1 lateral.
    P5_topdown = mx.symbol.Deconvolution(data=P5,num_filter=256,kernel=(4,4),stride=(2,2),pad=(1,1),name='P5_topdown')
    P4_lateral = mx.symbol.Convolution(data=c4,num_filter=256,kernel=(1,1),stride=(1,1),pad=(0,0),name='P4_lateral')
    P4 = mx.sym.elemwise_add(P4_lateral,P5_topdown,name='P4')
    
    P4_topdown = mx.symbol.Deconvolution(data=P4,num_filter=256,kernel=(4,4),stride=(2,2),pad=(1,1),name='P4_topdown')
    P3_lateral = mx.symbol.Convolution(data=c3,num_filter=256,kernel=(1,1),stride=(1,1),pad=(0,0),name='P3_lateral')
    P3 = mx.sym.elemwise_add(P3_lateral, P4_topdown,name='P3')
    # Multibox priors per pyramid level (input resolution 640, see steps).
    # NOTE(review): from_layers runs coarse->fine (P7..P3) while sizes grow
    # and steps [4,8,16,32,32]/640 shrink relative to the feature strides —
    # this pairing looks inverted; confirm against the anchor layout the
    # model was trained with.
    from_layers = [P7,P6,P5,P4,P3]
    sizes = [[0.01, .1], [.2,.3], [.4, .5], [.6, .7],[.9,1.]]
    ratios = [[1,], [1,], [1,], [1,],[1,]]
    normalizations = [20, -1, -1, -1,-1]
    steps = [ x / 640.0 for x in [4, 8, 16, 32,32]]
    num_channels = [256]

    # Per-level loc/cls heads and anchor generation.
    loc_preds, cls_preds, anchor_boxes = multibox_layer(from_layers,\
        num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
        num_channels=num_channels, clip=False, interm_layer=0, steps=steps)

    # Match anchors to ground truth: yields regression targets, a mask that
    # zeroes loc loss on negatives, and per-anchor class targets.
    tmp = mx.contrib.symbol.MultiBoxTarget(
        *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \
        ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \
        negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target = tmp[0]
    loc_target_mask = tmp[1]
    cls_target = tmp[2]

    # Classification loss (softmax over classes, ignoring unmatched anchors)
    # and smooth-L1 localization loss on positives only.
    cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \
        ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True, \
        normalization='valid', name="cls_prob")
    loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \
        data=loc_target_mask * (loc_preds - loc_target), scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1., \
        normalization='valid', name="loc_loss")

    # monitoring training status: zero-gradient outputs so metrics can read
    # class targets and decoded detections without affecting training
    cls_label = mx.symbol.MakeLoss(data=cls_target, grad_scale=0, name="cls_label")
    det = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
        name="detection", nms_threshold=cfg.NMS_THRESHOLD, force_suppress=cfg.FORCE_SUPPRESS,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=cfg.NMS_TOPK)
    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")

    # group output
    out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
    return out,('expression','data',),('label',)
def _mobilenet_dw_sep(data, prefix, num_filter_dw, num_filter_sep, stride):
    """Depthwise-separable convolution unit (MobileNet style).

    3x3 channelwise (depthwise) conv -> BN -> ReLU, followed by a 1x1
    pointwise conv -> BN -> ReLU.  Symbol names reproduce the original
    hand-unrolled layers ('conv{prefix}_dw', 'conv{prefix}_dw_bn',
    'relu{prefix}_dw', 'conv{prefix}_sep', ...) so pretrained parameter
    files keep loading unchanged.

    Parameters
    ----------
    data : mx.Symbol
        input feature map
    prefix : str
        name infix, e.g. '2_1' produces 'conv2_1_dw'
    num_filter_dw : int
        channel count of the depthwise conv (one group per channel, so it
        must equal the input channel count)
    num_filter_sep : int
        output channels of the 1x1 pointwise conv
    stride : tuple of int
        stride of the depthwise conv; (2, 2) halves the resolution

    Returns
    -------
    mx.Symbol
        the 'relu{prefix}_sep' activation
    """
    # NOTE(review): ChannelwiseConvolution is a fork-specific operator
    # (mainline MXNet expresses depthwise conv as Convolution with
    # num_group == channels) — confirm the runtime provides it.
    dw = mx.symbol.ChannelwiseConvolution(
        name='conv%s_dw' % prefix, data=data, num_filter=num_filter_dw,
        pad=(1, 1), kernel=(3, 3), stride=stride, no_bias=True,
        num_group=num_filter_dw)
    dw_bn = mx.symbol.BatchNorm(
        name='conv%s_dw_bn' % prefix, data=dw, use_global_stats=False,
        fix_gamma=False, eps=0.000100)
    dw_relu = mx.symbol.Activation(
        name='relu%s_dw' % prefix, data=dw_bn, act_type='relu')
    sep = mx.symbol.Convolution(
        name='conv%s_sep' % prefix, data=dw_relu, num_filter=num_filter_sep,
        pad=(0, 0), kernel=(1, 1), stride=(1, 1), no_bias=True)
    sep_bn = mx.symbol.BatchNorm(
        name='conv%s_sep_bn' % prefix, data=sep, use_global_stats=False,
        fix_gamma=False, eps=0.000100)
    return mx.symbol.Activation(
        name='relu%s_sep' % prefix, data=sep_bn, act_type='relu')


def get_symbol_train(num_classes=20,
                     nms_thresh=0.5,
                     force_suppress=False,
                     nms_topk=400):
    """SSD training symbol with a MobileNet-style (depthwise-separable)
    backbone replacing the lower VGG groups.

    Builds the backbone, the dilated conv6/conv7 pair (SSD's fc6/fc7
    replacement), the extra shrinking feature layers, and the multibox
    training losses.

    Parameters
    ----------
    num_classes : int
        number of object classes not including background
    nms_thresh : float
        non-maximum suppression IoU threshold for the monitoring output
    force_suppress : bool
        whether NMS suppresses across different classes
    nms_topk : int
        apply NMS to the top K detections

    Returns
    -------
    mx.Symbol
        group of [cls_prob, loc_loss, cls_label, det_out]
    """
    data = mx.symbol.Variable(name="data")
    label = mx.symbol.Variable(name="label")

    # Stem: full 3x3 stride-2 conv -> BN -> ReLU.
    conv1 = mx.symbol.Convolution(name='conv1',
                                  data=data,
                                  num_filter=32,
                                  pad=(1, 1),
                                  kernel=(3, 3),
                                  stride=(2, 2),
                                  no_bias=True)
    conv1_bn = mx.symbol.BatchNorm(name='conv1_bn',
                                   data=conv1,
                                   use_global_stats=False,
                                   fix_gamma=False,
                                   eps=0.000100)
    relu1 = mx.symbol.Activation(name='relu1',
                                 data=conv1_bn,
                                 act_type='relu')

    # Depthwise-separable stacks (groups 2-4).  Each pair widens the
    # channels and the second unit of each group downsamples by stride 2.
    relu2_1_sep = _mobilenet_dw_sep(relu1, '2_1', 32, 64, (1, 1))
    relu2_2_sep = _mobilenet_dw_sep(relu2_1_sep, '2_2', 64, 128, (2, 2))
    relu3_1_sep = _mobilenet_dw_sep(relu2_2_sep, '3_1', 128, 128, (1, 1))
    relu3_2_sep = _mobilenet_dw_sep(relu3_1_sep, '3_2', 128, 256, (2, 2))
    # relu4_1_sep doubles as the finest SSD feature map (see from_layers).
    relu4_1_sep = _mobilenet_dw_sep(relu3_2_sep, '4_1', 256, 256, (1, 1))
    relu4_2_sep = _mobilenet_dw_sep(relu4_1_sep, '4_2', 256, 512, (2, 2))

    # group 5: plain 3x3 convs (with bias, no BN), VGG-style.
    conv5_1 = mx.symbol.Convolution(data=relu4_2_sep, kernel=(3, 3),
                                    pad=(1, 1), num_filter=512,
                                    name="conv5_1")
    relu5_1 = mx.symbol.Activation(data=conv5_1, act_type="relu",
                                   name="relu5_1")
    conv5_2 = mx.symbol.Convolution(data=relu5_1, kernel=(3, 3),
                                    pad=(1, 1), num_filter=512,
                                    name="conv5_2")
    relu5_2 = mx.symbol.Activation(data=conv5_2, act_type="relu",
                                   name="relu5_2")
    conv5_3 = mx.symbol.Convolution(data=relu5_2, kernel=(3, 3),
                                    pad=(1, 1), num_filter=512,
                                    name="conv5_3")
    relu5_3 = mx.symbol.Activation(data=conv5_3, act_type="relu",
                                   name="relu5_3")
    # Stride-1 pooling keeps resolution; conv6 then uses dilation 6 to
    # enlarge the receptive field (SSD's fc6 -> dilated-conv trick).
    pool5 = mx.symbol.Pooling(data=relu5_3, pool_type="max", kernel=(3, 3),
                              stride=(1, 1), pad=(1, 1), name="pool5")
    # group 6
    conv6 = mx.symbol.Convolution(data=pool5, kernel=(3, 3), pad=(6, 6),
                                  dilate=(6, 6), num_filter=1024,
                                  name="conv6")
    relu6 = mx.symbol.Activation(data=conv6, act_type="relu", name="relu6")
    # group 7: 1x1 conv standing in for VGG's fc7.
    conv7 = mx.symbol.Convolution(data=relu6, kernel=(1, 1), pad=(0, 0),
                                  num_filter=1024, name="conv7")
    relu7 = mx.symbol.Activation(data=conv7, act_type="relu", name="relu7")

    ### ssd extra layers: progressively coarser feature maps ###
    conv8_2, relu8_2 = conv_act_layer(relu7, "8_2", 512, kernel=(3,3), pad=(1,1), \
        stride=(2,2), act_type="relu", use_batchnorm=False)
    conv9_2, relu9_2 = conv_act_layer(relu8_2, "9_2", 256, kernel=(3,3), pad=(1,1), \
        stride=(2,2), act_type="relu", use_batchnorm=False)
    conv10_2, relu10_2 = conv_act_layer(relu9_2, "10_2", 256, kernel=(3,3), pad=(0,0), \
        stride=(1,1), act_type="relu", use_batchnorm=False)
    conv11_2, relu11_2 = conv_act_layer(relu10_2, "11_2", 256, kernel=(3,3), pad=(0,0), \
        stride=(1,1), act_type="relu", use_batchnorm=False)

    # Multibox head parameters (standard SSD-300 layout: sizes/steps are
    # fractions of a 300-pixel input; normalization 20 scales only the
    # first, L2-normalized layer).
    from_layers = [relu4_1_sep, relu7, relu8_2, relu9_2, relu10_2, relu11_2]
    sizes = [[.1, .141], [.2, .272], [.37, .447], [.54, .619], [.71, .79],
             [.88, .961]]
    ratios = [[1,2,.5], [1,2,.5,3,1./3], [1,2,.5,3,1./3], [1,2,.5,3,1./3], \
        [1,2,.5], [1,2,.5]]
    normalizations = [20, -1, -1, -1, -1, -1]
    steps = [x / 300.0 for x in [8, 16, 32, 64, 100, 300]]
    num_channels = [512]

    # Per-level loc/cls prediction heads and anchor generation.
    loc_preds, cls_preds, anchor_boxes = multibox_layer(from_layers, \
        num_classes, sizes=sizes, ratios=ratios, normalization=normalizations, \
        num_channels=num_channels, clip=False, interm_layer=0, steps=steps)

    # Anchor/ground-truth matching: regression targets, a mask zeroing the
    # loc loss on negatives, and per-anchor class targets (with hard
    # negative mining at ratio 3).
    tmp = mx.contrib.symbol.MultiBoxTarget(
        *[anchor_boxes, label, cls_preds], overlap_threshold=.5, \
        ignore_label=-1, negative_mining_ratio=3, minimum_negative_samples=0, \
        negative_mining_thresh=.5, variances=(0.1, 0.1, 0.2, 0.2),
        name="multibox_target")
    loc_target = tmp[0]
    loc_target_mask = tmp[1]
    cls_target = tmp[2]

    # Classification loss (ignoring unmatched anchors) and smooth-L1
    # localization loss on positives only.
    cls_prob = mx.symbol.SoftmaxOutput(data=cls_preds, label=cls_target, \
        ignore_label=-1, use_ignore=True, grad_scale=1., multi_output=True, \
        normalization='valid', name="cls_prob")
    loc_loss_ = mx.symbol.smooth_l1(name="loc_loss_", \
        data=loc_target_mask * (loc_preds - loc_target), scalar=1.0)
    loc_loss = mx.symbol.MakeLoss(loc_loss_, grad_scale=1., \
        normalization='valid', name="loc_loss")

    # monitoring training status: zero-gradient outputs so metrics can read
    # class targets and decoded detections without affecting training
    cls_label = mx.symbol.MakeLoss(data=cls_target,
                                   grad_scale=0,
                                   name="cls_label")
    det = mx.contrib.symbol.MultiBoxDetection(*[cls_prob, loc_preds, anchor_boxes], \
        name="detection", nms_threshold=nms_thresh, force_suppress=force_suppress,
        variances=(0.1, 0.1, 0.2, 0.2), nms_topk=nms_topk)
    det = mx.symbol.MakeLoss(data=det, grad_scale=0, name="det_out")

    # group output
    out = mx.symbol.Group([cls_prob, loc_loss, cls_label, det])
    return out