Example #1

import mxnet as mx
import numpy as np

# block_factory is assumed to be provided by the surrounding project;
# it builds one candidate block for the given expansion/kernel/group config.
def sample_fbnet(theta_path, feature_dim=192, data=None, prefix='fbnet'):
    if len(prefix) > 0:
        prefix += '_'
    if data is None:
        data = mx.symbol.Variable(name="data")
    with open(theta_path, 'r') as f:
        lines = f.readlines()
    tbs_idx = 0  # index of the next theta line to consume

    _f = [16, 16, 24, 32, 64, 112, 184, 352, 1984]  # output channels per stage
    _n = [1, 1, 4, 4, 4, 4, 4, 1, 1]  # layers per stage
    _s = [2, 1, 2, 2, 2, 1, 2, 1, 1]  # stride of each stage's first layer
    _e = [1, 1, 3, 6, 1, 1, 3, 6]  # expansion ratio per candidate block
    _kernel = [3, 3, 3, 3, 5, 5, 5, 5]  # kernel size per candidate block
    _group = [1, 2, 1, 1, 1, 2, 1, 1]  # group count per candidate block
    _tbs = [1, 7]  # range of stage indices that are searched ("to be searched")
    _block_size = len(_e) + 1  # 8 candidate blocks plus 1 skip block
    for outer_layer_idx in range(len(_f)):
        num_filter = _f[outer_layer_idx]
        num_layers = _n[outer_layer_idx]
        s_size = _s[outer_layer_idx]

        if outer_layer_idx == 0:
            data = mx.sym.Convolution(data=data,
                                      num_filter=num_filter,
                                      kernel=(3, 3),
                                      stride=(s_size, s_size),
                                      pad=(1, 1),
                                      name=prefix + 'conv0')
            data = mx.sym.Activation(data=data,
                                     act_type='relu',
                                     name=prefix + 'relu0')
            input_channels = num_filter
        elif (outer_layer_idx <= _tbs[1]) and (outer_layer_idx >= _tbs[0]):
            for inner_layer_idx in range(num_layers):

                # only the first layer in a stage uses the stage stride
                if inner_layer_idx > 0:
                    s_size = 1
                # tbs part: read this layer's theta row and keep only the
                # candidate block with the largest architecture weight
                line = lines[tbs_idx]
                theta = [float(tmp) for tmp in line.strip().split(' ')[1:]]
                block_idx = np.argmax(theta)

                if block_idx != _block_size - 1:  # last index means the skip block
                    kernel_size = (_kernel[block_idx], _kernel[block_idx])
                    group = _group[block_idx]
                    prefix_ = "%s_layer_%d_%d_block_%d" % (
                        prefix, outer_layer_idx, inner_layer_idx, block_idx)
                    expansion = _e[block_idx]
                    stride = (s_size, s_size)

                    # data = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5, momentum=0.9,
                    #                 name="%slayer_%d_%d_bn" % (prefix, outer_layer_idx, inner_layer_idx))

                    block_out = block_factory(data,
                                              input_channels=input_channels,
                                              num_filters=num_filter,
                                              kernel_size=kernel_size,
                                              prefix=prefix_,
                                              expansion=expansion,
                                              group=group,
                                              shuffle=True,
                                              stride=stride,
                                              bn=False)
                    if (input_channels == num_filter) and (s_size == 1):
                        block_out = block_out + data
                    data = block_out
                tbs_idx += 1
                input_channels = num_filter

        elif outer_layer_idx == len(_f) - 1:
            # final conv block (3x3 kernel)
            data = mx.sym.BatchNorm(data=data,
                                    fix_gamma=False,
                                    eps=2e-5,
                                    momentum=0.9,
                                    name="%slayer_out_bn" % (prefix))
            data = mx.sym.Activation(data=data,
                                     act_type='relu',
                                     name="%sout_relu0" % prefix)
            data = mx.sym.Convolution(data,
                                      num_filter=num_filter,
                                      stride=(s_size, s_size),
                                      kernel=(3, 3),
                                      name="%slayer_%d_last_conv" %
                                      (prefix, outer_layer_idx))
        else:
            raise ValueError("Wrong layer index %d" % outer_layer_idx)

    # avg pool part
    data = mx.symbol.Pooling(data=data,
                             global_pool=True,
                             kernel=(7, 7),
                             pool_type='avg',
                             name=prefix + "global_pool")

    data = mx.symbol.Flatten(data=data, name=prefix + 'flat_pool')
    data = mx.symbol.FullyConnected(data=data,
                                    num_hidden=feature_dim,
                                    name=prefix + 'flat')
    return data
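A minimal usage sketch (the theta file path and input shape below are illustrative, not from the source):

# Hypothetical usage: build the sampled feature extractor and bind it.
feat = sample_fbnet("theta.txt", feature_dim=192)  # "theta.txt" is illustrative
mod = mx.mod.Module(symbol=feat, data_names=['data'], label_names=None)
mod.bind(data_shapes=[('data', (1, 3, 112, 112))], for_training=False)
mod.init_params(initializer=mx.init.Xavier())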
Example #2
File: FBNet.py  Project: junqiangwu/NAS
    def _build(self):
        """Build symbol."""
        self._logger.info("Build symbol")
        data = self._data
        for outer_layer_idx in range(len(self._f)):
            num_filter = self._f[outer_layer_idx]
            num_layers = self._n[outer_layer_idx]
            s_size = self._s[outer_layer_idx]

            if outer_layer_idx == 0:
                data = mx.sym.Convolution(data=data,
                                          num_filter=num_filter,
                                          kernel=(3, 3),
                                          stride=(s_size, s_size),
                                          pad=(1, 1))
                # data = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5, momentum=0.9)
                data = mx.sym.Activation(data=data, act_type='relu')
                input_channels = num_filter
            elif (outer_layer_idx <= self._tbs[1]) and (outer_layer_idx >=
                                                        self._tbs[0]):
                for inner_layer_idx in range(num_layers):
                    data = mx.sym.BatchNorm(data=data,
                                            fix_gamma=False,
                                            eps=2e-5,
                                            momentum=0.9)
                    # only the first layer in a stage uses the stage stride
                    if inner_layer_idx > 0:
                        s_size = 1
                    # tbs part: build every candidate block in parallel
                    block_list = []

                    for block_idx in range(self._block_size - 1):
                        kernel_size = (self._kernel[block_idx],
                                       self._kernel[block_idx])
                        group = self._group[block_idx]
                        prefix = "layer_%d_%d_block_%d" % (
                            outer_layer_idx, inner_layer_idx, block_idx)
                        expansion = self._e[block_idx]
                        stride = (s_size, s_size)

                        block_out = block_factory(
                            data,
                            input_channels=input_channels,
                            num_filters=num_filter,
                            kernel_size=kernel_size,
                            prefix=prefix,
                            expansion=expansion,
                            group=group,
                            shuffle=True,
                            stride=stride,
                            bn=False)
                        # block_out = mx.sym.BatchNorm(data=block_out, fix_gamma=False, eps=2e-5, momentum=0.9)
                        if (input_channels == num_filter) and (s_size == 1):
                            block_out = block_out + data
                        block_out = mx.sym.expand_dims(block_out, axis=1)
                        block_list.append(block_out)
                    # theta parameters, gumbel
                    tmp_name = "layer_%d_%d_%s" % (outer_layer_idx,
                                                   inner_layer_idx,
                                                   self._theta_unique_name)
                    tmp_gumbel_name = "layer_%d_%d_%s" % (
                        outer_layer_idx, inner_layer_idx, "gumbel_random")
                    self._theta_name.append(tmp_name)
                    if inner_layer_idx >= 1:  # skip part
                        theta_var = mx.sym.var(tmp_name,
                                               shape=(self._block_size, ))
                        gumbel_var = mx.sym.var(tmp_gumbel_name,
                                                shape=(self._block_size, ))
                        self._input_shapes[tmp_name] = (self._block_size, )
                        self._input_shapes[tmp_gumbel_name] = (
                            self._block_size, )
                        block_list.append(mx.sym.expand_dims(data, axis=1))
                        self._m_size.append(self._block_size)
                    else:
                        theta_var = mx.sym.var(tmp_name,
                                               shape=(self._block_size - 1, ))
                        gumbel_var = mx.sym.var(tmp_gumbel_name,
                                                shape=(self._block_size - 1, ))
                        self._m_size.append(self._block_size - 1)
                        self._input_shapes[tmp_name] = (self._block_size - 1, )
                        self._input_shapes[tmp_gumbel_name] = (
                            self._block_size - 1, )

                    self._theta_vars.append(theta_var)
                    self._gumbel_vars.append(gumbel_var)
                    self._gumbel_var_names.append(
                        [tmp_gumbel_name, self._m_size[-1]])

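                    # Gumbel-Softmax relaxation: soft block weights are
                    # softmax((theta + gumbel_noise) / temperature), which
                    # keeps the block choice differentiable w.r.t. theta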
                    theta = mx.sym.broadcast_div(
                        mx.sym.elemwise_add(theta_var, gumbel_var),
                        self._temperature)

                    m = mx.sym.repeat(mx.sym.reshape(mx.sym.softmax(theta),
                                                     (1, -1)),
                                      repeats=self._dev_batch_size,
                                      axis=0)
                    self._m.append(m)
                    m = mx.sym.reshape(m, (-2, 1, 1, 1))
                    # TODO why stack wrong
                    data = mx.sym.concat(*block_list,
                                         dim=1,
                                         name="layer_%d_%d_concat" %
                                         (outer_layer_idx, inner_layer_idx))
                    # weighted sum of candidate outputs along the block axis
                    data = mx.sym.broadcast_mul(data, m)
                    data = mx.sym.sum(data, axis=1)
                    input_channels = num_filter

            elif outer_layer_idx == len(self._f) - 1:
                # final conv block (3x3 kernel)
                data = mx.sym.BatchNorm(data=data,
                                        fix_gamma=False,
                                        eps=2e-5,
                                        momentum=0.9)
                data = mx.sym.Activation(data=data, act_type='relu')
                data = mx.sym.Convolution(data,
                                          num_filter=num_filter,
                                          stride=(s_size, s_size),
                                          kernel=(3, 3),
                                          name="layer_%d_last_conv" %
                                          outer_layer_idx)
            else:
                raise ValueError("Wrong layer index %d" % outer_layer_idx)

        # avg pool part
        data = mx.symbol.Pooling(data=data,
                                 global_pool=True,
                                 kernel=(7, 7),
                                 pool_type='avg',
                                 name="global_pool")

        data = mx.symbol.Flatten(data=data, name='flat_pool')
        data = mx.symbol.FullyConnected(data=data,
                                        num_hidden=self._feature_dim)
        # fc part (AmSoftmax and Arcface below are custom project operators,
        # not standard MXNet symbols)
        if self._model_type == 'softmax':
            data = mx.symbol.FullyConnected(name="output_fc",
                                            data=data,
                                            num_hidden=self._output_dim)
        elif self._model_type == 'amsoftmax':
            s = 30.0
            margin = 0.3
            data = mx.symbol.L2Normalization(data, mode='instance',
                                             eps=1e-8) * s
            w = mx.sym.Variable('fc_weight',
                                init=mx.init.Xavier(magnitude=2),
                                shape=(self._output_dim, self._feature_dim),
                                dtype=np.float32)
            norm_w = mx.symbol.L2Normalization(w, mode='instance', eps=1e-8)
            data = mx.symbol.AmSoftmax(data,
                                       weight=norm_w,
                                       num_hidden=self._output_dim,
                                       lower_class_idx=0,
                                       upper_class_idx=self._output_dim,
                                       verbose=False,
                                       margin=margin,
                                       s=s,
                                       label=self._label_index)
        elif self._model_type == 'arcface':
            s = 64.0
            margin = 0.5
            data = mx.symbol.L2Normalization(data, mode='instance',
                                             eps=1e-8) * s
            w = mx.sym.Variable('fc_weight',
                                init=mx.init.Xavier(magnitude=2),
                                shape=(self._output_dim, self._feature_dim),
                                dtype=np.float32)
            norm_w = mx.symbol.L2Normalization(w, mode='instance', eps=1e-8)
            data = mx.symbol.Arcface(data,
                                     weight=norm_w,
                                     num_hidden=self._output_dim,
                                     lower_class_idx=0,
                                     upper_class_idx=self._output_dim,
                                     verbose=False,
                                     margin=margin,
                                     s=s,
                                     label=self._label_index)
        self._output = data
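A minimal NumPy sketch of the block mixing that _build sets up per searched layer (the function and variable names here are illustrative, and the Gumbel noise is drawn inline rather than fed in as a symbol variable):

import numpy as np

def gumbel_softmax_weights(theta, temperature, rng=np.random):
    # Gumbel(0, 1) noise: g = -log(-log(u)), u ~ Uniform(0, 1)
    u = rng.uniform(1e-8, 1.0, size=theta.shape)
    g = -np.log(-np.log(u))
    z = (theta + g) / temperature
    z = z - z.max()  # for numerical stability
    e = np.exp(z)
    return e / e.sum()

theta = np.zeros(9)  # 8 candidate blocks + 1 skip
w = gumbel_softmax_weights(theta, temperature=5.0)
# the layer output is then sum_i w[i] * block_i(x), as in _build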
Example #3
File: FBNet.py  Project: junqiangwu/NAS
    def _Sample(self, train_iter, val_iter, theta_filepath):
        self._logger.info("Sample symbol")
        data = self._data
        f_index = 0
        # read the learned theta values once, one line per searched layer
        with open(theta_filepath) as f:
            theta_result = f.readlines()
        for outer_layer_idx in range(len(self._f)):
            num_filter = self._f[outer_layer_idx]
            num_layers = self._n[outer_layer_idx]
            s_size = self._s[outer_layer_idx]

            if outer_layer_idx == 0:
                data = mx.sym.Convolution(data=data,
                                          num_filter=num_filter,
                                          kernel=(3, 3),
                                          stride=(s_size, s_size),
                                          pad=(1, 1))
                # data = mx.sym.BatchNorm(data=data, fix_gamma=False, eps=2e-5, momentum=0.9)
                data = mx.sym.Activation(data=data, act_type='relu')
                input_channels = num_filter
            elif (outer_layer_idx <= self._tbs[1]) and (outer_layer_idx >=
                                                        self._tbs[0]):

                for inner_layer_idx in range(num_layers):
                    data = mx.sym.BatchNorm(data=data,
                                            fix_gamma=False,
                                            eps=2e-5,
                                            momentum=0.9)
                    # only the first layer in a stage uses the stage stride
                    if inner_layer_idx > 0:
                        s_size = 1

                    # values must be cast to float before argmax; argmax over
                    # the raw strings would compare lexicographically
                    block_idx = np.argmax([
                        float(t)
                        for t in theta_result[f_index].strip().split(' ')[1:]
                    ])

                    if block_idx < self._block_size - 1:  # a real block, not skip
                        kernel_size = (self._kernel[block_idx],
                                       self._kernel[block_idx])
                        group = self._group[block_idx]
                        prefix = "layer_%d_%d_block_%d" % (
                            outer_layer_idx, inner_layer_idx, block_idx)
                        expansion = self._e[block_idx]
                        stride = (s_size, s_size)
                        data = block_factory(data,
                                             input_channels=input_channels,
                                             num_filters=num_filter,
                                             kernel_size=kernel_size,
                                             prefix=prefix,
                                             expansion=expansion,
                                             group=group,
                                             shuffle=True,
                                             stride=stride,
                                             bn=False)
                    # if the argmax picked the skip block, data passes through
                    # unchanged
                    input_channels = num_filter
                    f_index += 1

            elif outer_layer_idx == len(self._f) - 1:
                # final conv block (3x3 kernel)
                data = mx.sym.BatchNorm(data=data,
                                        fix_gamma=False,
                                        eps=2e-5,
                                        momentum=0.9)
                data = mx.sym.Activation(data=data, act_type='relu')
                data = mx.sym.Convolution(data,
                                          num_filter=num_filter,
                                          stride=(s_size, s_size),
                                          kernel=(3, 3),
                                          name="layer_%d_last_conv" %
                                          outer_layer_idx)
            else:
                raise ValueError("Wrong layer index %d" % outer_layer_idx)

        # avg pool part
        data = mx.symbol.Pooling(data=data,
                                 global_pool=True,
                                 kernel=(7, 7),
                                 pool_type='avg',
                                 name="global_pool")

        data = mx.symbol.Flatten(data=data, name='flat_pool')
        data = mx.symbol.FullyConnected(data=data,
                                        num_hidden=self._feature_dim)
        # fc part (AmSoftmax and Arcface below are custom project operators,
        # not standard MXNet symbols)
        if self._model_type == 'softmax':
            data = mx.symbol.FullyConnected(name="output_fc",
                                            data=data,
                                            num_hidden=self._output_dim)
        elif self._model_type == 'amsoftmax':
            s = 30.0
            margin = 0.3
            data = mx.symbol.L2Normalization(data, mode='instance',
                                             eps=1e-8) * s
            w = mx.sym.Variable('fc_weight',
                                init=mx.init.Xavier(magnitude=2),
                                shape=(self._output_dim, self._feature_dim),
                                dtype=np.float32)
            norm_w = mx.symbol.L2Normalization(w, mode='instance', eps=1e-8)
            data = mx.symbol.AmSoftmax(data,
                                       weight=norm_w,
                                       num_hidden=self._output_dim,
                                       lower_class_idx=0,
                                       upper_class_idx=self._output_dim,
                                       verbose=False,
                                       margin=margin,
                                       s=s,
                                       label=self._label_index)
        elif self._model_type == 'arcface':
            s = 64.0
            margin = 0.5
            data = mx.symbol.L2Normalization(data, mode='instance',
                                             eps=1e-8) * s
            w = mx.sym.Variable('fc_weight',
                                init=mx.init.Xavier(magnitude=2),
                                shape=(self._output_dim, self._feature_dim),
                                dtype=np.float32)
            norm_w = mx.symbol.L2Normalization(w, mode='instance', eps=1e-8)
            data = mx.symbol.Arcface(data,
                                     weight=norm_w,
                                     num_hidden=self._output_dim,
                                     lower_class_idx=0,
                                     upper_class_idx=self._output_dim,
                                     verbose=False,
                                     margin=margin,
                                     s=s,
                                     label=self._label_index)
        self._output = data
        self._fbnet_model = mx.mod.Module(symbol=self._output,
                                          context=mx.cpu())
        self._fbnet_model.fit(train_iter,
                              eval_data=val_iter,
                              optimizer='sgd',
                              optimizer_params={
                                  'learning_rate': 0.08,
                                  'wd': 0.0005
                              },
                              eval_metric='acc',
                              batch_end_callback=mx.callback.Speedometer(
                                  100, 100),
                              num_epoch=10)

        # evaluate accuracy on the validation iterator
        acc = mx.metric.Accuracy()
        self._fbnet_model.score(val_iter, acc)
        print(acc)
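A minimal sketch of the theta file format these samplers appear to assume: one line per searched layer, a leading token (e.g. a name) followed by one value per candidate block. The helper below is illustrative, not from the repo:

import numpy as np

def parse_theta_file(path):
    # returns the argmax block index per searched layer;
    # the last index (the skip block) means the layer is removed
    choices = []
    with open(path) as f:
        for line in f:
            values = [float(t) for t in line.strip().split(' ')[1:]]
            choices.append(int(np.argmax(values)))
    return choices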
Example #4

import time

import mxnet as mx

# _kernel, _group, _e, times and block_factory are assumed to be
# module-level definitions from the surrounding project.
def speed_test(input_shape, s_size, num_filter, ctx=mx.cpu()):
    input_channels = input_shape[0]
    data = mx.sym.var('data')
    block_list = []
    for block_idx in range(8):
        kernel_size = (_kernel[block_idx], _kernel[block_idx])
        group = _group[block_idx]
        expansion = _e[block_idx]
        stride = (s_size, s_size)
        prefix = "block_%d" % block_idx

        block_out = block_factory(data,
                                  input_channels=input_channels,
                                  num_filters=num_filter,
                                  kernel_size=kernel_size,
                                  prefix=prefix,
                                  expansion=expansion,
                                  group=group,
                                  shuffle=True,
                                  stride=stride,
                                  bn=False)
        # block_out = mx.sym.BatchNorm(data=block_out, fix_gamma=False, eps=2e-5, momentum=0.9)
        if (input_channels == num_filter) and (s_size == 1):
            block_out = block_out + data
        block_out = mx.sym.expand_dims(block_out, axis=1)
        block_list.append(block_out)
    block_list.append(data)
    # print(block_list)

    speed_list = []
    for i, sym in enumerate(block_list):

        mod = mx.mod.Module(symbol=sym,
                            context=[ctx],
                            data_names=['data'],
                            label_names=None)
        mod.bind(data_shapes=[('data', (1, ) + input_shape)],
                 for_training=False)
        mod.init_params(initializer=mx.init.Xavier(rnd_type='gaussian',
                                                   factor_type="out",
                                                   magnitude=2),
                        allow_missing=True,
                        allow_extra=True)

        data = mx.nd.random.normal(shape=(1, ) + input_shape, ctx=ctx)
        _dataiter = mx.io.NDArrayIter(data={'data': data}, batch_size=1)
        tmp_data = _dataiter.next()
        mod.forward(tmp_data)
        mod.get_outputs()[0].asnumpy()  # warm-up forward pass
        # tic
        start = time.time()
        for _ in range(times):
            mod.forward(tmp_data)
            y = mod.get_outputs()
            y[0].asnumpy()
        # toc
        end = time.time()
        speed = 1.0 * (end - start) / times * 1000  # average latency in ms
        speed_list.append(speed)
        msg = "Block %d speed %f" % (i, speed)
        print(msg)
    print(' '.join([str(t) for t in speed_list]))
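A minimal usage sketch, assuming the module-level names noted above are defined (the shape and values here are illustrative):

times = 50  # forward passes per measurement, read by speed_test
speed_test(input_shape=(16, 112, 112), s_size=1, num_filter=16, ctx=mx.cpu())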