Example No. 1
def check_quantize(sym, data_shape, out_type, name='conv',
                   check_calibration=True, gluon_forward=False):
  sg_pass_name = config[name][SG_PASS_NAME]
  post_sg_pass_name = config[name][POST_SG_PASS_NAME]

  fc = mx.sym.FullyConnected(data=sym, num_hidden=10, flatten=True, name='fc_softmax')
  if gluon_forward:
    sym = fc
    sym_sg = sym.get_backend_symbol(sg_pass_name)
    mod = Module(symbol=sym, label_names=[])
    mod.bind(for_training=False,
            data_shapes=[('data', data_shape)])
  else:
    sym = mx.sym.SoftmaxOutput(data=fc, name='softmax')
    sym_sg = sym.get_backend_symbol(sg_pass_name)
    label_shape = (data_shape[0], 10)
    mod = Module(symbol=sym)
    mod.bind(for_training=False,
            data_shapes=[('data', data_shape)],
            label_shapes=[('softmax_label', label_shape)])
  mod.init_params(mx.init.Normal(0.5))
  arg_params, aux_params = mod.get_params()

  data = [mx.random.uniform(-1, 1, shape=shape, ctx=mx.current_context()) for _, shape in mod.data_shapes]
  batch = mx.io.DataBatch(data, [])

  mod.forward(batch, is_train=False)
  for output in mod.get_outputs():
      output.wait_to_read()
  ref_out = mod.get_outputs()

  excluded_sym_names = []
  if mx.current_context() == mx.cpu() and gluon_forward:
    excluded_sym_names += ['sg_mkldnn_fully_connected_0']
    excluded_sym_names += ['fc_softmax']

  calib_data = mx.nd.random.uniform(shape=data_shape)
  calib_data = NDArrayIter(data=calib_data)
  calib_data = DummyIter(calib_data)
  calib_layer = lambda name: name.endswith('_output')
  qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(sym=sym_sg,
                                                                   arg_params=arg_params,
                                                                   aux_params=aux_params,
                                                                   ctx=mx.current_context(),
                                                                   excluded_sym_names=excluded_sym_names,
                                                                   quantized_dtype=out_type,
                                                                   calib_mode='naive',
                                                                   calib_data=calib_data,
                                                                   calib_layer=calib_layer,
                                                                   num_calib_examples=5)
  qsym = qsym.get_backend_symbol(post_sg_pass_name)
  if check_calibration:
    check_qsym_calibrated(qsym, out_type, name=name)
  if gluon_forward:
    check_qsym_gluon_forward(qsym, qarg_params, qaux_params, data_shape)
  else:
    check_qsym_dummy_forward(qsym, batch, data_shape, label_shape)
    quantized_out = check_qsym_forward(qsym, qarg_params, qaux_params, batch, data_shape, label_shape)
    for i in range(len(ref_out)):
      assert_almost_equal(ref_out[i].asnumpy(), quantized_out[i].asnumpy(), atol=1)
Example No. 2
    def _prepare_module(task_id, symbol, ctx_config, data_names, label_names,
                        resume_config):
        if resume_config['is_resume'] != '0':
            return Module(symbol=symbol,
                          context=Executor._prepare_ctx(ctx_config),
                          data_names=data_names,
                          label_names=label_names,
                          logger=get_logger('mxnet_logger[tid=%s]' % task_id,
                                            log_to_console=False,
                                            log_to_file=True))
        else:
            ckp = resume_config['ckp']
            prefix = ckp['prefix']
            epoch = ckp['epoch']
            params_path = osp.join(params_root_path,
                                   '%s-%04d.params' % (prefix, epoch))
            # Copied from MXNet

            # Licensed to the Apache Software Foundation (ASF) under one
            # or more contributor license agreements.  See the NOTICE file
            # distributed with this work for additional information
            # regarding copyright ownership.  The ASF licenses this file
            # to you under the Apache License, Version 2.0 (the
            # "License"); you may not use this file except in compliance
            # with the License.  You may obtain a copy of the License at
            #
            #   http://www.apache.org/licenses/LICENSE-2.0
            #
            # Unless required by applicable law or agreed to in writing,
            # software distributed under the License is distributed on an
            # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
            # KIND, either express or implied.  See the License for the
            # specific language governing permissions and limitations
            # under the License.
            save_dict = nd.load(params_path)
            arg_params = {}
            aux_params = {}
            for k, v in save_dict.items():
                tp, name = k.split(':', 1)
                if tp == 'arg':
                    arg_params[name] = v
                if tp == 'aux':
                    aux_params[name] = v
            mod = Module(symbol=symbol,
                         context=Executor._prepare_ctx(ctx_config),
                         logger=get_logger('mxnet_logger[tid=%s]' % task_id,
                                           log_to_console=False,
                                           log_to_file=True))
            mod._arg_params = arg_params
            mod._aux_params = aux_params
            mod.params_initialized = True
            # TODO: There is a parameter named load_optimizer_states in Module.load
            return mod
Example No. 3
def convert_net(sym, args):
    # setup context
    ctx = mx.cpu(0)

    # weight_map = get_weight_map(args.step_old, args.is_bin_old,
    #                             args.step_new, args.is_bin_new)
    # load params
    arg_params, aux_params = load_param(args.params, ctx=ctx)

    # produce max possible shapes
    data_names = ['data', 'im_info']
    label_names = None
    data_shapes = [('data', (1, 3, args.img_long_side, args.img_long_side)),
                   ('im_info', (1, 3))]
    label_shapes = None

    # check shapes
    check_shape(sym, data_shapes, arg_params, aux_params)

    # create and bind module
    mod = Module(sym, data_names, label_names, context=ctx)
    mod.bind(data_shapes, label_shapes, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    # save checkpoint
    mod.save_checkpoint(args.save_prefix, epoch=0)
Example No. 4
def load_model(sym):
    if system_dict["gpu"]:
        ctx = mx.gpu(int(system_dict["gpu"]))
    else:
        ctx = mx.cpu(0)

    # load params
    arg_params, aux_params = load_param(system_dict["params"], ctx=ctx)

    # produce max possible shapes
    data_names = ['data', 'im_info']
    label_names = None
    data_shapes = [('data', (1, 3, system_dict["img_long_side"],
                             system_dict["img_long_side"])),
                   ('im_info', (1, 3))]
    label_shapes = None

    # check shapes
    check_shape(sym, data_shapes, arg_params, aux_params)

    # create and bind module
    mod = Module(sym, data_names, label_names, context=ctx)
    mod.bind(data_shapes, label_shapes, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    return mod
Example No. 5
    def check_quantize_model(qdtype):
        def check_params(params, qparams, qsym=None):
            if qsym is None:
                assert len(params) == len(qparams)
                for k, v in params.items():
                    assert k in qparams
                    assert same(v.asnumpy(), qparams[k].asnumpy())
            else:
                qparams_ground_truth = mx.contrib.quant._quantize_params(qsym, params)
                assert len(qparams) == len(qparams_ground_truth)
                for k, v in qparams_ground_truth.items():
                    assert k in qparams
                    assert same(v.asnumpy(), qparams[k].asnumpy())

        def check_qsym_calibrated(qsym):
            attrs = qsym.attr_dict()
            for k, v in attrs.items():
                if k.find('requantize_') != -1:
                    assert 'min_calib_range' in v
                    assert 'max_calib_range' in v

        def check_qsym_qdtype(qsym, qdtype):
            attrs = qsym.attr_dict()
            for k, v in attrs.items():
                if k.find('_quantize') != -1:
                    assert 'out_type' in v
                    assert v['out_type'] == qdtype

        sym = get_fp32_sym()
        mod = Module(symbol=sym)
        batch_size = 4
        data_shape = (batch_size, 4, 10, 10)
        label_shape = (batch_size, 10)
        mod.bind(data_shapes=[('data', data_shape)], label_shapes=[('softmax_label', label_shape)])
        mod.init_params()
        arg_params, aux_params = mod.get_params()
        qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(sym=sym,
                                                                         arg_params=arg_params,
                                                                         aux_params=aux_params,
                                                                         ctx=mx.current_context(),
                                                                         quantized_dtype=qdtype,
                                                                         calib_mode='none')
        check_params(arg_params, qarg_params, qsym)
        check_params(aux_params, qaux_params)

        calib_data = mx.nd.random.uniform(shape=data_shape)
        calib_data = NDArrayIter(data=calib_data)
        calib_data = DummyIter(calib_data)
        qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(sym=sym,
                                                                         arg_params=arg_params,
                                                                         aux_params=aux_params,
                                                                         ctx=mx.current_context(),
                                                                         quantized_dtype=qdtype,
                                                                         calib_mode='naive',
                                                                         calib_data=calib_data,
                                                                         num_calib_examples=20)
        check_params(arg_params, qarg_params, qsym)
        check_params(aux_params, qaux_params)
        check_qsym_calibrated(qsym)
        check_qsym_qdtype(qsym, qdtype)
Example No. 6
    def __init__(self,
                 symbol,
                 data_names,
                 label_names,
                 data_shapes,
                 label_shapes,
                 logger=logging,
                 context=mx.cpu(),
                 work_load_list=None,
                 fixed_param_names=None):
        self.symbol = symbol
        self.data_names = data_names
        self.label_names = label_names
        self.data_shapes = data_shapes
        self.label_shapes = label_shapes
        self.context = context
        self.work_load_list = work_load_list
        self.fixed_param_names = fixed_param_names

        if logger is None:
            logger = logging.getLogger()
            logger.setLevel(logging.INFO)
        self.logger = logger
        self.module = Module(symbol=self.symbol,
                             data_names=self.data_names,
                             label_names=self.label_names,
                             logger=self.logger,
                             context=self.context,
                             work_load_list=self.work_load_list,
                             fixed_param_names=self.fixed_param_names)
Example No. 7
def test_weight_async_reorder():
    data = mx.sym.Variable("data")
    w1 = mx.sym.Variable("1_weight")
    w2 = mx.sym.Variable("2_weight")
    conv1 = mx.sym.Convolution(data=data,
                               weight=w1 + w1,
                               num_filter=32,
                               no_bias=True,
                               kernel=(3, 3))
    conv2 = mx.sym.Convolution(data=conv1,
                               weight=w2 + w2,
                               num_filter=32,
                               no_bias=True,
                               kernel=(1, 1))
    mod = Module(symbol=conv2, label_names=None, context=mx.current_context())
    mod.bind(for_training=False, data_shapes=[('data', (10, 16, 50, 50))])
    mod.init_params(initializer=mx.init.Xavier(magnitude=2.))
    data = [
        mx.random.uniform(-1.0,
                          1.0,
                          shape=(10, 16, 50, 50),
                          ctx=mx.current_context())
    ]
    batch = mx.io.DataBatch(data, [])
    for i in range(2):
        mod.forward(batch, is_train=False)
        for output in mod.get_outputs():
            output.wait_to_read()
Example No. 8
  def check_quantize_whole_model(out_type):
    batch_size = 4
    data_shape = (batch_size, 4, 10, 10)
    data = mx.sym.Variable('data')
    conv0 = mx.sym.Convolution(data, kernel=(1, 1), num_filter=16, name='conv0')
    sym = mx.sym.Convolution(conv0, kernel=(1, 1), num_filter=16, name='conv1')
    sym_sg = sym.get_backend_symbol('MKLDNN_QUANTIZE')
    mod = Module(symbol=sym, label_names=None)
    mod.bind(for_training=False,
             data_shapes=[('data', data_shape)])

    mod.init_params(mx.init.Normal(0.5))
    arg_params, aux_params = mod.get_params()

    excluded_sym_names = []

    calib_data = mx.nd.random.uniform(shape=data_shape)
    calib_data = mx.io.NDArrayIter(data=calib_data)
    calib_data = DummyIter(calib_data)
    calib_layer = lambda name: name.endswith('_output')
    qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(sym=sym_sg,
                                                                     arg_params=arg_params,
                                                                     aux_params=aux_params,
                                                                     ctx=mx.current_context(),
                                                                     excluded_sym_names=excluded_sym_names,
                                                                     quantized_dtype=out_type,
                                                                     calib_mode='naive',
                                                                     calib_data=calib_data,
                                                                     calib_layer=calib_layer,
                                                                     label_names=None,
                                                                     num_calib_examples=1)
    qsym = qsym.get_backend_symbol('MKLDNN_QUANTIZE')
    check_qsym_forward(qsym, qarg_params, qaux_params, data_shape)
Example No. 9
def load_model(sym):
    '''
    User function: Loads the trained model weights 

    Args:
        sym (mxnet model): Mxnet model returned from set_network() function

    Returns:
        mxnet model: Model with trained weights
    '''
    if system_dict["gpu"]:
        ctx = mx.gpu(int(system_dict["gpu"]))
    else:
        ctx = mx.cpu(0)

    # load params
    arg_params, aux_params = load_param(system_dict["params"], ctx=ctx)

    # produce max possible shapes
    data_names = ['data', 'im_info']
    label_names = None
    data_shapes = [('data', (1, 3, system_dict["img_long_side"],
                             system_dict["img_long_side"])),
                   ('im_info', (1, 3))]
    label_shapes = None

    # check shapes
    check_shape(sym, data_shapes, arg_params, aux_params)

    # create and bind module
    mod = Module(sym, data_names, label_names, context=ctx)
    mod.bind(data_shapes, label_shapes, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    return mod
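A minimal usage sketch for the load_model() helper above, assuming set_network() and the system_dict entries ("params", "gpu", "img_long_side") have already been configured as in the surrounding examples:

sym = set_network()    # assumed helper that builds the detection symbol
mod = load_model(sym)  # binds the module and loads the trained weights
# the returned Module is then ready for mod.forward(data_batch), as in the demo_net examples below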
Example No. 10
def demo_net(sym, class_names, args):
    # print config
    print('called with args\n{}'.format(pprint.pformat(vars(args))))

    # setup context
    if args.gpu:
        ctx = mx.gpu(int(args.gpu))
    else:
        ctx = mx.cpu(0)

    # load single test
    im_tensor, im_info, im_orig = load_test(args.image, short=args.img_short_side, max_size=args.img_long_side,
                                            mean=args.img_pixel_means, std=args.img_pixel_stds)

    # generate data batch
    data_batch = generate_batch(im_tensor, im_info)

    # load params
    arg_params, aux_params = load_param(args.params, ctx=ctx)

    # produce max possible shapes
    data_names = ['data', 'im_info']
    label_names = None
    data_shapes = [('data', (1, 3, args.img_long_side, args.img_long_side)), ('im_info', (1, 3))]
    label_shapes = None

    # check shapes
    check_shape(sym, data_shapes, arg_params, aux_params)

    # create and bind module
    mod = Module(sym, data_names, label_names, context=ctx)
    mod.bind(data_shapes, label_shapes, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    # forward
    forward_starts = time.time()
    mod.forward(data_batch)
    rois, scores, bbox_deltas = mod.get_outputs()
    rois.wait_to_read()
    rois = rois[:, 1:]
    scores = scores[0]
    bbox_deltas = bbox_deltas[0]
    forward_costs = time.time() - forward_starts
    print("forward costs %.4f" % (forward_costs))

    im_info = im_info[0]
    # decode detection
    det = im_detect(rois, scores, bbox_deltas, im_info,
                    bbox_stds=args.rcnn_bbox_stds, nms_thresh=args.rcnn_nms_thresh,
                    conf_thresh=args.rcnn_conf_thresh)

    # print out
    for [cls, conf, x1, y1, x2, y2] in det:
        if cls > 0 and conf > args.vis_thresh:
            print(class_names[int(cls)], conf, [x1, y1, x2, y2])

    # if vis
    if args.vis:
        vis_detection(im_orig, det, class_names, thresh=args.vis_thresh, prefix=args.image)
Example No. 11
def check_qsym_dummy_forward(qsym, batch, data_shape):
    mod = Module(symbol=qsym, label_names=None, context=mx.current_context())
    mod.bind(for_training=False, data_shapes=[('data', data_shape)])
    mod.init_params(initializer=mx.init.Xavier(magnitude=2.))
    mod.forward(batch, is_train=False)
    for output in mod.get_outputs():
        output.wait_to_read()
    return mod.get_outputs()
Example No. 12
def check_qsym_forward(qsym, qarg_params, qaux_params, batch, data_shape):
    mod = Module(symbol=qsym, label_names=None, context=mx.current_context())
    mod.bind(for_training=False, data_shapes=[('data', data_shape)])
    mod.set_params(qarg_params, qaux_params)
    mod.forward(batch, is_train=False)
    for output in mod.get_outputs():
        output.wait_to_read()
    return mod.get_outputs()
Example No. 13
def check_quantize(sym, data_shape, out_type, name='conv',
                   check_calibration=True, gluon_forward=False, check_scale_align=False):
  if name in config:
    name = config[name][OP_NAME]
  sym_sg = sym.get_backend_symbol(QUANTIZE_SG_PASS_NAME)
  mod = Module(symbol=sym, label_names=None)
  mod.bind(for_training=False,
            data_shapes=[('data', data_shape)])
  mod.init_params(mx.init.Normal(0.5))
  arg_params, aux_params = mod.get_params()

  if out_type == 'uint8':
    data = [mx.random.uniform(0.0, 1.0, shape=shape, ctx=mx.current_context()) for _, shape in mod.data_shapes]
  else:
    data = [mx.random.uniform(-1.0, 1.0, shape=shape, ctx=mx.current_context()) for _, shape in mod.data_shapes]
  batch = mx.io.DataBatch(data, [])

  mod.forward(batch, is_train=False)
  for output in mod.get_outputs():
      output.wait_to_read()
  ref_out = mod.get_outputs()

  excluded_sym_names = []
  excluded_op_names = []
  if mx.current_context() == mx.cpu() and gluon_forward:
    excluded_op_names += ['_sg_mkldnn_fully_connected']

  calib_data = CalibIter(batch, data_shape, 1)

  qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(sym=sym_sg,
                                                                   arg_params=arg_params,
                                                                   aux_params=aux_params,
                                                                   ctx=mx.current_context(),
                                                                   excluded_sym_names=excluded_sym_names,
                                                                   excluded_op_names=excluded_op_names,
                                                                   quantized_dtype=out_type,
                                                                   calib_mode='naive',
                                                                   calib_data=calib_data,
                                                                   calib_layer=None,
                                                                   label_names=None,
                                                                   num_calib_examples=1)
  qsym = qsym.get_backend_symbol(QUANTIZE_SG_PASS_NAME)
  if check_calibration:
    check_qsym_calibrated(qsym, out_type, name=name)
  if check_scale_align:
    check_qsym_scale_align(qsym)
  if gluon_forward:
    check_qsym_gluon_forward(qsym, qarg_params, qaux_params, data_shape)
  else:
    quantized_out = check_qsym_forward(qsym, qarg_params, qaux_params, batch, data_shape)
    for i in range(len(ref_out)):
      min_range = mx.nd.min(ref_out[i]).asscalar()
      max_range = mx.nd.max(ref_out[i]).asscalar()
      atol = 0.1 * max(abs(min_range), abs(max_range))
      assert_almost_equal_with_err(quantized_out[i].asnumpy(), ref_out[i].asnumpy(), rtol=0.1, atol=atol, etol=0.2)
    check_qsym_dummy_forward(qsym, batch, data_shape)
Example No. 14
def check_quantize(sym, data_shape, check_conv=True):
    fc = mx.sym.FullyConnected(data=sym,
                               num_hidden=10,
                               flatten=True,
                               name='fc')
    sym = mx.sym.SoftmaxOutput(data=fc, name='softmax')
    sym_sg = sym.get_backend_symbol("MKLDNN")
    label_shape = (data_shape[0], 10)
    mod = Module(symbol=sym)
    mod.bind(for_training=False,
             data_shapes=[('data', data_shape)],
             label_shapes=[('softmax_label', label_shape)])
    mod.init_params(mx.init.Normal(0.5))
    arg_params, aux_params = mod.get_params()

    data = [
        mx.random.uniform(-1, 1, shape=shape, ctx=mx.current_context())
        for _, shape in mod.data_shapes
    ]
    batch = mx.io.DataBatch(data, [])

    mod.forward(batch, is_train=False)
    for output in mod.get_outputs():
        output.wait_to_read()
    ref_out = mod.get_outputs()

    excluded_sym_names = []
    if mx.current_context() == mx.cpu():
        excluded_sym_names += ['fc']

    calib_data = mx.nd.random.uniform(shape=data_shape)
    calib_data = NDArrayIter(data=calib_data)
    calib_data = DummyIter(calib_data)
    calib_layer = lambda name: name.endswith('_output')
    qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
        sym=sym_sg,
        arg_params=arg_params,
        aux_params=aux_params,
        ctx=mx.current_context(),
        excluded_sym_names=excluded_sym_names,
        quantized_dtype='uint8',
        calib_mode='naive',
        calib_data=calib_data,
        calib_layer=calib_layer,
        calib_quantize_op=True,
        num_calib_examples=5)
    qsym = qsym.get_backend_symbol("MKLDNN_POST_QUANTIZE")
    if check_conv:
        check_qsym_calibrated(qsym)
    quantized_out = check_qsym_forward(qsym, qarg_params, qaux_params, batch,
                                       data_shape, label_shape)
    for i in range(len(ref_out)):
        assert_almost_equal(ref_out[i].asnumpy(),
                            quantized_out[i].asnumpy(),
                            atol=1)
    check_qsym_dummy_forward(qsym, batch, data_shape, label_shape)
Example No. 15
 def check_qsym_forward(qsym, qarg_params, qaux_params, data_shape):
   mod = Module(symbol=qsym, label_names=None, context=mx.current_context())
   mod.bind(for_training=False,
            data_shapes=[('data', data_shape)])
   mod.set_params(qarg_params, qaux_params)
   data = [mx.random.uniform(-1.0, 1.0, shape=shape) for _, shape in mod.data_shapes]
   batch = mx.io.DataBatch(data, [])
   mod.forward(batch, is_train=False)
   for output in mod.get_outputs():
       output.wait_to_read()
Example No. 16
 def __init__(self,
              symbol,
              data_names,
              label_names,
              context=mx.cpu(),
              max_data_shapes=None,
              provide_data=None,
              provide_label=None,
              arg_params=None,
              aux_params=None):
     #self._mod = MutableModule(symbol, data_names, label_names,
     #                          context=context, max_data_shapes=max_data_shapes)
     self._mod = Module(symbol, data_names, label_names, context=context)
     self._mod.bind(provide_data, provide_label, for_training=False)
     self._mod.init_params(arg_params=arg_params, aux_params=aux_params)
Example No. 17
    def load_check_point(sym_json_path, params_path, ctx_config_tuple,
                         task_id):
        ctx_config = list(ctx_config_tuple)
        # Copied from MXNet

        # Licensed to the Apache Software Foundation (ASF) under one
        # or more contributor license agreements.  See the NOTICE file
        # distributed with this work for additional information
        # regarding copyright ownership.  The ASF licenses this file
        # to you under the Apache License, Version 2.0 (the
        # "License"); you may not use this file except in compliance
        # with the License.  You may obtain a copy of the License at
        #
        #   http://www.apache.org/licenses/LICENSE-2.0
        #
        # Unless required by applicable law or agreed to in writing,
        # software distributed under the License is distributed on an
        # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
        # KIND, either express or implied.  See the License for the
        # specific language governing permissions and limitations
        # under the License.
        if not isinstance(sym_json_path, sym.Symbol):
            symbol = sym.load(sym_json_path)
        else:
            # If sym_json_path is already an instance of mxnet.sym.Symbol
            symbol = sym_json_path
        save_dict = nd.load(params_path)
        arg_params = {}
        aux_params = {}
        for k, v in save_dict.items():
            tp, name = k.split(':', 1)
            if tp == 'arg':
                arg_params[name] = v
            if tp == 'aux':
                aux_params[name] = v
        mod = Module(symbol=symbol,
                     context=generate_ctx(ctx_config),
                     logger=get_logger('mxnet_logger[tid=%s]' % task_id,
                                       log_to_console=False,
                                       log_to_file=True))
        mod._arg_params = arg_params
        mod._aux_params = aux_params
        mod.params_initialized = True
        # TODO: There is a parameter named load_optimizer_states in Module.load
        return mod
Example No. 18
 def __init__(self, config):
     # size = config.SCALE.lower()
     # if size == "small":
     #     scale = [576, 1024]
     # elif size == "middle":
     #     scale = [864, 1536]
     # elif size == "big":
     #     scale = [1152, 2048]    
     sym = mx.sym.load(config.SYMBOL_PATH)
     
     self.nms = py_nms_wrapper(0.3)
     self.scale = config.SCALE
     self.mod = Module(sym, ['data', 'im_info'], [], context=[mx.gpu(config.GPU_ID)])
     self.thresh = config.THRESH
     self.rebind = not config.FIXSIZE
     self.model_path = config.MODEL_PATH
     self.font = config.FONT_PATH
     self.preprocess = False
Example No. 19
    def __init__(
            self,
            symbol,
            data_names,
            label_names,
            data_shapes,
            label_shapes,
            logger=logging,
            context=mx.cpu(),
            work_load_list=None,
            fixed_param_names=None,
            allow_missing=False,
            # for evaluate fold bn to create eval symbol
            config=None):
        self.symbol = symbol
        self.data_names = data_names
        self.label_names = label_names
        self.data_shapes = data_shapes
        self.label_shapes = label_shapes
        self.context = context
        self.work_load_list = work_load_list
        self.fixed_param_names = fixed_param_names

        if logger is None:
            logger = logging.getLogger()
            logger.setLevel(logging.INFO)
        self.logger = logger
        self.module = Module(symbol=self.symbol,
                             data_names=self.data_names,
                             label_names=self.label_names,
                             logger=self.logger,
                             context=self.context,
                             work_load_list=self.work_load_list,
                             fixed_param_names=self.fixed_param_names)
        # for fold bn
        self.config = config
Example No. 20
def quantize_model(sym,
                   arg_params,
                   aux_params,
                   data_names=('data', ),
                   label_names=('softmax_label', ),
                   ctx=cpu(),
                   excluded_sym_names=None,
                   calib_mode='entropy',
                   calib_data=None,
                   num_calib_examples=None,
                   calib_layer=None,
                   quantized_dtype='int8',
                   logger=logging):
    """User-level API for generating a quantized model from an FP32 model w/ or w/o calibration.
    The backend quantized operators are only enabled for Linux systems. Please do not run
    inference using the quantized models on Windows for now.
    The quantization implementation adopts TensorFlow's approach:
    https://www.tensorflow.org/performance/quantization.
    The calibration implementation borrows the idea of Nvidia's 8-bit Inference with TensorRT:
    http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf
    and adapts the method to MXNet.
    
    Parameters
    ----------
    sym : str or Symbol
        Defines the structure of a neural network for FP32 data types.
    arg_params : dict
        Dictionary of name to `NDArray`.
    aux_params : dict
        Dictionary of name to `NDArray`.
    data_names : a list of strs
        Data names required for creating a Module object to run forward propagation on the
        calibration dataset.
    label_names : a list of strs
        Label names required for creating a Module object to run forward propagation on the
        calibration dataset.
    ctx : Context
        Defines the device that users want to run forward propagation on the calibration
        dataset for collecting layer output statistics. Currently, only supports single context.
    excluded_sym_names : list of strings
        A list of strings representing the names of the symbols that users want to exclude
        from being quantized.
    calib_mode : str
        If calib_mode='none', no calibration will be used and the thresholds for
        requantization after the corresponding layers will be calculated at runtime by
        calling min and max operators. The quantized models generated in this
        mode are normally 10-20% slower than those with calibrations during inference.
        If calib_mode='naive', the min and max values of the layer outputs from a calibration
        dataset will be directly taken as the thresholds for quantization.
        If calib_mode='entropy' (default mode), the thresholds for quantization will be
        derived such that the KL divergence between the distributions of FP32 layer outputs and
        quantized layer outputs is minimized based upon the calibration dataset.
    calib_data : DataIter
        A data iterator initialized by the calibration dataset.
    num_calib_examples : int or None
        The maximum number of examples that the user would like to use for calibration. If not provided,
        the whole calibration dataset will be used.
    calib_layer : function
        Given a layer's output name in string, return True or False for deciding whether to
        calibrate this layer. If yes, the statistics of the layer's output will be collected;
        otherwise, no information of the layer's output will be collected. If not provided,
        all the layers' outputs that need requantization will be collected.
    quantized_dtype : str
        The quantized destination type for input data. Currently supports 'int8',
        'uint8' and 'auto'. 'auto' means the output type is selected automatically according to the calibration result.
        Default value is 'int8'.
    logger : Object
        A logging object for printing information during the process of quantization.
        
    Returns
    -------
    tuple
        A tuple of quantized symbol, quantized arg_params, and aux_params.
    """
    if excluded_sym_names is None:
        excluded_sym_names = []
    if not isinstance(excluded_sym_names, list):
        raise ValueError(
            'excluded_sym_names must be a list of strings representing'
            ' the names of the symbols that will not be quantized,'
            ' while received type %s' % str(type(excluded_sym_names)))

    logger.info('Quantizing symbol')
    if quantized_dtype not in ('int8', 'uint8', 'auto'):
        raise ValueError('unknown quantized_dtype %s received,'
                         ' expected `int8`, `uint8` or `auto`' %
                         quantized_dtype)
    qsym = _quantize_symbol(sym,
                            excluded_symbols=excluded_sym_names,
                            offline_params=list(arg_params.keys()),
                            quantized_dtype=quantized_dtype)

    th_dict = {}
    if calib_mode is not None and calib_mode != 'none':
        if not isinstance(ctx, Context):
            raise ValueError(
                'currently only supports single ctx, while received %s' %
                str(ctx))
        if calib_data is None:
            raise ValueError('calib_data must be provided when calib_mode=%s' %
                             calib_mode)
        if not isinstance(calib_data, DataIter):
            raise ValueError(
                'calib_data must be of DataIter type when calib_mode=%s,'
                ' while received type %s' %
                (calib_mode, str(type(calib_data))))

        mod = Module(symbol=sym,
                     data_names=data_names,
                     label_names=label_names,
                     context=ctx)
        if len(calib_data.provide_label) > 0:
            mod.bind(for_training=False,
                     data_shapes=calib_data.provide_data,
                     label_shapes=calib_data.provide_label)
        else:
            mod.bind(for_training=False, data_shapes=calib_data.provide_data)
        mod.set_params(arg_params, aux_params)
        if calib_mode == 'entropy':
            nd_dict, num_examples = _collect_layer_outputs(
                mod,
                calib_data,
                include_layer=calib_layer,
                max_num_examples=num_calib_examples,
                logger=logger)
            logger.info(
                'Collected layer outputs from FP32 model using %d examples' %
                num_examples)
            logger.info('Calculating optimal thresholds for quantization')
            th_dict = _get_optimal_thresholds(nd_dict,
                                              quantized_dtype,
                                              logger=logger)
        elif calib_mode == 'naive':
            th_dict, num_examples = _collect_layer_output_min_max(
                mod,
                calib_data,
                include_layer=calib_layer,
                max_num_examples=num_calib_examples,
                logger=logger)
            logger.info(
                'Collected layer output min/max values from FP32 model using %d examples'
                % num_examples)
        else:
            raise ValueError('unknown calibration mode %s received,'
                             ' expected `none`, `naive`, or `entropy`' %
                             calib_mode)
        logger.info('Calibrating quantized symbol')
        qsym = _calibrate_quantized_sym(qsym, th_dict)

    logger.info('Quantizing parameters')
    qarg_params = _quantize_params(qsym, arg_params, th_dict)

    return qsym, qarg_params, aux_params
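A short usage sketch for the quantize_model API defined above, following the naive-calibration pattern used by the test examples on this page; get_fp32_sym() and the single-batch calibration iterator are placeholder assumptions:

import mxnet as mx
from mxnet.io import NDArrayIter

# build and initialize an FP32 module just to obtain arg/aux params (placeholder symbol)
sym = get_fp32_sym()
batch_size, data_shape = 4, (4, 4, 10, 10)
mod = mx.mod.Module(symbol=sym, label_names=['softmax_label'])
mod.bind(data_shapes=[('data', data_shape)],
         label_shapes=[('softmax_label', (batch_size, 10))])
mod.init_params()
arg_params, aux_params = mod.get_params()

# naive calibration: thresholds are taken from the min/max layer outputs on the calibration batch
calib_data = NDArrayIter(data=mx.nd.random.uniform(shape=data_shape),
                         batch_size=batch_size)
qsym, qarg_params, qaux_params = quantize_model(sym=sym,
                                                arg_params=arg_params,
                                                aux_params=aux_params,
                                                ctx=mx.cpu(),
                                                calib_mode='naive',
                                                calib_data=calib_data,
                                                num_calib_examples=batch_size,
                                                quantized_dtype='int8')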
Example No. 21
    def check_quantize_model(qdtype):
        if is_test_for_native_cpu():
            print(
                'skipped testing test_quantize_model_with_forward for native cpu since it is not supported yet'
            )
            return
        elif qdtype == 'int8' and is_test_for_mkldnn():
            print(
                'skipped testing test_quantize_model_with_forward for mkldnn cpu int8 since it is not supported yet'
            )
            return
        elif qdtype == 'uint8' and is_test_for_gpu():
            print(
                'skipped testing test_quantize_model_with_forward for gpu uint8 since it is not supported yet'
            )
            return

        def check_params(params, qparams, qsym=None):
            if qsym is None:
                assert len(params) == len(qparams)
                for k, v in params.items():
                    assert k in qparams
                    assert same(v.asnumpy(), qparams[k].asnumpy())
            else:
                qparams_ground_truth = mx.contrib.quant._quantize_params(
                    qsym, params, th_dict={})
                assert len(qparams) == len(qparams_ground_truth)
                for k, v in qparams_ground_truth.items():
                    assert k in qparams
                    assert same(v.asnumpy(), qparams[k].asnumpy())

        def check_qsym_calibrated(qsym):
            attrs = qsym.attr_dict()
            for k, v in attrs.items():
                if k.find('requantize_') != -1:
                    assert 'min_calib_range' in v
                    assert 'max_calib_range' in v

        def check_qsym_qdtype(qsym, qdtype):
            attrs = qsym.attr_dict()
            for k, v in attrs.items():
                if k.find('_quantize') != -1:
                    assert 'out_type' in v
                    assert v['out_type'] == qdtype

        def check_qsym_forward(qsym, qarg_params, qaux_params, data_shape):
            mod = mx.mod.Module(symbol=qsym,
                                label_names=None,
                                context=mx.current_context())
            mod.bind(for_training=False, data_shapes=[('data', data_shape)])
            mod.set_params(qarg_params, qaux_params)
            data = [
                mx.random.uniform(-1.0, 1.0, shape=shape)
                for _, shape in mod.data_shapes
            ]
            batch = mx.io.DataBatch(data, [])
            mod.forward(batch, is_train=False)
            for output in mod.get_outputs():
                output.wait_to_read()

        batch_size = 4
        dshape = (batch_size, 4, 10, 10)
        data = mx.sym.Variable('data')
        sym = mx.sym.Convolution(data,
                                 kernel=(1, 1),
                                 num_filter=16,
                                 name='conv0')

        mod = Module(symbol=sym, label_names=None)
        mod.bind(data_shapes=[('data', dshape)])

        mod.init_params()
        arg_params, aux_params = mod.get_params()
        excluded_sym_names = []

        qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
            sym=sym,
            arg_params=arg_params,
            aux_params=aux_params,
            excluded_sym_names=excluded_sym_names,
            ctx=mx.current_context(),
            quantized_dtype=qdtype,
            calib_mode='none')
        check_params(arg_params, qarg_params, qsym)
        check_params(aux_params, qaux_params)
        check_qsym_forward(qsym, qarg_params, qaux_params, dshape)

        calib_data = mx.nd.random.uniform(shape=dshape)
        calib_data = NDArrayIter(data=calib_data, batch_size=batch_size)
        calib_data = DummyIter(calib_data)
        qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
            sym=sym,
            arg_params=arg_params,
            aux_params=aux_params,
            excluded_sym_names=excluded_sym_names,
            ctx=mx.current_context(),
            quantized_dtype=qdtype,
            calib_mode='naive',
            calib_data=calib_data,
            num_calib_examples=20)
        check_params(arg_params, qarg_params, qsym)
        check_params(aux_params, qaux_params)
        check_qsym_calibrated(qsym)
        check_qsym_qdtype(qsym, qdtype)
        check_qsym_forward(qsym, qarg_params, qaux_params, dshape)

Example No. 22
def dummy_data(ctx, batch_size=1):
    return [
        mx.nd.random.uniform(shape=shape, ctx=ctx)
        for shape in ([batch_size, 3, 600, 600], [batch_size])
    ]


data_names = ['data']
label_names = None
data_shapes = [('data', (1, 3, 1000, 600))]
label_shapes = None

data = mx.symbol.Variable(name="data")
GLUON_LAYER = VGGConvBlock(isBin=True, step=4)
GLUON_LAYER.hybridize()
conv_feat = GLUON_LAYER(data)

arg_params, aux_params = load_param(
    "/home/skutukov/work/mxnet_fasterrcnn_binary/convert/temp-0000.params",
    ctx=mx.cpu())
check_shape(conv_feat, data_shapes, arg_params, aux_params)

mod = Module(conv_feat, data_names, label_names, context=mx.cpu())
mod.bind(data_shapes, label_shapes, for_training=False)
mod.init_params(arg_params=arg_params, aux_params=aux_params)

data1, _ = dummy_data(ctx=mx.cpu())
# mod.forward(data1)
mod.save_checkpoint('test_vgg', epoch=0)
Example No. 23
def demo_net(sym, class_names, args):
    # print config
    print('called with args\n{}'.format(pprint.pformat(vars(args))))

    # setup context
    if args.gpu:
        ctx = mx.gpu(int(args.gpu))
    else:
        ctx = mx.cpu(0)

    # load single test
    im_tensor, im_info, im_orig = load_test(args.image, short=args.img_short_side, max_size=args.img_long_side,
                                            mean=args.img_pixel_means, std=args.img_pixel_stds)


    # generate data batch
    data_batch = generate_batch(im_tensor, im_info)

    # load params
    arg_params, aux_params = load_param(args.params, ctx=ctx)

    # produce max possible shapes
    data_names = ['data', 'im_info']
    label_names = None
    data_shapes = [('data', (1, 3, args.img_long_side, args.img_long_side)), ('im_info', (1, 3))]
    label_shapes = None

    # check shapes
    check_shape(sym, data_shapes, arg_params, aux_params)

    # create and bind module
    mod = Module(sym, data_names, label_names, context=ctx)
    mod.bind(data_shapes, label_shapes, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    # forward
    start = time.time()
    mod.forward(data_batch)
    rois, scores, bbox_deltas = mod.get_outputs()
    print("time=", time.time() - start)

    #rois = rois.asnumpy()


    rois = rois[:, 1:]
    #print('rois=',rois)
    scores = scores[0]
    bbox_deltas = bbox_deltas[0]
    #print("BBox_deltas.shape=",bbox_deltas.shape)
    #print("BBOX_deltas=",bbox_deltas)
    im_info = im_info[0]

    # decode detection

    det = im_detect(rois, scores, bbox_deltas, im_info,
                    bbox_stds=args.rcnn_bbox_stds, nms_thresh=args.rcnn_nms_thresh,
                    conf_thresh=args.rcnn_conf_thresh)

    # print out
    for [cls, conf, x_c, y_c, w, h, theta] in det:
        if cls > 0 and conf > args.vis_thresh:
            print('class_name=', class_names[int(cls)], 'conf=', conf,
                  [x_c, y_c, w, h, theta])

    if True:
        draw_rotate_box_cv(det, class_names, 0.95)
Example No. 24
def test_net(sym, imdb, args, config):
    logger.addHandler(
        logging.FileHandler("{0}/{1}".format(args.prefix, 'test.log')))
    # print config
    logger.info('called with args\n{}'.format(pprint.pformat(vars(args))))

    # setup context
    ctx = mx.gpu(args.gpu)

    # load testing data
    test_data = TestLoader(imdb.roidb,
                           batch_size=1,
                           short=args.img_short_side,
                           max_size=args.img_long_side,
                           mean=config.transform['img_pixel_means'],
                           std=config.transform['img_pixel_stds'])

    # load params
    arg_params, aux_params = load_param(args.params, ctx=ctx)

    # produce max possible shapes
    data_names = ['data', 'im_info']
    label_names = None
    data_shapes = [('data', (1, 3, args.img_long_side, args.img_long_side)),
                   ('im_info', (1, 3))]
    label_shapes = None

    # check shapes
    check_shape(sym, data_shapes, arg_params, aux_params)

    # create and bind module
    mod = Module(sym, data_names, label_names, context=ctx)
    mod.bind(data_shapes, label_shapes, for_training=False)
    mod.init_params(arg_params=arg_params, aux_params=aux_params)

    # all detections are collected into:
    #    all_boxes[cls][image] = N x 5 array of detections in
    #    (x1, y1, x2, y2, score)
    all_boxes = [[[] for _ in range(imdb.num_images)]
                 for _ in range(imdb.num_classes)]

    # start detection
    with tqdm(total=imdb.num_images) as pbar:
        for i, data_batch in enumerate(test_data):
            # forward
            im_info = data_batch.data[1][0]
            mod.forward(data_batch)
            rois, scores, bbox_deltas = mod.get_outputs()
            rois = rois[:, 1:]
            scores = scores[0]
            bbox_deltas = bbox_deltas[0]

            det = im_detect(rois,
                            scores,
                            bbox_deltas,
                            im_info,
                            bbox_stds=args.rcnn_bbox_stds,
                            nms_thresh=args.rcnn_nms_thresh,
                            conf_thresh=args.rcnn_conf_thresh)
            for j in range(1, imdb.num_classes):
                indexes = np.where(det[:, 0] == j)[0]
                all_boxes[j][i] = np.concatenate((det[:, -4:], det[:, [1]]),
                                                 axis=-1)[indexes, :]
            pbar.update(data_batch.data[0].shape[0])

    # evaluate model
    imdb.evaluate_detections(all_boxes)
Example No. 25
def train_net(args,
              ctx,
              pretrained,
              epoch,
              prefix,
              begin_epoch,
              end_epoch,
              lr=0.001,
              lr_step='5'):
    # setup config
    #init_config()
    #print(config)
    # setup multi-gpu

    input_batch_size = config.TRAIN.BATCH_IMAGES * len(ctx)

    # print config
    logger.info(pprint.pformat(config))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in args.image_set.split('+')]
    roidbs = [
        load_gt_roidb(args.dataset,
                      image_set,
                      args.root_path,
                      args.dataset_path,
                      flip=not args.no_flip) for image_set in image_sets
    ]
    #roidb = merge_roidb(roidbs)
    #roidb = filter_roidb(roidb)
    roidb = roidbs[0]

    # load symbol
    #sym = eval('get_' + args.network + '_train')(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    #feat_sym = sym.get_internals()['rpn_cls_score_output']
    #train_data = AnchorLoader(feat_sym, roidb, batch_size=input_batch_size, shuffle=not args.no_shuffle,
    #                          ctx=ctx, work_load_list=args.work_load_list,
    #                          feat_stride=config.RPN_FEAT_STRIDE, anchor_scales=config.ANCHOR_SCALES,
    #                          anchor_ratios=config.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    # load and initialize params
    sym = None
    if len(pretrained) == 0:
        arg_params = {}
        aux_params = {}
    else:
        logger.info('loading %s,%d' % (pretrained, epoch))
        sym, arg_params, aux_params = mx.model.load_checkpoint(
            pretrained, epoch)
        #arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        #for k in ['rpn_conv_3x3', 'rpn_cls_score', 'rpn_bbox_pred', 'cls_score', 'bbox_pred']:
        #  _k = k+"_weight"
        #  if _k in arg_shape_dict:
        #    v = 0.001 if _k.startswith('bbox_') else 0.01
        #    arg_params[_k] = mx.random.normal(0, v, shape=arg_shape_dict[_k])
        #    print('init %s with normal %.5f'%(_k,v))
        #  _k = k+"_bias"
        #  if _k in arg_shape_dict:
        #    arg_params[_k] = mx.nd.zeros(shape=arg_shape_dict[_k])
        #    print('init %s with zero'%(_k))

    sym = eval('get_' + args.network + '_train')(sym)
    feat_sym = []
    for stride in config.RPN_FEAT_STRIDE:
        feat_sym.append(
            sym.get_internals()['face_rpn_cls_score_stride%s_output' % stride])

    train_data = CropLoader(feat_sym,
                            roidb,
                            batch_size=input_batch_size,
                            shuffle=not args.no_shuffle,
                            ctx=ctx,
                            work_load_list=args.work_load_list)

    # infer max shape
    max_data_shape = [('data', (1, 3, max([v[1] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    #max_data_shape = [('data', (1, 3, max([v[1] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (1, roidb[0]['max_num_boxes'], 5)))
    logger.info('providing maximum shape %s %s' %
                (max_data_shape, max_label_shape))

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    logger.info('output shape %s' % pprint.pformat(out_shape_dict))

    for k, v in arg_shape_dict.items():
        if k.find('upsampling') >= 0:
            print('initializing upsampling_weight', k)
            arg_params[k] = mx.nd.zeros(shape=v)
            init = mx.init.Initializer()
            init._init_bilinear(k, arg_params[k])
            #print(args[k])

    # check parameter shapes
    #for k in sym.list_arguments():
    #    if k in data_shape_dict:
    #        continue
    #    assert k in arg_params, k + ' not initialized'
    #    assert arg_params[k].shape == arg_shape_dict[k], \
    #        'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    #for k in sym.list_auxiliary_states():
    #    assert k in aux_params, k + ' not initialized'
    #    assert aux_params[k].shape == aux_shape_dict[k], \
    #        'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    fixed_param_prefix = config.FIXED_PARAMS
    # create solver
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    fixed_param_names = get_fixed_params(sym, fixed_param_prefix)
    print('fixed', fixed_param_names, file=sys.stderr)
    mod = Module(sym,
                 data_names=data_names,
                 label_names=label_names,
                 logger=logger,
                 context=ctx,
                 work_load_list=args.work_load_list,
                 fixed_param_names=fixed_param_names)

    # metric
    eval_metrics = mx.metric.CompositeEvalMetric()
    mid = 0
    for m in range(len(config.RPN_FEAT_STRIDE)):
        stride = config.RPN_FEAT_STRIDE[m]
        #mid = m*MSTEP
        _metric = metric.RPNAccMetric(pred_idx=mid,
                                      label_idx=mid + 1,
                                      name='RPNAcc_s%s' % stride)
        eval_metrics.add(_metric)
        mid += 2
        #_metric = metric.RPNLogLossMetric(pred_idx=mid, label_idx=mid+1)
        #eval_metrics.add(_metric)

        _metric = metric.RPNL1LossMetric(loss_idx=mid,
                                         weight_idx=mid + 1,
                                         name='RPNL1Loss_s%s' % stride)
        eval_metrics.add(_metric)
        mid += 2
        if config.FACE_LANDMARK:
            _metric = metric.RPNL1LossMetric(loss_idx=mid,
                                             weight_idx=mid + 1,
                                             name='RPNLandMarkL1Loss_s%s' %
                                             stride)
            eval_metrics.add(_metric)
            mid += 2
        if config.HEAD_BOX:
            _metric = metric.RPNAccMetric(pred_idx=mid,
                                          label_idx=mid + 1,
                                          name='RPNAcc_head_s%s' % stride)
            eval_metrics.add(_metric)
            mid += 2
            #_metric = metric.RPNLogLossMetric(pred_idx=mid, label_idx=mid+1)
            #eval_metrics.add(_metric)

            _metric = metric.RPNL1LossMetric(loss_idx=mid,
                                             weight_idx=mid + 1,
                                             name='RPNL1Loss_head_s%s' %
                                             stride)
            eval_metrics.add(_metric)
            mid += 2

    # callback
    #means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    #stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    #epoch_end_callback = callback.do_checkpoint(prefix)
    epoch_end_callback = None
    # decide learning rate
    #base_lr = lr
    #lr_factor = 0.1
    #lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))

    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr_iters = [
        int(epoch * len(roidb) / input_batch_size) for epoch in lr_epoch_diff
    ]

    lr_steps = []
    if len(lr_iters) == 5:
        factors = [0.5, 0.5, 0.4, 0.1, 0.1]
        for i in range(5):
            lr_steps.append((lr_iters[i], factors[i]))
    elif len(lr_iters) == 8:  #warmup
        for li in lr_iters[0:5]:
            lr_steps.append((li, 1.5849))
        for li in lr_iters[5:]:
            lr_steps.append((li, 0.1))
    else:
        for li in lr_iters:
            lr_steps.append((li, 0.1))
    #lr_steps = [ (20,0.1), (40, 0.1) ] #XXX

    end_epoch = 10000
    logger.info('lr %f lr_epoch_diff %s lr_steps %s' %
                (lr, lr_epoch_diff, lr_steps))
    # optimizer
    opt = optimizer.SGD(learning_rate=lr,
                        momentum=0.9,
                        wd=0.0005,
                        rescale_grad=1.0 / len(ctx),
                        clip_gradient=None)
    initializer = mx.init.Xavier()
    #initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style

    train_data = mx.io.PrefetchingIter(train_data)

    _cb = mx.callback.Speedometer(train_data.batch_size,
                                  frequent=args.frequent,
                                  auto_reset=False)
    global_step = [0]

    def save_model(epoch):
        arg, aux = mod.get_params()
        all_layers = mod.symbol.get_internals()
        outs = []
        for stride in config.RPN_FEAT_STRIDE:
            num_anchors = config.RPN_ANCHOR_CFG[str(stride)]['NUM_ANCHORS']
            _name = 'face_rpn_cls_score_stride%d_output' % stride
            rpn_cls_score = all_layers[_name]

            # prepare rpn data
            rpn_cls_score_reshape = mx.symbol.Reshape(
                data=rpn_cls_score,
                shape=(0, 2, -1, 0),
                name="face_rpn_cls_score_reshape_stride%d" % stride)

            rpn_cls_prob = mx.symbol.SoftmaxActivation(
                data=rpn_cls_score_reshape,
                mode="channel",
                name="face_rpn_cls_prob_stride%d" % stride)
            rpn_cls_prob_reshape = mx.symbol.Reshape(
                data=rpn_cls_prob,
                shape=(0, 2 * num_anchors, -1, 0),
                name='face_rpn_cls_prob_reshape_stride%d' % stride)
            _name = 'face_rpn_bbox_pred_stride%d_output' % stride
            rpn_bbox_pred = all_layers[_name]
            outs.append(rpn_cls_prob_reshape)
            outs.append(rpn_bbox_pred)
            if config.FACE_LANDMARK:
                _name = 'face_rpn_landmark_pred_stride%d_output' % stride
                rpn_landmark_pred = all_layers[_name]
                outs.append(rpn_landmark_pred)
        _sym = mx.sym.Group(outs)
        mx.model.save_checkpoint(prefix, epoch, _sym, arg, aux)

    def _batch_callback(param):
        #global global_step
        _cb(param)
        global_step[0] += 1
        mbatch = global_step[0]
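        # Manual schedule: scale the optimizer lr at each scheduled iteration;
        # once the last scheduled step is reached, save a deploy checkpoint
        # (epoch 0) and exit.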
        for step in lr_steps:
            if mbatch == step[0]:
                opt.lr *= step[1]
                print('lr change to',
                      opt.lr,
                      ' in batch',
                      mbatch,
                      file=sys.stderr)
                break

        if mbatch == lr_steps[-1][0]:
            print('saving final checkpoint', mbatch, file=sys.stderr)
            save_model(0)
            #arg, aux = mod.get_params()
            #mx.model.save_checkpoint(prefix, 99, mod.symbol, arg, aux)
            sys.exit(0)

    if args.checkpoint is not None:
        _, arg_params, aux_params = mx.model.load_checkpoint(
            args.checkpoint, 0)

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=checkpoint_callback('model/testR50'),
            batch_end_callback=_batch_callback,
            kvstore=args.kvstore,
            optimizer=opt,
            initializer=initializer,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
Example No. 26
0
    def check_quantize_model(qdtype):
        if is_test_for_native_cpu():
            print(
                'skipped testing quantize_model for native cpu since it is not supported yet'
            )
            return
        elif qdtype == 'int8' and is_test_for_mkldnn():
            print(
                'skipped testing quantize_model for mkldnn cpu int8 since it is not supported yet'
            )
            return
        elif qdtype == 'uint8' and is_test_for_gpu():
            print(
                'skipped testing quantize_model for gpu uint8 since it is not supported yet'
            )
            return

        def check_params(params, qparams, qsym=None):
            if qsym is None:
                assert len(params) == len(qparams)
                for k, v in params.items():
                    assert k in qparams
                    assert same(v.asnumpy(), qparams[k].asnumpy())
            else:
                qparams_ground_truth = mx.contrib.quant._quantize_params(
                    qsym, params, th_dict={})
                assert len(qparams) == len(qparams_ground_truth)
                for k, v in qparams_ground_truth.items():
                    assert k in qparams
                    assert same(v.asnumpy(), qparams[k].asnumpy())

        def check_qsym_calibrated(qsym):
            attrs = qsym.attr_dict()
            for k, v in attrs.items():
                if k.find('requantize_') != -1:
                    assert 'min_calib_range' in v
                    assert 'max_calib_range' in v

        def check_qsym_qdtype(qsym, qdtype):
            attrs = qsym.attr_dict()
            for k, v in attrs.items():
                if k.find('_quantize') != -1:
                    assert 'out_type' in v
                    assert v['out_type'] == qdtype

        sym = get_fp32_sym()
        batch_size = 4
        label_shape = (batch_size, 10)
        data_shape = (batch_size, 4, 10, 10)

        length = batch_size  # specify num of outputs from split op
        msym = get_fp32_sym_with_multiple_outputs(length)
        msym_label_shape = (length, 10)
        msym_data_shape = (length, 4, 4, 10, 10)

        for s, dshape, lshape in zip((sym, msym),
                                     (data_shape, msym_data_shape),
                                     (label_shape, msym_label_shape)):
            mod = Module(symbol=s)
            mod.bind(data_shapes=[('data', dshape)],
                     label_shapes=[('softmax_label', lshape)])
            mod.init_params()
            arg_params, aux_params = mod.get_params()
            qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
                sym=s,
                arg_params=arg_params,
                aux_params=aux_params,
                ctx=mx.current_context(),
                quantized_dtype=qdtype,
                calib_mode='none')
            check_params(arg_params, qarg_params, qsym)
            check_params(aux_params, qaux_params)
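            # Second pass: 'naive' calibration feeds calib_data through the
            # graph to collect per-layer min/max ranges; the resulting
            # requantize attributes and output dtype are verified below.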

            calib_data = mx.nd.random.uniform(shape=dshape)
            calib_data = NDArrayIter(data=calib_data, batch_size=batch_size)
            calib_data = DummyIter(calib_data)
            qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
                sym=s,
                arg_params=arg_params,
                aux_params=aux_params,
                ctx=mx.current_context(),
                quantized_dtype=qdtype,
                calib_mode='naive',
                calib_data=calib_data,
                num_calib_examples=20)
            check_params(arg_params, qarg_params, qsym)
            check_params(aux_params, qaux_params)
            check_qsym_calibrated(qsym)
            check_qsym_qdtype(qsym, qdtype)
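
    # Assumed driver (not shown in the original excerpt): the helper above is
    # presumably invoked once per quantized dtype under test.
    for qdtype in ['int8', 'uint8']:
        check_quantize_model(qdtype)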
Example No. 27
0
def train_net(args,
              ctx,
              pretrained,
              epoch,
              prefix,
              begin_epoch,
              end_epoch,
              lr=0.001,
              lr_step='5'):
    # setup config
    #init_config()
    #print(config)
    # setup multi-gpu

    input_batch_size = config.TRAIN.BATCH_IMAGES * len(ctx)

    # print config
    logger.info(pprint.pformat(config))

    # load dataset and prepare imdb for training
    image_sets = [iset for iset in args.image_set.split('+')]
    roidbs = [
        load_gt_roidb(args.dataset,
                      image_set,
                      args.root_path,
                      args.dataset_path,
                      flip=not args.no_flip) for image_set in image_sets
    ]
    roidb = merge_roidb(roidbs)
    roidb = filter_roidb(roidb)

    # load symbol
    #sym = eval('get_' + args.network + '_train')(num_classes=config.NUM_CLASSES, num_anchors=config.NUM_ANCHORS)
    #feat_sym = sym.get_internals()['rpn_cls_score_output']
    #train_data = AnchorLoader(feat_sym, roidb, batch_size=input_batch_size, shuffle=not args.no_shuffle,
    #                          ctx=ctx, work_load_list=args.work_load_list,
    #                          feat_stride=config.RPN_FEAT_STRIDE, anchor_scales=config.ANCHOR_SCALES,
    #                          anchor_ratios=config.ANCHOR_RATIOS, aspect_grouping=config.TRAIN.ASPECT_GROUPING)

    sym = eval('get_' + args.network + '_train')()
    #print(sym.get_internals())
    feat_sym = []
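    # Collect the per-stride rpn_cls_score outputs; the data loader uses them
    # to infer the anchor label shapes for each stride.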
    for stride in config.RPN_FEAT_STRIDE:
        feat_sym.append(sym.get_internals()['rpn_cls_score_stride%s_output' %
                                            stride])

    #train_data = AnchorLoaderFPN(feat_sym, roidb, batch_size=input_batch_size, shuffle=not args.no_shuffle,
    #                              ctx=ctx, work_load_list=args.work_load_list)
    train_data = CropLoader(feat_sym,
                            roidb,
                            batch_size=input_batch_size,
                            shuffle=not args.no_shuffle,
                            ctx=ctx,
                            work_load_list=args.work_load_list)

    # infer max shape
    max_data_shape = [('data', (1, 3, max([v[1] for v in config.SCALES]),
                                max([v[1] for v in config.SCALES])))]
    #max_data_shape = [('data', (1, 3, max([v[1] for v in config.SCALES]), max([v[1] for v in config.SCALES])))]
    max_data_shape, max_label_shape = train_data.infer_shape(max_data_shape)
    max_data_shape.append(('gt_boxes', (1, roidb[0]['max_num_boxes'], 5)))
    logger.info('providing maximum shape %s %s' %
                (max_data_shape, max_label_shape))

    # infer shape
    data_shape_dict = dict(train_data.provide_data + train_data.provide_label)
    arg_shape, out_shape, aux_shape = sym.infer_shape(**data_shape_dict)
    arg_shape_dict = dict(zip(sym.list_arguments(), arg_shape))
    out_shape_dict = dict(zip(sym.list_outputs(), out_shape))
    aux_shape_dict = dict(zip(sym.list_auxiliary_states(), aux_shape))
    logger.info('output shape %s' % pprint.pformat(out_shape_dict))

    # load and initialize params
    if args.resume:
        arg_params, aux_params = load_param(prefix, begin_epoch, convert=True)
    else:
        arg_params, aux_params = load_param(pretrained, epoch, convert=True)
        #for k in ['rpn_conv_3x3', 'rpn_cls_score', 'rpn_bbox_pred', 'cls_score', 'bbox_pred']:
        #  _k = k+"_weight"
        #  if _k in arg_shape_dict:
        #    v = 0.001 if _k.startswith('bbox_') else 0.01
        #    arg_params[_k] = mx.random.normal(0, v, shape=arg_shape_dict[_k])
        #    print('init %s with normal %.5f'%(_k,v))
        #  _k = k+"_bias"
        #  if _k in arg_shape_dict:
        #    arg_params[_k] = mx.nd.zeros(shape=arg_shape_dict[_k])
        #    print('init %s with zero'%(_k))

        for k, v in arg_shape_dict.items():
            if k.find('upsampling') >= 0:
                print('initializing upsampling_weight', k)
                arg_params[k] = mx.nd.zeros(shape=v)
                init = mx.init.Initializer()
                init._init_bilinear(k, arg_params[k])
                #print(args[k])

    # check parameter shapes
    #for k in sym.list_arguments():
    #    if k in data_shape_dict:
    #        continue
    #    assert k in arg_params, k + ' not initialized'
    #    assert arg_params[k].shape == arg_shape_dict[k], \
    #        'shape inconsistent for ' + k + ' inferred ' + str(arg_shape_dict[k]) + ' provided ' + str(arg_params[k].shape)
    #for k in sym.list_auxiliary_states():
    #    assert k in aux_params, k + ' not initialized'
    #    assert aux_params[k].shape == aux_shape_dict[k], \
    #        'shape inconsistent for ' + k + ' inferred ' + str(aux_shape_dict[k]) + ' provided ' + str(aux_params[k].shape)

    # create solver
    fixed_param_prefix = config.FIXED_PARAMS
    data_names = [k[0] for k in train_data.provide_data]
    label_names = [k[0] for k in train_data.provide_label]
    #mod = MutableModule(sym, data_names=data_names, label_names=label_names,
    #                    logger=logger, context=ctx, work_load_list=args.work_load_list,
    #                    max_data_shapes=max_data_shape, max_label_shapes=max_label_shape,
    #                    fixed_param_prefix=fixed_param_prefix)
    fixed_param_names = get_fixed_params(sym, fixed_param_prefix)
    print('fixed', fixed_param_names, file=sys.stderr)
    mod = Module(sym,
                 data_names=data_names,
                 label_names=label_names,
                 logger=logger,
                 context=ctx,
                 work_load_list=args.work_load_list,
                 fixed_param_names=fixed_param_names)

    # decide training params
    # metric
    eval_metrics = mx.metric.CompositeEvalMetric()
    #if len(sym.list_outputs())>4:
    #  metric_names = ['RPNAccMetric', 'RPNLogLossMetric', 'RPNL1LossMetric', 'RCNNAccMetric', 'RCNNLogLossMetric', 'RCNNL1LossMetric']
    #else:#train rpn only
    #print('sym', sym.list_outputs())
    #metric_names = ['RPNAccMetric', 'RPNLogLossMetric', 'RPNL1LossMetric']
    mids = [0, 4, 8]
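    # Each stride contributes four consecutive outputs (cls score, cls label,
    # bbox loss, bbox weight), hence the metric index offsets 0, 4 and 8.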
    for mid in mids:
        _metric = metric.RPNAccMetric(pred_idx=mid, label_idx=mid + 1)
        eval_metrics.add(_metric)
        #_metric = metric.RPNLogLossMetric(pred_idx=mid, label_idx=mid+1)
        #eval_metrics.add(_metric)
        _metric = metric.RPNL1LossMetric(loss_idx=mid + 2, weight_idx=mid + 3)
        eval_metrics.add(_metric)

    #rpn_eval_metric = metric.RPNAccMetric()
    #rpn_cls_metric = metric.RPNLogLossMetric()
    #rpn_bbox_metric = metric.RPNL1LossMetric()
    #eval_metric = metric.RCNNAccMetric()
    #cls_metric = metric.RCNNLogLossMetric()
    #bbox_metric = metric.RCNNL1LossMetric()
    #for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]:
    #    eval_metrics.add(child_metric)
    # callback
    means = np.tile(np.array(config.TRAIN.BBOX_MEANS), config.NUM_CLASSES)
    stds = np.tile(np.array(config.TRAIN.BBOX_STDS), config.NUM_CLASSES)
    #epoch_end_callback = callback.do_checkpoint(prefix, means, stds)
    epoch_end_callback = None
    # decide learning rate
    base_lr = lr
    lr_factor = 0.1
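    # Pre-decay lr for resumed runs (one lr_factor per decay epoch already
    # passed before begin_epoch) and convert the remaining decay epochs into
    # global iteration counts.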
    lr_epoch = [int(epoch) for epoch in lr_step.split(',')]
    lr_epoch_diff = [
        epoch - begin_epoch for epoch in lr_epoch if epoch > begin_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / input_batch_size) for epoch in lr_epoch_diff
    ]

    #lr_iters = [36000,42000] #TODO
    #lr_iters = [40000,50000,60000] #TODO
    #lr_iters = [40,50,60] #TODO
    end_epoch = 10000
    #lr_iters = [4,8] #TODO
    logger.info('lr %f lr_epoch_diff %s lr_iters %s' %
                (lr, lr_epoch_diff, lr_iters))
    #lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
    # optimizer
    opt = optimizer.SGD(learning_rate=lr,
                        momentum=0.9,
                        wd=0.0005,
                        rescale_grad=1.0 / len(ctx),
                        clip_gradient=None)
    initializer = mx.init.Xavier()
    #initializer = mx.init.Xavier(rnd_type='gaussian', factor_type="out", magnitude=2) #resnet style

    if len(ctx) > 1:
        train_data = mx.io.PrefetchingIter(train_data)

    _cb = mx.callback.Speedometer(train_data.batch_size,
                                  frequent=args.frequent,
                                  auto_reset=False)
    global_step = [0]

    def save_model(epoch):
        arg, aux = mod.get_params()
        all_layers = mod.symbol.get_internals()
        outs = []
        for stride in config.RPN_FEAT_STRIDE:
            num_anchors = config.RPN_ANCHOR_CFG[str(stride)]['NUM_ANCHORS']
            _name = 'rpn_cls_score_stride%d_output' % stride
            rpn_cls_score = all_layers[_name]

            # prepare rpn data
            rpn_cls_score_reshape = mx.symbol.Reshape(
                data=rpn_cls_score,
                shape=(0, 2, -1, 0),
                name="rpn_cls_score_reshape_stride%d" % stride)

            rpn_cls_prob = mx.symbol.SoftmaxActivation(
                data=rpn_cls_score_reshape,
                mode="channel",
                name="rpn_cls_prob_stride%d" % stride)
            rpn_cls_prob_reshape = mx.symbol.Reshape(
                data=rpn_cls_prob,
                shape=(0, 2 * num_anchors, -1, 0),
                name='rpn_cls_prob_reshape_stride%d' % stride)
            _name = 'rpn_bbox_pred_stride%d_output' % stride
            rpn_bbox_pred = all_layers[_name]
            outs.append(rpn_cls_prob_reshape)
            outs.append(rpn_bbox_pred)
        _sym = mx.sym.Group(outs)
        mx.model.save_checkpoint(prefix, epoch, _sym, arg, aux)

    def _batch_callback(param):
        #global global_step
        _cb(param)
        global_step[0] += 1
        mbatch = global_step[0]
        for _iter in lr_iters:
            if mbatch == _iter:
                opt.lr *= 0.1
                print('lr change to',
                      opt.lr,
                      ' in batch',
                      mbatch,
                      file=sys.stderr)
                break

        if mbatch % 1000 == 0:
            print('saving checkpoint', mbatch, file=sys.stderr)
            save_model(mbatch)

        if mbatch == lr_iters[-1]:
            print('saving final checkpoint', mbatch, file=sys.stderr)
            save_model(0)
            #arg, aux = mod.get_params()
            #mx.model.save_checkpoint(prefix, 99, mod.symbol, arg, aux)
            sys.exit(0)

    # train
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=_batch_callback,
            kvstore=args.kvstore,
            optimizer=opt,
            initializer=initializer,
            allow_missing=True,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=begin_epoch,
            num_epoch=end_epoch)
Example No. 28
0
def train(sym, roidb):
    '''
    User function: Start training

    Args:
        sym (mxnet model): MXNet model returned from the set_network() function
        roidb (dataloader): Dataloader returned from the set_model() function

    Returns:
        None
    '''

    # print config
    #logger.info('called with system_dict\n{}'.format(pprint.pformat(vars(system_dict))))
    #print(system_dict)

    # setup multi-gpu
    if len(system_dict["gpus"]) == 0:
        ctx = [mx.cpu(0)]
    else:
        ctx = [mx.gpu(int(i)) for i in system_dict["gpus"]]
    batch_size = system_dict["rcnn_batch_size"] * len(ctx)
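    # Note: system_dict is assumed to be a module-level configuration dict
    # populated by the earlier setup calls (e.g. the set_network()/set_model()
    # steps mentioned in the docstring); every hyper-parameter below is read
    # from it.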

    # load training data
    feat_sym = sym.get_internals()['rpn_cls_score_output']
    ag = AnchorGenerator(feat_stride=system_dict["rpn_feat_stride"],
                         anchor_scales=system_dict["rpn_anchor_scales"], anchor_ratios=system_dict["rpn_anchor_ratios"])
    asp = AnchorSampler(allowed_border=system_dict["rpn_allowed_border"], batch_rois=system_dict["rpn_batch_rois"],
                        fg_fraction=system_dict["rpn_fg_fraction"], fg_overlap=system_dict["rpn_fg_overlap"],
                        bg_overlap=system_dict["rpn_bg_overlap"])
    train_data = AnchorLoader(roidb, batch_size, system_dict["img_short_side"], system_dict["img_long_side"],
                              system_dict["img_pixel_means"], system_dict["img_pixel_stds"], feat_sym, ag, asp, shuffle=True)

    # produce shape max possible
    _, out_shape, _ = feat_sym.infer_shape(data=(1, 3, system_dict["img_long_side"], system_dict["img_long_side"]))
    feat_height, feat_width = out_shape[0][-2:]
    rpn_num_anchors = len(system_dict["rpn_anchor_scales"]) * len(system_dict["rpn_anchor_ratios"])
    data_names = ['data', 'im_info', 'gt_boxes']
    label_names = ['label', 'bbox_target', 'bbox_weight']
    data_shapes = [('data', (batch_size, 3, system_dict["img_long_side"], system_dict["img_long_side"])),
                   ('im_info', (batch_size, 3)),
                   ('gt_boxes', (batch_size, 100, 5))]
    label_shapes = [('label', (batch_size, 1, rpn_num_anchors * feat_height, feat_width)),
                    ('bbox_target', (batch_size, 4 * rpn_num_anchors, feat_height, feat_width)),
                    ('bbox_weight', (batch_size, 4 * rpn_num_anchors, feat_height, feat_width))]
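    # 'label' holds one class label per anchor per feature-map cell, while
    # 'bbox_target' and 'bbox_weight' carry 4 regression values per anchor.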

    # print shapes
    data_shape_dict, out_shape_dict = infer_data_shape(sym, data_shapes + label_shapes)
    logger.info('max input shape\n%s' % pprint.pformat(data_shape_dict))
    logger.info('max output shape\n%s' % pprint.pformat(out_shape_dict))

    # load and initialize params
    if system_dict["resume"]:
        arg_params, aux_params = load_param(system_dict["resume"])
    else:
        arg_params, aux_params = load_param(system_dict["pretrained"])
        arg_params, aux_params = initialize_frcnn(sym, data_shapes, arg_params, aux_params)

    # check parameter shapes
    check_shape(sym, data_shapes + label_shapes, arg_params, aux_params)

    # check fixed params
    fixed_param_names = get_fixed_params(sym, system_dict["net_fixed_params"])
    logger.info('locking params\n%s' % pprint.pformat(fixed_param_names))

    # metric
    rpn_eval_metric = RPNAccMetric()
    rpn_cls_metric = RPNLogLossMetric()
    rpn_bbox_metric = RPNL1LossMetric()
    eval_metric = RCNNAccMetric()
    cls_metric = RCNNLogLossMetric()
    bbox_metric = RCNNL1LossMetric()
    eval_metrics = mx.metric.CompositeEvalMetric()
    for child_metric in [rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric, cls_metric, bbox_metric]:
        eval_metrics.add(child_metric)

    # callback
    batch_end_callback = mx.callback.Speedometer(batch_size, frequent=system_dict["log_interval"], auto_reset=False)
    epoch_end_callback = mx.callback.do_checkpoint(system_dict["save_prefix"])

    # learning schedule
    base_lr = system_dict["lr"]
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in system_dict["lr_decay_epoch"].split(',')]
    lr_epoch_diff = [epoch - system_dict["start_epoch"] for epoch in lr_epoch if epoch > system_dict["start_epoch"]]
    lr = base_lr * (lr_factor ** (len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff]
    logger.info('lr %f lr_epoch_diff %s lr_iters %s' % (lr, lr_epoch_diff, lr_iters))
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
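    # MultiFactorScheduler multiplies the learning rate by lr_factor each time
    # the global iteration count reaches an entry of lr_iters; it is handed to
    # the optimizer below instead of a manual batch callback.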
    # optimizer
    optimizer_params = {'momentum': 0.9,
                        'wd': 0.0005,
                        'learning_rate': lr,
                        'lr_scheduler': lr_scheduler,
                        'rescale_grad': (1.0 / batch_size),
                        'clip_gradient': 5}

    # train
    mod = Module(sym, data_names=data_names, label_names=label_names,
                 logger=logger, context=ctx, work_load_list=None,
                 fixed_param_names=fixed_param_names)
    mod.fit(train_data, eval_metric=eval_metrics, epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback, kvstore='device',
            optimizer='sgd', optimizer_params=optimizer_params,
            arg_params=arg_params, aux_params=aux_params, begin_epoch=system_dict["start_epoch"], num_epoch=system_dict["epochs"])
Example No. 29
0
def train_net(sym, roidb, args):
    # print config
    logger.info('called with args\n{}'.format(pprint.pformat(vars(args))))

    # setup multi-gpu
    ctx = [mx.cpu()] if not args.gpus else [
        mx.gpu(int(i)) for i in args.gpus.split(',')
    ]
    batch_size = args.rcnn_batch_size * len(ctx)

    # load training data
    feat_sym = sym.get_internals()['rpn_cls_score_output']
    ag = AnchorGenerator(feat_stride=args.rpn_feat_stride,
                         anchor_scales=args.rpn_anchor_scales,
                         anchor_ratios=args.rpn_anchor_ratios)
    asp = AnchorSampler(allowed_border=args.rpn_allowed_border,
                        batch_rois=args.rpn_batch_rois,
                        fg_fraction=args.rpn_fg_fraction,
                        fg_overlap=args.rpn_fg_overlap,
                        bg_overlap=args.rpn_bg_overlap)
    train_data = AnchorLoader(roidb,
                              batch_size,
                              args.img_short_side,
                              args.img_long_side,
                              args.img_pixel_means,
                              args.img_pixel_stds,
                              feat_sym,
                              ag,
                              asp,
                              shuffle=True)

    # produce shape max possible
    _, out_shape, _ = feat_sym.infer_shape(data=(1, 3, args.img_long_side,
                                                 args.img_long_side))
    feat_height, feat_width = out_shape[0][-2:]
    rpn_num_anchors = len(args.rpn_anchor_scales) * len(args.rpn_anchor_ratios)
    data_names = ['data', 'im_info', 'gt_boxes']
    label_names = ['label', 'bbox_target', 'bbox_weight']
    data_shapes = [('data', (batch_size, 3,
                             args.img_long_side, args.img_long_side)),
                   ('im_info', (batch_size, 3)),
                   ('gt_boxes', (batch_size, 100, 5))]
    label_shapes = [('label', (batch_size, 1, rpn_num_anchors * feat_height,
                               feat_width)),
                    ('bbox_target', (batch_size, 4 * rpn_num_anchors,
                                     feat_height, feat_width)),
                    ('bbox_weight', (batch_size, 4 * rpn_num_anchors,
                                     feat_height, feat_width))]

    # print shapes
    data_shape_dict, out_shape_dict = infer_data_shape(
        sym, data_shapes + label_shapes)
    logger.info('max input shape\n%s' % pprint.pformat(data_shape_dict))
    logger.info('max output shape\n%s' % pprint.pformat(out_shape_dict))

    # load and initialize params
    if args.resume:
        arg_params, aux_params = load_param(args.resume)
    else:
        arg_params, aux_params = load_param(args.pretrained)
        arg_params, aux_params = initialize_frcnn(sym, data_shapes, arg_params,
                                                  aux_params)

    # check parameter shapes
    check_shape(sym, data_shapes + label_shapes, arg_params, aux_params)

    # check fixed params
    fixed_param_names = get_fixed_params(sym, args.net_fixed_params)
    logger.info('locking params\n%s' % pprint.pformat(fixed_param_names))

    # metric
    rpn_eval_metric = RPNAccMetric()
    rpn_cls_metric = RPNLogLossMetric()
    rpn_bbox_metric = RPNL1LossMetric()
    eval_metric = RCNNAccMetric()
    cls_metric = RCNNLogLossMetric()
    bbox_metric = RCNNL1LossMetric()
    eval_metrics = mx.gluon.metric.CompositeEvalMetric()
    for child_metric in [
            rpn_eval_metric, rpn_cls_metric, rpn_bbox_metric, eval_metric,
            cls_metric, bbox_metric
    ]:
        eval_metrics.add(child_metric)

    # callback
    batch_end_callback = mx.callback.Speedometer(batch_size,
                                                 frequent=args.log_interval,
                                                 auto_reset=False)
    epoch_end_callback = mx.callback.do_checkpoint(args.save_prefix)

    # learning schedule
    base_lr = args.lr
    lr_factor = 0.1
    lr_epoch = [int(epoch) for epoch in args.lr_decay_epoch.split(',')]
    lr_epoch_diff = [
        epoch - args.start_epoch for epoch in lr_epoch
        if epoch > args.start_epoch
    ]
    lr = base_lr * (lr_factor**(len(lr_epoch) - len(lr_epoch_diff)))
    lr_iters = [
        int(epoch * len(roidb) / batch_size) for epoch in lr_epoch_diff
    ]
    logger.info('lr %f lr_epoch_diff %s lr_iters %s' %
                (lr, lr_epoch_diff, lr_iters))
    lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(lr_iters, lr_factor)
    # optimizer
    optimizer_params = {
        'momentum': 0.9,
        'wd': 0.0005,
        'learning_rate': lr,
        'lr_scheduler': lr_scheduler,
        'rescale_grad': (1.0 / batch_size),
        'clip_gradient': 5
    }

    # train
    mod = Module(sym,
                 data_names=data_names,
                 label_names=label_names,
                 logger=logger,
                 context=ctx,
                 work_load_list=None,
                 fixed_param_names=fixed_param_names)
    mod.fit(train_data,
            eval_metric=eval_metrics,
            epoch_end_callback=epoch_end_callback,
            batch_end_callback=batch_end_callback,
            kvstore='device',
            optimizer='sgd',
            optimizer_params=optimizer_params,
            arg_params=arg_params,
            aux_params=aux_params,
            begin_epoch=args.start_epoch,
            num_epoch=args.epochs)
Example No. 30
0
    def check_quantize_model(qdtype):
        if is_test_for_native_cpu():
            print(
                'skipped testing test_quantize_model_with_forward for native cpu since it is not supported yet'
            )
            return
        elif qdtype == 'int8' and is_test_for_mkldnn():
            print(
                'skipped testing test_quantize_model_with_forward for mkldnn cpu int8 since it is not supported yet'
            )
            return
        elif qdtype == 'uint8' and is_test_for_gpu():
            print(
                'skipped testing test_quantize_model_with_forward for gpu uint8 since it is not supported yet'
            )
            return

        def check_params(params, qparams, qsym=None):
            if qsym is None:
                assert len(params) == len(qparams)
                for k, v in params.items():
                    assert k in qparams
                    assert same(v.asnumpy(), qparams[k].asnumpy())
            else:
                qparams_ground_truth = mx.contrib.quant._quantize_params(
                    qsym, params, th_dict={})
                assert len(qparams) == len(qparams_ground_truth)
                for k, v in qparams_ground_truth.items():
                    assert k in qparams
                    assert same(v.asnumpy(), qparams[k].asnumpy())

        def check_qsym_calibrated(qsym):
            attrs = qsym.attr_dict()
            for k, v in attrs.items():
                if k.find('requantize_') != -1:
                    assert 'min_calib_range' in v
                    assert 'max_calib_range' in v

        def check_qsym_qdtype(qsym, qdtype):
            attrs = qsym.attr_dict()
            for k, v in attrs.items():
                if k.find('_quantize') != -1:
                    assert 'out_type' in v
                    assert v['out_type'] == qdtype

        def check_qsym_forward(qsym, qarg_params, qaux_params, data_shape,
                               label_shape):
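            # Bind the quantized symbol and run one forward pass on random
            # data to confirm the quantized graph actually executes.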
            mod = mx.mod.Module(symbol=qsym, context=mx.current_context())
            mod.bind(for_training=False,
                     data_shapes=[('data', data_shape)],
                     label_shapes=[('softmax_label', label_shape)])
            mod.set_params(qarg_params, qaux_params)
            data = [
                mx.random.uniform(-1.0, 1.0, shape=shape)
                for _, shape in mod.data_shapes
            ]
            batch = mx.io.DataBatch(data, [])
            mod.forward(batch, is_train=False)
            for output in mod.get_outputs():
                output.wait_to_read()

        sym = get_fp32_residual()
        batch_size = 4
        data_shape = (batch_size, 4, 10, 10)
        label_shape = (batch_size, 10)

        length = batch_size  # specify num of outputs from split op
        msym = get_fp32_sym_with_multiple_outputs(length)
        msym_label_shape = (length, 10)
        msym_data_shape = (length, 4, 4, 10, 10)

        for s, dshape, lshape in zip((sym, msym),
                                     (data_shape, msym_data_shape),
                                     (label_shape, msym_label_shape)):
            mod = Module(symbol=s)
            mod.bind(data_shapes=[('data', dshape)],
                     label_shapes=[('softmax_label', lshape)])

            mod.init_params()
            arg_params, aux_params = mod.get_params()
            excluded_names = []
            if mx.current_context() == mx.cpu():
                excluded_names += ['fc']
            excluded_names += ['concat']

            optional_names = ['pool0']
            for skip_optional_names in [False, True]:
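                # Quantize twice: first also excluding the optional 'pool0'
                # layer, then excluding only the mandatory layers.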
                excluded_sym_names = []
                if skip_optional_names:
                    excluded_sym_names = excluded_names
                else:
                    excluded_sym_names = excluded_names + optional_names

                qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
                    sym=s,
                    arg_params=arg_params,
                    aux_params=aux_params,
                    excluded_sym_names=excluded_sym_names,
                    ctx=mx.current_context(),
                    quantized_dtype=qdtype,
                    calib_mode='none')
                check_params(arg_params, qarg_params, qsym)
                check_params(aux_params, qaux_params)
                check_qsym_forward(qsym, qarg_params, qaux_params, dshape,
                                   lshape)

                calib_data = mx.nd.random.uniform(shape=dshape)
                calib_data = NDArrayIter(data=calib_data,
                                         batch_size=batch_size)
                calib_data = DummyIter(calib_data)
                qsym, qarg_params, qaux_params = mx.contrib.quant.quantize_model(
                    sym=s,
                    arg_params=arg_params,
                    aux_params=aux_params,
                    excluded_sym_names=excluded_sym_names,
                    ctx=mx.current_context(),
                    quantized_dtype=qdtype,
                    calib_mode='naive',
                    calib_data=calib_data,
                    num_calib_examples=20)
                check_params(arg_params, qarg_params, qsym)
                check_params(aux_params, qaux_params)
                check_qsym_calibrated(qsym)
                check_qsym_qdtype(qsym, qdtype)
                check_qsym_forward(qsym, qarg_params, qaux_params, dshape,
                                   lshape)