# The snippets below assume these imports (not shown in the original listing):
#   import os
#   import mxnet as mx
#   from mxnet.gluon import nn
#   from mxnet.contrib import quantization
#   from mxnet.contrib.quantization import quantize_net
def calibration(net, val_data, opt, ctx, logger):
    # quantization currently supports only a CPU context (oneDNN/MKL-DNN backend)
    ctx = mx.cpu()
    exclude_sym_layer = []
    exclude_match_layer = []
    if 'inceptionv3' not in opt.model:
        exclude_match_layer += ['concat']
    if opt.num_gpus > 0:
        raise ValueError('currently only supports CPU with MKL-DNN backend')
    net = quantize_net(net,
                       calib_data=val_data,
                       quantized_dtype=opt.quantized_dtype,
                       calib_mode=opt.calib_mode,
                       exclude_layers=exclude_sym_layer,
                       num_calib_examples=opt.batch_size *
                       opt.num_calib_batches,
                       exclude_layers_match=exclude_match_layer,
                       ctx=ctx,
                       logger=logger)
    # add quantize_mode='full' to the call above to quantize every supported operator
    dir_path = os.path.dirname(os.path.realpath(__file__))
    dst_dir = os.path.join(dir_path, 'model')
    if not os.path.isdir(dst_dir):
        os.mkdir(dst_dir)
    prefix = os.path.join(dst_dir, opt.model + '-quantized-' + opt.calib_mode)
    logger.info('Saving quantized model at %s', dst_dir)
    net.export(prefix, epoch=0)
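# A minimal usage sketch (an assumption, not part of the original script):
# net.export(prefix, epoch=0) above writes '<prefix>-symbol.json' and
# '<prefix>-0000.params', which SymbolBlock.imports can load back for inference.
def load_quantized(prefix):
    return mx.gluon.SymbolBlock.imports(prefix + '-symbol.json', ['data'],
                                        prefix + '-0000.params', ctx=mx.cpu())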
Example #2
def benchmark_int8(quantize_mode, quantize_granularity, elemwise_add):
    header = operator_string(elemwise_add) + ', mode = ' + quantize_mode + \
             ', granularity = ' + quantize_granularity
    print_header(header)
    for shape, nhid in sizes:
        net = FCWithSum(shape[1], nhid, elemwise_add)
        net.initialize()
        net.hybridize(static_alloc=True, static_shape=True)
        data0 = mx.np.random.uniform(size=shape, low=-1.0, high=1.0)
        data1 = mx.np.random.uniform(size=shape, low=-1.0, high=1.0)
        shape2 = (shape[0], nhid)
        data2 = mx.np.random.uniform(size=shape2, low=-1.0, high=1.0)
        data = mx.gluon.data.ArrayDataset(data0, data1, data2)
        calib_data = mx.gluon.data.DataLoader(data, batch_size=1)
        net = quantization.quantize_net(
            net,
            device=mx.cpu(),
            exclude_layers=None,
            exclude_operators=None,
            calib_mode='naive',
            calib_data=calib_data,
            num_calib_batches=1,
            quantize_mode=quantize_mode,
            quantize_granularity=quantize_granularity)
        net.hybridize(static_alloc=True, static_shape=True)
        measure(net, data0, data1, data2, shape, nhid)
    dump_graph_fn(net, operator_string(elemwise_add) +
                  '_' + str(quantize_mode) + '_' + str(quantize_granularity))
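# For context, a hypothetical sketch of the pieces the benchmark above assumes
# (`sizes`, `FCWithSum`); names and shapes here are illustrative, not the
# original definitions. FCWithSum sums two Dense outputs and a residual input,
# the pattern targeted by oneDNN FC + elementwise-add fusion.
from mxnet.gluon import nn

sizes = [((1, 224), 512), ((32, 1024), 1024)]  # (data shape, num_hidden) pairs

class FCWithSum(nn.HybridBlock):
    def __init__(self, num_in, num_hidden, elemwise_add, **kwargs):
        super().__init__(**kwargs)
        self.fc0 = nn.Dense(units=num_hidden, in_units=num_in)
        self.fc1 = nn.Dense(units=num_hidden)
        self.elemwise_add = elemwise_add

    def forward(self, data0, data1, data2):
        _fc0 = self.fc0(data0)
        _fc1 = self.fc1(data1)
        if self.elemwise_add:
            # explicit adds, eligible for FC + add fusion after quantization
            return mx.np.add(mx.np.add(_fc0, _fc1), data2)
        return _fc0 + _fc1 + data2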
Example #3
def check_neg_fusion_quantized(net_original, attrs_name=None, excluded_attrs=None,
                               data_shapes=[(4, 4, 10, 10)], name='conv'):
  op_name = config[name][OP_NAME]
  net_original.initialize(init=mx.init.Normal(0.5), force_reinit=True)
  one_shape = isinstance(data_shapes, tuple)
  if one_shape:
    # wrap a single shape in a list so one and many shapes follow the same path
    data_shapes = [data_shapes]
  data = []
  for shape in data_shapes:
    data.append(mx.np.random.uniform(size=shape, dtype='float32', device=mx.cpu()))

  data_array = mx.gluon.data.ArrayDataset(*data)
  calib_data = mx.gluon.data.DataLoader(data_array, batch_size=1)

  qnet = quantization.quantize_net(net_original,
                                    device=mx.cpu(),
                                    exclude_layers=None,
                                    exclude_operators=None,
                                    quantized_dtype='int8',
                                    calib_mode='naive',
                                    calib_data=calib_data,
                                    num_calib_batches=1,
                                    quantize_mode='full',
                                    quantize_granularity='tensor-wise')
  qsym, _ = qnet.export(None)
  attrs_dict = qsym.attr_dict()
  for k, v in attrs_dict.items():
    if k.find(op_name) != -1:
      for attr in (attrs_name or []):
        assert v[attr] == 'true'
      for exc_attr in (excluded_attrs or []):
        assert exc_attr not in v.keys(), exc_attr + " attribute shouldn't exist"
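# Hypothetical usage of the negative-fusion check above: attrs_name lists
# fusion attributes expected to be 'true' on the quantized operator, while
# excluded_attrs lists attributes that must be absent. SomeConvBlock is an
# illustrative placeholder for a HybridBlock ending in ops that must not fuse.
net = SomeConvBlock()
check_neg_fusion_quantized(net, attrs_name=['with_act'],
                           excluded_attrs=['with_sum'],
                           data_shapes=(4, 4, 10, 10), name='conv')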
Example #4
def test_quantized_fc_bias_overflow(data_min, data_max, weight_min,
                                    weight_max):
    data_shape = (1, 32)
    data_nd = mx.np.random.uniform(data_min,
                                   data_max,
                                   size=data_shape,
                                   device=mx.cpu())
    weight_nd = mx.np.random.uniform(weight_min,
                                     weight_max,
                                     size=[64, 32],
                                     device=mx.cpu())
    bias_nd = mx.np.random.uniform(-1, +1, size=[64], device=mx.cpu())

    class FCBiasOverflow(nn.HybridBlock):
        def __init__(self, dtype='float32', **kwargs):
            super(FCBiasOverflow, self).__init__(**kwargs)
            self.weight = mx.gluon.Parameter('weight',
                                             dtype=dtype,
                                             allow_deferred_init=True)
            self.bias = mx.gluon.Parameter('bias',
                                           dtype=dtype,
                                           allow_deferred_init=True)

        def forward(self, x):
            out = mx.npx.fully_connected(x,
                                         num_hidden=64,
                                         weight=self.weight.data(x.device),
                                         no_bias=False,
                                         bias=self.bias.data(x.device))
            return out

        def infer_shape(self, x, *args):
            self.weight.shape = (64, x.shape[-1])
            self.bias.shape = (64, )

    net = FCBiasOverflow()
    net.initialize()
    net(data_nd)  # dummy run

    net.weight.data()[:] = weight_nd
    net.bias.data()[:] = bias_nd
    out = net(data_nd)

    calib_data = mx.gluon.data.DataLoader(data_nd, batch_size=1)
    qnet = quantization.quantize_net(net,
                                     device=mx.cpu(),
                                     exclude_layers=None,
                                     exclude_operators=None,
                                     quantized_dtype='int8',
                                     calib_mode='naive',
                                     calib_data=calib_data,
                                     num_calib_batches=1,
                                     quantize_mode='full')
    out_quantized = qnet(data_nd)
    assert_almost_equal_with_err(out.asnumpy(),
                                 out_quantized.asnumpy(),
                                 rtol=1e-2,
                                 atol=1e-2,
                                 etol=0.01)
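# In the source test suite this is presumably driven by pytest.mark.parametrize;
# a direct call with an extreme, purely illustrative range exercises the int8
# bias rescaling that the test guards against overflowing:
test_quantized_fc_bias_overflow(data_min=-1e5, data_max=1e5,
                                weight_min=-1e5, weight_max=1e5)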
Example #5
# Note: this variant uses the legacy MXNet 1.x Gluon API (mx.random.uniform
# with ctx, hybrid_forward with F, quantize_net(..., ctx=...)), unlike the
# MXNet 2.0-style examples around it.
def test_quantized_conv_bias_overflow(data_min, data_max, weight_min,
                                      weight_max):
    data_shape = (1, 32, 2, 2)
    data_nd = mx.random.uniform(data_min,
                                data_max,
                                shape=data_shape,
                                ctx=mx.cpu())
    weight_nd = mx.random.uniform(weight_min,
                                  weight_max,
                                  shape=[64, 32, 1, 1],
                                  ctx=mx.cpu())
    bias_nd = mx.random.uniform(-1, +1, shape=[64], ctx=mx.cpu())

    class ConvBiasOverflow(nn.HybridBlock):
        def __init__(self, dtype='float32', **kwargs):
            super(ConvBiasOverflow, self).__init__(**kwargs)
            self.weight = mx.gluon.Parameter('weight',
                                             dtype=dtype,
                                             allow_deferred_init=True)
            self.bias = mx.gluon.Parameter('bias',
                                           dtype=dtype,
                                           allow_deferred_init=True)

        def hybrid_forward(self, F, x, weight, bias):
            conv1 = F.Convolution(x,
                                  num_filter=64,
                                  kernel=(1, 1),
                                  weight=weight,
                                  no_bias=False,
                                  bias=bias)
            return conv1

    net = ConvBiasOverflow()
    net.initialize()
    net(data_nd)  # dummy run

    net.weight.data()[:] = weight_nd
    net.bias.data()[:] = bias_nd
    out = net(data_nd)

    calib_data = mx.gluon.data.DataLoader(data_nd, batch_size=data_shape[0])
    qnet = quantization.quantize_net(net,
                                     ctx=mx.cpu(),
                                     exclude_layers=None,
                                     exclude_operators=None,
                                     quantized_dtype='int8',
                                     calib_mode='naive',
                                     calib_data=calib_data,
                                     num_calib_batches=1,
                                     quantize_mode='full')

    out_quantized = qnet(data_nd)
    assert_almost_equal_with_err(out.asnumpy(),
                                 out_quantized.asnumpy(),
                                 rtol=1e-2,
                                 atol=1e-2,
                                 etol=0.01)
Example #6
def check_quantize(net_original,
                   data_shape,
                   out_type,
                   name='conv',
                   check_calibration=True,
                   check_scale_align=False):
    quantize_granularity_list = ['tensor-wise']
    if name == 'fc':
        quantize_granularity_list += ['channel-wise']

    if name in config:
        name = config[name][OP_NAME]

    net_original.initialize(init=mx.init.Normal(0.5), force_reinit=True)
    min_value = -1 if out_type != 'uint8' else 0
    data = mx.np.random.uniform(min_value,
                                1.0,
                                size=data_shape,
                                dtype='float32',
                                device=mx.current_device())

    outputs = net_original(data)
    for output in outputs:
        output.wait_to_read()
    ref_out = outputs

    calib_data = mx.gluon.data.DataLoader(data, batch_size=1)
    for quantize_granularity in quantize_granularity_list:
        qnet = quantization.quantize_net(
            net_original,
            device=mx.current_device(),
            exclude_layers=None,
            exclude_operators=None,
            quantized_dtype=out_type,
            calib_mode='naive',
            calib_data=calib_data,
            num_calib_batches=1,
            quantize_mode='full',
            quantize_granularity=quantize_granularity)
        qsym, _ = qnet.export(None)
        if check_calibration:
            check_qsym_calibrated(qsym, out_type, name=name)
        if check_scale_align:
            check_qsym_scale_align(qsym)

        quantized_out = qnet(data)
        for i in range(len(ref_out)):
            min_range = mx.np.min(ref_out[i]).item()
            max_range = mx.np.max(ref_out[i]).item()
            atol = 0.1 * max(abs(min_range), abs(max_range))
            assert_almost_equal_with_err(quantized_out[i].asnumpy(),
                                         ref_out[i].asnumpy(),
                                         rtol=0.1,
                                         atol=atol,
                                         etol=0.2)
Example #7
def check_quantize(net_original,
                   data_shapes,
                   out_type,
                   name='conv',
                   check_calibration=True,
                   check_scale_align=False,
                   quantize_mode='full',
                   attrs_dict={}):
    quantize_granularity_list = ['tensor-wise']
    if name == 'fc':
        quantize_granularity_list += ['channel-wise']

    if name in config:
        name = config[name][OP_NAME]

    net_original.initialize(init=mx.init.Normal(0.5), force_reinit=True)
    min_value = -1 if out_type != 'uint8' else 0
    one_shape = isinstance(data_shapes, tuple)
    if one_shape:
        # wrap a single shape in a list so one and many shapes follow the same path
        data_shapes = [data_shapes]
    data = []
    for shape in data_shapes:
        data.append(
            mx.np.random.uniform(min_value,
                                 1.0,
                                 size=shape,
                                 dtype='float32',
                                 device=mx.cpu()))

    outputs = net_original(*data)
    for output in outputs:
        output.wait_to_read()
    ref_out = outputs
    one_output = not isinstance(ref_out, list)
    if one_output:
        # wrap a single output in a list so one and many outputs follow the same path
        ref_out = [ref_out]

    data_array = mx.gluon.data.ArrayDataset(*data)

    calib_data = mx.gluon.data.DataLoader(data_array, batch_size=1)
    for quantize_granularity in quantize_granularity_list:
        qnet = quantization.quantize_net(
            net_original,
            device=mx.cpu(),
            exclude_layers=None,
            exclude_operators=None,
            quantized_dtype=out_type,
            calib_mode='naive',
            calib_data=calib_data,
            num_calib_batches=1,
            quantize_mode=quantize_mode,
            quantize_granularity=quantize_granularity)
        qsym, _ = qnet.export(None)
        check_fusion_parameter(qsym, attrs_dict)
        if check_calibration:
            check_qsym_calibrated(qsym, out_type, name=name)
        if check_scale_align:
            check_qsym_scale_align(qsym)

        quantized_out = qnet(*data)
        if one_output:
            quantized_out = [quantized_out]
        for i in range(len(ref_out)):
            min_range = mx.np.min(ref_out[i]).item()
            max_range = mx.np.max(ref_out[i]).item()
            atol = 0.1 * max(abs(min_range), abs(max_range))
            assert_almost_equal_with_err(quantized_out[i].asnumpy(),
                                         ref_out[i].asnumpy(),
                                         rtol=0.1,
                                         atol=atol,
                                         etol=0.2)
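# A hypothetical sketch of the check_fusion_parameter helper used above (its
# real definition is not shown in this listing). Assumed contract: attrs_dict
# maps an operator-name substring to the attribute values expected on every
# matching node of the exported quantized symbol.
def check_fusion_parameter(qsym, attrs_dict):
    graph_attrs = qsym.attr_dict()
    for op_name, expected in attrs_dict.items():
        matching = [v for k, v in graph_attrs.items() if op_name in k]
        assert matching, "no node matching '%s' in the graph" % op_name
        for attr, value in expected.items():
            assert any(v.get(attr) == value for v in matching)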
Example #8
def check_quantize(net_original,
                   data_shapes,
                   out_type,
                   name='conv',
                   check_calibration=True,
                   check_scale_align=False,
                   quantize_mode='full',
                   attrs_dict={},
                   calib_mode='naive',
                   check_fusion=True):
    quantize_granularity_list = ['tensor-wise']
    if name == 'fc':
        quantize_granularity_list += ['channel-wise']

    if name in config:
        name = config[name][OP_NAME]

    sigma = 0.01 if getattr(net_original, 'alg', None) == 'exp' else 0.5
    if out_type == 'uint8':
        # Initialize weights and tensors only with positive values to be sure
        # that results are always positive
        init = CustomNormalInit(sigma=sigma, bounded=True)
        min_value = 0
    else:
        init = mx.init.Normal(sigma)
        min_value = -1

    net_original.initialize(init=init, force_reinit=True)

    one_shape = isinstance(data_shapes, tuple)
    if one_shape:
        # wrap a single shape in a list so one and many shapes follow the same path
        data_shapes = [data_shapes]
    data = []
    for shape in data_shapes:
        data.append(
            mx.np.random.uniform(min_value,
                                 1.0,
                                 size=shape,
                                 dtype='float32',
                                 device=mx.cpu()))

    outputs = net_original(*data)
    for output in outputs:
        output.wait_to_read()
    ref_out = outputs
    one_output = not isinstance(ref_out, list)
    if one_output:
            # wrap a single output in a list so one and many outputs follow the same path
        ref_out = [ref_out]

    class TestDataLoader(mx.gluon.data.DataLoader):
        # minimal stand-in loader yielding the whole `data` list exactly once
        # per epoch; DataLoader.__init__ is deliberately not called
        def __init__(self, data):
            self.data = data
            self.finish = False

        def __iter__(self):
            self.finish = False
            return self

        def __next__(self):
            if self.finish:
                raise StopIteration
            self.finish = True
            return self.data

        def __del__(self):
            # skip DataLoader.__del__, which would touch attributes that were
            # never initialized here
            pass

    calib_data = TestDataLoader(data)

    for quantize_granularity in quantize_granularity_list:
        qnet = quantization.quantize_net(
            net_original,
            device=mx.cpu(),
            exclude_layers=None,
            exclude_operators=None,
            quantized_dtype=out_type,
            calib_mode=calib_mode,
            calib_data=calib_data,
            num_calib_batches=1,
            quantize_mode=quantize_mode,
            quantize_granularity=quantize_granularity)
        qsym, _ = qnet.export(None)
        if check_fusion:
            check_fusion_parameter(qsym, attrs_dict)
        if check_calibration:
            check_qsym_calibrated(qsym, out_type, name=name)
        if check_scale_align:
            check_qsym_scale_align(qsym)

        quantized_out = qnet(*data)
        if one_output:
            quantized_out = [quantized_out]
        for i in range(len(ref_out)):
            min_range = mx.np.min(ref_out[i]).item()
            max_range = mx.np.max(ref_out[i]).item()
            atol = 0.1 * max(abs(min_range), abs(max_range))
            assert_almost_equal_with_err(quantized_out[i].asnumpy(),
                                         ref_out[i].asnumpy(),
                                         rtol=0.1,
                                         atol=atol,
                                         etol=0.2)
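# Hypothetical usage of check_quantize above: compare the int8 version of a
# small conv+relu block against its FP32 outputs (block and shape are
# illustrative, not from the original tests).
from mxnet.gluon import nn

net = nn.HybridSequential()
net.add(nn.Conv2D(channels=16, kernel_size=3), nn.Activation('relu'))
net.hybridize(static_alloc=True, static_shape=True)
check_quantize(net, (1, 3, 32, 32), out_type='int8', name='conv')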
Example #9
    rgb_mean = [float(i) for i in rgb_mean.split(',')]
    mean_args = {
        'mean_r': rgb_mean[0],
        'mean_g': rgb_mean[1],
        'mean_b': rgb_mean[2]
    }
    rgb_std = [float(i) for i in rgb_std.split(',')]
    std_args = {'std_r': rgb_std[0], 'std_g': rgb_std[1], 'std_b': rgb_std[2]}
    if calib_mode == 'none':
        if logger:
            logger.info('Quantizing FP32 model %s', args.model)
        qsym = quantize_net(net,
                            ctx=ctx,
                            exclude_layers_match=excluded_sym_names,
                            data_shapes=data_shape,
                            calib_mode=calib_mode,
                            quantized_dtype=args.quantized_dtype,
                            logger=logger)
        suffix = '-quantized'
    else:
        if logger:
            logger.info('Creating DataLoader for reading calibration dataset')
        dataset = mx.gluon.data.vision.ImageRecordDataset(args.calib_dataset)
        transformer = transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=rgb_mean, std=rgb_std)
        ])
        data_loader = DataLoader(dataset.transform_first(transformer),