def calibration(net, val_data, opt, ctx, logger):
    if isinstance(ctx, list):
        ctx = ctx[0]
    # calibration currently runs on the CPU (MKL-DNN backend) only, so any
    # context passed in is overridden here
    ctx = mx.cpu()
    exclude_sym_layer = []
    exclude_match_layer = []
    if 'inceptionv3' not in opt.model:
        exclude_match_layer += ['concat']
    if opt.num_gpus > 0:
        raise ValueError('currently only supports CPU with MKL-DNN backend')
    # pass quantize_mode='full' here to quantize all quantizable layers
    # instead of the default subset
    net = quantize_net(net, calib_data=val_data, quantized_dtype=opt.quantized_dtype,
                       calib_mode=opt.calib_mode, exclude_layers=exclude_sym_layer,
                       num_calib_examples=opt.batch_size * opt.num_calib_batches,
                       exclude_layers_match=exclude_match_layer, ctx=ctx, logger=logger)
    dir_path = os.path.dirname(os.path.realpath(__file__))
    dst_dir = os.path.join(dir_path, 'model')
    if not os.path.isdir(dst_dir):
        os.mkdir(dst_dir)
    prefix = os.path.join(dst_dir, opt.model + '-quantized-' + opt.calib_mode)
    logger.info('Saving quantized model at %s' % dst_dir)
    net.export(prefix, epoch=0)
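# ---------------------------------------------------------------------------
# Hedged usage sketch for calibration() above (illustrative, not part of the
# original script). It fakes the `opt` namespace with the fields the function
# reads and uses random data in place of a real validation DataLoader; the
# model name and option values are assumptions.
import argparse
import logging


def _calibration_demo():
    opt = argparse.Namespace(model='resnet18_v1', num_gpus=0,
                             quantized_dtype='auto', calib_mode='naive',
                             batch_size=8, num_calib_batches=2)
    net = mx.gluon.model_zoo.vision.get_model(opt.model, pretrained=True)
    net.hybridize(static_alloc=True, static_shape=True)
    # random stand-in for a real calibration dataset
    images = mx.nd.random.uniform(shape=(16, 3, 224, 224))
    labels = mx.nd.zeros((16,))
    dataset = mx.gluon.data.ArrayDataset(images, labels)
    val_data = mx.gluon.data.DataLoader(dataset, batch_size=opt.batch_size)
    calibration(net, val_data, opt, mx.cpu(), logging.getLogger(__name__))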
def benchmark_int8(quantize_mode, quantize_granularity, elemwise_add):
    header = operator_string(elemwise_add) + ', mode = ' + quantize_mode + \
             ', granularity = ' + quantize_granularity
    print_header(header)
    for shape, nhid in sizes:
        net = FCWithSum(shape[1], nhid, elemwise_add)
        net.initialize()
        net.hybridize(static_alloc=True, static_shape=True)
        data0 = mx.np.random.uniform(size=shape, low=-1.0, high=1.0)
        data1 = mx.np.random.uniform(size=shape, low=-1.0, high=1.0)
        shape2 = (shape[0], nhid)
        data2 = mx.np.random.uniform(size=shape2, low=-1.0, high=1.0)
        data = mx.gluon.data.ArrayDataset(data0, data1, data2)
        calib_data = mx.gluon.data.DataLoader(data, batch_size=1)
        net = quantization.quantize_net(net,
                                        device=mx.cpu(),
                                        exclude_layers=None,
                                        exclude_operators=None,
                                        calib_mode='naive',
                                        calib_data=calib_data,
                                        num_calib_batches=1,
                                        quantize_mode=quantize_mode,
                                        quantize_granularity=quantize_granularity)
        net.hybridize(static_alloc=True, static_shape=True)
        measure(net, data0, data1, data2, shape, nhid)
        dump_graph_fn(net, operator_string(elemwise_add) + \
                      '_' + str(quantize_mode) + '_' + str(quantize_granularity))
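# Illustrative driver for benchmark_int8() (not from the original file). It
# assumes the module-level helpers referenced above (sizes, FCWithSum,
# operator_string, print_header, measure, dump_graph_fn) are in scope, and
# sweeps the mode/granularity combinations quantize_net accepts.
def run_int8_benchmarks():
    for elemwise_add in (False, True):
        for quantize_mode in ('smart', 'full'):
            for quantize_granularity in ('tensor-wise', 'channel-wise'):
                benchmark_int8(quantize_mode, quantize_granularity, elemwise_add)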
def check_neg_fusion_quantized(net_original, attrs_name=None, excluded_attrs=None,
                               data_shapes=[(4, 4, 10, 10)], name='conv'):
    op_name = config[name][OP_NAME]
    net_original.initialize(init=mx.init.Normal(0.5), force_reinit=True)
    one_shape = isinstance(data_shapes, tuple)
    if one_shape:
        # replace a single shape with a one-element list so the code below
        # can follow the same schema for both cases
        data_shapes = [data_shapes]
    data = []
    for shape in data_shapes:
        data.append(mx.np.random.uniform(size=shape, dtype='float32', device=mx.cpu()))
    data_array = mx.gluon.data.ArrayDataset(*data)
    calib_data = mx.gluon.data.DataLoader(data_array, batch_size=1)
    qnet = quantization.quantize_net(net_original,
                                     device=mx.cpu(),
                                     exclude_layers=None,
                                     exclude_operators=None,
                                     quantized_dtype='int8',
                                     calib_mode='naive',
                                     calib_data=calib_data,
                                     num_calib_batches=1,
                                     quantize_mode='full',
                                     quantize_granularity='tensor-wise')
    qsym, _ = qnet.export(None)
    attrs_dict = qsym.attr_dict()
    for k, v in attrs_dict.items():
        if k.find(op_name) != -1:
            for attr in attrs_name:
                assert v[attr] == 'true'
            for exc_attr in excluded_attrs:
                assert exc_attr not in v.keys(), exc_attr + " attribute shouldn't exist"
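# Hedged usage sketch for check_neg_fusion_quantized() (illustrative; the
# real `config`/OP_NAME tables and fused-op attribute names live elsewhere
# in this module and may differ). The idea: quantize a small block and
# assert which fusion attributes must, and must not, appear on the fused op.
def _neg_fusion_demo():
    net = nn.HybridSequential()
    net.add(nn.Conv2D(channels=4, kernel_size=(1, 1)))
    net.add(nn.Activation('relu'))
    check_neg_fusion_quantized(net,
                               attrs_name=['with_act'],      # expected attrs
                               excluded_attrs=['with_sum'],  # must be absent
                               data_shapes=(4, 4, 10, 10),
                               name='conv')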
def test_quantized_fc_bias_overflow(data_min, data_max, weight_min, weight_max):
    data_shape = (1, 32)
    data_nd = mx.np.random.uniform(data_min, data_max, size=data_shape, device=mx.cpu())
    weight_nd = mx.np.random.uniform(weight_min, weight_max, size=[64, 32], device=mx.cpu())
    bias_nd = mx.np.random.uniform(-1, +1, size=[64], device=mx.cpu())

    class FCBiasOverflow(nn.HybridBlock):
        def __init__(self, dtype='float32', **kwargs):
            super(FCBiasOverflow, self).__init__(**kwargs)
            self.weight = mx.gluon.Parameter('weight', dtype=dtype, allow_deferred_init=True)
            self.bias = mx.gluon.Parameter('bias', dtype=dtype, allow_deferred_init=True)

        def forward(self, x):
            fc = mx.npx.fully_connected(x, num_hidden=64,
                                        weight=self.weight.data(x.device),
                                        no_bias=False,
                                        bias=self.bias.data(x.device))
            return fc

        def infer_shape(self, x, *args):
            self.weight.shape = (64, x.shape[x.ndim - 1])
            self.bias.shape = (64,)

    net = FCBiasOverflow()
    net.initialize()
    net(data_nd)  # dummy run to trigger deferred parameter initialization
    net.weight.data()[:] = weight_nd
    net.bias.data()[:] = bias_nd
    out = net(data_nd)

    calib_data = mx.gluon.data.DataLoader(data_nd, batch_size=1)
    qnet = quantization.quantize_net(net,
                                     device=mx.cpu(),
                                     exclude_layers=None,
                                     exclude_operators=None,
                                     quantized_dtype='int8',
                                     calib_mode='naive',
                                     calib_data=calib_data,
                                     num_calib_batches=1,
                                     quantize_mode='full')
    out_quantized = qnet(data_nd)
    assert_almost_equal_with_err(out.asnumpy(), out_quantized.asnumpy(),
                                 rtol=1e-2, atol=1e-2, etol=0.01)
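# Hedged driver for the bias-overflow test above (illustrative; the real
# suite parametrizes it with pytest and its exact value sets may differ).
# The interesting cases pair a large dynamic range on one operand with a
# near-zero range on the other, which stresses the int32 bias rescaling.
def run_fc_bias_overflow_checks():
    for ranges in [(-1.0, 1.0, -1.0, 1.0),     # well-behaved ranges
                   (-1.0, 1.0, -1e-6, 1e-6),   # tiny weights: bias dominates
                   (-1e-6, 1e-6, -1.0, 1.0)]:  # tiny activations
        test_quantized_fc_bias_overflow(*ranges)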
def test_quantized_conv_bias_overflow(data_min, data_max, weight_min, weight_max):
    data_shape = (1, 32, 2, 2)
    data_nd = mx.random.uniform(data_min, data_max, shape=data_shape, ctx=mx.cpu())
    weight_nd = mx.random.uniform(weight_min, weight_max, shape=[64, 32, 1, 1], ctx=mx.cpu())
    bias_nd = mx.random.uniform(-1, +1, shape=[64], ctx=mx.cpu())

    class ConvBiasOverflow(nn.HybridBlock):
        def __init__(self, dtype='float32', **kwargs):
            super(ConvBiasOverflow, self).__init__(**kwargs)
            self.weight = mx.gluon.Parameter('weight', dtype=dtype, allow_deferred_init=True)
            self.bias = mx.gluon.Parameter('bias', dtype=dtype, allow_deferred_init=True)

        def hybrid_forward(self, F, x, weight, bias):
            conv1 = F.Convolution(x, num_filter=64, kernel=(1, 1),
                                  weight=weight, no_bias=False, bias=bias)
            return conv1

    net = ConvBiasOverflow()
    net.initialize()
    net(data_nd)  # dummy run
    net.weight.data()[:] = weight_nd
    net.bias.data()[:] = bias_nd
    out = net(data_nd)

    calib_data = mx.gluon.data.DataLoader(data_nd, batch_size=data_shape[0])
    qnet = quantization.quantize_net(net,
                                     ctx=mx.cpu(),
                                     exclude_layers=None,
                                     exclude_operators=None,
                                     quantized_dtype='int8',
                                     calib_mode='naive',
                                     calib_data=calib_data,
                                     num_calib_batches=1,
                                     quantize_mode='full')
    out_quantized = qnet(data_nd)
    assert_almost_equal_with_err(out.asnumpy(), out_quantized.asnumpy(),
                                 rtol=1e-2, atol=1e-2, etol=0.01)
def check_quantize(net_original, data_shape, out_type, name='conv',
                   check_calibration=True, check_scale_align=False):
    quantize_granularity_list = ['tensor-wise']
    if name == 'fc':
        quantize_granularity_list += ['channel-wise']

    if name in config:
        name = config[name][OP_NAME]

    net_original.initialize(init=mx.init.Normal(0.5), force_reinit=True)
    min_value = -1 if out_type != 'uint8' else 0
    data = mx.np.random.uniform(min_value, 1.0, size=data_shape, dtype='float32',
                                ctx=mx.current_device())
    outputs = net_original(data)
    for output in outputs:
        output.wait_to_read()
    ref_out = outputs

    calib_data = mx.gluon.data.DataLoader(data, batch_size=1)
    for quantize_granularity in quantize_granularity_list:
        qnet = quantization.quantize_net(net_original,
                                         ctx=mx.current_device(),
                                         exclude_layers=None,
                                         exclude_operators=None,
                                         quantized_dtype=out_type,
                                         calib_mode='naive',
                                         calib_data=calib_data,
                                         num_calib_batches=1,
                                         quantize_mode='full',
                                         quantize_granularity=quantize_granularity)
        qsym, _ = qnet.export(None)
        if check_calibration:
            check_qsym_calibrated(qsym, out_type, name=name)
        if check_scale_align:
            check_qsym_scale_align(qsym)

        quantized_out = qnet(data)
        for i in range(len(ref_out)):
            min_range = mx.np.min(ref_out[i]).item()
            max_range = mx.np.max(ref_out[i]).item()
            atol = 0.1 * max(abs(min_range), abs(max_range))
            assert_almost_equal_with_err(quantized_out[i].asnumpy(), ref_out[i].asnumpy(),
                                         rtol=0.1, atol=atol, etol=0.2)
def check_quantize(net_original, data_shapes, out_type, name='conv',
                   check_calibration=True, check_scale_align=False,
                   quantize_mode='full', attrs_dict={}):
    quantize_granularity_list = ['tensor-wise']
    if name == 'fc':
        quantize_granularity_list += ['channel-wise']

    if name in config:
        name = config[name][OP_NAME]

    net_original.initialize(init=mx.init.Normal(0.5), force_reinit=True)
    min_value = -1 if out_type != 'uint8' else 0
    one_shape = isinstance(data_shapes, tuple)
    if one_shape:
        # replace a single shape with a one-element list so the code below
        # can follow the same schema for both cases
        data_shapes = [data_shapes]
    data = []
    for shape in data_shapes:
        data.append(mx.np.random.uniform(min_value, 1.0, size=shape,
                                         dtype='float32', device=mx.cpu()))
    outputs = net_original(*data)
    for output in outputs:
        output.wait_to_read()
    ref_out = outputs
    one_output = not isinstance(ref_out, list)
    if one_output:
        # wrap a single output in a list to share the comparison path below
        ref_out = [ref_out]

    data_array = mx.gluon.data.ArrayDataset(*data)
    calib_data = mx.gluon.data.DataLoader(data_array, batch_size=1)
    for quantize_granularity in quantize_granularity_list:
        qnet = quantization.quantize_net(net_original,
                                         device=mx.cpu(),
                                         exclude_layers=None,
                                         exclude_operators=None,
                                         quantized_dtype=out_type,
                                         calib_mode='naive',
                                         calib_data=calib_data,
                                         num_calib_batches=1,
                                         quantize_mode=quantize_mode,
                                         quantize_granularity=quantize_granularity)
        qsym, _ = qnet.export(None)
        check_fusion_parameter(qsym, attrs_dict)
        if check_calibration:
            check_qsym_calibrated(qsym, out_type, name=name)
        if check_scale_align:
            check_qsym_scale_align(qsym)

        quantized_out = qnet(*data)
        if one_output:
            quantized_out = [quantized_out]
        for i in range(len(ref_out)):
            min_range = mx.np.min(ref_out[i]).item()
            max_range = mx.np.max(ref_out[i]).item()
            atol = 0.1 * max(abs(min_range), abs(max_range))
            assert_almost_equal_with_err(quantized_out[i].asnumpy(), ref_out[i].asnumpy(),
                                         rtol=0.1, atol=atol, etol=0.2)
def check_quantize(net_original, data_shapes, out_type, name='conv',
                   check_calibration=True, check_scale_align=False,
                   quantize_mode='full', attrs_dict={}, calib_mode='naive',
                   check_fusion=True):
    quantize_granularity_list = ['tensor-wise']
    if name == 'fc':
        quantize_granularity_list += ['channel-wise']

    if name in config:
        name = config[name][OP_NAME]

    sigma = 0.01 if hasattr(net_original, 'alg') and net_original.alg == 'exp' else 0.5
    if out_type == 'uint8':
        # initialize weights and tensors only with positive values to be sure
        # that results are always positive
        init = CustomNormalInit(sigma=sigma, bounded=True)
        min_value = 0
    else:
        init = mx.init.Normal(sigma)
        min_value = -1
    net_original.initialize(init=init, force_reinit=True)

    one_shape = isinstance(data_shapes, tuple)
    if one_shape:
        # replace a single shape with a one-element list so the code below
        # can follow the same schema for both cases
        data_shapes = [data_shapes]
    data = []
    for shape in data_shapes:
        data.append(mx.np.random.uniform(min_value, 1.0, size=shape,
                                         dtype='float32', device=mx.cpu()))
    outputs = net_original(*data)
    for output in outputs:
        output.wait_to_read()
    ref_out = outputs
    one_output = not isinstance(ref_out, list)
    if one_output:
        # wrap a single output in a list to share the comparison path below
        ref_out = [ref_out]

    class TestDataLoader(mx.gluon.data.DataLoader):
        """Minimal loader that yields the prepared batch exactly once per epoch."""

        def __init__(self, data):
            self.data = data
            self.finish = False

        def __iter__(self):
            self.finish = False
            return self

        def __next__(self):
            if self.finish:
                raise StopIteration
            self.finish = True
            return self.data

        def __del__(self):
            pass

    calib_data = TestDataLoader(data)
    for quantize_granularity in quantize_granularity_list:
        qnet = quantization.quantize_net(net_original,
                                         device=mx.cpu(),
                                         exclude_layers=None,
                                         exclude_operators=None,
                                         quantized_dtype=out_type,
                                         calib_mode=calib_mode,
                                         calib_data=calib_data,
                                         num_calib_batches=1,
                                         quantize_mode=quantize_mode,
                                         quantize_granularity=quantize_granularity)
        qsym, _ = qnet.export(None)
        if check_fusion:
            check_fusion_parameter(qsym, attrs_dict)
        if check_calibration:
            check_qsym_calibrated(qsym, out_type, name=name)
        if check_scale_align:
            check_qsym_scale_align(qsym)

        quantized_out = qnet(*data)
        if one_output:
            quantized_out = [quantized_out]
        for i in range(len(ref_out)):
            min_range = mx.np.min(ref_out[i]).item()
            max_range = mx.np.max(ref_out[i]).item()
            atol = 0.1 * max(abs(min_range), abs(max_range))
            assert_almost_equal_with_err(quantized_out[i].asnumpy(), ref_out[i].asnumpy(),
                                         rtol=0.1, atol=atol, etol=0.2)
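# Minimal usage sketch for the check_quantize() helper above (illustrative).
# It quantizes a tiny conv+relu block and verifies the int8 output against
# the FP32 reference; the calibration and fusion-attribute checks are
# skipped because this toy net is not registered in the module's `config`
# table of known fused operators.
def _check_quantize_demo():
    net = nn.HybridSequential()
    net.add(nn.Conv2D(channels=8, kernel_size=(3, 3)))
    net.add(nn.Activation('relu'))
    check_quantize(net, (1, 3, 16, 16), 'int8',
                   check_calibration=False, check_fusion=False)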
rgb_mean = [float(i) for i in rgb_mean.split(',')]
mean_args = {'mean_r': rgb_mean[0], 'mean_g': rgb_mean[1], 'mean_b': rgb_mean[2]}
rgb_std = [float(i) for i in rgb_std.split(',')]
std_args = {'std_r': rgb_std[0], 'std_g': rgb_std[1], 'std_b': rgb_std[2]}

if calib_mode == 'none':
    if logger:
        logger.info('Quantizing FP32 model %s' % args.model)
    qsym = quantize_net(net, ctx=ctx, exclude_layers_match=excluded_sym_names,
                        data_shapes=data_shape, calib_mode=calib_mode,
                        quantized_dtype=args.quantized_dtype, logger=logger)
    suffix = '-quantized'
else:
    if logger:
        logger.info('Creating DataLoader for reading calibration dataset')
    dataset = mx.gluon.data.vision.ImageRecordDataset(args.calib_dataset)
    transformer = transforms.Compose([transforms.Resize(256),
                                      transforms.CenterCrop(224),
                                      transforms.ToTensor(),
                                      transforms.Normalize(mean=rgb_mean, std=rgb_std)])
    data_loader = DataLoader(dataset.transform_first(transformer),