def test_sym_pass(iter_num=10):
    """Quantize the model and compare it against the original network.

    The function loads the float model, quantizes it, dumps the quantized
    symbol/params/ext files, reloads them and finally hands both networks to
    ``utils.multi_eval_accuracy``.

    Relies on names defined outside this function: ``batch_size``,
    ``val_loader``, ``version``, ``ctx`` and ``load_fname``.

    Args:
        iter_num: forwarded to ``utils.multi_eval_accuracy``.
    """
    inputs_ext = {
        'data': {
            'shape': (batch_size, 1, 28, 28),
        }
    }
    inputs = [mx.sym.var(n) for n in inputs_ext]

    data_iter = iter(val_loader)

    def data_iter_func():
        return next(data_iter)

    # The first batch is used as calibration data below.
    data, _ = data_iter_func()

    # Original model, used as the accuracy baseline.
    net1 = utils.load_model(*load_fname(version), inputs, ctx=ctx)

    def graph_func(data):
        return net1.forward(data.as_in_context(ctx))

    sym_file, param_file = load_fname(version)
    sym, params = mx.sym.load(sym_file), nd.load(param_file)
    sym, params = spass.sym_quant_prepare(sym, params, inputs_ext)
    if True:
        # Active path: MRT based quantization.
        mrt = _mrt.MRT(sym, params, inputs_ext)
        mrt.set_data('data', data)
        mrt.calibrate(ctx=ctx)
        mrt.set_output_prec(8)
        qsym, qparams, inputs_ext = mrt.quantize()
    else:
        # Disabled alternative path kept for reference.
        inputs_ext['data']['data'] = data
        th_dict = calib.sym_calibrate(sym, params, inputs_ext, ctx=ctx)
        qsym, qparams, precs, _ = calib.sym_simulate(
            sym, params, inputs_ext, th_dict)
        qsym, qparams = calib.sym_realize(
            qsym, qparams, inputs_ext, precs, "cvm")

    # Dump the quantized model.
    dump_sym, dump_params, dump_ext = load_fname(version, "sym.quantize", True)
    sim.save_ext(dump_ext, inputs_ext)
    nd.save(dump_params, qparams)
    # Close the file explicitly: it is read back right below.
    with open(dump_sym, "w") as fout:
        fout.write(qsym.tojson())

    # Reload the quantized model from the dumped files.
    dump_sym, dump_params, dump_ext = load_fname(version, "sym.quantize", True)
    (inputs_ext,) = sim.load_ext(dump_ext)
    inputs = [mx.sym.var(n) for n in inputs_ext]
    net2 = utils.load_model(dump_sym, dump_params, inputs, ctx=ctx)

    def cvm_quantize(data):
        data = sim.load_real_data(data, 'data', inputs_ext)
        return net2.forward(data.as_in_context(ctx))

    utils.multi_eval_accuracy(graph_func, data_iter_func,
                              cvm_quantize, iter_num=iter_num)
data, label = data_iter_func() sym_file, param_file = load_fname() net1 = utils.load_model(sym_file, param_file, inputs, ctx=ctx) def trec(data): res = net1(data.as_in_context(ctx)) return res sym, params = mx.sym.load(sym_file), nd.load(param_file) sym, params = spass.sym_quant_prepare(sym, params, inputs_ext) if True: mrt = _mrt.MRT(sym, params, inputs_ext) mrt.set_data('data', data) mrt.calibrate(ctx=ctx) mrt.set_input_prec('data', 16) mrt.set_fixed('data') mrt.set_output_prec(8) qsym, qparams, inputs_ext = mrt.quantize() else: inputs_ext['data']['data'] = data th_dict = calib.sym_calibrate(sym, params, inputs_ext, ctx=ctx) qsym, qparams, _ = calib.pure_int8_quantize(sym, params, inputs_ext, th_dict) net2 = gluon.nn.SymbolBlock(qsym, inputs) utils.load_parameters(net2, qparams, ctx=ctx)
def test_sym_pass(batch_size=10, iter_num=10, quantize=True):
    """Compare top-1/top-5 accuracy of the "v3" model before/after quantization.

    Args:
        batch_size: batch size used for the dataset iterator and input shape.
        iter_num: forwarded to ``utils.multi_validate``.
        quantize: when true, the model is quantized and dumped first;
            otherwise only the previously dumped "mrt" files are loaded.
    """
    logger = logging.getLogger("log.test.sym.pass")
    calib_ctx = mx.gpu(2)
    ctx = [mx.gpu(int(i)) for i in "1,2,3,4".split(',') if i.strip()]
    input_size = 299
    version = "v3"
    h, w = input_size, input_size
    inputs_ext = {
        'data': {
            'shape': (batch_size, 3, h, w),
        }
    }
    inputs = [mx.sym.var(name) for name in inputs_ext]

    logger.info("load dataset, symbol and parameters")
    data_iter = ds.load_imagenet_rec(batch_size, input_size)

    def data_iter_func():
        data = data_iter.next()
        return data.data[0], data.label[0]

    # Original model and its running accuracy metrics.
    net1 = utils.load_model(*load_fname(version), inputs, ctx=ctx)
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)
    acc_top1.reset()
    acc_top5.reset()

    def inception_v3(data, label):
        data = gluon.utils.split_and_load(
            data, ctx_list=ctx, batch_axis=0, even_split=False)
        res = [net1.forward(d) for d in data]
        res = nd.concatenate(res)
        acc_top1.update(label, res)
        _, top1 = acc_top1.get()
        acc_top5.update(label, res)
        _, top5 = acc_top5.get()
        return "top1={:6.2%} top5={:6.2%}".format(top1, top5)

    if quantize:
        sym_file, param_file = load_fname(version)
        sym, params = mx.sym.load(sym_file), nd.load(param_file)
        sym, params = spass.sym_quant_prepare(sym, params, inputs_ext)
        # One batch is consumed here as calibration data.
        data, _ = data_iter_func()
        if True:
            # Active path: MRT based quantization.
            dump_sym, dump_params, dump_ext = load_fname(version, "mrt", True)
            mrt = _mrt.MRT(sym, params, inputs_ext)
            mrt.set_data('data', data)
            mrt.calibrate(ctx=calib_ctx)
            mrt.set_output_prec(8)
            qsym, qparams, inputs_ext = mrt.quantize()
        else:
            # Disabled alternative path kept for reference.
            dump_sym, dump_params, dump_ext = load_fname(
                version, "sym.quantize", True)
            inputs_ext['data']['data'] = data
            th_dict = calib.sym_calibrate(
                sym, params, inputs_ext, ctx=calib_ctx)
            qsym, qparams, precs, _ = calib.sym_simulate(
                sym, params, inputs_ext, th_dict)
            qsym, qparams = calib.sym_realize(
                qsym, qparams, inputs_ext, precs)
        sim.save_ext(dump_ext, inputs_ext)
        nd.save(dump_params, qparams)
        # Close the file explicitly: it is read back right below.
        with open(dump_sym, "w") as fout:
            fout.write(qsym.tojson())

    # Load the quantized model from the dumped "mrt" files.
    dump_sym, dump_params, dump_ext = load_fname(version, "mrt", True)
    (inputs_ext, ) = sim.load_ext(dump_ext)
    net2 = utils.load_model(dump_sym, dump_params, inputs, ctx=ctx)
    qacc_top1 = mx.metric.Accuracy()
    qacc_top5 = mx.metric.TopKAccuracy(5)
    qacc_top1.reset()
    qacc_top5.reset()

    def cvm_quantize(data, label):
        data = sim.load_real_data(data, 'data', inputs_ext)
        data = gluon.utils.split_and_load(
            data, ctx_list=ctx, batch_axis=0, even_split=False)
        res = [net2.forward(d) for d in data]
        res = nd.concatenate(res)
        qacc_top1.update(label, res)
        _, top1 = qacc_top1.get()
        qacc_top5.update(label, res)
        _, top5 = qacc_top5.get()
        return "top1={:6.2%} top5={:6.2%}".format(top1, top5)

    utils.multi_validate(inception_v3, data_iter_func,
                         cvm_quantize, iter_num=iter_num, logger=logger)
def test_sym_pass(batch_size=10, iter_num=10, quantize=True):
    """Compare top-1/top-5 accuracy of the model before/after quantization.

    Relies on ``version`` and ``load_fname`` defined outside this function.

    Args:
        batch_size: batch size used for the dataset iterator and input shape.
        iter_num: forwarded to ``utils.multi_validate``.
        quantize: when true, the model is quantized and dumped first;
            otherwise only the previously dumped "sym.quantize" files are
            loaded.
    """
    logger = logging.getLogger("log.test.sym.pass")
    calib_ctx = mx.gpu(1)
    ctx = [mx.gpu(int(i)) for i in "1,2,3,4".split(',') if i.strip()]
    inputs_ext = {
        'data': {
            'shape': (batch_size, 3, 224, 224),
        }
    }
    inputs = [mx.sym.var(name) for name in inputs_ext]

    logger.info("load dataset, symbol and parameters")
    # load dataset and iter function
    data_iter = ds.load_imagenet_rec(batch_size)

    def data_iter_func():
        data = data_iter.next()
        return data.data[0], data.label[0]

    # The first batch is used as calibration data below.
    data, _ = data_iter_func()

    # load original model for accuracy
    net1 = utils.load_model(*load_fname(version), inputs, ctx=ctx)
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)
    acc_top1.reset()
    acc_top5.reset()

    def shufflenet(data, label):
        data = gluon.utils.split_and_load(
            data, ctx_list=ctx, batch_axis=0, even_split=False)
        res = [net1.forward(d) for d in data]
        res = nd.concatenate(res)
        acc_top1.update(label, res)
        _, top1 = acc_top1.get()
        acc_top5.update(label, res)
        _, top5 = acc_top5.get()
        return "top1={:6.2%} top5={:6.2%}".format(top1, top5)

    if quantize:
        # load original model
        sym_fname, param_fname = load_fname(version)
        sym, params = mx.sym.load(sym_fname), nd.load(param_fname)
        sym, params = spass.sym_quant_prepare(sym, params, inputs_ext)

        # quantize process
        mrt = _mrt.MRT(sym, params, inputs_ext)     # initialize
        mrt.set_data('data', data)                  # set input data
        mrt.calibrate(ctx=calib_ctx)                # calibration
        mrt.set_output_prec(8)                      # set output prec, do nothing by default
        qsym, qparams, inputs_ext = mrt.quantize()  # quantization

        # dump quantized model
        dump_sym, dump_params, dump_ext = load_fname(
            version, "sym.quantize", True)
        sim.save_ext(dump_ext, inputs_ext)
        nd.save(dump_params, qparams)
        # Close the file explicitly: it is read back right below.
        with open(dump_sym, "w") as fout:
            fout.write(qsym.tojson())

        if False:
            # convert to cvm executor model (disabled)
            inputs_ext['data']['shape'] = (1, 3, 224, 224)
            nnvm_sym, nnvm_params = spass.mxnet_to_nnvm(
                qsym, qparams, inputs_ext)
            spass.cvm_build(nnvm_sym, nnvm_params, inputs_ext,
                            *load_fname(version, "nnvm"))

    # load quantized model for accuracy
    dump_sym, dump_params, dump_ext = load_fname(version, "sym.quantize", True)
    (inputs_ext, ) = sim.load_ext(dump_ext)
    inputs = [mx.sym.var(n) for n in inputs_ext]
    net3 = utils.load_model(dump_sym, dump_params, inputs, ctx=ctx)
    # net3 = mx.gluon.nn.SymbolBlock(qsym, inputs)
    # utils.load_parameters(net3, qparams, ctx=ctx)
    qacc_top1 = mx.metric.Accuracy()
    qacc_top5 = mx.metric.TopKAccuracy(5)
    qacc_top1.reset()
    qacc_top5.reset()

    def cvm_quantize(data, label):
        data = sim.load_real_data(data, 'data', inputs_ext)
        data = gluon.utils.split_and_load(
            data, ctx_list=ctx, batch_axis=0, even_split=False)
        res = [net3.forward(d) for d in data]
        res = nd.concatenate(res)
        qacc_top1.update(label, res)
        _, top1 = qacc_top1.get()
        qacc_top5.update(label, res)
        _, top5 = qacc_top5.get()
        return "top1={:6.2%} top5={:6.2%}".format(top1, top5)

    # compare accuracy between models
    utils.multi_validate(shufflenet, data_iter_func,
                         cvm_quantize, iter_num=iter_num, logger=logger)
def test_mx_quantize(batch_size=10, iter_num=10):
    """Quantize the model, build the nnvm artifact and compare accuracy.

    The float model and the reloaded quantized model are both passed to
    ``utils.multi_validate`` which reports top-1/top-5 accuracy strings.

    Relies on ``version`` and ``load_fname`` defined outside this function.

    Args:
        batch_size: batch size used for the dataset iterator and input shape.
        iter_num: forwarded to ``utils.multi_validate``.
    """
    logger = logging.getLogger("log.test.mx.quantize")
    ctx = [mx.gpu(int(i)) for i in "1,3".split(',') if i.strip()]
    inputs_ext = {
        'data': {
            'shape': (batch_size, 3, 224, 224),
        }
    }
    inputs = [mx.sym.var(n) for n in inputs_ext]

    data_iter = ds.load_imagenet_rec(batch_size)

    def data_iter_func():
        data = data_iter.next()
        return data.data[0], data.label[0]

    # The first batch is used as calibration data below.
    data, _ = data_iter_func()

    # Original model and its running accuracy metrics.
    net1 = utils.load_model(*load_fname(version), inputs, ctx=ctx)
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)
    acc_top1.reset()
    acc_top5.reset()

    def mobilenet(data, label):
        data = gluon.utils.split_and_load(
            data, ctx_list=ctx, batch_axis=0, even_split=False)
        res = [net1.forward(d) for d in data]
        res = nd.concatenate(res)
        acc_top1.update(label, res)
        _, top1 = acc_top1.get()
        acc_top5.update(label, res)
        _, top5 = acc_top5.get()
        return "top1={:6.2%} top5={:6.2%}".format(top1, top5)

    calib_ctx = mx.gpu(1)
    sym_fname, param_fname = load_fname(version)
    sym, params = mx.sym.load(sym_fname), nd.load(param_fname)
    sym, params = spass.sym_quant_prepare(sym, params, inputs_ext)
    if True:
        if True:
            # Active path: MRT based quantization.
            mrt = _mrt.MRT(sym, params, inputs_ext)
            mrt.set_data('data', data)
            # NOTE(review): calibrate() is called without ``ctx`` here, so
            # ``calib_ctx`` is only used by the disabled branch below --
            # confirm whether ``ctx=calib_ctx`` was intended.
            mrt.calibrate()
            # Manual threshold overrides tried earlier; all are disabled.
            # The bracketed numbers were recorded next to each override;
            # their exact meaning is not documented here.
            # [ 0.0008745864 0.03330660510427334 ] 0.6670066884888368 0.7753906
            # mrt.set_threshold("mobilenet0_dense0_weight", 0.67)
            # [ -0.0036011334 0.054821780899052534 ] 1.100036751338784 1.4626989
            # mrt.set_threshold("mobilenet0_conv24_batchnorm24_fwd_weight", 1.1)
            # [ 0.013243316 1.7543557133786065 ] 70.18747185088569 94.66275
            # mrt.set_threshold("mobilenet0_conv23_batchnorm23_fwd_weight", 35.10)
            # [ -0.0016149869 0.05713169649243355 ] 1.1442489167675376 1.7122083
            # mrt.set_threshold("mobilenet0_conv20_batchnorm20_fwd_weight", 1.144)
            # [ -0.0015804865 0.04523811489343643 ] 0.9063427844084799 1.0745146
            # mrt.set_threshold("mobilenet0_conv16_batchnorm16_fwd_weight", 0.90)
            # [ 0.4315614 2.447332109723772 ] 49.37820360490254 63.959927
            # mrt.set_threshold("mobilenet0_conv2_batchnorm2_fwd", 49.37)
            # [ 0.9770754 1.3392452512468611 ] 27.761980422905516 40.729546
            # mrt.set_threshold("mobilenet0_relu2_fwd", 27.76)
            # [ 1.0975745 1.0489919010632773 ] 22.077412493692915 23.784576
            # mrt.set_threshold("mobilenet0_relu4_fwd", 22.08)
            # [ 0.9885562 2.360489403014386 ] 48.19834426651407 69.22121
            # mrt.set_threshold("mobilenet0_conv5_batchnorm5_fwd", 48.2)
            # [ 0.7895588 1.0544661745870065 ] 21.878882319617176 30.95745
            # mrt.set_threshold("mobilenet0_relu17_fwd", 21.88)
            # [ 0.8717863 1.0887600296120434 ] 22.646986888608513 28.265652
            # mrt.set_threshold("mobilenet0_relu19_fwd", 22.65)
            # [ 0.35124516 0.6501711574631898 ] 13.354668314135012 20.770807
            # mrt.set_threshold("mobilenet0_relu20_fwd", 13.35)
            # [ 0.9378179 1.110470714216975 ] 23.147232155910086 27.886068
            # mrt.set_threshold("mobilenet0_relu21_fwd", 23.15)
            # [ 0.36263302 0.6352599878026505 ] 13.067832775738754 17.18809
            # mrt.set_threshold("mobilenet0_relu22_fwd", 13.07)
            # [ 0.19875833 0.49999100821358816 ] 10.198578498193196 16.625143
            # mrt.set_threshold("mobilenet0_relu24_fwd", 10.2)
            # [ 0.32357717 1.6308352606637138 ] 65.55698759215218 75.84912
            # mrt.set_threshold("mobilenet0_conv25_batchnorm25_fwd", 32.94)
            # [ 0.36793178 1.512995992388044 ] 30.62785163096019 49.464615
            # mrt.set_threshold("mobilenet0_relu26_fwd", 30.63)
            # [ 18.028658 38.61970520019531 ] 790.4227619171143 805.51886
            # mrt.set_threshold("sum0", 790.423)
            mrt.set_output_prec(8)
            qsym, qparams, inputs_ext = mrt.quantize()
        else:
            # Disabled alternative path kept for reference.
            inputs_ext['data']['data'] = data
            th_dict = calib.sym_calibrate(
                sym, params, inputs_ext, ctx=calib_ctx)
            qsym, qparams, precs, _ = calib.sym_simulate(
                sym, params, inputs_ext, th_dict)
            qsym, qparams = calib.sym_realize(
                qsym, qparams, inputs_ext, precs)

        # Dump the quantized model.
        dump_sym, dump_params, dump_ext = load_fname(
            version, "sym.quantize", True)
        sim.save_ext(dump_ext, inputs_ext)
        nd.save(dump_params, qparams)
        # Close the file explicitly: it is read back further below.
        with open(dump_sym, "w") as fout:
            fout.write(qsym.tojson())

        # Convert the quantized model and build the "nnvm.compile" artifact.
        dump_sym, dump_params = load_fname(version, "nnvm.compile")
        nnvm_sym, nnvm_params = spass.mxnet_to_nnvm(
            qsym, qparams, inputs_ext)
        spass.cvm_build(nnvm_sym, nnvm_params, inputs_ext,
                        dump_sym, dump_params)

    # Reload the quantized model from the dumped files.
    dump_sym, dump_params, dump_ext = load_fname(version, "sym.quantize", True)
    (inputs_ext,) = sim.load_ext(dump_ext)
    net2 = utils.load_model(dump_sym, dump_params, inputs, ctx=ctx)
    qacc_top1 = mx.metric.Accuracy()
    qacc_top5 = mx.metric.TopKAccuracy(5)
    qacc_top1.reset()
    qacc_top5.reset()

    def cvm_quantize(data, label):
        data = sim.load_real_data(data, 'data', inputs_ext)
        data = gluon.utils.split_and_load(
            data, ctx_list=ctx, batch_axis=0, even_split=False)
        res = [net2.forward(d) for d in data]
        res = nd.concatenate(res)
        qacc_top1.update(label, res)
        _, top1 = qacc_top1.get()
        qacc_top5.update(label, res)
        _, top5 = qacc_top5.get()
        return "top1={:6.2%} top5={:6.2%}".format(top1, top5)

    utils.multi_validate(mobilenet, data_iter_func,
                         cvm_quantize, iter_num=iter_num, logger=logger)
def test_mrt_quant(batch_size=1, iter_num=10):
    """Split, calibrate, quantize and merge the detection model, then validate.

    Stages (each one dumps its result and the next one reloads it):
      1. split the prepared model into ``base`` and ``top`` at ``keys``;
      2. calibrate ``base`` over 16 batches and dump the threshold dict;
      3. quantize ``base`` and dump symbol/params/ext;
      4. merge the quantized base with ``top`` and dump the full model;
      5. validate the float pipeline against the merged quantized model.

    Relies on ``load_fname``, ``validate_data`` and ``dataset`` defined
    outside this function.

    Args:
        batch_size: batch size used for the dataset loader and input shape.
        iter_num: forwarded to ``utils.multi_validate``.
    """
    logger = logging.getLogger("log.test.mrt.quantize")
    ctx = mx.gpu(1)
    qctx = mx.gpu(3)
    input_size = 512
    h, w = input_size, input_size
    inputs_ext = {
        'data': {
            'shape': (batch_size, 3, h, w),
        }
    }

    val_data = dataset.load_voc(batch_size, input_size)
    val_data_iter = iter(val_data)

    def data_iter_func():
        data, label = next(val_data_iter)
        return data, label

    # ---- stage 1: split the model into base and top ----
    sym_file, param_file = load_fname()
    sym, params = mx.sym.load(sym_file), nd.load(param_file)
    sym, params = spass.sym_quant_prepare(sym, params, inputs_ext)
    keys = [
        "ssd0_multiperclassdecoder0_concat0",
        "ssd0_multiperclassdecoder0__mulscalar0",
        "ssd0_multiperclassdecoder0_slice_axis0",
        "ssd0_multiperclassdecoder0_zeros_like1",
        "ssd0_normalizedboxcenterdecoder0_concat0",
    ]
    base, base_params, base_inputs_ext, top, top_params, top_inputs_ext \
        = _mrt.split_model(sym, params, inputs_ext, keys)
    # Every dump below uses ``with`` so the file is closed before it is
    # loaded again.
    dump_sym, dump_params = load_fname("mrt.base")
    with open(dump_sym, "w") as fout:
        fout.write(base.tojson())
    nd.save(dump_params, base_params)
    dump_sym, dump_params, dump_ext = load_fname("mrt.top", True)
    with open(dump_sym, "w") as fout:
        fout.write(top.tojson())
    nd.save(dump_params, top_params)
    sim.save_ext(dump_ext, top_inputs_ext)

    # Reload both halves and build the float reference pipeline.
    dump_sym, dump_params = load_fname("mrt.base")
    base, base_params = mx.sym.load(dump_sym), nd.load(dump_params)
    dump_sym, dump_params, dump_ext = load_fname("mrt.top", True)
    top, top_params = mx.sym.load(dump_sym), nd.load(dump_params)
    (top_inputs_ext, ) = sim.load_ext(dump_ext)

    base_inputs = [mx.sym.var(n) for n in inputs_ext]
    base_graph = mx.gluon.nn.SymbolBlock(base, base_inputs)
    utils.load_parameters(base_graph, base_params, ctx=ctx)

    top_inputs = [mx.sym.var(n) for n in top_inputs_ext]
    top_graph = mx.gluon.nn.SymbolBlock(top, top_inputs)
    utils.load_parameters(top_graph, top_params, ctx=ctx)

    metric = dataset.load_voc_metric()
    metric.reset()

    def yolov3(data, label):
        def net(data):
            tmp = base_graph(data.as_in_context(ctx))
            outs = top_graph(*tmp)
            return outs

        acc = validate_data(net, data, label, metric)
        return "{:6.2%}".format(acc)

    # utils.multi_validate(yolov3, data_iter_func,
    #         iter_num=iter_num, logger=logger)
    # exit()

    # ---- stage 2: calibration ----
    if True:
        mrt = _mrt.MRT(base, base_params, inputs_ext)
        # Calibrate on 16 consecutive batches; the thresholds returned by
        # the last call are the ones dumped.
        for _ in range(16):
            data, _ = data_iter_func()
            mrt.set_data('data', data)
            th_dict = mrt.calibrate(ctx=ctx)
        _, _, dump_ext = load_fname("mrt.dict", True)
        sim.save_ext(dump_ext, th_dict)

    _, _, dump_ext = load_fname("mrt.dict", True)
    (th_dict, ) = sim.load_ext(dump_ext)

    # ---- stage 3: quantize the base model ----
    if True:
        mrt = _mrt.MRT(base, base_params, base_inputs_ext)
        mrt.set_th_dict(th_dict)
        mrt.set_threshold('data', 2.64)
        mrt.set_fixed("ssd0_multiperclassdecoder0_concat0")
        mrt.set_fixed("ssd0_multiperclassdecoder0__mulscalar0")
        mrt.set_fixed("ssd0_multiperclassdecoder0_zeros_like1")
        mrt.set_threshold("ssd0_multiperclassdecoder0_slice_axis0", 1)
        # mrt.set_threshold("ssd0_normalizedboxcenterdecoder0_concat0", 512)
        mrt.set_output_prec(30)
        qbase, qbase_params, qbase_inputs_ext = mrt.quantize()
        oscales = mrt.get_output_scales()
        maps = mrt.get_maps()
        dump_sym, dump_params, dump_ext = load_fname("mrt.quantize", True)
        with open(dump_sym, "w") as fout:
            fout.write(qbase.tojson())
        nd.save(dump_params, qbase_params)
        sim.save_ext(dump_ext, qbase_inputs_ext, oscales, maps)

    # ---- stage 4: merge quantize model ----
    if True:
        qb_sym, qb_params, qb_ext = load_fname("mrt.quantize", True)
        qbase, qbase_params = mx.sym.load(qb_sym), nd.load(qb_params)
        qbase_inputs_ext, oscales, maps = sim.load_ext(qb_ext)

        # Map each top output name to the base output whose scale it uses.
        name_maps = {
            "ssd0_slice_axis41": "ssd0_multiperclassdecoder0_concat0",
            "ssd0_slice_axis42": "ssd0_multiperclassdecoder0_slice_axis0",
            "ssd0_slice_axis43": "ssd0_normalizedboxcenterdecoder0_concat0",
        }
        oscales_dict = dict(zip([c.attr('name') for c in base], oscales))
        oscales = [oscales_dict[name_maps[c.attr('name')]] for c in top]

        def box_nms(node, params, graph):
            """Rescale score thresholds in ``top`` by ``oscales[1]``."""
            name, op_name = node.attr('name'), node.attr('op_name')
            childs, attr = sutils.sym_iter(
                node.get_children()), node.list_attr()
            if op_name == '_greater_scalar':
                valid_thresh = sutils.get_attr(attr, 'scalar', 0)
                attr['scalar'] = int(valid_thresh * oscales[1])
                node = sutils.get_mxnet_op(op_name)(
                    *childs, **attr, name=name)
            elif op_name == '_contrib_box_nms':
                valid_thresh = sutils.get_attr(attr, 'valid_thresh', 0)
                attr['valid_thresh'] = int(valid_thresh * oscales[1])
                node = sutils.get_mxnet_op(op_name)(
                    *childs, **attr, name=name)
            return node

        qsym, qparams = _mrt.merge_model(
            qbase, qbase_params, top, top_params, maps, box_nms)
        sym_file, param_file, ext_file = load_fname("mrt.all.quantize", True)
        with open(sym_file, "w") as fout:
            fout.write(qsym.tojson())
        nd.save(param_file, qparams)
        sim.save_ext(ext_file, qbase_inputs_ext, oscales)

    # ---- stage 5: load the merged model and validate ----
    if True:
        dump_sym, dump_params, dump_ext = load_fname("mrt.all.quantize", True)
        net2_inputs_ext, oscales = sim.load_ext(dump_ext)
        inputs = [mx.sym.var(n) for n in net2_inputs_ext]
        net2 = utils.load_model(dump_sym, dump_params, inputs, ctx=qctx)
        net2_metric = dataset.load_voc_metric()
        net2_metric.reset()

        def mrt_quantize(data, label):
            def net(data):
                data = sim.load_real_data(data, 'data', net2_inputs_ext)
                outs = net2(data.as_in_context(qctx))
                # Divide each output by its scale after moving it to ``ctx``.
                outs = [o.as_in_context(ctx) / oscales[i]
                        for i, o in enumerate(outs)]
                return outs

            acc = validate_data(net, data, label, net2_metric)
            return "{:6.2%}".format(acc)

    utils.multi_validate(yolov3, data_iter_func,
                         mrt_quantize, iter_num=iter_num, logger=logger)
res = nd.concatenate(res) acc_top1.update(label, res) _, top1 = acc_top1.get() acc_top5.update(label, res) _, top5 = acc_top5.get() return "top1={:6.2%} top5={:6.2%}".format(top1, top5) # load original model sym_fname, param_fname = load_fname(version) sym, params = mx.sym.load(sym_fname), nd.load(param_fname) print(param_fname) sym, params = spass.sym_quant_prepare(sym, params, inputs_ext) # quantize process mrt = _mrt.MRT(sym, params, inputs_ext) # initialize mrt.set_data('data', data) # set input data mrt.calibrate(ctx=calib_ctx) # calibration mrt.set_output_prec(8) # set output prec, do nothing by default qsym, qparams, inputs_ext = mrt.quantize() # quantization if False: # dump quantized model dump_sym, dump_params, dump_ext = load_fname(version, "sym.quantize", True) sim.save_ext(dump_ext, inputs_ext) nd.save(dump_params, qparams) open(dump_sym, "w").write(qsym.tojson()) # convert to cvm executor model inputs_ext['data']['shape'] = (1, 3, input_size, input_size) nnvm_sym, nnvm_params = spass.mxnet_to_nnvm(qsym, qparams, inputs_ext)
def test_sym_pass(batch_size=10, iter_num=10):
    """Compare top-1/top-5 accuracy of the "19" model before/after quantization.

    The model is quantized with MRT, dumped, reloaded and then validated
    side by side with the original network via ``utils.multi_validate``.

    Args:
        batch_size: batch size used for the dataset iterator and input shape.
        iter_num: forwarded to ``utils.multi_validate``.
    """
    logger = logging.getLogger("log.test.sym.pass")
    calib_ctx = mx.gpu(0)
    ctx = [mx.gpu(int(i)) for i in "1,2,3,4".split(',') if i.strip()]
    inputs_ext = {
        'data': {
            'shape': (batch_size, 3, 224, 224),
        }
    }
    inputs = [mx.sym.var(name) for name in inputs_ext]

    logger.info("load dataset, symbol and parameters")
    data_iter = ds.load_imagenet_rec(batch_size)

    def data_iter_func():
        data = data_iter.next()
        return data.data[0], data.label[0]

    # Use the third batch as calibration data, then rewind the iterator so
    # validation starts from the first batch again.
    for _ in range(3):
        data, _ = data_iter_func()
    data_iter.reset()

    version = "19"
    net1 = utils.load_model(*load_fname(version), inputs, ctx=ctx)
    acc_top1 = mx.metric.Accuracy()
    acc_top5 = mx.metric.TopKAccuracy(5)
    acc_top1.reset()
    acc_top5.reset()

    def vgg(data, label):
        data = gluon.utils.split_and_load(
            data, ctx_list=ctx, batch_axis=0, even_split=False)
        res = [net1.forward(d) for d in data]
        res = nd.concatenate(res)
        acc_top1.update(label, res)
        _, top1 = acc_top1.get()
        acc_top5.update(label, res)
        _, top5 = acc_top5.get()
        return "top1={:6.2%} top5={:6.2%}".format(top1, top5)

    sym_fname, param_fname = load_fname(version)
    # A leftover debug ``exit()`` used to follow this line and stopped the
    # test before anything was quantized or validated.
    print(sym_fname, param_fname)
    sym, params = mx.sym.load(sym_fname), nd.load(param_fname)
    sym, params = spass.sym_quant_prepare(sym, params, inputs_ext)
    if True:
        mrt = _mrt.MRT(sym, params, inputs_ext)
        mrt.set_data('data', data)
        mrt.calibrate(ctx=calib_ctx)
        mrt.set_output_prec(8)
        qsym, qparams, inputs_ext = mrt.quantize()

        # Dump the quantized model.
        dump_sym, dump_params, dump_ext = load_fname(
            version, "sym.quantize", True)
        sim.save_ext(dump_ext, inputs_ext)
        nd.save(dump_params, qparams)
        # Close the file explicitly: it is read back right below.
        with open(dump_sym, "w") as fout:
            fout.write(qsym.tojson())

    # Reload the quantized model from the dumped files.
    dump_sym, dump_params, dump_ext = load_fname(version, "sym.quantize", True)
    (inputs_ext,) = sim.load_ext(dump_ext)
    net3 = utils.load_model(dump_sym, dump_params, inputs, ctx=ctx)
    qacc_top1 = mx.metric.Accuracy()
    qacc_top5 = mx.metric.TopKAccuracy(5)
    qacc_top1.reset()
    qacc_top5.reset()

    def cvm_quantize(data, label):
        data = sim.load_real_data(data, 'data', inputs_ext)
        data = gluon.utils.split_and_load(
            data, ctx_list=ctx, batch_axis=0, even_split=False)
        res = [net3.forward(d) for d in data]
        res = nd.concatenate(res)
        qacc_top1.update(label, res)
        _, top1 = qacc_top1.get()
        qacc_top5.update(label, res)
        _, top5 = qacc_top5.get()
        return "top1={:6.2%} top5={:6.2%}".format(top1, top5)

    utils.multi_validate(vgg, data_iter_func,
                         cvm_quantize, iter_num=iter_num, logger=logger)
def test_mrt_quant(batch_size=1, iter_num=10):
    """Quantize the base part of the "_darknet53_voc" model and validate it.

    Stages (disabled stages reuse files dumped by an earlier run):
      1. (disabled) split the prepared model into ``base`` and ``top``;
      2. (disabled) calibrate ``base`` over 16 batches and dump thresholds;
      3. quantize ``base`` with the loaded thresholds and dump it;
      4. validate float ``base + top`` against quantized ``base`` + ``top``.

    Relies on ``load_fname``, ``split_model``, ``validate_data`` and
    ``dataset`` defined outside this function.

    Args:
        batch_size: batch size used for the dataset loader and input shape.
        iter_num: forwarded to ``utils.multi_validate``.
    """
    logger = logging.getLogger("log.test.mrt.quantize")
    ctx = mx.gpu(2)
    qctx = mx.gpu(3)
    input_size = 416
    h, w = input_size, input_size
    inputs_ext = {
        'data': {
            'shape': (batch_size, 3, h, w),
        }
    }

    val_data = dataset.load_voc(batch_size, input_size)
    val_data_iter = iter(val_data)

    def data_iter_func():
        data, label = next(val_data_iter)
        return data, label

    # ``base_inputs_ext`` is only produced by the (disabled) split stage but
    # is required by the quantize stage; default it so that skipping the
    # split no longer raises NameError.
    # NOTE(review): assumes the base graph takes the same 'data' input as
    # the full model -- confirm against split_model.
    base_inputs_ext = inputs_ext

    # ---- stage 1 (disabled): split the model into base and top ----
    if False:
        sym_file, param_file = load_fname("_darknet53_voc")
        sym, params = mx.sym.load(sym_file), nd.load(param_file)
        sym, params = spass.sym_quant_prepare(sym, params, inputs_ext)
        keys = [
            'yolov30_yolooutputv30_expand_dims0',
            'yolov30_yolooutputv31_expand_dims0',
            'yolov30_yolooutputv32_expand_dims0',
            'yolov30_yolooutputv30_tile0',
            'yolov30_yolooutputv31_tile0',
            'yolov30_yolooutputv32_tile0',
            'yolov30_yolooutputv30_broadcast_add1',
            'yolov30_yolooutputv31_broadcast_add1',
            'yolov30_yolooutputv32_broadcast_add1',
        ]
        base, base_params, base_inputs_ext, top, top_params, top_inputs_ext \
            = split_model(sym, params, inputs_ext, keys, logger)
        dump_sym, dump_params = load_fname("_darknet53_voc", "mrt.base")
        with open(dump_sym, "w") as fout:
            fout.write(base.tojson())
        nd.save(dump_params, base_params)
        dump_sym, dump_params, dump_ext = load_fname(
            "_darknet53_voc", "mrt.top", True)
        with open(dump_sym, "w") as fout:
            fout.write(top.tojson())
        nd.save(dump_params, top_params)
        sim.save_ext(dump_ext, top_inputs_ext)

    # Load both halves and build the float reference pipeline.
    dump_sym, dump_params = load_fname("_darknet53_voc", "mrt.base")
    base, base_params = mx.sym.load(dump_sym), nd.load(dump_params)
    dump_sym, dump_params, dump_ext = load_fname(
        "_darknet53_voc", "mrt.top", True)
    top, top_params = mx.sym.load(dump_sym), nd.load(dump_params)
    (top_inputs_ext, ) = sim.load_ext(dump_ext)

    base_inputs = [mx.sym.var(n) for n in inputs_ext]
    base_graph = mx.gluon.nn.SymbolBlock(base, base_inputs)
    utils.load_parameters(base_graph, base_params, ctx=ctx)

    top_inputs = [mx.sym.var(n) for n in top_inputs_ext]
    top_graph = mx.gluon.nn.SymbolBlock(top, top_inputs)
    utils.load_parameters(top_graph, top_params, ctx=ctx)

    metric = dataset.load_voc_metric()
    metric.reset()

    def yolov3(data, label):
        def net(data):
            tmp = base_graph(data.as_in_context(ctx))
            outs = top_graph(*tmp)
            # print ([o[0][0][:] for o in outs])
            return outs

        acc = validate_data(net, data, label, metric)
        return "{:6.2%}".format(acc)

    # ---- stage 2 (disabled): calibration ----
    if False:
        mrt = _mrt.MRT(base, base_params, inputs_ext)
        for _ in range(16):
            data, _ = data_iter_func()
            mrt.set_data('data', data)
            th_dict = mrt.calibrate(ctx=ctx)
        _, _, dump_ext = load_fname("_darknet53_voc", "mrt.dict", True)
        sim.save_ext(dump_ext, th_dict)

    _, _, dump_ext = load_fname("_darknet53_voc", "mrt.dict", True)
    (th_dict, ) = sim.load_ext(dump_ext)

    # ---- stage 3: quantize the base model ----
    if True:
        mrt = _mrt.MRT(base, base_params, base_inputs_ext)
        mrt.set_th_dict(th_dict)
        mrt.set_threshold('data', 2.64)
        mrt.set_threshold('yolov30_yolooutputv30_expand_dims0', 1)
        mrt.set_threshold('yolov30_yolooutputv31_expand_dims0', 1)
        mrt.set_threshold('yolov30_yolooutputv32_expand_dims0', 1)
        mrt.set_threshold('yolov30_yolooutputv30_tile0', 416)
        mrt.set_threshold('yolov30_yolooutputv31_tile0', 416)
        mrt.set_threshold('yolov30_yolooutputv32_tile0', 416)
        # mrt.set_fixed('yolov30_yolooutputv30_broadcast_add1')
        # mrt.set_fixed('yolov30_yolooutputv31_broadcast_add1')
        # mrt.set_fixed('yolov30_yolooutputv32_broadcast_add1')
        mrt.set_output_prec(30)
        qbase, qbase_params, qbase_inputs_ext = mrt.quantize()
        oscales = mrt.get_output_scales()
        dump_sym, dump_params, dump_ext = load_fname(
            "_darknet53_voc", "mrt.quantize", True)
        # Close the file explicitly: it is read back right below.
        with open(dump_sym, "w") as fout:
            fout.write(qbase.tojson())
        nd.save(dump_params, qbase_params)
        sim.save_ext(dump_ext, qbase_inputs_ext, oscales)

    # ---- stage 4: load the quantized base and validate ----
    if True:
        dump_sym, dump_params, dump_ext = load_fname(
            "_darknet53_voc", "mrt.quantize", True)
        net2_inputs_ext, oscales = sim.load_ext(dump_ext)
        inputs = [mx.sym.var(n) for n in net2_inputs_ext]
        net2 = utils.load_model(dump_sym, dump_params, inputs, ctx=qctx)
        net2_metric = dataset.load_voc_metric()
        net2_metric.reset()

        def mrt_quantize(data, label):
            def net(data):
                data = sim.load_real_data(data, 'data', net2_inputs_ext)
                outs = net2(data.as_in_context(qctx))
                # Divide each output by its scale after moving it to
                # ``ctx``, then feed the float top graph.
                outs = [o.as_in_context(ctx) / oscales[i]
                        for i, o in enumerate(outs)]
                # outs = b2_graph(*data)
                outs = top_graph(*outs)
                return outs

            acc = validate_data(net, data, label, net2_metric)
            return "{:6.2%}".format(acc)

    utils.multi_validate(yolov3, data_iter_func,
                         mrt_quantize, iter_num=iter_num, logger=logger)