def get_onnx_model(model_name, batch_size, qconfig, original=False, dataset=None):
    """Fetch the ViT-B/32 ONNX model, import it into Relay and quantize it.

    Parameters
    ----------
    model_name : str
        Must be ``"vit32"``; any other value fails the assertion below.
    batch_size : int
        Leading dimension of the ``(batch_size, 3, 224, 224)`` input shape
        registered for the ``"data"`` input.
    qconfig
        Context manager entered around the quantization step.
    original : bool
        When true, return right after ``prerequisite_optimize`` and skip
        quantization.
    dataset
        Optional; when not ``None`` it is forwarded to ``qtz.quantize`` and
        quantization runs under the CUDA target with the downloaded tuning
        log applied (presumably calibration samples -- confirm with caller).

    Returns
    -------
    tuple
        ``(qfunc, params, logfile)`` where ``qfunc`` is the optimized (and,
        unless ``original`` is set, quantized) result, ``params`` comes from
        the ONNX import and ``logfile`` is the tuning log file name.
    """
    assert model_name == "vit32", "Only support vit32 model!"
    base = "https://github.com/TheGreatCold/tvm-vit/raw/d2aa1e60eef42e2fdedbd1e13aa85ac5faf0a7fc"
    logfile = "gtx1660_vit_B32_224.log"
    onnx_path = "vit_B32_224.onnx"

    download_file(base, logfile)
    download_file(base, onnx_path)

    # Use a context manager so the file handle is closed as soon as the
    # graph has been parsed instead of being left to the garbage collector.
    with open(onnx_path, "rb") as onnx_file:
        onnx_graph = onnx.load(onnx_file)
    data_shape = (batch_size, 3, 224, 224)
    mod, params = relay.frontend.from_onnx(onnx_graph, {"data": data_shape})

    with tvm.transform.PassContext(opt_level=3):
        qfunc = relay.quantize.prerequisite_optimize(mod, params=params)
    logging.debug("original")
    logging.debug(qfunc.astext(show_meta_data=False))
    if original:
        return qfunc, params, logfile

    with qconfig:
        logging.debug("current quantize config")
        logging.debug(qtz.current_qconfig())

        if dataset is not None:
            # Quantization with a dataset runs under the CUDA target with
            # the tuning history from the downloaded log applied.
            with tvm.target.cuda():
                with tvm.autotvm.apply_history_best(logfile):
                    qfunc = qtz.quantize(qfunc, params, dataset=dataset)
        else:
            qfunc = qtz.quantize(qfunc, params)

        logging.debug("after quantize")
        logging.debug(qfunc.astext(show_meta_data=False))
    return qfunc, params, logfile
def get_model(model_name,
              batch_size,
              qconfig,
              target=None,
              original=False,
              simulated=False):
    """Import a pretrained Gluon vision model into Relay and quantize it.

    The input is registered under the name ``"data"`` with shape
    ``(batch_size, 3, S, S)``, where ``S`` is 299 for ``'inceptionv3'`` and
    224 for every other model name. ``target`` and ``simulated`` are accepted
    but not read by this function.

    Returns the function produced by ``prerequisite_optimize`` when
    ``original`` is truthy, otherwise the result of quantizing it inside the
    ``qconfig`` context.
    """
    pretrained_net = gluon.model_zoo.vision.get_model(model_name, pretrained=True)

    if model_name == 'inceptionv3':
        edge = 299
    else:
        edge = 224
    shape_dict = {"data": (batch_size, 3, edge, edge)}

    mod, params = relay.frontend.from_mxnet(pretrained_net, shape_dict)
    main_fn = mod['main']

    with tvm.transform.PassContext(opt_level=3):
        qfunc = relay.quantize.prerequisite_optimize(main_fn, params=params)
    logging.debug('original')
    logging.debug(qfunc.astext(show_meta_data=False))

    if not original:
        # Quantization happens under the caller-supplied configuration.
        with qconfig:
            logging.debug('current quantize config')
            logging.debug(qtz.current_qconfig())
            qfunc = qtz.quantize(qfunc)
            logging.debug('after quantize')
            logging.debug(qfunc.astext(show_meta_data=False))
    return qfunc
def quantize_relay_module(mod, params, qconfig=None):
    """ Quantize the 'main' function of a relay module under a qconfig.

    Parameters:
    ------
    mod : tvm.relay.module
        The original module. Its 'main' entry is overwritten with the
        quantized function, so the caller's object is modified.

    params
        Forwarded to ``qtz.quantize`` as the ``params`` keyword argument.

    qconfig : tvm.relay.quantize.quantize.QConfig
        The quantization configuration. When omitted (or falsy),
        ``qtz.qconfig()`` is used.

    Returns:
    ------
    mod
        The same module that was passed in, with 'main' quantized.

    """
    # Fall back to the default configuration when none is supplied.
    active_config = qconfig if qconfig else qtz.qconfig()

    with active_config:
        logging.debug('current quantize config')
        logging.debug(qtz.current_qconfig())
        quantized_main = qtz.quantize(mod['main'], params=params)
        mod['main'] = quantized_main
        logging.debug('after quantize')
        logging.debug(mod['main'].astext(show_meta_data=False))
    return mod
# Example no. 4
# 0
def get_model(model_name, batch_size, qconfig, target=None, original=False, simulated=False):
    """Import a pretrained Gluon vision model into Relay and quantize it.

    Parameters
    ----------
    model_name : str
        Name passed to ``gluon.model_zoo.vision.get_model``. ``'inceptionv3'``
        uses a 299x299 input; every other name uses 224x224.
    batch_size : int
        Leading dimension of the ``"data"`` input shape.
    qconfig
        Context manager entered around the quantization step.
    target, simulated
        Accepted but not read by this function.
    original : bool
        When true, return right after ``prerequisite_optimize``.

    Returns
    -------
    qfunc
        When ``original`` is true: the optimized, unquantized function only.
    (qfunc, params, input_shape)
        Otherwise: the quantized function, the imported parameters and the
        input shape tuple.

    NOTE: the two paths return differently shaped values. This is kept as-is
    because existing callers may depend on either form.
    """
    gluon_model = gluon.model_zoo.vision.get_model(model_name, pretrained=True)
    img_size = 299 if model_name == 'inceptionv3' else 224
    input_shape = (batch_size, 3, img_size, img_size)
    mod, params = relay.frontend.from_mxnet(gluon_model, {"data": input_shape})
    qfunc = mod['main']

    with relay.build_config(opt_level=3):
        qfunc = relay.quantize.prerequisite_optimize(qfunc, params=params)
    logging.debug('original')
    logging.debug(qfunc.astext(show_meta_data=False))
    if original:
        return qfunc

    with qconfig:
        logging.debug('current quantize config')
        logging.debug(qtz.current_qconfig())
        qfunc = qtz.quantize(qfunc, params=params)
        logging.debug('after quantize')
        logging.debug(qfunc.astext(show_meta_data=False))

    return qfunc, params, input_shape
# Example no. 5
# 0
def test_quantize_pass():
    """Compare the quantize pass against a hand-built int8 conv2d graph.

    A single 3x3 conv2d is quantized with ``qtz.quantize`` and, separately,
    rebuilt by hand with explicit round/clip/cast steps and a manually
    quantized weight. Both graphs are evaluated on the first dataset sample
    and their outputs must agree within ``rtol=1e-3``.
    """
    def quantize_weight(arr):
        """Turn ``arr`` into an int8 relay constant using a power-of-two scale."""
        values = arr.asnumpy()
        maximum = np.amax(np.abs(values))
        scale = 2**math.ceil(math.log(maximum, 2))
        # Clip while still in floating point: a rounded value of +128 does
        # not fit in int8, so casting first would turn it into a negative
        # number that the clip then maps to -127 instead of +127.
        out = np.clip(np.around(values / scale * 128), -127, 127).astype('int8')
        return relay.const(out, 'int8')

    n, c, h, w = 1, 3, 224, 224

    def make_graph(data):
        """Build the float reference function: one 3x3 conv2d with a free weight."""
        weight = relay.var("conv_weight")
        out = relay.nn.conv2d(data,
                              weight,
                              kernel_size=(3, 3),
                              padding=(1, 1),
                              channels=c)
        out = relay.Function(relay.ir_pass.free_vars(out), out)
        return out

    def make_qgraph(data, weight):
        """Build the expected quantized function by hand."""
        # Input scaling; presumably 128 / global_scale (4.0) -- TODO confirm.
        out = data * relay.const(32.0)
        out = relay.round(out)
        out = relay.clip(out, a_min=-127, a_max=127)
        out = out.astype('int8')

        out = relay.nn.conv2d(out,
                              weight,
                              kernel_size=(3, 3),
                              padding=(1, 1),
                              channels=c,
                              out_dtype='int32')
        out = out.astype('float32')
        # Output rescale factor, approximately 2**-12.
        out = relay.multiply(out, relay.const(0.00024414062))
        out = relay.Function(relay.ir_pass.free_vars(out), out)
        return out

    np.random.seed(42)

    data = relay.var("data", relay.TensorType((n, c, h, w), "float32"))
    graph = make_graph(data)
    dataset, params = make_dataset(graph, 10)

    with qtz.qconfig(skip_k_conv=0,
                     global_scale=4.0,
                     round_for_shift=False,
                     store_lowbit_output=False):
        qgraph0 = qtz.quantize(graph, params)
        qgraph0 = relay.ir_pass.infer_type(qgraph0)

    conv_weight = quantize_weight(params['conv_weight'])
    qgraph1 = make_qgraph(data, conv_weight)
    qgraph1 = relay.ir_pass.infer_type(qgraph1)

    # Separate name for the executor so `graph` keeps meaning the float
    # reference function built above.
    executor = relay.create_executor('graph')
    res0 = executor.evaluate(qgraph0)(dataset[0]['data'])
    res1 = executor.evaluate(qgraph1)(dataset[0]['data'])
    tvm.testing.assert_allclose(res0.asnumpy(), res1.asnumpy(), rtol=1e-3)
def get_model(model_name, batch_size, qconfig, original=False):
    """Import a pretrained Gluon vision model into Relay and quantize it.

    The ``"data"`` input gets shape ``(batch_size, 3, S, S)`` with ``S`` equal
    to 299 for ``"inceptionv3"`` and 224 for any other model name.

    Returns ``(mod, params)`` untouched when ``original`` is truthy; otherwise
    ``(quantized, params)`` where ``quantized`` is the result of
    ``qtz.quantize(mod, params)`` run inside the ``qconfig`` context.
    """
    pretrained_net = gluon.model_zoo.vision.get_model(model_name, pretrained=True)

    if model_name == "inceptionv3":
        edge = 299
    else:
        edge = 224
    shape_dict = {"data": (batch_size, 3, edge, edge)}
    mod, params = relay.frontend.from_mxnet(pretrained_net, shape_dict)

    logging.debug("original")
    logging.debug(mod.astext(show_meta_data=False))

    result = mod
    if not original:
        with qconfig:
            logging.debug("current quantize config")
            logging.debug(qtz.current_qconfig())
            result = qtz.quantize(mod, params)
            logging.debug("after quantize")
            logging.debug(result.astext(show_meta_data=False))
    return result, params
# Script section: convert an NNVM graph to Relay, quantize it, build it and
# export the compiled library.
# NOTE(review): `sym`, `shape`, `params`, `target` and `util` are not defined
# in this section -- presumably set up earlier in the file; confirm.

# Only referenced by the commented-out NNVM build at the bottom.
dtype_dict = {}

# convert nnvm to relay
print("convert nnvm symbols into relay function...")
from nnvm.to_relay import to_relay
# `params` is rebound here to the value returned by the conversion.
func, params = to_relay(sym, shape, 'float32', params=params)
# optimization
print("optimize relay graph...")
with tvm.relay.build_config(opt_level=2):
    func = tvm.relay.optimize(func, target, params)
# quantize
print("apply quantization...")
from tvm.relay import quantize
# Quantize under a qconfig created with no arguments.
with quantize.qconfig():
   func = quantize.quantize(func, params)

# Relay build
print("Compiling the model...")
# Print the quantized function before building it.
print(func.astext(show_meta_data=False))
with tvm.relay.build_config(opt_level=3):
    # `params` is rebound again, this time to the build output.
    graph, lib, params = tvm.relay.build(func, target=target, params=params)

# Save the model
tmp = util.tempdir()
lib_fname = tmp.relpath('model.tar')
lib.export_library(lib_fname)

# NNVM
# Earlier NNVM-based build, left disabled:
# with nnvm.compiler.build_config(opt_level=2):
#     graph, lib, params = nnvm.compiler.build(sym, target, shape, dtype_dict, params)