def quantize_relay_module(mod, params, qconfig=None):
    """ Quantize the 'main' function of a relay module under a qconfig.

    Parameters:
    ------
    mod : tvm.relay.module
        The original module. Its 'main' entry is replaced by the
        quantized function, so the module is modified in place.

    params
        Forwarded to ``qtz.quantize`` as its ``params`` keyword argument.

    qconfig : tvm.relay.quantize.quantize.QConfig
        The quantization configuration. When falsy (e.g. None), the
        configuration produced by ``qtz.qconfig()`` is used instead.

    Returns:
    ------
    mod : tvm.relay.module
        The same module object that was passed in, after its 'main'
        function has been quantized.

    """
    # Fall back to the default configuration when none was supplied.
    active_config = qconfig if qconfig else qtz.qconfig()

    with active_config:
        logging.debug('current quantize config')
        logging.debug(qtz.current_qconfig())

        quantized_main = qtz.quantize(mod['main'], params=params)
        mod['main'] = quantized_main

        logging.debug('after quantize')
        logging.debug(mod['main'].astext(show_meta_data=False))

    return mod
def get_model(model_name,
              batch_size,
              qconfig,
              target=None,
              original=False,
              simulated=False):
    """Load a pretrained gluon vision model and return it as a relay function.

    The model is imported with ``relay.frontend.from_mxnet``, its 'main'
    function is run through ``relay.quantize.prerequisite_optimize`` and,
    unless ``original`` is true, quantized inside the ``qconfig`` context.

    Parameters
    ----------
    model_name : str
        Name handed to ``gluon.model_zoo.vision.get_model``. The input is
        299x299 for 'inceptionv3' and 224x224 for every other name.
    batch_size : int
        Leading dimension of the "data" input shape.
    qconfig
        Context manager entered around ``qtz.quantize``.
    target, simulated
        Accepted but not used by this function.
    original : bool
        When true, return the pre-optimized function without quantizing.

    Returns
    -------
    The optimized relay function, quantized unless ``original`` is true.
    """
    pretrained = gluon.model_zoo.vision.get_model(model_name, pretrained=True)

    side = 224
    if model_name == 'inceptionv3':
        side = 299
    shape_dict = {"data": (batch_size, 3, side, side)}

    relay_mod, weights = relay.frontend.from_mxnet(pretrained, shape_dict)
    main_func = relay_mod['main']

    with tvm.transform.PassContext(opt_level=3):
        result = relay.quantize.prerequisite_optimize(main_func, params=weights)
    logging.debug('original')
    logging.debug(result.astext(show_meta_data=False))

    if not original:
        with qconfig:
            logging.debug('current quantize config')
            logging.debug(qtz.current_qconfig())
            result = qtz.quantize(result)
            logging.debug('after quantize')
            logging.debug(result.astext(show_meta_data=False))

    return result
def get_onnx_model(model_name, batch_size, qconfig, original=False, dataset=None):
    """Fetch the vit32 ONNX model, import it into relay and optionally quantize it.

    Parameters
    ----------
    model_name : str
        Must be "vit32"; any other value fails the assertion below.
    batch_size : int
        Leading dimension of the (batch_size, 3, 224, 224) "data" input.
    qconfig
        Context manager entered around ``qtz.quantize``.
    original : bool
        When true, return right after ``prerequisite_optimize`` without
        quantizing.
    dataset
        When not None, it is passed to ``qtz.quantize`` as ``dataset`` and
        quantization runs inside ``tvm.target.cuda()`` with
        ``tvm.autotvm.apply_history_best(logfile)`` applied.

    Returns
    -------
    (qfunc, params, logfile)
        The optimized (and, unless ``original``, quantized) relay object,
        the parameters returned by ``relay.frontend.from_onnx``, and the
        name of the tuning log file.
    """
    assert model_name == "vit32", "Only support vit32 model!"
    base = "https://github.com/TheGreatCold/tvm-vit/raw/d2aa1e60eef42e2fdedbd1e13aa85ac5faf0a7fc"
    logfile = "gtx1660_vit_B32_224.log"
    onnx_path = "vit_B32_224.onnx"

    download_file(base, logfile)
    download_file(base, onnx_path)

    # Use a context manager so the model file handle is closed once loaded.
    with open(onnx_path, "rb") as onnx_file:
        onnx_graph = onnx.load(onnx_file)
    data_shape = (batch_size, 3, 224, 224)
    mod, params = relay.frontend.from_onnx(onnx_graph, {"data": data_shape})

    with tvm.transform.PassContext(opt_level=3):
        qfunc = relay.quantize.prerequisite_optimize(mod, params=params)
    logging.debug("original")
    logging.debug(qfunc.astext(show_meta_data=False))
    if original:
        return qfunc, params, logfile

    with qconfig:
        logging.debug("current quantize config")
        logging.debug(qtz.current_qconfig())

        if dataset is not None:
            # Calibrating with a dataset runs under the CUDA target using
            # the downloaded tuning log.
            with tvm.target.cuda(), tvm.autotvm.apply_history_best(logfile):
                qfunc = qtz.quantize(qfunc, params, dataset=dataset)
        else:
            qfunc = qtz.quantize(qfunc, params)

        logging.debug("after quantize")
        logging.debug(qfunc.astext(show_meta_data=False))
    return qfunc, params, logfile
Exemplo n.º 4
0
def get_model(model_name, batch_size, qconfig, target=None, original=False, simulated=False):
    """Load a pretrained gluon vision model and quantize it with relay.

    Parameters
    ----------
    model_name : str
        Name handed to ``gluon.model_zoo.vision.get_model``. The input is
        299x299 for 'inceptionv3' and 224x224 for every other name.
    batch_size : int
        Leading dimension of the "data" input shape.
    qconfig
        Context manager entered around ``qtz.quantize``.
    target, simulated
        Accepted but not used by this function.
    original : bool
        When true, return the pre-optimized function without quantizing.

    Returns
    -------
    qfunc
        When ``original`` is true: only the pre-optimized relay function.
    (qfunc, params, input_shape)
        Otherwise: the quantized function, the parameters returned by
        ``relay.frontend.from_mxnet`` and the input shape tuple.

    NOTE(review): the two return shapes differ (single value vs. 3-tuple).
    This is kept as-is for backward compatibility; confirm what callers
    expect before unifying.
    """
    gluon_model = gluon.model_zoo.vision.get_model(model_name, pretrained=True)
    img_size = 299 if model_name == 'inceptionv3' else 224
    input_shape = (batch_size, 3, img_size, img_size)
    mod, params = relay.frontend.from_mxnet(gluon_model, {"data": input_shape})
    qfunc = mod['main']

    with relay.build_config(opt_level=3):
        qfunc = relay.quantize.prerequisite_optimize(qfunc, params=params)
    logging.debug('original')
    logging.debug(qfunc.astext(show_meta_data=False))
    if original:
        return qfunc

    with qconfig:
        logging.debug('current quantize config')
        logging.debug(qtz.current_qconfig())
        qfunc = qtz.quantize(qfunc, params=params)
        logging.debug('after quantize')
        logging.debug(qfunc.astext(show_meta_data=False))

    return qfunc, params, input_shape
Exemplo n.º 5
0
def build_model(args, gluon_model):
    """Import a gluon model into relay, optionally quantize it, and build it.

    ``args`` is read for: model, batch_size, target, original, simulated,
    nbit_input, global_scale, dtype_input and dtype_output.

    When ``args.original`` is true the imported network is built directly.
    Otherwise it goes through ``relay.optimize`` and the annotate /
    calibrate steps (plus realize and type inference unless
    ``args.simulated`` is true) before being built.

    Returns the tuple ``(graph, lib, params, ctx)`` where the first three
    come from ``relay.build`` and ``ctx`` is ``tvm.nd.context(target, 0)``.
    """
    import tvm
    from tvm import relay
    from tvm.relay import quantize as qtz

    side = 299 if args.model == 'inceptionv3' else 224
    shape_dict = {"data": (args.batch_size, 3, side, side)}
    net, params = relay.frontend.from_mxnet(gluon_model, shape_dict)
    target = args.target

    def _build(expr, **build_kwargs):
        # Compile `expr` for `target` at opt_level 3 and attach device 0.
        with relay.build_config(opt_level=3):
            graph, lib, built_params = relay.build(expr, target, **build_kwargs)
        ctx = tvm.nd.context(target, 0)
        return graph, lib, built_params, ctx

    def _dump(title, expr):
        # Print a stage title followed by the text form of the expression.
        print(title)
        print(expr.astext(show_meta_data=False))

    if args.original:
        # run original model
        return _build(net, params=params)

    # constant folding and scale folding.
    _dump('original', net)
    with relay.build_config(opt_level=3):
        qgraph = relay.optimize(net, target, params)
    _dump('after optimize', qgraph)

    # The weight settings are taken from the *input* arguments, as in the
    # original code. NOTE(review): confirm this sharing is intended.
    quant_options = dict(
        skip_k_conv=0,
        nbit_input=args.nbit_input,
        nbit_weight=args.nbit_input,
        global_scale=args.global_scale,
        dtype_input=args.dtype_input,
        dtype_weight=args.dtype_input,
        dtype_activation=args.dtype_output,
        store_lowbit_output=False,
        debug_enabled_ops=None,
    )
    with qtz.qconfig(**quant_options):
        print(qtz.current_qconfig())

        qgraph = qtz.annotate(qgraph)
        _dump('after annotate', qgraph)

        qgraph = qtz.calibrate(qgraph)
        _dump('after calibrate\n', qgraph)

        if not args.simulated:
            qgraph = qtz.realize(qgraph)
            qgraph = relay.ir_pass.infer_type(qgraph)
            _dump('after realize\n', qgraph)

    return _build(qgraph)
def get_model(model_name, batch_size, qconfig, original=False):
    """Import a pretrained gluon vision model into relay and quantize it.

    Parameters
    ----------
    model_name : str
        Name handed to ``gluon.model_zoo.vision.get_model``. The input is
        299x299 for "inceptionv3" and 224x224 for every other name.
    batch_size : int
        Leading dimension of the "data" input shape.
    qconfig
        Context manager entered around ``qtz.quantize``.
    original : bool
        When true, skip quantization.

    Returns
    -------
    (mod_or_qfunc, params)
        The imported module when ``original`` is true, otherwise the result
        of ``qtz.quantize``; paired with the parameters returned by
        ``relay.frontend.from_mxnet``.
    """
    pretrained = gluon.model_zoo.vision.get_model(model_name, pretrained=True)

    side = 224
    if model_name == "inceptionv3":
        side = 299
    shape_dict = {"data": (batch_size, 3, side, side)}
    mod, params = relay.frontend.from_mxnet(pretrained, shape_dict)

    logging.debug("original")
    logging.debug(mod.astext(show_meta_data=False))

    result = mod
    if not original:
        with qconfig:
            logging.debug("current quantize config")
            logging.debug(qtz.current_qconfig())
            result = qtz.quantize(mod, params)
            logging.debug("after quantize")
            logging.debug(result.astext(show_meta_data=False))

    return result, params
Exemplo n.º 7
0
def quantize_model(args):
    """Load an MXNet checkpoint, quantize it with relay, build and export it.

    ``args`` is read for: model (checkpoint prefix, epoch 0 is loaded),
    batch_size, target, original, simulated, nbit_input, global_scale,
    dtype_input and dtype_output.

    When ``args.original`` is true the imported network is built and
    returned directly, without quantization or export.

    Otherwise the quantized build is written to ``deploy_lib.so``,
    ``deploy_graph.json`` and ``deploy_param.params`` under ``thisdir``
    (a name expected to be defined at module level) and then read back.

    Returns
    -------
    (graph, lib, params, ctx)
        For the original model these come straight from ``relay.build``.
        For the quantized model ``graph`` is the JSON text read back from
        disk, ``lib`` the reloaded library and ``params`` a ``bytearray``
        of the serialized parameters. ``ctx`` is
        ``tvm.nd.context(target, 0)`` in both cases.
    """
    import tvm
    from tvm import relay
    from tvm.relay import quantize as qtz
    img_size = 224
    data_shape = (args.batch_size, 3, img_size, img_size)
    mx_sym, mx_args, mx_auxs = mx.model.load_checkpoint(args.model, 0)
    net, params = relay.frontend.from_mxnet(mx_sym, {"data": data_shape},
                                            arg_params=mx_args,
                                            aux_params=mx_auxs)
    target = args.target

    if args.original:
        # run original model
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build(net, target, params=params)
        ctx = tvm.nd.context(target, 0)
        return graph, lib, params, ctx

    # constant folding and scale folding.
    with relay.build_config(opt_level=3):
        qgraph = relay.optimize(net, target, params)

    # The weight settings are taken from the *input* arguments, as in the
    # original code. NOTE(review): confirm this sharing is intended.
    with qtz.qconfig(skip_k_conv=0,
                     nbit_input=args.nbit_input,
                     nbit_weight=args.nbit_input,
                     global_scale=args.global_scale,
                     dtype_input=args.dtype_input,
                     dtype_weight=args.dtype_input,
                     dtype_activation=args.dtype_output,
                     store_lowbit_output=False,
                     debug_enabled_ops=None):
        print(qtz.current_qconfig())
        qgraph = qtz.annotate(qgraph)
        qgraph = qtz.calibrate(qgraph)
        if not args.simulated:
            qgraph = qtz.realize(qgraph)
            qgraph = relay.ir_pass.infer_type(qgraph)

    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(qgraph, target)

    ### save/load the graph, lib and params into separate files
    lib_path = os.path.join(thisdir, "deploy_lib.so")
    graph_path = os.path.join(thisdir, "deploy_graph.json")
    param_path = os.path.join(thisdir, "deploy_param.params")

    # save
    lib.export_library(lib_path)
    with open(graph_path, "w") as fo:
        fo.write(graph)
    with open(param_path, "wb") as fo:
        fo.write(relay.save_param_dict(params))

    # load (context managers ensure the read handles are closed)
    with open(graph_path) as fi:
        graph = fi.read()
    lib = tvm.module.load(lib_path)
    with open(param_path, "rb") as fi:
        params = bytearray(fi.read())

    ctx = tvm.nd.context(target, 0)
    return graph, lib, params, ctx