def _lower(mod, target, params):
    """Lower a Relay module for task extraction, handling VTA targets specially."""
    # pylint: disable=import-outside-toplevel
    from tvm import relay
    from tvm.relay.backend import graph_executor_codegen

    is_vta = hasattr(target, "device_name") and target.device_name == "vta"
    if is_vta:
        import vta

        with vta.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
            optimized, _ = relay.optimize(mod, target, params)
            codegen = graph_executor_codegen.GraphExecutorCodegen(None, target)
            codegen.codegen(optimized["main"])
        return

    # default case
    # Try graph codegen first to extract autotvm tasks.
    # If failed to compile, then fallback to use VM compiler.
    # TODO: Currently VM compiler is likely to stack overflow for large models.
    try:
        # TODO(jwfromm) Remove this once AlterOpLayout bug that mutates
        # source module is fixed. Until then, create a clone.
        working_copy = deepcopy(mod)
        optimized, _ = relay.optimize(working_copy, target, params)
        codegen = graph_executor_codegen.GraphExecutorCodegen(None, target)
        codegen.codegen(optimized["main"])
    except tvm.TVMError as e:
        print(
            "Get errors with GraphExecutorCodegen for task extraction. "
            "Fallback to VMCompiler. Error details:\n%s" % str(e)
        )
        working_copy = deepcopy(mod)
        vm_compiler = relay.vm.VMCompiler()
        if params:
            vm_compiler.set_params(params)
        vm_compiler.lower(working_copy, target=target)
def _lower(mod, target, params):
    """Lower a Relay module so autotvm tasks can be extracted (VTA-aware)."""
    # pylint: disable=import-outside-toplevel
    from tvm import relay
    from tvm.relay.backend import graph_runtime_codegen

    if getattr(target, 'device_name', None) == "vta":
        import vta

        with vta.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
            mod, _ = relay.optimize(mod, target, params)
            runtime_codegen = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
            runtime_codegen.codegen(mod["main"])
        return

    # default case
    # Try graph codegen first to extract autotvm tasks.
    # If failed to compile, then fallback to use VM compiler.
    # TODO: Currently VM compiler is likely to stack overflow for large models.
    try:
        opt_mod, _ = relay.optimize(mod, target, params)
        runtime_codegen = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
        runtime_codegen.codegen(opt_mod["main"])
    except tvm.TVMError:
        vm_compiler = relay.vm.VMCompiler()
        if params:
            vm_compiler.set_params(params)
        vm_compiler.lower(mod, target=target)
def call_all_topi_funcs(mod, params, target):
    """Call all TOPI compute to extract auto_scheduler tasks in a Relay program"""
    # pylint: disable=import-outside-toplevel
    from tvm import relay
    from tvm.relay.backend import graph_runtime_codegen

    # Turn off AutoTVM config not found warnings
    saved_silent = autotvm.GLOBAL_SCOPE.silent
    autotvm.GLOBAL_SCOPE.silent = True

    pass_config = {
        "relay.backend.use_auto_scheduler": True,
        "relay.backend.disable_compile_engine_cache": True,
    }
    with transform.PassContext(
        opt_level=3,
        config=pass_config,
        disabled_pass={"AutoSchedulerLayoutRewrite"},
    ):
        try:
            opt_mod, _ = relay.optimize(mod, target, params)
            codegen = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
            codegen.codegen(opt_mod["main"])
        except tvm.TVMError:
            print(
                "Get errors with GraphRuntimeCodegen for task extraction. "
                "Fallback to VMCompiler."
            )
            vm_compiler = relay.vm.VMCompiler()
            if params:
                vm_compiler.set_params(params)
            if isinstance(mod, relay.Function):
                mod = tvm.IRModule.from_expr(mod)
            vm_compiler.lower(mod, target)

    autotvm.GLOBAL_SCOPE.silent = saved_silent
def test_annotate_spans_compatibility():
    """AnnotateSpans followed by DefuseOps should run cleanly on an optimized module."""
    data = relay.var("data", relay.TensorType((1, 3, 64, 64), "float32"))
    weight = relay.var("weight")
    bn_gamma = relay.var("bn_gamma")
    bn_beta = relay.var("bn_beta")
    bn_mmean = relay.var("bn_mean")
    bn_mvar = relay.var("bn_var")

    net = relay.nn.conv2d(
        data=data, weight=weight, kernel_size=(3, 3), channels=3, padding=(1, 1)
    )
    net = relay.nn.batch_norm(net, bn_gamma, bn_beta, bn_mmean, bn_mvar)[0]
    net = relay.Function(relay.analysis.free_vars(net), net)
    module, params = testing.create_workload(net)

    # Apply some simple passes to legalize the IR.
    with tvm.transform.PassContext(opt_level=0):
        module, params = relay.optimize(
            module, tvm.testing.enabled_targets()[0][0], params
        )

    seq = tvm.transform.Sequential(
        [relay.transform.AnnotateSpans(), relay.transform.DefuseOps()]
    )
    with tvm.transform.PassContext(opt_level=3):
        module = seq(module)
def test_alter_layout_conv2d():
    """Additional layout transformations should occour on the graph.
    """

    def convnet():
        """Alternating layout of simple convnet (from image super-resolution).
        """
        biases = [relay.var('bias%d' % i, shape=(64,)) for i in range(1, 5)]
        weights = [
            relay.var('weight1', shape=(64, 1, 5, 5)),
            relay.var('weight2', shape=(64, 64, 3, 3)),
            relay.var('weight3', shape=(64, 64, 3, 3)),
            relay.var('weight4', shape=(64, 64, 3, 3)),
        ]
        x = relay.var("x", shape=(1, 1, 224, 224))

        # (padding, kernel_size, relu-after?) for each conv/bias stage.
        stages = [
            ([2, 2], [5, 5], True),
            ([1, 1], [3, 3], True),
            ([1, 1], [3, 3], True),
            ([1, 1], [3, 3], False),
        ]
        out = x
        for w, b, (pad, ksize, use_relu) in zip(weights, biases, stages):
            out = relay.nn.conv2d(out, w, padding=pad, kernel_size=ksize)
            out = relay.add(out, relay.expand_dims(b, axis=1, num_newaxis=2))
            if use_relu:
                out = relay.nn.relu(out)

        # Pixel-shuffle-style rearrangement back to a single channel image.
        out = relay.reshape(out, newshape=[1, 1, 3, 3, 224, 224])
        out = relay.transpose(out, axes=[0, 1, 4, 2, 5, 3])
        out = relay.reshape(out, newshape=[1, 1, 672, 672])
        return relay.Function(relay.ir_pass.free_vars(out), out)

    # orig net
    N = convnet()
    N = infer_type(N)

    # trigger a test
    # for each known alter_conv2d
    targets = [
        'cuda',
        'opencl -device=mali',
        'opencl -device=intel_graphics',
        'llvm -device=arm_cpu',
        'llvm -device=core-avx-ii',
    ]

    for tgt in targets:
        with tvm.target.create(tgt) as target:
            with relay.build_config(opt_level=-1, add_pass='******'):
                with autotvm.tophub.context(target):
                    O = relay.optimize(N, target, params=None)
                    O = relay.ir_pass.infer_type(O)

                    # graph should differ
                    assert not relay.ir_pass.alpha_equal(N, O)
def build_graph(mod, target):
    """Run optimize -> graph codegen -> build end to end for *mod* on *target*."""
    target = relay.build_module.build_target_by_device_type_map(target)
    target, target_host = tvm.target.Target.check_and_update_host_consist(target)
    mod, _ = relay.optimize(mod, target, None)
    codegen = graph_executor_codegen.GraphExecutorCodegen(None, target)
    _, lowered_funcs, _ = codegen.codegen(mod, mod["main"])
    _ = relay.backend._backend.build(lowered_funcs, target, target_host)
def test_alter_layout_conv2d():
    """Additional layout transformations should occour on the graph.
    """

    def convnet():
        """Alternating layout of simple convnet (from image super-resolution).
        """
        b1 = relay.var('bias1', shape=(64,))
        b2 = relay.var('bias2', shape=(64,))
        b3 = relay.var('bias3', shape=(64,))
        b4 = relay.var('bias4', shape=(64,))
        w1 = relay.var('weight1', shape=(64, 1, 5, 5))
        w2 = relay.var('weight2', shape=(64, 64, 3, 3))
        w3 = relay.var('weight3', shape=(64, 64, 3, 3))
        w4 = relay.var('weight4', shape=(64, 64, 3, 3))
        x = relay.var("x", shape=(1, 1, 224, 224))

        y = relay.nn.conv2d(x, w1, padding=[2, 2], kernel_size=[5, 5])
        y = relay.add(y, relay.expand_dims(b1, axis=1, num_newaxis=2))
        y = relay.nn.relu(y)
        y = relay.nn.conv2d(y, w2, padding=[1, 1], kernel_size=[3, 3])
        y = relay.add(y, relay.expand_dims(b2, axis=1, num_newaxis=2))
        y = relay.nn.relu(y)
        y = relay.nn.conv2d(y, w3, padding=[1, 1], kernel_size=[3, 3])
        y = relay.add(y, relay.expand_dims(b3, axis=1, num_newaxis=2))
        y = relay.nn.relu(y)
        y = relay.nn.conv2d(y, w4, padding=[1, 1], kernel_size=[3, 3])
        y = relay.add(y, relay.expand_dims(b4, axis=1, num_newaxis=2))
        y = relay.reshape(y, newshape=[1, 1, 3, 3, 224, 224])
        y = relay.transpose(y, axes=[0, 1, 4, 2, 5, 3])
        y = relay.reshape(y, newshape=[1, 1, 672, 672])
        return relay.Function(relay.ir_pass.free_vars(y), y)

    # orig net
    N = convnet()
    N = infer_type(N)

    # trigger a test
    # for each known alter_conv2d
    targets = ['cuda',
               'opencl -device=mali',
               'opencl -device=intel_graphics',
               'llvm -device=arm_cpu',
               'llvm -device=core-avx-ii']

    for tgt in targets:
        with tvm.target.create(tgt) as target:
            with relay.build_config(opt_level=-1, add_pass='******'):
                with autotvm.tophub.context(target):
                    O = relay.optimize(N, target, params=None)
                    O = relay.ir_pass.infer_type(O)

                    # graph should differ
                    assert not relay.ir_pass.alpha_equal(N, O)
def test_softmax():
    """The required FastMath pass should rewrite nn.softmax into nn.fast_softmax."""
    inp = relay.var("x", shape=(1, 16), dtype="float32")
    body = relay.nn.softmax(inp)
    mod = tvm.IRModule.from_expr(relay.Function([inp], body))
    with tvm.transform.PassContext(opt_level=3, required_pass=["FastMath"]):
        fast_mod = relay.optimize(mod, target="llvm")
    assert "nn.fast_softmax" in fast_mod[0].astext()
def call_all_topi_funcs(mod, params, target):
    """Call all TOPI compute + schedule to extract tasks in a relay program"""
    # pylint: disable=import-outside-toplevel
    from tvm import relay
    from tvm.relay.backend import graph_runtime_codegen

    with transform.PassContext(opt_level=3):
        optimized, _ = relay.optimize(mod, target, params)
        runtime_codegen = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
        runtime_codegen.codegen(optimized["main"])
def _lower(func, target, params):
    """Lower a Relay function through graph codegen, with VTA-specific config."""
    from tvm import relay
    from tvm.relay.backend import graph_runtime_codegen

    def _codegen(expr):
        # Optimize then run graph codegen; shared by both paths below.
        mod, _ = relay.optimize(expr, target, params)
        grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
        return grc.codegen(mod["main"])

    if hasattr(target, 'device_name') and target.device_name == "vta":
        with relay.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
            import vta
            with vta.build_config():
                return _codegen(func)

    # default case
    return _codegen(func)
def build_model(args, gluon_model):
    """Build with relay.

    Converts *gluon_model* to Relay, optionally quantizes it, compiles it for
    ``args.target``, and returns ``(graph, lib, params, ctx)``.

    NOTE(review): relies on legacy APIs (``relay.build_config``,
    ``relay.ir_pass``, ``tvm.nd.context``) — only valid against an old TVM
    release; confirm the pinned version before reuse.
    """
    import tvm
    from tvm import relay
    from tvm.relay import quantize as qtz

    # InceptionV3 expects 299x299 inputs; every other supported model uses 224.
    img_size = 299 if args.model == 'inceptionv3' else 224
    data_shape = (args.batch_size, 3, img_size, img_size)
    net, params = relay.frontend.from_mxnet(gluon_model, {"data": data_shape})
    target = args.target

    if args.original:
        # run original model
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build(net, target, params=params)
        ctx = tvm.nd.context(target, 0)
        return graph, lib, params, ctx

    # constant folding and scale folding.
    print('original')
    print(net.astext(show_meta_data=False))
    with relay.build_config(opt_level=3):
        qgraph = relay.optimize(net, target, params)
        # qgraph = relay.optimize(qgraph)
    print('after optimize')
    print(qgraph.astext(show_meta_data=False))

    # Quantization: annotate -> calibrate -> (optionally) realize, printing the
    # IR after each stage for debugging.
    with qtz.qconfig(skip_k_conv=0,
                     nbit_input=args.nbit_input,
                     nbit_weight=args.nbit_input,
                     global_scale=args.global_scale,
                     dtype_input=args.dtype_input,
                     dtype_weight=args.dtype_input,
                     dtype_activation=args.dtype_output,
                     store_lowbit_output=False,
                     debug_enabled_ops=None):
        print(qtz.current_qconfig())
        qgraph = qtz.annotate(qgraph)
        print('after annotate')
        print(qgraph.astext(show_meta_data=False))
        qgraph = qtz.calibrate(qgraph)
        print('after calibrate\n')
        print(qgraph.astext(show_meta_data=False))
        # When not simulating, lower the simulated-quantize ops to real ones.
        if not args.simulated:
            qgraph = qtz.realize(qgraph)
            qgraph = relay.ir_pass.infer_type(qgraph)
            print('after realize\n')
            print(qgraph.astext(show_meta_data=False))

    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(qgraph, target)
    ctx = tvm.nd.context(target, 0)
    return graph, lib, params, ctx
def test_erf():
    """FastMath should rewrite erf into fast_erf, both standalone and in optimize."""
    inp = relay.var("x", shape=(1, 16, 16, 16), dtype="float32")
    mod = tvm.IRModule.from_expr(relay.Function([inp], relay.erf(inp)))

    fast_mod = FastMath()(mod)
    assert "fast_erf" in fast_mod.astext()

    # Check that FastMath option works for relay.build.
    with tvm.transform.PassContext(opt_level=3, required_pass=["FastMath"]):
        fast_mod = relay.optimize(mod, target="llvm", params=None)
    assert "fast_erf" in fast_mod[0].astext()
def test_tanh():
    """FastMath should rewrite tanh into fast_tanh.

    Checks both the standalone FastMath pass and the pass running inside
    ``relay.optimize`` when "FastMath" is a required pass.
    """
    x = relay.var("x", shape=(1, 16, 16, 16), dtype="float32")
    y = relay.tanh(x)
    func = relay.Function([x], y)
    mod = tvm.IRModule.from_expr(func)
    fast_mod = FastMath()(mod)
    assert "fast_tanh" in fast_mod.astext()

    # Check that FastMath option works for relay.build.
    # Use tvm.transform.PassContext (the current API) for consistency with the
    # sibling tests (test_erf, test_softmax); relay.build_config is a
    # deprecated alias for the same context manager.
    with tvm.transform.PassContext(opt_level=3, required_pass=["FastMath"]):
        fast_mod = relay.optimize(mod, target="llvm", params=None)
    assert "fast_tanh" in fast_mod[0].astext()
def call_all_topi_funcs(mod, params, target):
    """Call all TOPI compute to extract auto_scheduler tasks in a Relay program"""
    # pylint: disable=import-outside-toplevel
    from tvm import relay
    from tvm.relay.backend import graph_runtime_codegen

    # Turn off AutoTVM config not found warnings
    previous_silent = autotvm.GLOBAL_SCOPE.silent
    autotvm.GLOBAL_SCOPE.silent = True

    pass_ctx = transform.PassContext(
        opt_level=3, config={"relay.backend.use_auto_scheduler": True}
    )
    with pass_ctx:
        optimized, _ = relay.optimize(mod, target, params)
        codegen = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
        codegen.codegen(optimized["main"])

    autotvm.GLOBAL_SCOPE.silent = previous_silent
def _lower(mod, target, params):
    """Helper to lower VTA properly.

    For VTA targets, runs graph codegen under the VTA build config and returns
    early.  For every other target, lowers through the VM compiler.
    """
    # pylint: disable=import-outside-toplevel
    from tvm import relay
    from tvm.relay.backend import graph_runtime_codegen

    if hasattr(target, 'device_name') and target.device_name == "vta":
        with relay.build_config(opt_level=3, disabled_pass={"AlterOpLayout"}):
            import vta
            with vta.build_config():
                mod, _ = relay.optimize(mod, target, params)
                grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
                grc.codegen(mod["main"])
        # BUG FIX: without this return the VTA path fell through and also ran
        # the VM compiler below; sibling implementations of _lower return here.
        return

    # default case
    compiler = relay.vm.VMCompiler()
    if params:
        compiler.set_params(params)
    compiler.lower(mod, target=target)
def call_all_topi_funcs(mod, params, target, opt_level=3):
    """Call all TOPI compute to extract auto_scheduler tasks in a Relay program"""
    # pylint: disable=import-outside-toplevel
    from tvm import relay
    from tvm.relay.backend import graph_executor_codegen

    # Turn off AutoTVM config not found warnings
    saved_silent = autotvm.GLOBAL_SCOPE.silent
    autotvm.GLOBAL_SCOPE.silent = True

    pass_config = {
        "relay.backend.use_auto_scheduler": True,
        "relay.backend.disable_compile_engine_cache": True,
    }
    with transform.PassContext(
        opt_level=opt_level,
        config=pass_config,
        disabled_pass={"AutoSchedulerLayoutRewrite"},
    ):
        try:
            # TODO(jwfromm) Remove this once AlterOpLayout bug that mutates
            # source module is fixed. Until then, create a clone.
            scratch = deepcopy(mod)
            opt_mod, _ = relay.optimize(scratch, target, params)
            codegen = graph_executor_codegen.GraphExecutorCodegen(None, target)
            codegen.codegen(opt_mod["main"])
        except tvm.TVMError:
            print(
                "Get errors with GraphExecutorCodegen for task extraction. "
                "Fallback to VMCompiler."
            )
            scratch = deepcopy(mod)
            vm_compiler = relay.vm.VMCompiler()
            if params:
                vm_compiler.set_params(params)
            if isinstance(scratch, relay.Function):
                scratch = tvm.IRModule.from_expr(scratch)
            vm_compiler.lower(scratch, target)

    autotvm.GLOBAL_SCOPE.silent = saved_silent
def _run_tvm(data, proto_file, blob_file):
    """ Run caffe model by TVM according to .caffemodel and .prototxt

    Loads the Caffe prototxt/weights, converts to Relay, runs graph codegen
    for the "dpu" target, and writes the generated C source to disk.
    """
    init_net = pb.NetParameter()
    predict_net = pb.NetParameter()

    # load model
    with open(proto_file, "r") as f:
        text_format.Merge(f.read(), predict_net)
    # load blob
    with open(blob_file, "rb") as f:
        init_net.ParseFromString(f.read())

    # Multiple inputs are named "data0", "data1", ...; a single input is "data".
    shape_dict = dict()
    dtype_dict = dict()
    if isinstance(data, (tuple, list)):
        for idx, d in enumerate(data):
            shape_dict["data" + str(idx)] = d.shape
            dtype_dict["data" + str(idx)] = "float32"
    else:
        shape_dict = {"data": data.shape}
        dtype_dict = {"data": "float32"}

    net, params = relay.frontend.from_caffe(init_net, predict_net, shape_dict, dtype_dict)

    tg = "dpu"
    print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$")
    with relay.build_config(opt_level=0):
        mod, _ = relay.optimize(net, tg, params)
    graph0, func0, params0 = graph_runtime_codegen.GraphRuntimeCodegen(None, tg).codegen(mod["main"])
    func = tvm.build(func0, tg, name="default_function")

    # BUG FIX: use a context manager so the output file is closed even if
    # get_source() raises; the original open()/close() pair could leak the
    # handle.  (The unused local `dtype` was also removed.)
    with open('/home/wangjj/wujq/test/alexnetCode.c', 'w') as f:
        print(func.get_source(), file=f)
    print("$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$")
def _lower(mod, target, params, opt_level=3):
    """Helper to lower VTA properly."""
    # pylint: disable=import-outside-toplevel
    from tvm import relay
    from tvm.relay.backend import graph_executor_codegen

    targets_vta = hasattr(target, "device_name") and target.device_name == "vta"
    if targets_vta:
        import vta

        with vta.build_config(opt_level=opt_level, disabled_pass={"AlterOpLayout"}):
            optimized, _ = relay.optimize(mod, target, params)
            codegen = graph_executor_codegen.GraphExecutorCodegen(None, target)
            codegen.codegen(optimized, optimized["main"])
        return

    # Alter op layout code has been written expecting that tuning is applied
    # without it, so we disable AlterOpLayout to maintain that behavior.
    with tvm.transform.PassContext(opt_level=opt_level, disabled_pass={"AlterOpLayout"}):
        vm_compiler = relay.vm.VMCompiler()
        if params:
            vm_compiler.set_params(params)
        vm_compiler.lower(mod, target=target)
# NOTE(review): this chunk continues a script whose earlier definitions
# (bias6/bias7/bias8, conv7_weight/conv8_weight, batch_size, timer, and the
# preceding layers of simple_net) lie outside the visible region.  It appears
# to be the classifier tail of an AlexNet-like model followed by a timed
# compile for the C backend — confirm against the full script.
simple_net = relay.nn.bias_add(data=simple_net, bias=bias6)
simple_net = relay.nn.relu(data=simple_net)
simple_net = relay.nn.dense(data=simple_net, weight=conv7_weight, units=1000)
simple_net = relay.nn.bias_add(data=simple_net, bias=bias7)
simple_net = relay.nn.relu(data=simple_net)
simple_net = relay.nn.dense(data=simple_net, weight=conv8_weight, units=1000)
simple_net = relay.nn.bias_add(data=simple_net, bias=bias8)
simple_net = relay.reshape(simple_net, (batch_size, 1000))
simple_net = relay.nn.softmax(data=simple_net)

tic = timer()  # start of the timed compile section
node = relay.analysis.free_vars(simple_net)
simple_net = relay.Function(node, simple_net)
net, params = testing.create_workload(simple_net)

tg = "c"  # target string: TVM's C source backend
with relay.build_config(opt_level=3):
    mod, _ = relay.optimize(net, tg, params)
graph0, func0, params0 = graph_runtime_codegen.GraphRuntimeCodegen(None, tg).codegen(mod["main"])
func = tvm.build(func0, tg, name="default_function")
toc = timer()  # end of the timed compile section

print("AlexNet compile on TVM time is : ", (toc - tic))
# Build a minimal single-conv2d network and walk it manually through
# optimize -> graph codegen -> tvm.build for the CUDA target, printing
# progress markers and the generated source.
# NOTE(review): batch_size is defined outside this excerpt — confirm.
data = relay.var("data", relay.TensorType((batch_size, 3, 4, 4), "float32"))
conv1_weight = relay.var("conv1_weight")
conv2_weight = relay.var("conv2_weight")
dense1_weight = relay.var("dense1_weight")
dense2_weight = relay.var("dense2_weight")
simple_net = relay.nn.conv2d(data=data, weight=conv1_weight,
                             kernel_size=(2, 2), channels=2,
                             strides=(2, 2), padding=(1, 1))
# NOTE(review): the layers below were disabled by the author; kept for reference.
#simple_net = relay.nn.max_pool2d(simple_net,pool_size=(2, 2),strides=(2, 2),padding=(1, 1))
#simple_net = relay.nn.batch_flatten(simple_net)
#simple_net = relay.nn.dense(simple_net, dense1_weight,units=10)
#simple_net = relay.nn.relu(simple_net)
#simple_net = relay.nn.softmax(simple_net,1)
node = relay.analysis.free_vars(simple_net)
print("**************test1*************")
simple_net = relay.Function(node, simple_net)
print("**************test2*************")
net, params_tmp = testing.create_workload(simple_net)
target = "cuda"
print("**************test3*************")
mod, _ = relay.optimize(net, target, params_tmp)
print("**************test4*************")
grc = graph_runtime_codegen.GraphRuntimeCodegen(None, target)
print("**************test5*************")
graph0, func0, params0 = grc.codegen(mod["main"])
print(func0)
print("**************test6*************")
func = tvm.build(func0, target, name="default_function")
print("**************test7*************")
print(func.get_source())
# --------------------------------------------- if use_gpu: backend_target = 'cuda' # or 'llvm' target = tvm.target.create('%s -model=%s' % (backend_target, gpu_model)) ctx = tvm.gpu(0) else: # cpu settings if deploy_remote: target = 'llvm -target=aarch64-linux-gnu' else: target = 'llvm' ctx = tvm.cpu(0) with relay.build_config(opt_level=3): ssd_module, ssd_params = relay.optimize(ssd_module, target=target, params=ssd_params) graph, lib, params = relay.build( ssd_module, target=target, # target_host='llvm', params=ssd_params) # export weights if export_weight: # store IR representation. export_ssd_module = os.path.join(DEPLOY_WEIGHT_DIR, "ssd_module.json") export_lib = os.path.join(DEPLOY_WEIGHT_DIR, "ssd_lib.so") export_graph = os.path.join(DEPLOY_WEIGHT_DIR, "ssd_graph.json") export_params = os.path.join(DEPLOY_WEIGHT_DIR, "ssd_param.params") lib.export_library(export_lib)
def build_graph(mod, target):
    """Optimize *mod*, run graph codegen, and build the lowered functions."""
    target, target_host = tvm.target.Target.canon_target_and_host(target)
    optimized, _ = relay.optimize(mod, target)
    codegen = graph_executor_codegen.GraphExecutorCodegen(None, target)
    _, lowered_funcs, _ = codegen.codegen(optimized, optimized["main"])
    _ = relay.backend._backend.build(lowered_funcs, target)
def quantize_model(args):
    """Build with relay.

    Loads an MXNet checkpoint, optionally quantizes it, compiles it for
    ``args.target``, round-trips the artifacts through disk, and returns
    ``(graph, lib, params, ctx)``.

    NOTE(review): uses legacy APIs (``relay.build_config``, ``relay.ir_pass``,
    ``tvm.module.load``, ``tvm.nd.context``) — only valid against an old TVM
    release; confirm the pinned version before reuse.  ``mx``, ``os`` and
    ``thisdir`` come from module scope outside this excerpt.
    """
    import tvm
    from tvm import relay
    from tvm.relay import quantize as qtz

    img_size = 224
    data_shape = (args.batch_size, 3, img_size, img_size)
    mx_sym, mx_args, mx_auxs = mx.model.load_checkpoint(args.model, 0)
    net, params = relay.frontend.from_mxnet(mx_sym, {"data": data_shape},
                                            arg_params=mx_args,
                                            aux_params=mx_auxs)
    target = args.target

    if args.original:
        # run original model
        with relay.build_config(opt_level=3):
            graph, lib, params = relay.build(net, target, params=params)
        ctx = tvm.nd.context(target, 0)
        return graph, lib, params, ctx

    # constant folding and scale folding.
    # print('original')
    # print(net.astext(show_meta_data=False))
    with relay.build_config(opt_level=3):
        qgraph = relay.optimize(net, target, params)
    # print('after optimize')
    # print(qgraph.astext(show_meta_data=False))

    # Quantization: annotate -> calibrate -> (optionally) realize.
    with qtz.qconfig(skip_k_conv=0,
                     nbit_input=args.nbit_input,
                     nbit_weight=args.nbit_input,
                     global_scale=args.global_scale,
                     dtype_input=args.dtype_input,
                     dtype_weight=args.dtype_input,
                     dtype_activation=args.dtype_output,
                     store_lowbit_output=False,
                     debug_enabled_ops=None):
        print(qtz.current_qconfig())
        qgraph = qtz.annotate(qgraph)
        # print('after annotate')
        # print(qgraph.astext(show_meta_data=False))
        qgraph = qtz.calibrate(qgraph)
        # print('after calibrate\n')
        # print(qgraph.astext(show_meta_data=False))
        # When not simulating, lower simulated-quantize ops to real ones.
        if not args.simulated:
            qgraph = qtz.realize(qgraph)
            qgraph = relay.ir_pass.infer_type(qgraph)
            # print('after realize\n')
            # print(qgraph.astext(show_meta_data=False))

    with relay.build_config(opt_level=3):
        graph, lib, params = relay.build(qgraph, target)

    ### save/load the graph, lib and params into separate files
    # save
    lib.export_library(os.path.join(thisdir, "deploy_lib.so"))
    with open(os.path.join(thisdir, "deploy_graph.json"), "w") as fo:
        fo.write(graph)
    with open(os.path.join(thisdir, "deploy_param.params"), "wb") as fo:
        fo.write(relay.save_param_dict(params))

    # load
    graph = open(os.path.join(thisdir, "deploy_graph.json")).read()
    lib = tvm.module.load(os.path.join(thisdir, "deploy_lib.so"))
    params = bytearray(
        open(os.path.join(thisdir, "deploy_param.params"), "rb").read())
    ctx = tvm.nd.context(target, 0)
    return graph, lib, params, ctx