def create_exec(f, target="llvm", params=None):
    """Compile a Relay expression or module into a VM executable.

    Parameters
    ----------
    f : relay.Expr or relay.Module
        A bare Relay expression (wrapped into a fresh module as "main")
        or an already-built module.
    target : str
        Compilation target string (default "llvm").
    params : dict, optional
        Parameter bindings forwarded to the VM compiler.

    Returns
    -------
    The executable produced by ``_vm.compile``.
    """
    if isinstance(f, relay.Expr):
        # Promote the bare expression to a module entry point.
        mod = relay.Module()
        mod["main"] = f
    else:
        assert isinstance(f, relay.Module), "expected mod as relay.Module"
        mod = f
    return _vm.compile(mod, target=target, params=params)
def create_vm(f, ctx=tvm.cpu(), target="llvm", params=None):
    """Compile a Relay expression or module and return an initialized VM.

    Parameters
    ----------
    f : relay.Expr or relay.Module
        A bare Relay expression (wrapped into a fresh module as "main")
        or an already-built module.
    ctx : TVMContext
        Device the VM is initialized on (default CPU).
    target : str
        Compilation target string (default "llvm").
    params : dict, optional
        Parameter bindings forwarded to the VM compiler.

    Returns
    -------
    A VM object, already initialized against *ctx*.
    """
    if isinstance(f, relay.Expr):
        # Promote the bare expression to a module entry point.
        mod = relay.Module()
        mod["main"] = f
    else:
        assert isinstance(f, relay.Module), "expected mod as relay.Module"
        mod = f
    vm = _vm.compile(mod, target=target, params=params)
    vm.init(ctx)
    return vm
def get_vm_output(mod, data, params, target, ctx, dtype='float32', number=2,
                  repeat=20, measure=False, model="model"):
    """Execute *mod* on the Relay VM and return its output as a NumPy array.

    Bug fix: the original referenced ``measure`` and ``model`` without
    defining them, raising ``NameError``.  Both are now parameters with
    backward-compatible defaults (measurement off by default).

    Parameters
    ----------
    mod : tvm.IRModule
        Module to compile and run.
    data
        Input passed to the VM's "main" function.
    params : dict
        Parameter bindings for compilation.
    target
        Compilation target.
    ctx : TVMContext
        Device to run on.
    dtype : str
        NumPy dtype the result is cast to (default 'float32').
    number, repeat : int
        ``time_evaluator`` settings used when *measure* is True.
    measure : bool
        When True, also time "invoke" and print mean/std latency in ms.
    model : str
        Label printed in the measurement banner.

    Returns
    -------
    numpy.ndarray
        The VM output, converted via ``asnumpy`` and cast to *dtype*.
    """
    with tvm.transform.PassContext(opt_level=3):
        exe = vm.compile(mod, target, params=params)
    rly_vm = vm_rt.VirtualMachine(exe)
    rly_vm.init(ctx)
    result = rly_vm.run(data)
    if measure:
        print("Evaluate vm inference cost of {} on {}".format(
            model, repr(ctx)))
        ftimer = rly_vm.mod.time_evaluator("invoke", ctx,
                                           number=number, repeat=repeat)
        # Measure in millisecond.
        prof_res = np.array(ftimer("main", data).results) * 1000
        print("Mean vm inference time (std dev): %.2f ms (%.2f ms)" %
              (np.mean(prof_res), np.std(prof_res)))
    return result.asnumpy().astype(dtype)
def test_vm_onnx_process():
    """End-to-end check of VM compile/serialize/deserialize for an ONNX model.

    Loads a super-resolution ONNX model from a hard-coded path, compiles it
    with the Relay VM for CUDA, round-trips the executable through disk
    (code + lib), reloads it, runs one inference, and benchmarks "invoke".

    Fixes applied:
    - ``os.makedirs(..., exist_ok=True)`` replaces ``os.system("mkdir ...")``.
    - The serialized code file is read inside a ``with`` block (the original
      leaked the file handle).
    - The warm-up evaluator is now actually invoked; the original only
      constructed it, so no warm-up ran before the timed pass.
    """
    import onnx
    # NOTE(review): hard-coded model path; this test only runs on the
    # original author's machine — confirm/parameterize before reuse.
    onnx_model_path = "/data00/cuiqing.li/onnx_models/sr_dy.onnx"
    onnx_model = onnx.load(onnx_model_path)
    shape_dict = {"input.1": (1, 1, 640, 360)}
    mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)

    target = tvm.target.cuda()
    ctx = tvm.context(str(target), 0)
    with tvm.transform.PassContext(opt_level=3,
                                   disabled_pass=["FoldScaleAxis"]):
        exe = vm.compile(mod, target, params=params)

    # Serialize executable: bytecode blob + compiled kernel library.
    code, lib = exe.save()
    saved_dir = "tmp"
    os.makedirs(saved_dir, exist_ok=True)
    path_lib = os.path.join(saved_dir, "lib.so")
    lib.export_library(path_lib)
    code_path = os.path.join(saved_dir, "code.ro")
    with open(code_path, "wb") as fo:
        fo.write(code)

    # Deserialize from disk and rebuild the VM.
    loaded_lib = tvm.runtime.load_module(path_lib)
    with open(code_path, "rb") as fi:
        loaded_code = bytearray(fi.read())
    des_exec = _vm.Executable.load_exec(loaded_code, loaded_lib)
    des_vm = _vm.VirtualMachine(des_exec, ctx)

    input_shape = [1, 1, 640, 360]
    dtype = "float32"
    data_tvm = tvm.nd.array(
        (np.random.uniform(size=input_shape)).astype(dtype))
    data = (data_tvm,)
    res = des_vm.run(*data)

    print("Evaluate vm inference cost of {} on {}".format(
        "your testing model", repr(ctx)))
    ftimer_warmup = des_vm.module.time_evaluator("invoke", ctx,
                                                 number=1, repeat=50)
    # Actually run the warm-up passes (the evaluator does nothing until called).
    ftimer_warmup("main", *data)
    print("finished warming up and start testing vm compile performance")
    ftimer = des_vm.module.time_evaluator("invoke", ctx,
                                          number=1, repeat=600)
    # Measure in millisecond.
    prof_res = np.array(ftimer("main", *data).results) * 1000
    print("Mean vm inference time (std dev): %.2f ms (%.2f ms)" %
          (np.mean(prof_res), np.std(prof_res)))
def get_tvm_executor(irmod, executor, target, params):
    """Build a TVM artifact for the requested executor type.

    Parameters
    ----------
    irmod : tvm.IRModule
        Module to compile.  Deep-copied before VM compilation so the
        caller's module is left untouched.
    executor : str
        Either "vm" or "graph"; anything else logs an error.
    target
        Compilation target.
    params : dict
        Parameter bindings for compilation.

    Returns
    -------
    The compiled library for "vm"/"graph", or ``None`` for an
    unsupported executor type.
    """
    if executor == "vm":
        log.info("Build TVM virtual machine")
        return vm.compile(copy.deepcopy(irmod), target, params=params)
    if executor == "graph":
        log.info("Build TVM graph executor")
        return relay.build(irmod, target=target, params=params)
    log.error("Executor type {} is unsupported. ".format(executor) +
              "Only \"vm\" and \"graph\" types are supported")
    return None
def vm_tensorflow_model_process():
    """End-to-end VM compile/serialize/benchmark of a TensorFlow model.

    Parses a TF checkpoint, converts it to Relay (with an NCHW conv layout
    pass), compiles with the Relay VM for CUDA, round-trips the executable
    through disk, reloads it, runs one inference with three int32 inputs,
    and benchmarks "invoke".

    Fixes applied:
    - ``os.makedirs(..., exist_ok=True)`` replaces ``os.system("mkdir ...")``.
    - The serialized code file is read inside a ``with`` block (the original
      leaked the file handle).
    - Removed a dead ``idx`` counter that was incremented but never read.
    - The warm-up evaluator is now actually invoked; the original only
      constructed it, so no warm-up ran before the timed pass.
    """

    def normalize_node_name(nodes):
        # Strip the ":0"-style output index from TF tensor names,
        # for a single name or a list of names.
        from tensorflow.compat import as_text
        if isinstance(nodes, list):
            return [as_text(node.split(':', 1)[0], 'ascii') for node in nodes]
        return as_text(nodes.split(':', 1)[0], 'ascii')

    import tensorflow as tf
    from tvm.relay.frontend.tensorflow_parser import TFParser

    # NOTE(review): hard-coded checkpoint path; this only runs on the
    # original author's machine — confirm/parameterize before reuse.
    TF_pb_path = "/home/tiger/cuiqing.li/models/TF_checkpoint/latest"
    graph_def = TFParser(TF_pb_path).parse()

    input_names = ["input_ids_1:0", "input_mask_1:0", "segment_ids_1:0"]
    output_names = ["loss/Softmax:0"]
    input_shapes = [[1, 256], [1, 256], [1, 256]]
    input_names = [normalize_node_name(i) for i in input_names]
    output_names = [normalize_node_name(i) for i in output_names]

    mod, params = relay.frontend.from_tensorflow(
        graph_def,
        shape={k: v for k, v in zip(input_names, input_shapes)},
        layout=None,
        outputs=output_names)

    # Convert conv2d layouts to NCHW before compilation.
    desired_layouts = {'nn.conv2d': ['NCHW', 'default']}
    seq = tvm.transform.Sequential([
        relay.transform.RemoveUnusedFunctions(),
        relay.transform.ConvertLayout(desired_layouts)
    ])
    with tvm.ir.transform.PassContext(opt_level=3):
        mod = seq(mod)

    target = tvm.target.cuda()
    ctx = tvm.context(str(target), 0)
    with tvm.transform.PassContext(opt_level=3,
                                   disabled_pass=["FoldScaleAxis"]):
        exe = vm.compile(mod, target, params=params)

    # Serialize executable: bytecode blob + compiled kernel library.
    code, lib = exe.save()
    saved_dir = "tmp"
    os.makedirs(saved_dir, exist_ok=True)
    path_lib = os.path.join(saved_dir, "lib.so")
    lib.export_library(path_lib)
    code_path = os.path.join(saved_dir, "code.ro")
    with open(code_path, "wb") as fo:
        fo.write(code)

    # Deserialize from disk and rebuild the VM.
    loaded_lib = tvm.runtime.load_module(path_lib)
    with open(code_path, "rb") as fi:
        loaded_code = bytearray(fi.read())
    des_exec = _vm.Executable.load_exec(loaded_code, loaded_lib)
    des_vm = _vm.VirtualMachine(des_exec, ctx)

    data = []
    for input_shape in input_shapes:
        dtype = "int32"
        # NOTE(review): uniform [0, 1) cast to int32 produces all-zero
        # inputs; np.random.randint may have been intended — confirm
        # whether zero token ids are acceptable for this benchmark.
        data_tvm = tvm.nd.array(
            (np.random.uniform(size=input_shape)).astype(dtype), ctx)
        data.append(data_tvm)
    data = tuple(data)
    res = des_vm.run(*data)

    print("Evaluate vm inference cost of {} on {}".format(
        "your testing model", repr(ctx)))
    ftimer_warmup = des_vm.module.time_evaluator("invoke", ctx,
                                                 number=1, repeat=50)
    # Actually run the warm-up passes (the evaluator does nothing until called).
    ftimer_warmup("main", *data)
    print("finished warming up and start testing vm compile performance")
    ftimer = des_vm.module.time_evaluator("invoke", ctx,
                                          number=1, repeat=100)
    # Measure in millisecond.
    prof_res = np.array(ftimer("main", *data).results) * 1000
    print("Mean vm inference time (std dev): %.2f ms (%.2f ms)" %
          (np.mean(prof_res), np.std(prof_res)))