def check_sharing():
    x = relay.var("x", shape=(1, 10))
    y = relay.var("y", shape=(1, 10))
    z = relay.add(x, y)
    func = relay.Function([x, y], z)

    x_in = np.ones((1, 10)).astype("float32")
    params = {"x": x_in}
    graph, lib, params = relay.build(func, target="llvm", params=params)

    mod_shared = graph_executor.create(graph, lib, tvm.cpu(0))
    mod_shared.load_params(runtime.save_param_dict(params))
    num_mods = 10
    mods = [graph_executor.create(graph, lib, tvm.cpu(0)) for _ in range(num_mods)]

    for mod in mods:
        mod.share_params(mod_shared, runtime.save_param_dict(params))

    a = np.random.uniform(size=(1, 10)).astype("float32")
    for mod in mods:
        mod.run(y=a)
        out = mod.get_output(0, tvm.nd.empty((1, 10)))
        np.testing.assert_equal(out.asnumpy(), x_in + a)

    # Explicitly delete the shared module and verify correctness.
    del mod_shared
    for mod in mods:
        mod.run(y=a)
        out = mod.get_output(0, tvm.nd.empty((1, 10)))
        np.testing.assert_equal(out.asnumpy(), x_in + a)

    del mods
def qnn_dense_driver(test_configuration):
    in_dtype = test_configuration["dtype"]
    out_dtype = test_configuration["out_dtype"]
    quantized_data_name = "quantized_data"
    quantized_kernel_name = "quantized_kernel"
    expected_out_dtype = test_configuration["out_dtype"]
    bias_name = "bias"
    quantized_data = relay.var(
        quantized_data_name, shape=test_configuration["input_shape"], dtype=in_dtype
    )
    quantized_kernel = relay.var(
        quantized_kernel_name, shape=test_configuration["kernel_shape"], dtype=in_dtype
    )
    mod = relay.qnn.op.dense(
        quantized_data,
        quantized_kernel,
        relay.const(test_configuration["input_zero_point"], "int32"),
        relay.const(test_configuration["kernel_zero_point"], "int32"),
        relay.const(test_configuration["input_scale"], "float32"),
        relay.const(test_configuration["kernel_scale"], "float32"),
        test_configuration["units"],
    )
    if test_configuration[bias_name] is not None:
        bias = relay.var(bias_name, shape=test_configuration["bias"].shape, dtype=out_dtype)
        mod = relay.nn.bias_add(mod, bias)
    if test_configuration["requantize"] is not None:
        requantize_config = test_configuration["requantize"]
        mod = relay.qnn.op.requantize(
            mod,
            input_scale=relay.const(requantize_config["input_scale"], "float32"),
            input_zero_point=relay.const(0, "int32"),
            output_scale=relay.const(requantize_config["output_scale"], "float32"),
            output_zero_point=relay.const(requantize_config["output_zero_point"], "int32"),
            out_dtype=requantize_config["out_dtype"],
        )
        expected_out_dtype = requantize_config["out_dtype"]

    mod = relay.Function(relay.analysis.free_vars(mod), mod)
    mod = tvm.IRModule.from_expr(mod)
    mod = relay.transform.InferType()(mod)
    mod = relay.qnn.transform.CanonicalizeOps()(mod)
    with tvm.transform.PassContext(opt_level=2):
        graph, lib, params = relay.build(mod, "llvm", params=None)
    mod = graph_executor.create(graph, lib, device=tvm.cpu(0))
    mod.set_input(quantized_data_name, test_configuration[quantized_data_name])
    mod.set_input(quantized_kernel_name, test_configuration[quantized_kernel_name])
    if test_configuration[bias_name] is not None:
        mod.set_input(bias_name, test_configuration[bias_name])
    mod.set_input(**params)
    mod.run()
    res = mod.get_output(0).asnumpy()
    np.testing.assert_equal(res, test_configuration["output"])
    assert res.dtype == expected_out_dtype
def test_apply(relay_op, name, f_numpy, low, high, step, dtype="float32"):
    a_np = np.arange(low, high, step).astype(dtype).reshape((1, -1))
    b_np = f_numpy(a_np)

    x = relay.var("x", shape=a_np.shape, dtype="float32")
    y = relay_op(x)
    func = relay.Function([x], y)
    mod = tvm.IRModule.from_expr(func)

    with tvm.transform.PassContext(opt_level=3, required_pass=["FastMath"]):
        graph, lib, params = relay.build(mod, target=target, params=None)

    # Check that the op related to fast math has been converted to a function in lib.
    func_name = "fused_" + name
    # When there are multiple targets in tvm.testing.parametrize_targets, the
    # built function will have a "_1" suffix in its name.
    assert func_name in graph

    m = graph_executor.create(graph, lib, dev)
    # Set inputs
    m.set_input("x", tvm.nd.array(a_np, dev))
    m.set_input(**params)
    # Execute
    m.run()
    # Get outputs
    tvm_output = m.get_output(0)
    tvm.testing.assert_allclose(tvm_output.numpy(), b_np, rtol=1e-5, atol=1e-5)
def check_verify():
    mod = graph_executor.create(graph, mhost, dev)
    mod.set_input(**params)
    mod.run()
    out = mod.get_output(0, tvm.nd.empty(shape))
    np.testing.assert_equal(out.numpy(), tensor_a + tensor_b - tensor_c + tensor_d)
def check_verify():
    mlib = tvm.build(s, [A, B], "llvm", name="myadd")
    mod = graph_executor.create(graph, mlib, tvm.cpu(0))
    a = np.random.uniform(size=(n,)).astype(A.dtype)
    mod.run(x=a)
    out = mod.get_output(0, tvm.nd.empty((n,)))
    np.testing.assert_equal(out.asnumpy(), a + 1)
def quantize_test_driver(in_dtype, quant_args, axis, out_dtype, in_data, verify_output_data):
    shape = in_data.shape
    input_data = relay.var("input_data", shape=shape, dtype=in_dtype)
    output_zero_point = relay.const(quant_args["out_zero_point"])
    output_scale = relay.const(quant_args["out_scale"])
    quantized_output = relay.qnn.op.quantize(
        input_data,
        output_scale=output_scale,
        output_zero_point=output_zero_point,
        axis=axis,
        out_dtype=out_dtype,
    )
    mod = relay.Function(relay.analysis.free_vars(quantized_output), quantized_output)
    mod = tvm.IRModule.from_expr(mod)
    with tvm.transform.PassContext(opt_level=3):
        graph, lib, params = relay.build(mod, "llvm", params=None)
        rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0))
        rt_mod.set_input(input_data=in_data)
        rt_mod.set_input(**params)
        rt_mod.run()
        res = rt_mod.get_output(0).numpy()
        np.testing.assert_equal(res, verify_output_data)
        assert res.dtype == out_dtype
def test_benchmark():
    mod, params = mlp.get_workload(1)
    lib = relay.build(mod, target="llvm", params=params)
    exe = graph_executor.create(lib.get_graph_json(), lib.lib, tvm.cpu())
    data = tvm.nd.array(np.random.rand(1, 1, 28, 28).astype("float32"))
    result = exe.benchmark(tvm.cpu(), data=data, func_name="run", repeat=2, number=1)
    assert result.mean == result.median
    assert result.mean > 0
    assert len(result.results) == 2

    with patch.object(
        tvm.runtime.module.Module,
        "time_evaluator",
        return_value=lambda: tvm.runtime.module.BenchmarkResult([1, 2, 2, 5]),
    ) as method:
        result = exe.benchmark(tvm.cpu(), data=data, func_name="run", repeat=2, number=1)
        assert result.mean == 2.5
        assert result.median == 2.0
        assert result.max == 5
        assert result.min == 1
        assert result.std == 1.5
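# A standalone sanity check of the statistics asserted in the patched branch
# above. The four mocked samples are [1, 2, 2, 5]; plain NumPy reproduces the
# same mean/median/max/min/std. The helper name is ours, not part of the
# original suite.
def check_benchmark_stats():
    samples = np.array([1.0, 2.0, 2.0, 5.0])
    assert samples.mean() == 2.5       # (1 + 2 + 2 + 5) / 4
    assert np.median(samples) == 2.0   # middle two samples average to 2
    assert samples.max() == 5
    assert samples.min() == 1
    assert samples.std() == 1.5        # population std: sqrt(9 / 4) = 1.5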
def relay_micro_build(func, dev_config, params=None):
    """Create a graph executor module with a micro device context from a Relay function.

    Parameters
    ----------
    func : relay.Function
        function to compile

    dev_config : Dict[str, Any]
        MicroTVM config dict for the target device

    params : dict
        input parameters that do not change during inference

    Returns
    -------
    mod : tvm.runtime.Module
        graph executor module for the target device
    """
    with tvm.transform.PassContext(
        disabled_pass={"FuseOps"}, config={"tir.disable_vectorize": True}
    ):
        graph, c_mod, params = relay.build(func, target=TARGET, params=params)
    micro_mod = micro.create_micro_mod(c_mod, dev_config)
    ctx = tvm.micro_dev(0)
    mod = graph_executor.create(graph, micro_mod, ctx)
    mod.set_input(**params)
    return mod
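# A minimal usage sketch for relay_micro_build. Hedged: the host-emulated
# dev_config and micro.Session setup below are assumptions drawn from the
# pre-RPC micro API; a real board needs its own board-specific config dict.
def relay_micro_build_example():
    x = relay.var("x", shape=(1, 10))
    func = relay.Function([x], relay.add(x, relay.const(1.0)))
    dev_config = micro.device.host.generate_config()  # assumed host device config
    with micro.Session(dev_config):
        mod = relay_micro_build(func, dev_config)
        mod.run(x=np.zeros((1, 10), dtype="float32"))
        return mod.get_output(0)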
def test_benchmark_end_to_end_rpc():
    server = rpc.Server("127.0.0.1")
    remote = rpc.connect(server.host, server.port)

    mod, params = mlp.get_workload(1)
    lib = relay.build(mod, target="cuda", params=params)

    temp = utils.tempdir()
    path = temp.relpath("library.so")
    lib.export_library(path)
    remote.upload(path)
    rlib = remote.load_module("library.so")

    dev = remote.device("cuda")
    exe = graph_executor.create(lib.get_graph_json(), rlib, dev)
    data = tvm.nd.array(np.random.rand(1, 1, 28, 28).astype("float32"), device=dev)
    result = exe.benchmark(dev, data=data, func_name="run", repeat=2, number=1, end_to_end=True)
    assert result.mean > 0
    assert len(result.results) == 2
def verify(mod, goldens):
    with tvm.transform.PassContext(opt_level=3):
        graph, lib, params = relay.build(mod, "llvm", params=None)
        golden_data, golden_output = goldens
        rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0))
        rt_mod.set_input("quantized_data", golden_data)
        rt_mod.set_input(**params)
        rt_mod.run()
        res = rt_mod.get_output(0).asnumpy()
        np.testing.assert_equal(res, golden_output)
def get_output(func, golden_inputs):
    with tvm.transform.PassContext(opt_level=2):
        golden_data, golden_weight = golden_inputs
        params = {"kernel": golden_weight}
        libs = relay.build(func, "llvm", params=params)
        mod = graph_executor.create(libs.graph_json, libs.lib, device=tvm.cpu(0))
        mod.set_input("data", golden_data)
        mod.set_input(**libs.params)
        mod.run()
        res = mod.get_output(0).numpy()
        return res
def test_legacy_compatibility():
    mod, params = relay.testing.synthetic.get_workload()
    with relay.build_config(opt_level=3):
        graph, lib, graph_params = relay.build_module.build(mod, "llvm", params=params)
    data = np.random.uniform(-1, 1, size=input_shape(mod)).astype("float32")
    dev = tvm.cpu()
    module = graph_executor.create(graph, lib, dev)
    module.set_input("data", data)
    module.set_input(**graph_params)
    module.run()
    out = module.get_output(0).numpy()
    tvm.testing.assert_allclose(out, verify(data), atol=1e-5)
def check_device(device, target_device):
    if not tvm.runtime.enabled(target_device):
        print("Skip test because {} is not enabled.".format(target_device))
        return

    device_dev = tvm.device(device)
    graph = get_simplex_graph(host_dev.device_type, device_dev.device_type)
    shape = (4,)

    # Create module for add whose target is the device.
    tensor_a = te.placeholder(shape, name="A")
    tensor_b = te.placeholder(shape, name="B")
    elemwise_add = te.compute(
        shape, lambda *i: tensor_a(*i) + tensor_b(*i), name="elemwise_add"
    )
    target = topi.cpp.TEST_create_target(device)
    schedule_add = topi.cpp.cuda.schedule_injective(target, [elemwise_add])
    lower_add = tvm.lower(
        schedule_add, [tensor_a, tensor_b, elemwise_add], name="elemwise_add"
    )

    # Insert copy. Neither compute nor schedule is required for the copy
    # node. The compute will be performed at runtime which is just data
    # copy from the input to the output.
    tensor_copy = te.placeholder(shape, name="__copy")

    # Create module for sub whose target is the host.
    tensor_c = te.placeholder(shape, name="C")
    elemwise_sub = te.compute(
        shape, lambda *i: tensor_copy(*i) - tensor_c(*i), name="elemwise_sub"
    )
    schedule_sub = te.create_schedule(elemwise_sub.op)
    lower_sub = tvm.lower(
        schedule_sub, [tensor_copy, tensor_c, elemwise_sub], name="elemwise_sub"
    )

    target_flist = {target_device: lower_add, target_host: lower_sub}
    target = tvm.target.Target(target, target_host)
    mhost = tvm.build(target_flist, target=target)
    dev = [host_dev, device_dev]
    mod = graph_executor.create(graph, mhost, dev)
    params = {}
    params["A"] = tensor_a = np.random.uniform(size=shape).astype(tensor_a.dtype)
    params["B"] = tensor_b = np.random.uniform(size=shape).astype(tensor_b.dtype)
    params["C"] = tensor_c = np.random.uniform(size=shape).astype(tensor_c.dtype)
    mod.set_input(**params)
    mod.run()
    out = mod.get_output(0, tvm.nd.empty(shape))
    np.testing.assert_equal(out.asnumpy(), (tensor_a + tensor_b) - tensor_c)
def test_benchmark_end_to_end(dev, target):
    mod, params = mlp.get_workload(1)
    lib = relay.build(mod, target=target, params=params)
    exe = graph_executor.create(lib.get_graph_json(), lib.lib, dev)
    data = tvm.nd.array(np.random.rand(1, 1, 28, 28).astype("float32"))
    result = exe.benchmark(dev, data=data, func_name="run", repeat=2, number=1, end_to_end=True)
    assert result.mean > 0
    assert len(result.results) == 2
def check_load_module():
    temp = utils.tempdir()
    path_lib = temp.relpath("deploy.so")
    mhost.export_library(path_lib)
    with open(temp.relpath("deploy.json"), "w") as out_file:
        out_file.write(graph)
    loaded_lib = tvm.runtime.load_module(path_lib)
    loaded_graph = open(temp.relpath("deploy.json")).read()
    mod = graph_executor.create(loaded_graph, loaded_lib, dev)
    mod.set_input(**params)
    mod.run()
    out = mod.get_output(0, tvm.nd.empty(shape))
    np.testing.assert_equal(out.numpy(), tensor_a + tensor_b - tensor_c + tensor_d)
def test_build(build_dir):
    """Sanity check with the cat image we download."""
    graph = open(osp.join(build_dir, "deploy_graph.json")).read()
    lib = tvm.runtime.load_module(osp.join(build_dir, "deploy_lib.so"))
    params = bytearray(open(osp.join(build_dir, "deploy_param.params"), "rb").read())
    input_data = get_cat_image()
    dev = tvm.cpu()
    module = graph_executor.create(graph, lib, dev)
    module.load_params(params)
    module.run(data=input_data)
    out = module.get_output(0).numpy()
    top1 = np.argmax(out[0])
    synset = download_img_labels()
    print("TVM prediction top-1:", top1, synset[top1])
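# For context, a sketch of how the three deploy artifacts read above could be
# produced. This is an assumption about the build step (which lives outside
# this test), not a copy of it; the file names mirror the ones loaded above.
def export_deploy_artifacts(mod, params, build_dir):
    graph, lib, params = relay.build(mod, target="llvm", params=params)
    lib.export_library(osp.join(build_dir, "deploy_lib.so"))
    with open(osp.join(build_dir, "deploy_graph.json"), "w") as f:
        f.write(graph)
    with open(osp.join(build_dir, "deploy_param.params"), "wb") as f:
        f.write(tvm.runtime.save_param_dict(params))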
def qnn_batch_matmul_driver(test_configuration):
    in_dtype = test_configuration["dtype"]
    out_dtype = test_configuration["out_dtype"]
    quantized_x_name = "quantized_x"
    quantized_y_name = "quantized_y"
    expected_out_dtype = test_configuration["out_dtype"]
    quantized_x = relay.var(quantized_x_name, shape=test_configuration["x_shape"], dtype=in_dtype)
    quantized_y = relay.var(quantized_y_name, shape=test_configuration["y_shape"], dtype=in_dtype)
    mod = relay.qnn.op.batch_matmul(
        quantized_x,
        quantized_y,
        relay.const(test_configuration["x_zero_point"], "int32"),
        relay.const(test_configuration["y_zero_point"], "int32"),
        relay.const(test_configuration["x_scale"], "float32"),
        relay.const(test_configuration["y_scale"], "float32"),
    )
    if test_configuration["requantize"] is not None:
        requantize_config = test_configuration["requantize"]
        mod = relay.qnn.op.requantize(
            mod,
            input_scale=relay.const(requantize_config["input_scale"], "float32"),
            input_zero_point=relay.const(0, "int32"),
            output_scale=relay.const(requantize_config["output_scale"], "float32"),
            output_zero_point=relay.const(requantize_config["output_zero_point"], "int32"),
            out_dtype=requantize_config["out_dtype"],
        )
        expected_out_dtype = requantize_config["out_dtype"]

    mod = relay.Function(relay.analysis.free_vars(mod), mod)
    mod = tvm.IRModule.from_expr(mod)
    mod = relay.transform.InferType()(mod)
    mod = relay.qnn.transform.CanonicalizeOps()(mod)
    with tvm.transform.PassContext(opt_level=2):
        graph, lib, params = relay.build(mod, "llvm", params=None)
    mod = graph_executor.create(graph, lib, device=tvm.cpu(0))
    mod.set_input(quantized_x_name, test_configuration[quantized_x_name])
    mod.set_input(quantized_y_name, test_configuration[quantized_y_name])
    mod.set_input(**params)
    mod.run()
    res = mod.get_output(0).numpy()
    np.testing.assert_equal(res, test_configuration["output"])
    assert res.dtype == expected_out_dtype
def check_remote(server):
    mlib = tvm.build(s, [A, B], "llvm", name="myadd")
    remote = rpc.connect(server.host, server.port)
    temp = utils.tempdir()
    dev = remote.cpu(0)
    path_dso = temp.relpath("dev_lib.so")
    mlib.export_library(path_dso)
    remote.upload(path_dso)
    mlib = remote.load_module("dev_lib.so")
    mod = graph_executor.create(graph, mlib, remote.cpu(0))
    a = np.random.uniform(size=(n,)).astype(A.dtype)
    mod.run(x=tvm.nd.array(a, dev))
    out = tvm.nd.empty((n,), device=dev)
    out = mod.get_output(0, out)
    np.testing.assert_equal(out.numpy(), a + 1)
def load_tvm(self, export_dir):
    """Load a TVM module from the export directory."""
    self.export_dir = export_dir
    self.tvm_lib = load_module(os.path.join(export_dir, TVM_ASSETS[0]))
    with open(os.path.join(export_dir, TVM_ASSETS[1]), "r", encoding="utf8") as f:
        self.tvm_graph = f.read()
    with open(os.path.join(export_dir, TVM_ASSETS[2]), "rb") as f:
        self.tvm_params = relay.load_param_dict(f.read())

    self.tvm_module = graph_executor.create(self.tvm_graph, self.tvm_lib, device=self.dev)
    self.tvm_module.set_input(**self.tvm_params)
    return self.tvm_module
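# Usage sketch for load_tvm. Hedged: the wrapper instance and the input name
# "input0" are placeholders; it assumes export_dir holds the three TVM_ASSETS
# files (compiled library, graph JSON, serialized param dict) written by the
# matching export step.
def load_tvm_example(runner, export_dir, x_np):
    mod = runner.load_tvm(export_dir)
    mod.set_input("input0", tvm.nd.array(x_np, device=runner.dev))  # "input0" is an assumption
    mod.run()
    return mod.get_output(0).numpy()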
def verify(data):
    if not tvm.runtime.enabled("llvm"):
        print("Skip because llvm is not enabled")
        return
    mod, params = relay.testing.synthetic.get_workload()
    with relay.build_config(opt_level=3):
        graph, lib, graph_params = relay.build_module.build(mod, "llvm", params=params)
    dev = tvm.cpu()
    module = graph_executor.create(graph, lib, dev)
    module.set_input("data", data)
    module.set_input(**graph_params)
    module.run()
    out = module.get_output(0).numpy()
    return out
def verify_fused_batch_norm(shape):
    g = tf.Graph()
    with g.as_default():
        input_tensor = tf.placeholder(tf.float32, shape=shape, name="input")
        alpha = tf.constant(
            np.random.rand(shape[-1]),
            dtype=tf.float32,
            name="alpha",
        )
        beta = tf.constant(
            np.random.rand(shape[-1]),
            dtype=tf.float32,
            name="beta",
        )
        bn = tf.nn.fused_batch_norm(x=input_tensor, offset=beta, scale=alpha, name="bn")
        out = tf.identity(bn[0], name="output")
    data = np.random.rand(*shape)
    with tf.Session(graph=out.graph) as sess:
        sess.run([tf.global_variables_initializer()])
        tf_out = sess.run(out, feed_dict={input_tensor: data})
        constant_graph = graph_util.convert_variables_to_constants(
            sess, sess.graph_def, ["output"]
        )

    for device in ["llvm"]:
        dev = tvm.device(device, 0)
        if not tvm.testing.device_enabled(device):
            print("Skip because %s is not enabled" % device)
            continue
        mod, params = relay.frontend.from_tensorflow(constant_graph, outputs=["output"])
        with tvm.transform.PassContext(opt_level=3):
            graph, lib, params = relay.build(mod, target=device, params=params)
        from tvm.contrib import graph_executor

        m = graph_executor.create(graph, lib, dev)
        m.set_input(**params)
        m.set_input("input", data)
        m.run()
        tvm_out = m.get_output(0)
        tvm.testing.assert_allclose(
            tvm_out.numpy(), tf_out.astype(tvm_out.dtype), atol=1e-3, rtol=1e-3
        )
def run_func(func, params, x):
    with tvm.transform.PassContext(opt_level=3):
        graph, lib, new_params = relay.build(func, "llvm", params=params)

    from tvm.contrib import graph_executor

    dev = tvm.cpu(0)
    dtype = "float32"
    m = graph_executor.create(graph, lib, dev)
    # Set inputs
    m.set_input("data", tvm.nd.array(x.astype(dtype)))
    m.set_input(**new_params)
    # Execute
    m.run()
    # Get outputs
    tvm_output = m.get_output(0)
    return tvm_output.asnumpy()
def _build_tvm(self, debug_runtime=False):
    # Compile kernels with history best records.
    with autotvm.apply_history_best(self.log_file):
        with tvm.transform.PassContext(opt_level=3):
            self.tvm_graph, self.tvm_lib, self.tvm_params = relay.build(
                self.mod, target=self.target, params=self.params
            )

    if not debug_runtime:
        self.tvm_module = graph_executor.create(self.tvm_graph, self.tvm_lib, device=self.dev)
    else:
        self.tvm_module = debug_executor.create(self.tvm_graph, self.tvm_lib, device=self.dev)
    self.tvm_module.set_input(**self.tvm_params)
    return self.tvm_module
def test_fac_relay_build():
    # Check the default optimization pipeline.
    shape_x = [1, 5, 5, 4]
    shape_w = [3, 3, 4, 1]
    x_np = np.random.randint(-128, 127, size=shape_x, dtype="int8").astype("float32")
    w_np = np.random.randint(-128, 127, size=shape_w, dtype="int8").astype("float32")

    weight = relay.const(w_np)
    data = relay.var("data", shape=shape_x, dtype="float32")
    op1 = relay.nn.space_to_batch_nd(data, block_shape=[2, 2], paddings=[[2, 3], [2, 3]])
    op2 = relay.nn.conv2d(
        op1,
        weight,
        padding=[0, 0, 0, 0],
        groups=4,
        channels=4,
        kernel_size=[3, 3],
        data_layout="NHWC",
        kernel_layout="HWOI",
    )
    expr = relay.nn.batch_to_space_nd(op2, block_shape=[2, 2], crops=[[0, 1], [0, 1]])

    mod_def = tvm.relay.transform.InferType()(tvm.IRModule.from_expr(expr))
    result_def = (
        relay.create_executor("vm", mod=mod_def, device=tvm.cpu(), target="llvm")
        .evaluate()(x_np)
        .numpy()
    )

    graph, lib, params = relay.build(mod_def, "llvm", params=None)
    rt_mod = graph_executor.create(graph, lib, device=tvm.cpu())
    rt_mod.set_input("data", x_np)
    rt_mod.set_input(**params)
    rt_mod.run()
    result_flat = rt_mod.get_output(0).numpy()

    assert "space_to_batch_nd" not in graph
    assert "conv2d" in graph
    assert "batch_to_space_nd" not in graph
    assert np.array_equal(result_def, result_flat)
def test_with_params():
    x = relay.var("x", shape=(10, 5))
    y = relay.var("y", shape=(1, 5))
    z = relay.add(x, y)
    z = relay.exp(z)
    func = relay.Function([x, y], z)
    x_data = np.random.rand(10, 5).astype("float32")
    y_data = np.random.rand(1, 5).astype("float32")
    params = {"y": y_data}
    graph, lib, params = relay.build(tvm.IRModule.from_expr(func), "llvm", params=params)
    mod = graph_executor.create(graph, lib, device=tvm.cpu(0))
    mod.set_input(**params)
    mod.set_input(x=x_data)
    mod.run()
    res = mod.get_output(0).numpy()
    ref_res = np.exp(y_data + x_data)
    tvm.testing.assert_allclose(res, ref_res, atol=1e-5, rtol=1e-5)
def test_tflite_output_multiplier_greater_than_one():
    with TempOpAttr("qnn.conv2d", "FTVMQnnLegalize", legalize_qnn_conv2d):
        # uint8 input
        data_shape = (2, 1, 2, 4)
        data_dtype = "uint8"
        kernel_shape = (3, 1, 2, 2)
        kernel_dtype = "uint8"
        ref_func, qnn_func = get_funcs(
            data_shape=data_shape,
            data_dtype=data_dtype,
            kernel_shape=kernel_shape,
            kernel_dtype=kernel_dtype,
            input_scale=1.0,
            kernel_scale=1.0,
            input_zero_point=128,
            kernel_zero_point=128,
            kernel_size=(2, 2),
            padding=(0, 0),
            strides=(2, 2),
            dilation=(1, 1),
            data_layout="NCHW",
            kernel_layout="OIHW",
            out_dtype="int32",
        )
        golden_data = 128 + np.array(
            (1, 1, 1, 1, 2, 2, 2, 2, 1, 2, 3, 4, 1, 2, 3, 4)
        ).reshape(data_shape).astype("uint8")
        golden_weight = 128 + np.array(
            (1, 2, 3, 4, -1, 1, -1, 1, -1, -1, 1, 1)
        ).reshape(kernel_shape)
        golden_weight = golden_weight.astype("uint8")

        with tvm.transform.PassContext(opt_level=2):
            params = {"kernel": golden_weight}
            graph, lib, params = relay.build(qnn_func, "llvm", params=params)
            mod = graph_executor.create(graph, lib, device=tvm.cpu(0))
            mod.set_input("data", golden_data)
            mod.set_input(**params)
            mod.run()
            qnn_output = mod.get_output(0).numpy()
        golden_output = np.array((17, 17, 0, 0, 2, 2, 16, 36, 2, 2, 0, 0)).reshape(2, 3, 1, 2)
        np.testing.assert_equal(qnn_output, golden_output)
def check_graph_executor(
    target, ref_res, device, func, params, config, opt_level, expected_index=None
):
    with tvm.transform.PassContext(opt_level=opt_level, config=config):
        graph, lib, new_params = relay.build(func, target, params=params)
        contexts = [tvm.cpu(0), tvm.device(device)]
        graph_json = json.loads(graph)
        if "device_index" in graph_json["attrs"]:
            device_index = graph_json["attrs"]["device_index"][1]
            assert device_index == expected_index
        mod = graph_executor.create(graph, lib, contexts)
        mod.set_input(**new_params)
        mod.run()
        res = mod.get_output(0).numpy()
        tvm.testing.assert_allclose(res, ref_res, rtol=1e-5, atol=1e-5)
def verify_graph_executor(remote, target, shape, dtype):
    x = relay.var("x")
    y = relay.const(1)
    z = relay.add(x, y)
    func = relay.Function([x], z)

    x_in = np.ones(shape).astype(dtype)
    params = {"x": x_in}
    graph, lib, params = relay.build(func, target=target, params=params)

    temp = utils.tempdir()
    path_dso = temp.relpath("dev_lib.o")
    lib.save(path_dso)
    remote.upload(path_dso)
    lib = remote.load_module("dev_lib.o")
    dev = remote.cpu(0)
    mod = graph_executor.create(graph, lib, dev)
    mod.load_params(runtime.save_param_dict(params))
    mod.run()
    out = mod.get_output(0, tvm.nd.empty(shape, dtype=dtype, device=dev))
    tvm.testing.assert_allclose(x_in + 1, out.numpy())
def test_compile_fused_identity_cast():
    # A fused function that would be optimized to identity.
    x = relay.var("x", shape=[16], dtype="float32")
    y = relay.cast(x, "float32")
    func1 = relay.Function([x], y).with_attr("Primitive", 1)

    # A fused function with param pass-through.
    x = relay.var("x", shape=[16], dtype="float32")
    y = relay.add(x, relay.const(3.14, "float32"))
    func2 = relay.Function([x], relay.Tuple([x, y])).with_attr("Primitive", 1)

    x_global = relay.var("xx", shape=[16], dtype="float32")
    tup = func2(x_global)
    y_global = func1(relay.TupleGetItem(tup, 0) + relay.TupleGetItem(tup, 1))

    mod = tvm.IRModule.from_expr(relay.Function([x_global], y_global))
    for target, device in tvm.testing.enabled_targets():
        with tvm.transform.PassContext(opt_level=2):
            graph, lib, _ = relay.build(mod, target=target)
            executor = graph_executor.create(graph, lib, device=device)
            executor.run()
def dequantize_test_driver(in_dtype, quant_args, in_data, verify_output_data):
    shape = in_data.shape
    input_data = relay.var("input_data", shape=shape, dtype=in_dtype)
    min_range = quant_args["min_range"]
    max_range = quant_args["max_range"]
    dequantized_output = dequantize_mxnet_min_max(
        input_data, min_range=min_range, max_range=max_range, in_dtype=in_dtype
    )
    mod = relay.Function(relay.analysis.free_vars(dequantized_output), dequantized_output)
    mod = tvm.IRModule.from_expr(mod)
    with tvm.transform.PassContext(opt_level=3):
        graph, lib, params = relay.build(mod, "llvm", params=None)
        rt_mod = graph_executor.create(graph, lib, device=tvm.cpu(0))
        rt_mod.set_input(input_data=in_data)
        rt_mod.set_input(**params)
        rt_mod.run()
        res = rt_mod.get_output(0).asnumpy()
        assert np.allclose(res, verify_output_data)
        assert res.dtype == np.float32