def test_matmul( self, android_serial_number, tvm_tracker_host, tvm_tracker_port, adb_server_socket, M, N, K ): X = te.placeholder((M, K), dtype="float32") Y = te.placeholder((K, N), dtype="float32") k1 = te.reduce_axis((0, K), name="k1") Z = te.compute((M, N), lambda i, j: te.sum(X[i, k1] * Y[k1, j], axis=[k1])) schedule = te.create_schedule(Z.op) target_hexagon = tvm.target.hexagon("v68", link_params=True) func = tvm.build( schedule, [X, Y, Z], tvm.target.Target(target_hexagon, host=target_hexagon) ) temp = utils.tempdir() dso_binary = "test_binary.so" dso_binary_path = temp.relpath(dso_binary) func.save(dso_binary_path) if not android_serial_number: pytest.skip(msg="Skip hardware test since ANDROID_SERIAL_NUMBER is not set.") rpc_info = { "rpc_tracker_host": tvm_tracker_host, "rpc_tracker_port": tvm_tracker_port, "rpc_server_port": RPC_SERVER_PORT + 2, # See note at the beginning of the file "adb_server_socket": adb_server_socket, } launcher = HexagonLauncher(serial_number=android_serial_number, rpc_info=rpc_info) launcher.upload(dso_binary_path, dso_binary) launcher.start_server() x = np.random.uniform(size=[i.value for i in X.shape]).astype(X.dtype) y = np.random.uniform(size=[i.value for i in Y.shape]).astype(Y.dtype) z = np.zeros([i.value for i in Z.shape], dtype=Z.dtype) with launcher.start_session() as sess: mod = launcher.load_module(dso_binary, sess) xt = tvm.nd.array(x, device=sess.device) yt = tvm.nd.array(y, device=sess.device) zt = tvm.nd.array(z, device=sess.device) mod(xt, yt, zt) launcher.stop_server() target_llvm = tvm.target.Target("llvm") mod = tvm.build(schedule, [X, Y, Z], tvm.target.Target(target_llvm, host=target_llvm)) device = tvm.cpu(0) xtcpu = tvm.nd.array(x, device) ytcpu = tvm.nd.array(y, device) ztcpu = tvm.nd.array(z, device) mod(xtcpu, ytcpu, ztcpu) tvm.testing.assert_allclose(zt.numpy(), ztcpu.numpy(), rtol=1e-4)
def hexagon_launcher( hexagon_server_process, rpc_server_port, tvm_tracker_host, tvm_tracker_port, adb_server_socket, android_serial_number, ) -> HexagonLauncherRPC: """Initials and returns hexagon launcher which reuses RPC info and Android serial number.""" if android_serial_number is None: yield None if android_serial_number != "simulator": rpc_info = hexagon_server_process._rpc_info else: rpc_info = { "rpc_tracker_host": tvm_tracker_host, "rpc_tracker_port": tvm_tracker_port, "rpc_server_port": rpc_server_port, "adb_server_socket": adb_server_socket, } launcher = HexagonLauncher(serial_number=android_serial_number, rpc_info=rpc_info) try: if android_serial_number == "simulator": launcher.start_server() yield launcher finally: if android_serial_number == "simulator": launcher.stop_server() launcher.cleanup_directory()
def test_add_vtcm(android_serial_number, tvm_tracker_host, tvm_tracker_port, adb_server_socket): dtype = "int8" A = tvm.te.placeholder((2,), dtype=dtype) B = tvm.te.placeholder((1,), dtype=dtype) C = tvm.te.compute(A.shape, lambda i: A[i] + B[0], name="C") sched = tvm.te.create_schedule(C.op) target_hexagon = tvm.target.hexagon("v68", link_params=True) func = tvm.build( sched, [A, B, C], tvm.target.Target(target_hexagon, host=target_hexagon), name="add" ) temp = utils.tempdir() dso_binary = "test_binary.so" dso_binary_path = temp.relpath(dso_binary) func.save(dso_binary_path) if not android_serial_number: pytest.skip(msg="Skip hardware test since ANDROID_SERIAL_NUMBER is not set.") rpc_info = { "rpc_tracker_host": tvm_tracker_host, "rpc_tracker_port": tvm_tracker_port, "rpc_server_port": RPC_SERVER_PORT + 1, # See note at the beginning of the file "adb_server_socket": adb_server_socket, } launcher = HexagonLauncher(serial_number=android_serial_number, rpc_info=rpc_info) launcher.upload(dso_binary_path, dso_binary) launcher.start_server() with launcher.start_session() as sess: mod = launcher.load_module(dso_binary, sess) A_data = tvm.nd.empty(A.shape, A.dtype, sess.device, "global.vtcm") A_data.copyfrom(np.array([2, 3])) B_data = tvm.nd.empty(B.shape, B.dtype, sess.device, "global.vtcm") B_data.copyfrom(np.array([4])) C_data = tvm.nd.empty(C.shape, C.dtype, sess.device, "global.vtcm") C_data.copyfrom(np.array([0, 0])) mod["add"](A_data, B_data, C_data) result = C_data.numpy() assert (result == np.array([6, 7])).all() launcher.stop_server()
def hexagon_server_process(request, android_serial_number, rpc_server_port_for_session, adb_server_socket) -> HexagonLauncherRPC: """Initials and returns hexagon launcher if ANDROID_SERIAL_NUMBER is defined. This launcher is started only once per test session. """ if android_serial_number is None or android_serial_number == "simulator": yield None else: # Requesting these fixtures sets up a local tracker, if one # hasn't been provided to us. Delaying the evaluation of # these fixtures avoids starting a tracker unless necessary. tvm_tracker_host = request.getfixturevalue("tvm_tracker_host") tvm_tracker_port = request.getfixturevalue("tvm_tracker_port") rpc_info = { "rpc_tracker_host": tvm_tracker_host, "rpc_tracker_port": tvm_tracker_port, "rpc_server_port": rpc_server_port_for_session, "adb_server_socket": adb_server_socket, } launcher = HexagonLauncher(serial_number=android_serial_number, rpc_info=rpc_info) try: launcher.start_server() yield launcher finally: launcher.stop_server()
def hexagon_launcher(request, android_serial_number, rpc_server_port, adb_server_socket): if android_serial_number is None: yield None else: # Requesting these fixtures sets up a local tracker, if one # hasn't been provided to us. Delaying the evaluation of # these fixtures avoids starting a tracker unless necessary. tvm_tracker_host = request.getfixturevalue("tvm_tracker_host") tvm_tracker_port = request.getfixturevalue("tvm_tracker_port") rpc_info = { "rpc_tracker_host": tvm_tracker_host, "rpc_tracker_port": tvm_tracker_port, "rpc_server_port": rpc_server_port, "adb_server_socket": adb_server_socket, } launcher = HexagonLauncher(serial_number=android_serial_number, rpc_info=rpc_info) launcher.start_server() try: yield launcher finally: launcher.stop_server()
def test_cache_read_write(android_serial_number, tvm_tracker_host, tvm_tracker_port): size = 128 outer_shape = (size, ) factor = 16 inner_shape = (factor, ) dtype = "int8" x = te.placeholder(shape=outer_shape, dtype=dtype, name="x") y = te.placeholder(shape=outer_shape, dtype=dtype, name="y") z = te.compute(outer_shape, lambda i: x[i] + y[i], name="z") s = te.create_schedule(z.op) x_global = s.cache_read(x, "global.vtcm", [z]) y_global = s.cache_read(y, "global.vtcm", [z]) z_global = s.cache_write(z, "global.vtcm") zouter, zinner = s[z_global].split(z_global.op.axis[0], factor=factor) s[x_global].compute_at(s[z_global], zouter) s[y_global].compute_at(s[z_global], zouter) mem_copy_read = intrin_mem_copy(inner_shape, dtype, "global.vtcm", "global") (cache_read_x, ) = s[x_global].op.axis s[x_global].tensorize(cache_read_x, mem_copy_read) (cache_read_y, ) = s[y_global].op.axis s[y_global].tensorize(cache_read_y, mem_copy_read) mem_copy_write = intrin_mem_copy(outer_shape, dtype, "global", "global.vtcm") (cache_write_z, ) = s[z].op.axis s[z].tensorize(cache_write_z, mem_copy_write) print(tvm.lower(s, [x, y, z])) target_hexagon = tvm.target.hexagon("v68", link_params=True) func = tvm.build(s, [x, y, z], tvm.target.Target(target_hexagon, host=target_hexagon), name="dmacpy") temp = utils.tempdir() dso_binary = "test_binary.so" dso_binary_path = temp.relpath(dso_binary) func.save(dso_binary_path) if not android_serial_number: pytest.skip( "Skip hardware test since ANDROID_SERIAL_NUMBER is not set.") launcher = HexagonLauncher(serial_number=android_serial_number) launcher.android_run_rpc(rpc_tracker_host=tvm_tracker_host, rpc_tracker_port=tvm_tracker_port) launcher.hexagon_setup() remote_kw = { "host": tvm_tracker_host, "port": tvm_tracker_port, "priority": 0, "timeout": 60, } launcher.hexagon_session_setup(remote_kw) launcher.upload(dso_binary_path, dso_binary) with launcher.session as sess: mod = launcher.get_module(dso_binary) xt = tvm.nd.array(np.random.uniform(size=size).astype(x.dtype), device=sess.device) yt = tvm.nd.array(np.random.uniform(size=size).astype(y.dtype), device=sess.device) zt = tvm.nd.array(np.random.uniform(size=size).astype(z.dtype), device=sess.device) mod["dmacpy"](xt, yt, zt) launcher.close() ref = xt.numpy() + yt.numpy() np.testing.assert_equal(zt.numpy(), ref)
def test_add_vtcm(tvm_tracker_host, tvm_tracker_port, android_serial_number): dtype = "int8" A = tvm.te.placeholder((2, ), dtype=dtype) B = tvm.te.placeholder((1, ), dtype=dtype) C = tvm.te.compute(A.shape, lambda i: A[i] + B[0], name="C") sched = tvm.te.create_schedule(C.op) target_hexagon = tvm.target.hexagon("v68", link_params=True) func = tvm.build(sched, [A, B, C], tvm.target.Target(target_hexagon, host=target_hexagon), name="add") temp = utils.tempdir() dso_binary = "test_binary.so" dso_binary_path = temp.relpath(dso_binary) func.save(dso_binary_path) launcher = HexagonLauncher(serial_number=android_serial_number) launcher.android_run_rpc(rpc_tracker_host=tvm_tracker_host, rpc_tracker_port=tvm_tracker_port) launcher.hexagon_setup() remote_kw = { "host": tvm_tracker_host, "port": tvm_tracker_port, "priority": 0, "timeout": 60, } launcher.hexagon_session_setup(remote_kw) launcher.upload(dso_binary_path, dso_binary) with launcher.session as sess: mod = launcher.get_module(dso_binary) A_data = tvm.nd.empty(A.shape, A.dtype, sess.device, "global.vtcm") A_data.copyfrom(np.array([2, 3])) B_data = tvm.nd.empty(B.shape, B.dtype, sess.device, "global.vtcm") B_data.copyfrom(np.array([4])) C_data = tvm.nd.empty(C.shape, C.dtype, sess.device, "global.vtcm") C_data.copyfrom(np.array([0, 0])) mod["add"](A_data, B_data, C_data) result = C_data.numpy() assert (result == np.array([6, 7])).all() launcher.close()
def test_graph_executor(tvm_tracker_host, tvm_tracker_port, android_serial_number): dtype = "float32" data = relay.var("data", relay.TensorType((1, 64, 64, 3), dtype)) weight = relay.var("weight", relay.TensorType((5, 5, 3, 8), dtype)) y = relay.nn.conv2d( data, weight, padding=(2, 2), kernel_size=(5, 5), data_layout="NHWC", kernel_layout="HWIO", out_dtype="float32", ) f = relay.Function([data, weight], y) relay_mod = tvm.IRModule.from_expr(f) relay_mod = relay.transform.InferType()(relay_mod) target_hexagon = tvm.target.hexagon("v68") runtime = Runtime("cpp") executor = Executor("graph") temp = utils.tempdir() dso_binary = "test_binary.so" dso_binary_path = temp.relpath(dso_binary) with tvm.transform.PassContext(opt_level=3): lowered = tvm.relay.build( relay_mod, tvm.target.Target(target_hexagon, host=target_hexagon), runtime=runtime, executor=executor, ) lowered.get_lib().save(dso_binary_path) launcher = HexagonLauncher(serial_number=android_serial_number) launcher.android_run_rpc(rpc_tracker_host=tvm_tracker_host, rpc_tracker_port=tvm_tracker_port) launcher.hexagon_setup() remote_kw = { "host": tvm_tracker_host, "port": tvm_tracker_port, "priority": 0, "timeout": 60, } launcher.hexagon_session_setup(remote_kw) launcher.upload(dso_binary_path, dso_binary) graph_mod = launcher.get_graph_executor(lowered, dso_binary) weight_in = np.random.rand(5, 5, 3, 8).astype(dtype=dtype) data_in = np.random.rand(1, 64, 64, 3).astype(dtype=dtype) graph_mod.set_input(weight=weight_in) graph_mod.run(data=data_in) hexagon_output = graph_mod.get_output(0).numpy() target_llvm = tvm.target.Target("llvm") with tvm.transform.PassContext(opt_level=3): llvm_lowered = tvm.relay.build( relay_mod, tvm.target.Target(target_llvm, host=target_llvm), runtime=runtime, executor=executor, ) llvm_graph_mod = tvm.contrib.graph_executor.GraphModule( llvm_lowered["default"](tvm.cpu(0))) llvm_graph_mod.set_input(weight=weight_in) llvm_graph_mod.run(data=data_in) expected_output = llvm_graph_mod.get_output(0).numpy() launcher.close() tvm.testing.assert_allclose(hexagon_output, expected_output, rtol=1e-4, atol=1e-5)
def test_matmul(self, tvm_tracker_host, tvm_tracker_port, android_serial_number, M, N, K): X = te.placeholder((M, K), dtype="float32") Y = te.placeholder((K, N), dtype="float32") k1 = te.reduce_axis((0, K), name="k1") Z = te.compute((M, N), lambda i, j: te.sum(X[i, k1] * Y[k1, j], axis=[k1])) schedule = te.create_schedule(Z.op) target_hexagon = tvm.target.hexagon("v68", link_params=True) func = tvm.build( schedule, [X, Y, Z], tvm.target.Target(target_hexagon, host=target_hexagon)) temp = utils.tempdir() dso_binary = "test_binary.so" dso_binary_path = temp.relpath(dso_binary) func.save(dso_binary_path) launcher = HexagonLauncher(serial_number=android_serial_number) launcher.android_run_rpc(rpc_tracker_host=tvm_tracker_host, rpc_tracker_port=tvm_tracker_port) launcher.hexagon_setup() remote_kw = { "host": tvm_tracker_host, "port": tvm_tracker_port, "priority": 0, "timeout": 60, } launcher.hexagon_session_setup(remote_kw) launcher.upload(dso_binary_path, dso_binary) x = np.random.uniform(size=[i.value for i in X.shape]).astype(X.dtype) y = np.random.uniform(size=[i.value for i in Y.shape]).astype(Y.dtype) z = np.zeros([i.value for i in Z.shape], dtype=Z.dtype) with launcher.session as sess: mod = launcher.get_module(dso_binary) xt = tvm.nd.array(x, device=sess.device) yt = tvm.nd.array(y, device=sess.device) zt = tvm.nd.array(z, device=sess.device) mod(xt, yt, zt) target_llvm = tvm.target.Target("llvm") mod = tvm.build(schedule, [X, Y, Z], tvm.target.Target(target_llvm, host=target_llvm)) device = tvm.cpu(0) xtcpu = tvm.nd.array(x, device) ytcpu = tvm.nd.array(y, device) ztcpu = tvm.nd.array(z, device) mod(xtcpu, ytcpu, ztcpu) launcher.close() tvm.testing.assert_allclose(zt.numpy(), ztcpu.numpy(), rtol=1e-4)
def test_add(android_serial_number, tvm_tracker_host, tvm_tracker_port): dtype = "int8" A = tvm.te.placeholder((2, ), dtype=dtype) B = tvm.te.placeholder((1, ), dtype=dtype) C = tvm.te.compute(A.shape, lambda i: A[i] + B[0], name="C") sched = tvm.te.create_schedule(C.op) target_hexagon = tvm.target.hexagon("v68", link_params=True) func = tvm.build(sched, [A, B, C], tvm.target.Target(target_hexagon, host=target_hexagon), name="add") temp = utils.tempdir() dso_binary = "test_binary.so" dso_binary_path = temp.relpath(dso_binary) func.save(dso_binary_path) if not android_serial_number: pytest.skip( "Skip hardware test since ANDROID_SERIAL_NUMBER is not set.") launcher = HexagonLauncher(serial_number=android_serial_number) launcher.android_run_rpc(rpc_tracker_host=tvm_tracker_host, rpc_tracker_port=tvm_tracker_port) launcher.hexagon_setup() remote_kw = { "host": tvm_tracker_host, "port": tvm_tracker_port, "priority": 0, "timeout": 60, } launcher.hexagon_session_setup(remote_kw) launcher.upload(dso_binary_path, dso_binary) with launcher.session as sess: mod = launcher.get_module(dso_binary) A_data = tvm.nd.array(np.array([2, 3], dtype=dtype), device=sess.device) assert (A_data.numpy() == np.array([2, 3])).all() B_data = tvm.nd.array(np.array([4], dtype=dtype), device=sess.device) assert (B_data.numpy() == np.array([4])).all() C_data = tvm.nd.array(np.array([0, 0], dtype=dtype), device=sess.device) assert (C_data.numpy() == np.array([0, 0])).all() mod["add"](A_data, B_data, C_data) assert (C_data.numpy() == np.array([6, 7])).all() launcher.close()
def test_aot_executor_multiple_conv2d( tvm_tracker_host, tvm_tracker_port, android_serial_number, adb_server_socket ): dtype = "float32" input_shape = (1, 8, 8, 3) w1_shape = (5, 5, 3, 1) w2_shape = (5, 5, 1, 3) data = relay.var("data", relay.TensorType(input_shape, dtype)) weight1 = relay.var("weight1", relay.TensorType(w1_shape, dtype)) weight2 = relay.var("weight2", relay.TensorType(w2_shape, dtype)) y1 = relay.nn.conv2d( data, weight1, padding=(2, 2), kernel_size=(5, 5), data_layout="NHWC", kernel_layout="HWIO", out_dtype="float32", ) y2 = relay.nn.conv2d( y1, weight2, padding=(2, 2), kernel_size=(5, 5), data_layout="NHWC", kernel_layout="HWIO", out_dtype="float32", ) f = relay.Function([data, weight1, weight2], y2) relay_mod = tvm.IRModule.from_expr(f) relay_mod = relay.transform.InferType()(relay_mod) target_hexagon = tvm.target.hexagon("v68") temp = utils.tempdir() dso_binary = "test_binary.so" dso_binary_path = temp / dso_binary weight1_data = np.random.rand(w1_shape[0], w1_shape[1], w1_shape[2], w1_shape[3]).astype( dtype=dtype ) weight2_data = np.random.rand(w2_shape[0], w2_shape[1], w2_shape[2], w2_shape[3]).astype( dtype=dtype ) input_data = np.random.rand( input_shape[0], input_shape[1], input_shape[2], input_shape[3] ).astype(dtype=dtype) params = {"weight1": weight1_data, "weight2": weight2_data} inputs = {"data": input_data} with tvm.transform.PassContext(opt_level=3): lowered = tvm.relay.build( relay_mod, params=params, target=tvm.target.Target(target_hexagon, host="c"), runtime=Runtime("cpp"), executor=Executor("aot", {"unpacked-api": False, "interface-api": "c"}), ) # Uncomment this once the workaround is not needed. # lowered.export_library( # dso_binary_path, fcompile=hexagon.create_aot_shared, hexagon_arch="v68" # ) lowered.export_library( dso_binary_path, fcompile=_workaround_create_aot_shared(), hexagon_arch="v68" ) if not android_serial_number: pytest.skip(msg="Skip hardware test since ANDROID_SERIAL_NUMBER is not set.") rpc_info = { "rpc_tracker_host": tvm_tracker_host, "rpc_tracker_port": tvm_tracker_port, "rpc_server_port": RPC_SERVER_PORT + 6, # See note at the beginning of the file "adb_server_socket": adb_server_socket, } launcher = HexagonLauncher(serial_number=android_serial_number, rpc_info=rpc_info) launcher.upload(dso_binary_path, dso_binary) launcher.start_server() with launcher.start_session() as sess: aot_mod = launcher.get_aot_executor(dso_binary, sess) aot_mod.set_input(**inputs) aot_mod.run() hexagon_output = aot_mod.get_output(0).numpy() launcher.stop_server() target_llvm = tvm.target.Target("llvm") with tvm.transform.PassContext(opt_level=3): llvm_lowered = tvm.relay.build( relay_mod, tvm.target.Target(target_llvm, host=target_llvm), runtime=Runtime("cpp"), executor=Executor("graph"), ) llvm_graph_mod = tvm.contrib.graph_executor.GraphModule(llvm_lowered["default"](tvm.cpu(0))) llvm_graph_mod.set_input(**params) llvm_graph_mod.run(**inputs) expected_output = llvm_graph_mod.get_output(0).numpy() tvm.testing.assert_allclose(hexagon_output, expected_output, rtol=1e-4, atol=1e-5)
def test_graph_executor( android_serial_number, tvm_tracker_host, tvm_tracker_port, adb_server_socket ): dtype = "float32" data = relay.var("data", relay.TensorType((1, 64, 64, 3), dtype)) weight = relay.var("weight", relay.TensorType((5, 5, 3, 8), dtype)) y = relay.nn.conv2d( data, weight, padding=(2, 2), kernel_size=(5, 5), data_layout="NHWC", kernel_layout="HWIO", out_dtype="float32", ) f = relay.Function([data, weight], y) relay_mod = tvm.IRModule.from_expr(f) relay_mod = relay.transform.InferType()(relay_mod) target_hexagon = tvm.target.hexagon("v68") runtime = Runtime("cpp") executor = Executor("graph") temp = utils.tempdir() dso_binary = "test_binary.so" dso_binary_path = temp.relpath(dso_binary) weight_in = np.random.rand(5, 5, 3, 8).astype(dtype=dtype) data_in = np.random.rand(1, 64, 64, 3).astype(dtype=dtype) params = {"weight": weight_in} inputs = {"data": data_in} with tvm.transform.PassContext(opt_level=3): lowered = tvm.relay.build( relay_mod, tvm.target.Target(target_hexagon, host=target_hexagon), runtime=runtime, executor=executor, ) lowered.get_lib().save(dso_binary_path) if not android_serial_number: pytest.skip(msg="Skip hardware test since ANDROID_SERIAL_NUMBER is not set.") rpc_info = { "rpc_tracker_host": tvm_tracker_host, "rpc_tracker_port": tvm_tracker_port, "rpc_server_port": RPC_SERVER_PORT + 3, # See note at the beginning of the file "adb_server_socket": adb_server_socket, } launcher = HexagonLauncher(serial_number=android_serial_number, rpc_info=rpc_info) launcher.upload(dso_binary_path, dso_binary) launcher.start_server() with launcher.start_session() as sess: graph_mod = launcher.get_graph_executor(lowered.get_graph_json(), dso_binary, sess) graph_mod.set_input(**params) graph_mod.run(**inputs) hexagon_output = graph_mod.get_output(0).numpy() launcher.stop_server() target_llvm = tvm.target.Target("llvm") with tvm.transform.PassContext(opt_level=3): llvm_lowered = tvm.relay.build( relay_mod, tvm.target.Target(target_llvm, host=target_llvm), runtime=runtime, executor=executor, ) llvm_graph_mod = tvm.contrib.graph_executor.GraphModule(llvm_lowered["default"](tvm.cpu(0))) llvm_graph_mod.set_input(**params) llvm_graph_mod.run(**inputs) expected_output = llvm_graph_mod.get_output(0).numpy() tvm.testing.assert_allclose(hexagon_output, expected_output, rtol=1e-4, atol=1e-5)