def run(mod, target): with relay.build_config(opt_level=3): lib = relay.build(mod, target=target, target_host=target_host, params=params) path_dso = temp.relpath("deploy.dylib") lib.export_library(path_dso, xcode.create_dylib, arch=arch, sdk=sdk) xcode.codesign(path_dso) # Start RPC test server that contains the compiled library. xcode.popen_test_rpc(proxy_host, proxy_port, key, destination=destination, libs=[path_dso]) # connect to the proxy remote = rpc.connect(proxy_host, proxy_port, key=key) if target == "metal": ctx = remote.metal(0) else: ctx = remote.cpu(0) lib = remote.load_module("deploy.dylib") m = graph_runtime.GraphModule(lib["default"](ctx)) m.set_input("data", tvm.nd.array(image, ctx)) m.run() tvm_output = m.get_output(0) top1 = np.argmax(tvm_output.asnumpy()[0]) print("TVM prediction top-1:", top1, synset[top1]) # evaluate ftimer = m.module.time_evaluator("run", ctx, number=3, repeat=10) prof_res = np.array(ftimer().results) * 1000 print("%-19s (%s)" % ("%.2f ms" % np.mean(prof_res), "%.2f ms" % np.std(prof_res)))
def test_rpc_module(): # graph n = tvm.convert(1024) A = tvm.placeholder((n, ), name='A') B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') temp = util.tempdir() s = tvm.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=64) s[B].bind(xi, tvm.thread_axis("threadIdx.x")) s[B].bind(xo, tvm.thread_axis("blockIdx.x")) # Build the dynamic lib. # If we don't want to do metal and only use cpu, just set target to be target f = tvm.build(s, [A, B], "metal", target_host=target, name="myadd") path_dso1 = temp.relpath("dev_lib.dylib") f.export_library(path_dso1, xcode.create_dylib, arch=arch, sdk=sdk) xcode.codesign(path_dso1) s = tvm.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=64) s[B].parallel(xi) s[B].pragma(xo, "parallel_launch_point") s[B].pragma(xi, "parallel_barrier_when_finish") f = tvm.build(s, [A, B], target, name="myadd_cpu") path_dso2 = temp.relpath("cpu_lib.dylib") f.export_library(path_dso2, xcode.create_dylib, arch=arch, sdk=sdk) xcode.codesign(path_dso2) # Start RPC test server that contains the compiled library. server = xcode.popen_test_rpc(proxy_host, proxy_port, key, destination=destination, options=['-quiet'], libs=[path_dso1, path_dso2]) # connect to the proxy remote = rpc.connect(proxy_host, proxy_port, key=key) ctx = remote.metal(0) f1 = remote.load_module("dev_lib.dylib") a_np = np.random.uniform(size=1024).astype(A.dtype) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx) time_f = f1.time_evaluator(f1.entry_name, ctx, number=10) cost = time_f(a, b).mean print('%g secs/op' % cost) np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1) # CPU ctx = remote.cpu(0) f2 = remote.load_module("cpu_lib.dylib") a_np = np.random.uniform(size=1024).astype(A.dtype) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx) time_f = f2.time_evaluator(f1.entry_name, ctx, number=10) cost = time_f(a, b).mean print('%g secs/op' % cost) np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
def test_rpc_module(): # graph n = tvm.convert(1024) A = tvm.placeholder((n,), name='A') B = tvm.compute(A.shape, lambda *i: A(*i) + 1.0, name='B') temp = util.tempdir() s = tvm.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=64) s[B].bind(xi, tvm.thread_axis("threadIdx.x")) s[B].bind(xo, tvm.thread_axis("blockIdx.x")) # Build the dynamic lib. # If we don't want to do metal and only use cpu, just set target to be target f = tvm.build(s, [A, B], "metal", target_host=target, name="myadd") path_dso1 = temp.relpath("dev_lib.dylib") f.export_library(path_dso1, xcode.create_dylib, arch=arch, sdk=sdk) xcode.codesign(path_dso1) s = tvm.create_schedule(B.op) xo, xi = s[B].split(B.op.axis[0], factor=64) s[B].parallel(xi) s[B].pragma(xo, "parallel_launch_point") s[B].pragma(xi, "parallel_barrier_when_finish") f = tvm.build(s, [A, B], target, name="myadd_cpu") path_dso2 = temp.relpath("cpu_lib.dylib") f.export_library(path_dso2, xcode.create_dylib, arch=arch, sdk=sdk) xcode.codesign(path_dso2) # Start RPC test server that contains the compiled library. server = xcode.popen_test_rpc(proxy_host, proxy_port, key, destination=destination, libs=[path_dso1, path_dso2]) # connect to the proxy remote = rpc.connect(proxy_host, proxy_port, key=key) ctx = remote.metal(0) f1 = remote.load_module("dev_lib.dylib") a_np = np.random.uniform(size=1024).astype(A.dtype) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx) time_f = f1.time_evaluator(f1.entry_name, ctx, number=10) cost = time_f(a, b).mean print('%g secs/op' % cost) np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1) # CPU ctx = remote.cpu(0) f2 = remote.load_module("cpu_lib.dylib") a_np = np.random.uniform(size=1024).astype(A.dtype) a = tvm.nd.array(a_np, ctx) b = tvm.nd.array(np.zeros(1024, dtype=A.dtype), ctx) time_f = f2.time_evaluator(f1.entry_name, ctx, number=10) cost = time_f(a, b).mean print('%g secs/op' % cost) np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
dtype_dict = {input_name: data.dtype} # parse Caffe2 model and convert into Relay computation graph from tvm import relay mod, params = relay.frontend.from_caffe2(resnet50.init_net, resnet50.predict_net, shape_dict, dtype_dict) # compile the model target = 'metal' with relay.build_config(opt_level=3): graph, lib, params = relay.build(mod, target, target_host=target_host, params=params) #Save the library temp = util.tempdir() path_dso1 = temp.relpath("dev_lib.dylib") lib.export_library(path_dso1, xcode.create_dylib, arch=arch, sdk=sdk) xcode.codesign(path_dso1) # Start RPC test server that contains the compiled library. server = xcode.popen_test_rpc(proxy_host, proxy_port, key, destination=destination, libs=[path_dso1]) # connect to the proxy remote = rpc.connect(proxy_host, proxy_port, key=key) ctx = remote.metal(0) load_lib = remote.load_module("dev_lib.dylib") module = graph_runtime.create(graph, loaded_lib, ctx) # This line is thrwoing error. module.load_params(loaded_params) caffe2_out = module.run(data=input_data)