def build(target_dir):
    """Compile resnet18 with TVM (NNVM frontend) and store the artifacts.

    Parameters
    ----------
    target_dir : str
        Directory receiving ``deploy_lib.o``, ``deploy_lib.so``,
        ``deploy_graph.json`` and ``deploy_param.params``.

    Note
    ----
    Returns immediately when ``deploy_lib.o`` already exists in ``target_dir``.
    ``opt_level``, ``target`` and ``data_shape`` are read from the enclosing scope.
    """
    obj_path = osp.join(target_dir, 'deploy_lib.o')
    if osp.exists(obj_path):
        return

    # Fetch the pretrained resnet18 (imagenet1k image classification) and
    # convert it from MXNet.
    mx_block = get_model('resnet18_v1', pretrained=True)
    sym, params = nnvm.frontend.from_mxnet(mx_block)
    # Append a softmax layer so the output is a prediction.
    net = nnvm.sym.softmax(sym)

    # Compile the model.
    with nnvm.compiler.build_config(opt_level=opt_level):
        graph, lib, params = nnvm.compiler.build(
            net, target, shape={"data": data_shape}, params=params)

    # Save the model artifacts: object file, shared library, graph, parameters.
    lib.save(obj_path)
    so_path = osp.join(target_dir, "deploy_lib.so")
    cc.create_shared(so_path, [obj_path])

    graph_path = osp.join(target_dir, "deploy_graph.json")
    with open(graph_path, "w") as graph_file:
        graph_file.write(graph.json())

    params_path = osp.join(target_dir, "deploy_param.params")
    with open(params_path, "wb") as params_file:
        params_file.write(nnvm.compiler.save_param_dict(params))
def check_system_lib():
    """Build two system-lib functions, link them into one DSO and call both.

    ``s``, ``A``, ``B`` and ``nn`` come from the enclosing scope. The check is
    skipped (with a message) when the llvm device is not enabled.
    """
    dev = tvm.cpu(0)
    if not tvm.testing.device_enabled("llvm"):
        print("Skip because llvm is not enabled")
        return

    workdir = utils.tempdir()
    syslib_runtime = Runtime("cpp", {"system-lib": True})
    mod_add1 = tvm.build(s, [A, B], "llvm", runtime=syslib_runtime, name="myadd1")
    mod_add2 = tvm.build(s, [A, B], "llvm", runtime=syslib_runtime, name="myadd2")

    obj1 = workdir.relpath("myadd1.o")
    obj2 = workdir.relpath("myadd2.o")
    dso_path = workdir.relpath("mylib.so")
    mod_add1.save(obj1)
    mod_add2.save(obj2)
    cc.create_shared(dso_path, [obj1, obj2])

    # Load dll, will trigger system library registration
    ctypes.CDLL(dso_path)
    # Load the system wide library
    syslib = tvm.runtime.system_lib()

    a = tvm.nd.array(np.random.uniform(size=nn).astype(A.dtype), dev)
    b = tvm.nd.array(np.zeros(nn, dtype=A.dtype), dev)
    for func_name in ("myadd1", "myadd2"):
        syslib[func_name](a, b)
        np.testing.assert_equal(b.numpy(), a.numpy() + 1)
def download_linked_module(file_name):
    """Load module from remote side.

    Parameters
    ----------
    file_name : str
        Name of the file, resolved through ``temp.relpath`` of the enclosing scope.

    Returns
    -------
    blob : bytearray
        Content of the shared library that was linked (for ``.o`` / ``.tar``)
        or of the file itself (for ``.so`` / ``.dylib``).

    Raises
    ------
    RuntimeError
        If the file suffix is none of ``.o``, ``.tar``, ``.dylib`` or ``.so``.
    """
    # c++ compiler/linker
    cc = os.environ.get("CXX", "g++")

    # pylint: disable=import-outside-toplevel
    path = temp.relpath(file_name)

    if path.endswith(".o"):
        # Extra dependencies during runtime.
        from tvm.contrib import cc as _cc

        _cc.create_shared(path + ".so", path, cc=cc)
        path += ".so"
    elif path.endswith(".tar"):
        # Extra dependencies during runtime.
        from tvm.contrib import cc as _cc, tar as _tar

        tar_temp = utils.tempdir(custom_path=path.replace(".tar", ""))
        _tar.untar(path, tar_temp.temp_dir)
        files = [tar_temp.relpath(x) for x in tar_temp.listdir()]
        _cc.create_shared(path + ".so", files, cc=cc)
        path += ".so"
    elif path.endswith(".dylib") or path.endswith(".so"):
        # Already a shared library: send it as-is.
        pass
    else:
        raise RuntimeError("Do not know how to link %s" % file_name)

    logger.info("Send linked module %s to client", path)
    # Read through a context manager so the handle is closed deterministically.
    with open(path, "rb") as linked_file:
        return bytearray(linked_file.read())
def build(target_dir):
    """Compile resnet18 with TVM (Relay) and write the artifacts to ``target_dir``.

    Parameters
    ----------
    target_dir : str
        Directory receiving ``deploy_lib.o``, ``deploy_lib.so``,
        ``deploy_graph.json`` and ``deploy_param.params``.
    """
    # Pretrained model in MxNet's format, imported into Relay.
    mx_block = get_model("resnet18_v1", pretrained=True)
    input_shapes = {"data": (1, 3, 224, 224)}
    mod, params = relay.frontend.from_mxnet(mx_block, input_shapes)

    # Wrap the body in a softmax so the last layer performs classification.
    main_fn = mod["main"]
    classifier = relay.Function(
        main_fn.params,
        relay.nn.softmax(main_fn.body),
        None,
        main_fn.type_params,
        main_fn.attrs,
    )

    with tvm.transform.PassContext(opt_level=3):
        graph, lib, params = relay.build(classifier, "llvm", params=params)

    # Save the model artifacts.
    obj_path = osp.join(target_dir, "deploy_lib.o")
    lib.save(obj_path)
    so_path = osp.join(target_dir, "deploy_lib.so")
    cc.create_shared(so_path, [obj_path])

    graph_path = osp.join(target_dir, "deploy_graph.json")
    with open(graph_path, "w") as graph_file:
        graph_file.write(graph)

    params_path = osp.join(target_dir, "deploy_param.params")
    with open(params_path, "wb") as params_file:
        params_file.write(relay.save_param_dict(params))
def reconfig_runtime(cfg_json):
    """Rebuild and reload runtime with new configuration.

    Parameters
    ----------
    cfg_json : str
        JSON string used for configurations.

    Raises
    ------
    RuntimeError
        If the runtime library has already been loaded (``runtime_dll`` is set).
    """
    if runtime_dll:
        raise RuntimeError("Can only reconfig in the beginning of session...")
    env = get_env()
    cfg = json.loads(cfg_json)
    # The target always follows the local environment, not the request.
    cfg["TARGET"] = env.TARGET
    pkg = PkgConfig(cfg, proj_root)
    # check if the configuration is already the same
    if os.path.isfile(cfg_path):
        # Context manager closes the config file instead of leaking the handle.
        with open(cfg_path, "r") as cfg_file:
            old_cfg = json.loads(cfg_file.read())
        if pkg.same_config(old_cfg):
            logging.info("Skip reconfig_runtime due to same config.")
            return
    cflags = ["-O2", "-std=c++11"]
    cflags += pkg.cflags
    ldflags = pkg.ldflags
    lib_name = dll_path
    source = pkg.lib_source
    logging.info("Rebuild runtime:\n output=%s,\n cflags=%s,\n source=%s,\n ldflags=%s",
                 dll_path, '\n\t'.join(cflags), '\n\t'.join(source), '\n\t'.join(ldflags))
    cc.create_shared(lib_name, source, cflags + ldflags)
    # Record the configuration that was just built so the next call can skip it.
    with open(cfg_path, "w") as outputfile:
        outputfile.write(pkg.cfg_json)
def reconfig_runtime(cfg_json):
    """Rebuild and reload runtime with new configuration.

    Parameters
    ----------
    cfg_json : str
        JSON string used for configurations.

    Raises
    ------
    RuntimeError
        If the runtime library has already been loaded (``runtime_dll`` is set).
    """
    if runtime_dll:
        raise RuntimeError(
            "Can only reconfig in the beginning of session...")
    env = get_env()
    cfg = json.loads(cfg_json)
    # The target always follows the local environment, not the request.
    cfg["TARGET"] = env.TARGET
    pkg = PkgConfig(cfg, proj_root)
    # check if the configuration is already the same
    if os.path.isfile(cfg_path):
        # Context manager closes the config file instead of leaking the handle.
        with open(cfg_path, "r") as cfg_file:
            old_cfg = json.loads(cfg_file.read())
        if pkg.same_config(old_cfg):
            logging.info("Skip reconfig_runtime due to same config.")
            return
    cflags = ["-O2", "-std=c++11"]
    cflags += pkg.cflags
    ldflags = pkg.ldflags
    lib_name = dll_path
    source = pkg.lib_source
    logging.info(
        "Rebuild runtime:\n output=%s,\n cflags=%s,\n source=%s,\n ldflags=%s",
        dll_path, '\n\t'.join(cflags), '\n\t'.join(source), '\n\t'.join(ldflags))
    cc.create_shared(lib_name, source, cflags + ldflags)
    # Record the configuration that was just built so the next call can skip it.
    with open(cfg_path, "w") as outputfile:
        outputfile.write(pkg.cfg_json)
def test_add(target_dir):
    """Build the ``myadd`` vector-add kernel for llvm and emit add_cpu.o / add_cpu.so.

    Parameters
    ----------
    target_dir : str
        Directory where ``add_cpu.o`` and ``add_cpu.so`` are written.
    """
    length = tvm.var("n")
    A = tvm.placeholder((length,), name='A')
    B = tvm.placeholder((length,), name='B')
    C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")
    sched = tvm.create_schedule(C.op)
    fadd = tvm.build(sched, [A, B, C], "llvm", target_host="llvm", name="myadd")

    # Object file first, then link it into a shared library.
    obj_path = os.path.join(target_dir, "add_cpu.o")
    fadd.save(obj_path)
    so_path = os.path.join(target_dir, "add_cpu.so")
    cc.create_shared(so_path, [obj_path])
def main():
    """Build a parallel element-wise add and write test.o / test.so.

    The output directory is taken from ``sys.argv[1]``.
    """
    length = tvm.var('n')
    A = tvm.placeholder((length,), name='A')
    B = tvm.placeholder((length,), name='B')
    C = tvm.compute(A.shape, lambda *idx: A(*idx) + B(*idx), name='C')

    sched = tvm.create_schedule(C.op)
    outer_axis = sched[C].op.axis[0]
    sched[C].parallel(outer_axis)
    lowered = tvm.lower(sched, [A, B, C], simple_mode=True)
    print(lowered)

    out_dir = sys.argv[1]
    obj_file = osp.join(out_dir, 'test.o')
    built = tvm.build(sched, [A, B, C], 'llvm')
    built.save(obj_file)
    cc.create_shared(osp.join(out_dir, 'test.so'), [obj_file])
def main():
    """Build a parallel element-wise add with ``te`` and write test.o / test.so.

    The output directory is taken from ``sys.argv[1]``.
    """
    length = te.var("n")
    A = te.placeholder((length,), name="A")
    B = te.placeholder((length,), name="B")
    C = te.compute(A.shape, lambda *idx: A(*idx) + B(*idx), name="C")

    sched = tvm.te.create_schedule(C.op)
    outer_axis = sched[C].op.axis[0]
    sched[C].parallel(outer_axis)
    lowered = tvm.lower(sched, [A, B, C], simple_mode=True)
    print(lowered)

    out_dir = sys.argv[1]
    obj_file = osp.join(out_dir, "test.o")
    built = tvm.build(sched, [A, B, C], "llvm")
    built.save(obj_file)
    cc.create_shared(osp.join(out_dir, "test.so"), [obj_file])
def my_mul():
    """Build an element-wise float32 multiply for llvm and link it as ``mymul.so``.

    The object file goes to a temporary directory; the shared library is
    created under the relative name ``mymul.so``.
    """
    dim = tvm.var("n")
    A = tvm.placeholder((dim, dim), name='A', dtype="float32")
    B = tvm.placeholder((dim, dim), name='B', dtype="float32")
    C = tvm.compute(A.shape, lambda *idx: A(*idx) * B(*idx), name='C')
    sched = tvm.create_schedule(C.op)
    module = tvm.build(sched, [A, B, C], "llvm", "llvm")

    workdir = util.tempdir()
    obj_path = workdir.relpath("mymul.o")
    module.save(obj_path)
    cc.create_shared("mymul.so", [obj_path])
    print("create mymul")
def test_add(target_dir):
    """Build the ``myadd`` vector-add kernel for llvm and emit add_cpu.o / add_cpu.so.

    Parameters
    ----------
    target_dir : str
        Directory where ``add_cpu.o`` and ``add_cpu.so`` are written.
    """
    length = tvm.var("n")
    A = tvm.placeholder((length, ), name='A')
    B = tvm.placeholder((length, ), name='B')
    C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")
    sched = tvm.create_schedule(C.op)
    fadd = tvm.build(sched, [A, B, C], "llvm", target_host="llvm", name="myadd")

    # Object file first, then link it into a shared library.
    obj_path = os.path.join(target_dir, "add_cpu.o")
    fadd.save(obj_path)
    so_path = os.path.join(target_dir, "add_cpu.so")
    cc.create_shared(so_path, [obj_path])
def save_tvm_model(name, graph, lib, params):
    """Write ``<name>.o``, ``.so``, ``.json`` and ``.params`` into ``target_dir``.

    Parameters
    ----------
    name : str
        Base file name (without suffix) of the artifacts.
    graph : str
        Graph text written to ``<name>.json`` as-is.
    lib : object
        Compiled library; its ``save`` method writes the object file.
    params : object
        Parameters serialized with ``relay.save_param_dict``.

    Note
    ----
    ``target_dir`` is read from the enclosing scope.
    """
    obj_path = osp.join(target_dir, name + '.o')
    so_path = osp.join(target_dir, name + '.so')
    lib.save(obj_path)
    cc.create_shared(so_path, [obj_path])

    json_path = osp.join(target_dir, name + ".json")
    with open(json_path, "w") as graph_file:
        graph_file.write(graph)

    params_path = osp.join(target_dir, name + ".params")
    with open(params_path, "wb") as params_file:
        params_file.write(relay.save_param_dict(params))
def load_module(path, fmt=""):
    """Load module from file.

    Parameters
    ----------
    path : str
        The path to the module file.

    fmt : str, optional
        The format of the file, if not specified
        it will be inferred from suffix of the file.

    Returns
    -------
    module : runtime.Module
        The loaded module, or an empty module when the file has size zero.

    Note
    ----
    This function will automatically call
    cc.create_shared if the path is in format .o or .tar
    """
    file_size = os.stat(path).st_size
    if file_size == 0:
        logging.info(
            "The lib generated by the NNVM compiler does not contain optimized "
            "functions for any operators. This usually happens when an external "
            "accelerator, e.g. TensorRT, is employed along with TVM to compile "
            "the model, and all the operators in the model are supported by the "
            "external accelerator at runtime. Therefore, the NNVM compiler skipped "
            "optimizing them at the compile time. The TVM runtime "
            "will create an empty Module as a dummy module.")
        return _ffi_api.CreateEmptyModule()

    # High level handling for .o and .tar file.
    # We support this to be consistent with RPC module load.
    if path.endswith(".o"):
        # Extra dependencies during runtime.
        from tvm.contrib import cc as _cc

        shared_path = path + ".so"
        _cc.create_shared(shared_path, path)
        path = shared_path
    elif path.endswith(".tar"):
        # Extra dependencies during runtime.
        from tvm.contrib import cc as _cc, util as _util, tar as _tar

        extract_dir = _util.tempdir(custom_path=path.replace('.tar', ''))
        _tar.untar(path, extract_dir.temp_dir)
        members = [extract_dir.relpath(entry) for entry in extract_dir.listdir()]
        shared_path = path + ".so"
        _cc.create_shared(shared_path, members)
        path = shared_path
    # TODO(weberlo): we should probably use a more distinctive suffix for uTVM object files
    elif path.endswith(".obj"):
        fmt = "micro_dev"

    # Redirect to the load API
    return _ffi_api.ModuleLoadFromFile(path, fmt)
def load_module(path, fmt=""):
    """Load module from file.

    Parameters
    ----------
    path : str
        The path to the module file.

    fmt : str, optional
        The format of the file, if not specified
        it will be inferred from suffix of the file.

    Returns
    -------
    module : runtime.Module
        The loaded module

    Raises
    ------
    ValueError
        If ``path`` is not an existing file.

    Note
    ----
    This function will automatically call
    cc.create_shared if the path is in format .o or .tar
    """
    if not os.path.isfile(path):
        raise ValueError("cannot find file %s" % path)
    path = os.path.realpath(path)

    # c++ compiler/linker
    cc = os.environ.get("CXX", "g++")

    # High level handling for .o and .tar file.
    # We support this to be consistent with RPC module load.
    if path.endswith(".o"):
        # Extra dependencies during runtime.
        from tvm.contrib import cc as _cc

        shared_path = path + ".so"
        _cc.create_shared(shared_path, path, cc=cc)
        path = shared_path
    elif path.endswith(".tar"):
        # Extra dependencies during runtime.
        from tvm.contrib import cc as _cc, utils as _utils, tar as _tar

        extract_dir = _utils.tempdir(custom_path=path.replace(".tar", ""))
        _tar.untar(path, extract_dir.temp_dir)
        members = [extract_dir.relpath(entry) for entry in extract_dir.listdir()]
        shared_path = path + ".so"
        _cc.create_shared(shared_path, members, cc=cc)
        path = shared_path
    # TODO(weberlo): we should probably use a more distinctive suffix for microTVM object files
    elif path.endswith(".obj"):
        fmt = "micro_dev"

    # Redirect to the load API
    return _ffi_api.ModuleLoadFromFile(path, fmt)
def save_operator(operator, name):
    """Save a built operator into a temp dir and return the directory listing.

    Parameters
    ----------
    operator : object
        Built module; saved as ``<name>.o`` and linked into ``<name>.so``.
    name : str
        Base name of the generated files.

    Returns
    -------
    files : list
        Result of ``listdir()`` on the temporary directory.

    Note
    ----
    ``tgt`` is read from the enclosing scope; for cuda / rocm / opencl* targets
    the first imported module is saved as ``.ptx`` / ``.hsaco`` / ``.cl``.
    """
    workdir = utils.tempdir()
    obj_path = workdir.relpath(name + ".o")
    operator.save(obj_path)

    # Device code, if any, lives in the first imported module.
    kind_name = tgt.kind.name
    if kind_name == "cuda":
        operator.imported_modules[0].save(workdir.relpath(name + ".ptx"))
    elif kind_name == "rocm":
        operator.imported_modules[0].save(workdir.relpath(name + ".hsaco"))
    elif kind_name.startswith("opencl"):
        operator.imported_modules[0].save(workdir.relpath(name + ".cl"))

    cc.create_shared(workdir.relpath(name + ".so"), [obj_path])
    print(workdir.listdir())
    return workdir.listdir()
def test_dso_module_load():
    """Save a ramp kernel as .o/.ll/.bc, link a DSO and verify both load paths.

    The DSO is additionally exercised from a separate interpreter running the
    script text held in ``runtime_py`` (defined in the enclosing scope).
    Returns silently when the llvm device is not enabled.
    """
    if not tvm.testing.device_enabled("llvm"):
        return
    dtype = "int64"
    workdir = utils.tempdir()

    def save_object(names):
        """Build the kernel once and save it under every path in ``names``."""
        n = te.size_var("n")
        Ab = tvm.tir.decl_buffer((n,), dtype)
        i = te.var("i")
        # for i in 0 to n-1:
        body = tvm.tir.BufferStore(Ab, tvm.tir.BufferLoad(Ab, [i]) + 1, [i + 1])
        stmt = tvm.tir.For(i, 0, n - 1, tvm.tir.ForKind.SERIAL, body)
        prim_func = tvm.tir.PrimFunc([Ab], stmt).with_attr("global_symbol", "main")
        mod = tvm.IRModule.from_expr(prim_func)
        built = tvm.driver.build(mod, target="llvm")
        for out_path in names:
            built.save(out_path)

    path_obj = workdir.relpath("test.o")
    path_ll = workdir.relpath("test.ll")
    path_bc = workdir.relpath("test.bc")
    path_dso = workdir.relpath("test.so")
    save_object([path_obj, path_ll, path_bc])
    cc.create_shared(path_dso, [path_obj])

    from_dso = tvm.runtime.load_module(path_dso)
    from_ll = tvm.runtime.load_module(path_ll)
    # Each loaded function must turn a zero buffer into 0, 1, 2, ...
    for loaded in (from_dso, from_ll):
        a = tvm.nd.array(np.zeros(10, dtype=dtype))
        loaded(a)
        np.testing.assert_equal(a.numpy(), np.arange(a.shape[0]))

    path_runtime_py = workdir.relpath("runtime.py")
    with open(path_runtime_py, "w") as script_file:
        script_file.write(runtime_py)

    command = [sys.executable, path_runtime_py, path_dso, dtype]
    proc = subprocess.run(
        command,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
    )
    assert proc.returncode == 0, f"{proc.args} exited with {proc.returncode}: {proc.stdout}"
def link_shared(so_name, objs, **kwargs):
    """Link shared library on Hexagon using the registered Hexagon linker.

    Parameters
    ----------
    so_name : str
        Name of the shared library file.
    objs : list[str,StringImm]
        Object files to link, given as Python strings or tvm.tir.StringImm.
    kwargs : additional arguments:
        'verbose' - print additional information

    Returns
    -------
    ret_val : int
        This function returns 0 at the moment.

    Raises
    ------
    Exception
        If the registered linker does not exist or is not executable.
    """
    # The list of object files can be passed as built-in Python strings,
    # or as tvm.tir.StringImm's.
    def to_str(s):
        if isinstance(s, tvm.tir.StringImm):
            return s.value
        assert isinstance(s, str), 'argument "' + str(s) + '" should be a string or StrImm'
        return s

    objs = [to_str(s) for s in objs]

    # The linker path is provided by a registered global function.
    linker = tvm.get_global_func('tvm.contrib.hexagon.hexagon_link')()
    if kwargs.get('verbose'):
        print('tvm.contrib.hexagon.link_shared:')
        print(' Using linker:', linker)
        print(' Library name:', so_name)
        print(' Object files:', objs)

    if not os.access(linker, os.X_OK):
        message = 'The linker "' + linker + '" does not exist or is not executable.'
        if not os.environ.get('HEXAGON_TOOLCHAIN'):
            message += ' The environment variable HEXAGON_TOOLCHAIN is unset. Please export ' + \
                'HEXAGON_TOOLCHAIN in your environment, so that ${HEXAGON_TOOLCHAIN}/bin/' + \
                'hexagon-link exists.'
        else:
            # The value printed is the toolchain root, so name the variable
            # that was actually checked above (HEXAGON_TOOLCHAIN).
            message += ' Please verify the value of the HEXAGON_TOOLCHAIN environment variable ' + \
                '(currently set to "' + hexagon_toolchain_root + '").'
        raise Exception(message)

    libpath = os.path.join(
        hexagon_toolchain_root, 'target', 'hexagon', 'lib', 'v66', 'G0')
    cc.create_shared(
        so_name,
        objs,
        options=['-Bdynamic', '-shared', '-export-dynamic',
                 os.path.join(libpath, 'pic', 'libgcc.so')],
        cc=linker)
    return 0
def make_binary():
    """Compile a tiny C program with the cross toolchain and return its bytes.

    Returns
    -------
    prog_bin : bytearray
        Content of the file produced by ``cc.create_shared`` for the program.

    Note
    ----
    The compiler is ``<TOOLCHAIN_PREFIX>gcc``; ``TOOLCHAIN_PREFIX`` comes from
    the enclosing scope.
    """
    # Backslash continuations keep the C source in one string literal.
    prog = "int a = 7; \
            int main() { \
                int b = 5; \
                return 0; \
            }"
    tmp_dir = util.tempdir()
    tmp_source = tmp_dir.relpath("source.c")
    tmp_obj = tmp_dir.relpath("obj.obj")
    with open(tmp_source, "w") as f:
        f.write(prog)
    cc.create_shared(tmp_obj, tmp_source, [],
                     cc="{}gcc".format(TOOLCHAIN_PREFIX))
    # Read through a context manager so the handle is closed deterministically.
    with open(tmp_obj, "rb") as obj_file:
        prog_bin = bytearray(obj_file.read())
    return prog_bin
def test_dso_module_load():
    """Save a ramp kernel as .o/.ll/.bc, link a DSO and verify both load paths.

    The DSO is additionally exercised from a separate ``python3`` process
    running the script text held in ``runtime_py`` (enclosing scope).
    Returns silently when the llvm device is not enabled.
    """
    if not tvm.testing.device_enabled("llvm"):
        return
    dtype = "int64"
    temp = utils.tempdir()

    def save_object(names):
        """Build the kernel once and save it under every path in ``names``."""
        n = te.size_var("n")
        Ab = tvm.tir.decl_buffer((n, ), dtype)
        i = te.var("i")
        # for i in 0 to n-1:
        stmt = tvm.tir.For(
            i,
            0,
            n - 1,
            tvm.tir.ForKind.SERIAL,
            tvm.tir.Store(Ab.data,
                          tvm.tir.Load(dtype, Ab.data, i) + 1, i + 1),
        )
        mod = tvm.IRModule.from_expr(
            tvm.tir.PrimFunc([Ab], stmt).with_attr("global_symbol", "main"))
        m = tvm.driver.build(mod, target="llvm")
        for name in names:
            m.save(name)

    path_obj = temp.relpath("test.o")
    path_ll = temp.relpath("test.ll")
    path_bc = temp.relpath("test.bc")
    path_dso = temp.relpath("test.so")
    save_object([path_obj, path_ll, path_bc])
    cc.create_shared(path_dso, [path_obj])

    f1 = tvm.runtime.load_module(path_dso)
    f2 = tvm.runtime.load_module(path_ll)
    a = tvm.nd.array(np.zeros(10, dtype=dtype))
    f1(a)
    np.testing.assert_equal(a.asnumpy(), np.arange(a.shape[0]))
    a = tvm.nd.array(np.zeros(10, dtype=dtype))
    f2(a)
    np.testing.assert_equal(a.asnumpy(), np.arange(a.shape[0]))

    path_runtime_py = temp.relpath("runtime.py")
    with open(path_runtime_py, "w") as fo:
        fo.write(runtime_py)

    # Pass an argument list instead of a shell string: temp paths containing
    # spaces or shell metacharacters are then forwarded unchanged.
    subprocess.check_call(["python3", path_runtime_py, path_dso, dtype])
def test_add(target_dir):
    """Build the ``myadd`` vector-add kernel for cuda and emit add_gpu.{o,ptx,so}.

    Parameters
    ----------
    target_dir : str
        Directory where the object file, ptx file and shared library are written.
    """
    length = tvm.var("n")
    A = tvm.placeholder((length,), name='A')
    B = tvm.placeholder((length,), name='B')
    C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")

    sched = tvm.create_schedule(C.op)
    # Split the single axis by 64 and bind the halves to block / thread indices.
    block_axis, thread_axis = sched[C].split(C.op.axis[0], factor=64)
    sched[C].bind(block_axis, tvm.thread_axis("blockIdx.x"))
    sched[C].bind(thread_axis, tvm.thread_axis("threadIdx.x"))
    fadd_cuda = tvm.build(sched, [A, B, C], "cuda", target_host="llvm", name="myadd")

    obj_path = os.path.join(target_dir, "add_gpu.o")
    fadd_cuda.save(obj_path)
    ptx_path = os.path.join(target_dir, "add_gpu.ptx")
    fadd_cuda.imported_modules[0].save(ptx_path)
    so_path = os.path.join(target_dir, "add_gpu.so")
    cc.create_shared(so_path, [obj_path])
def main(target, out_dir):
    """Build the ``myadd`` kernel for ``target`` and write test_add.* to ``out_dir``.

    Parameters
    ----------
    target : str
        Build target; ``"cuda"`` additionally binds GPU threads and saves a ptx file.
    out_dir : str
        Directory receiving ``test_add.o``, ``test_add.so`` and, for cuda,
        ``test_add.ptx``.
    """
    is_cuda = target == "cuda"

    length = te.var("n")
    A = te.placeholder((length,), name="A")
    B = te.placeholder((length,), name="B")
    C = te.compute(A.shape, lambda i: A[i] + B[i], name="C")
    sched = te.create_schedule(C.op)

    if is_cuda:
        block_axis, thread_axis = sched[C].split(C.op.axis[0], factor=64)
        sched[C].bind(block_axis, te.thread_axis("blockIdx.x"))
        sched[C].bind(thread_axis, te.thread_axis("threadIdx.x"))

    fadd = tvm.build(sched, [A, B, C], target, target_host="llvm", name="myadd")

    obj_path = osp.join(out_dir, "test_add.o")
    fadd.save(obj_path)
    if is_cuda:
        fadd.imported_modules[0].save(osp.join(out_dir, "test_add.ptx"))
    cc.create_shared(osp.join(out_dir, "test_add.so"), [obj_path])
def test_dso_module_load():
    """Save a ramp kernel as .o/.ll/.bc, link a DSO and verify both load paths.

    The DSO is additionally exercised from a separate ``python3`` process
    running the script text held in ``runtime_py`` (enclosing scope).
    Returns silently when llvm is not enabled.
    """
    if not tvm.runtime.enabled("llvm"):
        return
    dtype = 'int64'
    temp = util.tempdir()

    def save_object(names):
        """Build the kernel once and save it under every path in ``names``."""
        n = tvm.size_var('n')
        Ab = tvm.decl_buffer((n, ), dtype)
        i = tvm.var('i')
        # for i in 0 to n-1:
        stmt = tvm.tir.For(
            i, 0, n - 1, 0, 0,
            tvm.tir.Store(Ab.data,
                          tvm.tir.Load(dtype, Ab.data, i) + 1,
                          i + 1))
        fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
        fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
        m = tvm.target.codegen.build_module(fapi, "llvm")
        for name in names:
            m.save(name)

    path_obj = temp.relpath("test.o")
    path_ll = temp.relpath("test.ll")
    path_bc = temp.relpath("test.bc")
    path_dso = temp.relpath("test.so")
    save_object([path_obj, path_ll, path_bc])
    cc.create_shared(path_dso, [path_obj])

    f1 = tvm.runtime.load_module(path_dso)
    f2 = tvm.runtime.load_module(path_ll)
    a = tvm.nd.array(np.zeros(10, dtype=dtype))
    f1(a)
    np.testing.assert_equal(a.asnumpy(), np.arange(a.shape[0]))
    a = tvm.nd.array(np.zeros(10, dtype=dtype))
    f2(a)
    np.testing.assert_equal(a.asnumpy(), np.arange(a.shape[0]))

    path_runtime_py = temp.relpath("runtime.py")
    with open(path_runtime_py, "w") as fo:
        fo.write(runtime_py)

    # Pass an argument list instead of a shell string: temp paths containing
    # spaces or shell metacharacters are then forwarded unchanged.
    subprocess.check_call(["python3", path_runtime_py, path_dso, dtype])
def test_dso_module_load():
    """Save a ramp kernel as .o/.ll/.bc, link a DSO and verify both load paths.

    The DSO is additionally exercised from a separate ``python`` process
    running the script text held in ``runtime_py`` (enclosing scope).
    Returns silently when llvm is not enabled.
    """
    if not tvm.module.enabled("llvm"):
        return
    dtype = 'int64'
    temp = util.tempdir()

    def save_object(names):
        """Build the kernel once and save it under every path in ``names``."""
        n = tvm.var('n')
        Ab = tvm.decl_buffer((n, ), dtype)
        i = tvm.var('i')
        # for i in 0 to n-1:
        stmt = tvm.make.For(
            i, 0, n - 1, 0, 0,
            tvm.make.Store(Ab.data,
                           tvm.make.Load(dtype, Ab.data, i) + 1,
                           i + 1))
        fapi = tvm.ir_pass.MakeAPI(stmt, "ramp", [Ab], 0, True)
        fapi = tvm.ir_pass.LowerTVMBuiltin(fapi)
        m = tvm.codegen.build_module(fapi, "llvm")
        for name in names:
            m.save(name)

    path_obj = temp.relpath("test.o")
    path_ll = temp.relpath("test.ll")
    path_bc = temp.relpath("test.bc")
    path_dso = temp.relpath("test.so")
    save_object([path_obj, path_ll, path_bc])
    cc.create_shared(path_dso, [path_obj])

    f1 = tvm.module.load(path_dso)
    f2 = tvm.module.load(path_ll)
    a = tvm.nd.array(np.zeros(10, dtype=dtype))
    f1(a)
    np.testing.assert_equal(a.asnumpy(), np.arange(a.shape[0]))
    a = tvm.nd.array(np.zeros(10, dtype=dtype))
    f2(a)
    np.testing.assert_equal(a.asnumpy(), np.arange(a.shape[0]))

    path_runtime_py = temp.relpath("runtime.py")
    with open(path_runtime_py, "w") as fo:
        fo.write(runtime_py)

    # Pass an argument list instead of a shell string: temp paths containing
    # spaces or shell metacharacters are then forwarded unchanged.
    subprocess.check_call(["python", path_runtime_py, path_dso, dtype])
def check_code_gen(device):
    """Build an element-wise add for ``device``, reload it from disk and time one call.

    Parameters
    ----------
    device : str
        Build target / context name. The device code is saved with suffix
        ``cl`` for ``"opencl"`` and ``vulkan`` for anything else.
    """
    dim = tvm.var("n")
    A = tvm.placeholder((dim, dim), name='A', dtype="float32")
    B = tvm.placeholder((dim, dim), name='B', dtype="float32")
    C = tvm.compute(A.shape, lambda *idx: A(*idx) + B(*idx), name='C')

    sched = tvm.create_schedule(C.op)
    block_axis, thread_axis = sched[C].split(C.op.axis[0], factor=64)
    sched[C].bind(block_axis, tvm.thread_axis("blockIdx.x"))
    sched[C].bind(thread_axis, tvm.thread_axis("threadIdx.x"))
    module = tvm.build(sched, [A, B, C], device, target_host="llvm")

    workdir = util.tempdir()
    obj_path = workdir.relpath("myadd.o")
    module.save(obj_path)

    # Save device code
    suffix = "cl" if device == "opencl" else "vulkan"
    device_code_path = workdir.relpath("myadd.%s" % suffix)
    module.imported_modules[0].save(device_code_path)

    # Create shared library
    so_path = workdir.relpath("myadd.so")
    cc.create_shared(so_path, [obj_path])

    # Reload the host library and the device code, then link them together.
    myadd = tvm.module.load(so_path)
    myadd_device = tvm.module.load(device_code_path)
    myadd.import_module(myadd_device)

    ctx = tvm.context(device, 0)
    size = 1024
    a = tvm.nd.array(np.random.uniform(size=(size, size)).astype(A.dtype), ctx)
    b = tvm.nd.array(np.random.uniform(size=(size, size)).astype(A.dtype), ctx)
    c = tvm.nd.array(np.random.uniform(size=(size, size)).astype(A.dtype), ctx)

    started = time.time()
    myadd(a, b, c)
    finished = time.time()
    print(device)
    print("GPU time: %s" % (finished - started))
def build():
    """Build the ``add`` kernel for llvm and write deploy.o / deploy.so.

    ``A``, ``B`` and ``C`` are read from the enclosing scope. The lowered IR is
    printed before building. Both output files use relative names.
    """
    tgt_host = "llvm"
    tgt = "llvm"
    sche = te.create_schedule([C.op])
    print(tvm.lower(sche, [A, B, C], simple_mode=True))
    fadd = tvm.build(sche, [A, B, C], tgt, target_host=tgt_host, name="add")
    # NOTE: the leftover ``pdb.set_trace()`` breakpoint was removed here; it
    # halted every run and blocked non-interactive execution.

    ######################################################################
    # Save Compiled Module
    # --------------------
    from tvm.contrib import cc

    fadd.save("deploy.o")
    cc.create_shared("deploy.so", ["deploy.o"])
def main(target, out_dir):
    """Build the ``myadd`` kernel for ``target`` and write test_add.* to ``out_dir``.

    Parameters
    ----------
    target : str
        Build target; ``'cuda'`` additionally binds GPU threads and saves a ptx file.
    out_dir : str
        Directory receiving ``test_add.o``, ``test_add.so`` and, for cuda,
        ``test_add.ptx``.
    """
    is_cuda = target == 'cuda'

    length = te.var('n')
    A = te.placeholder((length, ), name='A')
    B = te.placeholder((length, ), name='B')
    C = te.compute(A.shape, lambda i: A[i] + B[i], name='C')
    sched = te.create_schedule(C.op)

    if is_cuda:
        block_axis, thread_axis = sched[C].split(C.op.axis[0], factor=64)
        sched[C].bind(block_axis, te.thread_axis('blockIdx.x'))
        sched[C].bind(thread_axis, te.thread_axis('threadIdx.x'))

    fadd = tvm.build(sched, [A, B, C], target, target_host='llvm', name='myadd')

    obj_path = osp.join(out_dir, 'test_add.o')
    fadd.save(obj_path)
    if is_cuda:
        fadd.imported_modules[0].save(osp.join(out_dir, 'test_add.ptx'))
    cc.create_shared(osp.join(out_dir, 'test_add.so'), [obj_path])
def load_module(path, fmt=""):
    """Load module from file.

    Parameters
    ----------
    path : str
        The path to the module file.

    fmt : str, optional
        The format of the file, if not specified
        it will be inferred from suffix of the file.

    Returns
    -------
    module : runtime.Module
        The loaded module

    Note
    ----
    This function will automatically call
    cc.create_shared if the path is in format .o or .tar
    """
    # High level handling for .o and .tar file.
    # We support this to be consistent with RPC module load.
    if path.endswith(".o"):
        # Extra dependencies during runtime.
        from tvm.contrib import cc as _cc

        shared_path = path + ".so"
        _cc.create_shared(shared_path, path)
        path = shared_path
    elif path.endswith(".tar"):
        # Extra dependencies during runtime.
        from tvm.contrib import cc as _cc, util as _util, tar as _tar

        extract_dir = _util.tempdir(custom_path=path.replace('.tar', ''))
        _tar.untar(path, extract_dir.temp_dir)
        members = [extract_dir.relpath(entry) for entry in extract_dir.listdir()]
        shared_path = path + ".so"
        _cc.create_shared(shared_path, members)
        path = shared_path
    # TODO(weberlo): we should probably use a more distinctive suffix for uTVM object files
    elif path.endswith(".obj"):
        fmt = "micro_dev"

    # Redirect to the load API
    return _ffi_api.ModuleLoadFromFile(path, fmt)
def test_add(target_dir):
    """Build the ``myadd`` vector-add kernel for cuda and emit add_gpu.{o,ptx,so}.

    Parameters
    ----------
    target_dir : str
        Directory where the object file, ptx file and shared library are written.
    """
    length = tvm.var("n")
    A = tvm.placeholder((length, ), name='A')
    B = tvm.placeholder((length, ), name='B')
    C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")

    sched = tvm.create_schedule(C.op)
    # Split the single axis by 64 and bind the halves to block / thread indices.
    block_axis, thread_axis = sched[C].split(C.op.axis[0], factor=64)
    sched[C].bind(block_axis, tvm.thread_axis("blockIdx.x"))
    sched[C].bind(thread_axis, tvm.thread_axis("threadIdx.x"))
    fadd_cuda = tvm.build(sched, [A, B, C], "cuda", target_host="llvm", name="myadd")

    obj_path = os.path.join(target_dir, "add_gpu.o")
    fadd_cuda.save(obj_path)
    ptx_path = os.path.join(target_dir, "add_gpu.ptx")
    fadd_cuda.imported_modules[0].save(ptx_path)
    so_path = os.path.join(target_dir, "add_gpu.so")
    cc.create_shared(so_path, [obj_path])
def test_add(target_dir):
    """Build the ``myadd`` vector-add kernel for cuda and emit add_gpu.{o,ptx,so}.

    Prints a skip message and returns when cuda is not enabled.

    Parameters
    ----------
    target_dir : str
        Directory where the object file, ptx file and shared library are written.
    """
    cuda_enabled = tvm.runtime.enabled("cuda")
    if not cuda_enabled:
        print("skip %s because cuda is not enabled..." % __file__)
        return

    length = te.var("n")
    A = te.placeholder((length,), name="A")
    B = te.placeholder((length,), name="B")
    C = te.compute(A.shape, lambda i: A[i] + B[i], name="C")

    sched = te.create_schedule(C.op)
    # Split the single axis by 64 and bind the halves to block / thread indices.
    block_axis, thread_axis = sched[C].split(C.op.axis[0], factor=64)
    sched[C].bind(block_axis, te.thread_axis("blockIdx.x"))
    sched[C].bind(thread_axis, te.thread_axis("threadIdx.x"))
    fadd_cuda = tvm.build(sched, [A, B, C], "cuda", target_host="llvm", name="myadd")

    obj_path = os.path.join(target_dir, "add_gpu.o")
    fadd_cuda.save(obj_path)
    ptx_path = os.path.join(target_dir, "add_gpu.ptx")
    fadd_cuda.imported_modules[0].save(ptx_path)
    so_path = os.path.join(target_dir, "add_gpu.so")
    cc.create_shared(so_path, [obj_path])
def build(target_dir):
    """ Compiles resnet18 with TVM

    Parameters
    ----------
    target_dir : str
        Directory receiving ``deploy_lib.o``, ``deploy_lib.so``,
        ``deploy_graph.json`` and ``deploy_param.params``.

    Note
    ----
    ``opt_level``, ``target`` and ``data_shape`` are read from the enclosing
    scope. A test image (``cat.png``), the label file (``synset.txt``) and the
    converted ``synset.csv`` are written under relative names, not into
    ``target_dir``.
    """
    # Local import: only needed to parse the downloaded label file.
    import ast

    # download the pretrained resnet18 trained on imagenet1k dataset for
    # image classification task
    block = get_model('resnet18_v1', pretrained=True)

    sym, params = nnvm.frontend.from_mxnet(block)
    # add the softmax layer for prediction
    net = nnvm.sym.softmax(sym)
    # compile the model
    with nnvm.compiler.build_config(opt_level=opt_level):
        graph, lib, params = nnvm.compiler.build(
            net, target, shape={"data": data_shape}, params=params)

    # save the model artifacts
    lib.save(os.path.join(target_dir, "deploy_lib.o"))
    cc.create_shared(os.path.join(target_dir, "deploy_lib.so"),
                     [os.path.join(target_dir, "deploy_lib.o")])

    with open(os.path.join(target_dir, "deploy_graph.json"), "w") as fo:
        fo.write(graph.json())
    with open(os.path.join(target_dir, "deploy_param.params"), "wb") as fo:
        fo.write(nnvm.compiler.save_param_dict(params))

    # download an image and imagenet1k class labels for test
    img_name = 'cat.png'
    synset_url = ''.join(['https://gist.githubusercontent.com/zhreshold/',
                          '4d0b62f3d01426887599d4f7ede23ee5/raw/',
                          '596b27d23537e5a1b5751d2b0481ef172f58b539/',
                          'imagenet1000_clsid_to_human.txt'])
    synset_name = 'synset.txt'
    download('https://github.com/dmlc/mxnet.js/blob/master/data/cat.png?raw=true', img_name)
    download(synset_url, synset_name)

    with open(synset_name) as fin:
        # SECURITY: this text comes from the network. It used to be passed to
        # eval(), which executes arbitrary code; literal_eval only accepts
        # Python literals. Presumably the file is a plain dict literal mapping
        # class id to label - confirm against the hosted file.
        synset = ast.literal_eval(fin.read())

    with open("synset.csv", "w") as fout:
        w = csv.writer(fout)
        w.writerows(synset.items())
def build(target_dir):
    """Compile resnet18 with TVM (Relay) and write the artifacts to ``target_dir``.

    Parameters
    ----------
    target_dir : str
        Directory receiving ``deploy_lib.o``, ``deploy_lib.so``,
        ``deploy_graph.json`` and ``deploy_param.params``.

    Note
    ----
    Returns immediately when ``deploy_lib.o`` already exists. ``args``,
    ``data_shape``, ``batch_size``, ``image_shape``, ``opt_level`` and
    ``target`` are read from the enclosing scope.
    """
    obj_path = osp.join(target_dir, "deploy_lib.o")
    if osp.exists(obj_path):
        return

    if not args.pretrained:
        # use random weights from relay.testing
        net, params = relay.testing.resnet.get_workload(
            num_layers=18, batch_size=batch_size, image_shape=image_shape)
    else:
        # needs mxnet installed
        from mxnet.gluon.model_zoo.vision import get_model

        # if `--pretrained` is enabled, it downloads a pretrained
        # resnet18 trained on imagenet1k dataset for image classification task
        mx_block = get_model("resnet18_v1", pretrained=True)
        net, params = relay.frontend.from_mxnet(mx_block, {"data": data_shape})
        # we want a probability so add a softmax operator
        main_fn = net["main"]
        net = relay.Function(
            main_fn.params,
            relay.nn.softmax(main_fn.body),
            None,
            main_fn.type_params,
            main_fn.attrs,
        )

    # compile the model
    with tvm.transform.PassContext(opt_level=opt_level):
        graph, lib, params = relay.build_module.build(net, target, params=params)

    # save the model artifacts
    lib.save(obj_path)
    so_path = osp.join(target_dir, "deploy_lib.so")
    cc.create_shared(so_path, [obj_path])

    graph_path = osp.join(target_dir, "deploy_graph.json")
    with open(graph_path, "w") as graph_file:
        graph_file.write(graph)

    params_path = osp.join(target_dir, "deploy_param.params")
    with open(params_path, "wb") as params_file:
        params_file.write(relay.save_param_dict(params))
def simple_llvm_save_module():
    """Build an element-wise add for llvm, reload it from a DSO and time one call."""
    dim = tvm.var("n")
    A = tvm.placeholder((dim, dim), name='A', dtype="float32")
    B = tvm.placeholder((dim, dim), name='B', dtype="float32")
    C = tvm.compute(A.shape, lambda *idx: A(*idx) + B(*idx), name='C')
    sched = tvm.create_schedule(C.op)
    module = tvm.build(sched, [A, B, C], "llvm", "llvm")

    workdir = util.tempdir()
    obj_path = workdir.relpath("myadd.o")
    module.save(obj_path)
    so_path = workdir.relpath("myadd.so")
    cc.create_shared(so_path, [obj_path])

    ctx = tvm.context("llvm", 0)
    size = 1024
    a = tvm.nd.array(np.random.uniform(size=(size, size)).astype(A.dtype), ctx)
    b = tvm.nd.array(np.random.uniform(size=(size, size)).astype(A.dtype), ctx)
    c = tvm.nd.array(np.random.uniform(size=(size, size)).astype(A.dtype), ctx)

    myadd = tvm.module.load(so_path)

    started = time.time()
    myadd(a, b, c)
    finished = time.time()
    print("CPU time: %s" % (finished - started))
def check_system_lib():
    """Build two ``-system-lib`` functions, link them into one DSO and call both.

    ``s``, ``A``, ``B`` and ``nn`` come from the enclosing scope. The check is
    skipped (with a message) when llvm is not enabled.
    """
    ctx = tvm.cpu(0)
    if not tvm.module.enabled("llvm"):
        print("Skip because llvm is not enabled" )
        return

    workdir = util.tempdir()
    mod_add1 = tvm.build(s, [A, B], "llvm -system-lib", name="myadd1")
    mod_add2 = tvm.build(s, [A, B], "llvm -system-lib", name="myadd2")

    obj1 = workdir.relpath("myadd1.o")
    obj2 = workdir.relpath("myadd2.o")
    dso_path = workdir.relpath("mylib.so")
    mod_add1.save(obj1)
    mod_add2.save(obj2)
    cc.create_shared(dso_path, [obj1, obj2])

    # Load dll, will trigger system library registration
    dll = ctypes.CDLL(dso_path)
    # Load the system wide library
    syslib = tvm.module.system_lib()

    a = tvm.nd.array(np.random.uniform(size=nn).astype(A.dtype), ctx)
    b = tvm.nd.array(np.zeros(nn, dtype=A.dtype), ctx)
    for func_name in ('myadd1', 'myadd2'):
        syslib[func_name](a, b)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
def check_system_lib():
    """Build two ``-system-lib`` functions, link them into one DSO and call both.

    ``s``, ``A``, ``B`` and ``nn`` come from the enclosing scope. The check is
    skipped (with a message) when llvm is not enabled.
    """
    ctx = tvm.cpu(0)
    if not tvm.runtime.enabled("llvm"):
        print("Skip because llvm is not enabled" )
        return

    workdir = util.tempdir()
    mod_add1 = tvm.build(s, [A, B], "llvm -system-lib", name="myadd1")
    mod_add2 = tvm.build(s, [A, B], "llvm -system-lib", name="myadd2")

    obj1 = workdir.relpath("myadd1.o")
    obj2 = workdir.relpath("myadd2.o")
    dso_path = workdir.relpath("mylib.so")
    mod_add1.save(obj1)
    mod_add2.save(obj2)
    cc.create_shared(dso_path, [obj1, obj2])

    # Load dll, will trigger system library registration
    dll = ctypes.CDLL(dso_path)
    # Load the system wide library
    syslib = tvm.runtime.system_lib()

    a = tvm.nd.array(np.random.uniform(size=nn).astype(A.dtype), ctx)
    b = tvm.nd.array(np.zeros(nn, dtype=A.dtype), ctx)
    for func_name in ('myadd1', 'myadd2'):
        syslib[func_name](a, b)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
def check_llvm():
    """Link two llvm-built functions into one shared library and call both.

    ``s``, ``A``, ``B`` and ``nn`` come from the enclosing scope. The check is
    skipped (with a message) when llvm is not enabled.
    """
    ctx = tvm.cpu(0)
    if not tvm.runtime.enabled("llvm"):
        print("Skip because llvm is not enabled" )
        return

    workdir = util.tempdir()
    built1 = tvm.build(s, [A, B], "llvm", name="myadd1")
    built2 = tvm.build(s, [A, B], "llvm", name="myadd2")

    obj1 = workdir.relpath("myadd1.o")
    obj2 = workdir.relpath("myadd2.o")
    dso_path = workdir.relpath("mylib.so")
    built1.save(obj1)
    built2.save(obj2)
    # create shared library with multiple functions
    cc.create_shared(dso_path, [obj1, obj2])

    loaded = tvm.runtime.load_module(dso_path)
    fadd1 = loaded['myadd1']
    fadd2 = loaded['myadd2']

    a = tvm.nd.array(np.random.uniform(size=nn).astype(A.dtype), ctx)
    b = tvm.nd.array(np.zeros(nn, dtype=A.dtype), ctx)
    for func in (fadd1, fadd2):
        func(a, b)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
def check_llvm():
    """Link two llvm-built functions into one shared library and call both.

    ``s``, ``A``, ``B`` and ``nn`` come from the enclosing scope. The check is
    skipped (with a message) when llvm is not enabled.
    """
    ctx = tvm.cpu(0)
    if not tvm.module.enabled("llvm"):
        print("Skip because llvm is not enabled" )
        return

    workdir = util.tempdir()
    built1 = tvm.build(s, [A, B], "llvm", name="myadd1")
    built2 = tvm.build(s, [A, B], "llvm", name="myadd2")

    obj1 = workdir.relpath("myadd1.o")
    obj2 = workdir.relpath("myadd2.o")
    dso_path = workdir.relpath("mylib.so")
    built1.save(obj1)
    built2.save(obj2)
    # create shared library with multiple functions
    cc.create_shared(dso_path, [obj1, obj2])

    loaded = tvm.module.load(dso_path)
    fadd1 = loaded['myadd1']
    fadd2 = loaded['myadd2']

    a = tvm.nd.array(np.random.uniform(size=nn).astype(A.dtype), ctx)
    b = tvm.nd.array(np.zeros(nn, dtype=A.dtype), ctx)
    for func in (fadd1, fadd2):
        func(a, b)
        np.testing.assert_equal(b.asnumpy(), a.asnumpy() + 1)
# file and load them back later. This is called ahead of time compilation. # # The following code first does the following step: # # - It saves the compiled host module into an object file. # - Then it saves the device module into a ptx file. # - cc.create_shared calls a env compiler(gcc) to create a shared library # from tvm.contrib import cc from tvm.contrib import util temp = util.tempdir() fadd.save(temp.relpath("myadd.o")) if tgt == "cuda": fadd.imported_modules[0].save(temp.relpath("myadd.ptx")) cc.create_shared(temp.relpath("myadd.so"), [temp.relpath("myadd.o")]) print(temp.listdir()) ###################################################################### # .. note:: Module Storage Format # # The CPU(host) module is directly saved as a shared library(so). # There can be multiple customed format on the device code. # In our example, device code is stored in ptx, as well as a meta # data json file. They can be loaded and linked seperatedly via import. # ###################################################################### # Load Compiled Module # -------------------- # We can load the compiled module from the file system and run the code.
# Both the host code and the kernel are built for llvm.
tgt_host="llvm"
tgt="llvm"

######################################################################
# Describe the Computation
# ------------------------
# Element-wise addition C[i] = A[i] + B[i] over a symbolic length n.
n = tvm.var("n")
A = tvm.placeholder((n,), name='A')
B = tvm.placeholder((n,), name='B')
C = tvm.compute(A.shape, lambda i: A[i] + B[i], name="C")

######################################################################
# Schedule the Computation
# ------------------------
s = tvm.create_schedule(C.op)

######################################################################
# Compilation
# -----------
fadd = tvm.build(s, [A, B, C], tgt, target_host=tgt_host, name="myadd")

######################################################################
# Save Compiled Module
# --------------------
# Save the object file, then link it into a shared library. Both files use
# relative names.
from tvm.contrib import cc
from tvm.contrib import util

fadd.save("deploy.o")
cc.create_shared("deploy.so", ["deploy.o"])