def _launch_kernel_from_path(inputs, op_attrs, func_type, func_source, func_name): kernel_meta_path = get_kernel_meta_path() cuda_path = os.path.realpath(kernel_meta_path) if not os.path.isdir(cuda_path): os.makedirs(cuda_path, exist_ok=True) if not func_name: raise ValueError("Can't find name of function") if not func_source: raise ValueError("Can't find source of function: {}".format( str(func_name))) op_imply_path = os.path.realpath(kernel_meta_path + func_name + ".py") if os.path.exists(op_imply_path): os.remove(op_imply_path) try: with open(op_imply_path, 'at') as file: fcntl.flock(file.fileno(), fcntl.LOCK_EX) file.seek(0, 2) if file.tell() == 0: file.write(func_source) os.chmod(op_imply_path, 0o400) except Exception: logging.error(traceback.format_exc()) return None custom_mod_name = Path(op_imply_path).resolve().stem mod_spec = importlib.util.spec_from_file_location(custom_mod_name, op_imply_path) custom_mod = importlib.util.module_from_spec(mod_spec) mod_spec.loader.exec_module(custom_mod) func_kernel = getattr(custom_mod, func_name, None) if func_kernel is None: raise ValueError( "Can't find the following function under module {}: {}".format( custom_mod_name, func_name)) if "__wrapped__" in func_kernel.__dict__: func_kernel = func_kernel.__dict__["__wrapped__"] func_kernel.__globals__["tvm"] = globals()["tvm"] if func_type == "ir_builder": return func_kernel(inputs, op_attrs) else: inputs = list(inputs) return func_kernel(*inputs, **op_attrs)
def dump_cpu_meta(mod, kernel_name): """ Function for dumping cpu meta. Args: mod: the module code of cpu. """ title_dict = dict() # kernel name code = mod.get_source() title_dict["kernelName"] = kernel_name + "_kernel" #thread number thread_num = "null" title_dict["threadNumber"] = thread_num #meta path path_name = get_kernel_meta_path() meta_path = os.path.realpath(path_name) if not os.path.isdir(meta_path): os.makedirs(meta_path, exist_ok=True) # save libraries to kernel meta obj_file = os.path.join(meta_path, kernel_name + '.o') lib_file = os.path.join(meta_path, kernel_name + '.so') mod.save(obj_file, 'k') mod.export_library(lib_file) # sha256 of files obj_sha256 = hashlib.sha256() lib_sha256 = hashlib.sha256() with open(obj_file, 'rb') as f: obj_sha256.update(f.read()) with open(lib_file, 'rb') as f: lib_sha256.update(f.read()) obj_hash_str = obj_sha256.hexdigest() lib_hash_str = lib_sha256.hexdigest() title_dict["objSha256"] = obj_hash_str title_dict["sha256"] = lib_hash_str # save json file to kernel meta json_file = os.path.join(meta_path, kernel_name + ".json") write_code(title_dict, json_file)
def _update_workspace_data(kernel_name, input_for_mod, output_indexes): """Update workspace tensors.""" workspace_tensors = [] json_file = get_kernel_meta_path() + kernel_name + ".json" if os.path.isfile(json_file): with open(json_file, 'r') as f: kernel_json = f.read() kernel_desc = json.loads(kernel_json) if "workspace" in kernel_desc: workspace_bytes = kernel_desc["workspace"]["size"] item = np.full(workspace_bytes, np.nan, np.int8) workspace_tensors.append(item) else: logging.warning("Kernel json file %s not found", json_file) # Add workspace tensors to input_for_mod if len(workspace_tensors) > 0: # workspace tensors are placed after inputs and outputs, so index in output_indexes should # be converted to positive number first, otherwise -1 will point to the last workspace tensor # instead of the last output tensor. output_indexes = [i if i > 0 else i + len(input_for_mod) for i in output_indexes] input_for_mod.extend(workspace_tensors) return output_indexes
def _op_build_cuda(opnames, computes, args, device, kernel_name): kernel_meta_path = get_kernel_meta_path() cuda_path = os.path.realpath(kernel_meta_path) if not os.path.isdir(cuda_path): os.makedirs(cuda_path, exist_ok=True) if not opnames: logging.error("no opname given.") return None schedule_name = 'gpu_schedule_' + opnames[0] schedule_func = getattr(akg.ops.array.gpu, schedule_name) if not isinstance(schedule_func, (types.FunctionType, typing.Callable)): logging.error("no schedule func found %s", str(schedule_name)) return None ptx_file = os.path.realpath(kernel_meta_path + kernel_name + ".ptx") if os.path.exists(ptx_file): os.remove(ptx_file) try: with open(ptx_file, 'at') as file: fcntl.flock(file.fileno(), fcntl.LOCK_EX) file.seek(0, 2) if file.tell() == 0: s = schedule_func(computes) foo = akg.tvm.build(s, args, device, name=kernel_name) ptx_code = foo.imported_modules[0].get_source("ptx") file.write(ptx_code) json_file = os.path.realpath(kernel_meta_path + kernel_name + ".json") kernel_info = (ptx_code, json_file, kernel_name) gpu_utils.save_gpu_params(s, args, kernel_info) os.chmod(ptx_file, 0o400) except Exception: logging.error(traceback.format_exc()) return None return True
def dump_cuda_meta(code, ptx, thread_info, workspace=None): """ Function for dumping cuda meta. Args: code: gpu code. ptx: ptx code. thread_info: thread info, written to json file. workspace: workspace info, which will be allocated in global memory. """ title_dict = dict() # kernel name kernel_name = code.split("_kernel")[0].split(" ")[-1] title_dict["kernelName"] = kernel_name + "_kernel0" # sha256 of ptx sha256 = hashlib.sha256() sha256.update(ptx.encode("utf-8")) hash_str = sha256.hexdigest() title_dict["sha256"] = hash_str # thread info thread_info_dict = { "blockIdx.x": 1, "blockIdx.y": 1, "blockIdx.z": 1, "threadIdx.x": 1, "threadIdx.y": 1, "threadIdx.z": 1 } for thread_tag in thread_info_dict.keys(): if thread_tag in thread_info: if isinstance(thread_info[thread_tag], int): thread_info_dict[thread_tag] = thread_info[thread_tag] elif isinstance(thread_info[thread_tag], akg.tvm.expr.IntImm): thread_info_dict[thread_tag] = thread_info[thread_tag].value title_dict.update(thread_info_dict) # workspace workspace_dict = parse_workspace(workspace) if workspace_dict is not None: title_dict["workspace"] = workspace_dict meta_path = get_kernel_meta_path() cuda_path = os.path.realpath(meta_path) if not os.path.isdir(cuda_path): os.makedirs(cuda_path, exist_ok=True) # save ptx file to cuda meta ptx_file = os.path.realpath(meta_path + kernel_name + ".ptx") if os.path.exists(ptx_file): os.remove(ptx_file) with open(ptx_file, "at") as f: fcntl.flock(f.fileno(), fcntl.LOCK_EX) f.seek(0, 2) if f.tell() == 0: f.write(ptx) # modify the file permisson to 400 os.chmod(ptx_file, 0o400) # save json file to cuda meta json_file = os.path.realpath(meta_path + kernel_name + ".json") write_code(title_dict, json_file)
def launch(kernel, args, output=(-1, )): """ simulated run CCE kernel by aic model. Args: kernel (str): str of kernel name, or CCE Module. args (Union[list, tuple]): list or tuple of numpy array. output (Union[list, tuple]): list or tuple of output argment index. Returns: output numpy array, or tuple of numpy array if multi-output. """ def _check_exists(value, error_msg): if not value: raise RuntimeError(error_msg) def _mkdir(path): if not os.path.exists(path): os.mkdir(path) def _rmdir(path): if os.path.exists(path): os.remove(path) if isinstance(kernel, akg.tvm.module.Module): code = kernel.imported_modules[0].get_source() kernel_name = code.split("_kernel")[0].split(" ")[-1] else: kernel_name = kernel hbm_addr = 0x4000000 hbm_unit = 0x1000000 aic_model_path = os.getenv('AIC_MODEL_PATH') _check_exists( aic_model_path, "AIC_MODEL_PATH environment variable is not set. Please set it to the dir of model_exe" ) aic_model_path = os.path.realpath(aic_model_path) # spec : target chip specification. spec_name = os.getenv('AIC_MODEL_SPEC_NAME') _check_exists( spec_name, "AIC_MODEL_SPEC_NAME environment variable is not set. Please set it to the name of spec(" "It should be xxx.spec and the xxx.spec file is under the AIC_MODEL_PATH directory)" ) aic_out_path = os.path.realpath("aic_out") _mkdir(aic_out_path) calog_path = aic_out_path + "/calog" _mkdir(calog_path) model_path = aic_out_path + "/model" if not os.path.exists(model_path): subprocess.call(["ln", "-s", aic_model_path + "/model", model_path]) kernel_meta_path = get_kernel_meta_path() kernel_meta_realpath = os.path.realpath(kernel_meta_path) _check_exists( kernel_meta_realpath, "The parameter kernel_meta_realpath can not be found, please check") o_name = kernel_meta_realpath + "/" + kernel_name + ".o" bin_name = aic_out_path + "/kernel.bin" subprocess.call([ "aicore-elf-objcopy", "-O", "binary", "-j", ".text", o_name, bin_name ]) load_dict = {} with open("%s/%s.json" % (kernel_meta_realpath, kernel_name), "r") as f: load_dict = json.load(f) arg_info = [] desc = { "args": arg_info, "para_addr": hbm_addr, "bin_addr": hbm_addr + 0x100000, "bin": "kernel.bin", "block": load_dict["blockDim"], "spec": aic_model_path + '/' + spec_name, "path": aic_out_path } hbm_addr += hbm_unit for i, arg in enumerate(args): bin_name = "a_%d.bin" % (i) arg.tofile(os.path.join(aic_out_path, bin_name)) info = { "bin": bin_name, "size": arg.size * arg.dtype.itemsize, "addr": hbm_addr, "out": False } arg_info.append(info) need_size = arg.size if need_size % hbm_unit: need_size += hbm_unit - (need_size % hbm_unit) hbm_addr += need_size for i in output: arg_info[len(arg_info) + i if i < 0 else i]['out'] = True config_path = aic_out_path + "/config.toml" _rmdir(config_path) with os.fdopen(os.open(config_path, os.O_WRONLY | os.O_CREAT, 0o400), 'w') as f: f.write('title="Sim Config"\n') f.write('log_open_value=0xffffffff\n') f.write('chip_version=1\n') f.write('block_dim=%d\n' % (desc['block'])) f.write('specPathName="%s"\n' % (desc["spec"])) f.write('path="%s/"\n' % (desc["path"])) f.write('hbm_para_addr=0x%x\n' % (desc["para_addr"])) f.write('[BIN]\n') f.write('name="%s"\n' % (desc['bin'])) f.write('addr=0x%x\n' % (desc['bin_addr'])) for arg in arg_info: f.write('[[output_para_array]]\n' if arg['out'] else '[[input_para_array]]\n') f.write('name="%s"\n' % (arg['bin'])) f.write('addr=0x%x\n' % (arg['addr'])) f.write('valid=1\n') if arg['out']: f.write('size=0x%x\n' % (arg['size'])) run_path = aic_out_path + "/run.sh" _rmdir(run_path) with os.fdopen(os.open(run_path, os.O_WRONLY | os.O_CREAT, 0o500), 'w') as f: f.write("cd " + aic_out_path + "\n") f.write("export DVCSPEC_DIR=" + aic_model_path + "\n") f.write(aic_model_path + "/v100_ca_tag_master --gtest_filter=test_st_case.test_st_ca\n") subprocess.call(["sh", aic_out_path + "/run.sh"]) out_list = [] for i, arg_ in enumerate(args): if arg_info[i]['out']: out_data = np.fromfile( os.path.join(aic_out_path, arg_info[i]['bin']), arg_.dtype) # strip unneeded data copied back by aic model if out_data.size > args[i].size: out_data = out_data[0:arg_.size] out_arg = out_data.reshape(arg_.shape) out_list.append(out_arg) return out_list[0] if len(out_list) == 1 else tuple(out_list)
def tvm_callback_cce_postproc(code, block_dim=1, workspace=None): """Function for dumping ascend meta.""" if "__aicore__" in code: title_dict = {"magic": "RT_DEV_BINARY_MAGIC_ELF"} else: logging.warning("__aicore__ not found in code.") title_dict = dict() # kernel name kernel_name = code.split("_kernel")[0].split(" ")[-1] title_dict["kernelName"] = kernel_name + "_kernel0" # thread info title_dict["blockDim"] = block_dim # bin file info bin_file_suffix = ".o" title_dict["binFileSuffix"] = bin_file_suffix bin_file_name = kernel_name title_dict["binFileName"] = bin_file_name # sha256 buf_size = 64 * 1024 # once read 64kb root_path = get_kernel_meta_path() kernel_file_name = root_path + bin_file_name + bin_file_suffix sha256 = hashlib.sha256() with open(kernel_file_name, 'rb') as kf: while True: data = kf.read(buf_size) if not data: break sha256.update(data) title_dict["sha256"] = sha256.hexdigest() # workspace workspace_dict = parse_workspace(workspace) if workspace_dict is not None: title_dict["workspace"] = workspace_dict load_dict = {} if not os.path.exists(get_kernel_meta_path()): try: os.mkdir(root_path) except OSError as err: # 17, OSError: [Errno 17] File exists if err.errno == 17: pass else: raise err else: fname = root_path + kernel_name + "wk.json" if os.path.exists(fname): with open(fname, "r") as f: load_dict = json.load(f) os.remove(fname) final_dict = title_dict.copy() final_dict.update(load_dict) json_file = root_path + kernel_name + ".json" write_code(final_dict, json_file) return code