def codegen(self, func):
    """Compile a single function and gather the resulting build artifacts.

    Parameters
    ----------
    func: tvm.relay.Expr
        The function to compile.

    Returns
    -------
    graph_json : str
        The graph json that can be consumed by runtime.
    mod : IRModule or Dict[str, IRModule]
        The lowered functions.
    params : Dict[str, tvm.nd.NDArray]
        Additional constant parameters, each one a fresh copy of the array
        returned by ``self._get_param_by_name``.
    """
    self._codegen(func)
    graph_json = self._get_graph_json()
    lowered_mod = self._get_irmodule()

    def _clone(source):
        # Allocate an array matching the source's shape/dtype/device and copy into it.
        target = empty(source.shape, dtype=source.dtype, device=source.device)
        source.copyto(target)
        return target

    params = {
        name: _clone(self._get_param_by_name(name))
        for name in self._list_params_name()
    }
    return graph_json, lowered_mod, params
def timed_func():
    """Upload a built module to a remote device, run it on random inputs and time it.

    Takes no arguments: ``key``, ``host``, ``port``, ``priority``, ``timeout``,
    ``inp``, ``build_res``, ``number``, ``repeat``, ``min_repeat_ms``,
    ``enable_cpu_cache_flush``, ``cooldown_interval``, ``verbose`` and
    ``max_float`` are free variables resolved from the surrounding scope.

    Returns
    -------
    costs : tuple
        ``time_f(*args).results`` on success, ``(max_float,)`` on any failure.
    error_no : int
        ``0`` on success, ``MeasureErrorNo.COMPILE_DEVICE`` if uploading/loading
        the module failed, ``MeasureErrorNo.RUNTIME_DEVICE`` if running it failed.
    error_msg : object or None
        Result of ``make_error_msg()`` on failure, otherwise ``None``.
    time_cost : float
        Elapsed wall time of this call plus ``build_res.time_cost``.
    timestamp : float
        ``time.time()`` taken right after the local build directory was removed.
    """
    tic = time.time()
    error_no = 0
    error_msg = None
    try:
        # upload built module
        remote = request_remote(key, host, port, priority, timeout)
        remote.upload(build_res.filename)
        func = remote.load_module(os.path.split(build_res.filename)[1])
        ctx = remote.context(str(inp.task.target), 0)
        # Limitation:
        # We can not get PackFunction directly in the remote mode as it is wrapped
        # under the std::function. We could lift the restriction later once we fold
        # the PackedFunc as an object. Currently, we pass function name to work
        # around it.
        f_prepare = 'cache_flush_cpu_non_first_arg' if enable_cpu_cache_flush else ''
        time_f = func.time_evaluator(
            func.entry_name, ctx, number=number, repeat=repeat,
            min_repeat_ms=min_repeat_ms, f_preproc=f_prepare)
    # pylint: disable=broad-except
    except Exception:
        costs = (max_float,)
        error_no = MeasureErrorNo.COMPILE_DEVICE
        error_msg = make_error_msg()

    if error_no == 0:
        try:
            args = [ndarray.empty(get_const_tuple(x.shape), x.dtype, ctx)
                    for x in build_res.args]
            try:
                random_fill = remote.get_function("tvm.contrib.random.random_fill")
            except AttributeError as err:
                # Chain the original lookup failure so the reported traceback
                # shows it as the direct cause of this more helpful message.
                raise AttributeError("Please make sure USE_RANDOM is ON in the config.cmake "
                                     "on the remote devices") from err
            for arg in args:
                random_fill(arg)
            ctx.sync()
            costs = time_f(*args).results
            # clean up remote files
            remote.remove(build_res.filename)
            remote.remove(os.path.splitext(build_res.filename)[0] + '.so')
            remote.remove('')
        # pylint: disable=broad-except
        except Exception:
            costs = (max_float,)
            error_no = MeasureErrorNo.RUNTIME_DEVICE
            error_msg = make_error_msg()

    # The local build directory is removed regardless of the outcome.
    shutil.rmtree(os.path.dirname(build_res.filename))
    toc = time.time()

    time.sleep(cooldown_interval)
    if verbose >= 1:
        if error_no == MeasureErrorNo.NO_ERROR:
            print("*", end="")
        else:
            print("*E", end="")  # Run error

    return costs, error_no, error_msg, toc - tic + build_res.time_cost, toc
def codegen(self, func):
    """Compile a single function and gather the resulting build artifacts.

    Parameters
    ----------
    func: tvm.relay.Expr
        The function to compile.

    Returns
    -------
    graph_json : str
        The graph json that can be consumed by runtime.
    lowered_funcs : List[tvm.LoweredFunc] or Dict[str, List[tvm.LoweredFunc]]
        The lowered functions.
    params : Dict[str, tvm.nd.NDArray]
        Additional constant parameters, keyed by the ``.value`` of each entry
        returned by ``self._list_params_name``.
    """
    self._codegen(func)
    graph_json = self._get_graph_json()
    lowered_funcs = self._get_lowered_funcs()

    def _clone(source):
        # Allocate an array matching the source's shape/dtype/ctx and copy into it.
        target = empty(source.shape, dtype=source.dtype, ctx=source.ctx)
        source.copyto(target)
        return target

    params = {}
    for entry in self._list_params_name():
        param_key = entry.value
        params[param_key] = _clone(self._get_param_by_name(param_key))
    return graph_json, lowered_funcs, params
def timed_func(inp, build_res):
    """Load a built module locally, run it on random inputs and time it.

    ``number``, ``repeat``, ``min_repeat_ms``, ``enable_cpu_cache_flush``,
    ``cooldown_interval``, ``verbose`` and ``max_float`` are free variables
    resolved from the surrounding scope.

    Parameters
    ----------
    inp : object
        Measurement input; only ``inp.task.target`` is read here.
    build_res : object
        Build result; ``filename``, ``args`` and ``time_cost`` are read here.

    Returns
    -------
    costs : tuple
        ``time_f(*args).results`` on success, ``(max_float,)`` on any failure.
    error_no : int
        ``0`` on success, ``MeasureErrorNo.COMPILE_DEVICE`` if loading the
        module failed, ``MeasureErrorNo.RUNTIME_DEVICE`` if running it failed.
    error_msg : object or None
        Result of ``make_error_msg()`` on failure, otherwise ``None``.
    time_cost : float
        Elapsed wall time of this call plus ``build_res.time_cost``.
    timestamp : float
        ``time.time()`` taken right after the build directory was removed.
    """
    tic = time.time()
    error_no = 0
    error_msg = None
    try:
        func = module.load_module(build_res.filename)
        ctx = ndarray.context(str(inp.task.target), 0)
        # Limitation:
        # We can not get PackFunction directly in the remote mode as it is wrapped
        # under the std::function. We could lift the restriction later once we fold
        # the PackedFunc as an object. Currently, we pass function name to work
        # around it.
        f_prepare = "cache_flush_cpu_non_first_arg" if enable_cpu_cache_flush else ""
        time_f = func.time_evaluator(
            func.entry_name,
            ctx,
            number=number,
            repeat=repeat,
            min_repeat_ms=min_repeat_ms,
            f_preproc=f_prepare,
        )
    # pylint: disable=broad-except
    except Exception:
        costs = (max_float,)
        error_no = MeasureErrorNo.COMPILE_DEVICE
        error_msg = make_error_msg()

    if error_no == 0:
        try:
            args = [
                ndarray.empty(get_const_tuple(x.shape), x.dtype, ctx)
                for x in build_res.args
            ]
            random_fill = tvm.get_global_func("tvm.contrib.random.random_fill", True)
            # Explicit check instead of ``assert`` so the helpful message is
            # still produced when Python runs with assertions disabled (-O).
            if not random_fill:
                raise RuntimeError("Please make sure USE_RANDOM is ON in the config.cmake")
            for arg in args:
                random_fill(arg)
            ctx.sync()
            costs = time_f(*args).results
        # pylint: disable=broad-except
        except Exception:
            costs = (max_float,)
            error_no = MeasureErrorNo.RUNTIME_DEVICE
            error_msg = make_error_msg()

    # The build directory is removed regardless of the outcome.
    shutil.rmtree(os.path.dirname(build_res.filename))
    toc = time.time()

    time.sleep(cooldown_interval)
    if verbose >= 1:
        if error_no == MeasureErrorNo.NO_ERROR:
            print("*", end="")
        else:
            print("*E", end="")  # Run error

    return costs, error_no, error_msg, toc - tic + build_res.time_cost, toc
def timed_func():
    """Upload a built module to a remote device, run it and time it.

    Takes no arguments: ``key``, ``host``, ``port``, ``priority``, ``timeout``,
    ``inp``, ``build_res``, ``number``, ``repeat``, ``min_repeat_ms``,
    ``cooldown_interval``, ``verbose`` and ``max_float`` are free variables
    resolved from the surrounding scope.

    Returns
    -------
    tuple
        ``(costs, error_no, error_msg, time_cost, timestamp)``. ``costs`` is
        ``time_f(*args).results`` on success and ``(max_float,)`` on failure;
        ``error_no`` is ``0``, ``MeasureErrorNo.COMPILE_DEVICE`` or
        ``MeasureErrorNo.RUNTIME_DEVICE``; ``error_msg`` is ``None`` or the
        result of ``make_error_msg()``; ``time_cost`` is the elapsed wall time
        plus ``build_res.time_cost``; ``timestamp`` is the end time.
    """
    started = time.time()
    status = 0
    message = None

    try:
        # Push the compiled artifact to the remote side and load it there.
        session = request_remote(key, host, port, priority, timeout)
        session.upload(build_res.filename)
        artifact_name = os.path.split(build_res.filename)[1]
        func = session.load_module(artifact_name)
        ctx = session.context(str(inp.task.target), 0)
        # TODO(FrozenGene): Add cpu cache flush to this function.
        time_f = func.time_evaluator(
            func.entry_name,
            ctx,
            number=number,
            repeat=repeat,
            min_repeat_ms=min_repeat_ms,
        )
    # pylint: disable=broad-except
    except Exception:
        costs = (max_float,)
        status = MeasureErrorNo.COMPILE_DEVICE
        message = make_error_msg()

    if status == 0:
        try:
            # TODO(FrozenGene): Update to ndarray.non-empty.
            args = []
            for placeholder in build_res.args:
                shape = get_const_tuple(placeholder.shape)
                args.append(ndarray.empty(shape, placeholder.dtype, ctx))
            ctx.sync()
            costs = time_f(*args).results
            # Remote cleanup: the uploaded file, its '.so' sibling, then ''.
            shared_lib = os.path.splitext(build_res.filename)[0] + '.so'
            for remote_path in (build_res.filename, shared_lib, ''):
                session.remove(remote_path)
        # pylint: disable=broad-except
        except Exception:
            costs = (max_float,)
            status = MeasureErrorNo.RUNTIME_DEVICE
            message = make_error_msg()

    # The local build directory is removed regardless of the outcome.
    build_dir = os.path.dirname(build_res.filename)
    shutil.rmtree(build_dir)
    finished = time.time()

    time.sleep(cooldown_interval)
    if verbose >= 1:
        # "*" marks a clean run, "*E" a run error.
        print("*" if status == MeasureErrorNo.NO_ERROR else "*E", end="")

    elapsed = finished - started + build_res.time_cost
    return costs, status, message, elapsed, finished
def timed_func(inp, build_res):
    """Load a built module locally, run it and time it.

    ``number``, ``repeat``, ``min_repeat_ms``, ``cooldown_interval``,
    ``verbose`` and ``max_float`` are free variables resolved from the
    surrounding scope.

    Parameters
    ----------
    inp : object
        Measurement input; only ``inp.task.target`` is read here.
    build_res : object
        Build result; ``filename``, ``args`` and ``time_cost`` are read here.

    Returns
    -------
    tuple
        ``(costs, error_no, error_msg, time_cost, timestamp)``. ``costs`` is
        ``time_f(*args).results`` on success and ``(max_float,)`` on failure;
        ``error_no`` is ``0``, ``MeasureErrorNo.COMPILE_DEVICE`` or
        ``MeasureErrorNo.RUNTIME_DEVICE``; ``error_msg`` is ``None`` or the
        result of ``make_error_msg()``; ``time_cost`` is the elapsed wall time
        plus ``build_res.time_cost``; ``timestamp`` is the end time.
    """
    started = time.time()
    status = 0
    message = None

    try:
        func = module.load_module(build_res.filename)
        ctx = ndarray.context(str(inp.task.target), 0)
        time_f = func.time_evaluator(
            func.entry_name,
            ctx,
            number=number,
            repeat=repeat,
            min_repeat_ms=min_repeat_ms,
        )
    # pylint: disable=broad-except
    except Exception:
        costs = (max_float,)
        status = MeasureErrorNo.COMPILE_DEVICE
        message = make_error_msg()

    if status == 0:
        try:
            args = []
            for placeholder in build_res.args:
                shape = get_const_tuple(placeholder.shape)
                args.append(ndarray.empty(shape, placeholder.dtype, ctx))
            ctx.sync()
            costs = time_f(*args).results
        # pylint: disable=broad-except
        except Exception:
            costs = (max_float,)
            status = MeasureErrorNo.RUNTIME_DEVICE
            message = make_error_msg()

    # The build directory is removed regardless of the outcome.
    build_dir = os.path.dirname(build_res.filename)
    shutil.rmtree(build_dir)
    finished = time.time()

    time.sleep(cooldown_interval)
    if verbose >= 1:
        # "*" marks a clean run, "*E" a run error.
        print("*" if status == MeasureErrorNo.NO_ERROR else "*E", end="")

    elapsed = finished - started + build_res.time_cost
    return costs, status, message, elapsed, finished
def _timed_eval_func(
    inp_serialized,
    build_res,
    number,
    repeat,
    min_repeat_ms,
    cooldown_interval,
    enable_cpu_cache_flush,
    verbose,
):
    """Load a built module locally, prepare its inputs, run it and time it.

    Parameters
    ----------
    inp_serialized : object
        Serialized measurement input, restored with ``MeasureInput.deserialize``.
    build_res : object
        Build result; ``filename``, ``args`` and ``time_cost`` are read here.
    number, repeat, min_repeat_ms
        Forwarded unchanged to ``func.time_evaluator``.
    cooldown_interval : float
        Passed to ``time.sleep`` after the measurement.
    enable_cpu_cache_flush : bool
        When true, ``"cache_flush_cpu_non_first_arg"`` is passed as ``f_preproc``.
    verbose : int
        When ``>= 1`` a progress marker (``*`` or ``*E``) is printed.

    Returns
    -------
    costs : tuple
        ``time_f(*args).results`` on success, ``(MAX_FLOAT,)`` on any failure.
    error_no : int
        ``0`` on success, ``MeasureErrorNo.COMPILE_DEVICE`` if loading the
        module failed, ``MeasureErrorNo.RUNTIME_DEVICE`` if running it failed.
    error_msg : object or None
        Result of ``make_traceback_info()`` on failure, otherwise ``None``.
    time_cost : float
        Elapsed wall time of this call plus ``build_res.time_cost``.
    timestamp : float
        ``time.time()`` taken right after the build directory was removed.
    """
    # pylint: disable=import-outside-toplevel
    from .search_task import get_task_input_buffer  # lazily import to avoid recursive dependency

    inp = MeasureInput.deserialize(inp_serialized)
    task_input_names = inp.task.task_input_names
    tic = time.time()
    error_no = 0
    error_msg = None
    try:
        func = module.load_module(build_res.filename)
        dev = ndarray.device(str(inp.task.target), 0)
        # Limitation:
        # We can not get PackFunction directly in the remote mode as it is wrapped
        # under the std::function. We could lift the restriction later once we fold
        # the PackedFunc as an object. Currently, we pass function name to work
        # around it.
        f_prepare = "cache_flush_cpu_non_first_arg" if enable_cpu_cache_flush else ""
        time_f = func.time_evaluator(
            func.entry_name,
            dev,
            number=number,
            repeat=repeat,
            min_repeat_ms=min_repeat_ms,
            f_preproc=f_prepare,
        )
    # pylint: disable=broad-except
    except Exception:
        costs = (MAX_FLOAT,)
        error_no = MeasureErrorNo.COMPILE_DEVICE
        error_msg = make_traceback_info()

    if error_no == 0:
        try:
            random_fill = tvm.get_global_func("tvm.contrib.random.random_fill", True)
            # Explicit check instead of ``assert`` so the helpful message is
            # still produced when Python runs with assertions disabled (-O).
            if not random_fill:
                raise RuntimeError("Please make sure USE_RANDOM is ON in the config.cmake")

            tensor_input_map = prepare_input_map(build_res.args) if task_input_names else {}
            args = []
            task_inputs_count = 0
            for arg in build_res.args:
                if arg in tensor_input_map:
                    # This argument is a registered task input: use its stored buffer.
                    tensor_name = tensor_input_map[arg]
                    if tensor_name not in task_input_names:
                        raise ValueError(
                            "%s not found in task_inputs, " % (tensor_name)
                            + "should provide with `SearchTask(..., task_inputs={...})`"
                        )
                    args.append(
                        ndarray.array(
                            get_task_input_buffer(inp.task.workload_key, tensor_name), dev
                        )
                    )
                    task_inputs_count += 1
                else:
                    # Any other argument gets a randomly filled buffer.
                    empty_array = ndarray.empty(get_const_tuple(arg.shape), arg.dtype, dev)
                    random_fill(empty_array)
                    args.append(empty_array)
            if task_inputs_count != len(task_input_names):
                logger.warning(
                    "task_inputs not fully matched, check if there's any unexpected error"
                )
            dev.sync()
            costs = time_f(*args).results
        # pylint: disable=broad-except
        except Exception:
            costs = (MAX_FLOAT,)
            error_no = MeasureErrorNo.RUNTIME_DEVICE
            error_msg = make_traceback_info()

    # The build directory is removed regardless of the outcome.
    shutil.rmtree(os.path.dirname(build_res.filename))
    toc = time.time()
    time.sleep(cooldown_interval)

    if verbose >= 1:
        if error_no == MeasureErrorNo.NO_ERROR:
            print("*", end="", flush=True)
        else:
            print("*E", end="", flush=True)  # Run error

    return costs, error_no, error_msg, toc - tic + build_res.time_cost, toc
def _timed_eval_func(
    inp_serialized,
    build_res,
    args,
    number,
    repeat,
    min_repeat_ms,
    cooldown_interval,
    enable_cpu_cache_flush,
    verbose,
):
    """Load a built module locally, prepare its inputs, run it and time it.

    Parameters
    ----------
    inp_serialized : object
        Serialized measurement input, restored with ``MeasureInput.deserialize``.
    build_res : object
        Build result; ``filename``, ``args`` and ``time_cost`` are read here.
    args : sequence
        One entry per element of ``build_res.args``. A ``None`` entry is
        replaced by a randomly filled buffer; any other entry is copied to the
        device with ``ndarray.array``. The sequence itself is not modified.
    number, repeat, min_repeat_ms
        Forwarded unchanged to ``func.time_evaluator``.
    cooldown_interval : float
        Passed to ``time.sleep`` after the measurement.
    enable_cpu_cache_flush : bool
        When true, ``"cache_flush_cpu_non_first_arg"`` is passed as ``f_preproc``.
    verbose : int
        When ``>= 1`` a progress marker (``*`` or ``*E``) is printed.

    Returns
    -------
    costs : tuple
        ``time_f(...).results`` on success, ``(MAX_FLOAT,)`` on any failure.
    error_no : int
        ``0`` on success, ``MeasureErrorNo.COMPILE_DEVICE`` if loading the
        module failed, ``MeasureErrorNo.RUNTIME_DEVICE`` if running it failed.
    error_msg : object or None
        Result of ``make_traceback_info()`` on failure, otherwise ``None``.
    time_cost : float
        Elapsed wall time of this call plus ``build_res.time_cost``.
    timestamp : float
        ``time.time()`` taken right after the build directory was removed.
    """
    inp = MeasureInput.deserialize(inp_serialized)
    tic = time.time()
    error_no = 0
    error_msg = None
    try:
        func = module.load_module(build_res.filename)
        dev = ndarray.device(str(inp.task.target), 0)
        # Limitation:
        # We can not get PackFunction directly in the remote mode as it is wrapped
        # under the std::function. We could lift the restriction later once we fold
        # the PackedFunc as an object. Currently, we pass function name to work
        # around it.
        f_prepare = "cache_flush_cpu_non_first_arg" if enable_cpu_cache_flush else ""
        time_f = func.time_evaluator(
            func.entry_name,
            dev,
            number=number,
            repeat=repeat,
            min_repeat_ms=min_repeat_ms,
            f_preproc=f_prepare,
        )
    # pylint: disable=broad-except
    except Exception:
        costs = (MAX_FLOAT,)
        error_no = MeasureErrorNo.COMPILE_DEVICE
        error_msg = make_traceback_info()

    if error_no == 0:
        try:
            random_fill = tvm.get_global_func("tvm.contrib.random.random_fill", True)
            # Explicit checks instead of ``assert`` so validation still happens
            # when Python runs with assertions disabled (-O).
            if not random_fill:
                raise RuntimeError("Please make sure USE_RANDOM is ON in the config.cmake")
            if len(args) != len(build_res.args):
                raise ValueError(
                    "Expected %d arguments but got %d" % (len(build_res.args), len(args))
                )

            # Build a fresh list so the caller's ``args`` is left untouched.
            run_args = []
            for given, build_res_arg in zip(args, build_res.args):
                if given is None:
                    empty_array = ndarray.empty(
                        get_const_tuple(build_res_arg.shape), build_res_arg.dtype, dev
                    )
                    random_fill(empty_array)
                    run_args.append(empty_array)
                else:
                    run_args.append(ndarray.array(given, dev))
            dev.sync()
            costs = time_f(*run_args).results
        # pylint: disable=broad-except
        except Exception:
            costs = (MAX_FLOAT,)
            error_no = MeasureErrorNo.RUNTIME_DEVICE
            error_msg = make_traceback_info()

    # The build directory is removed regardless of the outcome.
    shutil.rmtree(os.path.dirname(build_res.filename))
    toc = time.time()
    time.sleep(cooldown_interval)

    if verbose >= 1:
        if error_no == MeasureErrorNo.NO_ERROR:
            print("*", end="", flush=True)
        else:
            print("*E", end="", flush=True)  # Run error

    return costs, error_no, error_msg, toc - tic + build_res.time_cost, toc
def _rpc_run(
    inp_serialized,
    build_res,
    args,
    key,
    host,
    port,
    priority,
    timeout,
    number,
    repeat,
    min_repeat_ms,
    cooldown_interval,
    enable_cpu_cache_flush,
    verbose,
):
    """Upload a built module to a remote device, run it once, then time it.

    Parameters
    ----------
    inp_serialized : object
        Serialized measurement input, restored with ``MeasureInput.deserialize``.
    build_res : object
        Build result; ``filename``, ``args`` and ``time_cost`` are read here.
    args : sequence
        One entry per element of ``build_res.args``. A ``None`` entry is
        replaced by a randomly filled buffer; any other entry is copied to the
        device with ``ndarray.array``. The sequence itself is not modified.
    key, host, port, priority, timeout
        Forwarded unchanged to ``request_remote``.
    number, repeat, min_repeat_ms
        Forwarded unchanged to ``func.time_evaluator``.
    cooldown_interval : float
        Passed to ``time.sleep`` after the measurement.
    enable_cpu_cache_flush : bool
        When true, ``"cache_flush_cpu_non_first_arg"`` is passed as ``f_preproc``.
    verbose : int
        When ``>= 1`` a progress marker (``*`` or ``*E``) is printed.

    Returns
    -------
    costs : tuple
        ``time_f(...).results`` on success, ``(MAX_FLOAT,)`` on any failure.
    error_no : int
        ``0`` on success, ``MeasureErrorNo.COMPILE_DEVICE`` if uploading/loading
        the module failed, ``MeasureErrorNo.RUNTIME_DEVICE`` if running it failed.
    error_msg : object or None
        Result of ``make_traceback_info()`` on failure, otherwise ``None``.
    time_cost : float
        Elapsed wall time of this call plus ``build_res.time_cost``.
    timestamp : float
        ``time.time()`` taken right after the local build directory was removed.
    """
    inp = MeasureInput.deserialize(inp_serialized)
    tic = time.time()
    error_no = 0
    error_msg = None
    try:
        # upload built module
        remote = request_remote(key, host, port, priority, timeout)
        remote.upload(build_res.filename)
        func = remote.load_module(os.path.split(build_res.filename)[1])
        dev = remote.device(str(inp.task.target), 0)
        # Limitation:
        # We can not get PackFunction directly in the remote mode as it is wrapped
        # under the std::function. We could lift the restriction later once we fold
        # the PackedFunc as an object. Currently, we pass function name to work
        # around it.
        f_prepare = "cache_flush_cpu_non_first_arg" if enable_cpu_cache_flush else ""
        time_f = func.time_evaluator(
            func.entry_name,
            dev,
            number=number,
            repeat=repeat,
            min_repeat_ms=min_repeat_ms,
            f_preproc=f_prepare,
        )
    # pylint: disable=broad-except
    except Exception:
        costs = (MAX_FLOAT,)
        error_no = MeasureErrorNo.COMPILE_DEVICE
        error_msg = make_traceback_info()

    if error_no == 0:
        # Stays None until the stream really exists, so a failure inside
        # create_raw_stream() is reported as RUNTIME_DEVICE instead of
        # surfacing as an unbound-variable error while cleaning up.
        stream = None
        try:
            stream = dev.create_raw_stream()
            dev.set_raw_stream(stream)
            random_fill = remote.get_function("tvm.contrib.random.random_fill")
            # Explicit checks instead of ``assert`` so validation still happens
            # when Python runs with assertions disabled (-O).
            if not random_fill:
                raise RuntimeError(
                    "Please make sure USE_RANDOM is ON in the config.cmake on the remote devices"
                )
            if len(args) != len(build_res.args):
                raise ValueError(
                    "Expected %d arguments but got %d" % (len(build_res.args), len(args))
                )

            # Build a fresh list so the caller's ``args`` is left untouched.
            run_args = []
            for given, build_res_arg in zip(args, build_res.args):
                if given is None:
                    empty_array = ndarray.empty(
                        get_const_tuple(build_res_arg.shape), build_res_arg.dtype, dev
                    )
                    random_fill(empty_array)
                    run_args.append(empty_array)
                else:
                    run_args.append(ndarray.array(given, dev))
            dev.sync()

            # First run for check that the kernel is correct
            func.entry_func(*run_args)
            dev.sync()

            costs = time_f(*run_args).results

            # clean up remote files
            remote.remove(build_res.filename)
            remote.remove(os.path.splitext(build_res.filename)[0] + ".so")
            remote.remove("")
        # pylint: disable=broad-except
        except Exception:
            costs = (MAX_FLOAT,)
            error_no = MeasureErrorNo.RUNTIME_DEVICE
            error_msg = make_traceback_info()
        finally:
            # Release the stream exactly once, on both success and failure.
            if stream is not None:
                dev.free_raw_stream(stream)

    # The local build directory is removed regardless of the outcome.
    shutil.rmtree(os.path.dirname(build_res.filename))
    toc = time.time()

    time.sleep(cooldown_interval)
    if verbose >= 1:
        if error_no == MeasureErrorNo.NO_ERROR:
            print("*", end="")
        else:
            print("*E", end="")  # Run error

    return costs, error_no, error_msg, toc - tic + build_res.time_cost, toc