def test_popen_pool_executor():
    """Check timeout, crash, plain-return and mapping behaviour of the pool.

    Four jobs are submitted to a two-worker pool with a very short timeout and
    their outcomes are checked one by one; afterwards a second, timeout-free
    pool is used to verify ``map_with_error_catching`` over ``range(100)``.
    """
    import tvm

    executor = PopenPoolExecutor(max_workers=2, timeout=0.01)
    # The submission order is significant for the expectations below, so it is
    # kept exactly: slow job, self-terminating job, quick job, String ctor.
    slow_future = executor.submit(identity_after, 1, 100)
    crashing_future = executor.submit(terminate_self)
    quick_future = executor.submit(identity_after, 3, 0)
    string_future = executor.submit(tvm.runtime.String, "xyz")

    # The slow job is expected to exceed the pool timeout.
    with pytest.raises(TimeoutError):
        slow_future.result()

    # The self-terminating job is expected to surface as a child-process error.
    with pytest.raises(ChildProcessError):
        crashing_future.result()

    assert quick_future.result() == 3

    string_value = string_future.result()
    assert isinstance(string_value, tvm.runtime.String)
    assert string_value == "xyz"

    # Rebind the same name so the first pool is released at the same point as
    # before the second one is exercised.
    executor = PopenPoolExecutor(max_workers=4, timeout=None)
    mapped = executor.map_with_error_catching(lambda x: x, range(100))
    for expected, outcome in enumerate(mapped):
        assert outcome.value == expected
def test_popen_pool_executor_recycles():
    """Check that a worker process is replaced after ``maximum_process_uses`` jobs.

    With a single worker limited to two uses, the first two jobs must report
    the same pid and the third job must report a different one.
    """
    executor = PopenPoolExecutor(max_workers=1, timeout=None, maximum_process_uses=2)

    first_pid = executor.submit(os.getpid).result()

    # Second use: still the same worker process.
    second_pid = executor.submit(os.getpid).result()
    assert first_pid == second_pid

    # Third use: the worker must have been recycled.
    third_pid = executor.submit(os.getpid).result()
    assert first_pid != third_pid
def test_popen_pool_executor_timeout():
    """Check that a job exceeding the pool timeout raises ``TimeoutError``.

    The previous form caught the exception in a ``try``/``except`` and asserted
    on its type inside the handler, which let the test pass silently when no
    exception was raised at all. ``pytest.raises`` makes the timeout mandatory.
    """
    timeout = 0.5

    pool = PopenPoolExecutor(timeout=timeout)

    # NOTE(review): ``timeout_job`` is defined elsewhere; it is presumed to run
    # longer than the ``timeout`` it is given -- confirm against its definition.
    f1 = pool.submit(timeout_job, timeout)

    # Spin until the future settles so that ``done()`` is exercised for a
    # timed-out job before its result is requested.
    while not f1.done():
        pass

    with pytest.raises(TimeoutError):
        f1.result()
def test_popen_pool_executor_async():
    """Check that two submitted jobs run asynchronously.

    A slow and a fast summation are submitted back to back; the test polls both
    futures, records when each is first seen as done, and expects the fast one
    to be observed first while both produce the same result.
    """
    executor = PopenPoolExecutor()
    slow_future = executor.submit(slow_summation, 9999999)
    fast_future = executor.submit(fast_summation, 9999999)

    # A timestamp of 0 doubles as the "not finished yet" marker.
    slow_done_at = 0
    fast_done_at = 0
    while slow_done_at == 0 or fast_done_at == 0:
        if slow_done_at == 0 and slow_future.done():
            slow_done_at = time.time()
        if fast_done_at == 0 and fast_future.done():
            fast_done_at = time.time()

    assert fast_done_at < slow_done_at, "Expected fast async job to finish first!"
    assert slow_future.result() == fast_future.result()
class RPCRunner(PyRunner):
    """RPC based runner

    Parameters
    ----------
    rpc_config: RPCConfig
        The rpc configuration.
    evaluator_config: EvaluatorConfig
        The evaluator configuration.
    cooldown_sec: float
        The cooldown in seconds. TODO(@junrushao1994,@zxybazh): This is not used yet.
    alloc_repeat: int
        The number of times to repeat the allocation.
    f_create_session: Union[T_CREATE_SESSION, str, None]
        The function name to create the session or the function itself.
    f_upload_module: Union[T_UPLOAD_MODULE, str, None]
        The function name to upload the module or the function itself.
    f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
        The function name to allocate the arguments or the function itself.
    f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
        The function name to run the evaluator or the function itself.
    f_cleanup: Union[T_CLEANUP, str, None]
        The function name to cleanup the session or the function itself.

    Attributes
    ----------
    pool: PopenPoolExecutor
        The popen pool executor.
    T_CREATE_SESSION : typing._GenericAlias
        The signature of the function `f_create_session`, which is:

        .. code-block:: python

        def default_create_session(rpc_config: RPCConfig) -> RPCSession:
            ...

    T_UPLOAD_MODULE : typing._GenericAlias
        The signature of the function `f_upload_module`, which is:

        .. code-block:: python

        def default_upload_module(
            session: RPCSession,
            local_path: str,
            remote_path: str,
        ) -> Module:
            ...

    T_ALLOC_ARGUMENT : typing._GenericAlias
        The signature of the function `f_alloc_argument`, which is:

        .. code-block:: python

        def default_alloc_argument(
            session: RPCSession,
            device: Device,
            args_info: T_ARG_INFO_JSON_OBJ_LIST,
            alloc_repeat: int,
        ) -> List[T_ARGUMENT_LIST]:
            ...

    T_RUN_EVALUATOR : typing._GenericAlias
        The signature of the function `f_run_evaluator`, which is:

        .. code-block:: python

        def default_run_evaluator(
            session: RPCSession,
            rt_mod: Module,
            device: Device,
            evaluator_config: EvaluatorConfig,
            repeated_args: List[T_ARGUMENT_LIST],
        ) -> List[float]:
            ...

    T_CLEANUP : typing._GenericAlias
        The signature of the function `f_cleanup`, which is:

        .. code-block:: python

        def default_cleanup(
            session: Optional[RPCSession],
            remote_path: Optional[str],
        ) -> None:
            ...
    """

    rpc_config: RPCConfig
    evaluator_config: EvaluatorConfig
    cooldown_sec: float
    alloc_repeat: int

    f_create_session: Union[T_CREATE_SESSION, str, None]
    f_upload_module: Union[T_UPLOAD_MODULE, str, None]
    f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
    f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
    f_cleanup: Union[T_CLEANUP, str, None]

    pool: PopenPoolExecutor

    def __init__(
        self,
        rpc_config: Optional[RPCConfig] = None,
        evaluator_config: Optional[EvaluatorConfig] = None,
        cooldown_sec: float = 0.0,
        alloc_repeat: int = 1,
        f_create_session: Union[T_CREATE_SESSION, str, None] = None,
        f_upload_module: Union[T_UPLOAD_MODULE, str, None] = None,
        f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None] = None,
        f_run_evaluator: Union[T_RUN_EVALUATOR, str, None] = None,
        f_cleanup: Union[T_CLEANUP, str, None] = None,
        max_workers: Optional[int] = None,
        initializer: Optional[Callable[[], None]] = None,
    ) -> None:
        """Constructor

        Parameters
        ----------
        rpc_config: RPCConfig
            The rpc configuration.
        evaluator_config: EvaluatorConfig
            The evaluator configuration.
        cooldown_sec: float
            The cooldown in seconds.
        alloc_repeat: int
            The number of times to random fill the allocation.
        f_create_session: Union[T_CREATE_SESSION, str, None]
            The function name to create the session or the function itself.
        f_upload_module: Union[T_UPLOAD_MODULE, str, None]
            The function name to upload the module or the function itself.
        f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
            The function name to allocate the arguments or the function itself.
        f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
            The function name to run the evaluator or the function itself.
        f_cleanup: Union[T_CLEANUP, str, None]
            The function name to cleanup the session or the function itself.
        max_workers: Optional[int] = None
            The maximum number of connections. Defaults to number of logical CPU cores.
        initializer: Optional[Callable[[], None]]
            The initializer function.
        """
        super().__init__()
        self.rpc_config = RPCConfig._normalized(rpc_config)
        self.evaluator_config = EvaluatorConfig._normalized(evaluator_config)
        self.cooldown_sec = cooldown_sec
        self.alloc_repeat = alloc_repeat
        self.f_create_session = f_create_session
        self.f_upload_module = f_upload_module
        self.f_alloc_argument = f_alloc_argument
        self.f_run_evaluator = f_run_evaluator
        self.f_cleanup = f_cleanup
        if max_workers is None:
            max_workers = cpu_count(logical=True)
        logger.info("RPCRunner: max_workers = %d", max_workers)
        self.pool = PopenPoolExecutor(
            max_workers=max_workers,
            # Read the timeout from the normalized config, exactly as ``run``
            # does: the raw ``rpc_config`` argument defaults to None, so
            # ``rpc_config.session_timeout_sec`` would raise AttributeError.
            timeout=self.rpc_config.session_timeout_sec,
            initializer=initializer,
        )
        self._sanity_check()

    def run(self, runner_inputs: List[RunnerInput]) -> List[RunnerFuture]:
        """Submit one worker job per runner input and return their futures.

        Parameters
        ----------
        runner_inputs: List[RunnerInput]
            The inputs to be run; each one contributes its artifact path,
            device type and argument info to the submitted job.

        Returns
        -------
        results: List[RunnerFuture]
            One ``RPCRunnerFuture`` per input, in input order.
        """
        results: List[RunnerFuture] = []
        for runner_input in runner_inputs:
            future = RPCRunnerFuture(
                future=self.pool.submit(
                    _worker_func,
                    self.f_create_session,
                    self.f_upload_module,
                    self.f_alloc_argument,
                    self.f_run_evaluator,
                    self.f_cleanup,
                    self.rpc_config,
                    self.evaluator_config,
                    self.alloc_repeat,
                    # Paths, device types and argument info are passed as
                    # plain strings / JSON objects.
                    str(runner_input.artifact_path),
                    str(runner_input.device_type),
                    tuple(arg_info.as_json() for arg_info in runner_input.args_info),
                ),
                timeout_sec=self.rpc_config.session_timeout_sec,
            )
            results.append(future)  # type: ignore
        return results

    def _sanity_check(self) -> None:
        """Resolve every configured customization function once inside the pool.

        The lookups run in a pool worker and ``result()`` is awaited, so any
        error raised while resolving a function surfaces at construction time.
        """

        def _check(
            f_create_session,
            f_upload_module,
            f_alloc_argument,
            f_run_evaluator,
            f_cleanup,
        ) -> None:
            get_global_func_with_default_on_worker(name=f_create_session, default=None)
            get_global_func_with_default_on_worker(name=f_upload_module, default=None)
            get_global_func_with_default_on_worker(name=f_alloc_argument, default=None)
            get_global_func_with_default_on_worker(name=f_run_evaluator, default=None)
            get_global_func_with_default_on_worker(name=f_cleanup, default=None)

        value = self.pool.submit(
            _check,
            self.f_create_session,
            self.f_upload_module,
            self.f_alloc_argument,
            self.f_run_evaluator,
            self.f_cleanup,
        )
        value.result()
class LocalBuilder(Builder):
    """Run compilation on local machine

    Parameters
    ----------
    timeout: float
        The timeout of a compilation
    n_parallel: int
        The number of tasks run in parallel. "None" will use all cpu cores
    build_kwargs: dict
        If supplied, additional kwargs passed to build_func. Overrides any build_kwargs supplied
        by the Runner.
    build_func: callable or str
        If is 'default', use default build function
        If is 'ndk', use function for android ndk
        If is 'stackvm', use function for stackvm
        If is callable, use it as custom build function, expect lib_format field.
    do_fork: bool
        If False, do not fork when building. Requires n_parallel=1.
    runtime: Optional[Runtime]
        Specify the runtime to generate artifacts for
    """

    def __init__(
        self,
        timeout=10,
        n_parallel=None,
        build_kwargs=None,
        build_func="default",
        do_fork=False,
        runtime=None,
    ):
        """Resolve the build function and create the executor and scratch directory.

        Raises
        ------
        ValueError
            If ``build_func`` is a string other than 'default', 'ndk' or 'stackvm'.
        AssertionError
            If ``do_fork`` is False and ``n_parallel`` is neither None nor 1.
        """
        super(LocalBuilder, self).__init__(timeout, n_parallel, build_kwargs)

        if isinstance(build_func, str):
            if build_func == "default":
                build_func = tar.tar
            elif build_func == "ndk":
                build_func = ndk.create_shared
            elif build_func == "stackvm":
                build_func = stackvm.build
            else:
                # Separator added: the message used to read "Invalid build_func<name>".
                raise ValueError("Invalid build_func: " + build_func)
        self.build_func = _WrappedBuildFunc(build_func, runtime)
        if not do_fork:
            assert n_parallel in (
                None,
                1,
            ), f"if do_fork=False, need n_parallel=None or 1; got {n_parallel}"
        self.executor = PopenPoolExecutor(
            timeout=timeout,
            initializer=reset_global_scope,
            initargs=(AutotvmGlobalScope.current,),
        )
        self.tmp_dir = tempfile.mkdtemp()

    def build(self, measure_inputs):
        """Build every measure input and return one result per input.

        Inputs are submitted to the executor in batches of ``self.n_parallel``.
        A successful build result is returned unchanged; a build error, a
        timeout or a crashed child process is converted into a
        ``MeasureResult`` carrying the matching ``MeasureErrorNo`` code.

        Parameters
        ----------
        measure_inputs: list
            The inputs to build.

        Returns
        -------
        results: list
            Build results (or ``MeasureResult`` error records), in input order.
        """
        results = []

        # Every call starts from a fresh scratch directory; the previous one is
        # removed on a best-effort basis.
        shutil.rmtree(self.tmp_dir, ignore_errors=True)
        self.tmp_dir = tempfile.mkdtemp()

        for i in range(0, len(measure_inputs), self.n_parallel):
            futures = [
                self.executor.submit(self.build_func, inp, self.tmp_dir, **self.build_kwargs)
                for inp in measure_inputs[i : i + self.n_parallel]
            ]

            for future in futures:
                try:
                    res = future.result()
                    if res.error is not None:
                        if isinstance(res.error, InstantiationError):
                            # instantiation error
                            res = MeasureResult(
                                (res.error,),
                                MeasureErrorNo.INSTANTIATION_ERROR,
                                res.time_cost,
                                time.time(),
                            )
                        elif "InstantiationError" in str(res.error):
                            # The instantiation error arrived as text: try to
                            # pull the message out of the second-to-last line,
                            # falling back to the full text if the layout
                            # differs.
                            msg = str(res.error)
                            try:
                                msg = msg.split("\n")[-2].split(": ")[1]
                            except Exception:  # pylint: disable=broad-except
                                pass
                            res = MeasureResult(
                                (InstantiationError(msg),),
                                MeasureErrorNo.INSTANTIATION_ERROR,
                                res.time_cost,
                                time.time(),
                            )
                        else:
                            # tvm error
                            res = MeasureResult(
                                (res.error,),
                                MeasureErrorNo.COMPILE_HOST,
                                res.time_cost,
                                time.time(),
                            )
                except TimeoutError as ex:
                    res = MeasureResult(
                        (ex,), MeasureErrorNo.BUILD_TIMEOUT, self.timeout, time.time()
                    )
                except ChildProcessError as ex:
                    res = MeasureResult(
                        (ex,),
                        MeasureErrorNo.RUNTIME_DEVICE,
                        self.timeout,
                        time.time(),
                    )

                results.append(res)

        return results
class RPCRunner(Runner):
    """Run generated code on remote devices.

    The runner asks an RPC Tracker for a device to measure on.

    Parameters
    ----------
    key: str
        The key of the device registered in the tracker
    host: str
        The host address of RPC Tracker
    port: int
        The port of RPC Tracker
    priority: int, optional
        Forwarded unchanged as the ``priority`` entry of the remote request
        arguments. NOTE(review): presumably the request priority on the
        tracker -- confirm.
    timeout: float
        The timeout of a RPCRunner measurement task
    n_parallel: int
        The number of tasks run in parallel. "None" will use all cpu cores
    number: int
        The number of times to run the generated code for taking average.
        We call these runs as one `repeat` of measurement.
    repeat : int, optional
        The number of times to repeat the measurement.
        In total, the generated code will be run (1 + number x repeat) times,
        where the first "1" is warm up and will be discarded.
        The returned result contains `repeat` costs,
        each of which is an average of `number` costs.
    min_repeat_ms: int, optional
        The minimum duration of one `repeat` in milliseconds.
        By default, one `repeat` contains `number` runs. If this parameter is set,
        the parameters `number` will be dynamically adjusted to meet the
        minimum duration requirement of one `repeat`.
        i.e., When the run time of one `repeat` falls below this time, the `number` parameter
        will be automatically increased.
    cooldown_interval: float, optional
        The cool down interval between two measurements.
    enable_cpu_cache_flush: bool
        Whether to flush cache on CPU between repeated measurements.
        Flushing cache can make the measured latency of one operator closer to
        its actual latency during end-to-end inference.
        To make this option effective, the argument `number` should also be set to 1.
        This only has an effect on CPU tasks.
    module_loader : ModuleLoader
        If given, a context manager that loads the module to be timed into the remote runtime.
        If not given, default_module_loader is used.
    """

    def __init__(
        self,
        key,
        host,
        port,
        priority=1,
        timeout=10,
        n_parallel=None,
        number=4,
        repeat=3,
        min_repeat_ms=0,
        cooldown_interval=0.1,
        enable_cpu_cache_flush=False,
        module_loader=None,
    ):
        super(RPCRunner, self).__init__(timeout, n_parallel)

        # Tracker connection settings.
        self.key = key
        self.host = host
        self.port = port
        self.priority = priority
        self.timeout = timeout

        # Measurement settings.
        self.number = number
        self.repeat = repeat
        self.min_repeat_ms = min_repeat_ms
        self._ref_input = None

        self.enable_cpu_cache_flush = enable_cpu_cache_flush
        self.cooldown_interval = cooldown_interval
        self.module_loader = module_loader

        # The executor timeout is the per-task timeout scaled by
        # ``n_parallel + 1``.
        pool_timeout = timeout * (self.n_parallel + 1)
        self.executor = PopenPoolExecutor(
            timeout=pool_timeout,
            initializer=reset_global_scope,
            initargs=(AutotvmGlobalScope.current,),
        )

    @property
    def ref_input(self):
        """
        Fixed input for tuning special operators, e.g., sparse operators
        requiring indices as input.
        """
        return self._ref_input

    @ref_input.setter
    def ref_input(self, val):
        # Only a non-None fixed input triggers the warning; the value is
        # stored in either case.
        if val is not None:
            warnings.warn(
                "You are specifying fixed input for tuning the operator. "
                "Be sure your input always fits the operator. Some "
                "operators may conduct layout transformation during tuning, "
                "thus can lead to unexpected behaviors. ",
                RuntimeWarning,
            )
        self._ref_input = val

    def set_task(self, task):
        """Remember the task and verify that a remote device can be obtained.

        Raises
        ------
        RuntimeError
            If ``check_remote`` reports that no device is available.
        """
        self.task = task

        if not check_remote(task.target, self.key, self.host, self.port):
            raise RuntimeError(
                "Cannot get remote devices from the tracker. "
                "Please check the status of tracker by "
                "'python -m tvm.exec.query_rpc_tracker --port [THE PORT YOU USE]' "
                "and make sure you have free devices on the queue status."
            )
        logger.info("Get devices for measurement successfully!")

    def get_build_kwargs(self):
        """Return extra build kwargs; GPU-like targets get a ``check_gpu`` entry.

        For targets whose keys contain cuda, opencl, rocm or vulkan, the
        remote device is queried for its thread and shared-memory limits.
        """
        build_options = {}
        gpu_like_kinds = ("cuda", "opencl", "rocm", "vulkan")
        if not any(kind in self.task.target.keys for kind in gpu_like_kinds):
            return build_options

        remote = request_remote(self.key, self.host, self.port)
        dev = remote.device(str(self.task.target), 0)
        thread_limits = dev.max_thread_dimensions
        build_options["check_gpu"] = {
            "max_shared_memory_per_block": dev.max_shared_memory_per_block,
            "max_threads_per_block": dev.max_threads_per_block,
            "max_thread_x": thread_limits[0],
            "max_thread_y": thread_limits[1],
            "max_thread_z": thread_limits[2],
        }
        return build_options

    def run(self, measure_inputs, build_results):
        """Measure every (input, build result) pair through RPC.

        Pairs are submitted to the executor in batches of ``self.n_parallel``.
        Any exception raised while fetching a result is recorded as a
        ``MeasureResult`` with ``MeasureErrorNo.RUN_TIMEOUT``.
        """
        remote_kwargs = {
            "device_key": self.key,
            "host": self.host,
            "port": self.port,
            "priority": self.priority,
            "timeout": self.timeout,
        }
        results = []

        for start in range(0, len(measure_inputs), self.n_parallel):
            stop = start + self.n_parallel
            futures = []
            for measure_inp, build_res in zip(
                measure_inputs[start:stop], build_results[start:stop]
            ):
                # Fall back to a freshly created default loader for each task.
                if self.module_loader is not None:
                    module_loader = self.module_loader
                else:
                    module_loader = default_module_loader()
                futures.append(
                    self.executor.submit(
                        run_through_rpc,
                        measure_inp,
                        build_res,
                        self.number,
                        self.repeat,
                        self.min_repeat_ms,
                        self.cooldown_interval,
                        remote_kwargs,
                        self.ref_input,
                        self.enable_cpu_cache_flush,
                        module_loader,
                    )
                )

            for future in futures:
                try:
                    results.append(future.result())
                except Exception as ex:  # pylint: disable=broad-except
                    results.append(
                        MeasureResult(
                            (str(ex),),
                            MeasureErrorNo.RUN_TIMEOUT,
                            self.timeout,
                            time.time(),
                        )
                    )

        return results
class RPCRunner(PyRunner):
    """RPC based runner

    Parameters
    ----------
    rpc_config: RPCConfig
        The rpc configuration.
    evaluator_config: EvaluatorConfig
        The evaluator configuration.
    cooldown_sec: float
        The cooldown in seconds. TODO(@junrushao1994,@zxybazh): This is not used yet.
    alloc_repeat: int
        The number of times to repeat the allocation.
    f_create_session: Union[T_CREATE_SESSION, str, None]
        The function name to create the session or the function itself.
    f_upload_module: Union[T_UPLOAD_MODULE, str, None]
        The function name to upload the module or the function itself.
    f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
        The function name to allocate the arguments or the function itself.
    f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
        The function name to run the evaluator or the function itself.
    f_cleanup: Union[T_CLEANUP, str, None]
        The function name to cleanup the session or the function itself.

    Attributes
    ----------
    pool: PopenPoolExecutor
        The popen pool executor.
    T_CREATE_SESSION : typing._GenericAlias
        The signature of the function `f_create_session`, which is:

        .. code-block:: python

        def default_create_session(rpc_config: RPCConfig) -> RPCSession:
            ...

    T_UPLOAD_MODULE : typing._GenericAlias
        The signature of the function `f_upload_module`, which is:

        .. code-block:: python

        def default_upload_module(
            session: RPCSession,
            local_path: str,
            remote_path: str,
        ) -> Module:
            ...

    T_ALLOC_ARGUMENT : typing._GenericAlias
        The signature of the function `f_alloc_argument`, which is:

        .. code-block:: python

        def default_alloc_argument(
            session: RPCSession,
            device: Device,
            args_info: T_ARG_INFO_JSON_OBJ_LIST,
            alloc_repeat: int,
        ) -> List[T_ARGUMENT_LIST]:
            ...

    T_RUN_EVALUATOR : typing._GenericAlias
        The signature of the function `f_run_evaluator`, which is:

        .. code-block:: python

        def default_run_evaluator(
            session: RPCSession,
            rt_mod: Module,
            device: Device,
            evaluator_config: EvaluatorConfig,
            repeated_args: List[T_ARGUMENT_LIST],
        ) -> List[float]:
            ...

    T_CLEANUP : typing._GenericAlias
        The signature of the function `f_cleanup`, which is:

        .. code-block:: python

        def default_cleanup(
            session: Optional[RPCSession],
            remote_path: Optional[str],
        ) -> None:
            ...
    """

    T_CREATE_SESSION = Callable[
        [RPCConfig],  # The RPC configuration
        RPCSession,  # The RPC Session
    ]
    T_UPLOAD_MODULE = Callable[
        [
            RPCSession,  # The RPC Session
            str,  # local path to the artifact
            str,  # remote path to the artifact
        ],
        Module,  # the Module opened on the remote
    ]
    T_ALLOC_ARGUMENT = Callable[
        [
            RPCSession,  # The RPC Session
            Device,  # The device on the remote
            T_ARG_INFO_JSON_OBJ_LIST,  # The metadata information of the arguments to be allocated
            int,  # The number of repeated allocations to be done
        ],
        List[T_ARGUMENT_LIST],  # A list of argument lists
    ]
    T_RUN_EVALUATOR = Callable[
        [
            RPCSession,  # The RPC Session
            Module,  # The Module opened on the remote
            Device,  # The device on the remote
            EvaluatorConfig,  # The evaluator configuration
            List[T_ARGUMENT_LIST],  # A list of argument lists
        ],
        List[float],  # A list of running time
    ]
    T_CLEANUP = Callable[
        [
            Optional[RPCSession],  # The RPC Session to be cleaned up
            Optional[str],  # remote path to the artifact
        ],
        None,
    ]

    rpc_config: RPCConfig
    evaluator_config: EvaluatorConfig
    cooldown_sec: float
    alloc_repeat: int

    f_create_session: Union[T_CREATE_SESSION, str, None]
    f_upload_module: Union[T_UPLOAD_MODULE, str, None]
    f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
    f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
    f_cleanup: Union[T_CLEANUP, str, None]

    pool: PopenPoolExecutor

    def __init__(
        self,
        rpc_config: Optional[RPCConfig] = None,
        evaluator_config: Optional[EvaluatorConfig] = None,
        cooldown_sec: float = 0.0,
        alloc_repeat: int = 1,
        f_create_session: Union[T_CREATE_SESSION, str, None] = None,
        f_upload_module: Union[T_UPLOAD_MODULE, str, None] = None,
        f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None] = None,
        f_run_evaluator: Union[T_RUN_EVALUATOR, str, None] = None,
        f_cleanup: Union[T_CLEANUP, str, None] = None,
        max_workers: int = 1,
        initializer: Optional[Callable[[], None]] = None,
    ) -> None:
        """Constructor

        Parameters
        ----------
        rpc_config: RPCConfig
            The rpc configuration.
        evaluator_config: EvaluatorConfig
            The evaluator configuration.
        cooldown_sec: float
            The cooldown in seconds.
        alloc_repeat: int
            The number of times to random fill the allocation.
        f_create_session: Union[T_CREATE_SESSION, str, None]
            The function name to create the session or the function itself.
        f_upload_module: Union[T_UPLOAD_MODULE, str, None]
            The function name to upload the module or the function itself.
        f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
            The function name to allocate the arguments or the function itself.
        f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
            The function name to run the evaluator or the function itself.
        f_cleanup: Union[T_CLEANUP, str, None]
            The function name to cleanup the session or the function itself.
        max_workers: int = 1
            The maximum number of connections. Defaults to 1.
        initializer: Optional[Callable[[], None]]
            The initializer function.
        """
        super().__init__()
        self.rpc_config = RPCConfig._normalized(rpc_config)
        self.evaluator_config = EvaluatorConfig._normalized(evaluator_config)
        self.cooldown_sec = cooldown_sec
        self.alloc_repeat = alloc_repeat
        self.f_create_session = f_create_session
        self.f_upload_module = f_upload_module
        self.f_alloc_argument = f_alloc_argument
        self.f_run_evaluator = f_run_evaluator
        self.f_cleanup = f_cleanup

        logger.info("RPCRunner: max_workers = %d", max_workers)
        self.pool = PopenPoolExecutor(
            max_workers=max_workers,
            # Read the timeout from the normalized config, exactly as ``run``
            # does: the raw ``rpc_config`` argument defaults to None, so
            # ``rpc_config.session_timeout_sec`` would raise AttributeError.
            timeout=self.rpc_config.session_timeout_sec,
            initializer=initializer,
        )
        self._sanity_check()

    def run(self, runner_inputs: List[RunnerInput]) -> List[RunnerFuture]:
        """Submit one worker job per runner input and return their futures.

        Parameters
        ----------
        runner_inputs: List[RunnerInput]
            The inputs to be run; each one contributes its artifact path,
            device type and argument info to the submitted job.

        Returns
        -------
        results: List[RunnerFuture]
            One ``RPCRunnerFuture`` per input, in input order.
        """
        results: List[RunnerFuture] = []
        for runner_input in runner_inputs:
            future = RPCRunnerFuture(
                future=self.pool.submit(
                    RPCRunner._worker_func,
                    self.f_create_session,
                    self.f_upload_module,
                    self.f_alloc_argument,
                    self.f_run_evaluator,
                    self.f_cleanup,
                    self.rpc_config,
                    self.evaluator_config,
                    self.alloc_repeat,
                    # Paths, device types and argument info are passed as
                    # plain strings / JSON objects.
                    str(runner_input.artifact_path),
                    str(runner_input.device_type),
                    tuple(arg_info.as_json() for arg_info in runner_input.args_info),
                ),
                timeout_sec=self.rpc_config.session_timeout_sec,
            )
            results.append(future)
        return results

    def _sanity_check(self) -> None:
        """Resolve every configured customization function once inside the pool.

        The lookups run in a pool worker and ``result()`` is awaited, so any
        error raised while resolving a function surfaces at construction time.
        """

        def _check(
            f_create_session,
            f_upload_module,
            f_alloc_argument,
            f_run_evaluator,
            f_cleanup,
        ) -> None:
            get_global_func_with_default_on_worker(name=f_create_session, default=None)
            get_global_func_with_default_on_worker(name=f_upload_module, default=None)
            get_global_func_with_default_on_worker(name=f_alloc_argument, default=None)
            get_global_func_with_default_on_worker(name=f_run_evaluator, default=None)
            get_global_func_with_default_on_worker(name=f_cleanup, default=None)

        value = self.pool.submit(
            _check,
            self.f_create_session,
            self.f_upload_module,
            self.f_alloc_argument,
            self.f_run_evaluator,
            self.f_cleanup,
        )
        value.result()

    @staticmethod
    def _worker_func(
        _f_create_session: Union[T_CREATE_SESSION, str, None],
        _f_upload_module: Union[T_UPLOAD_MODULE, str, None],
        _f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None],
        _f_run_evaluator: Union[T_RUN_EVALUATOR, str, None],
        _f_cleanup: Union[T_CLEANUP, str, None],
        rpc_config: RPCConfig,
        evaluator_config: EvaluatorConfig,
        alloc_repeat: int,
        artifact_path: str,
        device_type: str,
        args_info: T_ARG_INFO_JSON_OBJ_LIST,
    ) -> List[float]:
        """Run one measurement: create session, upload, allocate, evaluate, clean up.

        Each ``_f_*`` argument is resolved through
        ``get_global_func_with_default_on_worker`` with the matching
        ``default_*`` function as fallback. Cleanup is always invoked, with
        whatever session / remote path had been established by then (either
        may still be None if an earlier step failed).

        Returns
        -------
        costs: List[float]
            The running times returned by the evaluator function.
        """
        # Step 0. Get the registered functions
        f_create_session: RPCRunner.T_CREATE_SESSION = get_global_func_with_default_on_worker(
            _f_create_session, default_create_session
        )
        f_upload_module: RPCRunner.T_UPLOAD_MODULE = get_global_func_with_default_on_worker(
            _f_upload_module, default_upload_module
        )
        f_alloc_argument: RPCRunner.T_ALLOC_ARGUMENT = get_global_func_with_default_on_worker(
            _f_alloc_argument, default_alloc_argument
        )
        f_run_evaluator: RPCRunner.T_RUN_EVALUATOR = get_global_func_with_default_on_worker(
            _f_run_evaluator, default_run_evaluator
        )
        f_cleanup: RPCRunner.T_CLEANUP = get_global_func_with_default_on_worker(
            _f_cleanup, default_cleanup
        )
        # Managed resources
        session: Optional[RPCSession] = None
        remote_path: Optional[str] = None

        try:
            # Step 1. Create session
            session = f_create_session(rpc_config)
            device = session.device(dev_type=device_type, dev_id=0)
            # Step 2. Upload the module; the remote path is the artifact's
            # file name without its directory.
            _, remote_path = osp.split(artifact_path)
            local_path: str = artifact_path
            rt_mod: Module = f_upload_module(session, local_path, remote_path)
            # Step 3: Allocate input arguments
            repeated_args: List[T_ARGUMENT_LIST] = f_alloc_argument(
                session,
                device,
                args_info,
                alloc_repeat,
            )
            # Step 4: Run time_evaluator
            costs: List[float] = f_run_evaluator(
                session,
                rt_mod,
                device,
                evaluator_config,
                repeated_args,
            )
        finally:
            # Final step. Always clean up
            f_cleanup(session, remote_path)
        return costs