def test_popen_pool_executor():
    import tvm

    pool = PopenPoolExecutor(max_workers=2, timeout=0.01)
    value1 = pool.submit(identity_after, 1, 100)
    value2 = pool.submit(terminate_self)
    value3 = pool.submit(identity_after, 3, 0)
    value4 = pool.submit(tvm.runtime.String, "xyz")

    with pytest.raises(TimeoutError):
        value1.result()

    with pytest.raises(ChildProcessError):
        value2.result()

    assert value3.result() == 3
    value = value4.result()
    assert isinstance(value, tvm.runtime.String)
    assert value == "xyz"

    pool = PopenPoolExecutor(max_workers=4, timeout=None)
    values = pool.map_with_error_catching(lambda x: x, range(100))

    for idx, val in enumerate(values):
        assert val.value == idx
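
The snippet above assumes a few helpers and imports that are defined elsewhere in TVM's popen-pool test module and are not shown here. A minimal sketch of what they are assumed to look like (names follow the test, bodies are approximate):

import sys
import time

import pytest

from tvm.contrib.popen_pool import PopenPoolExecutor


def identity_after(x, sleep):
    # Return x after sleeping `sleep` seconds; with sleep=100 it exceeds the
    # pool timeout of 0.01s, with sleep=0 it returns immediately.
    if sleep:
        time.sleep(sleep)
    return x


def terminate_self():
    # Kill the worker process itself so the parent observes a dead subprocess
    # and raises ChildProcessError from result().
    sys.exit(-1)
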
Example #2
def test_popen_pool_executor_recycles():
    pool = PopenPoolExecutor(max_workers=1,
                             timeout=None,
                             maximum_process_uses=2)

    initial_pid = pool.submit(os.getpid).result()
    assert initial_pid == pool.submit(os.getpid).result()
    assert initial_pid != pool.submit(os.getpid).result()
Example #3
def test_popen_pool_executor_timeout():
    timeout = 0.5

    pool = PopenPoolExecutor(timeout=timeout)

    f1 = pool.submit(timeout_job, timeout)
    while not f1.done():
        pass
    try:
        res = f1.result()
    except Exception as ex:
        assert isinstance(ex, TimeoutError)
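
This test relies on a timeout_job helper that is not shown. A plausible sketch, assuming it simply sleeps past the executor's timeout:

import time


def timeout_job(timeout_sec):
    # Sleep well beyond the pool timeout so f1.result() raises TimeoutError.
    time.sleep(timeout_sec * 2)
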
Example #4
def test_popen_pool_executor_async():
    pool = PopenPoolExecutor()
    f1 = pool.submit(slow_summation, 9999999)
    f2 = pool.submit(fast_summation, 9999999)
    t1 = 0
    t2 = 0
    while True:
        if t1 == 0 and f1.done():
            t1 = time.time()
        if t2 == 0 and f2.done():
            t2 = time.time()
        if t1 != 0 and t2 != 0:
            break
    assert t2 < t1, "Expected fast async job to finish first!"
    assert f1.result() == f2.result()
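
Likewise, slow_summation and fast_summation are assumed to compute the same value at very different speeds, roughly like this:

def fast_summation(n):
    # Closed-form sum 0 + 1 + ... + n; returns almost immediately.
    return n * (n + 1) // 2


def slow_summation(n):
    # Naive loop over the same range; deliberately slow so it finishes last.
    total = 0
    for i in range(n + 1):
        total += i
    return total
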
Example #5
class RPCRunner(PyRunner):
    """RPC based runner

    Parameters
    ----------
    rpc_config: RPCConfig
        The rpc configuration.
    evaluator_config: EvaluatorConfig
        The evaluator configuration.
    cooldown_sec: float
        The cooldown in seconds. TODO(@junrushao1994,@zxybazh): This is not used yet.
    alloc_repeat: int
        The number of times to repeat the allocation.
    f_create_session: Union[T_CREATE_SESSION, str, None]
        The function name to create the session or the function itself.
    f_upload_module: Union[T_UPLOAD_MODULE, str, None]
        The function name to upload the module or the function itself.
    f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
        The function name to allocate the arguments or the function itself.
    f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
        The function name to run the evaluator or the function itself.
    f_cleanup: Union[T_CLEANUP, str, None]
        The function name to cleanup the session or the function itself.
    pool: PopenPoolExecutor
        The popen pool executor.

    Attributes
    ----------
    T_CREATE_SESSION : typing._GenericAlias
        The signature of the function `f_create_session`, which is:

        .. code-block:: python

        def default_create_session(rpc_config: RPCConfig) -> RPCSession:
            ...

    T_UPLOAD_MODULE : typing._GenericAlias
        The signature of the function `f_upload_module`, which is:

        .. code-block:: python

        def default_upload_module(
            session: RPCSession,
            local_path: str,
            remote_path: str,
        ) -> Module:
            ...

    T_ALLOC_ARGUMENT : typing._GenericAlias
        The signature of the function `f_alloc_argument`, which is:

        .. code-block:: python

        def default_alloc_argument(
            session: RPCSession,
            device: Device,
            args_info: T_ARG_INFO_JSON_OBJ_LIST,
            alloc_repeat: int,
        ) -> List[T_ARGUMENT_LIST]:
            ...

    T_RUN_EVALUATOR : typing._GenericAlias
        The signature of the function `f_run_evaluator`, which is:

        .. code-block:: python

        def default_run_evaluator(
            session: RPCSession,
            rt_mod: Module,
            device: Device,
            evaluator_config: EvaluatorConfig,
            repeated_args: List[T_ARGUMENT_LIST],
        ) -> List[float]:
            ...

    T_CLEANUP : typing._GenericAlias
        The signature of the function `f_cleanup`, which is:

        .. code-block:: python

        def default_cleanup(
            session: Optional[RPCSession],
            remote_path: Optional[str],
        ) -> None:
            ...
    """

    rpc_config: RPCConfig
    evaluator_config: EvaluatorConfig
    cooldown_sec: float
    alloc_repeat: int

    f_create_session: Union[T_CREATE_SESSION, str, None]
    f_upload_module: Union[T_UPLOAD_MODULE, str, None]
    f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
    f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
    f_cleanup: Union[T_CLEANUP, str, None]

    pool: PopenPoolExecutor

    def __init__(
        self,
        rpc_config: Optional[RPCConfig] = None,
        evaluator_config: Optional[EvaluatorConfig] = None,
        cooldown_sec: float = 0.0,
        alloc_repeat: int = 1,
        f_create_session: Union[T_CREATE_SESSION, str, None] = None,
        f_upload_module: Union[T_UPLOAD_MODULE, str, None] = None,
        f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None] = None,
        f_run_evaluator: Union[T_RUN_EVALUATOR, str, None] = None,
        f_cleanup: Union[T_CLEANUP, str, None] = None,
        max_workers: Optional[int] = None,
        initializer: Optional[Callable[[], None]] = None,
    ) -> None:
        """Constructor

        Parameters
        ----------
        rpc_config: RPCConfig
            The rpc configuration.
        evaluator_config: EvaluatorConfig
            The evaluator configuration.
        cooldown_sec: float
            The cooldown in seconds.
        alloc_repeat: int
            The number of times to repeat the argument allocation.
        f_create_session: Union[T_CREATE_SESSION, str, None]
            The function name to create the session or the function itself.
        f_upload_module: Union[T_UPLOAD_MODULE, str, None]
            The function name to upload the module or the function itself.
        f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
            The function name to allocate the arguments or the function itself.
        f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
            The function name to run the evaluator or the function itself.
        f_cleanup: Union[T_CLEANUP, str, None]
            The function name to cleanup the session or the function itself.
        max_workers: Optional[int] = None
            The maximum number of connections. Defaults to number of logical CPU cores.
        initializer: Optional[Callable[[], None]]
            The initializer function.
        """
        super().__init__()
        self.rpc_config = RPCConfig._normalized(rpc_config)
        self.evaluator_config = EvaluatorConfig._normalized(evaluator_config)
        self.cooldown_sec = cooldown_sec
        self.alloc_repeat = alloc_repeat
        self.f_create_session = f_create_session
        self.f_upload_module = f_upload_module
        self.f_alloc_argument = f_alloc_argument
        self.f_run_evaluator = f_run_evaluator
        self.f_cleanup = f_cleanup
        if max_workers is None:
            max_workers = cpu_count(logical=True)
        logger.info("RPCRunner: max_workers = %d", max_workers)
        self.pool = PopenPoolExecutor(
            max_workers=max_workers,
            timeout=self.rpc_config.session_timeout_sec,
            initializer=initializer,
        )
        self._sanity_check()

    def run(self, runner_inputs: List[RunnerInput]) -> List[RunnerFuture]:
        results: List[RunnerFuture] = []
        for runner_input in runner_inputs:
            future = RPCRunnerFuture(
                future=self.pool.submit(
                    _worker_func,
                    self.f_create_session,
                    self.f_upload_module,
                    self.f_alloc_argument,
                    self.f_run_evaluator,
                    self.f_cleanup,
                    self.rpc_config,
                    self.evaluator_config,
                    self.alloc_repeat,
                    str(runner_input.artifact_path),
                    str(runner_input.device_type),
                    tuple(arg_info.as_json()
                          for arg_info in runner_input.args_info),
                ),
                timeout_sec=self.rpc_config.session_timeout_sec,
            )
            results.append(future)  # type: ignore
        return results

    def _sanity_check(self) -> None:
        def _check(
            f_create_session,
            f_upload_module,
            f_alloc_argument,
            f_run_evaluator,
            f_cleanup,
        ) -> None:
            get_global_func_with_default_on_worker(name=f_create_session,
                                                   default=None)
            get_global_func_with_default_on_worker(name=f_upload_module,
                                                   default=None)
            get_global_func_with_default_on_worker(name=f_alloc_argument,
                                                   default=None)
            get_global_func_with_default_on_worker(name=f_run_evaluator,
                                                   default=None)
            get_global_func_with_default_on_worker(name=f_cleanup,
                                                   default=None)

        value = self.pool.submit(
            _check,
            self.f_create_session,
            self.f_upload_module,
            self.f_alloc_argument,
            self.f_run_evaluator,
            self.f_cleanup,
        )
        value.result()
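
A rough usage sketch for this runner. The tracker settings and evaluator numbers below are illustrative placeholders, and the runner inputs would normally come from the builder stage of the tuning pipeline:

from typing import List

from tvm.meta_schedule.runner import (
    EvaluatorConfig,
    RPCConfig,
    RPCRunner,
    RunnerInput,
)

rpc_config = RPCConfig(
    tracker_host="127.0.0.1",   # placeholder: a tracker you started yourself
    tracker_port=9190,
    tracker_key="local",
    session_priority=1,
    session_timeout_sec=30,
)
evaluator_config = EvaluatorConfig(
    number=3,
    repeat=1,
    min_repeat_ms=100,
    enable_cpu_cache_flush=False,
)
runner = RPCRunner(rpc_config=rpc_config, evaluator_config=evaluator_config)

# Normally produced by the builder; an empty list keeps the sketch runnable
# without a real compiled artifact.
runner_inputs: List[RunnerInput] = []
futures = runner.run(runner_inputs)
results = [future.result() for future in futures]
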
Example #6
class LocalBuilder(Builder):
    """Run compilation on local machine

    Parameters
    ----------
    timeout: float
        The timeout of a compilation
    n_parallel: int
        The number of tasks run in parallel. "None" will use all CPU cores
    build_kwargs: dict
        If supplied, additional kwargs passed to build_func. Overrides any build_kwargs supplied
        by the Runner.
    build_func: callable or str
        If 'default', use the default build function.
        If 'ndk', use the function for Android NDK.
        If 'stackvm', use the function for StackVM.
        If callable, use it as a custom build function; it is expected to provide a lib_format field.
    do_fork: bool
        If False, do not fork when building. Requires n_parallel=1.
    runtime: Optional[Runtime]
        Specify the runtime to generate artifacts for
    """
    def __init__(
        self,
        timeout=10,
        n_parallel=None,
        build_kwargs=None,
        build_func="default",
        do_fork=False,
        runtime=None,
    ):
        super(LocalBuilder, self).__init__(timeout, n_parallel, build_kwargs)

        if isinstance(build_func, str):
            if build_func == "default":
                build_func = tar.tar
            elif build_func == "ndk":
                build_func = ndk.create_shared
            elif build_func == "stackvm":
                build_func = stackvm.build
            else:
                raise ValueError("Invalid build_func: " + build_func)
        self.build_func = _WrappedBuildFunc(build_func, runtime)
        if not do_fork:
            assert n_parallel in (
                None,
                1,
            ), f"if do_fork=False, need n_parallel=None or 1; got {n_parallel}"
        self.executor = PopenPoolExecutor(
            timeout=timeout,
            initializer=reset_global_scope,
            initargs=(AutotvmGlobalScope.current, ))
        self.tmp_dir = tempfile.mkdtemp()

    def build(self, measure_inputs):
        results = []

        shutil.rmtree(self.tmp_dir, ignore_errors=True)
        self.tmp_dir = tempfile.mkdtemp()

        for i in range(0, len(measure_inputs), self.n_parallel):
            futures = []
            for inp in measure_inputs[i:i + self.n_parallel]:
                ret = self.executor.submit(self.build_func, inp, self.tmp_dir,
                                           **self.build_kwargs)
                futures.append(ret)

            for future in futures:
                try:
                    res = future.result()
                    if res.error is not None:
                        # instantiation error
                        if isinstance(res.error, InstantiationError):
                            res = MeasureResult(
                                (res.error, ),
                                MeasureErrorNo.INSTANTIATION_ERROR,
                                res.time_cost,
                                time.time(),
                            )

                        else:
                            if "InstantiationError" in str(res.error):
                                msg = str(res.error)
                                try:
                                    msg = msg.split("\n")[-2].split(": ")[1]
                                except Exception:  # pylint: disable=broad-except
                                    pass
                                res = MeasureResult(
                                    (InstantiationError(msg), ),
                                    MeasureErrorNo.INSTANTIATION_ERROR,
                                    res.time_cost,
                                    time.time(),
                                )

                            else:  # tvm error
                                res = MeasureResult(
                                    (res.error, ),
                                    MeasureErrorNo.COMPILE_HOST,
                                    res.time_cost,
                                    time.time(),
                                )
                except TimeoutError as ex:
                    res = MeasureResult((ex, ), MeasureErrorNo.BUILD_TIMEOUT,
                                        self.timeout, time.time())
                except ChildProcessError as ex:
                    res = MeasureResult(
                        (ex, ),
                        MeasureErrorNo.RUNTIME_DEVICE,
                        self.timeout,
                        time.time(),
                    )

                results.append(res)

        return results
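
For context, this builder is normally paired with a runner through autotvm.measure_option, and the resulting options are handed to a tuner's tune() call. A minimal sketch with illustrative numbers:

from tvm import autotvm

# Build on the local machine and also measure locally; pass measure_option to
# a tuner afterwards, e.g. tuner.tune(n_trial=..., measure_option=measure_option).
measure_option = autotvm.measure_option(
    builder=autotvm.LocalBuilder(timeout=10, build_func="default"),
    runner=autotvm.LocalRunner(number=10, repeat=1, timeout=4),
)
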
Example #7
class RPCRunner(Runner):
    """Run generated code on remove devices.
    This function will ask a RPC Tracker to get device for measurement.

    Parameters
    ----------
    timeout: float
        The timeout of a RPCRunner measurement task
    n_parallel: int
        The number of tasks run in parallel. "None" will use all CPU cores
    key: str
        The key of the device registered in the tracker
    host: str
        The host address of RPC Tracker
    port: int
        The port of RPC Tracker
    priority: int, optional
        The priority of this run request; a larger value means higher priority
        when requesting a device from the tracker.
    number: int
        The number of times to run the generated code for taking average.
        We call these runs one `repeat` of measurement.
    repeat : int, optional
        The number of times to repeat the measurement.
        In total, the generated code will be run (1 + number x repeat) times,
        where the first "1" is warm up and will be discarded.
        The returned result contains `repeat` costs,
        each of which is an average of `number` costs.
    min_repeat_ms: int, optional
        The minimum duration of one `repeat` in milliseconds.
        By default, one `repeat` contains `number` runs. If this parameter is set,
        the parameter `number` will be dynamically adjusted to meet the
        minimum duration requirement of one `repeat`,
        i.e., when the run time of one `repeat` falls below this time,
        the `number` parameter will be automatically increased.
    cooldown_interval: float, optional
        The cool down interval between two measurements.
    enable_cpu_cache_flush: bool
        Whether to flush cache on CPU between repeated measurements.
        Flushing cache can make the measured latency of one operator closer to
        its actual latency during end-to-end inference.
        To make this option effective, the argument `number` should also be set to 1.
        This only has an effect on CPU tasks.
    module_loader : ModuleLoader
        If given, a context manager that loads the module to be timed into the remote runtime.
        If not given, default_module_loader is used.
    """
    def __init__(
        self,
        key,
        host,
        port,
        priority=1,
        timeout=10,
        n_parallel=None,
        number=4,
        repeat=3,
        min_repeat_ms=0,
        cooldown_interval=0.1,
        enable_cpu_cache_flush=False,
        module_loader=None,
    ):
        super(RPCRunner, self).__init__(timeout, n_parallel)

        self.key = key
        self.host = host
        self.port = port
        self.priority = priority
        self.timeout = timeout

        self.number = number
        self.repeat = repeat
        self.min_repeat_ms = min_repeat_ms
        self._ref_input = None

        self.enable_cpu_cache_flush = enable_cpu_cache_flush
        self.cooldown_interval = cooldown_interval
        self.module_loader = module_loader

        self.executor = PopenPoolExecutor(
            timeout=timeout * (self.n_parallel + 1),
            initializer=reset_global_scope,
            initargs=(AutotvmGlobalScope.current, ),
        )

    @property
    def ref_input(self):
        """
        Fixed input for tuning special operators, e.g., sparse operators
        requiring indices as input.
        """
        return self._ref_input

    @ref_input.setter
    def ref_input(self, val):
        if val is not None:
            warnings.warn(
                "You are specifying fixed input for tuning the operator. "
                "Be sure your input always fits the operator. Some "
                "operators may conduct layout transformation during tuning, "
                "thus can lead to unexpected behaviors. ",
                RuntimeWarning,
            )
        self._ref_input = val

    def set_task(self, task):
        self.task = task

        if check_remote(task.target, self.key, self.host, self.port):
            logger.info("Get devices for measurement successfully!")
        else:
            raise RuntimeError(
                "Cannot get remote devices from the tracker. "
                "Please check the status of tracker by "
                "'python -m tvm.exec.query_rpc_tracker --port [THE PORT YOU USE]' "
                "and make sure you have free devices on the queue status.")

    def get_build_kwargs(self):
        kwargs = {}
        if ("cuda" in self.task.target.keys
                or "opencl" in self.task.target.keys
                or "rocm" in self.task.target.keys
                or "vulkan" in self.task.target.keys):
            remote = request_remote(self.key, self.host, self.port)
            dev = remote.device(str(self.task.target), 0)
            max_dims = dev.max_thread_dimensions
            kwargs["check_gpu"] = {
                "max_shared_memory_per_block": dev.max_shared_memory_per_block,
                "max_threads_per_block": dev.max_threads_per_block,
                "max_thread_x": max_dims[0],
                "max_thread_y": max_dims[1],
                "max_thread_z": max_dims[2],
            }

        return kwargs

    def run(self, measure_inputs, build_results):
        results = []
        remote_kwargs = dict(
            device_key=self.key,
            host=self.host,
            port=self.port,
            priority=self.priority,
            timeout=self.timeout,
        )

        for i in range(0, len(measure_inputs), self.n_parallel):
            futures = []
            for measure_inp, build_res in zip(
                    measure_inputs[i:i + self.n_parallel],
                    build_results[i:i + self.n_parallel]):
                module_loader = (self.module_loader if self.module_loader
                                 is not None else default_module_loader())
                ret = self.executor.submit(
                    run_through_rpc,
                    measure_inp,
                    build_res,
                    self.number,
                    self.repeat,
                    self.min_repeat_ms,
                    self.cooldown_interval,
                    remote_kwargs,
                    self.ref_input,
                    self.enable_cpu_cache_flush,
                    module_loader,
                )
                futures.append(ret)

            for future in futures:
                try:
                    res = future.result()
                    results.append(res)
                except Exception as ex:  # pylint: disable=broad-except
                    results.append(
                        MeasureResult((str(ex), ), MeasureErrorNo.RUN_TIMEOUT,
                                      self.timeout, time.time()))

        return results
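
A hedged sketch of how this runner is typically wired up: cross-compile with a LocalBuilder and measure on devices served by an RPC tracker. The key, host, and port below are placeholders for your own tracker setup:

from tvm import autotvm

measure_option = autotvm.measure_option(
    builder=autotvm.LocalBuilder(build_func="ndk"),  # e.g. cross-compile for Android
    runner=autotvm.RPCRunner(
        key="android",     # device key registered in the tracker (placeholder)
        host="127.0.0.1",  # tracker host (placeholder)
        port=9190,         # tracker port (placeholder)
        number=4,
        repeat=3,
        timeout=10,
    ),
)
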
Example #8
class RPCRunner(PyRunner):
    """RPC based runner

    Parameters
    ----------
    rpc_config: RPCConfig
        The rpc configuration.
    evaluator_config: EvaluatorConfig
        The evaluator configuration.
    cooldown_sec: float
        The cooldown in seconds. TODO(@junrushao1994,@zxybazh): This is not used yet.
    alloc_repeat: int
        The number of times to repeat the allocation.
    f_create_session: Union[T_CREATE_SESSION, str, None]
        The function name to create the session or the function itself.
    f_upload_module: Union[T_UPLOAD_MODULE, str, None]
        The function name to upload the module or the function itself.
    f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
        The function name to allocate the arguments or the function itself.
    f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
        The function name to run the evaluator or the function itself.
    f_cleanup: Union[T_CLEANUP, str, None]
        The function name to cleanup the session or the function itself.
    pool: PopenPoolExecutor
        The popen pool executor.

    Attributes
    ----------
    T_CREATE_SESSION : typing._GenericAlias
        The signature of the function `f_create_session`, which is:

        .. code-block:: python

        def default_create_session(rpc_config: RPCConfig) -> RPCSession:
            ...

    T_UPLOAD_MODULE : typing._GenericAlias
        The signature of the function `f_upload_module`, which is:

        .. code-block:: python

        def default_upload_module(
            session: RPCSession,
            local_path: str,
            remote_path: str,
        ) -> Module:
            ...

    T_ALLOC_ARGUMENT : typing._GenericAlias
        The signature of the function `f_alloc_argument`, which is:

        .. code-block:: python

        def default_alloc_argument(
            session: RPCSession,
            device: Device,
            args_info: T_ARG_INFO_JSON_OBJ_LIST,
            alloc_repeat: int,
        ) -> List[T_ARGUMENT_LIST]:
            ...

    T_RUN_EVALUATOR : typing._GenericAlias
        The signature of the function `f_run_evaluator`, which is:

        .. code-block:: python

        def default_run_evaluator(
            session: RPCSession,
            rt_mod: Module,
            device: Device,
            evaluator_config: EvaluatorConfig,
            repeated_args: List[T_ARGUMENT_LIST],
        ) -> List[float]:
            ...

    T_CLEANUP : typing._GenericAlias
        The signature of the function `f_cleanup`, which is:

        .. code-block:: python

        def default_cleanup(
            session: Optional[RPCSession],
            remote_path: Optional[str],
        ) -> None:
            ...
    """

    T_CREATE_SESSION = Callable[
        [RPCConfig],  # The RPC configuration
        RPCSession,  # The RPC Session
    ]
    T_UPLOAD_MODULE = Callable[
        [
            RPCSession,  # The RPC Session
            str,  # local path to the artifact
            str,  # remote path to the artifact
        ],
        Module,  # the Module opened on the remote
    ]
    T_ALLOC_ARGUMENT = Callable[
        [
            RPCSession,  # The RPC Session
            Device,  # The device on the remote
            T_ARG_INFO_JSON_OBJ_LIST,  # The metadata information of the arguments to be allocated
            int,  # The number of repeated allocations to be done
        ],
        List[T_ARGUMENT_LIST],  # A list of argument lists
    ]
    T_RUN_EVALUATOR = Callable[
        [
            RPCSession,  # The RPC Session
            Module,  # The Module opened on the remote
            Device,  # The device on the remote
            EvaluatorConfig,  # The evaluator configuration
            List[T_ARGUMENT_LIST],  # A list of argument lists
        ],
        List[float],  # A list of running time
    ]
    T_CLEANUP = Callable[
        [
            Optional[RPCSession],  # The RPC Session to be cleaned up
            Optional[str],  # remote path to the artifact
        ],
        None,
    ]

    rpc_config: RPCConfig
    evaluator_config: EvaluatorConfig
    cooldown_sec: float
    alloc_repeat: int

    f_create_session: Union[T_CREATE_SESSION, str, None]
    f_upload_module: Union[T_UPLOAD_MODULE, str, None]
    f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
    f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
    f_cleanup: Union[T_CLEANUP, str, None]

    pool: PopenPoolExecutor

    def __init__(
        self,
        rpc_config: Optional[RPCConfig] = None,
        evaluator_config: Optional[EvaluatorConfig] = None,
        cooldown_sec: float = 0.0,
        alloc_repeat: int = 1,
        f_create_session: Union[T_CREATE_SESSION, str, None] = None,
        f_upload_module: Union[T_UPLOAD_MODULE, str, None] = None,
        f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None] = None,
        f_run_evaluator: Union[T_RUN_EVALUATOR, str, None] = None,
        f_cleanup: Union[T_CLEANUP, str, None] = None,
        max_workers: int = 1,
        initializer: Optional[Callable[[], None]] = None,
    ) -> None:
        """Constructor

        Parameters
        ----------
        rpc_config: RPCConfig
            The rpc configuration.
        evaluator_config: EvaluatorConfig
            The evaluator configuration.
        cooldown_sec: float
            The cooldown in seconds.
        alloc_repeat: int
            The number of times to repeat the argument allocation.
        f_create_session: Union[T_CREATE_SESSION, str, None]
            The function name to create the session or the function itself.
        f_upload_module: Union[T_UPLOAD_MODULE, str, None]
            The function name to upload the module or the function itself.
        f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None]
            The function name to allocate the arguments or the function itself.
        f_run_evaluator: Union[T_RUN_EVALUATOR, str, None]
            The function name to run the evaluator or the function itself.
        f_cleanup: Union[T_CLEANUP, str, None]
            The function name to cleanup the session or the function itself.
        max_workers: int = 1
            The maximum number of connections. Defaults to 1.
        initializer: Optional[Callable[[], None]]
            The initializer function.
        """
        super().__init__()
        self.rpc_config = RPCConfig._normalized(rpc_config)
        self.evaluator_config = EvaluatorConfig._normalized(evaluator_config)
        self.cooldown_sec = cooldown_sec
        self.alloc_repeat = alloc_repeat
        self.f_create_session = f_create_session
        self.f_upload_module = f_upload_module
        self.f_alloc_argument = f_alloc_argument
        self.f_run_evaluator = f_run_evaluator
        self.f_cleanup = f_cleanup
        logger.info("RPCRunner: max_workers = %d", max_workers)
        self.pool = PopenPoolExecutor(
            max_workers=max_workers,
            timeout=self.rpc_config.session_timeout_sec,
            initializer=initializer,
        )
        self._sanity_check()

    def run(self, runner_inputs: List[RunnerInput]) -> List[RunnerFuture]:
        results: List[RunnerFuture] = []
        for runner_input in runner_inputs:
            future = RPCRunnerFuture(
                future=self.pool.submit(
                    RPCRunner._worker_func,
                    self.f_create_session,
                    self.f_upload_module,
                    self.f_alloc_argument,
                    self.f_run_evaluator,
                    self.f_cleanup,
                    self.rpc_config,
                    self.evaluator_config,
                    self.alloc_repeat,
                    str(runner_input.artifact_path),
                    str(runner_input.device_type),
                    tuple(arg_info.as_json() for arg_info in runner_input.args_info),
                ),
                timeout_sec=self.rpc_config.session_timeout_sec,
            )
            results.append(future)
        return results

    def _sanity_check(self) -> None:
        def _check(
            f_create_session,
            f_upload_module,
            f_alloc_argument,
            f_run_evaluator,
            f_cleanup,
        ) -> None:
            get_global_func_with_default_on_worker(name=f_create_session, default=None)
            get_global_func_with_default_on_worker(name=f_upload_module, default=None)
            get_global_func_with_default_on_worker(name=f_alloc_argument, default=None)
            get_global_func_with_default_on_worker(name=f_run_evaluator, default=None)
            get_global_func_with_default_on_worker(name=f_cleanup, default=None)

        value = self.pool.submit(
            _check,
            self.f_create_session,
            self.f_upload_module,
            self.f_alloc_argument,
            self.f_run_evaluator,
            self.f_cleanup,
        )
        value.result()

    @staticmethod
    def _worker_func(
        _f_create_session: Union[T_CREATE_SESSION, str, None],
        _f_upload_module: Union[T_UPLOAD_MODULE, str, None],
        _f_alloc_argument: Union[T_ALLOC_ARGUMENT, str, None],
        _f_run_evaluator: Union[T_RUN_EVALUATOR, str, None],
        _f_cleanup: Union[T_CLEANUP, str, None],
        rpc_config: RPCConfig,
        evaluator_config: EvaluatorConfig,
        alloc_repeat: int,
        artifact_path: str,
        device_type: str,
        args_info: T_ARG_INFO_JSON_OBJ_LIST,
    ) -> List[float]:
        # Step 0. Get the registered functions
        f_create_session: RPCRunner.T_CREATE_SESSION = get_global_func_with_default_on_worker(
            _f_create_session, default_create_session
        )
        f_upload_module: RPCRunner.T_UPLOAD_MODULE = get_global_func_with_default_on_worker(
            _f_upload_module, default_upload_module
        )
        f_alloc_argument: RPCRunner.T_ALLOC_ARGUMENT = get_global_func_with_default_on_worker(
            _f_alloc_argument, default_alloc_argument
        )
        f_run_evaluator: RPCRunner.T_RUN_EVALUATOR = get_global_func_with_default_on_worker(
            _f_run_evaluator, default_run_evaluator
        )
        f_cleanup: RPCRunner.T_CLEANUP = get_global_func_with_default_on_worker(
            _f_cleanup, default_cleanup
        )
        # Managed resources
        session: Optional[RPCSession] = None
        remote_path: Optional[str] = None

        @contextmanager
        def resource_handler():
            try:
                yield
            finally:
                # Final step. Always clean up
                f_cleanup(session, remote_path)

        with resource_handler():
            # Step 1. Create session
            session = f_create_session(rpc_config)
            device = session.device(dev_type=device_type, dev_id=0)
            # Step 2. Upload the module
            _, remote_path = osp.split(artifact_path)
            local_path: str = artifact_path
            rt_mod: Module = f_upload_module(session, local_path, remote_path)
            # Step 3: Allocate input arguments
            repeated_args: List[T_ARGUMENT_LIST] = f_alloc_argument(
                session,
                device,
                args_info,
                alloc_repeat,
            )
            # Step 4: Run time_evaluator
            costs: List[float] = f_run_evaluator(
                session,
                rt_mod,
                device,
                evaluator_config,
                repeated_args,
            )
        return costs
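
Because each f_* hook accepts either a registered global function name or a plain Python callable with the documented signature, overriding one is straightforward. A sketch with a custom cleanup hook; the logging body and tracker settings are illustrative, not TVM defaults:

import logging
from typing import Optional

from tvm.meta_schedule.runner import RPCConfig, RPCRunner
from tvm.rpc import RPCSession


def my_cleanup(session: Optional[RPCSession], remote_path: Optional[str]) -> None:
    # Matches the T_CLEANUP signature documented above; only logs what would
    # be cleaned up.
    logging.info("cleanup: session=%s remote_path=%s", session, remote_path)


runner = RPCRunner(
    rpc_config=RPCConfig(
        tracker_host="127.0.0.1",  # placeholder tracker settings
        tracker_port=9190,
        tracker_key="local",
        session_timeout_sec=30,
    ),
    f_cleanup=my_cleanup,
)
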