def repopulate_pool(self):
    """
    Start new worker threads until the pool holds ``self._processes`` of them.

    Every worker thread carries an ``id`` attribute. New threads take ids from
    ``range(self._processes)`` that no thread currently in the pool uses, and
    each of them receives its own element of the first initializer argument,
    selected by that id.
    """
    taken_ids = {thread.id for thread in self._pool}
    free_ids = set(range(self._processes)).difference(taken_ids)
    shortfall = self._processes - len(self._pool)
    # Pairing with range(shortfall) caps how many free ids are consumed.
    for tid, _ in zip(free_ids, range(shortfall)):
        # Work on a copy so the shared initargs are never mutated, then
        # replace the first entry with the element belonging to this id.
        worker_initargs = list(self._initargs)
        worker_initargs[0] = worker_initargs[0][tid]
        worker_args = [
            self._inqueue.get,
            self._outqueue.put,
            self._initializer,
            worker_initargs,
            self._maxtasksperchild,
        ]
        # The exception-wrapping flag is optional on the pool object.
        if hasattr(self, "_wrap_exception"):
            worker_args.append(self._wrap_exception)
        thread = Thread(target=self.worker, args=tuple(worker_args))
        thread.daemon = True
        thread.id = tid
        self._pool.append(thread)
        thread.start()
        default_logger.debug(f"Added worker thread with id {tid}")
def repopulate_pool(self):
    """
    Start new worker processes until the pool holds ``self._processes``
    of them.

    Each new process runs ``self.worker`` with the input queue getter, the
    output queue putter, the initializer, its arguments and the per-worker
    task limit.

    Override this method to implement your own pool.
    """
    shortfall = self._processes - len(self._pool)
    for _ in range(shortfall):
        worker_args = (
            self._inqueue.get,
            self._outqueue.put,
            self._initializer,
            self._initargs,
            self._maxtasksperchild,
        )
        proc = Process(target=self.worker, args=worker_args, ctx=self._ctx)
        # Registered before start, same as the ordering used originally.
        self._pool.append(proc)
        proc.name = proc.name.replace("Process", "PoolWorker")
        proc.daemon = True
        proc.start()
        default_logger.debug(f"Added worker {proc.name}")
def repopulate_pool(self):
    """
    Start new worker threads until the pool holds ``self._processes`` of them.

    New threads are tagged with an ``id`` taken from
    ``range(self._processes)`` that no thread currently in the pool uses.

    Override this method to implement your own pool.
    """
    taken_ids = {thread.id for thread in self._pool}
    free_ids = set(range(self._processes)).difference(taken_ids)
    shortfall = self._processes - len(self._pool)
    # Pairing with range(shortfall) caps how many free ids are consumed.
    for tid, _ in zip(free_ids, range(shortfall)):
        worker_args = (
            self._inqueue.get,
            self._outqueue.put,
            self._initializer,
            self._initargs,
            self._maxtasksperchild,
        )
        thread = Thread(target=self.worker, args=worker_args)
        self._pool.append(thread)
        thread.daemon = True
        thread.id = tid
        thread.start()
        default_logger.debug(f"Added worker thread with id {tid}")
def repopulate_pool(self):
    """
    Start new worker processes until the pool holds ``self._processes``
    of them.

    Every worker process carries an ``id`` attribute. New processes take ids
    from ``range(self._processes)`` that no process currently in the pool
    uses, and each of them receives its own element of the first initializer
    argument, selected by that id.
    """
    taken_ids = {proc.id for proc in self._pool}
    free_ids = set(range(self._processes)).difference(taken_ids)
    shortfall = self._processes - len(self._pool)
    # Pairing with range(shortfall) caps how many free ids are consumed.
    for pid, _ in zip(free_ids, range(shortfall)):
        # Work on a copy so the shared initargs are never mutated, then
        # replace the first entry with the element belonging to this id.
        worker_initargs = list(self._initargs)
        worker_initargs[0] = worker_initargs[0][pid]
        worker_args = [
            self._inqueue.get,
            self._outqueue.put,
            self._initializer,
            worker_initargs,
            self._maxtasksperchild,
        ]
        # The exception-wrapping flag is optional on the pool object.
        if hasattr(self, "_wrap_exception"):
            worker_args.append(self._wrap_exception)
        proc = Process(target=self.worker, args=tuple(worker_args))
        proc.id = pid
        self._pool.append(proc)
        proc.name = proc.name.replace("Process", "CtxPoolWorker")
        proc.daemon = self._is_daemon
        proc.start()
        default_logger.debug("Added worker")
def debug_with_process(message):
    """
    Log ``message`` at debug level, prefixed with the current process rank
    and the file name and line number of the caller.

    Nothing is logged, and the call stack is not inspected, unless the level
    of ``default_logger`` is exactly ``DEBUG``.

    Args:
        message: Content appended after the rank and location prefix.
    """
    if default_logger.level == DEBUG:
        # Index 1 of the stack is the frame that called this function.
        caller_frame = stack()[1][0]
        info = getframeinfo(caller_frame)
        rank = get_cur_rank()
        location = f"<{info.filename},L{info.lineno}>"
        default_logger.debug(f"Process [{rank}]: {location} {message}")
def terminate(self):
    """
    Stop the pool right away.

    Switches the pool state to ``PoolStates.TERMINATE`` and then invokes
    ``self._terminate()``, logging a debug message before and after.
    """
    log_debug = default_logger.debug
    log_debug("Terminating pool")
    # The state is switched first so that it is already TERMINATE while
    # ``_terminate`` runs.
    self._state = PoolStates.TERMINATE
    self._terminate()
    log_debug("Terminating finished")
def terminate_workers(self):
    """
    Forcefully terminate every worker that has no exit code yet.

    Override this method to implement your own pool.
    """
    for proc in self._pool:
        if proc.exitcode is not None:
            # Already exited, nothing to terminate.
            continue
        proc.terminate()
        default_logger.debug(f"Terminated worker {proc.pid}")
def join_workers(self):
    """
    Block until every worker that is still alive has been joined.

    Override this method to implement your own pool.
    """
    for proc in self._pool:
        if not proc.is_alive():
            # Worker has already exited.
            continue
        proc.join()
        default_logger.debug(f"Joined worker {proc.pid}")
def join(self):
    """
    Wait for the worker handler, the result handler and the workers to
    finish, in that order.

    Raises:
        ValueError: If the pool state is still ``PoolStates.RUN``.
    """
    log_debug = default_logger.debug
    log_debug("Joining pool")
    if self._state == PoolStates.RUN:
        raise ValueError("Pool is still running")
    # Each handler attribute is looked up right before it is joined.
    for handler_attr in ("_worker_handler", "_result_handler"):
        getattr(self, handler_attr).join()
    self.join_workers()
    log_debug("Joining finished")
def close(self):
    """
    Request a soft shutdown of the pool.

    A pool in state ``PoolStates.RUN`` is moved to ``PoolStates.CLOSE``;
    a pool in any other state is left untouched.

    Remember to call ``join()`` afterwards to wait for full shutdown.
    """
    default_logger.debug("Closing pool")
    if self._state != PoolStates.RUN:
        # Already closing or terminated.
        return
    self._state = PoolStates.CLOSE
def _handle_workers(pool: "BasePool"):
    """
    Worker handler loop.

    Calls ``pool.maintain_pool()`` every 0.1 seconds while the pool is
    running, or while its cache is non-empty and the pool has not been
    terminated. Afterwards one ``None`` sentinel per pool member is put on
    the input queue to tell the workers to stop.
    """
    while True:
        # De Morgan form of the "keep going" condition: stop once the pool
        # is not running and the cache is drained or the pool is terminated.
        if pool._state != PoolStates.RUN and not (
            pool._cache and pool._state != PoolStates.TERMINATE
        ):
            break
        pool.maintain_pool()
        time.sleep(0.1)

    # Send stop signals to workers.
    for _ in pool._pool:
        pool.pool_inqueue_put(None)
    default_logger.debug("Worker handler exiting")
def worker(
    get,
    put,
    initializer: Callable = None,
    initargs: Tuple = (),
    maxtasks: int = None,
):
    """
    The default worker function executed by worker processes.
    Override this method to implement your own pool.

    Tasks are fetched with ``get`` until a ``None`` sentinel arrives or
    ``maxtasks`` tasks have been completed. For every task a
    ``(job_idx, chunk_idx, (success, value))`` tuple is handed to ``put``,
    where ``value`` is the function result on success and an
    ``ExceptionWithTraceback`` wrapping the raised exception otherwise.

    Args:
        get: A function of form ``get() -> Any`` used to get tasks.
        put: A function of form ``put(obj: Any)`` used to put results.
        initializer: An initializer function to init global environment
            in worker processes.
        initargs: Initializer arguments.
        maxtasks: Maximum number of tasks a worker needs to run before
            it exits, ``None`` means no limit.

    Raises:
        AssertionError: If ``maxtasks`` is neither ``None`` nor an integer
            greater than or equal to 1.
    """
    if maxtasks is not None and not (isinstance(maxtasks, int) and maxtasks >= 1):
        # ``!r`` is a conversion and must not follow a colon: written as
        # ``{maxtasks:!r}`` it is taken as a format spec and raises ValueError
        # instead of the intended AssertionError.
        raise AssertionError(f"Maxtasks {maxtasks!r} is not valid")

    if initializer is not None:
        initializer(*initargs)

    completed = 0
    # maxtasks is either None or a validated integer >= 1 at this point.
    while maxtasks is None or completed < maxtasks:
        task = get()
        if task is None:
            default_logger.debug("Worker got sentinel -- exiting")
            break

        # Job index is the index of the submitted batch of tasks.
        # Chunk index is the index of the chunk got by the worker.
        job_idx, chunk_idx, func, args, kwds = task
        try:
            result = (True, func(*args, **kwds))
        except Exception as e:
            # Failures are reported back through the result channel rather
            # than ending the worker loop.
            result = (False, ExceptionWithTraceback(e))
        put((job_idx, chunk_idx, result))
        completed += 1
    default_logger.debug(f"Worker exiting after {completed} tasks")
def maintain_pool(self):
    """
    Inspect every worker, clean up the ones that have exited and refill
    the pool.

    An exception recorded on a worker is logged at critical level. Workers
    which are no longer alive are joined and removed from the pool, then
    ``repopulate_pool()`` is called to start replacements.

    Override this method to implement your own pool.
    """
    # Walk from the back so removing an entry never shifts the indexes
    # that are still to be visited.
    for idx in range(len(self._pool) - 1, -1, -1):
        candidate = self._pool[idx]
        if candidate.exception is not None:
            default_logger.critical(candidate.exception, exc_info=True)
        if candidate.is_alive():
            continue
        # Worker exited.
        default_logger.debug(f"Cleaning up worker with id {candidate.id}")
        candidate.join()
        self._pool.pop(idx)
    self.repopulate_pool()
def _handle_results(pool: "BasePool"):
    """
    Result handler loop.

    Polls the output queue with a 0.1 second timeout while the pool is
    running, or while its cache is non-empty and the pool has not been
    terminated, and stores every received result into the matching cache
    entry. An ``OSError`` or ``EOFError`` raised by the queue ends the
    handler immediately; a timeout just triggers the next poll.
    """
    while pool._state == PoolStates.RUN or (
        pool._cache and pool._state != PoolStates.TERMINATE
    ):
        try:
            result = pool.pool_outqueue_get(0.1)
        except TimeoutError:
            # Checked before OSError on purpose: the builtin TimeoutError is
            # a subclass of OSError, so with the handlers in the opposite
            # order a plain timeout would be treated as fatal and this
            # branch could never run.
            continue
        except (OSError, EOFError) as e:
            default_logger.debug("Result handler got EOFError/OSError -- exiting")
            default_logger.critical(e, exc_info=True)
            return

        job_idx, chunk_idx, obj = result
        try:
            pool._cache[job_idx].set(chunk_idx, obj)
        except KeyError:
            # Deliberately ignored; presumably the job is no longer tracked
            # in the cache -- verify against the code that removes entries.
            pass
    default_logger.debug("Result handler exiting")
def repopulate_pool(self):
    # DOC INHERITED

    # Self-assignments exist only to attach type comments for tooling.
    self._inqueue = self._inqueue  # type: MultiP2PQueue
    self._outqueue = self._outqueue  # type: MultiP2PQueue

    shortfall = self._processes - len(self._pool)
    # NOTE(review): the sub-queue index is the loop counter, which restarts
    # at 0 on every call, not the slot of the worker being replaced --
    # confirm this is intended when only part of the pool is refilled.
    for index in range(shortfall):
        worker_args = (
            self._inqueue.get_sub_queue(index).get,
            self._outqueue.get_sub_queue(index).put,
            self._initializer,
            self._initargs,
            self._maxtasksperchild,
        )
        proc = Process(target=self.worker, args=worker_args, ctx=self._ctx)
        self._pool.append(proc)
        proc.name = proc.name.replace("Process", "P2PPoolWorker")
        proc.daemon = True
        proc.start()
        default_logger.debug(f"Added worker {proc.name}")
def _finalize_pool(cls, pool, worker_handler, result_handler):
    """
    Finalizer callback which shuts the pool down.

    The pool state is set to ``PoolStates.TERMINATE``, both handler threads
    are joined (a handler is skipped when it is the calling thread), and
    finally the workers are terminated and joined.

    Args:
        pool: The pool being finalized.
        worker_handler: Thread running the worker handler.
        result_handler: Thread running the result handler.
    """
    # This is guaranteed to only be called once.
    log_debug = default_logger.debug
    log_debug("Finalizing pool")
    pool._state = PoolStates.TERMINATE

    # The worker handler must have exited before workers are terminated,
    # otherwise it could restart workers behind our back.
    handlers = (
        ("Joining worker handler", worker_handler),
        ("Joining result handler", result_handler),
    )
    for message, handler in handlers:
        log_debug(message)
        # A thread cannot join itself.
        if threading.current_thread() is not handler:
            handler.join()

    # Terminate workers which haven't already finished.
    log_debug("Terminating workers")
    pool.terminate_workers()

    log_debug("Joining pool workers")
    pool.join_workers()
def join_workers(self):
    """
    Join every worker in the pool that is still alive, logging its id first.
    """
    for member in self._pool:
        if not member.is_alive():
            # Worker has already exited.
            continue
        default_logger.debug(f"Cleaning up worker with id {member.id}")
        member.join()