def _wait_futures(self, data):
    """
    Block until the futures stored under ``data['results']`` have been
    waited on, then replace them in place with their results.

    :param data: Dict holding the list of futures under the 'results' key.
                 On return the same key holds the list of collected results.
    """
    logger.info('Reduce function: waiting for map results')
    pending = data['results']
    wait_storage(pending, self.internal_storage, download_results=True)

    collected = []
    for fut in pending:
        # Only futures flagged as done and with a falsy `futures` attribute
        # contribute a result (presumably the others delegated their output
        # to further futures -- confirm against the future class).
        if fut.done and not fut.futures:
            collected.append(fut.result())

    # Empty the caller's list in place before publishing the results.
    pending.clear()
    data['results'] = collected
def reduce_function_wrapper(fut_list, internal_storage, ibm_cos):
    """
    Wait for every future in ``fut_list``, gather their results and call
    ``reduce_function`` with them.

    :param fut_list: List of futures to wait for. It is emptied in place
                     once the results have been collected.
    :param internal_storage: Storage handler passed to ``wait_storage``; also
                             forwarded to ``reduce_function`` when its
                             signature declares an 'internal_storage' parameter.
    :param ibm_cos: Forwarded to ``reduce_function`` when its signature
                    declares an 'ibm_cos' parameter.
    :return: Whatever ``reduce_function`` returns.
    """
    import ast

    logger.info('Waiting for results')

    # The SHOW_MEMORY_USAGE environment variable used to be passed to eval(),
    # which executes arbitrary code and raises on values such as "true" or "".
    # Parse it as a plain literal instead; every literal accepted before
    # ("True", "False", "0", "1", ...) keeps the same truthiness.
    raw_flag = os.environ.get('SHOW_MEMORY_USAGE')
    if raw_flag is None:
        show_memory = False
    else:
        flag_text = raw_flag.strip()
        try:
            show_memory = bool(ast.literal_eval(flag_text))
        except (ValueError, TypeError, SyntaxError):
            # Not a Python literal: accept the usual textual spellings.
            show_memory = flag_text.lower() in ('true', 'yes', 'on')

    # Wait for all results
    wait_storage(fut_list, internal_storage, download_results=True)
    results = [f.result() for f in fut_list if f.done and not f.futures]
    fut_list.clear()
    reduce_func_args = {'results': results}

    if show_memory:
        logger.debug("Memory usage after getting the results: {}".format(
            utils.get_current_memory_usage()))

    # Run reduce function, injecting only the optional arguments it declares
    func_sig = inspect.signature(reduce_function)
    if 'ibm_cos' in func_sig.parameters:
        reduce_func_args['ibm_cos'] = ibm_cos
    if 'internal_storage' in func_sig.parameters:
        reduce_func_args['internal_storage'] = internal_storage

    return reduce_function(**reduce_func_args)
def wait(self, fs=None, throw_except=True, return_when=ALL_COMPLETED, download_results=False,
         timeout=EXECUTION_TIMEOUT, THREADPOOL_SIZE=128, WAIT_DUR_SEC=1):
    """
    Wait for the Future instances (possibly created by different Executor instances)
    given by fs to complete. Returns a 2-tuple of lists. The first list contains the
    futures that completed before the wait returned. The second list contains the
    futures that did not complete. timeout can be used to control the maximum number
    of seconds to wait before returning.

    When fs is empty, the futures are taken from the executor's own jobs: jobs in
    Running state when only waiting for statuses, or jobs not yet Done when
    downloading results. The state of each selected job is updated accordingly.

    :param fs: Futures list. Default None
    :param throw_except: Re-raise exception if call raised. Default True.
    :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`
    :param download_results: Download results. Default false (Only get statuses)
    :param timeout: Timeout of waiting for results, in seconds. None disables the alarm.
    :param THREADPOOL_SIZE: Number of threads to use. Default 128
    :param WAIT_DUR_SEC: Time interval between each check.

    :return: `(fs_done, fs_notdone)`
        where `fs_done` is a list of futures that have completed
        and `fs_notdone` is a list of futures that have not completed.
    :rtype: 2-tuple of list
    """
    if not fs:
        fs = []
        for job in self.jobs:
            job_info = self.jobs[job]
            if not download_results and job_info['state'] == JobState.Running:
                fs.extend(job_info['futures'])
                job_info['state'] = JobState.Ready
            elif download_results and job_info['state'] != JobState.Done:
                fs.extend(job_info['futures'])
                job_info['state'] = JobState.Done

    # A single future may be passed instead of a list
    futures = fs if isinstance(fs, list) else [fs]

    if not futures:
        raise Exception(
            'You must run the call_async(), map() or map_reduce(), or provide'
            ' a list of futures before calling the monitor()/get_result() method'
        )

    def _not_done(fut):
        # When downloading results only `done` counts as finished;
        # otherwise `ready` is enough.
        if download_results:
            return not fut.done
        return not fut.ready and not fut.done

    if download_results:
        msg = 'ExecutorID {} - Getting results...'.format(self.executor_id)
    else:
        msg = 'ExecutorID {} - Waiting for functions to complete...'.format(
            self.executor_id)
    logger.info(msg)
    if not self.log_level and self._state == FunctionExecutor.State.Running:
        print(msg)

    # SIGALRM-based timeout, armed only on unix systems. A None timeout is
    # skipped because signal.alarm() only accepts integers.
    if is_unix_system() and timeout is not None:
        signal.signal(signal.SIGALRM, timeout_handler)
        signal.alarm(timeout)

    pbar = None
    if not self.is_remote_cluster and self._state == FunctionExecutor.State.Running \
            and not self.log_level:
        from tqdm.auto import tqdm
        if is_notebook():
            pbar = tqdm(bar_format='{n}/|/ {n_fmt}/{total_fmt}',
                        total=len(futures))  # ncols=800
        else:
            print()
            pbar = tqdm(bar_format=' {l_bar}{bar}| {n_fmt}/{total_fmt} ',
                        total=len(futures), disable=False)

    try:
        if self.rabbitmq_monitor:
            logger.info('Using RabbitMQ to monitor function activations')
            wait_rabbitmq(futures, self.internal_storage,
                          rabbit_amqp_url=self.rabbit_amqp_url,
                          download_results=download_results,
                          throw_except=throw_except, pbar=pbar,
                          return_when=return_when,
                          THREADPOOL_SIZE=THREADPOOL_SIZE)
        else:
            wait_storage(futures, self.internal_storage,
                         download_results=download_results,
                         throw_except=throw_except, return_when=return_when,
                         pbar=pbar, THREADPOOL_SIZE=THREADPOOL_SIZE,
                         WAIT_DUR_SEC=WAIT_DUR_SEC)

    except FunctionException as e:
        # Disarm the alarm and close the progress bar before printing, so the
        # error report is not interleaved with the bar. The `finally` clause
        # below repeats both steps when sys.exit() unwinds.
        if is_unix_system():
            signal.alarm(0)
        if pbar:
            pbar.close()
        logger.info(e.msg)
        if not self.log_level:
            if not is_notebook():
                print()
            print(e.msg)
        if e.exc_msg:
            logger.info('Exception: ' + e.exc_msg)
            if not self.log_level:
                print('--> Exception: ' + e.exc_msg)
        else:
            print()
            traceback.print_exception(*e.exception)
        sys.exit()

    except TimeoutError:
        not_done_count = sum(1 for f in futures if _not_done(f))
        msg = ('ExecutorID {} - Raised timeout of {} seconds waiting for results'
               ' - Total Activations not done: {}'
               .format(self.executor_id, timeout, not_done_count))
        self._state = FunctionExecutor.State.Error

    except KeyboardInterrupt:
        not_done_count = sum(1 for f in futures if _not_done(f))
        msg = ('ExecutorID {} - Cancelled - Total Activations not done: {}'
               .format(self.executor_id, not_done_count))
        self._state = FunctionExecutor.State.Error

    except Exception:
        if not self.is_remote_cluster:
            self.clean()
        # Bare raise keeps the original traceback untouched
        raise

    finally:
        if is_unix_system():
            signal.alarm(0)
        if pbar:
            pbar.close()
            if not is_notebook():
                print()
        if self._state == FunctionExecutor.State.Error:
            # `msg` holds the timeout/cancel report when one of the handlers
            # above set the Error state.
            logger.debug(msg)
            if not self.log_level:
                print(msg)
        if download_results and self.data_cleaner and not self.is_remote_cluster:
            self.clean()

    fs_done = [f for f in futures if not _not_done(f)]
    fs_notdone = [f for f in futures if _not_done(f)]
    if download_results:
        self._state = FunctionExecutor.State.Done
    else:
        self._state = FunctionExecutor.State.Ready

    return fs_done, fs_notdone
def wait(self, fs=None, throw_except=True, return_when=ALL_COMPLETED, download_results=False,
         timeout=None, THREADPOOL_SIZE=128, WAIT_DUR_SEC=1):
    """
    Wait for the Future instances (possibly created by different Executor instances)
    given by fs to complete. Returns a 2-tuple of lists. The first list contains the
    futures that completed, including those that were already complete when the
    method was called. The second list contains the futures that did not complete.
    timeout can be used to control the maximum number of seconds to wait before
    returning.

    When fs is empty, the executor's own `self.futures` list is used. If every
    future is already complete the method returns immediately without waiting.

    :param fs: Futures list. Default None
    :param throw_except: Re-raise exception if call raised. Default True.
    :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`
    :param download_results: Download results. Default false (Only get statuses)
    :param timeout: Timeout of waiting for results, in seconds. Default None (no timeout).
    :param THREADPOOL_SIZE: Number of threads to use. Default 128
    :param WAIT_DUR_SEC: Time interval between each check.

    :return: `(fs_done, fs_notdone)`
        where `fs_done` is a list of futures that have completed
        and `fs_notdone` is a list of futures that have not completed.
    :rtype: 2-tuple of list
    """
    all_futures = fs if fs else self.futures
    # A single future may be passed instead of a list
    if not isinstance(all_futures, list):
        all_futures = [all_futures]

    if not all_futures:
        raise Exception('You must run the call_async(), map() or map_reduce(), or provide'
                        ' a list of futures before calling the wait()/get_result() method')

    def _is_pending(fut):
        # When downloading results only `done` counts as finished;
        # otherwise `ready` is enough.
        if download_results:
            return not fut.done
        return not fut.ready and not fut.done

    if download_results:
        msg = 'ExecutorID {} - Getting results...'.format(self.executor_id)
    else:
        msg = 'ExecutorID {} - Waiting for functions to complete...'.format(self.executor_id)

    # Only the unfinished futures are waited on. The complete list is kept so
    # that futures which were already finished are still reported as done
    # (previously they were filtered out before `fs_done` was computed, so
    # the method returned an empty list for them).
    pending = [f for f in all_futures if _is_pending(f)]
    if not pending:
        return list(all_futures), []

    if not self.log_level:
        print(msg)
    else:
        logger.info(msg)

    if is_unix_system() and timeout is not None:
        logger.debug('Setting waiting timeout to {} seconds'.format(timeout))
        error_msg = 'Timeout of {} seconds exceeded waiting for function activations to finish'.format(timeout)
        signal.signal(signal.SIGALRM, partial(timeout_handler, error_msg))
        signal.alarm(timeout)

    pbar = None
    error = False
    if not self.is_pywren_function and not self.log_level:
        from tqdm.auto import tqdm

        if is_notebook():
            pbar = tqdm(bar_format='{n}/|/ {n_fmt}/{total_fmt}',
                        total=len(pending))  # ncols=800
        else:
            print()
            pbar = tqdm(bar_format=' {l_bar}{bar}| {n_fmt}/{total_fmt} ',
                        total=len(pending), disable=False)

    try:
        if self.rabbitmq_monitor:
            logger.info('Using RabbitMQ to monitor function activations')
            wait_rabbitmq(pending, self.internal_storage,
                          rabbit_amqp_url=self.rabbit_amqp_url,
                          download_results=download_results,
                          throw_except=throw_except, pbar=pbar,
                          return_when=return_when,
                          THREADPOOL_SIZE=THREADPOOL_SIZE)
        else:
            wait_storage(pending, self.internal_storage,
                         download_results=download_results,
                         throw_except=throw_except, return_when=return_when,
                         pbar=pbar, THREADPOOL_SIZE=THREADPOOL_SIZE,
                         WAIT_DUR_SEC=WAIT_DUR_SEC)

    except KeyboardInterrupt:
        not_done_count = sum(1 for f in pending if _is_pending(f))
        msg = ('ExecutorID {} - Cancelled - Total Activations not done: {}'
               .format(self.executor_id, not_done_count))
        if pbar:
            pbar.close()
            print()
        if not self.log_level:
            print(msg)
        else:
            logger.info(msg)
        error = True

    except Exception:
        error = True
        # Bare raise keeps the original traceback untouched
        raise

    finally:
        self.invoker.stop()
        if is_unix_system():
            signal.alarm(0)
        if pbar and not pbar.disable:
            pbar.close()
            if not is_notebook():
                print()
        if self.data_cleaner and not self.is_pywren_function:
            self.clean(cloudobjects=False, force=False, log=False)
        if not fs and error and is_notebook():
            # On error inside a notebook, drop the trailing entries of
            # self.futures corresponding to the futures that were being waited on.
            del self.futures[len(self.futures) - len(pending):]

    fs_done = [f for f in all_futures if not _is_pending(f)]
    fs_notdone = [f for f in all_futures if _is_pending(f)]

    return fs_done, fs_notdone