def benchmark(workers, memory, loopcount, matn):
    """
    Run `compute_flops` on `workers` parallel workers and report the
    aggregate throughput.

    Each worker multiplies `matn` x `matn` matrices `loopcount` times, so the
    estimated work is workers * 2 * loopcount * matn**3 floating point ops.

    Returns a dict with start time, total wall-clock time, estimated flops,
    per-worker stats and the raw worker results.
    """
    task_args = [(loopcount, matn) for _ in range(workers)]
    executor = FunctionExecutor(runtime_memory=memory)

    t_begin = time.time()
    futures = executor.map(compute_flops, task_args)
    results = executor.get_result()
    t_end = time.time()

    stats = [fut.stats for fut in futures]
    elapsed = t_end - t_begin
    print("Total time:", round(elapsed, 3))

    # 2*n^3 flops per matrix product, loopcount products per worker.
    est_flops = workers * 2 * loopcount * matn ** 3
    print('Estimated GFLOPS:', round(est_flops / 1e9 / elapsed, 4))

    return {
        'start_time': t_begin,
        'total_time': elapsed,
        'est_flops': est_flops,
        'worker_stats': stats,
        'results': results
    }
def read(backend, bucket_name, number, keylist_raw, read_times):
    """
    Measure object-storage read throughput using one worker per key.

    When `number` is 0 every key in `keylist_raw` is read once; otherwise
    `number` workers are launched, cycling through `keylist_raw` as needed.
    Each worker streams its object `read_times` times in 1 MiB chunks.

    Returns a dict with start time, total wall-clock time, per-worker stats
    and the per-worker results.
    """
    blocksize = 1024 * 1024  # 1 MiB per read call

    def read_object(key_name, storage):
        # Runs remotely: stream the object and feed it through MD5 so the
        # reads cannot be optimised away.
        digest = hashlib.md5()
        bytes_read = 0
        print(key_name)
        start_time = time.time()
        for _ in range(read_times):
            fileobj = storage.get_object(bucket_name, key_name, stream=True)
            try:
                chunk = fileobj.read(blocksize)
                while len(chunk) > 0:
                    bytes_read += len(chunk)
                    digest.update(chunk)
                    chunk = fileobj.read(blocksize)
            except Exception as e:
                # Best-effort: report the failure but keep benchmarking.
                print(e)
        end_time = time.time()
        mb_rate = bytes_read / (end_time - start_time) / 1e6
        print('MB Rate: ' + str(mb_rate))
        return {
            'start_time': start_time,
            'end_time': end_time,
            'mb_rate': mb_rate,
            'bytes_read': bytes_read
        }

    if number == 0:
        keynames = keylist_raw
    else:
        # Cycle through the raw key list until `number` keys are selected.
        keynames = [keylist_raw[i % len(keylist_raw)] for i in range(number)]

    executor = FunctionExecutor(backend=backend, runtime_memory=1024)
    t_begin = time.time()
    futures = executor.map(read_object, keynames)
    results = executor.get_result()
    t_end = time.time()

    return {
        'start_time': t_begin,
        'total_time': t_end - t_begin,
        'worker_stats': [fut.stats for fut in futures],
        'results': results
    }
class FuturesList(list):
    """
    A list of Lithops futures that can itself be fed into further
    ``map``/``map_reduce`` calls, enabling chained pipelines.

    The executor is created lazily from ``self.config``. Futures from every
    stage of the chain are accumulated in ``self.alt_list`` so that
    ``wait``/``get_result`` cover the whole pipeline, while the list itself
    always holds only the most recent stage's futures.
    """

    def _create_executor(self):
        # Lazily build the executor. getattr guards against instances that
        # never had the attribute assigned (it is only guaranteed to exist
        # after __reduce__ has run, e.g. on unpickled instances); the
        # original unconditional read raised AttributeError in that case.
        if not getattr(self, 'executor', None):
            from lithops import FunctionExecutor
            self.executor = FunctionExecutor(config=self.config)

    def _extend_futures(self, fs):
        # Earlier-stage futures no longer need to materialize their output;
        # their results feed the next stage instead.
        for fut in self:
            fut._produce_output = False
        # alt_list accumulates futures from every stage for wait/get_result.
        if not hasattr(self, 'alt_list'):
            self.alt_list = []
        self.alt_list.extend(self)
        self.alt_list.extend(fs)
        # The list itself only tracks the newest stage.
        self.clear()
        self.extend(fs)

    def map(self, map_function, sync=False, **kwargs):
        """Chain a new map stage over these futures. Returns self."""
        self._create_executor()
        if sync:
            # Block until the current stage finishes before chaining.
            self.executor.wait(self)
        fs = self.executor.map(map_function, self, **kwargs)
        self._extend_futures(fs)
        return self

    def map_reduce(self, map_function, reduce_function, sync=False, **kwargs):
        """Chain a new map_reduce stage over these futures. Returns self."""
        self._create_executor()
        if sync:
            self.executor.wait(self)
        fs = self.executor.map_reduce(map_function, self, reduce_function, **kwargs)
        self._extend_futures(fs)
        return self

    def wait(self, **kwargs):
        """Wait on all futures from every chained stage."""
        self._create_executor()
        fs_tt = self.alt_list if hasattr(self, 'alt_list') else self
        return self.executor.wait(fs_tt, **kwargs)

    def get_result(self, **kwargs):
        """Collect results from all futures of every chained stage."""
        self._create_executor()
        fs_tt = self.alt_list if hasattr(self, 'alt_list') else self
        return self.executor.get_result(fs_tt, **kwargs)

    def __reduce__(self):
        # The executor holds unpicklable resources; drop it before pickling.
        # It will be recreated lazily by _create_executor after unpickling.
        self.executor = None
        return super().__reduce__()
def write(backend, bucket_name, mb_per_file, number, key_prefix):
    """
    Measure object-storage write throughput using one worker per object.

    Launches `number` workers; each uploads `mb_per_file` MiB of random data
    to a uniquely named key under `key_prefix`.

    Returns a dict with start time, total wall-clock time, per-worker stats,
    the bucket name, the generated key names and the per-worker results.
    """

    def write_object(key_name, storage):
        # Runs remotely: upload a random payload and record the rate.
        bytes_n = mb_per_file * 1024 ** 2
        payload = RandomDataGenerator(bytes_n)
        print(key_name)
        start_time = time.time()
        storage.put_object(bucket_name, key_name, payload)
        end_time = time.time()
        mb_rate = bytes_n / (end_time - start_time) / 1e6
        print('MB Rate: ' + str(mb_rate))
        return {
            'start_time': start_time,
            'end_time': end_time,
            'mb_rate': mb_rate
        }

    # One unique random key per worker.
    keynames = [key_prefix + str(uuid.uuid4().hex.upper()) for _ in range(number)]

    executor = FunctionExecutor(backend=backend, runtime_memory=1024)
    t_begin = time.time()
    futures = executor.map(write_object, keynames)
    results = executor.get_result()
    t_end = time.time()

    return {
        'start_time': t_begin,
        'total_time': t_end - t_begin,
        'worker_stats': [fut.stats for fut in futures],
        'bucket_name': bucket_name,
        'keynames': keynames,
        'results': results
    }
class Pool(object):
    """
    Class which supports an async version of applying functions to arguments.

    This variant is derived from CPython's multiprocessing.pool.Pool but
    routes apply/map/starmap work through a Lithops FunctionExecutor.  The
    original process/queue machinery (worker processes, task/result handler
    threads) is retained below mostly for API compatibility; note that
    __init__ does not call _setup_queues() nor start any handler threads.
    """
    _wrap_exception = True

    def Process(self, *args, **kwds):
        # Factory for worker processes, delegated to the multiprocessing context.
        return self._ctx.Process(*args, **kwds)

    def __init__(self, processes=None, initializer=None, initargs=(), maxtasksperchild=None, context=None):
        """
        Create the pool.

        processes: number of workers; when None, the executor's configured
            worker count is used.
        initializer/initargs: kept for multiprocessing API compatibility.
        maxtasksperchild/context: multiprocessing compatibility options.

        Raises ValueError if processes < 1, TypeError if initializer is not
        callable.
        """
        self._ctx = context or get_context()
        # self._setup_queues()  -- queues intentionally not created; work
        # goes through the FunctionExecutor instead.
        self._taskqueue = queue.Queue()
        self._cache = {}
        self._state = RUN
        self._maxtasksperchild = maxtasksperchild
        self._initializer = initializer
        self._initargs = initargs

        if processes is not None and processes < 1:
            raise ValueError("Number of processes must be at least 1")

        # NOTE(review): initargs is unpacked here as FunctionExecutor keyword
        # arguments rather than being passed to an initializer function —
        # confirm this repurposing is intended.
        if processes is not None:
            if self._initargs:
                self._executor = FunctionExecutor(workers=processes, **self._initargs)
            else:
                self._executor = FunctionExecutor(workers=processes)
            self._processes = processes
        else:
            if self._initargs:
                self._executor = FunctionExecutor(**self._initargs)
            else:
                self._executor = FunctionExecutor()
            # Derive the worker count from the executor's invoker config.
            self._processes = self._executor.invoker.workers

        if initializer is not None and not callable(initializer):
            raise TypeError('initializer must be a callable')

        self._pool = []

    def _join_exited_workers(self):
        """
        Cleanup after any worker processes which have exited due to
        reaching their specified lifetime.

        Returns True if any workers were cleaned up.
        """
        cleaned = False
        # Iterate in reverse so deletions don't shift pending indices.
        for i in reversed(range(len(self._pool))):
            worker = self._pool[i]
            if worker.exitcode is not None:
                # worker exited
                util.debug('cleaning up worker %d' % i)
                worker.join()
                cleaned = True
                del self._pool[i]
        return cleaned

    def _repopulate_pool(self):
        """
        Bring the number of pool processes up to the specified number,
        for use after reaping workers which have exited.
        """
        for i in range(self._processes - len(self._pool)):
            w = self.Process(target=worker, args=(self._inqueue, self._outqueue, self._initializer, self._initargs, self._maxtasksperchild, self._wrap_exception))
            self._pool.append(w)
            w.name = w.name.replace('Process', 'PoolWorker')
            w.daemon = True
            w.start()
            util.debug('added worker')

    def _maintain_pool(self):
        """
        Clean up any exited workers and start replacements for them.
        """
        if self._join_exited_workers():
            self._repopulate_pool()

    def _setup_queues(self):
        # Create in/out SimpleQueues and bind fast-path send/recv shortcuts.
        # Not called by __init__ in this executor-backed variant.
        self._inqueue = self._ctx.SimpleQueue()
        self._outqueue = self._ctx.SimpleQueue()
        self._quick_put = self._inqueue._writer.send
        self._quick_get = self._outqueue._reader.recv

    def apply(self, func, args=(), kwds={}):
        """
        Equivalent of `func(*args, **kwds)`. Blocks until the result is ready.
        """
        assert self._state == RUN
        # NOTE(review): args is replaced with an empty dict here; *{} unpacks
        # to no positionals so this behaves like (), but an empty tuple would
        # be clearer — confirm intent.
        if kwds and not args:
            args = {}
        return self.apply_async(func, args, kwds).get()

    def map(self, func, iterable, chunksize=None):
        """
        Apply `func` to each element in `iterable`, collecting the results
        in a list that is returned.
        """
        return self._map_async(func, iterable, mapstar, chunksize).get()

    def starmap(self, func, iterable, chunksize=None):
        """
        Like `map()` method but the elements of the `iterable` are expected
        to be iterables as well and will be unpacked as arguments. Hence
        `func` and (a, b) becomes func(a, b).
        """
        return self._map_async(func, iterable, starmapstar, chunksize).get()

    def starmap_async(self, func, iterable, chunksize=None, callback=None, error_callback=None):
        """
        Asynchronous version of `starmap()` method.
        """
        return self._map_async(func, iterable, starmapstar, chunksize, callback, error_callback)

    def _guarded_task_generation(self, result_job, func, iterable):
        """
        Provides a generator of tasks for imap and imap_unordered with
        appropriate handling for iterables which throw exceptions during
        iteration.
        """
        try:
            i = -1
            for i, x in enumerate(iterable):
                yield (result_job, i, func, (x,), {})
        except Exception as e:
            # Enqueue a task that re-raises the iteration error in order,
            # at the position after the last successfully yielded item.
            yield (result_job, i + 1, _helper_reraises_exception, (e,), {})

    def imap(self, func, iterable, chunksize=1):
        """
        Equivalent of `map()` -- can be MUCH slower than `Pool.map()`.

        NOTE(review): tasks are placed on self._taskqueue, but this variant's
        __init__ starts no handler threads to consume it — confirm imap is
        actually serviced in this configuration.
        """
        if self._state != RUN:
            raise ValueError("Pool not running")
        if chunksize == 1:
            result = IMapIterator(self._cache)
            self._taskqueue.put(
                (
                    self._guarded_task_generation(result._job, func, iterable),
                    result._set_length
                ))
            return result
        else:
            assert chunksize > 1
            # Batch the iterable; mapstar applies func across each batch.
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapIterator(self._cache)
            self._taskqueue.put(
                (
                    self._guarded_task_generation(result._job, mapstar, task_batches),
                    result._set_length
                ))
            # Flatten the per-batch results back into a single stream.
            return (item for chunk in result for item in chunk)

    def imap_unordered(self, func, iterable, chunksize=1):
        """
        Like `imap()` method but ordering of results is arbitrary.
        """
        if self._state != RUN:
            raise ValueError("Pool not running")
        if chunksize == 1:
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put(
                (
                    self._guarded_task_generation(result._job, func, iterable),
                    result._set_length
                ))
            return result
        else:
            assert chunksize > 1
            task_batches = Pool._get_tasks(func, iterable, chunksize)
            result = IMapUnorderedIterator(self._cache)
            self._taskqueue.put(
                (
                    self._guarded_task_generation(result._job, mapstar, task_batches),
                    result._set_length
                ))
            return (item for chunk in result for item in chunk)

    def apply_async(self, func, args=(), kwds={}, callback=None, error_callback=None):
        """
        Asynchronous version of `apply()` method.

        Submits func through the executor and returns an ApplyResult.
        NOTE(review): kwds is not forwarded to call_async — keyword
        arguments appear to be dropped; confirm against call_async's API.
        """
        if self._state != RUN:
            raise ValueError("Pool not running")
        futures = self._executor.call_async(func, data=args)
        result = ApplyResult(self._executor, [futures], callback, error_callback)
        return result

    def map_async(self, func, iterable, chunksize=None, callback=None, error_callback=None):
        """
        Asynchronous version of `map()` method.
        """
        return self._map_async(func, iterable, mapstar, chunksize, callback, error_callback)

    def _map_async(self, func, iterable, mapper, chunksize=None, callback=None, error_callback=None):
        """
        Helper function to implement map, starmap and their async counterparts.

        NOTE(review): `mapper` and `chunksize` are accepted but never used —
        the executor maps func directly over the iterable, so starmap's
        unpacking via starmapstar may not happen; confirm.
        """
        if self._state != RUN:
            raise ValueError("Pool not running")
        if not hasattr(iterable, '__len__'):
            # Materialize generators so the executor can size the job.
            iterable = list(iterable)
        futures = self._executor.map(func, iterable)
        result = MapResult(self._executor, futures, callback, error_callback)
        return result

    @staticmethod
    def _handle_workers(pool):
        """Background thread body: keep the worker pool populated."""
        thread = threading.current_thread()
        # Keep maintaining workers until the cache gets drained, unless the pool
        # is terminated.
        while thread._state == RUN or (pool._cache and thread._state != TERMINATE):
            pool._maintain_pool()
            time.sleep(0.1)
        # send sentinel to stop workers
        pool._taskqueue.put(None)
        util.debug('worker handler exiting')

    @staticmethod
    def _handle_tasks(taskqueue, put, outqueue, pool, cache):
        """Background thread body: drain taskqueue and feed tasks to workers."""
        thread = threading.current_thread()
        # iter(..., None): a None on the taskqueue is the shutdown sentinel.
        for taskseq, set_length in iter(taskqueue.get, None):
            task = None
            try:
                # iterating taskseq cannot fail
                for task in taskseq:
                    if thread._state:
                        util.debug('task handler found thread._state != RUN')
                        break
                    try:
                        put(task)
                    except Exception as e:
                        # Deliver the send failure to the waiting result slot.
                        job, idx = task[:2]
                        try:
                            cache[job]._set(idx, (False, e))
                        except KeyError:
                            pass
                else:
                    # Loop completed without break: report the final length.
                    if set_length:
                        util.debug('doing set_length()')
                        idx = task[1] if task else -1
                        set_length(idx + 1)
                    continue
                break
            finally:
                # Drop references so objects can be collected promptly.
                task = taskseq = job = None
        else:
            util.debug('task handler got sentinel')
        try:
            # tell result handler to finish when cache is empty
            util.debug('task handler sending sentinel to result handler')
            outqueue.put(None)
            # tell workers there is no more work
            util.debug('task handler sending sentinel to workers')
            for p in pool:
                put(None)
        except OSError:
            util.debug('task handler got OSError when sending sentinels')
        util.debug('task handler exiting')

    @staticmethod
    def _handle_results(outqueue, get, cache):
        """Background thread body: route worker results into the cache."""
        thread = threading.current_thread()
        while 1:
            try:
                task = get()
            except (OSError, EOFError):
                util.debug('result handler got EOFError/OSError -- exiting')
                return
            if thread._state:
                assert thread._state == TERMINATE
                util.debug('result handler found thread._state=TERMINATE')
                break
            if task is None:
                util.debug('result handler got sentinel')
                break
            job, i, obj = task
            try:
                cache[job]._set(i, obj)
            except KeyError:
                # Result arrived for an already-discarded job; ignore.
                pass
            task = job = obj = None
        # After termination/sentinel: keep draining until the cache empties.
        while cache and thread._state != TERMINATE:
            try:
                task = get()
            except (OSError, EOFError):
                util.debug('result handler got EOFError/OSError -- exiting')
                return
            if task is None:
                util.debug('result handler ignoring extra sentinel')
                continue
            job, i, obj = task
            try:
                cache[job]._set(i, obj)
            except KeyError:
                pass
            task = job = obj = None
        if hasattr(outqueue, '_reader'):
            util.debug('ensuring that outqueue is not full')
            # If we don't make room available in outqueue then
            # attempts to add the sentinel (None) to outqueue may
            # block. There is guaranteed to be no more than 2 sentinels.
            try:
                for i in range(10):
                    if not outqueue._reader.poll():
                        break
                    get()
            except (OSError, EOFError):
                pass
        util.debug('result handler exiting: len(cache)=%s, thread._state=%s', len(cache), thread._state)

    @staticmethod
    def _get_tasks(func, it, size):
        """Yield (func, batch) pairs, batching `it` into tuples of `size`."""
        it = iter(it)
        while 1:
            x = tuple(itertools.islice(it, size))
            if not x:
                return
            yield (func, x)

    def __reduce__(self):
        # Pools hold live executor/thread state and cannot be pickled.
        raise NotImplementedError(
            'pool objects cannot be passed between processes or pickled'
        )

    def close(self):
        """Forbid new tasks; pending work may still complete."""
        util.debug('closing pool')
        if self._state == RUN:
            self._state = CLOSE
            # self._worker_handler._state = CLOSE  -- handler threads unused here

    def terminate(self):
        """Stop the pool immediately and clean executor resources."""
        util.debug('terminating pool')
        self._state = TERMINATE
        # self._worker_handler._state = TERMINATE
        # self._terminate()
        self._executor.clean()

    def join(self):
        """Wait for shutdown; must be called after close() or terminate()."""
        util.debug('joining pool')
        assert self._state in (CLOSE, TERMINATE)
        # Handler/worker joins are disabled in this executor-backed variant:
        # self._worker_handler.join()
        # self._task_handler.join()
        # self._result_handler.join()
        # for p in self._pool:
        #     p.join()

    @staticmethod
    def _help_stuff_finish(inqueue, task_handler, size):
        # task_handler may be blocked trying to put items on inqueue
        util.debug('removing tasks from inqueue until task handler finished')
        while task_handler.is_alive() and inqueue._reader.poll():
            inqueue._reader.recv()
            time.sleep(0)

    @classmethod
    def _terminate_pool(cls, taskqueue, inqueue, outqueue, pool, worker_handler, task_handler, result_handler, cache):
        """Finalizer: shut down handler threads and worker processes."""
        # this is guaranteed to only be called once
        util.debug('finalizing pool')
        worker_handler._state = TERMINATE
        task_handler._state = TERMINATE
        util.debug('helping task handler/workers to finish')
        cls._help_stuff_finish(inqueue, task_handler, len(pool))
        assert result_handler.is_alive() or len(cache) == 0
        result_handler._state = TERMINATE
        outqueue.put(None)  # sentinel
        # We must wait for the worker handler to exit before terminating
        # workers because we don't want workers to be restarted behind our back.
        util.debug('joining worker handler')
        if threading.current_thread() is not worker_handler:
            worker_handler.join()
        # Terminate workers which haven't already finished.
        if pool and hasattr(pool[0], 'terminate'):
            util.debug('terminating workers')
            for p in pool:
                if p.exitcode is None:
                    p.terminate()
        util.debug('joining task handler')
        if threading.current_thread() is not task_handler:
            task_handler.join()
        util.debug('joining result handler')
        if threading.current_thread() is not result_handler:
            result_handler.join()
        if pool and hasattr(pool[0], 'terminate'):
            util.debug('joining pool workers')
            for p in pool:
                if p.is_alive():
                    # worker has not yet exited
                    util.debug('cleaning up worker %d' % p.pid)
                    p.join()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Context-manager exit terminates the pool unconditionally.
        self.terminate()
class Pool(object):
    """
    Class which supports an async version of applying functions to arguments.

    Cloud-backed Pool: work is executed through a Lithops FunctionExecutor,
    each call wrapped in a CloudWorker. Supports optional remote stdout
    streaming via mp_config.STREAM_STDOUT.
    """
    _wrap_exception = True
    # Process class used for multiprocessing API compatibility.
    Process = CloudProcess

    def __init__(self, processes=None, initializer=None, initargs=None, maxtasksperchild=None, context=None):
        """
        Create the pool.

        processes: number of workers; when None, the executor's configured
            worker count is used.
        initializer/initargs: forwarded to each CloudWorker.
        maxtasksperchild/context: accepted for multiprocessing compatibility.

        Raises ValueError if processes < 1, TypeError if initializer is not
        callable.
        """
        if initargs is None:
            initargs = ()
        self._taskqueue = queue.Queue()
        self._cache = {}
        self._state = RUN
        self._maxtasksperchild = maxtasksperchild
        self._initializer = initializer
        self._initargs = initargs
        # Remote log feed; created on demand when STREAM_STDOUT is enabled.
        self._remote_logger = None

        if processes is not None and processes < 1:
            raise ValueError("Number of processes must be at least 1")

        # Lithops configuration dict supplied through mp_config.
        lithops_conf = mp_config.get_parameter(mp_config.LITHOPS_CONFIG)

        if processes is not None:
            self._processes = processes
            self._executor = FunctionExecutor(workers=processes, **lithops_conf)
        else:
            self._executor = FunctionExecutor(**lithops_conf)
            # Derive the worker count from the executor's invoker config.
            self._processes = self._executor.invoker.workers

        if initializer is not None and not callable(initializer):
            raise TypeError('initializer must be a callable')

    def apply(self, func, args=(), kwds={}):
        """
        Equivalent of `func(*args, **kwds)`. Blocks until the result is ready.
        """
        assert self._state == RUN
        # NOTE(review): args is replaced with an empty dict here; *{} unpacks
        # to no positionals so this behaves like (), but an empty tuple would
        # be clearer — confirm intent.
        if kwds and not args:
            args = {}
        return self.apply_async(func, args, kwds).get()

    def map(self, func, iterable, chunksize=None):
        """
        Apply `func` to each element in `iterable`, collecting the results
        in a list that is returned.
        """
        return self._map_async(func, iterable, chunksize).get()

    def starmap(self, func, iterable, chunksize=None):
        """
        Like `map()` method but the elements of the `iterable` are expected
        to be iterables as well and will be unpacked as arguments. Hence
        `func` and (a, b) becomes func(a, b).
        """
        return self._map_async(func, iterable, chunksize=chunksize).get()

    def starmap_async(self, func, iterable, chunksize=None, callback=None, error_callback=None):
        """
        Asynchronous version of `starmap()` method.
        """
        return self._map_async(func, iterable, chunksize=chunksize, callback=callback, error_callback=error_callback)

    def imap(self, func, iterable, chunksize=1):
        """
        Equivalent of `map()` -- can be MUCH slower than `Pool.map()`.

        Note: this implementation computes ALL results eagerly via map() and
        returns an iterator over the finished list, so it is not lazy.
        """
        res = self.map(func, iterable, chunksize=chunksize)
        return IMapIterator(res)

    def imap_unordered(self, func, iterable, chunksize=1):
        """
        Like `imap()` method but ordering of results is arbitrary.

        Note: like imap(), results are computed eagerly; ordering here is in
        fact the same as imap()'s.
        """
        res = self.map(func, iterable, chunksize=chunksize)
        return IMapIterator(res)

    def apply_async(self, func, args=(), kwds={}, callback=None, error_callback=None):
        """
        Asynchronous version of `apply()` method.

        Wraps func in a CloudWorker (carrying initializer/initargs),
        optionally attaches a remote log stream, and submits a single
        invocation through the executor. Returns an ApplyResult.
        """
        if self._state != RUN:
            raise ValueError("Pool not running")

        cloud_worker = CloudWorker(func=func, initializer=self._initializer, initargs=self._initargs)

        if mp_config.get_parameter(mp_config.STREAM_STDOUT):
            # Stream remote stdout back under the executor's id.
            stream = self._executor.executor_id
            util.debug('Log streaming enabled, stream name: {}'.format(stream))
            self._remote_logger = util.RemoteLoggingFeed(stream)
            self._remote_logger.start()
            cloud_worker.log_stream = stream

        extra_env = mp_config.get_parameter(mp_config.ENV_VARS)
        futures = self._executor.call_async(cloud_worker, data={
            'args': args,
            'kwargs': kwds
        }, extra_env=extra_env)

        result = ApplyResult(self._executor, [futures], callback, error_callback)
        return result

    def map_async(self, func, iterable, chunksize=None, callback=None, error_callback=None):
        """
        Asynchronous version of `map()` method.
        """
        return self._map_async(func, iterable, chunksize, callback, error_callback)

    def _map_async(self, func, iterable, chunksize=None, callback=None, error_callback=None):
        """
        Helper function to implement map, starmap and their async
        counterparts.

        Formats each element of `iterable` into an {'args', 'kwargs'} payload
        based on its type (dict -> kwargs, tuple/list -> positional args,
        anything else -> single positional arg) and maps a CloudWorker over
        the payloads. `chunksize` is accepted but unused.

        NOTE(review): iterable[0] raises IndexError on an empty iterable —
        confirm whether empty input should be supported.
        """
        if self._state != RUN:
            raise ValueError("Pool not running")

        if not hasattr(iterable, '__len__'):
            # Materialize generators so they can be inspected and sized.
            iterable = list(iterable)

        cloud_worker = CloudWorker(func=func, initializer=self._initializer, initargs=self._initargs)

        # Element type decides how it is unpacked at the remote end.
        if isinstance(iterable[0], dict):
            fmt_args = [{'args': (), 'kwargs': kwargs} for kwargs in iterable]
        elif isinstance(iterable[0], tuple) or isinstance(iterable[0], list):
            fmt_args = [{'args': args, 'kwargs': {}} for args in iterable]
        else:
            fmt_args = [{'args': (args, ), 'kwargs': {}} for args in iterable]

        if mp_config.get_parameter(mp_config.STREAM_STDOUT):
            # Stream remote stdout back under the executor's id.
            stream = self._executor.executor_id
            util.debug('Log streaming enabled, stream name: {}'.format(stream))
            self._remote_logger = util.RemoteLoggingFeed(stream)
            self._remote_logger.start()
            cloud_worker.log_stream = stream

        extra_env = mp_config.get_parameter(mp_config.ENV_VARS)
        futures = self._executor.map(cloud_worker, fmt_args, extra_env=extra_env)

        result = MapResult(self._executor, futures, callback, error_callback)
        return result

    def __reduce__(self):
        # Pools hold live executor state and cannot be pickled.
        raise NotImplementedError(
            'pool objects cannot be passed between processes or pickled')

    def close(self):
        """Forbid new tasks; pending work may still complete."""
        util.debug('closing pool')
        if self._state == RUN:
            self._state = CLOSE

    def terminate(self):
        """Stop the pool and shut down the remote logger, if any."""
        util.debug('terminating pool')
        self._state = TERMINATE
        if self._remote_logger:
            self._remote_logger.stop()
            self._remote_logger = None

    def join(self):
        """Wait for shutdown; must be called after close() or terminate()."""
        util.debug('joining pool')
        assert self._state in (CLOSE, TERMINATE)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Context-manager exit terminates the pool unconditionally.
        self.terminate()