import os
import sys
import copy
import atexit
import pickle
import signal
import logging
import shlex
import tempfile
import subprocess as sp
from functools import partial

# NOTE: the lithops-internal imports (config helpers, handlers, invokers,
# storage, and constants such as LOCALHOST/SERVERLESS/STANDALONE) are assumed
# to be in scope, as in the original excerpt.

logger = logging.getLogger(__name__)


def attach(config, backend, start, debug):
    """Create or attach to an SSH session on the Lithops master VM"""
    if config:
        config = load_yaml_config(config)

    log_level = logging.DEBUG if debug else logging.INFO
    setup_lithops_logger(log_level)

    config_ow = set_config_ow(backend)
    config = default_config(config, config_ow)

    if config['lithops']['mode'] != STANDALONE:
        raise Exception("'lithops attach' is only available "
                        "for standalone backends")

    compute_config = extract_standalone_config(config)
    compute_handler = StandaloneHandler(compute_config)
    compute_handler.init()

    if start:
        compute_handler.backend.master.start()

    master_ip = compute_handler.backend.master.get_public_ip()
    user = compute_handler.backend.master.ssh_credentials['username']
    key_file = compute_handler.backend.master.ssh_credentials['key_filename'] or '~/.ssh/id_rsa'
    key_file = os.path.abspath(os.path.expanduser(key_file))

    if not os.path.exists(key_file):
        raise Exception(f'Private key file {key_file} does not exist')

    print(f'Got master VM public IP address: {master_ip}')
    print(f'Loading SSH private key from: {key_file}')
    print('Creating SSH connection to the Lithops master VM')
    cmd = ('ssh -o "UserKnownHostsFile=/dev/null" -o "StrictHostKeyChecking=no" '
           f'-i {key_file} {user}@{master_ip}')

    compute_handler.backend.master.wait_ready()
    sp.run(shlex.split(cmd))
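
# Usage sketch (hedged): in the real CLI this function is exposed through a
# command-line entry point; assuming flags that mirror the parameters above,
# attaching to a standalone master would look like:
#
#   $ lithops attach --backend ibm_vpc --start
#   $ lithops attach --backend ibm_vpc --debug
#
# The backend name here is only illustrative; any configured standalone
# backend applies.
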
class FunctionExecutor:
    """
    Executor abstract class that contains the common logic for the
    Localhost, Serverless and Standalone executors.
    """

    def __init__(self, type=None, mode=None, config=None, backend=None,
                 storage=None, runtime=None, runtime_memory=None,
                 rabbitmq_monitor=None, workers=None, remote_invoker=None,
                 log_level=None):
        mode = mode or type

        if mode is None:
            config = default_config(copy.deepcopy(config))
            mode = config['lithops']['mode']

        if mode not in [LOCALHOST, SERVERLESS, STANDALONE]:
            raise Exception("Function executor mode must be one of '{}', '{}' "
                            "or '{}'".format(LOCALHOST, SERVERLESS, STANDALONE))

        if log_level:
            setup_logger(log_level)

        if type is not None:
            logger.warning("'type' parameter is deprecated and will be removed "
                           "in future releases. Use the 'mode' parameter instead")

        config_ow = {'lithops': {'mode': mode}, mode: {}}

        if runtime is not None:
            config_ow[mode]['runtime'] = runtime
        if backend is not None:
            config_ow[mode]['backend'] = backend
        if runtime_memory is not None:
            config_ow[mode]['runtime_memory'] = int(runtime_memory)
        if remote_invoker is not None:
            config_ow[mode]['remote_invoker'] = remote_invoker

        if storage is not None:
            config_ow['lithops']['storage'] = storage
        if workers is not None:
            config_ow['lithops']['workers'] = workers
        if rabbitmq_monitor is not None:
            config_ow['lithops']['rabbitmq_monitor'] = rabbitmq_monitor

        self.config = default_config(copy.deepcopy(config), config_ow)
        self.log_active = logger.getEffectiveLevel() != logging.WARNING
        self.is_lithops_worker = is_lithops_worker()
        self.executor_id = create_executor_id()

        self.data_cleaner = self.config['lithops'].get('data_cleaner', True)
        if self.data_cleaner and not self.is_lithops_worker:
            spawn_cleaner = int(self.executor_id.split('-')[1]) == 0
            atexit.register(self.clean, spawn_cleaner=spawn_cleaner,
                            clean_cloudobjects=False)

        self.rabbitmq_monitor = self.config['lithops'].get('rabbitmq_monitor', False)
        if self.rabbitmq_monitor:
            if 'rabbitmq' in self.config and 'amqp_url' in self.config['rabbitmq']:
                self.rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url')
            else:
                raise Exception("You cannot use rabbitmq_monitor since "
                                "'amqp_url' is not present in the configuration")

        storage_config = extract_storage_config(self.config)
        self.internal_storage = InternalStorage(storage_config)
        self.storage = self.internal_storage.storage

        self.futures = []
        self.cleaned_jobs = set()
        self.total_jobs = 0
        self.last_call = None

        if mode == LOCALHOST:
            localhost_config = extract_localhost_config(self.config)
            self.compute_handler = LocalhostHandler(localhost_config)
            self.invoker = StandaloneInvoker(self.config, self.executor_id,
                                             self.internal_storage,
                                             self.compute_handler)
        elif mode == SERVERLESS:
            serverless_config = extract_serverless_config(self.config)
            self.compute_handler = ServerlessHandler(serverless_config,
                                                     storage_config)
            self.invoker = ServerlessInvoker(self.config, self.executor_id,
                                             self.internal_storage,
                                             self.compute_handler)
        elif mode == STANDALONE:
            standalone_config = extract_standalone_config(self.config)
            self.compute_handler = StandaloneHandler(standalone_config)
            self.invoker = StandaloneInvoker(self.config, self.executor_id,
                                             self.internal_storage,
                                             self.compute_handler)

        logger.info('{} Executor created with ID: {}'
                    .format(mode.capitalize(), self.executor_id))

    def __enter__(self):
        return self

    def _create_job_id(self, call_type):
        job_id = str(self.total_jobs).zfill(3)
        self.total_jobs += 1
        return '{}{}'.format(call_type, job_id)

    def call_async(self, func, data, extra_env=None, runtime_memory=None,
                   timeout=None, include_modules=[], exclude_modules=[]):
        """
        For running one function execution asynchronously.

        :param func: the function to map over the data
        :param data: input data
        :param extra_env: Additional env variables for the action environment
        :param runtime_memory: Memory to use to run the function
        :param timeout: Time that the function has to complete its execution
                        before raising a timeout
        :param include_modules: Explicitly pickle these dependencies
        :param exclude_modules: Explicitly keep these modules from the
                                pickled dependencies
        :return: future object.
        """
        job_id = self._create_job_id('A')
        self.last_call = 'call_async'

        runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

        job = create_map_job(self.config, self.internal_storage,
                             self.executor_id, job_id,
                             map_function=func,
                             iterdata=[data],
                             runtime_meta=runtime_meta,
                             runtime_memory=runtime_memory,
                             extra_env=extra_env,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout)

        futures = self.invoker.run(job)
        self.futures.extend(futures)

        return futures[0]
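
    # Usage sketch (hedged example; assumes a working Lithops configuration
    # and a reachable backend):
    #
    #   def double(x):
    #       return x * 2
    #
    #   fexec = FunctionExecutor()
    #   fexec.call_async(double, 21)
    #   print(fexec.get_result())   # -> 42
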
    def map(self, map_function, map_iterdata, extra_args=None, extra_env=None,
            runtime_memory=None, chunk_size=None, chunk_n=None, timeout=None,
            invoke_pool_threads=500, include_modules=[], exclude_modules=[]):
        """
        For running multiple function executions asynchronously.

        :param map_function: the function to map over the data
        :param map_iterdata: An iterable of input data
        :param extra_args: Additional args to pass to the function activations
        :param extra_env: Additional env variables for the action environment
        :param runtime_memory: Memory to use to run the function
        :param chunk_size: the size of the data chunks to split each object.
                           'None' for processing the whole file in one
                           function activation
        :param chunk_n: Number of chunks to split each object. 'None' for
                        processing the whole file in one function activation
        :param timeout: Time that the functions have to complete their
                        execution before raising a timeout
        :param invoke_pool_threads: Number of threads to use to invoke
        :param include_modules: Explicitly pickle these dependencies
        :param exclude_modules: Explicitly keep these modules from the
                                pickled dependencies
        :return: A list with size `len(map_iterdata)` of futures.
        """
        job_id = self._create_job_id('M')
        self.last_call = 'map'

        runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

        job = create_map_job(self.config, self.internal_storage,
                             self.executor_id, job_id,
                             map_function=map_function,
                             iterdata=map_iterdata,
                             runtime_meta=runtime_meta,
                             runtime_memory=runtime_memory,
                             extra_env=extra_env,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout,
                             extra_args=extra_args,
                             obj_chunk_size=chunk_size,
                             obj_chunk_number=chunk_n,
                             invoke_pool_threads=invoke_pool_threads)

        futures = self.invoker.run(job)
        self.futures.extend(futures)

        return futures
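
    # Usage sketch (hedged example; assumes a working Lithops configuration):
    #
    #   def double(x):
    #       return x * 2
    #
    #   fexec = FunctionExecutor()
    #   fexec.map(double, [1, 2, 3, 4])
    #   print(fexec.get_result())   # -> [2, 4, 6, 8]
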
    def map_reduce(self, map_function, map_iterdata, reduce_function,
                   extra_args=None, extra_env=None, map_runtime_memory=None,
                   reduce_runtime_memory=None, chunk_size=None, chunk_n=None,
                   timeout=None, invoke_pool_threads=500,
                   reducer_one_per_object=False, reducer_wait_local=False,
                   include_modules=[], exclude_modules=[]):
        """
        Map the map_function over the data and apply the reduce_function
        across all futures. Both the map and the reduce stages run entirely
        within the compute backend.

        :param map_function: the function to map over the data
        :param map_iterdata: An iterable of input data
        :param reduce_function: the function to reduce over the futures
        :param extra_args: Additional arguments to pass to the function
                           activations. Default None.
        :param extra_env: Additional environment variables for the action
                          environment. Default None.
        :param map_runtime_memory: Memory to use to run the map function.
                                   Default None (loaded from config).
        :param reduce_runtime_memory: Memory to use to run the reduce
                                      function. Default None (loaded from config).
        :param chunk_size: the size of the data chunks to split each object.
                           'None' for processing the whole file in one
                           function activation.
        :param chunk_n: Number of chunks to split each object. 'None' for
                        processing the whole file in one function activation.
        :param timeout: Time that the functions have to complete their
                        execution before raising a timeout.
        :param invoke_pool_threads: Number of threads to use to invoke.
        :param reducer_one_per_object: Set one reducer per object after
                                       running the partitioner.
        :param reducer_wait_local: Wait for the map results locally before
                                   launching the reducer.
        :param include_modules: Explicitly pickle these dependencies.
        :param exclude_modules: Explicitly keep these modules from the
                                pickled dependencies.
        :return: A list of futures: one per map activation, plus the
                 reduce future(s).
        """
        self.last_call = 'map_reduce'
        map_job_id = self._create_job_id('M')

        runtime_meta = self.invoker.select_runtime(map_job_id, map_runtime_memory)

        map_job = create_map_job(self.config, self.internal_storage,
                                 self.executor_id, map_job_id,
                                 map_function=map_function,
                                 iterdata=map_iterdata,
                                 runtime_meta=runtime_meta,
                                 runtime_memory=map_runtime_memory,
                                 extra_args=extra_args,
                                 extra_env=extra_env,
                                 obj_chunk_size=chunk_size,
                                 obj_chunk_number=chunk_n,
                                 include_modules=include_modules,
                                 exclude_modules=exclude_modules,
                                 execution_timeout=timeout,
                                 invoke_pool_threads=invoke_pool_threads)

        map_futures = self.invoker.run(map_job)
        self.futures.extend(map_futures)

        if reducer_wait_local:
            self.wait(fs=map_futures)

        reduce_job_id = map_job_id.replace('M', 'R')

        runtime_meta = self.invoker.select_runtime(reduce_job_id, reduce_runtime_memory)

        reduce_job = create_reduce_job(self.config, self.internal_storage,
                                       self.executor_id, reduce_job_id,
                                       reduce_function, map_job, map_futures,
                                       runtime_meta=runtime_meta,
                                       runtime_memory=reduce_runtime_memory,
                                       reducer_one_per_object=reducer_one_per_object,
                                       extra_env=extra_env,
                                       include_modules=include_modules,
                                       exclude_modules=exclude_modules)

        reduce_futures = self.invoker.run(reduce_job)
        self.futures.extend(reduce_futures)

        for f in map_futures:
            f._produce_output = False

        return map_futures + reduce_futures
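
    # Usage sketch (hedged example; assumes a working Lithops configuration,
    # and that the reduce function receives the list of map results):
    #
    #   def double(x):
    #       return x * 2
    #
    #   def add(results):
    #       return sum(results)
    #
    #   fexec = FunctionExecutor()
    #   fexec.map_reduce(double, [1, 2, 3], add)
    #   print(fexec.get_result())   # -> 12
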
    def wait(self, fs=None, throw_except=True, return_when=ALL_COMPLETED,
             download_results=False, timeout=None, THREADPOOL_SIZE=128,
             WAIT_DUR_SEC=1):
        """
        Wait for the Future instances (possibly created by different Executor
        instances) given by fs to complete. Returns a 2-tuple of lists. The
        first list, fs_done, contains the futures that completed (finished or
        cancelled) before the wait completed. The second list, fs_notdone,
        contains the futures that did not complete (pending or running).
        timeout can be used to control the maximum number of seconds to wait
        before returning.

        :param fs: Futures list. Default None.
        :param throw_except: Re-raise exception if call raised. Default True.
        :param return_when: One of `ALL_COMPLETED`, `ANY_COMPLETED`, `ALWAYS`.
        :param download_results: Download results. Default False (only get
                                 statuses).
        :param timeout: Timeout of waiting for results.
        :param THREADPOOL_SIZE: Number of threads to use. Default 128.
        :param WAIT_DUR_SEC: Time interval between each check.

        :return: `(fs_done, fs_notdone)` where `fs_done` is a list of futures
                 that have completed and `fs_notdone` is a list of futures
                 that have not completed.
        :rtype: 2-tuple of lists
        """
        futures = fs or self.futures
        if not isinstance(futures, list):
            futures = [futures]

        if not futures:
            raise Exception('You must run call_async(), map() or map_reduce(), '
                            'or provide a list of futures, before calling the '
                            'wait()/get_result() method')

        if download_results:
            msg = 'ExecutorID {} - Getting results...'.format(self.executor_id)
            fs_done = [f for f in futures if f.done]
            fs_not_done = [f for f in futures if not f.done]
        else:
            msg = 'ExecutorID {} - Waiting for functions to complete...'.format(self.executor_id)
            fs_done = [f for f in futures if f.ready or f.done]
            fs_not_done = [f for f in futures if not f.ready and not f.done]

        if not fs_not_done:
            return fs_done, fs_not_done

        logger.info(msg)
        if not self.log_active:
            print(msg)

        if is_unix_system() and timeout is not None:
            logger.debug('Setting waiting timeout to {} seconds'.format(timeout))
            error_msg = ('Timeout of {} seconds exceeded waiting for function '
                         'activations to finish'.format(timeout))
            signal.signal(signal.SIGALRM, partial(timeout_handler, error_msg))
            signal.alarm(timeout)

        pbar = None
        error = False

        if not self.is_lithops_worker and not self.log_active:
            from tqdm.auto import tqdm
            if is_notebook():
                pbar = tqdm(bar_format='{n}/|/ {n_fmt}/{total_fmt}',
                            total=len(fs_not_done))  # ncols=800
            else:
                print()
                pbar = tqdm(bar_format='  {l_bar}{bar}| {n_fmt}/{total_fmt}  ',
                            total=len(fs_not_done), disable=False)

        try:
            if self.rabbitmq_monitor:
                logger.info('Using RabbitMQ to monitor function activations')
                wait_rabbitmq(futures, self.internal_storage,
                              rabbit_amqp_url=self.rabbit_amqp_url,
                              download_results=download_results,
                              throw_except=throw_except, pbar=pbar,
                              return_when=return_when,
                              THREADPOOL_SIZE=THREADPOOL_SIZE)
            else:
                wait_storage(futures, self.internal_storage,
                             download_results=download_results,
                             throw_except=throw_except,
                             return_when=return_when, pbar=pbar,
                             THREADPOOL_SIZE=THREADPOOL_SIZE,
                             WAIT_DUR_SEC=WAIT_DUR_SEC)

        except KeyboardInterrupt as e:
            if download_results:
                not_dones_call_ids = [(f.job_id, f.call_id)
                                      for f in futures if not f.done]
            else:
                not_dones_call_ids = [(f.job_id, f.call_id) for f in futures
                                      if not f.ready and not f.done]
            msg = ('ExecutorID {} - Cancelled - Total activations not done: {}'
                   .format(self.executor_id, len(not_dones_call_ids)))
            if pbar:
                pbar.close()
                print()
            logger.info(msg)
            if not self.log_active:
                print(msg)
            error = True
            raise e

        except Exception as e:
            error = True
            raise e

        finally:
            self.invoker.stop()
            if is_unix_system():
                signal.alarm(0)
            if pbar and not pbar.disable:
                pbar.close()
                if not is_notebook():
                    print()
            if self.data_cleaner and not self.is_lithops_worker:
                self.clean(clean_cloudobjects=False)
            if not fs and error and is_notebook():
                del self.futures[len(self.futures) - len(futures):]

        if download_results:
            fs_done = [f for f in futures if f.done]
            fs_notdone = [f for f in futures if not f.done]
        else:
            fs_done = [f for f in futures if f.ready or f.done]
            fs_notdone = [f for f in futures if not f.ready and not f.done]

        return fs_done, fs_notdone
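
    # Usage sketch (hedged example): wait for any future to finish, then for
    # all of them. ANY_COMPLETED is assumed to be importable from the same
    # module that provides ALL_COMPLETED.
    #
    #   fexec = FunctionExecutor()
    #   futures = fexec.map(double, range(10))
    #   done, not_done = fexec.wait(fs=futures, return_when=ANY_COMPLETED)
    #   done, not_done = fexec.wait(fs=futures)   # ALL_COMPLETED (default)
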
    def get_result(self, fs=None, throw_except=True, timeout=None,
                   THREADPOOL_SIZE=128, WAIT_DUR_SEC=1):
        """
        For getting the results from all function activations.

        :param fs: Futures list. Default None.
        :param throw_except: Re-raise exception if call raised. Default True.
        :param timeout: Timeout for waiting for results.
        :param THREADPOOL_SIZE: Number of threads to use. Default 128.
        :param WAIT_DUR_SEC: Time interval between each check.

        :return: The result of the future/s
        """
        fs_done, _ = self.wait(fs=fs, throw_except=throw_except,
                               timeout=timeout, download_results=True,
                               THREADPOOL_SIZE=THREADPOOL_SIZE,
                               WAIT_DUR_SEC=WAIT_DUR_SEC)
        result = []
        fs_done = [f for f in fs_done if not f.futures and f._produce_output]
        for f in fs_done:
            if fs:
                # Process futures provided by the user
                result.append(f.result(throw_except=throw_except,
                                       internal_storage=self.internal_storage))
            elif not fs and not f._read:
                # Process internally stored futures
                result.append(f.result(throw_except=throw_except,
                                       internal_storage=self.internal_storage))
                f._read = True

        logger.debug("ExecutorID {} - Finished getting results"
                     .format(self.executor_id))

        if len(result) == 1 and self.last_call != 'map':
            return result[0]

        return result

    def plot(self, fs=None, dst=None):
        """
        Creates timeline and histogram plots of the current execution.

        :param fs: list of futures to plot. Default None (all futures).
        :param dst: destination path prefix for the output .png plots.
        """
        ftrs = self.futures if not fs else fs

        if not isinstance(ftrs, list):
            ftrs = [ftrs]

        ftrs_to_plot = [f for f in ftrs if (f.ready or f.done) and not f.error]

        if not ftrs_to_plot:
            logger.debug('ExecutorID {} - No futures ready to plot'
                         .format(self.executor_id))
            return

        logging.getLogger('matplotlib').setLevel(logging.WARNING)
        from lithops.plots import create_timeline, create_histogram

        msg = 'ExecutorID {} - Creating execution plots'.format(self.executor_id)
        logger.info(msg)
        if not self.log_active:
            print(msg)

        create_timeline(ftrs_to_plot, dst)
        create_histogram(ftrs_to_plot, dst)

    def clean(self, fs=None, cs=None, clean_cloudobjects=True, spawn_cleaner=True):
        """
        Deletes all the temp files from storage. These files include the
        function, the data serialization and the function invocation results.
        It can also clean cloudobjects.

        :param fs: list of futures to clean
        :param cs: list of cloudobjects to clean
        :param clean_cloudobjects: true/false
        :param spawn_cleaner: true/false
        """
        os.makedirs(CLEANER_DIR, exist_ok=True)

        def save_data_to_clean(data):
            with tempfile.NamedTemporaryFile(dir=CLEANER_DIR, delete=False) as temp:
                pickle.dump(data, temp)

        if cs:
            data = {'cos_to_clean': list(cs),
                    'storage_config': self.internal_storage.get_storage_config()}
            save_data_to_clean(data)
            if not fs:
                return

        futures = fs or self.futures
        futures = [futures] if not isinstance(futures, list) else futures
        present_jobs = {create_job_key(f.executor_id, f.job_id) for f in futures
                        if f.executor_id.count('-') == 1}
        jobs_to_clean = present_jobs - self.cleaned_jobs

        if jobs_to_clean:
            logger.info("ExecutorID {} - Cleaning temporary data"
                        .format(self.executor_id))
            data = {'jobs_to_clean': jobs_to_clean,
                    'clean_cloudobjects': clean_cloudobjects,
                    'storage_config': self.internal_storage.get_storage_config()}
            save_data_to_clean(data)
            self.cleaned_jobs.update(jobs_to_clean)

        if (jobs_to_clean or cs) and spawn_cleaner:
            log_file = open(CLEANER_LOG_FILE, 'a')
            cmdstr = '{} -m lithops.scripts.cleaner'.format(sys.executable)
            sp.Popen(cmdstr, shell=True, stdout=log_file, stderr=log_file)

    def dismantle(self):
        self.compute_handler.dismantle()

    def init(self):
        self.compute_handler.init()

    def __exit__(self, exc_type, exc_value, traceback):
        self.invoker.stop()
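

# Minimal end-to-end sketch (hedged): assumes a valid Lithops configuration
# (e.g. a lithops config file) and a reachable backend; shown here only to
# illustrate how the executor API above fits together.
if __name__ == '__main__':

    def double(x):
        # Trivial function used only to exercise the executor
        return x * 2

    with FunctionExecutor() as fexec:
        futures = fexec.map(double, [1, 2, 3, 4])
        print(fexec.get_result(fs=futures))  # -> [2, 4, 6, 8]
        fexec.plot(fs=futures)               # write timeline/histogram plots
        fexec.clean()                        # remove temporary data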