def _send_status_rabbitmq(self):
    """ Send the status event to RabbitMQ """
    dmpd_response_status = json.dumps(self.response)
    drs = sizeof_fmt(len(dmpd_response_status))

    executor_id = self.response['executor_id']
    job_id = self.response['job_id']

    rabbit_amqp_url = self.config['rabbitmq'].get('amqp_url')
    status_sent = False
    output_query_count = 0
    params = pika.URLParameters(rabbit_amqp_url)
    exchange = 'lithops-{}-{}'.format(executor_id, job_id)

    while not status_sent and output_query_count < 5:
        output_query_count = output_query_count + 1
        try:
            connection = pika.BlockingConnection(params)
            channel = connection.channel()
            channel.exchange_declare(exchange=exchange, exchange_type='fanout', auto_delete=True)
            channel.basic_publish(exchange=exchange, routing_key='', body=dmpd_response_status)
            connection.close()
            logger.info("Execution status sent to rabbitmq - Size: {}".format(drs))
            status_sent = True
        except Exception as e:
            logger.error("Unable to send status to rabbitmq")
            logger.error(str(e))
            logger.info('Retrying to send status to rabbitmq...')
            time.sleep(0.2)
def list_bucket(prefix, bucket, backend, debug, config):
    if config:
        config = load_yaml_config(config)

    log_level = logging.INFO if not debug else logging.DEBUG
    setup_lithops_logger(log_level)
    storage = Storage(config=config, backend=backend)
    logger.info('Listing objects in bucket {}'.format(bucket))
    objects = storage.list_objects(bucket, prefix=prefix)

    if objects:
        width = max([len(obj['Key']) for obj in objects])

        print('\n{:{width}} \t {} \t\t {:>9}'.format('Key', 'Last modified', 'Size', width=width))
        print('-' * width, '\t', '-' * 20, '\t', '-' * 9)
        for obj in objects:
            key = obj['Key']
            date = obj['LastModified'].strftime("%b %d %Y %H:%M:%S")
            size = sizeof_fmt(obj['Size'])
            print('{:{width}} \t {} \t {:>9}'.format(key, date, size, width=width))
        print()
        print('Total objects: {}'.format(len(objects)))
    else:
        width = 10
        print('\n{:{width}} \t {} \t\t {:>9}'.format('Key', 'Last modified', 'Size', width=width))
        print('-' * width, '\t', '-' * 20, '\t', '-' * 9)
        print('\nThe bucket is empty')
def put_object(self, bucket_name, key, data):
    """
    Put an object in COS. Override the object if the key already exists.
    :param key: key of the object.
    :param data: data of the object
    :type data: str/bytes
    :return: None
    """
    retries = 0
    status = None
    while status is None:
        try:
            res = self.s3_client.put_object(Bucket=bucket_name, Key=key, Body=data)
            status = 'OK' if res['ResponseMetadata']['HTTPStatusCode'] == 200 else 'Error'
            try:
                logger.debug('PUT Object {} - Size: {} - {}'.format(
                    key, sizeof_fmt(len(data)), status))
            except Exception:
                logger.debug('PUT Object {} {}'.format(key, status))
        except botocore.exceptions.ClientError as e:
            if e.response['Error']['Code'] == "NoSuchKey":
                raise StorageNoSuchKeyError(bucket_name, key)
            else:
                raise e
        except botocore.exceptions.ReadTimeoutError as e:
            if retries == OBJ_REQ_RETRIES:
                raise e
            logger.debug('PUT Object timeout. Retrying request')
            retries += 1
    return True
def _send(self):
    """ Send the status event to RabbitMQ """
    dmpd_response_status = json.dumps(self.status)
    drs = sizeof_fmt(len(dmpd_response_status))

    status_sent = False
    output_query_count = 0

    queues = []
    executor_keys = self.job.executor_id.split('-')
    for k in range(int(len(executor_keys) / 2)):
        qname = 'lithops-{}'.format('-'.join(executor_keys[0:k * 3 + 2]))
        queues.append(qname)

    while not status_sent and output_query_count < 5:
        output_query_count = output_query_count + 1
        try:
            with self._create_channel() as ch:
                for queue in queues:
                    ch.basic_publish(exchange='', routing_key=queue, body=dmpd_response_status)
            logger.info("Execution status sent to RabbitMQ - Size: {}".format(drs))
            status_sent = True
        except Exception:
            time.sleep(0.2)

    if self.status['type'] == '__end__':
        super()._send()
def _split_objects_from_buckets(map_func_args_list, keys_dict, chunk_size, chunk_number):
    """
    Create partitions from bucket/s
    """
    partitions = []
    parts_per_object = []

    if chunk_number:
        logger.debug('Chunk number set to {}'.format(chunk_number))
    elif chunk_size:
        logger.debug('Chunk size set to {}'.format(chunk_size))
    else:
        logger.debug('Chunk size and chunk number not set')

    for entry in map_func_args_list:
        # Each entry is a bucket
        sb, bucket, prefix, obj_name = utils.split_object_url(entry['obj'])

        for key, obj_size in keys_dict[bucket].items():
            if prefix in key and obj_size > 0:
                if chunk_number:
                    chunk_rest = obj_size % chunk_number
                    obj_chunk_size = (obj_size // chunk_number) + \
                        round((chunk_rest / chunk_number) + 0.5)
                elif chunk_size:
                    obj_chunk_size = chunk_size
                else:
                    obj_chunk_size = obj_size

                size = total_partitions = 0

                ci = obj_size
                cz = obj_chunk_size
                parts = ci // cz + (ci % cz > 0)
                logger.debug('Creating {} partitions from object {} ({})'.format(
                    parts, key, sizeof_fmt(obj_size)))

                while size < obj_size:
                    brange = (size, size + obj_chunk_size + CHUNK_THRESHOLD)
                    brange = None if obj_size == obj_chunk_size else brange

                    partition = entry.copy()
                    partition['obj'] = CloudObject(sb, bucket, key)
                    partition['obj'].data_byte_range = brange
                    partition['obj'].chunk_size = obj_chunk_size
                    partition['obj'].part = total_partitions
                    partitions.append(partition)

                    total_partitions += 1
                    size += obj_chunk_size

                parts_per_object.append(total_partitions)

    return partitions, parts_per_object
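# --- Illustrative note (not part of the library source) ---
# A minimal standalone sketch of the chunk-size arithmetic used by the
# partitioners above. The helper name and the sample numbers are made up for
# illustration; only the formula is taken from the code.
def compute_chunks(obj_size, chunk_size=None, chunk_number=None):
    """Return (obj_chunk_size, parts) following the partitioner's rules."""
    if chunk_number:
        chunk_rest = obj_size % chunk_number
        obj_chunk_size = (obj_size // chunk_number) + \
            round((chunk_rest / chunk_number) + 0.5)
    elif chunk_size:
        obj_chunk_size = chunk_size
    else:
        obj_chunk_size = obj_size
    parts = obj_size // obj_chunk_size + (obj_size % obj_chunk_size > 0)
    return obj_chunk_size, parts

# A 100 MiB object split into 3 chunks: each partition reads ~33.3 MiB, and the
# real partitioner extends every byte range by CHUNK_THRESHOLD so records that
# cross a chunk boundary are not lost.
print(compute_chunks(100 * 1024 ** 2, chunk_number=3))  # (34952534, 3)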
def list_bucket(bucket, backend, debug):
    log_level = logging.INFO if not debug else logging.DEBUG
    setup_lithops_logger(log_level)
    storage = Storage(backend=backend)
    logger.info('Listing objects in bucket {}'.format(bucket))
    objects = storage.list_objects(bucket)

    width = max([len(obj['Key']) for obj in objects])

    print('\n{:{width}} \t {} \t\t {:>9}'.format('Key', 'Last modified', 'Size', width=width))
    print('-' * width, '\t', '-' * 20, '\t', '-' * 9)
    for obj in objects:
        key = obj['Key']
        date = obj['LastModified'].strftime("%b %d %Y %H:%M:%S")
        size = sizeof_fmt(obj['Size'])
        print('{:{width}} \t {} \t {:>9}'.format(key, date, size, width=width))
    print()
def _split(entry):
    obj_size = None
    object_url = entry['url']
    metadata = requests.head(object_url)

    if 'content-length' in metadata.headers:
        obj_size = int(metadata.headers['content-length'])

    if chunk_number and obj_size:
        chunk_rest = obj_size % chunk_number
        obj_chunk_size = (obj_size // chunk_number) + \
            round((chunk_rest / chunk_number) + 0.5)
    elif chunk_size and obj_size:
        obj_chunk_size = chunk_size
    elif obj_size:
        obj_chunk_size = obj_size
    else:
        obj_chunk_size = obj_size = 1

    if 'accept-ranges' not in metadata.headers:
        obj_chunk_size = obj_size

    size = total_partitions = 0

    ci = obj_size
    cz = obj_chunk_size
    parts = ci // cz + (ci % cz > 0)
    logger.debug('Creating {} partitions from object {} ({})'.format(
        parts, object_url, sizeof_fmt(obj_size)))

    while size < obj_size:
        brange = (size, size + obj_chunk_size + CHUNK_THRESHOLD)
        brange = None if obj_size == obj_chunk_size else brange

        partition = entry.copy()
        partition['url'] = CloudObjectUrl(object_url)
        partition['url'].data_byte_range = brange
        partition['url'].chunk_size = obj_chunk_size
        partition['url'].part = total_partitions
        partitions.append(partition)

        total_partitions += 1
        size += obj_chunk_size

    parts_per_object.append(total_partitions)
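# --- Illustrative note (not part of the library source) ---
# Each data_byte_range produced above maps onto a standard HTTP Range request
# when a worker later fetches its slice. The URL and byte range below are
# hypothetical, and this is generic `requests` usage, not the library's own
# fetch code.
import requests

object_url = 'https://example.com/data/large-file.csv'  # hypothetical
start, end = 0, 33_554_431                               # first ~32 MiB

# Only servers that advertise 'Accept-Ranges: bytes' honour partial requests,
# which is why _split() above falls back to a single partition otherwise.
resp = requests.get(object_url,
                    headers={'Range': 'bytes={}-{}'.format(start, end)},
                    stream=True)
print(resp.status_code)  # 206 Partial Content when ranges are honoured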
def put_object(self, container_name, key, data):
    """
    Put an object in Swift. Override the object if the key already exists.
    :param key: key of the object.
    :param data: data of the object
    :type data: str/bytes
    :return: None
    """
    url = '/'.join([self.endpoint, container_name, key])
    try:
        res = self.session.put(url, data=data)
        status = 'OK' if res.status_code == 201 else 'Error'
        try:
            logger.debug('PUT Object {} - Size: {} - {}'.format(key, sizeof_fmt(len(data)), status))
        except Exception:
            logger.debug('PUT Object {} - {}'.format(key, status))
    except Exception as e:
        print(e)
def runtime_packages(storage_config):
    logger.info("Extracting preinstalled Python modules...")
    internal_storage = InternalStorage(storage_config)

    runtime_meta = dict()
    mods = list(pkgutil.iter_modules())
    runtime_meta['preinstalls'] = [entry for entry in sorted([[mod, is_pkg] for _, mod, is_pkg in mods])]
    python_version = sys.version_info
    runtime_meta['python_ver'] = str(python_version[0]) + "." + str(python_version[1])

    activation_id = storage_config['activation_id']

    status_key = create_runtime_meta_key(JOBS_PREFIX, activation_id)
    logger.debug("Runtime metadata key {}".format(status_key))
    dmpd_response_status = json.dumps(runtime_meta)
    drs = sizeof_fmt(len(dmpd_response_status))
    logger.info("Storing execution stats - Size: {}".format(drs))
    internal_storage.put_data(status_key, dmpd_response_status)
def _send_status_os(self):
    """ Send the status event to the Object Storage """
    executor_id = self.response['executor_id']
    job_id = self.response['job_id']
    call_id = self.response['call_id']
    act_id = self.response['activation_id']

    if self.response['type'] == '__init__':
        init_key = create_init_key(JOBS_PREFIX, executor_id, job_id, call_id, act_id)
        self.internal_storage.put_data(init_key, '')

    elif self.response['type'] == '__end__':
        status_key = create_status_key(JOBS_PREFIX, executor_id, job_id, call_id)
        dmpd_response_status = json.dumps(self.response)
        drs = sizeof_fmt(len(dmpd_response_status))
        logger.info("Storing execution stats - Size: {}".format(drs))
        self.internal_storage.put_data(status_key, dmpd_response_status)
def get_memory_usage(formatted=True):
    """
    Gets the current memory usage of the runtime.
    To be used only in the action code.
    """
    if not is_unix_system():
        return
    split_args = False
    pids_to_show = None
    discriminate_by_pid = False

    ps_mem.verify_environment(pids_to_show)
    sorted_cmds, shareds, count, total, swaps, total_swap = \
        ps_mem.get_memory_usage(pids_to_show, split_args, discriminate_by_pid,
                                include_self=True, only_self=False)
    if formatted:
        return sizeof_fmt(int(ps_mem.human(total, units=1)))
    else:
        return int(ps_mem.human(total, units=1))
def create_partition(bucket, key, entry):

    if key.endswith('/'):
        logger.debug(f'Discarding object "{key}" as it is a prefix folder (0.0B)')
        return

    obj_size = keys_dict[bucket][key]

    if chunk_number:
        chunk_rest = obj_size % chunk_number
        obj_chunk_size = (obj_size // chunk_number) + \
            round((chunk_rest / chunk_number) + 0.5)
    elif chunk_size:
        obj_chunk_size = chunk_size
    else:
        obj_chunk_size = obj_size

    size = total_partitions = 0

    ci = obj_size
    cz = obj_chunk_size
    parts = ci // cz + (ci % cz > 0)
    logger.debug('Creating {} partitions from object {} ({})'.format(
        parts, key, sizeof_fmt(obj_size)))

    while size < obj_size:
        brange = (size, size + obj_chunk_size + CHUNK_THRESHOLD)
        brange = None if obj_size == obj_chunk_size else brange

        partition = entry.copy()
        partition['obj'] = CloudObject(sb, bucket, key)
        partition['obj'].data_byte_range = brange
        partition['obj'].chunk_size = obj_chunk_size
        partition['obj'].part = total_partitions
        partitions.append(partition)

        total_partitions += 1
        size += obj_chunk_size

    parts_per_object.append(total_partitions)
def _split(entry):
    path = entry['obj']
    file_stats = os.stat(entry['obj'])
    obj_size = int(file_stats.st_size)

    if chunk_number and obj_size:
        chunk_rest = obj_size % chunk_number
        obj_chunk_size = (obj_size // chunk_number) + \
            round((chunk_rest / chunk_number) + 0.5)
    elif chunk_size and obj_size:
        obj_chunk_size = chunk_size
    elif obj_size:
        obj_chunk_size = obj_size
    else:
        obj_chunk_size = obj_size = 1

    size = total_partitions = 0

    ci = obj_size
    cz = obj_chunk_size
    parts = ci // cz + (ci % cz > 0)
    logger.debug('Creating {} partitions from file {} ({})'.format(
        parts, path, sizeof_fmt(obj_size)))

    while size < obj_size:
        brange = (size, size + obj_chunk_size + CHUNK_THRESHOLD)
        brange = None if obj_size == obj_chunk_size else brange

        partition = entry.copy()
        partition['obj'] = CloudObjectLocal(path)
        partition['obj'].data_byte_range = brange
        partition['obj'].chunk_size = obj_chunk_size
        partition['obj'].part = total_partitions
        partitions.append(partition)

        total_partitions += 1
        size += obj_chunk_size

    parts_per_object.append(total_partitions)
def get_memory_usage(formatted=True):
    """
    Gets the current memory usage of the runtime.
    To be used only in the action code.
    """
    from lithops.libs import ps_mem
    if not is_unix_system() or os.geteuid() != 0:
        # Non-Unix systems and non-root users can't run
        # the ps_mem module
        return
    split_args = False
    pids_to_show = None
    discriminate_by_pid = False

    ps_mem.verify_environment(pids_to_show)
    sorted_cmds, shareds, count, total, swaps, total_swap = \
        ps_mem.get_memory_usage(pids_to_show, split_args, discriminate_by_pid,
                                include_self=True, only_self=False)
    if formatted:
        return sizeof_fmt(int(ps_mem.human(total, units=1)))
    else:
        return int(ps_mem.human(total, units=1))
def put_object(self, bucket_name, key, data):
    """
    Put an object in COS. Override the object if the key already exists.
    :param key: key of the object.
    :param data: data of the object
    :type data: str/bytes
    :return: None
    """
    try:
        res = self.s3_client.put_object(Bucket=bucket_name, Key=key, Body=data)
        status = 'OK' if res['ResponseMetadata']['HTTPStatusCode'] == 200 else 'Error'
        try:
            logger.debug('PUT Object {} - Size: {} - {}'.format(
                key, sizeof_fmt(len(data)), status))
        except Exception:
            logger.debug('PUT Object {} {}'.format(key, status))
    except botocore.exceptions.ClientError as e:
        if e.response['Error']['Code'] == 'NoSuchKey':
            raise StorageNoSuchKeyError(bucket_name, key)
        else:
            raise e
def run_job(job):
    """
    Runs a single job within a separate process
    """
    start_tstamp = time.time()
    setup_lithops_logger(job.log_level)

    logger.info("Lithops v{} - Starting execution".format(__version__))
    logger.info("Execution ID: {}/{}".format(job.job_key, job.call_id))
    logger.debug("Runtime name: {}".format(job.runtime_name))
    if job.runtime_memory:
        logger.debug("Runtime memory: {}MB".format(job.runtime_memory))
    logger.debug("Function timeout: {}s".format(job.execution_timeout))

    env = job.extra_env
    env['LITHOPS_WORKER'] = 'True'
    env['PYTHONUNBUFFERED'] = 'True'
    env['LITHOPS_CONFIG'] = json.dumps(job.config)
    env['PYTHONPATH'] = "{}:{}".format(os.getcwd(), LITHOPS_LIBS_PATH)
    env['__LITHOPS_SESSION_ID'] = '-'.join([job.job_key, job.call_id])
    os.environ.update(env)

    storage_config = extract_storage_config(job.config)
    internal_storage = InternalStorage(storage_config)

    call_status = CallStatus(job.config, internal_storage)
    call_status.response['worker_start_tstamp'] = start_tstamp
    call_status.response['host_submit_tstamp'] = job.host_submit_tstamp
    call_status.response['call_id'] = job.call_id
    call_status.response['job_id'] = job.job_id
    call_status.response['executor_id'] = job.executor_id

    show_memory_peak = strtobool(os.environ.get('SHOW_MEMORY_PEAK', 'False'))

    try:
        if __version__ != job.lithops_version:
            msg = ("Lithops version mismatch. Host version: {} - Runtime version: {}"
                   .format(job.lithops_version, __version__))
            raise RuntimeError('HANDLER', msg)

        # send init status event
        call_status.send('__init__')

        if show_memory_peak:
            mm_handler_conn, mm_conn = Pipe()
            memory_monitor = Thread(target=memory_monitor_worker, args=(mm_conn, ))
            memory_monitor.start()

        job.jr_stats_file = os.path.join(job.job_dir, 'jobrunner.stats.txt')
        handler_conn, jobrunner_conn = Pipe()
        jobrunner = JobRunner(job, jobrunner_conn, internal_storage)
        logger.debug('Starting JobRunner process')
        jrp = Process(target=jobrunner.run) if is_unix_system() else Thread(target=jobrunner.run)
        jrp.start()
        jrp.join(job.execution_timeout)
        logger.debug('JobRunner process finished')

        if jrp.is_alive():
            # If the process is still alive after jrp.join(job.execution_timeout), kill it
            try:
                jrp.terminate()
            except Exception:
                # thread does not have terminate method
                pass
            msg = ('Function exceeded maximum time of {} seconds and was '
                   'killed'.format(job.execution_timeout))
            raise TimeoutError('HANDLER', msg)

        if show_memory_peak:
            mm_handler_conn.send('STOP')
            memory_monitor.join()
            peak_memory_usage = int(mm_handler_conn.recv())
            logger.info("Peak memory usage: {}".format(sizeof_fmt(peak_memory_usage)))
            call_status.response['peak_memory_usage'] = peak_memory_usage

        if not handler_conn.poll():
            logger.error('No completion message received from JobRunner process')
            logger.debug('Assuming memory overflow...')
            # Only 1 message is returned by the jobrunner when it finishes.
            # If there is no message, the jobrunner process was killed.
            # 99% of the time the jobrunner is killed due to an OOM, so assume an OOM here.
            msg = 'Function exceeded maximum memory and was killed'
            raise MemoryError('HANDLER', msg)

        if os.path.exists(job.jr_stats_file):
            with open(job.jr_stats_file, 'r') as fid:
                for l in fid.readlines():
                    key, value = l.strip().split(" ", 1)
                    try:
                        call_status.response[key] = float(value)
                    except Exception:
                        call_status.response[key] = value
                    if key in ['exception', 'exc_pickle_fail', 'result', 'new_futures']:
                        call_status.response[key] = eval(value)

    except Exception:
        # internal runtime exceptions
        print('----------------------- EXCEPTION !-----------------------')
        traceback.print_exc(file=sys.stdout)
        print('----------------------------------------------------------')
        call_status.response['exception'] = True

        pickled_exc = pickle.dumps(sys.exc_info())
        pickle.loads(pickled_exc)  # this is just to make sure they can be unpickled
        call_status.response['exc_info'] = str(pickled_exc)

    finally:
        call_status.response['worker_end_tstamp'] = time.time()

        with open(job.log_file, 'rb') as lf:
            log_str = base64.b64encode(zlib.compress(lf.read())).decode()
            call_status.response['logs'] = log_str

        call_status.send('__end__')

        # Unset specific env vars
        for key in job.extra_env:
            os.environ.pop(key, None)
        os.environ.pop('__LITHOPS_TOTAL_EXECUTORS', None)

        logger.info("Finished")
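# --- Illustrative note (not part of the library source) ---
# The jobrunner.stats.txt file parsed above is a plain "key value" text file
# written by the JobRunner stats writer. The content below is made up; it only
# mirrors the format implied by the parser, and shows why float() is tried
# first and eval() is applied to a small whitelist of keys.
stats_txt = ("worker_func_exec_time 1.2534\n"
             "func_result_size 2048\n"
             "result True\n"
             "exception False\n")

response = {}
for line in stats_txt.splitlines():
    key, value = line.strip().split(" ", 1)
    try:
        response[key] = float(value)    # numeric stats
    except ValueError:
        response[key] = value           # everything else stays a string
    if key in ['exception', 'exc_pickle_fail', 'result', 'new_futures']:
        response[key] = eval(value)     # booleans stored as their repr()

print(response)
# {'worker_func_exec_time': 1.2534, 'func_result_size': 2048.0,
#  'result': True, 'exception': False}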
def run(self):
    """
    Runs the function
    """
    # self.stats.write('worker_jobrunner_start_tstamp', time.time())
    logger.info("Started")
    result = None
    exception = False
    try:
        loaded_func_all = self._get_function_and_modules()
        self._save_modules(loaded_func_all['module_data'])
        function = self._unpickle_function(loaded_func_all['func'])
        data = self._load_data()

        if strtobool(os.environ.get('__PW_REDUCE_JOB', 'False')):
            self._wait_futures(data)
        elif is_object_processing_function(function):
            self._load_object(data)

        self._fill_optional_args(function, data)

        logger.info("Going to execute '{}()'".format(str(function.__name__)))
        print('---------------------- FUNCTION LOG ----------------------', flush=True)
        function_start_tstamp = time.time()
        result = function(**data)
        function_end_tstamp = time.time()
        print('----------------------------------------------------------', flush=True)
        logger.info("Success function execution")

        self.stats.write('worker_func_start_tstamp', function_start_tstamp)
        self.stats.write('worker_func_end_tstamp', function_end_tstamp)
        self.stats.write('worker_func_exec_time',
                         round(function_end_tstamp - function_start_tstamp, 8))

        # Check for new futures
        if result is not None:
            self.stats.write("result", True)
            if isinstance(result, ResponseFuture) or \
               (type(result) == list and len(result) > 0 and isinstance(result[0], ResponseFuture)):
                self.stats.write('new_futures', True)

            logger.debug("Pickling result")
            output_dict = {'result': result}
            pickled_output = pickle.dumps(output_dict)
        else:
            logger.debug("No result to store")
            self.stats.write("result", False)

        # self.stats.write('worker_jobrunner_end_tstamp', time.time())

    except Exception:
        exception = True
        self.stats.write("exception", True)
        exc_type, exc_value, exc_traceback = sys.exc_info()
        print('----------------------- EXCEPTION !-----------------------', flush=True)
        traceback.print_exc(file=sys.stdout)
        print('----------------------------------------------------------', flush=True)

        try:
            logger.debug("Pickling exception")
            pickled_exc = pickle.dumps((exc_type, exc_value, exc_traceback))
            pickle.loads(pickled_exc)  # this is just to make sure they can be unpickled
            self.stats.write("exc_info", str(pickled_exc))
        except Exception as pickle_exception:
            # Shockingly often, modules like subprocess don't properly
            # call the base Exception.__init__, which results in them
            # being unpickleable. As a result, we actually wrap this in a
            # try/catch block and more carefully handle the exceptions if
            # any part of this save / test-reload fails
            self.stats.write("exc_pickle_fail", True)
            pickled_exc = pickle.dumps({'exc_type': str(exc_type),
                                        'exc_value': str(exc_value),
                                        'exc_traceback': exc_traceback,
                                        'pickle_exception': pickle_exception})
            pickle.loads(pickled_exc)  # this is just to make sure it can be unpickled
            self.stats.write("exc_info", str(pickled_exc))
    finally:
        store_result = strtobool(os.environ.get('STORE_RESULT', 'True'))
        if result is not None and store_result and not exception:
            output_upload_start_tstamp = time.time()
            logger.info("Storing function result - Size: {}".format(
                sizeof_fmt(len(pickled_output))))
            self.internal_storage.put_data(self.output_key, pickled_output)
            output_upload_end_tstamp = time.time()
            self.stats.write("worker_result_upload_time",
                             round(output_upload_end_tstamp - output_upload_start_tstamp, 8))
        self.jobrunner_conn.send("Finished")
        logger.info("Finished")
def _create_job(config, internal_storage, executor_id, job_id, func,
                iterdata, runtime_meta, runtime_memory, extra_env,
                include_modules, exclude_modules, execution_timeout,
                host_job_meta, invoke_pool_threads=128):
    """
    :param func: the function to map over the data
    :param iterdata: An iterable of input data
    :param extra_env: Additional environment variables for CF environment. Default None.
    :param extra_meta: Additional metadata to pass to CF. Default None.
    :param remote_invocation: Enable remote invocation. Default False.
    :param invoke_pool_threads: Number of threads to use to invoke.
    :param data_all_as_one: upload the data as a single object. Default True
    :param overwrite_invoke_args: Overwrite other args. Mainly used for testing.
    :param exclude_modules: Explicitly keep these modules from pickled dependencies.
    :return: A list with size `len(iterdata)` of futures for each job
    :rtype: list of futures.
    """
    ext_env = {} if extra_env is None else extra_env.copy()
    if ext_env:
        ext_env = utils.convert_bools_to_string(ext_env)
        logger.debug("Extra environment vars {}".format(ext_env))

    job = SimpleNamespace()
    job.executor_id = executor_id
    job.job_id = job_id
    job.extra_env = ext_env
    job.execution_timeout = execution_timeout or config['lithops']['execution_timeout']
    job.function_name = func.__name__
    job.total_calls = len(iterdata)

    mode = config['lithops']['mode']

    if mode == SERVERLESS:
        job.invoke_pool_threads = invoke_pool_threads or config['serverless']['invoke_pool_threads']
        job.runtime_memory = runtime_memory or config['serverless']['runtime_memory']
        job.runtime_timeout = config['serverless']['runtime_timeout']
        if job.execution_timeout >= job.runtime_timeout:
            job.execution_timeout = job.runtime_timeout - 5

    elif mode == STANDALONE:
        job.runtime_memory = None
        runtime_timeout = config['standalone']['hard_dismantle_timeout']
        if job.execution_timeout >= runtime_timeout:
            job.execution_timeout = runtime_timeout - 10

    elif mode == LOCALHOST:
        job.runtime_memory = None
        job.runtime_timeout = execution_timeout

    exclude_modules_cfg = config['lithops'].get('exclude_modules', [])
    include_modules_cfg = config['lithops'].get('include_modules', [])

    exc_modules = set()
    inc_modules = set()
    if exclude_modules_cfg:
        exc_modules.update(exclude_modules_cfg)
    if exclude_modules:
        exc_modules.update(exclude_modules)
    if include_modules_cfg is not None:
        inc_modules.update(include_modules_cfg)
    if include_modules_cfg is None and not include_modules:
        inc_modules = None
    if include_modules is not None and include_modules:
        inc_modules.update(include_modules)
    if include_modules is None:
        inc_modules = None

    logger.debug('ExecutorID {} | JobID {} - Serializing function and data'.format(executor_id, job_id))
    job_serialize_start = time.time()
    serializer = SerializeIndependent(runtime_meta['preinstalls'])
    func_and_data_ser, mod_paths = serializer([func] + iterdata, inc_modules, exc_modules)
    data_strs = func_and_data_ser[1:]
    data_size_bytes = sum(len(x) for x in data_strs)
    module_data = create_module_data(mod_paths)
    func_str = func_and_data_ser[0]
    func_module_str = pickle.dumps({'func': func_str, 'module_data': module_data}, -1)
    func_module_size_bytes = len(func_module_str)
    total_size = utils.sizeof_fmt(data_size_bytes + func_module_size_bytes)

    host_job_meta['host_job_serialize_time'] = round(time.time() - job_serialize_start, 6)
    host_job_meta['data_size_bytes'] = data_size_bytes
    host_job_meta['func_module_size_bytes'] = func_module_size_bytes

    if 'data_limit' in config['lithops']:
        data_limit = config['lithops']['data_limit']
    else:
        data_limit = MAX_AGG_DATA_SIZE

    if data_limit and data_size_bytes > data_limit * 1024 ** 2:
        log_msg = ('ExecutorID {} | JobID {} - Total data exceeded maximum size '
                   'of {}'.format(executor_id, job_id, sizeof_fmt(data_limit * 1024 ** 2)))
        raise Exception(log_msg)

    logger.info('ExecutorID {} | JobID {} - Uploading function and data '
                '- Total: {}'.format(executor_id, job_id, total_size))

    # Upload data
    data_key = create_agg_data_key(JOBS_PREFIX, executor_id, job_id)
    job.data_key = data_key
    data_bytes, data_ranges = utils.agg_data(data_strs)
    job.data_ranges = data_ranges
    data_upload_start = time.time()
    internal_storage.put_data(data_key, data_bytes)
    data_upload_end = time.time()
    host_job_meta['host_data_upload_time'] = round(data_upload_end - data_upload_start, 6)

    func_upload_start = time.time()
    # Upload function and modules
    if config[mode].get('customized_runtime'):
        # Prepare function and modules locally to store in the runtime image later
        function_file = func.__code__.co_filename
        function_hash = hashlib.md5(open(function_file, 'rb').read()).hexdigest()[:16]
        mod_hash = hashlib.md5(repr(sorted(mod_paths)).encode('utf-8')).hexdigest()[:16]
        uuid = f'{function_hash}{mod_hash}'
        func_key = create_func_key(JOBS_PREFIX, uuid, "")
        _store_func_and_modules(func_key, func_str, module_data)
        job.ext_runtime_uuid = uuid
    else:
        func_key = create_func_key(JOBS_PREFIX, executor_id, job_id)
        internal_storage.put_func(func_key, func_module_str)
    job.func_key = func_key
    func_upload_end = time.time()

    host_job_meta['host_func_upload_time'] = round(func_upload_end - func_upload_start, 6)
    host_job_meta['host_job_created_time'] = round(time.time() - host_job_meta['host_job_create_tstamp'], 6)

    job.metadata = host_job_meta

    return job
def _create_job(config, internal_storage, executor_id, job_id, func, data,
                runtime_meta, runtime_memory=None, extra_env=None,
                invoke_pool_threads=128, include_modules=[], exclude_modules=[],
                execution_timeout=None, host_job_meta=None):
    """
    :param func: the function to map over the data
    :param iterdata: An iterable of input data
    :param extra_env: Additional environment variables for CF environment. Default None.
    :param extra_meta: Additional metadata to pass to CF. Default None.
    :param remote_invocation: Enable remote invocation. Default False.
    :param invoke_pool_threads: Number of threads to use to invoke.
    :param data_all_as_one: upload the data as a single object. Default True
    :param overwrite_invoke_args: Overwrite other args. Mainly used for testing.
    :param exclude_modules: Explicitly keep these modules from pickled dependencies.
    :return: A list with size `len(iterdata)` of futures for each job
    :rtype: list of futures.
    """
    log_level = logger.getEffectiveLevel() != logging.WARNING
    runtime_name = config['lithops']['runtime']
    if runtime_memory is None:
        runtime_memory = config['lithops']['runtime_memory']

    ext_env = {} if extra_env is None else extra_env.copy()
    if ext_env:
        ext_env = utils.convert_bools_to_string(ext_env)
        logger.debug("Extra environment vars {}".format(ext_env))

    if not data:
        return []

    if execution_timeout is None:
        execution_timeout = config['lithops']['runtime_timeout'] - 5

    job_description = {}
    job_description['runtime_name'] = runtime_name
    job_description['runtime_memory'] = runtime_memory
    job_description['execution_timeout'] = execution_timeout
    job_description['function_name'] = func.__name__
    job_description['extra_env'] = ext_env
    job_description['total_calls'] = len(data)
    job_description['invoke_pool_threads'] = invoke_pool_threads
    job_description['executor_id'] = executor_id
    job_description['job_id'] = job_id

    exclude_modules_cfg = config['lithops'].get('exclude_modules', [])
    include_modules_cfg = config['lithops'].get('include_modules', [])

    exc_modules = set()
    inc_modules = set()
    if exclude_modules_cfg:
        exc_modules.update(exclude_modules_cfg)
    if exclude_modules:
        exc_modules.update(exclude_modules)
    if include_modules_cfg is not None:
        inc_modules.update(include_modules_cfg)
    if include_modules_cfg is None and not include_modules:
        inc_modules = None
    if include_modules is not None and include_modules:
        inc_modules.update(include_modules)
    if include_modules is None:
        inc_modules = None

    logger.debug('ExecutorID {} | JobID {} - Serializing function and data'.format(executor_id, job_id))
    job_serialize_start = time.time()
    serializer = SerializeIndependent(runtime_meta['preinstalls'])
    func_and_data_ser, mod_paths = serializer([func] + data, inc_modules, exc_modules)
    data_strs = func_and_data_ser[1:]
    data_size_bytes = sum(len(x) for x in data_strs)
    module_data = create_module_data(mod_paths)
    func_str = func_and_data_ser[0]
    func_module_str = pickle.dumps({'func': func_str, 'module_data': module_data}, -1)
    func_module_size_bytes = len(func_module_str)
    total_size = utils.sizeof_fmt(data_size_bytes + func_module_size_bytes)

    host_job_meta['host_job_serialize_time'] = round(time.time() - job_serialize_start, 6)
    host_job_meta['data_size_bytes'] = data_size_bytes
    host_job_meta['func_module_size_bytes'] = func_module_size_bytes

    if 'data_limit' in config['lithops']:
        data_limit = config['lithops']['data_limit']
    else:
        data_limit = MAX_AGG_DATA_SIZE

    if data_limit and data_size_bytes > data_limit * 1024 ** 2:
        log_msg = ('ExecutorID {} | JobID {} - Total data exceeded maximum size '
                   'of {}'.format(executor_id, job_id, sizeof_fmt(data_limit * 1024 ** 2)))
        raise Exception(log_msg)

    log_msg = ('ExecutorID {} | JobID {} - Uploading function and data '
               '- Total: {}'.format(executor_id, job_id, total_size))
    logger.info(log_msg)
    if not log_level:
        print(log_msg)

    # Upload data
    data_key = create_agg_data_key(JOBS_PREFIX, executor_id, job_id)
    job_description['data_key'] = data_key
    data_bytes, data_ranges = utils.agg_data(data_strs)
    job_description['data_ranges'] = data_ranges
    data_upload_start = time.time()
    internal_storage.put_data(data_key, data_bytes)
    data_upload_end = time.time()
    host_job_meta['host_data_upload_time'] = round(data_upload_end - data_upload_start, 6)

    # Upload function and modules
    func_upload_start = time.time()
    func_key = create_func_key(JOBS_PREFIX, executor_id, job_id)
    job_description['func_key'] = func_key
    internal_storage.put_func(func_key, func_module_str)
    func_upload_end = time.time()
    host_job_meta['host_func_upload_time'] = round(func_upload_end - func_upload_start, 6)

    host_job_meta['host_job_created_time'] = round(time.time() - host_job_meta['host_job_create_tstamp'], 6)

    job_description['metadata'] = host_job_meta

    return job_description
def _create_job(config, internal_storage, executor_id, job_id, func, iterdata,
                runtime_meta, runtime_memory, extra_env, include_modules,
                exclude_modules, execution_timeout, host_job_meta, chunksize=None):
    """
    Creates a new Job
    """
    global FUNCTION_CACHE

    ext_env = {} if extra_env is None else extra_env.copy()
    if ext_env:
        ext_env = utils.convert_bools_to_string(ext_env)
        logger.debug("Extra environment vars {}".format(ext_env))

    mode = config['lithops']['mode']
    backend = config['lithops']['backend']

    job = SimpleNamespace()
    job.chunksize = chunksize or config['lithops']['chunksize']
    job.worker_processes = config[backend]['worker_processes']
    job.execution_timeout = execution_timeout or config['lithops']['execution_timeout']
    job.executor_id = executor_id
    job.job_id = job_id
    job.job_key = create_job_key(job.executor_id, job.job_id)
    job.extra_env = ext_env
    job.function_name = func.__name__ if inspect.isfunction(func) \
        or inspect.ismethod(func) else type(func).__name__
    job.total_calls = len(iterdata)

    if mode == SERVERLESS:
        job.runtime_memory = runtime_memory or config[backend]['runtime_memory']
        job.runtime_timeout = config[backend]['runtime_timeout']
        if job.execution_timeout >= job.runtime_timeout:
            job.execution_timeout = job.runtime_timeout - 5

    elif mode in STANDALONE:
        job.runtime_memory = None
        runtime_timeout = config[STANDALONE]['hard_dismantle_timeout']
        if job.execution_timeout >= runtime_timeout:
            job.execution_timeout = runtime_timeout - 10

    elif mode == LOCALHOST:
        job.runtime_memory = None
        job.runtime_timeout = None

    exclude_modules_cfg = config['lithops'].get('exclude_modules', [])
    include_modules_cfg = config['lithops'].get('include_modules', [])

    exc_modules = set()
    inc_modules = set()
    if exclude_modules_cfg:
        exc_modules.update(exclude_modules_cfg)
    if exclude_modules:
        exc_modules.update(exclude_modules)
    if include_modules_cfg is not None:
        inc_modules.update(include_modules_cfg)
    if include_modules_cfg is None and not include_modules:
        inc_modules = None
    if include_modules is not None and include_modules:
        inc_modules.update(include_modules)
    if include_modules is None:
        inc_modules = None

    logger.debug('ExecutorID {} | JobID {} - Serializing function and data'.format(executor_id, job_id))
    job_serialize_start = time.time()
    serializer = SerializeIndependent(runtime_meta['preinstalls'])
    func_and_data_ser, mod_paths = serializer([func] + iterdata, inc_modules, exc_modules)
    data_strs = func_and_data_ser[1:]
    data_size_bytes = sum(len(x) for x in data_strs)
    module_data = create_module_data(mod_paths)
    func_str = func_and_data_ser[0]
    func_module_str = pickle.dumps({'func': func_str, 'module_data': module_data}, -1)
    func_module_size_bytes = len(func_module_str)

    host_job_meta['host_job_serialize_time'] = round(time.time() - job_serialize_start, 6)
    host_job_meta['data_size_bytes'] = data_size_bytes
    host_job_meta['func_module_size_bytes'] = func_module_size_bytes

    # Check data limit
    if 'data_limit' in config['lithops']:
        data_limit = config['lithops']['data_limit']
    else:
        data_limit = MAX_AGG_DATA_SIZE

    if data_limit and data_size_bytes > data_limit * 1024 ** 2:
        log_msg = ('ExecutorID {} | JobID {} - Total data exceeded maximum size '
                   'of {}'.format(executor_id, job_id, utils.sizeof_fmt(data_limit * 1024 ** 2)))
        raise Exception(log_msg)

    # Upload function and data
    upload_function = not config['lithops'].get('customized_runtime', False)
    upload_data = not (len(str(data_strs[0])) * job.chunksize < 8 * 1024
                       and backend in FAAS_BACKENDS)

    # Upload function and modules
    if upload_function:
        function_hash = hashlib.md5(func_module_str).hexdigest()
        job.func_key = create_func_key(executor_id, function_hash)

        if job.func_key not in FUNCTION_CACHE:
            logger.debug('ExecutorID {} | JobID {} - Uploading function and modules '
                         'to the storage backend'.format(executor_id, job_id))
            func_upload_start = time.time()
            internal_storage.put_func(job.func_key, func_module_str)
            func_upload_end = time.time()
            host_job_meta['host_func_upload_time'] = round(func_upload_end - func_upload_start, 6)
            FUNCTION_CACHE.add(job.func_key)
        else:
            logger.debug('ExecutorID {} | JobID {} - Function and modules '
                         'found in local cache'.format(executor_id, job_id))
            host_job_meta['host_func_upload_time'] = 0
    else:
        # Prepare function and modules locally to store in the runtime image later
        function_file = func.__code__.co_filename
        function_hash = hashlib.md5(open(function_file, 'rb').read()).hexdigest()[:16]
        mod_hash = hashlib.md5(repr(sorted(mod_paths)).encode('utf-8')).hexdigest()[:16]
        job.func_key = func_key_suffix
        job.ext_runtime_uuid = '{}{}'.format(function_hash, mod_hash)
        job.local_tmp_dir = os.path.join(CUSTOM_RUNTIME_DIR, job.ext_runtime_uuid)
        _store_func_and_modules(job.local_tmp_dir, job.func_key, func_str, module_data)
        host_job_meta['host_func_upload_time'] = 0

    # Upload data
    if upload_data:
        # Upload iterdata to COS only if a single element is greater than 8KB
        logger.debug('ExecutorID {} | JobID {} - Uploading data to the storage backend'
                     .format(executor_id, job_id))
        # pass iterdata through an object storage file
        data_key = create_data_key(executor_id, job_id)
        job.data_key = data_key
        data_bytes, data_byte_ranges = utils.agg_data(data_strs)
        job.data_byte_ranges = data_byte_ranges
        data_upload_start = time.time()
        internal_storage.put_data(data_key, data_bytes)
        data_upload_end = time.time()
        host_job_meta['host_data_upload_time'] = round(data_upload_end - data_upload_start, 6)
    else:
        # pass iterdata as part of the invocation payload
        logger.debug('ExecutorID {} | JobID {} - Data per activation is < '
                     '{}. Passing data through invocation payload'
                     .format(executor_id, job_id, utils.sizeof_fmt(8 * 1024)))
        job.data_key = None
        job.data_byte_ranges = None
        job.data_byte_strs = data_strs
        host_job_meta['host_data_upload_time'] = 0

    host_job_meta['host_job_created_time'] = round(time.time() - host_job_meta['host_job_create_tstamp'], 6)

    job.metadata = host_job_meta

    return job
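# --- Illustrative note (not part of the library source) ---
# The upload_function branch above avoids re-uploading identical code by keying
# the serialized function on its MD5 digest and remembering seen keys in a
# process-local set. A minimal standalone sketch of that idea; the storage dict
# and key layout here are hypothetical stand-ins, not the library's API.
import hashlib
import pickle

FUNCTION_CACHE = set()   # process-local cache of already-uploaded keys
_fake_storage = {}       # hypothetical stand-in for the storage backend

def upload_function_once(executor_id, func_module_str):
    """Upload a serialized function only the first time its content is seen."""
    function_hash = hashlib.md5(func_module_str).hexdigest()
    func_key = '{}/{}'.format(executor_id, function_hash)  # hypothetical key layout
    if func_key not in FUNCTION_CACHE:
        _fake_storage[func_key] = func_module_str          # stands in for put_func()
        FUNCTION_CACHE.add(func_key)
        return func_key, True     # uploaded
    return func_key, False        # served from the cache

payload = pickle.dumps({'func': b'...', 'module_data': {}}, -1)
print(upload_function_once('exec-0', payload))  # (..., True): first call uploads
print(upload_function_once('exec-0', payload))  # (..., False): cache hit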
def _split_objects_from_keys(map_func_args_list, keys_dict, chunk_size, chunk_number):
    """
    Create partitions from a list of object keys
    """
    if chunk_number:
        logger.debug('Chunk number set to {}'.format(chunk_number))
    elif chunk_size:
        logger.debug('Chunk size set to {}'.format(chunk_size))
    else:
        logger.debug('Chunk size and chunk number not set')

    partitions = []
    parts_per_object = []

    for entry in map_func_args_list:
        # each entry is a key
        sb, bucket, prefix, obj_name = utils.split_object_url(entry['obj'])
        key = '/'.join([prefix, obj_name]) if prefix else obj_name

        try:
            obj_size = keys_dict[bucket][key]
        except Exception:
            raise Exception('Object key "{}" does not exist in "{}" bucket'.format(key, bucket))

        if chunk_number:
            chunk_rest = obj_size % chunk_number
            obj_chunk_size = (obj_size // chunk_number) + \
                round((chunk_rest / chunk_number) + 0.5)
        elif chunk_size:
            obj_chunk_size = chunk_size
        else:
            obj_chunk_size = obj_size

        size = total_partitions = 0

        ci = obj_size
        cz = obj_chunk_size
        parts = ci // cz + (ci % cz > 0)
        logger.debug('Creating {} partitions from object {} ({})'.format(
            parts, key, sizeof_fmt(obj_size)))

        while size < obj_size:
            brange = (size, size + obj_chunk_size + CHUNK_THRESHOLD)
            brange = None if obj_size == obj_chunk_size else brange

            partition = entry.copy()
            partition['obj'] = CloudObject(sb, bucket, key)
            partition['obj'].data_byte_range = brange
            partition['obj'].chunk_size = obj_chunk_size
            partition['obj'].part = total_partitions
            partitions.append(partition)

            total_partitions += 1
            size += obj_chunk_size

        parts_per_object.append(total_partitions)

    return partitions, parts_per_object
def run(self):
    """
    Runs the function
    """
    # self.stats.write('worker_jobrunner_start_tstamp', time.time())
    logger.debug("Process started")
    result = None
    exception = False
    fn_name = None

    try:
        func = pickle.loads(self.job.func)
        data = pickle.loads(self.job.data)

        if strtobool(os.environ.get('__LITHOPS_REDUCE_JOB', 'False')):
            self._wait_futures(data)
        elif is_object_processing_function(func):
            self._load_object(data)

        self._fill_optional_args(func, data)

        fn_name = func.__name__ if inspect.isfunction(func) \
            or inspect.ismethod(func) else type(func).__name__

        self.prometheus.send_metric(name='function_start',
                                    value=time.time(),
                                    type='gauge',
                                    labels=(('job_id', self.job.job_key),
                                            ('call_id', '-'.join([self.job.job_key, self.job.call_id])),
                                            ('function_name', fn_name or 'undefined')))

        logger.info("Going to execute '{}()'".format(str(fn_name)))
        print('---------------------- FUNCTION LOG ----------------------')
        function_start_tstamp = time.time()
        result = func(**data)
        function_end_tstamp = time.time()
        print('----------------------------------------------------------')
        logger.info("Success function execution")

        self.stats.write('worker_func_start_tstamp', function_start_tstamp)
        self.stats.write('worker_func_end_tstamp', function_end_tstamp)
        self.stats.write('worker_func_exec_time',
                         round(function_end_tstamp - function_start_tstamp, 8))

        # Check for new futures
        if result is not None:
            if isinstance(result, ResponseFuture) or isinstance(result, FuturesList) \
               or (type(result) == list and len(result) > 0 and isinstance(result[0], ResponseFuture)):
                self.stats.write('new_futures', pickle.dumps(result))
                result = None
            else:
                self.stats.write("result", True)
                logger.debug("Pickling result")
                output_dict = {'result': result}
                pickled_output = pickle.dumps(output_dict)
                self.stats.write('func_result_size', len(pickled_output))

        if result is None:
            logger.debug("No result to store")
            self.stats.write("result", False)
            self.stats.write('func_result_size', 0)

        # self.stats.write('worker_jobrunner_end_tstamp', time.time())

    except Exception:
        exception = True
        self.stats.write("exception", True)
        exc_type, exc_value, exc_traceback = sys.exc_info()
        print('----------------------- EXCEPTION !-----------------------')
        traceback.print_exc(file=sys.stdout)
        print('----------------------------------------------------------')

        try:
            logger.debug("Pickling exception")
            pickled_exc = pickle.dumps((exc_type, exc_value, exc_traceback))
            pickle.loads(pickled_exc)  # this is just to make sure they can be unpickled
            self.stats.write("exc_info", str(pickled_exc))
        except Exception as pickle_exception:
            # Shockingly often, modules like subprocess don't properly
            # call the base Exception.__init__, which results in them
            # being unpickleable. As a result, we actually wrap this in a
            # try/catch block and more carefully handle the exceptions if
            # any part of this save / test-reload fails
            self.stats.write("exc_pickle_fail", True)
            pickled_exc = pickle.dumps({'exc_type': str(exc_type),
                                        'exc_value': str(exc_value),
                                        'exc_traceback': exc_traceback,
                                        'pickle_exception': pickle_exception})
            pickle.loads(pickled_exc)  # this is just to make sure it can be unpickled
            self.stats.write("exc_info", str(pickled_exc))
    finally:
        self.prometheus.send_metric(name='function_end',
                                    value=time.time(),
                                    type='gauge',
                                    labels=(('job_id', self.job.job_key),
                                            ('call_id', '-'.join([self.job.job_key, self.job.call_id])),
                                            ('function_name', fn_name or 'undefined')))

        store_result = strtobool(os.environ.get('STORE_RESULT', 'True'))
        if result is not None and store_result and not exception:
            output_upload_start_tstamp = time.time()
            logger.info("Storing function result - Size: {}".format(
                sizeof_fmt(len(pickled_output))))
            self.internal_storage.put_data(self.output_key, pickled_output)
            output_upload_end_tstamp = time.time()
            self.stats.write("worker_result_upload_time",
                             round(output_upload_end_tstamp - output_upload_start_tstamp, 8))
        self.jobrunner_conn.send("Finished")
        logger.info("Process finished")
def function_handler(event):
    start_tstamp = time.time()

    log_level = event['log_level']
    cloud_logging_config(log_level)
    logger.debug("Action handler started")

    extra_env = event.get('extra_env', {})
    os.environ.update(extra_env)

    os.environ.update({'LITHOPS_FUNCTION': 'True', 'PYTHONUNBUFFERED': 'True'})
    os.environ.pop('LITHOPS_TOTAL_EXECUTORS', None)

    config = event['config']
    call_id = event['call_id']
    job_id = event['job_id']
    executor_id = event['executor_id']
    exec_id = "{}/{}/{}".format(executor_id, job_id, call_id)
    logger.info("Execution-ID: {}".format(exec_id))

    runtime_name = event['runtime_name']
    runtime_memory = event['runtime_memory']
    execution_timeout = event['execution_timeout']
    logger.debug("Runtime name: {}".format(runtime_name))
    logger.debug("Runtime memory: {}MB".format(runtime_memory))
    logger.debug("Function timeout: {}s".format(execution_timeout))

    func_key = event['func_key']
    data_key = event['data_key']
    data_byte_range = event['data_byte_range']

    storage_config = extract_storage_config(config)
    internal_storage = InternalStorage(storage_config)

    call_status = CallStatus(config, internal_storage)
    call_status.response['host_submit_tstamp'] = event['host_submit_tstamp']
    call_status.response['worker_start_tstamp'] = start_tstamp
    context_dict = {
        'python_version': os.environ.get("PYTHON_VERSION"),
        'call_id': call_id,
        'job_id': job_id,
        'executor_id': executor_id,
        'activation_id': os.environ.get('__PW_ACTIVATION_ID')
    }
    call_status.response.update(context_dict)

    show_memory_peak = strtobool(os.environ.get('SHOW_MEMORY_PEAK', 'False'))

    try:
        if version.__version__ != event['lithops_version']:
            msg = ("Lithops version mismatch. Host version: {} - Runtime version: {}"
                   .format(event['lithops_version'], version.__version__))
            raise RuntimeError('HANDLER', msg)

        # send init status event
        call_status.send('__init__')

        # call_status.response['free_disk_bytes'] = free_disk_space("/tmp")

        custom_env = {'LITHOPS_CONFIG': json.dumps(config),
                      'LITHOPS_EXECUTION_ID': exec_id,
                      'PYTHONPATH': "{}:{}".format(os.getcwd(), LITHOPS_LIBS_PATH)}
        os.environ.update(custom_env)

        jobrunner_stats_dir = os.path.join(STORAGE_FOLDER, storage_config['bucket'],
                                           JOBS_PREFIX, executor_id, job_id, call_id)
        os.makedirs(jobrunner_stats_dir, exist_ok=True)
        jobrunner_stats_filename = os.path.join(jobrunner_stats_dir, 'jobrunner.stats.txt')

        jobrunner_config = {'lithops_config': config,
                            'call_id': call_id,
                            'job_id': job_id,
                            'executor_id': executor_id,
                            'func_key': func_key,
                            'data_key': data_key,
                            'log_level': log_level,
                            'data_byte_range': data_byte_range,
                            'output_key': create_output_key(JOBS_PREFIX, executor_id, job_id, call_id),
                            'stats_filename': jobrunner_stats_filename}

        if show_memory_peak:
            mm_handler_conn, mm_conn = Pipe()
            memory_monitor = Thread(target=memory_monitor_worker, args=(mm_conn, ))
            memory_monitor.start()

        handler_conn, jobrunner_conn = Pipe()
        jobrunner = JobRunner(jobrunner_config, jobrunner_conn, internal_storage)
        logger.debug('Starting JobRunner process')
        local_execution = strtobool(os.environ.get('__PW_LOCAL_EXECUTION', 'False'))
        jrp = Thread(target=jobrunner.run) if local_execution else Process(target=jobrunner.run)
        jrp.start()
        jrp.join(execution_timeout)
        logger.debug('JobRunner process finished')

        if jrp.is_alive():
            # If the process is still alive after jrp.join(execution_timeout), kill it
            try:
                jrp.terminate()
            except Exception:
                # thread does not have terminate method
                pass
            msg = ('Function exceeded maximum time of {} seconds and was '
                   'killed'.format(execution_timeout))
            raise TimeoutError('HANDLER', msg)

        if show_memory_peak:
            mm_handler_conn.send('STOP')
            memory_monitor.join()
            peak_memory_usage = int(mm_handler_conn.recv())
            logger.info("Peak memory usage: {}".format(sizeof_fmt(peak_memory_usage)))
            call_status.response['peak_memory_usage'] = peak_memory_usage

        if not handler_conn.poll():
            logger.error('No completion message received from JobRunner process')
            logger.debug('Assuming memory overflow...')
            # Only 1 message is returned by the jobrunner when it finishes.
            # If there is no message, the jobrunner process was killed.
            # 99% of the time the jobrunner is killed due to an OOM, so assume an OOM here.
            msg = 'Function exceeded maximum memory and was killed'
            raise MemoryError('HANDLER', msg)

        if os.path.exists(jobrunner_stats_filename):
            with open(jobrunner_stats_filename, 'r') as fid:
                for l in fid.readlines():
                    key, value = l.strip().split(" ", 1)
                    try:
                        call_status.response[key] = float(value)
                    except Exception:
                        call_status.response[key] = value
                    if key in ['exception', 'exc_pickle_fail', 'result', 'new_futures']:
                        call_status.response[key] = eval(value)

    except Exception:
        # internal runtime exceptions
        print('----------------------- EXCEPTION !-----------------------', flush=True)
        traceback.print_exc(file=sys.stdout)
        print('----------------------------------------------------------', flush=True)
        call_status.response['exception'] = True

        pickled_exc = pickle.dumps(sys.exc_info())
        pickle.loads(pickled_exc)  # this is just to make sure they can be unpickled
        call_status.response['exc_info'] = str(pickled_exc)

    finally:
        call_status.response['worker_end_tstamp'] = time.time()
        call_status.send('__end__')

        for key in extra_env:
            os.environ.pop(key)

        logger.info("Finished")
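# --- Illustrative note (not part of the library source) ---
# memory_monitor_worker itself is not shown in this section; the handlers above
# only rely on a simple Pipe protocol: sample memory until a 'STOP' message
# arrives, then reply with the peak value in bytes. The sketch below is a
# hypothetical worker compatible with that protocol, reusing get_memory_usage()
# defined earlier; the sampling interval and internals are assumptions.
def memory_monitor_worker(conn, delay=0.1):
    """Hypothetical monitor: track peak runtime memory until told to stop."""
    peak = 0
    while not conn.poll(delay):                    # wake up every `delay` seconds
        usage = get_memory_usage(formatted=False)  # bytes, or None on non-Unix systems
        if usage and usage > peak:
            peak = usage
    conn.recv()                                    # consume the 'STOP' message
    conn.send(peak)                                # handler does int(recv())

# Usage mirrors the handlers above:
#   mm_handler_conn, mm_conn = Pipe()
#   Thread(target=memory_monitor_worker, args=(mm_conn,)).start()
#   ...run the job...
#   mm_handler_conn.send('STOP'); peak = int(mm_handler_conn.recv())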
def _create_job(config, internal_storage, executor_id, job_id, func, iterdata,
                runtime_meta, runtime_memory, extra_env, include_modules,
                exclude_modules, execution_timeout, host_job_meta,
                chunksize=None, worker_processes=None, invoke_pool_threads=16):
    """
    Creates a new Job
    """
    ext_env = {} if extra_env is None else extra_env.copy()
    if ext_env:
        ext_env = utils.convert_bools_to_string(ext_env)
        logger.debug("Extra environment vars {}".format(ext_env))

    job = SimpleNamespace()
    job.chunksize = chunksize or config['lithops']['chunksize']
    job.worker_processes = worker_processes or config['lithops']['worker_processes']
    job.execution_timeout = execution_timeout or config['lithops']['execution_timeout']
    job.executor_id = executor_id
    job.job_id = job_id
    job.job_key = create_job_key(job.executor_id, job.job_id)
    job.extra_env = ext_env
    job.function_name = func.__name__
    job.total_calls = len(iterdata)

    mode = config['lithops']['mode']

    if mode == SERVERLESS:
        job.invoke_pool_threads = invoke_pool_threads or config['serverless']['invoke_pool_threads']
        job.runtime_memory = runtime_memory or config['serverless']['runtime_memory']
        job.runtime_timeout = config['serverless']['runtime_timeout']
        if job.execution_timeout >= job.runtime_timeout:
            job.execution_timeout = job.runtime_timeout - 5

    elif mode == STANDALONE:
        job.runtime_memory = None
        runtime_timeout = config['standalone']['hard_dismantle_timeout']
        if job.execution_timeout >= runtime_timeout:
            job.execution_timeout = runtime_timeout - 10

    elif mode == LOCALHOST:
        job.runtime_memory = None
        job.runtime_timeout = execution_timeout

    exclude_modules_cfg = config['lithops'].get('exclude_modules', [])
    include_modules_cfg = config['lithops'].get('include_modules', [])

    exc_modules = set()
    inc_modules = set()
    if exclude_modules_cfg:
        exc_modules.update(exclude_modules_cfg)
    if exclude_modules:
        exc_modules.update(exclude_modules)
    if include_modules_cfg is not None:
        inc_modules.update(include_modules_cfg)
    if include_modules_cfg is None and not include_modules:
        inc_modules = None
    if include_modules is not None and include_modules:
        inc_modules.update(include_modules)
    if include_modules is None:
        inc_modules = None

    logger.debug('ExecutorID {} | JobID {} - Serializing function and data'.format(executor_id, job_id))
    job_serialize_start = time.time()
    serializer = SerializeIndependent(runtime_meta['preinstalls'])
    func_and_data_ser, mod_paths = serializer([func] + iterdata, inc_modules, exc_modules)
    data_strs = func_and_data_ser[1:]
    data_size_bytes = sum(len(x) for x in data_strs)
    module_data = create_module_data(mod_paths)
    func_str = func_and_data_ser[0]
    func_module_str = pickle.dumps({'func': func_str, 'module_data': module_data}, -1)
    func_module_size_bytes = len(func_module_str)
    total_size = utils.sizeof_fmt(data_size_bytes + func_module_size_bytes)

    host_job_meta['host_job_serialize_time'] = round(time.time() - job_serialize_start, 6)
    host_job_meta['data_size_bytes'] = data_size_bytes
    host_job_meta['func_module_size_bytes'] = func_module_size_bytes

    if 'data_limit' in config['lithops']:
        data_limit = config['lithops']['data_limit']
    else:
        data_limit = MAX_AGG_DATA_SIZE

    if data_limit and data_size_bytes > data_limit * 1024 ** 2:
        log_msg = ('ExecutorID {} | JobID {} - Total data exceeded maximum size '
                   'of {}'.format(executor_id, job_id, sizeof_fmt(data_limit * 1024 ** 2)))
        raise Exception(log_msg)

    logger.info('ExecutorID {} | JobID {} - Uploading function and data '
                '- Total: {}'.format(executor_id, job_id, total_size))

    # Upload data
    data_key = create_agg_data_key(JOBS_PREFIX, executor_id, job_id)
    job.data_key = data_key
    data_bytes, data_byte_ranges = utils.agg_data(data_strs)
    job.data_byte_ranges = data_byte_ranges
    data_upload_start = time.time()
    internal_storage.put_data(data_key, data_bytes)
    data_upload_end = time.time()
    host_job_meta['host_data_upload_time'] = round(data_upload_end - data_upload_start, 6)

    func_upload_start = time.time()
    # Upload function and modules
    if config[mode].get('customized_runtime'):
        # Prepare function and modules locally to store in the runtime image later
        function_file = func.__code__.co_filename
        function_hash = hashlib.md5(open(function_file, 'rb').read()).hexdigest()[:16]
        mod_hash = hashlib.md5(repr(sorted(mod_paths)).encode('utf-8')).hexdigest()[:16]
        uuid = '{}{}'.format(function_hash, mod_hash)
        func_key = create_func_key(JOBS_PREFIX, uuid, "")
        _store_func_and_modules(func_key, func_str, module_data)
        job.ext_runtime_uuid = uuid
    else:
        func_key = create_func_key(JOBS_PREFIX, executor_id, job_id)
        internal_storage.put_func(func_key, func_module_str)
    job.func_key = func_key
    func_upload_end = time.time()

    host_job_meta['host_func_upload_time'] = round(func_upload_end - func_upload_start, 6)
    host_job_meta['host_job_created_time'] = round(time.time() - host_job_meta['host_job_create_tstamp'], 6)

    job.metadata = host_job_meta

    return job