Example #1
def create_reduce_job(config,
                      internal_storage,
                      executor_id,
                      reduce_job_id,
                      reduce_function,
                      map_job,
                      map_futures,
                      runtime_meta,
                      reducer_one_per_object=False,
                      runtime_memory=None,
                      extra_env=None,
                      include_modules=[],
                      exclude_modules=[],
                      execution_timeout=None):
    """
    Wrapper to create a reduce job. Apply a function across all map futures.
    """
    host_job_meta = {'host_job_create_tstamp': time.time()}

    iterdata = [[map_futures]]

    if 'parts_per_object' in map_job and reducer_one_per_object:
        prev_total_partitions = 0
        iterdata = []
        for total_partitions in map_job['parts_per_object']:
            iterdata.append([
                map_futures[prev_total_partitions:
                            prev_total_partitions + total_partitions]
            ])
            prev_total_partitions += total_partitions

    reduce_job_env = {'__PW_REDUCE_JOB': True}
    if extra_env is None:
        ext_env = reduce_job_env
    else:
        ext_env = extra_env.copy()
        ext_env.update(reduce_job_env)

    iterdata = utils.verify_args(reduce_function, iterdata, None)

    return _create_job(config,
                       internal_storage,
                       executor_id,
                       reduce_job_id,
                       reduce_function,
                       iterdata,
                       runtime_meta=runtime_meta,
                       runtime_memory=runtime_memory,
                       extra_env=ext_env,
                       include_modules=include_modules,
                       exclude_modules=exclude_modules,
                       execution_timeout=execution_timeout,
                       host_job_meta=host_job_meta)
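
When reducer_one_per_object is set, the loop above groups consecutive map futures by the input object that produced them, using the partition counts recorded in map_job['parts_per_object']. A minimal, self-contained sketch of that slicing logic, with made-up counts and integers standing in for futures:

# Hypothetical illustration of the reducer_one_per_object grouping.
parts_per_object = [3, 1, 2]   # assumed example: partitions per input object
map_futures = list(range(6))   # stand-ins for the real map futures

iterdata = []
prev_total_partitions = 0
for total_partitions in parts_per_object:
    # Each reduce invocation receives only the futures of one object
    iterdata.append([map_futures[prev_total_partitions:
                                 prev_total_partitions + total_partitions]])
    prev_total_partitions += total_partitions

print(iterdata)  # [[[0, 1, 2]], [[3]], [[4, 5]]]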
Example #2
File: job.py Project: kpavel/lithops
def create_map_job(config, internal_storage, executor_id, job_id, map_function,
                   iterdata, runtime_meta, runtime_memory, extra_env,
                   include_modules, exclude_modules, execution_timeout,
                   chunksize=None, worker_processes=None, extra_args=None,
                   obj_chunk_size=None, obj_chunk_number=None, chunk_size=None,
                   chunk_n=None):
    """
    Wrapper to create a map job. It integrates COS logic to process objects.
    """

    if chunk_size or chunk_n:
        print('>> WARNING: chunk_size and chunk_n parameters are deprecated, '
              'use obj_chunk_size and obj_chunk_number instead')
        obj_chunk_size = chunk_size
        obj_chunk_number = chunk_n

    host_job_meta = {'host_job_create_tstamp': time.time()}
    map_iterdata = utils.verify_args(map_function, iterdata, extra_args)

    # Object processing functionality
    ppo = None
    if utils.is_object_processing_function(map_function):
        create_partitions_start = time.time()
        # Create partitions according to chunk_size or chunk_number
        logger.debug('ExecutorID {} | JobID {} - Calling map on partitions '
                     'from object storage flow'.format(executor_id, job_id))
        map_iterdata, ppo = create_partitions(config, internal_storage,
                                              map_iterdata, obj_chunk_size,
                                              obj_chunk_number)

        host_job_meta['host_job_create_partitions_time'] = round(time.time()-create_partitions_start, 6)
    # ########

    job = _create_job(config=config,
                      internal_storage=internal_storage,
                      executor_id=executor_id,
                      job_id=job_id,
                      func=map_function,
                      iterdata=map_iterdata,
                      chunksize=chunksize,
                      worker_processes=worker_processes,
                      runtime_meta=runtime_meta,
                      runtime_memory=runtime_memory,
                      extra_env=extra_env,
                      include_modules=include_modules,
                      exclude_modules=exclude_modules,
                      execution_timeout=execution_timeout,
                      host_job_meta=host_job_meta)

    if ppo:
        job.parts_per_object = ppo

    return job
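
The chunk_size/chunk_n handling at the top is a plain deprecation shim: legacy argument names are still accepted but forwarded to their replacements. A stand-alone sketch of the same pattern, using the stdlib warnings module instead of print and a hypothetical function name:

import warnings

def map_job_args_sketch(obj_chunk_size=None, obj_chunk_number=None,
                        chunk_size=None, chunk_n=None):
    # Mirror of the deprecation handling above: legacy parameters win
    # and are redirected to the new names.
    if chunk_size or chunk_n:
        warnings.warn('chunk_size and chunk_n are deprecated, use '
                      'obj_chunk_size and obj_chunk_number instead',
                      DeprecationWarning, stacklevel=2)
        obj_chunk_size = chunk_size
        obj_chunk_number = chunk_n
    return obj_chunk_size, obj_chunk_number

print(map_job_args_sketch(chunk_size=64, chunk_n=4))  # (64, 4)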
Example #3
def create_reduce_job(config,
                      internal_storage,
                      executor_id,
                      reduce_job_id,
                      reduce_function,
                      map_job,
                      map_futures,
                      runtime_meta,
                      runtime_memory,
                      reducer_one_per_object,
                      extra_env,
                      include_modules,
                      exclude_modules,
                      execution_timeout=None):
    """
    Wrapper to create a reduce job. Apply a function across all map futures.
    """
    host_job_meta = {'host_job_create_tstamp': time.time()}

    iterdata = [(map_futures, )]

    if hasattr(map_job, 'parts_per_object') and reducer_one_per_object:
        prev_total_partitions = 0
        iterdata = []
        for total_partitions in map_job.parts_per_object:
            iterdata.append(
                (map_futures[prev_total_partitions:
                             prev_total_partitions + total_partitions], ))
            prev_total_partitions += total_partitions

    reduce_job_env = {'__LITHOPS_REDUCE_JOB': True}
    if extra_env is None:
        ext_env = reduce_job_env
    else:
        ext_env = extra_env.copy()
        ext_env.update(reduce_job_env)

    iterdata = utils.verify_args(reduce_function, iterdata, None)

    return _create_job(config=config,
                       internal_storage=internal_storage,
                       executor_id=executor_id,
                       job_id=reduce_job_id,
                       func=reduce_function,
                       iterdata=iterdata,
                       runtime_meta=runtime_meta,
                       runtime_memory=runtime_memory,
                       extra_env=ext_env,
                       include_modules=include_modules,
                       exclude_modules=exclude_modules,
                       execution_timeout=execution_timeout,
                       host_job_meta=host_job_meta)
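
Note that this version checks hasattr(map_job, 'parts_per_object') while example #1 checks 'parts_per_object' in map_job: here map_job is an object carrying the attribute set by create_map_job, whereas the older variant treated it as a dict. A tiny sketch contrasting the two checks, with a hypothetical JobRecord class:

class JobRecord:
    pass

job_obj = JobRecord()
job_obj.parts_per_object = [2, 2]        # set by create_map_job when ppo exists
job_dict = {'parts_per_object': [2, 2]}

print(hasattr(job_obj, 'parts_per_object'))  # True -- check used here
print('parts_per_object' in job_dict)        # True -- check used in example #1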
Example #4
File: job.py Project: cclauss/lithops
def create_map_job(config, internal_storage, executor_id, job_id, map_function,
                   iterdata, runtime_meta, runtime_memory, extra_env,
                   include_modules, exclude_modules, execution_timeout,
                   extra_args=None, obj_chunk_size=None, obj_chunk_number=None,
                   invoke_pool_threads=128):
    """
    Wrapper to create a map job. It integrates COS logic to process objects.
    """

    host_job_meta = {'host_job_create_tstamp': time.time()}
    map_iterdata = utils.verify_args(map_function, iterdata, extra_args)

    if config['lithops'].get('rabbitmq_monitor', False):
        rabbit_amqp_url = config['rabbitmq'].get('amqp_url')
        utils.create_rabbitmq_resources(rabbit_amqp_url, executor_id, job_id)

    # Object processing functionality
    parts_per_object = None
    if is_object_processing_function(map_function):
        create_partitions_start = time.time()
        # Create partitions according to chunk_size or chunk_number
        logger.debug('ExecutorID {} | JobID {} - Calling map on partitions '
                     'from object storage flow'.format(executor_id, job_id))
        map_iterdata, parts_per_object = create_partitions(config, internal_storage,
                                                           map_iterdata, obj_chunk_size,
                                                           obj_chunk_number)
        host_job_meta['host_job_create_partitions_time'] = round(time.time()-create_partitions_start, 6)
    # ########

    job = _create_job(config=config,
                      internal_storage=internal_storage,
                      executor_id=executor_id,
                      job_id=job_id,
                      func=map_function,
                      iterdata=map_iterdata,
                      runtime_meta=runtime_meta,
                      runtime_memory=runtime_memory,
                      extra_env=extra_env,
                      include_modules=include_modules,
                      exclude_modules=exclude_modules,
                      execution_timeout=execution_timeout,
                      host_job_meta=host_job_meta,
                      invoke_pool_threads=invoke_pool_threads)

    if parts_per_object:
        job.parts_per_object = parts_per_object

    return job
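
This variant also creates RabbitMQ monitoring resources, but only when the feature is switched on in the Lithops config. A minimal sketch of that gating, with an assumed example config dict:

# Hypothetical config; only the shape of the lookup matters here.
config = {
    'lithops': {'rabbitmq_monitor': True},
    'rabbitmq': {'amqp_url': 'amqp://guest:guest@localhost:5672/'},
}

if config['lithops'].get('rabbitmq_monitor', False):
    rabbit_amqp_url = config['rabbitmq'].get('amqp_url')
    # The real code calls utils.create_rabbitmq_resources(...) here
    print('would create RabbitMQ resources at', rabbit_amqp_url)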
Example #5
    def _fill_optional_args(self, function, data):
        """
        Fills in reserved, optional parameters that may be declared in the function signature
        """
        func_sig = inspect.signature(function)

        if len(data) == 1 and 'future' in data:
            # Function chaining feature
            out = [
                data.pop('future').result(
                    internal_storage=self.internal_storage)
            ]
            data.update(verify_args(function, out, None)[0])

        if 'ibm_cos' in func_sig.parameters:
            if 'ibm_cos' in self.lithops_config:
                if self.internal_storage.backend == 'ibm_cos':
                    ibm_boto3_client = self.internal_storage.get_client()
                else:
                    ibm_boto3_client = Storage(config=self.lithops_config,
                                               backend='ibm_cos').get_client()
                data['ibm_cos'] = ibm_boto3_client
            else:
                raise Exception(
                    'Cannot create the ibm_cos client: missing configuration')

        if 'storage' in func_sig.parameters:
            data['storage'] = self.internal_storage.storage

        if 'rabbitmq' in func_sig.parameters:
            if 'rabbitmq' in self.lithops_config:
                rabbit_amqp_url = self.lithops_config['rabbitmq'].get(
                    'amqp_url')
                params = pika.URLParameters(rabbit_amqp_url)
                connection = pika.BlockingConnection(params)
                data['rabbitmq'] = connection
            else:
                raise Exception(
                    'Cannot create the rabbitmq client: missing configuration')

        if 'id' in func_sig.parameters:
            data['id'] = int(self.job.call_id)
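
The core trick in _fill_optional_args is inspecting the function signature and injecting a value for every reserved parameter name the user declared. A self-contained sketch of that mechanism, using only the stdlib and a hypothetical injectables mapping in place of the real clients:

import inspect

def fill_optional_args_sketch(function, data, injectables):
    # For every reserved name the function declares, add the matching
    # client/value to the invocation data; undeclared names are skipped.
    func_sig = inspect.signature(function)
    for name, value in injectables.items():
        if name in func_sig.parameters:
            data[name] = value
    return data

def my_func(x, storage, id):
    return x, storage, id

data = {'x': 42}
fill_optional_args_sketch(
    my_func, data,
    {'storage': 'storage-client', 'id': 7, 'rabbitmq': 'unused-here'})
print(data)  # {'x': 42, 'storage': 'storage-client', 'id': 7}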