Exemplo n.º 1
0
    def map(self, map_function, map_iterdata, chunksize=None, worker_processes=None,
            extra_args=None, extra_env=None, runtime_memory=None, chunk_size=None,
            chunk_n=None, obj_chunk_size=None, obj_chunk_number=None, timeout=None,
            invoke_pool_threads=None, include_modules=[], exclude_modules=[]):
        """
        For running multiple function executions asynchronously

        :param map_function: the function to map over the data
        :param map_iterdata: An iterable of input data
        :param chunksize: Split map_iteradata in chunks of this size.
                          Lithops spawns 1 worker per resulting chunk. Default 1
        :param worker_processes: Number of concurrent/parallel processes in each worker. Default 1
        :param extra_args: Additional args to pass to the function activations
        :param extra_env: Additional env variables for action environment
        :param runtime_memory: Memory to use to run the function
        :param obj_chunk_size: the size of the data chunks to split each object.
                           'None' for processing the whole file in one function
                           activation.
        :param obj_chunk_number: Number of chunks to split each object. 'None' for
                                 processing the whole file in one function activation
        :param remote_invocation: Enable or disable remote_invocation mechanism
        :param timeout: Time that the functions have to complete their execution
                        before raising a timeout
        :param invoke_pool_threads: Number of threads to use to invoke
        :param include_modules: Explicitly pickle these dependencies
        :param exclude_modules: Explicitly keep these modules from pickled
                                dependencies

        :return: A list with size `len(iterdata)` of futures.
        """
        job_id = self._create_job_id('M')
        self.last_call = 'map'

        runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

        job = create_map_job(self.config, self.internal_storage,
                             self.executor_id, job_id,
                             map_function=map_function,
                             iterdata=map_iterdata,
                             chunksize=chunksize,
                             worker_processes=worker_processes,
                             runtime_meta=runtime_meta,
                             runtime_memory=runtime_memory,
                             extra_env=extra_env,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout,
                             extra_args=extra_args,
                             chunk_size=chunk_size,
                             chunk_n=chunk_n,
                             obj_chunk_size=obj_chunk_size,
                             obj_chunk_number=obj_chunk_number,
                             invoke_pool_threads=invoke_pool_threads)

        futures = self.invoker.run_job(job)
        self.futures.extend(futures)

        return futures
Exemplo n.º 2
0
    def map(self,
            map_function,
            map_iterdata,
            extra_args=None,
            extra_env=None,
            runtime_memory=None,
            chunk_size=None,
            chunk_n=None,
            timeout=None,
            invoke_pool_threads=500,
            include_modules=[],
            exclude_modules=[]):
        """
        :param map_function: the function to map over the data
        :param map_iterdata: An iterable of input data
        :param extra_args: Additional arguments to pass to the function activation. Default None.
        :param extra_env: Additional environment variables for action environment. Default None.
        :param runtime_memory: Memory to use to run the function. Default None (loaded from config).
        :param chunk_size: the size of the data chunks to split each object. 'None' for processing
                           the whole file in one function activation.
        :param chunk_n: Number of chunks to split each object. 'None' for processing the whole
                        file in one function activation.
        :param remote_invocation: Enable or disable remote_invocation mechanism. Default 'False'
        :param timeout: Time that the functions have to complete their execution before raising a timeout.
        :param invoke_pool_threads: Number of threads to use to invoke.
        :param include_modules: Explicitly pickle these dependencies.
        :param exclude_modules: Explicitly keep these modules from pickled dependencies.

        :return: A list with size `len(iterdata)` of futures.
        """
        job_id = self._create_job_id('M')
        self.last_call = 'map'

        runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

        job = create_map_job(self.config,
                             self.internal_storage,
                             self.executor_id,
                             job_id,
                             map_function=map_function,
                             iterdata=map_iterdata,
                             runtime_meta=runtime_meta,
                             runtime_memory=runtime_memory,
                             extra_env=extra_env,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout,
                             extra_args=extra_args,
                             obj_chunk_size=chunk_size,
                             obj_chunk_number=chunk_n,
                             invoke_pool_threads=invoke_pool_threads)

        futures = self.invoker.run(job)
        self.futures.extend(futures)

        return futures
Exemplo n.º 3
0
    def call_async(self,
                   func,
                   data,
                   extra_env=None,
                   runtime_memory=None,
                   timeout=None,
                   include_modules=[],
                   exclude_modules=[]):
        """
        For running one function execution asynchronously

        :param func: the function to map over the data
        :param data: input data
        :param extra_env: Additional env variables for action environment
        :param runtime_memory: Memory to use to run the function
        :param timeout: Time that the functions have to complete their
                        execution before raising a timeout
        :param include_modules: Explicitly pickle these dependencies
        :param exclude_modules: Explicitly keep these modules from pickled
                                dependencies

        :return: future object.
        """
        job_id = self._create_job_id('A')
        self.last_call = 'call_async'

        runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

        job = create_map_job(self.config,
                             self.internal_storage,
                             self.executor_id,
                             job_id,
                             map_function=func,
                             iterdata=[data],
                             runtime_meta=runtime_meta,
                             runtime_memory=runtime_memory,
                             extra_env=extra_env,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout)

        futures = self.invoker.run_job(job)
        self.futures.extend(futures)

        return futures[0]
Exemplo n.º 4
0
    def call_async(self,
                   func: Callable,
                   data: Union[List[Any], Tuple[Any, ...], Dict[str, Any]],
                   extra_env: Optional[Dict] = None,
                   runtime_memory: Optional[int] = None,
                   timeout: Optional[int] = None,
                   include_modules: Optional[List] = [],
                   exclude_modules: Optional[List] = []) -> ResponseFuture:
        """
        For running one function execution asynchronously.

        :param func: The function to map over the data.
        :param data: Input data. Arguments can be passed as a list or tuple, or as a dictionary for keyword arguments.
        :param extra_env: Additional env variables for function environment.
        :param runtime_memory: Memory to use to run the function.
        :param timeout: Time that the function has to complete its execution before raising a timeout.
        :param include_modules: Explicitly pickle these dependencies.
        :param exclude_modules: Explicitly keep these modules from pickled dependencies.

        :return: Response future.
        """
        job_id = self._create_job_id('A')
        self.last_call = 'call_async'

        runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

        job = create_map_job(config=self.config,
                             internal_storage=self.internal_storage,
                             executor_id=self.executor_id,
                             job_id=job_id,
                             map_function=func,
                             iterdata=[data],
                             runtime_meta=runtime_meta,
                             runtime_memory=runtime_memory,
                             extra_env=extra_env,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout)

        futures = self.invoker.run_job(job)
        self.futures.extend(futures)

        return futures[0]
Exemplo n.º 5
0
    def map_reduce(self,
                   map_function,
                   map_iterdata,
                   reduce_function,
                   chunksize=None,
                   worker_processes=None,
                   extra_args=None,
                   extra_env=None,
                   map_runtime_memory=None,
                   obj_chunk_size=None,
                   obj_chunk_number=None,
                   reduce_runtime_memory=None,
                   chunk_size=None,
                   chunk_n=None,
                   timeout=None,
                   invoke_pool_threads=None,
                   reducer_one_per_object=False,
                   reducer_wait_local=False,
                   include_modules=[],
                   exclude_modules=[]):
        """
        Map the map_function over the data and apply the reduce_function across all futures.
        This method is executed all within CF.

        :param map_function: the function to map over the data
        :param map_iterdata:  An iterable of input data
        :param chunksize: Split map_iteradata in chunks of this size.
                          Lithops spawns 1 worker per resulting chunk. Default 1
        :param worker_processes: Number of concurrent/parallel processes in each worker Default 1
        :param reduce_function:  the function to reduce over the futures
        :param extra_env: Additional environment variables for action environment. Default None.
        :param extra_args: Additional arguments to pass to function activation. Default None.
        :param map_runtime_memory: Memory to use to run the map function. Default None (loaded from config).
        :param reduce_runtime_memory: Memory to use to run the reduce function. Default None (loaded from config).
        :param obj_chunk_size: the size of the data chunks to split each object. 'None' for processing
                               the whole file in one function activation.
        :param obj_chunk_number: Number of chunks to split each object. 'None' for processing the whole
                                 file in one function activation.
        :param remote_invocation: Enable or disable remote_invocation mechanism. Default 'False'
        :param timeout: Time that the functions have to complete their execution before raising a timeout.
        :param reducer_one_per_object: Set one reducer per object after running the partitioner
        :param reducer_wait_local: Wait for results locally
        :param invoke_pool_threads: Number of threads to use to invoke.
        :param include_modules: Explicitly pickle these dependencies.
        :param exclude_modules: Explicitly keep these modules from pickled dependencies.

        :return: A list with size `len(map_iterdata)` of futures.
        """
        self.last_call = 'map_reduce'
        map_job_id = self._create_job_id('M')

        runtime_meta = self.invoker.select_runtime(map_job_id,
                                                   map_runtime_memory)

        map_job = create_map_job(self.config,
                                 self.internal_storage,
                                 self.executor_id,
                                 map_job_id,
                                 map_function=map_function,
                                 iterdata=map_iterdata,
                                 chunksize=chunksize,
                                 worker_processes=worker_processes,
                                 runtime_meta=runtime_meta,
                                 runtime_memory=map_runtime_memory,
                                 extra_args=extra_args,
                                 extra_env=extra_env,
                                 chunk_size=chunk_size,
                                 chunk_n=chunk_n,
                                 obj_chunk_size=obj_chunk_size,
                                 obj_chunk_number=obj_chunk_number,
                                 include_modules=include_modules,
                                 exclude_modules=exclude_modules,
                                 execution_timeout=timeout,
                                 invoke_pool_threads=invoke_pool_threads)

        map_futures = self.invoker.run_job(map_job)
        self.futures.extend(map_futures)

        if reducer_wait_local:
            wait(fs=map_futures,
                 internal_storage=self.internal_storage,
                 job_monitor=self.job_monitor)

        reduce_job_id = map_job_id.replace('M', 'R')

        runtime_meta = self.invoker.select_runtime(reduce_job_id,
                                                   reduce_runtime_memory)

        reduce_job = create_reduce_job(
            self.config,
            self.internal_storage,
            self.executor_id,
            reduce_job_id,
            reduce_function,
            map_job,
            map_futures,
            runtime_meta=runtime_meta,
            runtime_memory=reduce_runtime_memory,
            reducer_one_per_object=reducer_one_per_object,
            extra_env=extra_env,
            include_modules=include_modules,
            exclude_modules=exclude_modules)

        reduce_futures = self.invoker.run_job(reduce_job)
        self.futures.extend(reduce_futures)

        for f in map_futures:
            f._produce_output = False

        return map_futures + reduce_futures
Exemplo n.º 6
0
    def map_reduce(self,
                   map_function: Callable,
                   map_iterdata: List[Union[List[Any], Tuple[Any, ...],
                                            Dict[str, Any]]],
                   reduce_function: Callable,
                   chunksize: Optional[int] = None,
                   extra_args: Optional[Union[List[Any], Tuple[Any, ...],
                                              Dict[str, Any]]] = None,
                   extra_env: Optional[Dict[str, str]] = None,
                   map_runtime_memory: Optional[int] = None,
                   reduce_runtime_memory: Optional[int] = None,
                   obj_chunk_size: Optional[int] = None,
                   obj_chunk_number: Optional[int] = None,
                   timeout: Optional[int] = None,
                   reducer_one_per_object: Optional[bool] = False,
                   spawn_reducer: Optional[int] = 20,
                   include_modules: Optional[List[str]] = [],
                   exclude_modules: Optional[List[str]] = []) -> FuturesList:
        """
        Map the map_function over the data and apply the reduce_function across all futures.

        :param map_function: The function to map over the data
        :param map_iterdata: An iterable of input data
        :param reduce_function: The function to reduce over the futures
        :param chunksize: Split map_iteradata in chunks of this size. Lithops spawns 1 worker per resulting chunk. Default 1
        :param extra_args: Additional arguments to pass to function activation. Default None
        :param extra_env: Additional environment variables for action environment. Default None
        :param map_runtime_memory: Memory to use to run the map function. Default None (loaded from config)
        :param reduce_runtime_memory: Memory to use to run the reduce function. Default None (loaded from config)
        :param obj_chunk_size: the size of the data chunks to split each object. 'None' for processing the whole file in one function activation
        :param obj_chunk_number: Number of chunks to split each object. 'None' for processing the whole file in one function activation
        :param timeout: Time that the functions have to complete their execution before raising a timeout
        :param reducer_one_per_object: Set one reducer per object after running the partitioner
        :param spawn_reducer: Percentage of done map functions before spawning the reduce function
        :param include_modules: Explicitly pickle these dependencies.
        :param exclude_modules: Explicitly keep these modules from pickled dependencies.

        :return: A list with size `len(map_iterdata)` of futures.
        """
        self.last_call = 'map_reduce'
        map_job_id = self._create_job_id('M')

        runtime_meta = self.invoker.select_runtime(map_job_id,
                                                   map_runtime_memory)

        map_job = create_map_job(config=self.config,
                                 internal_storage=self.internal_storage,
                                 executor_id=self.executor_id,
                                 job_id=map_job_id,
                                 map_function=map_function,
                                 iterdata=map_iterdata,
                                 chunksize=chunksize,
                                 runtime_meta=runtime_meta,
                                 runtime_memory=map_runtime_memory,
                                 extra_args=extra_args,
                                 extra_env=extra_env,
                                 obj_chunk_size=obj_chunk_size,
                                 obj_chunk_number=obj_chunk_number,
                                 include_modules=include_modules,
                                 exclude_modules=exclude_modules,
                                 execution_timeout=timeout)

        map_futures = self.invoker.run_job(map_job)
        self.futures.extend(map_futures)

        if isinstance(map_iterdata, FuturesList):
            for fut in map_iterdata:
                fut._produce_output = False

        if spawn_reducer != ALWAYS:
            self.wait(map_futures, return_when=spawn_reducer)
            logger.debug(
                f'ExecutorID {self.executor_id} | JobID {map_job_id} - '
                f'{spawn_reducer}% of map activations done. Spawning reduce stage'
            )

        reduce_job_id = map_job_id.replace('M', 'R')

        runtime_meta = self.invoker.select_runtime(reduce_job_id,
                                                   reduce_runtime_memory)

        reduce_job = create_reduce_job(
            config=self.config,
            internal_storage=self.internal_storage,
            executor_id=self.executor_id,
            reduce_job_id=reduce_job_id,
            reduce_function=reduce_function,
            map_job=map_job,
            map_futures=map_futures,
            runtime_meta=runtime_meta,
            runtime_memory=reduce_runtime_memory,
            reducer_one_per_object=reducer_one_per_object,
            extra_env=extra_env,
            include_modules=include_modules,
            exclude_modules=exclude_modules)

        reduce_futures = self.invoker.run_job(reduce_job)
        self.futures.extend(reduce_futures)

        for f in map_futures:
            f._produce_output = False

        return create_futures_list(map_futures + reduce_futures, self)
Exemplo n.º 7
0
    def map(self,
            map_function: Callable,
            map_iterdata: List[Union[List[Any], Tuple[Any, ...], Dict[str,
                                                                      Any]]],
            chunksize: Optional[int] = None,
            extra_args: Optional[Union[List[Any], Tuple[Any, ...],
                                       Dict[str, Any]]] = None,
            extra_env: Optional[Dict[str, str]] = None,
            runtime_memory: Optional[int] = None,
            obj_chunk_size: Optional[int] = None,
            obj_chunk_number: Optional[int] = None,
            timeout: Optional[int] = None,
            include_modules: Optional[List[str]] = [],
            exclude_modules: Optional[List[str]] = []) -> FuturesList:
        """
        Spawn multiple function activations based on the items of an input list.

        :param map_function: The function to map over the data
        :param map_iterdata: An iterable of input data (e.g python list).
        :param chunksize: Split map_iteradata in chunks of this size. Lithops spawns 1 worker per resulting chunk
        :param extra_args: Additional arguments to pass to each map_function activation
        :param extra_env: Additional environment variables for function environment
        :param runtime_memory: Memory (in MB) to use to run the functions
        :param obj_chunk_size: Used for data processing. Chunk size to split each object in bytes. Must be >= 1MiB. 'None' for processing the whole file in one function activation
        :param obj_chunk_number: Used for data processing. Number of chunks to split each object. 'None' for processing the whole file in one function activation. chunk_n has prevalence over chunk_size if both parameters are set
        :param timeout: Max time per function activation (seconds)
        :param include_modules: Explicitly pickle these dependencies. All required dependencies are pickled if default empty list. No one dependency is pickled if it is explicitly set to None
        :param exclude_modules: Explicitly keep these modules from pickled dependencies. It is not taken into account if you set include_modules.

        :return: A list with size `len(map_iterdata)` of futures for each job (Futures are also internally stored by Lithops).
        """

        job_id = self._create_job_id('M')
        self.last_call = 'map'

        runtime_meta = self.invoker.select_runtime(job_id, runtime_memory)

        job = create_map_job(config=self.config,
                             internal_storage=self.internal_storage,
                             executor_id=self.executor_id,
                             job_id=job_id,
                             map_function=map_function,
                             iterdata=map_iterdata,
                             chunksize=chunksize,
                             runtime_meta=runtime_meta,
                             runtime_memory=runtime_memory,
                             extra_env=extra_env,
                             include_modules=include_modules,
                             exclude_modules=exclude_modules,
                             execution_timeout=timeout,
                             extra_args=extra_args,
                             obj_chunk_size=obj_chunk_size,
                             obj_chunk_number=obj_chunk_number)

        futures = self.invoker.run_job(job)
        self.futures.extend(futures)

        if isinstance(map_iterdata, FuturesList):
            for fut in map_iterdata:
                fut._produce_output = False

        return create_futures_list(futures, self)