Example #1
    def __init__(self, processes=None, initializer=None, initargs=(),
                 maxtasksperchild=None, context=None):
        self._ctx = context or get_context()
        # self._setup_queues()
        self._taskqueue = queue.Queue()
        self._cache = {}
        self._state = RUN
        self._maxtasksperchild = maxtasksperchild
        self._initializer = initializer
        self._initargs = initargs

        if processes is not None and processes < 1:
            raise ValueError("Number of processes must be at least 1")

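        # Note: unlike the stdlib Pool, initargs is consumed below as a
        # mapping of FunctionExecutor keyword arguments (**self._initargs),
        # not as positional arguments for the initializer.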
        if processes is not None:
            if self._initargs:
                self._executor = FunctionExecutor(workers=processes, **self._initargs)
            else:
                self._executor = FunctionExecutor(workers=processes)
            self._processes = processes
        else:
            if self._initargs:
                self._executor = FunctionExecutor(**self._initargs)
            else:
                self._executor = FunctionExecutor()
            self._processes = self._executor.invoker.workers

        if initializer is not None and not callable(initializer):
            raise TypeError('initializer must be a callable')

        self._pool = []
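A minimal usage sketch for this constructor, assuming it is the Pool from the drop-in lithops.multiprocessing API (everything in the sketch besides that assumption mirrors the stdlib Pool interface):

from lithops.multiprocessing import Pool

def square(x):
    return x * x

if __name__ == '__main__':
    # Pool(...) runs the __init__ shown above; the with-block handles cleanup.
    with Pool(processes=4) as pool:
        print(pool.map(square, range(8)))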
Example #2
    def __init__(self,
                 processes=None,
                 initializer=None,
                 initargs=None,
                 maxtasksperchild=None,
                 context=None):
        if initargs is None:
            initargs = ()

        self._taskqueue = queue.Queue()
        self._cache = {}
        self._state = RUN
        self._maxtasksperchild = maxtasksperchild
        self._initializer = initializer
        self._initargs = initargs
        self._remote_logger = None

        if processes is not None and processes < 1:
            raise ValueError("Number of processes must be at least 1")

        lithops_conf = mp_config.get_parameter(mp_config.LITHOPS_CONFIG)

        if processes is not None:
            self._processes = processes
            self._executor = FunctionExecutor(workers=processes,
                                              **lithops_conf)
        else:
            self._executor = FunctionExecutor(**lithops_conf)
            self._processes = self._executor.invoker.workers

        if initializer is not None and not callable(initializer):
            raise TypeError('initializer must be a callable')
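The lithops_conf lookup suggests that executor settings travel through mp_config. A sketch of setting them, assuming the config module exposes a set_parameter counterpart to the get_parameter call used above:

from lithops.multiprocessing import Pool, config as mp_config

def double(x):
    return 2 * x

# Assumption: set_parameter mirrors the get_parameter call in the snippet above.
mp_config.set_parameter(mp_config.LITHOPS_CONFIG,
                        {'lithops': {'backend': 'localhost'}})

with Pool(processes=2) as pool:
    print(pool.map(double, range(4)))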
Example #3
    def __init__(self,
                 group=None,
                 target=None,
                 name=None,
                 args=None,
                 kwargs=None,
                 *,
                 daemon=None):
        assert group is None, 'process grouping is not implemented'
        count = next(_process_counter)

        if args is None:
            args = ()
        if kwargs is None:
            kwargs = {}

        self._config = {}
        self._identity = count
        self._parent_pid = os.getpid()
        self._target = target
        self._args = tuple(args)
        self._kwargs = dict(kwargs)
        self._name = name or (type(self).__name__ + '-' + str(self._identity))
        if daemon is not None:
            self.daemon = daemon
        lithops_config = mp_config.get_parameter(mp_config.LITHOPS_CONFIG)
        self._executor = FunctionExecutor(**lithops_config)
        self._forked = False
        self._sentinel = object()
        self._remote_logger = None
        self._redis = util.get_redis_client()
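A minimal usage sketch for this Process subclass, again assuming the drop-in lithops.multiprocessing API:

from lithops.multiprocessing import Process

def work(x):
    print('working on', x)

if __name__ == '__main__':
    p = Process(target=work, args=(42,))
    p.start()  # runs the target remotely via the FunctionExecutor
    p.join()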
Example #4
def benchmark(workers, memory, loopcount, matn):
    iterable = [(loopcount, matn) for i in range(workers)]

    fexec = FunctionExecutor(runtime_memory=memory)
    start_time = time.time()
    worker_futures = fexec.map(compute_flops, iterable)
    results = fexec.get_result()
    end_time = time.time()

    worker_stats = [f.stats for f in worker_futures]
    total_time = end_time - start_time

    print("Total time:", round(total_time, 3))
    est_flops = workers * 2 * loopcount * matn**3
    print('Estimated GFLOPS:', round(est_flops / 1e9 / total_time, 4))

    res = {
        'start_time': start_time,
        'total_time': total_time,
        'est_flops': est_flops,
        'worker_stats': worker_stats,
        'results': results
    }

    return res
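compute_flops is not defined in this example. A hypothetical worker that matches the cost model above (one matn x matn matrix multiply costs roughly 2 * matn**3 floating-point operations, repeated loopcount times):

import time
import numpy as np

def compute_flops(loopcount, matn):
    # Hypothetical implementation: multiply two random matn x matn
    # matrices loopcount times and report this worker's GFLOPS.
    a = np.random.rand(matn, matn)
    b = np.random.rand(matn, matn)
    start = time.time()
    for _ in range(loopcount):
        np.sum(np.dot(a, b))
    return 2.0 * loopcount * matn ** 3 / (time.time() - start) / 1e9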
Example #5
    def get_conn(self, lithops_executor_config):
        """
        Initializes a Lithops executor.
        """
        lithops_executor_config['log_level'] = 'DEBUG'
        lithops_executor_config['config'] = self.lithops_config
        return FunctionExecutor(**lithops_executor_config)
Example #6
def validate_command(prefix, image):
    storage_client = Storage()

    with FunctionExecutor(runtime=image) as fexec:
        bucket = fexec.config['lithops']['storage_bucket']
        key_list = storage_client.list_keys(bucket, prefix + '/')

        validate_records_futures = fexec.map(validate_records,
                                             key_list,
                                             extra_args=[bucket, prefix],
                                             include_modules=['util'])
        results = fexec.get_result(fs=validate_records_futures)
        for index, r in enumerate(results):
            if not r['success']:
                print(f'Failed to validate partition: {key_list[index]}')
                print(r['stderr'])
                return

        validate_summaries_futures = fexec.map(validate_summaries,
                                               [prefix + summary_postfix],
                                               extra_args=[bucket],
                                               include_modules=['util'])
        results = fexec.get_result(fs=validate_summaries_futures)
        if results[0] == '':
            print('Success!')
        else:
            print(results)
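The extra_args pattern used above appends the same fixed arguments after each element of the iterable. A standalone sketch (function and bucket names are placeholders):

from lithops import FunctionExecutor

def tag_key(key, bucket, prefix):
    # key comes from the iterable; bucket and prefix come from extra_args.
    return f'{bucket}/{prefix}/{key}'

with FunctionExecutor() as fexec:
    futures = fexec.map(tag_key, ['a', 'b'], extra_args=['my-bucket', 'data'])
    print(fexec.get_result(fs=futures))  # ['my-bucket/data/a', 'my-bucket/data/b']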
Example #7
def read(backend, bucket_name, number, keylist_raw, read_times):

    blocksize = 1024 * 1024

    def read_object(key_name, storage):
        m = hashlib.md5()
        bytes_read = 0
        print(key_name)

        start_time = time.time()
        for unused in range(read_times):
            fileobj = storage.get_object(bucket_name, key_name, stream=True)
            try:
                buf = fileobj.read(blocksize)
                while len(buf) > 0:
                    bytes_read += len(buf)
                    # Uncomment to trace read progress:
                    # if bytes_read % (blocksize * 10) == 0:
                    #     mb_rate = bytes_read / (time.time() - start_time) / 1e6
                    #     print('POS: ' + str(bytes_read) + ' MB Rate: ' + str(mb_rate))
                    m.update(buf)
                    buf = fileobj.read(blocksize)
            except Exception as e:
                print(e)
        end_time = time.time()
        mb_rate = bytes_read / (end_time - start_time) / 1e6
        print('MB Rate: ' + str(mb_rate))

        return {
            'start_time': start_time,
            'end_time': end_time,
            'mb_rate': mb_rate,
            'bytes_read': bytes_read
        }

    if number == 0:
        keynames = keylist_raw
    else:
        keynames = [keylist_raw[i % len(keylist_raw)] for i in range(number)]

    fexec = FunctionExecutor(backend=backend, runtime_memory=1024)
    start_time = time.time()
    worker_futures = fexec.map(read_object, keynames)
    results = fexec.get_result()
    end_time = time.time()

    total_time = end_time - start_time
    worker_stats = [f.stats for f in worker_futures]

    res = {
        'start_time': start_time,
        'total_time': total_time,
        'worker_stats': worker_stats,
        'results': results
    }

    return res
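Note the storage parameter of read_object: when a mapped function declares a parameter named storage, Lithops injects a ready-to-use Storage client inside the worker. A minimal sketch of the same mechanism (bucket and key names are placeholders):

from lithops import FunctionExecutor

def object_size(key, storage):
    # `storage` is injected by Lithops; no client setup needed here.
    return len(storage.get_object('my-bucket', key))

fexec = FunctionExecutor()
fexec.map(object_size, ['some/key'])
print(fexec.get_result())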
Example #8
def write(backend, bucket_name, mb_per_file, number, key_prefix):
    def write_object(key_name, storage):
        bytes_n = mb_per_file * 1024**2
        d = RandomDataGenerator(bytes_n)
        print(key_name)
        start_time = time.time()
        storage.put_object(bucket_name, key_name, d)
        end_time = time.time()

        mb_rate = bytes_n / (end_time - start_time) / 1e6
        print('MB Rate: ' + str(mb_rate))

        return {
            'start_time': start_time,
            'end_time': end_time,
            'mb_rate': mb_rate
        }

    # create list of random keys
    keynames = [
        key_prefix + str(uuid.uuid4().hex.upper()) for unused in range(number)
    ]

    fexec = FunctionExecutor(backend=backend, runtime_memory=1024)
    start_time = time.time()
    worker_futures = fexec.map(write_object, keynames)
    results = fexec.get_result()
    end_time = time.time()

    worker_stats = [f.stats for f in worker_futures]
    total_time = end_time - start_time

    res = {
        'start_time': start_time,
        'total_time': total_time,
        'worker_stats': worker_stats,
        'bucket_name': bucket_name,
        'keynames': keynames,
        'results': results
    }

    return res
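A hypothetical driver chaining this write() with read() from Example #7 and delete_temp_data() from Example #14 (backend and bucket names are placeholders):

if __name__ == '__main__':
    stats = write(backend='aws_lambda', bucket_name='my-bucket',
                  mb_per_file=100, number=10, key_prefix='bench/')
    read(backend='aws_lambda', bucket_name='my-bucket', number=0,
         keylist_raw=stats['keynames'], read_times=1)
    delete_temp_data('my-bucket', stats['keynames'])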
Example #9
def generate_command(number, prefix, partitions, image):
    bucket = None
    with FunctionExecutor(runtime=image) as fexec:
        bucket = fexec.config['lithops']['storage_bucket']
        futures = fexec.map(generate_records,
                            range(partitions),
                            extra_args=[number, prefix],
                            include_modules=['util'])
        results = fexec.get_result(fs=futures)
        # print(results)

    partition_size = record_size * number

    # Check if all files have been uploaded
    storage_client = Storage()
    partition_list = storage_client.list_objects(bucket, prefix + '/')
    assert len(partition_list) == partitions, \
        f'partition_list: {len(partition_list)}; partitions: {partitions}'
    for info in partition_list:
        assert info['Size'] == partition_size, \
            f'partition size: {partition_size} \ninfo: {info}'

    print('Done!')
Example #10
from lithops import FunctionExecutor, Storage

BUCKET_NAME = 'lithops-sample-data'  # change-me


def my_function(obj_id, storage):
    print(obj_id)

    data = storage.get_cloudobject(obj_id)

    return data.decode()


if __name__ == '__main__':

    obj_key = 'cloudobject1.txt'
    storage = Storage()
    obj_id = storage.put_cloudobject('Hello World!', BUCKET_NAME, obj_key)
    print(obj_id)

    fexec = FunctionExecutor()
    fexec.call_async(my_function, obj_id)
    print(fexec.get_result())

    obj_key = 'cloudobject2.txt'
    storage = fexec.storage
    obj_id = storage.put_cloudobject('Hello World!', BUCKET_NAME, obj_key)
    print(obj_id)

    fexec.call_async(my_function, obj_id)
    print(fexec.get_result())
Example #11
    def __init__(self, process_obj):
        util._flush_std_streams()
        self.returncode = None
        self._executor = FunctionExecutor()
        self._launch(process_obj)
Example #12
    def _create_executor(self):
        if not self.executor:
            from lithops import FunctionExecutor
            self.executor = FunctionExecutor(config=self.config)
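Deferring both the import and the construction keeps module import cheap and avoids starting an executor until the first task actually needs one; later calls reuse the already-created instance.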
Example #13
def sort_command(input_prefix, output_prefix, max_parallelism, image):
    storage_client = Storage()
    bucket = None
    input_info_list = None

    with FunctionExecutor(runtime=image, workers=max_parallelism) as fexec:
        bucket = fexec.config['lithops']['storage_bucket']
        input_info_list = storage_client.list_objects(bucket,
                                                      input_prefix + '/')
        input_size = sum(info['Size'] for info in input_info_list)
        (num_shuffles, last_values_per_category) = make_plan(input_size)

        current_values_per_category = 1
        current_prefix = input_prefix
        current_keys_list = [{
            'keys_list': [key_name],
            'prefix': input_prefix + '-intermediate0',
            'category_stack': []
        } for key_name in storage_client.list_keys(bucket, input_prefix + '/')]
        for current_shuffle in range(num_shuffles):
            # Change values per category of last shuffle
            if current_shuffle == num_shuffles - 1:
                current_values_per_category = last_values_per_category

            radix_sort_futures = fexec.map(
                radix_sort_by_byte,
                current_keys_list,
                extra_args={'values_per_category': current_values_per_category},
                include_modules=['util'])
            radix_sort_results = fexec.get_result(fs=radix_sort_futures)

            categories_keys_lists = {}
            for res in radix_sort_results:
                intermediate_keys_list = res['keys_list']
                input_category_stack = res['category_stack']
                for key_name in intermediate_keys_list:
                    category_id = int(key_name.rsplit(sep='/', maxsplit=3)[-3])
                    new_category_stack = input_category_stack + [category_id]
                    new_category_stack_str = '/'.join(
                        str(x) for x in new_category_stack)
                    categories_keys_lists.setdefault(
                        new_category_stack_str, []).append(key_name)

            # Partition category lists
            # Attach prefix metadata so that sorter knows what to name files
            each_category_size = input_size / (
                (256 / current_values_per_category) * (current_shuffle + 1))
            num_partitions_per_category = math.ceil(each_category_size /
                                                    buffer_size_to_categorize)

            current_keys_list = []
            for category_stack_str, cat_keys_list in categories_keys_lists.items():
                for sub_list in np.array_split(cat_keys_list,
                                               num_partitions_per_category):
                    current_keys_list.append({
                        'keys_list': sub_list,
                        'prefix': f'{input_prefix}-intermediate{current_shuffle + 1}',
                        'category_stack':
                            [int(x) for x in category_stack_str.split('/')]
                    })

        consider_last_byte_sorted = (last_values_per_category == 1)
        for entry in current_keys_list:
            entry['prefix'] = output_prefix
        sorted_keys_list = sorted(current_keys_list,
                                  key=lambda x: x['category_stack'])
        sort_category_futures = fexec.map(
            sort_category,
            sorted_keys_list,
            extra_args={'consider_last_byte_sorted': consider_last_byte_sorted},
            include_modules=['util'])
        results = fexec.get_result(fs=sort_category_futures)
        # print(results)

    # Check if size of output matches size of input

    output_info_list = storage_client.list_objects(bucket, output_prefix)
    output_size = sum(info['Size'] for info in output_info_list)
    assert input_size == output_size, f'input size: {input_size}, output_size: {output_size}'

    print('Done!')
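Reading the loop above: each shuffle pass buckets keys by one more byte of the sort key, with values_per_category controlling how many byte values share a bucket; when the final pass uses one value per category, the last byte is already ordered, which is what the consider_last_byte_sorted flag passes on to sort_category.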
Example #14
def delete_temp_data(bucket_name, keynames):
    fexec = FunctionExecutor(runtime_memory=1024)
    print('Deleting temp files...')
    fexec.storage.delete_objects(bucket_name, keynames)
    print('Done!')