Example #1
from celery import chain, chord


def work(data, info):
    celery_chunk_size = info.get('celery_chunk_size', 80)
    celery_max_workers = info.get('celery_max_workers', 4)
    celery_sleep = info.get('celery_sleep')
    queue = info.get('queue', 'celery')
    sync_callback = info.get('sync_callback')
    final_callback = info.get('final_callback')
    # 'dummy' names a no-op task used as the chord body below
    dummy = load(info.get('dummy'))
    # split the input into chunks of celery_chunk_size items each
    splitted_data = list(grouper_it(data, celery_chunk_size))

    results_list = []
    if sync_callback:
        # synchronous mode: dispatch at most celery_max_workers chunks per
        # batch and wait for each batch before starting the next
        for splitted_chunked in grouper_it(splitted_data, celery_max_workers):
            tasks = parallel_chunked(splitted_chunked, info)
            results = tasks.apply_async()
            wait_for_group(results, celery_sleep, sync_callback)
            results_list.append(results)

        results = final_results(results_list)
        sync_callback = load(sync_callback)
        return sync_callback(results)
    else:
        # asynchronous mode: build the whole workflow up front and fire it off
        tasks_list = []
        for splitted_chunked in grouper_it(splitted_data, celery_max_workers):
            tasks = parallel_chunked(splitted_chunked, info)
            if len(tasks) == 1:
                # chord([A], B) can be optimized as A | B
                # - Issue #3323
                tasks_list.append(tasks | dummy.si().set(queue=queue))
            else:
                tasks_list.append(chord(tasks, dummy.si().set(queue=queue)))

        if final_callback:
            final_callback = load(final_callback)
            task_to_run = chain(tasks_list) | final_callback.si(
                data, info).set(queue=queue)
        else:
            task_to_run = chain(tasks_list)

        results = task_to_run.apply_async()

        results_list.append(results)
        return results_list
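Every example on this page leans on two helpers from the pingf/worker project that are not shown here: grouper_it, which splits an iterable into fixed-size chunks, and load, which resolves a dotted-path string (e.g. info['worker']) to a callable. The versions below are a minimal sketch of the behaviour the snippets assume, not the project's actual code.

from importlib import import_module
from itertools import islice


def grouper_it(iterable, n):
    # yield successive chunks of at most n items; the last may be shorter
    it = iter(iterable)
    while True:
        chunk = list(islice(it, n))
        if not chunk:
            return
        yield chunk


def load(dotted_path):
    # resolve 'package.module.attr' to the attribute it names
    module_path, _, attr = dotted_path.rpartition('.')
    return getattr(import_module(module_path), attr)

wait_for_group and final_results in Example #1 are likewise project helpers; from their call sites they appear to poll a Celery GroupResult (sleeping celery_sleep between polls) and flatten the collected results, but their exact behaviour is an assumption here.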
Example #2
import logging
from concurrent import futures
from copy import deepcopy


def work(data, info):
    log = logging.getLogger(info.get('log', 'worker'))
    results = []
    chunk_size = info.get('chunk_size', 20)
    max_workers = info.get('max_workers', 4)
    try:
        func_str = info.get('worker')
        func = load(func_str)
    except Exception as exc:
        log.error('dynamic worker func invalid! %s' % exc)
        return results

    Executor = futures.ProcessPoolExecutor
    backup_info = deepcopy(info)
    with Executor(max_workers=max_workers) as executor:
        future_to_data = {}

        for index, data_chunked in enumerate(grouper_it(data, chunk_size)):
            log.debug('process worker: processing chunk %d.' % index)
            info = deepcopy(backup_info)
            info['index'] = index
            future_to_data[executor.submit(func, data_chunked,
                                           info)] = data_chunked

        for future in futures.as_completed(future_to_data):
            data = future_to_data[future]
            try:
                result = future.result()
            except Exception as exc:
                tt(exc)
                log.critical('exception caught! %s %s' % (exc, type(exc)))
                result = WorkerException('%s -- %r' % (type(exc), exc))
            results.append(result)
    return results
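To try the process-pool variant, info['worker'] just needs to name an importable top-level function, since ProcessPoolExecutor requires the callable and its arguments to be picklable. myapp.tasks.square_all below is a hypothetical module path, not part of the project.

# myapp/tasks.py -- hypothetical worker module
def square_all(data_chunked, info):
    # runs in a child process; info['index'] tells it which chunk it got
    return [x * x for x in data_chunked]


# caller (guard needed because ProcessPoolExecutor may spawn fresh processes)
if __name__ == '__main__':
    results = work(range(100), {
        'worker': 'myapp.tasks.square_all',
        'chunk_size': 10,
        'max_workers': 4,
    })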
Example #3
from celery import group


def parallel_chunked(data, info):
    func_str = info.get('celery_worker')
    queue = info.get('queue', 'celery')
    func = load(func_str)
    tasks = []

    callback = info.get('each_callback')
    if callback:
        callback = load(callback)
    for d in data:
        for chunked_data in grouper_it(d, info.get('chunk_size', 20)):
            if callback:
                # the worker's result is handed to the callback wrapped as [result]
                sig = func.si(chunked_data, info).set(
                    queue=queue) | callback.s().set(queue=queue)
            else:
                sig = func.si(chunked_data, info).set(queue=queue)
            tasks.append(sig)
    # group_callback support removed so the group's results are returned
    # directly:
    # callback = info.get('group_callback')
    # if callback:
    #     callback = load(callback)
    #     return group(tasks) | callback.s()
    return group(tasks)
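The canvas pieces used here are plain Celery: si() builds an immutable signature (the task ignores any previous result), s() a regular one, | chains two signatures, and group runs signatures in parallel. A tiny standalone equivalent of the shape parallel_chunked builds, with hypothetical task names and broker URL:

from celery import Celery, group

app = Celery('demo', broker='redis://localhost:6379/0')


@app.task
def crunch(chunk, info):
    return sum(chunk)


@app.task
def report(result):
    # receives crunch's return value through the chain
    return {'total': result}


# one worker-plus-callback chain per chunk, all wrapped in a group
sig = group(crunch.si(chunk, {}).set(queue='celery') | report.s().set(queue='celery')
            for chunk in ([1, 2], [3, 4]))
results = sig.apply_async()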
Example #4
import asyncio
import logging
from copy import deepcopy

from aiohttp import ClientSession


async def _work(data, info):
    log = logging.getLogger('worker')
    chunk_size = info.get('chunk_size', 20)
    # max_workers = info.get('max_workers', 4)
    max_sem = info.get('max_semaphore', len(data))
    try:
        func_str = info.get('worker')
        func = load(func_str)

    except Exception as exc:
        log.error('dynamic worker func invalid! %s' % exc)
        return None
    tasks = []
    semaphore = asyncio.Semaphore(max_sem)

    async with ClientSession() as session:
        backup_info = deepcopy(info)
        response = None
        for index, data_chunked in enumerate(grouper_it(data, chunk_size)):
            info = deepcopy(backup_info)
            info['index'] = index
            info['session'] = session
            info['semaphore'] = semaphore
            log.debug('coroutine worker: processing chunk %d.' % index)
            task = asyncio.ensure_future(
                bound_process(func, data_chunked, info))
            tasks.append(task)
        try:
            # exceptions from the coroutines only surface at the await, so
            # the gather has to be awaited inside the try block
            response = await asyncio.gather(*tasks, return_exceptions=False)
        except Exception as exc:
            tt(exc)
            log.critical('exception caught! %s -- %r' % (exc, data))
            # response = WorkerException('%s -- %r' % (type(exc), exc))
        return response
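bound_process is another project helper that is not shown. From its call site it wraps one chunk's work in the shared semaphore so that at most max_semaphore chunks run concurrently; a plausible minimal version, assuming func is itself a coroutine function, looks like this:

async def bound_process(func, data_chunked, info):
    # info['semaphore'] caps concurrency; info['session'] is the shared
    # aiohttp ClientSession that _work opened for all chunks
    async with info['semaphore']:
        return await func(data_chunked, info)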
Example #5
File: loop.py Project: pingf/worker
import logging
from copy import deepcopy


def work(data, info):
    log = logging.getLogger(info.get('log', 'worker'))
    results = []
    chunk_size = info.get('chunk_size', 20)
    # max_workers = info.get('max_workers', 4)
    try:
        func_str = info.get('worker')
        func = load(func_str)
    except Exception as exc:
        log.error('dynamic worker func invalid! %s' % exc)
        return results
    backup_info = deepcopy(info)
    for index, data_chunked in enumerate(grouper_it(data, chunk_size)):
        log.debug('simple worker: processing chunk %d.' % index)
        info = deepcopy(backup_info)
        info['index'] = index
        try:
            result = func(data_chunked, info)
        except Exception as exc:
            tt(exc)
            log.critical('exception caught! %s %r' % (type(exc), exc))
            result = WorkerException('%s -- %r' % (type(exc), exc))
        results.append(result)
    return results
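Two more names recur in the error paths of these examples: WorkerException, which is recorded in place of a failed chunk's result so one bad chunk does not abort the whole run, and tt, which from its usage looks like a traceback dump. Minimal sketches under those assumptions:

import traceback


class WorkerException(Exception):
    # placeholder appended to the results when a chunk's worker raises
    pass


def tt(exc):
    # print the traceback of the currently handled exception
    traceback.print_exc()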