Example #1
File: tasks.py Project: gunduru/seed
def _save_raw_data(file_pk, *args, **kwargs):
    """Chunk up the CSV and save data into the DB raw."""
    import_file = ImportFile.objects.get(pk=file_pk)

    if import_file.raw_save_done:
        return {'status': 'warning', 'message': 'raw data already saved'}

    if import_file.source_type == "Green Button Raw":
        return _save_raw_green_button_data(file_pk, *args, **kwargs)

    parser = reader.MCMParser(import_file.local_file)
    cache_first_rows(import_file, parser)
    rows = parser.next()
    import_file.num_rows = 0

    prog_key = get_prog_key('save_raw_data', file_pk)

    tasks = []
    for chunk in batch(rows, 100):
        import_file.num_rows += len(chunk)
        tasks.append(_save_raw_data_chunk.subtask((chunk, file_pk, prog_key)))

    tasks = add_cache_increment_parameter(tasks)
    import_file.num_columns = parser.num_columns()
    import_file.save()

    if tasks:
        chord(tasks, interval=15)(finish_raw_save.subtask([file_pk]))
    else:
        # no chunks were queued; finish the raw save step directly
        finish_raw_save.delay(file_pk)

    return {'status': 'success'}
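
Every example on this page leans on a batch helper that slices an iterable into fixed-size lists. The seed implementation is not shown in this listing; a minimal sketch of what such a helper could look like:

import itertools

def batch(iterable, size):
    """Yield successive lists of at most `size` items from `iterable`."""
    it = iter(iterable)
    while True:
        chunk = list(itertools.islice(it, size))
        if not chunk:
            return
        yield chunk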
Example #2
File: tasks.py Project: gunduru/seed
def _delete_organization_buildings(org_pk, chunk_size=100, *args, **kwargs):
    """Deletes all BuildingSnapshot instances within an organization

    :param org_pk: int, str, the organization pk
    """
    qs = BuildingSnapshot.objects.filter(super_organization=org_pk)
    ids = qs.values_list('id', flat=True)
    deleting_cache_key = get_prog_key(
        'delete_organization_buildings',
        org_pk
    )
    if not ids:
        cache.set(deleting_cache_key, 100)
        return

    # delete the canonical buildings
    can_ids = CanonicalBuilding.objects.filter(
        canonical_snapshot__super_organization=org_pk
    ).values_list('id', flat=True)
    _delete_canonical_buildings.delay(can_ids)

    step = float(chunk_size) / len(ids)
    cache.set(deleting_cache_key, 0)
    tasks = []
    for del_ids in batch(ids, chunk_size):
        # we could also use .s instead of .subtask and not wrap the *args
        tasks.append(
            _delete_organization_buildings_chunk.subtask(
                (del_ids, deleting_cache_key, step, org_pk)
            )
        )
    chord(tasks, interval=15)(finish_delete.subtask([org_pk]))
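
The chunk task queued here is not part of this listing. A plausible sketch of how it could consume the (del_ids, deleting_cache_key, step, org_pk) arguments, assuming the cache key holds a 0-100 progress value (the task body below is an assumption, not the project's verified code; BuildingSnapshot and cache come from the surrounding module):

from celery import shared_task

@shared_task
def _delete_organization_buildings_chunk(del_ids, prog_key, step, org_pk):
    """Sketch: delete one batch of snapshots and advance the progress value."""
    BuildingSnapshot.objects.filter(pk__in=del_ids).delete()
    # step was computed as chunk_size / total ids, so scale it to percent
    current = cache.get(prog_key) or 0
    cache.set(prog_key, min(100, current + step * 100))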
Example #3
    def split_rows(self, chunk_size, callback, *args, **kwargs):
        """Break up the CSV into smaller pieces for parallel processing."""
        row_num = 0
        for batch in utils.batch(self.next(), chunk_size):
            row_num += len(batch)
            callback(batch, *args, **kwargs)

        return row_num
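
A usage sketch for split_rows, with a hypothetical callback that persists each chunk (save_chunk and RawRow are illustrative names, not part of seed):

def save_chunk(rows, file_pk):
    # hypothetical: persist one chunk of parsed rows for the given import
    RawRow.objects.bulk_create(
        RawRow(import_file_id=file_pk, data=row) for row in rows
    )

# assuming parser is an instance of the reader class above
total_rows = parser.split_rows(100, save_chunk, file_pk)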
Example #4
File: tasks.py Project: gunduru/seed
def _delete_canonical_buildings(ids, chunk_size=300):
    """deletes CanonicalBuildings

    :param ids: list of ids to delete from CanonicalBuilding
    :param chunk_size: number of CanonicalBuilding instances to delete per
    iteration
    """
    for del_ids in batch(ids, chunk_size):
        CanonicalBuilding.objects.filter(pk__in=del_ids).delete()
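
Example #2 hands can_ids to this task via .delay, so the id list has to survive serialization on its way through the Celery broker. A lazy values_list queryset may not; a defensive call site could materialize it first (a sketch, not the project's code):

can_ids = list(CanonicalBuilding.objects.filter(
    canonical_snapshot__super_organization=org_pk
).values_list('id', flat=True))
_delete_canonical_buildings.delay(can_ids)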
Example #5
def _save_raw_data(file_pk, *args, **kwargs):
    """Chunk up the CSV and save data into the DB raw."""

    result = {'status': 'success', 'progress': 100}
    prog_key = get_prog_key('save_raw_data', file_pk)
    try:
        import_file = ImportFile.objects.get(pk=file_pk)

        if import_file.raw_save_done:
            result['status'] = 'warning'
            result['message'] = 'Raw data already saved'
            cache.set(prog_key, result)
            return result

        if import_file.source_type == "Green Button Raw":
            return _save_raw_green_button_data(file_pk, *args, **kwargs)

        parser = reader.MCMParser(import_file.local_file)
        cache_first_rows(import_file, parser)
        rows = parser.next()
        import_file.num_rows = 0

        tasks = []
        for chunk in batch(rows, 100):
            import_file.num_rows += len(chunk)
            tasks.append(_save_raw_data_chunk.subtask((chunk, file_pk, prog_key)))

        tasks = add_cache_increment_parameter(tasks)
        import_file.num_columns = parser.num_columns()
        import_file.save()

        if tasks:
            chord(tasks, interval=15)(finish_raw_save.subtask([file_pk]))
        else:
            # no chunks were queued; finish the raw save step directly
            finish_raw_save.delay(file_pk)

    except StopIteration:
        result['status'] = 'error'
        result['message'] = 'StopIteration Exception'
        result['stacktrace'] = traceback.format_exc()
    except Error as e:
        result['status'] = 'error'
        result['message'] = 'File Content Error: ' + e.message
        result['stacktrace'] = traceback.format_exc()
    except KeyError as e:
        result['status'] = 'error'
        result['message'] = 'Invalid Column Name: "' + e.message + '"'
        result['stacktrace'] = traceback.format_exc()
    except Exception as e:
        result['status'] = 'error'
        result['message'] = 'Unhandled Error: ' + e.message
        result['stacktrace'] = traceback.format_exc()

    cache.set(prog_key, result)
    return result
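
Note that Exception.message exists only on Python 2, which dates this example. A forward-compatible version would format the exception itself, e.g. the KeyError branch could read (a sketch):

    except KeyError as e:
        result['status'] = 'error'
        result['message'] = 'Invalid Column Name: "%s"' % e.args[0]
        result['stacktrace'] = traceback.format_exc()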
Example #6
File: tasks.py Project: gunduru/seed
def log_deleted_buildings(ids, user_pk, chunk_size=300):
    """
    Creates an AuditLog delete entry for the canonical building of each
    BuildingSnapshot in ``ids``.
    """
    for del_ids in batch(ids, chunk_size):
        for b in BuildingSnapshot.objects.filter(pk__in=del_ids):
            AuditLog.objects.create(
                user_id=user_pk,
                content_object=b.canonical_building,
                organization=b.super_organization,
                action='delete_building',
                action_note='Deleted building.'
            )
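
As written, the inner loop issues one INSERT per snapshot. If AuditLog needs no per-row save() hooks or signals, both of which bulk_create skips, the same work could be batched per chunk (a sketch under that assumption):

for del_ids in batch(ids, chunk_size):
    AuditLog.objects.bulk_create([
        AuditLog(user_id=user_pk,
                 content_object=b.canonical_building,
                 organization=b.super_organization,
                 action='delete_building',
                 action_note='Deleted building.')
        for b in BuildingSnapshot.objects.filter(pk__in=del_ids)
    ])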
Example #7
File: tasks.py Project: gunduru/seed
def _map_data(file_pk, *args, **kwargs):
    """Get all of the raw data and process it using appropriate mapping.
    @lock_and_track returns a progress_key

    :param file_pk: int, the id of the import_file we're working with.

    """
    import_file = ImportFile.objects.get(pk=file_pk)
    # Don't perform this task if it's already been completed.
    if import_file.mapping_done:
        prog_key = get_prog_key('map_data', file_pk)
        cache.set(prog_key, 100)
        return {'status': 'warning', 'message': 'mapping already complete'}

    # If we haven't finished saving, we shouldn't proceed with mapping
    # Re-queue this task.
    if not import_file.raw_save_done:
        map_data.apply_async(args=[file_pk], countdown=60, expires=120)
        return {'status': 'error', 'message': 'waiting for raw data save.'}

    source_type_dict = {
        'Portfolio Raw': PORTFOLIO_RAW,
        'Assessed Raw': ASSESSED_RAW,
        'Green Button Raw': GREEN_BUTTON_RAW,
    }
    source_type = source_type_dict.get(import_file.source_type, ASSESSED_RAW)

    qs = BuildingSnapshot.objects.filter(
        import_file=import_file,
        source_type=source_type,
    ).iterator()

    prog_key = get_prog_key('map_data', file_pk)
    tasks = []
    for chunk in batch(qs, 100):
        serialized_data = [obj.extra_data for obj in chunk]
        tasks.append(map_row_chunk.subtask(
            (serialized_data, file_pk, source_type, prog_key)
        ))

    tasks = add_cache_increment_parameter(tasks)
    if tasks:
        chord(tasks, interval=15)(finish_mapping.subtask([file_pk]))
    else:
        # nothing to map; finish the mapping step directly
        finish_mapping.delay(file_pk)

    return {'status': 'success'}
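
get_prog_key appears in every task above. The seed helper itself is not reproduced in this listing; a minimal sketch of the idea, deriving a stable cache key from the function name and primary key (the exact key format here is an assumption):

def get_prog_key(func_name, pk):
    """Build the cache key under which a task reports its progress."""
    return 'SEED:%s:PROG:%s' % (func_name, pk)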