Пример #1
0
def test_primed():
    """Check that ``primed`` starts a generator eagerly and keeps its items."""

    global is_primed
    is_primed = False

    def lazy_numbers():
        # Flips the module-level flag as soon as the generator body runs.
        global is_primed
        is_primed = True
        yield from range(10)

    source = lazy_numbers()

    # Merely creating the generator must not have executed its body.
    assert is_primed is False

    warmed = primed(source)
    # After priming, the generator body has started running.
    assert is_primed is True
    # Priming must not drop or reorder any of the yielded values.
    assert list(warmed) == list(range(10))

    # Once exhausted, priming again and advancing raises StopIteration.
    with pytest.raises(StopIteration):
        next(primed(warmed))
Пример #2
0
    def setup_pbars(self, cursor):
        """
        Create the "get", "process" and "update" progress bars for ``cursor``.

        The expected item count comes from ``self.builder.total`` (if the
        builder has one) for generator cursors, otherwise from ``len(cursor)``
        or ``cursor.count()``. It stays ``None`` when none of these apply.
        """
        n_items = None
        if isinstance(cursor, types.GeneratorType):
            # Prime the generator first, then look for a builder-provided total.
            cursor = primed(cursor)
            n_items = (
                self.builder.total if hasattr(self.builder, "total") else None
            )
        elif hasattr(cursor, "__len__"):
            n_items = len(cursor)
        elif hasattr(cursor, "count"):
            n_items = cursor.count()

        # Only the "get" bar wraps the cursor; the other two are free-standing.
        self.get_pbar = tqdm(cursor, desc="Get Items", total=n_items)
        self.process_pbar = tqdm(desc="Processing Item", total=n_items)
        self.update_pbar = tqdm(desc="Updating Targets", total=n_items)
Пример #3
0
async def multi(builder, num_workers):
    """
    Run a builder with ``process_item`` executed in a pool of worker processes.

    Items from ``builder.get_items()`` are wrapped in a ``BackPressure``
    iterator, mapped through ``builder.process_item`` on a
    ``ProcessPoolExecutor``, regrouped with ``grouper`` into chunks of
    ``builder.chunk_size`` and passed (minus ``None`` results) to
    ``builder.update_targets``. ``builder.finalize()`` is called once the
    whole cursor has been consumed.

    The process pool and the "Update Targets" progress bar are always
    released, even if an exception interrupts the pipeline.

    Args:
        builder: object providing ``connect``, ``get_items``,
            ``process_item``, ``update_targets``, ``finalize``,
            ``chunk_size``, ``sources`` and ``targets``. An optional
            ``total`` attribute is used as the item count for generator
            cursors.
        num_workers: forwarded to ``ProcessPoolExecutor`` as its first
            argument (the maximum number of worker processes).
    """
    builder.connect()
    cursor = builder.get_items()
    executor = ProcessPoolExecutor(num_workers)
    update_items = None
    builder_name = builder.__class__.__name__

    try:
        # Gets the total number of items to process by priming the cursor.
        total = None

        if isinstance(cursor, GeneratorType):
            try:
                # NOTE(review): presumably priming runs the generator up to
                # its first item so the builder can populate ``total`` -
                # confirm against the definition of ``primed``.
                cursor = primed(cursor)
                if hasattr(builder, "total"):
                    total = builder.total
            except StopIteration:
                # Deliberate best effort: carry on with an unknown total.
                pass
        elif hasattr(cursor, "__len__"):
            total = len(cursor)
        elif hasattr(cursor, "count"):
            total = cursor.count()

        logger.info(
            f"Starting multiprocessing: {builder_name}",
            extra={
                "maggma": {
                    "event": "BUILD_STARTED",
                    "total": total,
                    "builder": builder_name,
                    "sources": [source.name for source in builder.sources],
                    "targets": [target.name for target in builder.targets],
                }
            },
        )

        back_pressured_get = BackPressure(
            iterator=tqdm(cursor, desc="Get", total=total),
            n=builder.chunk_size,
        )

        processed_items = atqdm(
            async_iterator=AsyncUnorderedMap(
                func=builder.process_item,
                async_iterator=back_pressured_get,
                executor=executor,
            ),
            total=total,
            desc="Process Items",
        )

        back_pressure_relief = back_pressured_get.release(processed_items)

        update_items = tqdm(total=total, desc="Update Targets")

        async for chunk in grouper(back_pressure_relief, n=builder.chunk_size):

            logger.info(
                f"Processed batch of {builder.chunk_size} items",
                extra={
                    "maggma": {
                        "event": "UPDATE",
                        "items": len(chunk),
                        "builder": builder_name,
                        "sources": [source.name for source in builder.sources],
                        "targets": [target.name for target in builder.targets],
                    }
                },
            )
            # Distinct name so the ``processed_items`` pipeline stage above
            # is not shadowed while the loop is still consuming it.
            valid_items = [item for item in chunk if item is not None]
            builder.update_targets(valid_items)
            update_items.update(len(valid_items))

        logger.info(
            f"Ended multiprocessing: {builder_name}",
            extra={
                "maggma": {
                    "event": "BUILD_ENDED",
                    "builder": builder_name,
                    "sources": [source.name for source in builder.sources],
                    "targets": [target.name for target in builder.targets],
                }
            },
        )
    finally:
        # Release the progress bar and worker processes on every exit path;
        # previously the pool was never shut down explicitly.
        if update_items is not None:
            update_items.close()
        executor.shutdown()

    builder.finalize()
Пример #4
0
def serial(builder: Builder):
    """
    Run a builder from start to finish in the current process.

    Connects the builder, pulls items from ``builder.get_items()``, processes
    them in chunks of ``builder.chunk_size``, sends every non-``None`` result
    to ``builder.update_targets`` and finally calls ``builder.finalize()``.
    """

    logger = logging.getLogger("SerialProcessor")

    builder.connect()
    cursor = builder.get_items()
    builder_name = builder.__class__.__name__

    # Work out how many items to expect, for the log record and progress bar.
    total = None
    if isinstance(cursor, GeneratorType):
        try:
            cursor = primed(cursor)
            total = builder.total if hasattr(builder, "total") else None
        except StopIteration:
            # Best effort: keep going with an unknown total.
            pass
    elif hasattr(cursor, "__len__"):
        total = len(cursor)  # type: ignore
    elif hasattr(cursor, "count"):
        total = cursor.count()  # type: ignore

    start_details = {
        "event": "BUILD_STARTED",
        "total": total,
        "builder": builder_name,
        "sources": [src.name for src in builder.sources],
        "targets": [tgt.name for tgt in builder.targets],
    }
    logger.info(
        f"Starting serial processing: {builder_name}",
        extra={"maggma": start_details},
    )

    progress = tqdm(cursor, total=total)
    for batch in grouper(progress, builder.chunk_size):
        batch_details = {
            "event": "UPDATE",
            "items": len(batch),
            "builder": builder_name,
        }
        logger.info(
            "Processing batch of {} items".format(builder.chunk_size),
            extra={"maggma": batch_details},
        )
        # Process each item in order and keep only the non-None results.
        results = (builder.process_item(entry) for entry in batch)
        builder.update_targets([res for res in results if res is not None])

    end_details = {"event": "BUILD_ENDED", "builder": builder_name}
    logger.info(
        f"Ended serial processing: {builder_name}",
        extra={"maggma": end_details},
    )
    builder.finalize()