Exemplo n.º 1
0
 async def inner():
     """Check that a second locker is refused a workflow the first one holds."""
     async with PgRenderLocker() as first, PgRenderLocker() as second:
         async with first.render_lock(1) as held:
             # Same workflow ID, different locker: entering must raise.
             with self.assertRaises(WorkflowAlreadyLocked):
                 async with second.render_lock(1) as contender:
                     await contender.stall_others()
             await held.stall_others()
Exemplo n.º 2
0
 async def inner():
     """Check that two lockers can hold locks on different workflows at once."""
     async with PgRenderLocker() as first, PgRenderLocker() as second:
         async with first.render_lock(1) as lock_for_1:
             # Workflow 2 is unrelated to workflow 1, so this must not raise
             # WorkflowAlreadyLocked.
             async with second.render_lock(2) as lock_for_2:
                 await lock_for_2.stall_others()
             await lock_for_1.stall_others()
Exemplo n.º 3
0
        async def inner():
            """Check that a released workflow can be locked by another locker."""
            async with PgRenderLocker() as first, PgRenderLocker() as second:
                async with first.render_lock(1) as first_lock:
                    await first_lock.stall_others()

                # The first lock has been exited, so this must not raise
                # WorkflowAlreadyLocked.
                async with second.render_lock(1) as second_lock:
                    await second_lock.stall_others()
Exemplo n.º 4
0
 async def inner():
     """Check that a locker still works after it raised WorkflowAlreadyLocked."""
     async with PgRenderLocker() as first, PgRenderLocker() as second:
         async with first.render_lock(1) as held:
             # "Break" the second locker: force it to raise an exception.
             with self.assertRaises(WorkflowAlreadyLocked):
                 async with second.render_lock(1) as refused:
                     await refused.stall_others()
             await held.stall_others()
         # The second locker should be back in its original state by now --
         # meaning it can acquire the lock without trouble.
         async with second.render_lock(1) as retried:
             await retried.stall_others()
Exemplo n.º 5
0
 async def inner():
     """Run five `use_lock()` calls concurrently against a single locker.

     Any exception raised by one of the calls is re-raised here so the
     enclosing test fails.
     """
     async with PgRenderLocker() as locker:
         # asyncio.wait() must be given Tasks/Futures: passing bare coroutines
         # was deprecated in Python 3.8 and raises TypeError since 3.11.
         tasks = [
             asyncio.ensure_future(use_lock(locker, i)) for i in range(5)
         ]
         done, _ = await asyncio.wait(tasks)
         for task in done:
             task.result()  # throw error, if any
Exemplo n.º 6
0
async def queue_fetches(pg_render_locker: PgRenderLocker):
    """Send every pending fetch to RabbitMQ, skipping workflows mid-render.

    Each step is flagged is_busy=True as it is queued, so the same fetch is
    not sent twice.
    """
    for workflow_id, step_id in await load_pending_steps():
        # Skip workflows that are rendering right now.
        #
        # A render that is merely _queued_ does not stop us, so no race is
        # solved here. The aim is only to reduce the number of fetches of
        # resource-intensive workflows.
        #
        # Going through pg_render_locker means a fetch can only be queued
        # _between_ renders. We do not inspect the fetch/render queues (they
        # may be non-empty); we simply give renderers a chance to work through
        # some backlog.
        try:
            async with pg_render_locker.render_lock(workflow_id) as render_lock:
                # Nobody is rendering this workflow at the moment; fall
                # through and queue the fetch.
                await render_lock.stall_others()  # the PgRenderLocker API requires this

            logger.info("Queue fetch of step(%d, %d)", workflow_id, step_id)
            await set_step_busy(step_id)
            busy_update = clientside.Update(
                steps={step_id: clientside.StepUpdate(is_busy=True)}
            )
            await rabbitmq.send_update_to_workflow_clients(workflow_id, busy_update)
            await rabbitmq.queue_fetch(workflow_id, step_id)
        except WorkflowAlreadyLocked:
            # No fetch is queued. This Step comes up again the next time we
            # query for pending fetches.
            continue
Exemplo n.º 7
0
async def main():
    """Periodically queue fetches for users' "automatic updates".

    Meant to run forever as a singleton daemon: the only ways out of this
    coroutine are exceptions.
    """
    from .autoupdate import queue_fetches  # AFTER django.setup()
    from cjwstate import rabbitmq
    from cjwstate.rabbitmq.connection import open_global_connection

    async with PgRenderLocker() as pg_render_locker:
        async with open_global_connection() as rabbitmq_connection:
            await rabbitmq_connection.exchange_declare(rabbitmq.GroupsExchange)
            await rabbitmq_connection.queue_declare(rabbitmq.Fetch, durable=True)

            while not rabbitmq_connection.closed.done():
                started_at = time.time()

                await benchmark(
                    logger, queue_fetches(pg_render_locker), "queue_fetches()"
                )

                # Aim for the start of the next interval. With
                # FetchInterval=60, that means queueing all fetches right as
                # the clock's minute hand moves.
                interval_index = math.floor(started_at / FetchInterval) + 1
                wake_at = interval_index * FetchInterval
                remaining = max(0, wake_at - time.time())
                # Sleep until the next interval, or wake early if RabbitMQ
                # dies. asyncio.wait() itself does not raise; a failure
                # surfaces when `closed` is awaited below.
                await asyncio.wait({rabbitmq_connection.closed}, timeout=remaining)

            await rabbitmq_connection.closed  # raise on failure
            # A _successful_ close is an error too: we should never get here.
            raise RuntimeError(
                "RabbitMQ closed successfully. That's strange because cron never closes it."
            )
Exemplo n.º 8
0
async def render_workflow_and_maybe_requeue(
    pg_render_locker: PgRenderLocker,
    workflow_id: int,
    delta_id: int,
) -> None:
    """
    Acquire an advisory lock and render; skip if the lock is already held.

    If a render is requested on a Workflow that's already being rendered,
    there's no point in wasting CPU cycles starting from scratch. Wait for the
    first render to exit (which will happen at the next stale database-write).
    It should then re-schedule a render.

    Flow, as implemented below:

    1. Look up the workflow (no lock). If it no longer exists, log and return.
    2. Take the render lock for `workflow_id`. If another holder has it
       (`WorkflowAlreadyLocked`), log and return without rendering.
    3. Render once, call `lock.stall_others()`, then decide whether to queue
       another render based on the `RenderResult` and, if that is
       inconclusive, on whether `workflow.last_delta_id` still equals
       `delta_id`.

    Args:
        pg_render_locker: provides `render_lock(workflow_id)`.
        workflow_id: ID of the workflow to render.
        delta_id: the delta this render request was issued for.
    """
    # Query for workflow before locking. We don't need a lock for this, and no
    # lock means we can dismiss spurious renders sooner, so they don't fill the
    # render queue.
    try:
        workflow = await _lookup_workflow(workflow_id)
    except Workflow.DoesNotExist:
        logger.info("Skipping render of deleted Workflow %d", workflow_id)
        return

    try:
        async with pg_render_locker.render_lock(workflow_id) as lock:
            # any error leads to undefined behavior
            result = await render_workflow_once(workflow, delta_id)

            # requeue if needed
            await lock.stall_others()
            if result == RenderResult.MUST_REQUEUE:
                want_requeue = True
            elif result == RenderResult.MUST_NOT_REQUEUE:
                want_requeue = False
            else:
                # The render result does not decide it: re-read the workflow
                # and requeue only if its last delta moved past `delta_id`.
                try:
                    workflow = await _lookup_workflow(workflow_id)
                    if workflow.last_delta_id != delta_id:
                        logger.info(
                            "Requeueing render(workflow=%d, delta=%d)",
                            workflow_id,
                            workflow.last_delta_id,
                        )
                        want_requeue = True
                    else:
                        want_requeue = False
                except Workflow.DoesNotExist:
                    logger.info("Skipping requeue of deleted Workflow %d", workflow_id)
                    want_requeue = False
            if want_requeue:
                # In the MUST_REQUEUE branch `workflow` is still the object
                # loaded before the lock was taken, so this uses its
                # `last_delta_id` as of that first lookup.
                await rabbitmq.queue_render(workflow_id, workflow.last_delta_id)
                # This is why we used `lock.stall_others()`: after requeue,
                # another renderer may try to lock this workflow and we want
                # that lock to _succeed_ -- not raise WorkflowAlreadyLocked.
            # Only ack() _after_ requeue. That preserves our invariant: if we
            # schedule a render, there is always an un-acked render for that
            # workflow queued in RabbitMQ until the workflow is up-to-date. (At
            # this exact moment, there are briefly two un-acked renders.)
            #
            # NOTE(review): this function never calls ack() itself;
            # presumably the caller acks once this coroutine returns --
            # confirm against the caller.
    except WorkflowAlreadyLocked:
        logger.info("Workflow %d is being rendered elsewhere; ignoring", workflow_id)
Exemplo n.º 9
0
        async def inner():
            """Lock and release workflow 2 twice while holding workflow 1.

            All three locks come from the same locker.
            """
            async with PgRenderLocker() as locker:
                async with locker.render_lock(1) as outer_lock:
                    async with locker.render_lock(2) as first_inner:
                        await first_inner.stall_others()

                    async with locker.render_lock(2) as second_inner:
                        await second_inner.stall_others()
                    await outer_lock.stall_others()
Exemplo n.º 10
0
 async def inner():
     """Check that a contender stalls after the holder called stall_others().

     `last_line` records how far `stalling_op()` has progressed; the two
     assertions pin its value while the first lock is still held and after
     the task has finished.
     """
     async with PgRenderLocker() as locker1:
         async with PgRenderLocker() as locker2:
             last_line = 'the initial value'
             async with locker1.render_lock(1) as lock1:
                 # Call stall_others() up front, before the contender starts.
                 await lock1.stall_others()
                 async def stalling_op():
                     # Tries to lock the same workflow through the other
                     # locker, recording progress in `last_line`.
                     nonlocal last_line
                     async with locker2.render_lock(1) as lock2:
                         last_line = 'entered stalling_op'
                         await lock2.stall_others()
                     last_line = 'exited stalling_op'
                 task = asyncio.create_task(stalling_op())
                 # Yield to the event loop once so the task gets a chance to
                 # start running.
                 await asyncio.sleep(0)
                 # Even though we started stalling_op(), it will stall
                 # rather than acquire a lock.
                 self.assertEqual(last_line, 'the initial value')
             # lock1 has been exited: the task is expected to run to
             # completion now.
             await task
             self.assertEqual(last_line, 'exited stalling_op')
Exemplo n.º 11
0
async def main_loop():
    """
    Consume the Render queue, handing each message to handle_render().

    Keeps running until the connection's `_closed_event` is set.
    """
    async with PgRenderLocker() as locker:

        @rabbitmq.manual_acking_callback
        async def render_callback(message, ack):
            outcome = await handle_render(message, ack, locker)
            return outcome

        conn = rabbitmq.get_connection()
        conn.declare_queue_consume(rabbitmq.Render, render_callback)
        # Park here for the lifetime of the connection.
        await conn._closed_event.wait()
Exemplo n.º 12
0
async def queue_fetches_forever():
    """Call queue_fetches() once per FetchInterval, in an endless loop."""
    async with PgRenderLocker() as pg_render_locker:
        while True:
            started_at = time.time()

            await benchmark(
                logger, queue_fetches(pg_render_locker), 'queue_fetches()'
            )

            # Aim for the start of the next interval. With FetchInterval=60,
            # that means queueing all fetches right as the clock's minute
            # hand moves.
            interval_index = math.floor(started_at / FetchInterval) + 1
            wake_at = interval_index * FetchInterval
            remaining = max(0, wake_at - time.time())
            await asyncio.sleep(remaining)
Exemplo n.º 13
0
async def queue_fetches(pg_render_locker: PgRenderLocker):
    """
    Send every pending fetch to RabbitMQ, skipping workflows mid-render.

    Each module is flagged is_busy=True as it is queued, so the same fetch is
    not sent twice.
    """
    for workflow_id, wf_module in await load_pending_wf_modules():
        # Skip workflows that are rendering right now.
        #
        # A render that is merely _queued_ does not stop us, so no race is
        # solved here. The aim is only to reduce the number of fetches of
        # resource-intensive workflows.
        #
        # Going through pg_render_locker means a fetch can only be queued
        # _between_ renders. We do not inspect the fetch/render queues (they
        # may be non-empty); we simply give renderers a chance to work through
        # some backlog.
        try:
            async with pg_render_locker.render_lock(workflow_id) as render_lock:
                # Nobody is rendering this workflow at the moment; fall
                # through and queue the fetch.
                await render_lock.stall_others()  # the PgRenderLocker API requires this

            logger.info("Queue fetch of wf_module(%d, %d)", workflow_id, wf_module.id)
            await set_wf_module_busy(wf_module)
            busy_state = {"is_busy": True, "fetch_error": ""}
            delta = {"updateWfModules": {str(wf_module.id): busy_state}}
            await websockets.ws_client_send_delta_async(workflow_id, delta)
            await rabbitmq.queue_fetch(wf_module)
        except WorkflowAlreadyLocked:
            # No fetch is queued. This WfModule comes up again the next time
            # we query for pending fetches.
            continue
Exemplo n.º 14
0
async def main():
    """Consume the Render queue and handle each message, one at a time."""
    # These imports must happen AFTER django.setup()
    import cjwstate.modules
    from cjworkbench.pg_render_locker import PgRenderLocker
    from cjwstate import rabbitmq
    from cjwstate.rabbitmq.connection import open_global_connection
    from .render import handle_render

    cjwstate.modules.init_module_system()

    async with PgRenderLocker() as pg_render_locker:
        async with open_global_connection() as rabbitmq_connection:
            await rabbitmq_connection.queue_declare(rabbitmq.Render, durable=True)
            await rabbitmq_connection.exchange_declare(rabbitmq.GroupsExchange)
            # Render; ack; render; ack ... forever.
            async with rabbitmq_connection.acking_consumer(rabbitmq.Render) as consumer:
                async for message_bytes in consumer:
                    # An error here propagates (crashing us) before the
                    # message is acked.
                    await handle_render(
                        msgpack.unpackb(message_bytes), pg_render_locker
                    )
Exemplo n.º 15
0
async def render_workflow_and_maybe_requeue(
    pg_render_locker: PgRenderLocker,
    workflow_id: int,
    delta_id: int,
    ack: Callable[[], Awaitable[None]],
    requeue: Callable[[int, int], Awaitable[None]],
) -> None:
    """
    Acquire an advisory lock and render; ack and skip if the lock is held.

    If a render is requested on a Workflow that's already being rendered,
    there's no point in wasting CPU cycles starting from scratch. Wait for the
    first render to exit (which will happen at the next stale database-write).
    It should then re-schedule a render.

    Flow, as implemented below:

    1. Look up the workflow (no lock). If it no longer exists, log, ack and
       return.
    2. Take the render lock for `workflow_id`. If another holder has it
       (`WorkflowAlreadyLocked`), log and ack without rendering.
    3. Render once, call `lock.stall_others()`, decide whether another render
       is needed, call `requeue(...)` if so, and only then `ack()`.
    4. On `DatabaseError` or `InterfaceError`, log and terminate the process
       with `os._exit(1)`; `ack()` is not called on that path.

    Args:
        pg_render_locker: provides `render_lock(workflow_id)`.
        workflow_id: ID of the workflow to render.
        delta_id: the delta this render request was issued for.
        ack: awaited with no arguments once this request is finished with.
        requeue: awaited as `requeue(workflow_id, delta_id)` to schedule
            another render.
    """
    # Query for workflow before locking. We don't need a lock for this, and no
    # lock means we can dismiss spurious renders sooner, so they don't fill the
    # render queue.
    try:
        workflow = await _lookup_workflow(workflow_id)
    except Workflow.DoesNotExist:
        logger.info("Skipping render of deleted Workflow %d", workflow_id)
        await ack()
        return

    try:
        async with pg_render_locker.render_lock(workflow_id) as lock:
            # This try/except only re-raises: it changes nothing at runtime
            # and serves to name the exceptions that are expected to escape.
            try:
                result = await render_workflow_once(workflow, delta_id)
            except (asyncio.CancelledError, DatabaseError, InterfaceError):
                raise  # all undefined behavior

            # requeue if needed
            await lock.stall_others()
            if result == RenderResult.MUST_REQUEUE:
                want_requeue = True
            elif result == RenderResult.MUST_NOT_REQUEUE:
                want_requeue = False
            else:
                # The render result does not decide it: re-read the workflow
                # and requeue only if its last delta moved past `delta_id`.
                try:
                    workflow = await _lookup_workflow(workflow_id)
                    if workflow.last_delta_id != delta_id:
                        logger.info(
                            "Requeueing render(workflow=%d, delta=%d)",
                            workflow_id,
                            workflow.last_delta_id,
                        )
                        want_requeue = True
                    else:
                        want_requeue = False
                except Workflow.DoesNotExist:
                    logger.info("Skipping requeue of deleted Workflow %d",
                                workflow_id)
                    want_requeue = False
            if want_requeue:
                # In the MUST_REQUEUE branch `workflow` is still the object
                # loaded before the lock was taken, so this uses its
                # `last_delta_id` as of that first lookup.
                await requeue(workflow_id, workflow.last_delta_id)
                # This is why we used `lock.stall_others()`: after requeue,
                # another renderer may try to lock this workflow and we want
                # that lock to _succeed_ -- not raise WorkflowAlreadyLocked.
            # Only ack() _after_ requeue. That preserves our invariant: if we
            # schedule a render, there is always an un-acked render for that
            # workflow queued in RabbitMQ until the workflow is up-to-date. (At
            # this exact moment, there are briefly two un-acked renders.)
            await ack()
    except WorkflowAlreadyLocked:
        logger.info("Workflow %d is being rendered elsewhere; ignoring",
                    workflow_id)
        await ack()
    except (DatabaseError, InterfaceError):
        # Possibilities:
        #
        # 1. There's a bug in renderer.execute. This may leave the event
        # loop's executor thread's database connection in an inconsistent
        # state. [2018-11-06 saw this on production.] The best way to clear
        # up the leaked, broken connection is to die. (Our parent process
        # should restart us, and RabbitMQ will give the job to someone
        # else.)
        #
        # 2. The database connection died (e.g., Postgres went away). The
        # best way to clear up the leaked, broken connection is to die.
        # (Our parent process should restart us, and RabbitMQ will give the
        # job to someone else.)
        #
        # 3. PgRenderLocker's database connection died (e.g., Postgres went
        # away). We haven't seen this much in practice; so let's die and let
        # the parent process restart us.
        #
        # 4. There's some design flaw we haven't thought of, and we
        # shouldn't ever render this workflow. If this is the case, we're
        # doomed.
        #
        # If you're seeing this error that means there's a bug somewhere
        # _else_. If you're staring at a case-3 situation, please remember
        # that cases 1 and 2 are important, too.
        logger.exception("Fatal database error; exiting")
        # os._exit() ends the process immediately, without running normal
        # interpreter cleanup (atexit handlers, pending `finally` blocks).
        os._exit(1)