def test_dead_e2e():
    """End-to-end DLQ flow: a zero-retry task fails, lands on the dead-letter
    queue, and `replay_dead` moves it back onto the main queue.
    """
    app = App(
        name="testapp",
        retry_backoff=lambda retries: 0.01,
        schedule_interval=0.1,
        heartbeat_interval=0.1,
        maintenance_interval=0.1,
        processes=1,
        concurrency=4,
        prefetch_count=1,
    )

    @app.task(retries=0)
    def example():
        # Plain string literal: the original used an f-string with no
        # placeholders (flake8 F541); the message bytes are unchanged.
        raise Chaos("Task failure")

    x = example.delay()

    # Process the queue, move the failure to the DLQ.
    with worker(app):
        state = wait_for_results(app, length=1, sleep=0.02, maxwait=1)

    assert len(state.dead.messages) == 1
    assert len(state.queue.messages) == 0
    assert get_job(app, x.job.uuid).max_retries == 0
    assert count_results(app) == 1

    # Process the DLQ, move the tasks back to the main queue.
    replay_dead(app)
    state = get_state(app)

    assert len(state.dead.messages) == 0
    assert len(state.queue.messages) == 1
    assert get_job(app, x.job.uuid).max_retries == 0
    assert count_results(app) == 1
def test_entrypoint():
    """Preload the queue with SENT jobs via a raw pipeline, run the worker
    entrypoint until completion, and check every job produced a result.
    """
    app = App(
        name="integration",
        processes=2,
        concurrency=4,
        read_timeout=100,
        prefetch_count=1,
        results_enabled=True,
        maintenance_interval=0.1,
        schedule_interval=0.1,
        heartbeat_interval=0.1,
    )

    @app.task
    def foo(n):
        return n

    total = 50

    # Enqueue directly through Redis rather than task.delay(): write each
    # job's status hash, then add its uuid to the stream.
    with app.client.pipeline(transaction=False) as pipe:
        for i in range(total):
            queued = random_job(task=foo.name, args=[i], status=SENT)
            pipe.hmset(app.keys.status(queued), queued.serialise())
            pipe.xadd(app.keys.queue, fields={"uuid": queued.uuid})
        pipe.execute()

    start(app, exit=EXIT_COMPLETE)
    assert count_results(app) == total
def test_worker_loop(benchmark, concurrency, processes, task):
    """Benchmark a full worker run over 100 preloaded jobs, either
    sleep-bound ('sleep') or cpu-bound ('cpu') depending on `task`.
    """
    app = App(
        name="benchmark",
        processes=processes,
        concurrency=concurrency,
        prefetch_count=1,
        results_enabled=True,
    )

    @app.task
    def sleep():
        time.sleep(random.expovariate(2))

    @app.task
    def cpu():
        np.sort(np.random.random(2_000_000))

    length = 100
    task_name = cpu.name if task == "cpu" else sleep.name

    def setup():
        # Wipe Redis and reload the queue before every benchmark round so
        # each round measures the same amount of work.
        app.client.flushall()
        with app.client.pipeline(transaction=False) as pipe:
            _load(pipe, app, task_name, n=length)
            pipe.execute()

    benchmark.pedantic(
        start,
        args=(app,),
        kwargs={"exit": EXIT_COMPLETE},
        setup=setup,
        rounds=3,
        iterations=1,
    )
    assert count_results(app) == length
def wait_for_results(app, length, sleep=0.01, maxwait=1):
    """Poll until exactly `length` results exist or `maxwait` seconds elapse.

    Args:
        app: the application whose result count is polled.
        length: the result count to wait for.
        sleep: pause between polls, in seconds.
        maxwait: overall time budget, in seconds.

    Returns:
        The state snapshot (`get_state(app)`) after waiting — returned
        whether or not the target count was reached; callers assert on it.
    """
    assert sleep <= maxwait
    # Deadline-based loop instead of the original `tries = maxwait // sleep`
    # counter: float floor-division drops ticks (e.g. 1 // 0.01 == 99.0),
    # silently shortening the wait.
    deadline = time.monotonic() + maxwait
    while count_results(app) != length and time.monotonic() < deadline:
        time.sleep(sleep)
    return get_state(app)
async def test_ack(app, broker, message, xid, consumer_id, job):
    """Reading a message makes it pending for the consumer group; acking it
    removes it from the queue and records the job as SUCCESS.
    """
    await broker.read(consumer_id, count=1)

    # Before the ack: one message pending, nothing scheduled or dead.
    before = get_state(app)
    assert len(before.queue.messages) == 1
    assert before.queue.groups[0].pending == 1
    assert len(before.schedule) == 0
    assert len(before.dead.messages) == 0
    assert count_results(app) == 0
    assert get_status(app, job.uuid) == status.SENT

    await broker.ack(xid, job)

    # After the ack: the queue is drained and the job is marked successful.
    after = get_state(app)
    assert len(after.queue.messages) == 0
    assert after.queue.groups[0].pending == 0
    assert len(after.schedule) == 0
    assert len(after.dead.messages) == 0
    assert count_results(app) == 0
    assert get_status(app, job.uuid) == status.SUCCESS
async def test_maintenance(app, broker, messages, executor_id, consumer_id):
    """Maintenance must leave pending messages and heartbeats alone while the
    idle threshold has not passed, and reclaim/prune them once it has.
    """
    with freeze_time("2020-01-01 00:00:00"):
        # Claim 5 of the 10 messages for this consumer and record a heartbeat.
        await broker.read(consumer_id, count=5)
        await broker.heartbeat(executor_id)
        state = get_state(app)
        # Remember stream ids so we can detect re-delivery later.
        xids = {m.id for m in state.queue.messages}
        assert len(state.queue.messages) == 10
        assert state.queue.groups[0].pending == 5
        assert state.queue.groups[0].consumers == 1
        assert len(state.schedule) == 0
        assert len(state.dead.messages) == 0
        assert len(state.heartbeats) == 1
        assert count_results(app) == 0
    with freeze_time("2020-01-01 00:00:30"):  # 30 seconds later, not passed threshold
        await broker.maintenance(threshold=59)
        state = get_state(app)
        new_xids = {m.id for m in state.queue.messages}
        # Nothing reclaimed: same ids, same pending count, heartbeat kept.
        assert new_xids == xids
        assert len(state.queue.messages) == 10
        assert state.queue.groups[0].pending == 5
        assert state.queue.groups[0].consumers == 1
        assert len(state.schedule) == 0
        assert len(state.dead.messages) == 0
        assert len(state.heartbeats) == 1
        assert count_results(app) == 0
    with freeze_time("2020-01-01 00:01:00"):  # 1 minute later, passed threshold
        await broker.maintenance(threshold=59)
        state = get_state(app)
        new_xids = {m.id for m in state.queue.messages}
        # Pending messages were reclaimed (new stream ids), the stale
        # consumer was removed, and the dead heartbeat was pruned.
        assert new_xids != xids
        assert len(state.queue.messages) == 10
        assert state.queue.groups[0].pending == 0
        assert state.queue.groups[0].consumers == 0
        assert len(state.schedule) == 0
        assert len(state.dead.messages) == 0
        assert len(state.heartbeats) == 0
        assert count_results(app) == 0
async def test_process_schedule(app, broker, jobs, messages):
    """Jobs acked-and-scheduled sit in the schedule (status RETRY) and are
    only moved back to the task queue once their scheduled time has passed.
    """
    with freeze_time("2020-01-01"):
        # Ack every message and push its job onto the retry schedule.
        for xid, job in jobs:
            job.status = EXECUTING
            await broker.ack_and_schedule(xid, job)
        state = get_state(app)
        assert len(state.queue.messages) == 0
        assert state.queue.groups[0].pending == 0
        assert len(state.schedule) == 10
        assert len(state.dead.messages) == 0
        assert count_results(app) == 0
        assert all(get_status(app, job.uuid) == RETRY for _, job in jobs)
    with freeze_time("1970-01-01"):
        # We're before the schedule time, no jobs should have moved.
        scheduled = await broker.process_schedule()
        assert len(scheduled) == 0
        state = get_state(app)
        assert len(state.queue.messages) == 0
        assert state.queue.groups[0].pending == 0
        assert len(state.schedule) == 10
        assert len(state.dead.messages) == 0
        assert count_results(app) == 0
        assert all(get_status(app, job.uuid) == RETRY for _, job in jobs)
    with freeze_time("2100-01-01"):
        # After the schedule time, all jobs should be moved to the task queue.
        scheduled = await broker.process_schedule()
        assert len(scheduled) == 10
        state = get_state(app)
        assert len(state.queue.messages) == 10
        # The requeued messages correspond exactly to the scheduled jobs.
        assert {x.uuid for x in state.queue.messages} == {y.uuid for (_, y) in jobs}
        assert state.queue.groups[0].pending == 0
        assert len(state.schedule) == 0
        assert len(state.dead.messages) == 0
        assert count_results(app) == 0
        assert all(get_status(app, job.uuid) == RETRY for _, job in jobs)
async def test_ack_and_dead(app, broker, failing_message, xid, consumer_id, failing_job):
    """Acking a failing job via `ack_and_dead` drains the queue, stores a
    result, appends the job to the DLQ, and marks its status DEAD.
    """
    await broker.read(consumer_id, count=1)

    # Before: the failing message is pending and nothing is dead yet.
    before = get_state(app)
    assert len(before.queue.messages) == 1
    assert before.queue.groups[0].pending == 1
    assert len(before.schedule) == 0
    assert len(before.dead.messages) == 0
    assert count_results(app) == 0
    assert get_status(app, failing_job.uuid) == status.SENT

    await broker.ack_and_dead(xid, failing_job)

    # After: queue drained, exactly this job on the DLQ, one result stored.
    after = get_state(app)
    assert len(after.queue.messages) == 0
    assert after.queue.groups[0].pending == 0
    assert len(after.schedule) == 0
    assert len(after.dead.messages) == 1
    assert after.dead.messages[0].uuid == failing_job.uuid
    assert count_results(app) == 1
    assert get_status(app, failing_job.uuid) == status.DEAD
def test_worker_failure(app):
    """
    This test is designed to crash the worker during processing and ensure that
    when we bring up a new worker, the system fully recovers. To achieve this,
    we're using a 'sentinel' job which triggers an exception when we try to
    deserialise it.
    """
    sentinel = 50

    def mocked_deserialise(fields):
        # Delegate to the real deserialiser, but blow up on the sentinel job.
        job = Job.deserialise(fields)
        if job.args == [sentinel]:
            raise Chaos("Found sentinel job")
        return job

    with mock.patch("fennel.worker.broker.Job", wraps=Job) as mocked_job:
        mocked_job.deserialise.side_effect = mocked_deserialise

        # The worker crashes on the 50th job execution, wait times out.
        with worker(app):
            state = wait_for_results(app, length=100, sleep=0.1, maxwait=1)
            assert count_results(app) < 100
            # The sentinel job must not have produced a result.
            assert sentinel not in (r.return_value for r in all_results(app))
            # The crashed worker leaves at least one message pending.
            assert len(state.queue.messages) >= 1
            assert len(state.queue.groups) == 1
            assert state.queue.groups[0].pending >= 1
            assert len(state.heartbeats) == 1
            # Remember the crashed worker's id to verify replacement below.
            dead_executor_id = state.heartbeats[0].executor_id

    # Complete the job processing with a new worker (must wait long enough for
    # maintenance to happen and the dead worker's pending jobs to be reassigned).
    with worker(app):
        state = wait_for_results(app, length=100, sleep=0.2, maxwait=4)
        assert count_results(app) == 100
        assert set(r.return_value for r in all_results(app)) == set(range(100))
        assert len(state.queue.messages) == 0
        assert state.queue.info.groups == 1
        assert state.queue.groups[0].pending == 0
        # Exactly one live heartbeat, and it belongs to the new worker.
        assert len(state.heartbeats) == 1
        assert state.heartbeats[0].executor_id != dead_executor_id
def test_module(app, job, xid):
    """Exercise the public actions/state module API around a single job:
    send it, inspect queue state, execute it, and read back its result.
    """
    actions.send(app, job, xid=xid)

    # The job is recorded as SENT and sits on the queue as one message.
    assert state.get_status(app, job.uuid) == SENT
    assert state.get_job(app, job.uuid).status == SENT
    assert state.count_jobs(app) == 1
    expected = [Message(id=xid, uuid=job.uuid)]
    assert state.get_messages(app, app.keys.queue) == expected

    with executor(app):
        wait_for_results(app, length=1)

    # The task echoes its argument, so the result matches args[0].
    outcome = actions.result(app, job, timeout=1)
    assert outcome["return_value"] == job.args[0]
    assert state.get_info(app, app.keys.queue).groups == 1
    assert state.get_groups(app, app.keys.queue)[0].name == app.keys.group
    assert state.get_stream(app, app.keys.queue).key == app.keys.queue
    assert state.count_results(app) == 1

    with pytest.raises(JobNotFound):
        state.get_job(app, "nope")