async def producer_fixture():
    """Run a processing task and capture the metadata results in a mock
    Kafka producer.

    Returns:
        A ``(meta_producer, retry_producer)`` tuple of ``FakeProducer``
        instances holding whatever messages the task emitted.
    """
    redis = FakeRedis()
    stats = StatsManager(redis)
    meta_producer = FakeProducer()
    retry_producer = FakeProducer()
    producer = AsyncProducer(meta_producer, 'foo')
    await process_image(
        persister=validate_thumbnail,
        session=RateLimitedClientSession(FakeAioSession(), redis),
        url='https://example.gov/hello.jpg',
        identifier='4bbfe191-1cca-4b9e-aff0-1d3044ef3f2d',
        stats=stats,
        source='example',
        semaphore=asyncio.BoundedSemaphore(1000),
        metadata_producer=producer,
        retry_producer=retry_producer
    )
    producer_task = asyncio.create_task(producer.listen())
    try:
        # Give the producer loop a brief moment to flush pending messages;
        # the timeout is expected since listen() runs forever.
        await asyncio.wait_for(producer_task, 0.01)
    except asyncio.TimeoutError:
        # Fix: asyncio.wait_for raises asyncio.TimeoutError, which on
        # Python 3.8-3.10 is NOT concurrent.futures.TimeoutError, so the
        # original except clause would let the timeout escape the fixture.
        pass
    return meta_producer, retry_producer
async def test_handled_404s():
    """A 404 response should be reported to the rot producer with the
    image's identifier."""
    redis = FakeRedis()
    stats = StatsManager(redis)
    kafka = FakeProducer()
    rot_producer = AsyncProducer(kafka, 'foo')
    session = RateLimitedClientSession(
        FakeAioSession(corrupt=True, status=404), redis
    )
    ident = '4bbfe191-1cca-4b9e-aff0-1d3044ef3f2d'
    await process_image(
        persister=validate_thumbnail,
        session=session,
        url='fake_url',
        identifier=ident,
        stats=stats,
        source='example',
        semaphore=asyncio.BoundedSemaphore(1000),
        rot_producer=rot_producer
    )
    producer_task = asyncio.create_task(rot_producer.listen())
    try:
        # Let the producer flush briefly; timing out is the normal path.
        await asyncio.wait_for(producer_task, 0.01)
    except asyncio.TimeoutError:
        # Fix: asyncio.wait_for raises asyncio.TimeoutError; on Python
        # 3.8-3.10 concurrent.futures.TimeoutError would not catch it.
        pass
    rot_msg = kafka.messages[0]
    parsed = json.loads(str(rot_msg, 'utf-8'))
    assert ident == parsed['identifier']
async def test_records_errors():
    """A 403 response should increment the error/status keys in Redis and
    enqueue a retry message with an attempt count of 1."""
    redis = FakeRedis()
    stats = StatsManager(redis)
    session = RateLimitedClientSession(FakeAioSession(status=403), redis)
    retry_producer = FakeProducer()
    producer = AsyncProducer(retry_producer, 'foo')
    await process_image(
        persister=validate_thumbnail,
        session=session,
        url='https://example.gov/image.jpg',
        identifier='4bbfe191-1cca-4b9e-aff0-1d3044ef3f2d',
        stats=stats,
        source='example',
        semaphore=asyncio.BoundedSemaphore(1000),
        retry_producer=producer
    )
    expected_keys = [
        'resize_errors',
        'resize_errors:example',
        'resize_errors:example:403',
        'status60s:example',
        'status1hr:example',
        'status12hr:example'
    ]
    for key in expected_keys:
        val = redis.store[key]
        # Counter keys hold an int; window keys hold a one-element container.
        assert val == 1 or len(val) == 1
    producer_task = asyncio.create_task(producer.listen())
    try:
        # Let the producer flush briefly; timing out is the normal path.
        await asyncio.wait_for(producer_task, 0.01)
    except asyncio.TimeoutError:
        # Fix: asyncio.wait_for raises asyncio.TimeoutError; on Python
        # 3.8-3.10 concurrent.futures.TimeoutError would not catch it.
        pass
    retry = retry_producer.messages[0]
    parsed = json.loads(str(retry, 'utf-8'))
    assert parsed['attempts'] == 1
def test_scheduler_terminates():
    """The listen loop must drain a finite queue of events and return."""
    consumer = FakeConsumer()
    producer = FakeProducer()
    # Queue up 100 mock events; listen() should consume them all and exit.
    for _ in range(100):
        consumer.insert(make_mock_msg())
    listen(consumer, producer, mock_work_function)
def test_exception_raised():
    """Make sure exceptions in child threads get caught."""
    with pytest.raises(ValueError):
        first_consumer = FakeConsumer()
        second_consumer = FakeConsumer()
        producer = FakeProducer()
        # Feed the same 100 mock messages to both consumers.
        for _ in range(100):
            msg = make_mock_msg()
            first_consumer.insert(msg)
            second_consumer.insert(msg)
        # Either failing work function should surface a ValueError here.
        listen(first_consumer, producer, mock_work_fn_failure)
        listen(second_consumer, producer, mock_boto3_fn_failure)
def test_end_to_end():
    """90 unique events plus 10 sharing one identifier: the first of the
    duplicates succeeds, the remaining 9 are ignored."""
    consumer = FakeConsumer()
    producer = FakeProducer()
    dupe_uuid = uuid.uuid4()
    events = [make_mock_msg() for _ in range(90)]
    events += [f'{{"identifier":"{dupe_uuid}"}}'] * 10
    for event in events:
        consumer.insert(event)
    summary = listen(consumer, producer, mock_work_function)
    assert summary[TaskStatus.SUCCEEDED] == 91
    assert summary[TaskStatus.IGNORED_DUPLICATE] == 9
async def test_handles_corrupt_images_gracefully():
    """Processing a corrupt image must not raise out of process_image or
    the producer flush step."""
    redis = FakeRedis()
    stats = StatsManager(redis)
    kafka = FakeProducer()
    producer = AsyncProducer(kafka, 'foo')
    await process_image(
        persister=validate_thumbnail,
        session=RateLimitedClientSession(FakeAioSession(corrupt=True), redis),
        url='fake_url',
        identifier='4bbfe191-1cca-4b9e-aff0-1d3044ef3f2d',
        stats=stats,
        source='example',
        semaphore=asyncio.BoundedSemaphore(1000),
        metadata_producer=producer
    )
    producer_task = asyncio.create_task(producer.listen())
    try:
        # Let the producer flush briefly; timing out is the normal path.
        await asyncio.wait_for(producer_task, 0.01)
    except asyncio.TimeoutError:
        # Fix: asyncio.wait_for raises asyncio.TimeoutError; on Python
        # 3.8-3.10 concurrent.futures.TimeoutError would not catch it,
        # making this test fail for the wrong reason.
        pass