async def test_producer_and_consumer_consume_from_start_after(self):
    # Don't flush; close the producer immediately to test that all data is
    # written to the stream on exit.
    async with Producer(
        stream_name=self.stream_name,
        endpoint_url=ENDPOINT_URL,
        processor=StringProcessor(),
    ) as producer:
        # Put enough data to ensure it will require more than one put,
        # i.e. test overflow behaviour
        for _ in range(15):
            await producer.put(self.random_string(100 * 1024))

    results = []

    async with Consumer(
        stream_name=self.stream_name,
        endpoint_url=ENDPOINT_URL,
        processor=StringProcessor(),
    ) as consumer:
        async for item in consumer:
            results.append(item)

    # Expect to have consumed from start as default iterator_type=TRIM_HORIZON
    self.assertEqual(len(results), 15)
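# A minimal sketch of the random_string helper several of these tests rely on.
# The real helper is defined elsewhere in the suite, so this implementation is
# an assumption; any callable returning a string of the requested length works.
import random
import string

def random_string(self, length):
    # Hypothetical implementation: build a random lowercase string of `length` chars
    return "".join(random.choice(string.ascii_lowercase) for _ in range(length))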
async def test_producer_and_consumer_consume_with_msgpack_aggregator(self):
    processor = MsgpackProcessor()

    async with Producer(
        stream_name=self.stream_name,
        endpoint_url=ENDPOINT_URL,
        processor=processor,
    ) as producer:
        for x in range(0, 10):
            await producer.put({"test": x})

        await producer.flush()

    results = []

    async with Consumer(
        stream_name=self.stream_name,
        endpoint_url=ENDPOINT_URL,
        processor=processor,
    ) as consumer:
        async for item in consumer:
            results.append(item)

    # Expect to have consumed from start as default iterator_type=TRIM_HORIZON
    self.assertEqual(len(results), 10)
    self.assertEqual(results[0], {"test": 0})
    self.assertEqual(results[-1], {"test": 9})
async def test_producer_put_exceed_batch_size(self):
    # Expect to complete by lowering batch size until successful (500 is max)
    async with Producer(
        stream_name=self.stream_name, endpoint_url=ENDPOINT_URL, batch_size=600
    ) as producer:
        for x in range(1000):
            await producer.put("test")
async def test_producer_and_consumer_consume_throttle(self):
    async with Producer(
        stream_name=self.stream_name, endpoint_url=ENDPOINT_URL
    ) as producer:
        for i in range(0, 100):
            await producer.put("test")

        await producer.flush()

    results = []

    async with Consumer(
        stream_name=self.stream_name,
        endpoint_url=ENDPOINT_URL,
        record_limit=10,
        # 2 fetches per second
        shard_fetch_rate=2,
    ) as consumer:
        from datetime import datetime

        dt = datetime.now()

        while (datetime.now() - dt).total_seconds() < 3.05:
            async for item in consumer:
                results.append(item)

        # Expect 2*3*10 = 60, i.e. at most 6 iterations of 10 records
        self.assertGreaterEqual(len(results), 50)
        self.assertLessEqual(len(results), 70)
async def test_producer_producer_limit(self):
    # Expect some throughput errors
    async with Producer(
        stream_name=self.STREAM_NAME_SINGLE_SHARD,
        processor=StringProcessor(),
        put_bandwidth_limit_per_shard=1500,
    ) as producer:
        async with Consumer(
            stream_name=self.STREAM_NAME_SINGLE_SHARD,
            processor=StringProcessor(),
            iterator_type="LATEST",
        ) as consumer:
            await consumer.start_consumer()

            # Wait a bit just to be sure the iterator is going to fall behind
            await asyncio.sleep(3)

            for x in range(20):
                await producer.put(self.random_string(1024 * 250))

            # todo: async timeout
            output = []
            while len(output) < 20:
                async for item in consumer:
                    output.append(item)

            self.assertEqual(len(output), 20)
            self.assertTrue(producer.throughput_exceeded_count > 0)
async def test_create_stream_shard_limit_exceeded(self):
    with self.assertRaises(exceptions.StreamShardLimit):
        async with Producer(
            stream_name=self.stream_name, endpoint_url=ENDPOINT_URL
        ) as producer:
            await producer.create_stream(
                shards=10001
            )  # must match kinesalite (--shardLimit)
async def test_stream_does_not_exist(self):
    await asyncio.sleep(2)

    with self.assertRaises(exceptions.StreamDoesNotExist):
        async with Producer(
            stream_name="test_stream_does_not_exist", endpoint_url=ENDPOINT_URL
        ) as producer:
            await producer.put("test")
async def test_producer_and_consumer(self):
    async with Producer(
        stream_name=self.stream_name, endpoint_url=ENDPOINT_URL
    ) as producer:
        pass

    async with Consumer(stream_name=self.stream_name, endpoint_url=ENDPOINT_URL):
        pass
async def test_producer_put_below_limit(self):
    async with Producer(
        stream_name=self.stream_name,
        processor=StringProcessor(),
        endpoint_url=ENDPOINT_URL,
    ) as producer:
        # The maximum size of the data payload of a record before
        # base64-encoding is up to 1 MiB. The limit is set in
        # aggregators.BaseAggregator (a few bytes short of 1 MiB)
        await producer.put(self.random_string(40 * 25 * 1024))
async def test_producer_and_consumer(self):
    async with Producer(
        stream_name=self.stream_name, endpoint_url=ENDPOINT_URL
    ) as producer:
        await producer.create_stream(shards=1)

    async with Consumer(stream_name=self.stream_name, endpoint_url=ENDPOINT_URL):
        pass
async def test_producer_and_consumer_consume_from_start_flush(self):
    async with Producer(
        stream_name=self.stream_name, endpoint_url=ENDPOINT_URL
    ) as producer:
        await producer.put({"test": 123})

        await producer.flush()

    results = []

    async with Consumer(
        stream_name=self.stream_name, endpoint_url=ENDPOINT_URL
    ) as consumer:
        async for item in consumer:
            results.append(item)

    # Expect to have consumed from start as default iterator_type=TRIM_HORIZON
    self.assertEqual([{"test": 123}], results)
async def test_stream_does_not_exist(self):
    await asyncio.sleep(2)

    # Producer
    with self.assertRaises(exceptions.StreamDoesNotExist):
        async with Producer(
            session=AioSession(),
            stream_name="test_stream_does_not_exist",
            endpoint_url=ENDPOINT_URL,
        ) as producer:
            await producer.put("test")

    # Consumer
    with self.assertRaises(exceptions.StreamDoesNotExist):
        async with Consumer(
            stream_name="test_stream_does_not_exist", endpoint_url=ENDPOINT_URL
        ):
            pass
async def test_producer_and_consumer_consume_with_bytes(self):
    class ByteSerializer(Serializer):
        def serialize(self, msg):
            result = str.encode(msg)
            return result

        def deserialize(self, data):
            return data

    class ByteProcessor(Processor, NetstringAggregator, ByteSerializer):
        pass

    processor = ByteProcessor()

    async with Producer(
        stream_name=self.stream_name, endpoint_url=ENDPOINT_URL, processor=processor
    ) as producer:
        for x in range(0, 2):
            await producer.put(f"{x}")

        await producer.flush()

    results = []

    checkpointer = MemoryCheckPointer(name="test")

    async with Consumer(
        stream_name=self.stream_name,
        endpoint_url=ENDPOINT_URL,
        processor=processor,
        checkpointer=checkpointer,
    ) as consumer:
        async for item in consumer:
            results.append(item)

        await checkpointer.checkpoint(
            shard_id=consumer.shards[0]["ShardId"], sequence="seq"
        )

        async for item in consumer:
            results.append(item)

    self.assertEqual(len(results), 2)

    await checkpointer.close()

    self.assertEqual(len(checkpointer.get_all_checkpoints()), 1)
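# The Serializer/Aggregator mixin pattern above generalises to other encodings.
# A hedged sketch of a JSON-based processor built the same way; aside from the
# json module, the names follow the pattern shown in the test above, and
# JsonNetstringProcessor itself is hypothetical, not part of the library.
import json

class JsonSerializer(Serializer):
    def serialize(self, msg):
        # Encode the message as UTF-8 JSON bytes
        return json.dumps(msg).encode("utf-8")

    def deserialize(self, data):
        # Decode JSON bytes back into Python objects
        return json.loads(data)

class JsonNetstringProcessor(Processor, NetstringAggregator, JsonSerializer):
    pass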
async def test_producer_and_consumer_consume_multiple_shards_with_redis_checkpointer(
    self,
):
    stream_name = "test_{}".format(str(uuid.uuid4())[0:8])

    async with Producer(
        stream_name=stream_name,
        endpoint_url=ENDPOINT_URL,
        create_stream=stream_name,
        create_stream_shards=2,
    ) as producer:
        for i in range(0, 100):
            await producer.put("test.{}".format(i))

        await producer.flush()

    results = []

    checkpointer = RedisCheckPointer(
        name="test-{}".format(str(uuid.uuid4())[0:8]), heartbeat_frequency=3
    )

    async with Consumer(
        stream_name=stream_name,
        endpoint_url=ENDPOINT_URL,
        checkpointer=checkpointer,
        record_limit=10,
    ) as consumer:
        # the consumer will stop if there are no msgs, so iterate a few times
        for i in range(0, 6):
            async for item in consumer:
                results.append(item)

            await asyncio.sleep(0.5)

    self.assertEqual(100, len(results))

    checkpoints = checkpointer.get_all_checkpoints()

    self.assertEqual(2, len(checkpoints))

    # Expect both shards to have been used/set
    for item in checkpoints.values():
        self.assertIsNotNone(item)
async def test_producer_and_consumer_consume_queue_full(self):
    async with Producer(
        stream_name=self.stream_name, endpoint_url=ENDPOINT_URL
    ) as producer:
        for i in range(0, 100):
            await producer.put("test")

        await producer.flush()

    results = []

    async with Consumer(
        stream_name=self.stream_name,
        endpoint_url=ENDPOINT_URL,
        max_queue_size=20,
    ) as consumer:
        async for item in consumer:
            results.append(item)

    # Expect only 20 as the queue is full and we don't wait on the queue
    self.assertEqual(20, len(results))
async def test_consumer_checkpoint(self):
    checkpointer = MemoryCheckPointer(name="test")

    results = []

    async with Producer(
        stream_name=self.STREAM_NAME_SINGLE_SHARD,
        processor=StringProcessor(),
    ) as producer:
        async with Consumer(
            stream_name=self.STREAM_NAME_SINGLE_SHARD,
            checkpointer=checkpointer,
            processor=StringProcessor(),
            iterator_type="LATEST",
        ) as consumer:
            # Manually start
            await consumer.start_consumer()

            await producer.put("test")
            await producer.flush()

            for i in range(3):
                async for item in consumer:
                    results.append(item)

    checkpoints = checkpointer.get_all_checkpoints()

    # Expect 1 as there is only 1 shard
    self.assertEqual(1, len(checkpoints))
    self.assertIsNotNone(checkpoints[list(checkpoints.keys())[0]]["sequence"])

    self.assertListEqual(results, ["test"])
async def test_producer(data, processor):
    log.info("Testing with {}".format(processor.__class__.__name__))

    async with Producer(
        stream_name="test", processor=processor, max_queue_size=100000
    ) as producer:
        await producer.create_stream(shards=1, ignore_exists=True)

        async with Consumer(
            stream_name="test",
            processor=processor,
            max_queue_size=100000,
            iterator_type="LATEST",
        ) as consumer:
            # ensure the consumer is set up before the producer puts records,
            # as we are using LATEST
            await consumer.start_consumer(wait_iterations=0)

            with Timer() as t:
                for item in data:
                    await producer.put(item)

                await producer.flush()

                total = 0
                while total < len(data):
                    async for _ in consumer:
                        total += 1

            if len(data) != total:
                log.error(
                    "Failed to read all records.. expected {} read {}".format(
                        len(data), total
                    )
                )
                return False, None

            log.info(
                "Completed {} records (read: {}) in {} seconds".format(
                    len(data), total, round(t.elapsed, 2)
                )
            )

            return True, round(t.elapsed, 2)
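# Minimal sketch of the Timer context manager assumed by test_producer above.
# The real helper lives elsewhere in the repo; any context manager exposing
# .elapsed in seconds would work here.
import time

class Timer:
    def __enter__(self):
        # Record the start time on entry
        self._start = time.perf_counter()
        self.elapsed = 0.0
        return self

    def __exit__(self, *exc):
        # Expose the total elapsed seconds on exit
        self.elapsed = time.perf_counter() - self._start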
async def create(stream_name, shards):
    async with Producer(stream_name=stream_name) as producer:
        await producer.create_stream(shards=shards)
        await producer.start()
async def test_producer_and_consumer_consume_with_checkpointer_and_latest(self):
    async with Producer(
        stream_name=self.stream_name, endpoint_url=ENDPOINT_URL
    ) as producer:
        await producer.put("test.A")

        results = []

        checkpointer = MemoryCheckPointer(name="test")

        async with Consumer(
            stream_name=self.stream_name,
            endpoint_url=ENDPOINT_URL,
            checkpointer=checkpointer,
            iterator_type="LATEST",
        ) as consumer:
            async for item in consumer:
                results.append(item)

        # Expect none as LATEST
        self.assertEqual([], results)

        checkpoints = checkpointer.get_all_checkpoints()

        # Expect 1 as there is only 1 shard
        self.assertEqual(1, len(checkpoints))

        # None as there are no records yet (using LATEST)
        self.assertIsNone(checkpoints[list(checkpoints.keys())[0]]["sequence"])

        results = []

        log.info("checkpointer checkpoints: {}".format(checkpoints))
        log.info("Starting consumer again..")

        async with Consumer(
            stream_name=self.stream_name,
            endpoint_url=ENDPOINT_URL,
            checkpointer=checkpointer,
            iterator_type="LATEST",
            sleep_time_no_records=0.5,
        ) as consumer:
            # Manually start
            await consumer.start_consumer()

            await producer.put("test.B")
            await producer.flush()

            log.info("waiting..")
            await asyncio.sleep(1)

            log.info("about to consume..")
            async for item in consumer:
                results.append(item)

            self.assertEqual(["test.B"], results)

            checkpoints = checkpointer.get_all_checkpoints()
            log.info("checkpointer checkpoints: {}".format(checkpoints))

            # expect not None as it has processed records
            self.assertIsNotNone(
                checkpoints[list(checkpoints.keys())[0]]["sequence"]
            )

            # now add some records
            for i in range(0, 10):
                await producer.put("test.{}".format(i))

            await producer.flush()

            await asyncio.sleep(1)

        results = []

        async with Consumer(
            stream_name=self.stream_name,
            endpoint_url=ENDPOINT_URL,
            checkpointer=checkpointer,
            iterator_type="LATEST",
            sleep_time_no_records=0.5,
        ) as consumer:
            async for item in consumer:
                results.append(item)

        # Expect results as the checkpointer resumed from the prior sequence
        self.assertEqual(10, len(results))
async def test_resharding(self):
    stream_name = self.STREAM_NAME_SINGLE_SHARD

    # Use the single-shard stream and add some records
    async with Producer(
        stream_name=stream_name, shard_refresh_timer=15
    ) as producer:
        for i in range(0, 50):
            await producer.put("test.{}".format(i))

        await producer.flush()

        results = []

        checkpointer = RedisCheckPointer(
            name="test-{}".format(str(uuid.uuid4())[0:8]), heartbeat_frequency=3
        )

        async with Consumer(
            stream_name=stream_name,
            checkpointer=checkpointer,
            record_limit=5,
            # Limit the queue so that records will remain in the shards
            max_queue_size=5,
            shard_refresh_timer=15,
        ) as consumer:
            for i in range(0, 3):
                async for item in consumer:
                    results.append(item)

                await asyncio.sleep(0.5)

            log.info(f"Consumed {len(results)} records")

            # Start reshard operation
            # TODO: Producer not writing to new shards; shards are being
            # found and checkpointed
            await producer.client.update_shard_count(
                StreamName=stream_name,
                TargetShardCount=2,
                ScalingType='UNIFORM_SCALING',
            )

            await self.describe_stream(
                client=producer.client, stream_name=stream_name
            )

            await asyncio.sleep(1)

            await self.describe_stream(
                client=producer.client, stream_name=stream_name
            )

            # Now add some more records
            for i in range(50, 100):
                await producer.put("test.{}".format(i))

            await producer.flush()

            await asyncio.sleep(10)

            for i in range(0, 20):
                async for item in consumer:
                    results.append(item)

                await asyncio.sleep(2)

            log.info(f"Consumed {len(results)} records")

            assert len(results) == 100
async def test_resharding(self, *args):  # *args pass through mock
    stream_name = "test_{}".format(str(uuid.uuid4())[0:8])

    # Create stream with 2x shards and add some records
    async with Producer(
        stream_name=stream_name,
        endpoint_url=ENDPOINT_URL,
        create_stream=stream_name,
        create_stream_shards=2,
        shard_refresh_timer=15,
    ) as producer:
        for i in range(0, 50):
            await producer.put("test.{}".format(i))

        await producer.flush()

        results = []

        checkpointer = RedisCheckPointer(
            name="test-{}".format(str(uuid.uuid4())[0:8]), heartbeat_frequency=3
        )

        async with Consumer(
            stream_name=stream_name,
            endpoint_url=ENDPOINT_URL,
            checkpointer=checkpointer,
            record_limit=5,
            # Limit the queue so that records will remain in the shards
            max_queue_size=5,
            shard_refresh_timer=15,
        ) as consumer:
            for i in range(0, 3):
                async for item in consumer:
                    results.append(item)

                await asyncio.sleep(0.5)

            log.info(f"Consumed {len(results)} records")

            # Start reshard operation
            await producer.client.update_shard_count(
                StreamName=stream_name,
                TargetShardCount=4,
                ScalingType='UNIFORM_SCALING',
            )

            await self.describe_stream(
                client=producer.client, stream_name=stream_name
            )

            await asyncio.sleep(1)

            await self.describe_stream(
                client=producer.client, stream_name=stream_name
            )

            # Now add some more records
            for i in range(50, 100):
                await producer.put("test.{}".format(i))

            await producer.flush()

            for i in range(0, 10):
                async for item in consumer:
                    results.append(item)

                await asyncio.sleep(0.5)

            log.info(f"Consumed {len(results)} records")

            assert len(results) == 100
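# Hedged sketch of the describe_stream helper used by the resharding tests
# above; the actual helper is defined elsewhere in the suite. It assumes only
# the standard boto3/aiobotocore Kinesis DescribeStream API.
async def describe_stream(self, client, stream_name):
    result = await client.describe_stream(StreamName=stream_name)
    # Log each shard's id and sequence range to observe the reshard taking effect
    for shard in result["StreamDescription"]["Shards"]:
        log.info(
            "Shard {} SequenceNumberRange: {}".format(
                shard["ShardId"], shard["SequenceNumberRange"]
            )
        )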
async def test_producer_put(self):
    async with Producer(
        stream_name=self.stream_name, endpoint_url=ENDPOINT_URL
    ) as producer:
        await producer.put("test")