def test_default_consumes_from_end_offset(self):
    """
    With no previously synced offsets, the consumer should ask the
    leader for the latest offset (time=-1) and start fetching there.
    """
    self.add_topic("test.topic", leaders=(1,))
    self.set_responses(
        broker_id=1, api="offset",
        responses=[
            offset.OffsetResponse(
                topics=[
                    offset.TopicResponse(
                        name="test.topic",
                        partitions=[
                            offset.PartitionResponse(
                                partition_id=0,
                                error_code=errors.no_error,
                                offsets=[99],
                            )
                        ]
                    )
                ]
            )
        ]
    )
    self.set_responses(
        broker_id=1, api="fetch",
        responses=[
            fetch.FetchResponse(
                topics=[
                    fetch.TopicResponse(
                        name="test.topic",
                        partitions=[
                            fetch.PartitionResponse(
                                partition_id=0,
                                error_code=errors.no_error,
                                highwater_mark_offset=2,
                                message_set=messages.MessageSet(
                                    messages=[
                                        (
                                            0,
                                            messages.Message(
                                                magic=0,
                                                attributes=0,
                                                key=None,
                                                value='{"cat": "meow"}',
                                            )
                                        ),
                                        (
                                            1,
                                            messages.Message(
                                                magic=0,
                                                attributes=0,
                                                key=None,
                                                value='{"dog": "bark"}',
                                            )
                                        ),
                                    ]
                                )
                            ),
                        ]
                    ),
                ]
            ),
        ]
    )

    c = single.SingleConsumer(["kafka01"])
    yield c.connect()

    msgs = yield c.consume("test.topic")

    yield c.close()

    self.assertEqual(msgs, [{"cat": "meow"}, {"dog": "bark"}])

    self.assert_sent(
        broker_id=1,
        request=offset.OffsetRequest(
            replica_id=-1,
            topics=[
                offset.TopicRequest(
                    name="test.topic",
                    partitions=[
                        offset.PartitionRequest(
                            partition_id=0,
                            time=-1,  # alias for 'end of topic'
                            max_offsets=1,
                        )
                    ]
                )
            ]
        )
    )
    self.assert_sent(
        broker_id=1,
        request=fetch.FetchRequest(
            replica_id=-1,
            max_wait_time=1000,
            min_bytes=1,
            topics=[
                fetch.TopicRequest(
                    name="test.topic",
                    partitions=[
                        fetch.PartitionRequest(
                            partition_id=0,
                            offset=99,
                            max_bytes=(1024 * 1024),
                        ),
                    ]
                )
            ]
        )
    )
def consume(self, topic, start=None):
    """
    Fetches from a given topic and returns a list of deserialized values.

    If the given topic is not known to have synced offsets, a call to
    `determine_offsets()` is made first.

    If a topic is unknown entirely, the cluster's ``heal()`` method is
    called and the check retried.

    Since error codes and deserialization are taken care of by
    `handle_fetch_response`, this method merely yields to wait on the
    deserialized results and returns a flattened list.
    """
    if self.closing:
        return

    if topic not in self.synced_offsets:
        try:
            yield self.determine_offsets(topic, start)
        except NoOffsetsError:
            log.error("Unable to determine offsets for topic %s", topic)
            raise gen.Return([])
        self.synced_offsets.add(topic)

    if topic not in self.allocation or not self.allocation[topic]:
        log.debug("Consuming unknown topic %s, reloading metadata", topic)
        yield self.cluster.heal()
    if topic not in self.allocation or not self.allocation[topic]:
        log.error("Consuming unknown topic %s and not auto-created", topic)
        raise gen.Return([])

    # group this topic's partitions by their leader broker so that each
    # leader gets a single fetch request covering all of its partitions
    ordered = collections.defaultdict(list)
    for partition_id in self.allocation[topic]:
        leader = self.cluster.get_leader(topic, partition_id)
        ordered[leader].append(partition_id)

    requests = {}
    for leader, partitions in six.iteritems(ordered):
        # split the overall max_bytes budget evenly among the partitions
        max_partition_bytes = int(self.max_bytes / len(partitions))
        requests[leader] = fetch.FetchRequest(
            replica_id=CONSUMER_REPLICA_ID,
            max_wait_time=self.max_wait_time,
            min_bytes=self.min_bytes,
            topics=[
                fetch.TopicRequest(
                    name=topic,
                    partitions=[
                        fetch.PartitionRequest(
                            partition_id=partition_id,
                            offset=self.offsets[topic][partition_id],
                            max_bytes=max_partition_bytes,
                        )
                        for partition_id in partitions
                    ]
                )
            ]
        )

    results = yield self.send(requests)

    raise gen.Return([
        msg
        for messageset in results.values()
        for msg in messageset
        if messageset
    ])
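# A minimal usage sketch of consume(), not taken from the source; the host
# name, topic, and poll() wrapper are illustrative assumptions. A consumer
# is driven from a Tornado coroutine, and each consume() call yields one
# batch of already-deserialized values:
#
#     from tornado import gen, ioloop
#
#     @gen.coroutine
#     def poll():
#         consumer = single.SingleConsumer(["kafka01"])  # placeholder host
#         yield consumer.connect()
#         batch = yield consumer.consume("test.topic")
#         for value in batch:
#             print(value)
#         yield consumer.close()
#
#     ioloop.IOLoop.current().run_sync(poll)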
def test_consume_without_autocommit(self):
    """
    With ``autocommit=False``, offsets should only be committed when
    ``commit_offsets()`` is called explicitly.
    """
    self.add_topic("test.topic", leaders=(1, 8))
    self.allocator.allocation = {"test.topic": [0, 1]}
    self.set_responses(broker_id=3, api="offset_fetch", responses=[
        offset_fetch.OffsetFetchResponse(topics=[
            offset_fetch.TopicResponse(
                name="test.topic",
                partitions=[
                    offset_fetch.PartitionResponse(
                        error_code=errors.no_error,
                        partition_id=0,
                        offset=80,
                        metadata="committed, ok!"),
                    offset_fetch.PartitionResponse(
                        error_code=errors.no_error,
                        partition_id=1,
                        offset=110,
                        metadata="committed, ok!"),
                ])
        ]),
    ])
    self.set_responses(broker_id=3, api="offset_commit", responses=[
        offset_commit.OffsetCommitResponse(topics=[
            offset_commit.TopicResponse(
                name="test.topic",
                partitions=[
                    offset_commit.PartitionResponse(
                        error_code=errors.no_error,
                        partition_id=1,
                    )
                ]),
        ]),
    ])
    self.set_responses(broker_id=1, api="fetch", responses=[
        fetch.FetchResponse(topics=[
            fetch.TopicResponse(
                name="test.topic",
                partitions=[
                    fetch.PartitionResponse(
                        partition_id=0,
                        error_code=errors.no_error,
                        highwater_mark_offset=2,
                        message_set=messages.MessageSet([
                            (80, messages.Message(
                                magic=0,
                                attributes=0,
                                key=None,
                                value='{"cat": "meow"}',
                            )),
                        ])),
                ]),
        ])
    ])
    self.set_responses(broker_id=8, api="fetch", responses=[
        fetch.FetchResponse(topics=[
            fetch.TopicResponse(
                name="test.topic",
                partitions=[
                    fetch.PartitionResponse(
                        partition_id=1,
                        error_code=errors.no_error,
                        highwater_mark_offset=2,
                        message_set=messages.MessageSet([
                            (110, messages.Message(
                                magic=0,
                                attributes=0,
                                key=None,
                                value='{"cat": "meow"}',
                            )),
                        ])),
                ]),
        ])
    ])

    c = grouped.GroupedConsumer(
        ["kafka01", "kafka02"], "work-group",
        zk_hosts=["zk01", "zk02", "zk03"],
        autocommit=False
    )

    yield c.connect()

    yield c.consume("test.topic")

    self.assert_sent(broker_id=1, request=fetch.FetchRequest(
        replica_id=-1,
        max_wait_time=1000,
        min_bytes=1,
        topics=[
            fetch.TopicRequest(name="test.topic", partitions=[
                fetch.PartitionRequest(
                    partition_id=0,
                    offset=80,
                    max_bytes=(1024 * 1024),
                ),
            ])
        ]))
    self.assert_sent(broker_id=8, request=fetch.FetchRequest(
        replica_id=-1,
        max_wait_time=1000,
        min_bytes=1,
        topics=[
            fetch.TopicRequest(name="test.topic", partitions=[
                fetch.PartitionRequest(
                    partition_id=1,
                    offset=110,
                    max_bytes=(1024 * 1024),
                ),
            ])
        ]))

    yield c.commit_offsets()

    self.assert_sent(
        broker_id=3,
        request=offset_commit.OffsetCommitV0Request(
            group="work-group",
            topics=[
                offset_commit.TopicRequest(
                    name="test.topic",
                    partitions=[
                        offset_commit.PartitionRequest(
                            partition_id=0,
                            offset=81,  # last consumed offset + 1
                            metadata="committed by %s" % c.name),
                        offset_commit.PartitionRequest(
                            partition_id=1,
                            offset=111,
                            metadata="committed by %s" % c.name),
                    ])
            ]))
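# A hedged sketch of the manual-commit flow exercised above; the host lists,
# group name, and process() handler are placeholders. With autocommit
# disabled, progress is persisted only when commit_offsets() is called, and
# the committed value is the next offset to read (last consumed offset + 1,
# as the assertions above show):
#
#     consumer = grouped.GroupedConsumer(
#         ["kafka01"], "work-group", zk_hosts=["zk01"], autocommit=False
#     )
#     yield consumer.connect()
#     batch = yield consumer.consume("test.topic")
#     process(batch)  # hypothetical application-side handler
#     yield consumer.commit_offsets()  # commit only once processing succeeds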
def test_max_bytes_at_partition_level(self):
    """
    A consumer's ``max_bytes`` budget should be split evenly across the
    partitions included in a single fetch request.
    """
    self.add_topic("test.topic", leaders=(3, 3))
    self.set_responses(
        broker_id=3, api="fetch",
        responses=[
            fetch.FetchResponse(
                topics=[
                    fetch.TopicResponse(
                        name="test.topic",
                        partitions=[
                            fetch.PartitionResponse(
                                partition_id=0,
                                error_code=errors.no_error,
                                highwater_mark_offset=2,
                                message_set=messages.MessageSet(
                                    messages=[
                                        (
                                            0,
                                            messages.Message(
                                                magic=0,
                                                attributes=0,
                                                key=None,
                                                value='{"foo": "bar"}',
                                            )
                                        ),
                                    ]
                                )
                            ),
                            fetch.PartitionResponse(
                                partition_id=1,
                                error_code=errors.no_error,
                                highwater_mark_offset=2,
                                message_set=messages.MessageSet(
                                    messages=[
                                        (
                                            0,
                                            messages.Message(
                                                magic=0,
                                                attributes=0,
                                                key=None,
                                                value='{"bwee": "bwoo"}',
                                            )
                                        ),
                                    ]
                                )
                            ),
                        ]
                    ),
                ]
            ),
        ]
    )

    c = FakeConsumer(["kafka01", "kafka02"], max_bytes=(1024 * 1024))
    yield c.connect()

    msgs = yield c.consume("test.topic")

    self.assertEqual(msgs, [{"foo": "bar"}, {"bwee": "bwoo"}])

    self.assert_sent(
        broker_id=3,
        request=fetch.FetchRequest(
            replica_id=-1,
            max_wait_time=1000,
            min_bytes=1,
            topics=[
                fetch.TopicRequest(
                    name="test.topic",
                    partitions=[
                        fetch.PartitionRequest(
                            partition_id=0,
                            offset=0,
                            max_bytes=(512 * 1024),  # 1MB over 2 partitions
                        ),
                        fetch.PartitionRequest(
                            partition_id=1,
                            offset=0,
                            max_bytes=(512 * 1024),
                        ),
                    ]
                )
            ]
        )
    )
def test_consumer_tracks_offsets(self):
    """
    Consumed offsets should be tracked per partition so that each
    subsequent fetch picks up where the previous one left off.
    """
    self.add_topic("test.topic", leaders=(3, 8))
    self.set_responses(
        broker_id=3, api="fetch",
        responses=[
            fetch.FetchResponse(
                topics=[
                    fetch.TopicResponse(
                        name="test.topic",
                        partitions=[
                            fetch.PartitionResponse(
                                partition_id=0,
                                error_code=errors.no_error,
                                highwater_mark_offset=2,
                                message_set=messages.MessageSet(
                                    messages=[
                                        (
                                            0,
                                            messages.Message(
                                                magic=0,
                                                attributes=0,
                                                key=None,
                                                value='{"foo": "bar"}',
                                            )
                                        ),
                                        (
                                            1,
                                            messages.Message(
                                                magic=0,
                                                attributes=0,
                                                key=None,
                                                value='{"bwee": "bwoo"}',
                                            )
                                        ),
                                    ]
                                )
                            ),
                        ]
                    ),
                ]
            ),
            fetch.FetchResponse(
                topics=[
                    fetch.TopicResponse(
                        name="test.topic",
                        partitions=[
                            fetch.PartitionResponse(
                                partition_id=0,
                                error_code=errors.no_error,
                                highwater_mark_offset=2,
                                message_set=messages.MessageSet([]),
                            ),
                        ]
                    )
                ]
            )
        ]
    )
    self.set_responses(
        broker_id=8, api="fetch",
        responses=[
            fetch.FetchResponse(
                topics=[
                    fetch.TopicResponse(
                        name="test.topic",
                        partitions=[
                            fetch.PartitionResponse(
                                partition_id=1,
                                error_code=errors.no_error,
                                highwater_mark_offset=2,
                                message_set=messages.MessageSet(
                                    messages=[
                                        (
                                            0,
                                            messages.Message(
                                                magic=0,
                                                attributes=0,
                                                key=None,
                                                value='{"meow": "bark"}',
                                            )
                                        ),
                                    ]
                                )
                            ),
                        ]
                    ),
                ]
            ),
            fetch.FetchResponse(
                topics=[
                    fetch.TopicResponse(
                        name="test.topic",
                        partitions=[
                            fetch.PartitionResponse(
                                partition_id=1,
                                error_code=errors.no_error,
                                highwater_mark_offset=2,
                                message_set=messages.MessageSet([])
                            ),
                        ]
                    ),
                ]
            ),
        ]
    )

    c = FakeConsumer(["kafka01", "kafka02"])
    yield c.connect()

    msgs = yield c.consume("test.topic")

    # the partitions live on different brokers, so inter-partition
    # ordering is not guaranteed
    possible_orders = [
        [{"meow": "bark"}, {"foo": "bar"}, {"bwee": "bwoo"}],
        [{"foo": "bar"}, {"bwee": "bwoo"}, {"meow": "bark"}],
    ]
    self.assertTrue(
        any(msgs == possibility for possibility in possible_orders)
    )

    self.assert_sent(
        broker_id=3,
        request=fetch.FetchRequest(
            replica_id=-1,
            max_wait_time=1000,
            min_bytes=1,
            topics=[
                fetch.TopicRequest(
                    name="test.topic",
                    partitions=[
                        fetch.PartitionRequest(
                            partition_id=0,
                            offset=0,
                            max_bytes=(1024 * 1024),
                        ),
                    ]
                )
            ]
        )
    )
    self.assert_sent(
        broker_id=8,
        request=fetch.FetchRequest(
            replica_id=-1,
            max_wait_time=1000,
            min_bytes=1,
            topics=[
                fetch.TopicRequest(
                    name="test.topic",
                    partitions=[
                        fetch.PartitionRequest(
                            partition_id=1,
                            offset=0,
                            max_bytes=(1024 * 1024),
                        ),
                    ]
                )
            ]
        )
    )

    msgs = yield c.consume("test.topic")

    self.assertEqual(msgs, [])

    self.assert_sent(
        broker_id=3,
        request=fetch.FetchRequest(
            replica_id=-1,
            max_wait_time=1000,
            min_bytes=1,
            topics=[
                fetch.TopicRequest(
                    name="test.topic",
                    partitions=[
                        fetch.PartitionRequest(
                            partition_id=0,
                            offset=2,
                            max_bytes=(1024 * 1024),
                        ),
                    ]
                )
            ]
        )
    )
    self.assert_sent(
        broker_id=8,
        request=fetch.FetchRequest(
            replica_id=-1,
            max_wait_time=1000,
            min_bytes=1,
            topics=[
                fetch.TopicRequest(
                    name="test.topic",
                    partitions=[
                        fetch.PartitionRequest(
                            partition_id=1,
                            offset=1,
                            max_bytes=(1024 * 1024),
                        ),
                    ]
                )
            ]
        )
    )
def test_offset_out_of_range_error(self):
    """
    An offset_out_of_range error should yield no messages and cause the
    consumer to re-determine its offsets, so the next consume() call
    fetches from the freshly determined offset.
    """
    self.add_topic("test.topic", leaders=(3,))
    self.set_responses(
        broker_id=3, api="fetch",
        responses=[
            fetch.FetchResponse(
                topics=[
                    fetch.TopicResponse(
                        name="test.topic",
                        partitions=[
                            fetch.PartitionResponse(
                                partition_id=0,
                                error_code=errors.offset_out_of_range,
                                highwater_mark_offset=2,
                                message_set=messages.MessageSet([])
                            ),
                        ]
                    ),
                ]
            ),
            fetch.FetchResponse(
                topics=[
                    fetch.TopicResponse(
                        name="test.topic",
                        partitions=[
                            fetch.PartitionResponse(
                                partition_id=0,
                                error_code=errors.no_error,
                                highwater_mark_offset=2,
                                message_set=messages.MessageSet(
                                    messages=[
                                        (
                                            0,
                                            messages.Message(
                                                magic=0,
                                                attributes=0,
                                                key=None,
                                                value='{"cat": "dog"}',
                                            )
                                        ),
                                    ]
                                )
                            ),
                        ]
                    ),
                ]
            ),
        ]
    )

    c = FakeConsumer(["kafka01"])
    yield c.connect()

    # pretend offsets were already synced, but to a stale value
    c.offsets["test.topic"][0] = 80
    c.synced_offsets.add("test.topic")

    msgs = yield c.consume("test.topic")

    self.assertEqual(msgs, [])

    self.assert_sent(
        broker_id=3,
        request=fetch.FetchRequest(
            replica_id=-1,
            max_wait_time=1000,
            min_bytes=1,
            topics=[
                fetch.TopicRequest(
                    name="test.topic",
                    partitions=[
                        fetch.PartitionRequest(
                            partition_id=0,
                            offset=80,
                            max_bytes=(1024 * 1024),
                        ),
                    ]
                )
            ]
        )
    )

    msgs = yield c.consume("test.topic")

    self.assertEqual(msgs, [{"cat": "dog"}])

    self.assert_sent(
        broker_id=3,
        request=fetch.FetchRequest(
            replica_id=-1,
            max_wait_time=1000,
            min_bytes=1,
            topics=[
                fetch.TopicRequest(
                    name="test.topic",
                    partitions=[
                        fetch.PartitionRequest(
                            partition_id=0,
                            offset=0,
                            max_bytes=(1024 * 1024),
                        ),
                    ]
                )
            ]
        )
    )
def test_custom_deserializer_and_options(self):
    """
    Custom ``deserializer``, ``max_wait_time``, ``min_bytes`` and
    ``max_bytes`` options should be honored, and messages whose
    deserialization raises should be dropped.
    """
    self.add_topic("test.topic", leaders=(3,))
    self.set_responses(
        broker_id=3, api="fetch",
        responses=[
            fetch.FetchResponse(
                topics=[
                    fetch.TopicResponse(
                        name="test.topic",
                        partitions=[
                            fetch.PartitionResponse(
                                partition_id=0,
                                error_code=errors.no_error,
                                highwater_mark_offset=2,
                                message_set=messages.MessageSet(
                                    messages=[
                                        (
                                            0,
                                            messages.Message(
                                                magic=0,
                                                attributes=0,
                                                key=None,
                                                value='cat',
                                            )
                                        ),
                                        (
                                            1,
                                            messages.Message(
                                                magic=0,
                                                attributes=0,
                                                key=None,
                                                value='dog',
                                            )
                                        ),
                                    ]
                                )
                            ),
                        ]
                    ),
                ]
            ),
        ]
    )

    results = [Exception(), "bark"]

    def deserializer(val):
        # first call raises (message dropped), second returns "dog: bark"
        result = results.pop(0)
        if isinstance(result, Exception):
            raise result
        return "%s: %s" % (val, result)

    c = FakeConsumer(
        ["kafka01", "kafka02"],
        deserializer=deserializer,
        max_wait_time=500,
        min_bytes=1024,
        max_bytes=1024
    )
    yield c.connect()

    msgs = yield c.consume("test.topic")

    self.assertEqual(msgs, ["dog: bark"])

    self.assert_sent(
        broker_id=3,
        request=fetch.FetchRequest(
            replica_id=-1,
            max_wait_time=500,
            min_bytes=1024,
            topics=[
                fetch.TopicRequest(
                    name="test.topic",
                    partitions=[
                        fetch.PartitionRequest(
                            partition_id=0,
                            offset=0,
                            max_bytes=1024,
                        ),
                    ]
                )
            ]
        )
    )
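# A hedged sketch of plugging in a custom deserializer; the assumption
# (borne out by the test above) is that any callable taking the raw message
# value works, and that a raised exception drops just that message:
#
#     import json
#
#     def tolerant_json(value):
#         # raising here skips the offending message rather than
#         # aborting the whole batch
#         return json.loads(value)
#
#     consumer = single.SingleConsumer(
#         ["kafka01"], deserializer=tolerant_json  # placeholder host
#     )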