def _create_encoded_metadata_response(self, broker_data, topic_data,
                                      topic_errors, partition_errors):
    encoded = struct.pack('>ii', 3, len(broker_data))
    for node_id, broker in compat.dict_items(broker_data):
        encoded += struct.pack('>ih%dsi' % len(broker.host), node_id,
                               len(broker.host), compat.bytes(broker.host),
                               broker.port)

    encoded += struct.pack('>i', len(topic_data))
    for topic, partitions in compat.dict_items(topic_data):
        encoded += struct.pack('>hh%dsi' % len(topic), topic_errors[topic],
                               len(topic), compat.bytes(topic),
                               len(partitions))
        for partition, metadata in compat.dict_items(partitions):
            encoded += struct.pack('>hiii',
                                   partition_errors[(topic, partition)],
                                   partition, metadata.leader,
                                   len(metadata.replicas))
            if len(metadata.replicas) > 0:
                encoded += struct.pack('>%di' % len(metadata.replicas),
                                       *metadata.replicas)

            encoded += struct.pack('>i', len(metadata.isr))
            if len(metadata.isr) > 0:
                encoded += struct.pack('>%di' % len(metadata.isr),
                                       *metadata.isr)
    return encoded
def test_produce_many_gzip(self):
    start_offset = self.current_offset(self.topic, 0)

    message1 = create_gzip_message(
        [compat.bytes("Gzipped 1 %d" % i) for i in range(100)])
    message2 = create_gzip_message(
        [compat.bytes("Gzipped 2 %d" % i) for i in range(100)])

    self.assert_produce_request(
        [message1, message2],
        start_offset,
        200,
    )
def test_decode_produce_response(self):
    t1 = "topic1"
    t2 = "topic2"
    encoded = struct.pack('>iih%dsiihqihqh%dsiihq' % (len(t1), len(t2)),
                          2, 2, len(t1), compat.bytes(t1), 2, 0, 0,
                          compat.long(10), 1, 1, compat.long(20),
                          len(t2), compat.bytes(t2), 1, 0, 0,
                          compat.long(30))
    responses = list(KafkaProtocol.decode_produce_response(encoded))
    self.assertEqual(responses, [
        ProduceResponse(t1, 0, 0, compat.long(10)),
        ProduceResponse(t1, 1, 1, compat.long(20)),
        ProduceResponse(t2, 0, 0, compat.long(30))
    ])
def test_produce_many_simple(self):
    start_offset = self.current_offset(self.topic, 0)

    self.assert_produce_request(
        [create_message(compat.bytes("Test message %d" % i))
         for i in range(100)],
        start_offset,
        100,
    )

    self.assert_produce_request(
        [create_message(compat.bytes("Test message %d" % i))
         for i in range(100)],
        start_offset + 100,
        100,
    )
def test_huge_messages(self):
    huge_message, = self.send_messages(0, [
        create_message(
            compat.bytes(random_string(MAX_FETCH_BUFFER_SIZE_BYTES + 10))),
    ])

    # Create a consumer with the default buffer size
    consumer = self.consumer()

    # This consumer fails to get the message
    with self.assertRaises(ConsumerFetchSizeTooSmall):
        consumer.get_message(False, 0.1)

    consumer.stop()

    # Create a consumer with no fetch size limit
    big_consumer = self.consumer(
        max_buffer_size=None,
        partitions=[0],
    )

    # Seek to the last message
    big_consumer.seek(-1, 2)

    # Consume giant message successfully
    message = big_consumer.get_message(block=False, timeout=10)
    self.assertIsNotNone(message)
    self.assertEqual(message.message.value, huge_message)

    big_consumer.stop()
def test_decode_message_set_stop_iteration(self):
    encoded = b"".join([
        struct.pack(">q", 0),           # MsgSet Offset
        struct.pack(">i", 18),          # Msg Size
        struct.pack(">i", 1474775406),  # CRC
        struct.pack(">bb", 0, 0),       # Magic, flags
        struct.pack(">i", 2),           # Length of key
        b"k1",                          # Key
        struct.pack(">i", 2),           # Length of value
        b"v1",                          # Value

        struct.pack(">q", 1),           # MsgSet Offset
        struct.pack(">i", 18),          # Msg Size
        struct.pack(">i", -16383415),   # CRC
        struct.pack(">bb", 0, 0),       # Magic, flags
        struct.pack(">i", 2),           # Length of key
        b"k2",                          # Key
        struct.pack(">i", 2),           # Length of value
        b"v2",                          # Value

        compat.bytes("@1$%(Y!"),        # Random padding
    ])

    msgs = list(KafkaProtocol._decode_message_set_iter(encoded))
    self.assertEqual(len(msgs), 2)
    msg1, msg2 = msgs

    returned_offset1, decoded_message1 = msg1
    returned_offset2, decoded_message2 = msg2

    self.assertEqual(returned_offset1, 0)
    self.assertEqual(decoded_message1, create_message(b"v1", b"k1"))

    self.assertEqual(returned_offset2, 1)
    self.assertEqual(decoded_message2, create_message(b"v2", b"k2"))
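# Worked check of the "Msg Size" fields above (illustrative sketch, not from
# the original suite): each message body is CRC (4) + magic (1) + flags (1)
# + key length (4) + key (2) + value length (4) + value (2) = 18 bytes,
# matching the struct.pack(">i", 18) size fields.
msg_body = (struct.pack(">i", 1474775406) + struct.pack(">bb", 0, 0) +
            struct.pack(">i", 2) + b"k1" + struct.pack(">i", 2) + b"v1")
assert len(msg_body) == 18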
def test_produce_mixed(self):
    start_offset = self.current_offset(self.topic, 0)

    msg_count = 1 + 100
    messages = [
        create_message(compat.bytes("Just a plain message")),
        create_gzip_message(
            [compat.bytes("Gzipped %d" % i) for i in range(100)]),
    ]

    # All snappy integration tests fail with nosnappyjava
    if False and has_snappy():
        msg_count += 100
        messages.append(
            create_snappy_message(["Snappy %d" % i for i in range(100)]))

    self.assert_produce_request(messages, start_offset, msg_count)
def write_short_string(s):
    if s is None:
        return struct.pack('>h', -1)
    elif len(s) > 32767 and sys.version < '2.7':
        # Python 2.6 issues a deprecation warning instead of a struct error
        raise struct.error(len(s))
    else:
        return struct.pack('>h%ds' % len(s), len(s), compat.bytes(s))
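# Hand-worked sanity check (illustrative, not part of the original code): a
# short string is a signed 16-bit big-endian length prefix followed by the
# raw bytes, with -1 signalling a null string. The expected byte strings are
# derived by hand from the '>h%ds' format, assuming compat.bytes converts
# str to bytes.
assert write_short_string("ops") == b'\x00\x03ops'
assert write_short_string(None) == b'\xff\xff'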
def _encode_message_header(cls, client_id, correlation_id, request_key):
    """
    Encode the common request envelope
    """
    return struct.pack('>hhih%ds' % len(client_id),
                       request_key,              # ApiKey
                       0,                        # ApiVersion
                       correlation_id,           # CorrelationId
                       len(client_id),           # ClientId size
                       compat.bytes(client_id))  # ClientId
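# Illustrative only: the envelope above packs ApiKey (int16), ApiVersion
# (int16), CorrelationId (int32), then a length-prefixed ClientId. Assuming
# the method is exposed as a classmethod on KafkaProtocol (as its cls
# parameter suggests), the fixed 10-byte part of the header unpacks back:
header = KafkaProtocol._encode_message_header("my-client", 42, 0)
api_key, api_version, correlation_id, client_id_len = \
    struct.unpack('>hhih', header[:10])
assert (api_key, api_version, correlation_id, client_id_len) == (0, 0, 42, 9)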
def test_produce_10k_simple(self):
    start_offset = self.current_offset(self.topic, 0)

    self.assert_produce_request(
        [create_message(compat.bytes("Test message %d" % i))
         for i in range(10000)],
        start_offset,
        10000,
    )
def test_produce_100k_gzipped(self):
    start_offset = self.current_offset(self.topic, 0)

    self.assert_produce_request(
        [create_gzip_message(
            [compat.bytes("Gzipped batch 1, message %d" % i)
             for i in range(50000)])],
        start_offset,
        50000,
    )

    self.assert_produce_request(
        [create_gzip_message(
            [compat.bytes("Gzipped batch 1, message %d" % i)
             for i in range(50000)])],
        start_offset + 50000,
        50000,
    )
def test_decode_fetch_response(self):
    t1 = "topic1"
    t2 = "topic2"
    msgs = list(map(create_message,
                    [b"message1", b"hi", b"boo", b"foo", b"so fun!"]))
    ms1 = KafkaProtocol._encode_message_set([msgs[0], msgs[1]])
    ms2 = KafkaProtocol._encode_message_set([msgs[2]])
    ms3 = KafkaProtocol._encode_message_set([msgs[3], msgs[4]])

    encoded = struct.pack(
        '>iih%dsiihqi%dsihqi%dsh%dsiihqi%ds' %
        (len(t1), len(ms1), len(ms2), len(t2), len(ms3)),
        4, 2, len(t1), compat.bytes(t1), 2, 0, 0, 10,
        len(ms1), ms1, 1, 1, 20, len(ms2), ms2, len(t2),
        compat.bytes(t2), 1, 0, 0, 30, len(ms3), ms3)

    responses = list(KafkaProtocol.decode_fetch_response(encoded))

    def expand_messages(response):
        return FetchResponse(response.topic, response.partition,
                             response.error, response.highwaterMark,
                             list(response.messages))

    expanded_responses = list(map(expand_messages, responses))
    expect = [
        FetchResponse(t1, 0, 0, 10, [OffsetAndMessage(0, msgs[0]),
                                     OffsetAndMessage(0, msgs[1])]),
        FetchResponse(t1, 1, 1, 20, [OffsetAndMessage(0, msgs[2])]),
        FetchResponse(t2, 0, 0, 30, [OffsetAndMessage(0, msgs[3]),
                                     OffsetAndMessage(0, msgs[4])])
    ]
    self.assertEqual(expanded_responses, expect)
def test_decode_message_snappy(self):
    snappy_encoded = compat.bytes(
        '\xec\x80\xa1\x95\x00\x02\xff\xff\xff\xff\x00\x00'
        '\x00,8\x00\x00\x19\x01@\x10L\x9f[\xc2\x00\x00\xff'
        '\xff\xff\xff\x00\x00\x00\x02v1\x19\x1bD\x00\x10\xd5'
        '\x96\nx\x00\x00\xff\xff\xff\xff\x00\x00\x00\x02v2')
    offset = 11
    messages = list(KafkaProtocol._decode_message(snappy_encoded, offset))
    self.assertEqual(len(messages), 2)

    msg1, msg2 = messages

    returned_offset1, decoded_message1 = msg1
    self.assertEqual(returned_offset1, 0)
    self.assertEqual(decoded_message1, create_message(b"v1"))

    returned_offset2, decoded_message2 = msg2
    self.assertEqual(returned_offset2, 0)
    self.assertEqual(decoded_message2, create_message(b"v2"))
def test_decode_message_gzip(self):
    gzip_encoded = compat.bytes(
        '\xc0\x11\xb2\xf0\x00\x01\xff\xff\xff\xff\x00\x00\x000'
        '\x1f\x8b\x08\x00\xa1\xc1\xc5R\x02\xffc`\x80\x03\x01'
        '\x9f\xf9\xd1\x87\x18\x18\xfe\x03\x01\x90\xc7Tf\xc8'
        '\x80$wu\x1aW\x05\x92\x9c\x11\x00z\xc0h\x888\x00\x00'
        '\x00')
    offset = 11
    messages = list(KafkaProtocol._decode_message(gzip_encoded, offset))
    self.assertEqual(len(messages), 2)

    msg1, msg2 = messages

    returned_offset1, decoded_message1 = msg1
    self.assertEqual(returned_offset1, 0)
    self.assertEqual(decoded_message1, create_message(b"v1"))

    returned_offset2, decoded_message2 = msg2
    self.assertEqual(returned_offset2, 0)
    self.assertEqual(decoded_message2, create_message(b"v2"))
def encode_metadata_request(cls, client_id, correlation_id, topics=None):
    """
    Encode a MetadataRequest

    Params
    ======
    client_id: string
    correlation_id: int
    topics: list of strings
    """
    topics = [] if topics is None else topics
    message = cls._encode_message_header(client_id, correlation_id,
                                         KafkaProtocol.METADATA_KEY)

    message += struct.pack('>i', len(topics))

    for topic in topics:
        message += struct.pack('>h%ds' % len(topic), len(topic),
                               compat.bytes(topic))

    return write_int_string(message)
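# Illustrative sketch (the client id and topic names are made up): the
# request is the common header, an int32 topic count, and one
# length-prefixed name per topic, all wrapped in an int32 size prefix by
# write_int_string, so the prefix can be checked against the body length.
request = KafkaProtocol.encode_metadata_request(
    "example-client", correlation_id=1, topics=["topic1", "topic2"])
(size,) = struct.unpack('>i', request[:4])
assert size == len(request) - 4  # body length matches the size prefix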
def encode_produce_request(cls, client_id, correlation_id,
                           payloads=None, acks=1, timeout=1000):
    """
    Encode some ProduceRequest structs

    Params
    ======
    client_id: string
    correlation_id: int
    payloads: list of ProduceRequest
    acks: How "acky" you want the request to be
        0: immediate response
        1: written to disk by the leader
        2+: waits for this many replicas to sync
        -1: waits for all replicas to be in sync
    timeout: Maximum time the server will wait for acks from replicas.
             This is _not_ a socket timeout
    """
    payloads = [] if payloads is None else payloads
    grouped_payloads = group_by_topic_and_partition(payloads)

    message = cls._encode_message_header(client_id, correlation_id,
                                         KafkaProtocol.PRODUCE_KEY)

    message += struct.pack('>hii', acks, timeout, len(grouped_payloads))

    for topic, topic_payloads in grouped_payloads.items():
        message += struct.pack('>h%dsi' % len(topic), len(topic),
                               compat.bytes(topic), len(topic_payloads))

        for partition, payload in topic_payloads.items():
            msg_set = KafkaProtocol._encode_message_set(payload.messages)
            message += struct.pack('>ii%ds' % len(msg_set), partition,
                                   len(msg_set), msg_set)

    return struct.pack('>i%ds' % len(message), len(message), message)
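# Illustrative only: encoding a single plain message with acks=1. The
# ProduceRequest(topic, partition, messages) shape is assumed from the
# docstring above and the ProduceResponse convention in the decode test;
# the topic name is made up.
payload = ProduceRequest("example-topic", 0, [create_message(b"hello")])
request = KafkaProtocol.encode_produce_request(
    "example-client", correlation_id=2, payloads=[payload],
    acks=1, timeout=1000)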
def msg(self, s):
    if s not in self._messages:
        self._messages[s] = '%s-%s-%s' % (s, self.id(), str(uuid.uuid4()))

    return compat.bytes(self._messages[s])
def test_snappy(self):
    for i in compat.xrange(1000):
        s1 = compat.bytes(random_string(100))
        s2 = snappy_decode(snappy_encode(s1))
        self.assertEqual(s1, s2)
def test_gzip(self):
    for i in compat.xrange(1000):
        s1 = compat.bytes(random_string(100))
        s2 = gzip_decode(gzip_encode(s1))
        self.assertEqual(s1, s2)
def _send_random_messages(self, producer, topic, n):
    for j in range(n):
        resp = producer.send_messages(topic, compat.bytes(random_string(10)))
        if len(resp) > 0:
            self.assertEqual(resp[0].error, 0)
    time.sleep(1)  # give it some time