def drain_ready(self):
    """Compress batch to be ready for send.

    Wakes the sender (resolves ``_drain_waiter``) and, when a compression
    codec is configured, replaces the buffered message set in place with a
    single compressed wrapper message.  Finally patches the 4-byte size
    prefix and rewinds the buffer so it can be read from the start.
    """
    memview = self._buffer.getbuffer()
    # Signal the sender task that this batch is ready to be drained.
    self._drain_waiter.set_result(None)
    if self._compression_type:
        _, compressor, attrs = self._COMPRESSORS[self._compression_type]
        # Compress everything after the 4-byte size prefix and wrap it in a
        # single Message carrying the codec attribute bits.
        msg = Message(compressor(memview[4:].tobytes()), attributes=attrs,
                      magic=self._version_id)
        encoded = msg.encode()
        # if compressed message is longer than original
        # we should send it as is (not compressed)
        header_size = 16    # 4(all size) + 8(offset) + 4(compressed size)
        if len(encoded) + header_size < len(memview):
            # write compressed message set (with header) to buffer
            # using memory view (for avoid memory copying)
            memview[:4] = Int32.encode(len(encoded) + 12)
            memview[4:12] = Int64.encode(0)  # offset 0
            memview[12:16] = Int32.encode(len(encoded))
            memview[16:16 + len(encoded)] = encoded
            self._buffer.seek(0)
            return

    # update batch size (first 4 bytes of buffer)
    memview[:4] = Int32.encode(self._buffer.tell() - 4)
    self._buffer.seek(0)
def _build(self):
    """Finalize the batch buffer and return it ready for reading.

    Idempotent: once closed, the buffer is only rewound, never re-encoded.
    When compression is enabled and actually shrinks the payload, the
    message set is rewritten in place as one compressed wrapper message
    and the buffer is truncated to the new length.
    """
    if self._closed:
        # Already built -- just rewind for the reader.
        self._buffer.seek(0)
        return self._buffer
    self._closed = True
    memview = self._buffer.getbuffer()
    if self._compression_type:
        _, compressor, attrs = self._COMPRESSORS[self._compression_type]
        # Compress everything past the 4-byte size prefix into one Message.
        msg = Message(compressor(memview[4:].tobytes()), attributes=attrs,
                      magic=self._magic)
        encoded = msg.encode()
        # if compressed message is longer than original
        # we should send it as is (not compressed)
        header_size = 16    # 4(all size) + 8(offset) + 4(compressed size)
        if len(encoded) + header_size < len(memview):
            # write compressed message set (with header) to buffer
            # using memory view (for avoid memory copying)
            memview[:4] = Int32.encode(len(encoded) + 12)
            memview[4:12] = Int64.encode(0)  # offset 0
            memview[12:16] = Int32.encode(len(encoded))
            memview[16:16 + len(encoded)] = encoded
            # Release the exported view first: BytesIO.truncate() raises
            # BufferError while a buffer export is still alive.
            memview.release()
            self._buffer.seek(16 + len(encoded))
            self._buffer.truncate()
            self._buffer.seek(0)
            return self._buffer

    # update batch size (first 4 bytes of buffer)
    memview[:4] = Int32.encode(self._buffer.tell() - 4)
    self._buffer.seek(0)
    return self._buffer
def test_decode_fetch_response_partial():
    """Decoding a FetchResponse v0 whose MessageSet is cut off mid-message
    must yield a PartialMessage placeholder for the truncated entry.

    Two partitions are encoded; each MessageSet declares 52 bytes but the
    second message claims 24 bytes while only a truncated tail fits.
    """
    encoded = b''.join([
        Int32.encode(1),                # Num Topics (Array)
        String('utf-8').encode('foobar'),
        Int32.encode(2),                # Num Partitions (Array)
        Int32.encode(0),                # Partition id
        Int16.encode(0),                # Error Code
        Int64.encode(1234),             # Highwater offset
        Int32.encode(52),               # MessageSet size
        Int64.encode(0),                # Msg Offset
        Int32.encode(18),               # Msg Size
        struct.pack('>i', 1474775406),  # CRC
        struct.pack('>bb', 0, 0),       # Magic, flags
        struct.pack('>i', 2),           # Length of key
        b'k1',                          # Key
        struct.pack('>i', 2),           # Length of value
        b'v1',                          # Value
        Int64.encode(1),                # Msg Offset
        struct.pack('>i', 24),  # Msg Size (larger than remaining MsgSet size)
        struct.pack('>i', -16383415),   # CRC
        struct.pack('>bb', 0, 0),       # Magic, flags
        struct.pack('>i', 2),           # Length of key
        b'k2',                          # Key
        struct.pack('>i', 8),           # Length of value
        b'ar',                          # Value (truncated)
        Int32.encode(1),                # Partition id (second partition)
        Int16.encode(0),                # Error Code
        Int64.encode(2345),             # Highwater offset
        Int32.encode(52),               # MessageSet size
        Int64.encode(0),                # Msg Offset
        Int32.encode(18),               # Msg Size
        struct.pack('>i', 1474775406),  # CRC
        struct.pack('>bb', 0, 0),       # Magic, flags
        struct.pack('>i', 2),           # Length of key
        b'k1',                          # Key
        struct.pack('>i', 2),           # Length of value
        b'v1',                          # Value
        Int64.encode(1),                # Msg Offset
        struct.pack('>i', 24),  # Msg Size (larger than remaining MsgSet size)
        struct.pack('>i', -16383415),   # CRC
        struct.pack('>bb', 0, 0),       # Magic, flags
        struct.pack('>i', 2),           # Length of key
        b'k2',                          # Key
        struct.pack('>i', 8),           # Length of value
        b'ar',                          # Value (truncated)
    ])
    resp = FetchResponse[0].decode(io.BytesIO(encoded))
    assert len(resp.topics) == 1
    topic, partitions = resp.topics[0]
    assert topic == 'foobar'
    assert len(partitions) == 2
    # partitions[0][3] is the raw MessageSet payload of the first partition.
    m1 = MessageSet.decode(
        partitions[0][3], bytes_to_read=len(partitions[0][3]))
    assert len(m1) == 2
    # The truncated second message decodes to a PartialMessage sentinel.
    assert m1[1] == (None, None, PartialMessage())
def _try_authenticate_plain(self, future):
    """Authenticate with the broker via SASL/PLAIN (RFC 4616).

    Sends ``authzid\\0authcid\\0password`` (authzid set equal to the
    username) as one length-prefixed frame, then expects a zero-sized
    reply frame (Int32(0)) on success.

    Arguments:
        future: Future to resolve with True on success or an Errors.*
            exception on failure.

    Returns:
        The resolved `future`.
    """
    if self.config['security_protocol'] == 'SASL_PLAINTEXT':
        log.warning('%s: Sending username and password in the clear', self)

    data = b''
    # Send PLAIN credentials per RFC-4616
    msg = bytes('\0'.join([self.config['sasl_plain_username'],
                           self.config['sasl_plain_username'],
                           self.config['sasl_plain_password']]).encode('utf-8'))
    size = Int32.encode(len(msg))
    try:
        self._send_bytes_blocking(size + msg)

        # The server will send a zero sized message (that is Int32(0)) on success.
        # The connection is closed on failure
        # BUGFIX: the reply must be captured -- previously the received
        # bytes were discarded, `data` stayed b'' and the success check
        # below always failed.
        data = self._recv_bytes_blocking(4)

    except ConnectionError as e:
        log.exception("%s: Error receiving reply from server", self)
        error = Errors.ConnectionError("%s: %s" % (self, e))
        self.close(error=error)
        return future.failure(error)

    if data != b'\x00\x00\x00\x00':
        error = Errors.AuthenticationFailedError('Unrecognized response during authentication')
        return future.failure(error)

    log.info('%s: Authenticated as %s via PLAIN', self,
             self.config['sasl_plain_username'])
    return future.success(True)
def test__unpack_message_set_compressed_v1(fetcher):
    """For a gzip-wrapped v1 message set, inner relative offsets must be
    rebased against the wrapper's absolute offset (base 10 -> 8, 9, 10)."""
    fetcher.config['check_crcs'] = False
    tp = TopicPartition('foo', 0)

    def frame(offset, msg):
        # One wire entry: Int64 offset + Int32 size + encoded message.
        payload = msg.encode()
        return Int64.encode(offset) + Int32.encode(len(payload)) + payload

    inner = [
        (0, None, Message(b'a')),
        (1, None, Message(b'b')),
        (2, None, Message(b'c')),
    ]
    blob = gzip_encode(b''.join(frame(off, m) for off, _, m in inner))

    wrapper = Message(blob, magic=1, attributes=Message.CODEC_GZIP)
    records = list(fetcher._unpack_message_set(tp, [(10, None, wrapper)]))

    assert len(records) == 3
    assert all(isinstance(rec, ConsumerRecord) for rec in records)
    assert [rec.value for rec in records] == [b'a', b'b', b'c']
    assert [rec.offset for rec in records] == [8, 9, 10]
def send_request(self, request, correlation_id=None):
    """Encode and queue a kafka api request for sending.

    Arguments:
        request (object): An un-encoded kafka request.
        correlation_id (int, optional): Optionally specify an ID to
            correlate requests with responses. If not provided, an ID will
            be generated automatically.

    Returns:
        correlation_id
    """
    log.debug("Sending request %s", request)
    if correlation_id is None:
        correlation_id = self._next_correlation_id()

    header = RequestHeader(request,
                           correlation_id=correlation_id,
                           client_id=self._client_id)
    # Wire format: 4-byte big-endian length prefix, then header + body.
    payload = header.encode() + request.encode()
    self.bytes_to_send.append(Int32.encode(len(payload)) + payload)
    if request.expect_response():
        self.in_flight_requests.append((correlation_id, request))
    return correlation_id
def append(self, key, value, timestamp_ms):
    """Append message (key and value) to batch.

    Returns:
        None if batch is full, otherwise an asyncio.Future that will be
        resolved when the message is delivered.
    """
    if self._is_full(key, value):
        return None

    # Keep a reference to the Message before invoking `.encode()`
    # (it is a weak method for some reason).
    if self._version_id == 0:
        msg_inst = Message(value, key=key, magic=self._version_id)
    else:
        msg_inst = Message(value, key=key, magic=self._version_id,
                           timestamp=timestamp_ms)
    body = msg_inst.encode()

    # Wire entry: Int64 relative offset + Int32 size + encoded message.
    self._buffer.write(Int64.encode(self._relative_offset))
    self._buffer.write(Int32.encode(len(body)))
    self._buffer.write(body)

    delivery_fut = asyncio.Future(loop=self._loop)
    self._msg_futures.append(delivery_fut)
    self._relative_offset += 1
    return delivery_fut
def send_request(self, request, correlation_id=None):
    """Encode and queue a kafka api request for sending.

    Arguments:
        request (object): An un-encoded kafka request.
        correlation_id (int, optional): Optionally specify an ID to
            correlate requests with responses. If not provided, an ID will
            be generated automatically.

    Returns:
        correlation_id
    """
    log.debug('Sending request %s', request)
    cid = (self._next_correlation_id()
           if correlation_id is None else correlation_id)

    header = RequestHeader(request,
                           correlation_id=cid,
                           client_id=self._client_id)
    body = b''.join([header.encode(), request.encode()])
    # Length-prefix the frame and hand it to the outgoing byte queue.
    self.bytes_to_send.append(Int32.encode(len(body)) + body)
    if request.expect_response():
        self.in_flight_requests.append((cid, request))
    return cid
def __init__(self, tp, batch_size, compression_type, ttl, api_version, loop):
    """One producer batch for topic-partition *tp*."""
    # Kafka 0.8/0.9 had a quirky lz4...
    version_id = 1 if api_version >= (0, 10) else 0
    if version_id == 0 and compression_type == 'lz4':
        compression_type = 'lz4-old-kafka'
    if compression_type:
        checker, _, _ = self._COMPRESSORS[compression_type]
        assert checker(), 'Compression Libraries Not Found'

    self._tp = tp
    self._batch_size = batch_size
    self._compression_type = compression_type
    self._version_id = version_id
    self._relative_offset = 0

    # Reserve the first 4 bytes of the buffer for the batch size prefix.
    self._buffer = io.BytesIO()
    self._buffer.write(Int32.encode(0))

    self._loop = loop
    self._ttl = ttl
    self._ctime = loop.time()

    # Waiters
    # Set when messages are delivered to Kafka based on ACK setting
    self._msg_futures = []
    # Set when sender takes this batch
    self._drain_waiter = asyncio.Future(loop=loop)
def test_decode_fetch_response_partial():
    """Decoding a FetchResponse v0 whose MessageSet is cut off mid-message
    must yield a PartialMessage placeholder for the truncated entry.

    Two partitions are encoded; each MessageSet declares 52 bytes but the
    second message claims 24 bytes while only a truncated tail fits.
    """
    encoded = b''.join([
        Int32.encode(1),                # Num Topics (Array)
        String('utf-8').encode('foobar'),
        Int32.encode(2),                # Num Partitions (Array)
        Int32.encode(0),                # Partition id
        Int16.encode(0),                # Error Code
        Int64.encode(1234),             # Highwater offset
        Int32.encode(52),               # MessageSet size
        Int64.encode(0),                # Msg Offset
        Int32.encode(18),               # Msg Size
        struct.pack('>i', 1474775406),  # CRC
        struct.pack('>bb', 0, 0),       # Magic, flags
        struct.pack('>i', 2),           # Length of key
        b'k1',                          # Key
        struct.pack('>i', 2),           # Length of value
        b'v1',                          # Value
        Int64.encode(1),                # Msg Offset
        struct.pack('>i', 24),  # Msg Size (larger than remaining MsgSet size)
        struct.pack('>i', -16383415),   # CRC
        struct.pack('>bb', 0, 0),       # Magic, flags
        struct.pack('>i', 2),           # Length of key
        b'k2',                          # Key
        struct.pack('>i', 8),           # Length of value
        b'ar',                          # Value (truncated)
        Int32.encode(1),                # Partition id (second partition)
        Int16.encode(0),                # Error Code
        Int64.encode(2345),             # Highwater offset
        Int32.encode(52),               # MessageSet size
        Int64.encode(0),                # Msg Offset
        Int32.encode(18),               # Msg Size
        struct.pack('>i', 1474775406),  # CRC
        struct.pack('>bb', 0, 0),       # Magic, flags
        struct.pack('>i', 2),           # Length of key
        b'k1',                          # Key
        struct.pack('>i', 2),           # Length of value
        b'v1',                          # Value
        Int64.encode(1),                # Msg Offset
        struct.pack('>i', 24),  # Msg Size (larger than remaining MsgSet size)
        struct.pack('>i', -16383415),   # CRC
        struct.pack('>bb', 0, 0),       # Magic, flags
        struct.pack('>i', 2),           # Length of key
        b'k2',                          # Key
        struct.pack('>i', 8),           # Length of value
        b'ar',                          # Value (truncated)
    ])
    resp = FetchResponse[0].decode(io.BytesIO(encoded))
    assert len(resp.topics) == 1
    topic, partitions = resp.topics[0]
    assert topic == 'foobar'
    assert len(partitions) == 2
    # Here the MessageSet is decoded eagerly by the protocol layer, so the
    # partition tuple already holds the decoded message list.
    m1 = partitions[0][3]
    assert len(m1) == 2
    # The truncated second message decodes to a PartialMessage sentinel.
    assert m1[1] == (None, None, PartialMessage())
def server_thread():
    """Accept one connection, consume one framed request and, when
    `need_response` is set, echo back a framed response; `event` is set
    once listening and again when done."""
    def read_int32(conn):
        # Read exactly 4 bytes and decode them as a big-endian Int32.
        return Int32.decode(BytesIO(conn.recv(4, socket.MSG_WAITALL)))

    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as server_sock:
        server_sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        server_sock.bind(('127.0.0.1', PORT))
        server_sock.listen(1)
        event.set()
        client_sock, _ = server_sock.accept()
        with client_sock:
            size = read_int32(client_sock)
            corr_id = read_int32(client_sock)
            # Drain the rest of the request (size covers the correlation id).
            client_sock.recv(size - 4, socket.MSG_WAITALL)
            if need_response:
                reply = Int32.encode(corr_id) + response.encode()
                client_sock.send(Int32.encode(len(reply)) + reply)
    event.set()
def __init__(self, magic, batch_size, compression_type):
    """Builder that accumulates messages for a single produce batch."""
    # v0 (magic 0) brokers used a quirky lz4 framing.
    if magic == 0 and compression_type == 'lz4':
        compression_type = 'lz4-old-kafka'
    if compression_type:
        checker, _, _ = self._COMPRESSORS[compression_type]
        assert checker(), 'Compression Libraries Not Found'

    self._magic = magic
    self._batch_size = batch_size
    self._compression_type = compression_type
    self._closed = False
    self._relative_offset = 0

    # Reserve the first 4 bytes of the buffer for the batch size prefix.
    self._buffer = io.BytesIO()
    self._buffer.write(Int32.encode(0))
def drain_ready(self):
    """Compress batch to be ready for send.

    Wakes the sender (resolves ``_drain_waiter``) and, when a compression
    codec is configured, replaces the buffered message set in place with a
    single compressed wrapper message.  Finally patches the 4-byte size
    prefix and rewinds the buffer so it can be read from the start.
    """
    memview = self._buffer.getbuffer()
    # Signal the sender task that this batch is ready to be drained.
    self._drain_waiter.set_result(None)
    if self._compression_type:
        _, compressor, attrs = self._COMPRESSORS[self._compression_type]
        # Compress everything after the 4-byte size prefix and wrap it in a
        # single Message carrying the codec attribute bits.
        msg = Message(compressor(memview[4:].tobytes()), attributes=attrs)
        encoded = msg.encode()
        # if compressed message is longer than original
        # we should send it as is (not compressed)
        header_size = 16    # 4(all size) + 8(offset) + 4(compressed size)
        if len(encoded) + header_size < len(memview):
            # write compressed message set (with header) to buffer
            # using memory view (for avoid memory copying)
            memview[:4] = Int32.encode(len(encoded) + 12)
            memview[4:12] = Int64.encode(0)  # offset 0
            memview[12:16] = Int32.encode(len(encoded))
            memview[16:16+len(encoded)] = encoded
            self._buffer.seek(0)
            return

    # update batch size (first 4 bytes of buffer)
    memview[:4] = Int32.encode(self._buffer.tell()-4)
    self._buffer.seek(0)
def send(self, request, expect_response=True):
    """send request, return Future()

    Can block on network if request is larger than send_buffer_bytes

    Arguments:
        request: un-encoded kafka request object.
        expect_response (bool): when False the returned future is resolved
            with None immediately after the bytes are written.

    Returns:
        Future resolved with the decoded response (or an Errors.* failure).
    """
    future = Future()
    # Fail fast if the connection cannot accept a request right now.
    if self.connecting():
        return future.failure(Errors.NodeNotReadyError(str(self)))
    elif not self.connected():
        return future.failure(Errors.ConnectionError(str(self)))
    elif not self.can_send_more():
        return future.failure(Errors.TooManyInFlightRequests(str(self)))
    correlation_id = self._next_correlation_id()
    header = RequestHeader(request,
                           correlation_id=correlation_id,
                           client_id=self.config['client_id'])
    message = b''.join([header.encode(), request.encode()])
    size = Int32.encode(len(message))
    try:
        # In the future we might manage an internal write buffer
        # and send bytes asynchronously. For now, just block
        # sending each request payload
        self._sock.setblocking(True)
        # socket.send() may write only part of the buffer, so loop until
        # each chunk (size prefix, then payload) is fully sent.
        for data in (size, message):
            total_sent = 0
            while total_sent < len(data):
                sent_bytes = self._sock.send(data[total_sent:])
                total_sent += sent_bytes
            assert total_sent == len(data)
        self._sock.setblocking(False)
    except (AssertionError, ConnectionError) as e:
        log.exception("Error sending %s to %s", request, self)
        error = Errors.ConnectionError("%s: %s" % (str(self), e))
        self.close(error=error)
        return future.failure(error)
    log.debug('%s Request %d: %s', self, correlation_id, request)
    if expect_response:
        # Track the request so the response reader can correlate the reply.
        ifr = InFlightRequest(request=request,
                              correlation_id=correlation_id,
                              response_type=request.RESPONSE_TYPE,
                              future=future,
                              timestamp=time.time())
        self.in_flight_requests.append(ifr)
    else:
        future.success(None)
    return future
def close(self):
    """Seal the batch against further appends.

    Sealing before handing the batch to the producer guarantees no extra
    messages slip in through ``producer.send()``; leaving it open keeps
    both the batch and single-message interfaces usable.  Closing does not
    influence when the batch is transmitted, and a closed batch can never
    be reopened.
    """
    if self._closed:
        return
    self._closed = True

    payload = self._builder.build()
    # Materialize the final wire frame: 4-byte length prefix + payload.
    self._buffer = io.BytesIO(Int32.encode(len(payload)) + payload)
    # The builder is no longer needed once the frame exists.
    del self._builder
def _try_authenticate_gssapi(self, future):
    """Authenticate with the broker via SASL/GSSAPI (Kerberos).

    Runs the GSSAPI token-exchange loop: each client-side `step()` output
    is sent as a length-prefixed frame, and each broker reply is fed back
    in, until the security context is complete.

    Arguments:
        future: Future to resolve with True on success or an exception on
            failure.

    Returns:
        The resolved `future`.
    """
    gssapi_name = gssapi.Name(
        self.config['sasl_kerberos_service_name'] + '@' + self.hostname,
        name_type=gssapi.NameType.hostbased_service
    ).canonicalize(gssapi.MechType.kerberos)
    log.debug('%s: GSSAPI name: %s', self, gssapi_name)

    # Exchange tokens until authentication either succeeds or fails
    client_ctx = gssapi.SecurityContext(name=gssapi_name, usage='initiate')
    received_token = None
    try:
        while not client_ctx.complete:
            # calculate an output token from kafka token (or None if first iteration)
            output_token = client_ctx.step(received_token)

            if output_token is None:
                continue

            # pass output token to kafka
            try:
                msg = output_token
                size = Int32.encode(len(msg))
                self._send_bytes_blocking(size + msg)

                # The server will send a token back. Processing of this token either
                # establishes a security context, or it needs further token exchange.
                # The gssapi will be able to identify the needed next step.
                # The connection is closed on failure.
                header = self._recv_bytes_blocking(4)
                (token_size,) = struct.unpack('>i', header)
                received_token = self._recv_bytes_blocking(token_size)

            except ConnectionError as e:
                log.exception("%s: Error receiving reply from server", self)
                error = Errors.ConnectionError("%s: %s" % (self, e))
                self.close(error=error)
                return future.failure(error)

    except Exception as e:
        # Any GSSAPI-level failure (bad credentials, clock skew, ...) fails
        # the future without tearing down the socket here.
        return future.failure(e)

    log.info('%s: Authenticated as %s via GSSAPI', self, gssapi_name)
    return future.success(True)
def _send(self, request):
    """Encode *request*, blocking-send it on the socket, and return a
    Future for its response.

    Must only be called while AUTHENTICATING or CONNECTED.
    """
    assert self.state in (ConnectionStates.AUTHENTICATING,
                          ConnectionStates.CONNECTED)
    future = Future()
    correlation_id = self._next_correlation_id()
    header = RequestHeader(request,
                           correlation_id=correlation_id,
                           client_id=self.config['client_id'])
    message = b''.join([header.encode(), request.encode()])
    size = Int32.encode(len(message))
    data = size + message
    try:
        # In the future we might manage an internal write buffer
        # and send bytes asynchronously. For now, just block
        # sending each request payload
        self._sock.setblocking(True)
        # socket.send() may write only part of the buffer; loop until the
        # whole frame has gone out.
        total_sent = 0
        while total_sent < len(data):
            sent_bytes = self._sock.send(data[total_sent:])
            total_sent += sent_bytes
        assert total_sent == len(data)
        if self._sensors:
            self._sensors.bytes_sent.record(total_sent)
        self._sock.setblocking(False)
    except (AssertionError, ConnectionError) as e:
        log.exception("Error sending %s to %s", request, self)
        error = Errors.ConnectionError("%s: %s" % (self, e))
        self.close(error=error)
        return future.failure(error)
    log.debug('%s Request %d: %s', self, correlation_id, request)
    if request.expect_response():
        # Track the request so the response reader can correlate the reply.
        ifr = InFlightRequest(request=request,
                              correlation_id=correlation_id,
                              response_type=request.RESPONSE_TYPE,
                              future=future,
                              timestamp=time.time())
        self.in_flight_requests.append(ifr)
    else:
        future.success(None)
    return future
def __init__(self, tp, batch_size, compression_type, ttl, loop):
    """One producer batch for topic-partition *tp*."""
    if compression_type:
        checker, _, _ = self._COMPRESSORS[compression_type]
        assert checker(), 'Compression Libraries Not Found'

    self._tp = tp
    self._batch_size = batch_size
    self._compression_type = compression_type
    self._relative_offset = 0

    # Reserve the first 4 bytes of the buffer for the batch size prefix.
    self._buffer = io.BytesIO()
    self._buffer.write(Int32.encode(0))

    self._loop = loop
    self._ttl = ttl
    self._ctime = loop.time()

    # Waiters
    # Set when messages are delivered to Kafka based on ACK setting
    self._msg_futures = []
    # Set when sender takes this batch
    self._drain_waiter = asyncio.Future(loop=loop)
def append(self, key, value):
    """Append message (key and value) to batch.

    Returns:
        None if batch is full, otherwise an asyncio.Future that will be
        resolved when the message is delivered.
    """
    if self._is_full(key, value):
        return None

    body = Message(value, key=key).encode()
    # Wire entry: Int64 relative offset + Int32 size + encoded message.
    entry = Int64.encode(self._relative_offset) + Int32.encode(len(body))
    entry += body
    self._buffer.write(entry)

    delivery_fut = asyncio.Future(loop=self._loop)
    self._msg_futures.append(delivery_fut)
    self._relative_offset += 1
    return delivery_fut
def append(self, *, timestamp, key, value):
    """Encode one message into the batch buffer.

    Returns the number of bytes written, or 0 when the batch has no room.
    """
    if not self._has_room_for(key, value):
        return 0

    # Keep a reference to the Message before invoking `.encode()`
    # (it is a weak method for some reason).
    if self._magic == 0:
        message = Message(value, key=key, magic=self._magic)
    else:
        message = Message(value, key=key, magic=self._magic,
                          timestamp=timestamp)
    payload = message.encode()

    # Wire entry: Int64 relative offset + Int32 size + encoded message.
    record = Int64.encode(self._relative_offset) + Int32.encode(len(payload))
    record += payload
    written = self._buffer.write(record)
    self._relative_offset += 1
    return written
def _try_authenticate_plain(self, future):
    """Authenticate with the broker via SASL/PLAIN (RFC 4616).

    Sends ``authzid\\0authcid\\0password`` (authzid set equal to the
    username) as one length-prefixed frame, then expects a zero-sized
    reply frame (Int32(0)) on success.

    Arguments:
        future: Future to resolve with True on success or an Errors.*
            exception on failure.

    Returns:
        The resolved `future`.
    """
    if self.config['security_protocol'] == 'SASL_PLAINTEXT':
        log.warning('%s: Sending username and password in the clear',
                    str(self))

    data = b''
    try:
        self._sock.setblocking(True)
        # Send PLAIN credentials per RFC-4616
        msg = bytes('\0'.join([
            self.config['sasl_plain_username'],
            self.config['sasl_plain_username'],
            self.config['sasl_plain_password']
        ]).encode('utf-8'))
        size = Int32.encode(len(msg))
        self._sock.sendall(size + msg)

        # The server will send a zero sized message (that is Int32(0)) on success.
        # The connection is closed on failure
        while len(data) < 4:
            fragment = self._sock.recv(4 - len(data))
            if not fragment:
                log.error('%s: Authentication failed for user %s', self,
                          self.config['sasl_plain_username'])
                error = Errors.AuthenticationFailedError(
                    'Authentication failed for user {0}'.format(
                        self.config['sasl_plain_username']))
                # BUGFIX: resolve the future exactly once and return -- the
                # original raised here, which escaped the except clause
                # below (wrong exception types) and propagated to callers
                # even though the future was already failed.
                future.failure(error)
                return future
            data += fragment
        self._sock.setblocking(False)
    except (AssertionError, ConnectionError) as e:
        log.exception("%s: Error receiving reply from server", self)
        error = Errors.ConnectionError("%s: %s" % (str(self), e))
        future.failure(error)
        self.close(error=error)
        # BUGFIX: return here; falling through would inspect partial `data`
        # and try to resolve the already-failed future a second time.
        return future

    if data != b'\x00\x00\x00\x00':
        return future.failure(Errors.AuthenticationFailedError())

    return future.success(True)
def _send(self, request, expect_response=True):
    """Frame and blocking-send *request*; return a Future for its response.

    When *expect_response* is False the future resolves to None as soon as
    the bytes are written.
    """
    future = Future()
    correlation_id = self._next_correlation_id()
    header = RequestHeader(request,
                           correlation_id=correlation_id,
                           client_id=self.config['client_id'])
    payload = header.encode() + request.encode()
    frame = Int32.encode(len(payload)) + payload
    try:
        # In the future we might manage an internal write buffer
        # and send bytes asynchronously. For now, just block
        # sending each request payload
        self._sock.setblocking(True)
        sent_total = 0
        frame_len = len(frame)
        while sent_total < frame_len:
            sent_total += self._sock.send(frame[sent_total:])
        assert sent_total == frame_len
        if self._sensors:
            self._sensors.bytes_sent.record(sent_total)
        self._sock.setblocking(False)
    except (AssertionError, ConnectionError) as e:
        log.exception("Error sending %s to %s", request, self)
        error = Errors.ConnectionError("%s: %s" % (str(self), e))
        self.close(error=error)
        return future.failure(error)
    log.debug('%s Request %d: %s', self, correlation_id, request)
    if expect_response:
        # Track the request so the response reader can correlate the reply.
        self.in_flight_requests.append(InFlightRequest(
            request=request,
            correlation_id=correlation_id,
            response_type=request.RESPONSE_TYPE,
            future=future,
            timestamp=time.time()))
    else:
        future.success(None)
    return future
def send(request, wait_response=True):
    """Send one framed kafka request to the local test server.

    Arguments:
        request: un-encoded kafka request object.
        wait_response (bool): when True, read (and discard) the framed
            response from the server.
    """
    global correlation_id
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.connect(('127.0.0.1', PORT))
        header = RequestHeader(
            request=request, correlation_id=correlation_id, client_id='test'
        )
        correlation_id += 1
        message = b''.join([header.encode(), request.encode()])
        size = Int32.encode(len(message))
        # BUGFIX: socket.send() may perform a partial write; sendall()
        # guarantees the whole frame (size prefix + payload) is written.
        s.sendall(size + message)
        if wait_response:
            size = s.recv(4, socket.MSG_WAITALL)
            size = Int32.decode(BytesIO(size))
            s.recv(size, socket.MSG_WAITALL)
def _try_authenticate_plain(self, future):
    """Authenticate with the broker via SASL/PLAIN (RFC 4616).

    Sends ``authzid\\0authcid\\0password`` (authzid set equal to the
    username) as one length-prefixed frame, then expects a zero-sized
    reply frame (Int32(0)) on success.

    Arguments:
        future: Future to resolve with True on success or an Errors.*
            exception on failure.

    Returns:
        The resolved `future`.
    """
    if self.config['security_protocol'] == 'SASL_PLAINTEXT':
        log.warning('%s: Sending username and password in the clear',
                    str(self))

    data = b''
    try:
        self._sock.setblocking(True)
        # Send PLAIN credentials per RFC-4616
        msg = bytes('\0'.join([self.config['sasl_plain_username'],
                               self.config['sasl_plain_username'],
                               self.config['sasl_plain_password']]).encode('utf-8'))
        size = Int32.encode(len(msg))
        self._sock.sendall(size + msg)

        # The server will send a zero sized message (that is Int32(0)) on success.
        # The connection is closed on failure
        while len(data) < 4:
            fragment = self._sock.recv(4 - len(data))
            if not fragment:
                log.error('%s: Authentication failed for user %s', self,
                          self.config['sasl_plain_username'])
                error = Errors.AuthenticationFailedError(
                    'Authentication failed for user {0}'.format(
                        self.config['sasl_plain_username']))
                # BUGFIX: resolve the future exactly once and return -- the
                # original raised here, which escaped the except clause
                # below (wrong exception types) and propagated to callers
                # even though the future was already failed.
                future.failure(error)
                return future
            data += fragment
        self._sock.setblocking(False)
    except (AssertionError, ConnectionError) as e:
        log.exception("%s: Error receiving reply from server", self)
        error = Errors.ConnectionError("%s: %s" % (str(self), e))
        future.failure(error)
        self.close(error=error)
        # BUGFIX: return here; falling through would inspect partial `data`
        # and try to resolve the already-failed future a second time.
        return future

    if data != b'\x00\x00\x00\x00':
        return future.failure(Errors.AuthenticationFailedError())

    return future.success(True)