def _encode_message(cls, message):
    """
    Encode a single message.

    The magic number of a message is a format version number.
    The only supported magic number right now is zero.

    Format
    ======
    Message => Crc MagicByte Attributes Key Value
      Crc => int32
      MagicByte => int8
      Attributes => int8
      Key => bytes
      Value => bytes

    Arguments:
        message: object exposing ``magic``, ``attributes``, ``key`` and
            ``value`` attributes.

    Returns:
        bytes: the 4-byte big-endian CRC followed by the encoded
        magic/attributes/key/value body.

    Raises:
        ProtocolError: if ``message.magic`` is not 0.
    """
    if message.magic != 0:
        raise ProtocolError("Unexpected magic number: %d" % message.magic)

    body = b''.join([
        struct.pack('>BB', message.magic, message.attributes),
        write_int_string(message.key),
        write_int_string(message.value),
    ])

    # Mask to 32 bits and pack as unsigned: this yields the same four bytes
    # as packing a signed CRC with '>i', but does not raise struct.error
    # when crc32() hands back an unsigned value >= 2**31.
    crc = crc32(body) & 0xffffffff
    return struct.pack('>I', crc) + body
def validate_crc(self):
    """Report whether the stored CRC matches the computed one.

    The computed checksum is cached on ``self._validated_crc``; it is
    only calculated here when that attribute is still ``None``.

    Returns:
        bool: True when ``self.crc`` equals the cached/computed checksum,
        False otherwise.
    """
    if self._validated_crc is None:
        # Encode with recalc_crc=False, then checksum everything after
        # the first four bytes of the encoding.
        encoded = self._encode_self(recalc_crc=False)
        self._validated_crc = crc32(encoded[4:])
    return self.crc == self._validated_crc
def _decode_message(cls, data, offset):
    """
    Decode a single Message

    The only caller of this method is decode_message_set_iter.
    They are decoupled to support nested messages (compressed MessageSets).
    The offset is actually read from decode_message_set_iter (it is part
    of the MessageSet payload).

    Arguments:
        data: the raw message; the first four bytes hold the CRC.
        offset: offset to report for an uncompressed message.

    Yields:
        (offset, Message) tuples. For an uncompressed message this is the
        message itself paired with ``offset``. For a gzip- or
        snappy-compressed message, the value is decompressed and every
        (offset, message) pair of the nested message set is yielded.

    Raises:
        ChecksumError: if the stored CRC does not match the CRC of
            ``data[4:]``.
    """
    ((crc, magic, att), cur) = relative_unpack('>IBB', data, 0)

    # The stored CRC was unpacked as an unsigned int, so compare against the
    # computed CRC reduced to the same unsigned 32-bit range. This keeps the
    # check correct whether crc32() returns a signed or an unsigned value.
    if crc != (crc32(data[4:]) & 0xffffffff):
        raise ChecksumError("Message checksum failed")

    (key, cur) = read_int_string(data, cur)
    (value, cur) = read_int_string(data, cur)

    codec = att & ATTRIBUTE_CODEC_MASK

    if codec == CODEC_NONE:
        yield (offset, Message(magic, att, key, value))
        return

    if codec == CODEC_GZIP:
        inner = gzip_decode(value)
    elif codec == CODEC_SNAPPY:
        inner = snappy_decode(value)
    else:
        # NOTE(review): any other codec yields nothing, exactly as before.
        return

    for (offset, msg) in KafkaProtocol._decode_message_set_iter(inner):
        yield (offset, msg)
def _encode_self(self, recalc_crc=True):
    """Encode this message with the schema matching its magic version.

    Arguments:
        recalc_crc (bool): when True (the default), compute a CRC over
            the encoded message minus its first four bytes, store it in
            ``self.crc`` and return the encoding with that CRC as the
            leading field. When False, return the schema encoding as-is,
            built from whatever ``self.crc`` currently holds.

    Returns:
        The encoded message as produced by the version's schema.

    Raises:
        ValueError: if ``self.magic`` is neither 0 nor 1.
    """
    version = self.magic
    if version == 1:
        fields = (self.crc, self.magic, self.attributes,
                  self.timestamp, self.key, self.value)
    elif version == 0:
        fields = (self.crc, self.magic, self.attributes,
                  self.key, self.value)
    else:
        # Wrap in a 1-tuple: a tuple-valued magic would otherwise be
        # unpacked as several %-arguments and raise TypeError instead of
        # the intended ValueError.
        raise ValueError('Unrecognized message version: %s' % (version,))

    message = Message.SCHEMAS[version].encode(fields)
    if not recalc_crc:
        return message

    # Replace the first four bytes with a CRC of everything after them.
    self.crc = crc32(message[4:])
    crc_field = self.SCHEMAS[version].fields[0]
    return crc_field.encode(self.crc) + message[4:]
def _encode_self(self, recalc_crc=True):
    """Serialize the message using the schema selected by ``self.magic``.

    Arguments:
        recalc_crc (bool): if False, the schema encoding is returned
            untouched. Otherwise a CRC of the encoding past its first
            four bytes is stored in ``self.crc`` and the result is the
            encoded CRC field followed by those remaining bytes.

    Raises:
        ValueError: when ``self.magic`` is not 0 or 1.
    """
    magic = self.magic

    # Only version 1 has a timestamp between the attributes and the key.
    if magic == 1:
        timestamp_part = (self.timestamp,)
    elif magic == 0:
        timestamp_part = ()
    else:
        raise ValueError('Unrecognized message version: %s' % (magic, ))

    header = (self.crc, self.magic, self.attributes)
    fields = header + timestamp_part + (self.key, self.value)
    encoded = Message.SCHEMAS[magic].encode(fields)

    if recalc_crc:
        payload = encoded[4:]
        self.crc = crc32(payload)
        return self.SCHEMAS[magic].fields[0].encode(self.crc) + payload
    return encoded
def decode(cls, data):
    """Build a message instance from its encoded form.

    Arguments:
        data: either a ``bytes`` object or an already-opened stream that
            the schema fields can decode from. For ``bytes`` input a CRC
            of everything after the first four bytes is computed and
            recorded on the result; for any other input no CRC is
            computed.

    Returns:
        An instance of ``cls`` whose ``_validated_crc`` attribute is the
        computed CRC, or None when ``data`` was not ``bytes``.
    """
    computed_crc = None
    if isinstance(data, bytes):
        computed_crc = crc32(data[4:])
        data = io.BytesIO(data)

    # The first three fields are read with the version-0 schema; the magic
    # value read here selects the schema used for the rest.
    header_fields = cls.SCHEMAS[0].fields[0:3]
    crc, magic, attributes = [f.decode(data) for f in header_fields]

    tail = []
    for f in cls.SCHEMAS[magic].fields[3:]:
        tail.append(f.decode(data))

    # Only version 1 has a timestamp ahead of key and value.
    timestamp = tail[0] if magic == 1 else None

    msg = cls(
        tail[-1],
        key=tail[-2],
        magic=magic,
        attributes=attributes,
        crc=crc,
        timestamp=timestamp,
    )
    msg._validated_crc = computed_crc
    return msg