Example #1
    def output(self, dataframes: List[DataFrame]):
        """
        Emits the transformed data from the filter to a log

        :param dataframes: The dataframes to log
        """
        syslog_reader = DatumReader(SYSLOG_AVRO_SCHEMA, SYSLOG_AVRO_SCHEMA)

        for dataframe in dataframes:
            for row in dataframe.collect():
                buffer = BytesIO()
                buffer.write(row.value)
                buffer.flush()
                try:
                    buffer.seek(0)
                    decoder = BinaryDecoder(buffer)
                    output = syslog_reader.read(decoder)
                    self.log(INFO, f"Received AVRO syslog -> { output }")
                    #self.log(INFO, f"Received AVRO syslog -> { row.value }")
                except BaseException as e:
                    try:
                        buffer.seek(0)
                        decoder = BinaryDecoder(buffer)
                        output = syslog_reader.read(decoder)
                        self.log(INFO,
                                 f"ERROR Received AVRO feedback -> {output}")
                    except BaseException as e:
                        self.log(ERROR, f"{e}")
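A minimal sketch of the setup the method above assumes: SYSLOG_AVRO_SCHEMA already parsed and INFO/ERROR available as log levels. The syslog.avsc path is hypothetical, and the parse helper is avro.schema.parse (Parse in some versions of the avro package):

from logging import ERROR, INFO  # assumed to be the levels passed to self.log

import avro.schema

# Hypothetical schema file shipped with the filter
SYSLOG_AVRO_SCHEMA = avro.schema.parse(open("syslog.avsc", "rb").read())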
Example #2
def test_sanity():
  """

  Ensures that our "base" and "good" schemas are actually forwards- and
  backwards-compatible

  """
  # fst schema / record
  fst_schema = schema.parse(open("%s/MyRecord.base.avsc" % BASE_DIR).read())
  fst_writer = DatumWriter(writers_schema=fst_schema)
  fst_record = {
      "fieldWithoutDefaultValue": 0,
      "properField": 0,
      "enumField": "A",
      "unionField": None,
      "arrayField": ["world"],
      "mapField": {"hello": "world"},
      "fixedField": "aaaaaaaaaaaaaaaa"
  }

  # sec schema / record
  sec_schema = schema.parse(open("%s/MyRecord.good.avsc" % BASE_DIR).read())
  sec_writer = DatumWriter(writers_schema=sec_schema)
  sec_record = {
      "fieldWithoutDefaultValue": 0,
      "properField2": 0,
      "enumField": "B",
      "unionField": None,
      "arrayField": ["world"],
      "fixedField": "bbbbbbbbbbbbbbbb"
  }

  # Encode record w/ fst
  fst_buf = StringIO.StringIO()
  fst_encoder = BinaryEncoder(fst_buf)
  fst_writer.write(fst_record, fst_encoder)
  fst_data = fst_buf.getvalue()

  # Encode record w/ sec
  sec_buf = StringIO.StringIO()
  sec_encoder = BinaryEncoder(sec_buf)
  sec_writer.write(sec_record, sec_encoder)
  sec_data = sec_buf.getvalue()

  # writers == fst, readers == sec
  sec_reader = DatumReader(writers_schema=fst_schema, readers_schema=sec_schema)
  sec_decoder = BinaryDecoder(StringIO.StringIO(fst_data))
  sec_from_fst = sec_reader.read(sec_decoder) # no exception -> good

  # writers == sec, readers == fst
  fst_reader = DatumReader(writers_schema=sec_schema, readers_schema=fst_schema)
  fst_decoder = BinaryDecoder(StringIO.StringIO(sec_data))
  fst_from_sec = fst_reader.read(fst_decoder) # no exception -> good
Example #3
    def deserialize(
            self,
            data,  # type: Union[bytes, BinaryIO]
            schema,  # type:  Union[str, bytes, avro.schema.Schema]
    ):
        # type: (Union[bytes, BinaryIO], Union[str, bytes, avro.schema.Schema]) -> ObjectType
        """Read the binary representation into a specific type.
        Return type will be ignored, since the schema is deduced from the provided bytes.
        :param data: A stream of bytes or bytes directly
        :type data: BinaryIO or bytes
        :param schema: An Avro RecordSchema
        :type schema: str
        :returns: An instantiated object
        :rtype: ObjectType
        """
        if not hasattr(data, 'read'):
            data = BytesIO(data)

        reader = self.get_schema_reader(schema)

        with data:
            bin_decoder = BinaryDecoder(data)
            decoded_data = reader.read(bin_decoder)

        return decoded_data
Example #4
def _decode_avro(raw_bytes):
    schema = SchemaFromJSONData(avro_api_schema)
    buffer = io.BytesIO(raw_bytes)
    decoder = BinaryDecoder(buffer)
    reader = DatumReader(schema)
    content = reader.read(decoder)
    return content
Example #5
    def decode(
            self,
            content,  # type: Union[bytes, BinaryIO]
            reader,  # type: DatumReader
    ) -> ObjectType:
        """Read the binary representation into a specific type.
        Return type will be ignored, since the schema is deduced from the provided bytes.
        :param content: A stream of bytes or bytes directly
        :type content: BinaryIO or bytes
        :param reader: A DatumReader configured with the writer's schema
        :type reader: DatumReader
        :returns: An instantiated object
        :rtype: ObjectType
        """
        if not hasattr(content, 'read'):
            content = cast(bytes, content)
            content = BytesIO(content)

        with content:  # type: ignore
            bin_decoder = BinaryDecoder(content)
            decoded_content = reader.read(bin_decoder)

        return decoded_content
Example #6
    def decode(
        self,
        data,  # type: Union[bytes, BinaryIO]
        schema,  # type:  Union[str, bytes, avro.schema.Schema]
        *,
        readers_schema=None,  # type:  Optional[Union[str, bytes, avro.schema.Schema]]
    ) -> ObjectType:
        """Read the binary representation into a specific type.
        Return type will be ignored, since the schema is deduced from the provided bytes.
        :param data: A stream of bytes or bytes directly
        :type data: BinaryIO or bytes
        :param schema: An Avro RecordSchema
        :type schema: str
        :keyword readers_schema: An optional reader's schema as defined by the Apache Avro specification.
        :paramtype readers_schema: str or None
        :returns: An instantiated object
        :rtype: ObjectType
        """
        if not hasattr(data, 'read'):
            data = BytesIO(data)

        reader = self.get_schema_reader(schema, readers_schema)

        with data:
            bin_decoder = BinaryDecoder(data)
            decoded_data = reader.read(bin_decoder)

        return decoded_data
Example #7
    def consume(self):
        for message in self.kfkcon:
            try:
                if self.ser_type == kfkcfg.SERIALIZATIO_JSON:
                    message = json.loads(message.value.decode('utf-8'))

                elif self.ser_type == kfkcfg.SERIALIZATIO_AVRO:
                    bytes_reader = io.BytesIO(message.value)
                    decoder = BinaryDecoder(bytes_reader)
                    reader = DatumReader(self.avro_schema)
                    try:
                        message = reader.read(decoder)
                        print(message)
                    except Exception as e:
                        print(e)
                        pass

                if self.print_msg:
                    parser_logger.info(
                        'Message to consume: {}  -- serialization: {}'.format(
                            message, self.ser_type))
                    # print('Message to consume: {}'.format(message))

                yield message
            except Exception as e:
                parser_logger.info(
                    'unable to parse the msg!: {}...error: {}'.format(
                        message, e))
Example #8
def decode(schema, msg_value):
    reader = DatumReader(schema)
    message_bytes = io.BytesIO(msg_value)
    # Skip the 5-byte Confluent wire-format header (magic byte + schema id)
    message_bytes.seek(5)
    decoder = BinaryDecoder(message_bytes)
    event_dict = reader.read(decoder)
    return event_dict
Example #9
def verify_kafka_avro_messages(record, key_capture_mode, key_capture_field,
                               key_capture_attribute, key_schema, message_keys,
                               message_values):

    for key, value in message_values.items():
        assert record.get_field_data(f'/{key}').value == value

    # validate message key fields/attribute based on configuration
    if (key_capture_mode in ['RECORD_FIELD', 'RECORD_HEADER_AND_FIELD']):
        # the message key should have been captured into the configured field
        for key, value in message_keys.items():
            assert record.get_field_data(
                f"{key_capture_field}/{key}").value == value

    if (key_capture_mode in ['RECORD_HEADER', 'RECORD_HEADER_AND_FIELD']):
        # get the base64 encoded Avro message key
        encoded = record.header['values'][key_capture_attribute]
        # decode into bytes
        key_bytes = base64.standard_b64decode(encoded)
        # create an Avro binary decoder based on those bytes
        decoder = BinaryDecoder(io.BytesIO(key_bytes))
        # parse the key schema out of the record header
        decoded_key_schema = confluent_kafka.avro.loads(
            record.header['values']['avroKeySchema'])
        # ensure the parsed key schema matches the one we actually produced, earlier
        assert decoded_key_schema == key_schema
        # create a DatumReader to read a full Avro record (the key)
        reader = DatumReader(decoded_key_schema)
        decoded_avro_key = reader.read(decoder)
        # assert the values from the Avro record match what's expected
        for key, value in message_keys.items():
            assert decoded_avro_key[key] == value
Example #10
    def respond(self, call_request):
        buffer_reader = io.BytesIO(call_request)
        buffer_decoder = BinaryDecoder(buffer_reader)
        buffer_writer = io.BytesIO()
        buffer_encoder = BinaryEncoder(buffer_writer)
        error = None
        response_metadata = {}
        try:
            remote_protocol = self.process_handshake(buffer_decoder,
                                                     buffer_encoder)
            if remote_protocol is None or self.local_protocol is None:
                return buffer_writer.getvalue()

            DatumReader(schema.parse(
                '{"type": "map", "values": "bytes"}')).read(buffer_decoder)
            remote_message_name = buffer_decoder.read_utf8()

            remote_message = remote_protocol.messages.get(remote_message_name)
            if remote_message is None:
                fail_msg = 'Unknown remote message: %s' % remote_message_name
                raise schema.AvroException(fail_msg)
            local_message = self.local_protocol.messages.get(
                remote_message_name)
            if local_message is None:
                fail_msg = 'Unknown local message: %s' % remote_message_name
                raise schema.AvroException(fail_msg)
            writers_schema = remote_message.request
            readers_schema = local_message.request
            request = self.read_request(writers_schema, readers_schema,
                                        buffer_decoder)

            response = None
            try:
                response = self.invoke(self.local_protocol, local_message,
                                       request)
            except AvroRemoteException as e:
                error = e
            except Exception as e:
                error = AvroRemoteException(str(e))

            DatumWriter(
                schema.parse('{"type": "map", "values": "bytes"}')).write(
                    response_metadata, buffer_encoder)
            buffer_encoder.write_boolean(error is not None)
            if error is None:
                writers_schema = local_message.response
                self.write_response(writers_schema, response, buffer_encoder)
            else:
                writers_schema = local_message.errors
                self.write_error(writers_schema, error, buffer_encoder)
        except schema.AvroException as e:
            error = AvroRemoteException(str(e))
            buffer_encoder = BinaryEncoder(io.BytesIO())
            DatumWriter(
                schema.parse('{"type": "map", "values": "bytes"}')).write(
                    response_metadata, buffer_encoder)
            buffer_encoder.write_boolean(True)
            self.write_error(schema.parse('["string"]'), error, buffer_encoder)
            return buffer_encoder.writer.getvalue()
        return buffer_writer.getvalue()
Example #11
def avro_deserialization(value_schema, x):
    datum_reader = DatumReader(writer_schema=value_schema,
                               reader_schema=value_schema)
    bytes_io = BytesIO(x)
    decoder = BinaryDecoder(bytes_io)
    deserialized_x = datum_reader.read(decoder)
    bytes_io.close()
    return deserialized_x
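A short usage sketch for avro_deserialization, assuming a hypothetical value.avsc file containing the writer's schema:

import avro.schema

# Parse the schema once and reuse it for every payload
value_schema = avro.schema.parse(open("value.avsc", "rb").read())
record = avro_deserialization(value_schema, raw_bytes)  # raw_bytes: an Avro-encoded message value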
Example #12
    def avro_decode(cls, binary_data, schema=None):
        """Avro-deserialize binary data into a JSON-compatible dict.
        :param binary_data: Avro-encoded bytes
        :param schema: optional Avro schema; defaults to cls.REQUEST_SCHEMA
        :return: the decoded record
        """
        bio = BytesIO(binary_data)
        binary_decoder = BinaryDecoder(bio)
        return DatumReader(schema or cls.REQUEST_SCHEMA).read(binary_decoder)
Example #13
def binToObj(ab):
    bytes_reader = io.BytesIO(ab)
    decoder = BinaryDecoder(bytes_reader)
    reader = DatumReader(sc)  # sc: the writer's schema, defined elsewhere
    # Read records until the buffer is exhausted
    while True:
        try:
            rec = reader.read(decoder)
            print(rec)
        except Exception:
            break
Example #14
def deserialize(flight_info_bytes):
    if flight_info_bytes is not None:
        bytes_reader = BytesIO(flight_info_bytes)
        decoder = BinaryDecoder(bytes_reader)
        schema_flight_info = Parse(
            open(dir_path + "/flight-info.schema.avsc", "rb").read())
        reader = DatumReader(schema_flight_info)
        flight_info = reader.read(decoder)
        return [{"id": "1"}, {"id": "2"}]
    else:
        return None
Example #15
    def unpack(self, payload):
        magic, schema_id = struct.unpack('>bi', payload[:5])

        if magic == MAGIC_BYTES:
            schema = self.RegistryClient.get_by_id(schema_id)
            reader = DatumReader(schema)
            output = BinaryDecoder(io.BytesIO(payload[5:]))
            abc = reader.read(output)
            return abc
        else:
            return payload.decode()
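The 5-byte prefix handled above is the Confluent Schema Registry wire format: a magic byte (0) followed by a 4-byte big-endian schema id, with the Avro body after it. A standalone sketch of that framing, assuming MAGIC_BYTES is the module-level constant 0:

import struct

MAGIC_BYTES = 0  # first byte of every Confluent-framed message

def split_confluent_frame(payload):
    # '>bi' = big-endian: 1-byte magic + 4-byte schema id
    magic, schema_id = struct.unpack('>bi', payload[:5])
    if magic != MAGIC_BYTES:
        raise ValueError("not a Confluent-framed Avro message")
    return schema_id, payload[5:]  # schema id and the raw Avro-encoded body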
Example #16
def deserialize_avro(binary_data, schema):
    """
    Function used to deserialize Avro binary data
    :param schema: Avro schema of the binary data
    :param binary_data: event data in binary encoding (bytes)
    :return: deserialized data and corresponding schema
    """
    bytes_reader = io.BytesIO(binary_data)
    decoder = BinaryDecoder(bytes_reader)
    reader = DatumReader(schema)
    data = reader.read(decoder)
    return data, schema
Example #17
def read_value(schema: TypedSchema, bio: io.BytesIO):
    if schema.schema_type is SchemaType.AVRO:
        reader = DatumReader(schema.schema)
        return reader.read(BinaryDecoder(bio))
    if schema.schema_type is SchemaType.JSONSCHEMA:
        value = load(bio)
        try:
            schema.schema.validate(value)
        except ValidationError as e:
            raise InvalidPayload from e
        return value
    raise ValueError("Unknown schema type")
Example #18
def deserialize(flight_info_bytes):
    if flight_info_bytes is not None:
        bytes_reader = BytesIO(flight_info_bytes)
        decoder = BinaryDecoder(bytes_reader)
        schema_flight_info = Parse(
            open(dir_path + "/flight-info.schema.avsc", "rb").read())
        reader = DatumReader(schema_flight_info)
        flight_info = reader.read(decoder)

        return json.dumps([{"id": 907955534287978496}])
    else:
        return None
Example #19
    def decode_avro(self, payload):
        magic, schema_id = struct.unpack('>bi', payload[:5])

        # Get Schema registry
        # Avro value format
        if magic == MAGIC_BYTES:
            schema = self.register_client.get_by_id(schema_id)
            reader = DatumReader(schema)
            output = BinaryDecoder(io.BytesIO(payload[5:]))
            decoded = reader.read(output)
            return decoded, schema.name
        # no magic bytes, something is wrong
        else:
            raise ValueError
Example #20
def parse_avro_msg(msg, avro_schema):
    """
    Parses an avro record using a specified avro schema

    Args:
        :msg: the avro message to parse
        :avro_schema: the avro schema

    Returns:
         The parsed/decoded message
    """
    reader = DatumReader(avro_schema)
    message_bytes = BytesIO(msg)
    decoder = BinaryDecoder(message_bytes)
    return reader.read(decoder)
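A round-trip sketch for parse_avro_msg: encode a record with DatumWriter/BinaryEncoder under the same schema, then decode it back. The inline schema and record are illustrative only:

from io import BytesIO

import avro.schema
from avro.io import BinaryEncoder, DatumWriter

avro_schema = avro.schema.parse(
    '{"type": "record", "name": "Msg", "fields": [{"name": "text", "type": "string"}]}')

# Encode a sample record with the writer's schema...
buf = BytesIO()
DatumWriter(avro_schema).write({"text": "hello"}, BinaryEncoder(buf))

# ...and decode it back
assert parse_avro_msg(buf.getvalue(), avro_schema) == {"text": "hello"}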
Example #21
    def _decode_avro(self, avro_data):
        """
        Decode Avro Message.

        Args:
            avro_data (binary):     Avro data to decode.

        Returns:
            dict
        """
        bytes_reader = io.BytesIO(avro_data.value)
        decoder = BinaryDecoder(bytes_reader)
        reader = DatumReader(self.schema)

        return reader.read(decoder)
Example #22
    def unpack(self, payload):
        MAGIC_BYTES = 0
        magic, schema_id = struct.unpack('>bi', payload[:5])
        # Get Schema registry
        # Avro value format
        if magic == MAGIC_BYTES:
            schema = self.register_client.get_by_id(schema_id)
            reader = DatumReader(schema)
            output = BinaryDecoder(io.BytesIO(payload[5:]))
            abc = reader.read(output)
            return abc
        # String key
        else:
            # Timestamp is inside my key
            return payload[:-8].decode()
Example #23
def decode_avro(msg_value, reader):
    """Function to decode the bytes from the Avro serilization

    Args:
        msg_value ([bytes]): message to deserialize
        reader ([DatumReader]): special variable of Avro schema for the deserilization

    Returns:
        [dict]: deserialize data dictionary
    """

    message_bytes = io.BytesIO(msg_value)
    decoder = BinaryDecoder(message_bytes)
    event_dict = reader.read(decoder)

    return event_dict
Example #24
    def _unpack(self, payload):
        magic, schema_id = struct.unpack('>bi', payload[:5])

        # Get Schema registry
        # Avro value format
        if magic == self.MAGIC_BYTES:
            schema = self.register_client.get_by_id(schema_id)
            reader = DatumReader(schema)
            output = BinaryDecoder(io.BytesIO(payload[5:]))
            abc = reader.read(output)
            return abc
        # String key
        else:
            # If KSQL payload, exclude timestamp which is inside the key.
            # payload[:-8].decode()
            return payload.decode()
Example #25
    def callback(message):
        # Get the message serialization type.
        encoding = message.attributes.get("googclient_schemaencoding")
        # Deserialize the message data accordingly.
        if encoding == "BINARY":
            bout = io.BytesIO(message.data)
            decoder = BinaryDecoder(bout)
            reader = DatumReader(avro_schema)
            message_data = reader.read(decoder)
            print(f"Received a binary-encoded message:\n{message_data}")
        elif encoding == "JSON":
            message_data = json.loads(message.data)
            print(f"Received a JSON-encoded message:\n{message_data}")
        else:
            print(f"Received a message with no encoding:\n{message}")

        message.ack()
Example #26
def unpack(payload, windowed, register_client):
    magic, schema_id = struct.unpack('>bi', payload[:5])

    # Get Schema registry
    # Avro value format
    if magic == MAGIC_BYTES:
        schema = register_client.get_by_id(schema_id)
        reader = DatumReader(schema)
        output = BinaryDecoder(io.BytesIO(payload[5:]))
        abc = reader.read(output)
        return abc
    # String key
    else:
        # If Windowed KSQL payload
        if windowed == 'TRUE':
            return payload[:-8].decode()
        elif windowed == 'FALSE':
            return payload.decode()
Example #27
def extract_messages(ams, ingest_sub, bulk_size, schema, verify):

    # consume metric data messages
    consumed_msgs = ams.pull_sub(ingest_sub,
                                 num=bulk_size,
                                 return_immediately=True,
                                 verify=verify)

    # initialise the avro reader
    avro_reader = DatumReader(writers_schema=schema)

    # all the decoded messages that will be returned
    decoded_msgs = []

    # decode the messages
    for msg in consumed_msgs:

        try:

            # decode the data field again using the provided avro schema
            msg_bytes = BytesIO(msg[1].get_data())
            msg_decoder = BinaryDecoder(msg_bytes)
            avro_msg = avro_reader.read(msg_decoder)

            # check that the tags field is present
            if avro_msg["tags"] is None:
                raise KeyError("tags field is empty")

            # append to decoded messages
            decoded_msgs.append((msg[0], avro_msg))

        except Exception as e:
            LOGGER.warning(
                "Could not extract data from ams message {}, {}".format(
                    msg[0], e))

    last_msg_id = "-1"
    if len(consumed_msgs) > 0:
        last_msg_id = consumed_msgs.pop()[0]

    return decoded_msgs, last_msg_id
Example #28
def decode_avro(data):
    decoder = BinaryDecoder(BytesIO(data))
    new_data = datum_reader.read(decoder)
    return new_data
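decode_avro above relies on a module-level datum_reader that is not shown; a minimal sketch of that assumed setup, with a hypothetical record.avsc path:

import avro.schema
from avro.io import DatumReader

# Assumed module-level reader reused by decode_avro()
datum_reader = DatumReader(avro.schema.parse(open("record.avsc", "rb").read()))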
Example #29
File: avrolib.py  Project: wtj/pydoop
    def with_deserialization(self, *args, **kwargs):
        ret = meth(self, *args, **kwargs)
        f = StringIO(ret)
        dec = BinaryDecoder(f)
        return datum_reader.read(dec)
Example #30
def decoder(binary_data):
    bio = BytesIO(binary_data)
    binary_decoder = BinaryDecoder(bio)
    # return "yangkun"
    schema = avro.schema.parse(open("user.avsc", "rb").read())
    return DatumReader(schema).read(binary_decoder)