def test_fastavro_errors_read_enum():
    """Reading an enum symbol the reader schema lacks must raise AvroValueError.

    "CLUBS" is written with a four-symbol writer schema and read back with a
    reader schema that only lists the first three symbols. The resulting
    error message is expected to mention ``<enum>``.
    """
    all_suits = ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"]
    writer_schema = {
        "type": "enum",
        "name": "Suit",
        "symbols": all_suits,
    }
    # Same enum, minus the last symbol ("CLUBS").
    reader_schema = dict(writer_schema, symbols=all_suits[:-1])

    stream = MemoryIO()
    write_data(stream, "CLUBS", writer_schema)
    stream.seek(0)

    try:
        read_data(stream, reader_schema)
    except AvroValueError as err:
        assert '<enum>' in str(err)
    else:
        assert False, 'bad schema did not raise!'
def _get_decoder_func(self, schema_id, payload):
    """Return the decoder callable for ``schema_id``, caching it on first use.

    Resolution order:

    1. A callable already stored in ``self.id_to_decoder_func``.
    2. When ``self.fast_avro`` is truthy, a callable that delegates to
       ``read_data`` using the schema's ``to_json()`` form.
    3. Otherwise, or if preparing step 2 raises, a callable backed by
       ``avro.io.DatumReader`` / ``avro.io.BinaryDecoder``.

    Args:
        schema_id: Cache key; also passed to ``self.get_schema``.
        payload: Stream-like object. Only ``tell()`` and ``seek()`` are
            called on it here; no data is consumed.

    Returns:
        A callable taking a stream and returning whatever the underlying
        reader returns for it.
    """
    if schema_id in self.id_to_decoder_func:
        return self.id_to_decoder_func[schema_id]

    schema = self.get_schema(schema_id)
    curr_pos = payload.tell()

    if self.fast_avro:
        # try to use fast avro
        try:
            schema_dict = schema.to_json()
            payload.seek(curr_pos)
        except Exception:
            # Only ordinary errors trigger the fallback; KeyboardInterrupt and
            # SystemExit must not be swallowed here.
            payload.seek(curr_pos)
        else:
            def fast_decoder(p):
                return read_data(p, schema_dict)

            self.id_to_decoder_func[schema_id] = fast_decoder
            return fast_decoder

    avro_reader = avro.io.DatumReader(schema)

    def decoder(p):
        bin_decoder = avro.io.BinaryDecoder(p)
        return avro_reader.read(bin_decoder)

    self.id_to_decoder_func[schema_id] = decoder
    return decoder
def _get_decoder_func(self, schema_id, payload):
    """Return the fastavro-based decoder for ``schema_id``, caching it.

    Args:
        schema_id: Cache key; also passed to
            ``self.registry_client.get_by_id``.
        payload: Stream-like object. Only ``tell()`` and ``seek()`` are
            called on it here; no data is consumed.

    Returns:
        A callable taking a stream and returning ``read_data``'s result for
        it, or ``None`` when the schema cannot be prepared (``to_json()`` or
        the seek raises). This block has no slower fallback decoder.

    Raises:
        SerializerError: If the registry lookup fails or yields a falsy
            schema.
    """
    if schema_id in self.id_to_decoder_func:
        return self.id_to_decoder_func[schema_id]

    # fetch from schema reg
    try:
        # first call will cache in the client
        schema = self.registry_client.get_by_id(schema_id)
    except Exception:
        # Any ordinary lookup failure is reported uniformly below;
        # KeyboardInterrupt/SystemExit are deliberately not intercepted.
        schema = None

    if not schema:
        err = "unable to fetch schema with id %d" % (schema_id)
        raise SerializerError(err)

    curr_pos = payload.tell()
    # try to use fast avro
    try:
        schema_dict = schema.to_json()
        payload.seek(curr_pos)
    except Exception:
        # Best effort: nothing is cached and the caller receives None.
        return None

    def fast_decoder(p):
        return read_data(p, schema_dict)

    self.id_to_decoder_func[schema_id] = fast_decoder
    return fast_decoder
def _get_decoder_func(self, schema_id, payload):
    """Pick, cache and return the decoder callable for ``schema_id``.

    A cached callable is returned directly. Otherwise the schema is fetched
    from ``self.registry_client``. When ``HAS_FAST`` is truthy a trial
    ``read_data`` call on ``payload`` decides whether the fast reader can
    handle the schema. If it cannot, a reader built on
    ``avro.io.DatumReader`` is used instead. In both cases ``payload`` is
    rewound to the position it had on entry.

    Raises:
        SerializerError: If the registry raises ``ClientError`` or returns
            ``None`` for the schema.
    """
    decoders = self.id_to_decoder_func
    if schema_id in decoders:
        return decoders[schema_id]

    # Look the schema up in the registry.
    try:
        schema = self.registry_client.get_by_id(schema_id)
    except ClientError as e:
        message = "unable to fetch schema with id %d: %s" % (schema_id, str(e))
        raise SerializerError(message)

    if schema is None:
        message = "unable to fetch schema with id %d" % (schema_id)
        raise SerializerError(message)

    start_offset = payload.tell()

    if HAS_FAST:
        try:
            fast_schema = schema.to_json()
            # Trial decode: succeeds only if the fast reader copes with this
            # schema. The result is discarded; the caller decodes again.
            read_data(payload, fast_schema)
            payload.seek(start_offset)
            decoders[schema_id] = lambda p: read_data(p, fast_schema)
            return decoders[schema_id]
        except Exception:
            # The fast reader failed; use the standard reader below.
            pass

    # Undo whatever the trial decode consumed before handing over.
    payload.seek(start_offset)
    datum_reader = avro.io.DatumReader(schema)

    def slow_decoder(p):
        return datum_reader.read(avro.io.BinaryDecoder(p))

    decoders[schema_id] = slow_decoder
    return decoders[schema_id]
def test_fastavro_errors_read_record():
    """A nested field type mismatch must raise AvroValueError naming its path.

    The writer schema declares the nested field ``x.y`` as ``int`` and the
    reader schema declares it as ``float``. The error message is expected to
    contain ``<record>.x.<record>.y``.
    """
    def build_schema(y_type):
        # Writer and reader schemas differ only in the type of inner field "y".
        return {
            "type": "record",
            "name": "extension_test",
            "doc": "Complex schema with avro extensions",
            "fields": [
                {"name": "x", "type": {
                    "type": "record",
                    "name": "inner",
                    "fields": [
                        {"name": "y", "type": y_type}
                    ]
                }}
            ]
        }

    writer_schema = build_schema("int")
    reader_schema = build_schema("float")

    stream = MemoryIO()
    write_data(stream, {"x": {"y": 0}}, writer_schema)
    stream.seek(0)

    try:
        read_data(stream, reader_schema)
    except AvroValueError as err:
        assert '<record>.x.<record>.y' in str(err)
    else:
        assert False, 'bad schema did not raise!'
def _get_decoder_func(self, schema_id, payload):
    """Return the decoder callable for ``schema_id``, caching it on first use.

    A cached callable is returned directly. Otherwise the schema is fetched
    from ``self.registry_client``. When ``HAS_FAST`` is truthy a trial
    ``read_data`` call on ``payload`` decides whether the fast reader can
    handle the schema. If it cannot, a reader built on
    ``avro.io.DatumReader`` is used instead. ``payload`` is rewound to its
    entry position before returning.

    Args:
        schema_id: Cache key; also passed to
            ``self.registry_client.get_by_id``.
        payload: Seekable stream positioned at the start of the encoded datum.

    Returns:
        A callable taking a stream and returning whatever the selected
        reader returns for it.

    Raises:
        SerializerError: If the registry lookup fails or yields a falsy
            schema (the error is also logged via ``log.error``).
    """
    if schema_id in self.id_to_decoder_func:
        return self.id_to_decoder_func[schema_id]

    # fetch from schema reg
    try:
        schema = self.registry_client.get_by_id(schema_id)
    except Exception:
        # Any ordinary lookup failure is reported uniformly below;
        # KeyboardInterrupt/SystemExit are deliberately not intercepted.
        schema = None

    if not schema:
        err = "unable to fetch schema with id %d" % (schema_id)
        log.error(err)
        raise SerializerError(err)

    curr_pos = payload.tell()

    if HAS_FAST:
        # try to use fast avro
        try:
            schema_dict = schema.to_json()
            # Trial decode; the result is not needed, only whether it works.
            read_data(payload, schema_dict)
            # here means we passed so this is something fastavro can do
            # seek back since it will be called again for the
            # same payload - one time hit
            payload.seek(curr_pos)
        except Exception:
            # fastavro could not handle it; fall through to slow avro.
            pass
        else:
            def fast_decoder(p):
                return read_data(p, schema_dict)

            self.id_to_decoder_func[schema_id] = fast_decoder
            return fast_decoder

    # here means we should just delegate to slow avro
    # rewind
    payload.seek(curr_pos)
    avro_reader = avro.io.DatumReader(schema)

    def decoder(p):
        bin_decoder = avro.io.BinaryDecoder(p)
        return avro_reader.read(bin_decoder)

    self.id_to_decoder_func[schema_id] = decoder
    return decoder
def _get_decoder_func(self, schema_id, payload):
    """Return the decoder callable for ``schema_id``, caching it on first use.

    A cached callable is returned directly. Otherwise the schema is fetched
    from ``self.registry_client``. When ``HAS_FAST`` is truthy a trial
    ``read_data`` call on ``payload`` decides whether the fast reader can
    handle the schema. If it cannot, a reader built on ``io.DatumReader`` is
    used instead. ``payload`` is rewound to its entry position before
    returning.

    Args:
        schema_id: Cache key; also passed to
            ``self.registry_client.get_by_id``.
        payload: Seekable stream positioned at the start of the encoded datum.

    Returns:
        A callable taking a stream and returning whatever the selected
        reader returns for it.

    Raises:
        SerializerError: If the registry lookup fails or yields a falsy
            schema.
    """
    if schema_id in self.id_to_decoder_func:
        return self.id_to_decoder_func[schema_id]

    # fetch from schema reg
    try:
        schema = self.registry_client.get_by_id(schema_id)
    except Exception:
        # Any ordinary lookup failure is reported uniformly below;
        # KeyboardInterrupt/SystemExit are deliberately not intercepted.
        schema = None

    if not schema:
        err = "unable to fetch schema with id %d" % (schema_id)
        raise SerializerError(err)

    curr_pos = payload.tell()

    if HAS_FAST:
        # try to use fast avro
        try:
            schema_dict = schema.to_json()
            # Trial decode; the result is not needed, only whether it works.
            read_data(payload, schema_dict)
            # here means we passed so this is something fastavro can do
            # seek back since it will be called again for the
            # same payload - one time hit
            payload.seek(curr_pos)
        except Exception:
            # fastavro could not handle it; fall through to slow avro.
            pass
        else:
            def fast_decoder(p):
                return read_data(p, schema_dict)

            self.id_to_decoder_func[schema_id] = fast_decoder
            return fast_decoder

    # here means we should just delegate to slow avro
    # rewind
    payload.seek(curr_pos)
    # NOTE(review): `io` must be the module providing DatumReader and
    # BinaryDecoder (presumably avro's io, not the stdlib one) - confirm
    # against the file's imports.
    avro_reader = io.DatumReader(schema)

    def decoder(p):
        bin_decoder = io.BinaryDecoder(p)
        return avro_reader.read(bin_decoder)

    self.id_to_decoder_func[schema_id] = decoder
    return decoder
def test_fastavro_errors_read_map():
    """A map value type mismatch must raise AvroValueError naming its path.

    A map is written with ``float`` values and read back with a schema that
    declares ``double`` values. The error message is expected to contain
    ``<map>.x.<double>``.
    """
    writer_schema = {"type": "map", "values": "float"}
    reader_schema = {"type": "map", "values": "double"}

    stream = MemoryIO()
    write_data(stream, {"x": 0}, writer_schema)
    stream.seek(0)

    try:
        read_data(stream, reader_schema)
    except AvroValueError as err:
        assert '<map>.x.<double>' in str(err)
    else:
        assert False, 'bad schema did not raise!'
def test_fastavro_errors_read_array():
    """An array item type mismatch must raise AvroValueError naming its path.

    Three ints are written with an ``int`` item schema and read back with a
    ``float`` item schema. The error message is expected to contain
    ``<array>.[1].<float>``.
    """
    writer_schema = {"type": "array", "items": "int"}
    reader_schema = {"type": "array", "items": "float"}

    stream = MemoryIO()
    write_data(stream, [10, 20, 30], writer_schema)
    stream.seek(0)

    try:
        read_data(stream, reader_schema)
    except AvroValueError as err:
        # .[1] because the first element is read successfully
        # (but would be corrupt)
        assert '<array>.[1].<float>' in str(err)
    else:
        assert False, 'bad schema did not raise!'
def AvroToJson(record, schema):
    """Decode ``record`` with ``schema`` via ``read_data``.

    Args:
        record: Bytes-like payload, wrapped in an in-memory binary stream
            positioned at offset 0.
        schema: Schema object passed to ``read_data`` unchanged.

    Returns:
        Whatever ``read_data`` returns for the stream and schema.
    """
    return read_data(io.BytesIO(record), schema)