def make_avro(schema, named_defs):
    """Parse ``schema`` into an Avro schema object using a fresh ``Names``.

    ``FILE_SCHEMA`` is parsed first, then every entry of ``named_defs`` in
    order, and finally ``schema`` itself, all against the same ``Names``
    instance.

    :param schema: schema description; it is passed through ``wrap_in_list``
        and then ``fix_file_type`` before being parsed.
    :param named_defs: iterable of definitions parsed ahead of ``schema``.
    :return: the object ``make_avsc_object`` returns for ``schema``.
    """
    registry = Names()
    # The results of these calls are discarded; presumably they are made so
    # the definitions end up registered in ``registry`` -- confirm.
    make_avsc_object(FILE_SCHEMA, registry)
    for definition in named_defs:
        make_avsc_object(definition, registry)
    prepared = fix_file_type(wrap_in_list(schema))
    return make_avsc_object(prepared, registry)
def is_backward_compatible(cls, writer_schema_json, reader_schema_json):
    """Tell whether data written with the writer schema can be read with
    the reader schema.

    :param writer_schema_json: JSON representation of the writer schema
    :param reader_schema_json: JSON representation of the reader schema
    :return: the result of ``SchemaResolution.resolve_schema`` for the two
        parsed schemas.
    """
    # Parse the writer first, then the reader, before creating the resolver.
    parsed = [
        schema.make_avsc_object(schema_json)
        for schema_json in (writer_schema_json, reader_schema_json)
    ]
    return SchemaResolution().resolve_schema(*parsed)
def make_avro(schema, named_defs):
    """Build the Avro schema object for ``schema``.

    A new ``Names`` instance is shared by every parse: ``FILE_SCHEMA`` first,
    each item of ``named_defs`` next, ``schema`` last.

    :param schema: schema description, run through ``wrap_in_list`` and
        ``fix_file_type`` before parsing.
    :param named_defs: iterable of definitions to parse before ``schema``.
    :return: the parsed schema object for ``schema``.
    """
    shared_names = Names()
    # Return values are ignored here; presumably only the effect on
    # ``shared_names`` matters -- confirm against make_avsc_object.
    make_avsc_object(FILE_SCHEMA, shared_names)
    for named_def in named_defs:
        make_avsc_object(named_def, shared_names)
    return make_avsc_object(fix_file_type(wrap_in_list(schema)), shared_names)
def is_backward_compatible(cls, writer_schema_json, reader_schema_json):
    """Whether data serialized with the writer schema can be deserialized
    using the reader schema.

    :param writer_schema_json: JSON representation of the writer schema
    :param reader_schema_json: JSON representation of the reader schema
    :return: whatever ``SchemaResolution().resolve_schema`` returns for the
        parsed writer and reader schemas.
    """
    writer = schema.make_avsc_object(writer_schema_json)
    reader = schema.make_avsc_object(reader_schema_json)
    return SchemaResolution().resolve_schema(writer, reader)
def verify_avro_schema(cls, avro_schema_json):
    """Check that the given JSON representation is a valid Avro schema.

    :param avro_schema_json: JSON representation of the Avro schema
    :return: ``(True, None)`` when the schema parses, otherwise
        ``(False, error)`` where ``error`` is the ``repr`` of the exception
        raised while parsing.
    """
    try:
        schema.make_avsc_object(avro_schema_json)
    except Exception as error:
        return False, repr(error)
    else:
        return True, None
def test_union_serialization_invalid(self):
    """Serializing the string "hi" for ``funion_null`` raises AvroTypeException."""
    union_schema = make_avsc_object(self.UNION_FIELDS_SCHEMA, avro.schema.Names())
    invalid_record = {"funion_null": "hi"}
    # Build the serializer outside the guarded block so only to_json() is
    # allowed to raise.
    serializer = AvroJsonSerializer(union_schema)
    with self.assertRaises(avro.io.AvroTypeException):
        serializer.to_json(invalid_record)
def test_dict_with_unicode_bytes(self):
    """``from_dict`` on a ``bytes`` field keeps byte strings and encodes
    unicode strings.
    """
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "WithBytes",
        "fields": [{"type": "bytes", "name": "fbytes"}],
    }

    # byte arrays should be left alone
    byte_data = {"fbytes": b"(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00\x0b+\x00\x00"}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    self.assertEqual(
        AvroJsonDeserializer(avro_schema).from_dict(byte_data), byte_data)

    # unicode strings should be turned into iso-8859-1 bytes
    iso8859_data = {'fbytes': b"(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00"}
    unicode_data = {u'fbytes': u'(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00'}
    self.assertEqual(
        AvroJsonDeserializer(avro_schema).from_dict(unicode_data),
        iso8859_data)
def test_map(self):
    """A map-of-int field serializes to JSON and deserializes back, from
    both JSON text and a dict with mixed str/unicode keys.
    """
    schema_dict = {
        "type": "record",
        "name": "rec",
        "fields": [self.FIELD_MAP_INT],
    }
    data = {"intmap": {"one": 1, "two": 2}}
    unicode_dict = {'intmap': {'one': 1, u'two': 2}}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())

    avro_json = AvroJsonSerializer(avro_schema).to_json(data)
    # Dictionaries are unsorted, so accept either key order.
    self.assertIn(avro_json, ("""{"intmap":{"one":1,"two":2}}""",
                              """{"intmap":{"two":2,"one":1}}"""))

    deserializer = AvroJsonDeserializer(avro_schema)
    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    json_data = deserializer.from_json(avro_json)
    self.assertEqual(json_data, data)
    mixed_unicode = deserializer.from_dict(unicode_dict)
    self.assertEqual(mixed_unicode, data)
def test_user_record(self):
    """Round-trip three ``User`` records through the JSON (de)serializers.

    This schema example is from documentation
    http://avro.apache.org/docs/1.7.6/gettingstartedpython.html
    """
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "User",
        "fields": [
            {"name": "name", "type": "string"},
            {"name": "favorite_number", "type": ["int", "null"]},
            {"name": "favorite_color", "type": ["string", "null"]},
        ],
    }
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)
    deserializer = AvroJsonDeserializer(avro_schema)

    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    # One union field omitted: it is written as null and read back as None.
    alyssa = {"name": "Alyssa", "favorite_number": 256}
    alyssa_full = {"name": "Alyssa", "favorite_number": 256, "favorite_color": None}
    alyssa_json = """{"name":"Alyssa","favorite_number":{"int":256},"favorite_color":null}"""
    self.assertEqual(serializer.to_json(alyssa), alyssa_json)
    self.assertEqual(deserializer.from_json(alyssa_json), alyssa_full)

    # Every field supplied.
    ben = {"name": "Ben", "favorite_number": 7, "favorite_color": "red"}
    ben_json = """{"name":"Ben","favorite_number":{"int":7},"favorite_color":{"string":"red"}}"""
    self.assertEqual(serializer.to_json(ben), ben_json)
    self.assertEqual(deserializer.from_json(ben_json), ben)

    # Both union fields omitted.
    lion = {"name": "Lion"}
    lion_full = {"name": "Lion", "favorite_number": None, "favorite_color": None}
    lion_json = """{"name":"Lion","favorite_number":null,"favorite_color":null}"""
    self.assertEqual(serializer.to_json(lion), lion_json)
    self.assertEqual(deserializer.from_json(lion_json), lion_full)
def __init__(self, schemadir=".."):
    """Parse every schema named in ``self.SCHEMAS`` against ``self.names``.

    :param schemadir: directory containing one ``<name>.av`` JSON file per
        entry of ``self.SCHEMAS`` (``<name>`` is the lower-cased entry).
    """
    self.names = schema.Names()
    for sf in self.SCHEMAS:
        # A single pre-formatted argument prints the same text with the
        # Python 2 print statement and the Python 3 print function.
        print("Loading %s" % (sf,))
        with open(os.path.join(schemadir, sf.lower() + ".av"), "r") as fp:
            obj = json.load(fp)
        # The parsed object is not kept; the call is made for its effect on
        # self.names (presumably registering the schema -- confirm).
        schema.make_avsc_object(obj, self.names)
def test_fails_validation(self):
    """Replacing ``ffloat`` with the string "hi" makes serialization raise
    AvroTypeException.
    """
    all_fields_schema = make_avsc_object(self.ALL_FIELDS_SCHEMA, avro.schema.Names())
    # Copy the shared fixture so the invalid value does not leak into it.
    invalid_record = dict(self.VALID_DATA_ALL_FIELDS, ffloat="hi")
    serializer = AvroJsonSerializer(all_fields_schema)
    with self.assertRaises(avro.io.AvroTypeException):
        serializer.to_json(invalid_record)
def test_all_supported_types(self):
    """``VALID_DATA_ALL_FIELDS`` serializes to the expected JSON text and
    deserializes back to the same data.
    """
    avro_schema = make_avsc_object(self.ALL_FIELDS_SCHEMA, avro.schema.Names())
    data = self.VALID_DATA_ALL_FIELDS
    avro_json = AvroJsonSerializer(avro_schema).to_json(data)
    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    self.assertEqual(
        avro_json,
        """{"fruit":"ORANGE","fint":1,"flong":1,"fstring":"hi there","ffixed":"1234567890123456","frec":{"subfint":2},"funion_null":null,"ffloat":1.0,"fdouble":2.0,"intarr":[1,2,3],"intmap":{"one":1}}""")
    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    self.assertEqual(json_data, data)
def test_union_serialization_not_null(self):
    """An int in ``funion_null`` is written wrapped under an "int" key and
    reads back as the original data.
    """
    avro_schema = make_avsc_object(self.UNION_FIELDS_SCHEMA, avro.schema.Names())
    data = {"funion_null": 1}
    avro_json = AvroJsonSerializer(avro_schema).to_json(data)
    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    self.assertEqual(avro_json, """{"funion_null":{"int":1}}""")
    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    self.assertEqual(json_data, data)
def __init__(self, schemadir=".."):
    """Load each schema listed in ``self.SCHEMAS`` into ``self.names``.

    :param schemadir: directory that holds the ``<name>.av`` JSON files,
        ``<name>`` being the lower-cased entry of ``self.SCHEMAS``.
    """
    self.names = schema.Names()
    for sf in self.SCHEMAS:
        # One pre-formatted argument gives identical output under the
        # Python 2 print statement and the Python 3 print function.
        print("Loading %s" % (sf,))
        sfname = os.path.join(schemadir, sf.lower() + ".av")
        with open(sfname, "r") as fp:
            obj = json.load(fp)
        # Result intentionally unused; the call is made for its effect on
        # self.names (presumably registering the schema -- confirm).
        schema.make_avsc_object(obj, self.names)
def test_union_serialization_not_null(self):
    """A non-null ``funion_null`` value round-trips; the JSON form wraps
    the int under an "int" key.
    """
    avro_schema = make_avsc_object(self.UNION_FIELDS_SCHEMA, avro.schema.Names())
    data = {"funion_null": 1}

    avro_json = AvroJsonSerializer(avro_schema).to_json(data)
    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    self.assertEqual(avro_json, """{"funion_null":{"int":1}}""")

    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    self.assertEqual(json_data, data)
def test_all_supported_types(self):
    """``VALID_DATA_ALL_FIELDS`` serializes to the expected JSON text."""
    avro_schema = make_avsc_object(self.ALL_FIELDS_SCHEMA, avro.schema.Names())
    avro_json = AvroJsonSerializer(avro_schema).to_json(
        self.VALID_DATA_ALL_FIELDS)
    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    self.assertEqual(
        avro_json,
        """{"fint":1,"flong":1,"fstring":"hi there","ffixed":"1234567890123456","frec":{"subfint":2},"funion_null":null,"ffloat":1.0,"fdouble":2.0}"""
    )
def _parse_types(self, types, type_names):
    """Parse each entry of ``types`` into a schema object.

    :param types: iterable of type definitions.
    :param type_names: names object handed to ``schema.make_avsc_object``.
    :return: list of the parsed schema objects, in input order.
    :raises ProtocolParseException: when a parsed object's ``type`` is not
        in ``VALID_TYPE_SCHEMA_TYPES``.
    """
    parsed_types = []
    for type_def in types:
        parsed = schema.make_avsc_object(type_def, type_names)
        if parsed.type in VALID_TYPE_SCHEMA_TYPES:
            parsed_types.append(parsed)
            continue
        fail_msg = 'Type %s not an enum, fixed, record, or error.' % type_def
        raise ProtocolParseException(fail_msg)
    return parsed_types
def _parse_types(self, types, type_names):
    """Turn every type definition in ``types`` into a schema object.

    :param types: iterable of type definitions.
    :param type_names: names object passed to ``schema.make_avsc_object``.
    :return: list of parsed schema objects in the order given.
    :raises ProtocolParseException: if a parsed object's ``type`` is not
        one of ``VALID_TYPE_SCHEMA_TYPES``.
    """
    type_objects = []
    for definition in types:
        type_object = schema.make_avsc_object(definition, type_names)
        if type_object.type not in VALID_TYPE_SCHEMA_TYPES:
            raise ProtocolParseException(
                'Type %s not an enum, fixed, record, or error.' % definition)
        type_objects.append(type_object)
    return type_objects
def test_array(self):
    """A record holding ``FIELD_ARRAY_INT`` serializes to a JSON array."""
    schema_dict = {
        "type": "record",
        "name": "rec",
        "fields": [self.FIELD_ARRAY_INT],
    }
    data = {"intarr": [1, 2, 3]}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    avro_json = AvroJsonSerializer(avro_schema).to_json(data)
    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    self.assertEqual(avro_json, """{"intarr":[1,2,3]}""")
def end(self):
    """Finish the schema currently being built and return its JSON.

    With a non-empty ``_schema_tracker`` the current ``_schema_json`` is
    returned after ``_restore_current_schema()`` has run. Otherwise this is
    the top level schema: it is validated by parsing it, ``_schema_json`` is
    reset to ``None`` and the parsed object's ``to_json()`` is returned.
    """
    if self._schema_tracker:
        finished_json = self._schema_json
        self._restore_current_schema()
        return finished_json

    # this is the top level schema; do the schema validation
    validated = schema.make_avsc_object(self._schema_json)
    self._schema_json = None
    return validated.to_json()
def end(self):
    """Close the schema under construction and return its JSON form.

    A nested schema (``_schema_tracker`` non-empty) is returned as-is once
    ``_restore_current_schema()`` has been called. The top level schema is
    parsed for validation, ``_schema_json`` is cleared, and the parsed
    object's ``to_json()`` result is returned.
    """
    is_top_level = not self._schema_tracker
    if is_top_level:
        # this is the top level schema; do the schema validation
        top_level_schema = schema.make_avsc_object(self._schema_json)
        self._schema_json = None
        return top_level_schema.to_json()

    nested_json = self._schema_json
    self._restore_current_schema()
    return nested_json
def test_individually_allowed_fields_separately(self):
    """Each ``INDIVIDUALLY_SERIALIZABLE`` field round-trips as its own
    top-level schema.
    """
    for field in self.INDIVIDUALLY_SERIALIZABLE:
        # unwrap enum, fixed, array, and map but save the name for value lookup
        name = field['name']
        if isinstance(field['type'], dict):
            field = field['type']
        avro_schema = make_avsc_object(field, avro.schema.Names())
        data = self.VALID_DATA_ALL_FIELDS[name]
        avro_json = AvroJsonSerializer(avro_schema).to_json(data)
        json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
        # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
        self.assertEqual(json_data, data)
def test_all_supported_types(self):
    """``VALID_DATA_ALL_FIELDS`` produces the expected JSON text and
    deserializing that text gives the data back.
    """
    avro_schema = make_avsc_object(self.ALL_FIELDS_SCHEMA, avro.schema.Names())
    data = self.VALID_DATA_ALL_FIELDS

    avro_json = AvroJsonSerializer(avro_schema).to_json(data)
    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    self.assertEqual(
        avro_json,
        """{"fruit":"ORANGE","fint":1,"flong":1,"fstring":"hi there","ffixed":"1234567890123456","frec":{"subfint":2},"funion_null":null,"ffloat":1.0,"fdouble":2.0,"intarr":[1,2,3],"intmap":{"one":1}}"""
    )

    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    self.assertEqual(json_data, data)
def test_individually_allowed_fields_separately(self):
    """Round-trip every ``INDIVIDUALLY_SERIALIZABLE`` field on its own,
    using the matching value from ``VALID_DATA_ALL_FIELDS``.
    """
    for field in self.INDIVIDUALLY_SERIALIZABLE:
        # unwrap enum, fixed, array, and map but save the name for value lookup
        name = field['name']
        if isinstance(field['type'], dict):
            field = field['type']

        avro_schema = make_avsc_object(field, avro.schema.Names())
        data = self.VALID_DATA_ALL_FIELDS[name]
        avro_json = AvroJsonSerializer(avro_schema).to_json(data)
        json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
        # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
        self.assertEqual(json_data, data)
def get_schema():
    """Return the JSON form of the last type in the configured schema file.

    The JSON file at ``settings.AVRO_SCHEMA_PATH`` is read, each entry of
    its ``"types"`` list is parsed against one shared ``schema.Names``
    instance, and the last parsed object is converted back to JSON data via
    ``json.loads(str(...))``.
    """
    # TODO: support multiple type
    # The context manager closes the file; the original left the handle open.
    with open(settings.AVRO_SCHEMA_PATH) as schema_file:
        schema_json = json.load(schema_file)
    type_names = schema.Names()
    array_schema = None
    # TODO: here we assume the LogArray schema is the last one, which might
    # not be the case
    for avro_type in schema_json["types"]:
        array_schema = schema.make_avsc_object(avro_type, type_names)
    return json.loads(str(array_schema))
def test_fixed_ascii(self):
    """ASCII bytes in the ``ffixed`` field are written as a plain JSON string."""
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "WithFixed",
        "fields": [self.FIELD_FIXED],
    }
    data = {"ffixed": b"fixed text here!"}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)
    json_data = serializer.to_json(data)
    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    self.assertEqual(json_data, """{"ffixed":"fixed text here!"}""")
def test_records_union(self):
    """The record written for ``funion_rec`` depends on the value's shape:
    an int ``field`` is tagged ``rec1``, a string ``field`` ``rec2``.
    """
    avro_schema = make_avsc_object(self.UNION_RECORDS_SCHEMA, avro.schema.Names())

    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    data = {"funion_rec": {"field": 1}}
    avro_json = AvroJsonSerializer(avro_schema).to_json(data)
    self.assertEqual(avro_json, """{"funion_rec":{"rec1":{"field":1}}}""")

    data_another_record = {"funion_rec": {"field": "hi"}}
    another_record_json = AvroJsonSerializer(avro_schema).to_json(
        data_another_record)
    self.assertEqual(another_record_json,
                     """{"funion_rec":{"rec2":{"field":"hi"}}}""")
def test_unknown_fields_are_ignored(self):
    """JSON keys that the schema does not declare are dropped by ``from_json``."""
    schema_dict = {
        "type": "record",
        "name": "BasicName",
        "fields": [{"type": "string", "name": "name"}],
    }
    # "age" is not part of the schema above.
    avro_json = """{"name":"todd","age":1}"""
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    self.assertEqual(json_data, {"name": "todd"})
def test_map(self):
    """A record holding ``FIELD_MAP_INT`` serializes to a JSON object, in
    either key order.
    """
    record_schema = make_avsc_object(
        {
            "type": "record",
            "name": "rec",
            "fields": [self.FIELD_MAP_INT],
        },
        avro.schema.Names(),
    )
    serialized = AvroJsonSerializer(record_schema).to_json(
        {"intmap": {"one": 1, "two": 2}})
    # Dictionaries are unsorted
    accepted = (
        """{"intmap":{"one":1,"two":2}}""",
        """{"intmap":{"two":2,"one":1}}""",
    )
    self.assertIn(serialized, accepted)
def test_unknown_fields_are_ignored(self):
    """``from_json`` keeps only the fields declared by the schema."""
    schema_dict = {
        "type": "record",
        "name": "BasicName",
        "fields": [
            {"type": "string", "name": "name"},
        ],
    }
    # The input carries an extra "age" key that the schema does not define.
    avro_json = """{"name":"todd","age":1}"""
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    self.assertEqual(json_data, {"name": "todd"})
def test_fixed_non_ascii(self):
    """Non-ASCII and control bytes in ``ffixed`` are written as ``\\uXXXX``
    escapes in the JSON text.
    """
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "WithFixed",
        "fields": [self.FIELD_FIXED],
    }
    data = {"ffixed": b"(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00"}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)
    json_data = serializer.to_json(data)
    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    self.assertEqual(
        json_data,
        """{"ffixed":"(~^\\u00fbzoW\\u0013p\\u0019!4\\u000b+\\u0000\\u0000"}"""
    )
def test_nested_union_records(self):
    """Records nested inside nullable unions round-trip; the JSON form tags
    each record with its namespaced name.
    """
    inner_type = {
        "name": "InnerType",
        "type": "record",
        "fields": [{"name": "inner", "type": "int"}],
    }
    middle_type = {
        "name": "MiddleType",
        "type": "record",
        "fields": [{"name": "middle", "type": ["null", inner_type]}],
    }
    schema_dict = {
        "namespace": "nested",
        "name": "OuterType",
        "type": "record",
        "fields": [{"name": "outer", "type": ["null", middle_type]}],
    }
    data1 = {"outer": {"middle": {"inner": 1}}}
    data2 = {"outer": {"middle": None}}
    avro1 = """{"outer":{"nested.MiddleType":{"middle":{"nested.InnerType":{"inner":1}}}}}"""
    avro2 = """{"outer":{"nested.MiddleType":{"middle":null}}}"""

    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    serializer = AvroJsonSerializer(avro_schema)
    self.assertEqual(serializer.to_json(data1), avro1)
    self.assertEqual(serializer.to_json(data2), avro2)

    deserializer = AvroJsonDeserializer(avro_schema)
    self.assertEqual(deserializer.from_json(avro1), data1)
    self.assertEqual(deserializer.from_json(avro2), data2)
def _create_schema_elements_from_json(cls, avro_schema_json):
    """Walk the parsed schema breadth-first and build its schema elements.

    Each visited schema object whose class has an entry in
    ``_schema_to_element_map`` is wrapped in that element class together
    with the key of its parent element (``None`` for the root). Schema
    objects without an entry are skipped, and so are their nested schemas.

    :param avro_schema_json: JSON representation of the Avro schema.
    :return: list of ``(schema_element, schema_object)`` tuples in visiting
        order.
    """
    root = schema.make_avsc_object(avro_schema_json)
    collected = []
    pending = deque([(root, None)])
    while pending:
        current_schema, parent_key = pending.popleft()
        element_cls = _schema_to_element_map.get(current_schema.__class__)
        if element_cls:
            element = element_cls(current_schema, parent_key)
            collected.append((element, current_schema))
            # Children are queued with this element's key as their parent.
            own_key = element.key
            pending.extend(
                (nested, own_key) for nested in element.nested_schema_objects
            )
    return collected
def test_fixed_ascii(self):
    """ASCII bytes in ``ffixed`` serialize to a plain JSON string and
    deserialize back to the same bytes.
    """
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "WithFixed",
        "fields": [self.FIELD_FIXED],
    }
    data = {"ffixed": b"fixed text here!"}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)
    avro_json = serializer.to_json(data)
    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    self.assertEqual(avro_json, """{"ffixed":"fixed text here!"}""")
    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    self.assertEqual(json_data, data)
def test_fixed_non_ascii(self):
    """Non-ASCII and control bytes in ``ffixed`` serialize to ``\\uXXXX``
    escapes and deserialize back to the same bytes.
    """
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "WithFixed",
        "fields": [self.FIELD_FIXED],
    }
    data = {"ffixed": b"(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00"}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)
    avro_json = serializer.to_json(data)
    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    self.assertEqual(
        avro_json,
        """{"ffixed":"(~^\\u00fbzoW\\u0013p\\u0019!4\\u000b+\\u0000\\u0000"}""")
    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    self.assertEqual(json_data, data)
def test_array(self):
    """A record holding ``FIELD_ARRAY_INT`` serializes to a JSON array and
    deserializes back to the same data.
    """
    schema_dict = {
        "type": "record",
        "name": "rec",
        "fields": [self.FIELD_ARRAY_INT],
    }
    data = {"intarr": [1, 2, 3]}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    avro_json = AvroJsonSerializer(avro_schema).to_json(data)
    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    self.assertEqual(avro_json, """{"intarr":[1,2,3]}""")
    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    self.assertEqual(json_data, data)
def test_bytes_field_ascii(self):
    """ASCII content of a ``bytes`` field is written as a plain JSON string."""
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "WithFixed",
        "fields": [{"type": "bytes", "name": "fbytes"}],
    }
    data = {"fbytes": b"this is some long bytes field"}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)
    json_data = serializer.to_json(data)
    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    self.assertEqual(json_data,
                     """{"fbytes":"this is some long bytes field"}""")
def test_user_record(self):
    """Serialize and deserialize three ``User`` records.

    This schema example is from documentation
    http://avro.apache.org/docs/1.7.6/gettingstartedpython.html
    """
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "User",
        "fields": [{
            "name": "name",
            "type": "string"
        }, {
            "name": "favorite_number",
            "type": ["int", "null"]
        }, {
            "name": "favorite_color",
            "type": ["string", "null"]
        }]
    }
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)
    deserializer = AvroJsonDeserializer(avro_schema)

    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    # favorite_color omitted: written as null, read back as None.
    alyssa = {"name": "Alyssa", "favorite_number": 256}
    alyssa_full = {
        "name": "Alyssa",
        "favorite_number": 256,
        "favorite_color": None
    }
    alyssa_json = """{"name":"Alyssa","favorite_number":{"int":256},"favorite_color":null}"""
    self.assertEqual(serializer.to_json(alyssa), alyssa_json)
    self.assertEqual(deserializer.from_json(alyssa_json), alyssa_full)

    # All fields present.
    ben = {"name": "Ben", "favorite_number": 7, "favorite_color": "red"}
    ben_json = """{"name":"Ben","favorite_number":{"int":7},"favorite_color":{"string":"red"}}"""
    self.assertEqual(serializer.to_json(ben), ben_json)
    self.assertEqual(deserializer.from_json(ben_json), ben)

    # Both union fields omitted.
    lion = {"name": "Lion"}
    lion_full = {
        "name": "Lion",
        "favorite_number": None,
        "favorite_color": None
    }
    lion_json = """{"name":"Lion","favorite_number":null,"favorite_color":null}"""
    self.assertEqual(serializer.to_json(lion), lion_json)
    self.assertEqual(deserializer.from_json(lion_json), lion_full)
def test_bytes_field_ascii(self):
    """ASCII content of a ``bytes`` field serializes to a plain JSON string
    and deserializes back to the same bytes.
    """
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "WithFixed",
        "fields": [
            {"type": "bytes", "name": "fbytes"},
        ],
    }
    data = {"fbytes": b"this is some long bytes field"}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)
    avro_json = serializer.to_json(data)
    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    self.assertEqual(avro_json,
                     """{"fbytes":"this is some long bytes field"}""")
    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    self.assertEqual(json_data, data)
def load_schema(sf):
    """Locate ``schema/<sf lower-cased>.av``, parse it and return the schema.

    Three directories are tried in order: the directory of this module, the
    directory two levels above it, and ``sys.prefix``. The first file that
    can be opened and decoded as JSON is used and parsed against
    ``SCHEMA_NAMES``.

    :param sf: schema name; it is lower-cased to form the file name.
    :return: the object returned by ``schema.make_avsc_object``.
    :raises Exception: ``'Cannot load schema: <name>'`` when no candidate
        file yields a JSON value other than ``null``.
    """
    module_dir = os.path.dirname(sys.modules[__name__].__file__)
    dirs = [module_dir,
            os.path.join(module_dir, os.path.join(os.pardir, os.pardir)),
            sys.prefix]
    file_name = sf.lower() + '.av'
    obj = None
    for d in dirs:
        path = os.path.join(d, 'schema', file_name)
        try:
            # The context manager closes the handle the original leaked.
            with open(path, 'r') as fp:
                obj = json.load(fp)
        except (IOError, OSError, ValueError):
            # Missing/unreadable file or invalid JSON: try the next
            # directory. Other errors are no longer silently swallowed.
            continue
        break
    if obj is None:
        raise Exception('Cannot load schema: ' + sf.lower())
    return schema.make_avsc_object(obj, SCHEMA_NAMES)
def test_missing_nullable_field(self):
    """``from_json`` raises AvroTypeException when the JSON omits the
    nullable ``version`` field, even though the schema gives it a default.
    """
    name_field = {"type": "string", "name": "name"}
    version_field = {"type": ["null", "int"], "name": "version", "default": None}
    record_schema = make_avsc_object(
        {
            "type": "record",
            "name": "WithDefault",
            "fields": [name_field, version_field],
        },
        avro.schema.Names(),
    )
    json_without_version = """{"name":"mcnameface"}"""
    deserializer = AvroJsonDeserializer(record_schema)
    with self.assertRaises(avro.io.AvroTypeException):
        deserializer.from_json(json_without_version)
def test_missing_nullable_field(self):
    """Deserializing JSON without the nullable ``version`` field must raise
    AvroTypeException, despite the ``None`` default in the schema.
    """
    fields = [
        {"type": "string", "name": "name"},
        {"type": ["null", "int"], "name": "version", "default": None},
    ]
    schema_dict = {"type": "record", "name": "WithDefault", "fields": fields}
    incomplete_json = """{"name":"mcnameface"}"""
    parsed_schema = make_avsc_object(schema_dict, avro.schema.Names())
    deserializer = AvroJsonDeserializer(parsed_schema)
    with self.assertRaises(avro.io.AvroTypeException):
        deserializer.from_json(incomplete_json)
def test_bytes_field_non_ascii(self):
    """Non-ASCII and control bytes in a ``bytes`` field serialize to
    ``\\uXXXX`` escapes and deserialize back to the same bytes.
    """
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "WithFixed",
        "fields": [{"type": "bytes", "name": "fbytes"}],
    }
    data = {"fbytes": b"(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00\x0b+\x00\x00"}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    serializer = AvroJsonSerializer(avro_schema)
    avro_json = serializer.to_json(data)
    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    self.assertEqual(
        avro_json,
        """{"fbytes":"(~^\\u00fbzoW\\u0013p\\u0019!4\\u000b+\\u0000\\u0000\\u000b+\\u0000\\u0000"}"""
    )
    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    self.assertEqual(json_data, data)
def convert(self, src_schema):
    """Convert an Avro record schema into a Redshift table schema.

    The src_schema is the Avro schema json object. It returns the SQLTable
    object that represents the Redshift table schema.

    Note that Redshift does not support unsigned column type. For now,
    `unsigned` metadata will be ignored and a signed column type will be
    used instead.

    :param src_schema: Avro schema json object; a falsy value yields None.
    :return: result of ``self._create_redshift_table``, or None when
        ``src_schema`` is falsy.
    :raises SchemaConversionException: if ``src_schema`` cannot be parsed
        or does not pass ``self._is_record_schema``.
    """
    # TODO[clin|DATAPIPE-101] adding sortkey/distkey
    if not src_schema:
        return None

    try:
        avro_record = schema.make_avsc_object(src_schema)
    except Exception:
        # Not a bare ``except:`` -- KeyboardInterrupt and SystemExit must
        # propagate instead of being reported as an invalid schema.
        raise SchemaConversionException('Invalid Avro record schema.')

    if not self._is_record_schema(avro_record):
        raise SchemaConversionException('Invalid Avro record schema.')

    return self._create_redshift_table(avro_record)
def convert(self, src_schema):
    """Build the Redshift table schema for an Avro record schema.

    The src_schema is the Avro schema json object. It returns the SQLTable
    object that represents the Redshift table schema.

    Note that Redshift does not support unsigned column type. For now,
    `unsigned` metadata will be ignored and a signed column type will be
    used instead.

    :param src_schema: Avro schema json object; None is returned for a
        falsy value.
    :return: what ``self._create_redshift_table`` returns for the parsed
        record, or None when ``src_schema`` is falsy.
    :raises SchemaConversionException: when parsing fails or the parsed
        schema is rejected by ``self._is_record_schema``.
    """
    # TODO[clin|DATAPIPE-101] adding sortkey/distkey
    if not src_schema:
        return None

    try:
        avro_record = schema.make_avsc_object(src_schema)
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit are not converted into a schema error.
        raise SchemaConversionException('Invalid Avro record schema.')

    if self._is_record_schema(avro_record):
        return self._create_redshift_table(avro_record)
    raise SchemaConversionException('Invalid Avro record schema.')
def test_records_union(self):
    """Both record branches of ``funion_rec`` round-trip: an int ``field``
    is tagged ``rec1``, a string ``field`` ``example.avro.rec2``.
    """
    avro_schema = make_avsc_object(self.UNION_RECORDS_SCHEMA, avro.schema.Names())

    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    data = {"funion_rec": {"field": 1}}
    avro_json = AvroJsonSerializer(avro_schema).to_json(data)
    self.assertEqual(avro_json, """{"funion_rec":{"rec1":{"field":1}}}""")
    json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
    self.assertEqual(json_data, data)

    data_another_record = {"funion_rec": {"field": "hi"}}
    another_record_json = AvroJsonSerializer(avro_schema).to_json(data_another_record)
    self.assertEqual(another_record_json,
                     """{"funion_rec":{"example.avro.rec2":{"field":"hi"}}}""")
    another_json_data = AvroJsonDeserializer(avro_schema).from_json(another_record_json)
    self.assertEqual(another_json_data, data_another_record)
def test_dict_with_unicode_bytes(self):
    """For a ``bytes`` field, ``from_dict`` leaves byte strings untouched
    and converts unicode strings to bytes.
    """
    schema_dict = {
        "namespace": "example.avro",
        "type": "record",
        "name": "WithBytes",
        "fields": [
            {"type": "bytes", "name": "fbytes"},
        ],
    }

    # byte arrays should be left alone
    byte_data = {"fbytes": b"(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00\x0b+\x00\x00"}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    self.assertEqual(AvroJsonDeserializer(avro_schema).from_dict(byte_data),
                     byte_data)

    # unicode strings should be turned into iso-8859-1 bytes
    iso8859_data = {'fbytes': b"(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00"}
    unicode_data = {u'fbytes': u'(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00'}
    self.assertEqual(AvroJsonDeserializer(avro_schema).from_dict(unicode_data),
                     iso8859_data)
def test_map(self):
    """A record holding ``FIELD_MAP_INT`` serializes to a JSON object and
    reads back from JSON text and from a dict with mixed str/unicode keys.
    """
    schema_dict = {
        "type": "record",
        "name": "rec",
        "fields": [self.FIELD_MAP_INT]
    }
    data = {"intmap": {"one": 1, "two": 2}}
    unicode_dict = {'intmap': {'one': 1, u'two': 2}}
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
    avro_json = AvroJsonSerializer(avro_schema).to_json(data)

    # Dictionaries are unsorted
    self.assertIn(avro_json, ("""{"intmap":{"one":1,"two":2}}""",
                              """{"intmap":{"two":2,"one":1}}"""))

    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    deserializer = AvroJsonDeserializer(avro_schema)
    json_data = deserializer.from_json(avro_json)
    self.assertEqual(json_data, data)

    mixed_unicode = deserializer.from_dict(unicode_dict)
    self.assertEqual(mixed_unicode, data)
def avro_schema_with_doc(self):
    """Get the JSON representation of the Avro schema with the documentation
    and element Id of each doc-eligible element.

    The schema parsed from ``self.avro_schema_json`` is walked
    breadth-first. Every schema object whose class is in
    ``_schema_to_element_map`` is wrapped in its element class and passed to
    ``self._add_doc_to_schema`` together with a key-to-element lookup built
    from ``self.avro_schema_elements``. Objects without a mapping are
    skipped along with their nested schemas.
    """
    elements_by_key = {
        element.key: element for element in self.avro_schema_elements
    }
    root = schema.make_avsc_object(self.avro_schema_json)
    pending = deque([(root, None)])
    while pending:
        current_schema, parent_key = pending.popleft()
        element_cls = _schema_to_element_map.get(current_schema.__class__)
        if element_cls:
            wrapped = element_cls(current_schema, parent_key)
            self._add_doc_to_schema(wrapped, elements_by_key)
            # Nested schemas are queued with this element's key as parent.
            own_key = wrapped.key
            pending.extend(
                (nested, own_key) for nested in wrapped.nested_schema_objects
            )
    return root.to_json()
def test_nested_union_records(self):
    """Two levels of records inside nullable unions round-trip, with each
    record tagged by its namespaced name in the JSON form.
    """
    schema_dict = {
        "namespace": "nested",
        "name": "OuterType",
        "type": "record",
        "fields": [{
            "name": "outer",
            "type": ["null", {
                "name": "MiddleType",
                "type": "record",
                "fields": [{
                    "name": "middle",
                    "type": ["null", {
                        "name": "InnerType",
                        "type": "record",
                        "fields": [{
                            "name": "inner",
                            "type": "int"
                        }]
                    }]
                }]
            }]
        }]
    }
    data1 = {"outer": {"middle": {"inner": 1}}}
    data2 = {"outer": {"middle": None}}
    avro1 = """{"outer":{"nested.MiddleType":{"middle":{"nested.InnerType":{"inner":1}}}}}"""
    avro2 = """{"outer":{"nested.MiddleType":{"middle":null}}}"""
    avro_schema = make_avsc_object(schema_dict, avro.schema.Names())

    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    serializer = AvroJsonSerializer(avro_schema)
    self.assertEqual(serializer.to_json(data1), avro1)
    self.assertEqual(serializer.to_json(data2), avro2)

    deserializer = AvroJsonDeserializer(avro_schema)
    self.assertEqual(deserializer.from_json(avro1), data1)
    self.assertEqual(deserializer.from_json(avro2), data2)
def test_fails_validation(self):
    """Serialization raises AvroTypeException once ``ffloat`` holds the
    string "hi".
    """
    parsed_schema = make_avsc_object(self.ALL_FIELDS_SCHEMA, avro.schema.Names())
    # Work on a copy so the shared valid fixture stays untouched.
    broken = dict(self.VALID_DATA_ALL_FIELDS)
    broken.update(ffloat="hi")
    serializer = AvroJsonSerializer(parsed_schema)
    with self.assertRaises(avro.io.AvroTypeException):
        serializer.to_json(broken)
def test_union_serialization_invalid(self):
    """``to_json`` raises AvroTypeException when ``funion_null`` is "hi"."""
    parsed_schema = make_avsc_object(self.UNION_FIELDS_SCHEMA, avro.schema.Names())
    bad_data = {"funion_null": "hi"}
    # Only to_json() sits inside the guarded block; the serializer is built
    # beforehand.
    serializer = AvroJsonSerializer(parsed_schema)
    with self.assertRaises(avro.io.AvroTypeException):
        serializer.to_json(bad_data)
def test_union_serialization_null(self):
    """``None`` in ``funion_null`` is written as a bare JSON ``null``."""
    avro_schema = make_avsc_object(self.UNION_FIELDS_SCHEMA, avro.schema.Names())
    data = {"funion_null": None}
    avro_json = AvroJsonSerializer(avro_schema).to_json(data)
    # assertEqual replaces the deprecated assertEquals alias (gone in 3.12).
    self.assertEqual(avro_json, """{"funion_null":null}""")
def _parse_errors(self, errors, names):
    """Parse the ``errors`` list into a schema object.

    :param errors: list of error declarations.
    :param names: names object passed to ``schema.make_avsc_object``.
    :return: the object ``schema.make_avsc_object`` returns for ``errors``.
    :raises ProtocolParseException: if ``errors`` is not a list.
    """
    if isinstance(errors, list):
        return schema.make_avsc_object(errors, names)
    raise ProtocolParseException('Errors property not a list: %s' % errors)
def _parse_response(self, response, names):
    """Resolve the ``response`` declaration to a schema object.

    A string that ``names`` already knows is looked up with
    ``names.get_name``; anything else is parsed with
    ``schema.make_avsc_object``.

    :param response: response declaration (a name or a schema definition).
    :param names: names object used for both the lookup and the parsing.
    :return: the looked-up or newly parsed schema object.
    """
    is_known_name = (
        isinstance(response, basestring) and names.has_name(response, None)
    )
    if not is_known_name:
        return schema.make_avsc_object(response, names)
    return names.get_name(response, None)
def _parse_errors(self, errors, names):
    """Parse the ``errors`` list as an ``error_union`` schema.

    :param errors: list of declared errors.
    :param names: names object passed to ``schema.make_avsc_object``.
    :return: the schema object built from
        ``{'type': 'error_union', 'declared_errors': errors}``.
    :raises ProtocolParseException: if ``errors`` is not a list.
    """
    if isinstance(errors, list):
        error_union = {'type': 'error_union', 'declared_errors': errors}
        return schema.make_avsc_object(error_union, names)
    raise ProtocolParseException('Errors property not a list: %s' % errors)