def test_bytes():
    """Decode a ``bytes`` field with pyavroc and with ``Deserializer``.

    The record is encoded once with ``Serializer`` and decoded twice: by
    ``pyavroc.AvroDeserializer`` (given the writer schema explicitly) and by
    ``Deserializer``.  Both results must equal the original payload.
    """
    writer_schema = (
        '{ "type": "record", "name": "Test", "fields": '
        '[ {"name": "bytes_field", "type": "bytes"} ] }'
    )
    # Reader and writer use exactly the same schema text.
    reader_schema = writer_schema
    payload = b'some bytes'

    # The pyavroc serializer is constructed but never used to encode here;
    # the construction is kept so the schema still passes through it.
    pyavroc.AvroSerializer(writer_schema)
    pyavroc_reader = pyavroc.AvroDeserializer(reader_schema)

    encoded = Serializer(writer_schema).serialize({"bytes_field": payload})

    from_pyavroc = pyavroc_reader.deserialize(
        encoded, writer_schema=writer_schema)
    from_deserializer = Deserializer(reader_schema).deserialize(encoded)

    assert (from_pyavroc['bytes_field']
            == from_deserializer['bytes_field']
            == payload)
def test_unicode_map_keys():
    """Plain and ``u``-prefixed map keys must serialize to equal bytes."""
    schema = """\
{"type": "record", "name": "foo", "fields":
 [{"name": "bar", "type": ["null", {"type": "map", "values": "string"}]}]}
"""
    encode = pyavroc.AvroSerializer(schema).serialize
    plain_key_bytes = encode({"bar": {"k": "v"}})
    unicode_key_bytes = encode({"bar": {u"k": "v"}})
    assert unicode_key_bytes == plain_key_bytes
def test_serialize_utf8_string():
    """Round-trip a non-ASCII string through pyavroc and ``Deserializer``.

    On Python 2 a plain (byte) string literal is checked first, compared
    against its UTF-8 decoding.  The unicode literal is checked on every
    Python version.
    """
    schema = '["string"]'
    serializer = pyavroc.AvroSerializer(schema)
    deserializer = Deserializer(schema)

    def roundtrip(value):
        # Encode with pyavroc, decode with the other implementation.
        return deserializer.deserialize(serializer.serialize(value))

    if sys.version_info < (3, ):
        # Python 2 only: an unprefixed literal is a byte string there.
        raw = "barà"
        assert roundtrip(raw) == raw.decode("utf-8")

    text = u"barà"
    assert roundtrip(text) == text
def test_serialize_record():
    """Serialize ``User`` records with pyavroc and read them back.

    Only ``name`` and ``office`` are set on each record; the decoded record
    is expected to carry ``favorite_number`` as ``None``.
    """
    record_count = 10
    avtypes = pyavroc.create_types(SCHEMA)
    serializer = pyavroc.AvroSerializer(SCHEMA)
    deserializer = Deserializer(SCHEMA)
    expected_keys = {'name', 'office', 'favorite_number'}

    for idx in range(record_count):
        name = "name-%d" % idx
        office = "office-%d" % idx
        encoded = serializer.serialize(avtypes.User(name=name, office=office))
        decoded = deserializer.deserialize(encoded)

        assert set(decoded) == expected_keys
        assert decoded['name'] == name
        assert decoded['office'] == office
        assert decoded['favorite_number'] is None
def test_serialize_reuse_record_type():
    """Serialize a record whose second field reuses a named type.

    ``Contig`` is declared inline for field ``c1`` and then referenced by
    name inside the union of field ``c2``.  The test only checks that
    serialization yields a non-empty result.
    """
    schema = """\
{
  "type": "record",
  "name": "foo",
  "namespace": "org.pyavroc",
  "fields": [
    {
      "name": "c1",
      "type": {
        "type": "record",
        "name": "Contig",
        "fields": [ { "name": "contigName", "type": "string" } ]
      }
    },
    {
      "name": "c2",
      "type": [ "null", "Contig" ]
    }
  ]
}
"""
    record = {
        "c1": {"contigName": "contig1"},
        "c2": {"contigName": "contig2"},
    }
    encoded = pyavroc.AvroSerializer(schema).serialize(record)
    assert encoded
def test_write_wrong_type_primitive():
    """Serializing a primitive field of the wrong type raises TypeError.

    A correctly typed object is serialized first as a sanity check, then
    two mistyped objects are each expected to fail.
    """
    schema = '''{
  "type": "record",
  "name": "Obj",
  "fields": [
    {"name": "string", "type": "string"},
    {"name": "number", "type": "int"}
  ]
}'''
    avtypes = pyavroc.create_types(schema)
    serializer = pyavroc.AvroSerializer(schema)

    # Well-typed object: must go through without an exception.
    serializer.serialize(avtypes.Obj(string="pippo", number=1))

    mistyped_fields = (
        {"string": 1, "number": 1},      # int where a str is expected
        {"string": "a", "number": "a"},  # str where an int is expected
    )
    for fields in mistyped_fields:
        # Build the object outside the context manager so that only the
        # serialize call is allowed to raise.
        bad_obj = avtypes.Obj(**fields)
        with pytest.raises(TypeError):
            serializer.serialize(bad_obj)
def test_serialize_union():
    """Round-trip each branch of a ``["string", "null"]`` union."""
    schema = '["string", "null"]'
    serializer = pyavroc.AvroSerializer(schema)
    deserializer = Deserializer(schema)
    candidates = ("foo", u"foo", None)
    for value in candidates:
        encoded = serializer.serialize(value)
        decoded = deserializer.deserialize(encoded)
        assert decoded == value
def test_big():
    """Serialize a ``User`` whose string fields are very large.

    There is no assertion: the test passes as long as ``serialize`` does
    not raise.
    """
    field_length = 10 * 1024 * 1024
    huge_text = 'X' * field_length
    avtypes = pyavroc.create_types(SCHEMA)
    serializer = pyavroc.AvroSerializer(SCHEMA)
    big_user = avtypes.User(name=huge_text, office=huge_text)
    serializer.serialize(big_user)
def test_exc():
    """Check the exceptions raised by ``AvroSerializer`` for bad schemas.

    A non-string argument is expected to raise ``TypeError``; a string that
    is not a valid schema is expected to raise ``IOError``.
    """
    bad_schemas = (
        (1, TypeError),
        ('NOT_A_VALID_JSON', IOError),
    )
    for schema, expected_exc in bad_schemas:
        with pytest.raises(expected_exc):
            pyavroc.AvroSerializer(schema)