def test_empty_union():
    """An empty union raises ValueError by default; permissive mode accepts it."""
    with pytest.raises(ValueError) as excinfo:
        cavro.Schema([])
    message = str(excinfo.value)
    assert message == 'Unions must contain at least one member type'

    # Permissive parsing yields a union type with no members.
    permissive_schema = cavro.Schema([], permissive=True)
    union = permissive_schema.type
    assert isinstance(union, cavro.UnionType)
    assert len(union.union_types) == 0
def test_duplicate_item_union():
    """A union listing 'int' twice raises ValueError unless permissive is set."""
    with pytest.raises(ValueError) as excinfo:
        cavro.Schema(['int', 'int'])
    message = str(excinfo.value)
    assert message == "Unions may not have more than one member of type 'int'"

    # In permissive mode both members are kept and encoding still works.
    permissive_schema = cavro.Schema(['int', 'int'], permissive=True)
    union = permissive_schema.type
    assert isinstance(union, cavro.UnionType)
    assert len(union.union_types) == 2
    assert permissive_schema.binary_encode(1) == b"\x00\x02"
def test_enum_with_invalid_symbols():
    """An enum with a non-string symbol raises ValueError unless permissive is set."""

    def enum_definition():
        # Built fresh on every call so each Schema gets its own dict and list.
        return {
            'type': 'enum',
            'name': 'A',
            'symbols': ['one', 2, 'do'],
        }

    with pytest.raises(ValueError):
        cavro.Schema(enum_definition())

    permissive_schema = cavro.Schema(enum_definition(), permissive=True)
    assert permissive_schema.binary_encode(2) == b'\x02'
def test_long_json():
    """JSON-encode longs up to 2**63 - 1; 2**64 must raise OverflowError."""
    long_schema = cavro.Schema('"long"')

    expectations = (
        (1, "1"),
        (2**31-1, "2147483647"),
        (2**63-1, "9223372036854775807"),
    )
    for number, expected_text in expectations:
        assert long_schema.json_encode(number) == expected_text

    with pytest.raises(OverflowError):
        long_schema.json_encode(2**64)
def test_asdict_nested():
    """_asdict() on a self-referencing record returns plain nested dicts."""
    record_fields = [
        {'name': 'a', 'type': 'int'},
        {'name': 'b', 'type': 'string'},
        # Field 'c' optionally nests another record of the same type 'A'.
        {'name': 'c', 'type': ['null', {'type': 'A'}]},
    ]
    schema = cavro.Schema({
        'type': 'record',
        'name': 'A',
        'fields': record_fields,
    })

    inner_values = {'a': 2, 'b': 'hoho', 'c': None}
    rec = schema.type.record(a=1, b='hi', c=inner_values)

    expected = {
        'a': 1,
        'b': 'hi',
        'c': {'a': 2, 'b': 'hoho', 'c': None},
    }
    assert rec._asdict() == expected
    # The nested dict is exposed as a record instance on the attribute itself.
    assert rec.c.a == 2
    assert isinstance(rec.c, schema.type.record)
def test_map_from_fuzz_1():
    """Encode a one-entry map whose value is a dict matching the record union member."""
    record_member = {
        "name": "A",
        "type": "record",
        "fields": [{"name": "a", "type": "long"}],
    }
    value_union = [
        {"type": "string"},
        record_member,
        {"type": "map", "values": "float"},
    ]
    schema = cavro.Schema({"type": "map", "values": value_union})

    encoded = schema.binary_encode({'x': {'a': 1}})
    expected = bytesx("""
        02      // 1 map item
        02 x    // key = x
        02      // union index 1
        02      // a = 1
        00      // end of map
    """)
    assert encoded == expected
def test_enum_with_invalid_symbol_type():
    """An enum whose 'symbols' entry is the boolean True raises ValueError."""
    definition = {
        'type': 'enum',
        'name': 'A',
        'symbols': True,
    }
    with pytest.raises(ValueError):
        cavro.Schema(definition)
def test_enum_with_duplicate_symbols():
    """An enum that repeats a symbol ('b' twice) raises ValueError."""
    definition = {
        'type': 'enum',
        'name': 'A',
        'symbols': ['a', 'b', 'b'],
    }
    with pytest.raises(ValueError):
        cavro.Schema(definition)
def test_null_encoding():
    """The null schema encodes None to empty bytes and rejects strings."""
    null_schema = cavro.Schema('"null"')
    assert null_schema.binary_encode(None) == b''

    # Both a non-empty and an empty string must be refused.
    for rejected in ('d', ''):
        with pytest.raises(ValueError):
            null_schema.binary_encode(rejected)
def test_float_json():
    """json_encode on a float schema returns a str that parses back to the value."""
    float_schema = cavro.Schema('{"type": "float"}')

    cases = (
        (3.14159e2, 314.159),
        (31.4159e30, 3.14159e+31),
    )
    for value, expected in cases:
        text = float_schema.json_encode(value)
        assert isinstance(text, str)
        assert float(text) == expected
def test_avro_schema_tests(schema_text, canonical, fingerprint):
    """Check the canonical form and fingerprints of a permissively parsed schema.

    The sha256 and md5 fingerprints are compared against hashlib digests of
    the expected canonical text. When ``fingerprint`` is truthy, the default
    fingerprint's ``value`` must equal ``int(fingerprint)``.
    """
    parsed = cavro.Schema(schema_text, permissive=True)
    assert parsed.canonical_form == canonical

    canonical_bytes = canonical.encode()
    for algorithm in ('sha256', 'md5'):
        expected_digest = hashlib.new(algorithm, canonical_bytes).hexdigest()
        actual_digest = parsed.fingerprint(algorithm).hexdigest()
        assert actual_digest == expected_digest

    if fingerprint:
        assert parsed.fingerprint().value == int(fingerprint)
def test_writing_empty_no_close(monkeypatch):
    """Deleting an unused writer without close() still leaves a complete file."""
    monkeypatch.setattr(uuid, 'uuid4', FakeUUID)
    sink = BytesIO()
    int_schema = cavro.Schema('"int"')
    writer = cavro.ContainerWriter(sink, int_schema)

    # No explicit close(): the output is expected once the writer is dropped.
    del writer

    expected = b'Obj\x01\x04\x16avro.schema\n"int"\x14avro.codec\x08null\x00abcdefghijklmnop\x00\x00abcdefghijklmnop'
    assert sink.getvalue() == expected
def test_cannot_write_after_close():
    """write_one() on a closed ContainerWriter raises ValueError."""
    sink = BytesIO()
    int_schema = cavro.Schema('"int"')
    closed_writer = cavro.ContainerWriter(sink, int_schema)
    closed_writer.write_one(1)
    closed_writer.close()

    with pytest.raises(ValueError):
        closed_writer.write_one(2)
def main():
    """Build 100 schemas with ``make_schema_json(10)`` and parse each one.

    If ``cavro.Schema`` raises, the offending schema JSON is printed and the
    exception is re-raised, which stops the run.
    """
    for _ in range(100):
        schema_json = make_schema_json(10)
        try:
            cavro.Schema(schema_json)
        except Exception:
            # Catch Exception rather than everything so that Ctrl-C and
            # SystemExit pass through without dumping a schema.
            print(schema_json)
            raise
def test_writing_one_int(monkeypatch):
    """Write a single int, close the writer, and compare the whole output."""
    monkeypatch.setattr(uuid, 'uuid4', FakeUUID)
    sink = BytesIO()
    int_schema = cavro.Schema('"int"')

    writer = cavro.ContainerWriter(sink, int_schema)
    writer.write_one(1)
    writer.close()

    written = sink.getvalue()
    assert written == FakeUUID.HEADER + b'\x02\x02\x02' + FakeUUID.bytes
def test_double_json():
    """json_encode on a double schema returns a str that parses back to the value."""
    double_schema = cavro.Schema('"double"')

    cases = (
        (3.14159e2, 314.159),
        (31.4159e200, 3.14159e+201),
    )
    for value, expected in cases:
        text = double_schema.json_encode(value)
        assert isinstance(text, str)
        assert float(text) == expected
def test_long_overflow():
    """Values that do not fit in a long are rejected by the long schema.

    ``can_encode`` must report a falsy result for 2**64, and
    ``binary_encode`` must raise OverflowError mentioning "too large" for
    both 2**65 and -2**65.
    """
    schema = cavro.Schema('"long"')
    # Truthiness check instead of comparing to False with ``==``.
    assert not schema.can_encode(2**64)

    with pytest.raises(OverflowError) as exc:
        schema.binary_encode(2**65)
    assert "too large" in str(exc.value)

    with pytest.raises(OverflowError) as exc:
        schema.binary_encode(-2**65)
    assert "too large" in str(exc.value)
def test_permissive_null_encoding():
    """A permissive null schema encodes None, False and 0; it rejects 1 and 'Hi'."""
    null_schema = cavro.Schema('"null"', permissive=True)

    for accepted in (None, False, 0):
        assert null_schema.binary_encode(accepted) == b''

    for rejected in (1, 'Hi'):
        with pytest.raises(ValueError):
            null_schema.binary_encode(rejected)
def test_writing_two_ints_context(monkeypatch):
    """Write two ints through the context-manager form and compare the output."""
    monkeypatch.setattr(uuid, 'uuid4', FakeUUID)
    sink = BytesIO()
    int_schema = cavro.Schema('"int"')

    with cavro.ContainerWriter(sink, int_schema) as writer:
        for number in (64, 1):
            writer.write_one(number)

    # Checked only after the with-block has exited.
    written = sink.getvalue()
    assert written == FakeUUID.HEADER + b'\x04\x06\x80\x01\x02' + FakeUUID.bytes
def test_round_tripping(source_vals, schema):
    """Write ``source_vals`` to a container and read them back, once per codec."""
    for codec_name in ('null', 'deflate', 'snappy'):
        stream = BytesIO()
        parsed = cavro.Schema(schema)

        with cavro.ContainerWriter(stream, parsed, codec_name) as writer:
            writer.write_many(source_vals)

        # Rewind so the reader starts at the file header.
        stream.seek(0)
        round_tripped = list(cavro.ContainerReader(stream))
        assert round_tripped == source_vals
def test_record_schema():
    """A record definition parses to a RecordType with the declared field."""
    definition = {
        'type': 'record',
        'name': 'A',
        'fields': [
            {'name': 'a', 'type': 'int'},
        ],
    }
    parsed = cavro.Schema(definition)

    assert isinstance(parsed.type, cavro.RecordType)
    assert len(parsed.type.fields) == 1
def test_writing_two_blocks_of_ints(monkeypatch):
    """With max_blocksize=1, check the output after each write and after close()."""
    monkeypatch.setattr(uuid, 'uuid4', FakeUUID)
    sink = BytesIO()
    int_schema = cavro.Schema('"int"')
    writer = cavro.ContainerWriter(sink, int_schema, max_blocksize=1)

    # After the first write only the header is in the buffer.
    writer.write_one(64)
    assert sink.getvalue() == FakeUUID.HEADER

    # After the second write the buffer holds a block containing the first value.
    with_first_block = FakeUUID.HEADER + b'\x02\x04\x80\x01' + FakeUUID.bytes
    writer.write_one(1)
    assert sink.getvalue() == with_first_block

    # After close() the buffer also holds a block containing the second value.
    writer.close()
    assert sink.getvalue() == with_first_block + b'\x02\x02\x02' + FakeUUID.bytes
def main():
    """Binary-encode a fixed list of sample values into ``inputs/1`` … ``inputs/8``.

    Each value is encoded with the schema built from ``SCHEMA`` and written
    to its own file, numbered from 1 in list order.
    """
    schema = cavro.Schema(SCHEMA)
    samples = [
        None,
        False,
        1,
        1.1,
        'abc',
        ['a', 'b', 'c'],
        {'a': 1, 'b': 2},
        {'int': 100, 'bool': False, 'string': 'foobar'},
    ]
    for file_number, sample in enumerate(samples, start=1):
        with open(f'inputs/{file_number}', 'wb') as out:
            out.write(schema.binary_encode(sample))
def main(count):
    """Fuzz binary encode/decode round trips for generated schemas.

    Each iteration builds a schema with ``schema.make_schema_json(5)``, makes
    a value for it, encodes and decodes that value, and compares the two
    after ``de_record``. On the first mismatch a report is printed and the
    function returns. If any step raises, the inputs for that step are
    printed and the exception is re-raised.

    Args:
        count: Number of iterations to run; a negative value runs forever.
    """
    if count < 0:
        counter = itertools.count()
    else:
        counter = range(count)

    for _ in tqdm.tqdm(counter):
        # Bound before the try so the outer handler can always read it.
        tmp = io.StringIO()
        try:
            sch_json = schema.make_schema_json(5)

            # Handlers catch Exception, not everything, so that Ctrl-C (the
            # way to stop an endless run) does not trigger the dumps.
            try:
                sch = cavro.Schema(sch_json)
            except Exception:
                print(f"Exception parsing: {sch_json}")
                raise

            try:
                value = make_value_for_type(sch.type, 5)
            except Exception:
                print(f"Exception making value for: {sch_json}")
                raise

            try:
                encoded = sch.binary_encode(value)
            except Exception:
                print(sch_json)
                print(value)
                raise

            try:
                decoded = sch.binary_decode(encoded)
            except Exception:
                print(sch_json, value, encoded)
                raise

            de_recorded = de_record(decoded)
            info = []
            equal = almost_equal(de_recorded, de_record(value), info)
            if not equal:
                print("----------- SCHEMA -------------")
                print(sch_json)
                print("\n----------- VALUE ---------------")
                print(value)
                print("\n----------- DECODED ---------------")
                print(decoded)
                print("\n----------- DECODED DATA ---------------")
                print(de_recorded)
                print("\n----------- INFO ---------------")
                print(info)
                return
        except Exception:
            print(tmp.getvalue())
            raise
def test_write_record_from_dict():
    """A plain dict can be binary-encoded against a record schema."""
    record_fields = [
        {'name': 'a', 'type': 'int'},
        {'name': 'b', 'type': 'long'},
    ]
    parsed = cavro.Schema({
        'type': 'record',
        'name': 'A',
        'fields': record_fields,
    })

    encoded = parsed.binary_encode({'a': 1, 'b': 2})
    assert encoded == b'\x02\x04'
def test_read_record():
    """Decoding record bytes yields an object whose _asdict() matches the fields."""
    record_fields = [
        {'name': 'a', 'type': 'int'},
        {'name': 'b', 'type': 'long'},
    ]
    parsed = cavro.Schema({
        'type': 'record',
        'name': 'A',
        'fields': record_fields,
    })

    decoded = parsed.binary_decode(b'\x02\x04')
    assert decoded._asdict() == {'a': 1, 'b': 2}
def test_record_creation():
    """A record built from a partial dict shows the field default in its repr."""
    record_fields = [
        {'name': 'a', 'type': 'int'},
        {'name': 'b', 'type': 'int', 'default': 123},
    ]
    parsed = cavro.Schema({
        'type': 'record',
        'name': 'A',
        'fields': record_fields,
    })

    # Only 'a' is supplied; 'b' is expected to appear with its default of 123.
    record_type = parsed.named_types['A']
    rec = record_type.record({'a': 1})
    assert repr(rec) == '<Record:A {a: 1 b: 123}>'
def main(count):
    """Fuzz container-file round trips for generated schemas.

    Each iteration builds a schema with ``schema.make_schema_json(5)``, makes
    between 0 and 2000 values for it, writes them with ``ContainerWriter``
    into an in-memory buffer and reads them back with ``ContainerReader``.
    Both sides are compared after ``values.de_record``. On the first mismatch
    the schema and comparison info are printed and the function returns. If
    any step raises, the buffer contents are printed and the exception is
    re-raised.

    Args:
        count: Number of iterations to run; a negative value runs forever.
    """
    if count < 0:
        counter = itertools.count()
    else:
        counter = range(count)

    for _ in tqdm.tqdm(counter):
        # Bound before the try so the handler below can always read it.
        tmp = io.BytesIO()
        try:
            sch_json = schema.make_schema_json(5)
            sch = cavro.Schema(sch_json)
            vals = [
                values.make_value_for_type(sch.type, 5)
                for _ in range(randint(0, 2000))
            ]

            with cavro.ContainerWriter(tmp, sch) as writer:
                writer.write_many(vals)

            # Rewind so the reader starts at the file header.
            tmp.seek(0)
            reader = cavro.ContainerReader(tmp)
            decoded = [values.de_record(v) for v in reader]
            expected = [values.de_record(v) for v in vals]

            info = []
            equal = values.almost_equal(decoded, expected, info)
            if not equal:
                print("----------- SCHEMA -------------")
                print(sch_json)
                print("\n----------- INFO ---------------")
                print(info)
                return
        except Exception:
            # Catch Exception, not everything, so that Ctrl-C (the way to
            # stop an endless run) does not dump the buffer.
            print(tmp.getvalue())
            raise
def test_map():
    """Binary-encode int-valued maps with zero, one, two and three entries."""
    int_map = cavro.Schema({"type": "map", 'values': 'int'})

    assert int_map.binary_encode({}) == b'\x00'
    assert int_map.binary_encode({'': 0}) == b'\x02\x00\x00\x00'

    two_entries = {'A': 1, 'B': 2}
    assert int_map.binary_encode(two_entries) == b'\x04\x02A\x02\x02B\x04\x00'

    three_entries = {'A': 1, 'B': 2, 'XX': 99999}
    encoded = int_map.binary_encode(three_entries)
    expected = bytesx("""
        06          // 3 Entries
        02 A        // String 'B'
        02          // Number 1
        02 B        // String 'B'
        04          // Number 2
        04 XX       // String 'XX'
        be 9a 0c    // Number 99999
        00          // End of map
    """)
    assert encoded == expected
def cavro(self):
    """Build a schema from ``SCHEMA`` and binary-encode ``self.values`` with it.

    The encoded result is bound to a local and not returned.
    """
    # NOTE(review): presumably a method on a class, in which case ``cavro``
    # in this body is the module-level name rather than this function.
    compiled = cavro.Schema(SCHEMA)
    encoded = compiled.binary_encode(self.values)