示例#1
0
def make_avro(schema, named_defs):
    """Build the Avro schema object for ``schema``.

    ``FILE_SCHEMA`` and then every entry of ``named_defs`` are parsed with the
    same ``Names`` instance before ``schema`` itself is parsed with it.

    :param schema: schema description; it goes through ``wrap_in_list`` and
        ``fix_file_type`` before being parsed.
    :param named_defs: iterable of schema definitions parsed beforehand.
    :return: the object ``make_avsc_object`` returns for ``schema``.
    """
    registry = Names()
    # Parse order is kept: the file schema first, then the named definitions.
    make_avsc_object(FILE_SCHEMA, registry)
    for definition in named_defs:
        make_avsc_object(definition, registry)

    prepared = fix_file_type(wrap_in_list(schema))
    return make_avsc_object(prepared, registry)
示例#2
0
 def is_backward_compatible(cls, writer_schema_json, reader_schema_json):
     """Tell whether data written with one schema is readable with another.

     Both JSON schemas are parsed with ``schema.make_avsc_object`` and the
     result of ``SchemaResolution().resolve_schema(writer, reader)`` is
     returned as is.
     """
     writer = schema.make_avsc_object(writer_schema_json)
     reader = schema.make_avsc_object(reader_schema_json)
     return SchemaResolution().resolve_schema(writer, reader)
示例#3
0
文件: models.py 项目: lowks/rabix
def make_avro(schema, named_defs):
    """Parse ``schema`` into an Avro schema object.

    One ``Names`` instance is shared by all parse calls: ``FILE_SCHEMA`` goes
    first, then each item of ``named_defs``, and finally ``schema`` after it
    has been passed through ``wrap_in_list`` and ``fix_file_type``.

    :return: the result of the final ``make_avsc_object`` call.
    """
    shared_names = Names()
    make_avsc_object(FILE_SCHEMA, shared_names)
    for named_def in named_defs:
        make_avsc_object(named_def, shared_names)

    wrapped = wrap_in_list(schema)
    return make_avsc_object(fix_file_type(wrapped), shared_names)
示例#4
0
 def is_backward_compatible(cls, writer_schema_json, reader_schema_json):
     """Whether data serialized with the writer schema can be deserialized
     using the reader schema.

     Returns whatever ``SchemaResolution.resolve_schema`` returns for the two
     parsed schemas.
     """
     parsed_writer = schema.make_avsc_object(writer_schema_json)
     parsed_reader = schema.make_avsc_object(reader_schema_json)
     schema_resolver = SchemaResolution()
     outcome = schema_resolver.resolve_schema(parsed_writer, parsed_reader)
     return outcome
示例#5
0
    def verify_avro_schema(cls, avro_schema_json):
        """Check that a JSON document can be parsed as an Avro schema.

        :param avro_schema_json: JSON representation of the Avro schema
        :return: ``(True, None)`` when ``schema.make_avsc_object`` accepts the
            input, otherwise ``(False, repr(error))`` where ``error`` is the
            exception that was raised.
        """
        try:
            schema.make_avsc_object(avro_schema_json)
        except Exception as parse_error:
            # Any failure is reported to the caller instead of being raised.
            return False, repr(parse_error)
        return True, None
 def test_union_serialization_invalid(self):
     """Serializing the string ``"hi"`` as ``funion_null`` must raise
     ``AvroTypeException``.
     """
     union_schema = make_avsc_object(self.UNION_FIELDS_SCHEMA, avro.schema.Names())
     bad_record = {"funion_null": "hi"}
     serializer = AvroJsonSerializer(union_schema)
     # Only the to_json call itself is expected to fail.
     with self.assertRaises(avro.io.AvroTypeException):
         serializer.to_json(bad_record)
示例#7
0
    def test_dict_with_unicode_bytes(self):
        """Check how ``from_dict`` treats values of a ``bytes`` field.

        A byte string must come back unchanged, and a unicode string must come
        back as the corresponding iso-8859-1 byte string.
        """
        schema_dict = {
            "namespace": "example.avro",
            "type": "record",
            "name": "WithBytes",
            "fields": [{
                "type": "bytes",
                "name": "fbytes"
            }]
        }
        avro_schema = make_avsc_object(schema_dict, avro.schema.Names())

        # byte arrays should be left alone
        byte_data = {
            "fbytes": b"(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00\x0b+\x00\x00"
        }
        self.assertEqual(
            AvroJsonDeserializer(avro_schema).from_dict(byte_data), byte_data)

        # unicode strings should be turned into iso-8859-1 bytes
        iso8859_data = {'fbytes': b"(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00"}
        unicode_data = {u'fbytes': u'(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00'}
        self.assertEqual(
            AvroJsonDeserializer(avro_schema).from_dict(unicode_data),
            iso8859_data)
    def test_map(self):
        """Round-trip a record holding one map field.

        The serialized JSON may list the map keys in either order. Both
        ``from_json`` and ``from_dict`` (fed a dict mixing plain and unicode
        keys) must yield the original data.
        """
        schema_dict = {
            "type": "record",
            "name": "rec",
            "fields": [self.FIELD_MAP_INT]
        }
        data = {"intmap": {"one": 1, "two": 2}}
        unicode_dict = {'intmap': {'one': 1, u'two': 2}}

        avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
        avro_json = AvroJsonSerializer(avro_schema).to_json(data)

        # Dictionaries are unsorted
        self.assertIn(avro_json, ("""{"intmap":{"one":1,"two":2}}""", """{"intmap":{"two":2,"one":1}}"""))

        deserializer = AvroJsonDeserializer(avro_schema)
        json_data = deserializer.from_json(avro_json)
        self.assertEqual(json_data, data)

        mixed_unicode = deserializer.from_dict(unicode_dict)
        self.assertEqual(mixed_unicode, data)
 def test_user_record(self):
     """Round-trip three ``User`` records with optional fields.

     This schema example is from documentation http://avro.apache.org/docs/1.7.6/gettingstartedpython.html

     Optional fields missing from the input are expected to be serialized as
     ``null`` and to come back as ``None``.
     """
     schema_dict = {
         "namespace": "example.avro",
         "type": "record",
         "name": "User",
         "fields": [
             {"name": "name", "type": "string"},
             {"name": "favorite_number",  "type": ["int", "null"]},
             {"name": "favorite_color", "type": ["string", "null"]}
         ]
     }
     avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
     serializer = AvroJsonSerializer(avro_schema)
     deserializer = AvroJsonDeserializer(avro_schema)

     # One optional field left out.
     alyssa = {"name": "Alyssa", "favorite_number": 256}
     alyssa_full = {"name": "Alyssa", "favorite_number": 256, "favorite_color": None}
     alyssa_json = """{"name":"Alyssa","favorite_number":{"int":256},"favorite_color":null}"""
     self.assertEqual(serializer.to_json(alyssa), alyssa_json)
     self.assertEqual(deserializer.from_json(alyssa_json), alyssa_full)

     # Every field present.
     ben = {"name": "Ben", "favorite_number": 7, "favorite_color": "red"}
     ben_json = """{"name":"Ben","favorite_number":{"int":7},"favorite_color":{"string":"red"}}"""
     self.assertEqual(serializer.to_json(ben), ben_json)
     self.assertEqual(deserializer.from_json(ben_json), ben)

     # Both optional fields left out.
     lion = {"name": "Lion"}
     lion_full = {"name": "Lion", "favorite_number": None, "favorite_color": None}
     lion_json = """{"name":"Lion","favorite_number":null,"favorite_color":null}"""
     self.assertEqual(serializer.to_json(lion), lion_json)
     self.assertEqual(deserializer.from_json(lion_json), lion_full)
示例#10
0
 def __init__(self, schemadir=".."):
     """Parse every schema listed in ``self.SCHEMAS`` using ``self.names``.

     :param schemadir: directory holding the JSON schema files; each file is
         named after the lower-cased ``self.SCHEMAS`` entry plus ``.av``.
     """
     self.names = schema.Names()
     for sf in self.SCHEMAS:
         # Single-argument form prints the same text on Python 2 and 3.
         print("Loading %s" % sf)
         with open(os.path.join(schemadir, sf.lower() + ".av"), "r") as fp:
             obj = json.load(fp)
         # The returned schema object was never used; the call is kept
         # because it receives ``self.names``.
         schema.make_avsc_object(obj, self.names)
示例#11
0
 def test_fails_validation(self):
     """Replacing ``ffloat`` with the string ``"hi"`` must make ``to_json``
     raise ``AvroTypeException``.
     """
     all_fields_schema = make_avsc_object(self.ALL_FIELDS_SCHEMA,
                                          avro.schema.Names())
     # Build a modified copy so the shared fixture stays untouched.
     invalid_record = dict(self.VALID_DATA_ALL_FIELDS, ffloat="hi")
     serializer = AvroJsonSerializer(all_fields_schema)
     with self.assertRaises(avro.io.AvroTypeException):
         serializer.to_json(invalid_record)
 def test_all_supported_types(self):
     """Round-trip ``VALID_DATA_ALL_FIELDS`` through ``ALL_FIELDS_SCHEMA``.

     The serialized form must equal the expected JSON text exactly, and
     deserializing it must give back the original data.
     """
     avro_schema = make_avsc_object(self.ALL_FIELDS_SCHEMA, avro.schema.Names())
     data = self.VALID_DATA_ALL_FIELDS
     avro_json = AvroJsonSerializer(avro_schema).to_json(data)
     self.assertEqual(avro_json, """{"fruit":"ORANGE","fint":1,"flong":1,"fstring":"hi there","ffixed":"1234567890123456","frec":{"subfint":2},"funion_null":null,"ffloat":1.0,"fdouble":2.0,"intarr":[1,2,3],"intmap":{"one":1}}""")
     json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
     self.assertEqual(json_data, data)
示例#13
0
 def test_union_serialization_not_null(self):
     """An integer ``funion_null`` value is serialized wrapped as
     ``{"int": ...}`` and deserializes back to the plain value.
     """
     avro_schema = make_avsc_object(self.UNION_FIELDS_SCHEMA,
                                    avro.schema.Names())
     data = {"funion_null": 1}
     avro_json = AvroJsonSerializer(avro_schema).to_json(data)
     self.assertEqual(avro_json, """{"funion_null":{"int":1}}""")
     json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
     self.assertEqual(json_data, data)
示例#14
0
 def __init__(self, schemadir=".."):
     """Parse every schema listed in ``self.SCHEMAS`` using ``self.names``.

     :param schemadir: directory holding the JSON schema files; each file is
         named after the lower-cased ``self.SCHEMAS`` entry plus ``.av``.
     """
     self.names = schema.Names()
     for sf in self.SCHEMAS:
         # Single-argument form prints the same text on Python 2 and 3.
         print("Loading %s" % sf)
         sfname = os.path.join(schemadir, sf.lower() + ".av")
         with open(sfname, "r") as fp:
             obj = json.load(fp)
         # The returned schema object was never used; the call is kept
         # because it receives ``self.names``.
         schema.make_avsc_object(obj, self.names)
 def test_union_serialization_not_null(self):
     """An integer ``funion_null`` value is serialized wrapped as
     ``{"int": ...}`` and deserializes back to the plain value.
     """
     avro_schema = make_avsc_object(self.UNION_FIELDS_SCHEMA, avro.schema.Names())
     data = {"funion_null": 1}
     avro_json = AvroJsonSerializer(avro_schema).to_json(data)
     self.assertEqual(avro_json, """{"funion_null":{"int":1}}""")
     json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
     self.assertEqual(json_data, data)
示例#16
0
 def test_all_supported_types(self):
     """Serializing ``VALID_DATA_ALL_FIELDS`` with ``ALL_FIELDS_SCHEMA`` must
     produce the expected JSON text exactly.
     """
     avro_schema = make_avsc_object(self.ALL_FIELDS_SCHEMA,
                                    avro.schema.Names())
     avro_json = AvroJsonSerializer(avro_schema).to_json(
         self.VALID_DATA_ALL_FIELDS)
     self.assertEqual(
         avro_json,
         """{"fint":1,"flong":1,"fstring":"hi there","ffixed":"1234567890123456","frec":{"subfint":2},"funion_null":null,"ffloat":1.0,"fdouble":2.0}"""
     )
示例#17
0
 def _parse_types(self, types, type_names):
     type_objects = []
     for type in types:
         type_object = schema.make_avsc_object(type, type_names)
         if type_object.type not in VALID_TYPE_SCHEMA_TYPES:
             fail_msg = 'Type %s not an enum, fixed, record, or error.' % type
             raise ProtocolParseException(fail_msg)
         type_objects.append(type_object)
     return type_objects
示例#18
0
文件: protocol.py 项目: 10sr/hue
 def _parse_types(self, types, type_names):
   type_objects = []
   for type in types:
     type_object = schema.make_avsc_object(type, type_names)
     if type_object.type not in VALID_TYPE_SCHEMA_TYPES:
       fail_msg = 'Type %s not an enum, fixed, record, or error.' % type
       raise ProtocolParseException(fail_msg)
     type_objects.append(type_object)
   return type_objects
示例#19
0
 def test_array(self):
     """A record with one array field serializes to compact JSON whose array
     elements appear as a plain JSON list.
     """
     schema_dict = {
         "type": "record",
         "name": "rec",
         "fields": [self.FIELD_ARRAY_INT]
     }
     data = {"intarr": [1, 2, 3]}
     avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
     avro_json = AvroJsonSerializer(avro_schema).to_json(data)
     self.assertEqual(avro_json, """{"intarr":[1,2,3]}""")
    def end(self):
        """Finish the schema currently being built and return its JSON.

        With an empty ``_schema_tracker`` the schema is the top level one: it
        is parsed by ``schema.make_avsc_object`` (which serves as validation),
        ``_schema_json`` is reset to ``None`` and the parsed object's
        ``to_json()`` result is returned. Otherwise the current
        ``_schema_json`` is returned after ``_restore_current_schema()`` ran.
        """
        if self._schema_tracker:
            finished_json = self._schema_json
            self._restore_current_schema()
            return finished_json

        # this is the top level schema; do the schema validation
        parsed = schema.make_avsc_object(self._schema_json)
        self._schema_json = None
        return parsed.to_json()
示例#21
0
    def end(self):
        """Close the schema under construction and return its JSON form.

        While ``_schema_tracker`` is non-empty, the current ``_schema_json``
        is captured, ``_restore_current_schema()`` is called and the captured
        value is returned. When it is empty, ``_schema_json`` is parsed with
        ``schema.make_avsc_object``, cleared, and the parsed object's
        ``to_json()`` is returned.
        """
        at_top_level = not self._schema_tracker
        if at_top_level:
            # this is the top level schema; do the schema validation
            validated = schema.make_avsc_object(self._schema_json)
            self._schema_json = None
            return validated.to_json()

        nested_json = self._schema_json
        self._restore_current_schema()
        return nested_json
 def test_individually_allowed_fields_separately(self):
     """Round-trip each entry of ``INDIVIDUALLY_SERIALIZABLE`` on its own.

     Each field is used as a stand-alone schema, and the matching value from
     ``VALID_DATA_ALL_FIELDS`` must survive serialization followed by
     deserialization.
     """
     for field in self.INDIVIDUALLY_SERIALIZABLE:
         # unwrap enum, fixed, array, and map but save the name for value lookup
         name = field['name']
         if isinstance(field['type'], dict):
             field = field['type']
         avro_schema = make_avsc_object(field, avro.schema.Names())
         data = self.VALID_DATA_ALL_FIELDS[name]
         avro_json = AvroJsonSerializer(avro_schema).to_json(data)
         json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
         self.assertEqual(json_data, data)
示例#23
0
 def test_all_supported_types(self):
     """Round-trip ``VALID_DATA_ALL_FIELDS`` through ``ALL_FIELDS_SCHEMA``.

     The serialized form must equal the expected JSON text exactly, and
     deserializing it must give back the original data.
     """
     avro_schema = make_avsc_object(self.ALL_FIELDS_SCHEMA,
                                    avro.schema.Names())
     data = self.VALID_DATA_ALL_FIELDS
     avro_json = AvroJsonSerializer(avro_schema).to_json(data)
     self.assertEqual(
         avro_json,
         """{"fruit":"ORANGE","fint":1,"flong":1,"fstring":"hi there","ffixed":"1234567890123456","frec":{"subfint":2},"funion_null":null,"ffloat":1.0,"fdouble":2.0,"intarr":[1,2,3],"intmap":{"one":1}}"""
     )
     json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
     self.assertEqual(json_data, data)
示例#24
0
 def test_individually_allowed_fields_separately(self):
     """Round-trip each entry of ``INDIVIDUALLY_SERIALIZABLE`` on its own.

     Each field is used as a stand-alone schema, and the matching value from
     ``VALID_DATA_ALL_FIELDS`` must survive serialization followed by
     deserialization.
     """
     for field in self.INDIVIDUALLY_SERIALIZABLE:
         # unwrap enum, fixed, array, and map but save the name for value lookup
         name = field['name']
         if isinstance(field['type'], dict):
             field = field['type']
         avro_schema = make_avsc_object(field, avro.schema.Names())
         data = self.VALID_DATA_ALL_FIELDS[name]
         avro_json = AvroJsonSerializer(avro_schema).to_json(data)
         json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
         self.assertEqual(json_data, data)
示例#25
0
def get_schema():
    """Return the last type declared in the configured Avro file as JSON data.

    The file at ``settings.AVRO_SCHEMA_PATH`` is read as JSON, each entry of
    its ``"types"`` list is parsed in order with one shared ``schema.Names``
    instance, and the last parsed schema is converted back to plain JSON data
    via ``json.loads(str(...))``.
    """
    # TODO: support multiple type
    # Read through a context manager so the file handle is always closed.
    with open(settings.AVRO_SCHEMA_PATH) as schema_file:
        schema_json = json.load(schema_file)
    type_names = schema.Names()
    array_schema = None
    # TODO: here we assume the LogArray schema is the last one, which might
    # not be the case
    for avro_type in schema_json["types"]:
        array_schema = schema.make_avsc_object(avro_type, type_names)
    return json.loads(str(array_schema))
示例#26
0
 def test_fixed_ascii(self):
     """An ASCII byte string in the ``ffixed`` field is serialized as the
     same text inside the JSON output.
     """
     schema_dict = {
         "namespace": "example.avro",
         "type": "record",
         "name": "WithFixed",
         "fields": [self.FIELD_FIXED]
     }
     data = {"ffixed": b"fixed text here!"}
     avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
     serializer = AvroJsonSerializer(avro_schema)
     json_data = serializer.to_json(data)
     self.assertEqual(json_data, """{"ffixed":"fixed text here!"}""")
    def test_records_union(self):
        """Records inside the ``funion_rec`` union are serialized wrapped in
        an object keyed by the record name chosen for the value.

        An integer ``field`` is expected under ``rec1`` and a string ``field``
        under ``rec2``.
        """
        avro_schema = make_avsc_object(self.UNION_RECORDS_SCHEMA,
                                       avro.schema.Names())
        data = {"funion_rec": {"field": 1}}
        avro_json = AvroJsonSerializer(avro_schema).to_json(data)
        self.assertEqual(avro_json, """{"funion_rec":{"rec1":{"field":1}}}""")

        data_another_record = {"funion_rec": {"field": "hi"}}
        another_record_json = AvroJsonSerializer(avro_schema).to_json(
            data_another_record)
        self.assertEqual(another_record_json,
                         """{"funion_rec":{"rec2":{"field":"hi"}}}""")
示例#28
0
 def test_unknown_fields_are_ignored(self):
     """JSON keys that are not declared in the schema (here ``age``) must be
     absent from the deserialized result.
     """
     schema_dict = {
         "type": "record",
         "name": "BasicName",
         "fields": [{
             "type": "string",
             "name": "name"
         }]
     }
     avro_json = """{"name":"todd","age":1}"""
     avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
     json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
     self.assertEqual(json_data, {"name": "todd"})
示例#29
0
    def test_map(self):
        """A record with one map field serializes to compact JSON; the two
        map keys may appear in either order.
        """
        record_schema = make_avsc_object(
            {
                "type": "record",
                "name": "rec",
                "fields": [self.FIELD_MAP_INT]
            },
            avro.schema.Names())
        payload = {"intmap": {"one": 1, "two": 2}}
        serialized = AvroJsonSerializer(record_schema).to_json(payload)

        # Dictionaries are unsorted
        accepted = ("""{"intmap":{"one":1,"two":2}}""",
                    """{"intmap":{"two":2,"one":1}}""")
        self.assertIn(serialized, accepted)
 def test_unknown_fields_are_ignored(self):
     """JSON keys that are not declared in the schema (here ``age``) must be
     absent from the deserialized result.
     """
     schema_dict = {
         "type": "record",
         "name": "BasicName",
         "fields": [
             {
                 "type": "string",
                 "name": "name"
             }
         ]
     }
     avro_json = """{"name":"todd","age":1}"""
     avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
     json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
     self.assertEqual(json_data, {"name": "todd"})
示例#31
0
 def test_fixed_non_ascii(self):
     """Non-ASCII and control bytes in the ``ffixed`` field must be emitted
     as JSON unicode escapes in the serialized output.
     """
     schema_dict = {
         "namespace": "example.avro",
         "type": "record",
         "name": "WithFixed",
         "fields": [self.FIELD_FIXED]
     }
     data = {"ffixed": b"(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00"}
     avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
     serializer = AvroJsonSerializer(avro_schema)
     json_data = serializer.to_json(data)
     self.assertEqual(
         json_data,
         """{"ffixed":"(~^\\u00fbzoW\\u0013p\\u0019!4\\u000b+\\u0000\\u0000"}"""
     )
示例#32
0
    def test_nested_union_records(self):
        """Round-trip records nested through two levels of nullable unions.

        In the serialized JSON each non-null union value is wrapped in an
        object keyed by the namespaced record name (``nested.MiddleType``,
        ``nested.InnerType``), while a null value stays a bare ``null``.
        """
        inner_type = {
            "name": "InnerType",
            "type": "record",
            "fields": [{
                "name": "inner",
                "type": "int"
            }]
        }
        middle_type = {
            "name": "MiddleType",
            "type": "record",
            "fields": [{
                "name": "middle",
                "type": ["null", inner_type]
            }]
        }
        schema_dict = {
            "namespace": "nested",
            "name": "OuterType",
            "type": "record",
            "fields": [{
                "name": "outer",
                "type": ["null", middle_type]
            }]
        }
        data1 = {"outer": {"middle": {"inner": 1}}}
        data2 = {"outer": {"middle": None}}
        avro1 = """{"outer":{"nested.MiddleType":{"middle":{"nested.InnerType":{"inner":1}}}}}"""
        avro2 = """{"outer":{"nested.MiddleType":{"middle":null}}}"""

        avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
        serializer = AvroJsonSerializer(avro_schema)
        self.assertEqual(serializer.to_json(data1), avro1)
        self.assertEqual(serializer.to_json(data2), avro2)

        deserializer = AvroJsonDeserializer(avro_schema)
        self.assertEqual(deserializer.from_json(avro1), data1)
        self.assertEqual(deserializer.from_json(avro2), data2)
示例#33
0
    def _create_schema_elements_from_json(cls, avro_schema_json):
        """Walk an Avro schema breadth-first and collect its schema elements.

        Schema objects whose class has no entry in ``_schema_to_element_map``
        are skipped, and their nested schemas are not visited.

        :param avro_schema_json: JSON representation of the Avro schema.
        :return: list of ``(schema_element, schema_obj)`` tuples in visiting
            order.
        """
        root = schema.make_avsc_object(avro_schema_json)
        collected = []
        pending = deque([(root, None)])
        while pending:
            current, parent_key = pending.popleft()
            element_cls = _schema_to_element_map.get(current.__class__)
            if element_cls:
                element = element_cls(current, parent_key)
                collected.append((element, current))
                # Children are queued with this element's key as their parent.
                own_key = element.key
                pending.extend(
                    (child, own_key)
                    for child in element.nested_schema_objects
                )
        return collected
 def test_fixed_ascii(self):
     """An ASCII byte string in the ``ffixed`` field round-trips: it is
     serialized as the same text and deserialized back to the original data.
     """
     schema_dict = {
         "namespace": "example.avro",
         "type": "record",
         "name": "WithFixed",
         "fields": [self.FIELD_FIXED]
     }
     data = {"ffixed": b"fixed text here!"}
     avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
     serializer = AvroJsonSerializer(avro_schema)
     avro_json = serializer.to_json(data)
     self.assertEqual(avro_json, """{"ffixed":"fixed text here!"}""")
     json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
     self.assertEqual(json_data, data)
 def test_fixed_non_ascii(self):
     """Non-ASCII and control bytes in the ``ffixed`` field round-trip: they
     are emitted as JSON unicode escapes and deserialize to the same bytes.
     """
     schema_dict = {
         "namespace": "example.avro",
         "type": "record",
         "name": "WithFixed",
         "fields": [self.FIELD_FIXED]
     }
     data = {"ffixed": b"(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00"}
     avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
     serializer = AvroJsonSerializer(avro_schema)
     avro_json = serializer.to_json(data)
     self.assertEqual(avro_json, """{"ffixed":"(~^\\u00fbzoW\\u0013p\\u0019!4\\u000b+\\u0000\\u0000"}""")
     json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
     self.assertEqual(json_data, data)
 def test_array(self):
     """A record with one array field round-trips: the array is serialized
     as a plain JSON list and deserializes to the original data.
     """
     schema_dict = {
         "type": "record",
         "name": "rec",
         "fields": [self.FIELD_ARRAY_INT]
     }
     data = {"intarr": [1, 2, 3]}
     avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
     avro_json = AvroJsonSerializer(avro_schema).to_json(data)
     self.assertEqual(avro_json, """{"intarr":[1,2,3]}""")
     json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
     self.assertEqual(json_data, data)
示例#37
0
 def test_bytes_field_ascii(self):
     """An ASCII byte string in a ``bytes`` field is serialized as the same
     text inside the JSON output.
     """
     schema_dict = {
         "namespace": "example.avro",
         "type": "record",
         "name": "WithFixed",
         "fields": [{
             "type": "bytes",
             "name": "fbytes"
         }]
     }
     data = {"fbytes": b"this is some long bytes field"}
     avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
     serializer = AvroJsonSerializer(avro_schema)
     json_data = serializer.to_json(data)
     self.assertEqual(json_data,
                      """{"fbytes":"this is some long bytes field"}""")
示例#38
0
 def test_user_record(self):
     """Round-trip three ``User`` records with optional fields.

     This schema example is from documentation http://avro.apache.org/docs/1.7.6/gettingstartedpython.html

     Optional fields missing from the input are expected to be serialized as
     ``null`` and to come back as ``None``.
     """
     schema_dict = {
         "namespace": "example.avro",
         "type": "record",
         "name": "User",
         "fields": [{
             "name": "name",
             "type": "string"
         }, {
             "name": "favorite_number",
             "type": ["int", "null"]
         }, {
             "name": "favorite_color",
             "type": ["string", "null"]
         }]
     }
     avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
     serializer = AvroJsonSerializer(avro_schema)
     deserializer = AvroJsonDeserializer(avro_schema)

     # One optional field left out.
     alyssa = {"name": "Alyssa", "favorite_number": 256}
     alyssa_full = {
         "name": "Alyssa",
         "favorite_number": 256,
         "favorite_color": None
     }
     alyssa_json = """{"name":"Alyssa","favorite_number":{"int":256},"favorite_color":null}"""
     self.assertEqual(serializer.to_json(alyssa), alyssa_json)
     self.assertEqual(deserializer.from_json(alyssa_json), alyssa_full)

     # Every field present.
     ben = {"name": "Ben", "favorite_number": 7, "favorite_color": "red"}
     ben_json = """{"name":"Ben","favorite_number":{"int":7},"favorite_color":{"string":"red"}}"""
     self.assertEqual(serializer.to_json(ben), ben_json)
     self.assertEqual(deserializer.from_json(ben_json), ben)

     # Both optional fields left out.
     lion = {"name": "Lion"}
     lion_full = {
         "name": "Lion",
         "favorite_number": None,
         "favorite_color": None
     }
     lion_json = """{"name":"Lion","favorite_number":null,"favorite_color":null}"""
     self.assertEqual(serializer.to_json(lion), lion_json)
     self.assertEqual(deserializer.from_json(lion_json), lion_full)
 def test_bytes_field_ascii(self):
     """An ASCII byte string in a ``bytes`` field round-trips: it is
     serialized as the same text and deserialized back to the original data.
     """
     schema_dict = {
         "namespace": "example.avro",
         "type": "record",
         "name": "WithFixed",
         "fields": [
             {
                 "type": "bytes",
                 "name": "fbytes"
             }
         ]
     }
     data = {"fbytes": b"this is some long bytes field"}
     avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
     serializer = AvroJsonSerializer(avro_schema)
     avro_json = serializer.to_json(data)
     self.assertEqual(avro_json, """{"fbytes":"this is some long bytes field"}""")
     json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
     self.assertEqual(json_data, data)
示例#40
0
def load_schema(sf):
    """Load the Avro schema named ``sf`` from the first directory that has it.

    The file ``schema/<sf lower-cased>.av`` is looked up, in order, next to
    this module, two directory levels above it, and under ``sys.prefix``.

    :param sf: schema name; it is lower-cased to build the file name.
    :return: the object returned by ``schema.make_avsc_object`` for the loaded
        JSON, parsed with the module-level ``SCHEMA_NAMES``.
    :raises Exception: if no candidate file could be read and decoded.
    """
    module_dir = os.path.dirname(sys.modules[__name__].__file__)
    search_dirs = [
        module_dir,
        os.path.join(module_dir, os.path.join(os.pardir, os.pardir)),
        sys.prefix,
    ]
    filename = sf.lower() + '.av'
    obj = None
    for directory in search_dirs:
        path = os.path.join(directory, 'schema', filename)
        try:
            # The context manager closes the handle even if decoding fails.
            with open(path, 'r') as fp:
                obj = json.load(fp)
        except (IOError, OSError, ValueError):
            # Missing/unreadable file or invalid JSON: try the next directory.
            continue
        break
    if obj is None:
        raise Exception('Cannot load schema: ' + sf.lower())

    return schema.make_avsc_object(obj, SCHEMA_NAMES)
 def test_missing_nullable_field(self):
     """JSON lacking the nullable ``version`` field must make ``from_json``
     raise ``AvroTypeException``, although the field declares a default.
     """
     name_field = {"type": "string", "name": "name"}
     version_field = {
         "type": ["null", "int"],
         "name": "version",
         "default": None
     }
     record_schema = make_avsc_object(
         {
             "type": "record",
             "name": "WithDefault",
             "fields": [name_field, version_field]
         },
         avro.schema.Names())
     incomplete_json = """{"name":"mcnameface"}"""
     deserializer = AvroJsonDeserializer(record_schema)
     with self.assertRaises(avro.io.AvroTypeException):
         deserializer.from_json(incomplete_json)
示例#42
0
 def test_missing_nullable_field(self):
     """Deserializing JSON without the nullable ``version`` field is expected
     to raise ``AvroTypeException`` even though a default is declared.
     """
     fields = [
         {"type": "string", "name": "name"},
         {"type": ["null", "int"], "name": "version", "default": None},
     ]
     schema_dict = {"type": "record", "name": "WithDefault", "fields": fields}
     avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
     deserializer = AvroJsonDeserializer(avro_schema)
     with self.assertRaises(avro.io.AvroTypeException):
         deserializer.from_json("""{"name":"mcnameface"}""")
示例#43
0
 def test_bytes_field_non_ascii(self):
     """Non-ASCII and control bytes in a ``bytes`` field round-trip: they are
     emitted as JSON unicode escapes and deserialize to the same bytes.
     """
     schema_dict = {
         "namespace": "example.avro",
         "type": "record",
         "name": "WithFixed",
         "fields": [{
             "type": "bytes",
             "name": "fbytes"
         }]
     }
     data = {"fbytes": b"(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00\x0b+\x00\x00"}
     avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
     serializer = AvroJsonSerializer(avro_schema)
     avro_json = serializer.to_json(data)
     self.assertEqual(
         avro_json,
         """{"fbytes":"(~^\\u00fbzoW\\u0013p\\u0019!4\\u000b+\\u0000\\u0000\\u000b+\\u0000\\u0000"}"""
     )
     json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
     self.assertEqual(json_data, data)
    def convert(self, src_schema):
        """The src_schema is the Avro schema json object. It returns the
        SQLTable object that represents the Redshift table schema.

        Note that Redshift does not support unsigned column type. For now,
        `unsigned` metadata will be ignored and a signed column type will
        be used instead.

        :param src_schema: Avro schema json object; a falsy value yields
            ``None``.
        :return: result of ``self._create_redshift_table`` for the parsed
            record, or ``None`` for a falsy ``src_schema``.
        :raises SchemaConversionException: if the schema cannot be parsed or
            ``self._is_record_schema`` rejects the parsed schema.
        """
        # TODO[clin|DATAPIPE-101] adding sortkey/distkey

        if not src_schema:
            return None
        try:
            avro_record = schema.make_avsc_object(src_schema)
        except Exception:
            # Only real errors are translated; KeyboardInterrupt and
            # SystemExit are no longer swallowed by a bare ``except``.
            raise SchemaConversionException('Invalid Avro record schema.')

        if not self._is_record_schema(avro_record):
            raise SchemaConversionException('Invalid Avro record schema.')

        return self._create_redshift_table(avro_record)
    def convert(self, src_schema):
        """The src_schema is the Avro schema json object. It returns the
        SQLTable object that represents the Redshift table schema.

        Note that Redshift does not support unsigned column type. For now,
        `unsigned` metadata will be ignored and a signed column type will
        be used instead.

        :param src_schema: Avro schema json object; a falsy value yields
            ``None``.
        :return: result of ``self._create_redshift_table`` for the parsed
            record, or ``None`` for a falsy ``src_schema``.
        :raises SchemaConversionException: if the schema cannot be parsed or
            ``self._is_record_schema`` rejects the parsed schema.
        """
        # TODO[clin|DATAPIPE-101] adding sortkey/distkey

        if not src_schema:
            return None
        try:
            avro_record = schema.make_avsc_object(src_schema)
        except Exception:
            # Only real errors are translated; KeyboardInterrupt and
            # SystemExit are no longer swallowed by a bare ``except``.
            raise SchemaConversionException('Invalid Avro record schema.')

        if not self._is_record_schema(avro_record):
            raise SchemaConversionException('Invalid Avro record schema.')

        return self._create_redshift_table(avro_record)
    def test_records_union(self):
        """Round-trip two records through the ``funion_rec`` union.

        Each record is serialized wrapped in an object keyed by the record
        name chosen for the value (``rec1`` for the integer ``field``,
        ``example.avro.rec2`` for the string ``field``) and must deserialize
        back to the original data.
        """
        avro_schema = make_avsc_object(self.UNION_RECORDS_SCHEMA, avro.schema.Names())
        data = {"funion_rec": {"field": 1}}
        avro_json = AvroJsonSerializer(avro_schema).to_json(data)
        self.assertEqual(avro_json, """{"funion_rec":{"rec1":{"field":1}}}""")
        json_data = AvroJsonDeserializer(avro_schema).from_json(avro_json)
        self.assertEqual(json_data, data)

        data_another_record = {"funion_rec": {"field": "hi"}}
        another_record_json = AvroJsonSerializer(avro_schema).to_json(data_another_record)
        self.assertEqual(another_record_json, """{"funion_rec":{"example.avro.rec2":{"field":"hi"}}}""")
        another_json_data = AvroJsonDeserializer(avro_schema).from_json(another_record_json)
        self.assertEqual(another_json_data, data_another_record)
    def test_dict_with_unicode_bytes(self):
        """Check how ``from_dict`` treats values of a ``bytes`` field.

        A byte string must come back unchanged, and a unicode string must come
        back as the corresponding iso-8859-1 byte string.
        """
        schema_dict = {
            "namespace": "example.avro",
            "type": "record",
            "name": "WithBytes",
            "fields": [
                {
                    "type": "bytes",
                    "name": "fbytes"
                }
            ]
        }
        avro_schema = make_avsc_object(schema_dict, avro.schema.Names())

        # byte arrays should be left alone
        byte_data = {"fbytes": b"(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00\x0b+\x00\x00"}
        self.assertEqual(AvroJsonDeserializer(avro_schema).from_dict(byte_data), byte_data)

        # unicode strings should be turned into iso-8859-1 bytes
        iso8859_data = {'fbytes': b"(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00"}
        unicode_data = {u'fbytes': u'(~^\xfbzoW\x13p\x19!4\x0b+\x00\x00'}
        self.assertEqual(AvroJsonDeserializer(avro_schema).from_dict(unicode_data), iso8859_data)
示例#48
0
    def test_map(self):
        """Round-trip a record holding one map field.

        The serialized JSON may list the map keys in either order. Both
        ``from_json`` and ``from_dict`` (fed a dict mixing plain and unicode
        keys) must yield the original data.
        """
        schema_dict = {
            "type": "record",
            "name": "rec",
            "fields": [self.FIELD_MAP_INT]
        }
        data = {"intmap": {"one": 1, "two": 2}}
        unicode_dict = {'intmap': {'one': 1, u'two': 2}}

        avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
        avro_json = AvroJsonSerializer(avro_schema).to_json(data)

        # Dictionaries are unsorted
        self.assertIn(avro_json, ("""{"intmap":{"one":1,"two":2}}""",
                                  """{"intmap":{"two":2,"one":1}}"""))

        deserializer = AvroJsonDeserializer(avro_schema)
        json_data = deserializer.from_json(avro_json)
        self.assertEqual(json_data, data)

        mixed_unicode = deserializer.from_dict(unicode_dict)
        self.assertEqual(mixed_unicode, data)
示例#49
0
    def avro_schema_with_doc(self):
        """Return the Avro schema JSON after documentation has been attached.

        ``self.avro_schema_json`` is parsed and walked breadth-first. For
        every schema object whose class is listed in
        ``_schema_to_element_map``, ``self._add_doc_to_schema`` is called with
        the element and a map from element key to the entries of
        ``self.avro_schema_elements``. The parsed schema's ``to_json()`` is
        returned.
        """
        elements_by_key = {
            element.key: element for element in self.avro_schema_elements
        }
        root = schema.make_avsc_object(self.avro_schema_json)

        pending = deque([(root, None)])
        while pending:
            current, parent_key = pending.popleft()
            element_cls = _schema_to_element_map.get(current.__class__)
            if element_cls:
                element = element_cls(current, parent_key)
                self._add_doc_to_schema(element, elements_by_key)

                # Children are queued with this element's key as their parent.
                own_key = element.key
                for child in element.nested_schema_objects:
                    pending.append((child, own_key))

        return root.to_json()
    def test_nested_union_records(self):
        """Round-trip records nested through two levels of nullable unions.

        In the serialized JSON each non-null union value is wrapped in an
        object keyed by the namespaced record name (``nested.MiddleType``,
        ``nested.InnerType``), while a null value stays a bare ``null``.
        """
        schema_dict = {
            "namespace": "nested",
            "name": "OuterType",
            "type": "record",
            "fields": [{
                "name": "outer",
                "type": ["null", {
                    "name": "MiddleType",
                    "type": "record",
                    "fields": [{
                        "name": "middle",
                        "type": ["null", {
                            "name": "InnerType",
                            "type": "record",
                            "fields": [{
                                "name": "inner",
                                "type": "int"
                            }]
                        }]
                    }]
                }]
            }]
        }
        data1 = {"outer": {"middle": {"inner": 1}}}
        data2 = {"outer": {"middle": None}}
        avro1 = """{"outer":{"nested.MiddleType":{"middle":{"nested.InnerType":{"inner":1}}}}}"""
        avro2 = """{"outer":{"nested.MiddleType":{"middle":null}}}"""

        avro_schema = make_avsc_object(schema_dict, avro.schema.Names())
        serializer = AvroJsonSerializer(avro_schema)
        self.assertEqual(serializer.to_json(data1), avro1)
        self.assertEqual(serializer.to_json(data2), avro2)

        deserializer = AvroJsonDeserializer(avro_schema)
        self.assertEqual(deserializer.from_json(avro1), data1)
        self.assertEqual(deserializer.from_json(avro2), data2)
 def test_fails_validation(self):
     """Serializing the valid fixture with ``ffloat`` replaced by the string
     ``"hi"`` is expected to raise ``AvroTypeException``.
     """
     parsed_schema = make_avsc_object(self.ALL_FIELDS_SCHEMA, avro.schema.Names())
     # Copy first so the class-level fixture is not mutated.
     broken = dict(self.VALID_DATA_ALL_FIELDS)
     broken.update({"ffloat": "hi"})
     serializer = AvroJsonSerializer(parsed_schema)
     with self.assertRaises(avro.io.AvroTypeException):
         serializer.to_json(broken)
示例#52
0
 def test_union_serialization_invalid(self):
     """``to_json`` is expected to raise ``AvroTypeException`` when
     ``funion_null`` holds the string ``"hi"``.
     """
     parsed_schema = make_avsc_object(self.UNION_FIELDS_SCHEMA,
                                      avro.schema.Names())
     invalid_payload = {"funion_null": "hi"}
     serializer = AvroJsonSerializer(parsed_schema)
     with self.assertRaises(avro.io.AvroTypeException):
         serializer.to_json(invalid_payload)
示例#53
0
 def test_union_serialization_null(self):
     """A ``None`` value for ``funion_null`` is serialized as a bare JSON
     ``null`` without a type wrapper.
     """
     avro_schema = make_avsc_object(self.UNION_FIELDS_SCHEMA,
                                    avro.schema.Names())
     data = {"funion_null": None}
     avro_json = AvroJsonSerializer(avro_schema).to_json(data)
     self.assertEqual(avro_json, """{"funion_null":null}""")
示例#54
0
 def _parse_errors(self, errors, names):
   """Parse the ``errors`` list into a schema object.

   Raises ``ProtocolParseException`` when ``errors`` is not a list;
   otherwise returns ``schema.make_avsc_object(errors, names)``.
   """
   if isinstance(errors, list):
     return schema.make_avsc_object(errors, names)
   raise ProtocolParseException('Errors property not a list: %s' % errors)
示例#55
0
文件: protocol.py 项目: 10sr/hue
 def _parse_response(self, response, names):
   """Resolve ``response`` to a schema object.

   A string that ``names.has_name`` recognizes is looked up with
   ``names.get_name``; anything else is parsed with
   ``schema.make_avsc_object``.
   """
   is_known_name = (
       isinstance(response, basestring) and names.has_name(response, None))
   if not is_known_name:
     return schema.make_avsc_object(response, names)
   return names.get_name(response, None)
示例#56
0
文件: protocol.py 项目: 10sr/hue
 def _parse_errors(self, errors, names):
   """Parse the declared ``errors`` as an ``error_union`` schema.

   Raises ``ProtocolParseException`` when ``errors`` is not a list.
   Otherwise the list is wrapped in a dict with ``type`` set to
   ``error_union`` and handed to ``schema.make_avsc_object``.
   """
   if isinstance(errors, list):
     wrapped = {'type': 'error_union', 'declared_errors': errors}
     return schema.make_avsc_object(wrapped, names)
   raise ProtocolParseException('Errors property not a list: %s' % errors)