示例#1
0
def test_serialize_bad_missing_required_field():
    """Serializing a record without the "name" key raises DatumTypeException."""
    # NOTE(review): 'favorite_number' is a string here as well, so the record
    # may be rejected for that value rather than for the missing "name" key --
    # confirm which condition actually triggers the exception.
    record_without_name = dict(
        favorite_number="Not an int",
        favorite_color='Nonyabusiness',
    )
    batch = [record_without_name]
    with pytest.raises(DatumTypeException):
        serialize(USER_SCHEMA, batch)
示例#2
0
def test_serialize_bad_type_for_field():
    """A record whose 'favorite_number' is a string raises DatumTypeException."""
    wrongly_typed = dict(
        name='Foo Bar Matic',
        favorite_number="Not an int",
        favorite_color='Nonyabusiness',
    )
    batch = [wrongly_typed]
    with pytest.raises(DatumTypeException):
        serialize(USER_SCHEMA, batch)
示例#3
0
def test_serialize_and_deserialize():
    """Round-trip one user record through serialize() and deserialize().

    Exercises deserialize() on a raw blob with ``decode_schema=True`` and on a
    ``BytesIO`` buffer without it, then re-serializes the resulting generator
    and checks the record is still intact.
    """
    expected = dict(
        name='Foo Bar Matic',
        favorite_number=24,
        favorite_color='Nonyabusiness',
    )

    blob = serialize(USER_SCHEMA, [expected])
    meta, records_gen = deserialize(blob, decode_schema=True)
    assert isinstance(meta, dict)
    assert isinstance(records_gen, types.GeneratorType)
    assert isinstance(meta['avro.schema'], dict)

    # The decoded schema name is "<namespace>.<name>" of the writer schema.
    decoded_schema = meta['avro.schema']
    assert decoded_schema['name'] == '.'.join(
        (USER_SCHEMA['namespace'], USER_SCHEMA['name']))
    assert decoded_schema['fields'] == USER_SCHEMA['fields']

    decoded = list(records_gen)
    assert len(decoded) == 1
    assert decoded[0] == expected

    # Same blob read from a file-like object; without decode_schema the
    # schema metadata is expected to stay a string.
    meta, records_gen = deserialize(BytesIO(blob))
    assert isinstance(meta['avro.schema'], string_types)
    assert isinstance(records_gen, types.GeneratorType)

    # Feed the (unconsumed) generator back into serialize() as a batch
    # element and decode the result again.
    regenerated_blob = serialize(USER_SCHEMA, [records_gen])
    _, records_gen = deserialize(regenerated_blob)

    # The original record must have survived the second round-trip.
    decoded = list(records_gen)
    assert len(decoded) == 1
    assert decoded[0] == expected
示例#4
0
def test_serialize_bad_schema():
    """A record schema that has no "name" raises SchemaParseException."""
    fields = [
        {"name": "name", "type": "string"},
        {"name": "favorite_number", "type": ["int", "null"]},
        {"name": "favorite_color", "type": ["string", "null"]},
    ]
    # Deliberately no "name" key on the record schema itself.
    nameless_schema = {"type": "record", "fields": fields}

    record = dict(
        name='Foo Bar Matic',
        favorite_number=24,
        favorite_color='Nonyabusiness',
    )
    with pytest.raises(SchemaParseException):
        serialize(nameless_schema, [record])
示例#5
0
def test_serialize_with_metadata():
    """Keyword metadata given to serialize() comes back from deserialize() as strings."""
    extra_meta = {
        'foo.bar': 10,
        'foo.baz': 'foomatic',
    }
    record = dict(
        name='Foo Bar Matic',
        favorite_number=24,
        favorite_color='Nonyabusiness',
    )

    blob = serialize(USER_SCHEMA, [record], **extra_meta)
    # Only the metadata is inspected; the record generator is left unused.
    decoded_meta, _ = deserialize(blob)

    for key, value in extra_meta.items():
        assert decoded_meta[key] == str(value)
示例#6
0
def test_serialize_and_deserialize_with_reader_schema():
    """Decode a book written with BOOK_SCHEMA_WRITE under different reader schemas.

    Two reader schemas are expected to yield a (differently shaped) record;
    USER_SCHEMA is expected to raise SchemaResolutionException.
    """
    title = 'Nineteen Eighty-Four'
    opening = 'It was a bright cold day in April, and the clocks were striking thirteen.'

    written = {'title': title, 'first_sentence': opening}
    blob = serialize(BOOK_SCHEMA_WRITE, [written])

    # Each reader schema paired with the record it is expected to produce.
    cases = (
        (BOOK_SCHEMA_READ_1,
         {'title': title, 'first_sentence': opening, 'pages': None}),
        (BOOK_SCHEMA_READ_2,
         {'first_sentence': opening}),
    )
    for reader_schema, expected in cases:
        _, records_gen = deserialize(blob,
                                     decode_schema=True,
                                     reader_schema=reader_schema)
        assert isinstance(records_gen, types.GeneratorType)
        decoded = list(records_gen)
        assert len(decoded) == 1
        assert decoded[0] == expected

    # Incompatible reader schema: deserialize() still hands back a generator,
    # and the exception is expected while that generator is consumed.
    _, records_gen = deserialize(blob,
                                 decode_schema=True,
                                 reader_schema=USER_SCHEMA)
    assert isinstance(records_gen, types.GeneratorType)
    with pytest.raises(SchemaResolutionException):
        list(records_gen)
示例#7
0
def test_logical_types():
    """Round-trip a record through TEST_SCHEMA_LOGICAL_TYPES.

    The record holds a timezone-aware datetime, a string and two Decimal
    values; the single decoded record is expected to compare equal to it.
    """
    from datetime import datetime
    from pytz import timezone

    event = {
        # datetime.now(tz) builds the same aware UTC timestamp as the
        # deprecated datetime.utcnow().replace(tzinfo=tz) idiom.
        'ts': datetime.now(timezone('UTC')),
        'customer_uuid': 'some_random_uuid',
        'decimal_bytes': Decimal("-2.90"),
        'decimal_fixed': Decimal("3.68"),
    }

    avro_blob = serialize(TEST_SCHEMA_LOGICAL_TYPES, [event])

    _, test_generator = deserialize(avro_blob, decode_schema=True)
    assert isinstance(test_generator, types.GeneratorType)
    test_records = list(test_generator)
    assert len(test_records) == 1
    assert test_records[0] == event
示例#8
0
def test_serialize(ephemeral, schema):
    """Check serialize() output for a given ephemeral flag using a raw reader.

    ``ephemeral`` and ``schema`` are supplied by the caller (presumably via
    pytest parametrization or fixtures -- not visible in this block).
    """
    expected = dict(
        name='Foo Bar Matic',
        favorite_number=24,
        favorite_color='Nonyabusiness',
    )

    blob = serialize(schema, [expected], ephemeral_storage=ephemeral)

    # Read the blob back through an in-memory buffer positioned at the start.
    avro_reader = reader(BytesIO(blob))
    flag = avro_reader.metadata.get('postmates.storage.ephemeral')

    # '1' when ephemeral is truthy; otherwise the lookup must give None.
    assert flag == ('1' if ephemeral else None)
    assert list(avro_reader) == [expected]
示例#9
0
    def publish(self, schema_map, batch, ephemeral_storage=False, **kwargs):
        """
            Serializes a batch of records against the given schema and
            publishes the resulting blob.

            Args:
                schema_map: dict - Avro schema definition.
                batch: list - List of Avro records (as dicts).

            Kwargs:
                ephemeral_storage: bool - Storage flag handed on to
                                          serialize(); defaults to False.
                Any other keyword arguments are passed unchanged to
                publish_blob().  See extending object.

            Raises:
                EmptyBatchException: if batch is empty (falsy).
        """
        # Refuse to serialize or publish when there is nothing to send.
        if not batch:
            raise EmptyBatchException()

        self.publish_blob(
            serialize(schema_map, batch, ephemeral_storage),
            **kwargs
        )
示例#10
0
def test_serialize_bad_datum_empty():
    """An empty record in the batch raises DatumTypeException."""
    batch_with_empty_record = [{}]
    with pytest.raises(DatumTypeException):
        serialize(USER_SCHEMA, batch_with_empty_record)