# Example #1
# 0
def is_avro(path_or_buffer):
    """Tell whether a file starts with the Avro magic bytes.

    Parameters
    ----------
    path_or_buffer: path to file or file-like object
        When ``is_str`` accepts the argument it is treated as a path and
        opened in binary mode; anything else is used directly as an already
        open object exposing ``read``.

    Returns
    -------
    bool
        True when the first ``len(MAGIC)`` bytes read equal ``MAGIC``.

    Notes
    -----
    A caller-supplied buffer is neither closed nor rewound: the bytes read
    for the comparison stay consumed.
    """
    if is_str(path_or_buffer):
        # We opened the file ourselves, so we are responsible for closing it.
        with open(path_or_buffer, 'rb') as stream:
            return stream.read(len(MAGIC)) == MAGIC

    # Caller owns the buffer; only peek at its leading bytes.
    return path_or_buffer.read(len(MAGIC)) == MAGIC
# Example #2
# 0
def validate(datum, schema):
    """Determine if a python datum is an instance of a schema.

    Parameters
    ----------
    datum: object
        The value to check.
    schema: object
        The schema to check against. Its kind is obtained from
        ``extract_record_type(schema)``. For a union the schema is iterated
        as a sequence of candidate schemas; for fixed, enum, array, map and
        record-like kinds it is indexed by key (``'size'``, ``'symbols'``,
        ``'items'``, ``'values'``, ``'fields'``).

    Returns
    -------
    bool
        True when ``datum`` is acceptable for ``schema``.

    Raises
    ------
    ValueError
        If the record type is neither handled here nor present in
        ``SCHEMA_DEFS``. Also raised by the tuple unpacking when a union
        datum is a tuple that does not have exactly two items.
    """
    record_type = extract_record_type(schema)

    if record_type == 'null':
        return datum is None

    if record_type == 'boolean':
        return isinstance(datum, bool)

    if record_type == 'string':
        return is_str(datum)

    if record_type == 'bytes':
        # Decimal values are accepted next to raw bytes
        # (presumably for a decimal logical type -- confirm against the
        # writer).
        return isinstance(datum, (bytes, decimal.Decimal))

    if record_type == 'int':
        # time/datetime objects are accepted as well (presumably logical
        # types that are converted to integers elsewhere -- confirm).
        return (
            (isinstance(datum, (int, long))
             and INT_MIN_VALUE <= datum <= INT_MAX_VALUE)
            or isinstance(datum, (datetime.time, datetime.datetime))
        )

    if record_type == 'long':
        # Avro defines long as a 64-bit signed integer, so it must be checked
        # against the LONG_* bounds; using the narrower INT_* bounds rejected
        # valid long values.
        return (
            (isinstance(datum, (int, long))
             and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE)
            or isinstance(datum, (datetime.time, datetime.datetime))
        )

    if record_type in ['float', 'double']:
        return isinstance(datum, (int, long, float))

    if record_type == 'fixed':
        return isinstance(datum, bytes) and len(datum) == schema['size']

    if record_type == 'union':
        if isinstance(datum, tuple):
            # A tuple datum is read as (record name, value): the value is
            # validated only against the record branch with that name.
            (name, datum) = datum
            for candidate in schema:
                if extract_record_type(candidate) == 'record':
                    if name == candidate["name"]:
                        return validate(datum, candidate)
            # No record branch carries the requested name.
            return False
        return any(validate(datum, s) for s in schema)

    # dict-y types from here on.
    if record_type == 'enum':
        return datum in schema['symbols']

    if record_type == 'array':
        return (isinstance(datum, Iterable)
                and all(validate(d, schema['items']) for d in datum))

    if record_type == 'map':
        return (isinstance(datum, Mapping)
                and all(is_str(k) for k in datum.keys())
                and all(validate(v, schema['values']) for v in datum.values()))

    if record_type in (
            'record',
            'error',
            'request',
    ):
        # A field missing from the datum is validated using the field's
        # 'default' (None when the schema declares no default).
        return (isinstance(datum, Mapping) and all(
            validate(datum.get(f['name'], f.get('default')), f['type'])
            for f in schema['fields']))

    # Named type: look up its registered definition and validate against it.
    if record_type in SCHEMA_DEFS:
        return validate(datum, SCHEMA_DEFS[record_type])

    raise ValueError('unkown record type - %s' % record_type)
# Example #3
# 0
def validate(datum, schema):
    """Check whether ``datum`` conforms to ``schema``.

    Parameters
    ----------
    datum: object
        The value to check.
    schema: object
        The schema to check against; its kind comes from
        ``extract_record_type(schema)``. A union schema is iterated as a
        sequence of candidate schemas; fixed, enum, array, map and
        record-like schemas are indexed by key.

    Returns
    -------
    bool
        True when ``datum`` is acceptable for ``schema``.

    Raises
    ------
    ValueError
        If the kind is neither handled here nor present in ``SCHEMA_DEFS``.

    Notes
    -----
    ``bool`` is a subclass of ``int`` in Python, so the ``isinstance`` tests
    used for the numeric kinds do not reject ``True``/``False``.
    """
    kind = extract_record_type(schema)

    # ---- primitive kinds -------------------------------------------------
    if kind == 'null':
        return datum is None
    if kind == 'boolean':
        return isinstance(datum, bool)
    if kind == 'string':
        return is_str(datum)
    if kind == 'bytes':
        return isinstance(datum, bytes)

    if kind == 'int':
        if not isinstance(datum, (int, long)):
            return False
        return INT_MIN_VALUE <= datum <= INT_MAX_VALUE

    if kind == 'long':
        if not isinstance(datum, (int, long)):
            return False
        return LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE

    if kind in ('float', 'double'):
        # Integers are accepted wherever a floating point value is expected.
        return isinstance(datum, (int, long, float))

    if kind == 'fixed':
        if not isinstance(datum, bytes):
            return False
        return len(datum) == schema['size']

    # ---- union: the first matching branch wins ---------------------------
    if kind == 'union':
        for branch in schema:
            if validate(datum, branch):
                return True
        return False

    # ---- kinds described by a dict-like schema ---------------------------
    if kind == 'enum':
        symbols = schema['symbols']
        return datum in symbols

    if kind == 'array':
        if not isinstance(datum, Iterable):
            return False
        for element in datum:
            if not validate(element, schema['items']):
                return False
        return True

    if kind == 'map':
        if not isinstance(datum, Mapping):
            return False
        # Every key is checked before any value is looked at.
        for key in datum.keys():
            if not is_str(key):
                return False
        for value in datum.values():
            if not validate(value, schema['values']):
                return False
        return True

    if kind in ('record', 'error', 'request'):
        if not isinstance(datum, Mapping):
            return False
        for field in schema['fields']:
            # A field absent from the datum is checked using the field's
            # 'default' (None when the schema declares no default).
            value = datum.get(field['name'], field.get('default'))
            if not validate(value, field['type']):
                return False
        return True

    # ---- named type: validate against its registered definition ----------
    if kind in SCHEMA_DEFS:
        return validate(datum, SCHEMA_DEFS[kind])

    raise ValueError('unkown record type - %s' % kind)