def is_avro(path_or_buffer):
    """Return True if path (or buffer) points to an Avro file.

    The first ``len(MAGIC)`` bytes are read and compared against ``MAGIC``.

    Parameters
    ----------
    path_or_buffer: path to file or file-like object
        Path to the file, or an object exposing ``read``. A path is opened
        in binary mode and closed again before returning; an object passed
        in by the caller is read from but not closed here.
    """
    if not is_str(path_or_buffer):
        # The caller owns the buffer: read the header and leave it open.
        return path_or_buffer.read(len(MAGIC)) == MAGIC

    with open(path_or_buffer, 'rb') as avro_file:
        return avro_file.read(len(MAGIC)) == MAGIC
def validate(datum, schema):
    """Determine if a python datum is an instance of a schema.

    Parameters
    ----------
    datum: object
        Value to check. For a union schema it may also be a
        ``(name, datum)`` tuple, in which case only the record branch whose
        ``"name"`` equals ``name`` is considered.
    schema: schema definition
        Schema to check against; its type is resolved with
        ``extract_record_type``.

    Returns
    -------
    bool
        True if ``datum`` is acceptable for ``schema``.

    Raises
    ------
    ValueError
        If the resolved record type is neither handled here nor present in
        ``SCHEMA_DEFS``.
    """
    record_type = extract_record_type(schema)

    if record_type == 'null':
        return datum is None

    if record_type == 'boolean':
        return isinstance(datum, bool)

    if record_type == 'string':
        return is_str(datum)

    if record_type == 'bytes':
        # Decimal values are accepted alongside raw bytes.
        return isinstance(datum, (bytes, decimal.Decimal))

    if record_type == 'int':
        # time/datetime values are accepted alongside in-range integers.
        return (
            (isinstance(datum, (int, long,)) and
             INT_MIN_VALUE <= datum <= INT_MAX_VALUE) or
            isinstance(datum, (datetime.time, datetime.datetime))
        )

    if record_type == 'long':
        # A long must be range-checked against the LONG bounds; using the
        # INT bounds here would reject valid values outside the int range.
        return (
            (isinstance(datum, (int, long,)) and
             LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE) or
            isinstance(datum, (datetime.time, datetime.datetime))
        )

    if record_type in ['float', 'double']:
        return isinstance(datum, (int, long, float))

    if record_type == 'fixed':
        return isinstance(datum, bytes) and len(datum) == schema['size']

    if record_type == 'union':
        if isinstance(datum, tuple):
            # A (name, datum) tuple pins the union branch explicitly.
            (name, datum) = datum
            for candidate in schema:
                if extract_record_type(candidate) == 'record':
                    if name == candidate["name"]:
                        return validate(datum, candidate)
            # Every branch was examined and no record carried that name.
            return False
        return any(validate(datum, s) for s in schema)

    # dict-y types from here on.
    if record_type == 'enum':
        return datum in schema['symbols']

    if record_type == 'array':
        return (
            isinstance(datum, Iterable) and
            all(validate(d, schema['items']) for d in datum)
        )

    if record_type == 'map':
        return (
            isinstance(datum, Mapping) and
            all(is_str(k) for k in datum.keys()) and
            all(validate(v, schema['values']) for v in datum.values())
        )

    if record_type in ('record', 'error', 'request',):
        # A field missing from the datum is validated using its declared
        # default (None when the field declares no default).
        return (
            isinstance(datum, Mapping) and
            all(
                validate(datum.get(f['name'], f.get('default')), f['type'])
                for f in schema['fields']
            )
        )

    if record_type in SCHEMA_DEFS:
        # Named type: validate against its registered definition.
        return validate(datum, SCHEMA_DEFS[record_type])

    raise ValueError('unkown record type - %s' % record_type)
def validate(datum, schema):
    """Check whether a Python value conforms to a schema.

    Parameters
    ----------
    datum: object
        Value to check.
    schema: schema definition
        Schema to check against; its type is resolved with
        ``extract_record_type``.

    Returns
    -------
    bool
        True if ``datum`` is acceptable for ``schema``.

    Raises
    ------
    ValueError
        If the resolved type is neither handled here nor present in
        ``SCHEMA_DEFS``.
    """
    kind = extract_record_type(schema)

    # Primitive types.
    if kind == 'null':
        return datum is None
    if kind == 'boolean':
        return isinstance(datum, bool)
    if kind == 'string':
        return is_str(datum)
    if kind == 'bytes':
        return isinstance(datum, bytes)
    if kind == 'int':
        if not isinstance(datum, (int, long)):
            return False
        return INT_MIN_VALUE <= datum <= INT_MAX_VALUE
    if kind == 'long':
        if not isinstance(datum, (int, long)):
            return False
        return LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE
    if kind in ('float', 'double'):
        return isinstance(datum, (int, long, float))
    if kind == 'fixed':
        if not isinstance(datum, bytes):
            return False
        return len(datum) == schema['size']

    # A union accepts the datum as soon as one branch does.
    if kind == 'union':
        for branch in schema:
            if validate(datum, branch):
                return True
        return False

    # Types described by a dict-like schema.
    if kind == 'enum':
        return datum in schema['symbols']
    if kind == 'array':
        if not isinstance(datum, Iterable):
            return False
        return all(validate(item, schema['items']) for item in datum)
    if kind == 'map':
        if not isinstance(datum, Mapping):
            return False
        if not all(is_str(key) for key in datum.keys()):
            return False
        return all(
            validate(value, schema['values']) for value in datum.values()
        )
    if kind in ('record', 'error', 'request'):
        if not isinstance(datum, Mapping):
            return False
        # A field missing from the datum is checked using its declared
        # default (None when the field declares no default).
        return all(
            validate(
                datum.get(field['name'], field.get('default')),
                field['type'],
            )
            for field in schema['fields']
        )

    # Named type: defer to its registered definition.
    if kind in SCHEMA_DEFS:
        return validate(datum, SCHEMA_DEFS[kind])

    raise ValueError('unkown record type - %s' % kind)