def __init__(self, fo, reader_schema=None):
    """Create a new iterator over the records of an avro stream.

    The file header is read immediately; the writer schema and codec are
    taken from the header metadata.

    Parameters
    ----------
    fo: file like
        Input stream
    reader_schema: dict, optional
        Reader schema

    Raises
    ------
    ValueError
        If reading the header raises ``StopIteration``.

    Example
    -------
    >>> with open('some-file.avro', 'rb') as fo:
    ...     avro = iter_avro(fo)
    ...     schema = avro.schema
    ...     for record in avro:
    ...         process_record(record)
    """
    self.fo = fo

    try:
        header = read_data(fo, HEADER_SCHEMA)
    except StopIteration:
        raise ValueError('cannot read header - is it an avro file?')
    self._header = header

    # The header stores `meta` values as bytes, so they are decoded here
    # before anything else looks at them.
    metadata = dict(
        (key, btou(raw)) for key, raw in iteritems(header['meta'])
    )
    self.metadata = metadata

    writer_schema = json.loads(metadata['avro.schema'])
    self.schema = self.writer_schema = writer_schema
    self.codec = metadata.get('avro.codec', 'null')
    self.reader_schema = reader_schema

    acquaint_schema(writer_schema, READERS)
    if reader_schema:
        populate_schema_defs(reader_schema, SCHEMA_DEFS)

    self._records = _iter_avro(
        fo, header, self.codec, writer_schema, reader_schema
    )
def __init__(self, fo, reader_schema=None):
    """Create a new iterator over the records of an avro stream.

    Reads the file header from ``fo`` up front and derives the metadata,
    writer schema and codec from it.

    Parameters
    ----------
    fo: file like
        Input stream
    reader_schema: dict, optional
        Reader schema

    Raises
    ------
    ValueError
        If reading the header raises ``StopIteration``.

    Example
    -------
    >>> with open('some-file.avro', 'rb') as fo:
    ...     avro = iter_avro(fo)
    ...     schema = avro.schema
    ...     for record in avro:
    ...         process_record(record)
    """
    self.fo = fo

    try:
        self._header = read_data(fo, HEADER_SCHEMA)
    except StopIteration:
        raise ValueError('cannot read header - is it an avro file?')

    # `meta` values arrive as bytes; decode each one before storing it.
    raw_meta = self._header['meta']
    self.metadata = dict(
        (name, btou(value)) for name, value in iteritems(raw_meta)
    )

    parsed_schema = json.loads(self.metadata['avro.schema'])
    self.schema = self.writer_schema = parsed_schema
    self.codec = self.metadata.get('avro.codec', 'null')
    self.reader_schema = reader_schema

    acquaint_schema(self.writer_schema, READERS)
    if reader_schema:
        populate_schema_defs(reader_schema)

    self._records = _iter_avro(
        fo,
        self._header,
        self.codec,
        self.writer_schema,
        reader_schema,
    )
def __init__(self, fo, reader_schema=None):
    """Set up iteration over the records of an avro stream.

    Reads the header from ``fo``, decodes its metadata, parses the writer
    schema from the ``avro.schema`` entry and picks the codec from
    ``avro.codec`` (``'null'`` when absent).

    fo: file like input stream
    reader_schema: optional reader schema

    Raises ValueError if reading the header raises StopIteration.
    """
    self.fo = fo

    try:
        header = read_data(fo, HEADER_SCHEMA)
    except StopIteration:
        raise ValueError('cannot read header - is it an avro file?')
    self._header = header

    # `meta` values are bytes, so each one is decoded before being stored.
    decoded = {}
    for key, raw in iteritems(header['meta']):
        decoded[key] = btou(raw)
    self.metadata = decoded

    writer_schema = json.loads(decoded['avro.schema'])
    self.schema = self.writer_schema = writer_schema
    self.codec = decoded.get('avro.codec', 'null')
    self.reader_schema = reader_schema

    acquaint_schema(writer_schema, READERS)
    if reader_schema:
        populate_schema_defs(reader_schema, SCHEMA_DEFS)

    self._records = _iter_avro(
        fo, header, self.codec, writer_schema, reader_schema
    )
def read_utf8(fo, writer_schema=None, reader_schema=None):
    """Read one string value from ``fo``.

    A string is encoded as a long followed by that many bytes of UTF-8
    encoded character data. The raw bytes are obtained with ``read_bytes``
    and decoded as UTF-8 via ``btou``.

    ``writer_schema`` and ``reader_schema`` are accepted but not used here
    (presumably so all readers share one call signature - confirm against
    the caller).
    """
    raw = read_bytes(fo)
    return btou(raw, 'utf-8')