示例#1
0
    def __init__(self, fo, reader_schema=None):
        """Set up an iterator over the records of an Avro stream.

        The header is read from ``fo`` immediately; its metadata, the
        writer schema and the codec name are exposed as attributes.

        Parameters
        ----------
        fo: file like
            Input stream
        reader_schema: dict, optional
            Reader schema

        Raises
        ------
        ValueError
            If reading the header raises ``StopIteration``.

        Example
        -------
        >>> with open('some-file.avro', 'rb') as fo:
        ...     avro = iter_avro(fo)
        ...     schema = avro.schema
        ...     for record in avro:
        ...         process_record(record)
        """
        self.fo = fo
        try:
            header = read_data(fo, HEADER_SCHEMA)
        except StopIteration:
            raise ValueError('cannot read header - is it an avro file?')
        self._header = header

        # The header's `meta` values are bytes, so each one is decoded here
        # with `btou` before being stored.
        self.metadata = {
            key: btou(raw) for key, raw in iteritems(header['meta'])
        }

        # The writer schema is stored as JSON text in the metadata.
        writer_schema = json.loads(self.metadata['avro.schema'])
        self.schema = writer_schema
        self.writer_schema = writer_schema
        # Fall back to the 'null' codec name when none is recorded.
        self.codec = self.metadata.get('avro.codec', 'null')
        self.reader_schema = reader_schema

        acquaint_schema(writer_schema, READERS)
        if reader_schema:
            populate_schema_defs(reader_schema, SCHEMA_DEFS)
        self._records = _iter_avro(
            fo, header, self.codec, writer_schema, reader_schema)
示例#2
0
    def __init__(self, fo, reader_schema=None):
        """Create a record iterator on top of an Avro input stream.

        Reads the header from ``fo`` up front and derives the metadata,
        writer schema and codec name from it.

        Parameters
        ----------
        fo: file like
            Input stream
        reader_schema: dict, optional
            Reader schema

        Raises
        ------
        ValueError
            If reading the header raises ``StopIteration``.

        Example
        -------
        >>> with open('some-file.avro', 'rb') as fo:
        ...     avro = iter_avro(fo)
        ...     schema = avro.schema
        ...     for record in avro:
        ...         process_record(record)
        """
        self.fo = fo
        try:
            self._header = read_data(fo, HEADER_SCHEMA)
        except StopIteration:
            raise ValueError('cannot read header - is it an avro file?')

        # `meta` values come back as bytes; decode every entry with `btou`
        # before publishing the mapping on the instance.
        decoded_meta = {}
        for name, value in iteritems(self._header['meta']):
            decoded_meta[name] = btou(value)
        self.metadata = decoded_meta

        # The schema the file was written with is kept as JSON text.
        schema_json = decoded_meta['avro.schema']
        self.schema = self.writer_schema = json.loads(schema_json)
        # Use the 'null' codec name when the metadata does not name one.
        self.codec = decoded_meta.get('avro.codec', 'null')
        self.reader_schema = reader_schema

        acquaint_schema(self.writer_schema, READERS)
        if reader_schema:
            populate_schema_defs(reader_schema)
        self._records = _iter_avro(
            fo,
            self._header,
            self.codec,
            self.writer_schema,
            reader_schema,
        )
示例#3
0
    def __init__(self, fo, reader_schema=None):
        """Prepare iteration over the records found in ``fo``.

        The header is read immediately. Its decoded metadata, the writer
        schema parsed from the ``avro.schema`` entry and the codec name
        (``'null'`` when absent) are stored on the instance.

        Parameters
        ----------
        fo: file like
            Input stream
        reader_schema: optional
            Reader schema, forwarded to the record iterator

        Raises
        ------
        ValueError
            If reading the header raises ``StopIteration``.
        """
        self.fo = fo
        try:
            header = read_data(fo, HEADER_SCHEMA)
        except StopIteration:
            raise ValueError('cannot read header - is it an avro file?')
        self._header = header

        # `meta` values are bytes, so they are decoded with `btou` here.
        metadata = {k: btou(v) for k, v in iteritems(header['meta'])}
        self.metadata = metadata

        parsed_schema = json.loads(metadata['avro.schema'])
        self.schema = parsed_schema
        self.writer_schema = parsed_schema
        self.codec = metadata.get('avro.codec', 'null')
        self.reader_schema = reader_schema

        acquaint_schema(parsed_schema, READERS)
        if reader_schema:
            populate_schema_defs(reader_schema, SCHEMA_DEFS)
        self._records = _iter_avro(
            fo, header, self.codec, parsed_schema, reader_schema)
示例#4
0
def read_utf8(fo, writer_schema=None, reader_schema=None):
    """Read a string value from ``fo``.

    A string is encoded as a long followed by that many bytes of UTF-8
    encoded character data. The bytes are fetched with ``read_bytes`` and
    decoded with ``btou``; ``writer_schema`` and ``reader_schema`` are
    accepted but not used here.
    """
    raw = read_bytes(fo)
    return btou(raw, 'utf-8')
示例#5
0
def read_utf8(fo, writer_schema=None, reader_schema=None):
    """Decode the next string value in ``fo``.

    Strings are stored as a long followed by that many bytes of UTF-8
    encoded character data. Neither schema argument is consulted by this
    function.
    """
    encoded = read_bytes(fo)
    text = btou(encoded, 'utf-8')
    return text