class Writer(object):
    """Buffer serialized records in memory and emit them to ``fo`` in blocks.

    Records passed to :meth:`write` are serialized into an in-memory buffer.
    Once the buffer position reaches ``sync_interval`` (or :meth:`flush` is
    called) the buffered data is written out as one block: the record count,
    the buffer contents passed through the codec's block writer, and then the
    sync marker.

    :param fo: Output file-like object; must provide ``write`` and ``flush``.
    :param schema: Schema used to serialize (and optionally validate) records.
        It is also stored JSON-encoded under the ``'avro.schema'`` metadata
        key, so it must be JSON-serializable.
    :param codec: Key into ``BLOCK_WRITERS`` selecting how each block is
        written. Also stored under the ``'avro.codec'`` metadata key.
    :param sync_interval: Buffer-position threshold at which :meth:`write`
        automatically dumps the current block.
    :param metadata: Optional mapping of extra header metadata. It is copied,
        so the caller's object is never modified.
    :param validator: ``True`` to use the module's ``validate`` function, a
        callable ``fn(record, schema)`` to use a custom check, or a falsy
        value to skip validation.
    :raises ValueError: If ``codec`` is not a key of ``BLOCK_WRITERS``.
    """

    def __init__(self, fo, schema, codec='null',
                 sync_interval=1000 * SYNC_SIZE, metadata=None,
                 validator=None):
        self.fo = fo
        self.schema = schema
        self.validate_fn = validate if validator is True else validator
        # Random marker written after every block (and handed to write_header).
        self.sync_marker = urandom(SYNC_SIZE)
        self.io = MemoryIO()
        self.block_count = 0
        # Work on a private copy: the reserved keys below must not leak into
        # the dict the caller passed in. (Previously a non-empty dict was
        # mutated in place while an empty one was silently replaced.)
        self.metadata = dict(metadata) if metadata else {}
        self.metadata['avro.codec'] = codec
        self.metadata['avro.schema'] = json.dumps(schema)
        self.sync_interval = sync_interval

        # Resolve the codec before anything is written to ``fo`` so that an
        # unknown codec does not leave a partial header behind.
        try:
            self.block_writer = BLOCK_WRITERS[codec]
        except KeyError:
            raise ValueError('unrecognized codec: %r' % codec)

        write_header(self.fo, self.metadata, self.sync_marker)
        acquaint_schema(self.schema)

    def dump(self):
        """Write the buffered records to ``fo`` as one block and reset state.

        The block consists of the record count, the buffered bytes (written
        via the codec's block writer) and the sync marker. Afterwards the
        in-memory buffer is emptied and the record count is reset to zero.
        """
        write_long(self.fo, self.block_count)
        self.block_writer(self.fo, self.io.getvalue())
        self.fo.write(self.sync_marker)
        # truncate() alone does not move the stream position, so rewind too.
        self.io.truncate(0)
        self.io.seek(0, SEEK_SET)
        self.block_count = 0

    def write(self, record):
        """Serialize ``record`` into the buffer, dumping a block when full.

        If a validator was configured it is called with ``(record, schema)``
        before the record is serialized.
        """
        if self.validate_fn:
            self.validate_fn(record, self.schema)
        write_data(self.io, record, self.schema)
        self.block_count += 1
        if self.io.tell() >= self.sync_interval:
            self.dump()

    def flush(self):
        """Dump any pending records as a block, then flush ``fo``.

        A block is written if the buffer holds any bytes or if at least one
        record has been counted (a record may serialize to zero bytes).
        """
        if self.io.tell() or self.block_count > 0:
            self.dump()
        self.fo.flush()