def write_avro(schema, records, runs=1):
    """Benchmark serializing *records* with *schema* using avro's DataFileWriter.

    Serializes all of *records* into an in-memory stream *runs* times, prints
    the average elapsed time per run, and returns the stream from the final run.

    Args:
      schema: JSON-compatible Avro schema data, passed to
        ``avro.schema.SchemaFromJSONData``.
      records: iterable of records conforming to *schema*; each is appended
        to the Avro container file.
      runs: number of timed repetitions; must be >= 1.

    Returns:
      The ``BytesIO`` stream written by the last run (flushed, still open).

    Raises:
      ValueError: if *runs* is less than 1.
    """
    if runs < 1:
        # Fail loudly instead of UnboundLocalError / ZeroDivisionError below.
        raise ValueError(f'runs must be >= 1, got {runs}')
    times = []
    for _ in range(runs):
        iostream = BytesIO()
        # perf_counter is monotonic and high-resolution -- preferred over
        # time.time() for benchmarking (wall clock can jump backwards).
        start = time.perf_counter()
        writer = DataFileWriter(iostream, DatumWriter(),
                                avro.schema.SchemaFromJSONData(schema))
        for record in records:
            writer.append(record)
        # flush() rather than close(): DataFileWriter.close() would also close
        # the underlying BytesIO, which the caller receives as the return value.
        writer.flush()
        end = time.perf_counter()
        times.append(end - start)
    print(f'... {runs} runs averaged {sum(times) / runs} seconds')
    return iostream
def _write_data(self, directory=None, prefix=tempfile.template, codec='null', count=len(RECORDS)):
    """Write *count* Avro records to a fresh temporary file and return its path.

    Records are drawn from ``self.RECORDS``, cycling from the start whenever
    *count* exceeds the number of available records. The file is created with
    ``delete=False`` and its path is remembered in ``self._temp_files`` so the
    test fixture can clean it up later.

    Args:
      directory: directory for the temporary file (default: system temp dir).
      prefix: filename prefix for the temporary file.
      codec: Avro compression codec name passed to ``DataFileWriter``.
      count: total number of records to append.

    Returns:
      The path of the written temporary file.
    """
    with tempfile.NamedTemporaryFile(delete=False, dir=directory, prefix=prefix) as out_file:
        avro_writer = DataFileWriter(out_file, DatumWriter(), self.SCHEMA, codec=codec)
        source = self.RECORDS
        available = len(source)
        for index in range(count):
            # Wrap around so any count works regardless of len(RECORDS).
            avro_writer.append(source[index % available])
        avro_writer.close()
        self._temp_files.append(out_file.name)
        return out_file.name
def _write_data(self, directory=None, prefix=tempfile.template, codec='null', count=len(RECORDS), sync_interval=avro.datafile.SYNC_INTERVAL):
    """Write *count* Avro records to a temp file using a custom sync interval.

    Temporarily overrides the module-global ``avro.datafile.SYNC_INTERVAL``
    (which controls how often the writer emits a sync marker / block boundary)
    while the file is written, restoring the original value afterwards even on
    error. NOTE(review): patching a module global is not thread-safe -- assumed
    acceptable for single-threaded test code.

    Args:
      directory: directory for the temporary file (default: system temp dir).
      prefix: filename prefix for the temporary file.
      codec: Avro compression codec name passed to ``DataFileWriter``.
      count: total number of records to append; records cycle through
        ``self.RECORDS`` as needed.
      sync_interval: value to install as ``avro.datafile.SYNC_INTERVAL``
        for the duration of the write.

    Returns:
      The path of the written temporary file (also recorded in
      ``self._temp_files`` for later cleanup).
    """
    saved_interval = avro.datafile.SYNC_INTERVAL
    try:
        # Install the requested interval before the writer is created so it
        # takes effect for every block written below.
        avro.datafile.SYNC_INTERVAL = sync_interval
        with tempfile.NamedTemporaryFile(delete=False, dir=directory, prefix=prefix) as out_file:
            avro_writer = DataFileWriter(out_file, DatumWriter(), self.SCHEMA, codec=codec)
            source = self.RECORDS
            available = len(source)
            for index in range(count):
                avro_writer.append(source[index % available])
            avro_writer.close()
            self._temp_files.append(out_file.name)
            return out_file.name
    finally:
        # Always restore the global, even if writing raised.
        avro.datafile.SYNC_INTERVAL = saved_interval