Пример #1
0
def write_avro(schema, records, runs=1):
    """Serialize ``records`` into an in-memory Avro container, timing each run.

    Every run builds a fresh ``BytesIO`` buffer, parses ``schema``, appends
    all records and flushes the writer. Schema parsing is part of the timed
    section. The average elapsed time over all runs is printed.

    Args:
        schema: JSON-compatible schema data handed to
            ``avro.schema.SchemaFromJSONData``.
        records: Iterable of records to append. It is iterated once per run,
            so a one-shot iterator yields nothing after the first run.
        runs: Number of timed repetitions; must be at least 1.

    Returns:
        The ``BytesIO`` buffer written by the final run.

    Raises:
        ValueError: If ``runs`` is less than 1 (there would be no buffer to
            return and no average to compute).
    """
    if runs < 1:
        raise ValueError(f'runs must be at least 1, got {runs}')

    times = []
    for _ in range(runs):
        iostream = BytesIO()
        # perf_counter is monotonic and high-resolution, so the measurement
        # is not disturbed by system clock adjustments.
        start = time.perf_counter()
        writer = DataFileWriter(iostream, DatumWriter(),
                                avro.schema.SchemaFromJSONData(schema))
        for record in records:
            writer.append(record)
        # flush() rather than close(): presumably close() would also close
        # the underlying buffer that is returned to the caller -- confirm
        # against the avro DataFileWriter implementation in use.
        writer.flush()
        end = time.perf_counter()
        times.append(end - start)
    print(f'... {runs} runs averaged {sum(times) / runs} seconds')
    return iostream
Пример #2
0
 def _write_data(self,
                 directory=None,
                 prefix=tempfile.template,
                 codec='null',
                 count=len(RECORDS)):
   """Write ``count`` records to a new Avro temp file and return its path.

   Records are taken from ``self.RECORDS`` in order, wrapping around to the
   first record whenever the end of the list is reached.

   Args:
     directory: Directory in which to create the file; ``None`` lets
       ``tempfile`` pick its default location.
     prefix: File-name prefix passed to ``tempfile.NamedTemporaryFile``.
     codec: Codec name passed to ``DataFileWriter``.
     count: Number of records to write.

   Returns:
     The path of the file that was written.
   """
   with tempfile.NamedTemporaryFile(delete=False,
                                    dir=directory,
                                    prefix=prefix) as f:
     # Register the path before writing: the file is created with
     # delete=False, so if a write below raises it would otherwise remain on
     # disk without being tracked in self._temp_files.
     self._temp_files.append(f.name)
     writer = DataFileWriter(f, DatumWriter(), self.SCHEMA, codec=codec)
     len_records = len(self.RECORDS)
     for i in range(count):
       writer.append(self.RECORDS[i % len_records])
     writer.close()
     return f.name
Пример #3
0
 def _write_data(self,
                 directory=None,
                 prefix=tempfile.template,
                 codec='null',
                 count=len(RECORDS),
                 sync_interval=avro.datafile.SYNC_INTERVAL):
   """Write ``count`` records to a new Avro temp file and return its path.

   Records are taken from ``self.RECORDS`` in order, wrapping around to the
   first record whenever the end of the list is reached. While the file is
   being written, the module-level ``avro.datafile.SYNC_INTERVAL`` is set to
   ``sync_interval``; the previous value is restored afterwards, even if
   writing fails.

   Args:
     directory: Directory in which to create the file; ``None`` lets
       ``tempfile`` pick its default location.
     prefix: File-name prefix passed to ``tempfile.NamedTemporaryFile``.
     codec: Codec name passed to ``DataFileWriter``.
     count: Number of records to write.
     sync_interval: Value assigned to ``avro.datafile.SYNC_INTERVAL`` for
       the duration of the write.

   Returns:
     The path of the file that was written.
   """
   old_sync_interval = avro.datafile.SYNC_INTERVAL
   try:
     avro.datafile.SYNC_INTERVAL = sync_interval
     with tempfile.NamedTemporaryFile(delete=False,
                                      dir=directory,
                                      prefix=prefix) as f:
       # Register the path before writing: the file is created with
       # delete=False, so if a write below raises it would otherwise remain
       # on disk without being tracked in self._temp_files.
       self._temp_files.append(f.name)
       writer = DataFileWriter(f, DatumWriter(), self.SCHEMA, codec=codec)
       len_records = len(self.RECORDS)
       for i in range(count):
         writer.append(self.RECORDS[i % len_records])
       writer.close()
       return f.name
   finally:
     # Undo the module-level override so later writers see the original
     # value.
     avro.datafile.SYNC_INTERVAL = old_sync_interval