def write_table(self, pa_table: pa.Table, writer_batch_size: Optional[int] = None):
    """Write a Table to file.

    Args:
        pa_table: the Table to add.
        writer_batch_size: maximum number of rows per RecordBatch written;
            falls back to ``self.writer_batch_size`` when not given.
    """
    if writer_batch_size is None:
        writer_batch_size = self.writer_batch_size
    # Lazily create the underlying writer, inferring the schema from the
    # first table we see.
    if self.pa_writer is None:
        self._build_writer(inferred_schema=pa_table.schema)
    batches: List[pa.RecordBatch] = pa_table.to_batches(max_chunksize=writer_batch_size)
    self._num_bytes += sum(batch.nbytes for batch in batches)
    self._num_examples += pa_table.num_rows
    for batch in batches:
        self.pa_writer.write_batch(batch)
def write_table(self, pa_table: pa.Table, writer_batch_size: Optional[int] = None):
    """Write a Table to file.

    Args:
        pa_table: the Table to add.
        writer_batch_size: maximum number of rows per RecordBatch written;
            defaults to ``self.writer_batch_size``.
    """
    effective_batch_size = self.writer_batch_size if writer_batch_size is None else writer_batch_size
    if self.pa_writer is None:
        self._build_writer(inferred_schema=pa_table.schema)
    # Rebuild the table so its columns follow self._schema's order and types.
    # A plain .cast can't reorder columns, hence from_arrays.
    columns = [pa_table[field_name] for field_name in self._schema.names]
    pa_table = pa.Table.from_arrays(columns, schema=self._schema)
    record_batches: List[pa.RecordBatch] = pa_table.to_batches(max_chunksize=effective_batch_size)
    self._num_bytes += sum(rb.nbytes for rb in record_batches)
    self._num_examples += pa_table.num_rows
    for rb in record_batches:
        self.pa_writer.write_batch(rb)
def _TableToRecordBatch(
        self, table: pa.Table, batch_size: Optional[int] = None) -> List[pa.RecordBatch]:
    """Split ``table`` into RecordBatches of at most ``batch_size`` rows each."""
    record_batches = table.to_batches(max_chunksize=batch_size)
    return record_batches
def __init__(self, table: pa.Table):
    """Index ``table`` as its schema, its RecordBatches, and cumulative row offsets.

    Args:
        table: the Table to wrap.
    """
    self._schema = table.schema
    self._batches = table.to_batches()
    # Pin the dtype explicitly: np.cumsum's default integer dtype is
    # platform-dependent (int32 on some platforms), which could overflow
    # the cumulative row counts on very large tables.
    self._offsets: np.ndarray = np.cumsum([0] + [len(b) for b in self._batches], dtype=np.int64)
def __init__(self, table: pa.Table):
    """Capture ``table``'s schema, its RecordBatches, and cumulative row offsets."""
    self._schema = table.schema
    self._batches = table.to_batches()
    # Offsets start at 0 and accumulate batch lengths, stored as int64
    # so large tables cannot overflow the running row count.
    batch_lengths = [len(batch) for batch in self._batches]
    self._offsets: np.ndarray = np.cumsum([0] + batch_lengths, dtype=np.int64)
def write_table(self, table: pa.Table):
    """Write ``table`` to the underlying sink, one RecordBatch at a time."""
    record_batches = table.to_batches()
    for record_batch in record_batches:
        self.write(record_batch)