def __init__(self, schema: Schema, spec: PartitionSpec, physical_partition_no: int):
    """Initialize from a dataframe schema and a partition spec

    :param schema: the full schema; kept as-is, used to look up the index of
      each partition key and to extract the partition-key sub-schema
    :param spec: the partition spec providing ``partition_by``
    :param physical_partition_no: stored unchanged on the instance
    """
    partition_keys = spec.partition_by
    self._orig_schema = schema

    # index of every partition key inside the full schema, in key order
    key_positions = []
    for key in partition_keys:
        key_positions.append(schema.index_of_key(key))
    self._key_index = key_positions

    # sub-schema made of the partition keys only
    self._schema = schema.extract(partition_keys)
    self._physical_partition_no = physical_partition_no

    # placeholders below are expected to be overwritten by the framework
    self._row: List[Any] = []
    self._partition_no = 0
    self._slice_no = 0
def get_partitioner(self, schema: Schema) -> SchemaedDataPartitioner:
    """Build a :class:`~triad.utils.pyarrow.SchemaedDataPartitioner` for
    the given dataframe schema

    :param schema: the schema of the dataframe this partition spec will
      operate on
    :return: a SchemaedDataPartitioner created from the schema's
      ``pa_schema``, the indexes of the partition keys in ``schema``, and
      this spec's row and size limits (no sizer is passed)
    """
    # resolve each partition key to its index within the input schema
    key_positions = []
    for key in self.partition_by:
        key_positions.append(schema.index_of_key(key))

    arrow_schema = schema.pa_schema
    limits = {
        "sizer": None,
        "row_limit": self._row_limit,
        "size_limit": self._size_limit,
    }
    return SchemaedDataPartitioner(arrow_schema, key_positions, **limits)