示例#1
0
 def _get_column(self,
                 column: Column,
                 batch: RecordBatch) -> Union[np.ndarray, pa.Array]:
     if self._is_numpy_function:
         return batch.get_np_column(column)
     else:
         return batch.get_pa_column(column)
示例#2
0
    def _kernel(self, batch: RecordBatch, arguments: Tuple) -> RecordBatch:
        """Assemble the output batch from the evaluated ``arguments``.

        ``arguments`` is passed through ``self._repeat_scalars`` and the
        result is checked with ``self._ensure_equal_arrays_size``.  When
        ``self._keep_input_table`` is truthy, the new arrays are appended
        after the columns of ``batch``; otherwise the returned batch holds
        only the new arrays.  New columns are named by
        ``self._get_column_names()``.
        """
        new_arrays = self._repeat_scalars(arguments)
        self._ensure_equal_arrays_size(new_arrays)
        new_names = self._get_column_names()

        # Without the input table the result is just the new columns.
        if not self._keep_input_table:
            return RecordBatch.from_arrays(new_arrays, new_names)

        all_arrays = tuple(chain(batch.columns, new_arrays))
        all_names = tuple(chain(batch.column_names, new_names))
        return RecordBatch.from_arrays(all_arrays, all_names)
示例#3
0
 def next(self) -> RecordBatch:
     """Yield a ``RecordBatch`` for every batch the reader produces.

     This is a generator: it keeps calling
     ``self._reader.read_next_batch()`` and finishes as soon as that call
     raises ``StopIteration``.
     """
     while True:
         try:
             raw_batch = self._reader.read_next_batch()
         except StopIteration:
             # Reader is exhausted; end the generator.
             return
         else:
             yield RecordBatch(raw_batch)
示例#4
0
    def next(self) -> Iterable[RecordBatch]:
        """Consume every parent batch, then yield one sorted ``RecordBatch``.

        For each batch from ``self._parent_operator.next()`` the entries of
        ``self._expressions`` (indexed as ``[0]`` = column name, ``[1]`` =
        column values) are appended to the batch's own columns, and the
        widened batch is fed to ``self._sort_op``.  Once the parent is
        exhausted, a single batch wrapping ``self._sort_op.sorted()`` is
        yielded.
        """
        for incoming in self._parent_operator.next():
            # NOTE(review): presumably this call fills self._expressions for
            # the current batch - confirm against _process_arguments.
            self._process_arguments(self._arguments, batch=incoming)

            extra_names = tuple(entry[0] for entry in self._expressions)
            extra_arrays = tuple(entry[1] for entry in self._expressions)
            all_arrays = tuple(chain(incoming.columns, extra_arrays))
            all_names = tuple(chain(incoming.column_names, extra_names))
            widened = RecordBatch.from_arrays(all_arrays, all_names)

            # Remove, once sorting by boolean columns is supported by Arrow
            self._verify_bool_columns(widened.get_schema())

            self._sort_op.next(widened.get_batch())
            # Start the next batch with an empty expression list.
            self._expressions.clear()

        sorted_result = self._sort_op.sorted()
        yield RecordBatch(sorted_result)

        # Reached only when the generator is resumed after the yield above.
        del self._sort_op
示例#5
0
    def next(self) -> Iterable[RecordBatch]:
        """Feed every parent batch to the aggregation object, then yield its result.

        ``self.agg_obj`` is created through ``self._init_agg_obj`` using the
        first batch seen while ``self.agg_obj`` is still falsy.  If the
        parent operator produced no batch (so ``self.agg_obj`` stays falsy),
        nothing is yielded.
        """
        upstream = self._parent_operator.next()
        for incoming in upstream:
            if not self.agg_obj:
                # Initialisation needs a batch, so it happens on first use.
                self._init_agg_obj(incoming)

            self.agg_obj.next(incoming.get_batch())

        aggregator = self.agg_obj
        if aggregator:
            yield RecordBatch(aggregator.result())

        # Reached when the generator is resumed after the yield, or directly
        # when nothing was yielded.
        del self.agg_obj
示例#6
0
 def _eval_expression(self,
                      expression: 'VectorizedExpression',
                      batch: RecordBatch) -> Any:
     """Evaluate ``expression`` on ``batch``, reusing a shared column if present.

     When the expression reports itself as shared and ``batch`` already has
     a column named by its shared id, that column is returned through
     ``self._get_column``.  In every other case evaluation is delegated to
     the parent class implementation.
     """
     already_in_batch = (
         expression.is_shared()
         and batch.has_column(expression.get_shared_id())
     )
     if not already_in_batch:
         return super()._eval_expression(expression, batch)

     # TODO is this ever invoked or already done in the planner?
     shared_column = Column(expression.get_shared_id())
     return self._get_column(shared_column, batch)
示例#7
0
 def _get_column(self, column: Column, batch: RecordBatch) -> pa.Array:
     return batch.get_pa_column(column)
示例#8
0
 def next(self) -> RecordBatch:
     """Yield exactly one batch: the result of ``RecordBatch.empty_batch()``."""
     placeholder = RecordBatch.empty_batch()
     yield placeholder
示例#9
0
 def next(self) -> RecordBatch:
     """Yield a ``RecordBatch`` for every batch returned by the reader.

     This is a generator: ``self._reader.next()`` is called repeatedly and
     iteration ends the first time it returns ``None``.
     """
     raw_batch = self._reader.next()
     while raw_batch is not None:
         yield RecordBatch(raw_batch)
         raw_batch = self._reader.next()
示例#10
0
 def _kernel(self,
             batch: RecordBatch,
             arguments: Tuple[AnyArrayLike]) -> RecordBatch:
     assert len(arguments) == 1
     return batch.filter(arguments[0])