示例#1
0
 def as_local(self) -> LocalDataFrame:
     """Materialize this dataframe as a local dataframe.

     If ``pa.types.is_nested`` reports any of the schema's types as nested,
     the rows are fetched with ``collect()``, passed through
     ``to_type_safe_input`` and wrapped in an ``ArrayDataFrame``. Otherwise
     the data is converted with ``toPandas()`` and wrapped in a
     ``PandasDataFrame``. In both cases this dataframe's schema and metadata
     are handed to the resulting object.

     :return: an ``ArrayDataFrame`` or a ``PandasDataFrame``
     """
     # TODO: does it make sense to also include the metadata?
     has_nested = any(pa.types.is_nested(t) for t in self.schema.types)
     if not has_nested:
         pdf = self.native.toPandas()
         return PandasDataFrame(pdf, self.schema, self.metadata)
     # Nested columns take the row-based path instead of toPandas().
     collected = self.native.collect()
     rows = list(to_type_safe_input(collected, self.schema))
     return ArrayDataFrame(rows, self.schema, self.metadata)
示例#2
0
 def as_array_iterable(
     self, columns: Optional[List[str]] = None, type_safe: bool = False
 ) -> Iterable[Any]:
     """Lazily yield the rows of this dataframe one at a time.

     :param columns: forwarded to ``self._withColumns`` to obtain the
         dataframe that is actually iterated
     :param type_safe: when false, rows come from
         ``native.rdd.toLocalIterator()`` passed through
         ``to_type_safe_input``; when true, that same row stream is wrapped
         in an ``IterableDataFrame`` and re-read through its own
         ``as_array_iterable(type_safe=True)``
     :return: a generator of rows
     """
     selected = self._withColumns(columns)
     if type_safe:
         # Reuse the non-type-safe stream and let IterableDataFrame perform
         # the type-safe pass.
         raw_rows = selected.as_array_iterable(type_safe=False)
         wrapped = IterableDataFrame(raw_rows, selected.schema)
         yield from wrapped.as_array_iterable(type_safe=True)
         return
     local_rows = selected.native.rdd.toLocalIterator()
     yield from to_type_safe_input(local_rows, selected.schema)
示例#3
0
 def run(self, no: int, rows: Iterable[ps.Row]) -> Iterable[Any]:
     """Apply ``self.map_func`` to the given rows and yield the output rows.

     The rows are passed through ``to_type_safe_input`` and wrapped in an
     ``IterableDataFrame``. If that dataframe is empty, nothing is yielded.
     Otherwise a cursor is obtained from ``self.partition_spec``,
     ``self.on_init`` is invoked when it is set, and the data is either
     used as a single chunk (when ``self.partition_spec.empty``) or split
     by the partitioner obtained from the spec. ``self.map_func`` is called
     once per chunk.

     :param no: integer forwarded to ``partition_spec.get_cursor`` and to
         ``self.on_init``
     :param rows: input rows
     :return: a generator over the rows of every ``map_func`` result, read
         with ``as_array_iterable(type_safe=True)``
     """
     typed_rows = to_type_safe_input(rows, self.schema)
     whole = IterableDataFrame(typed_rows, self.schema, self.metadata)
     if whole.empty:  # pragma: no cover
         return
     cursor = self.partition_spec.get_cursor(self.schema, no)
     if self.on_init is not None:
         self.on_init(no, whole)
     chunks: Iterable[Tuple[int, int, EmptyAwareIterable]]
     if not self.partition_spec.empty:
         splitter = self.partition_spec.get_partitioner(self.schema)
         chunks = splitter.partition(whole.native)
     else:
         # No partitioning requested: treat the whole input as one chunk.
         chunks = [(0, 0, whole.native)]
     for part_no, slice_no, chunk in chunks:
         # Point the cursor at the chunk's first row before mapping it.
         cursor.set(chunk.peek(), part_no, slice_no)
         chunk_df = IterableDataFrame(chunk, self.schema)
         chunk_df._metadata = self.metadata
         mapped = self.map_func(cursor, chunk_df)
         yield from mapped.as_array_iterable(type_safe=True)