Exemplo n.º 1
0
 def transform(self, dfs: DataFrames) -> LocalDataFrame:
     """Run the wrapped function with ``output=False`` and return an empty frame.

     How ``dfs`` is handed to ``self._wrapper.run`` depends on two flags:

     * ``self._dfs_input`` is truthy: ``dfs`` is passed whole as the only
       dataframe positional argument.
     * ``dfs.has_key`` is truthy: the entries of ``dfs`` become keyword
       arguments, with ``self.params`` applied on top (a param wins on a
       name clash).
     * otherwise: every value of ``dfs`` becomes a positional argument.

     Whatever ``_get_callback(self)`` returns is appended to the positional
     arguments in every case.

     :param dfs: the input dataframes collection
     :return: an empty ``ArrayDataFrame`` built with
       ``OUTPUT_TRANSFORMER_DUMMY_SCHEMA``; the value returned by the
       wrapped call is discarded
     """
     callbacks = _get_callback(self)
     if self._dfs_input:  # function has DataFrames input
         positional, named = [dfs], self.params
     elif dfs.has_key:  # input DataFrames has key
         positional = []
         named = dict(dfs)
         named.update(self.params)
     else:  # input does not have key
         positional, named = list(dfs.values()), self.params
     self._wrapper.run(  # type: ignore
         positional + callbacks,
         named,
         ignore_unknown=False,
         output=False,
     )
     return ArrayDataFrame([], OUTPUT_TRANSFORMER_DUMMY_SCHEMA)
Exemplo n.º 2
0
 def process(self, dfs: DataFrames) -> None:
     """Show the leading rows of every input dataframe.

     Reads three entries from ``self.params``: ``title`` (stringified
     unless it is ``None``), ``rows`` (default ``10``) and ``show_count``
     (default ``False``). When ``show_count`` is falsy the count handed
     to the display routine is ``-1``.

     With no ``Show._hook`` installed, the title (if any) is printed once
     and each dataframe's ``_show`` is called with ``title=None``. With a
     hook installed, the hook is called once per dataframe and receives
     the title each time.

     :param dfs: the dataframes to display
     """
     # TODO: how do we make sure multiple dfs are printed together?
     raw_title = self.params.get_or_none("title", object)
     title = None if raw_title is None else str(raw_title)
     rows = self.params.get("rows", 10)
     show_count = self.params.get("show_count", False)
     frames = list(dfs.values())
     # Heads and counts are all gathered before the lock is acquired.
     heads = [frame.head(rows) for frame in frames]
     counts = [frame.count() if show_count else -1 for frame in frames]
     with Show.LOCK:
         if Show._hook is not None:
             for frame, head, count in zip(frames, heads, counts):
                 Show._hook(  # pylint: disable=E1102
                     schema=frame.schema,
                     head_rows=head,
                     title=title,
                     rows=rows,
                     count=count,
                 )
         else:
             if title is not None:
                 print(title)
             for frame, head, count in zip(frames, heads, counts):
                 frame._show(
                     head_rows=head,
                     rows=rows,
                     count=count,
                     title=None,
                 )
Exemplo n.º 3
0
 def process(self, dfs: DataFrames) -> None:
     """Assemble the call arguments and invoke the wrapped function.

     Positional arguments are built in this order: the execution engine
     (only when ``self._need_engine`` is truthy), then either ``dfs``
     itself (when ``self._use_dfs`` is truthy) or its individual values
     (when ``dfs.has_key`` is falsy). When ``dfs`` has keys and is not
     passed whole, its entries become keyword arguments instead.
     ``self.params`` is merged into the keyword arguments last, so a
     param overrides a dataframe of the same name.

     :param dfs: the input dataframes collection
     :return: whatever ``self._wrapper.run`` returns
     """
     positional: List[Any] = []
     named: Dict[str, Any] = {}
     if self._need_engine:
         positional.append(self.execution_engine)
     if self._use_dfs:
         positional.append(dfs)
     elif dfs.has_key:
         named.update(dfs)
     else:
         positional.extend(dfs.values())
     named.update(self.params)
     return self._wrapper.run(args=positional, kwargs=named)
Exemplo n.º 4
0
 def process(self, dfs: DataFrames) -> DataFrame:
     """Assemble the call arguments and invoke the wrapped function.

     Positional arguments are built in this order: the engine converted
     by ``self._engine_param.to_input`` (only when ``self._engine_param``
     is not ``None``), then either ``dfs`` itself (when ``self._use_dfs``
     is truthy) or its individual values (when ``dfs.has_key`` is falsy).
     When ``dfs`` has keys and is not passed whole, its entries become
     keyword arguments instead. ``self.params`` is merged into the
     keyword arguments last, so a param overrides a dataframe of the
     same name.

     :param dfs: the input dataframes collection
     :return: whatever ``self._wrapper.run`` returns
     """
     positional: List[Any] = []
     named: Dict[str, Any] = {}
     if self._engine_param is not None:
         positional.append(
             self._engine_param.to_input(self.execution_engine)
         )
     if self._use_dfs:
         positional.append(dfs)
     elif dfs.has_key:
         named.update(dfs)
     else:
         positional.extend(dfs.values())
     named.update(self.params)
     # The output schema is forwarded only when the wrapper asks for one.
     if self._need_output_schema:
         schema = self.output_schema
     else:
         schema = None
     return self._wrapper.run(
         args=positional,
         kwargs=named,
         output_schema=schema,
         ctx=self.execution_engine,
     )
Exemplo n.º 5
0
 def transform(self, dfs: DataFrames) -> LocalDataFrame:
     """Run the wrapped function on ``dfs`` and return its result.

     How ``dfs`` is handed to ``self._wrapper.run`` depends on two flags:

     * ``self._dfs_input`` is truthy: ``dfs`` is passed whole as the
       single positional argument.
     * ``dfs.has_key`` is truthy: the entries of ``dfs`` become keyword
       arguments, with ``self.params`` applied on top (a param wins on a
       name clash).
     * otherwise: every value of ``dfs`` becomes a positional argument.

     ``self.output_schema`` is always forwarded as ``output_schema``.

     :param dfs: the input dataframes collection
     :return: whatever ``self._wrapper.run`` returns
     """
     if self._dfs_input:  # function has DataFrames input
         positional, named = [dfs], self.params
     elif dfs.has_key:  # input DataFrames has key
         positional = []
         named = dict(dfs)
         named.update(self.params)
     else:  # input does not have key
         positional, named = list(dfs.values()), self.params
     return self._wrapper.run(  # type: ignore
         positional,
         named,
         ignore_unknown=False,
         output_schema=self.output_schema,
     )
Exemplo n.º 6
0
def mock_processor2(e: ExecutionEngine, dfs: DataFrames) -> DataFrame:
    """Return a one-cell frame holding the combined row count of ``dfs``.

    Asserts that the key ``"test"`` is present in ``e.conf`` before doing
    any work. The result is an ``ArrayDataFrame`` with schema ``a:int``.
    """
    assert "test" in e.conf
    total_rows = sum(frame.count() for frame in dfs.values())
    return ArrayDataFrame([[total_rows]], "a:int")
Exemplo n.º 7
0
def t5(e: ExecutionEngine, dfs: DataFrames, a) -> List[List[Any]]:
    """Return ``[[total + a]]``-shaped data via an ``ArrayDataFrame``.

    ``total`` is the sum of ``count()`` over every value in ``dfs``. The
    sum plus ``a`` is placed in a single-cell ``ArrayDataFrame`` with
    schema ``a:int`` and returned through ``as_array()``.
    """
    assert e is not None
    total_rows = sum(frame.count() for frame in dfs.values())
    single_cell = ArrayDataFrame([[total_rows + a]], "a:int")
    return single_cell.as_array()
Exemplo n.º 8
0
def t5(e: ExecutionEngine, dfs: DataFrames, a, b) -> None:
    """Store the combined row count of ``dfs`` plus ``a`` on ``b.value``.

    The count is the sum of ``count()`` over every value in ``dfs``.
    Nothing is returned; the result is only visible through ``b``.
    """
    assert e is not None
    total_rows = sum(frame.count() for frame in dfs.values())
    b.value = total_rows + a