Example #1
def select(self, dfs: DataFrames, statement: str) -> DataFrame:
    # Convert every input dataframe to pandas, keyed by table name
    _dfs = {
        k: self.execution_engine.to_df(v).as_pandas()  # type: ignore
        for k, v in dfs.items()
    }
    # Run the SQL statement on the pandas dataframes
    df = run_sql_on_pandas(statement, _dfs)
    # Wrap the result back in the engine's dataframe type
    return self.execution_engine.to_df(df)
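
The run_sql_on_pandas helper comes from the qpd project, which Fugue uses to execute SQL directly against pandas dataframes. A minimal standalone sketch of the same technique follows; the qpd_pandas import path matches qpd's documentation but may vary by version, so treat it as an assumption.

import pandas as pd
from qpd_pandas import run_sql_on_pandas  # assumed qpd entry point

df = pd.DataFrame({"a": [0, 1, 2], "b": ["x", "y", "z"]})
# The dict maps the table names used in the statement to pandas dataframes
res = run_sql_on_pandas("SELECT b FROM t WHERE a > 0", {"t": df})
print(res)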
Example #2
def select(self, dfs: DataFrames, statement: str) -> DataFrame:
    # Unwrap each input to its native dask.dataframe, keyed by table name
    dask_dfs = {
        k: self.execution_engine.to_df(v).native  # type: ignore
        for k, v in dfs.items()
    }
    # Run the SQL statement on the Dask dataframes and wrap the result
    df = run_sql_on_dask(statement, dask_dfs)
    return DaskDataFrame(df)
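
The Dask variant has the same shape: unwrap each input to its native dask.dataframe object, run the SQL, and wrap the result back. A sketch assuming qpd exposes a qpd_dask entry point mirroring the pandas one:

import pandas as pd
import dask.dataframe as dd
from qpd_dask import run_sql_on_dask  # assumed qpd entry point

pdf = pd.DataFrame({"a": [0, 1, 2], "b": ["x", "y", "z"]})
ddf = dd.from_pandas(pdf, npartitions=2)
# The result is expected to be a dask dataframe, hence the compute() call
res = run_sql_on_dask("SELECT b FROM t WHERE a > 0", {"t": ddf})
print(res.compute())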
Example #3
def select(self, dfs: DataFrames, statement: str) -> DataFrame:
    # Load every input dataframe into an in-memory SQLite database
    sql_engine = create_engine("sqlite:///:memory:")
    for k, v in dfs.items():
        v.as_pandas().to_sql(k, sql_engine, if_exists="replace", index=False)
    # Run the query against SQLite and wrap the result
    df = pd.read_sql_query(statement, sql_engine)
    return PandasDataFrame(df)
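
This variant needs no extra SQL engine at all: it round-trips through an in-memory SQLite database using only pandas and SQLAlchemy. The same pattern works standalone:

import pandas as pd
from sqlalchemy import create_engine

sql_engine = create_engine("sqlite:///:memory:")
df = pd.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
# Each dataframe becomes a SQLite table named after its key
df.to_sql("t", sql_engine, if_exists="replace", index=False)
out = pd.read_sql_query("SELECT a, b FROM t WHERE a > 1", sql_engine)
print(out)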
Example #4
def select(self, dfs: DataFrames, statement: str) -> DataFrame:
    # Convert every input dataframe to pandas, keyed by table name
    _dfs = {
        k: self.execution_engine.to_df(v).as_pandas()  # type: ignore
        for k, v in dfs.items()
    }
    # Run the SQL statement, honoring the engine's case-sensitivity config
    df = run_sql_on_pandas(
        statement,
        _dfs,
        ignore_case=self.execution_engine.compile_conf.get(
            FUGUE_CONF_SQL_IGNORE_CASE, False),
    )
    return self.execution_engine.to_df(df)
Example #5
def zip_all(
    self,
    dfs: DataFrames,
    how: str = "inner",
    partition_spec: PartitionSpec = EMPTY_PARTITION_SPEC,
    temp_path: Optional[str] = None,
    to_file_threshold: Any = -1,
) -> DataFrame:
    """Zip multiple dataframes together with the given partition
    specification.

    :param dfs: |DataFramesLikeObject|
    :param how: can accept ``inner``, ``left_outer``, ``right_outer``,
      ``full_outer``, ``cross``, defaults to ``inner``
    :param partition_spec: |PartitionLikeObject|, defaults to empty.
    :param temp_path: file path to store the data (used only if the serialized data
      is larger than ``to_file_threshold``), defaults to None
    :param to_file_threshold: file byte size threshold, defaults to -1

    :return: a zipped dataframe, the metadata of the
      dataframe will indicate it's zipped

    :Notice:

    * Please also read :meth:`~.zip`
    * If ``dfs`` is dict like, the zipped dataframe will be dict like;
      if ``dfs`` is list like, the zipped dataframe will be list like
    * It's fine for ``dfs`` to contain only one dataframe

    For more details and examples, read
    :ref:`Zip & Comap <tutorial:/tutorials/execution_engine.ipynb#zip-&-comap>`.
    """
    assert_or_throw(len(dfs) > 0, "can't zip 0 dataframes")
    pairs = list(dfs.items())
    has_name = dfs.has_key
    # A single dataframe needs no zipping, just partition-wise serialization
    if len(dfs) == 1:
        return self._serialize_by_partition(
            pairs[0][1],
            partition_spec,
            pairs[0][0],
            temp_path,
            to_file_threshold,
            has_name=has_name,
        )
    # Zip the first two dataframes, then fold the remaining ones in one by one
    df = self.zip(
        pairs[0][1],
        pairs[1][1],
        how=how,
        partition_spec=partition_spec,
        temp_path=temp_path,
        to_file_threshold=to_file_threshold,
        df1_name=pairs[0][0] if has_name else None,
        df2_name=pairs[1][0] if has_name else None,
    )
    for i in range(2, len(dfs)):
        df = self.zip(
            df,
            pairs[i][1],
            how=how,
            partition_spec=partition_spec,
            temp_path=temp_path,
            to_file_threshold=to_file_threshold,
            df2_name=pairs[i][0] if has_name else None,
        )
    return df
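
A hedged usage sketch of zip_all on Fugue's native (pandas-based) engine. The top-level imports and constructor signatures below follow Fugue's classic API but may differ across versions, so treat them as assumptions:

import pandas as pd
from fugue import (  # assumed top-level exports
    DataFrames, NativeExecutionEngine, PandasDataFrame, PartitionSpec,
)

engine = NativeExecutionEngine()
a = PandasDataFrame(pd.DataFrame({"a": [0, 1], "b": [2, 3]}))
b = PandasDataFrame(pd.DataFrame({"a": [0, 1], "c": [4, 5]}))
# Named (dict-like) input, so the zipped result is dict-like as well
zipped = engine.zip_all(
    DataFrames(x=a, y=b),
    how="inner",
    partition_spec=PartitionSpec(by=["a"]),
)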
Example #6
def select(self, dfs: DataFrames, statement: str) -> DataFrame:
    # Register every input dataframe with the Spark session under its
    # table name, then run the statement as Spark SQL
    for k, v in dfs.items():
        self.execution_engine.register(v, k)  # type: ignore
    return SparkDataFrame(
        self.execution_engine.spark_session.sql(statement)  # type: ignore
    )
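
The Spark implementation delegates registration to the execution engine and then runs plain Spark SQL. The equivalent pattern in standalone PySpark, using only stable Spark APIs:

from pyspark.sql import SparkSession

spark = SparkSession.builder.master("local[*]").getOrCreate()
df = spark.createDataFrame([(1, "x"), (2, "y")], ["a", "b"])
# Fugue's register() makes a dataframe queryable by name; in plain
# PySpark the equivalent is a temporary view
df.createOrReplaceTempView("t")
spark.sql("SELECT a, b FROM t WHERE a > 1").show()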