Example #1
0
def test_global_funcs():
    """End-to-end check of the global engine registry helpers.

    Exercises, in order: the default execution-engine factory, named
    registration, the ``on_dup`` dedup policies ("ignore" vs "overwrite"),
    and the SQL-engine registry (named and default).  The statement order
    matters: each assertion observes the registry state left by the
    registrations before it.
    """
    # With no registrations, the factory yields the native engine.
    assert isinstance(make_execution_engine(), NativeExecutionEngine)
    # A named registration is retrievable by that name.
    register_execution_engine(
        "xyz", lambda conf, **kwargs: _MockExecutionEngine(conf, **kwargs))
    assert isinstance(make_execution_engine("xyz"), _MockExecutionEngine)
    # on_dup="ignore": an existing default registration wins, so the
    # default engine is still NOT the mock.
    register_default_execution_engine(
        lambda conf, **kwargs: _MockExecutionEngine(conf, **kwargs),
        on_dup="ignore")
    assert not isinstance(make_execution_engine(), _MockExecutionEngine)
    # on_dup="overwrite": the new default registration replaces the old one.
    register_default_execution_engine(
        lambda conf, **kwargs: _MockExecutionEngine(conf, **kwargs),
        on_dup="overwrite")
    assert isinstance(make_execution_engine(), _MockExecutionEngine)

    # NOTE(review): the factory function itself (not an engine instance) is
    # passed to SqliteEngine here — presumably the constructor accepts any
    # value; confirm against SqliteEngine's signature.
    se = SqliteEngine(make_execution_engine)
    # An already-constructed SQL engine is passed through unchanged.
    assert make_sql_engine(se) is se
    # Without a name, the default SQL engine is used (not the mock).
    assert not isinstance(make_sql_engine(None, make_execution_engine()),
                          _MockSQlEngine)
    # A named SQL-engine registration is retrievable by name.
    register_sql_engine("x", lambda engine: _MockSQlEngine(engine))
    assert isinstance(make_sql_engine("x", make_execution_engine()),
                      _MockSQlEngine)
    # Registering a default SQL engine affects engines built afterwards:
    # the execution engine's sql_engine becomes the mock, with the extra
    # keyword argument forwarded.
    register_default_sql_engine(
        lambda engine: _MockSQlEngine(engine, other=10))
    e = make_execution_engine()
    assert isinstance(e, _MockExecutionEngine)
    assert isinstance(e.sql_engine, _MockSQlEngine)
    assert 10 == e.sql_engine.other
Example #2
0
def _register_engines() -> None:
    """Register the Spark execution engine under the name ``"spark"`` and
    for ``SparkSession`` objects.

    Both registrations use ``on_dup="ignore"`` so that any engine already
    registered under the same key is left in place.
    """

    def _from_conf(conf, **kwargs):
        # Build the engine from configuration only.
        return SparkExecutionEngine(conf=conf)

    def _from_session(session, conf, **kwargs):
        # Build the engine from an existing SparkSession plus configuration.
        return SparkExecutionEngine(session, conf=conf)

    register_execution_engine("spark", _from_conf, on_dup="ignore")
    register_execution_engine(SparkSession, _from_session, on_dup="ignore")
Example #3
0
def test_sql():
    """Run a FugueSQL workflow against a Dask engine registered as "da"."""
    register_execution_engine(
        "da", lambda conf, **kwargs: DaskExecutionEngine(conf=conf))
    # Two-row, two-partition Dask dataframe used as the SQL input "df".
    source = pd.DataFrame([[0], [1]], columns=["a"])
    frame = dd.from_pandas(source, npartitions=2)
    workflow = FugueSQLWorkflow()
    query = """
    SELECT * FROM df WHERE a>0
    PRINT
    """
    workflow(query, df=frame)
    workflow.run("da")
Example #4
0
def test_sql():
    """Run a FugueSQL workflow on a Spark engine registered as "s"."""
    session = SparkSession.builder.getOrCreate()

    def _engine_factory(conf, **kwargs):
        # Reuse the session created above instead of building a new one.
        return SparkExecutionEngine(conf=conf, spark_session=session)

    register_execution_engine("s", _engine_factory)
    # Two-row Spark dataframe used as the SQL input "df".
    frame = session.createDataFrame(pd.DataFrame([[0], [1]], columns=["a"]))
    workflow = FugueSQLWorkflow()
    workflow(
        """
    SELECT * FROM df WHERE a>0
    PRINT
    """,
        df=frame,
    )
    workflow.run("s")
Example #5
0
    def register_execution_engines(self):
        """Register execution engines with names. This will also try to register
        spark and dask engines if the dependent packages are available and they
        are not registered"""
        # "native" is always available; on_dup="ignore" keeps any engine a
        # caller registered under that name earlier.
        register_execution_engine(
            "native",
            lambda conf, **kwargs: NativeExecutionEngine(conf=conf),
            on_dup="ignore",
        )

        # Importing fugue_spark registers the spark engine as a side effect;
        # skip silently when pyspark/fugue_spark are not installed.
        try:
            import pyspark  # noqa: F401
            import fugue_spark  # noqa: F401
        except ImportError:
            pass

        # Same pattern for dask: the fugue_dask import performs the
        # registration, and missing packages are tolerated.
        try:
            import dask.dataframe  # noqa: F401
            import fugue_dask  # noqa: F401
        except ImportError:
            pass
Example #6
0
 def register_execution_engines(self):
     """Register the Kaggle-specific engines, overriding the base
     registrations, and install the one selected by ``self._default_engine``
     ("native", "dask" or "spark"; empty string means native) as the
     default execution engine.
     """
     super().register_execution_engines()
     # Override "native" with the Kaggle variant (no on_dup, so this
     # replaces whatever the superclass registered under that name).
     register_execution_engine(
         "native",
         lambda conf, **kwargs: KaggleNativeExecutionEngine(conf=conf, **kwargs),
     )
     if self._default_engine in ["native", ""]:
         register_default_execution_engine(
             lambda conf, **kwargs: KaggleNativeExecutionEngine(conf=conf, **kwargs)
         )
     register_execution_engine(
         "dask",
         lambda conf, **kwargs: KaggleDaskExecutionEngine(conf=conf),
     )
     if self._default_engine == "dask":
         register_default_execution_engine(
             lambda conf, **kwargs: KaggleDaskExecutionEngine(conf=conf),
         )
     register_execution_engine(
         "spark",
         lambda conf, **kwargs: KaggleSparkExecutionEngine(conf=conf),
     )
     if self._default_engine == "spark":
         register_default_execution_engine(
             lambda conf, **kwargs: KaggleSparkExecutionEngine(conf=conf),
         )
Example #7
0
    from fugue import WorkflowDataFrame, register_execution_engine
    from fugue_sql import FugueSQLWorkflow
    from triad.utils.convert import get_caller_global_local_vars
except ImportError:  # pragma: no cover
    raise ImportError(
        "Can not load the fugue module. If you want to use this integration, you need to install it."
    )

from typing import Any, Dict, Optional

import dask.dataframe as dd

from dask_sql.context import Context

# Make dask-sql's execution engine the one served under the name "dask",
# replacing (on_dup="overwrite") any engine previously registered there.
register_execution_engine("dask",
                          lambda conf: DaskSQLExecutionEngine(conf),
                          on_dup="overwrite")


class DaskSQLEngine(fugue.execution.execution_engine.SQLEngine):
    """
    SQL engine for fugue which uses dask-sql instead of the native
    SQL implementation.

    Please note, that so far the native SQL engine in fugue
    understands a larger set of SQL commands, but in turn is
    (on average) slower in computation and scaling.
    """
    def __init__(self, *args, **kwargs):
        """Create a new instance, forwarding all arguments to SQLEngine."""
        super().__init__(*args, **kwargs)
Example #8
0
def register() -> None:
    """Register engines for DuckDB"""
    # Both "duck" and "duckdb" resolve to the same SQL/execution engines.
    register_sql_engine("duck", lambda engine: DuckDBEngine(engine))
    register_sql_engine("duckdb", lambda engine: DuckDBEngine(engine))
    # NOTE(review): "DuckExeuctionEngine" looks like a misspelling of
    # "DuckExecutionEngine" — it is used consistently here, so the class may
    # really be declared with this name; confirm before renaming.
    register_execution_engine("duck", lambda conf: DuckExeuctionEngine(conf))
    register_execution_engine("duckdb", lambda conf: DuckExeuctionEngine(conf))
Example #9
0
def _register_engines() -> None:
    """Register the Dask execution engine under the name ``"dask"``.

    Uses ``on_dup="ignore"`` so an engine already registered under that
    name is left untouched.
    """

    def _make_engine(conf, **kwargs):
        return DaskExecutionEngine(conf=conf)

    register_execution_engine("dask", _make_engine, on_dup="ignore")