Exemplo n.º 1
0
def test_select_with():
    """Compare a programmatic ``select`` carrying a ``with`` prelude to its SQL text.

    The statement is passed to ``dag.select`` as a single string made of a CTE
    prelude and a final ``union`` query; ``assert_eq`` then receives the
    multi-line SQL text together with the workflow.
    """
    expected_sql = """
    with
        x as (select * from a),
        y as (select * from b)
    select *   from x union select * from y

    """
    cte_prelude = "with x as ( select * from a ) , y as ( select * from b ) "
    final_query = "select * from x union select * from y"

    dag = FugueWorkflow()
    dag.select(cte_prelude + final_query)

    assert_eq(expected_sql, dag)
Exemplo n.º 2
0
def test_general_set_op():
    """Compare programmatic set operations (union / intersect) to their SQL text.

    Each ``dag.select`` call below corresponds, in order, to one statement of
    the expected SQL. Operands are created immediately before the ``select``
    that consumes them so the workflow is built in the same order as before.
    """
    expected_sql = """
    a=create using mock_create1(n=1)
    b=create using mock_create1(n=2)

    select * from a union all select * from b
    create using mock_create1 union select * from b
    create using mock_create1 intersect distinct process a using mock_processor1
    select * from (create using mock_create1) union process a using mock_processor1

    # operation on omitted dependencies should work as expected
    c=create using mock_create1(n=2)
    transform using mock_transformer2 union process using mock_processor1
    """

    dag = FugueWorkflow()
    a = dag.create(mock_create1, params={"n": 1})
    b = dag.create(mock_create1, params={"n": 2})

    # Both operands are named dataframes.
    dag.select("select * from", a, "AS a union all select * from", b, "AS b")

    # Anonymous create on the first side, named dataframe on the second.
    created = dag.create(mock_create1)
    dag.select("SELECT * FROM", created, "union select * from", b, "AS b")

    # Anonymous create intersected with a processed dataframe.
    created = dag.create(mock_create1)
    processed = a.process(mock_processor1)
    dag.select("SELECT * FROM", created, "intersect distinct SELECT * FROM", processed)

    # Same operands, combined with a union instead.
    created = dag.create(mock_create1)
    processed = a.process(mock_processor1)
    dag.select("select * from", created, "union SELECT * FROM", processed)

    # Both operands derive from the same upstream dataframe ``c``.
    c = dag.create(mock_create1, params={"n": 2})
    transformed = c.transform(mock_transformer2)
    processed = c.process(mock_processor1)
    dag.select("SELECT * FROM", transformed, "union SELECT * FROM", processed)

    assert_eq(expected_sql, dag)
Exemplo n.º 3
0
def test_select_nested():
    """Compare programmatic nested / aliased / sampled selects to their SQL text.

    ``a`` and ``b`` are not passed to any ``select`` here; the first statement
    refers to ``a.b`` purely as text inside the SQL string.
    """
    expected_sql = """
    a=create using mock_create1(n=1)
    b=create using mock_create1(n=2)

    # nested query
    select * from (select * from a.b)
    select * from (create using mock_create1) AS bb
    select * from (create using mock_create1) TABLESAMPLE(5 PERCENT)
    select * from (select * from (create using mock_create1))
    """

    dag = FugueWorkflow()
    a = dag.create(mock_create1, params={"n": 1})
    b = dag.create(mock_create1, params={"n": 2})

    # Sub-query written entirely as text.
    dag.select("select * from (select * from a.b)")

    # Anonymous create followed by an alias.
    created = dag.create(mock_create1)
    dag.select("select * from", created, "AS bb")

    # Anonymous create followed by a sample clause.
    created = dag.create(mock_create1)
    dag.select("select * from", created, "TABLESAMPLE (5 PERCENT)")

    # Anonymous create wrapped in an explicit sub-query.
    created = dag.create(mock_create1)
    dag.select("select * from (select * from", created, ")")

    assert_eq(expected_sql, dag)
Exemplo n.º 4
0
def test_select_plus_engine():
    """Compare selects that name a SQL engine to the ``connect ... select`` SQL text.

    The engine is supplied both as a class (``MockEngine``) and as the
    registered alias ``"_mock"`` with engine parameters.
    """

    class MockEngine(SqliteEngine):
        """``SqliteEngine`` subclass that additionally stores an integer ``p``."""

        def __init__(self, execution_engine, p: int = 0):
            super().__init__(execution_engine)
            self.p = p

    def _build_mock_engine(e, **kwargs):
        # Factory registered under the "_mock" alias; forwards all keyword
        # arguments to MockEngine.
        return MockEngine(e, **kwargs)

    register_sql_engine("_mock", _build_mock_engine)

    expected_sql = """
    connect MockEngine select * from xyz persist
    connect _mock(p=2) select * from xyz
    connect MockEngine select * from xyz order by t limit 10

    connect MockEngine with a as (select * from b) select * from a order by t limit 10

    # This is not supported
    # transform (connect MockEngine select a,b from a) using mock_transformer2

    # This is not supported
    # select aa+bb as t from (connect MockEngine select aa,bb from a)
    """

    dag = FugueWorkflow()

    # Engine given as a class, result persisted.
    selected = dag.select("select * from xyz", sql_engine=MockEngine)
    selected.persist()

    # Engine given by its registered alias, with parameters.
    dag.select(
        "select * from xyz",
        sql_engine="_mock",
        sql_engine_params=dict(p=2),
    )

    # Engine given as a class, with ORDER BY / LIMIT.
    dag.select("select * from xyz order by t limit 10", sql_engine=MockEngine)

    # Engine given as a class, with a CTE prelude.
    dag.select(
        "with a as ( select * from b ) select * from a order by t limit 10",
        sql_engine=MockEngine,
    )

    # The two cases below stay disabled; the expected SQL text marks their
    # counterparts as "not supported".
    #
    # temp = dag.select("select a , b from a", sql_engine=MockEngine)
    # temp.transform(mock_transformer2)
    #
    # temp = dag.select("select aa , bb from a", sql_engine=MockEngine)
    # dag.select("select aa + bb as t from", temp)

    assert_eq(expected_sql, dag)
Exemplo n.º 5
0
def test_select():
    """Compare a range of programmatic ``select`` calls to their SQL text.

    Covers table names written as plain text, dataframes passed as arguments
    (with alias and sample clauses), selects chained on earlier selects, a
    two-dataframe join, and persist / checkpoint / broadcast / show chaining.
    """
    expected_sql = """
    a=create using mock_create1(n=1)
    b=create using mock_create1(n=2)

    # assignment and table not found
    x=select * from a.b

    # sample and alias when table not found
    select * from a.b TABLESAMPLE (5 PERCENT) AS x
    select * from a.b AS x

    # when table is found
    select * from a
    select * from a TABLESAMPLE(5 PERCENT)
    select * from a TABLESAMPLE(5 PERCENT) AS x

    # no from
    select *
    select * where t=100

    # multiple dependencies
    select a.* from a join b on a.a==b.a

    # persist & checkpoint & broadcast
    select * from a persist broadcast print
    select * from a persist (level="a.b.c") broadcast print
    """

    dag = FugueWorkflow()
    a = dag.create(mock_create1, params={"n": 1})
    b = dag.create(mock_create1, params={"n": 2})

    # Table names given purely as text.
    dag.select("select * from a.b")
    dag.select("select * from a.b TABLESAMPLE (5 PERCENT) AS x")
    dag.select("select * from a.b AS x")

    # Dataframe passed as an argument.
    dag.select("select * from", a, "AS a")  # fugue sql adds 'AS a'
    dag.select("select * from", a, "TABLESAMPLE (5 PERCENT) AS a")
    x = dag.select("select * from", a, "TABLESAMPLE (5 PERCENT) AS x")

    # Each select consumes the result of the previous one.
    y = dag.select("select * FROM", x)
    z = dag.select("select * FROM", y, "where t = 100")

    # Two dataframes in one statement.
    dag.select("select a.* from", a, "AS a join", b, "AS b on a.a == b.a")

    # Persist, then broadcast and show.
    persisted = dag.select("select * from", a, "AS a").persist()
    persisted.broadcast().show()

    # Weak checkpoint with a level, then broadcast and show.
    checkpointed = dag.select("select * from", a, "AS a").weak_checkpoint(
        level="a.b.c"
    )
    checkpointed.broadcast().show()

    assert_eq(expected_sql, dag)