Exemplo n.º 1
0
def test_window_bind_to_table(t):
    """A window declared with column names binds to the columns of ``t``."""
    by_name = ibis.window(group_by="g", order_by=ibis.desc("f"))
    bound = by_name.bind(t)

    # The bound window must equal one built directly from column expressions.
    by_column = ibis.window(group_by=t.g, order_by=ibis.desc(t.f))
    assert_equal(bound, by_column)
Exemplo n.º 2
0
    def test_window_bind_to_table(self):
        """Binding a name-based window to ``self.t`` yields column-based keys."""
        unbound = ibis.window(group_by='g', order_by=ibis.desc('f'))

        assert_equal(
            unbound.bind(self.t),
            ibis.window(group_by=self.t.g, order_by=ibis.desc(self.t.f)),
        )
Exemplo n.º 3
0
def test_window_bind_to_table(alltypes):
    """Check that ``bind`` turns string window keys into ``alltypes`` columns."""
    spec = ibis.window(group_by='g', order_by=ibis.desc('f'))
    actual = spec.bind(alltypes)

    wanted = ibis.window(
        group_by=alltypes.g,
        order_by=ibis.desc(alltypes.f),
    )
    assert_equal(actual, wanted)
Exemplo n.º 4
0
    def test_window_bind_to_table(self):
        """``bind`` on a string-keyed window resolves the keys on ``self.t``."""
        named = ibis.window(group_by='g', order_by=ibis.desc('f'))
        bound = named.bind(self.t)

        reference = ibis.window(
            group_by=self.t.g, order_by=ibis.desc(self.t.f)
        )
        assert_equal(bound, reference)
Exemplo n.º 5
0
def test_window_bind_to_table(alltypes):
    """A window keyed by names equals the column-keyed window once bound."""
    window_by_name = ibis.window(group_by='g', order_by=ibis.desc('f'))

    assert_equal(
        window_by_name.bind(alltypes),
        ibis.window(group_by=alltypes.g, order_by=ibis.desc(alltypes.f)),
    )
Exemplo n.º 6
0
def test_sort_by_desc_deferred_sort_key(table):
    """``ibis.desc`` given a column name matches the explicit sort-key forms."""
    deferred = table.group_by('g').size().sort_by(ibis.desc('count'))

    counts = table.group_by('g').size()
    references = (
        # Tuple form: (column, ascending=False).
        counts.sort_by((counts['count'], False)),
        # Explicit descending key built from the column itself.
        counts.sort_by(ibis.desc(counts['count'])),
    )

    for reference in references:
        assert_equal(deferred, reference)
Exemplo n.º 7
0
def test_sort_by_desc_deferred_sort_key(table):
    """Sorting by ``ibis.desc('count')`` equals both explicit spellings."""
    by_name = table.group_by('g').size().sort_by(ibis.desc('count'))

    sized = table.group_by('g').size()
    count_column = sized['count']
    via_tuple = sized.sort_by((count_column, False))
    via_desc = sized.sort_by(ibis.desc(sized['count']))

    assert_equal(by_name, via_tuple)
    assert_equal(by_name, via_desc)
Exemplo n.º 8
0
def test_sort_by2(table):
    """Callable sort keys are resolved against the table being sorted."""
    with_foo = table.mutate(foo=table.e + table.f)

    # A lambda returning a negated column equals the negated column itself.
    assert_equal(
        with_foo.sort_by(lambda x: -x.foo),
        with_foo.sort_by(-with_foo.foo),
    )

    # A lambda returning a descending key equals ``desc`` on the column name.
    assert_equal(
        with_foo.sort_by(lambda x: ibis.desc(x.foo)),
        with_foo.sort_by(ibis.desc('foo')),
    )

    # ``desc`` wrapping a lambda also equals ``desc`` on the column name.
    assert_equal(
        with_foo.sort_by(ibis.desc(lambda x: x.foo)),
        with_foo.sort_by(ibis.desc('foo')),
    )
Exemplo n.º 9
0
    def test_sort_by(self):
        """Lambda-based sort keys match their column/name-based equivalents."""
        mutated = self.table.mutate(foo=self.table.e + self.table.f)
        by_name_desc = ibis.desc('foo')

        # Negated column produced by a lambda.
        negated = mutated.sort_by(lambda x: -x.foo)
        assert_equal(negated, mutated.sort_by(-mutated.foo))

        # Descending key produced inside a lambda.
        inner_desc = mutated.sort_by(lambda x: ibis.desc(x.foo))
        assert_equal(inner_desc, mutated.sort_by(by_name_desc))

        # Descending key wrapping a lambda.
        outer_desc = mutated.sort_by(ibis.desc(lambda x: x.foo))
        assert_equal(outer_desc, mutated.sort_by(ibis.desc('foo')))
Exemplo n.º 10
0
    def test_lower_projection_sort_key(self):
        """Filter-then-sort on the aliased subquery case vs. nested selects."""
        expr = self._case_subquery_aliased()

        star1 = self._get_sqla('star1').alias('t3')
        star2 = self._get_sqla('star2').alias('t2')

        # Innermost: per-foo_id totals over star1.
        totals = sa.select(
            [star1.c.foo_id, F.sum(star1.c.f).label('total')]
        ).group_by(star1.c.foo_id).alias('t4')

        # Totals joined to star2 on foo_id.
        joined = sa.select(
            [totals.c.foo_id, totals.c.total, star2.c.value1]
        ).select_from(
            totals.join(star2, totals.c.foo_id == star2.c.foo_id)
        ).alias('t1')

        # Filter applied on top of the join.
        filtered = sa.select(
            [joined.c.foo_id, joined.c.total, joined.c.value1]
        ).where(joined.c.total > L(100)).alias('t0')

        # Outermost: ordering by total, descending.
        expected = sa.select(
            [filtered.c.foo_id, filtered.c.total, filtered.c.value1]
        ).order_by(filtered.c.total.desc())

        actual = expr[expr.total > 100].sort_by(ibis.desc('total'))
        self._compare_sqla(actual, expected)
Exemplo n.º 11
0
def tpc_h05(con, NAME="ASIA", DATE="1994-01-01"):
    """Build the revenue-per-nation expression for one region and date window.

    Joins customer, orders, lineitem, supplier, nation and region, keeps rows
    whose region name equals ``NAME`` and whose order date is ``>= DATE`` and
    ``< add_date(DATE, dy=1)``, then sums
    ``l_extendedprice * (1 - l_discount)`` per nation name.

    Returns the aggregated expression sorted by ``revenue`` descending.
    """
    customer = con.table("customer")
    orders = con.table("orders")
    lineitem = con.table("lineitem")
    supplier = con.table("supplier")
    nation = con.table("nation")
    region = con.table("region")

    joined = (
        customer
        .join(orders, customer.c_custkey == orders.o_custkey)
        .join(lineitem, lineitem.l_orderkey == orders.o_orderkey)
        .join(supplier, lineitem.l_suppkey == supplier.s_suppkey)
        .join(
            nation,
            (customer.c_nationkey == supplier.s_nationkey)
            & (supplier.s_nationkey == nation.n_nationkey),
        )
        .join(region, nation.n_regionkey == region.r_regionkey)
    )

    in_scope = joined.filter([
        joined.r_name == NAME,
        joined.o_orderdate >= DATE,
        joined.o_orderdate < add_date(DATE, dy=1),
    ])

    discounted_price = in_scope.l_extendedprice * (1 - in_scope.l_discount)
    per_nation = in_scope.group_by([in_scope.n_name]).aggregate(
        revenue=discounted_price.sum()
    )
    return per_nation.sort_by([ibis.desc(per_nation.revenue)])
Exemplo n.º 12
0
def log_scraper_last_complete(expr):
    """Execute a newest-first query over the completion events in ``expr``.

    Completion events are rows whose ``event`` is ``'sql_execute'`` or
    ``'render_vega'``. The query is run via ``execute(1)``; the argument is
    presumably a row limit, so this likely yields only the latest completion
    (TODO confirm against the backend's ``execute`` signature).
    """
    is_completion = expr.event.isin(['sql_execute', 'render_vega'])
    newest_first = expr.filter(is_completion).sort_by(ibis.desc('logtime'))
    last_completion = newest_first.select(
        ['logtime', 'event', 'query', 'sequence', 'logfile']
    )
    return last_completion.execute(1)
Exemplo n.º 13
0
def test_compound_expression(diamonds: ir.TableExpr) -> None:
    """A full verb pipeline matches the equivalent hand-written expression."""
    # Reference expression, built step by step with the table API.
    pricey = diamonds[diamonds.price * diamonds.price / 2.0 >= 100]
    per_cut = pricey.groupby('cut').aggregate([
        pricey.carat.max().name('max_carat'),
        pricey.carat.mean().name('mean_carat'),
        pricey.carat.min().name('min_carat'),
        pricey.x.count().name('n'),
        pricey.carat.std().name('std_carat'),
        pricey.carat.sum().name('sum_carat'),
        pricey.carat.var().name('var_carat'),
    ])
    expected = (
        per_cut
        .mutate(foo=per_cut.mean_carat, bar=per_cut.var_carat)
        .sort_by([ibis.desc('foo'), 'bar'])
        .head()
    )

    # Same computation expressed as a ``>>`` pipeline, one verb per line.
    result = (
        diamonds
        >> sift(X.price * X.price / 2.0 >= 100)
        >> groupby(X.cut)
        >> summarize(
            max_carat=max(X.carat),
            mean_carat=mean(X.carat),
            min_carat=min(X.carat),
            n=n(X.x),
            std_carat=std(X.carat),
            sum_carat=sum(X.carat),
            var_carat=var(X.carat),
        )
        >> mutate(foo=X.mean_carat, bar=X.var_carat)
        >> sort_by(desc(X.foo), X.bar)
        >> head(5)
    )

    assert result.equals(expected)
    tm.assert_frame_equal(expected.execute(), result >> do())
Exemplo n.º 14
0
def test_bug_duplicated_where(airlines):
    """Regression test for GH #539: compiled SQL for filter + sort + limit."""
    delays = airlines['arrdelay', 'dest']
    with_dev = delays.group_by('dest').mutate(
        dest_avg=delays.arrdelay.mean(),
        dev=delays.arrdelay - delays.arrdelay.mean(),
    )

    worst = (
        with_dev[with_dev.dev.notnull()]
        .sort_by(ibis.desc('dev'))
        .limit(10)
    )
    compiled = Compiler.to_sql(worst)

    expected = """\
SELECT *
FROM (
  SELECT t1.*
  FROM (
    SELECT *, avg(`arrdelay`) OVER (PARTITION BY `dest`) AS `dest_avg`,
           `arrdelay` - avg(`arrdelay`) OVER (PARTITION BY `dest`) AS `dev`
    FROM (
      SELECT `arrdelay`, `dest`
      FROM airlines
    ) t3
  ) t1
  WHERE t1.`dev` IS NOT NULL
) t0
ORDER BY `dev` DESC
LIMIT 10"""
    assert compiled == expected
Exemplo n.º 15
0
    def test_lower_projection_sort_key(self):
        """Compare filter-then-sort on the aliased subquery case to SQLAlchemy.

        The ibis expression filters ``total > 100`` and sorts by ``total``
        descending; the expected query applies the same filter and ordering
        to an aliased select over the aggregate joined with ``star2``.
        """
        expr = self._case_subquery_aliased()

        s1 = self._get_sqla('star1').alias('t2')
        s2 = self._get_sqla('star2').alias('t1')

        expr2 = expr[expr.total > 100].sort_by(ibis.desc('total'))

        # Per-foo_id totals over star1.
        agged = (sa.select([s1.c.foo_id, F.sum(s1.c.f).label('total')])
                 .group_by(s1.c.foo_id)
                 .alias('t3'))

        # Join the totals to star2 (constructed once).
        joined = agged.join(s2, agged.c.foo_id == s2.c.foo_id)
        expected = sa.select([agged, s2.c.value1]).select_from(joined)

        ex = expected.alias('t0')

        expected2 = (sa.select([ex])
                     .where(ex.c.total > L(100))
                     .order_by(ex.c.total.desc()))

        self._compare_sqla(expr2, expected2)
Exemplo n.º 16
0
def tpc_h09(con, COLOR="green"):
    """Build the profit-by-nation-and-year expression for matching parts.

    Joins lineitem with supplier, partsupp, part, orders and nation, computes
    ``l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity`` as
    ``amount``, keeps parts whose name contains ``COLOR``, and sums the
    amount per (nation, order year).

    Returns the aggregate sorted by nation, then by year descending.
    """
    part = con.table("part")
    supplier = con.table("supplier")
    lineitem = con.table("lineitem")
    partsupp = con.table("partsupp")
    orders = con.table("orders")
    nation = con.table("nation")

    joined = (
        lineitem
        .join(supplier, supplier.s_suppkey == lineitem.l_suppkey)
        .join(
            partsupp,
            (partsupp.ps_suppkey == lineitem.l_suppkey)
            & (partsupp.ps_partkey == lineitem.l_partkey),
        )
        .join(part, part.p_partkey == lineitem.l_partkey)
        .join(orders, orders.o_orderkey == lineitem.l_orderkey)
        .join(nation, supplier.s_nationkey == nation.n_nationkey)
    )

    amount = (
        joined.l_extendedprice * (1 - joined.l_discount)
        - joined.ps_supplycost * joined.l_quantity
    )
    projected = joined[
        amount.name("amount"),
        joined.o_orderdate.year().cast("string").name("o_year"),
        joined.n_name.name("nation"),
        joined.p_name,
    ]

    matching = projected.filter([projected.p_name.like("%" + COLOR + "%")])

    profit = matching.group_by([matching.nation, matching.o_year]).aggregate(
        sum_profit=matching.amount.sum()
    )
    return profit.sort_by([profit.nation, ibis.desc(profit.o_year)])
Exemplo n.º 17
0
def tpc_h10(con, DATE="1993-10-01"):
    """Build the top-20 customers-by-returned-revenue expression.

    Joins customer, orders, lineitem and nation, keeps line items with
    ``l_returnflag == "R"`` whose order date is ``>= DATE`` and
    ``< add_date(DATE, dm=3)``, and sums
    ``l_extendedprice * (1 - l_discount)`` per customer.

    Returns the aggregate sorted by ``revenue`` descending, limited to 20.
    """
    customer = con.table("customer")
    orders = con.table("orders")
    lineitem = con.table("lineitem")
    nation = con.table("nation")

    joined = (
        customer
        .join(orders, customer.c_custkey == orders.o_custkey)
        .join(lineitem, lineitem.l_orderkey == orders.o_orderkey)
        .join(nation, customer.c_nationkey == nation.n_nationkey)
    )

    in_window = (joined.o_orderdate >= DATE) & (
        joined.o_orderdate < add_date(DATE, dm=3)
    )
    returned = joined.filter([in_window, joined.l_returnflag == "R"])

    customer_keys = [
        returned.c_custkey,
        returned.c_name,
        returned.c_acctbal,
        returned.c_phone,
        returned.n_name,
        returned.c_address,
        returned.c_comment,
    ]
    discounted_price = returned.l_extendedprice * (1 - returned.l_discount)
    per_customer = returned.group_by(customer_keys).aggregate(
        revenue=discounted_price.sum()
    )

    ranked = per_customer.sort_by(ibis.desc(per_customer.revenue))
    return ranked.limit(20)
Exemplo n.º 18
0
def great_vcs():
    """Render the 50 highest-success-rate investors as an HTML table.

    Companies are left-joined to investments and grouped by investor name
    (missing names become ``'NO INVESTOR'``). For each group the number of
    distinct companies and the number of distinct companies whose status is
    ``'ipo'`` or ``'acquired'`` are counted; ``succ_rate`` is their ratio.
    Only investors with more than 100 investments are kept.

    Returns:
        The HTML produced by ``to_html()`` on the executed top-50 result.
    """
    con = activate_db()

    i = con.table('investments')
    c = con.table('companies')

    clean_name = i.investor_name.fillna('NO INVESTOR').name('investor_name')
    num_investments = c.permalink.nunique()

    # Null out the permalink of companies that did not exit so that
    # nunique() only counts the exited ones.
    exited = c.status.isin(['ipo', 'acquired']).ifelse(c.permalink, ibis.NA)
    num_exits = exited.nunique()

    stats = (c.left_join(i, c.permalink == i.company_permalink)
             .group_by(clean_name)
             .aggregate(num_investments=num_investments,
                        num_exits=num_exits))

    # Cast the denominator to float to get a fractional rate.
    stats = stats.mutate(
        succ_rate=stats.num_exits / stats.num_investments.cast('float')
    )

    great_success = (stats
                     [stats.num_investments > 100]
                     .sort_by(ibis.desc('succ_rate')))

    top50 = great_success.limit(50)
    return top50.execute().to_html()
Exemplo n.º 19
0
    def test_sort_aggregation_translation_failure(self):
        """Sorting an aggregate by its aliased metric, compared to SQLAlchemy."""
        # This works around a nuance with our choice to hackishly fuse SortBy
        # after Aggregate to produce a single select statement rather than an
        # inline view.
        t = self.alltypes
        expr = (
            t.group_by('string_col')
            .aggregate(t.double_col.max().name('foo'))
            .sort_by(ibis.desc('foo'))
        )

        sat = self.sa_alltypes.alias('t1')
        grouped = sa.select(
            [sat.c.string_col, F.max(sat.c.double_col).label('foo')]
        ).group_by(sat.c.string_col)
        base = grouped.alias('t0')

        outer = sa.select([base.c.string_col, base.c.foo]).select_from(base)
        ex = outer.order_by(sa.desc('foo'))

        self._compare_sqla(expr, ex)
Exemplo n.º 20
0
def great_vcs():
    """Return an HTML table of the 50 investors with the best exit rate.

    Groups the companies/investments left join by investor name (nulls are
    replaced with ``'NO INVESTOR'``), counts distinct companies and distinct
    exited companies (status ``'ipo'`` or ``'acquired'``), derives
    ``succ_rate = num_exits / num_investments`` and keeps investors with more
    than 100 investments, ordered by ``succ_rate`` descending.

    Returns:
        The ``to_html()`` rendering of the executed top-50 result.
    """
    con = activate_db()

    i = con.table('investments')
    c = con.table('companies')

    clean_name = i.investor_name.fillna('NO INVESTOR').name('investor_name')
    num_investments = c.permalink.nunique()

    # Only exited companies keep their permalink; the rest become NA and are
    # therefore not counted by nunique().
    exited = c.status.isin(['ipo', 'acquired']).ifelse(c.permalink, ibis.NA)
    num_exits = exited.nunique()

    joined = c.left_join(i, c.permalink == i.company_permalink)
    stats = joined.group_by(clean_name).aggregate(
        num_investments=num_investments, num_exits=num_exits)

    # Float cast on the denominator yields a fractional rate.
    stats = stats.mutate(
        succ_rate=stats.num_exits / stats.num_investments.cast('float'))

    great_success = stats[stats.num_investments > 100].sort_by(
        ibis.desc('succ_rate'))

    top50 = great_success.limit(50)
    return top50.execute().to_html()
Exemplo n.º 21
0
    def test_lower_projection_sort_key(self):
        """Check the SQLAlchemy output for a filtered, sorted aliased subquery.

        ``expr2`` filters ``total > 100`` and sorts by ``total`` descending;
        ``expected2`` is the hand-built SQLAlchemy query it is compared with.
        """
        expr = self._case_subquery_aliased()

        s1 = self._get_sqla('star1').alias('t2')
        s2 = self._get_sqla('star2').alias('t1')

        expr2 = (expr
                 [expr.total > 100]
                 .sort_by(ibis.desc('total')))

        # Aggregate star1 by foo_id.
        agged = (sa.select([s1.c.foo_id, F.sum(s1.c.f).label('total')])
                 .group_by(s1.c.foo_id)
                 .alias('t3'))

        # A single construction of the join and the select over it suffices.
        joined = agged.join(s2, agged.c.foo_id == s2.c.foo_id)
        expected = sa.select([agged, s2.c.value1]).select_from(joined)

        ex = expected.alias('t0')

        expected2 = (sa.select([ex])
                     .where(ex.c.total > L(100))
                     .order_by(ex.c.total.desc()))

        self._compare_sqla(expr2, expected2)
Exemplo n.º 22
0
def tpc_h21(con, NATION="SAUDI ARABIA"):
    """Suppliers Who Kept Orders Waiting Query (Q21).

    Counts, per supplier name, the late line items (receipt after commit) of
    orders with status ``"F"`` for suppliers in ``NATION``, where another
    supplier also has a line item on the same order and no other supplier on
    that order was late.

    Returns the counts sorted by ``numwait`` descending then supplier name,
    limited to 100 rows.
    """
    supplier = con.table("supplier")
    lineitem = con.table("lineitem")
    orders = con.table("orders")
    nation = con.table("nation")

    # Two independent views of lineitem for the correlated existence checks.
    other_items = lineitem.view()
    other_late_items = lineitem.view()

    joined = (
        supplier
        .join(lineitem, supplier.s_suppkey == lineitem.l_suppkey)
        .join(orders, orders.o_orderkey == lineitem.l_orderkey)
        .join(nation, supplier.s_nationkey == nation.n_nationkey)
    )
    projected = joined[
        joined.l_orderkey.name("l1_orderkey"),
        joined.o_orderstatus,
        joined.l_receiptdate,
        joined.l_commitdate,
        joined.l_suppkey.name("l1_suppkey"),
        joined.s_name,
        joined.n_name,
    ]

    finished = projected.o_orderstatus == "F"
    late = projected.l_receiptdate > projected.l_commitdate
    in_nation = projected.n_name == NATION
    # Some other supplier has a line item on the same order.
    has_other_supplier = (
        (other_items.l_orderkey == projected.l1_orderkey)
        & (other_items.l_suppkey != projected.l1_suppkey)
    ).any()
    # Some other supplier on the same order was late as well.
    other_supplier_late = (
        (other_late_items.l_orderkey == projected.l1_orderkey)
        & (other_late_items.l_suppkey != projected.l1_suppkey)
        & (other_late_items.l_receiptdate > other_late_items.l_commitdate)
    ).any()

    waiting = projected.filter([
        finished,
        late,
        in_nation,
        has_other_supplier,
        ~(other_supplier_late),
    ])

    per_supplier = waiting.group_by([waiting.s_name]).aggregate(
        numwait=waiting.count()
    )
    ranked = per_supplier.sort_by(
        [ibis.desc(per_supplier.numwait), per_supplier.s_name]
    )
    return ranked.limit(100)
Exemplo n.º 23
0
def tpc_h16(con,
            BRAND="Brand#45",
            TYPE="MEDIUM POLISHED",
            SIZES=(49, 14, 23, 45, 19, 3, 36, 9)):
    """Parts/Supplier Relationship Query (Q16).

    Counts distinct suppliers per (brand, type, size) for parts that are not
    of ``BRAND``, whose type does not start with ``TYPE``, whose size is in
    ``SIZES``, and whose supplier has no comment matching
    ``%Customer%Complaints%``.

    Returns the counts sorted by ``supplier_cnt`` descending, then brand,
    type and size.
    """
    partsupp = con.table("partsupp")
    part = con.table("part")
    supplier = con.table("supplier")

    joined = partsupp.join(part, part.p_partkey == partsupp.ps_partkey)

    eligible = joined.filter([
        joined.p_brand != BRAND,
        ~joined.p_type.like(f"{TYPE}%"),
        joined.p_size.isin(SIZES),
        # Exclude suppliers that have a complaint recorded in their comment.
        ~joined.ps_suppkey.isin(
            supplier.filter(
                [supplier.s_comment.like("%Customer%Complaints%")]
            ).s_suppkey
        ),
    ])

    grouped = eligible.groupby(
        [eligible.p_brand, eligible.p_type, eligible.p_size]
    )
    counts = grouped.aggregate(supplier_cnt=eligible.ps_suppkey.nunique())
    return counts.sort_by([
        ibis.desc(counts.supplier_cnt),
        counts.p_brand,
        counts.p_type,
        counts.p_size,
    ])
Exemplo n.º 24
0
def test_bfill(events):
    """Check a ``bfill`` column built from a windowed count and a grouped max.

    ``grouper`` counts measurements over a per-event window ordered by
    ``measured_on`` descending with ``following=0``; ``bfill`` is the max
    measurement within each (event_id, grouper) group.
    """
    con = ibis.pandas.connect({"t": events})
    t = con.table("t")

    newest_first = ibis.window(
        group_by=t.event_id,
        order_by=ibis.desc(t.measured_on),
        following=0,
    )
    grouped = t.mutate(grouper=t.measurement.count().over(newest_first))

    expr = (
        grouped.group_by([grouped.event_id, grouped.grouper])
        .mutate(bfill=grouped.measurement.max())
        .sort_by("measured_on")
    )
    result = expr.execute().reset_index(drop=True)

    expected_raw = """\
event_id measured_on  measurement  grouper  bfill
       2  2021-05-05         42.0        3   42.0
       2  2021-05-06         42.0        2   42.0
       2  2021-05-07          NaN        1   11.0
       2  2021-05-08         11.0        1   11.0
       2  2021-05-09          NaN        0    NaN
       2  2021-05-10          NaN        0    NaN
       1  2021-06-01          NaN        1    5.0
       1  2021-06-02          5.0        1    5.0
       1  2021-06-03          NaN        0    NaN
       1  2021-06-04          NaN        0    NaN
       3  2021-07-11          NaN        0    NaN
       3  2021-07-12          NaN        0    NaN"""
    # Parse the whitespace-aligned table above into the expected frame.
    expected = pd.read_csv(
        io.StringIO(expected_raw),
        sep=r"\s+",
        header=0,
        parse_dates=["measured_on"],
    )
    tm.assert_frame_equal(result, expected)
Exemplo n.º 25
0
def test_lower_projection_sort_key(con, subquery_aliased, star1, star2):
    """Filter-then-sort on ``subquery_aliased`` vs. hand-built nested selects."""
    expr = subquery_aliased

    sa_star1 = con.meta.tables["star1"].alias("t3")
    sa_star2 = con.meta.tables["star2"].alias("t2")

    # Innermost: totals per foo_id.
    totals = sa.select(
        [sa_star1.c.foo_id, F.sum(sa_star1.c.f).label('total')]
    ).group_by(sa_star1.c.foo_id).alias('t4')

    # Totals joined to star2 on foo_id.
    joined = sa.select(
        [totals.c.foo_id, totals.c.total, sa_star2.c.value1]
    ).select_from(
        totals.join(sa_star2, totals.c.foo_id == sa_star2.c.foo_id)
    ).alias('t1')

    # Filter layered on the join.
    filtered = sa.select(
        [joined.c.foo_id, joined.c.total, joined.c.value1]
    ).where(joined.c.total > L(100)).alias('t0')

    # Outermost select carries the descending ordering.
    expected = sa.select(
        [filtered.c.foo_id, filtered.c.total, filtered.c.value1]
    ).order_by(filtered.c.total.desc())

    actual = expr[expr.total > 100].sort_by(ibis.desc('total'))
    _check(actual, expected)
Exemplo n.º 26
0
    def test_sort_aggregation_translation_failure(self):
        """Aggregate sorted by its named metric, checked against SQLAlchemy."""
        # This works around a nuance with our choice to hackishly fuse SortBy
        # after Aggregate to produce a single select statement rather than an
        # inline view.
        t = self.alltypes

        max_double = t.double_col.max().name('foo')
        agg = t.group_by('string_col').aggregate(max_double)
        expr = agg.sort_by(ibis.desc('foo'))

        sat = self.sa_alltypes.alias('t1')
        inner_columns = [
            sat.c.string_col,
            F.max(sat.c.double_col).label('foo'),
        ]
        base = sa.select(inner_columns).group_by(sat.c.string_col).alias('t0')

        ex = (
            sa.select([base.c.string_col, base.c.foo])
            .select_from(base)
            .order_by(sa.desc('foo'))
        )

        self._compare_sqla(expr, ex)
Exemplo n.º 27
0
def log_scraper_incomplete_queries(expr):
    """Select begin events whose sequence never shows up in an end event.

    End events are ``'sql_execute'`` / ``'render_vega'``; begin events are
    ``'sql_execute_begin'`` / ``'render_vega_begin'``. The result is ordered
    by ``logtime`` descending and is returned unexecuted.
    """
    completed = expr.filter(
        expr.event.isin(['sql_execute', 'render_vega'])
    ).select(['sequence'])

    started = expr.event.isin(['sql_execute_begin', 'render_vega_begin'])
    never_finished = expr.sequence.notin(completed.sequence)

    newest_first = expr.filter(started & never_finished).sort_by(
        ibis.desc('logtime')
    )
    return newest_first.select(['logtime', 'event', 'query', 'sequence'])
Exemplo n.º 28
0
def test_compile_twice(dbpath):
    """The same query built on two connections compiles to identical SQL."""
    first_con = ibis.sqlite.connect(dbpath)
    first_table = first_con.table('batting')
    first_count = first_table.sort_by(ibis.desc(first_table.playerID)).count()

    second_con = ibis.sqlite.connect(dbpath)
    second_table = second_con.table('batting')
    second_count = second_table.sort_by(
        ibis.desc(second_table.playerID)
    ).count()

    # Compile only after both expressions exist, first one first.
    first_sql = str(first_count.compile())
    second_sql = str(second_count.compile())

    assert first_sql == second_sql
Exemplo n.º 29
0
def test_batting_most_hits(players, players_df):
    """Rank of ``H`` over a descending cumulative window vs. pandas ``rank``."""
    expr = players.mutate(
        hits_rank=lambda t: t.H.rank().over(
            ibis.cumulative_window(order_by=ibis.desc(t.H))
        )
    )
    result = expr.execute()

    # pandas reference: min-method rank within each player, largest first.
    per_player_hits = players_df.groupby('playerID').H
    expected = players_df.assign(
        hits_rank=per_player_hits.rank(method='min', ascending=False)
    )
    tm.assert_frame_equal(result[expected.columns], expected)
Exemplo n.º 30
0
def test_compile_twice(dbpath):
    """Compiling equal queries from separate connections gives equal SQL."""
    connections = []
    counts = []
    for _ in range(2):
        con = ibis.sqlite.connect(dbpath)
        connections.append(con)  # keep both connections referenced to the end
        batting = con.table('batting')
        descending_player = ibis.desc(batting.playerID)
        counts.append(batting.sort_by(descending_player).count())

    # Both expressions are built before either one is compiled.
    compiled = [str(count.compile()) for count in counts]

    assert compiled[0] == compiled[1]
Exemplo n.º 31
0
def test_order_by_desc(alltypes):
    """Descending window ordering is rendered as ``ORDER BY ... DESC``."""
    t = alltypes

    # Explicit window object ordered by ``f`` descending.
    descending_f = window(order_by=ibis.desc(t.f))
    reverse_rank = ibis.row_number().over(descending_f).name('revrank')
    proj = t[t.f, reverse_rank]
    expected = """\
SELECT `f`, (row_number() OVER (ORDER BY `f` DESC) - 1) AS `revrank`
FROM ibis_testing.`alltypes`"""
    assert_sql_equal(proj, expected)

    # Grouped + ordered table projecting analytic and reduction columns.
    ordered_groups = t.group_by('g').order_by(ibis.desc(t.f))
    expr = ordered_groups[t.d.lag().name('foo'), t.a.max()]
    expected = """\
SELECT lag(`d`) OVER (PARTITION BY `g` ORDER BY `f` DESC) AS `foo`,
       max(`a`) OVER (PARTITION BY `g` ORDER BY `f` DESC) AS `max`
FROM ibis_testing.`alltypes`"""
    assert_sql_equal(expr, expected)
Exemplo n.º 32
0
    def test_order_by_desc(self):
        t = self.con.table('alltypes')

        w = window(order_by=ibis.desc(t.f))

        proj = t[t.f, ibis.row_number().over(w).name('revrank')]
        expected = """\
SELECT f, row_number() OVER (ORDER BY f DESC) - 1 AS `revrank`
FROM alltypes"""
        self._check_sql(proj, expected)

        expr = (t.group_by('g').order_by(ibis.desc(t.f))[t.d.lag().name('foo'),
                                                         t.a.max()])
        expected = """\
SELECT lag(d) OVER (PARTITION BY g ORDER BY f DESC) AS `foo`,
       max(a) OVER (PARTITION BY g ORDER BY f DESC) AS `max`
FROM alltypes"""
        self._check_sql(expr, expected)
Exemplo n.º 33
0
def test_complex_sort_by(t, df):
    """Sorting by a descending computed key plus a plain column, vs. pandas."""
    product_desc = ibis.desc(t.plain_int64 * t.plain_float64)
    result = t.sort_by([product_desc, t.plain_float64]).execute()

    # pandas reference: materialise the product as ``foo``, sort, drop it.
    with_key = df.assign(foo=df.plain_int64 * df.plain_float64)
    ordered = with_key.sort_values(
        ['foo', 'plain_float64'], ascending=[False, True]
    )
    expected = ordered.drop(['foo'], axis=1).reset_index(drop=True)

    tm.assert_frame_equal(result[expected.columns], expected)
Exemplo n.º 34
0
    def test_order_by_desc(self):
        t = self.con.table('alltypes')

        w = window(order_by=ibis.desc(t.f))

        proj = t[t.f, ibis.row_number().over(w).name('revrank')]
        expected = """\
SELECT f, row_number() OVER (ORDER BY f DESC) - 1 AS `revrank`
FROM alltypes"""
        self._check_sql(proj, expected)

        expr = (t.group_by('g')
                .order_by(ibis.desc(t.f))
                [t.d.lag().name('foo'), t.a.max()])
        expected = """\
SELECT lag(d) OVER (PARTITION BY g ORDER BY f DESC) AS `foo`,
       max(a) OVER (PARTITION BY g ORDER BY f DESC) AS `max`
FROM alltypes"""
        self._check_sql(expr, expected)
Exemplo n.º 35
0
def test_count_on_order_by(db):
    """Counting a sorted table compiles to a count with no ORDER BY."""
    batting = db.batting
    expr = batting.sort_by(ibis.desc(batting.playerID)).count()

    compiled = expr.compile().compile(compile_kwargs={'literal_binds': True})
    result = str(compiled)

    # The space before the newline is part of the expected SQL text.
    expected = "SELECT count('*') AS count \nFROM base.batting AS t0"
    assert result == expected
Exemplo n.º 36
0
def test_order_by_desc(alltypes):
    """Windows ordered with ``ibis.desc`` compile to ``ORDER BY ... DESC``."""
    t = alltypes
    by_f_desc = window(order_by=ibis.desc(t.f))

    # Row number over the descending window.
    proj = t[t.f, ibis.row_number().over(by_f_desc).name('revrank')]
    expected = """\
SELECT `f`, (row_number() OVER (ORDER BY `f` DESC) - 1) AS `revrank`
FROM ibis_testing.`alltypes`"""
    assert_sql_equal(proj, expected)

    # Grouped table with a descending order applied before projecting.
    grouped = t.group_by('g')
    ordered = grouped.order_by(ibis.desc(t.f))
    expr = ordered[t.d.lag().name('foo'), t.a.max()]
    expected = """\
SELECT lag(`d`) OVER (PARTITION BY `g` ORDER BY `f` DESC) AS `foo`,
       max(`a`) OVER (PARTITION BY `g` ORDER BY `f` DESC) AS `max`
FROM ibis_testing.`alltypes`"""
    assert_sql_equal(expr, expected)
Exemplo n.º 37
0
def log_scraper_first_incomplete_before_restart(expr, limit=100):
    """Collect queries that had begun but not completed before each restart.

    A restart is a row with severity ``'INFO'`` whose message contains
    ``'OmniSci Server 5'``. For each restart (at most ``limit`` are fetched)
    the begin event of the last query that completed before it is located,
    and the earliest begin event of a *different* sequence between that begin
    and the restart is fetched.

    Args:
        expr: ibis table expression over the parsed log. The code reads the
            columns ``severity``, ``msg``, ``logtime``, ``event``, ``query``,
            ``sequence`` and ``logfile``.
        limit: argument passed to ``execute`` when fetching restart rows.

    Returns:
        A de-duplicated DataFrame of the fetched rows, or ``None`` when
        nothing was found.
    """
    begin_events = ['sql_execute_begin', 'render_vega_begin']
    end_events = ['sql_execute', 'render_vega']

    restart_times = expr.filter(
        (expr.severity == 'INFO')
        & (expr.msg.contains('OmniSci Server 5'))
    ).select(['logtime'])

    results = []
    for _, restart in restart_times.execute(limit).iterrows():
        # ceil to avoid ibis warning and dropping microseconds
        endtime = restart.logtime.ceil('s')

        # Sequence of the most recent query that completed before the restart.
        last_complete_end = (
            expr.filter(
                (expr.logtime < endtime) & expr.event.isin(end_events)
            )
            .sort_by(ibis.desc('logtime'))
            .limit(1)
            .select(['sequence'])
        )

        # Begin event(s) belonging to that completed sequence.
        last_complete_start = (
            expr.filter(
                expr.event.isin(begin_events)
                & expr.sequence.isin(last_complete_end.sequence)
                & (expr.logtime < endtime)
            )
            .sort_by(ibis.desc('logtime'))
            .select(['logtime', 'sequence'])
        )

        # A distinct row name keeps the query expression above intact while
        # its result is being iterated.
        for _, start_row in last_complete_start.execute(1).iterrows():
            # floor to avoid ibis warning and dropping microseconds
            last_complete_start_time = start_row.logtime.floor('s')

            incomplete = (
                expr.filter(
                    (expr.logtime < endtime)
                    & (expr.logtime > last_complete_start_time)
                    & (expr.sequence != start_row.sequence)
                    & (expr.event.isin(begin_events))
                )
                .sort_by('logtime')
                .select(['logtime', 'event', 'query', 'sequence', 'logfile'])
            )
            results.append(incomplete.execute(1))

    if not results:
        return None
    return pd.concat(results).drop_duplicates()
Exemplo n.º 38
0
def test_count_on_order_by(db):
    """A count over a sorted table compiles without the ordering clause."""
    batting = db.batting
    descending_player = ibis.desc(batting.playerID)
    expr = batting.sort_by(descending_player).count()

    compiled = expr.compile().compile(compile_kwargs={'literal_binds': True})
    result = str(compiled)

    # The first line ends with a significant trailing space before the newline.
    expected = (
        "SELECT count('*') AS count \n"
        'FROM "default".batting AS t0'
    )
    assert result == expected
Exemplo n.º 39
0
def test_first_last_value(alltypes, df, func, expected_index):
    """The method named ``func`` on a sorted column broadcasts one value."""
    ordered = alltypes.sort_by(ibis.desc(alltypes.string_col))
    expr = getattr(ordered.double_col, func)()
    result = expr.execute().rename('double_col')

    # Expected: the value at ``expected_index`` repeated for every row.
    picked = df.double_col.iloc[expected_index]
    expected = pd.Series(
        picked,
        index=pd.RangeIndex(len(df)),
        name='double_col',
    )
    tm.assert_series_equal(result, expected)
Exemplo n.º 40
0
def test_memoize_insert_sort_key(con):
    """The repr of filter + sort + limit mentions ``airlines`` exactly once."""
    airlines = con.table('airlines')
    delays = airlines['arrdelay', 'dest']

    mean_delay = delays.arrdelay.mean()
    with_dev = delays.group_by('dest').mutate(
        dest_avg=mean_delay,
        dev=delays.arrdelay - delays.arrdelay.mean(),
    )

    non_null = with_dev[with_dev.dev.notnull()]
    worst = non_null.sort_by(ibis.desc('dev')).limit(10)

    assert repr(worst).count('airlines') == 1
Exemplo n.º 41
0
def test_count_on_order_by(con):
    """Counting a sorted ``batting`` table yields a count with no ORDER BY."""
    batting = con.table("batting")
    expr = batting.sort_by(ibis.desc(batting.playerID)).count()

    compiled = expr.compile().compile(compile_kwargs={'literal_binds': True})
    result = str(compiled)

    # Note the significant space that precedes the newline.
    expected = (
        "SELECT count('*') AS count \n"
        "FROM main.batting AS t0"
    )
    assert result == expected
Exemplo n.º 42
0
def test_batting_most_hits(players, players_df):
    """``H.rank()`` over a descending cumulative window matches pandas."""
    ranked = players.mutate(
        hits_rank=lambda t: t.H.rank().over(
            ibis.cumulative_window(order_by=ibis.desc(t.H))
        )
    )
    result = ranked.execute()

    # Reference ranks computed per player with pandas.
    reference_rank = players_df.groupby('playerID').H.rank(
        method='min', ascending=False
    )
    expected = players_df.assign(hits_rank=reference_rank)

    actual = result[expected.columns]
    tm.assert_frame_equal(actual, expected)
Exemplo n.º 43
0
def test_count_on_order_by(db):
    """The compiled count of a sorted table has no ORDER BY clause."""
    batting = db.batting
    by_player_desc = batting.sort_by(ibis.desc(batting.playerID))
    expr = by_player_desc.count()

    result = str(
        expr.compile().compile(compile_kwargs={'literal_binds': True})
    )

    # Joined with a newline; the first line keeps its trailing space.
    expected = "\n".join(
        ["SELECT count('*') AS count ", "FROM base.batting AS t0"]
    )
    assert result == expected
Exemplo n.º 44
0
    def test_memoize_insert_sort_key(self):
        """``airlines`` appears once in the repr of filter + sort + limit."""
        airlines = self.con.table('airlines')
        delays = airlines['arrdelay', 'dest']

        with_dev = delays.group_by('dest').mutate(
            dest_avg=delays.arrdelay.mean(),
            dev=delays.arrdelay - delays.arrdelay.mean(),
        )

        has_dev = with_dev.dev.notnull()
        worst = with_dev[has_dev].sort_by(ibis.desc('dev')).limit(10)

        text = repr(worst)
        assert text.count('airlines') == 1
Exemplo n.º 45
0
def test_complex_sort_by(t, df):
    """A descending expression key plus a column key sorts like pandas."""
    sort_keys = [ibis.desc(t.plain_int64 * t.plain_float64), t.plain_float64]
    result = t.sort_by(sort_keys).execute()

    # Build the pandas reference one step at a time.
    expected = df.assign(foo=df.plain_int64 * df.plain_float64)
    expected = expected.sort_values(
        ['foo', 'plain_float64'], ascending=[False, True]
    )
    expected = expected.drop(['foo'], axis=1)
    expected = expected.reset_index(drop=True)

    tm.assert_frame_equal(result[expected.columns], expected)
Exemplo n.º 46
0
    def test_sort_aggregation_translation_failure(self):
        """Sorted aggregate vs. a single grouped, ordered SQLAlchemy select."""
        # This works around a nuance with our choice to hackishly fuse SortBy
        # after Aggregate to produce a single select statement rather than an
        # inline view.
        t = self.alltypes
        sat = self.sa_alltypes.alias("t0")

        max_double = t.double_col.max().name("foo")
        expr = (
            t.group_by("string_col")
            .aggregate(max_double)
            .sort_by(ibis.desc("foo"))
        )

        selected = sa.select(
            [sat.c.string_col, F.max(sat.c.double_col).label("foo")]
        )
        ex = selected.group_by(sat.c.string_col).order_by(sa.desc("foo"))

        self._compare_sqla(expr, ex)
Exemplo n.º 47
0
@pytest.mark.xfail(
    raises=AttributeError, reason='TableColumn does not implement limit'
)
@pytest.mark.parametrize('offset', [0, 2])
def test_series_limit(t, df, offset):
    """``limit`` on a column should act like slicing the pandas series."""
    n = 5
    limited = t.plain_int64.limit(n, offset=offset).execute()

    # Rows offset .. offset + n - 1 of the reference column.
    window = slice(offset, offset + n)
    tm.assert_series_equal(limited, df.plain_int64.iloc[window])


@pytest.mark.parametrize(
    ('key', 'pandas_by', 'pandas_ascending'),
    [
        (lambda t, col: [ibis.desc(t[col])], lambda col: [col], False),
        (
            lambda t, col: [t[col], ibis.desc(t.plain_int64)],
            lambda col: [col, 'plain_int64'],
            [True, False],
        ),
        (
            lambda t, col: [ibis.desc(t.plain_int64 * 2)],
            lambda col: ['plain_int64'],
            False,
        ),
    ],
)
@pytest.mark.parametrize(
    'column',
    ['plain_datetimes_naive', 'plain_datetimes_ny', 'plain_datetimes_utc'],
Exemplo n.º 48
0
# Boolean column: True where answer_count is greater than zero.
has_answer_boolean = projection.answer_count > 0
# [END bigquery_ibis_transform_integer]
# [START bigquery_ibis_transform_boolean]
# Map the boolean to 1/0 with ifelse so that it can be averaged below.
has_answer_int = has_answer_boolean.ifelse(1, 0)
# [END bigquery_ibis_transform_boolean]

# [START bigquery_ibis_aggregate]
# Row count of the projection.
total_questions = projection.count()
# Mean of the 1/0 flag scaled by 100.
percentage_answered = has_answer_int.mean() * 100
# [END bigquery_ibis_aggregate]

# [START bigquery_ibis_group_by]
# Compute both metrics per 'year', sorted by year in descending order.
expression = projection.groupby('year').aggregate(
    total_questions=total_questions,
    percentage_answered=percentage_answered,
).sort_by(ibis.desc(projection.year))
# [END bigquery_ibis_group_by]

print('\nExecuting query:')
# [START bigquery_ibis_execute]
# Run the expression and print the returned result.
print(expression.execute())
#     year  total_questions  percentage_answered
# 0   2018           997508            66.776307
# 1   2017          2318405            75.898732
# 2   2016          2226478            84.193197
# 3   2015          2219791            86.170365
# 4   2014          2164895            88.356987
# 5   2013          2060753            91.533241
# 6   2012          1645498            94.510659
# 7   2011          1200601            97.149261
# 8   2010           694410            99.060497