def test_merge_project():
    """Check that columns selected from a merge match the merged-in expressions."""
    accounts = TableSymbol(
        "accounts", "{name: string, balance: int32, id: int32}"
    )
    new_amount = (accounts["balance"] * 1.5).label("new")
    name_and_balance = accounts[["name", "balance"]]
    merged = merge(name_and_balance, new_amount)

    # Single-field lookups are compared against the expressions that went in.
    assert merged["new"].isidentical(new_amount)
    assert merged["name"].isidentical(accounts["name"])

    # A two-field selection is compared against merging just those two pieces.
    subset = merged[["name", "new"]]
    assert subset.isidentical(merge(accounts.name, new_amount))
def test_merge_project():
    """Check that fields read back from a merge are identical to their sources."""
    accounts = symbol(
        'accounts',
        'var * {name: string, balance: int32, id: int32}',
    )
    new_amount = (accounts['balance'] * 1.5).label('new')
    merged = merge(accounts[['name', 'balance']], new_amount)

    # Each single field is compared with the expression it was built from.
    new_field = merged['new']
    assert new_field.isidentical(new_amount)
    name_field = merged['name']
    assert name_field.isidentical(accounts['name'])

    # Selecting two fields is compared with a direct merge of those two.
    name_and_new = merged[['name', 'new']]
    assert name_and_new.isidentical(merge(accounts.name, new_amount))
def test_merge_project():
    """Check projections of a merged expression against the original pieces."""
    schema = '{name: string, balance: int32, id: int32}'
    accounts = TableSymbol('accounts', schema)
    new_amount = (accounts['balance'] * 1.5).label('new')
    combined = merge(accounts[['name', 'balance']], new_amount)

    # Individual columns of the merge versus the expressions supplied to it.
    assert combined['new'].isidentical(new_amount)
    assert combined['name'].isidentical(accounts['name'])

    # Column subset of the merge versus a merge of only those columns.
    projected = combined[['name', 'new']]
    assert projected.isidentical(merge(accounts.name, new_amount))
def test_merge():
    """Check merge's fields and schema, and that two bad merges raise ValueError."""
    t = TableSymbol("t", "int64")
    p = TableSymbol("p", "{amount:int}")
    accounts = TableSymbol(
        "accounts", "{name: string, balance: int32, id: int32}"
    )
    new_amount = (accounts.balance * 1.5).label("new")
    merged = merge(accounts[["name", "balance"]], new_amount)

    assert merged.fields == ["name", "balance", "new"]
    expected_schema = dshape("{name: string, balance: int32, new: float64}")
    assert merged.schema == expected_schema

    # Both argument pairs below are expected to be rejected with ValueError.
    for left, right in ((t, t), (t, p)):
        with pytest.raises(ValueError):
            merge(left, right)
def test_merge():
    """Check merge's column names and the exceptions raised for two bad merges."""
    t = TableSymbol('t', 'int64')
    p = TableSymbol('p', '{amount:int}')
    accounts = TableSymbol(
        'accounts', '{name: string, balance: int32, id: int32}'
    )
    scaled_balance = accounts['balance'] * 1.5
    new_amount = scaled_balance.label('new')
    merged = merge(accounts[['name', 'balance']], new_amount)

    assert merged.columns == ['name', 'balance', 'new']

    # Merging ``t`` with itself is expected to raise TypeError ...
    with pytest.raises(TypeError):
        merge(t, t)

    # ... while merging ``t`` with ``p`` is expected to raise ValueError.
    with pytest.raises(ValueError):
        merge(t, p)
def test_merge():
    """Check merge's fields and schema, and that two bad merges raise ValueError."""
    t = symbol('t', 'int64')
    p = symbol('p', 'var * {amount:int}')
    accounts = symbol(
        'accounts',
        'var * {name: string, balance: int32, id: int32}',
    )
    new_amount = (accounts.balance * 1.5).label('new')
    name_and_balance = accounts[['name', 'balance']]
    merged = merge(name_and_balance, new_amount)

    assert merged.fields == ['name', 'balance', 'new']
    assert merged.schema == dshape(
        '{name: string, balance: int32, new: float64}'
    )

    # ``t`` merged with itself is expected to raise ValueError.
    with pytest.raises(ValueError):
        merge(t, t)

    # ``t`` merged with ``p`` is expected to raise ValueError as well.
    with pytest.raises(ValueError):
        merge(t, p)
def test_merge():
    """Check merge's fields and schema, and that two bad merges raise ValueError."""
    t = TableSymbol('t', 'int64')
    p = TableSymbol('p', '{amount:int}')
    account_schema = '{name: string, balance: int32, id: int32}'
    accounts = TableSymbol('accounts', account_schema)
    new_amount = (accounts['balance'] * 1.5).label('new')
    combined = merge(accounts[['name', 'balance']], new_amount)

    expected_fields = ['name', 'balance', 'new']
    assert combined.fields == expected_fields
    assert combined.schema == dshape(
        '{name: string, balance: int32, new: float64}'
    )

    # Each of these argument pairs is expected to be rejected with ValueError.
    for args in ((t, t), (t, p)):
        with pytest.raises(ValueError):
            merge(*args)
def test_complex_group_by():
    """Group by a merge of two computed columns and compare against pandas.

    ``tbig`` and ``dfbig`` are not defined in this function; they come from
    the enclosing module.
    """
    grouper = merge(tbig.amount // 10, tbig.id % 2)
    expr = by(grouper, count=tbig.name.count())
    result = compute(expr, dfbig)  # can we do this? yes we can!

    # Build the same aggregation directly with pandas for comparison.
    group_keys = [dfbig.amount // 10, dfbig.id % 2]
    name_counts = dfbig.groupby(group_keys)['name'].count()
    expected = name_counts.reset_index()
    expected = expected.rename(columns={'name': 'count'})

    tm.assert_frame_equal(result, expected)
def test_merge():
    """Check the column names of a merge of a projection and a labelled column."""
    accounts = TableSymbol(
        'accounts', '{name: string, balance: int32, id: int32}'
    )
    scaled_balance = accounts['balance'] * 1.5
    new_amount = scaled_balance.label('new')
    merged = merge(accounts[['name', 'balance']], new_amount)

    assert merged.columns == ['name', 'balance', 'new']
def test_merge():
    """Compute a merge of ``name`` and a doubled ``amount`` and compare frames.

    ``t`` and ``df`` are not defined in this function; they come from the
    enclosing module.
    """
    doubled = (t['amount'] * 2).label('new')
    expr = merge(t['name'], doubled)

    expected_rows = [['Alice', 200], ['Bob', 400], ['Alice', 100]]
    expected = DataFrame(expected_rows, columns=['name', 'new'])

    result = compute(expr, df)
    tm.assert_frame_equal(result, expected)
def test_merge():
    """Check merge output for record and scalar inputs, and the all-scalar error."""
    t = symbol('t', 'int64')
    p = symbol('p', 'var * {amount: int}')
    accounts = symbol(
        'accounts',
        'var * {name: string, balance: int32, id: int32}',
    )
    new_amount = (accounts.balance * 1.5).label('new')

    # Merge of a projection with a labelled computed column.
    merged = merge(accounts[['name', 'balance']], new_amount)
    assert merged.fields == ['name', 'balance', 'new']
    assert merged.schema == dshape(
        '{name: string, balance: int32, new: float64}'
    )

    # Merge of ``t`` with ``p``.
    mixed = merge(t, p)
    assert mixed.fields == ['t', 'amount']
    expected_dshape = dshape('var * {t: int64, amount: int}')
    assert_dshape_equal(mixed.dshape, expected_dshape)

    # Merging ``t`` with itself must fail with this exact message.
    with pytest.raises(TypeError) as excinfo:
        merge(t, t)
    assert str(excinfo.value) == 'cannot merge all scalar expressions'
def test_merge_with_common_subexpression():
    """Compute a merge whose two columns both contain ``t.a % 3``."""
    frame = DataFrame(np.random.rand(5, 2), columns=list('ab'))
    t = symbol('t', discover(frame))

    # Both merged columns are built from the same ``t.a % 3`` sub-expression.
    rounded_down = (t.a - t.a % 3).label('a')
    remainder = (t.a % 3).label('b')
    expr = merge(rounded_down, remainder)
    result = compute(expr, {t: frame})

    # Same arithmetic done directly on the pandas columns.
    expected_a = pd.Series(frame.a - frame.a % 3, name='a')
    expected_b = pd.Series(frame.a % 3, name='b')
    expected = pd.concat([expected_a, expected_b], axis=1)

    tm.assert_frame_equal(result, expected)
def test_by_groupby_deep():
    """Group a merge built on a filtered table and check the per-name means."""
    data = [
        (1, 2, 'Alice'),
        (1, 3, 'Bob'),
        (2, 4, 'Alice'),
        (2, 4, ''),
    ]
    schema = '{x: int, y: int, name: string}'
    t = Symbol('t', datashape.var * schema)

    # Filter out the empty name, then merge two columns of the filtered table.
    named_only = t[t['name'] != '']
    merged = merge(named_only.x, named_only.name)
    expr = by(merged.name, merged.x.mean())

    result = set(compute(expr, data))
    assert result == {('Alice', 1.5), ('Bob', 1.0)}
def test_merge():
    """Check fragments of the string form of a computed merge.

    ``t`` and ``s`` are not defined in this function; they come from the
    enclosing module.
    """
    doubled = (t['amount'] * 2).label('new')
    expr = merge(t['name'], doubled)
    rendered = str(compute(expr, s))

    # Each fragment must appear somewhere in the rendered text.
    assert 'amount * ' in rendered
    assert 'FROM accounts' in rendered
    assert 'SELECT accounts.name' in rendered
    assert 'new' in rendered
def test_merge_where():
    """Check the normalized string form of a merge over a filtered table.

    ``t``, ``s`` and ``normalize`` are not defined in this function; they
    come from the enclosing module.
    """
    filtered = t[t.id == 1]
    expr = merge(filtered[['amount', 'name']], filtered.id)
    result = compute(expr, s)

    expected = normalize("""SELECT accounts.amount, accounts.name, accounts.id FROM accounts WHERE accounts.id = :id_1 """)

    actual = normalize(str(result))
    assert actual == expected
def test_merge_with_common_subexpression():
    """Compute a merge whose columns share ``t.a % 3`` and compare with pandas."""
    df = DataFrame(np.random.rand(5, 2), columns=list('ab'))
    t = symbol('t', discover(df))

    # ``t.a % 3`` appears in both labelled columns passed to merge.
    floor_part = (t.a - t.a % 3).label('a')
    mod_part = (t.a % 3).label('b')
    result = compute(merge(floor_part, mod_part), {t: df})

    # Reference frame assembled from the raw pandas columns.
    columns = [
        pd.Series(df.a - df.a % 3, name='a'),
        pd.Series(df.a % 3, name='b'),
    ]
    expected = pd.concat(columns, axis=1)

    tm.assert_frame_equal(result, expected)
def test_by_groupby_deep():
    """Group a merge built on a filtered table and check the per-name means."""
    data = [
        (1, 2, 'Alice'),
        (1, 3, 'Bob'),
        (2, 4, 'Alice'),
        (2, 4, ''),
    ]
    schema = '{x: int, y: int, name: string}'
    t = TableSymbol('t', schema)

    # Drop rows with an empty name before merging the two columns.
    with_names = t[t['name'] != '']
    pair = merge(with_names.x, with_names.name)
    expr = by(pair.name, pair.x.mean())

    result = set(compute(expr, data))
    assert result == {('Alice', 1.5), ('Bob', 1.0)}
def test_merge():
    """Check that lean_projection's result for a merge has ``t[["x", "y"]]`` as child.

    ``t`` is not defined in this function; it comes from the enclosing module.
    """
    merged = merge(a=t.x + 1, y=t.y)
    projected = lean_projection(merged)

    child = projected._child
    assert child.isidentical(t[["x", "y"]])
def test_merge_repeats():
    """Check that merging in a column labelled ``balance`` again raises ValueError."""
    accounts = symbol(
        'accounts',
        'var * {name: string, balance: int32, id: int32}',
    )

    # ``accounts`` already has a ``balance`` field; the new column reuses it.
    with pytest.raises(ValueError):
        merge(accounts, (accounts.balance + 1).label('balance'))
def test_merge_with_table():
    """Run lean_projection on a merge of the whole table with a new column.

    ``t`` is not defined in this function; it comes from the enclosing module.
    """
    merged = merge(t, a=t.x + 1)
    expr = lean_projection(merged)

    # NOTE(review): this compares ``expr`` with itself, so it does not pin
    # down what lean_projection should return -- confirm the intended
    # expected expression.
    assert expr.isidentical(expr)  # wut?
def test_merge():
    """Compute a merge of ``name`` and a doubled ``amount`` over ``data``.

    ``t`` and ``data`` are not defined in this function; they come from the
    enclosing module.
    """
    doubled = (t['amount'] * 2).label('new')
    expr = merge(t['name'], doubled)

    result = list(compute(expr, data))

    # Expected rows: first element unchanged, second element doubled.
    expected = [(row[0], row[1] * 2) for row in data]
    assert result == expected
def test_complex_group_by():
    """Compute a group-by whose grouper is a merge of two computed columns.

    ``tbig`` and ``dfbig`` are not defined in this function; they come from
    the enclosing module.
    """
    grouper = merge(tbig.amount // 10, tbig.id % 2)
    expr = by(grouper, count=tbig.name.count())

    # NOTE(review): the result is not inspected; the test only passes or
    # fails on whether compute raises.
    compute(expr, dfbig)  # can we do this?
def test_merge_repeats():
    """Check that merging in a column labelled ``balance`` again raises ValueError."""
    schema = '{name: string, balance: int32, id: int32}'
    accounts = TableSymbol('accounts', schema)

    # The labelled column reuses the name of an existing ``accounts`` field.
    with pytest.raises(ValueError):
        merge(accounts, (accounts.balance + 1).label('balance'))
def test_merge_repeats():
    """Check that a merge with a repeated ``balance`` name raises ValueError."""
    accounts = TableSymbol(
        "accounts", "{name: string, balance: int32, id: int32}"
    )

    # ``balance`` is already a field of ``accounts``; the label repeats it.
    with pytest.raises(ValueError):
        merge(accounts, (accounts.balance + 1).label("balance"))
def test_merge():
    """Check that lean_projection's result for a merge has ``t[['x', 'y']]`` as child.

    ``t`` is not defined in this function; it comes from the enclosing module.
    """
    combined = merge(a=t.x + 1, y=t.y)
    expr = lean_projection(combined)

    expected_child = t[['x', 'y']]
    assert expr._child.isidentical(expected_child)