def test_distinct_name():
    """Attribute and item access agree, on a table and on its distinct()."""
    tbl = TableSymbol("t", "{id: int32, name: string}")

    # Plain table: ``.name`` and ``["name"]`` must be identical expressions.
    assert tbl.name.isidentical(tbl["name"])

    # The same equivalence is required once distinct() has been applied.
    via_attribute = tbl.distinct().name
    via_item = tbl.distinct()["name"]
    assert via_attribute.isidentical(via_item)

    # ``_name`` reports the field name for both forms checked here.
    assert tbl.id.distinct()._name == "id"
    assert tbl.name._name == "name"
def test_subterms():
    """Check which expressions ``_subterms()`` reports for small trees."""
    table = TableSymbol('a', '{x: int, y: int, z: int}')

    # A bare symbol reports only itself.
    assert list(table._subterms()) == [table]

    # A column reports itself and the table it was taken from.
    column_terms = set(table['x']._subterms())
    assert column_terms == {table, table['x']}

    # A mapped column adds the map expression on top of those two.
    mapped_terms = set(table['x'].map(inc, 'int')._subterms())
    assert mapped_terms == {table, table['x'], table['x'].map(inc, 'int')}

    # The table is also among the subterms of an arithmetic expression.
    arithmetic_terms = set((table['x'] + 1)._subterms())
    assert table in arithmetic_terms
def test_distinct_name():
    """``.name`` and ``['name']`` are interchangeable, also after distinct()."""
    people = TableSymbol('t', '{id: int32, name: string}')

    plain_attr, plain_item = people.name, people['name']
    assert plain_attr.isidentical(plain_item)

    unique_attr, unique_item = people.distinct().name, people.distinct()['name']
    assert unique_attr.isidentical(unique_item)

    # The reported ``_name`` is the field name in both cases below.
    unique_ids = people.id.distinct()
    assert unique_ids._name == 'id'
    assert people.name._name == 'name'
def test_relabel_join():
    """Joining two relabelled copies of a table exposes both new labels."""
    names = TableSymbol('names', '{first: string, last: string}')

    # Rename ``last`` differently on each side, then join on ``first``.
    left_side = names.relabel({'last': 'left'})
    right_side = names.relabel({'last': 'right'})
    siblings = join(left_side, right_side, 'first')

    assert siblings.fields == ['first', 'left', 'right']
def test_by_columns():
    """Count the fields produced by ``by`` with keyword-named reductions."""
    t = TableSymbol('t', '{name: string, amount: int32, id: int32}')

    def whole_table_fields():
        # Group on the entire table and count its rows.
        return by(t, count=t.count()).fields

    # Grouping on a single column yields two fields.
    per_id_total = by(t['id'], total=t['amount'].sum())
    assert len(per_id_total.fields) == 2

    per_id_count = by(t['id'], count=t['id'].count())
    assert len(per_id_count.fields) == 2

    # Grouping on the whole table yields four fields.
    print(whole_table_fields())
    assert len(whole_table_fields()) == 4
def test_by_columns():
    """Check how many fields ``by`` reports for several groupings."""
    accounts = TableSymbol("t", "{name: string, amount: int32, id: int32}")
    id_column = accounts["id"]

    # One grouping column plus one named reduction gives two fields.
    summed = by(id_column, total=accounts["amount"].sum())
    assert len(summed.fields) == 2

    counted = by(accounts["id"], count=accounts["id"].count())
    assert len(counted.fields) == 2

    # Grouping on the full table is expected to report four fields.
    grouped_on_table = by(accounts, count=accounts.count())
    print(grouped_on_table.fields)
    assert len(grouped_on_table.fields) == 4
def test_table_name():
    """Accessing ``.name`` on these two symbols must raise AttributeError."""
    # Neither dshape below declares a field called ``name``.
    record_symbol = TableSymbol('t', '10 * {people: string, amount: int}')
    scalar_symbol = TableSymbol('r', 'int64')

    for candidate in (record_symbol, scalar_symbol):
        with pytest.raises(AttributeError):
            candidate.name
def test_broadcast():
    """Exercise ``broadcast`` and the string form of binary operators."""
    from blaze.expr.arithmetic import Add, Eq, Mult, Le

    t = TableSymbol('t', '{x: int, y: int, z: int}')
    t2 = TableSymbol('t', '{a: int, b: int, c: int}')
    x, y, z = t['x'], t['y'], t['z']
    a, b, c = t2['a'], t2['b'], t2['c']

    # Broadcasting two columns of ``t`` keeps ``t`` as the child.
    added = broadcast(Add, x, y)
    assert str(added._expr) == 'x + y'
    assert added._child.isidentical(t)

    # Broadcast results can themselves be combined.
    multiplied = broadcast(Mult, x, z)
    compared = broadcast(Eq, added, multiplied)
    assert eval_str(compared._expr) == '(x + y) == (x * z)'
    assert compared._child.isidentical(t)

    # A scalar operand is accepted too.
    assert str(broadcast(Add, x, 1)._expr) == 'x + 1'

    # Operators (including reflected ones) and their expected rendering.
    rendered_pairs = [
        (x <= y, "t.x <= t.y"),
        (x >= y, "t.x >= t.y"),
        (x | y, "t.x | t.y"),
        (x.__ror__(y), "t.y | t.x"),
        (x.__rand__(y), "t.y & t.x"),
    ]
    for expression, expected_text in rendered_pairs:
        assert str(expression) == expected_text

    # Columns taken from two different symbols cannot be broadcast together.
    with pytest.raises(ValueError):
        broadcast(Add, x, a)
def test_by_columns():
    """Count the fields produced by ``by`` with positional reductions."""
    t = TableSymbol('t', '{name: string, amount: int32, id: int32}')
    key = t['id']

    # Single-column groupings: two fields each.
    for reduction in (t['amount'].sum(), t['id'].count()):
        assert len(by(key, reduction).fields) == 2

    # Whole-table grouping: four fields.
    table_grouping = by(t, t.count())
    print(table_grouping.fields)
    assert len(table_grouping.fields) == 4
def test_traverse():
    """``_traverse()`` visits the symbol itself and the reduced column."""
    t = TableSymbol('t', '{name: string, amount: int}')

    own_nodes = list(t._traverse())
    assert t in own_nodes

    total = t.amount.sum()
    visited = list(total._traverse())
    # ``builtins.any`` is spelled out explicitly, as in the original.
    assert builtins.any(t.amount.isidentical(node) for node in visited)
def test_Distinct():
    """Check the dshape of ``distinct`` on a column and on a whole table."""
    t = TableSymbol('t', '{name: string, amount: int32}')

    # Column form: a variable-length collection of ``{name: string}`` records.
    unique_names = distinct(t['name'])
    print(unique_names.dshape)
    assert unique_names.dshape == dshape('var * {name: string}')

    # Table form: the dshape is unchanged.
    unique_rows = t.distinct()
    assert unique_rows.dshape == t.dshape
def test_traverse():
    """``traverse()`` visits the column and the predicate of a selection."""
    t = TableSymbol('t', '{name: string, amount: int}')
    assert t in list(t.traverse())

    # Names of the rows whose amount is negative.
    expr = t[t['amount'] < 0]['name']
    visited = list(expr.traverse())

    assert any(t['amount'].isidentical(node) for node in visited)
    assert any((t['amount'] < 0).isidentical(node) for node in visited)
def test_subs():
    """``_subs`` replaces scalars, field names and whole symbols."""
    from blaze.expr import TableSymbol

    t = TableSymbol('t', '{name: string, amount: int, id: int}')

    # Swap the constant 3 for 4 and the field ``amount`` for ``id``.
    original = t['amount'] + 3
    substituted = original._subs({3: 4, 'amount': 'id'})
    assert substituted.isidentical(t['id'] + 4)

    # Swap the underlying symbol for another one.
    t2 = TableSymbol('t', '{name: string, amount: int}')
    moved = t['amount']._subs({t: t2})
    assert moved.isidentical(t2['amount'])
def test_discover_dshape_symbol(ds):
    """A symbol built from ``ds`` matches one built from ``ds.subshape[0]``."""
    from_dshape = TableSymbol('t', dshape=ds)
    assert from_dshape.fields is not None

    from_subshape = TableSymbol('t', dshape=ds.subshape[0])
    assert from_subshape.fields is not None

    # Both construction routes must give identical symbols.
    assert from_dshape.isidentical(from_subshape)
def test_symbol_projection_failures():
    """Invalid projections and attribute lookups raise the expected errors."""
    table = TableSymbol('t', '10 * {name: string, amount: int}')

    # ``id`` is not one of the declared fields.
    with pytest.raises(ValueError):
        table._project(['name', 'id'])

    # Neither is ``foo``.
    with pytest.raises(AttributeError):
        table.foo

    # A dshape is rejected as a projection argument.
    with pytest.raises(TypeError):
        table._project(table.dshape)
def test_symbol_projection_failures():
    """Each bad use of the symbol below must raise a specific exception."""
    sym = TableSymbol("t", "10 * {name: string, amount: int}")
    unknown_mix = ["name", "id"]  # "id" is not declared in the dshape above

    with pytest.raises(ValueError):
        sym._project(unknown_mix)

    with pytest.raises(AttributeError):
        sym.foo

    with pytest.raises(TypeError):
        # The dshape is looked up inside the block, as in the original.
        sym._project(sym.dshape)
def test_Distinct():
    """``distinct`` on a column keeps its name; on a table, its dshape."""
    t = TableSymbol("t", "{name: string, amount: int32}")

    unique_names = distinct(t["name"])
    print(unique_names.dshape)
    assert unique_names.dshape == dshape("var * string")
    assert unique_names._name == "name"

    unique_rows = t.distinct()
    assert unique_rows.dshape == t.dshape
def test_length():
    """``len`` follows the fixed leading dimension of the dshape."""
    t = TableSymbol('t', '10 * {name: string, amount: int}')
    assert t.dshape == dshape('10 * {name: string, amount: int}')

    # None of these expressions changes the row count.
    same_length = (t, t.name, t[['name']], t.sort('name'))
    for expression in same_length:
        assert len(expression) == 10

    # head(n) is n long, but never longer than the table itself.
    assert len(t.head(5)) == 5
    assert len(t.head(50)) == 10
def test_sort():
    """A sorted table round-trips through ``str``/``eval``; schema is kept."""
    # NOTE: the local must stay named ``t``; the text passed to ``eval``
    # below is evaluated in this scope and presumably refers to it by name.
    t = TableSymbol("t", "{name: string, amount: int32, id: int32}")

    by_amount = t.sort("amount", ascending=True)
    print(str(by_amount))

    rebuilt = eval(str(by_amount))
    assert rebuilt.isidentical(by_amount)
    assert by_amount.schema == t.schema

    # Sorting a single column with no argument uses that column as the key.
    sorted_column = t["amount"].sort()
    assert sorted_column.key == "amount"
def test_relabel():
    """Relabelling renames columns in place and round-trips through eval."""
    # NOTE: the local must stay named ``t``; the text passed to ``eval``
    # below is evaluated in this scope and presumably refers to it by name.
    t = TableSymbol('t', '{name: string, amount: int32, id: int32}')

    renaming = {'name': 'NAME', 'id': 'ID'}
    rl = t.relabel(renaming)

    rebuilt = eval(str(rl))
    assert rebuilt.isidentical(rl)

    print(rl.columns)
    assert rl.columns == ['NAME', 'amount', 'ID']
def test_Distinct():
    """Check dshape and name of ``distinct`` results."""
    table = TableSymbol('t', '{name: string, amount: int32}')

    # Column case: variable-length strings, still called ``name``.
    result = distinct(table['name'])
    print(result.dshape)
    expected_shape = dshape('var * string')
    assert result.dshape == expected_shape
    assert result._name == 'name'

    # Table case: same dshape as the input table.
    result = table.distinct()
    assert result.dshape == table.dshape
def test_sort():
    """Sorting keeps the schema and its string form can be evaluated back."""
    # NOTE: keep the name ``t``; ``eval`` resolves names from this scope and
    # the rendered expression presumably mentions the symbol as ``t``.
    t = TableSymbol('t', '{name: string, amount: int32, id: int32}')
    s = t.sort('amount', ascending=True)

    rendered = str(s)
    print(rendered)

    assert eval(rendered).isidentical(s)
    assert s.schema == t.schema

    # With no explicit key, a sorted column is keyed on its own name.
    assert t['amount'].sort().key == 'amount'
def test_like():
    """Compute ``like`` filters with glob patterns over in-memory rows."""
    t = TableSymbol('t', '{name: string, city: string}')
    data = [('Alice Smith', 'New York'),
            ('Bob Smith', 'Chicago'),
            ('Alice Walker', 'LA')]
    alice_smith, bob_smith, alice_walker = data

    def matching(**patterns):
        # Evaluate the filter against ``data`` and collect the result rows.
        return list(compute(t.like(**patterns), data))

    # Prefix pattern: both Alices.
    assert matching(name='Alice*') == [alice_smith, alice_walker]

    # No name starts with "lice".
    assert matching(name='lice*') == []

    # Substring pattern: both Smiths.
    assert matching(name='*Smith*') == [alice_smith, bob_smith]

    # Patterns on two columns: only the Smith from New York.
    assert matching(name='*Smith*', city='New York') == [alice_smith]
def test_union():
    """A union keeps the shared schema and rejects a mismatched table."""
    schema = '{x: int, y: int, z: int}'
    a, b, c = [TableSymbol(label, schema) for label in ('a', 'b', 'c')]

    combined = union(a, b, c)
    assert combined.schema == a.schema

    def union_with_other_schema():
        # ``q`` has a different schema from ``a``.
        return union(a, TableSymbol('q', '{name: string}'))

    assert raises(Exception, union_with_other_schema)
def test_multi_column_join():
    """Joining on two shared columns: check keys, hashing and field order."""
    left = TableSymbol('a', '{x: int, y: int, z: int}')
    right = TableSymbol('b', '{w: int, x: int, y: int}')
    keys = ['x', 'y']

    joined = join(left, right, keys)

    # Every column from both sides is present.
    assert set(joined.fields) == {'w', 'x', 'y', 'z'}

    # The same key list is used on both sides.
    assert joined.on_left == joined.on_right
    assert joined.on_right == ['x', 'y']

    # The join hashes to a truthy value.
    assert hash(joined)

    # Expected column order.
    assert joined.fields == ['x', 'y', 'z', 'w']
def test_length():
    """``len`` works for a fixed-size dshape and raises without one."""
    sized = TableSymbol("t", "10 * {name: string, amount: int}")
    unsized = TableSymbol("s", "{name:string, amount:int}")

    assert sized.dshape == dshape("10 * {name: string, amount: int}")

    # These expressions all keep ten rows.
    ten_row_exprs = [
        sized,
        sized.name,
        sized[["name"]],
        sized.sort("name"),
    ]
    for expression in ten_row_exprs:
        assert len(expression) == 10

    # head(n) is n long, but never longer than the table.
    assert len(sized.head(5)) == 5
    assert len(sized.head(50)) == 10

    # The second dshape declares no leading dimension.
    with pytest.raises(ValueError):
        len(unsized)
def test_leaves():
    """``_leaves()`` lists the root symbols an expression is built from."""
    t = TableSymbol("t", "{id: int32, name: string}")
    v = TableSymbol("v", "{id: int32, city: string}")
    x = symbol("x", "int32")

    # Expressions built from ``t`` alone have ``t`` as their only leaf.
    assert t._leaves() == [t]
    assert t.id._leaves() == [t]
    grouped = by(t.name, count=t.id.nunique())
    assert grouped._leaves() == [t]

    # Joins report both inputs, in argument order.
    assert join(t, v)._leaves() == [t, v]
    assert join(v, t)._leaves() == [v, t]

    # A scalar symbol is the leaf of arithmetic on it.
    incremented = x + 1
    assert incremented._leaves() == [x]
def test_leaves():
    """Check ``_leaves()`` for tables, columns, groupings, joins and scalars."""
    t = TableSymbol('t', '{id: int32, name: string}')
    v = TableSymbol('v', '{id: int32, city: string}')
    x = Symbol('x', 'int32')

    # Expressions built only from ``t``.
    only_t = [t]
    assert t._leaves() == only_t
    assert t.id._leaves() == only_t
    assert by(t.name, t.id.nunique())._leaves() == only_t

    # Leaf order follows the order of the join arguments.
    t_then_v = join(t, v)
    v_then_t = join(v, t)
    assert t_then_v._leaves() == [t, v]
    assert v_then_t._leaves() == [v, t]

    # Scalar arithmetic.
    assert (x + 1)._leaves() == [x]
def test_serializable():
    """Symbols and compound expressions survive a pickle round trip."""
    import pickle

    def round_trip(obj):
        # Serialize and immediately deserialize (data created in this test).
        return pickle.loads(pickle.dumps(obj))

    t = TableSymbol('t', '{id: int, name: string, amount: int}')
    assert t.isidentical(round_trip(t))

    s = TableSymbol('t', '{id: int, city: string}')
    negative_rows = t[t.amount < 0]
    expr = join(negative_rows, s).sort('id').city.head()
    assert expr.isidentical(round_trip(expr))
def test_length():
    """Lengths come from the leading dimension; without one, len() fails."""
    t = TableSymbol('t', '10 * {name: string, amount: int}')
    s = TableSymbol('s', '{name:string, amount:int}')

    expected_dshape = dshape('10 * {name: string, amount: int}')
    assert t.dshape == expected_dshape

    # (expression, expected length) pairs, checked in order.
    cases = [
        (t, 10),
        (t.name, 10),
        (t[['name']], 10),
        (t.sort('name'), 10),
        (t.head(5), 5),
        (t.head(50), 10),
    ]
    for expression, expected in cases:
        assert len(expression) == expected

    with pytest.raises(ValueError):
        len(s)
def test_leaves():
    """Each expression reports exactly the symbols it was built from."""
    t = TableSymbol('t', '{id: int32, name: string}')
    v = TableSymbol('v', '{id: int32, city: string}')
    x = symbol('x', 'int32')

    # (expression, expected leaves) pairs, checked in order.
    cases = [
        (t, [t]),
        (t.id, [t]),
        (by(t.name, count=t.id.nunique()), [t]),
        (join(t, v), [t, v]),
        (join(v, t), [v, t]),
        (x + 1, [x]),
    ]
    for expression, expected_leaves in cases:
        assert expression._leaves() == expected_leaves
def test_path():
    """``path(a, b)`` lists the expressions from ``a`` down to ``b``."""
    from blaze.expr import TableSymbol, join

    t = TableSymbol('t', '{name: string, amount: int, id: int}')
    v = TableSymbol('v', '{city: string, id: int}')

    # Reduction over a column: the path may stop at the table or the column.
    expr = t['amount'].sum()
    assert list(path(expr, t)) == [t.amount.sum(), t.amount, t]
    assert list(path(expr, t.amount)) == [t.amount.sum(), t.amount]

    # Column of a join: a path exists down to either joined table.
    expr = join(t, v).amount
    assert list(path(expr, t)) == [join(t, v).amount, join(t, v), t]
    assert list(path(expr, v)) == [join(t, v).amount, join(t, v), v]
def test_relabel():
    """Relabel a table and a single column; check fields and scalar-ness."""
    # NOTE: keep the name ``t``; the text passed to ``eval`` below is
    # evaluated in this scope and presumably refers to the symbol as ``t``.
    t = TableSymbol("t", "{name: string, amount: int32, id: int32}")

    rl = t.relabel({"name": "NAME", "id": "ID"})
    rlc = t["amount"].relabel({"amount": "BALANCE"})

    # The string form evaluates back to an identical expression.
    rebuilt = eval(str(rl))
    assert rebuilt.isidentical(rl)

    print(rl.fields)
    assert rl.fields == ["NAME", "amount", "ID"]

    # Table measure is not scalar; the relabelled column's measure is.
    table_measure = rl.dshape.measure
    column_measure = rlc.dshape.measure
    assert not isscalar(table_measure)
    assert isscalar(column_measure)
def test_relabel():
    """Relabelling round-trips through eval and keeps the measure kind."""
    # NOTE: ``t`` must keep this name because ``eval`` looks names up in
    # this scope and the rendered text presumably mentions ``t``.
    t = TableSymbol('t', '{name: string, amount: int32, id: int32}')
    table_renames = {'name': 'NAME', 'id': 'ID'}
    column_renames = {'amount': 'BALANCE'}

    rl = t.relabel(table_renames)
    rlc = t['amount'].relabel(column_renames)

    assert eval(str(rl)).isidentical(rl)

    print(rl.fields)
    assert rl.fields == ['NAME', 'amount', 'ID']

    # A relabelled table is not scalar-valued; a relabelled column is.
    assert not isscalar(rl.dshape.measure)
    assert isscalar(rlc.dshape.measure)
def test_Distinct():
    """Computed ``distinct`` agrees with ``np.unique`` on a record array."""
    rows = [('Alice', 100), ('Alice', -200), ('Bob', 100), ('Bob', 100)]
    record_dtype = [('name', 'S5'), ('amount', 'i8')]
    x = np.array(rows, dtype=record_dtype)

    t = TableSymbol('t', '{name: string, amount: int64}')

    # Distinct values of a single field.
    unique_names = compute(t['name'].distinct(), x)
    assert eq(unique_names, np.unique(x['name']))

    # Distinct whole records.
    unique_records = compute(t.distinct(), x)
    assert eq(unique_records, np.unique(x))
def test_scalar_expr():
    """The ``_expr`` of a column carries that column's integer type."""
    t = TableSymbol('t', '{x: int64, y: int32, z: int64}')
    x_scalar = t.x._expr
    y_scalar = t.y._expr

    assert 'int64' in str(x_scalar.dshape)
    assert 'int32' in str(y_scalar.dshape)

    # The first input of ``x + 1`` is the scalar for ``x``.
    plus_one = (t.x + 1)._expr
    first_input = plus_one._inputs[0]
    assert first_input.dshape == x_scalar.dshape
    assert first_input.isidentical(x_scalar)

    # Same type check with extra whitespace in the dshape string.
    t = TableSymbol('t', '{ amount : int64, id : int64, name : string }')
    plus_one = (t.amount + 1)._expr
    assert 'int64' in str(plus_one._inputs[0].dshape)
def test_relabel_join():
    """Self-join on last name pairs up people who share it."""
    names = TableSymbol('names', '{first: string, last: string}')

    as_left = names.relabel({'first': 'left'})
    as_right = names.relabel({'first': 'right'})
    siblings = join(as_left, as_right, 'last')[['left', 'right']]

    data = [('Alice', 'Smith'), ('Bob', 'Jones'), ('Charlie', 'Smith')]

    def computed_pairs():
        # Re-evaluated on every call, matching the original's three computes.
        return set(compute(siblings, {names: data}))

    print(computed_pairs())

    # Alice and Charlie are both Smiths; Bob is a Jones.
    assert ('Alice', 'Charlie') in computed_pairs()
    assert ('Alice', 'Bob') not in computed_pairs()
def test_merge():
    """``merge`` appends a labelled column and rejects the bad inputs below."""
    t = TableSymbol('t', 'int64')
    p = TableSymbol('p', '{amount:int}')
    accounts = TableSymbol('accounts',
                           '{name: string, balance: int32, id: int32}')

    # Add a derived float column next to two existing ones.
    new_amount = (accounts.balance * 1.5).label('new')
    base_columns = accounts[['name', 'balance']]
    merged = merge(base_columns, new_amount)

    assert merged.fields == ['name', 'balance', 'new']
    expected_schema = dshape('{name: string, balance: int32, new: float64}')
    assert merged.schema == expected_schema

    # Merging ``t`` with itself or with ``p`` must raise ValueError.
    for other in (t, p):
        with pytest.raises(ValueError):
            merge(t, other)
def test_common_subexpression():
    """``common_subexpression`` finds the shared term of its arguments."""
    a = TableSymbol('a', '{x: int, y: int, z: int}')

    # A single argument is its own common subexpression.
    alone = common_subexpression(a)
    assert alone.isidentical(a)

    # A table and one of its columns share the table.
    table_and_column = common_subexpression(a, a['x'])
    assert table_and_column.isidentical(a)

    # Expressions on different columns share only the table.
    two_columns = common_subexpression(a['y'] + 1, a['x'])
    assert two_columns.isidentical(a)

    # A mapped column and the column itself share the column.
    mapped_and_plain = common_subexpression(a['x'].map(inc), a['x'])
    assert mapped_and_plain.isidentical(a['x'])
def test_selection_by_getattr():
    """A predicate built with attribute access can be used to select rows."""
    t = TableSymbol('t', '{name: string, amount: int, id: int}')

    is_alice = t.name == 'Alice'
    selected = t[is_alice]

    # Selecting rows leaves the schema alone.
    assert t.schema == selected.schema
    # The literal from the predicate appears in the string form.
    assert 'Alice' in str(selected)
def test_columns_attribute_for_backwards_compatibility():
    """``columns`` mirrors ``fields`` on a table but not on a column."""
    t = TableSymbol('t', '{name: string, amount: int, dt: datetime}')

    assert t.columns == t.fields

    # ``columns`` is advertised by dir() on the table only.
    table_attrs = dir(t)
    assert 'columns' in table_attrs

    column_attrs = dir(t.name)
    assert 'columns' not in column_attrs
def test_dir():
    """``dir()`` lists only the attributes that suit each expression."""
    t = TableSymbol('t', '{name: string, amount: int, dt: datetime}')

    # Datetime column: ``day`` is offered, ``mean`` is not.
    datetime_attrs = dir(t.dt)
    assert 'day' in datetime_attrs
    assert 'mean' not in datetime_attrs

    # Integer column: ``mean`` is offered.
    assert 'mean' in dir(t.amount)

    # Projection without a string column: ``like`` is not offered.
    projection = t[['amount', 'dt']]
    assert 'like' not in dir(projection)

    # String column: ``any`` is not offered.
    assert 'any' not in dir(t.name)
def test_by():
    """The schema of a ``by`` result is a record that contains the key."""
    t = TableSymbol('t', '{name: string, amount: int32, id: int32}')

    # ``sum`` is whatever callable the module has in scope under that name.
    total_amount = sum(t['amount'])
    grouped = by(t['name'], total=total_amount)
    print(grouped.schema)

    record = grouped.schema[0]
    assert isinstance(record, Record)
    assert str(record['name']) == 'string'
def test_schema_of_complex_interaction():
    """Dividing integer columns gives a ``real`` schema, label or not."""
    a = TableSymbol('a', '{x: int, y: int, z: int}')
    real = dshape('real')

    ratio = (a['x'] + a['y']) / a['z']
    assert ratio.schema == real

    # Attaching a label must not change the schema.
    labelled = ratio.label('foo')
    assert labelled.schema == real
def test_relational():
    """An equality comparison is boolean-typed and has a truthy ``_name``."""
    t = TableSymbol('t', '{name: string, amount: int, id: int}')

    is_alice = t['name'] == 'Alice'

    assert 'bool' in str(is_alice.dshape)
    assert is_alice._name
def test_arithmetic():
    """Smoke-test that every arithmetic operator builds an expression.

    Nothing is asserted about the results; the test only requires that
    constructing each column/column, column/scalar and scalar/column
    combination does not raise.
    """
    t = TableSymbol('t', '{x: int, y: int, z: int}')
    x, y, z = t['x'], t['y'], t['z']
    exprs = [
        x + 1, x + y, 1 + y,
        x - y, 1 - x, x - 1,
        x ** y, x ** 2, 2 ** x,
        # Multiplication row.  It previously repeated the power expressions,
        # leaving ``x * 2`` and the reflected ``2 * x`` unexercised.
        x * y, x * 2, 2 * x,
        x / y, x / 2, 2 / x,
        x % y, x % 2, 2 % x,
    ]
def test_path_issue():
    """The column added by ``transform`` appears among ``t2.children``."""
    t = TableSymbol('t', "{topic: string, word: string, result: ?float64}")

    # The lambda is only attached to the expression; this test never calls it.
    t2 = transform(t, sizes=t.result.map(lambda x: (x - MIN) * 10 / (MAX - MIN),
                                         schema='float64',
                                         name='size'))

    # Compare with ``isidentical`` rather than ``in``: membership testing
    # falls back on ``==``, which for expressions is not guaranteed to be a
    # plain structural boolean, so the check could pass without a match.
    matches = [node for node in t2.children if t2.sizes.isidentical(node)]
    assert matches
def test_path_issue():
    """The column added by ``transform`` is identical to one of the children."""
    t = TableSymbol('t', "{topic: string, word: string, result: ?float64}")

    # The lambda is only attached to the expression; this test never calls it.
    scaled = t.result.map(lambda x: (x - MIN) * 10 / (MAX - MIN),
                          schema='float64',
                          name='size')
    t2 = transform(t, sizes=scaled)

    assert builtins.any(t2.sizes.isidentical(child) for child in t2.children)
def test_like():
    """A ``like`` filter round-trips through eval and keeps the schema."""
    # NOTE: keep the name ``t``; ``eval`` resolves names from this scope and
    # the rendered expression presumably refers to the symbol as ``t``.
    t = TableSymbol('t', '{name: string, amount: int, city: string}')
    expr = like(t, name='Alice*')

    rebuilt = eval(str(expr))
    assert rebuilt.isidentical(expr)

    assert expr.schema == t.schema

    # The leading dimension of the filtered result is variable-length.
    leading_dimension = expr.dshape[0]
    assert leading_dimension == datashape.var
def test_reduction():
    """Check the dshapes reported by the common reductions."""
    t = TableSymbol("t", "{name: string, amount: int32}")

    # Summing an int32 column: any of these spellings is accepted.
    r = sum(t["amount"])
    accepted = (
        dshape("int64"),
        dshape("{amount: int64}"),
        dshape("{amount_sum: int64}"),
    )
    assert r.dshape in accepted

    # count(): an integer that mentions no column.
    assert "amount" not in str(t.count().dshape)
    assert t.count().dshape[0] in (int32, int64)
    assert "int" in str(t.count().dshape)

    # nunique(): an integer as well.
    assert "int" in str(t.nunique().dshape)

    # max()/min() of a string column stay strings.
    name_column = t["name"]
    assert "string" in str(name_column.max().dshape)
    assert "string" in str(name_column.min().dshape)
    assert "string" not in str(t.count().dshape)

    # The sum follows the column type: int stays int, real does not.
    t = TableSymbol("t", "{name: string, amount: real, id: int}")
    id_sum_shape = str(t["id"].sum().dshape)
    assert "int" in id_sum_shape
    amount_sum_shape = str(t["amount"].sum().dshape)
    assert "int" not in amount_sum_shape
def test_reduction():
    """Reductions report dshapes consistent with the reduced columns."""
    t = TableSymbol('t', '{name: string, amount: int32}')

    def shape_text(expression):
        # The dshape rendered as text, for substring checks.
        return str(expression.dshape)

    r = sum(t['amount'])
    assert r.dshape in (dshape('int64'),
                        dshape('{amount: int64}'),
                        dshape('{amount_sum: int64}'))

    # Counting rows gives an integer and names no column.
    assert 'amount' not in shape_text(t.count())
    assert t.count().dshape[0] in (int32, int64)
    assert 'int' in shape_text(t.count())
    assert 'int' in shape_text(t.nunique())

    # Extremes of a string column are strings; a count is not.
    assert 'string' in shape_text(t['name'].max())
    assert 'string' in shape_text(t['name'].min())
    assert 'string' not in shape_text(t.count())

    # Sum of an int column is integral; sum of a real column is not.
    t = TableSymbol('t', '{name: string, amount: real, id: int}')
    assert 'int' in shape_text(t['id'].sum())
    assert 'int' not in shape_text(t['amount'].sum())
def test_improper_selection():
    """Indexing a selection with another table expression must raise."""
    t = TableSymbol('t', '{x: int, y: int, z: int}')

    # The inner predicate has to be ``t['y'] > 0``.  Written as
    # ``t['y' > 0]`` it compares the string 'y' with 0 first, so the
    # exception (a TypeError on Python 3) would come from that comparison
    # and the selection logic under test would never run.
    assert raises(Exception,
                  lambda: t[t['x'] > 0][t.sort()[t['y'] > 0]])