Exemplo n.º 1
0
def test_dataset():
    """Check that a CachedDataset mirrors its namespace and records results.

    The dataset must yield the same ``discover`` result as the mapping it
    wraps, and computing an expression against it must both return the
    expected value and leave exactly that result in the supplied cache.
    """
    ns = {'t': df, 'x': 10}
    cache = dict()
    d = CachedDataset(ns, cache=cache)

    assert discover(d) == discover(ns)

    s = symbol('s', discover(d))
    # Bind the expression once so the very same object is used both for the
    # computation and as the expected cache key.
    expr = s.x * 2
    # These two checks were previously bare comparisons whose results were
    # discarded, so the test could never fail on them.
    assert compute(expr, d) == 20
    assert cache == {expr: 20}
Exemplo n.º 2
0
def test_streaming():
    """Check that the value cached for a list-backed table is not an Iterator.

    After computing an expression over a plain list of records, the entry
    stored in the dataset's cache must not be an ``Iterator`` and must
    convert to the expected list of values.
    """
    records = [{'name': 'Alice', 'x': 1}, {'name': 'Bob', 'x': 1}]
    dataset = CachedDataset({'t': records, 'x': 10}, cache=dict())

    root = symbol('s', discover(dataset))
    doubled = root.t.x * 2
    # Called for its effect on the cache; the returned value is not needed.
    compute(doubled, dataset)

    cached = dataset.cache[doubled]
    assert not isinstance(cached, Iterator)
    assert into(list, cached) == [2, 2]
Exemplo n.º 3
0
def test_pre_compute_on_multiple_datasets_is_selective():
    """Compute a DataFrame column through a dataset that also holds a CSV.

    A CachedDataset wrapping both a pandas DataFrame and a CSV resource is
    bound with ``Data``; computing ``df.amount`` through it must render the
    same string as the DataFrame's own ``amount`` column.

    NOTE(review): the test name suggests this guards against pre-computing
    the unrelated CSV source -- confirm against the compute pipeline.
    """
    from odo import CSV
    from blaze import Data
    from blaze.cached import CachedDataset

    column_names = ['id', 'name', 'amount']
    rows = [
        [1, 'Alice', 100],
        [2, 'Bob', -200],
        [3, 'Charlie', 300],
        [4, 'Denis', 400],
        [5, 'Edith', -500],
    ]
    frame = pd.DataFrame(rows, columns=column_names)
    sources = {'df': frame, 'iris': CSV(example('iris.csv'))}
    bound = Data(CachedDataset(sources))

    actual = str(compute(bound.df.amount))
    expected = str(frame.amount)
    assert actual == expected