Exemplo n.º 1
0
def test_can_trivially_create_sqlite_table():
    """Build a ``data`` expression from the bundled iris SQLite table.

    Skipped when sqlalchemy cannot be imported.  The same URI is used once
    as a plain call and once as a context manager.
    """
    pytest.importorskip('sqlalchemy')

    def iris_uri():
        # Rebuilt on every use, exactly like the inline concatenation.
        return 'sqlite:///' + example('iris.db') + '::iris'

    data(iris_uri())

    # in context
    with data(iris_uri()) as bound:
        assert bound is not None
Exemplo n.º 2
0
def test_base():
    """Compare every expression across all backends in ``sources``.

    For each ``(expr, exclusions)`` pair of the module-level ``expressions``
    mapping, a reference value is computed with ``t`` bound to ``base``.
    Every source that is not (by identity) in ``exclusions`` must then
    yield an equal result.
    """
    for expr, exclusions in expressions.items():
        if iscollection(expr.dshape):
            as_array = into(np.ndarray, expr._subs({t: data(base, t.dshape)}))
            model = into(DataFrame, as_array)
        else:
            model = compute(expr._subs({t: data(base, t.dshape)}))
        print('\nexpr: %s\n' % expr)

        for source in sources:
            # Exclusions are matched by object identity, not by equality.
            if any(source is excluded for excluded in exclusions):
                continue
            print('%s <- %s' % (typename(model), typename(source)))
            bound = data(source)

            if iscollection(expr.dshape):
                result = into(type(model), expr._subs({t: bound}))
                if isscalar(expr.dshape.measure):
                    # Compared as sets: order and duplicates are ignored.
                    assert set(into(list, result)) == set(into(list, model))
                else:
                    assert df_eq(result, model)
                continue

            if isrecord(expr.dshape):
                result = compute(expr._subs({t: bound}))
                assert into(tuple, result) == into(tuple, model)
                continue

            result = compute(expr._subs({t: bound}))
            try:
                # Unwrap results that expose a ``scalar()`` method.
                result = result.scalar()
            except AttributeError:
                pass
            assert result == model
Exemplo n.º 3
0
def test_can_trivially_create_sqlite_table():
    """Smoke-test ``data`` on a ``sqlite:///...::iris`` URI.

    The test is skipped if sqlalchemy is not importable.  It only checks
    that construction works, both directly and inside a ``with`` block.
    """
    pytest.importorskip('sqlalchemy')

    def make_uri():
        # Same three-part concatenation as before, evaluated per call.
        return 'sqlite:///' + example('iris.db') + '::iris'

    data(make_uri())

    # in context
    with data(make_uri()) as expr:
        assert expr is not None
Exemplo n.º 4
0
def test_base(expressions, sources):
    """Compare every expression across all available backends.

    Parameters
    ----------
    expressions
        Mapping of expression -> collection of sources to skip for it.
    sources
        Five-element sequence whose first item is the reference backend;
        ``None`` entries are skipped.  (Presumably both arguments are
        pytest fixtures -- confirm against the surrounding module.)
    """
    # Only the first element is used; unpacking still enforces the arity.
    base, _x, _sql, _bc, _mongo = sources
    for expr, exclusions in expressions.items():
        if iscollection(expr.dshape):
            as_array = into(np.ndarray, expr._subs({t: data(base, t.dshape)}))
            model = into(DataFrame, as_array)
        else:
            model = compute(expr._subs({t: data(base, t.dshape)}))
        print('\nexpr: %s\n' % expr)

        for source in sources:
            # Skip missing backends and identity-matched exclusions.
            if source is None or any(source is skip for skip in exclusions):
                continue
            print('%s <- %s' % (typename(model), typename(source)))
            bound = data(source)

            if iscollection(expr.dshape):
                result = into(type(model), expr._subs({t: bound}))
                if isscalar(expr.dshape.measure):
                    # Compared as sets: order and duplicates are ignored.
                    assert set(into(list, result)) == set(into(list, model))
                else:
                    assert df_eq(result, model)
                continue

            if isrecord(expr.dshape):
                result = compute(expr._subs({t: bound}))
                assert into(tuple, result) == into(tuple, model)
                continue

            result = compute(expr._subs({t: bound}))
            try:
                # Unwrap results that expose a ``scalar()`` method.
                result = result.scalar()
            except AttributeError:
                pass
            assert result == model
Exemplo n.º 5
0
def test_asarray_fails_on_different_column_names():
    """``data`` raises ``ValueError`` when ``fields`` differ from the columns.

    The error message must contain the ``relabel`` hint checked below.
    """
    frame = pd.DataFrame({
        'first': [2., 5., 3.],
        'second': [4., 1., 4.],
        'third': [6., 4., 3.],
    })
    with pytest.raises(ValueError) as excinfo:
        data(frame, fields=['a', 'b', 'c'])

    hint = ("data(data_source).relabel(first='a', second='b', third='c')"
            " to rename")
    assert hint in str(excinfo.value)
Exemplo n.º 6
0
def test_no_name_for_simple_data():
    """Expressions over a bare list or scalar have a falsy ``_name``."""
    from_list = data([1, 2, 3])
    assert expr_repr(from_list) == '    \n0  1\n1  2\n2  3'
    assert not from_list._name

    from_scalar = data(1)
    assert not from_scalar._name
    assert expr_repr(from_scalar) == '1'
Exemplo n.º 7
0
def test___array__():
    """Expressions convert to numpy arrays and can be assigned into one."""
    ones = np.ones(4)
    whole = data(ones)
    converted = np.array(whole + 1)
    assert (converted == ones + 1).all()

    # Assign an expression over the first half into the second half.
    first_half = data(ones[:2])
    ones[2:] = first_half + 1
    assert ones.tolist() == [1, 1, 2, 2]
Exemplo n.º 8
0
def test_no_name_for_simple_data():
    """A list-backed and a scalar-backed expression both lack a name."""
    listed = data([1, 2, 3])
    assert expr_repr(listed) == '    \n0  1\n1  2\n2  3'
    assert not listed._name

    single = data(1)
    assert not single._name
    assert expr_repr(single) == '1'
Exemplo n.º 9
0
def test_pickle_roundtrip():
    """Expressions survive ``pickle.dumps`` / ``pickle.loads`` unchanged."""
    scalar = data(1)
    scalar_copy = pickle.loads(pickle.dumps(scalar))
    assert scalar.isidentical(scalar_copy)

    shifted_copy = pickle.loads(pickle.dumps(scalar + 1))
    assert (scalar + 1).isidentical(shifted_copy)

    # Array-backed expression: compare payload and dshape separately.
    array_expr = data(np.array([1, 2, 3]))
    array_copy = pickle.loads(pickle.dumps(array_expr))
    assert (array_expr.data == array_copy.data).all()
    assert_dshape_equal(array_expr.dshape, array_copy.dshape)
Exemplo n.º 10
0
def test___array__():
    """``np.array`` accepts an expression; so does numpy slice assignment."""
    base = np.ones(4)
    expr = data(base)
    as_array = np.array(expr + 1)
    assert (as_array == base + 1).all()

    # Write ``head + 1`` into the tail of the same backing array.
    head = data(base[:2])
    base[2:] = head + 1
    assert base.tolist() == [1, 1, 2, 2]
Exemplo n.º 11
0
def test_pickle_roundtrip():
    """Round-trip scalar, derived and array-backed expressions via pickle."""
    one = data(1)
    restored_one = pickle.loads(pickle.dumps(one))
    assert one.isidentical(restored_one)

    restored_sum = pickle.loads(pickle.dumps(one + 1))
    assert (one + 1).isidentical(restored_sum)

    # For array data, check the payload element-wise plus the dshape.
    original = data(np.array([1, 2, 3]))
    restored = pickle.loads(pickle.dumps(original))
    assert (original.data == restored.data).all()
    assert_dshape_equal(original.dshape, restored.dshape)
Exemplo n.º 12
0
def test_data_on_iterator_refies_data():
    """An iterator passed to ``data`` can be converted to a list twice."""
    expected = [1, 2, 3]
    expr = data(iter(expected))

    # Both conversions must yield the full list.
    for _ in range(2):
        assert into(list, expr) == expected

    # in context
    with data(iter(expected)) as bound:
        assert bound is not None
Exemplo n.º 13
0
def test_data_on_iterator_refies_data():
    """``data(iter(...))`` yields the same list on repeated conversion."""
    values = [1, 2, 3]
    from_iterator = data(iter(values))

    first_pass = into(list, from_iterator)
    assert first_pass == values
    second_pass = into(list, from_iterator)
    assert second_pass == values

    # in context
    with data(iter(values)) as managed:
        assert managed is not None
Exemplo n.º 14
0
def test_asarray_fails_on_different_column_names():
    """Mismatched ``fields`` make ``data`` raise with a ``relabel`` hint."""
    columns = {'first': [2., 5., 3.], 'second': [4., 1., 4.],
               'third': [6., 4., 3.]}
    frame = pd.DataFrame(columns)
    with pytest.raises(ValueError) as excinfo:
        data(frame, fields=['a', 'b', 'c'])

    raised_text = str(excinfo.value)
    expected_hint = ("data(data_source).relabel(first='a', second='b',"
                     " third='c') to rename")
    assert expected_hint in raised_text
Exemplo n.º 15
0
def test_pickle_roundtrip():
    """Expressions round-trip through pickle at ``HIGHEST_PROTOCOL``."""
    scalar = data(1)
    scalar_bytes = pickle.dumps(scalar, protocol=pickle.HIGHEST_PROTOCOL)
    assert scalar.isidentical(pickle.loads(scalar_bytes))

    sum_bytes = pickle.dumps(scalar + 1, protocol=pickle.HIGHEST_PROTOCOL)
    assert (scalar + 1).isidentical(pickle.loads(sum_bytes))

    # Array-backed expression: compare payload and dshape separately.
    array_expr = data(np.array([1, 2, 3]))
    array_bytes = pickle.dumps(array_expr, protocol=pickle.HIGHEST_PROTOCOL)
    array_copy = pickle.loads(array_bytes)
    assert (array_expr.data == array_copy.data).all()
    assert_dshape_equal(array_expr.dshape, array_copy.dshape)
Exemplo n.º 16
0
def test_pickle_roundtrip():
    """Pickle round-trips keep expressions identical (highest protocol)."""
    one = data(1)
    restored_one = pickle.loads(
        pickle.dumps(one, protocol=pickle.HIGHEST_PROTOCOL))
    assert one.isidentical(restored_one)

    restored_sum = pickle.loads(
        pickle.dumps(one + 1, protocol=pickle.HIGHEST_PROTOCOL))
    assert (one + 1).isidentical(restored_sum)

    # For array data, check the payload element-wise plus the dshape.
    original = data(np.array([1, 2, 3]))
    restored = pickle.loads(
        pickle.dumps(original, protocol=pickle.HIGHEST_PROTOCOL))
    assert (original.data == restored.data).all()
    assert_dshape_equal(original.dshape, restored.dshape)
Exemplo n.º 17
0
def test_csv_with_trailing_commas():
    """Check discovered column names for CSVs ending each row with a comma.

    Two headers are exercised: one with a trailing space after the last
    comma and one without.
    """
    cases = [
        # note the trailing space in the header
        ('a,b,c, \n1, 2, 3, ', ['a', 'b', 'c', '']),
        # NO trailing space in the header
        ('a,b,c,\n1, 2, 3, ', ['a', 'b', 'c', 'Unnamed: 3']),
    ]
    for contents, expected_names in cases:
        with tmpfile('.csv') as path:
            with open(path, 'wt') as handle:
                handle.write(contents)
            csv = CSV(path)
            assert expr_repr(data(path))
            assert discover(csv).measure.names == expected_names
Exemplo n.º 18
0
def test_coerce_date_and_datetime():
    """Date-like scalars repr like their source; NaN datetimes become NaT."""
    today = datetime.datetime.now().date()
    assert expr_repr(data(today)) == repr(today)

    stamp = pd.Timestamp.now()
    assert expr_repr(data(stamp)) == repr(stamp)

    # Both spellings of NaN render as NaT under a datetime dshape.
    for missing in (np.nan, float('nan')):
        as_datetime = data(missing, dshape='datetime')
        assert expr_repr(as_datetime) == repr(pd.NaT)
Exemplo n.º 19
0
def test_str_does_not_repr():
    """``str`` of an expression gives its structural form (GH #1240)."""
    # see GH issue #1240.
    rows = [('aa', 1), ('b', 2)]
    table = data(rows, name="ZZZ", dshape='2 * {a: string, b: int64}')
    merged = transform(table, c=table.a.str_len() + table.b)
    expected = ("Merge(_child=ZZZ, children=(ZZZ, "
                "label(str_len(_child=ZZZ.a) + ZZZ.b, 'c')))")
    assert str(merged) == expected
Exemplo n.º 20
0
def test_coerce_date_and_datetime():
    """``data`` keeps dates and timestamps and maps NaN datetimes to NaT."""
    a_date = datetime.datetime.now().date()
    date_expr = data(a_date)
    assert expr_repr(date_expr) == repr(a_date)

    a_timestamp = pd.Timestamp.now()
    timestamp_expr = data(a_timestamp)
    assert expr_repr(timestamp_expr) == repr(a_timestamp)

    nat_repr = repr(pd.NaT)
    # numpy's NaN and the builtin float NaN must behave the same way.
    assert expr_repr(data(np.nan, dshape='datetime')) == nat_repr
    assert expr_repr(data(float('nan'), dshape='datetime')) == nat_repr
Exemplo n.º 21
0
def test_dataframe_backed_repr_complex():
    """``expr_repr`` of a filtered DataFrame-backed expression must not raise."""
    records = [
        (1, 'Alice', 100),
        (2, 'Bob', -200),
        (3, 'Charlie', 300),
        (4, 'Denis', 400),
        (5, 'Edith', -500),
    ]
    frame = pd.DataFrame(records, columns=['id', 'name', 'balance'])
    accounts = data(frame)
    negative = accounts[accounts['balance'] < 0]
    # No assertion: the call completing without error is the check.
    expr_repr(negative)
Exemplo n.º 22
0
def test_str_does_not_repr():
    """``str`` of a ``transform`` result is its structural form (GH #1240)."""
    # see GH issue #1240.
    rows = [('aa', 1), ('b', 2)]
    table = data(rows, name="ZZZ", dshape='2 * {a: string, b: int64}')
    merged = transform(table, c=table.a.str.len() + table.b)
    expected = ("Merge(_child=ZZZ, children=(ZZZ, "
                "label(len(_child=ZZZ.a) + ZZZ.b, 'c')))")
    assert str(merged) == expected
Exemplo n.º 23
0
def test_incompatible_types():
    """Comparing an ``id`` column to a string raises; an int comparison works.

    Uses the module-level ``L`` rows (defined outside this view).
    """
    frame = pd.DataFrame(L, columns=['id', 'name', 'amount'])
    accounts = data(frame)

    with pytest.raises(ValueError):
        # Building the comparison is itself expected to raise.
        accounts.id == 'foo'

    matches = compute(accounts.id == 3)
    wanted = pd.Series([False, False, True, False, False], name='id')
    tm.assert_series_equal(matches, wanted)
Exemplo n.º 24
0
def test_table_resource():
    """``data`` on a CSV filename is backed by ``CSV`` and holds its rows."""
    with tmpfile('csv') as path:
        row_shape = dshape('var * {a: int, b: int}')
        backing = CSV(path)
        rows = [[1, 2], [10, 20]]
        append(backing, rows, dshape=row_shape)

        expr = data(path)
        assert isinstance(expr.data, CSV)
        computed_rows = into(list, compute(expr))
        assert computed_rows == into(list, backing)
Exemplo n.º 25
0
def test_partially_bound_expr():
    """An expression mixing bound data and a free symbol reprs symbolically."""
    records = [
        (1, 'Alice', 100),
        (2, 'Bob', -200),
        (3, 'Charlie', 300),
        (4, 'Denis', 400),
        (5, 'Edith', -500),
    ]
    frame = pd.DataFrame(records, columns=['id', 'name', 'balance'])
    bound = data(frame, name='data')
    threshold = symbol('a', 'int')
    names_above = bound.name[bound.balance > threshold]
    assert expr_repr(names_above) == 'data[data.balance > a].name'
Exemplo n.º 26
0
def test_dataframe_backed_repr_complex():
    """Rendering a boolean-filtered DataFrame expression must not raise."""
    column_names = ['id', 'name', 'balance']
    rows = [(1, 'Alice', 100), (2, 'Bob', -200), (3, 'Charlie', 300),
            (4, 'Denis', 400), (5, 'Edith', -500)]
    table = data(pd.DataFrame(rows, columns=column_names))
    overdrawn = table[table['balance'] < 0]
    # The test passes as long as this call completes.
    expr_repr(overdrawn)
Exemplo n.º 27
0
def test_table_resource():
    """A filename passed to ``data`` resolves to a ``CSV`` resource."""
    with tmpfile('csv') as csv_path:
        shape = dshape('var * {a: int, b: int}')
        source = CSV(csv_path)
        append(source, [[1, 2], [10, 20]], dshape=shape)

        table = data(csv_path)
        assert isinstance(table.data, CSV)
        # Computing the expression must give the same rows as the CSV.
        from_expr = into(list, compute(table))
        assert from_expr == into(list, source)
Exemplo n.º 28
0
def test_incompatible_types():
    """``id == 'foo'`` raises ``ValueError``; ``id == 3`` computes a mask.

    The rows come from the module-level ``L`` (defined outside this view).
    """
    table = data(pd.DataFrame(L, columns=['id', 'name', 'amount']))

    with pytest.raises(ValueError):
        # The expression is only built for the error it should trigger.
        table.id == 'foo'

    is_three = table.id == 3
    result = compute(is_three)
    expected = pd.Series([False, False, True, False, False], name='id')
    tm.assert_series_equal(result, expected)
Exemplo n.º 29
0
def test_csv_with_trailing_commas():
    """Discovered names for headers that end in a comma.

    A header ending in ``', '`` gives an empty last name; one ending in
    ``','`` gives ``'Unnamed: 3'``.
    """
    def check(contents, expected_names):
        # Write the text to a temporary CSV, then repr and discover it.
        with tmpfile('.csv') as fn:
            with open(fn, 'wt') as f:
                f.write(contents)
            csv = CSV(fn)
            assert expr_repr(data(fn))
            assert discover(csv).measure.names == expected_names

    # note the trailing space in the header
    check('a,b,c, \n1, 2, 3, ', ['a', 'b', 'c', ''])
    # NO trailing space in the header
    check('a,b,c,\n1, 2, 3, ', ['a', 'b', 'c', 'Unnamed: 3'])
Exemplo n.º 30
0
def test_partially_bound_expr():
    """The repr of a partially bound expression stays symbolic."""
    rows = [(1, 'Alice', 100), (2, 'Bob', -200), (3, 'Charlie', 300),
            (4, 'Denis', 400), (5, 'Edith', -500)]
    frame = pd.DataFrame(rows, columns=['id', 'name', 'balance'])
    table = data(frame, name='data')
    # ``a`` stays unbound, so the expression cannot be fully computed.
    free = symbol('a', 'int')
    selection = table.name[table.balance > free]
    assert expr_repr(selection) == 'data[data.balance > a].name'
Exemplo n.º 31
0
def test_head_compute():
    """``head(n)`` on a SQL-backed expression reprs exactly ``n`` rows."""
    frame = tm.makeMixedDataFrame()
    sym = symbol('t', discover(frame))
    db = into('sqlite:///:memory:::t', frame, dshape=sym.dshape)
    n = 2
    rendered = expr_repr(data(db).head(n))

    # No truncation marker expected; drop the header line before counting.
    assert '...' not in rendered
    body_lines = rendered.split('\n')[1:]
    assert len(body_lines) == n
Exemplo n.º 32
0
def test_head_compute():
    """The repr of ``d.head(n)`` has ``n`` data lines and no ellipsis."""
    source_frame = tm.makeMixedDataFrame()
    placeholder = symbol('t', discover(source_frame))
    sql_table = into('sqlite:///:memory:::t', source_frame,
                     dshape=placeholder.dshape)
    n = 2
    bound = data(sql_table)

    text = expr_repr(bound.head(n))
    assert '...' not in text
    # The first line is the header; everything after it is data.
    _header, *data_lines = text.split('\n')
    assert len(data_lines) == n
Exemplo n.º 33
0
def test_all_string_infer_header():
    """An all-string CSV keeps its header row when ``has_header=True``."""
    csv_text = """x,tl,z
Be careful driving.,hy,en
Be careful.,hy,en
Can you translate this for me?,hy,en
Chicago is very different from Boston.,hy,en
Don't worry.,hy,en"""
    with tmpfile('.csv') as path:
        with open(path, 'w') as handle:
            handle.write(csv_text)

        expr = data(path, has_header=True)
        assert expr.data.has_header
        assert expr.fields == ['x', 'tl', 'z']
Exemplo n.º 34
0
def test_all_string_infer_header():
    """With ``has_header=True`` the first CSV row supplies the field names."""
    contents = """x,tl,z
Be careful driving.,hy,en
Be careful.,hy,en
Can you translate this for me?,hy,en
Chicago is very different from Boston.,hy,en
Don't worry.,hy,en"""
    expected_fields = ['x', 'tl', 'z']
    with tmpfile('.csv') as csv_path:
        with open(csv_path, 'w') as out:
            out.write(contents)

        table = data(csv_path, has_header=True)
        assert table.data.has_header
        assert table.fields == expected_fields
Exemplo n.º 35
0
def test_explicit_override_dshape():
    """An explicit ``dshape=`` given to ``data`` is kept on the expression."""
    # If not overridden, the dshape discovery will return:
    # var * {a: int64, b: string, c: int64}.
    csv_text = textwrap.dedent("""\
                        a,b,c
                        1,x,3
                        2,y,4
                        3,z,5
                        """)
    requested = dshape("""var * {a: ?float64,
                        b: ?string,
                        c: ?float32}""")
    with tmpfile('.csv') as path:
        with open(path, 'w') as handle:
            handle.write(csv_text)
        expr = data(path, dshape=requested)
        assert expr.dshape == requested
Exemplo n.º 36
0
def test_explicit_override_dshape():
    """``data(..., dshape=ds)`` reports ``ds`` rather than a discovered one."""
    override = dshape("""var * {a: ?float64,
                        b: ?string,
                        c: ?float32}""")
    # If not overridden, the dshape discovery will return:
    # var * {a: int64, b: string, c: int64}.
    contents = textwrap.dedent("""\
                        a,b,c
                        1,x,3
                        2,y,4
                        3,z,5
                        """)
    with tmpfile('.csv') as csv_path:
        with open(csv_path, 'w') as out:
            out.write(contents)
        table = data(csv_path, dshape=override)
        assert table.dshape == override
Exemplo n.º 37
0
def sources():
    """Build the same five-row accounts table in several backends.

    Returns
    -------
    tuple
        ``(df, x, sql, bc, mongo)``: a pandas DataFrame, its numpy
        conversion, a SQLite table, a bcolz ctable and a MongoDB
        collection.  ``sql``, ``bc`` and ``mongo`` are ``None`` when the
        corresponding backend cannot be set up.

    Notes
    -----
    Reads the module-level symbol ``t`` (defined outside this view) for
    the SQL table's dshape.
    """
    L = [[ 100, 1, 'Alice'],
         [ 200, 2, 'Bob'],
         [ 300, 3, 'Charlie'],
         [-400, 4, 'Dan'],
         [ 500, 5, 'Edith']]

    df = DataFrame(L, columns=['amount', 'id', 'name'])

    x = into(np.ndarray, df)

    try:
        # Imported only to fail fast when sqlalchemy is unavailable.
        import sqlalchemy  # noqa: F401
        sql = data('sqlite:///:memory:::accounts', dshape=t.dshape)
        into(sql, L)
    except Exception:
        # Best effort: any set-up failure disables the SQL source, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        sql = None

    try:
        import bcolz
        bc = into(bcolz.ctable, df)
    except ImportError:
        bc = None

    try:
        import pymongo
    except ImportError:
        mongo = None
    else:
        try:
            db = pymongo.MongoClient().db

            try:
                coll = db._test_comprehensive
            except AttributeError:
                # Fall back to item access when attribute access fails.
                coll = db['_test_comprehensive']

            # Start from an empty collection on every call.
            coll.drop()
            mongo = into(coll, df)
        except pymongo.errors.ConnectionFailure:
            mongo = None

    return df, x, sql, bc, mongo
Exemplo n.º 38
0
def test_repr():
    """``expr_repr`` shows data for small tables and truncates large ones.

    Relies on the module-level ``t`` expression (defined outside this
    view).
    """
    names_text = expr_repr(t['name'])
    print(names_text)
    assert isinstance(names_text, str)
    for present in ('Alice', 'Bob'):
        assert present in names_text
    assert '...' not in names_text

    amounts_text = expr_repr(t['amount'] + 1)
    print(amounts_text)
    assert '101' in amounts_text

    # A 100-row table must be shortened and marked with an ellipsis.
    squares = tuple((i, i**2) for i in range(100))
    big = data(squares, fields=['x', 'y'])
    assert big.dshape == dshape('100 * {x: int64, y: int64}')

    big_text = expr_repr(big)
    print(big_text)
    assert len(big_text.split('\n')) < 20
    assert '...' in big_text
Exemplo n.º 39
0
def test_repr():
    """Check repr contents for a column, a derived column and a big table.

    Uses the module-level ``t`` expression (defined outside this view).
    """
    column_repr = expr_repr(t['name'])
    print(column_repr)
    assert isinstance(column_repr, str)
    assert 'Alice' in column_repr
    assert 'Bob' in column_repr
    assert '...' not in column_repr

    derived_repr = expr_repr(t['amount'] + 1)
    print(derived_repr)
    assert '101' in derived_repr

    rows = tuple((i, i**2) for i in range(100))
    t2 = data(rows, fields=['x', 'y'])
    assert t2.dshape == dshape('100 * {x: int64, y: int64}')

    # The long table must be truncated to under 20 lines with '...'.
    table_repr = expr_repr(t2)
    print(table_repr)
    line_count = len(table_repr.split('\n'))
    assert line_count < 20
    assert '...' in table_repr
Exemplo n.º 40
0
def test_str_does_not_repr():
    """``str`` of a ``transform`` result is its structural form (GH #1240).

    Both sides go through ``normalize`` (defined outside this view)
    before being compared.
    """
    # see GH issue #1240.
    rows = [('aa', 1), ('b', 2)]
    table = data(rows, name="ZZZ", dshape='2 * {a: string, b: int64}')
    merged = transform(table, c=table.a.str.len() + table.b)
    actual = normalize(str(merged))
    expected = normalize("""
            Merge(
                args=(ZZZ, label(len(_child=ZZZ.a) + ZZZ.b, 'c')),
                _varargsexpr=VarArgsExpr(
                    _inputs=(ZZZ, label(len(_child=ZZZ.a) + ZZZ.b, 'c'))
                ),
                _shape=(2,)
            )
        """)
    assert actual == expected
Exemplo n.º 41
0
def sources():
    """Create one small accounts table in each supported backend.

    Returns
    -------
    tuple
        ``(df, x, sql, bc, mongo)``: a pandas DataFrame, its numpy
        conversion, a SQLite table, a bcolz ctable and a MongoDB
        collection.  The last three are ``None`` whenever their backend
        cannot be set up.

    Notes
    -----
    The SQL table's dshape comes from the module-level symbol ``t``
    (defined outside this view).
    """
    L = [[100, 1, 'Alice'], [200, 2, 'Bob'], [300, 3, 'Charlie'],
         [-400, 4, 'Dan'], [500, 5, 'Edith']]

    df = DataFrame(L, columns=['amount', 'id', 'name'])

    x = into(np.ndarray, df)

    try:
        # Imported only to fail fast when sqlalchemy is unavailable.
        import sqlalchemy  # noqa: F401
        sql = data('sqlite:///:memory:::accounts', dshape=t.dshape)
        into(sql, L)
    except Exception:
        # Best effort: any set-up failure disables the SQL source, but
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        sql = None

    try:
        import bcolz
        bc = into(bcolz.ctable, df)
    except ImportError:
        bc = None

    try:
        import pymongo
    except ImportError:
        mongo = None
    else:
        try:
            db = pymongo.MongoClient().db

            try:
                coll = db._test_comprehensive
            except AttributeError:
                # Fall back to item access when attribute access fails.
                coll = db['_test_comprehensive']

            # Start from an empty collection on every call.
            coll.drop()
            mongo = into(coll, df)
        except pymongo.errors.ConnectionFailure:
            mongo = None

    return df, x, sql, bc, mongo
Exemplo n.º 42
0
def test_scalar_sql_compute():
    """A scalar reduction over a SQL-backed table reprs as its value.

    Loads the module-level ``tdata`` (defined outside this view) into an
    in-memory SQLite table.
    """
    table_shape = dshape('var * {name: string, amount: int}')
    sql_table = into('sqlite:///:memory:::t', tdata, dshape=table_shape)
    total = data(sql_table).amount.sum()
    assert expr_repr(total) == '300'
Exemplo n.º 43
0
def test_isidentical_regr():
    """A field of NaN-holding data is identical to itself (issue #1387)."""
    # regression test for #1387
    nan_rows = [(np.nan,), (np.nan,)]
    records = np.array(nan_rows, dtype=[('a', 'float64')])
    expr = data(records)
    assert expr.a.isidentical(expr.a)
Exemplo n.º 44
0
def test_table_raises_on_inconsistent_inputs():
    """Passing both ``schema=`` and ``dshape=`` to ``data`` raises.

    The ``ValueError`` message must tell the caller to specify only one
    of the two keywords.  Uses the module-level ``tdata`` (defined
    outside this view).
    """
    with pytest.raises(ValueError) as excinfo:
        # The call is expected to raise, so its result is not bound.
        data(tdata,
             schema='{name: string, amount: float32}',
             dshape=dshape("{name: string, amount: float32}"))
    assert "specify one of schema= or dshape= keyword" in str(excinfo.value)
Exemplo n.º 45
0
def test_nameless_data():
    """The repr of an unnamed expression contains the repr of its data."""
    rows = [('a', 1)]
    rendered = expr_repr(data(rows))
    assert repr(rows) in rendered
Exemplo n.º 46
0
def test_DataFrame():
    """``pd.DataFrame`` accepts an expression over a structured array."""
    record_dtype = [('a', 'i4'), ('b', 'f4')]
    records = np.array([(1, 2), (1., 2.)], dtype=record_dtype)
    expr = data(records)
    frame = pd.DataFrame(expr)
    assert isinstance(frame, pd.DataFrame)
Exemplo n.º 47
0
def test_table_raises_on_inconsistent_inputs():
    """``data`` rejects ``schema=`` and ``dshape=`` given together.

    Uses the module-level ``tdata`` (defined outside this view).
    """
    record_type = '{name: string, amount: float32}'
    with pytest.raises(ValueError) as excinfo:
        data(tdata, schema=record_type, dshape=dshape(record_type))
    message = str(excinfo.value)
    assert "specify one of schema= or dshape= keyword" in message
Exemplo n.º 48
0
def test_highly_nested_repr():
    """The repr of ``head()`` over deeply nested data shows the string."""
    nested = [[0, [[1, 2], [3]], 'abc']]
    head_repr = expr_repr(data(nested).head())
    assert 'abc' in head_repr
Exemplo n.º 49
0
def test_create_with_raw_data():
    """``data`` with ``fields=`` discovers the schema and keeps the data.

    Uses the module-level ``tdata`` (defined outside this view).
    """
    expr = data(tdata, fields=['name', 'amount'])
    expected_schema = dshape('{name: string, amount: int64}')
    assert expr.schema == expected_schema
    assert expr.name
    assert expr.data == tdata
Exemplo n.º 50
0
def test_nameless_data():
    """``expr_repr`` of unnamed data embeds ``repr`` of the raw value."""
    raw = [('a', 1)]
    expr = data(raw)
    assert repr(raw) in expr_repr(expr)
Exemplo n.º 51
0
def test_coerce_timedelta():
    """A ``timedelta`` wrapped by ``data`` reprs like the raw timedelta."""
    delta = datetime.timedelta(days=1, hours=2, minutes=3)
    rendered = expr_repr(data(delta))

    assert rendered == repr(delta)
Exemplo n.º 52
0
def test_isidentical_regr():
    """``isidentical`` holds for a NaN-backed field against itself (#1387)."""
    # regression test for #1387
    field_dtype = [('a', 'float64')]
    nan_table = np.array([(np.nan, ), (np.nan, )], dtype=field_dtype)
    bound = data(nan_table)
    assert bound.a.isidentical(bound.a)
Exemplo n.º 53
0
def test_scalar_sql_compute():
    """Summing a SQL-backed column reprs as the plain total.

    The module-level ``tdata`` (defined outside this view) is loaded into
    an in-memory SQLite table first.
    """
    shape = dshape('var * {name: string, amount: int}')
    in_memory = into('sqlite:///:memory:::t', tdata, dshape=shape)
    bound = data(in_memory)
    rendered = expr_repr(bound.amount.sum())
    assert rendered == '300'
Exemplo n.º 54
0
def test_compute_on_Data_gives_back_data():
    """Computing a bare ``data`` expression returns the wrapped list."""
    wrapped = data([1, 2, 3])
    assert compute(wrapped) == [1, 2, 3]
Exemplo n.º 55
0
def test_compute_on_Data_gives_back_data():
    """``compute(data(x))`` equals the list that was passed in."""
    result = compute(data([1, 2, 3]))
    assert result == [1, 2, 3]
Exemplo n.º 56
0
def test_create_with_raw_data():
    """Raw rows plus ``fields=`` give a named expression with int64 amounts.

    Uses the module-level ``tdata`` (defined outside this view).
    """
    field_names = ['name', 'amount']
    table = data(tdata, fields=field_names)
    assert table.schema == dshape('{name: string, amount: int64}')
    assert table.name
    assert table.data == tdata
Exemplo n.º 57
0
def test_create_with_schema():
    """A ``schema=`` string passed to ``data`` becomes the schema.

    Uses the module-level ``tdata`` (defined outside this view).
    """
    expr = data(tdata, schema='{name: string, amount: float32}')
    expected = dshape('{name: string, amount: float32}')
    assert expr.schema == expected
Exemplo n.º 58
0
def test_coerce_timedelta():
    """``expr_repr`` of a timedelta expression equals the timedelta's repr."""
    span = datetime.timedelta(days=1, hours=2, minutes=3)
    expr = data(span)

    expected = repr(span)
    assert expr_repr(expr) == expected
Exemplo n.º 59
0
def test_highly_nested_repr():
    """Deeply nested list data still shows its string in ``head()``'s repr."""
    rows = [[0, [[1, 2], [3]], 'abc']]
    expr = data(rows)
    rendered = expr_repr(expr.head())
    assert 'abc' in rendered
Exemplo n.º 60
0
def test_create_with_schema():
    """``data(..., schema=...)`` reports that schema as a dshape.

    Uses the module-level ``tdata`` (defined outside this view).
    """
    schema_text = '{name: string, amount: float32}'
    table = data(tdata, schema=schema_text)
    assert table.schema == dshape(schema_text)