Пример #1
0
def test_frame_slice():
    """Indexing the symbolic table must agree with ``df.iloc``."""
    # Integer positions are checked with the Series assertion.
    for position in (0, 2):
        assert_series_equal(compute(t[position], df), df.iloc[position])

    # Slices and index lists are checked with the frame assertion.
    selectors = (slice(None, 2), slice(1, 3), slice(1, None, 2), [2, 0])
    for rows in selectors:
        tm.assert_frame_equal(compute(t[rows], df), df.iloc[rows])
Пример #2
0
def test_time_field():
    """``s.time`` must match ``Series.dt.time`` and be named ``'s_time'``."""
    stamps = pd.date_range(start='20120101', end='20120102', freq='H')
    data = pd.Series(stamps)
    s = symbol('s', discover(data))

    result = compute(s.time, data)

    expected = data.dt.time
    expected.name = 's_time'
    assert_series_equal(result, expected)
Пример #3
0
def test_datetime_truncation_days():
    """Check ``truncate(days=3)`` against hand-written expected dates."""
    raw_stamps = ['2000-01-01T12:10:00Z', '2000-06-25T12:35:12Z']
    data = Series(raw_stamps, dtype='M8[ns]')

    expr = symbol('s', 'var * datetime').truncate(days=3)
    result = compute(expr, data)

    truncated = ['1999-12-31', '2000-06-25']
    expected = Series(truncated, dtype='M8[ns]', name='s')
    assert_series_equal(result, expected)
Пример #4
0
def test_datetime_truncation_days():
    """Check ``truncate(days=3)`` against hand-written expected dates."""
    raw_stamps = ['2000-01-01T12:10:00Z', '2000-06-25T12:35:12Z']
    data = Series(raw_stamps, dtype='M8[ns]')

    expr = symbol('s', 'var * datetime').truncate(days=3)
    result = compute(expr, data)

    truncated = ['1999-12-31', '2000-06-25']
    expected = Series(truncated, dtype='M8[ns]', name='s')
    assert_series_equal(result, expected)
Пример #5
0
def test_str_predicates(what, expected):
    """Apply the ``is<what>`` string predicate to ``t.name``.

    All three rows are expected to produce the same value, ``expected``, and
    the discovered measure of the result must equal the expression's measure.
    """
    bound_predicate = getattr(t.name.str, 'is' + what)
    expr = bound_predicate()

    wanted = pd.Series([expected] * 3, name='name')
    actual = compute(expr, df).reset_index(drop=True)

    assert_series_equal(wanted, actual)
    assert discover(actual).measure == expr.dshape.measure
Пример #6
0
def test_time_field():
    """``s.time`` must match ``Series.dt.time`` and be named ``'s_time'``."""
    stamps = pd.date_range(start='20120101', end='20120102', freq='H')
    data = pd.Series(stamps)
    s = symbol('s', discover(data))

    result = compute(s.time, data)

    expected = data.dt.time
    expected.name = 's_time'
    assert_series_equal(result, expected)
Пример #7
0
def test_coerce_series_string_datetime(d, tp, ptp):
    """Coercing a series to ``tp`` must match ``Series.astype(ptp)``.

    ``d`` is the raw series data, ``tp`` the target type handed to
    ``coerce`` and ``ptp`` the type handed to ``astype``.
    """
    series = pd.Series(d, name='a')
    coerced = symbol('t', discover(series)).coerce(to=tp)

    # The expression's schema must already reflect the requested type.
    assert coerced.schema == dshape(tp)

    assert_series_equal(compute(coerced, series), series.astype(ptp))
Пример #8
0
def test_subsecond(sql_with_subsecond_dts):
    """Check that `.second` keeps its sub-second part instead of being cut
    off at whole seconds.
    """
    table = data(sql_with_subsecond_dts)
    seconds = compute(
        table.A.second,
        sql_with_subsecond_dts,
        return_type=pd.Series,
    )
    expected = pd.Series([0.042, 0.047], name='A_second')
    assert_series_equal(seconds, expected)
Пример #9
0
def test_frame_slice():
    """Indexing the symbolic table must agree with ``df.iloc``."""
    # Integer positions are checked with the Series assertion.
    for position in (0, 2):
        assert_series_equal(compute(t[position], df), df.iloc[position])

    # Slices and index lists are checked with the frame assertion.
    selectors = (slice(None, 2), slice(1, 3), slice(1, None, 2), [2, 0])
    for rows in selectors:
        tm.assert_frame_equal(compute(t[rows], df), df.iloc[rows])
Пример #10
0
def test_datetime_access(attr, sql_with_dts):
    """The ``dt.<attr>()`` expression on column ``A`` must match the pandas
    ``.dt.<attr>`` accessor applied to the computed column (names ignored).
    """
    s = symbol('s', discover(sql_with_dts))
    expr = getattr(s.A.dt, attr)()

    result = compute(expr, sql_with_dts, return_type=pd.Series)
    column = compute(s.A, sql_with_dts, return_type=pd.Series)
    expected = getattr(column.dt, attr)

    assert_series_equal(result, expected, check_names=False)
Пример #11
0
def test_datetime_truncation_nanoseconds():
    """Check ``truncate(nanoseconds=20)`` against hand-written timestamps."""
    raw_stamps = [
        '2000-01-01T12:10:00.000000005',
        '2000-01-01T12:10:00.000000025',
    ]
    truncated = [
        '2000-01-01T12:10:00.000000000',
        '2000-01-01T12:10:00.000000020',
    ]
    data = Series(raw_stamps, dtype='M8[ns]')
    expected = Series(truncated, dtype='M8[ns]', name='s')

    expr = symbol('s', 'var * datetime').truncate(nanoseconds=20)
    assert_series_equal(compute(expr, data), expected)
Пример #12
0
def test_datetime_truncation_nanoseconds():
    """Check ``truncate(nanoseconds=20)`` against hand-written timestamps."""
    raw_stamps = [
        '2000-01-01T12:10:00.000000005',
        '2000-01-01T12:10:00.000000025',
    ]
    truncated = [
        '2000-01-01T12:10:00.000000000',
        '2000-01-01T12:10:00.000000020',
    ]
    data = Series(raw_stamps, dtype='M8[ns]')
    expected = Series(truncated, dtype='M8[ns]', name='s')

    expr = symbol('s', 'var * datetime').truncate(nanoseconds=20)
    assert_series_equal(compute(expr, data), expected)
Пример #13
0
def test_str_ops(ds, op, args, data, expected):
    """Run the string operation ``op(*args)`` on a ``name`` column.

    ``ds`` is the datashape of the column, ``data`` its values and
    ``expected`` the values the operation should produce.
    """
    column = pd.Series(data, name='name')
    sym = symbol('t', datashape.var * datashape.R['name': ds])

    string_method = getattr(sym.name.str, op)
    expr = string_method(*args)

    wanted = pd.Series(expected, name='name')
    actual = compute(expr, column).reset_index(drop=True)
    assert_series_equal(wanted, actual)

    # Whether the column is an option type or not must carry over to the
    # dshape of the resulting expression.
    column_measure = sym.dshape.measure.dict['name'].measure
    assert column_measure == expr.dshape.measure
Пример #14
0
def test_str_predicates(what, expected):
    """Apply the ``is<what>`` string predicate to ``nt.name``.

    The first two rows are expected to produce ``expected`` and the third
    ``None``.
    """
    bound_predicate = getattr(nt.name.str, 'is' + what)
    expr = bound_predicate()

    wanted = pd.Series([expected, expected, None], name='name')
    actual = compute(expr, ndf).reset_index(drop=True)
    assert_series_equal(wanted, actual)

    # 'discover' reports an incorrect value for this result, so the check
    # ``discover(result).measure == expr.dshape.measure`` is left out and a
    # hardcoded measure is compared instead.
    assert str(expr.dshape.measure) == '?bool'
Пример #15
0
def test_datetime_access(attr, dtype, sql_with_dts):
    """The ``dt.<attr>()`` expression on column ``A`` must have dtype
    ``dtype`` and the same values as the pandas ``.dt.<attr>`` accessor.
    """
    s = symbol('s', discover(sql_with_dts))
    expr = getattr(s.A.dt, attr)()

    result = compute(expr, sql_with_dts, return_type=pd.Series)
    assert result.dtype == dtype

    # Names and dtypes are excluded from the comparison below; the dtype was
    # already checked explicitly above.
    column = compute(s.A, sql_with_dts, return_type=pd.Series)
    expected = getattr(column.dt, attr)
    assert_series_equal(
        result,
        expected,
        check_names=False,
        check_dtype=False,
    )
Пример #16
0
def test_datetime_access():
    """Each listed datetime field of the ``when`` column must compute to 1."""
    # Every component read below (day, month, minute, second) is 1.
    moment = datetime(2010, 1, 1, 1, 1, 1)
    df = DataFrame({'name': ['Alice', 'Bob', 'Joe'],
                    'when': [moment] * 3,
                    'amount': [100, 200, 300],
                    'id': [1, 2, 3]})
    t = symbol('t', discover(df))

    for field in ['day', 'month', 'minute', 'second']:
        expr = getattr(t.when, field)
        expected = Series([1, 1, 1], name=expr._name)
        assert_series_equal(compute(expr, df), expected)
Пример #17
0
def test_datetime_access(attr):
    """The ``dt.<attr>()`` expression on ``when`` must compute to 1 per row."""
    # 2002 is used because the dayofyear 1 is the same as dayofweek 1.
    moment = datetime(2002, 1, 1, 1, 1, 1)
    df = DataFrame({'name': ['Alice', 'Bob', 'Joe'],
                    'when': [moment] * 3,
                    'amount': [100, 200, 300],
                    'id': [1, 2, 3]})
    t = symbol('t', discover(df))

    expr = getattr(t.when.dt, attr)()
    expected = Series([1, 1, 1], name=expr._name)
    assert_series_equal(compute(expr, df), expected)
Пример #18
0
def test_datetime_access():
    """Each listed datetime field of the ``when`` column must compute to 1."""
    # Every component read below (day, month, minute, second) is 1.
    moment = datetime(2010, 1, 1, 1, 1, 1)
    df = DataFrame({'name': ['Alice', 'Bob', 'Joe'],
                    'when': [moment] * 3,
                    'amount': [100, 200, 300],
                    'id': [1, 2, 3]})
    t = symbol('t', discover(df))

    for field in ['day', 'month', 'minute', 'second']:
        expr = getattr(t.when, field)
        expected = Series([1, 1, 1], name=expr._name)
        assert_series_equal(compute(expr, df), expected)
Пример #19
0
def test_sort_on_series_no_warning(recwarn):
    """Sorting a single column, with or without an explicit key, must give
    the sorted ``amount`` column without recording a ``FutureWarning``.

    ``recwarn`` is pytest's warning-recording fixture.
    """
    # ``Series.order()`` was deprecated in pandas 0.17 and later removed;
    # ``sort_values()`` is the equivalent ascending sort.
    expected = df.amount.sort_values()

    # Discard anything recorded so far so that only warnings raised by the
    # computations below are inspected.
    recwarn.clear()

    assert_series_equal(compute(t['amount'].sort('amount'), df), expected)

    # raises as assertion error if no warning occurs, same thing for below
    with pytest.raises(AssertionError):
        assert recwarn.pop(FutureWarning)

    assert_series_equal(compute(t['amount'].sort(), df), expected)
    with pytest.raises(AssertionError):
        assert recwarn.pop(FutureWarning)
Пример #20
0
def test_sort_on_series_no_warning(recwarn):
    """Sorting a single column, with or without an explicit key, must give
    the sorted ``amount`` column without recording a ``FutureWarning``.

    ``recwarn`` is pytest's warning-recording fixture.
    """
    # ``Series.order()`` was deprecated in pandas 0.17 and later removed;
    # ``sort_values()`` is the equivalent ascending sort.
    expected = df.amount.sort_values()

    # Discard anything recorded so far so that only warnings raised by the
    # computations below are inspected.
    recwarn.clear()

    assert_series_equal(compute(t['amount'].sort('amount'), df), expected)

    # raises as assertion error if no warning occurs, same thing for below
    with pytest.raises(AssertionError):
        assert recwarn.pop(FutureWarning)

    assert_series_equal(compute(t['amount'].sort(), df), expected)
    with pytest.raises(AssertionError):
        assert recwarn.pop(FutureWarning)
Пример #21
0
def test_arithmetic():
    """``+``, ``*`` and ``%`` on two columns must match the pandas results."""
    amount, ident = t['amount'], t['id']

    assert_series_equal(compute(amount + ident, df), df.amount + df.id)
    assert_series_equal(compute(amount * ident, df), df.amount * df.id)
    assert_series_equal(compute(amount % ident, df), df.amount % df.id)
Пример #22
0
def test_arithmetic():
    """``+``, ``*`` and ``%`` on two columns must match the pandas results."""
    amount, ident = t['amount'], t['id']

    assert_series_equal(compute(amount + ident, df), df.amount + df.id)
    assert_series_equal(compute(amount * ident, df), df.amount * df.id)
    assert_series_equal(compute(amount % ident, df), df.amount % df.id)
Пример #23
0
def test_map_column():
    """Mapping an increment over ``amount`` must equal ``df['amount'] + 1``."""
    incremented = t['amount'].map(lambda x: x + 1, 'int')
    assert_series_equal(compute(incremented, df), df['amount'] + 1)
Пример #24
0
def test_selection_out_of_order():
    """Selecting ``name`` first and filtering on ``amount`` afterwards must
    match the equivalent ``df.loc`` lookup.
    """
    names_under_100 = t['name'][t['amount'] < 100]
    result = compute(names_under_100, df)
    assert_series_equal(result, df.loc[df.amount < 100, 'name'])
Пример #25
0
def test_frame_broadcast():
    """A ``broadcast_collect``-ed column product must match pandas."""
    product = t.amount * t.id
    collected = broadcast_collect(expr=product)
    assert_series_equal(compute(collected, df), df.amount * df.id)
Пример #26
0
def test_series_slice():
    """Indexing the ``amount`` column must agree with ``df.amount.iloc``."""
    # Integer positions are compared with plain equality.
    for position in (0, 2):
        assert compute(t.amount[position], df) == df.amount.iloc[position]

    # Slices are compared with the Series assertion.
    for rows in (slice(None, 2), slice(1, 3), slice(1, None, 2)):
        assert_series_equal(compute(t.amount[rows], df), df.amount.iloc[rows])
Пример #27
0
def test_map_column():
    """Mapping an increment over ``amount`` must equal ``df['amount'] + 1``."""
    incremented = t['amount'].map(lambda x: x + 1, 'int')
    assert_series_equal(compute(incremented, df), df['amount'] + 1)
Пример #28
0
def test_strlen():
    """``strlen`` on the ``name`` column must give lengths 5, 3 and 5."""
    lengths = compute(t.name.strlen(), df).reset_index(drop=True)
    assert_series_equal(pd.Series([5, 3, 5], name='name'), lengths)
Пример #29
0
def test_series_columnwise():
    """Adding 1 to field ``a``, computed against a bare Series, must equal
    ``s + 1``.
    """
    s = Series([1, 2, 3], name='a')
    plus_one = symbol('t', 'var * {a: int64}').a + 1
    assert_series_equal(s + 1, compute(plus_one, s))
Пример #30
0
def test_count_keepdims_frame():
    """``count(keepdims=True)`` on a frame must give a one-element Series
    named ``'s_count'`` holding the frame's row count.
    """
    df = pd.DataFrame({'a': [1, 2, 3, np.nan]})
    s = symbol('s', discover(df))

    counted = compute(s.count(keepdims=True), df)
    expected = pd.Series([df.shape[0]], name='s_count')
    assert_series_equal(counted, expected)
Пример #31
0
def test_coerce_series():
    """Coercing the string series ``'1', '2', '3'`` to ``int64`` must give
    the integers 1, 2, 3 under the same name.
    """
    s = pd.Series(['1', '2', '3'], name='a')
    as_int = symbol('t', discover(s)).coerce(to='int64')
    expected = pd.Series([1, 2, 3], name=s.name)
    assert_series_equal(compute(as_int, s), expected)
Пример #32
0
def test_series_slice():
    """Indexing the ``amount`` column must agree with ``df.amount.iloc``."""
    # Integer positions are compared with plain equality.
    for position in (0, 2):
        assert compute(t.amount[position], df) == df.amount.iloc[position]

    # Slices are compared with the Series assertion.
    for rows in (slice(None, 2), slice(1, 3), slice(1, None, 2)):
        assert_series_equal(compute(t.amount[rows], df), df.amount.iloc[rows])
Пример #33
0
def test_map():
    """Mapping a three-argument function over the whole table, adding its
    second and third arguments, must equal ``amount + id``.
    """
    mapped = t.map(lambda _, amt, id: amt + id, 'real')
    assert_series_equal(compute(mapped, df), df['amount'] + df['id'])
Пример #34
0
def test_strlen():
    """``strlen`` on the ``name`` column must give lengths 5, 3 and 5."""
    lengths = compute(t.name.strlen(), df).reset_index(drop=True)
    assert_series_equal(pd.Series([5, 3, 5], name='name'), lengths)
Пример #35
0
def test_eq():
    """Comparing ``amount`` to 100 must match the pandas comparison."""
    is_hundred = t['amount'] == 100
    result = compute(is_hundred, df)
    assert_series_equal(result, df['amount'] == 100)
Пример #36
0
def test_summary():
    """A summary of ``id.count()`` and ``amount.sum()`` must compute to a
    Series with ``count`` 3 and ``sum`` 350.
    """
    expr = summary(count=t.id.count(), sum=t.amount.sum())
    result = compute(expr, df)
    expected = Series({'count': 3, 'sum': 350})
    assert_series_equal(result, expected)
Пример #37
0
def test_map():
    """Mapping a three-argument function over the whole table, adding its
    second and third arguments, must equal ``amount + id``.
    """
    mapped = t.map(lambda _, amt, id: amt + id, 'real')
    assert_series_equal(compute(mapped, df), df['amount'] + df['id'])
Пример #38
0
def test_selection_out_of_order():
    """Selecting ``name`` first and filtering on ``amount`` afterwards must
    match the equivalent ``df.loc`` lookup.
    """
    names_under_100 = t['name'][t['amount'] < 100]
    result = compute(names_under_100, df)
    assert_series_equal(result, df.loc[df.amount < 100, 'name'])
Пример #39
0
def test_shift(n):
    """``s.shift(n)`` on an hourly datetime series must match
    ``Series.shift(n)``.
    """
    stamps = pd.date_range(start='20120101', end='20120102', freq='H')
    data = pd.Series(stamps)
    shifted = symbol('s', discover(data)).shift(n)
    assert_series_equal(compute(shifted, data), data.shift(n))
Пример #40
0
def test_summary():
    """A summary of ``id.count()`` and ``amount.sum()`` must compute to a
    Series with ``count`` 3 and ``sum`` 350.
    """
    expr = summary(count=t.id.count(), sum=t.amount.sum())
    result = compute(expr, df)
    expected = Series({'count': 3, 'sum': 350})
    assert_series_equal(result, expected)
Пример #41
0
def test_neg():
    """Unary minus on ``amount`` must match negating the pandas column."""
    negated = -t['amount']
    result = compute(negated, df)
    assert_series_equal(result, -df['amount'])
Пример #42
0
def test_eq():
    """Comparing ``amount`` to 100 must match the pandas comparison."""
    is_hundred = t['amount'] == 100
    result = compute(is_hundred, df)
    assert_series_equal(result, df['amount'] == 100)
Пример #43
0
def test_neg_projection():
    """Unary minus on the ``amount``/``id`` projection must match negating
    the same two columns of ``df``.
    """
    # Selecting with a list of columns gives a frame in pandas, so the
    # comparison has to use the frame assertion; the Series assertion
    # rejects non-Series input in current pandas.
    # NOTE(review): assumes ``compute`` also returns a DataFrame here - confirm.
    from pandas.testing import assert_frame_equal

    result = compute(-t[['amount', 'id']], df)
    expected = -df[['amount', 'id']]
    assert_frame_equal(result, expected)
Пример #44
0
def test_neg_projection():
    """Unary minus on the ``amount``/``id`` projection must match negating
    the same two columns of ``df``.
    """
    # Selecting with a list of columns gives a frame in pandas, so the
    # comparison has to use the frame assertion; the Series assertion
    # rejects non-Series input in current pandas.
    # NOTE(review): assumes ``compute`` also returns a DataFrame here - confirm.
    from pandas.testing import assert_frame_equal

    result = compute(-t[['amount', 'id']], df)
    expected = -df[['amount', 'id']]
    assert_frame_equal(result, expected)
Пример #45
0
def test_coerce_series():
    """Coercing the string series ``'1', '2', '3'`` to ``int64`` must give
    the integers 1, 2, 3 under the same name.
    """
    s = pd.Series(['1', '2', '3'], name='a')
    as_int = symbol('t', discover(s)).coerce(to='int64')
    expected = pd.Series([1, 2, 3], name=s.name)
    assert_series_equal(compute(as_int, s), expected)
Пример #46
0
def test_neg():
    """Unary minus on ``amount`` must match negating the pandas column."""
    negated = -t['amount']
    result = compute(negated, df)
    assert_series_equal(result, -df['amount'])
Пример #47
0
def test_label():
    """``label('foo')`` must rename the computed ``amount * 10`` series."""
    labelled = (t['amount'] * 10).label('foo')

    expected = df['amount'] * 10
    expected.name = 'foo'

    assert_series_equal(compute(labelled, df), expected)
Пример #48
0
def test_field_on_series():
    """Accessing field ``s`` of symbol ``s`` against a Series named ``s``
    must return that Series.
    """
    data = Series([1, 2, 3, 4], name='s')
    field = symbol('s', 'var * int').s
    assert_series_equal(compute(field, data), data)
Пример #49
0
def test_series_columnwise():
    """Adding 1 to field ``a``, computed against a bare Series, must equal
    ``s + 1``.
    """
    s = Series([1, 2, 3], name='a')
    plus_one = symbol('t', 'var * {a: int64}').a + 1
    assert_series_equal(s + 1, compute(plus_one, s))
Пример #50
0
def test_count_keepdims_frame():
    """``count(keepdims=True)`` on a frame must give a one-element Series
    named ``'s_count'`` holding the frame's row count.
    """
    df = pd.DataFrame({'a': [1, 2, 3, np.nan]})
    s = symbol('s', discover(df))

    counted = compute(s.count(keepdims=True), df)
    expected = pd.Series([df.shape[0]], name='s_count')
    assert_series_equal(counted, expected)
Пример #51
0
def test_label():
    """``label('foo')`` must rename the computed ``amount * 10`` series."""
    labelled = (t['amount'] * 10).label('foo')

    expected = df['amount'] * 10
    expected.name = 'foo'

    assert_series_equal(compute(labelled, df), expected)
Пример #52
0
def test_shift(n):
    """``s.shift(n)`` on an hourly datetime series must match
    ``Series.shift(n)``.
    """
    stamps = pd.date_range(start='20120101', end='20120102', freq='H')
    data = pd.Series(stamps)
    shifted = symbol('s', discover(data)).shift(n)
    assert_series_equal(compute(shifted, data), data.shift(n))
Пример #53
0
def test_field_on_series():
    """Accessing field ``s`` of symbol ``s`` against a Series named ``s``
    must return that Series.
    """
    data = Series([1, 2, 3, 4], name='s')
    field = symbol('s', 'var * int').s
    assert_series_equal(compute(field, data), data)