示例#1
0
    def setup(self):
        """Create the source table and store ibis expressions on ``self``.

        A pandas DataFrame with ``30 * 200000`` rows is generated and
        registered with the ibis pandas backend under the name ``'df'``.
        Group-by, cast, sort and rolling-window expressions are then built
        from that table and kept as attributes; no ibis expression is
        executed in this method.
        """
        n_days = 30
        rows_per_day = int(2e5)
        n = n_days * rows_per_day

        # Keys are listed in the order the columns should appear in the frame.
        columns = {
            'key': np.random.choice(16000, size=n),
            'low_card_key': np.random.choice(30, size=n),
            'value': np.random.rand(n),
            'timestamps': pd.date_range(
                start='now', periods=n, freq='s').values,
            'timestamp_strings': pd.date_range(
                start='now', periods=n, freq='s').values.astype(str),
            # Each of the 30 daily timestamps is repeated so that the
            # column has exactly ``n`` entries.
            'repeated_timestamps': pd.date_range(
                start='2018-09-01', periods=n_days).repeat(rows_per_day),
        }
        data = pd.DataFrame(columns)

        client = ibis.pandas.connect({'df': data})
        t = client.table('df')

        # Aggregations.
        grouped_by_key = t.groupby(t.key)
        self.high_card_group_by = grouped_by_key.aggregate(
            avg_value=t.value.mean())

        # Casts to date, from timestamps and from their string form.
        self.cast_to_dates = t.timestamps.cast(dt.date)
        self.cast_to_dates_from_strings = t.timestamp_strings.cast(dt.date)

        with_dates = t.mutate(dates=t.timestamps.cast('date'))
        self.multikey_group_by_with_mutate = with_dates.groupby(
            ['low_card_key', 'dates']
        ).aggregate(avg_value=lambda tbl: tbl.value.mean())

        # Sorts, with and without a preceding projection.
        self.simple_sort = t.sort_by([t.key])

        key_value = t[['key', 'value']]
        self.simple_sort_projection = key_value.sort_by(['key'])

        self.multikey_sort = t.sort_by(['low_card_key', 'key'])

        three_columns = t[['low_card_key', 'key', 'value']]
        self.multikey_sort_projection = three_columns.sort_by(
            ['low_card_key', 'key'])

        # Rolling means over a trailing two-day range, partitioned by a
        # low- and a high-cardinality key respectively.
        def trailing_two_days(partition_column):
            return ibis.trailing_range_window(
                2 * ibis.day(),
                order_by=t.repeated_timestamps,
                group_by=partition_column)

        low_card_window = trailing_two_days(t.low_card_key)
        self.low_card_grouped_rolling = t.value.mean().over(low_card_window)

        high_card_window = trailing_two_days(t.key)
        self.high_card_grouped_rolling = t.value.mean().over(high_card_window)
示例#2
0
def test_timestamp_scalar_in_filter(alltypes, translate):
    """Execute a count over a filter that compares against scalar timestamps.

    Both predicates compare ``timestamp_col`` with a scalar timestamp plus an
    interval; the test only requires that execution completes.
    """
    fixed_bound = ibis.timestamp('2010-01-01') + ibis.week(3)
    before_fixed = alltypes.timestamp_col < fixed_bound

    relative_bound = ibis.now() + ibis.day(10)
    before_relative = alltypes.timestamp_col < relative_bound

    filtered = alltypes.filter([before_fixed, before_relative])
    filtered.count().execute()
示例#3
0
文件: test_join.py 项目: xmnlab/ibis
def test_keyed_asof_join_with_tolerance(
        time_keyed_left, time_keyed_right, time_keyed_df1, time_keyed_df2):
    """Compare a keyed as-of join with tolerance against ``pd.merge_asof``.

    The ibis join uses a two-day interval as tolerance; the pandas reference
    uses the matching ``Timedelta``. Only the columns present in the pandas
    result are compared.
    """
    two_days = 2 * ibis.day()
    joined = time_keyed_left.asof_join(
        time_keyed_right, 'time', by='key', tolerance=two_days)
    actual = joined.execute()

    reference = pd.merge_asof(
        time_keyed_df1,
        time_keyed_df2,
        on='time',
        by='key',
        tolerance=pd.Timedelta('2D'),
    )
    tm.assert_frame_equal(actual[reference.columns], reference)
示例#4
0
    def test_timestamp_scalar_in_filter(self):
        """Execute a count over a filter using scalar timestamp bounds.

        Related to issue #310. The test only requires that execution
        completes.
        """
        alltypes = self.alltypes

        fixed_bound = ibis.timestamp('2010-01-01') + ibis.month(3)
        before_fixed = alltypes.timestamp_col < fixed_bound

        relative_bound = ibis.now() + ibis.day(10)
        before_relative = alltypes.timestamp_col < relative_bound

        counted = alltypes.filter([before_fixed, before_relative]).count()
        counted.execute()
示例#5
0
    def test_timestamp_scalar_in_filter(self):
        """Run a filtered count whose predicates use scalar timestamps.

        Related to issue #310. No result is asserted; the expression just
        has to execute.
        """
        source = self.alltypes

        three_months_on = ibis.timestamp('2010-01-01') + ibis.month(3)
        first_predicate = source.timestamp_col < three_months_on

        ten_days_from_now = ibis.now() + ibis.day(10)
        second_predicate = source.timestamp_col < ten_days_from_now

        predicates = [first_predicate, second_predicate]
        source.filter(predicates).count().execute()
示例#6
0
def test_window_with_preceding_expr():
    """Check a trailing window whose ``preceding`` is an interval expression.

    The mean over a trailing window of ``3 * ibis.day()`` ordered by ``time``
    is compared with pandas' ``rolling('3d').mean()`` on the same data.
    """
    timestamps = pd.date_range('20180101', '20180110')
    first_value = 2
    values = np.arange(first_value, first_value + len(timestamps))
    df = pd.DataFrame({'value': values, 'time': timestamps}, index=timestamps)

    t = ibis.pandas.connect({'df': df}).table('df')

    # pandas reference: time-based rolling mean, with the index name cleared
    # before the comparison.
    expected = df.set_index('time').value.rolling('3d').mean()
    expected.index.name = None

    three_days = 3 * ibis.day()
    trailing = ibis.trailing_window(three_days, order_by=t.time)
    result = t.value.mean().over(trailing).execute()

    tm.assert_series_equal(result, expected)
示例#7
0
    def test_where_analyze_scalar_op(self):
        # root cause of #310

        table = self.con.table('functional_alltypes')

        expr = (table.filter([
            table.timestamp_col <
            (ibis.timestamp('2010-01-01') + ibis.month(3)),
            table.timestamp_col < (ibis.now() + ibis.day(10))
        ]).count())

        result = to_sql(expr)
        expected = """\
SELECT count(*) AS `tmp`
FROM functional_alltypes
WHERE `timestamp_col` < months_add('2010-01-01 00:00:00', 3) AND
      `timestamp_col` < days_add(now(), 10)"""
        assert result == expected
示例#8
0
    def test_where_analyze_scalar_op(self):
        # root cause of #310

        table = self.con.table('functional_alltypes')

        expr = (table.filter([table.timestamp_col <
                             (ibis.timestamp('2010-01-01') + ibis.month(3)),
                             table.timestamp_col < (ibis.now() +
                                                    ibis.day(10))])
                .count())

        result = to_sql(expr)
        expected = """\
SELECT count(*) AS `tmp`
FROM functional_alltypes
WHERE `timestamp_col` < months_add('2010-01-01 00:00:00', 3) AND
      `timestamp_col` < days_add(now(), 10)"""
        assert result == expected
示例#9
0
 def test_comparison_timestamp(self):
     """Comparing the column with ``min() + interval`` is a boolean array."""
     threshold = self.col.min() + ibis.day(3)
     comparison = self.col > threshold
     assert isinstance(comparison, ir.BooleanArray)
示例#10
0
    result = expr.compile()
    expected = """\
SELECT *,
       avg(`float_col`) OVER (PARTITION BY `year` ORDER BY UNIX_MICROS(`timestamp_col`) RANGE BETWEEN 4 PRECEDING AND 2 PRECEDING) AS `win_avg`
FROM `{}.testing.functional_alltypes`""".format(project_id)  # noqa: E501
    assert result == expected


@pytest.mark.parametrize(('preceding', 'value'), [
    (5, 5),
    (ibis.nanosecond(), 0.001),
    (ibis.microsecond(), 1),
    (ibis.second(), 1000000),
    (ibis.minute(), 1000000 * 60),
    (ibis.hour(), 1000000 * 60 * 60),
    (ibis.day(), 1000000 * 60 * 60 * 24),
    (2 * ibis.day(), 1000000 * 60 * 60 * 24 * 2),
    (ibis.week(), 1000000 * 60 * 60 * 24 * 7),
])
def test_trailing_range_window(alltypes, preceding, value, project_id):
    """Check the compiled SQL of a trailing range window.

    ``preceding`` is the window bound handed to ibis, and ``value`` is the
    number expected in the ``RANGE BETWEEN ... PRECEDING`` clause of the
    compiled query.
    """
    window = ibis.trailing_range_window(
        preceding=preceding, order_by=alltypes.timestamp_col)
    windowed_mean = alltypes.float_col.mean().over(window)
    compiled = alltypes.mutate(win_avg=windowed_mean).compile()

    template = """\
SELECT *,
       avg(`float_col`) OVER (ORDER BY UNIX_MICROS(`timestamp_col`) RANGE BETWEEN {} PRECEDING AND CURRENT ROW) AS `win_avg`
FROM `{}.testing.functional_alltypes`"""  # noqa: E501
    assert compiled == template.format(value, project_id)
示例#11
0
 def test_comparison_timestamp(self):
     """A column compared with ``min() + 3 days`` yields a boolean array."""
     lower_bound = self.col.min() + ibis.day(3)
     result = self.col > lower_bound
     assert isinstance(result, ir.BooleanArray)
def test_comparison_timestamp(alltypes):
    """Comparing ``i`` with ``i.min() + 3 days`` yields a boolean column."""
    threshold = alltypes.i.min() + ibis.day(3)
    comparison = alltypes.i > threshold
    assert isinstance(comparison, ir.BooleanColumn)
示例#13
0
# Module-level pytest mark: applies the ``pandas`` marker to this module.
pytestmark = pytest.mark.pandas

# Short alias for the pandas backend's ``execute`` function.
execute = ibis.pandas.execute


@pytest.fixture(scope='session')
def sort_kind():
    """Session-scoped fixture providing the name of the sort algorithm."""
    kind = 'mergesort'
    return kind


# Reusable parametrize marks, applied as decorators on the tests below.
default = pytest.mark.parametrize(
    'default',
    [ibis.NA, ibis.literal('a')],
)
row_offset = pytest.mark.parametrize(
    'row_offset',
    [ibis.literal(-1), ibis.literal(1), ibis.literal(0)],
)
delta_offset = pytest.mark.parametrize(
    'delta_offset',
    [ibis.day(), 2 * ibis.day(), -2 * ibis.day()],
)


@default
@row_offset
def test_lead(t, df, row_offset, default):
    """Compare ``lead`` on ``dup_strings`` with a negative pandas ``shift``.

    When ``default`` is not ``ibis.NA``, the missing values of the pandas
    reference are filled with the executed default.
    """
    result = t.dup_strings.lead(row_offset, default=default).execute()

    shifted = df.dup_strings.shift(-execute(row_offset))
    if default is ibis.NA:
        expected = shifted
    else:
        expected = shifted.fillna(execute(default))

    tm.assert_series_equal(result, expected)


@default
@row_offset
示例#14
0
# Module-level pytest mark: applies the ``pandas`` marker to this module.
pytestmark = pytest.mark.pandas

# Short alias for the pandas backend's ``execute`` function.
execute = ibis.pandas.execute


@pytest.fixture(scope='session')
def sort_kind():
    """Session-scoped fixture providing the name of the sort algorithm."""
    kind = 'mergesort'
    return kind


# Reusable parametrize marks, applied as decorators on the tests below.
default = pytest.mark.parametrize(
    'default',
    [ibis.NA, ibis.literal('a')],
)
row_offset = pytest.mark.parametrize(
    'row_offset',
    [ibis.literal(-1), ibis.literal(1), ibis.literal(0)],
)
range_offset = pytest.mark.parametrize(
    'range_offset',
    [ibis.day(), 2 * ibis.day(), -2 * ibis.day()],
)


@pytest.fixture
def row_window():
    """Window with ``following=0`` ordered by the ``plain_int64`` column."""
    window = ibis.window(following=0, order_by='plain_int64')
    return window


@pytest.fixture
def range_window():
    """Window with ``following=0`` ordered by ``plain_datetimes_naive``."""
    window = ibis.window(following=0, order_by='plain_datetimes_naive')
    return window


@default
@row_offset