示例#1
0
def test_custom_aggregation():
    """Exercise a user-defined ``Aggregation`` with and without a window.

    ``Custom`` keeps an integer state that starts at 0. ``on_new`` adds 1
    and ``on_old`` subtracts 100; both return the *previous* state as the
    second element of their result tuple.
    """
    frame = pd.DataFrame({"x": np.arange(10, dtype=float), "y": [1.0, 2.0] * 5})

    class Custom(Aggregation):
        def initial(self, new):
            # Starting state, independent of the first batch.
            return 0

        def on_new(self, state, new):
            # Pair of (updated state, pre-update state).
            updated = state + 1
            return updated, state

        def on_old(self, state, new):
            # Pair of (updated state, pre-update state).
            updated = state - 100
            return updated, state

    # Plain aggregation: the asserted outputs are the pre-update states of
    # three successive ``on_new`` calls.
    source = DataFrame(example=frame)
    outputs = source.aggregate(Custom()).stream.sink_to_list()
    for _ in range(3):
        source.emit(frame)

    assert outputs == [0, 1, 2]

    # Windowed aggregation: presumably ``on_old`` also fires for rows that
    # leave the 5-row window, which is what drives the results negative.
    source = DataFrame(example=frame)
    outputs = source.window(n=5).aggregate(Custom()).stream.sink_to_list()
    for _ in range(3):
        source.emit(frame)

    assert outputs == [1, -198, -397]
示例#2
0
def test_groupby_windowing_value(func, value, getter, grouper, indexer):
    """Check groupby aggregations over a time-based window.

    The streaming result of ``func(indexer(window.groupby(grouper(window))))``
    is compared against the same expression evaluated on plain pandas
    slices for the first and the last emitted batch.

    Parameters (presumably supplied by pytest parametrization defined
    outside this view):
        func: aggregation applied to the (indexed) groupby object.
        value: window size, anything ``pd.Timedelta`` accepts.
        getter: not used by this test body.
        grouper: maps a frame to the key passed to ``groupby``.
        indexer: selects from the groupby object before aggregating.
    """
    # ``pd.DatetimeIndex(start=..., end=..., freq=...)`` was deprecated in
    # pandas 0.24 and removed in 1.0; ``pd.date_range`` builds the same
    # hourly index and works on both old and current pandas.
    index = pd.date_range(start="2000-01-01", end="2000-01-03", freq="1h")
    df = pd.DataFrame(
        {
            "x": np.arange(len(index), dtype=float),
            "y": np.arange(len(index), dtype=float) % 2,
        },
        index=index,
    )

    sdf = DataFrame(example=df)

    def f(x):
        # Same pipeline for the streaming frame and for plain pandas frames.
        return func(indexer(x.groupby(grouper(x))))

    L = f(sdf.window(value)).stream.gather().sink_to_list()

    # From here on ``value`` is only used for timestamp arithmetic.
    value = pd.Timedelta(value)

    # 49 hourly rows in batches of 13 -> 4 emits, plus one empty frame.
    diff = 13
    for i in range(0, len(index), diff):
        sdf.emit(df.iloc[i : i + diff])
    sdf.emit(df.iloc[:0])

    assert len(L) == 5

    # Expected content of the window after the first batch: drop as many
    # leading rows as there are rows at or beyond ``min + value``.
    first = df.iloc[:diff]
    lost = first[first.index.min() + value :]
    first = first.iloc[len(lost) :]

    assert_eq(L[0], f(first))

    # Final window: rows from ``max - value + 1s`` onwards.
    last = df.loc[index.max() - value + pd.Timedelta("1s") :]

    assert_eq(L[-1], f(last))
示例#3
0
def test_window_sum(stream):
    """Check the running sum of column ``x`` over a 4-row window.

    The same 3-row frame is emitted three times; after every emit the
    complete list of results collected so far is compared.
    """
    frame = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
    source = DataFrame(example=frame, stream=stream)
    totals = source.window(n=4).x.sum().stream.gather().sink_to_list()

    # 6 = 1 + 2 + 3 (only three rows seen so far); 9 = 3 + 1 + 2 + 3.
    expected = [6, 9, 9]
    for emitted in range(1, len(expected) + 1):
        source.emit(frame)
        assert totals == expected[:emitted]
示例#4
0
def test_window_full():
    """Check the raw content of a 4-row window after each emitted chunk.

    An identity ``apply`` is used on the window, and each collected result
    is compared with the expected slice of the source frame.
    """
    frame = pd.DataFrame({"x": np.arange(10, dtype=float), "y": [1.0, 2.0] * 5})
    source = DataFrame(example=frame)

    windows = source.window(n=4).apply(lambda x: x).stream.sink_to_list()

    # Feed the ten rows in three uneven chunks: 3, 5 and 2 rows.
    chunks = (frame.iloc[:3], frame.iloc[3:8], frame.iloc[8:])
    for chunk in chunks:
        source.emit(chunk)

    # First window holds just the 3 rows seen so far, later ones 4 rows each.
    expected = (frame.iloc[:3], frame.iloc[4:8], frame.iloc[-4:])
    for position, want in enumerate(expected):
        assert_eq(windows[position], want)
示例#5
0
def test_window_sum_dataframe(stream):
    """Check per-column sums of a whole frame over a 4-row window.

    After each of three emits, every result collected so far is
    re-checked against its expected ``x``/``y`` totals.
    """
    frame = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
    source = DataFrame(example=frame, stream=stream)
    sums = source.window(n=4).sum().stream.gather().sink_to_list()

    # Expected (x, y) column totals for result 0, 1 and 2.
    expected_totals = [(6, 15), (9, 21), (9, 21)]

    for emitted in range(1, len(expected_totals) + 1):
        source.emit(frame)
        for position in range(emitted):
            x_total, y_total = expected_totals[position]
            # Build a fresh Series for every comparison.
            assert_eq(
                sums[position],
                pd.Series([x_total, y_total], index=["x", "y"]),
            )
示例#6
0
def test_groupby_windowing_n(func, n, getter, grouper, indexer):
    """Check groupby aggregations over a row-count window of size ``n``.

    The streaming result is compared with the same pipeline evaluated on
    plain pandas slices for the first and the last emitted batch.
    ``getter`` is accepted but not used by this test body.
    """
    frame = pd.DataFrame({"x": np.arange(10, dtype=float), "y": [1.0, 2.0] * 5})
    source = DataFrame(example=frame)

    def aggregate(obj):
        # Same pipeline for the streaming frame and for plain pandas frames.
        grouped = obj.groupby(grouper(obj))
        return func(indexer(grouped))

    results = aggregate(source.window(n=n)).stream.gather().sink_to_list()

    # Ten rows in batches of three -> 4 emits, plus one empty frame.
    step = 3
    for start in range(0, len(frame), step):
        source.emit(frame.iloc[start : start + step])
    source.emit(frame.iloc[:0])

    assert len(results) == 5

    # After the first batch the window holds at most the last ``n`` of its rows.
    first_window = frame.iloc[max(0, step - n) : step]
    assert_eq(results[0], aggregate(first_window))

    # At the end the window holds the last ``n`` rows of the whole frame.
    last_window = frame.iloc[len(frame) - n :]
    assert_eq(results[-1], aggregate(last_window))