def test_custom_aggregation():
    """Run a user-defined ``Aggregation`` through ``aggregate``.

    The same aggregation is applied first to a plain streaming DataFrame
    and then to one wrapped in ``window(n=5)``; in both cases the values
    collected by the sink are compared against a fixed expected list.
    """

    class Custom(Aggregation):
        # ``on_new`` returns ``(state + 1, state)`` and ``on_old`` returns
        # ``(state - 100, state)``; the starting state is 0.
        def initial(self, new):
            return 0

        def on_new(self, state, new):
            return state + 1, state

        def on_old(self, state, new):
            return state - 100, state

    frame = pd.DataFrame(
        {"x": np.arange(10, dtype=float), "y": [1.0, 2.0] * 5}
    )

    # Un-windowed: the same 10-row batch is emitted three times.
    plain = DataFrame(example=frame)
    plain_out = plain.aggregate(Custom()).stream.sink_to_list()
    for _ in range(3):
        plain.emit(frame)
    assert plain_out == [0, 1, 2]

    # Windowed (n=5): same three emits against a fresh streaming frame.
    windowed = DataFrame(example=frame)
    windowed_out = (
        windowed.window(n=5).aggregate(Custom()).stream.sink_to_list()
    )
    for _ in range(3):
        windowed.emit(frame)
    assert windowed_out == [1, -198, -397]
def test_groupby_windowing_value(func, value, getter, grouper, indexer):
    """Check groupby aggregations over a time-valued window.

    The arguments are supplied by the test harness (the parametrization
    or fixtures are not part of this block): ``func`` reduces a groupby
    object, ``value`` is the window length (anything ``pd.Timedelta``
    accepts), ``grouper`` maps a frame to its grouping key and
    ``indexer`` selects from the groupby object.  ``getter`` is accepted
    but not used in this test.

    The first and the last emitted results are compared against the same
    pipeline evaluated eagerly on the matching slice of the pandas frame.
    """
    # ``pd.DatetimeIndex(start=..., end=..., freq=...)`` was removed in
    # pandas 1.0; ``pd.date_range`` builds the same hourly index.
    index = pd.date_range(start="2000-01-01", end="2000-01-03", freq="1h")
    df = pd.DataFrame(
        {
            "x": np.arange(len(index), dtype=float),
            "y": np.arange(len(index), dtype=float) % 2,
        },
        index=index,
    )

    sdf = DataFrame(example=df)

    def f(x):
        # Same pipeline for the streaming window and the plain frame.
        return func(indexer(x.groupby(grouper(x))))

    L = f(sdf.window(value)).stream.gather().sink_to_list()

    # Converted only after ``window`` has received the raw ``value``.
    value = pd.Timedelta(value)

    # Feed the frame in chunks of ``diff`` rows, then one empty frame.
    diff = 13
    for i in range(0, len(index), diff):
        sdf.emit(df.iloc[i : i + diff])
    sdf.emit(df.iloc[:0])

    assert len(L) == 5

    # Rows of the first chunk at or after ``min + value`` are counted as
    # ``lost``; that many rows are dropped from the front of the chunk.
    first = df.iloc[:diff]
    lost = first[first.index.min() + value :]
    first = first.iloc[len(lost) :]

    assert_eq(L[0], f(first))

    # The final result should cover the trailing ``value`` of the data;
    # the extra second shifts the start of the slice just past
    # ``max - value``.
    last = df.loc[index.max() - value + pd.Timedelta("1s") :]

    assert_eq(L[-1], f(last))
def test_window_sum(stream):
    """Check the running ``sum`` of column ``x`` over ``window(n=4)``.

    The same three-row frame is emitted three times; after every emit the
    collected results must equal the expected totals seen so far.
    """
    frame = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
    sdf = DataFrame(example=frame, stream=stream)

    windowed_sum = sdf.window(n=4).x.sum()
    collected = windowed_sum.stream.gather().sink_to_list()

    expected_so_far = []
    for total in (6, 9, 9):
        sdf.emit(frame)
        expected_so_far.append(total)
        assert collected == expected_so_far
def test_window_full():
    """Check the frames produced by ``window(n=4)`` with an identity apply.

    A 10-row frame is emitted in three pieces (rows 0-2, 3-7 and 8-9) and
    each collected result is compared with the expected slice of the
    source frame.
    """
    frame = pd.DataFrame(
        {"x": np.arange(10, dtype=float), "y": [1.0, 2.0] * 5}
    )
    sdf = DataFrame(example=frame)
    windows = sdf.window(n=4).apply(lambda x: x).stream.sink_to_list()

    # Bounds of the pieces fed into the stream, in order.
    for start, stop in ((None, 3), (3, 8), (8, None)):
        sdf.emit(frame.iloc[start:stop])

    # Expected content of each collected window, by position.
    expected_bounds = ((None, 3), (4, 8), (-4, None))
    for position, (start, stop) in enumerate(expected_bounds):
        assert_eq(windows[position], frame.iloc[start:stop])
def test_window_sum_dataframe(stream):
    """Check the whole-frame ``sum`` over ``window(n=4)``.

    The same three-row frame is emitted three times.  After each emit,
    every result collected so far is re-checked against its expected
    per-column totals.
    """
    frame = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
    sdf = DataFrame(example=frame, stream=stream)
    collected = sdf.window(n=4).sum().stream.gather().sink_to_list()

    # Expected [x, y] totals for the first, second and third result.
    expected_totals = ([6, 15], [9, 21], [9, 21])

    for emitted in range(1, len(expected_totals) + 1):
        sdf.emit(frame)
        for position in range(emitted):
            want = pd.Series(expected_totals[position], index=["x", "y"])
            assert_eq(collected[position], want)
def test_groupby_windowing_n(func, n, getter, grouper, indexer):
    """Check groupby aggregations over a row-count window.

    The arguments come from the test harness (parametrization or fixtures
    are not part of this block).  ``getter`` is accepted but unused here.
    The first and last collected results are compared against the same
    pipeline evaluated directly on the matching rows of the pandas frame.
    """
    frame = pd.DataFrame(
        {"x": np.arange(10, dtype=float), "y": [1.0, 2.0] * 5}
    )
    sdf = DataFrame(example=frame)

    def pipeline(obj):
        # group -> select -> reduce, shared by stream and plain frame
        grouped = obj.groupby(grouper(obj))
        return func(indexer(grouped))

    results = pipeline(sdf.window(n=n)).stream.gather().sink_to_list()

    # Emit the frame in chunks of ``step`` rows, then an empty frame.
    step = 3
    for offset in range(0, 10, step):
        sdf.emit(frame.iloc[offset : offset + step])
    sdf.emit(frame.iloc[:0])

    assert len(results) == 5

    # First result: at most the last ``n`` rows of the first chunk.
    head_start = max(0, step - n)
    head = frame.iloc[head_start:step]
    assert_eq(results[0], pipeline(head))

    # Last result: rows from position ``len(frame) - n`` onwards.
    tail = frame.iloc[len(frame) - n :]
    assert_eq(results[-1], pipeline(tail))