def test_aggregate_with_nat_size(): # GH 9925 n = 20 data = np.random.randn(n, 4).astype('int64') normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) normal_df['key'] = [1, 2, np.nan, 4, 5] * 4 dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) dt_df['key'] = [ datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT, datetime(2013, 1, 4), datetime(2013, 1, 5) ] * 4 normal_grouped = normal_df.groupby('key') dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) normal_result = normal_grouped.size() dt_result = dt_grouped.size() pad = Series([0], index=[3]) expected = normal_result.append(pad) expected = expected.sort_index() expected.index = date_range(start='2013-01-01', freq='D', periods=5, name='key') assert_series_equal(expected, dt_result) assert dt_result.index.name == 'key'
def test_aggregate_with_nat(func, fill_value): # check TimeGrouper's aggregation is identical as normal groupby # if NaT is included, 'var', 'std', 'mean', 'first','last' # and 'nth' doesn't work yet n = 20 data = np.random.randn(n, 4).astype('int64') normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) normal_df['key'] = [1, 2, np.nan, 4, 5] * 4 dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) dt_df['key'] = [ datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT, datetime(2013, 1, 4), datetime(2013, 1, 5) ] * 4 normal_grouped = normal_df.groupby('key') dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) normal_result = getattr(normal_grouped, func)() dt_result = getattr(dt_grouped, func)() pad = DataFrame([[fill_value] * 4], index=[3], columns=['A', 'B', 'C', 'D']) expected = normal_result.append(pad) expected = expected.sort_index() expected.index = date_range(start='2013-01-01', freq='D', periods=5, name='key') assert_frame_equal(expected, dt_result) assert dt_result.index.name == 'key'
def __new__(cls, *args, **kwargs): from pandas.core.resample import TimeGrouper import warnings warnings.warn("pd.TimeGrouper is deprecated and will be removed; " "Please use pd.Grouper(freq=...)", FutureWarning, stacklevel=2) return TimeGrouper(*args, **kwargs)
def test_repr(): # GH18203 result = repr(TimeGrouper(key='A', freq='H')) expected = ("TimeGrouper(key='A', freq=<Hour>, axis=0, sort=True, " "closed='left', label='left', how='mean', " "convention='e', base=0)") assert result == expected
def test_aggregate_normal(resample_method): """Check TimeGrouper's aggregation is identical as normal groupby.""" if resample_method == 'ohlc': pytest.xfail(reason='DataError: No numeric types to aggregate') data = np.random.randn(20, 4) normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) normal_df['key'] = [1, 2, 3, 4, 5] * 4 dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) dt_df['key'] = [ datetime(2013, 1, 1), datetime(2013, 1, 2), datetime(2013, 1, 3), datetime(2013, 1, 4), datetime(2013, 1, 5) ] * 4 normal_grouped = normal_df.groupby('key') dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) expected = getattr(normal_grouped, resample_method)() dt_result = getattr(dt_grouped, resample_method)() expected.index = date_range(start='2013-01-01', freq='D', periods=5, name='key') tm.assert_equal(expected, dt_result) # if TimeGrouper is used included, 'nth' doesn't work yet """
def test_panel_aggregation(): ind = pd.date_range('1/1/2000', periods=100) data = np.random.randn(2, len(ind), 4) wp = Panel(data, items=['Item1', 'Item2'], major_axis=ind, minor_axis=['A', 'B', 'C', 'D']) tg = TimeGrouper('M', axis=1) _, grouper, _ = tg._get_grouper(wp) bingrouped = wp.groupby(grouper) binagg = bingrouped.mean() def f(x): assert (isinstance(x, Panel)) return x.mean(1) result = bingrouped.agg(f) tm.assert_panel_equal(result, binagg)
def test_apply_iteration(): # #2300 N = 1000 ind = pd.date_range(start="2000-01-01", freq="D", periods=N) df = DataFrame({'open': 1, 'close': 2}, index=ind) tg = TimeGrouper('M') _, grouper, _ = tg._get_grouper(df) # Errors grouped = df.groupby(grouper, group_keys=False) def f(df): return df['close'] / df['open'] # it works! result = grouped.apply(f) tm.assert_index_equal(result.index, df.index)
def test_resampler_is_iterable_all_ts(series): # GH 15314 freq = 'H' tg = TimeGrouper(freq, convention='start') grouped = series.groupby(tg) resampled = series.resample(freq) for (rk, rv), (gk, gv) in zip(resampled, grouped): assert rk == gk assert_series_equal(rv, gv)
def test_panel_aggregation(): ind = pd.date_range('1/1/2000', periods=100) data = np.random.randn(2, len(ind), 4) wp = Panel(data, items=['Item1', 'Item2'], major_axis=ind, minor_axis=['A', 'B', 'C', 'D']) tg = TimeGrouper('M', axis=1) _, grouper, _ = tg._get_grouper(wp) bingrouped = wp.groupby(grouper) binagg = bingrouped.mean() def f(x): assert (isinstance(x, Panel)) return x.mean(1) result = bingrouped.agg(f) tm.assert_panel_equal(result, binagg)
def test_fails_on_no_datetime_index(name, func): n = 2 index = func(n) df = DataFrame({'a': np.random.randn(n)}, index=index) msg = ("Only valid with DatetimeIndex, TimedeltaIndex " "or PeriodIndex, but got an instance of '{}'".format(name)) with pytest.raises(TypeError, match=msg): df.groupby(TimeGrouper('D'))
def test_apply_iteration(): # #2300 N = 1000 ind = pd.date_range(start="2000-01-01", freq="D", periods=N) df = DataFrame({'open': 1, 'close': 2}, index=ind) tg = TimeGrouper('M') _, grouper, _ = tg._get_grouper(df) # Errors grouped = df.groupby(grouper, group_keys=False) def f(df): return df['close'] / df['open'] # it works! result = grouped.apply(f) tm.assert_index_equal(result.index, df.index)
def test_custom_grouper(index): dti = index s = Series(np.array([1] * len(dti)), index=dti, dtype='int64') b = TimeGrouper(Minute(5)) g = s.groupby(b) # check all cython functions work funcs = ['add', 'mean', 'prod', 'ohlc', 'min', 'max', 'var'] for f in funcs: g._cython_agg_general(f) b = TimeGrouper(Minute(5), closed='right', label='right') g = s.groupby(b) # check all cython functions work funcs = ['add', 'mean', 'prod', 'ohlc', 'min', 'max', 'var'] for f in funcs: g._cython_agg_general(f) assert g.ngroups == 2593 assert notna(g.mean()).all() # construct expected val arr = [1] + [5] * 2592 idx = dti[0:-1:5] idx = idx.append(dti[-1:]) expect = Series(arr, index=idx) # GH2763 - return in put dtype if we can result = g.agg(np.sum) assert_series_equal(result, expect) df = DataFrame(np.random.rand(len(dti), 10), index=dti, dtype='float64') r = df.groupby(b).agg(np.sum) assert len(r.columns) == 10 assert len(r.index) == 2593
def test_fails_on_no_datetime_index(): index_names = ('Int64Index', 'Index', 'Float64Index', 'MultiIndex') index_funcs = (tm.makeIntIndex, tm.makeUnicodeIndex, tm.makeFloatIndex, lambda m: tm.makeCustomIndex(m, 2)) n = 2 for name, func in zip(index_names, index_funcs): index = func(n) df = DataFrame({'a': np.random.randn(n)}, index=index) msg = ("Only valid with DatetimeIndex, TimedeltaIndex " "or PeriodIndex, but got an instance of %r" % name) with pytest.raises(TypeError, match=msg): df.groupby(TimeGrouper('D'))
def test_aggregate_normal(): # check TimeGrouper's aggregation is identical as normal groupby n = 20 data = np.random.randn(n, 4) normal_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) normal_df['key'] = [1, 2, 3, 4, 5] * 4 dt_df = DataFrame(data, columns=['A', 'B', 'C', 'D']) dt_df['key'] = [datetime(2013, 1, 1), datetime(2013, 1, 2), datetime(2013, 1, 3), datetime(2013, 1, 4), datetime(2013, 1, 5)] * 4 normal_grouped = normal_df.groupby('key') dt_grouped = dt_df.groupby(TimeGrouper(key='key', freq='D')) for func in ['min', 'max', 'prod', 'var', 'std', 'mean']: expected = getattr(normal_grouped, func)() dt_result = getattr(dt_grouped, func)() expected.index = date_range(start='2013-01-01', freq='D', periods=5, name='key') assert_frame_equal(expected, dt_result) for func in ['count', 'sum']: expected = getattr(normal_grouped, func)() expected.index = date_range(start='2013-01-01', freq='D', periods=5, name='key') dt_result = getattr(dt_grouped, func)() assert_frame_equal(expected, dt_result) # GH 7453 for func in ['size']: expected = getattr(normal_grouped, func)() expected.index = date_range(start='2013-01-01', freq='D', periods=5, name='key') dt_result = getattr(dt_grouped, func)() assert_series_equal(expected, dt_result) # GH 7453 for func in ['first', 'last']: expected = getattr(normal_grouped, func)() expected.index = date_range(start='2013-01-01', freq='D', periods=5, name='key') dt_result = getattr(dt_grouped, func)() assert_frame_equal(expected, dt_result) # if TimeGrouper is used included, 'nth' doesn't work yet """
def test_resample_frame_basic(): df = tm.makeTimeDataFrame() b = TimeGrouper('M') g = df.groupby(b) # check all cython functions work funcs = ['add', 'mean', 'prod', 'min', 'max', 'var'] for f in funcs: g._cython_agg_general(f) result = df.resample('A').mean() assert_series_equal(result['A'], df['A'].resample('A').mean()) result = df.resample('M').mean() assert_series_equal(result['A'], df['A'].resample('M').mean()) df.resample('M', kind='period').mean() df.resample('W-WED', kind='period').mean()
def test_aaa_group_order(): # GH 12840 # check TimeGrouper perform stable sorts n = 20 data = np.random.randn(n, 4) df = DataFrame(data, columns=['A', 'B', 'C', 'D']) df['key'] = [ datetime(2013, 1, 1), datetime(2013, 1, 2), datetime(2013, 1, 3), datetime(2013, 1, 4), datetime(2013, 1, 5) ] * 4 grouped = df.groupby(TimeGrouper(key='key', freq='D')) tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 1)), df[::5]) tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 2)), df[1::5]) tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 3)), df[2::5]) tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 4)), df[3::5]) tm.assert_frame_equal(grouped.get_group(datetime(2013, 1, 5)), df[4::5])
def test_resample_ohlc(series): s = series grouper = TimeGrouper(Minute(5)) expect = s.groupby(grouper).agg(lambda x: x[-1]) result = s.resample('5Min').ohlc() assert len(result) == len(expect) assert len(result.columns) == 4 xs = result.iloc[-2] assert xs['open'] == s[-6] assert xs['high'] == s[-6:-1].max() assert xs['low'] == s[-6:-1].min() assert xs['close'] == s[-2] xs = result.iloc[0] assert xs['open'] == s[0] assert xs['high'] == s[:5].max() assert xs['low'] == s[:5].min() assert xs['close'] == s[4]
def test_resample_basic_grouper(series): s = series result = s.resample('5Min').last() grouper = TimeGrouper(Minute(5), closed='left', label='left') expected = s.groupby(grouper).agg(lambda x: x[-1]) assert_series_equal(result, expected)