def _resample_periods(self, obj):
    """
    Resample ``obj`` along ``self.axis`` from its period frequency to
    ``self.freq``.

    The axis labels are first converted to ``self.freq`` using
    ``self.convention``.  If the source frequency is a sub-period of the
    target, the data is grouped with a ``BinGrouper`` and aggregated with
    ``self.how``.  If it is a super-period, the data is taken onto the full
    target range using ``self.fill_method`` and ``self.limit``.

    Raises
    ------
    ValueError
        If the source frequency is neither a sub-period nor a super-period
        of ``self.freq``.
    """
    source_index = obj._get_axis(self.axis)

    # Start vs. end of period
    converted = source_index.asfreq(self.freq, how=self.convention)

    if is_subperiod(source_index.freq, self.freq):
        # Downsampling
        if len(converted) > 1:
            ordinals = np.arange(converted.values[0], converted.values[-1])
            bins = converted.searchsorted(ordinals, side='right')
        else:
            bins = np.array([], dtype=np.int32)

        target_index = period_range(converted[0], converted[-1],
                                    freq=self.freq)
        binner = BinGrouper(bins, target_index)
        return obj.groupby(binner, axis=self.axis).agg(self.how)

    if is_superperiod(source_index.freq, self.freq):
        # Generate the full range at the target frequency
        full_index = period_range(converted[0], converted[-1],
                                  freq=self.freq)

        # Positions in the converted labels that fill each target period
        fill_positions = converted.get_indexer(full_index,
                                               method=self.fill_method,
                                               limit=self.limit)
        return _take_new_index(obj, fill_positions, full_index,
                               axis=self.axis)

    raise ValueError('Frequency %s cannot be resampled to %s'
                     % (source_index.freq, self.freq))
def test_annual_upsample(self):
    """Upsample annual period series to D/B/M and compare to asfreq results."""
    target_freqs = ['D', 'B', 'M']
    conventions = ['start', 'end']
    fill_methods = ['ffill', 'bfill']

    for month in MONTHS:
        ts = _simple_pts('1/1/1990', '12/31/1995', freq='A-%s' % month)

        for targ, conv, meth in product(target_freqs, conventions,
                                        fill_methods):
            result = ts.resample(targ, fill_method=meth, convention=conv)
            stamped = result.to_timestamp(targ, how=conv)
            expected = stamped.asfreq(targ, meth).to_period()
            assert_series_equal(result, expected)

    # Frame and Series resampling must agree (uses the last ``ts`` above).
    frame = DataFrame({'a': ts})
    frame_result = frame.resample('D', fill_method='ffill')
    series_result = frame['a'].resample('D', fill_method='ffill')
    assert_series_equal(frame_result['a'], series_result)

    # Small hand-built annual series upsampled to monthly.
    annual_index = period_range('2000', '2003', freq='A-DEC')
    ts = Series([1, 2, 3, 4], index=annual_index)

    result = ts.resample('M', fill_method='ffill')
    monthly_index = period_range('2000-01', '2003-12', freq='M')
    expected = ts.asfreq('M', how='start').reindex(monthly_index,
                                                   method='ffill')
    assert_series_equal(result, expected)
def test_annual_upsample(self):
    """Upsample annual period series to D/B/M and compare to asfreq results."""
    combos = list(product(["D", "B", "M"], ["start", "end"], ["ffill", "bfill"]))

    for month in MONTHS:
        ts = _simple_pts("1/1/1990", "12/31/1995", freq="A-%s" % month)

        for targ, conv, meth in combos:
            result = ts.resample(targ, fill_method=meth, convention=conv)
            as_stamps = result.to_timestamp(targ, how=conv)
            expected = as_stamps.asfreq(targ, meth).to_period()
            assert_series_equal(result, expected)

    # Frame and Series resampling must agree (uses the last ``ts`` above).
    frame = DataFrame({"a": ts})
    from_frame = frame.resample("D", fill_method="ffill")
    from_series = frame["a"].resample("D", fill_method="ffill")
    assert_series_equal(from_frame["a"], from_series)

    # Small hand-built annual series upsampled to monthly.
    years = period_range("2000", "2003", freq="A-DEC")
    ts = Series([1, 2, 3, 4], index=years)

    result = ts.resample("M", fill_method="ffill")
    months = period_range("2000-01", "2003-12", freq="M")
    expected = ts.asfreq("M", how="start").reindex(months, method="ffill")
    assert_series_equal(result, expected)
def test_upsample_daily_business_daily(self):
    """Upsample business-daily to daily, then daily to hourly periods."""
    # Business-day periods -> calendar-day periods.
    business = _simple_pts("1/1/2000", "2/1/2000", freq="B")
    daily_index = period_range("1/3/2000", "2/1/2000")

    result = business.resample("D")
    expected = business.asfreq("D").reindex(daily_index)
    assert_series_equal(result, expected)

    # Calendar-day periods -> hourly periods, start convention.
    daily = _simple_pts("1/1/2000", "2/1/2000")
    result = daily.resample("H", convention="s")

    hourly_index = period_range("1/1/2000", "2/1/2000 23:00", freq="H")
    expected = daily.asfreq("H", how="s").reindex(hourly_index)
    assert_series_equal(result, expected)
def test_upsample_daily_business_daily(self):
    """Upsample business-daily to daily, then daily to hourly periods."""
    # Business-day periods -> calendar-day periods.
    bday_series = _simple_pts('1/1/2000', '2/1/2000', freq='B')
    result = bday_series.resample('D')

    target = period_range('1/3/2000', '2/1/2000')
    expected = bday_series.asfreq('D').reindex(target)
    assert_series_equal(result, expected)

    # Calendar-day periods -> hourly periods, start convention.
    day_series = _simple_pts('1/1/2000', '2/1/2000')
    result = day_series.resample('H', convention='s')

    target = period_range('1/1/2000', '2/1/2000 23:00', freq='H')
    expected = day_series.asfreq('H', how='s').reindex(target)
    assert_series_equal(result, expected)
def test_annual_upsample(self):
    """Annual periods upsample consistently for frames and series."""
    ts = _simple_pts("1/1/1990", "12/31/1995", freq="A-DEC")

    # Frame and Series resampling must agree.
    frame = DataFrame({"a": ts})
    frame_daily = frame.resample("D", fill_method="ffill")
    series_daily = frame["a"].resample("D", fill_method="ffill")
    assert_series_equal(frame_daily["a"], series_daily)

    # Small hand-built annual series upsampled to monthly.
    annual = period_range("2000", "2003", freq="A-DEC")
    ts = Series([1, 2, 3, 4], index=annual)
    result = ts.resample("M", fill_method="ffill")

    monthly = period_range("2000-01", "2003-12", freq="M")
    expected = ts.asfreq("M", how="start").reindex(monthly, method="ffill")
    assert_series_equal(result, expected)
def test_to_period(self):
    """``to_period`` converts datetime-indexed Series/DataFrame axes."""
    from pandas.tseries.period import period_range

    ts = _simple_ts('1/1/2000', '1/1/2001')

    # Default conversion.
    converted = ts.to_period()
    exp = ts.copy()
    exp.index = period_range('1/1/2000', '1/1/2001')
    assert_series_equal(converted, exp)

    # Conversion to monthly periods.
    converted = ts.to_period('M')
    exp.index = exp.index.asfreq('M')
    tm.assert_index_equal(converted.index, exp.index.asfreq('M'))
    assert_series_equal(converted, exp)

    # GH 7606 without freq
    idx = DatetimeIndex(['2011-01-01', '2011-01-02',
                         '2011-01-03', '2011-01-04'])
    exp_idx = pd.PeriodIndex(['2011-01-01', '2011-01-02',
                              '2011-01-03', '2011-01-04'], freq='D')

    ser = Series(np.random.randn(4), index=idx)
    expected = ser.copy()
    expected.index = exp_idx
    assert_series_equal(ser.to_period(), expected)

    frame = DataFrame(np.random.randn(4, 4), index=idx, columns=idx)

    # Row axis.
    expected = frame.copy()
    expected.index = exp_idx
    assert_frame_equal(frame.to_period(), expected)

    # Column axis.
    expected = frame.copy()
    expected.columns = exp_idx
    assert_frame_equal(frame.to_period(axis=1), expected)
def test_quarterly_resampling(self):
    """Quarterly -> annual resampling matches the timestamp round trip."""
    quarters = period_range('2000Q1', periods=10, freq='Q-DEC')
    ts = Series(np.arange(10), index=quarters)

    result = ts.resample('A')

    via_timestamps = ts.to_timestamp().resample('A')
    exp = via_timestamps.to_period()
    assert_series_equal(result, exp)
def test_all_values_single_bin(self):
    """Twelve monthly values resampled to annual give one bin with the mean."""
    # 2070
    months = period_range(start="2012-01-01", end="2012-12-31", freq="M")
    values = np.random.randn(len(months))
    s = Series(values, index=months)

    result = s.resample("A", how='mean')

    tm.assert_almost_equal(result[0], s.mean())
def test_upsample_with_limit(self):
    """Annual -> monthly forward fill honours ``limit`` (end convention)."""
    years = period_range("1/1/2000", periods=5, freq="A")
    ts = Series(np.random.randn(len(years)), years)

    result = ts.resample("M", fill_method="ffill", limit=2, convention="end")

    monthly = ts.asfreq("M")
    expected = monthly.reindex(result.index, method="ffill", limit=2)
    assert_series_equal(result, expected)
def test_mixed_freq_irreg_period(self):
    """Smoke test: plot an irregular time series, then a period series."""
    ts = tm.makeTimeSeries()

    # Positions 0..10 followed by a few scattered later ones.
    positions = list(range(11)) + [15, 16, 17, 18, 29]
    irreg = ts[positions]

    bdays = period_range('1/3/2000', periods=30, freq='B')
    ps = Series(np.random.randn(len(bdays)), bdays)

    irreg.plot()
    ps.plot()
def test_resample_to_quarterly(self):
    """Annual periods forward-filled to quarterly, for every anchor month."""
    for month in MONTHS:
        ts = _simple_pts("1990", "1992", freq="A-%s" % month)
        quarterly_freq = "Q-%s" % month
        quar_ts = ts.resample(quarterly_freq, fill_method="ffill")

        stamps = ts.to_timestamp("D", how="start")
        first_day = ts.index[0].asfreq("D", "start")
        last_day = ts.index[-1].asfreq("D", "end")
        qdates = period_range(first_day, last_day, freq=quarterly_freq)

        expected = stamps.reindex(qdates.to_timestamp("D", "s"),
                                  method="ffill")
        expected.index = qdates

        assert_series_equal(quar_ts, expected)

    # conforms, but different month
    ts = _simple_pts("1990", "1992", freq="A-JUN")

    for how in ("start", "end"):
        result = ts.resample("Q-MAR", convention=how, fill_method="ffill")
        converted = ts.asfreq("Q-MAR", how=how)
        expected = converted.reindex(result.index, method="ffill")

        assert_series_equal(result, expected)
def test_resample_5minute(self):
    """Minutely periods resampled to 5min match the timestamp resample."""
    minutes = period_range('1/1/2000', '1/5/2000', freq='T')
    ts = TimeSeries(np.random.randn(len(minutes)), index=minutes)

    result = ts.resample('5min')

    stamped = ts.to_timestamp()
    expected = stamped.resample('5min')
    assert_series_equal(result, expected)
def test_resample_to_quarterly(self):
    """Annual periods forward-filled to quarterly, for every anchor month."""
    for month in MONTHS:
        annual_freq = 'A-%s' % month
        quarterly_freq = 'Q-%s' % month

        ts = _simple_pts('1990', '1992', freq=annual_freq)
        quar_ts = ts.resample(quarterly_freq, fill_method='ffill')

        stamps = ts.to_timestamp('D', how='start')
        qdates = period_range(ts.index[0].asfreq('D', 'start'),
                              ts.index[-1].asfreq('D', 'end'),
                              freq=quarterly_freq)

        quarter_stamps = qdates.to_timestamp('D', 'e')
        expected = stamps.reindex(quarter_stamps, method='ffill')
        expected.index = qdates

        assert_series_equal(quar_ts, expected)

    # conforms, but different month
    ts = _simple_pts('1990', '1992', freq='A-JUN')

    for how in ('start', 'end'):
        result = ts.resample('Q-MAR', convention=how, fill_method='ffill')
        expected = ts.asfreq('Q-MAR', how=how).reindex(result.index,
                                                       method='ffill')

        assert_series_equal(result, expected)
def test_annual_upsample(self):
    """Annual periods upsample consistently for frames and series."""
    ts = _simple_pts('1/1/1990', '12/31/1995', freq='A-DEC')

    # Frame and Series resampling must agree.
    df = DataFrame({'a': ts})
    via_frame = df.resample('D', fill_method='ffill')['a']
    via_series = df['a'].resample('D', fill_method='ffill')
    assert_series_equal(via_frame, via_series)

    # Small hand-built annual series upsampled to monthly.
    ts = Series([1, 2, 3, 4],
                index=period_range('2000', '2003', freq='A-DEC'))
    result = ts.resample('M', fill_method='ffill')

    ex_index = period_range('2000-01', '2003-12', freq='M')
    monthly_start = ts.asfreq('M', how='start')
    expected = monthly_start.reindex(ex_index, method='ffill')
    assert_series_equal(result, expected)
def test_frame_to_period(self):
    """``DataFrame.to_period`` works on either axis and rejects axis=2."""
    K = 5
    from pandas.tseries.period import period_range

    stamps = date_range('1/1/2000', '1/1/2001')
    periods = period_range('1/1/2000', '1/1/2001')
    df = DataFrame(randn(len(stamps), K), index=stamps)
    df['mix'] = 'a'

    # Row axis.
    converted = df.to_period()
    exp = df.copy()
    exp.index = periods
    assert_frame_equal(converted, exp)

    converted = df.to_period('M')
    tm.assert_index_equal(converted.index, exp.index.asfreq('M'))

    # Column axis, on the transposed frame.
    df = df.T
    converted = df.to_period(axis=1)
    exp = df.copy()
    exp.columns = periods
    assert_frame_equal(converted, exp)

    converted = df.to_period('M', axis=1)
    tm.assert_index_equal(converted.columns, exp.columns.asfreq('M'))

    # An out-of-range axis must be rejected.
    with self.assertRaises(ValueError):
        df.to_period(axis=2)
def test_resample_tz_localized(self):
    """Resampling tz-aware series matches the naive result re-localized."""
    days = date_range(start="2012-4-13", end="2012-5-1")
    ts = Series(lrange(len(days)), days)

    ts_local = ts.tz_localize("UTC").tz_convert("America/Los_Angeles")
    result = ts_local.resample("W")

    # Same data with the tzinfo stripped from every stamp.
    ts_local_naive = ts_local.copy()
    naive_stamps = []
    for stamp in ts_local_naive.index.to_pydatetime():
        naive_stamps.append(stamp.replace(tzinfo=None))
    ts_local_naive.index = naive_stamps

    exp = ts_local_naive.resample("W").tz_localize("America/Los_Angeles")
    assert_series_equal(result, exp)

    # it works
    result = ts_local.resample("D")

    # #2245
    idx = date_range("2001-09-20 15:59", "2001-09-20 16:00", freq="T",
                     tz="Australia/Sydney")
    s = Series([1, 2], index=idx)

    result = s.resample("D", closed="right", label="right")
    ex_index = date_range("2001-09-21", periods=1, freq="D",
                          tz="Australia/Sydney")
    assert_series_equal(result, Series([1.5], index=ex_index))

    # for good measure
    result = s.resample("D", kind="period")
    ex_index = period_range("2001-09-20", periods=1, freq="D")
    assert_series_equal(result, Series([1.5], index=ex_index))
def _resample_periods(self, obj):
    """
    Resample ``obj`` along ``self.axis`` from its period frequency to
    ``self.freq``.

    An empty axis is re-indexed onto an empty ``PeriodIndex``.  Otherwise
    the data is aggregated with ``self._agg_method`` when the source
    frequency is a sub-period of the target or ``self.how`` is set, and
    taken onto the new index with ``self.fill_method`` / ``self.limit``
    when the source is a super-period.

    Raises
    ------
    ValueError
        If none of the above cases applies.
    """
    source_index = obj._get_axis(self.axis)

    if len(source_index) == 0:
        empty_index = PeriodIndex(data=[], freq=self.freq)
        return obj.reindex(empty_index)

    first = source_index[0].asfreq(self.freq, how=self.convention)
    last = source_index[-1].asfreq(self.freq, how=self.convention)
    target_index = period_range(first, last, freq=self.freq)

    # Start vs. end of period
    converted = source_index.asfreq(self.freq, how=self.convention)

    if is_subperiod(source_index.freq, self.freq) or self.how is not None:
        # Downsampling
        ordinals = np.arange(converted.values[0], converted.values[-1])
        bins = converted.searchsorted(ordinals, side='right')
        binner = BinGrouper(bins, target_index)

        grouped = obj.groupby(binner, axis=self.axis)
        return grouped.aggregate(self._agg_method)

    if is_superperiod(source_index.freq, self.freq):
        # Get the fill indexer
        fill_positions = converted.get_indexer(target_index,
                                               method=self.fill_method,
                                               limit=self.limit)
        return _take_new_index(obj, fill_positions, target_index,
                               axis=self.axis)

    raise ValueError('Frequency %s cannot be resampled to %s'
                     % (source_index.freq, self.freq))
def test_upsample_with_limit(self):
    """Annual -> monthly forward fill honours ``limit``."""
    annual = period_range('1/1/2000', periods=5, freq='A')
    values = np.random.randn(len(annual))
    ts = Series(values, annual)

    result = ts.resample('M', fill_method='ffill', limit=2)

    expected = ts.asfreq('M').reindex(result.index, method='ffill',
                                      limit=2)
    assert_series_equal(result, expected)
def test_finder_monthly_long(self):
    """First major tick of a 24-year monthly plot is 1989Q1 as a month."""
    months = period_range('1988Q1', periods=24 * 12, freq='M')
    ser = Series(np.random.randn(len(months)), months)

    axes = ser.plot()
    first_tick = axes.get_xaxis().get_majorticklocs()[0]

    expected_tick = Period('1989Q1', 'M').ordinal
    self.assertEqual(first_tick, expected_tick)
def test_resample_to_period_monthly_buglet(self):
    """Daily data resampled with ``kind='period'`` yields a monthly index.

    Regression test for GH #1259: a full year of daily timestamps resampled
    to 'M' must produce the period index Jan-2000 .. Dec-2000.
    """
    # GH #1259
    rng = date_range('1/1/2000', '12/31/2000')
    ts = Series(np.random.randn(len(rng)), index=rng)

    result = ts.resample('M', kind='period')

    exp_index = period_range('Jan-2000', 'Dec-2000', freq='M')
    # ``assert_`` is a deprecated unittest alias (removed in Python 3.12);
    # ``assertTrue`` is the supported spelling with identical semantics.
    self.assertTrue(result.index.equals(exp_index))
def test_resample_to_period_monthly_buglet(self):
    """Daily data resampled with ``kind="period"`` yields a monthly index.

    Regression test for GH #1259: a full year of daily timestamps resampled
    to "M" must produce the period index Jan-2000 .. Dec-2000.
    """
    # GH #1259
    rng = date_range("1/1/2000", "12/31/2000")
    ts = Series(np.random.randn(len(rng)), index=rng)

    result = ts.resample("M", kind="period")

    exp_index = period_range("Jan-2000", "Dec-2000", freq="M")
    # ``assert_`` is a deprecated unittest alias (removed in Python 3.12);
    # ``assertTrue`` is the supported spelling with identical semantics.
    self.assertTrue(result.index.equals(exp_index))
def setUp(self):
    """Build period- and datetime-indexed Series/DataFrame fixtures.

    One Series and one three-column DataFrame of random data is created per
    frequency, each over a 100-element index starting at 12/31/1999.
    """
    columns = ["A", "B", "C"]

    period_freqs = ["S", "T", "H", "D", "W", "M", "Q", "Y"]
    period_indexes = [period_range("12/31/1999", freq=f, periods=100)
                      for f in period_freqs]
    self.period_ser = [Series(np.random.randn(len(ix)), ix)
                       for ix in period_indexes]
    self.period_df = [DataFrame(np.random.randn(len(ix), 3), index=ix,
                                columns=columns)
                      for ix in period_indexes]

    datetime_freqs = ["S", "T", "H", "D", "W", "M", "Q-DEC", "A", "1B30Min"]
    datetime_indexes = [date_range("12/31/1999", freq=f, periods=100)
                        for f in datetime_freqs]
    self.datetime_ser = [Series(np.random.randn(len(ix)), ix)
                         for ix in datetime_indexes]
    self.datetime_df = [DataFrame(np.random.randn(len(ix), 3), index=ix,
                                  columns=columns)
                        for ix in datetime_indexes]
def test_finder_monthly_long(self):
    """First major tick of a 24-year monthly plot is 1989Q1 as a month."""
    import matplotlib.pyplot as plt

    # Start from a clean slate so earlier figures cannot affect the axes.
    plt.close('all')

    rng = period_range('1988Q1', periods=24 * 12, freq='M')
    ser = Series(np.random.randn(len(rng)), rng)

    ax = ser.plot()
    xaxis = ax.get_xaxis()
    rs = xaxis.get_majorticklocs()[0]
    xp = Period('1989Q1', 'M').ordinal

    # ``assert_`` is a deprecated unittest alias (removed in Python 3.12);
    # ``assertEqual`` also reports both values when the check fails.
    self.assertEqual(rs, xp)
def test_finder_monthly(self):
    """First major tick of monthly plots of several spans is Jan 1988."""
    xp = Period('1988-1').ordinal
    yrs = [1.15, 2.5, 4, 11]

    for n in yrs:
        rng = period_range('1987Q2', periods=int(n * 12), freq='M')
        ser = Series(np.random.randn(len(rng)), rng)

        ax = ser.plot()
        xaxis = ax.get_xaxis()
        rs = xaxis.get_majorticklocs()[0]

        # ``assert_`` is a deprecated unittest alias (removed in Python
        # 3.12); ``assertEqual`` also reports both values on failure.
        self.assertEqual(rs, xp)
def test_finder_quarterly(self):
    """First major tick of quarterly plots of several spans is 1988Q1."""
    xp = Period('1988Q1').ordinal
    yrs = [3.5, 11]

    for n in yrs:
        rng = period_range('1987Q2', periods=int(n * 4), freq='Q')
        ser = Series(np.random.randn(len(rng)), rng)

        ax = ser.plot()
        xaxis = ax.get_xaxis()
        rs = xaxis.get_majorticklocs()[0]

        # ``assert_`` is a deprecated unittest alias (removed in Python
        # 3.12); ``assertEqual`` also reports both values on failure.
        self.assertEqual(rs, xp)
def test_finder_annual(self):
    """First major tick of annual plots matches the expected year per span."""
    import matplotlib.pyplot as plt

    expected_years = [1987, 1988, 1990, 1990, 1995, 2020, 2070, 2170]
    spans = [5, 10, 19, 49, 99, 199, 599, 1001]

    for year, nyears in zip(expected_years, spans):
        rng = period_range('1987', periods=nyears, freq='A')
        ser = Series(np.random.randn(len(rng)), rng)

        ax = ser.plot()
        first_tick = ax.get_xaxis().get_majorticklocs()[0]
        self.assertEqual(first_tick, Period(year, freq='A').ordinal)

        # Release the figure before the next iteration.
        plt.close(ax.get_figure())
def test_basic_downsample(self):
    """Monthly -> annual downsample equals the per-year mean."""
    ts = _simple_pts("1/1/1990", "6/30/1995", freq="M")
    result = ts.resample("a-dec")

    yearly_mean = ts.groupby(ts.index.year).mean()
    yearly_mean.index = period_range("1/1/1990", "6/30/1995", freq="a-dec")
    assert_series_equal(result, yearly_mean)

    # this is ok
    for alias in ("a-dec", "a"):
        assert_series_equal(ts.resample(alias), result)
def test_basic_downsample(self):
    """Monthly -> annual downsample equals the per-year mean."""
    monthly = _simple_pts('1/1/1990', '6/30/1995', freq='M')
    result = monthly.resample('a-dec')

    expected = monthly.groupby(monthly.index.year).mean()
    expected.index = period_range('1/1/1990', '6/30/1995', freq='a-dec')
    assert_series_equal(result, expected)

    # this is ok
    again = monthly.resample('a-dec')
    assert_series_equal(again, result)
    short_alias = monthly.resample('a')
    assert_series_equal(short_alias, result)
def _get_new_index(self):
    """
    Return the new index at ``self.freq`` built from ``self.ax``.

    An empty ``self.ax`` yields a shallow copy with no values.  Otherwise
    the values span from the first label (converted using
    ``self.convention``) to the last label (converted with ``how='end'``).
    """
    axis_index = self.ax

    if len(axis_index) == 0:
        return axis_index._shallow_copy([], freq=self.freq)

    first = axis_index[0].asfreq(self.freq, how=self.convention)
    last = axis_index[-1].asfreq(self.freq, how='end')
    span = period_range(first, last, freq=self.freq)

    return axis_index._shallow_copy(span.values, freq=self.freq)
def setUp(self):
    """Run the base plot setup, then build period/datetime fixtures.

    One Series and one three-column DataFrame of random data is created per
    frequency, each over a 100-element index starting at 12/31/1999.
    """
    TestPlotBase.setUp(self)

    col_names = ['A', 'B', 'C']

    freq = ['S', 'T', 'H', 'D', 'W', 'M', 'Q', 'A']
    p_indexes = [period_range('12/31/1999', freq=code, periods=100)
                 for code in freq]
    self.period_ser = [Series(np.random.randn(len(ix)), ix)
                       for ix in p_indexes]
    self.period_df = [DataFrame(np.random.randn(len(ix), 3), index=ix,
                                columns=col_names)
                      for ix in p_indexes]

    freq = ['S', 'T', 'H', 'D', 'W', 'M', 'Q-DEC', 'A', '1B30Min']
    d_indexes = [date_range('12/31/1999', freq=code, periods=100)
                 for code in freq]
    self.datetime_ser = [Series(np.random.randn(len(ix)), ix)
                         for ix in d_indexes]
    self.datetime_df = [DataFrame(np.random.randn(len(ix), 3), index=ix,
                                  columns=col_names)
                        for ix in d_indexes]
def test_finder_quarterly(self):
    """First major tick of quarterly plots is 1988Q1, before and after
    nudging the lower x-limit."""
    import matplotlib.pyplot as plt

    xp = Period('1988Q1').ordinal
    yrs = [3.5, 11]

    # Start from a clean slate so earlier figures cannot affect the axes.
    plt.close('all')

    for n in yrs:
        rng = period_range('1987Q2', periods=int(n * 4), freq='Q')
        ser = Series(np.random.randn(len(rng)), rng)

        ax = ser.plot()
        xaxis = ax.get_xaxis()
        rs = xaxis.get_majorticklocs()[0]
        # ``assert_`` is a deprecated unittest alias (removed in Python
        # 3.12); use ``assertEqual`` like the second check below.
        self.assertEqual(rs, xp)

        # Shift the lower limit and re-check the first major tick.
        (vmin, vmax) = ax.get_xlim()
        ax.set_xlim(vmin + 0.9, vmax)
        rs = xaxis.get_majorticklocs()[0]
        self.assertEqual(xp, rs)
def test_finder_monthly(self):
    """First major tick of monthly plots is Jan 1988, before and after
    nudging the lower x-limit."""
    import matplotlib.pyplot as plt

    expected_tick = Period('Jan 1988').ordinal

    for n_years in [1.15, 2.5, 4, 11]:
        months = period_range('1987Q2', periods=int(n_years * 12), freq='M')
        ser = Series(np.random.randn(len(months)), months)

        ax = ser.plot()
        xaxis = ax.get_xaxis()
        self.assertEqual(xaxis.get_majorticklocs()[0], expected_tick)

        # Shift the lower limit and re-check the first major tick.
        lower, upper = ax.get_xlim()
        ax.set_xlim(lower + 0.9, upper)
        self.assertEqual(expected_tick, xaxis.get_majorticklocs()[0])

        plt.close(ax.get_figure())
def test_resample_tz_localized(self):
    """Resampling tz-aware series matches the naive result re-localized."""
    dr = date_range(start='2012-4-13', end='2012-5-1')
    ts = Series(range(len(dr)), dr)

    ts_utc = ts.tz_localize('UTC')
    ts_local = ts_utc.tz_convert('America/Los_Angeles')
    result = ts_local.resample('W')

    # Same data with the tzinfo stripped from every stamp.
    ts_local_naive = ts_local.copy()
    local_stamps = ts_local_naive.index.to_pydatetime()
    ts_local_naive.index = [stamp.replace(tzinfo=None)
                            for stamp in local_stamps]

    weekly_naive = ts_local_naive.resample('W')
    exp = weekly_naive.tz_localize('America/Los_Angeles')
    assert_series_equal(result, exp)

    # it works
    result = ts_local.resample('D')

    # #2245
    sydney_minutes = date_range('2001-09-20 15:59', '2001-09-20 16:00',
                                freq='T', tz='Australia/Sydney')
    s = Series([1, 2], index=sydney_minutes)

    result = s.resample('D', closed='right', label='right')
    ex_index = date_range('2001-09-21', periods=1, freq='D',
                          tz='Australia/Sydney')
    expected = Series([1.5], index=ex_index)
    assert_series_equal(result, expected)

    # for good measure
    result = s.resample('D', kind='period')
    ex_index = period_range('2001-09-20', periods=1, freq='D')
    expected = Series([1.5], index=ex_index)
    assert_series_equal(result, expected)
def test_resample_to_quarterly(self):
    """Annual periods forward-filled to quarterly, for every anchor month."""
    for month in MONTHS:
        ts = _simple_pts('1990', '1992', freq='A-%s' % month)
        quarterly_freq = 'Q-%s' % month
        quar_ts = ts.resample(quarterly_freq, fill_method='ffill')

        stamps = ts.to_timestamp('D', how='end')
        qdates = period_range(stamps.index[0], stamps.index[-1],
                              freq=quarterly_freq)

        quarter_ends = qdates.to_timestamp('D', 'e')
        expected = stamps.reindex(quarter_ends, method='ffill')
        expected.index = qdates

        assert_series_equal(quar_ts, expected)

    # conforms, but different month
    ts = _simple_pts('1990', '1992', freq='A-JUN')

    for how in ('start', 'end'):
        result = ts.resample('Q-MAR', convention=how, fill_method='ffill')

        stamped = ts.asfreq('Q-MAR', how=how).to_timestamp('D')
        expected = stamped.resample('Q-MAR', fill_method='ffill')
        assert_series_equal(result, expected.to_period('Q-MAR'))
def _resample_periods(self, obj):
    """
    Resample ``obj`` along ``self.axis`` from its period frequency to
    ``self.freq``.

    An empty axis is re-indexed onto an empty ``PeriodIndex``.  Otherwise
    the data is grouped and aggregated with ``self.how`` when the source
    frequency is a sub-period of the target, and taken onto the new index
    with ``self.fill_method`` / ``self.limit`` when it is a super-period.

    Raises
    ------
    ValueError
        If the source frequency is neither a sub-period nor a super-period
        of ``self.freq``.
    """
    source_index = obj._get_axis(self.axis)

    if len(source_index) == 0:
        empty_index = PeriodIndex(data=[], freq=self.freq)
        return obj.reindex(empty_index)

    first = source_index[0].asfreq(self.freq, how=self.convention)
    last = source_index[-1].asfreq(self.freq, how=self.convention)
    target_index = period_range(first, last, freq=self.freq)

    # Start vs. end of period
    converted = source_index.asfreq(self.freq, how=self.convention)

    if is_subperiod(source_index.freq, self.freq):
        # Downsampling
        if len(converted) > 1:
            ordinals = np.arange(converted.values[0], converted.values[-1])
            bins = converted.searchsorted(ordinals, side='right')
        else:
            bins = np.array([], dtype=np.int32)

        binner = BinGrouper(bins, target_index)
        return obj.groupby(binner, axis=self.axis).aggregate(self.how)

    if is_superperiod(source_index.freq, self.freq):
        # Get the fill indexer
        fill_positions = converted.get_indexer(target_index,
                                               method=self.fill_method,
                                               limit=self.limit)
        return _take_new_index(obj, fill_positions, target_index,
                               axis=self.axis)

    raise ValueError('Frequency %s cannot be resampled to %s'
                     % (source_index.freq, self.freq))
def test_resample_tz_localized(self):
    """Resampling tz-aware data: weekly/daily series and per-column ``how``."""
    dr = date_range(start='2012-4-13', end='2012-5-1')
    ts = Series(lrange(len(dr)), dr)

    ts_local = ts.tz_localize('UTC').tz_convert('America/Los_Angeles')
    result = ts_local.resample('W')

    # Same data with the tzinfo stripped from every stamp.
    ts_local_naive = ts_local.copy()
    ts_local_naive.index = [
        stamp.replace(tzinfo=None)
        for stamp in ts_local_naive.index.to_pydatetime()
    ]

    exp = ts_local_naive.resample('W').tz_localize('America/Los_Angeles')
    assert_series_equal(result, exp)

    # it works
    result = ts_local.resample('D')

    # #2245
    idx = date_range('2001-09-20 15:59', '2001-09-20 16:00', freq='T',
                     tz='Australia/Sydney')
    s = Series([1, 2], index=idx)

    result = s.resample('D', closed='right', label='right')
    ex_index = date_range('2001-09-21', periods=1, freq='D',
                          tz='Australia/Sydney')
    assert_series_equal(result, Series([1.5], index=ex_index))

    # for good measure
    result = s.resample('D', kind='period')
    ex_index = period_range('2001-09-20', periods=1, freq='D')
    assert_series_equal(result, Series([1.5], index=ex_index))

    # GH 6397
    # comparing an offset that doesn't propagate tz's
    hours = date_range('1/1/2011', periods=20000, freq='H')
    hours = hours.tz_localize('EST')
    frame = DataFrame(index=hours)
    frame['first'] = np.random.randn(len(hours))
    frame['second'] = np.cumsum(np.random.randn(len(hours)))

    # Column-by-column aggregation ...
    summed_first = frame.resample('A', how=np.sum)['first']
    mean_second = frame.resample('A', how=np.mean)['second']
    expected = DataFrame({'first': summed_first, 'second': mean_second},
                         columns=['first', 'second'])

    # ... must match a single resample with a per-column ``how`` mapping.
    per_column = {'first': np.sum, 'second': np.mean}
    result = frame.resample('A', how=per_column)
    result = result.reindex(columns=['first', 'second'])
    assert_frame_equal(result, expected)
def _simple_pts(start, end, freq='D'): rng = period_range(start, end, freq=freq) return TimeSeries(np.random.randn(len(rng)), index=rng)