class TestDatelike(object): @pytest.mark.parametrize('s', [ Series(DatetimeIndex(['20180101', NaT, '20180103'])), Series(TimedeltaIndex(['0 days', NaT, '2 days'])) ], ids=lambda x: str(x.dtype)) def test_qcut_nat(self, s): # GH 19768 intervals = IntervalIndex.from_tuples([(s[0] - Nano(), s[2] - Day()), np.nan, (s[2] - Day(), s[2])]) expected = Series(Categorical(intervals, ordered=True)) result = qcut(s, 2) tm.assert_series_equal(result, expected) def test_datetime_cut(self): # GH 14714 # testing for time data to be present as series data = to_datetime(Series(['2013-01-01', '2013-01-02', '2013-01-03'])) result, bins = cut(data, 3, retbins=True) expected = (Series( IntervalIndex([ Interval(Timestamp('2012-12-31 23:57:07.200000'), Timestamp('2013-01-01 16:00:00')), Interval(Timestamp('2013-01-01 16:00:00'), Timestamp('2013-01-02 08:00:00')), Interval(Timestamp('2013-01-02 08:00:00'), Timestamp('2013-01-03 00:00:00')) ])).astype(CDT(ordered=True))) tm.assert_series_equal(result, expected) # testing for time data to be present as list data = [ np.datetime64('2013-01-01'), np.datetime64('2013-01-02'), np.datetime64('2013-01-03') ] result, bins = cut(data, 3, retbins=True) tm.assert_series_equal(Series(result), expected) # testing for time data to be present as ndarray data = np.array([ np.datetime64('2013-01-01'), np.datetime64('2013-01-02'), np.datetime64('2013-01-03') ]) result, bins = cut(data, 3, retbins=True) tm.assert_series_equal(Series(result), expected) # testing for time data to be present as datetime index data = DatetimeIndex(['2013-01-01', '2013-01-02', '2013-01-03']) result, bins = cut(data, 3, retbins=True) tm.assert_series_equal(Series(result), expected) @pytest.mark.parametrize('bins', [ 3, [ Timestamp('2013-01-01 04:57:07.200000'), Timestamp('2013-01-01 21:00:00'), Timestamp('2013-01-02 13:00:00'), Timestamp('2013-01-03 05:00:00') ] ]) @pytest.mark.parametrize('box', [list, np.array, Index, Series]) def test_datetimetz_cut(self, bins, box): # GH 19872 tz = 'US/Eastern' s = Series(date_range('20130101', periods=3, tz=tz)) if not isinstance(bins, int): bins = box(bins) result = cut(s, bins) expected = (Series( IntervalIndex([ Interval(Timestamp('2012-12-31 23:57:07.200000', tz=tz), Timestamp('2013-01-01 16:00:00', tz=tz)), Interval(Timestamp('2013-01-01 16:00:00', tz=tz), Timestamp('2013-01-02 08:00:00', tz=tz)), Interval(Timestamp('2013-01-02 08:00:00', tz=tz), Timestamp('2013-01-03 00:00:00', tz=tz)) ])).astype(CDT(ordered=True))) tm.assert_series_equal(result, expected) @pytest.mark.parametrize('bins', [3, np.linspace(0, 1, 4)]) def test_datetimetz_qcut(self, bins): # GH 19872 tz = 'US/Eastern' s = Series(date_range('20130101', periods=3, tz=tz)) result = qcut(s, bins) expected = (Series( IntervalIndex([ Interval(Timestamp('2012-12-31 23:59:59.999999999', tz=tz), Timestamp('2013-01-01 16:00:00', tz=tz)), Interval(Timestamp('2013-01-01 16:00:00', tz=tz), Timestamp('2013-01-02 08:00:00', tz=tz)), Interval(Timestamp('2013-01-02 08:00:00', tz=tz), Timestamp('2013-01-03 00:00:00', tz=tz)) ])).astype(CDT(ordered=True))) tm.assert_series_equal(result, expected) def test_datetime_bin(self): data = [np.datetime64('2012-12-13'), np.datetime64('2012-12-15')] bin_data = ['2012-12-12', '2012-12-14', '2012-12-16'] expected = (Series( IntervalIndex([ Interval(Timestamp(bin_data[0]), Timestamp(bin_data[1])), Interval(Timestamp(bin_data[1]), Timestamp(bin_data[2])) ])).astype(CDT(ordered=True))) for conv in [Timestamp, Timestamp, np.datetime64]: bins = [conv(v) for v in bin_data] result = cut(data, bins=bins) tm.assert_series_equal(Series(result), expected) bin_pydatetime = [Timestamp(v).to_pydatetime() for v in bin_data] result = cut(data, bins=bin_pydatetime) tm.assert_series_equal(Series(result), expected) bins = to_datetime(bin_data) result = cut(data, bins=bin_pydatetime) tm.assert_series_equal(Series(result), expected) def test_datetime_nan(self): def f(): cut(date_range('20130101', periods=3), bins=[0, 2, 4]) pytest.raises(ValueError, f) result = cut(date_range('20130102', periods=5), bins=date_range('20130101', periods=2)) mask = result.categories.isna() tm.assert_numpy_array_equal(mask, np.array([False])) mask = result.isna() tm.assert_numpy_array_equal(mask, np.array([False, True, True, True, True]))
def test_int64_nocopy(self): # GH#23539 check that a copy isn't made when we pass int64 data # and copy=False arr = np.arange(10, dtype=np.int64) tdi = TimedeltaIndex(arr, copy=False) assert tdi._data._data.base is arr
def test_order(self): # GH 10295 idx1 = TimedeltaIndex(["1 day", "2 day", "3 day"], freq="D", name="idx") idx2 = TimedeltaIndex(["1 hour", "2 hour", "3 hour"], freq="H", name="idx") for idx in [idx1, idx2]: ordered = idx.sort_values() tm.assert_index_equal(ordered, idx) assert ordered.freq == idx.freq ordered = idx.sort_values(ascending=False) expected = idx[::-1] tm.assert_index_equal(ordered, expected) assert ordered.freq == expected.freq assert ordered.freq.n == -1 ordered, indexer = idx.sort_values(return_indexer=True) tm.assert_index_equal(ordered, idx) tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False) assert ordered.freq == idx.freq ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) tm.assert_index_equal(ordered, idx[::-1]) assert ordered.freq == expected.freq assert ordered.freq.n == -1 idx1 = TimedeltaIndex( ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1") exp1 = TimedeltaIndex( ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1") idx2 = TimedeltaIndex(["1 day", "3 day", "5 day", "2 day", "1 day"], name="idx2") for idx, expected in [(idx1, exp1), (idx1, exp1), (idx1, exp1)]: ordered = idx.sort_values() tm.assert_index_equal(ordered, expected) assert ordered.freq is None ordered = idx.sort_values(ascending=False) tm.assert_index_equal(ordered, expected[::-1]) assert ordered.freq is None ordered, indexer = idx.sort_values(return_indexer=True) tm.assert_index_equal(ordered, expected) exp = np.array([0, 4, 3, 1, 2]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) tm.assert_index_equal(ordered, expected[::-1]) exp = np.array([2, 1, 3, 4, 0]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None
def test_round(self): t1 = Timedelta('1 days 02:34:56.789123456') t2 = Timedelta('-1 days 02:34:56.789123456') for (freq, s1, s2) in [ ('N', t1, t2), ('U', Timedelta('1 days 02:34:56.789123000'), Timedelta('-1 days 02:34:56.789123000')), ('L', Timedelta('1 days 02:34:56.789000000'), Timedelta('-1 days 02:34:56.789000000')), ('S', Timedelta('1 days 02:34:57'), Timedelta('-1 days 02:34:57')), ('2S', Timedelta('1 days 02:34:56'), Timedelta('-1 days 02:34:56')), ('5S', Timedelta('1 days 02:34:55'), Timedelta('-1 days 02:34:55')), ('T', Timedelta('1 days 02:35:00'), Timedelta('-1 days 02:35:00')), ('12T', Timedelta('1 days 02:36:00'), Timedelta('-1 days 02:36:00')), ('H', Timedelta('1 days 03:00:00'), Timedelta('-1 days 03:00:00')), ('d', Timedelta('1 days'), Timedelta('-1 days')) ]: r1 = t1.round(freq) assert r1 == s1 r2 = t2.round(freq) assert r2 == s2 # invalid for freq in ['Y', 'M', 'foobar']: pytest.raises(ValueError, lambda: t1.round(freq)) t1 = timedelta_range('1 days', periods=3, freq='1 min 2 s 3 us') t2 = -1 * t1 t1a = timedelta_range('1 days', periods=3, freq='1 min 2 s') t1c = pd.TimedeltaIndex([1, 1, 1], unit='D') # note that negative times round DOWN! so don't give whole numbers for (freq, s1, s2) in [ ('N', t1, t2), ('U', t1, t2), ('L', t1a, TimedeltaIndex([ '-1 days +00:00:00', '-2 days +23:58:58', '-2 days +23:57:56' ], dtype='timedelta64[ns]', freq=None)), ('S', t1a, TimedeltaIndex([ '-1 days +00:00:00', '-2 days +23:58:58', '-2 days +23:57:56' ], dtype='timedelta64[ns]', freq=None)), ('12T', t1c, TimedeltaIndex(['-1 days', '-1 days', '-1 days'], dtype='timedelta64[ns]', freq=None)), ('H', t1c, TimedeltaIndex(['-1 days', '-1 days', '-1 days'], dtype='timedelta64[ns]', freq=None)), ('d', t1c, pd.TimedeltaIndex([-1, -1, -1], unit='D')) ]: r1 = t1.round(freq) tm.assert_index_equal(r1, s1) r2 = t2.round(freq) tm.assert_index_equal(r2, s2) # invalid for freq in ['Y', 'M', 'foobar']: pytest.raises(ValueError, lambda: t1.round(freq))
def test_value_counts_datetime64(self, klass): # GH 3002, datetime64[ns] # don't test names though txt = "\n".join([ 'xxyyzz20100101PIE', 'xxyyzz20100101GUM', 'xxyyzz20100101EGG', 'xxyyww20090101EGG', 'foofoo20080909PIE', 'foofoo20080909GUM' ]) f = StringIO(txt) df = pd.read_fwf(f, widths=[6, 8, 3], names=["person_id", "dt", "food"], parse_dates=["dt"]) s = klass(df['dt'].copy()) s.name = None idx = pd.to_datetime([ '2010-01-01 00:00:00', '2008-09-09 00:00:00', '2009-01-01 00:00:00' ]) expected_s = Series([3, 2, 1], index=idx) tm.assert_series_equal(s.value_counts(), expected_s) expected = np_array_datetime64_compat([ '2010-01-01 00:00:00', '2009-01-01 00:00:00', '2008-09-09 00:00:00' ], dtype='datetime64[ns]') if isinstance(s, Index): tm.assert_index_equal(s.unique(), DatetimeIndex(expected)) else: tm.assert_numpy_array_equal(s.unique(), expected) assert s.nunique() == 3 # with NaT s = df['dt'].copy() s = klass([v for v in s.values] + [pd.NaT]) result = s.value_counts() assert result.index.dtype == 'datetime64[ns]' tm.assert_series_equal(result, expected_s) result = s.value_counts(dropna=False) expected_s[pd.NaT] = 1 tm.assert_series_equal(result, expected_s) unique = s.unique() assert unique.dtype == 'datetime64[ns]' # numpy_array_equal cannot compare pd.NaT if isinstance(s, Index): exp_idx = DatetimeIndex(expected.tolist() + [pd.NaT]) tm.assert_index_equal(unique, exp_idx) else: tm.assert_numpy_array_equal(unique[:3], expected) assert pd.isna(unique[3]) assert s.nunique() == 3 assert s.nunique(dropna=False) == 4 # timedelta64[ns] td = df.dt - df.dt + timedelta(1) td = klass(td, name='dt') result = td.value_counts() expected_s = Series([6], index=[Timedelta('1day')], name='dt') tm.assert_series_equal(result, expected_s) expected = TimedeltaIndex(['1 days'], name='dt') if isinstance(td, Index): tm.assert_index_equal(td.unique(), expected) else: tm.assert_numpy_array_equal(td.unique(), expected.values) td2 = timedelta(1) + (df.dt - df.dt) td2 = klass(td2, name='dt') result2 = td2.value_counts() tm.assert_series_equal(result2, expected_s)
def test_verify_integrity_deprecated(self): # GH#23919 with tm.assert_produces_warning(FutureWarning): TimedeltaIndex(['1 Day'], verify_integrity=False)
def test_value_counts_inferred(self): klasses = [Index, Series] for klass in klasses: s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a'] s = klass(s_values) expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c']) tm.assert_series_equal(s.value_counts(), expected) self.assert_numpy_array_equal(s.unique(), np.unique(s_values)) self.assertEqual(s.nunique(), 4) # don't sort, have to sort after the fact as not sorting is platform-dep hist = s.value_counts(sort=False) hist.sort() expected = Series([3, 1, 4, 2], index=list('acbd')) expected.sort() tm.assert_series_equal(hist, expected) # sort ascending hist = s.value_counts(ascending=True) expected = Series([1, 2, 3, 4], index=list('cdab')) tm.assert_series_equal(hist, expected) # relative histogram. hist = s.value_counts(normalize=True) expected = Series([.4, .3, .2, .1], index=['b', 'a', 'd', 'c']) tm.assert_series_equal(hist, expected) # bins self.assertRaises(TypeError, lambda bins: s.value_counts(bins=bins), 1) s1 = Series([1, 1, 2, 3]) res1 = s1.value_counts(bins=1) exp1 = Series({0.998: 4}) tm.assert_series_equal(res1, exp1) res1n = s1.value_counts(bins=1, normalize=True) exp1n = Series({0.998: 1.0}) tm.assert_series_equal(res1n, exp1n) self.assert_numpy_array_equal(s1.unique(), np.array([1, 2, 3])) self.assertEqual(s1.nunique(), 3) res4 = s1.value_counts(bins=4) exp4 = Series({ 0.998: 2, 1.5: 1, 2.0: 0, 2.5: 1 }, index=[0.998, 2.5, 1.5, 2.0]) tm.assert_series_equal(res4, exp4) res4n = s1.value_counts(bins=4, normalize=True) exp4n = Series({ 0.998: 0.5, 1.5: 0.25, 2.0: 0.0, 2.5: 0.25 }, index=[0.998, 2.5, 1.5, 2.0]) tm.assert_series_equal(res4n, exp4n) # handle NA's properly s_values = [ 'a', 'b', 'b', 'b', np.nan, np.nan, 'd', 'd', 'a', 'a', 'b' ] s = klass(s_values) expected = Series([4, 3, 2], index=['b', 'a', 'd']) tm.assert_series_equal(s.value_counts(), expected) self.assert_numpy_array_equal( s.unique(), np.array(['a', 'b', np.nan, 'd'], dtype='O')) self.assertEqual(s.nunique(), 3) s = klass({}) expected = Series([], dtype=np.int64) tm.assert_series_equal(s.value_counts(), expected) self.assert_numpy_array_equal(s.unique(), np.array([])) self.assertEqual(s.nunique(), 0) # GH 3002, datetime64[ns] txt = "\n".join([ 'xxyyzz20100101PIE', 'xxyyzz20100101GUM', 'xxyyzz20100101EGG', 'xxyyww20090101EGG', 'foofoo20080909PIE', 'foofoo20080909GUM' ]) f = StringIO(txt) df = pd.read_fwf(f, widths=[6, 8, 3], names=["person_id", "dt", "food"], parse_dates=["dt"]) s = klass(df['dt'].copy(), name='dt') idx = pd.to_datetime([ '2010-01-01 00:00:00Z', '2008-09-09 00:00:00Z', '2009-01-01 00:00:00X' ]) expected_s = Series([3, 2, 1], index=idx, name='dt') tm.assert_series_equal(s.value_counts(), expected_s) expected = np.array([ '2010-01-01 00:00:00Z', '2009-01-01 00:00:00Z', '2008-09-09 00:00:00Z' ], dtype='datetime64[ns]') if isinstance(s, DatetimeIndex): expected = DatetimeIndex(expected) self.assertTrue(s.unique().equals(expected)) else: self.assert_numpy_array_equal(s.unique(), expected) self.assertEqual(s.nunique(), 3) # with NaT s = df['dt'].copy() s = klass([v for v in s.values] + [pd.NaT], name='dt') result = s.value_counts() self.assertEqual(result.index.dtype, 'datetime64[ns]') tm.assert_series_equal(result, expected_s) result = s.value_counts(dropna=False) expected_s[pd.NaT] = 1 tm.assert_series_equal(result, expected_s) unique = s.unique() self.assertEqual(unique.dtype, 'datetime64[ns]') # numpy_array_equal cannot compare pd.NaT self.assert_numpy_array_equal(unique[:3], expected) self.assertTrue(unique[3] is pd.NaT or unique[3].astype('int64') == pd.tslib.iNaT) self.assertEqual(s.nunique(), 3) self.assertEqual(s.nunique(dropna=False), 4) # timedelta64[ns] td = df.dt - df.dt + timedelta(1) td = klass(td, name='dt') result = td.value_counts() expected_s = Series([6], index=[Timedelta('1day')], name='dt') tm.assert_series_equal(result, expected_s) expected = TimedeltaIndex(['1 days']) if isinstance(td, TimedeltaIndex): self.assertTrue(td.unique().equals(expected)) else: self.assert_numpy_array_equal(td.unique(), expected.values) td2 = timedelta(1) + (df.dt - df.dt) td2 = klass(td2, name='dt') result2 = td2.value_counts() tm.assert_series_equal(result2, expected_s)
def test_shift(self, datetime_series): shifted = datetime_series.shift(1) unshifted = shifted.shift(-1) tm.assert_index_equal(shifted.index, datetime_series.index) tm.assert_index_equal(unshifted.index, datetime_series.index) tm.assert_numpy_array_equal(unshifted.dropna().values, datetime_series.values[:-1]) offset = BDay() shifted = datetime_series.shift(1, freq=offset) unshifted = shifted.shift(-1, freq=offset) tm.assert_series_equal(unshifted, datetime_series) unshifted = datetime_series.shift(0, freq=offset) tm.assert_series_equal(unshifted, datetime_series) shifted = datetime_series.shift(1, freq="B") unshifted = shifted.shift(-1, freq="B") tm.assert_series_equal(unshifted, datetime_series) # corner case unshifted = datetime_series.shift(0) tm.assert_series_equal(unshifted, datetime_series) # Shifting with PeriodIndex ps = tm.makePeriodSeries() shifted = ps.shift(1) unshifted = shifted.shift(-1) tm.assert_index_equal(shifted.index, ps.index) tm.assert_index_equal(unshifted.index, ps.index) tm.assert_numpy_array_equal(unshifted.dropna().values, ps.values[:-1]) shifted2 = ps.shift(1, "B") shifted3 = ps.shift(1, BDay()) tm.assert_series_equal(shifted2, shifted3) tm.assert_series_equal(ps, shifted2.shift(-1, "B")) msg = "Given freq D does not match PeriodIndex freq B" with pytest.raises(ValueError, match=msg): ps.shift(freq="D") # legacy support shifted4 = ps.shift(1, freq="B") tm.assert_series_equal(shifted2, shifted4) shifted5 = ps.shift(1, freq=BDay()) tm.assert_series_equal(shifted5, shifted4) # 32-bit taking # GH#8129 index = date_range("2000-01-01", periods=5) for dtype in ["int32", "int64"]: s1 = Series(np.arange(5, dtype=dtype), index=index) p = s1.iloc[1] result = s1.shift(periods=p) expected = Series([np.nan, 0, 1, 2, 3], index=index) tm.assert_series_equal(result, expected) # GH#8260 # with tz s = Series(date_range("2000-01-01 09:00:00", periods=5, tz="US/Eastern"), name="foo") result = s - s.shift() exp = Series(TimedeltaIndex(["NaT"] + ["1 days"] * 4), name="foo") tm.assert_series_equal(result, exp) # incompat tz s2 = Series(date_range("2000-01-01 09:00:00", periods=5, tz="CET"), name="foo") msg = "DatetimeArray subtraction must have the same timezones or no timezones" with pytest.raises(TypeError, match=msg): s - s2
def test_dt_namespace_accessor(self): # GH 7207, 11128 # test .dt namespace accessor ok_for_period = PeriodArray._datetimelike_ops ok_for_period_methods = ["strftime", "to_timestamp", "asfreq"] ok_for_dt = DatetimeIndex._datetimelike_ops ok_for_dt_methods = [ "to_period", "to_pydatetime", "tz_localize", "tz_convert", "normalize", "strftime", "round", "floor", "ceil", "day_name", "month_name", "isocalendar", ] ok_for_td = TimedeltaIndex._datetimelike_ops ok_for_td_methods = [ "components", "to_pytimedelta", "total_seconds", "round", "floor", "ceil", ] def get_expected(s, name): result = getattr(Index(s._values), prop) if isinstance(result, np.ndarray): if is_integer_dtype(result): result = result.astype("int64") elif not is_list_like(result) or isinstance(result, DataFrame): return result return Series(result, index=s.index, name=s.name) def compare(s, name): a = getattr(s.dt, prop) b = get_expected(s, prop) if not (is_list_like(a) and is_list_like(b)): assert a == b elif isinstance(a, DataFrame): tm.assert_frame_equal(a, b) else: tm.assert_series_equal(a, b) # datetimeindex cases = [ Series(date_range("20130101", periods=5), name="xxx"), Series(date_range("20130101", periods=5, freq="s"), name="xxx"), Series(date_range("20130101 00:00:00", periods=5, freq="ms"), name="xxx"), ] for s in cases: for prop in ok_for_dt: # we test freq below # we ignore week and weekofyear because they are deprecated if prop not in ["freq", "week", "weekofyear"]: compare(s, prop) for prop in ok_for_dt_methods: getattr(s.dt, prop) result = s.dt.to_pydatetime() assert isinstance(result, np.ndarray) assert result.dtype == object result = s.dt.tz_localize("US/Eastern") exp_values = DatetimeIndex(s.values).tz_localize("US/Eastern") expected = Series(exp_values, index=s.index, name="xxx") tm.assert_series_equal(result, expected) tz_result = result.dt.tz assert str(tz_result) == "US/Eastern" freq_result = s.dt.freq assert freq_result == DatetimeIndex(s.values, freq="infer").freq # let's localize, then convert result = s.dt.tz_localize("UTC").dt.tz_convert("US/Eastern") exp_values = (DatetimeIndex( s.values).tz_localize("UTC").tz_convert("US/Eastern")) expected = Series(exp_values, index=s.index, name="xxx") tm.assert_series_equal(result, expected) # datetimeindex with tz s = Series(date_range("20130101", periods=5, tz="US/Eastern"), name="xxx") for prop in ok_for_dt: # we test freq below # we ignore week and weekofyear because they are deprecated if prop not in ["freq", "week", "weekofyear"]: compare(s, prop) for prop in ok_for_dt_methods: getattr(s.dt, prop) result = s.dt.to_pydatetime() assert isinstance(result, np.ndarray) assert result.dtype == object result = s.dt.tz_convert("CET") expected = Series(s._values.tz_convert("CET"), index=s.index, name="xxx") tm.assert_series_equal(result, expected) tz_result = result.dt.tz assert str(tz_result) == "CET" freq_result = s.dt.freq assert freq_result == DatetimeIndex(s.values, freq="infer").freq # timedelta index cases = [ Series(timedelta_range("1 day", periods=5), index=list("abcde"), name="xxx"), Series(timedelta_range("1 day 01:23:45", periods=5, freq="s"), name="xxx"), Series( timedelta_range("2 days 01:23:45.012345", periods=5, freq="ms"), name="xxx", ), ] for s in cases: for prop in ok_for_td: # we test freq below if prop != "freq": compare(s, prop) for prop in ok_for_td_methods: getattr(s.dt, prop) result = s.dt.components assert isinstance(result, DataFrame) tm.assert_index_equal(result.index, s.index) result = s.dt.to_pytimedelta() assert isinstance(result, np.ndarray) assert result.dtype == object result = s.dt.total_seconds() assert isinstance(result, Series) assert result.dtype == "float64" freq_result = s.dt.freq assert freq_result == TimedeltaIndex(s.values, freq="infer").freq # both index = date_range("20130101", periods=3, freq="D") s = Series(date_range("20140204", periods=3, freq="s"), index=index, name="xxx") exp = Series(np.array([2014, 2014, 2014], dtype="int64"), index=index, name="xxx") tm.assert_series_equal(s.dt.year, exp) exp = Series(np.array([2, 2, 2], dtype="int64"), index=index, name="xxx") tm.assert_series_equal(s.dt.month, exp) exp = Series(np.array([0, 1, 2], dtype="int64"), index=index, name="xxx") tm.assert_series_equal(s.dt.second, exp) exp = Series([s[0]] * 3, index=index, name="xxx") tm.assert_series_equal(s.dt.normalize(), exp) # periodindex cases = [ Series(period_range("20130101", periods=5, freq="D"), name="xxx") ] for s in cases: for prop in ok_for_period: # we test freq below if prop != "freq": compare(s, prop) for prop in ok_for_period_methods: getattr(s.dt, prop) freq_result = s.dt.freq assert freq_result == PeriodIndex(s.values).freq # test limited display api def get_dir(s): results = [r for r in s.dt.__dir__() if not r.startswith("_")] return sorted(set(results)) s = Series(date_range("20130101", periods=5, freq="D"), name="xxx") results = get_dir(s) tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods))) s = Series( period_range("20130101", periods=5, freq="D", name="xxx").astype(object)) results = get_dir(s) tm.assert_almost_equal( results, sorted(set(ok_for_period + ok_for_period_methods))) # 11295 # ambiguous time error on the conversions s = Series(date_range("2015-01-01", "2016-01-01", freq="T"), name="xxx") s = s.dt.tz_localize("UTC").dt.tz_convert("America/Chicago") results = get_dir(s) tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods))) exp_values = date_range("2015-01-01", "2016-01-01", freq="T", tz="UTC").tz_convert("America/Chicago") # freq not preserved by tz_localize above exp_values = exp_values._with_freq(None) expected = Series(exp_values, name="xxx") tm.assert_series_equal(s, expected) # no setting allowed s = Series(date_range("20130101", periods=5, freq="D"), name="xxx") with pytest.raises(ValueError, match="modifications"): s.dt.hour = 5 # trying to set a copy msg = "modifications to a property of a datetimelike.+not supported" with pd.option_context("chained_assignment", "raise"): with pytest.raises(com.SettingWithCopyError, match=msg): s.dt.hour[0] = 5
def test_argmin_argmax(self): idx = TimedeltaIndex( ["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"]) assert idx.argmin() == 1 assert idx.argmax() == 0
def ccf(x, y, lags=365, bin_method='rectangle', bin_width=0.5, max_gap=inf, min_obs=100, full_output=False, alpha=0.05): """Method to compute the cross-correlation for irregular time series. Parameters ---------- x,y: pandas.Series Pandas Series containing the values to calculate the cross-correlation for. The index has to be a Pandas.DatetimeIndex lags: array_like, optional numpy array containing the lags in days for which the cross-correlation is calculated. Default [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 13, 14, 30, 61, 90, 120, 150, 180, 210, 240, 270, 300, 330, 365] bin_method: str, optional method to determine the type of bin. Options are "rectangle" (default), and "gaussian". bin_width: float, optional number of days used as the width for the bin to calculate the correlation. By default these values are chosen based on the bin_method and the average time step (dt_mu). That is 0.5dt_mu when bin_method="rectangle" and 0.25dt_mu when bin_method="gaussian". max_gap: float, optional Maximum timestep gap in the data. All timesteps above this gap value are not used for calculating the average timestep. This can be helpful when there is a large gap in the data that influences the average timestep. min_obs: int, optional Minimum number of observations in a bin to determine the correlation. full_output: bool, optional If True, also estimated uncertainties are returned. Default is False. alpha: float alpha level to compute the confidence interval (e.g., 1-alpha). Returns ------- c: pandas.Series or pandas.DataFrame The Cross-correlation function. References ---------- Rehfeld, K., Marwan, N., Heitzig, J., Kurths, J. (2011). Comparison of correlation analysis techniques for irregularly sampled time series. Nonlinear Processes in Geophysics. 18. 389-404. 10.5194 pg-18-389-2011. Tip --- This method will be significantly faster when Numba is installed. Check out the [Numba project here](https://numba.pydata.org) Examples -------- >>> ccf = ps.stats.ccf(x, y, bin_method="gaussian") """ # prepare the time indices for x and y if x.index.inferred_freq and y.index.inferred_freq: bin_method = "regular" elif bin_method == "regular": raise Warning("time series does not have regular time steps, " "choose different bin_method") x, t_x, dt_x_mu = _preprocess(x, max_gap=max_gap) y, t_y, dt_y_mu = _preprocess(y, max_gap=max_gap) dt_mu = max(dt_x_mu, dt_y_mu) # Mean time step from both series if isinstance(lags, int) and bin_method == "regular": lags = arange(int(dt_mu), lags + 1, int(dt_mu), dtype=float) elif isinstance(lags, int): lags = arange(1.0, lags + 1, dtype=float) elif isinstance(lags, list): lags = array(lags, dtype=float) if bin_method == "rectangle": if bin_width is None: bin_width = 0.5 * dt_mu c, b = _compute_ccf_rectangle(lags, t_x, x, t_y, y, bin_width) elif bin_method == "gaussian": if bin_width is None: bin_width = 0.25 * dt_mu c, b = _compute_ccf_gaussian(lags, t_x, x, t_y, y, bin_width) elif bin_method == "regular": c, b = _compute_ccf_regular(arange(1.0, len(lags) + 1), x, y) else: raise NotImplementedError std = norm.ppf(1 - alpha / 2.) / sqrt(b) result = DataFrame(data={ "ccf": c, "stderr": std, "n": b }, index=TimedeltaIndex(lags, unit="D", name="Lags")) result = result.where(result.n > min_obs).dropna() if full_output: return result else: return result.ccf
def test_searchsorted_invalid_argument_dtype(self, arg): idx = TimedeltaIndex(["1 day", "2 days", "3 days"]) msg = "searchsorted requires compatible dtype" with pytest.raises(TypeError, match=msg): idx.searchsorted(arg)
def test_constructor_wrong_precision_raises(self): msg = r"dtype timedelta64\[us\] cannot be converted to timedelta64\[ns\]" with pytest.raises(ValueError, match=msg): TimedeltaIndex(["2000"], dtype="timedelta64[us]")
class TestTimedeltaIndex: def test_union(self): i1 = timedelta_range("1day", periods=5) i2 = timedelta_range("3day", periods=5) result = i1.union(i2) expected = timedelta_range("1day", periods=7) tm.assert_index_equal(result, expected) i1 = Int64Index(np.arange(0, 20, 2)) i2 = timedelta_range(start="1 day", periods=10, freq="D") i1.union(i2) # Works i2.union(i1) # Fails with "AttributeError: can't set attribute" def test_union_coverage(self): idx = TimedeltaIndex(["3d", "1d", "2d"]) ordered = TimedeltaIndex(idx.sort_values(), freq="infer") result = ordered.union(idx) tm.assert_index_equal(result, ordered) result = ordered[:0].union(ordered) tm.assert_index_equal(result, ordered) assert result.freq == ordered.freq def test_union_bug_1730(self): rng_a = timedelta_range("1 day", periods=4, freq="3H") rng_b = timedelta_range("1 day", periods=4, freq="4H") result = rng_a.union(rng_b) exp = TimedeltaIndex(sorted(set(list(rng_a)) | set(list(rng_b)))) tm.assert_index_equal(result, exp) def test_union_bug_1745(self): left = TimedeltaIndex(["1 day 15:19:49.695000"]) right = TimedeltaIndex([ "2 day 13:04:21.322000", "1 day 15:27:24.873000", "1 day 15:31:05.350000" ]) result = left.union(right) exp = TimedeltaIndex(sorted(set(list(left)) | set(list(right)))) tm.assert_index_equal(result, exp) def test_union_bug_4564(self): left = timedelta_range("1 day", "30d") right = left + pd.offsets.Minute(15) result = left.union(right) exp = TimedeltaIndex(sorted(set(list(left)) | set(list(right)))) tm.assert_index_equal(result, exp) def test_intersection_bug_1708(self): index_1 = timedelta_range("1 day", periods=4, freq="h") index_2 = index_1 + pd.offsets.Hour(5) result = index_1 & index_2 assert len(result) == 0 index_1 = timedelta_range("1 day", periods=4, freq="h") index_2 = index_1 + pd.offsets.Hour(1) result = index_1 & index_2 expected = timedelta_range("1 day 01:00:00", periods=3, freq="h") tm.assert_index_equal(result, expected) @pytest.mark.parametrize("sort", [None, False]) def test_intersection_equal(self, sort): # GH 24471 Test intersection outcome given the sort keyword # for equal indicies intersection should return the original index first = timedelta_range("1 day", periods=4, freq="h") second = timedelta_range("1 day", periods=4, freq="h") intersect = first.intersection(second, sort=sort) if sort is None: tm.assert_index_equal(intersect, second.sort_values()) assert tm.equalContents(intersect, second) # Corner cases inter = first.intersection(first, sort=sort) assert inter is first @pytest.mark.parametrize("period_1, period_2", [(0, 4), (4, 0)]) @pytest.mark.parametrize("sort", [None, False]) def test_intersection_zero_length(self, period_1, period_2, sort): # GH 24471 test for non overlap the intersection should be zero length index_1 = timedelta_range("1 day", periods=period_1, freq="h") index_2 = timedelta_range("1 day", periods=period_2, freq="h") expected = timedelta_range("1 day", periods=0, freq="h") result = index_1.intersection(index_2, sort=sort) tm.assert_index_equal(result, expected) @pytest.mark.parametrize("sort", [None, False]) def test_zero_length_input_index(self, sort): # GH 24966 test for 0-len intersections are copied index_1 = timedelta_range("1 day", periods=0, freq="h") index_2 = timedelta_range("1 day", periods=3, freq="h") result = index_1.intersection(index_2, sort=sort) assert index_1 is not result assert index_2 is not result tm.assert_copy(result, index_1) @pytest.mark.parametrize( "rng, expected", # if target has the same name, it is preserved [ ( timedelta_range("1 day", periods=5, freq="h", name="idx"), timedelta_range("1 day", periods=4, freq="h", name="idx"), ), # if target name is different, it will be reset ( timedelta_range("1 day", periods=5, freq="h", name="other"), timedelta_range("1 day", periods=4, freq="h", name=None), ), # if no overlap exists return empty index ( timedelta_range("1 day", periods=10, freq="h", name="idx")[5:], TimedeltaIndex([], name="idx"), ), ], ) @pytest.mark.parametrize("sort", [None, False]) def test_intersection(self, rng, expected, sort): # GH 4690 (with tz) base = timedelta_range("1 day", periods=4, freq="h", name="idx") result = base.intersection(rng, sort=sort) if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq == expected.freq @pytest.mark.parametrize( "rng, expected", # part intersection works [ ( TimedeltaIndex(["5 hour", "2 hour", "4 hour", "9 hour"], name="idx"), TimedeltaIndex(["2 hour", "4 hour"], name="idx"), ), # reordered part intersection ( TimedeltaIndex(["2 hour", "5 hour", "5 hour", "1 hour"], name="other"), TimedeltaIndex(["1 hour", "2 hour"], name=None), ), # reveresed index ( TimedeltaIndex(["1 hour", "2 hour", "4 hour", "3 hour"], name="idx")[::-1], TimedeltaIndex(["1 hour", "2 hour", "4 hour", "3 hour"], name="idx"), ), ], ) @pytest.mark.parametrize("sort", [None, False]) def test_intersection_non_monotonic(self, rng, expected, sort): # 24471 non-monotonic base = TimedeltaIndex(["1 hour", "2 hour", "4 hour", "3 hour"], name="idx") result = base.intersection(rng, sort=sort) if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) assert result.name == expected.name # if reveresed order, frequency is still the same if all(base == rng[::-1]) and sort is None: assert isinstance(result.freq, Hour) else: assert result.freq is None
def test_subtraction_ops_with_tz(self): # check that dt/dti subtraction ops with tz are validated dti = date_range('20130101', periods=3) ts = Timestamp('20130101') dt = ts.to_pydatetime() dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') ts_tz = Timestamp('20130101').tz_localize('US/Eastern') ts_tz2 = Timestamp('20130101').tz_localize('CET') dt_tz = ts_tz.to_pydatetime() td = Timedelta('1 days') def _check(result, expected): assert result == expected assert isinstance(result, Timedelta) # scalars result = ts - ts expected = Timedelta('0 days') _check(result, expected) result = dt_tz - ts_tz expected = Timedelta('0 days') _check(result, expected) result = ts_tz - dt_tz expected = Timedelta('0 days') _check(result, expected) # tz mismatches pytest.raises(TypeError, lambda: dt_tz - ts) pytest.raises(TypeError, lambda: dt_tz - dt) pytest.raises(TypeError, lambda: dt_tz - ts_tz2) pytest.raises(TypeError, lambda: dt - dt_tz) pytest.raises(TypeError, lambda: ts - dt_tz) pytest.raises(TypeError, lambda: ts_tz2 - ts) pytest.raises(TypeError, lambda: ts_tz2 - dt) pytest.raises(TypeError, lambda: ts_tz - ts_tz2) # with dti pytest.raises(TypeError, lambda: dti - ts_tz) pytest.raises(TypeError, lambda: dti_tz - ts) pytest.raises(TypeError, lambda: dti_tz - ts_tz2) result = dti_tz - dt_tz expected = TimedeltaIndex(['0 days', '1 days', '2 days']) tm.assert_index_equal(result, expected) result = dt_tz - dti_tz expected = TimedeltaIndex(['0 days', '-1 days', '-2 days']) tm.assert_index_equal(result, expected) result = dti_tz - ts_tz expected = TimedeltaIndex(['0 days', '1 days', '2 days']) tm.assert_index_equal(result, expected) result = ts_tz - dti_tz expected = TimedeltaIndex(['0 days', '-1 days', '-2 days']) tm.assert_index_equal(result, expected) result = td - td expected = Timedelta('0 days') _check(result, expected) result = dti_tz - td expected = DatetimeIndex( ['20121231', '20130101', '20130102'], tz='US/Eastern') tm.assert_index_equal(result, expected)
def test_round(self): t1 = Timedelta("1 days 02:34:56.789123456") t2 = Timedelta("-1 days 02:34:56.789123456") for (freq, s1, s2) in [ ("N", t1, t2), ( "U", Timedelta("1 days 02:34:56.789123000"), Timedelta("-1 days 02:34:56.789123000"), ), ( "L", Timedelta("1 days 02:34:56.789000000"), Timedelta("-1 days 02:34:56.789000000"), ), ("S", Timedelta("1 days 02:34:57"), Timedelta("-1 days 02:34:57")), ("2S", Timedelta("1 days 02:34:56"), Timedelta("-1 days 02:34:56")), ("5S", Timedelta("1 days 02:34:55"), Timedelta("-1 days 02:34:55")), ("T", Timedelta("1 days 02:35:00"), Timedelta("-1 days 02:35:00")), ("12T", Timedelta("1 days 02:36:00"), Timedelta("-1 days 02:36:00")), ("H", Timedelta("1 days 03:00:00"), Timedelta("-1 days 03:00:00")), ("d", Timedelta("1 days"), Timedelta("-1 days")), ]: r1 = t1.round(freq) assert r1 == s1 r2 = t2.round(freq) assert r2 == s2 # invalid for freq, msg in [ ("Y", "<YearEnd: month=12> is a non-fixed frequency"), ("M", "<MonthEnd> is a non-fixed frequency"), ("foobar", "Invalid frequency: foobar"), ]: with pytest.raises(ValueError, match=msg): t1.round(freq) t1 = timedelta_range("1 days", periods=3, freq="1 min 2 s 3 us") t2 = -1 * t1 t1a = timedelta_range("1 days", periods=3, freq="1 min 2 s") t1c = TimedeltaIndex([1, 1, 1], unit="D") # note that negative times round DOWN! so don't give whole numbers for (freq, s1, s2) in [ ("N", t1, t2), ("U", t1, t2), ( "L", t1a, TimedeltaIndex( ["-1 days +00:00:00", "-2 days +23:58:58", "-2 days +23:57:56"], dtype="timedelta64[ns]", freq=None, ), ), ( "S", t1a, TimedeltaIndex( ["-1 days +00:00:00", "-2 days +23:58:58", "-2 days +23:57:56"], dtype="timedelta64[ns]", freq=None, ), ), ( "12T", t1c, TimedeltaIndex( ["-1 days", "-1 days", "-1 days"], dtype="timedelta64[ns]", freq=None, ), ), ( "H", t1c, TimedeltaIndex( ["-1 days", "-1 days", "-1 days"], dtype="timedelta64[ns]", freq=None, ), ), ("d", t1c, TimedeltaIndex([-1, -1, -1], unit="D")), ]: r1 = t1.round(freq) tm.assert_index_equal(r1, s1) r2 = t2.round(freq) tm.assert_index_equal(r2, s2) # invalid for freq, msg in [ ("Y", "<YearEnd: month=12> is a non-fixed frequency"), ("M", "<MonthEnd> is a non-fixed frequency"), ("foobar", "Invalid frequency: foobar"), ]: with pytest.raises(ValueError, match=msg): t1.round(freq)
tm.assert_numpy_array_equal(result, expected) def test_strftime_nat(self): # GH 29578 arr = PeriodArray(PeriodIndex(["2019-01-01", NaT], dtype="period[D]")) result = arr.strftime("%Y-%m-%d") expected = np.array(["2019-01-01", np.nan], dtype=object) tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize( "arr,casting_nats", [ ( TimedeltaIndex(["1 Day", "3 Hours", "NaT"])._data, (NaT, np.timedelta64("NaT", "ns")), ), ( pd.date_range("2000-01-01", periods=3, freq="D")._data, (NaT, np.datetime64("NaT", "ns")), ), (pd.period_range("2000-01-01", periods=3, freq="D")._data, (NaT, )), ], ids=lambda x: type(x).__name__, ) def test_casting_nat_setitem_array(arr, casting_nats): expected = type(arr)._from_sequence([NaT, arr[1], arr[2]]) for nat in casting_nats: arr = arr.copy()
@pytest.mark.parametrize( "op_name", [ "left_plus_right", "right_plus_left", "left_minus_right", "right_minus_left" ], ) @pytest.mark.parametrize( "value", [ DatetimeIndex(["2011-01-01", "2011-01-02"], name="x"), DatetimeIndex(["2011-01-01", "2011-01-02"], tz="US/Eastern", name="x"), DatetimeArray._from_sequence(["2011-01-01", "2011-01-02"]), DatetimeArray._from_sequence(["2011-01-01", "2011-01-02"], tz="US/Pacific"), TimedeltaIndex(["1 day", "2 day"], name="x"), ], ) def test_nat_arithmetic_index(op_name, value): # see gh-11718 exp_name = "x" exp_data = [NaT] * 2 if is_datetime64_any_dtype(value.dtype) and "plus" in op_name: expected = DatetimeIndex(exp_data, tz=value.tz, name=exp_name) else: expected = TimedeltaIndex(exp_data, name=exp_name) if not isinstance(value, Index): expected = expected.array
def test_range_kwargs_deprecated(self): # GH#23919 with tm.assert_produces_warning(FutureWarning): TimedeltaIndex(start='1 Day', end='3 Days', freq='D')
def diff(arr, n, axis=0): """ difference of n between self, analagoust to s-s.shift(n) """ n = int(n) na = np.nan dtype = arr.dtype is_timedelta = False if needs_i8_conversion(arr): dtype = np.float64 arr = arr.view('i8') na = tslib.iNaT is_timedelta = True elif issubclass(dtype.type, np.integer): dtype = np.float64 elif issubclass(dtype.type, np.bool_): dtype = np.object_ dtype = np.dtype(dtype) out_arr = np.empty(arr.shape, dtype=dtype) na_indexer = [slice(None)] * arr.ndim na_indexer[axis] = slice(None, n) if n >= 0 else slice(n, None) out_arr[tuple(na_indexer)] = na if arr.ndim == 2 and arr.dtype.name in _diff_special: f = _diff_special[arr.dtype.name] f(arr, out_arr, n, axis) else: res_indexer = [slice(None)] * arr.ndim res_indexer[axis] = slice(n, None) if n >= 0 else slice(None, n) res_indexer = tuple(res_indexer) lag_indexer = [slice(None)] * arr.ndim lag_indexer[axis] = slice(None, -n) if n > 0 else slice(-n, None) lag_indexer = tuple(lag_indexer) # need to make sure that we account for na for datelike/timedelta # we don't actually want to subtract these i8 numbers if is_timedelta: res = arr[res_indexer] lag = arr[lag_indexer] mask = (arr[res_indexer] == na) | (arr[lag_indexer] == na) if mask.any(): res = res.copy() res[mask] = 0 lag = lag.copy() lag[mask] = 0 result = res - lag result[mask] = na out_arr[res_indexer] = result else: out_arr[res_indexer] = arr[res_indexer] - arr[lag_indexer] if is_timedelta: from pandas import TimedeltaIndex out_arr = TimedeltaIndex(out_arr.ravel().astype('int64')).asi8.reshape( out_arr.shape).astype('timedelta64[ns]') return out_arr
if labels is None: intervals = IntervalIndex([Interval(start, end)] * length, closed="right") expected = Series(intervals).astype(CDT(ordered=True)) else: expected = Series([0] * length, dtype=np.intp) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( "ser", [ Series(DatetimeIndex(["20180101", NaT, "20180103"])), Series(TimedeltaIndex(["0 days", NaT, "2 days"])), ], ids=lambda x: str(x.dtype), ) def test_qcut_nat(ser): # see gh-19768 intervals = IntervalIndex.from_tuples([(ser[0] - Nano(), ser[2] - Day()), np.nan, (ser[2] - Day(), ser[2])]) expected = Series(Categorical(intervals, ordered=True)) result = qcut(ser, 2) tm.assert_series_equal(result, expected) @pytest.mark.parametrize("bins", [3, np.linspace(0, 1, 4)]) def test_datetime_tz_qcut(bins):
def test_shift_no_freq(self): # GH#19147 tdi = TimedeltaIndex(['1 days 01:00:00', '2 days 01:00:00'], freq=None) with pytest.raises(NullFrequencyError): tdi.shift(2)
def test_dt_namespace_accessor(self): # GH 7207, 11128 # test .dt namespace accessor ok_for_period = PeriodArray._datetimelike_ops ok_for_period_methods = ['strftime', 'to_timestamp', 'asfreq'] ok_for_dt = DatetimeIndex._datetimelike_ops ok_for_dt_methods = [ 'to_period', 'to_pydatetime', 'tz_localize', 'tz_convert', 'normalize', 'strftime', 'round', 'floor', 'ceil', 'day_name', 'month_name' ] ok_for_td = TimedeltaIndex._datetimelike_ops ok_for_td_methods = [ 'components', 'to_pytimedelta', 'total_seconds', 'round', 'floor', 'ceil' ] def get_expected(s, name): result = getattr(Index(s._values), prop) if isinstance(result, np.ndarray): if is_integer_dtype(result): result = result.astype('int64') elif not is_list_like(result): return result return Series(result, index=s.index, name=s.name) def compare(s, name): a = getattr(s.dt, prop) b = get_expected(s, prop) if not (is_list_like(a) and is_list_like(b)): assert a == b else: tm.assert_series_equal(a, b) # datetimeindex cases = [ Series(date_range('20130101', periods=5), name='xxx'), Series(date_range('20130101', periods=5, freq='s'), name='xxx'), Series(date_range('20130101 00:00:00', periods=5, freq='ms'), name='xxx') ] for s in cases: for prop in ok_for_dt: # we test freq below if prop != 'freq': compare(s, prop) for prop in ok_for_dt_methods: getattr(s.dt, prop) result = s.dt.to_pydatetime() assert isinstance(result, np.ndarray) assert result.dtype == object result = s.dt.tz_localize('US/Eastern') exp_values = DatetimeIndex(s.values).tz_localize('US/Eastern') expected = Series(exp_values, index=s.index, name='xxx') tm.assert_series_equal(result, expected) tz_result = result.dt.tz assert str(tz_result) == 'US/Eastern' freq_result = s.dt.freq assert freq_result == DatetimeIndex(s.values, freq='infer').freq # let's localize, then convert result = s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern') exp_values = (DatetimeIndex( s.values).tz_localize('UTC').tz_convert('US/Eastern')) expected = Series(exp_values, index=s.index, name='xxx') tm.assert_series_equal(result, expected) # datetimeindex with tz s = Series(date_range('20130101', periods=5, tz='US/Eastern'), name='xxx') for prop in ok_for_dt: # we test freq below if prop != 'freq': compare(s, prop) for prop in ok_for_dt_methods: getattr(s.dt, prop) result = s.dt.to_pydatetime() assert isinstance(result, np.ndarray) assert result.dtype == object result = s.dt.tz_convert('CET') expected = Series(s._values.tz_convert('CET'), index=s.index, name='xxx') tm.assert_series_equal(result, expected) tz_result = result.dt.tz assert str(tz_result) == 'CET' freq_result = s.dt.freq assert freq_result == DatetimeIndex(s.values, freq='infer').freq # timedelta index cases = [ Series(timedelta_range('1 day', periods=5), index=list('abcde'), name='xxx'), Series(timedelta_range('1 day 01:23:45', periods=5, freq='s'), name='xxx'), Series(timedelta_range('2 days 01:23:45.012345', periods=5, freq='ms'), name='xxx') ] for s in cases: for prop in ok_for_td: # we test freq below if prop != 'freq': compare(s, prop) for prop in ok_for_td_methods: getattr(s.dt, prop) result = s.dt.components assert isinstance(result, DataFrame) tm.assert_index_equal(result.index, s.index) result = s.dt.to_pytimedelta() assert isinstance(result, np.ndarray) assert result.dtype == object result = s.dt.total_seconds() assert isinstance(result, pd.Series) assert result.dtype == 'float64' freq_result = s.dt.freq assert freq_result == TimedeltaIndex(s.values, freq='infer').freq # both index = date_range('20130101', periods=3, freq='D') s = Series(date_range('20140204', periods=3, freq='s'), index=index, name='xxx') exp = Series(np.array([2014, 2014, 2014], dtype='int64'), index=index, name='xxx') tm.assert_series_equal(s.dt.year, exp) exp = Series(np.array([2, 2, 2], dtype='int64'), index=index, name='xxx') tm.assert_series_equal(s.dt.month, exp) exp = Series(np.array([0, 1, 2], dtype='int64'), index=index, name='xxx') tm.assert_series_equal(s.dt.second, exp) exp = pd.Series([s[0]] * 3, index=index, name='xxx') tm.assert_series_equal(s.dt.normalize(), exp) # periodindex cases = [ Series(period_range('20130101', periods=5, freq='D'), name='xxx') ] for s in cases: for prop in ok_for_period: # we test freq below if prop != 'freq': compare(s, prop) for prop in ok_for_period_methods: getattr(s.dt, prop) freq_result = s.dt.freq assert freq_result == PeriodIndex(s.values).freq # test limited display api def get_dir(s): results = [r for r in s.dt.__dir__() if not r.startswith('_')] return list(sorted(set(results))) s = Series(date_range('20130101', periods=5, freq='D'), name='xxx') results = get_dir(s) tm.assert_almost_equal( results, list(sorted(set(ok_for_dt + ok_for_dt_methods)))) s = Series( period_range('20130101', periods=5, freq='D', name='xxx').astype(object)) results = get_dir(s) tm.assert_almost_equal( results, list(sorted(set(ok_for_period + ok_for_period_methods)))) # 11295 # ambiguous time error on the conversions s = Series(pd.date_range('2015-01-01', '2016-01-01', freq='T'), name='xxx') s = s.dt.tz_localize('UTC').dt.tz_convert('America/Chicago') results = get_dir(s) tm.assert_almost_equal( results, list(sorted(set(ok_for_dt + ok_for_dt_methods)))) exp_values = pd.date_range('2015-01-01', '2016-01-01', freq='T', tz='UTC').tz_convert('America/Chicago') expected = Series(exp_values, name='xxx') tm.assert_series_equal(s, expected) # no setting allowed s = Series(date_range('20130101', periods=5, freq='D'), name='xxx') with pytest.raises(ValueError, match="modifications"): s.dt.hour = 5 # trying to set a copy with pd.option_context('chained_assignment', 'raise'): with pytest.raises(com.SettingWithCopyError): s.dt.hour[0] = 5
def test_tdi_sub_timestamp_raises(self): idx = TimedeltaIndex(['1 day', '2 day']) msg = "cannot subtract a datelike from a TimedeltaIndex" with tm.assert_raises_regex(TypeError, msg): idx - Timestamp('2011-01-01')
def test_unit_m_y_raises(self, unit): msg = "Units 'M', 'Y', and 'y' are no longer supported" with pytest.raises(ValueError, match=msg): TimedeltaIndex([1, 3, 7], unit)
def test_tdi_radd_timestamp(self): idx = TimedeltaIndex(['1 day', '2 day']) result = Timestamp('2011-01-01') + idx expected = DatetimeIndex(['2011-01-02', '2011-01-03']) tm.assert_index_equal(result, expected)
def test_float64_unit_conversion(self): # GH#23539 tdi = TimedeltaIndex([1.5, 2.25], unit="D") expected = TimedeltaIndex([Timedelta(days=1.5), Timedelta(days=2.25)]) tm.assert_index_equal(tdi, expected)
def test_tdi_div_tdlike_scalar_with_nat(self, delta): rng = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') expected = Float64Index([12, np.nan, 24], name='foo') result = rng / delta tm.assert_index_equal(result, expected)
def test_to_timedelta(self): def conv(v): return v.astype('m8[ns]') d1 = np.timedelta64(1, 'D') assert (to_timedelta('1 days 06:05:01.00003', box=False) == conv(d1 + np.timedelta64(6 * 3600 + 5 * 60 + 1, 's') + np.timedelta64(30, 'us'))) assert (to_timedelta('15.5us', box=False) == conv(np.timedelta64(15500, 'ns'))) # empty string result = to_timedelta('', box=False) assert result.astype('int64') == iNaT result = to_timedelta(['', '']) assert isna(result).all() # pass thru result = to_timedelta(np.array([np.timedelta64(1, 's')])) expected = pd.Index(np.array([np.timedelta64(1, 's')])) tm.assert_index_equal(result, expected) # ints result = np.timedelta64(0, 'ns') expected = to_timedelta(0, box=False) assert result == expected # Series expected = Series([timedelta(days=1), timedelta(days=1, seconds=1)]) result = to_timedelta(Series(['1d', '1days 00:00:01'])) tm.assert_series_equal(result, expected) # with units result = TimedeltaIndex([np.timedelta64(0, 'ns'), np.timedelta64( 10, 's').astype('m8[ns]')]) expected = to_timedelta([0, 10], unit='s') tm.assert_index_equal(result, expected) # single element conversion v = timedelta(seconds=1) result = to_timedelta(v, box=False) expected = np.timedelta64(timedelta(seconds=1)) assert result == expected v = np.timedelta64(timedelta(seconds=1)) result = to_timedelta(v, box=False) expected = np.timedelta64(timedelta(seconds=1)) assert result == expected # arrays of various dtypes arr = np.array([1] * 5, dtype='int64') result = to_timedelta(arr, unit='s') expected = TimedeltaIndex([np.timedelta64(1, 's')] * 5) tm.assert_index_equal(result, expected) arr = np.array([1] * 5, dtype='int64') result = to_timedelta(arr, unit='m') expected = TimedeltaIndex([np.timedelta64(1, 'm')] * 5) tm.assert_index_equal(result, expected) arr = np.array([1] * 5, dtype='int64') result = to_timedelta(arr, unit='h') expected = TimedeltaIndex([np.timedelta64(1, 'h')] * 5) tm.assert_index_equal(result, expected) arr = np.array([1] * 5, dtype='timedelta64[s]') result = to_timedelta(arr) expected = TimedeltaIndex([np.timedelta64(1, 's')] * 5) tm.assert_index_equal(result, expected) arr = np.array([1] * 5, dtype='timedelta64[D]') result = to_timedelta(arr) expected = TimedeltaIndex([np.timedelta64(1, 'D')] * 5) tm.assert_index_equal(result, expected) # Test with lists as input when box=false expected = np.array(np.arange(3) * 1000000000, dtype='timedelta64[ns]') result = to_timedelta(range(3), unit='s', box=False) tm.assert_numpy_array_equal(expected, result) result = to_timedelta(np.arange(3), unit='s', box=False) tm.assert_numpy_array_equal(expected, result) result = to_timedelta([0, 1, 2], unit='s', box=False) tm.assert_numpy_array_equal(expected, result) # Tests with fractional seconds as input: expected = np.array( [0, 500000000, 800000000, 1200000000], dtype='timedelta64[ns]') result = to_timedelta([0., 0.5, 0.8, 1.2], unit='s', box=False) tm.assert_numpy_array_equal(expected, result)
def test_insert_nat(self, null): # GH 18295 (test missing) idx = timedelta_range("1day", "3day") result = idx.insert(1, null) expected = TimedeltaIndex(["1day", pd.NaT, "2day", "3day"]) tm.assert_index_equal(result, expected)