def test_difference_sort(self, sort): index = pd.TimedeltaIndex(["5 days", "3 days", "2 days", "4 days", "1 days", "0 days"]) other = timedelta_range("1 days", "4 days", freq="D") idx_diff = index.difference(other, sort) expected = TimedeltaIndex(["5 days", "0 days"], freq=None) if sort is None: expected = expected.sort_values() tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal('freq', idx_diff, expected) other = timedelta_range("2 days", "5 days", freq="D") idx_diff = index.difference(other, sort) expected = TimedeltaIndex(["1 days", "0 days"], freq=None) if sort is None: expected = expected.sort_values() tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal('freq', idx_diff, expected)
def test_get_loc_nat(self): tidx = TimedeltaIndex(['1 days 01:00:00', 'NaT', '2 days 01:00:00']) self.assertEqual(tidx.get_loc(pd.NaT), 1) self.assertEqual(tidx.get_loc(None), 1) self.assertEqual(tidx.get_loc(float('nan')), 1) self.assertEqual(tidx.get_loc(np.nan), 1)
def test_get_duplicates(self): idx = TimedeltaIndex(['1 day', '2 day', '2 day', '3 day', '3day', '4day']) result = idx.get_duplicates() ex = TimedeltaIndex(['2 day', '3day']) self.assert_index_equal(result, ex)
def test_value_counts_unique(self): # GH 7735 idx = timedelta_range('1 days 09:00:00', freq='H', periods=10) # create repeated values, 'n'th element is repeated by n+1 times idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1))) exp_idx = timedelta_range('1 days 18:00:00', freq='-1H', periods=10) expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64') for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(), expected) expected = timedelta_range('1 days 09:00:00', freq='H', periods=10) tm.assert_index_equal(idx.unique(), expected) idx = TimedeltaIndex(['1 days 09:00:00', '1 days 09:00:00', '1 days 09:00:00', '1 days 08:00:00', '1 days 08:00:00', pd.NaT]) exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00']) expected = Series([3, 2], index=exp_idx) for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(), expected) exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00', pd.NaT]) expected = Series([3, 2, 1], index=exp_idx) for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(dropna=False), expected) tm.assert_index_equal(idx.unique(), exp_idx)
def test_take_fill_value(self): # GH 12631 idx = TimedeltaIndex(['1 days', '2 days', '3 days'], name='xxx') result = idx.take(np.array([1, 0, -1])) expected = TimedeltaIndex(['2 days', '1 days', '3 days'], name='xxx') tm.assert_index_equal(result, expected) # fill_value result = idx.take(np.array([1, 0, -1]), fill_value=True) expected = TimedeltaIndex(['2 days', '1 days', 'NaT'], name='xxx') tm.assert_index_equal(result, expected) # allow_fill=False result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) expected = TimedeltaIndex(['2 days', '1 days', '3 days'], name='xxx') tm.assert_index_equal(result, expected) msg = ('When allow_fill=True and fill_value is not None, ' 'all indices must be >= -1') with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -2]), fill_value=True) with tm.assert_raises_regex(ValueError, msg): idx.take(np.array([1, 0, -5]), fill_value=True) with pytest.raises(IndexError): idx.take(np.array([1, -5]))
class TimedeltaIndexing(object): def setup(self): self.index = TimedeltaIndex(start='1985', periods=1000, freq='D') self.index2 = TimedeltaIndex(start='1986', periods=1000, freq='D') self.series = Series(range(1000), index=self.index) self.timedelta = self.index[500] def time_get_loc(self): self.index.get_loc(self.timedelta) def time_shape(self): self.index.shape def time_shallow_copy(self): self.index._shallow_copy() def time_series_loc(self): self.series.loc[self.timedelta] def time_align(self): DataFrame({'a': self.series, 'b': self.series[:500]}) def time_intersection(self): self.index.intersection(self.index2) def time_union(self): self.index.union(self.index2) def time_unique(self): self.index.unique()
def test_get_loc_nat(self): tidx = TimedeltaIndex(['1 days 01:00:00', 'NaT', '2 days 01:00:00']) assert tidx.get_loc(pd.NaT) == 1 assert tidx.get_loc(None) == 1 assert tidx.get_loc(float('nan')) == 1 assert tidx.get_loc(np.nan) == 1
def test_union_bug_1745(self): left = TimedeltaIndex(["1 day 15:19:49.695000"]) right = TimedeltaIndex(["2 day 13:04:21.322000", "1 day 15:27:24.873000", "1 day 15:31:05.350000"]) result = left.union(right) exp = TimedeltaIndex(sorted(set(list(left)) | set(list(right)))) self.assertTrue(result.equals(exp))
def test_astype_object_with_nat(self): idx = TimedeltaIndex([timedelta(days=1), timedelta(days=2), NaT, timedelta(days=4)], name='idx') expected_list = [Timedelta('1 days'), Timedelta('2 days'), NaT, Timedelta('4 days')] result = idx.astype(object) expected = Index(expected_list, dtype=object, name='idx') tm.assert_index_equal(result, expected) assert idx.tolist() == expected_list
def test_union_coverage(self): idx = TimedeltaIndex(["3d", "1d", "2d"]) ordered = TimedeltaIndex(idx.order(), freq="infer") result = ordered.union(idx) self.assertTrue(result.equals(ordered)) result = ordered[:0].union(ordered) self.assertTrue(result.equals(ordered)) self.assertEqual(result.freq, ordered.freq)
def test_get_duplicates(self): idx = TimedeltaIndex(['1 day', '2 day', '2 day', '3 day', '3day', '4day']) with warnings.catch_warnings(record=True): # Deprecated - see GH20239 result = idx.get_duplicates() ex = TimedeltaIndex(['2 day', '3day']) tm.assert_index_equal(result, ex)
def test_union_coverage(self): idx = TimedeltaIndex(['3d','1d','2d']) ordered = TimedeltaIndex(idx.order(), freq='infer') result = ordered.union(idx) self.assertTrue(result.equals(ordered)) result = ordered[:0].union(ordered) self.assertTrue(result.equals(ordered)) self.assertEqual(result.freq, ordered.freq)
def test_union_bug_1745(self): left = TimedeltaIndex(['1 day 15:19:49.695000']) right = TimedeltaIndex(['2 day 13:04:21.322000', '1 day 15:27:24.873000', '1 day 15:31:05.350000']) result = left.union(right) exp = TimedeltaIndex(sorted(set(list(left)) | set(list(right)))) tm.assert_index_equal(result, exp)
def test_get_duplicates(self): idx = TimedeltaIndex(['1 day', '2 day', '2 day', '3 day', '3day', '4day']) with tm.assert_produces_warning(FutureWarning): # Deprecated - see GH20239 result = idx.get_duplicates() ex = TimedeltaIndex(['2 day', '3day']) tm.assert_index_equal(result, ex)
def test_union_coverage(self): idx = TimedeltaIndex(['3d', '1d', '2d']) ordered = TimedeltaIndex(idx.sort_values(), freq='infer') result = ordered.union(idx) tm.assert_index_equal(result, ordered) result = ordered[:0].union(ordered) tm.assert_index_equal(result, ordered) assert result.freq == ordered.freq
def test_misc_coverage(self): rng = timedelta_range("1 day", periods=5) result = rng.groupby(rng.days) tm.assert_isinstance(list(result.values())[0][0], Timedelta) idx = TimedeltaIndex(["3d", "1d", "2d"]) self.assertTrue(idx.equals(list(idx))) non_td = Index(list("abc")) self.assertFalse(idx.equals(list(non_td)))
def test_misc_coverage(self): rng = timedelta_range('1 day', periods=5) result = rng.groupby(rng.days) tm.assertIsInstance(list(result.values())[0][0], Timedelta) idx = TimedeltaIndex(['3d', '1d', '2d']) self.assertFalse(idx.equals(list(idx))) non_td = Index(list('abc')) self.assertFalse(idx.equals(list(non_td)))
def test_tdi_floordiv_timedelta_scalar(self, scalar_td): # GH#19125 tdi = TimedeltaIndex(['00:05:03', '00:05:03', pd.NaT], freq=None) expected = pd.Index([2.0, 2.0, np.nan]) res = tdi.__rfloordiv__(scalar_td) tm.assert_index_equal(res, expected) expected = pd.Index([0.0, 0.0, np.nan]) res = tdi // (scalar_td) tm.assert_index_equal(res, expected)
def test_union(self): i1 = timedelta_range('1day', periods=5) i2 = timedelta_range('3day', periods=5) result = i1.union(i2) expected = timedelta_range('1day', periods=7) tm.assert_index_equal(result, expected) i1 = Int64Index(np.arange(0, 20, 2)) i2 = TimedeltaIndex(start='1 day', periods=10, freq='D') i1.union(i2) # Works i2.union(i1) # Fails with "AttributeError: can't set attribute"
def test_freq_setter(self, values, freq): # GH 20678 idx = TimedeltaIndex(values) # can set to an offset, converting from string if necessary idx.freq = freq assert idx.freq == freq assert isinstance(idx.freq, ABCDateOffset) # can reset to None idx.freq = None assert idx.freq is None
def test_union(self): i1 = timedelta_range("1day", periods=5) i2 = timedelta_range("3day", periods=5) result = i1.union(i2) expected = timedelta_range("1day", periods=7) self.assert_numpy_array_equal(result, expected) i1 = Int64Index(np.arange(0, 20, 2)) i2 = TimedeltaIndex(start="1 day", periods=10, freq="D") i1.union(i2) # Works i2.union(i1) # Fails with "AttributeError: can't set attribute"
def test_is_timedelta(): assert (com.is_timedelta64_dtype('timedelta64')) assert (com.is_timedelta64_dtype('timedelta64[ns]')) assert (not com.is_timedelta64_ns_dtype('timedelta64')) assert (com.is_timedelta64_ns_dtype('timedelta64[ns]')) tdi = TimedeltaIndex([1e14, 2e14], dtype='timedelta64') assert (com.is_timedelta64_dtype(tdi)) assert (com.is_timedelta64_ns_dtype(tdi)) assert (com.is_timedelta64_ns_dtype(tdi.astype('timedelta64[ns]'))) # Conversion to Int64Index: assert (not com.is_timedelta64_ns_dtype(tdi.astype('timedelta64'))) assert (not com.is_timedelta64_ns_dtype(tdi.astype('timedelta64[h]')))
def test_take2(self): tds = ['1day 02:00:00', '1 day 04:00:00', '1 day 10:00:00'] idx = TimedeltaIndex(start='1d', end='2d', freq='H', name='idx') expected = TimedeltaIndex(tds, freq=None, name='idx') taken1 = idx.take([2, 4, 10]) taken2 = idx[[2, 4, 10]] for taken in [taken1, taken2]: tm.assert_index_equal(taken, expected) assert isinstance(taken, TimedeltaIndex) assert taken.freq is None assert taken.name == expected.name
def test_insert(self): idx = TimedeltaIndex(['4day', '1day', '2day'], name='idx') result = idx.insert(2, timedelta(days=5)) exp = TimedeltaIndex(['4day', '1day', '5day', '2day'], name='idx') tm.assert_index_equal(result, exp) # insertion of non-datetime should coerce to object index result = idx.insert(1, 'inserted') expected = Index([Timedelta('4day'), 'inserted', Timedelta('1day'), Timedelta('2day')], name='idx') assert not isinstance(result, TimedeltaIndex) tm.assert_index_equal(result, expected) assert result.name == expected.name idx = timedelta_range('1day 00:00:01', periods=3, freq='s', name='idx') # preserve freq expected_0 = TimedeltaIndex(['1day', '1day 00:00:01', '1day 00:00:02', '1day 00:00:03'], name='idx', freq='s') expected_3 = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02', '1day 00:00:03', '1day 00:00:04'], name='idx', freq='s') # reset freq to None expected_1_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:01', '1day 00:00:02', '1day 00:00:03'], name='idx', freq=None) expected_3_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02', '1day 00:00:03', '1day 00:00:05'], name='idx', freq=None) cases = [(0, Timedelta('1day'), expected_0), (-3, Timedelta('1day'), expected_0), (3, Timedelta('1day 00:00:04'), expected_3), (1, Timedelta('1day 00:00:01'), expected_1_nofreq), (3, Timedelta('1day 00:00:05'), expected_3_nofreq)] for n, d, expected in cases: result = idx.insert(n, d) tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq == expected.freq # GH 18295 (test missing) expected = TimedeltaIndex(['1day', pd.NaT, '2day', '3day']) for na in (np.nan, pd.NaT, None): result = timedelta_range('1day', '3day').insert(1, na) tm.assert_index_equal(result, expected)
def test_is_timedelta(self): self.assertTrue(is_timedelta64_dtype('timedelta64')) self.assertTrue(is_timedelta64_dtype('timedelta64[ns]')) self.assertFalse(is_timedelta64_ns_dtype('timedelta64')) self.assertTrue(is_timedelta64_ns_dtype('timedelta64[ns]')) tdi = TimedeltaIndex([1e14, 2e14], dtype='timedelta64') self.assertTrue(is_timedelta64_dtype(tdi)) self.assertTrue(is_timedelta64_ns_dtype(tdi)) self.assertTrue(is_timedelta64_ns_dtype(tdi.astype('timedelta64[ns]'))) # Conversion to Int64Index: self.assertFalse(is_timedelta64_ns_dtype(tdi.astype('timedelta64'))) self.assertFalse(is_timedelta64_ns_dtype(tdi.astype('timedelta64[h]')))
def test_take(self): tds = ['1day 02:00:00','1 day 04:00:00','1 day 10:00:00'] idx = TimedeltaIndex(start='1d',end='2d',freq='H',name='idx') expected = TimedeltaIndex(tds, freq=None, name='idx') taken1 = idx.take([2, 4, 10]) taken2 = idx[[2,4,10]] for taken in [taken1, taken2]: self.assertTrue(taken.equals(expected)) tm.assert_isinstance(taken, TimedeltaIndex) self.assertIsNone(taken.freq) self.assertEqual(taken.name, expected.name)
def test_is_timedelta(self): assert is_timedelta64_dtype('timedelta64') assert is_timedelta64_dtype('timedelta64[ns]') assert not is_timedelta64_ns_dtype('timedelta64') assert is_timedelta64_ns_dtype('timedelta64[ns]') tdi = TimedeltaIndex([1e14, 2e14], dtype='timedelta64') assert is_timedelta64_dtype(tdi) assert is_timedelta64_ns_dtype(tdi) assert is_timedelta64_ns_dtype(tdi.astype('timedelta64[ns]')) # Conversion to Int64Index: assert not is_timedelta64_ns_dtype(tdi.astype('timedelta64')) assert not is_timedelta64_ns_dtype(tdi.astype('timedelta64[h]'))
def test_take(self): tds = ["1day 02:00:00", "1 day 04:00:00", "1 day 10:00:00"] idx = TimedeltaIndex(start="1d", end="2d", freq="H", name="idx") expected = TimedeltaIndex(tds, freq=None, name="idx") taken1 = idx.take([2, 4, 10]) taken2 = idx[[2, 4, 10]] for taken in [taken1, taken2]: self.assertTrue(taken.equals(expected)) tm.assert_isinstance(taken, TimedeltaIndex) self.assertIsNone(taken.freq) self.assertEqual(taken.name, expected.name)
def test_repeat(self): index = pd.timedelta_range('1 days', periods=2, freq='D') exp = pd.TimedeltaIndex(['1 days', '1 days', '2 days', '2 days']) for res in [index.repeat(2), np.repeat(index, 2)]: tm.assert_index_equal(res, exp) self.assertIsNone(res.freq) index = TimedeltaIndex(['1 days', 'NaT', '3 days']) exp = TimedeltaIndex(['1 days', '1 days', '1 days', 'NaT', 'NaT', 'NaT', '3 days', '3 days', '3 days']) for res in [index.repeat(3), np.repeat(index, 3)]: tm.assert_index_equal(res, exp) self.assertIsNone(res.freq)
def test_astype_timedelta64(self): # GH 13149, GH 13209 idx = TimedeltaIndex([1e14, 'NaT', pd.NaT, np.NaN]) result = idx.astype('timedelta64') expected = Float64Index([1e+14] + [np.NaN] * 3, dtype='float64') tm.assert_index_equal(result, expected) result = idx.astype('timedelta64[ns]') tm.assert_index_equal(result, idx) self.assertFalse(result is idx) result = idx.astype('timedelta64[ns]', copy=False) tm.assert_index_equal(result, idx) self.assertTrue(result is idx)
def test_astype_raises(self, dtype): # GH 13149, GH 13209 idx = TimedeltaIndex([1e14, 'NaT', NaT, np.NaN]) msg = 'Cannot cast TimedeltaArray to dtype' with pytest.raises(TypeError, match=msg): idx.astype(dtype)
def test_insert(self): idx = TimedeltaIndex(["4day", "1day", "2day"], name="idx") result = idx.insert(2, timedelta(days=5)) exp = TimedeltaIndex(["4day", "1day", "5day", "2day"], name="idx") tm.assert_index_equal(result, exp) # insertion of non-datetime should coerce to object index result = idx.insert(1, "inserted") expected = Index( [ Timedelta("4day"), "inserted", Timedelta("1day"), Timedelta("2day") ], name="idx", ) assert not isinstance(result, TimedeltaIndex) tm.assert_index_equal(result, expected) assert result.name == expected.name idx = timedelta_range("1day 00:00:01", periods=3, freq="s", name="idx") # preserve freq expected_0 = TimedeltaIndex( ["1day", "1day 00:00:01", "1day 00:00:02", "1day 00:00:03"], name="idx", freq="s", ) expected_3 = TimedeltaIndex( [ "1day 00:00:01", "1day 00:00:02", "1day 00:00:03", "1day 00:00:04" ], name="idx", freq="s", ) # reset freq to None expected_1_nofreq = TimedeltaIndex( [ "1day 00:00:01", "1day 00:00:01", "1day 00:00:02", "1day 00:00:03" ], name="idx", freq=None, ) expected_3_nofreq = TimedeltaIndex( [ "1day 00:00:01", "1day 00:00:02", "1day 00:00:03", "1day 00:00:05" ], name="idx", freq=None, ) cases = [ (0, Timedelta("1day"), expected_0), (-3, Timedelta("1day"), expected_0), (3, Timedelta("1day 00:00:04"), expected_3), (1, Timedelta("1day 00:00:01"), expected_1_nofreq), (3, Timedelta("1day 00:00:05"), expected_3_nofreq), ] for n, d, expected in cases: result = idx.insert(n, d) tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq == expected.freq
def test_insert_invalid_na(self): idx = TimedeltaIndex(["4day", "1day", "2day"], name="idx") with pytest.raises(TypeError, match="incompatible label"): idx.insert(0, np.datetime64("NaT"))
def test_take_fill_value(self): # GH 12631 idx = TimedeltaIndex(["1 days", "2 days", "3 days"], name="xxx") result = idx.take(np.array([1, 0, -1])) expected = TimedeltaIndex(["2 days", "1 days", "3 days"], name="xxx") tm.assert_index_equal(result, expected) # fill_value result = idx.take(np.array([1, 0, -1]), fill_value=True) expected = TimedeltaIndex(["2 days", "1 days", "NaT"], name="xxx") tm.assert_index_equal(result, expected) # allow_fill=False result = idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True) expected = TimedeltaIndex(["2 days", "1 days", "3 days"], name="xxx") tm.assert_index_equal(result, expected) msg = ( "When allow_fill=True and fill_value is not None, " "all indices must be >= -1" ) with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -2]), fill_value=True) with pytest.raises(ValueError, match=msg): idx.take(np.array([1, 0, -5]), fill_value=True) msg = "index -5 is out of bounds for (axis 0 with )?size 3" with pytest.raises(IndexError, match=msg): idx.take(np.array([1, -5]))
@pytest.mark.parametrize( "op_name", [ "left_plus_right", "right_plus_left", "left_minus_right", "right_minus_left" ], ) @pytest.mark.parametrize( "value", [ DatetimeIndex(["2011-01-01", "2011-01-02"], name="x"), DatetimeIndex(["2011-01-01", "2011-01-02"], tz="US/Eastern", name="x"), DatetimeArray._from_sequence(["2011-01-01", "2011-01-02"]), DatetimeArray._from_sequence(["2011-01-01", "2011-01-02"], dtype=DatetimeTZDtype(tz="US/Pacific")), TimedeltaIndex(["1 day", "2 day"], name="x"), ], ) def test_nat_arithmetic_index(op_name, value): # see gh-11718 exp_name = "x" exp_data = [NaT] * 2 if is_datetime64_any_dtype(value.dtype) and "plus" in op_name: expected = DatetimeIndex(exp_data, tz=value.tz, name=exp_name) else: expected = TimedeltaIndex(exp_data, name=exp_name) if not isinstance(value, Index): expected = expected.array
def test_shift(self, datetime_series): shifted = datetime_series.shift(1) unshifted = shifted.shift(-1) tm.assert_index_equal(shifted.index, datetime_series.index) tm.assert_index_equal(unshifted.index, datetime_series.index) tm.assert_numpy_array_equal( unshifted.dropna().values, datetime_series.values[:-1] ) offset = BDay() shifted = datetime_series.shift(1, freq=offset) unshifted = shifted.shift(-1, freq=offset) tm.assert_series_equal(unshifted, datetime_series) unshifted = datetime_series.shift(0, freq=offset) tm.assert_series_equal(unshifted, datetime_series) shifted = datetime_series.shift(1, freq="B") unshifted = shifted.shift(-1, freq="B") tm.assert_series_equal(unshifted, datetime_series) # corner case unshifted = datetime_series.shift(0) tm.assert_series_equal(unshifted, datetime_series) # Shifting with PeriodIndex ps = tm.makePeriodSeries() shifted = ps.shift(1) unshifted = shifted.shift(-1) tm.assert_index_equal(shifted.index, ps.index) tm.assert_index_equal(unshifted.index, ps.index) tm.assert_numpy_array_equal(unshifted.dropna().values, ps.values[:-1]) shifted2 = ps.shift(1, "B") shifted3 = ps.shift(1, BDay()) tm.assert_series_equal(shifted2, shifted3) tm.assert_series_equal(ps, shifted2.shift(-1, "B")) msg = "Given freq D does not match PeriodIndex freq B" with pytest.raises(ValueError, match=msg): ps.shift(freq="D") # legacy support shifted4 = ps.shift(1, freq="B") tm.assert_series_equal(shifted2, shifted4) shifted5 = ps.shift(1, freq=BDay()) tm.assert_series_equal(shifted5, shifted4) # 32-bit taking # GH#8129 index = date_range("2000-01-01", periods=5) for dtype in ["int32", "int64"]: s1 = Series(np.arange(5, dtype=dtype), index=index) p = s1.iloc[1] result = s1.shift(periods=p) expected = Series([np.nan, 0, 1, 2, 3], index=index) tm.assert_series_equal(result, expected) # GH#8260 # with tz s = Series( date_range("2000-01-01 09:00:00", periods=5, tz="US/Eastern"), name="foo" ) result = s - s.shift() exp = Series(TimedeltaIndex(["NaT"] + ["1 days"] * 4), name="foo") tm.assert_series_equal(result, exp) # incompat tz s2 = Series(date_range("2000-01-01 09:00:00", periods=5, tz="CET"), name="foo") msg = "DatetimeArray subtraction must have the same timezones or no timezones" with pytest.raises(TypeError, match=msg): s - s2
def diff(arr, n, axis=0): """ difference of n between self, analagoust to s-s.shift(n) """ n = int(n) na = np.nan dtype = arr.dtype is_timedelta = False if needs_i8_conversion(arr): dtype = np.float64 arr = arr.view('i8') na = iNaT is_timedelta = True elif issubclass(dtype.type, np.integer): dtype = np.float64 elif issubclass(dtype.type, np.bool_): dtype = np.object_ dtype = np.dtype(dtype) out_arr = np.empty(arr.shape, dtype=dtype) na_indexer = [slice(None)] * arr.ndim na_indexer[axis] = slice(None, n) if n >= 0 else slice(n, None) out_arr[tuple(na_indexer)] = na if arr.ndim == 2 and arr.dtype.name in _diff_special: f = _diff_special[arr.dtype.name] f(arr, out_arr, n, axis) else: res_indexer = [slice(None)] * arr.ndim res_indexer[axis] = slice(n, None) if n >= 0 else slice(None, n) res_indexer = tuple(res_indexer) lag_indexer = [slice(None)] * arr.ndim lag_indexer[axis] = slice(None, -n) if n > 0 else slice(-n, None) lag_indexer = tuple(lag_indexer) # need to make sure that we account for na for datelike/timedelta # we don't actually want to subtract these i8 numbers if is_timedelta: res = arr[res_indexer] lag = arr[lag_indexer] mask = (arr[res_indexer] == na) | (arr[lag_indexer] == na) if mask.any(): res = res.copy() res[mask] = 0 lag = lag.copy() lag[mask] = 0 result = res - lag result[mask] = na out_arr[res_indexer] = result else: out_arr[res_indexer] = arr[res_indexer] - arr[lag_indexer] if is_timedelta: from pandas import TimedeltaIndex out_arr = TimedeltaIndex(out_arr.ravel().astype('int64')).asi8.reshape( out_arr.shape).astype('timedelta64[ns]') return out_arr
def test_shift_no_freq(self): # GH#19147 tdi = TimedeltaIndex(["1 days 01:00:00", "2 days 01:00:00"], freq=None) with pytest.raises(NullFrequencyError, match="Cannot shift with no freq"): tdi.shift(2)
def test_tdi_shift_empty(self): # GH#9903 idx = TimedeltaIndex([], name="xxx") tm.assert_index_equal(idx.shift(0, freq="H"), idx) tm.assert_index_equal(idx.shift(3, freq="H"), idx)
def test_subtraction_ops_with_tz(self): # check that dt/dti subtraction ops with tz are validated dti = date_range('20130101', periods=3) ts = Timestamp('20130101') dt = ts.to_pydatetime() dti_tz = date_range('20130101', periods=3).tz_localize('US/Eastern') ts_tz = Timestamp('20130101').tz_localize('US/Eastern') ts_tz2 = Timestamp('20130101').tz_localize('CET') dt_tz = ts_tz.to_pydatetime() td = Timedelta('1 days') def _check(result, expected): assert result == expected assert isinstance(result, Timedelta) # scalars result = ts - ts expected = Timedelta('0 days') _check(result, expected) result = dt_tz - ts_tz expected = Timedelta('0 days') _check(result, expected) result = ts_tz - dt_tz expected = Timedelta('0 days') _check(result, expected) # tz mismatches pytest.raises(TypeError, lambda: dt_tz - ts) pytest.raises(TypeError, lambda: dt_tz - dt) pytest.raises(TypeError, lambda: dt_tz - ts_tz2) pytest.raises(TypeError, lambda: dt - dt_tz) pytest.raises(TypeError, lambda: ts - dt_tz) pytest.raises(TypeError, lambda: ts_tz2 - ts) pytest.raises(TypeError, lambda: ts_tz2 - dt) pytest.raises(TypeError, lambda: ts_tz - ts_tz2) # with dti pytest.raises(TypeError, lambda: dti - ts_tz) pytest.raises(TypeError, lambda: dti_tz - ts) pytest.raises(TypeError, lambda: dti_tz - ts_tz2) result = dti_tz - dt_tz expected = TimedeltaIndex(['0 days', '1 days', '2 days']) tm.assert_index_equal(result, expected) result = dt_tz - dti_tz expected = TimedeltaIndex(['0 days', '-1 days', '-2 days']) tm.assert_index_equal(result, expected) result = dti_tz - ts_tz expected = TimedeltaIndex(['0 days', '1 days', '2 days']) tm.assert_index_equal(result, expected) result = ts_tz - dti_tz expected = TimedeltaIndex(['0 days', '-1 days', '-2 days']) tm.assert_index_equal(result, expected) result = td - td expected = Timedelta('0 days') _check(result, expected) result = dti_tz - td expected = DatetimeIndex( ['20121231', '20130101', '20130102'], tz='US/Eastern') tm.assert_index_equal(result, expected)
def test_argmin_argmax(self): idx = TimedeltaIndex( ["1 day 00:00:05", "1 day 00:00:01", "1 day 00:00:02"]) assert idx.argmin() == 1 assert idx.argmax() == 0
def test_tdi_div_tdlike_scalar_with_nat(self, delta): rng = TimedeltaIndex(['1 days', pd.NaT, '2 days'], name='foo') expected = Float64Index([12, np.nan, 24], name='foo') result = rng / delta tm.assert_index_equal(result, expected)
def test_tdi_radd_timestamp(self): idx = TimedeltaIndex(['1 day', '2 day']) result = Timestamp('2011-01-01') + idx expected = DatetimeIndex(['2011-01-02', '2011-01-03']) tm.assert_index_equal(result, expected)
def test_order(self): # GH 10295 idx1 = TimedeltaIndex(["1 day", "2 day", "3 day"], freq="D", name="idx") idx2 = TimedeltaIndex(["1 hour", "2 hour", "3 hour"], freq="H", name="idx") for idx in [idx1, idx2]: ordered = idx.sort_values() tm.assert_index_equal(ordered, idx) assert ordered.freq == idx.freq ordered = idx.sort_values(ascending=False) expected = idx[::-1] tm.assert_index_equal(ordered, expected) assert ordered.freq == expected.freq assert ordered.freq.n == -1 ordered, indexer = idx.sort_values(return_indexer=True) tm.assert_index_equal(ordered, idx) tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False) assert ordered.freq == idx.freq ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) tm.assert_index_equal(ordered, idx[::-1]) assert ordered.freq == expected.freq assert ordered.freq.n == -1 idx1 = TimedeltaIndex( ["1 hour", "3 hour", "5 hour", "2 hour ", "1 hour"], name="idx1") exp1 = TimedeltaIndex( ["1 hour", "1 hour", "2 hour", "3 hour", "5 hour"], name="idx1") idx2 = TimedeltaIndex(["1 day", "3 day", "5 day", "2 day", "1 day"], name="idx2") for idx, expected in [(idx1, exp1), (idx1, exp1), (idx1, exp1)]: ordered = idx.sort_values() tm.assert_index_equal(ordered, expected) assert ordered.freq is None ordered = idx.sort_values(ascending=False) tm.assert_index_equal(ordered, expected[::-1]) assert ordered.freq is None ordered, indexer = idx.sort_values(return_indexer=True) tm.assert_index_equal(ordered, expected) exp = np.array([0, 4, 3, 1, 2]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) tm.assert_index_equal(ordered, expected[::-1]) exp = np.array([2, 1, 3, 4, 0]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None
import pandas.core.dtypes.concat as _concat from pandas import DatetimeIndex, Period, PeriodIndex, Series, TimedeltaIndex @pytest.mark.parametrize( "to_concat, expected", [ # int/float/str ([["a"], [1, 2]], ["i", "object"]), ([[3, 4], [1, 2]], ["i"]), ([[3, 4], [1, 2.1]], ["i", "f"]), # datetimelike ([DatetimeIndex(["2011-01-01"]), DatetimeIndex(["2011-01-02"])], ["datetime"]), ([TimedeltaIndex(["1 days"]), TimedeltaIndex(["2 days"])], ["timedelta"]), # datetimelike object ( [ DatetimeIndex(["2011-01-01"]), DatetimeIndex(["2011-01-02"], tz="US/Eastern"), ], ["datetime", "datetime64[ns, US/Eastern]"], ), ( [ DatetimeIndex(["2011-01-01"], tz="Asia/Tokyo"), DatetimeIndex(["2011-01-02"], tz="US/Eastern"), ], ["datetime64[ns, Asia/Tokyo]", "datetime64[ns, US/Eastern]"],
def test_value_counts_datetime64(self): klasses = [Index, Series] for klass in klasses: # GH 3002, datetime64[ns] # don't test names though txt = "\n".join([ 'xxyyzz20100101PIE', 'xxyyzz20100101GUM', 'xxyyzz20100101EGG', 'xxyyww20090101EGG', 'foofoo20080909PIE', 'foofoo20080909GUM' ]) f = StringIO(txt) df = pd.read_fwf(f, widths=[6, 8, 3], names=["person_id", "dt", "food"], parse_dates=["dt"]) s = klass(df['dt'].copy()) s.name = None idx = pd.to_datetime([ '2010-01-01 00:00:00Z', '2008-09-09 00:00:00Z', '2009-01-01 00:00:00X' ]) expected_s = Series([3, 2, 1], index=idx) tm.assert_series_equal(s.value_counts(), expected_s) expected = np_array_datetime64_compat([ '2010-01-01 00:00:00Z', '2009-01-01 00:00:00Z', '2008-09-09 00:00:00Z' ], dtype='datetime64[ns]') if isinstance(s, Index): tm.assert_index_equal(s.unique(), DatetimeIndex(expected)) else: tm.assert_numpy_array_equal(s.unique(), expected) self.assertEqual(s.nunique(), 3) # with NaT s = df['dt'].copy() s = klass([v for v in s.values] + [pd.NaT]) result = s.value_counts() self.assertEqual(result.index.dtype, 'datetime64[ns]') tm.assert_series_equal(result, expected_s) result = s.value_counts(dropna=False) expected_s[pd.NaT] = 1 tm.assert_series_equal(result, expected_s) unique = s.unique() self.assertEqual(unique.dtype, 'datetime64[ns]') # numpy_array_equal cannot compare pd.NaT if isinstance(s, Index): exp_idx = DatetimeIndex(expected.tolist() + [pd.NaT]) tm.assert_index_equal(unique, exp_idx) else: tm.assert_numpy_array_equal(unique[:3], expected) self.assertTrue(pd.isnull(unique[3])) self.assertEqual(s.nunique(), 3) self.assertEqual(s.nunique(dropna=False), 4) # timedelta64[ns] td = df.dt - df.dt + timedelta(1) td = klass(td, name='dt') result = td.value_counts() expected_s = Series([6], index=[Timedelta('1day')], name='dt') tm.assert_series_equal(result, expected_s) expected = TimedeltaIndex(['1 days'], name='dt') if isinstance(td, Index): tm.assert_index_equal(td.unique(), expected) else: tm.assert_numpy_array_equal(td.unique(), expected.values) td2 = timedelta(1) + (df.dt - df.dt) td2 = klass(td2, name='dt') result2 = td2.value_counts() tm.assert_series_equal(result2, expected_s)
def test_unit_m_y_raises(self, unit): msg = "Units 'M' and 'Y' are no longer supported" with pytest.raises(ValueError, match=msg): TimedeltaIndex([1, 3, 7], unit)
def test_value_counts_inferred(self): klasses = [Index, Series] for klass in klasses: s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a'] s = klass(s_values) expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c']) tm.assert_series_equal(s.value_counts(), expected) self.assert_numpy_array_equal(s.unique(), np.unique(s_values)) self.assertEqual(s.nunique(), 4) # don't sort, have to sort after the fact as not sorting is platform-dep hist = s.value_counts(sort=False) hist.sort() expected = Series([3, 1, 4, 2], index=list('acbd')) expected.sort() tm.assert_series_equal(hist, expected) # sort ascending hist = s.value_counts(ascending=True) expected = Series([1, 2, 3, 4], index=list('cdab')) tm.assert_series_equal(hist, expected) # relative histogram. hist = s.value_counts(normalize=True) expected = Series([.4, .3, .2, .1], index=['b', 'a', 'd', 'c']) tm.assert_series_equal(hist, expected) # bins self.assertRaises(TypeError, lambda bins: s.value_counts(bins=bins), 1) s1 = Series([1, 1, 2, 3]) res1 = s1.value_counts(bins=1) exp1 = Series({0.998: 4}) tm.assert_series_equal(res1, exp1) res1n = s1.value_counts(bins=1, normalize=True) exp1n = Series({0.998: 1.0}) tm.assert_series_equal(res1n, exp1n) self.assert_numpy_array_equal(s1.unique(), np.array([1, 2, 3])) self.assertEqual(s1.nunique(), 3) res4 = s1.value_counts(bins=4) exp4 = Series({ 0.998: 2, 1.5: 1, 2.0: 0, 2.5: 1 }, index=[0.998, 2.5, 1.5, 2.0]) tm.assert_series_equal(res4, exp4) res4n = s1.value_counts(bins=4, normalize=True) exp4n = Series({ 0.998: 0.5, 1.5: 0.25, 2.0: 0.0, 2.5: 0.25 }, index=[0.998, 2.5, 1.5, 2.0]) tm.assert_series_equal(res4n, exp4n) # handle NA's properly s_values = [ 'a', 'b', 'b', 'b', np.nan, np.nan, 'd', 'd', 'a', 'a', 'b' ] s = klass(s_values) expected = Series([4, 3, 2], index=['b', 'a', 'd']) tm.assert_series_equal(s.value_counts(), expected) self.assert_numpy_array_equal( s.unique(), np.array(['a', 'b', np.nan, 'd'], dtype='O')) self.assertEqual(s.nunique(), 3) s = klass({}) expected = Series([], dtype=np.int64) tm.assert_series_equal(s.value_counts(), expected) self.assert_numpy_array_equal(s.unique(), np.array([])) self.assertEqual(s.nunique(), 0) # GH 3002, datetime64[ns] txt = "\n".join([ 'xxyyzz20100101PIE', 'xxyyzz20100101GUM', 'xxyyzz20100101EGG', 'xxyyww20090101EGG', 'foofoo20080909PIE', 'foofoo20080909GUM' ]) f = StringIO(txt) df = pd.read_fwf(f, widths=[6, 8, 3], names=["person_id", "dt", "food"], parse_dates=["dt"]) s = klass(df['dt'].copy()) idx = pd.to_datetime([ '2010-01-01 00:00:00Z', '2008-09-09 00:00:00Z', '2009-01-01 00:00:00X' ]) expected_s = Series([3, 2, 1], index=idx) tm.assert_series_equal(s.value_counts(), expected_s) expected = np.array([ '2010-01-01 00:00:00Z', '2009-01-01 00:00:00Z', '2008-09-09 00:00:00Z' ], dtype='datetime64[ns]') if isinstance(s, DatetimeIndex): expected = DatetimeIndex(expected) self.assertTrue(s.unique().equals(expected)) else: self.assert_numpy_array_equal(s.unique(), expected) self.assertEqual(s.nunique(), 3) # with NaT s = df['dt'].copy() s = klass([v for v in s.values] + [pd.NaT]) result = s.value_counts() self.assertEqual(result.index.dtype, 'datetime64[ns]') tm.assert_series_equal(result, expected_s) result = s.value_counts(dropna=False) expected_s[pd.NaT] = 1 tm.assert_series_equal(result, expected_s) unique = s.unique() self.assertEqual(unique.dtype, 'datetime64[ns]') # numpy_array_equal cannot compare pd.NaT self.assert_numpy_array_equal(unique[:3], expected) self.assertTrue(unique[3] is pd.NaT or unique[3].astype('int64') == pd.tslib.iNaT) self.assertEqual(s.nunique(), 3) self.assertEqual(s.nunique(dropna=False), 4) # timedelta64[ns] td = df.dt - df.dt + timedelta(1) td = klass(td) result = td.value_counts() expected_s = Series([6], index=[Timedelta('1day')]) tm.assert_series_equal(result, expected_s) expected = TimedeltaIndex(['1 days']) if isinstance(td, TimedeltaIndex): self.assertTrue(td.unique().equals(expected)) else: self.assert_numpy_array_equal(td.unique(), expected.values) td2 = timedelta(1) + (df.dt - df.dt) td2 = klass(td2) result2 = td2.value_counts() tm.assert_series_equal(result2, expected_s)
def test_order(self): # GH 10295 idx1 = TimedeltaIndex(['1 day', '2 day', '3 day'], freq='D', name='idx') idx2 = TimedeltaIndex(['1 hour', '2 hour', '3 hour'], freq='H', name='idx') for idx in [idx1, idx2]: ordered = idx.sort_values() tm.assert_index_equal(ordered, idx) assert ordered.freq == idx.freq ordered = idx.sort_values(ascending=False) expected = idx[::-1] tm.assert_index_equal(ordered, expected) assert ordered.freq == expected.freq assert ordered.freq.n == -1 ordered, indexer = idx.sort_values(return_indexer=True) tm.assert_index_equal(ordered, idx) tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False) assert ordered.freq == idx.freq ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) tm.assert_index_equal(ordered, idx[::-1]) assert ordered.freq == expected.freq assert ordered.freq.n == -1 idx1 = TimedeltaIndex( ['1 hour', '3 hour', '5 hour', '2 hour ', '1 hour'], name='idx1') exp1 = TimedeltaIndex( ['1 hour', '1 hour', '2 hour', '3 hour', '5 hour'], name='idx1') idx2 = TimedeltaIndex(['1 day', '3 day', '5 day', '2 day', '1 day'], name='idx2') # TODO(wesm): unused? # exp2 = TimedeltaIndex(['1 day', '1 day', '2 day', # '3 day', '5 day'], name='idx2') # idx3 = TimedeltaIndex([pd.NaT, '3 minute', '5 minute', # '2 minute', pd.NaT], name='idx3') # exp3 = TimedeltaIndex([pd.NaT, pd.NaT, '2 minute', '3 minute', # '5 minute'], name='idx3') for idx, expected in [(idx1, exp1), (idx1, exp1), (idx1, exp1)]: ordered = idx.sort_values() tm.assert_index_equal(ordered, expected) assert ordered.freq is None ordered = idx.sort_values(ascending=False) tm.assert_index_equal(ordered, expected[::-1]) assert ordered.freq is None ordered, indexer = idx.sort_values(return_indexer=True) tm.assert_index_equal(ordered, expected) exp = np.array([0, 4, 3, 1, 2]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None ordered, indexer = idx.sort_values(return_indexer=True, ascending=False) tm.assert_index_equal(ordered, expected[::-1]) exp = np.array([2, 1, 3, 4, 0]) tm.assert_numpy_array_equal(indexer, exp, check_dtype=False) assert ordered.freq is None
def test_infer_freq(self, freq_sample): # GH#11018 idx = pd.timedelta_range("1", freq=freq_sample, periods=10) result = TimedeltaIndex(idx.asi8, freq="infer") tm.assert_index_equal(idx, result) assert result.freq == freq_sample
def test_dt_namespace_accessor(self): # GH 7207, 11128 # test .dt namespace accessor ok_for_period = PeriodIndex._datetimelike_ops ok_for_period_methods = ['strftime', 'to_timestamp', 'asfreq'] ok_for_dt = DatetimeIndex._datetimelike_ops ok_for_dt_methods = [ 'to_period', 'to_pydatetime', 'tz_localize', 'tz_convert', 'normalize', 'strftime', 'round', 'floor', 'ceil', 'weekday_name' ] ok_for_td = TimedeltaIndex._datetimelike_ops ok_for_td_methods = [ 'components', 'to_pytimedelta', 'total_seconds', 'round', 'floor', 'ceil' ] def get_expected(s, name): result = getattr(Index(s._values), prop) if isinstance(result, np.ndarray): if is_integer_dtype(result): result = result.astype('int64') elif not is_list_like(result): return result return Series(result, index=s.index, name=s.name) def compare(s, name): a = getattr(s.dt, prop) b = get_expected(s, prop) if not (is_list_like(a) and is_list_like(b)): self.assertEqual(a, b) else: tm.assert_series_equal(a, b) # datetimeindex cases = [ Series(date_range('20130101', periods=5), name='xxx'), Series(date_range('20130101', periods=5, freq='s'), name='xxx'), Series(date_range('20130101 00:00:00', periods=5, freq='ms'), name='xxx') ] for s in cases: for prop in ok_for_dt: # we test freq below if prop != 'freq': compare(s, prop) for prop in ok_for_dt_methods: getattr(s.dt, prop) result = s.dt.to_pydatetime() self.assertIsInstance(result, np.ndarray) self.assertTrue(result.dtype == object) result = s.dt.tz_localize('US/Eastern') exp_values = DatetimeIndex(s.values).tz_localize('US/Eastern') expected = Series(exp_values, index=s.index, name='xxx') tm.assert_series_equal(result, expected) tz_result = result.dt.tz self.assertEqual(str(tz_result), 'US/Eastern') freq_result = s.dt.freq self.assertEqual(freq_result, DatetimeIndex(s.values, freq='infer').freq) # let's localize, then convert result = s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern') exp_values = (DatetimeIndex( s.values).tz_localize('UTC').tz_convert('US/Eastern')) expected = Series(exp_values, index=s.index, name='xxx') tm.assert_series_equal(result, expected) # round s = Series(pd.to_datetime([ '2012-01-01 13:00:00', '2012-01-01 12:01:00', '2012-01-01 08:00:00' ]), name='xxx') result = s.dt.round('D') expected = Series(pd.to_datetime( ['2012-01-02', '2012-01-02', '2012-01-01']), name='xxx') tm.assert_series_equal(result, expected) # round with tz result = ( s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern').dt.round('D')) exp_values = pd.to_datetime(['2012-01-01', '2012-01-01', '2012-01-01']).tz_localize('US/Eastern') expected = Series(exp_values, name='xxx') tm.assert_series_equal(result, expected) # floor s = Series(pd.to_datetime([ '2012-01-01 13:00:00', '2012-01-01 12:01:00', '2012-01-01 08:00:00' ]), name='xxx') result = s.dt.floor('D') expected = Series(pd.to_datetime( ['2012-01-01', '2012-01-01', '2012-01-01']), name='xxx') tm.assert_series_equal(result, expected) # ceil s = Series(pd.to_datetime([ '2012-01-01 13:00:00', '2012-01-01 12:01:00', '2012-01-01 08:00:00' ]), name='xxx') result = s.dt.ceil('D') expected = Series(pd.to_datetime( ['2012-01-02', '2012-01-02', '2012-01-02']), name='xxx') tm.assert_series_equal(result, expected) # datetimeindex with tz s = Series(date_range('20130101', periods=5, tz='US/Eastern'), name='xxx') for prop in ok_for_dt: # we test freq below if prop != 'freq': compare(s, prop) for prop in ok_for_dt_methods: getattr(s.dt, prop) result = s.dt.to_pydatetime() self.assertIsInstance(result, np.ndarray) self.assertTrue(result.dtype == object) result = s.dt.tz_convert('CET') expected = Series(s._values.tz_convert('CET'), index=s.index, name='xxx') tm.assert_series_equal(result, expected) tz_result = result.dt.tz self.assertEqual(str(tz_result), 'CET') freq_result = s.dt.freq self.assertEqual(freq_result, DatetimeIndex(s.values, freq='infer').freq) # timedeltaindex cases = [ Series(timedelta_range('1 day', periods=5), index=list('abcde'), name='xxx'), Series(timedelta_range('1 day 01:23:45', periods=5, freq='s'), name='xxx'), Series(timedelta_range('2 days 01:23:45.012345', periods=5, freq='ms'), name='xxx') ] for s in cases: for prop in ok_for_td: # we test freq below if prop != 'freq': compare(s, prop) for prop in ok_for_td_methods: getattr(s.dt, prop) result = s.dt.components self.assertIsInstance(result, DataFrame) tm.assert_index_equal(result.index, s.index) result = s.dt.to_pytimedelta() self.assertIsInstance(result, np.ndarray) self.assertTrue(result.dtype == object) result = s.dt.total_seconds() self.assertIsInstance(result, pd.Series) self.assertTrue(result.dtype == 'float64') freq_result = s.dt.freq self.assertEqual(freq_result, TimedeltaIndex(s.values, freq='infer').freq) # both index = date_range('20130101', periods=3, freq='D') s = Series(date_range('20140204', periods=3, freq='s'), index=index, name='xxx') exp = Series(np.array([2014, 2014, 2014], dtype='int64'), index=index, name='xxx') tm.assert_series_equal(s.dt.year, exp) exp = Series(np.array([2, 2, 2], dtype='int64'), index=index, name='xxx') tm.assert_series_equal(s.dt.month, exp) exp = Series(np.array([0, 1, 2], dtype='int64'), index=index, name='xxx') tm.assert_series_equal(s.dt.second, exp) exp = pd.Series([s[0]] * 3, index=index, name='xxx') tm.assert_series_equal(s.dt.normalize(), exp) # periodindex cases = [ Series(period_range('20130101', periods=5, freq='D'), name='xxx') ] for s in cases: for prop in ok_for_period: # we test freq below if prop != 'freq': compare(s, prop) for prop in ok_for_period_methods: getattr(s.dt, prop) freq_result = s.dt.freq self.assertEqual(freq_result, PeriodIndex(s.values).freq) # test limited display api def get_dir(s): results = [r for r in s.dt.__dir__() if not r.startswith('_')] return list(sorted(set(results))) s = Series(date_range('20130101', periods=5, freq='D'), name='xxx') results = get_dir(s) tm.assert_almost_equal( results, list(sorted(set(ok_for_dt + ok_for_dt_methods)))) s = Series( period_range('20130101', periods=5, freq='D', name='xxx').asobject) results = get_dir(s) tm.assert_almost_equal( results, list(sorted(set(ok_for_period + ok_for_period_methods)))) # 11295 # ambiguous time error on the conversions s = Series(pd.date_range('2015-01-01', '2016-01-01', freq='T'), name='xxx') s = s.dt.tz_localize('UTC').dt.tz_convert('America/Chicago') results = get_dir(s) tm.assert_almost_equal( results, list(sorted(set(ok_for_dt + ok_for_dt_methods)))) exp_values = pd.date_range('2015-01-01', '2016-01-01', freq='T', tz='UTC').tz_convert('America/Chicago') expected = Series(exp_values, name='xxx') tm.assert_series_equal(s, expected) # no setting allowed s = Series(date_range('20130101', periods=5, freq='D'), name='xxx') with tm.assertRaisesRegexp(ValueError, "modifications"): s.dt.hour = 5 # trying to set a copy with pd.option_context('chained_assignment', 'raise'): def f(): s.dt.hour[0] = 5 self.assertRaises(com.SettingWithCopyError, f)
def test_insert_nat(self, null): # GH 18295 (test missing) idx = timedelta_range("1day", "3day") result = idx.insert(1, null) expected = TimedeltaIndex(["1day", pd.NaT, "2day", "3day"]) tm.assert_index_equal(result, expected)
def test_value_counts_datetime64(self, index_or_series): klass = index_or_series # GH 3002, datetime64[ns] # don't test names though txt = "\n".join([ "xxyyzz20100101PIE", "xxyyzz20100101GUM", "xxyyzz20100101EGG", "xxyyww20090101EGG", "foofoo20080909PIE", "foofoo20080909GUM", ]) f = StringIO(txt) df = pd.read_fwf(f, widths=[6, 8, 3], names=["person_id", "dt", "food"], parse_dates=["dt"]) s = klass(df["dt"].copy()) s.name = None idx = pd.to_datetime([ "2010-01-01 00:00:00", "2008-09-09 00:00:00", "2009-01-01 00:00:00" ]) expected_s = Series([3, 2, 1], index=idx) tm.assert_series_equal(s.value_counts(), expected_s) expected = np_array_datetime64_compat( [ "2010-01-01 00:00:00", "2009-01-01 00:00:00", "2008-09-09 00:00:00" ], dtype="datetime64[ns]", ) if isinstance(s, Index): tm.assert_index_equal(s.unique(), DatetimeIndex(expected)) else: tm.assert_numpy_array_equal(s.unique(), expected) assert s.nunique() == 3 # with NaT s = df["dt"].copy() s = klass(list(s.values) + [pd.NaT]) result = s.value_counts() assert result.index.dtype == "datetime64[ns]" tm.assert_series_equal(result, expected_s) result = s.value_counts(dropna=False) expected_s[pd.NaT] = 1 tm.assert_series_equal(result, expected_s) unique = s.unique() assert unique.dtype == "datetime64[ns]" # numpy_array_equal cannot compare pd.NaT if isinstance(s, Index): exp_idx = DatetimeIndex(expected.tolist() + [pd.NaT]) tm.assert_index_equal(unique, exp_idx) else: tm.assert_numpy_array_equal(unique[:3], expected) assert pd.isna(unique[3]) assert s.nunique() == 3 assert s.nunique(dropna=False) == 4 # timedelta64[ns] td = df.dt - df.dt + timedelta(1) td = klass(td, name="dt") result = td.value_counts() expected_s = Series([6], index=[Timedelta("1day")], name="dt") tm.assert_series_equal(result, expected_s) expected = TimedeltaIndex(["1 days"], name="dt") if isinstance(td, Index): tm.assert_index_equal(td.unique(), expected) else: tm.assert_numpy_array_equal(td.unique(), expected.values) td2 = timedelta(1) + (df.dt - df.dt) td2 = klass(td2, name="dt") result2 = td2.value_counts() tm.assert_series_equal(result2, expected_s)
tm.assert_numpy_array_equal(result, expected) def test_strftime_nat(self): # GH 29578 arr = PeriodArray(PeriodIndex(["2019-01-01", NaT], dtype="period[D]")) result = arr.strftime("%Y-%m-%d") expected = np.array(["2019-01-01", np.nan], dtype=object) tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize( "arr,casting_nats", [ ( TimedeltaIndex(["1 Day", "3 Hours", "NaT"])._data, (NaT, np.timedelta64("NaT", "ns")), ), ( pd.date_range("2000-01-01", periods=3, freq="D")._data, (NaT, np.datetime64("NaT", "ns")), ), (pd.period_range("2000-01-01", periods=3, freq="D")._data, (NaT, )), ], ids=lambda x: type(x).__name__, ) def test_casting_nat_setitem_array(arr, casting_nats): expected = type(arr)._from_sequence([NaT, arr[1], arr[2]]) for nat in casting_nats: arr = arr.copy()
def test_round(self): t1 = Timedelta('1 days 02:34:56.789123456') t2 = Timedelta('-1 days 02:34:56.789123456') for (freq, s1, s2) in [ ('N', t1, t2), ('U', Timedelta('1 days 02:34:56.789123000'), Timedelta('-1 days 02:34:56.789123000')), ('L', Timedelta('1 days 02:34:56.789000000'), Timedelta('-1 days 02:34:56.789000000')), ('S', Timedelta('1 days 02:34:57'), Timedelta('-1 days 02:34:57')), ('2S', Timedelta('1 days 02:34:56'), Timedelta('-1 days 02:34:56')), ('5S', Timedelta('1 days 02:34:55'), Timedelta('-1 days 02:34:55')), ('T', Timedelta('1 days 02:35:00'), Timedelta('-1 days 02:35:00')), ('12T', Timedelta('1 days 02:36:00'), Timedelta('-1 days 02:36:00')), ('H', Timedelta('1 days 03:00:00'), Timedelta('-1 days 03:00:00')), ('d', Timedelta('1 days'), Timedelta('-1 days')) ]: r1 = t1.round(freq) assert r1 == s1 r2 = t2.round(freq) assert r2 == s2 # invalid for freq in ['Y', 'M', 'foobar']: pytest.raises(ValueError, lambda: t1.round(freq)) t1 = timedelta_range('1 days', periods=3, freq='1 min 2 s 3 us') t2 = -1 * t1 t1a = timedelta_range('1 days', periods=3, freq='1 min 2 s') t1c = pd.TimedeltaIndex([1, 1, 1], unit='D') # note that negative times round DOWN! so don't give whole numbers for (freq, s1, s2) in [ ('N', t1, t2), ('U', t1, t2), ('L', t1a, TimedeltaIndex([ '-1 days +00:00:00', '-2 days +23:58:58', '-2 days +23:57:56' ], dtype='timedelta64[ns]', freq=None)), ('S', t1a, TimedeltaIndex([ '-1 days +00:00:00', '-2 days +23:58:58', '-2 days +23:57:56' ], dtype='timedelta64[ns]', freq=None)), ('12T', t1c, TimedeltaIndex(['-1 days', '-1 days', '-1 days'], dtype='timedelta64[ns]', freq=None)), ('H', t1c, TimedeltaIndex(['-1 days', '-1 days', '-1 days'], dtype='timedelta64[ns]', freq=None)), ('d', t1c, pd.TimedeltaIndex([-1, -1, -1], unit='D')) ]: r1 = t1.round(freq) tm.assert_index_equal(r1, s1) r2 = t2.round(freq) tm.assert_index_equal(r2, s2) # invalid for freq in ['Y', 'M', 'foobar']: pytest.raises(ValueError, lambda: t1.round(freq))
def test_dt_namespace_accessor(self): # GH 7207, 11128 # test .dt namespace accessor ok_for_period = PeriodArray._datetimelike_ops ok_for_period_methods = ["strftime", "to_timestamp", "asfreq"] ok_for_dt = DatetimeIndex._datetimelike_ops ok_for_dt_methods = [ "to_period", "to_pydatetime", "tz_localize", "tz_convert", "normalize", "strftime", "round", "floor", "ceil", "day_name", "month_name", "isocalendar", ] ok_for_td = TimedeltaArray._datetimelike_ops ok_for_td_methods = [ "components", "to_pytimedelta", "total_seconds", "round", "floor", "ceil", ] def get_expected(s, name): result = getattr(Index(s._values), prop) if isinstance(result, np.ndarray): if is_integer_dtype(result): result = result.astype("int64") elif not is_list_like(result) or isinstance(result, DataFrame): return result return Series(result, index=s.index, name=s.name) def compare(s, name): a = getattr(s.dt, prop) b = get_expected(s, prop) if not (is_list_like(a) and is_list_like(b)): assert a == b elif isinstance(a, DataFrame): tm.assert_frame_equal(a, b) else: tm.assert_series_equal(a, b) # datetimeindex cases = [ Series(date_range("20130101", periods=5), name="xxx"), Series(date_range("20130101", periods=5, freq="s"), name="xxx"), Series(date_range("20130101 00:00:00", periods=5, freq="ms"), name="xxx"), ] for s in cases: for prop in ok_for_dt: # we test freq below # we ignore week and weekofyear because they are deprecated if prop not in ["freq", "week", "weekofyear"]: compare(s, prop) for prop in ok_for_dt_methods: getattr(s.dt, prop) result = s.dt.to_pydatetime() assert isinstance(result, np.ndarray) assert result.dtype == object result = s.dt.tz_localize("US/Eastern") exp_values = DatetimeIndex(s.values).tz_localize("US/Eastern") expected = Series(exp_values, index=s.index, name="xxx") tm.assert_series_equal(result, expected) tz_result = result.dt.tz assert str(tz_result) == "US/Eastern" freq_result = s.dt.freq assert freq_result == DatetimeIndex(s.values, freq="infer").freq # let's localize, then convert result = s.dt.tz_localize("UTC").dt.tz_convert("US/Eastern") exp_values = (DatetimeIndex( s.values).tz_localize("UTC").tz_convert("US/Eastern")) expected = Series(exp_values, index=s.index, name="xxx") tm.assert_series_equal(result, expected) # datetimeindex with tz s = Series(date_range("20130101", periods=5, tz="US/Eastern"), name="xxx") for prop in ok_for_dt: # we test freq below # we ignore week and weekofyear because they are deprecated if prop not in ["freq", "week", "weekofyear"]: compare(s, prop) for prop in ok_for_dt_methods: getattr(s.dt, prop) result = s.dt.to_pydatetime() assert isinstance(result, np.ndarray) assert result.dtype == object result = s.dt.tz_convert("CET") expected = Series(s._values.tz_convert("CET"), index=s.index, name="xxx") tm.assert_series_equal(result, expected) tz_result = result.dt.tz assert str(tz_result) == "CET" freq_result = s.dt.freq assert freq_result == DatetimeIndex(s.values, freq="infer").freq # timedelta index cases = [ Series(timedelta_range("1 day", periods=5), index=list("abcde"), name="xxx"), Series(timedelta_range("1 day 01:23:45", periods=5, freq="s"), name="xxx"), Series( timedelta_range("2 days 01:23:45.012345", periods=5, freq="ms"), name="xxx", ), ] for s in cases: for prop in ok_for_td: # we test freq below if prop != "freq": compare(s, prop) for prop in ok_for_td_methods: getattr(s.dt, prop) result = s.dt.components assert isinstance(result, DataFrame) tm.assert_index_equal(result.index, s.index) result = s.dt.to_pytimedelta() assert isinstance(result, np.ndarray) assert result.dtype == object result = s.dt.total_seconds() assert isinstance(result, Series) assert result.dtype == "float64" freq_result = s.dt.freq assert freq_result == TimedeltaIndex(s.values, freq="infer").freq # both index = date_range("20130101", periods=3, freq="D") s = Series(date_range("20140204", periods=3, freq="s"), index=index, name="xxx") exp = Series(np.array([2014, 2014, 2014], dtype="int64"), index=index, name="xxx") tm.assert_series_equal(s.dt.year, exp) exp = Series(np.array([2, 2, 2], dtype="int64"), index=index, name="xxx") tm.assert_series_equal(s.dt.month, exp) exp = Series(np.array([0, 1, 2], dtype="int64"), index=index, name="xxx") tm.assert_series_equal(s.dt.second, exp) exp = Series([s[0]] * 3, index=index, name="xxx") tm.assert_series_equal(s.dt.normalize(), exp) # periodindex cases = [ Series(period_range("20130101", periods=5, freq="D"), name="xxx") ] for s in cases: for prop in ok_for_period: # we test freq below if prop != "freq": compare(s, prop) for prop in ok_for_period_methods: getattr(s.dt, prop) freq_result = s.dt.freq assert freq_result == PeriodIndex(s.values).freq # test limited display api def get_dir(s): results = [r for r in s.dt.__dir__() if not r.startswith("_")] return sorted(set(results)) s = Series(date_range("20130101", periods=5, freq="D"), name="xxx") results = get_dir(s) tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods))) s = Series( period_range("20130101", periods=5, freq="D", name="xxx").astype(object)) results = get_dir(s) tm.assert_almost_equal( results, sorted(set(ok_for_period + ok_for_period_methods))) # 11295 # ambiguous time error on the conversions s = Series(date_range("2015-01-01", "2016-01-01", freq="T"), name="xxx") s = s.dt.tz_localize("UTC").dt.tz_convert("America/Chicago") results = get_dir(s) tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods))) exp_values = date_range("2015-01-01", "2016-01-01", freq="T", tz="UTC").tz_convert("America/Chicago") # freq not preserved by tz_localize above exp_values = exp_values._with_freq(None) expected = Series(exp_values, name="xxx") tm.assert_series_equal(s, expected) # no setting allowed s = Series(date_range("20130101", periods=5, freq="D"), name="xxx") with pytest.raises(ValueError, match="modifications"): s.dt.hour = 5 # trying to set a copy msg = "modifications to a property of a datetimelike.+not supported" with pd.option_context("chained_assignment", "raise"): with pytest.raises(com.SettingWithCopyError, match=msg): s.dt.hour[0] = 5
def to_perioddelta(self, freq) -> TimedeltaIndex: from pandas.core.indexes.api import TimedeltaIndex arr = self._data.to_perioddelta(freq) return TimedeltaIndex._simple_new(arr, name=self.name)
def test_to_timedelta(self): def conv(v): return v.astype('m8[ns]') d1 = np.timedelta64(1, 'D') assert (to_timedelta( '1 days 06:05:01.00003', box=False) == conv(d1 + np.timedelta64(6 * 3600 + 5 * 60 + 1, 's') + np.timedelta64(30, 'us'))) assert (to_timedelta('15.5us', box=False) == conv(np.timedelta64(15500, 'ns'))) # empty string result = to_timedelta('', box=False) assert result.astype('int64') == iNaT result = to_timedelta(['', '']) assert isna(result).all() # pass thru result = to_timedelta(np.array([np.timedelta64(1, 's')])) expected = pd.Index(np.array([np.timedelta64(1, 's')])) tm.assert_index_equal(result, expected) # ints result = np.timedelta64(0, 'ns') expected = to_timedelta(0, box=False) assert result == expected # Series expected = Series([timedelta(days=1), timedelta(days=1, seconds=1)]) result = to_timedelta(Series(['1d', '1days 00:00:01'])) tm.assert_series_equal(result, expected) # with units result = TimedeltaIndex([ np.timedelta64(0, 'ns'), np.timedelta64(10, 's').astype('m8[ns]') ]) expected = to_timedelta([0, 10], unit='s') tm.assert_index_equal(result, expected) # single element conversion v = timedelta(seconds=1) result = to_timedelta(v, box=False) expected = np.timedelta64(timedelta(seconds=1)) assert result == expected v = np.timedelta64(timedelta(seconds=1)) result = to_timedelta(v, box=False) expected = np.timedelta64(timedelta(seconds=1)) assert result == expected # arrays of various dtypes arr = np.array([1] * 5, dtype='int64') result = to_timedelta(arr, unit='s') expected = TimedeltaIndex([np.timedelta64(1, 's')] * 5) tm.assert_index_equal(result, expected) arr = np.array([1] * 5, dtype='int64') result = to_timedelta(arr, unit='m') expected = TimedeltaIndex([np.timedelta64(1, 'm')] * 5) tm.assert_index_equal(result, expected) arr = np.array([1] * 5, dtype='int64') result = to_timedelta(arr, unit='h') expected = TimedeltaIndex([np.timedelta64(1, 'h')] * 5) tm.assert_index_equal(result, expected) arr = np.array([1] * 5, dtype='timedelta64[s]') result = to_timedelta(arr) expected = TimedeltaIndex([np.timedelta64(1, 's')] * 5) tm.assert_index_equal(result, expected) arr = np.array([1] * 5, dtype='timedelta64[D]') result = to_timedelta(arr) expected = TimedeltaIndex([np.timedelta64(1, 'D')] * 5) tm.assert_index_equal(result, expected) # Test with lists as input when box=false expected = np.array(np.arange(3) * 1000000000, dtype='timedelta64[ns]') result = to_timedelta(range(3), unit='s', box=False) tm.assert_numpy_array_equal(expected, result) result = to_timedelta(np.arange(3), unit='s', box=False) tm.assert_numpy_array_equal(expected, result) result = to_timedelta([0, 1, 2], unit='s', box=False) tm.assert_numpy_array_equal(expected, result) # Tests with fractional seconds as input: expected = np.array([0, 500000000, 800000000, 1200000000], dtype='timedelta64[ns]') result = to_timedelta([0., 0.5, 0.8, 1.2], unit='s', box=False) tm.assert_numpy_array_equal(expected, result)
def test_tdi_sub_timestamp_raises(self): idx = TimedeltaIndex(['1 day', '2 day']) msg = "cannot subtract a datelike from a TimedeltaIndex" with tm.assert_raises_regex(TypeError, msg): idx - Timestamp('2011-01-01')