def test_ambiguous_infer(self):
    """Localizing across the Nov 6, 2011 fall-back with ``ambiguous='infer'``.

    Also exercises the deprecated ``infer_dst=True`` spelling, which is
    expected to emit a FutureWarning.
    """
    eastern = self.tz('US/Eastern')

    # A plain hourly range contains no repeated wall-clock hour, so the
    # transition cannot be inferred and localizing must raise.
    naive_range = date_range(datetime(2011, 11, 6, 0), periods=5,
                             freq=datetools.Hour())
    self.assertRaises(pytz.AmbiguousTimeError, naive_range.tz_localize,
                      eastern)

    # With the 01:00 wall time listed twice the transition can be inferred.
    expected = date_range(datetime(2011, 11, 6, 0), periods=5,
                          freq=datetools.Hour(), tz=eastern)
    wall_times = ['11/06/2011 00:00', '11/06/2011 01:00',
                  '11/06/2011 01:00', '11/06/2011 02:00',
                  '11/06/2011 03:00']
    naive_index = DatetimeIndex(wall_times)
    inferred = naive_index.tz_localize(eastern, ambiguous='infer')
    self.assert_numpy_array_equal(expected, inferred)
    with tm.assert_produces_warning(FutureWarning):
        inferred_legacy = naive_index.tz_localize(eastern, infer_dst=True)
    self.assert_numpy_array_equal(expected, inferred_legacy)
    self.assert_numpy_array_equal(
        expected, DatetimeIndex(wall_times, tz=eastern, ambiguous='infer'))

    # Away from any DST transition, 'infer' must not change the result.
    summer_range = date_range(datetime(2011, 6, 1, 0), periods=10,
                              freq=datetools.Hour())
    plain = summer_range.tz_localize(eastern)
    with_infer = summer_range.tz_localize(eastern, ambiguous='infer')
    self.assert_numpy_array_equal(plain, with_infer)
    with tm.assert_produces_warning(FutureWarning):
        with_infer_legacy = summer_range.tz_localize(eastern, infer_dst=True)
    self.assert_numpy_array_equal(plain, with_infer_legacy)
def test_reasonable_keyerror(self): # GH #1062 index = DatetimeIndex(["1/3/2000"]) try: index.get_loc("1/1/2000") except KeyError, e: self.assert_("2000" in str(e))
def test_dti_tz_localize_nonexistent_shift_invalid(self, offset, tz_type):
    """``nonexistent=<timedelta>`` must raise when the shift is rejected.

    GH 8917. ``tz_type`` is a string prefix prepended to the zone name and
    ``offset`` is the shift in seconds.
    """
    zone = tz_type + 'Europe/Warsaw'
    naive_index = DatetimeIndex([Timestamp('2015-03-29 02:20:00')])
    expected_message = (
        "The provided timedelta will relocalize on a nonexistent time"
    )
    with pytest.raises(ValueError, match=expected_message):
        naive_index.tz_localize(zone, nonexistent=timedelta(seconds=offset))
def construct_1d_arraylike_from_scalar(value, length, dtype):
    """
    Create a np.ndarray / pandas type of the given length and dtype,
    filled with ``value``.

    Parameters
    ----------
    value : scalar value
    length : int
    dtype : pandas_dtype / np.dtype

    Returns
    -------
    np.ndarray / pandas type of length, filled with value
    """
    if is_datetimetz(dtype):
        from pandas import DatetimeIndex
        subarr = DatetimeIndex([value] * length, dtype=dtype)
    elif is_categorical_dtype(dtype):
        from pandas import Categorical
        # Forward the requested dtype so that its categories / ordering are
        # kept instead of being re-inferred from ``value`` alone.
        subarr = Categorical([value] * length, dtype=dtype)
    else:
        # Anything that is not already a numpy dtype (or a plain type) is
        # unwrapped through its ``.dtype`` attribute.
        if not isinstance(dtype, (np.dtype, type(np.dtype))):
            dtype = dtype.dtype

        # coerce if we have nan for an integer dtype
        if is_integer_dtype(dtype) and isna(value):
            dtype = np.float64
        subarr = np.empty(length, dtype=dtype)
        subarr.fill(value)

    return subarr
def test_dti_tz_localize_ambiguous_infer(self, tz):
    """``ambiguous='infer'`` resolves the Nov 6, 2011 fall-back transition."""
    # A plain hourly range has no repeated wall-clock hour, so there is
    # nothing to infer the transition from and localizing must raise.
    naive_range = date_range(datetime(2011, 11, 6, 0), periods=5,
                             freq=pd.offsets.Hour())
    with pytest.raises(pytz.AmbiguousTimeError):
        naive_range.tz_localize(tz)

    # With the 01:00 wall time listed twice the transition can be inferred.
    expected = date_range(datetime(2011, 11, 6, 0), periods=5,
                          freq=pd.offsets.Hour(), tz=tz)
    wall_times = ['11/06/2011 00:00', '11/06/2011 01:00',
                  '11/06/2011 01:00', '11/06/2011 02:00',
                  '11/06/2011 03:00']
    inferred = DatetimeIndex(wall_times).tz_localize(tz, ambiguous='infer')
    tm.assert_index_equal(expected, inferred)
    tm.assert_index_equal(
        expected, DatetimeIndex(wall_times, tz=tz, ambiguous='infer'))

    # Away from any DST transition, 'infer' must not change the result.
    summer_range = date_range(datetime(2011, 6, 1, 0), periods=10,
                              freq=pd.offsets.Hour())
    plain = summer_range.tz_localize(tz)
    with_infer = summer_range.tz_localize(tz, ambiguous='infer')
    tm.assert_index_equal(plain, with_infer)
def test_tz_localize_dti(self):
    """Localize a millisecond-frequency index and convert it between zones.

    The underlying ``.values`` must stay identical to the equivalent UTC
    index after both ``tz_localize`` and ``tz_convert``.
    """
    dti = DatetimeIndex(start='1/1/2005', end='1/1/2005 0:00:30.256',
                        freq='L')
    dti2 = dti.tz_localize('US/Eastern')

    dti_utc = DatetimeIndex(start='1/1/2005 05:00',
                            end='1/1/2005 5:00:30.256', freq='L', tz='utc')

    # assertTrue replaces the ``assert_`` alias (removed in Python 3.12).
    self.assertTrue(np.array_equal(dti2.values, dti_utc.values))

    dti3 = dti2.tz_convert('US/Pacific')
    self.assertTrue(np.array_equal(dti3.values, dti_utc.values))

    dti = DatetimeIndex(start='11/6/2011 1:59', end='11/6/2011 2:00',
                        freq='L')
    self.assertRaises(pytz.AmbiguousTimeError, dti.tz_localize,
                      'US/Eastern')

    # NOTE(review): 3/13/2011 2:00 looks like a spring-forward gap rather
    # than an ambiguous time; the expected exception type is kept exactly
    # as in the original -- confirm against the pandas version under test.
    dti = DatetimeIndex(start='3/13/2011 1:59', end='3/13/2011 2:00',
                        freq='L')
    self.assertRaises(pytz.AmbiguousTimeError, dti.tz_localize,
                      'US/Eastern')
def test_round_int64(self, start, index_freq, periods, round_freq):
    """Check floor / ceil / round on the raw int64 values of an index.

    Each result must be a multiple of the rounding unit (in nanoseconds)
    and lie within the expected distance of the input; ``round`` must also
    resolve exact halves to an even value.
    """
    index = DatetimeIndex(start=start, freq=index_freq, periods=periods)
    step = to_offset(round_freq).nanos

    # floor: never above the input, strictly less than one step below it
    floored = index.floor(round_freq)
    gap_below = index.asi8 - floored.asi8
    remainder = floored.asi8 % step
    assert (remainder == 0).all(), "floor not a {} multiple".format(round_freq)
    assert (0 <= gap_below).all() and (gap_below < step).all(), "floor error"

    # ceil: never below the input, strictly less than one step above it
    ceiled = index.ceil(round_freq)
    gap_above = ceiled.asi8 - index.asi8
    remainder = ceiled.asi8 % step
    assert (remainder == 0).all(), "ceil not a {} multiple".format(round_freq)
    assert (0 <= gap_above).all() and (gap_above < step).all(), "ceil error"

    # round: at most half a step away from the input
    rounded = index.round(round_freq)
    distance = abs(rounded.asi8 - index.asi8)
    remainder = rounded.asi8 % step
    assert (remainder == 0).all(), "round not a {} multiple".format(round_freq)
    assert (distance <= step // 2).all(), "round error"
    if step % 2 == 0:
        # values exactly half a step away must have landed on an even value
        ties = rounded.asi8[distance == step // 2]
        assert (ties % 2 == 0).all(), "round half to even error"
def test_get_duplicates(self):
    """``get_duplicates`` yields each repeated date exactly once."""
    dates = ['2000-01-01', '2000-01-02', '2000-01-02',
             '2000-01-03', '2000-01-03', '2000-01-04']
    duplicates = DatetimeIndex(dates).get_duplicates()
    expected = DatetimeIndex(['2000-01-02', '2000-01-03'])
    tm.assert_index_equal(duplicates, expected)
def test_reasonable_keyerror(self): # GH #1062 index = DatetimeIndex(['1/3/2000']) try: index.get_loc('1/1/2000') except KeyError as e: assert '2000' in str(e)
def test_map_bug_1677(self): index = DatetimeIndex(['2012-04-25 09:30:00.393000']) f = index.asof result = index.map(f) expected = Index([f(index[0])]) tm.assert_index_equal(result, expected)
def test_order_without_freq(self, index_dates, expected_dates, tz_fixture):
    """``sort_values`` on an index without a freq, in all four call forms.

    Covers ascending / descending order, with and without the returned
    indexer; the result must never carry a freq.
    """
    zone = tz_fixture

    # without freq
    unsorted = DatetimeIndex(index_dates, tz=zone, name='idx')
    ascending = DatetimeIndex(expected_dates, tz=zone, name='idx')
    descending = ascending[::-1]

    result = unsorted.sort_values()
    tm.assert_index_equal(result, ascending)
    assert result.freq is None

    result = unsorted.sort_values(ascending=False)
    tm.assert_index_equal(result, descending)
    assert result.freq is None

    result, positions = unsorted.sort_values(return_indexer=True)
    tm.assert_index_equal(result, ascending)
    tm.assert_numpy_array_equal(positions, np.array([0, 4, 3, 1, 2]),
                                check_dtype=False)
    assert result.freq is None

    result, positions = unsorted.sort_values(return_indexer=True,
                                             ascending=False)
    tm.assert_index_equal(result, descending)
    tm.assert_numpy_array_equal(positions, np.array([2, 1, 3, 4, 0]),
                                check_dtype=False)
    assert result.freq is None
def test_isnull_datetime(self):
    """``isnull`` / ``notnull`` on datetimes, DatetimeIndex and PeriodIndex."""
    self.assertFalse(isnull(datetime.now()))
    self.assertTrue(notnull(datetime.now()))

    dates = date_range('1/1/1990', periods=20)
    all_present = np.ones(len(dates), dtype=bool)
    tm.assert_numpy_array_equal(notnull(dates), all_present)

    # Put a missing value in the first slot and rebuild the index.
    raw = np.asarray(dates)
    raw[0] = iNaT
    with_nat = DatetimeIndex(raw)

    null_mask = isnull(with_nat)
    self.assertTrue(null_mask[0])
    first_only = np.array([True] + [False] * (len(with_nat) - 1), dtype=bool)
    tm.assert_numpy_array_equal(null_mask, first_only)

    # GH 9129
    periods = with_nat.to_period(freq='M')
    null_mask = isnull(periods)
    self.assertTrue(null_mask[0])
    first_only = np.array([True] + [False] * (len(with_nat) - 1), dtype=bool)
    tm.assert_numpy_array_equal(null_mask, first_only)

    null_mask = isnull(periods[1:])
    none_null = np.zeros(len(null_mask), dtype=bool)
    tm.assert_numpy_array_equal(null_mask, none_null)
def test_astype_str_compat(self):
    """``astype(str)`` renders missing values as the string 'NaT'.

    GH 13149, GH 13209: NaT must come back as a string (and not unicode).
    """
    source = DatetimeIndex(['2016-05-16', 'NaT', NaT, np.NaN])
    expected = Index(['2016-05-16', 'NaT', 'NaT', 'NaT'], dtype=object)
    tm.assert_index_equal(source.astype(str), expected)
def test_series_tz_convert_to_utc(self): base = DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'], tz='UTC') idx1 = base.tz_convert('Asia/Tokyo')[:2] idx2 = base.tz_convert('US/Eastern')[1:] res = Series([1, 2], index=idx1) + Series([1, 1], index=idx2) tm.assert_series_equal(res, Series([np.nan, 3, np.nan], index=base))
def test_index_conversion(self):
    """Round-trip the frame's index through object dtype and back.

    Also checks that non-date strings are rejected with a ValueError.
    """
    index = self.frame.index
    obj_index = index.asobject

    conv = DatetimeIndex(obj_index)
    # assertTrue replaces the ``assert_`` alias (removed in Python 3.12).
    self.assertTrue(conv.equals(index))

    self.assertRaises(ValueError, DatetimeIndex, ["a", "b", "c", "d"])
def test_dti_tz_convert_compat_timestamp(self, prefix): strdates = ['1/1/2012', '3/1/2012', '4/1/2012'] idx = DatetimeIndex(strdates, tz=prefix + 'US/Eastern') conv = idx[0].tz_convert(prefix + 'US/Pacific') expected = idx.tz_convert(prefix + 'US/Pacific')[0] assert conv == expected
def test_timestamp_tz_convert(self):
    """Converting one element matches converting the index, then indexing."""
    date_strings = ["1/1/2012", "3/1/2012", "4/1/2012"]
    index = DatetimeIndex(date_strings, tz=self.tzstr("US/Eastern"))

    from_scalar = index[0].tz_convert(self.tzstr("US/Pacific"))
    from_index = index.tz_convert(self.tzstr("US/Pacific"))[0]
    self.assertEqual(from_scalar, from_index)
def test_timestamp_tz_convert(self): strdates = ['1/1/2012', '3/1/2012', '4/1/2012'] idx = DatetimeIndex(strdates, tz='US/Eastern') conv = idx[0].tz_convert('US/Pacific') expected = idx.tz_convert('US/Pacific')[0] self.assertEquals(conv, expected)
def test_index_conversion(self):
    """Round-trip the frame's index through object dtype and back.

    Also checks that non-date strings are rejected with a ValueError.
    """
    original = self.frame.index
    rebuilt = DatetimeIndex(original.asobject)
    self.assertTrue(rebuilt.equals(original))

    self.assertRaises(ValueError, DatetimeIndex, ['a', 'b', 'c', 'd'])
def test_union_coverage(self, sort): idx = DatetimeIndex(['2000-01-03', '2000-01-01', '2000-01-02']) ordered = DatetimeIndex(idx.sort_values(), freq='infer') result = ordered.union(idx, sort=sort) tm.assert_index_equal(result, ordered) result = ordered[:0].union(ordered, sort=sort) tm.assert_index_equal(result, ordered) assert result.freq == ordered.freq
def test_ambiguous_nat(self):
    """``ambiguous='NaT'`` blanks out the repeated 01:00 wall times."""
    zone = self.tz("US/Eastern")
    wall_times = ["11/06/2011 00:00", "11/06/2011 01:00",
                  "11/06/2011 01:00", "11/06/2011 02:00",
                  "11/06/2011 03:00"]
    localized = DatetimeIndex(wall_times).tz_localize(zone, ambiguous="NaT")

    # Same list with both 01:00 entries replaced by missing values.
    with_gaps = ["11/06/2011 00:00", np.NaN, np.NaN,
                 "11/06/2011 02:00", "11/06/2011 03:00"]
    expected = DatetimeIndex(with_gaps, tz="US/Eastern")
    self.assert_numpy_array_equal(expected, localized)
def test_union_bug_1745(self): left = DatetimeIndex(['2012-05-11 15:19:49.695000']) right = DatetimeIndex(['2012-05-29 13:04:21.322000', '2012-05-11 15:27:24.873000', '2012-05-11 15:31:05.350000']) result = left.union(right) exp = DatetimeIndex(sorted(set(list(left)) | set(list(right)))) tm.assert_index_equal(result, exp)
def test_get_duplicates(self):
    """``get_duplicates`` warns and yields each repeated date once."""
    dates = ['2000-01-01', '2000-01-02', '2000-01-02',
             '2000-01-03', '2000-01-03', '2000-01-04']
    index = DatetimeIndex(dates)

    with tm.assert_produces_warning(FutureWarning):
        # Deprecated - see GH20239
        duplicates = index.get_duplicates()

    expected = DatetimeIndex(['2000-01-02', '2000-01-03'])
    tm.assert_index_equal(duplicates, expected)
def test_datetimeindex_union_join_empty(self):
    """Union / join of a DatetimeIndex with an empty Index stays datetime-like."""
    dti = DatetimeIndex(start="1/1/2001", end="2/1/2001", freq="D")
    empty = Index([])

    result = dti.union(empty)
    self.assertIsInstance(result, DatetimeIndex)
    # The original asserted ``result is result``, which is always true and
    # checked nothing; compare the union against the non-empty operand.
    self.assertTrue(result.equals(dti))

    result = dti.join(empty)
    self.assertIsInstance(result, DatetimeIndex)
def test_pass_dates_localize_to_utc(self): strdates = ['1/1/2012', '3/1/2012', '4/1/2012'] idx = DatetimeIndex(strdates) conv = idx.tz_localize('US/Eastern') fromdates = DatetimeIndex(strdates, tz='US/Eastern') self.assert_(conv.tz == fromdates.tz) self.assert_(np.array_equal(conv.values, fromdates.values))
def test_to_timestamp_to_period_astype(self): idx = DatetimeIndex([pd.NaT, '2011-01-01', '2011-02-01'], name='idx') res = idx.astype('period[M]') exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='M', name='idx') tm.assert_index_equal(res, exp) res = idx.astype('period[3M]') exp = PeriodIndex(['NaT', '2011-01', '2011-02'], freq='3M', name='idx') tm.assert_index_equal(res, exp)
def test_misc_coverage(self): rng = date_range('1/1/2000', periods=5) result = rng.groupby(rng.day) assert isinstance(list(result.values())[0][0], Timestamp) idx = DatetimeIndex(['2000-01-03', '2000-01-01', '2000-01-02']) assert not idx.equals(list(idx)) non_datetime = Index(list('abc')) assert not idx.equals(list(non_datetime))
def test_astype_object_with_nat(self): idx = DatetimeIndex([datetime(2013, 1, 1), datetime(2013, 1, 2), pd.NaT, datetime(2013, 1, 4)], name='idx') expected_list = [Timestamp('2013-01-01'), Timestamp('2013-01-02'), pd.NaT, Timestamp('2013-01-04')] expected = pd.Index(expected_list, dtype=object, name='idx') result = idx.astype(object) tm.assert_index_equal(result, expected) assert idx.tolist() == expected_list
def test_dti_tz_localize_pass_dates_to_utc(self, tzstr):
    """``tz_localize`` and the ``tz=`` constructor argument must agree."""
    date_strings = ['1/1/2012', '3/1/2012', '4/1/2012']

    localized = DatetimeIndex(date_strings).tz_localize(tzstr)
    constructed = DatetimeIndex(date_strings, tz=tzstr)

    assert localized.tz == constructed.tz
    tm.assert_numpy_array_equal(localized.values, constructed.values)
def test_pass_dates_localize_to_utc(self):
    """``tz_localize`` and the ``tz=`` constructor argument must agree."""
    date_strings = ["1/1/2012", "3/1/2012", "4/1/2012"]

    localized = DatetimeIndex(date_strings).tz_localize(
        self.tzstr("US/Eastern"))
    constructed = DatetimeIndex(date_strings, tz=self.tzstr("US/Eastern"))

    self.assertEqual(localized.tz, constructed.tz)
    self.assert_numpy_array_equal(localized.values, constructed.values)
def test_dti_from_tzaware_datetime(self, tz):
    """An index built from a tz-aware datetime picks up that time zone."""
    aware = datetime(2012, 8, 19, tzinfo=tz)
    built = DatetimeIndex([aware])
    assert timezones.tz_compare(built.tz, tz)
def test_dti_tz_conversion_freq(self, tz_naive_fixture):
    """``tz_localize`` / ``tz_convert`` must keep the index freq (GH25241)."""
    naive_hourly = DatetimeIndex(["2019-01-01 10:00"], freq="H")
    localized = naive_hourly.tz_localize(tz=tz_naive_fixture)
    assert localized.freq == naive_hourly.freq

    utc_minutely = DatetimeIndex(["2019-01-02 12:00"], tz="UTC", freq="T")
    converted = utc_minutely.tz_convert(tz="UTC")
    assert converted.freq == utc_minutely.freq
def test_cdaterange(self):
    """``cdate_range`` over three periods from 2013-05-01 gives three days."""
    generated = cdate_range('2013-05-01', periods=3)
    expected = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-03'])
    tm.assert_index_equal(expected, generated)
def test_datetimeindex_accessors(self):
    """Exercise the field and boolean accessors of DatetimeIndex / Timestamp.

    The first part runs the same assertions against a naive and a
    US/Eastern daily index of 365 days starting 1998-01-01. The remainder
    checks start/end flags for other frequencies, a table of scalar
    Timestamp accessors, and ``weekofyear`` consistency around new year.
    """
    dti_naive = pd.date_range(freq='D', start=datetime(1998, 1, 1),
                              periods=365)
    # GH#13303
    dti_tz = pd.date_range(freq='D', start=datetime(1998, 1, 1),
                           periods=365, tz='US/Eastern')
    for dti in [dti_naive, dti_tz]:

        # spot checks of individual field values
        assert dti.year[0] == 1998
        assert dti.month[0] == 1
        assert dti.day[0] == 1
        assert dti.hour[0] == 0
        assert dti.minute[0] == 0
        assert dti.second[0] == 0
        assert dti.microsecond[0] == 0
        assert dti.dayofweek[0] == 3

        assert dti.dayofyear[0] == 1
        assert dti.dayofyear[120] == 121

        assert dti.weekofyear[0] == 1
        assert dti.weekofyear[120] == 18

        assert dti.quarter[0] == 1
        assert dti.quarter[120] == 2

        assert dti.days_in_month[0] == 31
        assert dti.days_in_month[90] == 30

        # start / end flags at selected positions
        assert dti.is_month_start[0]
        assert not dti.is_month_start[1]
        assert dti.is_month_start[31]
        assert dti.is_quarter_start[0]
        assert dti.is_quarter_start[90]
        assert dti.is_year_start[0]
        assert not dti.is_year_start[364]
        assert not dti.is_month_end[0]
        assert dti.is_month_end[30]
        assert not dti.is_month_end[31]
        assert dti.is_month_end[364]
        assert not dti.is_quarter_end[0]
        assert not dti.is_quarter_end[30]
        assert dti.is_quarter_end[89]
        assert dti.is_quarter_end[364]
        assert not dti.is_year_end[0]
        assert dti.is_year_end[364]

        # every accessor returns one value per element
        assert len(dti.year) == 365
        assert len(dti.month) == 365
        assert len(dti.day) == 365
        assert len(dti.hour) == 365
        assert len(dti.minute) == 365
        assert len(dti.second) == 365
        assert len(dti.microsecond) == 365
        assert len(dti.dayofweek) == 365
        assert len(dti.dayofyear) == 365
        assert len(dti.weekofyear) == 365
        assert len(dti.quarter) == 365
        assert len(dti.is_month_start) == 365
        assert len(dti.is_month_end) == 365
        assert len(dti.is_quarter_start) == 365
        assert len(dti.is_quarter_end) == 365
        assert len(dti.is_year_start) == 365
        assert len(dti.is_year_end) == 365
        assert len(dti.weekday_name) == 365

        dti.name = 'name'

        # non boolean accessors -> return Index
        for accessor in DatetimeIndex._field_ops:
            res = getattr(dti, accessor)
            assert len(res) == 365
            assert isinstance(res, Index)
            assert res.name == 'name'

        # boolean accessors -> return array
        for accessor in DatetimeIndex._bool_ops:
            res = getattr(dti, accessor)
            assert len(res) == 365
            assert isinstance(res, np.ndarray)

        # test boolean indexing
        res = dti[dti.is_quarter_start]
        exp = dti[[0, 90, 181, 273]]
        tm.assert_index_equal(res, exp)
        res = dti[dti.is_leap_year]
        exp = DatetimeIndex([], freq='D', tz=dti.tz, name='name')
        tm.assert_index_equal(res, exp)

    dti = pd.date_range(freq='BQ-FEB', start=datetime(1998, 1, 1),
                        periods=4)

    assert sum(dti.is_quarter_start) == 0
    assert sum(dti.is_quarter_end) == 4
    assert sum(dti.is_year_start) == 0
    assert sum(dti.is_year_end) == 1

    # Ensure is_start/end accessors throw ValueError for CustomBusinessDay,
    bday_egypt = offsets.CustomBusinessDay(weekmask='Sun Mon Tue Wed Thu')
    dti = date_range(datetime(2013, 4, 30), periods=5, freq=bday_egypt)
    msg = "Custom business days is not supported by is_month_start"
    with pytest.raises(ValueError, match=msg):
        dti.is_month_start

    dti = DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-03'])

    assert dti.is_month_start[0] == 1

    # (accessor value, expected) pairs evaluated on scalar Timestamps
    tests = [(Timestamp('2013-06-01', freq='M').is_month_start, 1),
             (Timestamp('2013-06-01', freq='BM').is_month_start, 0),
             (Timestamp('2013-06-03', freq='M').is_month_start, 0),
             (Timestamp('2013-06-03', freq='BM').is_month_start, 1),
             (Timestamp('2013-02-28', freq='Q-FEB').is_month_end, 1),
             (Timestamp('2013-02-28', freq='Q-FEB').is_quarter_end, 1),
             (Timestamp('2013-02-28', freq='Q-FEB').is_year_end, 1),
             (Timestamp('2013-03-01', freq='Q-FEB').is_month_start, 1),
             (Timestamp('2013-03-01', freq='Q-FEB').is_quarter_start, 1),
             (Timestamp('2013-03-01', freq='Q-FEB').is_year_start, 1),
             (Timestamp('2013-03-31', freq='QS-FEB').is_month_end, 1),
             (Timestamp('2013-03-31', freq='QS-FEB').is_quarter_end, 0),
             (Timestamp('2013-03-31', freq='QS-FEB').is_year_end, 0),
             (Timestamp('2013-02-01', freq='QS-FEB').is_month_start, 1),
             (Timestamp('2013-02-01', freq='QS-FEB').is_quarter_start, 1),
             (Timestamp('2013-02-01', freq='QS-FEB').is_year_start, 1),
             (Timestamp('2013-06-30', freq='BQ').is_month_end, 0),
             (Timestamp('2013-06-30', freq='BQ').is_quarter_end, 0),
             (Timestamp('2013-06-30', freq='BQ').is_year_end, 0),
             (Timestamp('2013-06-28', freq='BQ').is_month_end, 1),
             (Timestamp('2013-06-28', freq='BQ').is_quarter_end, 1),
             (Timestamp('2013-06-28', freq='BQ').is_year_end, 0),
             (Timestamp('2013-06-30', freq='BQS-APR').is_month_end, 0),
             (Timestamp('2013-06-30', freq='BQS-APR').is_quarter_end, 0),
             (Timestamp('2013-06-30', freq='BQS-APR').is_year_end, 0),
             (Timestamp('2013-06-28', freq='BQS-APR').is_month_end, 1),
             (Timestamp('2013-06-28', freq='BQS-APR').is_quarter_end, 1),
             (Timestamp('2013-03-29', freq='BQS-APR').is_year_end, 1),
             (Timestamp('2013-11-01', freq='AS-NOV').is_year_start, 1),
             (Timestamp('2013-10-31', freq='AS-NOV').is_year_end, 1),
             (Timestamp('2012-02-01').days_in_month, 29),
             (Timestamp('2013-02-01').days_in_month, 28)]

    for ts, value in tests:
        assert ts == value

    # GH 6538: Check that DatetimeIndex and its TimeStamp elements
    # return the same weekofyear accessor close to new year w/ tz
    dates = ["2013/12/29", "2013/12/30", "2013/12/31"]
    dates = DatetimeIndex(dates, tz="Europe/Brussels")
    expected = [52, 1, 1]
    assert dates.weekofyear.tolist() == expected
    assert [d.weekofyear for d in dates] == expected
def test_construction_index_with_mixed_timezones_with_NaT(self):
    """``Index(...)`` type inference for Timestamps mixed with NaT.

    Naive or same-tz inputs are expected to give a DatetimeIndex, inputs
    with differing time zones an object-dtype Index, and all-NaT input a
    DatetimeIndex (optionally tz-aware when ``tz`` is passed).
    """
    # GH 11488
    result = Index(
        [pd.NaT, Timestamp('2011-01-01'), pd.NaT, Timestamp('2011-01-02')],
        name='idx')
    exp = DatetimeIndex(
        [pd.NaT, Timestamp('2011-01-01'), pd.NaT, Timestamp('2011-01-02')],
        name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertTrue(isinstance(result, DatetimeIndex))
    self.assertIsNone(result.tz)

    # same tz results in DatetimeIndex
    result = Index([
        pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
        pd.NaT, Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')
    ], name='idx')
    exp = DatetimeIndex([
        pd.NaT, Timestamp('2011-01-01 10:00'),
        pd.NaT, Timestamp('2011-01-02 10:00')
    ], tz='Asia/Tokyo', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertTrue(isinstance(result, DatetimeIndex))
    self.assertIsNotNone(result.tz)
    self.assertEqual(result.tz, exp.tz)

    # same tz results in DatetimeIndex (DST)
    result = Index([
        Timestamp('2011-01-01 10:00', tz='US/Eastern'),
        pd.NaT,
        Timestamp('2011-08-01 10:00', tz='US/Eastern')
    ], name='idx')
    exp = DatetimeIndex([
        Timestamp('2011-01-01 10:00'),
        pd.NaT,
        Timestamp('2011-08-01 10:00')
    ], tz='US/Eastern', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertTrue(isinstance(result, DatetimeIndex))
    self.assertIsNotNone(result.tz)
    self.assertEqual(result.tz, exp.tz)

    # different tz results in Index(dtype=object)
    result = Index([
        pd.NaT, Timestamp('2011-01-01 10:00'),
        pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')
    ], name='idx')
    exp = Index([
        pd.NaT, Timestamp('2011-01-01 10:00'),
        pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')
    ], dtype='object', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertFalse(isinstance(result, DatetimeIndex))

    result = Index([
        pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
        pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')
    ], name='idx')
    exp = Index([
        pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
        pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')
    ], dtype='object', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertFalse(isinstance(result, DatetimeIndex))

    # all NaT
    result = Index([pd.NaT, pd.NaT], name='idx')
    exp = DatetimeIndex([pd.NaT, pd.NaT], name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertTrue(isinstance(result, DatetimeIndex))
    self.assertIsNone(result.tz)

    # all NaT with tz
    result = Index([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx')
    exp = DatetimeIndex([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx')
    self.assert_index_equal(result, exp, exact=True)
    self.assertTrue(isinstance(result, DatetimeIndex))
    self.assertIsNotNone(result.tz)
    self.assertEqual(result.tz, exp.tz)
def test_array_equivalent():
    """``array_equivalent`` across float, object, complex and index inputs.

    Missing values in matching positions count as equal; differing values,
    lengths, time zones or index types do not.
    """
    nan = np.nan

    # Small factories so that every comparison gets freshly built operands.
    def obj_arr(items):
        return np.array(items, dtype="object")

    def cplx_arr(items):
        return np.array(items, dtype="complex")

    def f64(first):
        return Float64Index([first, nan])

    def dti(first, tz=None):
        return DatetimeIndex([first, nan], tz=tz)

    def tdi(first):
        return TimedeltaIndex([first, nan])

    assert array_equivalent(np.array([nan, nan]), np.array([nan, nan]))
    assert array_equivalent(np.array([nan, 1, nan]), np.array([nan, 1, nan]))
    assert array_equivalent(obj_arr([nan, None]), obj_arr([nan, None]))
    # Check the handling of nested arrays in array_equivalent_object
    assert array_equivalent(
        obj_arr([obj_arr([nan, None]), None]),
        obj_arr([obj_arr([nan, None]), None]),
    )
    assert array_equivalent(cplx_arr([nan, 1 + 1j]), cplx_arr([nan, 1 + 1j]))
    assert not array_equivalent(
        cplx_arr([nan, 1 + 1j]), cplx_arr([nan, 1 + 2j]))
    assert not array_equivalent(
        np.array([nan, 1, nan]), np.array([nan, 2, nan]))
    assert not array_equivalent(
        np.array(["a", "b", "c", "d"]), np.array(["e", "e"]))

    assert array_equivalent(f64(0), f64(0))
    assert not array_equivalent(f64(0), f64(1))

    assert array_equivalent(dti(0), dti(0))
    assert not array_equivalent(dti(0), dti(1))

    assert array_equivalent(tdi(0), tdi(0))
    assert not array_equivalent(tdi(0), tdi(1))

    assert array_equivalent(dti(0, tz="US/Eastern"), dti(0, tz="US/Eastern"))
    assert not array_equivalent(
        dti(0, tz="US/Eastern"), dti(1, tz="US/Eastern"))
    assert not array_equivalent(dti(0), dti(0, tz="US/Eastern"))
    assert not array_equivalent(dti(0, tz="CET"), dti(0, tz="US/Eastern"))

    assert not array_equivalent(dti(0), tdi(0))
def test_dti_constructor_static_tzinfo(self, prefix): # it works! index = DatetimeIndex([datetime(2012, 1, 1)], tz=prefix + "EST") index.hour index[0]
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
    """
    Encode input values as an enumerated type or categorical variable

    Parameters
    ----------
    values : ndarray (1-d)
        Sequence
    sort : boolean, default False
        Sort by values
    order : default None
        Not referenced anywhere in this function body.
    na_sentinel : int, default -1
        Value to mark "not found"
    size_hint : hint to the hashtable sizer
        When falsy, ``len`` of the converted values is used instead.

    Returns
    -------
    labels : the indexer to the original array
    uniques : ndarray (1-d) or Index
        the unique values. Index is returned when passed values is Index or
        Series

    note: an array of Periods will ignore sort as it returns an always sorted
    PeriodIndex
    """
    from pandas import Index, Series, DatetimeIndex, PeriodIndex

    # handling two possibilities here
    # - for a numpy datetimelike simply view as i8 then cast back
    # - for an extension datetimelike view as i8 then
    #   reconstruct from boxed values to transfer metadata
    dtype = None
    if needs_i8_conversion(values):
        if is_period_dtype(values):
            values = PeriodIndex(values)
            vals = values.asi8
        elif is_datetimetz(values):
            values = DatetimeIndex(values)
            vals = values.asi8
        else:
            # numpy dtype
            dtype = values.dtype
            vals = values.view(np.int64)
    else:
        vals = np.asarray(values)

    # pick the hashtable / vector classes for the (possibly converted) data
    (hash_klass, vec_klass), vals = _get_data_algo(vals, _hashtables)

    table = hash_klass(size_hint or len(vals))
    uniques = vec_klass()
    labels = table.get_labels(vals, uniques, 0, na_sentinel, True)

    labels = _ensure_platform_int(labels)

    uniques = uniques.to_array()

    if sort and len(uniques) > 0:
        uniques, labels = safe_sort(uniques, labels, na_sentinel=na_sentinel,
                                    assume_unique=True)

    # dtype is only set on the numpy datetimelike path above: cast the
    # int64 uniques back to the original dtype
    if dtype is not None:
        uniques = uniques.astype(dtype)

    # ``values`` may have been rebound to a PeriodIndex / DatetimeIndex
    # above, in which case the uniques are rebuilt from it
    if isinstance(values, Index):
        uniques = values._shallow_copy(uniques, name=None)
    elif isinstance(values, Series):
        uniques = Index(uniques)
    return labels, uniques
def test_datetimeindex_from_empty_datetime64_array():
    """An empty datetime64 array of any tested unit gives an empty index."""
    for unit in ['ms', 'us', 'ns']:
        empty_values = np.array([], dtype='datetime64[%s]' % unit)
        built = DatetimeIndex(empty_values)
        assert len(built) == 0
def test_infer_freq(self, freq_sample):
    """``freq='infer'`` recovers the freq from raw int64 values (GH 11018)."""
    source = pd.date_range("2011-01-01 09:00:00", freq=freq_sample,
                           periods=10)
    rebuilt = DatetimeIndex(source.asi8, freq="infer")

    tm.assert_index_equal(source, rebuilt)
    assert rebuilt.freq == freq_sample
def test_range_edges(self):
    """``date_range`` includes both endpoints at every tested resolution.

    GH#13672. Each case is ``(start, end, freq, expected values)``; a start
    later than the end yields an empty index.
    """
    cases = [
        ('1970-01-01 00:00:00.000000001', '1970-01-01 00:00:00.000000004',
         'N',
         ['1970-01-01 00:00:00.000000001', '1970-01-01 00:00:00.000000002',
          '1970-01-01 00:00:00.000000003', '1970-01-01 00:00:00.000000004']),
        ('1970-01-01 00:00:00.000000004', '1970-01-01 00:00:00.000000001',
         'N',
         []),
        ('1970-01-01 00:00:00.000000001', '1970-01-01 00:00:00.000000001',
         'N',
         ['1970-01-01 00:00:00.000000001']),
        ('1970-01-01 00:00:00.000001', '1970-01-01 00:00:00.000004',
         'U',
         ['1970-01-01 00:00:00.000001', '1970-01-01 00:00:00.000002',
          '1970-01-01 00:00:00.000003', '1970-01-01 00:00:00.000004']),
        ('1970-01-01 00:00:00.001', '1970-01-01 00:00:00.004',
         'L',
         ['1970-01-01 00:00:00.001', '1970-01-01 00:00:00.002',
          '1970-01-01 00:00:00.003', '1970-01-01 00:00:00.004']),
        ('1970-01-01 00:00:01', '1970-01-01 00:00:04',
         'S',
         ['1970-01-01 00:00:01', '1970-01-01 00:00:02',
          '1970-01-01 00:00:03', '1970-01-01 00:00:04']),
        ('1970-01-01 00:01', '1970-01-01 00:04',
         'T',
         ['1970-01-01 00:01', '1970-01-01 00:02',
          '1970-01-01 00:03', '1970-01-01 00:04']),
        ('1970-01-01 01:00', '1970-01-01 04:00',
         'H',
         ['1970-01-01 01:00', '1970-01-01 02:00',
          '1970-01-01 03:00', '1970-01-01 04:00']),
        ('1970-01-01', '1970-01-04',
         'D',
         ['1970-01-01', '1970-01-02', '1970-01-03', '1970-01-04']),
    ]

    for first, last, freq, expected_values in cases:
        generated = pd.date_range(start=Timestamp(first),
                                  end=Timestamp(last), freq=freq)
        tm.assert_index_equal(generated, DatetimeIndex(expected_values))
def test_dti_tz_convert_hour_overflow_dst_timestamps(self, tz):
    """Hour field after ``tz_convert`` for sorted and unsorted input.

    Regression test for GH#13306. Each case is ``(wall times, source tz,
    target tz, expected hours)``.
    """
    cases = [
        # sorted case US/Eastern -> UTC
        (["2008-05-12 09:50:00", "2008-12-12 09:50:35",
          "2009-05-12 09:50:32"], tz, "UTC", [13, 14, 13]),
        # sorted case UTC -> US/Eastern
        (["2008-05-12 13:50:00", "2008-12-12 14:50:35",
          "2009-05-12 13:50:32"], "UTC", "US/Eastern", [9, 9, 9]),
        # unsorted case US/Eastern -> UTC
        (["2008-05-12 09:50:00", "2008-12-12 09:50:35",
          "2008-05-12 09:50:32"], tz, "UTC", [13, 14, 13]),
        # unsorted case UTC -> US/Eastern
        (["2008-05-12 13:50:00", "2008-12-12 14:50:35",
          "2008-05-12 13:50:32"], "UTC", "US/Eastern", [9, 9, 9]),
    ]

    for wall_times, source_tz, target_tz, hours in cases:
        stamps = [Timestamp(text, tz=source_tz) for text in wall_times]
        converted = DatetimeIndex(stamps).tz_convert(target_tz)
        tm.assert_index_equal(converted.hour, Index(hours))
def test_no_millisecond_field(self): with self.assertRaises(AttributeError): DatetimeIndex.millisecond with self.assertRaises(AttributeError): DatetimeIndex([]).millisecond
def test_set_index_datetime(self):
    """``set_index`` with tz-aware datetime levels in a MultiIndex.

    First appends a label level to a US/Pacific datetime index and swaps
    the levels; then stacks three tz-aware DatetimeIndex levels and checks
    both the unique levels and the per-row level values.
    """
    # GH#3950
    df = DataFrame({
        "label": ["a", "a", "a", "b", "b", "b"],
        "datetime": [
            "2011-07-19 07:00:00",
            "2011-07-19 08:00:00",
            "2011-07-19 09:00:00",
            "2011-07-19 07:00:00",
            "2011-07-19 08:00:00",
            "2011-07-19 09:00:00",
        ],
        "value": range(6),
    })
    # parse as UTC, then view the same instants in US/Pacific
    df.index = to_datetime(df.pop("datetime"), utc=True)
    df.index = df.index.tz_convert("US/Pacific")

    expected = DatetimeIndex(
        [
            "2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"
        ],
        name="datetime",
    )
    expected = expected.tz_localize("UTC").tz_convert("US/Pacific")

    df = df.set_index("label", append=True)
    tm.assert_index_equal(df.index.levels[0], expected)
    tm.assert_index_equal(df.index.levels[1], Index(["a", "b"], name="label"))
    assert df.index.names == ["datetime", "label"]

    df = df.swaplevel(0, 1)
    tm.assert_index_equal(df.index.levels[0], Index(["a", "b"], name="label"))
    tm.assert_index_equal(df.index.levels[1], expected)
    assert df.index.names == ["label", "datetime"]

    df = DataFrame(np.random.random(6))
    idx1 = DatetimeIndex(
        [
            "2011-07-19 07:00:00",
            "2011-07-19 08:00:00",
            "2011-07-19 09:00:00",
            "2011-07-19 07:00:00",
            "2011-07-19 08:00:00",
            "2011-07-19 09:00:00",
        ],
        tz="US/Eastern",
    )
    idx2 = DatetimeIndex(
        [
            "2012-04-01 09:00",
            "2012-04-01 09:00",
            "2012-04-01 09:00",
            "2012-04-02 09:00",
            "2012-04-02 09:00",
            "2012-04-02 09:00",
        ],
        tz="US/Eastern",
    )
    idx3 = date_range("2011-01-01 09:00", periods=6, tz="Asia/Tokyo")
    # drop the freq that date_range attached before comparing levels
    idx3 = idx3._with_freq(None)

    df = df.set_index(idx1)
    df = df.set_index(idx2, append=True)
    df = df.set_index(idx3, append=True)

    expected1 = DatetimeIndex(
        [
            "2011-07-19 07:00:00", "2011-07-19 08:00:00", "2011-07-19 09:00:00"
        ],
        tz="US/Eastern",
    )
    expected2 = DatetimeIndex(["2012-04-01 09:00", "2012-04-02 09:00"],
                              tz="US/Eastern")

    tm.assert_index_equal(df.index.levels[0], expected1)
    tm.assert_index_equal(df.index.levels[1], expected2)
    tm.assert_index_equal(df.index.levels[2], idx3)

    # GH#7092
    tm.assert_index_equal(df.index.get_level_values(0), idx1)
    tm.assert_index_equal(df.index.get_level_values(1), idx2)
    tm.assert_index_equal(df.index.get_level_values(2), idx3)
def test_equals(self):
    """``DatetimeIndex.equals`` against copies, other tz and other types.

    GH 13107. Also checks that comparing with out-of-bounds dates does not
    raise.
    """
    idx = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])

    def assert_differs_in_every_form(other):
        # ``idx`` must differ from ``other`` however ``other`` is wrapped.
        assert not idx.equals(other)
        assert not idx.equals(other.copy())
        assert not idx.equals(other.astype(object))
        assert not idx.astype(object).equals(other)
        assert not idx.equals(list(other))
        assert not idx.equals(Series(other))

    assert idx.equals(idx)
    assert idx.equals(idx.copy())
    assert idx.equals(idx.astype(object))
    assert idx.astype(object).equals(idx)
    assert idx.astype(object).equals(idx.astype(object))
    assert not idx.equals(list(idx))
    assert not idx.equals(Series(idx))

    idx2 = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"],
                         tz="US/Pacific")
    assert_differs_in_every_form(idx2)

    # same internal, different tz
    idx3 = DatetimeIndex(idx.asi8, tz="US/Pacific")
    tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
    assert_differs_in_every_form(idx3)

    # check that we do not raise when comparing with OutOfBounds objects
    oob = Index([datetime(2500, 1, 1)] * 3, dtype=object)
    for candidate in (idx, idx2, idx3):
        assert not candidate.equals(oob)

    # check that we do not raise when comparing with OutOfBounds dt64
    oob2 = oob.map(np.datetime64)
    for candidate in (idx, idx2, idx3):
        assert not candidate.equals(oob2)
def test_astype_raises(self, dtype):
    """Check that casting a DatetimeIndex to ``dtype`` raises ``TypeError``.

    ``dtype`` is supplied by the caller (presumably a parametrization of
    unsupported target dtypes -- confirm against the decorator/fixture).
    """
    # GH 13149, GH 13209
    # ``np.nan`` is the canonical spelling; the ``np.NaN`` alias no longer
    # exists in NumPy 2.0 and both names refer to the same object before that.
    idx = DatetimeIndex(["2016-05-16", "NaT", NaT, np.nan])
    msg = "Cannot cast DatetimeArray to dtype"
    with pytest.raises(TypeError, match=msg):
        idx.astype(dtype)
def test_dti_tz_nat(self, tzstr): idx = DatetimeIndex([Timestamp("2013-1-1", tz=tzstr), pd.NaT]) assert isna(idx[1]) assert idx[0].tzinfo is not None
def test_datetime64_tz_fillna(self, tz):
    """Exercise ``Series.fillna`` on tz-naive and tz-aware datetime data.

    Fill values used: naive and tz-aware Timestamps, a string, a
    ``datetime`` object and dicts keyed by position.  After every fill the
    null mask of the input series is compared with ``null_loc`` again to
    confirm the input itself was not modified.

    ``tz`` is supplied by the caller (presumably a timezone fixture --
    confirm against the fixture definition).
    """
    # DatetimeLikeBlock
    ser = Series([
        Timestamp("2011-01-01 10:00"),
        NaT,
        Timestamp("2011-01-03 10:00"),
        NaT,
    ])
    # positions 1 and 3 are missing in every input series used below
    null_loc = Series([False, True, False, True])

    # naive scalar fill
    result = ser.fillna(Timestamp("2011-01-02 10:00"))
    expected = Series([
        Timestamp("2011-01-01 10:00"),
        Timestamp("2011-01-02 10:00"),
        Timestamp("2011-01-03 10:00"),
        Timestamp("2011-01-02 10:00"),
    ])
    tm.assert_series_equal(expected, result)
    # check ser is not changed
    tm.assert_series_equal(isna(ser), null_loc)

    # tz-aware scalar fill into naive data
    result = ser.fillna(Timestamp("2011-01-02 10:00", tz=tz))
    expected = Series([
        Timestamp("2011-01-01 10:00"),
        Timestamp("2011-01-02 10:00", tz=tz),
        Timestamp("2011-01-03 10:00"),
        Timestamp("2011-01-02 10:00", tz=tz),
    ])
    tm.assert_series_equal(expected, result)
    tm.assert_series_equal(isna(ser), null_loc)

    # string fill: expected result is object dtype
    result = ser.fillna("AAA")
    expected = Series(
        [
            Timestamp("2011-01-01 10:00"),
            "AAA",
            Timestamp("2011-01-03 10:00"),
            "AAA",
        ],
        dtype=object,
    )
    tm.assert_series_equal(expected, result)
    tm.assert_series_equal(isna(ser), null_loc)

    # dict fill mixing a tz-aware and a naive value
    result = ser.fillna({
        1: Timestamp("2011-01-02 10:00", tz=tz),
        3: Timestamp("2011-01-04 10:00"),
    })
    expected = Series([
        Timestamp("2011-01-01 10:00"),
        Timestamp("2011-01-02 10:00", tz=tz),
        Timestamp("2011-01-03 10:00"),
        Timestamp("2011-01-04 10:00"),
    ])
    tm.assert_series_equal(expected, result)
    tm.assert_series_equal(isna(ser), null_loc)

    # dict fill with naive values only
    result = ser.fillna({
        1: Timestamp("2011-01-02 10:00"),
        3: Timestamp("2011-01-04 10:00")
    })
    expected = Series([
        Timestamp("2011-01-01 10:00"),
        Timestamp("2011-01-02 10:00"),
        Timestamp("2011-01-03 10:00"),
        Timestamp("2011-01-04 10:00"),
    ])
    tm.assert_series_equal(expected, result)
    tm.assert_series_equal(isna(ser), null_loc)

    # DatetimeTZBlock
    idx = DatetimeIndex(["2011-01-01 10:00", NaT, "2011-01-03 10:00", NaT], tz=tz)
    ser = Series(idx)
    assert ser.dtype == f"datetime64[ns, {tz}]"
    tm.assert_series_equal(isna(ser), null_loc)

    # naive scalar fill into tz-aware data
    result = ser.fillna(Timestamp("2011-01-02 10:00"))
    expected = Series([
        Timestamp("2011-01-01 10:00", tz=tz),
        Timestamp("2011-01-02 10:00"),
        Timestamp("2011-01-03 10:00", tz=tz),
        Timestamp("2011-01-02 10:00"),
    ])
    tm.assert_series_equal(expected, result)
    tm.assert_series_equal(isna(ser), null_loc)

    # fill with a Timestamp in the same tz: expected stays a tz-aware series
    result = ser.fillna(Timestamp("2011-01-02 10:00", tz=tz))
    idx = DatetimeIndex(
        [
            "2011-01-01 10:00",
            "2011-01-02 10:00",
            "2011-01-03 10:00",
            "2011-01-02 10:00",
        ],
        tz=tz,
    )
    expected = Series(idx)
    tm.assert_series_equal(expected, result)
    tm.assert_series_equal(isna(ser), null_loc)

    # same fill value, passed as a datetime object via to_pydatetime()
    result = ser.fillna(
        Timestamp("2011-01-02 10:00", tz=tz).to_pydatetime())
    idx = DatetimeIndex(
        [
            "2011-01-01 10:00",
            "2011-01-02 10:00",
            "2011-01-03 10:00",
            "2011-01-02 10:00",
        ],
        tz=tz,
    )
    expected = Series(idx)
    tm.assert_series_equal(expected, result)
    tm.assert_series_equal(isna(ser), null_loc)

    # string fill: expected result is object dtype
    result = ser.fillna("AAA")
    expected = Series(
        [
            Timestamp("2011-01-01 10:00", tz=tz),
            "AAA",
            Timestamp("2011-01-03 10:00", tz=tz),
            "AAA",
        ],
        dtype=object,
    )
    tm.assert_series_equal(expected, result)
    tm.assert_series_equal(isna(ser), null_loc)

    # dict fill mixing a tz-aware and a naive value
    result = ser.fillna({
        1: Timestamp("2011-01-02 10:00", tz=tz),
        3: Timestamp("2011-01-04 10:00"),
    })
    expected = Series([
        Timestamp("2011-01-01 10:00", tz=tz),
        Timestamp("2011-01-02 10:00", tz=tz),
        Timestamp("2011-01-03 10:00", tz=tz),
        Timestamp("2011-01-04 10:00"),
    ])
    tm.assert_series_equal(expected, result)
    tm.assert_series_equal(isna(ser), null_loc)

    # dict fill with values in the same tz
    result = ser.fillna({
        1: Timestamp("2011-01-02 10:00", tz=tz),
        3: Timestamp("2011-01-04 10:00", tz=tz),
    })
    expected = Series([
        Timestamp("2011-01-01 10:00", tz=tz),
        Timestamp("2011-01-02 10:00", tz=tz),
        Timestamp("2011-01-03 10:00", tz=tz),
        Timestamp("2011-01-04 10:00", tz=tz),
    ])
    tm.assert_series_equal(expected, result)
    tm.assert_series_equal(isna(ser), null_loc)

    # filling with a naive/other zone, coerce to object
    result = ser.fillna(Timestamp("20130101"))
    expected = Series([
        Timestamp("2011-01-01 10:00", tz=tz),
        Timestamp("2013-01-01"),
        Timestamp("2011-01-03 10:00", tz=tz),
        Timestamp("2013-01-01"),
    ])
    tm.assert_series_equal(expected, result)
    tm.assert_series_equal(isna(ser), null_loc)

    result = ser.fillna(Timestamp("20130101", tz="US/Pacific"))
    expected = Series([
        Timestamp("2011-01-01 10:00", tz=tz),
        Timestamp("2013-01-01", tz="US/Pacific"),
        Timestamp("2011-01-03 10:00", tz=tz),
        Timestamp("2013-01-01", tz="US/Pacific"),
    ])
    tm.assert_series_equal(expected, result)
    tm.assert_series_equal(isna(ser), null_loc)
def test_cdaterange_holidays(self):
    """Check ``cdate_range`` with the start date itself listed as a holiday."""
    generated = cdate_range('2013-05-01', periods=3, holidays=['2013-05-01'])
    wanted = DatetimeIndex(['2013-05-02', '2013-05-03', '2013-05-06'])
    tm.assert_index_equal(wanted, generated)
def test_dti_tz_localize_ambiguous_flags(self, tz):
    """Localize across a fall-back transition using explicit ``ambiguous`` flags.

    Flags are passed as a list, an integer array and a boolean array, both
    to ``tz_localize`` and to the ``DatetimeIndex`` constructor.  A flag
    array whose length does not match the data must raise ``ValueError``.

    ``tz`` is supplied by the caller (presumably a fixture for a US zone
    observing the 2011-11-06 transition -- confirm against the fixture).
    """
    # November 6, 2011, fall back: the 01:00 wall-clock hour occurs twice

    # Pass in flags to determine right dst transition
    dr = date_range(
        datetime(2011, 11, 6, 0), periods=5, freq=pd.offsets.Hour(), tz=tz
    )
    times = [
        "11/06/2011 00:00",
        "11/06/2011 01:00",
        "11/06/2011 01:00",
        "11/06/2011 02:00",
        "11/06/2011 03:00",
    ]

    # Test tz_localize
    di = DatetimeIndex(times)
    is_dst = [1, 1, 0, 0, 0]
    localized = di.tz_localize(tz, ambiguous=is_dst)
    expected = dr._with_freq(None)
    tm.assert_index_equal(expected, localized)
    tm.assert_index_equal(expected, DatetimeIndex(times, tz=tz, ambiguous=is_dst))

    localized = di.tz_localize(tz, ambiguous=np.array(is_dst))
    tm.assert_index_equal(dr, localized)

    localized = di.tz_localize(tz, ambiguous=np.array(is_dst).astype("bool"))
    tm.assert_index_equal(dr, localized)

    # Test constructor
    localized = DatetimeIndex(times, tz=tz, ambiguous=is_dst)
    tm.assert_index_equal(dr, localized)

    # Test duplicate times where inferring the dst fails
    times += times
    di = DatetimeIndex(times)

    # When the sizes are incompatible, make sure error is raised.
    # Expect the specific ValueError rather than any Exception, so an
    # unrelated failure cannot satisfy this check.
    msg = "Length of ambiguous bool-array must be the same size as vals"
    with pytest.raises(ValueError, match=msg):
        di.tz_localize(tz, ambiguous=is_dst)

    # When sizes are compatible and there are repeats ('infer' won't work)
    is_dst = np.hstack((is_dst, is_dst))
    localized = di.tz_localize(tz, ambiguous=is_dst)
    dr = dr.append(dr)
    tm.assert_index_equal(dr, localized)

    # When there is no dst transition, nothing special happens
    dr = date_range(datetime(2011, 6, 1, 0), periods=10, freq=pd.offsets.Hour())
    is_dst = np.array([1] * 10)
    localized = dr.tz_localize(tz)
    localized_is_dst = dr.tz_localize(tz, ambiguous=is_dst)
    tm.assert_index_equal(localized, localized_is_dst)
def test_cdaterange_weekmask(self):
    """Check ``cdate_range`` with a Sunday-to-Thursday ``weekmask``."""
    generated = cdate_range(
        '2013-05-01', periods=3, weekmask='Sun Mon Tue Wed Thu'
    )
    wanted = DatetimeIndex(['2013-05-01', '2013-05-02', '2013-05-05'])
    tm.assert_index_equal(wanted, generated)
def test_value_counts_inferred(self):
    """Check ``value_counts``/``unique``/``nunique`` for ``Index`` and ``Series``.

    Each container class is run through the same scenarios: string values,
    binned numeric values, missing values, empty input, datetime64 values
    (with and without NaT) and timedelta64 values.
    """
    klasses = [Index, Series]
    for klass in klasses:
        s_values = ['a', 'b', 'b', 'b', 'b', 'c', 'd', 'd', 'a', 'a']
        s = klass(s_values)
        expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c'])
        tm.assert_series_equal(s.value_counts(), expected)

        self.assert_numpy_array_equal(s.unique(), np.unique(s_values))
        self.assertEqual(s.nunique(), 4)
        # don't sort, have to sort after the fact as not sorting is platform-dep
        hist = s.value_counts(sort=False)
        hist.sort()
        expected = Series([3, 1, 4, 2], index=list('acbd'))
        expected.sort()
        tm.assert_series_equal(hist, expected)

        # sort ascending
        hist = s.value_counts(ascending=True)
        expected = Series([1, 2, 3, 4], index=list('cdab'))
        tm.assert_series_equal(hist, expected)

        # relative histogram.
        hist = s.value_counts(normalize=True)
        expected = Series([.4, .3, .2, .1], index=['b', 'a', 'd', 'c'])
        tm.assert_series_equal(hist, expected)

        # bins
        self.assertRaises(TypeError,
                          lambda bins: s.value_counts(bins=bins), 1)

        s1 = Series([1, 1, 2, 3])
        res1 = s1.value_counts(bins=1)
        exp1 = Series({0.998: 4})
        tm.assert_series_equal(res1, exp1)
        res1n = s1.value_counts(bins=1, normalize=True)
        exp1n = Series({0.998: 1.0})
        tm.assert_series_equal(res1n, exp1n)

        self.assert_numpy_array_equal(s1.unique(), np.array([1, 2, 3]))
        self.assertEqual(s1.nunique(), 3)

        res4 = s1.value_counts(bins=4)
        exp4 = Series({
            0.998: 2,
            1.5: 1,
            2.0: 0,
            2.5: 1
        }, index=[0.998, 2.5, 1.5, 2.0])
        tm.assert_series_equal(res4, exp4)
        res4n = s1.value_counts(bins=4, normalize=True)
        exp4n = Series({
            0.998: 0.5,
            1.5: 0.25,
            2.0: 0.0,
            2.5: 0.25
        }, index=[0.998, 2.5, 1.5, 2.0])
        tm.assert_series_equal(res4n, exp4n)

        # handle NA's properly
        s_values = [
            'a', 'b', 'b', 'b', np.nan, np.nan, 'd', 'd', 'a', 'a', 'b'
        ]
        s = klass(s_values)
        expected = Series([4, 3, 2], index=['b', 'a', 'd'])
        tm.assert_series_equal(s.value_counts(), expected)

        self.assert_numpy_array_equal(
            s.unique(), np.array(['a', 'b', np.nan, 'd'], dtype='O'))
        self.assertEqual(s.nunique(), 3)

        # empty input
        s = klass({})
        expected = Series([], dtype=np.int64)
        tm.assert_series_equal(s.value_counts(), expected)
        self.assert_numpy_array_equal(s.unique(), np.array([]))
        self.assertEqual(s.nunique(), 0)

        # GH 3002, datetime64[ns]
        txt = "\n".join([
            'xxyyzz20100101PIE', 'xxyyzz20100101GUM', 'xxyyzz20100101EGG',
            'xxyyww20090101EGG', 'foofoo20080909PIE', 'foofoo20080909GUM'
        ])
        f = StringIO(txt)
        df = pd.read_fwf(f,
                         widths=[6, 8, 3],
                         names=["person_id", "dt", "food"],
                         parse_dates=["dt"])

        s = klass(df['dt'].copy())

        # every stamp carries the same 'Z' suffix, matching ``expected`` below
        idx = pd.to_datetime([
            '2010-01-01 00:00:00Z', '2008-09-09 00:00:00Z',
            '2009-01-01 00:00:00Z'
        ])
        expected_s = Series([3, 2, 1], index=idx)
        tm.assert_series_equal(s.value_counts(), expected_s)

        expected = np.array([
            '2010-01-01 00:00:00Z', '2009-01-01 00:00:00Z',
            '2008-09-09 00:00:00Z'
        ], dtype='datetime64[ns]')
        if isinstance(s, DatetimeIndex):
            expected = DatetimeIndex(expected)
            self.assertTrue(s.unique().equals(expected))
        else:
            self.assert_numpy_array_equal(s.unique(), expected)

        self.assertEqual(s.nunique(), 3)

        # with NaT
        s = df['dt'].copy()
        s = klass([v for v in s.values] + [pd.NaT])

        result = s.value_counts()
        self.assertEqual(result.index.dtype, 'datetime64[ns]')
        tm.assert_series_equal(result, expected_s)

        result = s.value_counts(dropna=False)
        expected_s[pd.NaT] = 1
        tm.assert_series_equal(result, expected_s)

        unique = s.unique()
        self.assertEqual(unique.dtype, 'datetime64[ns]')

        # numpy_array_equal cannot compare pd.NaT
        self.assert_numpy_array_equal(unique[:3], expected)
        self.assertTrue(unique[3] is pd.NaT or
                        unique[3].astype('int64') == pd.tslib.iNaT)

        self.assertEqual(s.nunique(), 3)
        self.assertEqual(s.nunique(dropna=False), 4)

        # timedelta64[ns]
        td = df.dt - df.dt + timedelta(1)
        td = klass(td)

        result = td.value_counts()
        expected_s = Series([6], index=[Timedelta('1day')])
        tm.assert_series_equal(result, expected_s)

        expected = TimedeltaIndex(['1 days'])
        if isinstance(td, TimedeltaIndex):
            self.assertTrue(td.unique().equals(expected))
        else:
            self.assert_numpy_array_equal(td.unique(), expected.values)

        # same delta, built with the operands in the other order
        td2 = timedelta(1) + (df.dt - df.dt)
        td2 = klass(td2)
        result2 = td2.value_counts()
        tm.assert_series_equal(result2, expected_s)
def test_tz_convert_nat(self): # GH#5546 dates = [pd.NaT] idx = DatetimeIndex(dates) idx = idx.tz_localize("US/Pacific") tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Pacific")) idx = idx.tz_convert("US/Eastern") tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Eastern")) idx = idx.tz_convert("UTC") tm.assert_index_equal(idx, DatetimeIndex(dates, tz="UTC")) dates = ["2010-12-01 00:00", "2010-12-02 00:00", pd.NaT] idx = DatetimeIndex(dates) idx = idx.tz_localize("US/Pacific") tm.assert_index_equal(idx, DatetimeIndex(dates, tz="US/Pacific")) idx = idx.tz_convert("US/Eastern") expected = ["2010-12-01 03:00", "2010-12-02 03:00", pd.NaT] tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern")) idx = idx + pd.offsets.Hour(5) expected = ["2010-12-01 08:00", "2010-12-02 08:00", pd.NaT] tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern")) idx = idx.tz_convert("US/Pacific") expected = ["2010-12-01 05:00", "2010-12-02 05:00", pd.NaT] tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Pacific")) idx = idx + np.timedelta64(3, "h") expected = ["2010-12-01 08:00", "2010-12-02 08:00", pd.NaT] tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Pacific")) idx = idx.tz_convert("US/Eastern") expected = ["2010-12-01 11:00", "2010-12-02 11:00", pd.NaT] tm.assert_index_equal(idx, DatetimeIndex(expected, tz="US/Eastern"))
def test_dti_tz_convert_hour_overflow_dst(self):
    """Check ``.hour`` after ``tz_convert`` for sorted and unsorted inputs."""
    # Regression test for:
    # https://github.com/pandas-dev/pandas/issues/13306
    cases = [
        # sorted case US/Eastern -> UTC
        (
            ["2008-05-12 09:50:00", "2008-12-12 09:50:35", "2009-05-12 09:50:32"],
            "US/Eastern",
            "UTC",
            [13, 14, 13],
        ),
        # sorted case UTC -> US/Eastern
        (
            ["2008-05-12 13:50:00", "2008-12-12 14:50:35", "2009-05-12 13:50:32"],
            "UTC",
            "US/Eastern",
            [9, 9, 9],
        ),
        # unsorted case US/Eastern -> UTC
        (
            ["2008-05-12 09:50:00", "2008-12-12 09:50:35", "2008-05-12 09:50:32"],
            "US/Eastern",
            "UTC",
            [13, 14, 13],
        ),
        # unsorted case UTC -> US/Eastern
        (
            ["2008-05-12 13:50:00", "2008-12-12 14:50:35", "2008-05-12 13:50:32"],
            "UTC",
            "US/Eastern",
            [9, 9, 9],
        ),
    ]
    for stamps, source_zone, target_zone, hours in cases:
        localized = DatetimeIndex(stamps).tz_localize(source_zone)
        converted = localized.tz_convert(target_zone)
        tm.assert_index_equal(converted.hour, Index(hours))
def test_intersection(self, tz, sort): # GH 4690 (with tz) base = date_range("6/1/2000", "6/30/2000", freq="D", name="idx") # if target has the same name, it is preserved rng2 = date_range("5/15/2000", "6/20/2000", freq="D", name="idx") expected2 = date_range("6/1/2000", "6/20/2000", freq="D", name="idx") # if target name is different, it will be reset rng3 = date_range("5/15/2000", "6/20/2000", freq="D", name="other") expected3 = date_range("6/1/2000", "6/20/2000", freq="D", name=None) rng4 = date_range("7/1/2000", "7/31/2000", freq="D", name="idx") expected4 = DatetimeIndex([], freq="D", name="idx") for (rng, expected) in [ (rng2, expected2), (rng3, expected3), (rng4, expected4), ]: result = base.intersection(rng) tm.assert_index_equal(result, expected) assert result.freq == expected.freq # non-monotonic base = DatetimeIndex( ["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"], tz=tz, name="idx") rng2 = DatetimeIndex( ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"], tz=tz, name="idx") expected2 = DatetimeIndex(["2011-01-04", "2011-01-02"], tz=tz, name="idx") rng3 = DatetimeIndex( ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"], tz=tz, name="other", ) expected3 = DatetimeIndex(["2011-01-04", "2011-01-02"], tz=tz, name=None) # GH 7880 rng4 = date_range("7/1/2000", "7/31/2000", freq="D", tz=tz, name="idx") expected4 = DatetimeIndex([], tz=tz, name="idx") assert expected4.freq is None for (rng, expected) in [ (rng2, expected2), (rng3, expected3), (rng4, expected4), ]: result = base.intersection(rng, sort=sort) if sort is None: expected = expected.sort_values() tm.assert_index_equal(result, expected) assert result.freq == expected.freq
def test_construction_dti_with_mixed_timezones(self):
    """Check ``DatetimeIndex`` construction from naive, same-tz and mixed-tz input."""
    # GH 11488 (not changed, added explicit tests)

    def check(built, wanted):
        # exact index match, and the concrete class must be DatetimeIndex
        self.assert_index_equal(built, wanted, exact=True)
        self.assertIsInstance(built, DatetimeIndex)

    # no tz results in DatetimeIndex
    built = DatetimeIndex(
        [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
    wanted = DatetimeIndex(
        [Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
    check(built, wanted)

    # same tz results in DatetimeIndex
    built = DatetimeIndex([
        Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
        Timestamp('2011-01-02 10:00', tz='Asia/Tokyo'),
    ], name='idx')
    wanted = DatetimeIndex(
        [Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00')],
        tz='Asia/Tokyo', name='idx')
    check(built, wanted)

    # same tz results in DatetimeIndex (DST)
    built = DatetimeIndex([
        Timestamp('2011-01-01 10:00', tz='US/Eastern'),
        Timestamp('2011-08-01 10:00', tz='US/Eastern'),
    ], name='idx')
    wanted = DatetimeIndex(
        [Timestamp('2011-01-01 10:00'), Timestamp('2011-08-01 10:00')],
        tz='US/Eastern', name='idx')
    check(built, wanted)

    # a tz-naive value next to a tz-aware one is coerced to tz-aware
    built = DatetimeIndex([
        Timestamp('2011-01-01 10:00'),
        Timestamp('2011-01-02 10:00', tz='US/Eastern'),
    ], name='idx')
    wanted = DatetimeIndex(
        [Timestamp('2011-01-01 05:00'), Timestamp('2011-01-02 10:00')],
        tz='US/Eastern', name='idx')
    check(built, wanted)

    # a tz mismatch involving tz-aware data raises TypeError/ValueError
    with tm.assertRaises(ValueError):
        DatetimeIndex([
            Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
            Timestamp('2011-01-02 10:00', tz='US/Eastern'),
        ], name='idx')

    with tm.assertRaisesRegexp(TypeError, 'data is already tz-aware'):
        DatetimeIndex([
            Timestamp('2011-01-01 10:00'),
            Timestamp('2011-01-02 10:00', tz='US/Eastern'),
        ], tz='Asia/Tokyo', name='idx')

    with tm.assertRaises(ValueError):
        DatetimeIndex([
            Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
            Timestamp('2011-01-02 10:00', tz='US/Eastern'),
        ], tz='US/Eastern', name='idx')

    with tm.assertRaisesRegexp(TypeError, 'data is already tz-aware'):
        # passing tz should result in a DatetimeIndex, then the mismatch
        # raises TypeError
        Index([
            pd.NaT,
            Timestamp('2011-01-01 10:00'),
            pd.NaT,
            Timestamp('2011-01-02 10:00', tz='US/Eastern'),
        ], tz='Asia/Tokyo', name='idx')
def test_nat(self): self.assertIs(DatetimeIndex([np.nan])[0], pd.NaT)
def test_dti_convert_datetime_list(self, tzstr): dr = date_range("2012-06-02", periods=10, tz=tzstr, name="foo") dr2 = DatetimeIndex(list(dr), name="foo", freq="D") tm.assert_index_equal(dr, dr2)
class TestDatetimeIndexOps:
    """Tests for DatetimeIndex operations: repeat, resolution, sorting,
    duplicate handling, NaT bookkeeping, equality and the ``freq`` attribute.

    Several tests take fixtures (``datetime_series``, ``tz_naive_fixture``,
    ``freq_sample``) that are defined outside this class.
    """

    def test_ops_properties_basic(self, datetime_series):
        """Datetime field names are not Series attributes; label access still works."""

        # sanity check that the behavior didn't change
        # GH#7206
        for op in ["year", "day", "second", "weekday"]:
            msg = f"'Series' object has no attribute '{op}'"
            with pytest.raises(AttributeError, match=msg):
                getattr(datetime_series, op)

        # attribute access should still work!
        s = Series(dict(year=2000, month=1, day=10))
        assert s.year == 2000
        assert s.month == 1
        assert s.day == 10
        # "weekday" is not one of the labels, so it must still raise
        msg = "'Series' object has no attribute 'weekday'"
        with pytest.raises(AttributeError, match=msg):
            s.weekday

    def test_repeat_range(self, tz_naive_fixture):
        """``repeat`` and ``np.repeat`` duplicate each element and drop ``freq``."""
        tz = tz_naive_fixture
        rng = date_range("1/1/2000", "1/1/2001")

        result = rng.repeat(5)
        assert result.freq is None
        assert len(result) == 5 * len(rng)

        index = pd.date_range("2001-01-01", periods=2, freq="D", tz=tz)
        exp = DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-02", "2001-01-02"], tz=tz
        )
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        index = pd.date_range("2001-01-01", periods=2, freq="2D", tz=tz)
        exp = DatetimeIndex(
            ["2001-01-01", "2001-01-01", "2001-01-03", "2001-01-03"], tz=tz
        )
        for res in [index.repeat(2), np.repeat(index, 2)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

        # NaT entries are repeated like any other element
        index = DatetimeIndex(["2001-01-01", "NaT", "2003-01-01"], tz=tz)
        exp = DatetimeIndex(
            [
                "2001-01-01",
                "2001-01-01",
                "2001-01-01",
                "NaT",
                "NaT",
                "NaT",
                "2003-01-01",
                "2003-01-01",
                "2003-01-01",
            ],
            tz=tz,
        )
        for res in [index.repeat(3), np.repeat(index, 3)]:
            tm.assert_index_equal(res, exp)
            assert res.freq is None

    def test_repeat(self, tz_naive_fixture):
        """``repeat`` matches ``np.repeat``; ``np.repeat`` with ``axis`` raises."""
        tz = tz_naive_fixture
        reps = 2
        msg = "the 'axis' parameter is not supported"

        rng = pd.date_range(start="2016-01-01", periods=2, freq="30Min", tz=tz)

        expected_rng = DatetimeIndex(
            [
                Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
                Timestamp("2016-01-01 00:00:00", tz=tz, freq="30T"),
                Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
                Timestamp("2016-01-01 00:30:00", tz=tz, freq="30T"),
            ]
        )

        res = rng.repeat(reps)
        tm.assert_index_equal(res, expected_rng)
        assert res.freq is None

        tm.assert_index_equal(np.repeat(rng, reps), expected_rng)
        with pytest.raises(ValueError, match=msg):
            np.repeat(rng, reps, axis=1)

    def test_resolution(self, tz_naive_fixture):
        """``resolution`` of a range built with each frequency alias."""
        tz = tz_naive_fixture
        # frequency alias paired with the resolution string expected for it
        for freq, expected in zip(
            ["A", "Q", "M", "D", "H", "T", "S", "L", "U"],
            [
                "day",
                "day",
                "day",
                "day",
                "hour",
                "minute",
                "second",
                "millisecond",
                "microsecond",
            ],
        ):
            idx = pd.date_range(start="2013-04-01", periods=30, freq=freq, tz=tz)
            assert idx.resolution == expected

    def test_value_counts_unique(self, tz_naive_fixture):
        """``value_counts`` and ``unique`` on repeated values, with and without NaT."""
        tz = tz_naive_fixture
        # GH 7735
        idx = pd.date_range("2011-01-01 09:00", freq="H", periods=10)
        # create repeated values, 'n'th element is repeated by n+1 times
        idx = DatetimeIndex(np.repeat(idx.values, range(1, len(idx) + 1)), tz=tz)

        # expected counts run from 10 down to 1, starting at the latest hour
        exp_idx = pd.date_range("2011-01-01 18:00", freq="-1H", periods=10, tz=tz)
        expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64")
        expected.index = expected.index._with_freq(None)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        expected = pd.date_range("2011-01-01 09:00", freq="H", periods=10, tz=tz)
        expected = expected._with_freq(None)
        tm.assert_index_equal(idx.unique(), expected)

        idx = DatetimeIndex(
            [
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 09:00",
                "2013-01-01 08:00",
                "2013-01-01 08:00",
                pd.NaT,
            ],
            tz=tz,
        )

        # default value_counts: NaT is absent from the expected result
        exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00"], tz=tz)
        expected = Series([3, 2], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(), expected)

        # dropna=False: NaT is counted as well
        exp_idx = DatetimeIndex(["2013-01-01 09:00", "2013-01-01 08:00", pd.NaT], tz=tz)
        expected = Series([3, 2, 1], index=exp_idx)

        for obj in [idx, Series(idx)]:
            tm.assert_series_equal(obj.value_counts(dropna=False), expected)

        tm.assert_index_equal(idx.unique(), exp_idx)

    @pytest.mark.parametrize(
        "idx",
        [
            DatetimeIndex(
                ["2011-01-01", "2011-01-02", "2011-01-03"], freq="D", name="idx"
            ),
            DatetimeIndex(
                ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
                freq="H",
                name="tzidx",
                tz="Asia/Tokyo",
            ),
        ],
    )
    def test_order_with_freq(self, idx):
        """``sort_values`` on an already sorted index that has a ``freq``."""
        ordered = idx.sort_values()
        tm.assert_index_equal(ordered, idx)
        assert ordered.freq == idx.freq

        # descending order: expected freq is that of the reversed index (n == -1)
        ordered = idx.sort_values(ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

        ordered, indexer = idx.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, idx)
        tm.assert_numpy_array_equal(indexer, np.array([0, 1, 2]), check_dtype=False)
        assert ordered.freq == idx.freq

        ordered, indexer = idx.sort_values(return_indexer=True, ascending=False)
        expected = idx[::-1]
        tm.assert_index_equal(ordered, expected)
        tm.assert_numpy_array_equal(indexer, np.array([2, 1, 0]), check_dtype=False)
        assert ordered.freq == expected.freq
        assert ordered.freq.n == -1

    @pytest.mark.parametrize(
        "index_dates,expected_dates",
        [
            (
                ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
                ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
            (
                ["2011-01-01", "2011-01-03", "2011-01-05", "2011-01-02", "2011-01-01"],
                ["2011-01-01", "2011-01-01", "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
            (
                [pd.NaT, "2011-01-03", "2011-01-05", "2011-01-02", pd.NaT],
                [pd.NaT, pd.NaT, "2011-01-02", "2011-01-03", "2011-01-05"],
            ),
        ],
    )
    def test_order_without_freq(self, index_dates, expected_dates, tz_naive_fixture):
        """``sort_values`` on unsorted data: order, indexer and ``freq`` of None."""
        tz = tz_naive_fixture

        # without freq
        index = DatetimeIndex(index_dates, tz=tz, name="idx")
        expected = DatetimeIndex(expected_dates, tz=tz, name="idx")

        ordered = index.sort_values()
        tm.assert_index_equal(ordered, expected)
        assert ordered.freq is None

        ordered = index.sort_values(ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True)
        tm.assert_index_equal(ordered, expected)

        # the same indexer is expected for every parametrized input
        exp = np.array([0, 4, 3, 1, 2])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

        ordered, indexer = index.sort_values(return_indexer=True, ascending=False)
        tm.assert_index_equal(ordered, expected[::-1])

        exp = np.array([2, 1, 3, 4, 0])
        tm.assert_numpy_array_equal(indexer, exp, check_dtype=False)
        assert ordered.freq is None

    def test_drop_duplicates_metadata(self, freq_sample):
        """``drop_duplicates`` keeps ``freq`` when nothing is dropped, else resets it."""
        # GH 10115
        idx = pd.date_range("2011-01-01", freq=freq_sample, periods=10, name="idx")
        result = idx.drop_duplicates()
        tm.assert_index_equal(idx, result)
        assert idx.freq == result.freq

        idx_dup = idx.append(idx)
        assert idx_dup.freq is None  # freq is reset
        result = idx_dup.drop_duplicates()
        expected = idx._with_freq(None)
        tm.assert_index_equal(result, expected)
        assert result.freq is None

    @pytest.mark.parametrize(
        "keep, expected, index",
        [
            ("first", np.concatenate(([False] * 10, [True] * 5)), np.arange(0, 10)),
            ("last", np.concatenate(([True] * 5, [False] * 10)), np.arange(5, 15)),
            (
                False,
                np.concatenate(([True] * 5, [False] * 5, [True] * 5)),
                np.arange(5, 10),
            ),
        ],
    )
    def test_drop_duplicates(self, freq_sample, keep, expected, index):
        """``duplicated``/``drop_duplicates`` agree between Index and Series."""
        # to check Index/Series compat
        idx = pd.date_range("2011-01-01", freq=freq_sample, periods=10, name="idx")
        # the first five entries are appended again, giving 15 values
        idx = idx.append(idx[:5])

        tm.assert_numpy_array_equal(idx.duplicated(keep=keep), expected)
        # rebind: the boolean mask becomes the expected de-duplicated index
        expected = idx[~expected]

        result = idx.drop_duplicates(keep=keep)
        tm.assert_index_equal(result, expected)

        result = Series(idx).drop_duplicates(keep=keep)
        tm.assert_series_equal(result, Series(expected, index=index))

    def test_infer_freq(self, freq_sample):
        """``freq="infer"`` recovers the frequency from integer values."""
        # GH 11018
        idx = pd.date_range("2011-01-01 09:00:00", freq=freq_sample, periods=10)
        result = DatetimeIndex(idx.asi8, freq="infer")
        tm.assert_index_equal(idx, result)
        assert result.freq == freq_sample

    def test_nat(self, tz_naive_fixture):
        """NaT bookkeeping: ``_na_value``, ``_isnan``, ``hasnans`` and ``_nan_idxs``."""
        tz = tz_naive_fixture
        assert DatetimeIndex._na_value is pd.NaT
        assert DatetimeIndex([])._na_value is pd.NaT

        # index without missing values
        idx = DatetimeIndex(["2011-01-01", "2011-01-02"], tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, False]))
        assert idx.hasnans is False
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([], dtype=np.intp))

        # index with one NaT, at position 1
        idx = DatetimeIndex(["2011-01-01", "NaT"], tz=tz)
        assert idx._can_hold_na

        tm.assert_numpy_array_equal(idx._isnan, np.array([False, True]))
        assert idx.hasnans is True
        tm.assert_numpy_array_equal(idx._nan_idxs, np.array([1], dtype=np.intp))

    def test_equals(self):
        """``equals`` against copies, object casts, other timezones and
        out-of-bounds values."""
        # GH 13107
        idx = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"])
        assert idx.equals(idx)
        assert idx.equals(idx.copy())
        assert idx.equals(idx.astype(object))
        assert idx.astype(object).equals(idx)
        assert idx.astype(object).equals(idx.astype(object))
        # lists and Series with the same values are not considered equal
        assert not idx.equals(list(idx))
        assert not idx.equals(Series(idx))

        idx2 = DatetimeIndex(["2011-01-01", "2011-01-02", "NaT"], tz="US/Pacific")
        assert not idx.equals(idx2)
        assert not idx.equals(idx2.copy())
        assert not idx.equals(idx2.astype(object))
        assert not idx.astype(object).equals(idx2)
        assert not idx.equals(list(idx2))
        assert not idx.equals(Series(idx2))

        # same internal, different tz
        idx3 = DatetimeIndex(idx.asi8, tz="US/Pacific")
        tm.assert_numpy_array_equal(idx.asi8, idx3.asi8)
        assert not idx.equals(idx3)
        assert not idx.equals(idx3.copy())
        assert not idx.equals(idx3.astype(object))
        assert not idx.astype(object).equals(idx3)
        assert not idx.equals(list(idx3))
        assert not idx.equals(Series(idx3))

        # check that we do not raise when comparing with OutOfBounds objects
        oob = Index([datetime(2500, 1, 1)] * 3, dtype=object)
        assert not idx.equals(oob)
        assert not idx2.equals(oob)
        assert not idx3.equals(oob)

        # check that we do not raise when comparing with OutOfBounds dt64
        oob2 = oob.map(np.datetime64)
        assert not idx.equals(oob2)
        assert not idx2.equals(oob2)
        assert not idx3.equals(oob2)

    @pytest.mark.parametrize("values", [["20180101", "20180103", "20180105"], []])
    @pytest.mark.parametrize("freq", ["2D", Day(2), "2B", BDay(2), "48H", Hour(48)])
    @pytest.mark.parametrize("tz", [None, "US/Eastern"])
    def test_freq_setter(self, values, freq, tz):
        """Setting ``freq`` through ``_data`` accepts strings and offsets, and None."""
        # GH 20678
        idx = DatetimeIndex(values, tz=tz)

        # can set to an offset, converting from string if necessary
        idx._data.freq = freq
        assert idx.freq == freq
        assert isinstance(idx.freq, DateOffset)

        # can reset to None
        idx._data.freq = None
        assert idx.freq is None

    def test_freq_setter_errors(self):
        """Setting an incompatible or unparsable ``freq`` raises ``ValueError``."""
        # GH 20678
        idx = DatetimeIndex(["20180101", "20180103", "20180105"])

        # setting with an incompatible freq
        msg = (
            "Inferred frequency 2D from passed values does not conform to "
            "passed frequency 5D"
        )
        with pytest.raises(ValueError, match=msg):
            idx._data.freq = "5D"

        # setting with non-freq string
        with pytest.raises(ValueError, match="Invalid frequency"):
            idx._data.freq = "foo"

    def test_freq_view_safe(self):
        """Clearing ``freq`` on a derived index leaves the source objects' ``freq``."""
        # Setting the freq for one DatetimeIndex shouldn't alter the freq
        # for another that views the same data

        dti = pd.date_range("2016-01-01", periods=5)
        dta = dti._data

        dti2 = DatetimeIndex(dta)._with_freq(None)
        assert dti2.freq is None

        # Original was not altered
        assert dti.freq == "D"
        assert dta.freq == "D"
def test_order(self):
    """Check ``DatetimeIndex.sort_values`` with and without a ``freq``.

    The first loop covers indexes that are already sorted and carry a
    frequency.  The second loop iterates over ``self.tz`` (defined outside
    this function) and covers unsorted data, including NaT entries.
    """
    # with freq
    idx1 = DatetimeIndex(['2011-01-01', '2011-01-02', '2011-01-03'],
                         freq='D', name='idx')
    idx2 = DatetimeIndex(['2011-01-01 09:00', '2011-01-01 10:00',
                          '2011-01-01 11:00'], freq='H',
                         tz='Asia/Tokyo', name='tzidx')

    for idx in [idx1, idx2]:
        ordered = idx.sort_values()
        self.assert_index_equal(ordered, idx)
        self.assertEqual(ordered.freq, idx.freq)

        # descending order: expected freq is that of the reversed index (n == -1)
        ordered = idx.sort_values(ascending=False)
        expected = idx[::-1]
        self.assert_index_equal(ordered, expected)
        self.assertEqual(ordered.freq, expected.freq)
        self.assertEqual(ordered.freq.n, -1)

        ordered, indexer = idx.sort_values(return_indexer=True)
        self.assert_index_equal(ordered, idx)
        self.assert_numpy_array_equal(indexer,
                                      np.array([0, 1, 2]),
                                      check_dtype=False)
        self.assertEqual(ordered.freq, idx.freq)

        ordered, indexer = idx.sort_values(return_indexer=True,
                                           ascending=False)
        expected = idx[::-1]
        self.assert_index_equal(ordered, expected)
        self.assert_numpy_array_equal(indexer,
                                      np.array([2, 1, 0]),
                                      check_dtype=False)
        self.assertEqual(ordered.freq, expected.freq)
        self.assertEqual(ordered.freq.n, -1)

    # without freq
    for tz in self.tz:
        idx1 = DatetimeIndex(['2011-01-01', '2011-01-03', '2011-01-05',
                              '2011-01-02', '2011-01-01'],
                             tz=tz, name='idx1')
        exp1 = DatetimeIndex(['2011-01-01', '2011-01-01', '2011-01-02',
                              '2011-01-03', '2011-01-05'],
                             tz=tz, name='idx1')

        # same data as idx1; only the name differs
        idx2 = DatetimeIndex(['2011-01-01', '2011-01-03', '2011-01-05',
                              '2011-01-02', '2011-01-01'],
                             tz=tz, name='idx2')

        exp2 = DatetimeIndex(['2011-01-01', '2011-01-01', '2011-01-02',
                              '2011-01-03', '2011-01-05'],
                             tz=tz, name='idx2')

        # NaT entries are expected first in ascending order
        idx3 = DatetimeIndex([pd.NaT, '2011-01-03', '2011-01-05',
                              '2011-01-02', pd.NaT], tz=tz, name='idx3')
        exp3 = DatetimeIndex([pd.NaT, pd.NaT, '2011-01-02', '2011-01-03',
                              '2011-01-05'], tz=tz, name='idx3')

        for idx, expected in [(idx1, exp1), (idx2, exp2), (idx3, exp3)]:
            ordered = idx.sort_values()
            self.assert_index_equal(ordered, expected)
            self.assertIsNone(ordered.freq)

            ordered = idx.sort_values(ascending=False)
            self.assert_index_equal(ordered, expected[::-1])
            self.assertIsNone(ordered.freq)

            ordered, indexer = idx.sort_values(return_indexer=True)
            self.assert_index_equal(ordered, expected)

            # the same indexer is expected for all three inputs
            exp = np.array([0, 4, 3, 1, 2])
            self.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            self.assertIsNone(ordered.freq)

            ordered, indexer = idx.sort_values(return_indexer=True,
                                               ascending=False)
            self.assert_index_equal(ordered, expected[::-1])

            exp = np.array([2, 1, 3, 4, 0])
            self.assert_numpy_array_equal(indexer, exp, check_dtype=False)
            self.assertIsNone(ordered.freq)