class TimestampOps(object):
    """ASV benchmarks for Timestamp.replace and pydatetime conversion."""

    goal_time = 0.2

    def setup(self):
        # naive and tz-aware fixtures shared by the timing methods
        self.ts = Timestamp('2017-08-25 08:16:14')
        self.ts_tz = Timestamp('2017-08-25 08:16:14', tz='US/Eastern')
        # a CET wall time just before the spring DST transition
        wall = datetime.datetime(2016, 3, 27, 1)
        self.tzinfo = pytz.timezone('CET').localize(wall, is_dst=False).tzinfo
        self.ts2 = Timestamp(wall)

    def time_replace_tz(self):
        self.ts.replace(tzinfo=pytz.timezone('US/Eastern'))

    def time_replace_across_dst(self):
        self.ts2.replace(tzinfo=self.tzinfo)

    def time_replace_None(self):
        self.ts_tz.replace(tzinfo=None)

    def time_to_pydatetime(self):
        self.ts.to_pydatetime()

    def time_to_pydatetime_tz(self):
        self.ts_tz.to_pydatetime()
def test_timestamp_tz_localize_nonexistent_raise(self, tz):
    # GH 8917
    stamp = Timestamp('2015-03-29 02:20:00')
    # a wall time that does not exist in `tz` must raise when asked to
    with pytest.raises(pytz.NonExistentTimeError):
        stamp.tz_localize(tz, nonexistent='raise')
    # an unrecognized `nonexistent` option is rejected outright
    with pytest.raises(ValueError):
        stamp.tz_localize(tz, nonexistent='foo')
def test_datetime_name_accessors(self, time_locale):
    """Check day/month name accessors against the locale's calendar names.

    Tests Monday -> Sunday and January -> December, in that sequence.
    """
    if time_locale is None:
        # If the time_locale is None, day_name and month_name should
        # return the english attributes
        expected_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday',
                         'Friday', 'Saturday', 'Sunday']
        expected_months = ['January', 'February', 'March', 'April', 'May',
                           'June', 'July', 'August', 'September', 'October',
                           'November', 'December']
    else:
        with tm.set_locale(time_locale, locale.LC_TIME):
            expected_days = calendar.day_name[:]
            expected_months = calendar.month_name[1:]

    # GH#11128
    dti = pd.date_range(freq='D', start=datetime(1998, 1, 1), periods=365)
    english_days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday',
                    'Saturday', 'Sunday']
    for day, name, eng_name in zip(range(4, 11), expected_days, english_days):
        name = name.capitalize()
        assert dti.weekday_name[day] == eng_name
        assert dti.day_name(locale=time_locale)[day] == name
        ts = Timestamp(datetime(2016, 4, day))
        with tm.assert_produces_warning(FutureWarning,
                                        check_stacklevel=False):
            assert ts.weekday_name == eng_name
        assert ts.day_name(locale=time_locale) == name
    dti = dti.append(DatetimeIndex([pd.NaT]))
    assert np.isnan(dti.day_name(locale=time_locale)[-1])
    ts = Timestamp(pd.NaT)
    assert np.isnan(ts.day_name(locale=time_locale))

    # GH#12805
    dti = pd.date_range(freq='M', start='2012', end='2013')
    result = dti.month_name(locale=time_locale)
    expected = Index([month.capitalize() for month in expected_months])

    # work around different normalization schemes
    # https://github.com/pandas-dev/pandas/issues/22342
    if not compat.PY2:
        result = result.str.normalize("NFD")
        expected = expected.str.normalize("NFD")

    tm.assert_index_equal(result, expected)

    for date, expected in zip(dti, expected_months):
        result = date.month_name(locale=time_locale)
        expected = expected.capitalize()

        if not compat.PY2:
            result = unicodedata.normalize("NFD", result)
            # BUG FIX: previously normalized `result` into `expected`,
            # which made the comparison below compare result to itself
            # and pass unconditionally.
            expected = unicodedata.normalize("NFD", expected)

        assert result == expected
    dti = dti.append(DatetimeIndex([pd.NaT]))
    assert np.isnan(dti.month_name(locale=time_locale)[-1])
class TimestampProperties:
    """ASV benchmarks for cheap Timestamp property/accessor lookups."""

    _tzs = [None, pytz.timezone('Europe/Amsterdam'), pytz.UTC,
            dateutil.tz.tzutc()]
    _freqs = [None, 'B']
    params = [_tzs, _freqs]
    param_names = ['tz', 'freq']

    def setup(self, tz, freq):
        self.ts = Timestamp('2017-08-25 08:16:14', tzinfo=tz, freq=freq)

    def time_tz(self, tz, freq):
        self.ts.tz

    def time_dayofweek(self, tz, freq):
        self.ts.dayofweek

    def time_weekday_name(self, tz, freq):
        # BUG FIX: was `self.ts.day_name` without parentheses, which only
        # benchmarked the attribute lookup and never computed the name
        # (cf. time_month_name below, which does call its accessor).
        self.ts.day_name()

    def time_dayofyear(self, tz, freq):
        self.ts.dayofyear

    def time_week(self, tz, freq):
        self.ts.week

    def time_quarter(self, tz, freq):
        self.ts.quarter

    def time_days_in_month(self, tz, freq):
        self.ts.days_in_month

    def time_freqstr(self, tz, freq):
        self.ts.freqstr

    def time_is_month_start(self, tz, freq):
        self.ts.is_month_start

    def time_is_month_end(self, tz, freq):
        self.ts.is_month_end

    def time_is_quarter_start(self, tz, freq):
        self.ts.is_quarter_start

    def time_is_quarter_end(self, tz, freq):
        self.ts.is_quarter_end

    def time_is_year_start(self, tz, freq):
        self.ts.is_year_start

    def time_is_year_end(self, tz, freq):
        self.ts.is_year_end

    def time_is_leap_year(self, tz, freq):
        self.ts.is_leap_year

    def time_microsecond(self, tz, freq):
        self.ts.microsecond

    def time_month_name(self, tz, freq):
        self.ts.month_name()
def test_timestamp_tz_localize(self, tz): stamp = Timestamp('3/11/2012 04:00') result = stamp.tz_localize(tz) expected = Timestamp('3/11/2012 04:00', tz=tz) assert result.hour == expected.hour assert result == expected
def test_replace_preserves_nanos(self, tz_aware_fixture): tz = tz_aware_fixture # GH#14621, GH#7825 ts = Timestamp('2016-01-01 09:00:00.000000123', tz=tz) result = ts.replace(hour=0) expected = Timestamp('2016-01-01 00:00:00.000000123', tz=tz) assert result == expected
def test_timestamp_to_datetime_tzoffset(self): #tzoffset from dateutil.tz import tzoffset tzinfo = tzoffset(None, 7200) expected = Timestamp('3/11/2012 04:00', tz=tzinfo) result = Timestamp(expected.to_datetime()) self.assertEquals(expected, result)
def setup(self):
    # naive and tz-aware fixtures shared by the timing methods
    self.ts = Timestamp('2017-08-25 08:16:14')
    self.ts_tz = Timestamp('2017-08-25 08:16:14', tz='US/Eastern')
    # a CET wall time just before the spring DST transition
    wall = datetime.datetime(2016, 3, 27, 1)
    self.tzinfo = pytz.timezone('CET').localize(wall, is_dst=False).tzinfo
    self.ts2 = Timestamp(wall)
def test_tz_localize_errors_invalid_arg(self):
    # GH 22644
    stamp = Timestamp('2015-03-29 02:00:00')
    # an unknown `errors` value raises after the deprecation warning fires
    with pytest.raises(ValueError):
        with tm.assert_produces_warning(FutureWarning):
            stamp.tz_localize('Europe/Warsaw', errors='foo')
def test_constructor_strptime(self):
    # GH25016: Timestamp.strptime is deliberately not implemented
    fmt = '%Y%m%d-%H%M%S-%f%z'
    value = '20190129-235348-000001+0000'
    with pytest.raises(NotImplementedError):
        Timestamp.strptime(value, fmt)
def test_replace_aware(self, tz): # GH#14621, GH#7825 # replacing datetime components with and w/o presence of a timezone ts = Timestamp('2016-01-01 09:00:00', tz=tz) result = ts.replace(hour=0) expected = Timestamp('2016-01-01 00:00:00', tz=tz) assert result == expected
def test_names(self, data, time_locale):
    # GH 17354: .weekday_name, .day_name(), .month_name
    with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
        assert data.weekday_name == 'Monday'

    if time_locale is None:
        # no locale -> English names
        expected_day = 'Monday'
        expected_month = 'August'
    else:
        with tm.set_locale(time_locale, locale.LC_TIME):
            expected_day = calendar.day_name[0].capitalize()
            expected_month = calendar.month_name[8].capitalize()

    result_day = data.day_name(time_locale)
    result_month = data.month_name(time_locale)

    # Work around https://github.com/pandas-dev/pandas/issues/22342
    # different normalizations
    if not PY2:
        expected_day = unicodedata.normalize("NFD", expected_day)
        expected_month = unicodedata.normalize("NFD", expected_month)
        result_day = unicodedata.normalize("NFD", result_day)
        result_month = unicodedata.normalize("NFD", result_month)

    assert result_day == expected_day
    assert result_month == expected_month

    # Test NaT
    nan_ts = Timestamp(NaT)
    assert np.isnan(nan_ts.day_name(time_locale))
    assert np.isnan(nan_ts.month_name(time_locale))
def test_to_period_tz_warning(self):
    # GH#21333: converting to Period drops the timezone, so a warning
    # must be issued
    stamp = Timestamp('2009-04-15 16:17:18', tz='US/Eastern')
    with tm.assert_produces_warning(UserWarning):
        stamp.to_period('D')
def test_timestamp_tz_localize_explicit(self): stamp = Timestamp("3/11/2012 04:00") result = stamp.tz_localize(self.tz("US/Eastern")) expected = Timestamp("3/11/2012 04:00", tz=self.tz("US/Eastern")) self.assertEqual(result.hour, expected.hour) self.assertEqual(result, expected)
def test_cant_compare_tz_naive_w_aware(self, utc_fixture):
    # see GH#1404: every comparison between a naive and an aware
    # Timestamp must raise, in both orders
    naive = Timestamp('3/12/2012')
    aware = Timestamp('3/12/2012', tz=utc_fixture)

    for left, right in [(naive, aware), (aware, naive)]:
        with pytest.raises(TypeError):
            left == right
        with pytest.raises(TypeError):
            left != right
        with pytest.raises(TypeError):
            left < right
        with pytest.raises(TypeError):
            left <= right
        with pytest.raises(TypeError):
            left > right
        with pytest.raises(TypeError):
            left >= right

    # comparisons against stdlib datetimes fall back to unequal
    assert not naive == aware.to_pydatetime()
    assert not naive.to_pydatetime() == aware
def test_timestamp_tz_localize(self): stamp = Timestamp('3/11/2012 04:00') result = stamp.tz_localize('US/Eastern') expected = Timestamp('3/11/2012 04:00', tz='US/Eastern') self.assertEquals(result.hour, expected.hour) self.assertEquals(result, expected)
class TimestampAcrossDst:
    """Benchmark Timestamp.replace(tzinfo=...) across a DST transition."""

    def setup(self):
        # a CET wall time just before the spring DST switch
        wall = datetime.datetime(2016, 3, 27, 1)
        self.tzinfo = pytz.timezone('CET').localize(wall, is_dst=False).tzinfo
        self.ts2 = Timestamp(wall)

    def time_replace_across_dst(self):
        self.ts2.replace(tzinfo=self.tzinfo)
def test_tz_convert_roundtrip(self, stamp, tz): ts = Timestamp(stamp, tz='UTC') converted = ts.tz_convert(tz) reset = converted.tz_convert(None) assert reset == Timestamp(stamp) assert reset.tzinfo is None assert reset == converted.tz_convert('UTC').tz_localize(None)
def test_astimezone(self, tzstr): # astimezone is an alias for tz_convert, so keep it with # the tz_convert tests utcdate = Timestamp('3/11/2012 22:00', tz='UTC') expected = utcdate.tz_convert(tzstr) result = utcdate.astimezone(tzstr) assert expected == result assert isinstance(result, Timestamp)
def test_timestamp_tz_localize_nonexistent_shift_invalid(self, offset,
                                                         tz_type):
    # GH 8917, 24466: a timedelta shift that lands on another
    # nonexistent wall time must raise
    tz = tz_type + 'Europe/Warsaw'
    stamp = Timestamp('2015-03-29 02:20:00')
    msg = "The provided timedelta will relocalize on a nonexistent time"
    with pytest.raises(ValueError, match=msg):
        stamp.tz_localize(tz, nonexistent=timedelta(seconds=offset))
def test_replace_dst_fold(self, fold, tz):
    # GH 25017: replace() must honor the `fold` attribute when the new
    # wall time is ambiguous around a DST transition
    wall = datetime(2019, 10, 27, 2, 30)
    stamp = Timestamp(wall, tz=tz)
    result = stamp.replace(hour=1, fold=fold)
    expected = Timestamp(datetime(2019, 10, 27, 1, 30)).tz_localize(
        tz, ambiguous=not fold
    )
    assert result == expected
def test_replace_multiple(self, tz): # GH#14621, GH#7825 # replacing datetime components with and w/o presence of a timezone # test all ts = Timestamp('2016-01-01 09:00:00.000000123', tz=tz) result = ts.replace(year=2015, month=2, day=2, hour=0, minute=5, second=5, microsecond=5, nanosecond=5) expected = Timestamp('2015-02-02 00:05:05.000005005', tz=tz) assert result == expected
def test_tz_localize_errors_coerce(self):
    # GH 22644
    # make sure errors='coerce' gets mapped correctly to nonexistent
    tz = 'Europe/Warsaw'
    stamp = Timestamp('2015-03-29 02:00:00')
    with tm.assert_produces_warning(FutureWarning):
        result = stamp.tz_localize(tz, errors='coerce')
    expected = stamp.tz_localize(tz, nonexistent='NaT')
    # both evaluate to the NaT singleton, hence identity comparison
    assert result is expected
def test_round_tzaware(self): dt = Timestamp('20130101 09:10:11', tz='US/Eastern') result = dt.round('D') expected = Timestamp('20130101', tz='US/Eastern') assert result == expected dt = Timestamp('20130101 09:10:11', tz='US/Eastern') result = dt.round('s') assert result == dt
def test_to_pydatetime_nonzero_nano(self):
    stamp = Timestamp('2011-01-01 9:00:00.123456789')

    # Warn the user of data loss (nanoseconds are truncated).
    with tm.assert_produces_warning(UserWarning, check_stacklevel=False):
        result = stamp.to_pydatetime()

    assert result == datetime(2011, 1, 1, 9, 0, 0, 123456)
def __init__(self, site, start, end, savepath='data'):
    """Store query parameters and initialize lazy data caches.

    Parameters
    ----------
    site : site identifier passed through unchanged
    start, end : anything ``pandas.Timestamp`` accepts
    savepath : str or path-like, default 'data'
        Directory under which downloaded data is saved.
    """
    self.site = site
    self.start = Timestamp(start)
    self.end = Timestamp(end)
    # BUG FIX: was `Path('.' or savepath)` -- the non-empty string '.' is
    # always truthy, so the expression always evaluated to Path('.') and
    # the `savepath` argument (and its 'data' default) was silently
    # ignored.
    self.savepath = Path(savepath)
    # lazily-populated caches, filled on first access
    self._daily_json = None
    self._insta_json = None
    self._daily_data = None
    self._insta_data = None
def test_tz_localize_roundtrip(self, stamp, tz):
    naive = Timestamp(stamp)
    localized = naive.tz_localize(tz)
    assert localized == Timestamp(stamp, tz=tz)

    # localizing an already-aware Timestamp is an error
    with pytest.raises(TypeError):
        localized.tz_localize(tz)

    # stripping the timezone recovers the original naive stamp
    reset = localized.tz_localize(None)
    assert reset == naive
    assert reset.tzinfo is None
def __init__(
    self,
    name,
    year=None,
    month=None,
    day=None,
    offset=None,
    observance=None,
    start_date=None,
    end_date=None,
    days_of_week=None,
):
    """
    Parameters
    ----------
    name : str
        Name of the holiday, defaults to class name
    offset : array of pandas.tseries.offsets or
            class from pandas.tseries.offsets
        computes offset from date
    observance: function
        computes when holiday is given a pandas Timestamp
    days_of_week:
        provide a tuple of days e.g  (0,1,2,3,) for Monday Through Thursday
        Monday=0,..,Sunday=6

    Examples
    --------
    >>> from pandas.tseries.holiday import Holiday, nearest_workday
    >>> from pandas import DateOffset
    >>> from dateutil.relativedelta import MO
    >>> USMemorialDay = Holiday('MemorialDay', month=5, day=24,
    ...                         offset=DateOffset(weekday=MO(1)))
    >>> USLaborDay = Holiday('Labor Day', month=9, day=1,
    ...                      offset=DateOffset(weekday=MO(1)))
    >>> July3rd = Holiday('July 3rd', month=7, day=3,)
    >>> NewYears = Holiday('New Years Day', month=1,  day=1,
    ...                    observance=nearest_workday),
    >>> July3rd = Holiday('July 3rd', month=7, day=3,
    ...                   days_of_week=(0, 1, 2, 3))
    """
    # offset and observance are mutually exclusive ways of adjusting
    # the holiday date
    if offset is not None and observance is not None:
        raise NotImplementedError("Cannot use both offset and observance.")
    self.name = name
    self.year = year
    self.month = month
    self.day = day
    self.offset = offset
    # normalize the optional date bounds to Timestamps, preserving None
    self.start_date = (
        Timestamp(start_date) if start_date is not None else start_date
    )
    self.end_date = Timestamp(end_date) if end_date is not None else end_date
    self.observance = observance
    assert days_of_week is None or type(days_of_week) == tuple
    self.days_of_week = days_of_week
def test_timestamp_tz_localize_nonexistent_shift(self, start_ts, tz, end_ts, shift, tz_type): # GH 8917, 24466 tz = tz_type + tz if isinstance(shift, str): shift = 'shift_' + shift ts = Timestamp(start_ts) result = ts.tz_localize(tz, nonexistent=shift) expected = Timestamp(end_ts).tz_localize(tz) assert result == expected
def test_tz_convert_utc_with_system_utc(self):
    """Converting a dateutil system-UTC Timestamp to real UTC is a no-op."""
    from pandas._libs.tslibs.timezones import maybe_get_tz

    # from system utc to real utc
    ts = Timestamp('2001-01-05 11:56', tz=maybe_get_tz('dateutil/UTC'))
    # check that the time hasn't changed.
    assert ts == ts.tz_convert(dateutil.tz.tzutc())
    # BUG FIX: an exact copy-pasted duplicate of the stanza above
    # (identical construction, comments, and assertion) was removed;
    # it added no coverage.
class TestDatetimeIndex:
    """Construction tests for DatetimeIndex (and Index dispatching to it):
    frequency validation, timezone handling (mixed/naive/aware inputs),
    dtype handling, and out-of-bounds behavior."""

    @pytest.mark.parametrize("dt_cls", [DatetimeIndex, DatetimeArray._from_sequence])
    def test_freq_validation_with_nat(self, dt_cls):
        # GH#11587 make sure we get a useful error message when generate_range
        # raises
        msg = (
            "Inferred frequency None from passed values does not conform "
            "to passed frequency D"
        )
        with pytest.raises(ValueError, match=msg):
            dt_cls([pd.NaT, pd.Timestamp("2011-01-01")], freq="D")
        with pytest.raises(ValueError, match=msg):
            dt_cls([pd.NaT, pd.Timestamp("2011-01-01").value], freq="D")

    def test_categorical_preserves_tz(self):
        # GH#18664 retain tz when going DTI-->Categorical-->DTI
        # TODO: parametrize over DatetimeIndex/DatetimeArray
        #  once CategoricalIndex(DTA) works
        dti = pd.DatetimeIndex(
            [pd.NaT, "2015-01-01", "1999-04-06 15:14:13", "2015-01-01"],
            tz="US/Eastern",
        )

        ci = pd.CategoricalIndex(dti)
        carr = pd.Categorical(dti)
        cser = pd.Series(ci)

        for obj in [ci, carr, cser]:
            result = pd.DatetimeIndex(obj)
            tm.assert_index_equal(result, dti)

    def test_dti_with_period_data_raises(self):
        # GH#23675: Period-dtype data is never valid DatetimeIndex input
        data = pd.PeriodIndex(["2016Q1", "2016Q2"], freq="Q")

        with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
            DatetimeIndex(data)

        with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
            to_datetime(data)

        with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
            DatetimeIndex(period_array(data))

        with pytest.raises(TypeError, match="PeriodDtype data is invalid"):
            to_datetime(period_array(data))

    def test_dti_with_timedelta64_data_raises(self):
        # GH#23675 deprecated, enforced in GH#29794
        data = np.array([0], dtype="m8[ns]")
        msg = r"timedelta64\[ns\] cannot be converted to datetime64"
        with pytest.raises(TypeError, match=msg):
            DatetimeIndex(data)

        with pytest.raises(TypeError, match=msg):
            to_datetime(data)

        with pytest.raises(TypeError, match=msg):
            DatetimeIndex(pd.TimedeltaIndex(data))

        with pytest.raises(TypeError, match=msg):
            to_datetime(pd.TimedeltaIndex(data))

    def test_construction_caching(self):
        # datetime-like columns of all flavors survive DataFrame construction
        df = pd.DataFrame(
            {
                "dt": pd.date_range("20130101", periods=3),
                "dttz": pd.date_range("20130101", periods=3, tz="US/Eastern"),
                "dt_with_null": [
                    pd.Timestamp("20130101"),
                    pd.NaT,
                    pd.Timestamp("20130103"),
                ],
                "dtns": pd.date_range("20130101", periods=3, freq="ns"),
            }
        )
        assert df.dttz.dtype.tz.zone == "US/Eastern"

    @pytest.mark.parametrize(
        "kwargs",
        [{"tz": "dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}],
    )
    def test_construction_with_alt(self, kwargs, tz_aware_fixture):
        # re-constructing from an existing DTI with redundant tz/dtype
        # keyword arguments is a no-op
        tz = tz_aware_fixture
        i = pd.date_range("20130101", periods=5, freq="H", tz=tz)
        kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()}
        result = DatetimeIndex(i, **kwargs)
        tm.assert_index_equal(i, result)

    @pytest.mark.parametrize(
        "kwargs",
        [{"tz": "dtype.tz"}, {"dtype": "dtype"}, {"dtype": "dtype", "tz": "dtype.tz"}],
    )
    def test_construction_with_alt_tz_localize(self, kwargs, tz_aware_fixture):
        tz = tz_aware_fixture
        i = pd.date_range("20130101", periods=5, freq="H", tz=tz)
        kwargs = {key: attrgetter(val)(i) for key, val in kwargs.items()}

        if "tz" in kwargs:
            result = DatetimeIndex(i.asi8, tz="UTC").tz_convert(kwargs["tz"])

            expected = DatetimeIndex(i, **kwargs)
            tm.assert_index_equal(result, expected)

        # localize into the provided tz
        i2 = DatetimeIndex(i.tz_localize(None).asi8, tz="UTC")
        expected = i.tz_localize(None).tz_localize("UTC")
        tm.assert_index_equal(i2, expected)

        # incompat tz/dtype
        msg = "cannot supply both a tz and a dtype with a tz"
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(i.tz_localize(None).asi8, dtype=i.dtype, tz="US/Pacific")

    def test_construction_index_with_mixed_timezones(self):
        # gh-11488: no tz results in DatetimeIndex
        result = Index([Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx")
        exp = DatetimeIndex(
            [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is None

        # same tz results in DatetimeIndex
        result = Index(
            [
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
            ],
            name="idx",
        )
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")],
            tz="Asia/Tokyo",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

        # same tz results in DatetimeIndex (DST)
        result = Index(
            [
                Timestamp("2011-01-01 10:00", tz="US/Eastern"),
                Timestamp("2011-08-01 10:00", tz="US/Eastern"),
            ],
            name="idx",
        )
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")],
            tz="US/Eastern",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

        # Different tz results in Index(dtype=object)
        result = Index(
            [
                Timestamp("2011-01-01 10:00"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            name="idx",
        )
        exp = Index(
            [
                Timestamp("2011-01-01 10:00"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            dtype="object",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert not isinstance(result, DatetimeIndex)

        result = Index(
            [
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            name="idx",
        )
        exp = Index(
            [
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            dtype="object",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert not isinstance(result, DatetimeIndex)

        # length = 1
        result = Index([Timestamp("2011-01-01")], name="idx")
        exp = DatetimeIndex([Timestamp("2011-01-01")], name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is None

        # length = 1 with tz
        result = Index([Timestamp("2011-01-01 10:00", tz="Asia/Tokyo")], name="idx")
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00")], tz="Asia/Tokyo", name="idx"
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

    def test_construction_index_with_mixed_timezones_with_NaT(self):
        # see gh-11488
        result = Index(
            [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")],
            name="idx",
        )
        exp = DatetimeIndex(
            [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")],
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is None

        # Same tz results in DatetimeIndex
        result = Index(
            [
                pd.NaT,
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                pd.NaT,
                Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
            ],
            name="idx",
        )
        exp = DatetimeIndex(
            [
                pd.NaT,
                Timestamp("2011-01-01 10:00"),
                pd.NaT,
                Timestamp("2011-01-02 10:00"),
            ],
            tz="Asia/Tokyo",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

        # same tz results in DatetimeIndex (DST)
        result = Index(
            [
                Timestamp("2011-01-01 10:00", tz="US/Eastern"),
                pd.NaT,
                Timestamp("2011-08-01 10:00", tz="US/Eastern"),
            ],
            name="idx",
        )
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00"), pd.NaT, Timestamp("2011-08-01 10:00")],
            tz="US/Eastern",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

        # different tz results in Index(dtype=object)
        result = Index(
            [
                pd.NaT,
                Timestamp("2011-01-01 10:00"),
                pd.NaT,
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            name="idx",
        )
        exp = Index(
            [
                pd.NaT,
                Timestamp("2011-01-01 10:00"),
                pd.NaT,
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            dtype="object",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert not isinstance(result, DatetimeIndex)

        result = Index(
            [
                pd.NaT,
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                pd.NaT,
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            name="idx",
        )
        exp = Index(
            [
                pd.NaT,
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                pd.NaT,
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            dtype="object",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert not isinstance(result, DatetimeIndex)

        # all NaT
        result = Index([pd.NaT, pd.NaT], name="idx")
        exp = DatetimeIndex([pd.NaT, pd.NaT], name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is None

        # all NaT with tz
        result = Index([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")
        exp = DatetimeIndex([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)
        assert result.tz is not None
        assert result.tz == exp.tz

    def test_construction_dti_with_mixed_timezones(self):
        # GH 11488 (not changed, added explicit tests)

        # no tz results in DatetimeIndex
        result = DatetimeIndex(
            [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
        )
        exp = DatetimeIndex(
            [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)

        # same tz results in DatetimeIndex
        result = DatetimeIndex(
            [
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
            ],
            name="idx",
        )
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")],
            tz="Asia/Tokyo",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)

        # same tz results in DatetimeIndex (DST)
        result = DatetimeIndex(
            [
                Timestamp("2011-01-01 10:00", tz="US/Eastern"),
                Timestamp("2011-08-01 10:00", tz="US/Eastern"),
            ],
            name="idx",
        )
        exp = DatetimeIndex(
            [Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")],
            tz="US/Eastern",
            name="idx",
        )
        tm.assert_index_equal(result, exp, exact=True)
        assert isinstance(result, DatetimeIndex)

        # tz mismatch affecting to tz-aware raises TypeError/ValueError
        with pytest.raises(ValueError):
            DatetimeIndex(
                [
                    Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),
                ],
                name="idx",
            )

        msg = "cannot be converted to datetime64"
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(
                [
                    Timestamp("2011-01-01 10:00"),
                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),
                ],
                tz="Asia/Tokyo",
                name="idx",
            )

        with pytest.raises(ValueError):
            DatetimeIndex(
                [
                    Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),
                ],
                tz="US/Eastern",
                name="idx",
            )

        with pytest.raises(ValueError, match=msg):
            # passing tz should results in DatetimeIndex, then mismatch raises
            # TypeError
            Index(
                [
                    pd.NaT,
                    Timestamp("2011-01-01 10:00"),
                    pd.NaT,
                    Timestamp("2011-01-02 10:00", tz="US/Eastern"),
                ],
                tz="Asia/Tokyo",
                name="idx",
            )

    def test_construction_base_constructor(self):
        # Index() with datetime-like data dispatches to DatetimeIndex
        arr = [pd.Timestamp("2011-01-01"), pd.NaT, pd.Timestamp("2011-01-03")]
        tm.assert_index_equal(pd.Index(arr), pd.DatetimeIndex(arr))
        tm.assert_index_equal(pd.Index(np.array(arr)), pd.DatetimeIndex(np.array(arr)))

        arr = [np.nan, pd.NaT, pd.Timestamp("2011-01-03")]
        tm.assert_index_equal(pd.Index(arr), pd.DatetimeIndex(arr))
        tm.assert_index_equal(pd.Index(np.array(arr)), pd.DatetimeIndex(np.array(arr)))

    def test_construction_outofbounds(self):
        # GH 13663
        dates = [
            datetime(3000, 1, 1),
            datetime(4000, 1, 1),
            datetime(5000, 1, 1),
            datetime(6000, 1, 1),
        ]
        exp = Index(dates, dtype=object)
        # coerces to object
        tm.assert_index_equal(Index(dates), exp)

        with pytest.raises(OutOfBoundsDatetime):
            # can't create DatetimeIndex
            DatetimeIndex(dates)

    def test_construction_with_ndarray(self):
        # GH 5152
        dates = [datetime(2013, 10, 7), datetime(2013, 10, 8), datetime(2013, 10, 9)]
        data = DatetimeIndex(dates, freq=pd.offsets.BDay()).values
        result = DatetimeIndex(data, freq=pd.offsets.BDay())
        expected = DatetimeIndex(["2013-10-07", "2013-10-08", "2013-10-09"], freq="B")
        tm.assert_index_equal(result, expected)

    def test_integer_values_and_tz_interpreted_as_utc(self):
        # GH-24559
        val = np.datetime64("2000-01-01 00:00:00", "ns")
        values = np.array([val.view("i8")])

        result = DatetimeIndex(values).tz_localize("US/Central")

        expected = pd.DatetimeIndex(["2000-01-01T00:00:00"], tz="US/Central")
        tm.assert_index_equal(result, expected)

        # but UTC is *not* deprecated.
        with tm.assert_produces_warning(None):
            result = DatetimeIndex(values, tz="UTC")
        expected = pd.DatetimeIndex(["2000-01-01T00:00:00"], tz="US/Central")
        # NOTE(review): this second `expected` is built but never compared
        # to `result` (no assert follows), and its tz looks like it should
        # be "UTC" rather than "US/Central" — confirm and add the missing
        # tm.assert_index_equal.

    def test_constructor_coverage(self):
        # non-integer `periods` is accepted and truncated
        rng = date_range("1/1/2000", periods=10.5)
        exp = date_range("1/1/2000", periods=10)
        tm.assert_index_equal(rng, exp)

        msg = "periods must be a number, got foo"
        with pytest.raises(TypeError, match=msg):
            date_range(start="1/1/2000", periods="foo", freq="D")

        with pytest.raises(TypeError):
            DatetimeIndex("1/1/2000")

        # generator expression
        gen = (datetime(2000, 1, 1) + timedelta(i) for i in range(10))
        result = DatetimeIndex(gen)
        expected = DatetimeIndex(
            [datetime(2000, 1, 1) + timedelta(i) for i in range(10)]
        )
        tm.assert_index_equal(result, expected)

        # NumPy string array
        strings = np.array(["2000-01-01", "2000-01-02", "2000-01-03"])
        result = DatetimeIndex(strings)
        expected = DatetimeIndex(strings.astype("O"))
        tm.assert_index_equal(result, expected)

        from_ints = DatetimeIndex(expected.asi8)
        tm.assert_index_equal(from_ints, expected)

        # string with NaT
        strings = np.array(["2000-01-01", "2000-01-02", "NaT"])
        result = DatetimeIndex(strings)
        expected = DatetimeIndex(strings.astype("O"))
        tm.assert_index_equal(result, expected)

        from_ints = DatetimeIndex(expected.asi8)
        tm.assert_index_equal(from_ints, expected)

        # non-conforming
        msg = (
            "Inferred frequency None from passed values does not conform"
            " to passed frequency D"
        )
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(["2000-01-01", "2000-01-02", "2000-01-04"], freq="D")

        msg = (
            "Of the four parameters: start, end, periods, and freq, exactly"
            " three must be specified"
        )
        with pytest.raises(ValueError, match=msg):
            date_range(start="2011-01-01", freq="b")
        with pytest.raises(ValueError, match=msg):
            date_range(end="2011-01-01", freq="B")
        with pytest.raises(ValueError, match=msg):
            date_range(periods=10, freq="D")

    @pytest.mark.parametrize("freq", ["AS", "W-SUN"])
    def test_constructor_datetime64_tzformat(self, freq):
        # see GH#6572: ISO 8601 format results in pytz.FixedOffset
        idx = date_range(
            "2013-01-01T00:00:00-05:00", "2016-01-01T23:59:59-05:00", freq=freq
        )
        expected = date_range(
            "2013-01-01T00:00:00",
            "2016-01-01T23:59:59",
            freq=freq,
            tz=pytz.FixedOffset(-300),
        )
        tm.assert_index_equal(idx, expected)
        # Unable to use `US/Eastern` because of DST
        expected_i8 = date_range(
            "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="America/Lima"
        )
        tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)

        idx = date_range(
            "2013-01-01T00:00:00+09:00", "2016-01-01T23:59:59+09:00", freq=freq
        )
        expected = date_range(
            "2013-01-01T00:00:00",
            "2016-01-01T23:59:59",
            freq=freq,
            tz=pytz.FixedOffset(540),
        )
        tm.assert_index_equal(idx, expected)
        expected_i8 = date_range(
            "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="Asia/Tokyo"
        )
        tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)

        # Non ISO 8601 format results in dateutil.tz.tzoffset
        idx = date_range("2013/1/1 0:00:00-5:00", "2016/1/1 23:59:59-5:00", freq=freq)
        expected = date_range(
            "2013-01-01T00:00:00",
            "2016-01-01T23:59:59",
            freq=freq,
            tz=pytz.FixedOffset(-300),
        )
        tm.assert_index_equal(idx, expected)
        # Unable to use `US/Eastern` because of DST
        expected_i8 = date_range(
            "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="America/Lima"
        )
        tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)

        idx = date_range("2013/1/1 0:00:00+9:00", "2016/1/1 23:59:59+09:00", freq=freq)
        expected = date_range(
            "2013-01-01T00:00:00",
            "2016-01-01T23:59:59",
            freq=freq,
            tz=pytz.FixedOffset(540),
        )
        tm.assert_index_equal(idx, expected)
        expected_i8 = date_range(
            "2013-01-01T00:00:00", "2016-01-01T23:59:59", freq=freq, tz="Asia/Tokyo"
        )
        tm.assert_numpy_array_equal(idx.asi8, expected_i8.asi8)

    def test_constructor_dtype(self):
        # passing a dtype with a tz should localize
        idx = DatetimeIndex(
            ["2013-01-01", "2013-01-02"], dtype="datetime64[ns, US/Eastern]"
        )
        expected = DatetimeIndex(["2013-01-01", "2013-01-02"]).tz_localize("US/Eastern")
        tm.assert_index_equal(idx, expected)

        idx = DatetimeIndex(["2013-01-01", "2013-01-02"], tz="US/Eastern")
        tm.assert_index_equal(idx, expected)

        # if we already have a tz and its not the same, then raise
        idx = DatetimeIndex(
            ["2013-01-01", "2013-01-02"], dtype="datetime64[ns, US/Eastern]"
        )

        msg = (
            "cannot supply both a tz and a timezone-naive dtype"
            r" \(i\.e\. datetime64\[ns\]\)"
        )
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(idx, dtype="datetime64[ns]")

        # this is effectively trying to convert tz's
        msg = "data is already tz-aware US/Eastern, unable to set specified tz: CET"
        with pytest.raises(TypeError, match=msg):
            DatetimeIndex(idx, dtype="datetime64[ns, CET]")

        msg = "cannot supply both a tz and a dtype with a tz"
        with pytest.raises(ValueError, match=msg):
            DatetimeIndex(idx, tz="CET", dtype="datetime64[ns, US/Eastern]")

        result = DatetimeIndex(idx, dtype="datetime64[ns, US/Eastern]")
        tm.assert_index_equal(idx, result)

    @pytest.mark.parametrize("dtype", [object, np.int32, np.int64])
    def test_constructor_invalid_dtype_raises(self, dtype):
        # GH 23986
        with pytest.raises(ValueError):
            DatetimeIndex([1, 2], dtype=dtype)

    def test_constructor_name(self):
        idx = date_range(start="2000-01-01", periods=1, freq="A", name="TEST")
        assert idx.name == "TEST"

    def test_000constructor_resolution(self):
        # GH 2252: nanosecond component survives construction
        t1 = Timestamp((1352934390 * 1000000000) + 1000000 + 1000 + 1)
        idx = DatetimeIndex([t1])
        assert idx.nanosecond[0] == t1.nanosecond

    def test_disallow_setting_tz(self):
        # GH 3746: tz is read-only; use tz_convert/tz_localize instead
        dti = DatetimeIndex(["2010"], tz="UTC")
        with pytest.raises(AttributeError):
            dti.tz = pytz.timezone("US/Pacific")

    @pytest.mark.parametrize(
        "tz",
        [
            None,
            "America/Los_Angeles",
            pytz.timezone("America/Los_Angeles"),
            Timestamp("2000", tz="America/Los_Angeles").tz,
        ],
    )
    def test_constructor_start_end_with_tz(self, tz):
        # GH 18595
        start = Timestamp("2013-01-01 06:00:00", tz="America/Los_Angeles")
        end = Timestamp("2013-01-02 06:00:00", tz="America/Los_Angeles")
        result = date_range(freq="D", start=start, end=end, tz=tz)
        expected = DatetimeIndex(
            ["2013-01-01 06:00:00", "2013-01-02 06:00:00"], tz="America/Los_Angeles"
        )
        tm.assert_index_equal(result, expected)
        # Especially assert that the timezone is consistent for pytz
        assert pytz.timezone("America/Los_Angeles") is result.tz

    @pytest.mark.parametrize("tz", ["US/Pacific", "US/Eastern", "Asia/Tokyo"])
    def test_constructor_with_non_normalized_pytz(self, tz):
        # GH 18595
        non_norm_tz = Timestamp("2010", tz=tz).tz
        result = DatetimeIndex(["2010"], tz=non_norm_tz)
        assert pytz.timezone(tz) is result.tz

    def test_constructor_timestamp_near_dst(self):
        # GH 20854
        ts = [
            Timestamp("2016-10-30 03:00:00+0300", tz="Europe/Helsinki"),
            Timestamp("2016-10-30 03:00:00+0200", tz="Europe/Helsinki"),
        ]
        result = DatetimeIndex(ts)
        expected = DatetimeIndex([ts[0].to_pydatetime(), ts[1].to_pydatetime()])
        tm.assert_index_equal(result, expected)

    # TODO(GH-24559): Remove the xfail for the tz-aware case.
    @pytest.mark.parametrize("klass", [Index, DatetimeIndex])
    @pytest.mark.parametrize("box", [np.array, partial(np.array, dtype=object), list])
    @pytest.mark.parametrize(
        "tz, dtype",
        [("US/Pacific", "datetime64[ns, US/Pacific]"), (None, "datetime64[ns]")],
    )
    def test_constructor_with_int_tz(self, klass, box, tz, dtype):
        # GH 20997, 20964
        ts = Timestamp("2018-01-01", tz=tz)
        result = klass(box([ts.value]), dtype=dtype)
        expected = klass([ts])
        assert result == expected

    # This is the desired future behavior
    # Note: this xfail is not strict because the test passes with
    #  None or any of the UTC variants for tz_naive_fixture
    @pytest.mark.xfail(reason="Future behavior", strict=False)
    @pytest.mark.filterwarnings("ignore:\\n    Passing:FutureWarning")
    def test_construction_int_rountrip(self, tz_naive_fixture):
        # GH 12619
        # TODO(GH-24559): Remove xfail
        tz = tz_naive_fixture
        result = 1293858000000000000
        expected = DatetimeIndex([result], tz=tz).asi8[0]
        assert result == expected

    def test_construction_from_replaced_timestamps_with_dst(self):
        # GH 18785
        index = pd.date_range(
            pd.Timestamp(2000, 1, 1),
            pd.Timestamp(2005, 1, 1),
            freq="MS",
            tz="Australia/Melbourne",
        )
        test = pd.DataFrame({"data": range(len(index))}, index=index)
        test = test.resample("Y").mean()
        result = pd.DatetimeIndex([x.replace(month=6, day=1) for x in test.index])
        expected = pd.DatetimeIndex(
            [
                "2000-06-01 00:00:00",
                "2001-06-01 00:00:00",
                "2002-06-01 00:00:00",
                "2003-06-01 00:00:00",
                "2004-06-01 00:00:00",
                "2005-06-01 00:00:00",
            ],
            tz="Australia/Melbourne",
        )
        tm.assert_index_equal(result, expected)

    def test_construction_with_tz_and_tz_aware_dti(self):
        # GH 23579: cannot re-localize an already tz-aware DTI via tz=
        dti = date_range("2016-01-01", periods=3, tz="US/Central")
        with pytest.raises(TypeError):
            DatetimeIndex(dti, tz="Asia/Tokyo")

    def test_construction_with_nat_and_tzlocal(self):
        tz = dateutil.tz.tzlocal()
        result = DatetimeIndex(["2018", "NaT"], tz=tz)
        expected = DatetimeIndex([Timestamp("2018", tz=tz), pd.NaT])
        tm.assert_index_equal(result, expected)

    def test_constructor_no_precision_raises(self):
        # GH-24753, GH-24739
        msg = "with no precision is not allowed"
        with pytest.raises(ValueError, match=msg):
            pd.DatetimeIndex(["2000"], dtype="datetime64")
        with pytest.raises(ValueError, match=msg):
            pd.Index(["2000"], dtype="datetime64")

    def test_constructor_wrong_precision_raises(self):
        with pytest.raises(ValueError):
            pd.DatetimeIndex(["2000"], dtype="datetime64[us]")

    def test_index_constructor_with_numpy_object_array_and_timestamp_tz_with_nan(self):
        # GH 27011
        result = Index(np.array([Timestamp("2019", tz="UTC"), np.nan], dtype=object))
        expected = DatetimeIndex([Timestamp("2019", tz="UTC"), pd.NaT])
        tm.assert_index_equal(result, expected)
def test_construction_dti_with_mixed_timezones(self):
    """DatetimeIndex from Timestamps: same tz is kept, mixed tz raises."""
    # GH 11488 (not changed, added explicit tests)

    # no tz results in DatetimeIndex
    result = DatetimeIndex(
        [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
    )
    exp = DatetimeIndex(
        [Timestamp("2011-01-01"), Timestamp("2011-01-02")], name="idx"
    )
    tm.assert_index_equal(result, exp, exact=True)
    assert isinstance(result, DatetimeIndex)

    # same tz results in DatetimeIndex
    result = DatetimeIndex(
        [
            Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
            Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
        ],
        name="idx",
    )
    exp = DatetimeIndex(
        [Timestamp("2011-01-01 10:00"), Timestamp("2011-01-02 10:00")],
        tz="Asia/Tokyo",
        name="idx",
    )
    tm.assert_index_equal(result, exp, exact=True)
    assert isinstance(result, DatetimeIndex)

    # same tz results in DatetimeIndex (DST)
    result = DatetimeIndex(
        [
            Timestamp("2011-01-01 10:00", tz="US/Eastern"),
            Timestamp("2011-08-01 10:00", tz="US/Eastern"),
        ],
        name="idx",
    )
    exp = DatetimeIndex(
        [Timestamp("2011-01-01 10:00"), Timestamp("2011-08-01 10:00")],
        tz="US/Eastern",
        name="idx",
    )
    tm.assert_index_equal(result, exp, exact=True)
    assert isinstance(result, DatetimeIndex)

    # tz mismatch affecting to tz-aware raises TypeError/ValueError
    with pytest.raises(ValueError):
        DatetimeIndex(
            [
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            name="idx",
        )

    msg = "cannot be converted to datetime64"
    with pytest.raises(ValueError, match=msg):
        DatetimeIndex(
            [
                Timestamp("2011-01-01 10:00"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            tz="Asia/Tokyo",
            name="idx",
        )

    with pytest.raises(ValueError):
        DatetimeIndex(
            [
                Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            tz="US/Eastern",
            name="idx",
        )

    with pytest.raises(ValueError, match=msg):
        # passing tz should results in DatetimeIndex, then mismatch raises
        # TypeError
        Index(
            [
                pd.NaT,
                Timestamp("2011-01-01 10:00"),
                pd.NaT,
                Timestamp("2011-01-02 10:00", tz="US/Eastern"),
            ],
            tz="Asia/Tokyo",
            name="idx",
        )
def test_td_sub_timedeltalike_object_dtype_array(self): # GH#21980 arr = np.array([Timestamp("20130101 9:01"), Timestamp("20121230 9:02")]) exp = np.array([Timestamp("20121231 9:01"), Timestamp("20121229 9:02")]) res = arr - Timedelta("1D") tm.assert_numpy_array_equal(res, exp)
class TestDataFrameSetitemCopyViewSemantics:
    """Tests pinning down when DataFrame.__setitem__ copies vs. writes in place."""

    def test_setitem_always_copy(self, float_frame):
        # assigning a Series as a new column must copy it, so later mutation
        # of the column does not leak back into the source Series
        assert "E" not in float_frame.columns
        s = float_frame["A"].copy()
        float_frame["E"] = s

        float_frame["E"][5:10] = np.nan
        assert notna(s[5:10]).all()

    @pytest.mark.parametrize("consolidate", [True, False])
    def test_setitem_partial_column_inplace(self, consolidate, using_array_manager):
        # This setting should be in-place, regardless of whether frame is
        # single-block or multi-block
        # GH#304 this used to be incorrectly not-inplace, in which case
        # we needed to ensure _item_cache was cleared.
        df = DataFrame(
            {"x": [1.1, 2.1, 3.1, 4.1], "y": [5.1, 6.1, 7.1, 8.1]},
            index=[0, 1, 2, 3],
        )
        df.insert(2, "z", np.nan)
        if not using_array_manager:
            if consolidate:
                df._consolidate_inplace()
                assert len(df._mgr.blocks) == 1
            else:
                assert len(df._mgr.blocks) == 2

        zvals = df["z"]._values

        df.loc[2:, "z"] = 42

        expected = Series([np.nan, np.nan, 42, 42], index=df.index, name="z")
        tm.assert_series_equal(df["z"], expected)

        # check setting occurred in-place
        tm.assert_numpy_array_equal(zvals, expected.values)
        assert np.shares_memory(zvals, df["z"]._values)

    def test_setitem_duplicate_columns_not_inplace(self):
        # GH#39510: with duplicate column labels, setitem must not mutate views
        cols = ["A", "B"] * 2
        df = DataFrame(0.0, index=[0], columns=cols)
        df_copy = df.copy()
        df_view = df[:]
        df["B"] = (2, 5)

        expected = DataFrame([[0.0, 2, 0.0, 5]], columns=cols)
        tm.assert_frame_equal(df_view, df_copy)
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize(
        "value", [1, np.array([[1], [1]], dtype="int64"), [[1], [1]]]
    )
    def test_setitem_same_dtype_not_inplace(self, value, using_array_manager):
        # GH#39510: list-key setitem with matching dtype still must not
        # write through to pre-existing views
        cols = ["A", "B"]
        df = DataFrame(0, index=[0, 1], columns=cols)
        df_copy = df.copy()
        df_view = df[:]
        df[["B"]] = value

        expected = DataFrame([[0, 1], [0, 1]], columns=cols)
        tm.assert_frame_equal(df, expected)
        tm.assert_frame_equal(df_view, df_copy)

    @pytest.mark.parametrize(
        "value", [1.0, np.array([[1.0], [1.0]]), [[1.0], [1.0]]]
    )
    def test_setitem_listlike_key_scalar_value_not_inplace(self, value):
        # GH#39510: same as above, but the new value changes the dtype
        cols = ["A", "B"]
        df = DataFrame(0, index=[0, 1], columns=cols)
        df_copy = df.copy()
        df_view = df[:]
        df[["B"]] = value

        expected = DataFrame([[0, 1.0], [0, 1.0]], columns=cols)
        tm.assert_frame_equal(df_view, df_copy)
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize(
        "indexer",
        [
            "a",
            ["a"],
            pytest.param(
                [True, False],
                marks=pytest.mark.xfail(
                    reason="Boolean indexer incorrectly setting inplace",
                    strict=False,  # passing on some builds, no obvious pattern
                ),
            ),
        ],
    )
    @pytest.mark.parametrize(
        "value, set_value",
        [
            (1, 5),
            (1.0, 5.0),
            (Timestamp("2020-12-31"), Timestamp("2021-12-31")),
            ("a", "b"),
        ],
    )
    def test_setitem_not_operating_inplace(self, value, set_value, indexer):
        # GH#43406: a view taken before setitem must keep the old values
        df = DataFrame({"a": value}, index=[0, 1])
        expected = df.copy()
        view = df[:]
        df[indexer] = set_value
        tm.assert_frame_equal(view, expected)
def time_f():
    """Return a callable that converts its argument to a Timestamp.

    NOTE(review): the extra lambda wrapper looks deliberate — presumably so a
    benchmark measures the per-call conversion cost — confirm before removing.
    """
    return lambda x: Timestamp(x)


# NOTE(review): the function below is truncated in this chunk (header only,
# body not visible); reproduced verbatim, do not edit in isolation.
def fxcm_timestamp_fn(df):
def test_td_add_timedeltalike_object_dtype_array(self, op): # GH#21980 arr = np.array([Timestamp("20130101 9:01"), Timestamp("20121230 9:02")]) exp = np.array([Timestamp("20130102 9:01"), Timestamp("20121231 9:02")]) res = op(arr, Timedelta("1D")) tm.assert_numpy_array_equal(res, exp)
def test_index_constructor_with_numpy_object_array_and_timestamp_tz_with_nan(self): # GH 27011 result = Index(np.array([Timestamp("2019", tz="UTC"), np.nan], dtype=object)) expected = DatetimeIndex([Timestamp("2019", tz="UTC"), pd.NaT]) tm.assert_index_equal(result, expected)
def test_bins_not_monotonic():
    """cut() must reject bin edges that do not increase monotonically."""
    msg = "bins must increase monotonically"
    data = [0.2, 1.4, 2.5, 6.2, 9.7, 2.1]
    with pytest.raises(ValueError, match=msg):
        cut(data, [0.1, 1.5, 1, 10])


# NOTE(review): the parametrization below is truncated in this chunk — the
# decorated test function is not visible here; reproduced verbatim.
@pytest.mark.parametrize(
    "x, bins, expected",
    [
        (
            date_range("2017-12-31", periods=3),
            [Timestamp.min, Timestamp("2018-01-01"), Timestamp.max],
            IntervalIndex.from_tuples(
                [
                    (Timestamp.min, Timestamp("2018-01-01")),
                    (Timestamp("2018-01-01"), Timestamp.max),
                ]
            ),
        ),
        (
            [-1, 0, 1],
            np.array(
                [np.iinfo(np.int64).min, 0, np.iinfo(np.int64).max], dtype="int64"
            ),
            IntervalIndex.from_tuples(
                [(np.iinfo(np.int64).min, 0), (0, np.iinfo(np.int64).max)]
            ),
def test_datetimeindex_constructor_misc(self):
    """Miscellaneous DatetimeIndex constructor behaviors: bad strings raise,
    equivalent inputs produce equal indexes, and string freqs match offsets."""
    # an unparseable element raises with a version-dependent message
    arr = ["1/1/2005", "1/2/2005", "Jn 3, 2005", "2005-01-04"]
    msg = r"(\(')?Unknown string format(:', 'Jn 3, 2005'\))?"
    with pytest.raises(ValueError, match=msg):
        DatetimeIndex(arr)

    # the same dates via strings, datetimes, Timestamps, object ndarray,
    # and to_datetime should all build identical indexes
    arr = ["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"]
    idx1 = DatetimeIndex(arr)

    arr = [datetime(2005, 1, 1), "1/2/2005", "1/3/2005", "2005-01-04"]
    idx2 = DatetimeIndex(arr)

    arr = [Timestamp(datetime(2005, 1, 1)), "1/2/2005", "1/3/2005", "2005-01-04"]
    idx3 = DatetimeIndex(arr)

    arr = np.array(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"], dtype="O")
    idx4 = DatetimeIndex(arr)

    arr = to_datetime(["1/1/2005", "1/2/2005", "1/3/2005", "2005-01-04"])
    idx5 = DatetimeIndex(arr)

    arr = to_datetime(["1/1/2005", "1/2/2005", "Jan 3, 2005", "2005-01-04"])
    idx6 = DatetimeIndex(arr)

    # dayfirst / yearfirst parsing
    idx7 = DatetimeIndex(["12/05/2007", "25/01/2008"], dayfirst=True)
    idx8 = DatetimeIndex(
        ["2007/05/12", "2008/01/25"], dayfirst=False, yearfirst=True
    )
    tm.assert_index_equal(idx7, idx8)

    for other in [idx2, idx3, idx4, idx5, idx6]:
        assert (idx1.values == other.values).all()

    sdate = datetime(1999, 12, 25)
    edate = datetime(2000, 1, 1)
    idx = date_range(start=sdate, freq="1B", periods=20)
    assert len(idx) == 20
    assert idx[0] == sdate + 0 * offsets.BDay()
    assert idx.freq == "B"

    # NOTE(review): tuple-form freq ("D", 5) is a legacy spelling — confirm
    # the pandas version under test still accepts it
    idx = date_range(end=edate, freq=("D", 5), periods=20)
    assert len(idx) == 20
    assert idx[-1] == edate
    assert idx.freq == "5D"

    # string freqs are equivalent to the corresponding DateOffset objects
    idx1 = date_range(start=sdate, end=edate, freq="W-SUN")
    idx2 = date_range(start=sdate, end=edate, freq=offsets.Week(weekday=6))
    assert len(idx1) == len(idx2)
    assert idx1.freq == idx2.freq

    idx1 = date_range(start=sdate, end=edate, freq="QS")
    idx2 = date_range(
        start=sdate, end=edate, freq=offsets.QuarterBegin(startingMonth=1)
    )
    assert len(idx1) == len(idx2)
    assert idx1.freq == idx2.freq

    idx1 = date_range(start=sdate, end=edate, freq="BQ")
    idx2 = date_range(
        start=sdate, end=edate, freq=offsets.BQuarterEnd(startingMonth=12)
    )
    assert len(idx1) == len(idx2)
    assert idx1.freq == idx2.freq
class TestCounting(object):
    """Tests for GroupBy.cumcount, GroupBy.ngroup and GroupBy.count."""

    def test_cumcount(self):
        df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'])
        g = df.groupby('A')
        sg = g.A

        expected = Series([0, 1, 2, 0, 3])

        assert_series_equal(expected, g.cumcount())
        assert_series_equal(expected, sg.cumcount())

    def test_cumcount_empty(self):
        ge = DataFrame().groupby(level=0)
        se = Series().groupby(level=0)

        # edge case, as this is usually considered float
        e = Series(dtype='int64')

        assert_series_equal(e, ge.cumcount())
        assert_series_equal(e, se.cumcount())

    def test_cumcount_dupe_index(self):
        # duplicate index labels must be preserved in the result
        df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'],
                       index=[0] * 5)
        g = df.groupby('A')
        sg = g.A

        expected = Series([0, 1, 2, 0, 3], index=[0] * 5)

        assert_series_equal(expected, g.cumcount())
        assert_series_equal(expected, sg.cumcount())

    def test_cumcount_mi(self):
        # a MultiIndex must pass through unchanged
        mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]])
        df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'],
                       index=mi)
        g = df.groupby('A')
        sg = g.A

        expected = Series([0, 1, 2, 0, 3], index=mi)

        assert_series_equal(expected, g.cumcount())
        assert_series_equal(expected, sg.cumcount())

    def test_cumcount_groupby_not_col(self):
        # grouping by an external key list rather than a column
        df = DataFrame([['a'], ['a'], ['a'], ['b'], ['a']], columns=['A'],
                       index=[0] * 5)
        g = df.groupby([0, 0, 0, 1, 0])
        sg = g.A

        expected = Series([0, 1, 2, 0, 3], index=[0] * 5)

        assert_series_equal(expected, g.cumcount())
        assert_series_equal(expected, sg.cumcount())

    def test_ngroup(self):
        df = DataFrame({'A': list('aaaba')})
        g = df.groupby('A')
        sg = g.A

        expected = Series([0, 0, 0, 1, 0])

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_distinct(self):
        df = DataFrame({'A': list('abcde')})
        g = df.groupby('A')
        sg = g.A

        expected = Series(range(5), dtype='int64')

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_one_group(self):
        df = DataFrame({'A': [0] * 5})
        g = df.groupby('A')
        sg = g.A

        expected = Series([0] * 5)

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_empty(self):
        ge = DataFrame().groupby(level=0)
        se = Series().groupby(level=0)

        # edge case, as this is usually considered float
        e = Series(dtype='int64')

        assert_series_equal(e, ge.ngroup())
        assert_series_equal(e, se.ngroup())

    def test_ngroup_series_matches_frame(self):
        df = DataFrame({'A': list('aaaba')})
        s = Series(list('aaaba'))

        assert_series_equal(df.groupby(s).ngroup(), s.groupby(s).ngroup())

    def test_ngroup_dupe_index(self):
        df = DataFrame({'A': list('aaaba')}, index=[0] * 5)
        g = df.groupby('A')
        sg = g.A

        expected = Series([0, 0, 0, 1, 0], index=[0] * 5)

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_mi(self):
        mi = MultiIndex.from_tuples([[0, 1], [1, 2], [2, 2], [2, 2], [1, 0]])
        df = DataFrame({'A': list('aaaba')}, index=mi)
        g = df.groupby('A')
        sg = g.A
        expected = Series([0, 0, 0, 1, 0], index=mi)

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_groupby_not_col(self):
        df = DataFrame({'A': list('aaaba')}, index=[0] * 5)
        g = df.groupby([0, 0, 0, 1, 0])
        sg = g.A

        expected = Series([0, 0, 0, 1, 0], index=[0] * 5)

        assert_series_equal(expected, g.ngroup())
        assert_series_equal(expected, sg.ngroup())

    def test_ngroup_descending(self):
        df = DataFrame(['a', 'a', 'b', 'a', 'b'], columns=['A'])
        g = df.groupby(['A'])

        ascending = Series([0, 0, 1, 0, 1])
        descending = Series([1, 1, 0, 1, 0])

        # descending numbering is the mirror image of ascending
        assert_series_equal(descending, (g.ngroups - 1) - ascending)
        assert_series_equal(ascending, g.ngroup(ascending=True))
        assert_series_equal(descending, g.ngroup(ascending=False))

    def test_ngroup_matches_cumcount(self):
        # verify one manually-worked out case works
        df = DataFrame([['a', 'x'], ['a', 'y'], ['b', 'x'],
                        ['a', 'x'], ['b', 'y']], columns=['A', 'X'])
        g = df.groupby(['A', 'X'])
        g_ngroup = g.ngroup()
        g_cumcount = g.cumcount()
        expected_ngroup = Series([0, 1, 2, 0, 3])
        expected_cumcount = Series([0, 0, 0, 1, 0])

        assert_series_equal(g_ngroup, expected_ngroup)
        assert_series_equal(g_cumcount, expected_cumcount)

    def test_ngroup_cumcount_pair(self):
        # brute force comparison for all small series
        for p in cart_product(range(3), repeat=4):
            df = DataFrame({'a': p})
            g = df.groupby(['a'])

            order = sorted(set(p))
            ngroupd = [order.index(val) for val in p]
            cumcounted = [p[:i].count(val) for i, val in enumerate(p)]

            assert_series_equal(g.ngroup(), Series(ngroupd))
            assert_series_equal(g.cumcount(), Series(cumcounted))

    def test_ngroup_respects_groupby_order(self):
        np.random.seed(0)
        df = DataFrame({'a': np.random.choice(list('abcdef'), 100)})
        for sort_flag in (False, True):
            g = df.groupby(['a'], sort=sort_flag)
            df['group_id'] = -1
            df['group_index'] = -1

            # derive the expected ids by iterating the groups directly
            for i, (_, group) in enumerate(g):
                df.loc[group.index, 'group_id'] = i
                for j, ind in enumerate(group.index):
                    df.loc[ind, 'group_index'] = j

            assert_series_equal(Series(df['group_id'].values),
                                g.ngroup())
            assert_series_equal(Series(df['group_index'].values),
                                g.cumcount())

    @pytest.mark.parametrize(
        'datetimelike',
        [[Timestamp('2016-05-%02d 20:09:25+00:00' % i) for i in range(1, 4)],
         [Timestamp('2016-05-%02d 20:09:25' % i) for i in range(1, 4)],
         [Timedelta(x, unit="h") for x in range(1, 4)],
         [Period(freq="2W", year=2017, month=x) for x in range(1, 4)]])
    def test_count_with_datetimelike(self, datetimelike):
        # test for #13393, where DataframeGroupBy.count() fails
        # when counting a datetimelike column.

        df = DataFrame({'x': ['a', 'a', 'b'], 'y': datetimelike})
        res = df.groupby('x').count()
        expected = DataFrame({'y': [2, 1]}, index=['a', 'b'])
        expected.index.name = "x"
        assert_frame_equal(expected, res)

    def test_count_with_only_nans_in_first_group(self):
        # GH21956
        df = DataFrame({'A': [np.nan, np.nan], 'B': ['a', 'b'], 'C': [1, 2]})
        result = df.groupby(['A', 'B']).C.count()
        mi = MultiIndex(levels=[[], ['a', 'b']],
                        codes=[[], []],
                        names=['A', 'B'])
        expected = Series([], index=mi, dtype=np.int64, name='C')
        assert_series_equal(result, expected, check_index_type=False)
def create_data(): """ create the pickle/msgpack data """ data = { u'A': [0., 1., 2., 3., np.nan], u'B': [0, 1, 0, 1, 0], u'C': [u'foo1', u'foo2', u'foo3', u'foo4', u'foo5'], u'D': date_range('1/1/2009', periods=5), u'E': [0., 1, Timestamp('20100101'), u'foo', 2.] } scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M')) index = dict(int=Index(np.arange(10)), date=date_range('20130101', periods=10), period=period_range('2013-01-01', freq='M', periods=10), float=Index(np.arange(10, dtype=np.float64)), uint=Index(np.arange(10, dtype=np.uint64)), timedelta=timedelta_range('00:00:00', freq='30T', periods=10)) if _loose_version >= LooseVersion('0.18'): from pandas import RangeIndex index['range'] = RangeIndex(10) if _loose_version >= LooseVersion('0.21'): from pandas import interval_range index['interval'] = interval_range(0, periods=10) mi = dict(reg2=MultiIndex.from_tuples(tuple( zip(*[[u'bar', u'bar', u'baz', u'baz', u'foo', u'foo', u'qux', u'qux'], [u'one', u'two', u'one', u'two', u'one', u'two', u'one', u'two'] ])), names=[u'first', u'second'])) series = dict( float=Series(data[u'A']), int=Series(data[u'B']), mixed=Series(data[u'E']), ts=Series(np.arange(10).astype(np.int64), index=date_range('20130101', periods=10)), mi=Series(np.arange(5).astype(np.float64), index=MultiIndex.from_tuples(tuple( zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])), names=[u'one', u'two'])), dup=Series(np.arange(5).astype(np.float64), index=[u'A', u'B', u'C', u'D', u'A']), cat=Series(Categorical([u'foo', u'bar', u'baz'])), dt=Series(date_range('20130101', periods=5)), dt_tz=Series(date_range('20130101', periods=5, tz='US/Eastern')), period=Series([Period('2000Q1')] * 5)) mixed_dup_df = DataFrame(data) mixed_dup_df.columns = list(u"ABCDA") frame = dict( float=DataFrame({ u'A': series[u'float'], u'B': series[u'float'] + 1 }), int=DataFrame({ u'A': series[u'int'], u'B': series[u'int'] + 1 }), mixed=DataFrame({k: data[k] for k in [u'A', u'B', u'C', u'D']}), mi=DataFrame( { 
u'A': np.arange(5).astype(np.float64), u'B': np.arange(5).astype(np.int64) }, index=MultiIndex.from_tuples(tuple( zip(*[[u'bar', u'bar', u'baz', u'baz', u'baz'], [u'one', u'two', u'one', u'two', u'three']])), names=[u'first', u'second'])), dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64), columns=[u'A', u'B', u'A']), cat_onecol=DataFrame({u'A': Categorical([u'foo', u'bar'])}), cat_and_float=DataFrame({ u'A': Categorical([u'foo', u'bar', u'baz']), u'B': np.arange(3).astype(np.int64) }), mixed_dup=mixed_dup_df, dt_mixed_tzs=DataFrame( { u'A': Timestamp('20130102', tz='US/Eastern'), u'B': Timestamp('20130603', tz='CET') }, index=range(5)), dt_mixed2_tzs=DataFrame( { u'A': Timestamp('20130102', tz='US/Eastern'), u'B': Timestamp('20130603', tz='CET'), u'C': Timestamp('20130603', tz='UTC') }, index=range(5))) with catch_warnings(record=True): mixed_dup_panel = Panel({ u'ItemA': frame[u'float'], u'ItemB': frame[u'int'] }) mixed_dup_panel.items = [u'ItemA', u'ItemA'] panel = dict(float=Panel({ u'ItemA': frame[u'float'], u'ItemB': frame[u'float'] + 1 }), dup=Panel(np.arange(30).reshape(3, 5, 2).astype(np.float64), items=[u'A', u'B', u'A']), mixed_dup=mixed_dup_panel) cat = dict(int8=Categorical(list('abcdefg')), int16=Categorical(np.arange(1000)), int32=Categorical(np.arange(10000))) timestamp = dict(normal=Timestamp('2011-01-01'), nat=NaT, tz=Timestamp('2011-01-01', tz='US/Eastern')) if _loose_version < LooseVersion('0.19.2'): timestamp['freq'] = Timestamp('2011-01-01', offset='D') timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', offset='M') else: timestamp['freq'] = Timestamp('2011-01-01', freq='D') timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', freq='M') off = { 'DateOffset': DateOffset(years=1), 'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824), 'BusinessDay': BusinessDay(offset=timedelta(seconds=9)), 'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'), 'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'), 
'SemiMonthBegin': SemiMonthBegin(day_of_month=9), 'SemiMonthEnd': SemiMonthEnd(day_of_month=24), 'MonthBegin': MonthBegin(1), 'MonthEnd': MonthEnd(1), 'QuarterBegin': QuarterBegin(1), 'QuarterEnd': QuarterEnd(1), 'Day': Day(1), 'YearBegin': YearBegin(1), 'YearEnd': YearEnd(1), 'Week': Week(1), 'Week_Tues': Week(2, normalize=False, weekday=1), 'WeekOfMonth': WeekOfMonth(week=3, weekday=4), 'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3), 'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"), 'Easter': Easter(), 'Hour': Hour(1), 'Minute': Minute(1) } return dict(series=series, frame=frame, panel=panel, index=index, scalars=scalars, mi=mi, sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()), sp_frame=dict(float=_create_sp_frame()), cat=cat, timestamp=timestamp, offsets=off)
def test_divmod_invalid(self): # GH#19365 td = Timedelta(days=2, hours=6) with pytest.raises(TypeError): divmod(td, Timestamp("2018-01-22"))
def test_construction_index_with_mixed_timezones_with_NaT(self):
    """Index() with NaT mixed in: single tz gives DatetimeIndex, mixed tz
    falls back to object dtype, all-NaT stays a DatetimeIndex."""
    # see gh-11488
    result = Index(
        [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")],
        name="idx",
    )
    exp = DatetimeIndex(
        [pd.NaT, Timestamp("2011-01-01"), pd.NaT, Timestamp("2011-01-02")],
        name="idx",
    )
    tm.assert_index_equal(result, exp, exact=True)
    assert isinstance(result, DatetimeIndex)
    assert result.tz is None

    # Same tz results in DatetimeIndex
    result = Index(
        [
            pd.NaT,
            Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
            pd.NaT,
            Timestamp("2011-01-02 10:00", tz="Asia/Tokyo"),
        ],
        name="idx",
    )
    exp = DatetimeIndex(
        [
            pd.NaT,
            Timestamp("2011-01-01 10:00"),
            pd.NaT,
            Timestamp("2011-01-02 10:00"),
        ],
        tz="Asia/Tokyo",
        name="idx",
    )
    tm.assert_index_equal(result, exp, exact=True)
    assert isinstance(result, DatetimeIndex)
    assert result.tz is not None
    assert result.tz == exp.tz

    # same tz results in DatetimeIndex (DST)
    result = Index(
        [
            Timestamp("2011-01-01 10:00", tz="US/Eastern"),
            pd.NaT,
            Timestamp("2011-08-01 10:00", tz="US/Eastern"),
        ],
        name="idx",
    )
    exp = DatetimeIndex(
        [Timestamp("2011-01-01 10:00"), pd.NaT, Timestamp("2011-08-01 10:00")],
        tz="US/Eastern",
        name="idx",
    )
    tm.assert_index_equal(result, exp, exact=True)
    assert isinstance(result, DatetimeIndex)
    assert result.tz is not None
    assert result.tz == exp.tz

    # different tz results in Index(dtype=object)
    result = Index(
        [
            pd.NaT,
            Timestamp("2011-01-01 10:00"),
            pd.NaT,
            Timestamp("2011-01-02 10:00", tz="US/Eastern"),
        ],
        name="idx",
    )
    exp = Index(
        [
            pd.NaT,
            Timestamp("2011-01-01 10:00"),
            pd.NaT,
            Timestamp("2011-01-02 10:00", tz="US/Eastern"),
        ],
        dtype="object",
        name="idx",
    )
    tm.assert_index_equal(result, exp, exact=True)
    assert not isinstance(result, DatetimeIndex)

    result = Index(
        [
            pd.NaT,
            Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
            pd.NaT,
            Timestamp("2011-01-02 10:00", tz="US/Eastern"),
        ],
        name="idx",
    )
    exp = Index(
        [
            pd.NaT,
            Timestamp("2011-01-01 10:00", tz="Asia/Tokyo"),
            pd.NaT,
            Timestamp("2011-01-02 10:00", tz="US/Eastern"),
        ],
        dtype="object",
        name="idx",
    )
    tm.assert_index_equal(result, exp, exact=True)
    assert not isinstance(result, DatetimeIndex)

    # all NaT
    result = Index([pd.NaT, pd.NaT], name="idx")
    exp = DatetimeIndex([pd.NaT, pd.NaT], name="idx")
    tm.assert_index_equal(result, exp, exact=True)
    assert isinstance(result, DatetimeIndex)
    assert result.tz is None

    # all NaT with tz
    result = Index([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")
    exp = DatetimeIndex([pd.NaT, pd.NaT], tz="Asia/Tokyo", name="idx")
    tm.assert_index_equal(result, exp, exact=True)
    assert isinstance(result, DatetimeIndex)
    assert result.tz is not None
    assert result.tz == exp.tz
def test_convert(self):
    """Exercise Series._convert across numeric/datetime/timedelta flags:
    coercion of mixed types, pass-through when the flag is off, forced
    numeric conversion of strings, and date conversion to M8[ns]."""
    # GH#10265
    dt = datetime(2001, 1, 1, 0, 0)
    td = dt - datetime(2000, 1, 1, 0, 0)

    # Test coercion with mixed types
    ser = Series(["a", "3.1415", dt, td])
    results = ser._convert(numeric=True)
    expected = Series([np.nan, 3.1415, np.nan, np.nan])
    tm.assert_series_equal(results, expected)

    # Test standard conversion returns original
    results = ser._convert(datetime=True)
    tm.assert_series_equal(results, ser)
    results = ser._convert(numeric=True)
    expected = Series([np.nan, 3.1415, np.nan, np.nan])
    tm.assert_series_equal(results, expected)
    results = ser._convert(timedelta=True)
    tm.assert_series_equal(results, ser)

    # test pass-through and non-conversion when other types selected
    ser = Series(["1.0", "2.0", "3.0"])
    results = ser._convert(datetime=True, numeric=True, timedelta=True)
    expected = Series([1.0, 2.0, 3.0])
    tm.assert_series_equal(results, expected)
    results = ser._convert(True, False, True)
    tm.assert_series_equal(results, ser)

    ser = Series(
        [datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)], dtype="O"
    )
    results = ser._convert(datetime=True, numeric=True, timedelta=True)
    expected = Series([datetime(2001, 1, 1, 0, 0), datetime(2001, 1, 1, 0, 0)])
    tm.assert_series_equal(results, expected)
    results = ser._convert(datetime=False, numeric=True, timedelta=True)
    tm.assert_series_equal(results, ser)

    td = datetime(2001, 1, 1, 0, 0) - datetime(2000, 1, 1, 0, 0)
    ser = Series([td, td], dtype="O")
    results = ser._convert(datetime=True, numeric=True, timedelta=True)
    expected = Series([td, td])
    tm.assert_series_equal(results, expected)
    results = ser._convert(True, True, False)
    tm.assert_series_equal(results, ser)

    ser = Series([1.0, 2, 3], index=["a", "b", "c"])
    result = ser._convert(numeric=True)
    tm.assert_series_equal(result, ser)

    # force numeric conversion
    res = ser.copy().astype("O")
    res["a"] = "1"
    result = res._convert(numeric=True)
    tm.assert_series_equal(result, ser)

    res = ser.copy().astype("O")
    res["a"] = "1."
    result = res._convert(numeric=True)
    tm.assert_series_equal(result, ser)

    # an unparseable string becomes NaN
    res = ser.copy().astype("O")
    res["a"] = "garbled"
    result = res._convert(numeric=True)
    expected = ser.copy()
    expected["a"] = np.nan
    tm.assert_series_equal(result, expected)

    # GH 4119, not converting a mixed type (e.g.floats and object)
    ser = Series([1, "na", 3, 4])
    result = ser._convert(datetime=True, numeric=True)
    expected = Series([1, np.nan, 3, 4])
    tm.assert_series_equal(result, expected)

    ser = Series([1, "", 3, 4])
    result = ser._convert(datetime=True, numeric=True)
    tm.assert_series_equal(result, expected)

    # dates
    ser = Series(
        [
            datetime(2001, 1, 1, 0, 0),
            datetime(2001, 1, 2, 0, 0),
            datetime(2001, 1, 3, 0, 0),
        ]
    )
    result = ser._convert(datetime=True)
    expected = Series(
        [Timestamp("20010101"), Timestamp("20010102"), Timestamp("20010103")],
        dtype="M8[ns]",
    )
    tm.assert_series_equal(result, expected)

    # converting an already-converted series is a no-op
    result = ser._convert(datetime=True)
    tm.assert_series_equal(result, expected)

    # preserved if non-object dtype
    ser = Series([1], dtype="float32")
    result = ser._convert(datetime=True)
    tm.assert_series_equal(result, ser)
def decode(obj): """ Decoder for deserializing numpy data types. """ typ = obj.get('typ') if typ is None: return obj elif typ == 'timestamp': return Timestamp(obj['value'], tz=obj['tz'], offset=obj['offset']) elif typ == 'period': return Period(ordinal=obj['ordinal'], freq=obj['freq']) elif typ == 'index': dtype = dtype_for(obj['dtype']) data = unconvert(obj['data'], np.typeDict[obj['dtype']], obj.get('compress')) return globals()[obj['klass']](data, dtype=dtype, name=obj['name']) elif typ == 'multi_index': data = unconvert(obj['data'], np.typeDict[obj['dtype']], obj.get('compress')) data = [tuple(x) for x in data] return globals()[obj['klass']].from_tuples(data, names=obj['names']) elif typ == 'period_index': data = unconvert(obj['data'], np.int64, obj.get('compress')) d = dict(name=obj['name'], freq=obj['freq']) return globals()[obj['klass']](data, **d) elif typ == 'datetime_index': data = unconvert(obj['data'], np.int64, obj.get('compress')) d = dict(name=obj['name'], freq=obj['freq'], verify_integrity=False) result = globals()[obj['klass']](data, **d) tz = obj['tz'] # reverse tz conversion if tz is not None: result = result.tz_localize('UTC').tz_convert(tz) return result elif typ == 'series': dtype = dtype_for(obj['dtype']) index = obj['index'] return globals()[obj['klass']](unconvert(obj['data'], dtype, obj['compress']), index=index, name=obj['name']) elif typ == 'block_manager': axes = obj['axes'] def create_block(b): values = unconvert(b['values'], dtype_for(b['dtype']), b['compress']).reshape(b['shape']) return make_block(values=values, klass=getattr(internals, b['klass']), placement=axes[0].get_indexer(b['items'])) blocks = [create_block(b) for b in obj['blocks']] return globals()[obj['klass']](BlockManager(blocks, axes)) elif typ == 'datetime': return parse(obj['data']) elif typ == 'datetime64': return np.datetime64(parse(obj['data'])) elif typ == 'date': return parse(obj['data']).date() elif typ == 'timedelta': return timedelta(*obj['data']) elif typ == 
'timedelta64': return np.timedelta64(int(obj['data'])) #elif typ == 'sparse_series': # dtype = dtype_for(obj['dtype']) # return globals()[obj['klass']]( # unconvert(obj['sp_values'], dtype, obj['compress']), # sparse_index=obj['sp_index'], index=obj['index'], # fill_value=obj['fill_value'], kind=obj['kind'], name=obj['name']) #elif typ == 'sparse_dataframe': # return globals()[obj['klass']]( # obj['data'], columns=obj['columns'], # default_fill_value=obj['default_fill_value'], # default_kind=obj['default_kind'] # ) #elif typ == 'sparse_panel': # return globals()[obj['klass']]( # obj['data'], items=obj['items'], # default_fill_value=obj['default_fill_value'], # default_kind=obj['default_kind']) elif typ == 'block_index': return globals()[obj['klass']](obj['length'], obj['blocs'], obj['blengths']) elif typ == 'int_index': return globals()[obj['klass']](obj['length'], obj['indices']) elif typ == 'ndarray': return unconvert(obj['data'], np.typeDict[obj['dtype']], obj.get('compress')).reshape(obj['shape']) elif typ == 'np_scalar': if obj.get('sub_typ') == 'np_complex': return c2f(obj['real'], obj['imag'], obj['dtype']) else: dtype = dtype_for(obj['dtype']) try: return dtype(obj['data']) except: return dtype.type(obj['data']) elif typ == 'np_complex': return complex(obj['real'] + '+' + obj['imag'] + 'j') elif isinstance(obj, (dict, list, set)): return obj else: return obj
class TestArithmetic(object):
    """
    Arithmetic tests for object-dtype data boxed as Series/Index/DataFrame,
    mixed with strings, extension-dtype name strings, and Timedelta scalars.

    NOTE(review): ``box`` is a fixture supplied by conftest (not visible
    here); it parametrizes over the pandas container classes.
    """

    @pytest.mark.parametrize("op", [operator.add, ops.radd])
    @pytest.mark.parametrize("other", ["category", "Int64"])
    def test_add_extension_scalar(self, other, box, op):
        # GH#22378
        # Check that scalars satisfying is_extension_array_dtype(obj)
        # do not incorrectly try to dispatch to an ExtensionArray operation
        arr = pd.Series(['a', 'b', 'c'])
        expected = pd.Series([op(x, other) for x in arr])

        arr = tm.box_expected(arr, box)
        expected = tm.box_expected(expected, box)

        result = op(arr, other)
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize('box', [
        pytest.param(pd.Index,
                     marks=pytest.mark.xfail(reason="Does not mask nulls",
                                             strict=True, raises=TypeError)),
        pd.Series,
        pd.DataFrame
    ], ids=lambda x: x.__name__)
    def test_objarr_add_str(self, box):
        # string-dtype data + str scalar concatenates, propagating NaN
        ser = pd.Series(['x', np.nan, 'x'])
        expected = pd.Series(['xa', np.nan, 'xa'])

        ser = tm.box_expected(ser, box)
        expected = tm.box_expected(expected, box)

        result = ser + 'a'
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize('box', [
        pytest.param(pd.Index,
                     marks=pytest.mark.xfail(reason="Does not mask nulls",
                                             strict=True, raises=TypeError)),
        pd.Series,
        pd.DataFrame
    ], ids=lambda x: x.__name__)
    def test_objarr_radd_str(self, box):
        # reflected version of test_objarr_add_str
        ser = pd.Series(['x', np.nan, 'x'])
        expected = pd.Series(['ax', np.nan, 'ax'])

        ser = tm.box_expected(ser, box)
        expected = tm.box_expected(expected, box)

        result = 'a' + ser
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize(
        'data',
        [[1, 2, 3],
         [1.1, 2.2, 3.3],
         [Timestamp('2011-01-01'), Timestamp('2011-01-02'), pd.NaT],
         ['x', 'y', 1]])
    @pytest.mark.parametrize('dtype', [None, object])
    def test_objarr_radd_str_invalid(self, dtype, data, box):
        # str + non-string contents must raise, not coerce
        ser = Series(data, dtype=dtype)
        ser = tm.box_expected(ser, box)
        with pytest.raises(TypeError):
            'foo_' + ser

    @pytest.mark.parametrize('op', [operator.add, ops.radd,
                                    operator.sub, ops.rsub])
    def test_objarr_add_invalid(self, op, box):
        # invalid ops
        if box is pd.DataFrame and op is ops.radd:
            pytest.xfail(reason="DataFrame op incorrectly casts the np.array"
                         "case to M8[ns]")

        obj_ser = tm.makeObjectSeries()
        obj_ser.name = 'objects'

        obj_ser = tm.box_expected(obj_ser, box)
        with pytest.raises(Exception):
            op(obj_ser, 1)
        with pytest.raises(Exception):
            op(obj_ser, np.array(1, dtype=np.int64))

    # TODO: Moved from tests.series.test_operators; needs cleanup
    def test_operators_na_handling(self):
        # prefix/suffix concatenation propagates NaN rather than raising
        ser = Series(['foo', 'bar', 'baz', np.nan])
        result = 'prefix_' + ser
        expected = pd.Series(
            ['prefix_foo', 'prefix_bar', 'prefix_baz', np.nan])
        tm.assert_series_equal(result, expected)

        result = ser + '_suffix'
        expected = pd.Series(
            ['foo_suffix', 'bar_suffix', 'baz_suffix', np.nan])
        tm.assert_series_equal(result, expected)

    # TODO: parametrize over box
    @pytest.mark.parametrize('dtype', [None, object])
    def test_series_with_dtype_radd_timedelta(self, dtype):
        # note this test is _not_ aimed at timedelta64-dtyped Series
        ser = pd.Series([
            pd.Timedelta('1 days'),
            pd.Timedelta('2 days'),
            pd.Timedelta('3 days')
        ], dtype=dtype)
        expected = pd.Series([
            pd.Timedelta('4 days'),
            pd.Timedelta('5 days'),
            pd.Timedelta('6 days')
        ])

        result = pd.Timedelta('3 days') + ser
        tm.assert_series_equal(result, expected)

        result = ser + pd.Timedelta('3 days')
        tm.assert_series_equal(result, expected)
def test_construction_with_nat_and_tzlocal(self):
    """DatetimeIndex construction keeps NaT intact when a dateutil
    tzlocal timezone is supplied alongside parseable strings."""
    local_tz = dateutil.tz.tzlocal()
    expected = DatetimeIndex([Timestamp("2018", tz=local_tz), pd.NaT])
    result = DatetimeIndex(["2018", "NaT"], tz=local_tz)
    tm.assert_index_equal(result, expected)
def test_td_rsub_mixed_most_timedeltalike_object_dtype_array(self):
    """Subtracting an object array that contains a Timestamp from a
    Timedelta must raise TypeError (GH#21980)."""
    mixed = np.array(
        [Timestamp.now(), Timedelta("1D"), np.timedelta64(2, "h")])
    with pytest.raises(TypeError):
        Timedelta("1D") - mixed
def test_maybe_cast_slice_duplicate_monotonic(self):
    """A duplicate (but monotonic) DatetimeIndex still casts a string
    slice bound to a Timestamp (GH#16515)."""
    index = DatetimeIndex(["2017", "2017"])
    bound = index._maybe_cast_slice_bound("2017-01-01", "left", "loc")
    assert bound == Timestamp("2017-01-01")
def test_interleave(self): # interleave with object result = self.tzframe.assign(D='foo').values expected = np.array( [[ Timestamp('2013-01-01 00:00:00'), Timestamp('2013-01-02 00:00:00'), Timestamp('2013-01-03 00:00:00') ], [ Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern'), pd.NaT, Timestamp('2013-01-03 00:00:00-0500', tz='US/Eastern') ], [ Timestamp('2013-01-01 00:00:00+0100', tz='CET'), pd.NaT, Timestamp('2013-01-03 00:00:00+0100', tz='CET') ], ['foo', 'foo', 'foo']], dtype=object).T tm.assert_numpy_array_equal(result, expected) # interleave with only datetime64[ns] result = self.tzframe.values expected = np.array( [[ Timestamp('2013-01-01 00:00:00'), Timestamp('2013-01-02 00:00:00'), Timestamp('2013-01-03 00:00:00') ], [ Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern'), pd.NaT, Timestamp('2013-01-03 00:00:00-0500', tz='US/Eastern') ], [ Timestamp('2013-01-01 00:00:00+0100', tz='CET'), pd.NaT, Timestamp('2013-01-03 00:00:00+0100', tz='CET') ]], dtype=object).T tm.assert_numpy_array_equal(result, expected)
def test_generate_training_set(mocker):
    """
    End-to-end run of the test training-set pipeline in "testing" mode,
    with database access (``read_sql_table``) mocked out.

    Verifies, in order: the pipeline succeeds; the traffic and weather
    solids emit the expected records; the produced training set / labels
    match the fixture data; and exactly one GCS-upload materialization
    event is emitted with the expected asset key and metadata.
    """
    mocker.patch("dagster_examples.bay_bikes.solids.read_sql_table",
                 side_effect=mock_read_sql)

    # Execute Pipeline
    test_pipeline_result = execute_pipeline(
        pipeline=generate_test_training_set_pipeline,
        mode="testing",
        run_config=compose_training_data_env_dict(),
    )
    assert test_pipeline_result.success

    # Check solids
    EXPECTED_TRAFFIC_RECORDS = [
        {
            "interval_date": date(2019, 7, 31),
            "peak_traffic_load": 1,
            "time": Timestamp("2019-07-31 00:00:00"),
        },
        {
            "interval_date": date(2019, 8, 31),
            "peak_traffic_load": 1,
            "time": Timestamp("2019-08-31 00:00:00"),
        },
    ]
    traffic_dataset = test_pipeline_result.output_for_solid(
        "transform_into_traffic_dataset",
        output_name="traffic_dataframe").to_dict("records")
    assert all(record in EXPECTED_TRAFFIC_RECORDS
               for record in traffic_dataset)

    # NOTE(review): the *Time epoch fields below look like Dec-2018/Jan-2019
    # epochs despite the 2019-07/2019-08 'time' values — presumably fixture
    # data carried over from the mock; confirm against mock_read_sql.
    EXPECTED_WEATHER_RECORDS = [
        {
            "time": Timestamp("2019-08-31 00:00:00"),
            "summary": "Clear throughout the day.",
            "icon": "clear-day",
            "sunriseTime": 1546269960,
            "sunsetTime": 1546304520,
            "precipIntensity": 0.0007,
            "precipIntensityMax": 0.0019,
            "precipProbability": 0.05,
            "precipType": "rain",
            "temperatureHigh": 56.71,
            "temperatureHighTime": 1546294020,
            "temperatureLow": 44.75,
            "temperatureLowTime": 1546358040,
            "dewPoint": 28.34,
            "humidity": 0.43,
            "pressure": 1017.7,
            "windSpeed": 12.46,
            "windGust": 26.85,
            "windGustTime": 1546289220,
            "windBearing": 0,
            "cloudCover": 0.11,
            "uvIndex": 2,
            "uvIndexTime": 1546287180,
            "visibility": 10,
            "ozone": 314.4,
        },
        {
            "time": Timestamp("2019-07-31 00:00:00"),
            "summary": "Clear throughout the day.",
            "icon": "clear-day",
            "sunriseTime": 1546356420,
            "sunsetTime": 1546390920,
            "precipIntensity": 0.0005,
            "precipIntensityMax": 0.0016,
            "precipProbability": 0.02,
            "precipType": "sunny",
            "temperatureHigh": 55.91,
            "temperatureHighTime": 1546382040,
            "temperatureLow": 41.18,
            "temperatureLowTime": 1546437660,
            "dewPoint": 20.95,
            "humidity": 0.33,
            "pressure": 1023.3,
            "windSpeed": 6.77,
            "windGust": 22.08,
            "windGustTime": 1546343340,
            "windBearing": 22,
            "cloudCover": 0.1,
            "uvIndex": 2,
            "uvIndexTime": 1546373580,
            "visibility": 10,
            "ozone": 305.3,
        },
    ]
    weather_dataset = test_pipeline_result.output_for_solid(
        "produce_weather_dataset",
        output_name="weather_dataframe").to_dict("records")
    assert all(record in EXPECTED_WEATHER_RECORDS
               for record in weather_dataset)

    # Ensure we are generating the expected training set
    training_set, labels = test_pipeline_result.output_for_solid(
        "produce_training_set")
    assert len(labels) == 1 and labels[0] == 1
    # one sample of two time steps, 21 weather features each
    assert array_equal(
        training_set,
        [[
            [
                1546356420.0,
                1546390920.0,
                0.0005,
                0.0016,
                0.02,
                55.91,
                1546382040.0,
                41.18,
                1546437660.0,
                20.95,
                0.33,
                1023.3,
                6.77,
                22.08,
                1546343340.0,
                22.0,
                0.1,
                2.0,
                1546373580.0,
                10.0,
                305.3,
            ],
            [
                1546269960.0,
                1546304520.0,
                0.0007,
                0.0019,
                0.05,
                56.71,
                1546294020.0,
                44.75,
                1546358040.0,
                28.34,
                0.43,
                1017.7,
                12.46,
                26.85,
                1546289220.0,
                0.0,
                0.11,
                2.0,
                1546287180.0,
                10.0,
                314.4,
            ],
        ]],
    )
    # exactly one materialization event from the GCS-upload solid
    materialization_events = [
        event for event in test_pipeline_result.step_event_list
        if event.solid_name == "upload_training_set_to_gcs"
        and event.event_type_value == "STEP_MATERIALIZATION"
    ]
    assert len(materialization_events) == 1
    materialization = materialization_events[
        0].event_specific_data.materialization
    assert materialization.asset_key.path[0:5] == [
        "gs",
        "dagster",
        "scratch",
        "ccdfe1e",
        "training_data",
    ]
    materialization_event_metadata = materialization.metadata_entries
    assert len(materialization_event_metadata) == 1
    assert materialization_event_metadata[
        0].label == "google cloud storage URI"
    assert materialization_event_metadata[0].entry_data.text.startswith(
        "gs://dagster-scratch-ccdfe1e/training_data")

    # Clean up
    shutil.rmtree(os.path.join(tempfile.gettempdir(), "testing-storage"),
                  ignore_errors=True)
MockDailyBarReader, ) from zipline.testing.fixtures import ( WithAdjustmentReader, ZiplineTestCase, ) # Test calendar ranges over the month of June 2015 # June 2015 # Mo Tu We Th Fr Sa Su # 1 2 3 4 5 6 7 # 8 9 10 11 12 13 14 # 15 16 17 18 19 20 21 # 22 23 24 25 26 27 28 # 29 30 TEST_CALENDAR_START = Timestamp('2015-06-01', tz='UTC') TEST_CALENDAR_STOP = Timestamp('2015-06-30', tz='UTC') TEST_QUERY_START = Timestamp('2015-06-10', tz='UTC') TEST_QUERY_STOP = Timestamp('2015-06-19', tz='UTC') # One asset for each of the cases enumerated in load_raw_arrays_from_bcolz. EQUITY_INFO = DataFrame( [ # 1) The equity's trades start and end before query. { 'start_date': '2015-06-01', 'end_date': '2015-06-05' }, # 2) The equity's trades start and end after query. {
class TestDataFrameSetItem:
    """
    Tests for column assignment via ``DataFrame.__setitem__``
    (``df[key] = value``) across dtypes, duplicate columns, extension
    types, sparse values, generators, and alignment behavior.

    NOTE(review): ``float_frame``, ``timezone_frame`` and
    ``using_array_manager`` are fixtures defined elsewhere (conftest).
    """

    def test_setitem_str_subclass(self):
        # GH#37366
        class mystring(str):
            pass

        data = ["2020-10-22 01:21:00+00:00"]
        index = DatetimeIndex(data)
        df = DataFrame({"a": [1]}, index=index)
        df["b"] = 2
        df[mystring("c")] = 3
        expected = DataFrame({
            "a": [1],
            "b": [2],
            mystring("c"): [3]
        }, index=index)
        tm.assert_equal(df, expected)

    @pytest.mark.parametrize(
        "dtype", ["int32", "int64", "uint32", "uint64", "float32", "float64"])
    def test_setitem_dtype(self, dtype, float_frame):
        # assigned ndarray keeps its dtype
        arr = np.random.randn(len(float_frame))
        float_frame[dtype] = np.array(arr, dtype=dtype)
        assert float_frame[dtype].dtype.name == dtype

    def test_setitem_list_not_dataframe(self, float_frame):
        data = np.random.randn(len(float_frame), 2)
        float_frame[["A", "B"]] = data
        tm.assert_almost_equal(float_frame[["A", "B"]].values, data)

    def test_setitem_error_msmgs(self):
        # GH 7432
        df = DataFrame(
            {
                "bar": [1, 2, 3],
                "baz": ["d", "e", "f"]
            },
            index=Index(["a", "b", "c"], name="foo"),
        )
        ser = Series(
            ["g", "h", "i", "j"],
            index=Index(["a", "b", "c", "a"], name="foo"),
            name="fiz",
        )
        msg = "cannot reindex on an axis with duplicate labels"
        with pytest.raises(ValueError, match=msg):
            with tm.assert_produces_warning(FutureWarning,
                                            match="non-unique"):
                df["newcol"] = ser

        # GH 4107, more descriptive error message
        df = DataFrame(np.random.randint(0, 2, (4, 4)),
                       columns=["a", "b", "c", "d"])
        msg = "incompatible index of inserted column with frame index"
        with pytest.raises(TypeError, match=msg):
            df["gr"] = df.groupby(["b", "c"]).count()

    def test_setitem_benchmark(self):
        # from the vb_suite/frame_methods/frame_insert_columns
        N = 10
        K = 5
        df = DataFrame(index=range(N))
        new_col = np.random.randn(N)
        for i in range(K):
            df[i] = new_col
        expected = DataFrame(np.repeat(new_col, K).reshape(N, K),
                             index=range(N))
        tm.assert_frame_equal(df, expected)

    def test_setitem_different_dtype(self):
        df = DataFrame(np.random.randn(5, 3),
                       index=np.arange(5),
                       columns=["c", "b", "a"])
        df.insert(0, "foo", df["a"])
        df.insert(2, "bar", df["c"])

        # diff dtype

        # new item
        df["x"] = df["a"].astype("float32")
        result = df.dtypes
        expected = Series(
            [np.dtype("float64")] * 5 + [np.dtype("float32")],
            index=["foo", "c", "bar", "b", "a", "x"],
        )
        tm.assert_series_equal(result, expected)

        # replacing current (in different block)
        df["a"] = df["a"].astype("float32")
        result = df.dtypes
        expected = Series(
            [np.dtype("float64")] * 4 + [np.dtype("float32")] * 2,
            index=["foo", "c", "bar", "b", "a", "x"],
        )
        tm.assert_series_equal(result, expected)

        df["y"] = df["a"].astype("int32")
        result = df.dtypes
        expected = Series(
            [np.dtype("float64")] * 4 + [np.dtype("float32")] * 2 +
            [np.dtype("int32")],
            index=["foo", "c", "bar", "b", "a", "x", "y"],
        )
        tm.assert_series_equal(result, expected)

    def test_setitem_empty_columns(self):
        # GH 13522
        df = DataFrame(index=["A", "B", "C"])
        df["X"] = df.index
        df["X"] = ["x", "y", "z"]
        exp = DataFrame(data={"X": ["x", "y", "z"]}, index=["A", "B", "C"])
        tm.assert_frame_equal(df, exp)

    def test_setitem_dt64_index_empty_columns(self):
        rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s")
        df = DataFrame(index=np.arange(len(rng)))
        df["A"] = rng
        assert df["A"].dtype == np.dtype("M8[ns]")

    def test_setitem_timestamp_empty_columns(self):
        # GH#19843
        df = DataFrame(index=range(3))
        df["now"] = Timestamp("20130101", tz="UTC")
        expected = DataFrame([[Timestamp("20130101", tz="UTC")]] * 3,
                             index=[0, 1, 2],
                             columns=["now"])
        tm.assert_frame_equal(df, expected)

    def test_setitem_wrong_length_categorical_dtype_raises(self):
        # GH#29523
        cat = Categorical.from_codes([0, 1, 1, 0, 1, 2], ["a", "b", "c"])
        df = DataFrame(range(10), columns=["bar"])
        msg = (rf"Length of values \({len(cat)}\) "
               rf"does not match length of index \({len(df)}\)")
        with pytest.raises(ValueError, match=msg):
            df["foo"] = cat

    def test_setitem_with_sparse_value(self):
        # GH#8131
        df = DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]})
        sp_array = SparseArray([0, 0, 1])
        df["new_column"] = sp_array

        expected = Series(sp_array, name="new_column")
        tm.assert_series_equal(df["new_column"], expected)

    def test_setitem_with_unaligned_sparse_value(self):
        # index alignment applies to sparse Series as well
        df = DataFrame({"c_1": ["a", "b", "c"], "n_1": [1.0, 2.0, 3.0]})
        sp_series = Series(SparseArray([0, 0, 1]), index=[2, 1, 0])

        df["new_column"] = sp_series
        expected = Series(SparseArray([1, 0, 0]), name="new_column")
        tm.assert_series_equal(df["new_column"], expected)

    def test_setitem_period_preserves_dtype(self):
        # GH: 26861
        data = [Period("2003-12", "D")]
        result = DataFrame([])
        result["a"] = data

        expected = DataFrame({"a": data})

        tm.assert_frame_equal(result, expected)

    def test_setitem_dict_preserves_dtypes(self):
        # https://github.com/pandas-dev/pandas/issues/34573
        expected = DataFrame({
            "a": Series([0, 1, 2], dtype="int64"),
            "b": Series([1, 2, 3], dtype=float),
            "c": Series([1, 2, 3], dtype=float),
            "d": Series([1, 2, 3], dtype="uint32"),
        })
        df = DataFrame({
            "a": Series([], dtype="int64"),
            "b": Series([], dtype=float),
            "c": Series([], dtype=float),
            "d": Series([], dtype="uint32"),
        })
        for idx, b in enumerate([1, 2, 3]):
            df.loc[df.shape[0]] = {
                "a": int(idx),
                "b": float(b),
                "c": float(b),
                "d": np.uint32(b),
            }
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize(
        "obj,dtype",
        [
            (Period("2020-01"), PeriodDtype("M")),
            (
                Interval(left=0, right=5, inclusive="right"),
                IntervalDtype("int64", "right"),
            ),
            (
                Timestamp("2011-01-01", tz="US/Eastern"),
                DatetimeTZDtype(tz="US/Eastern"),
            ),
        ],
    )
    def test_setitem_extension_types(self, obj, dtype):
        # GH: 34832
        expected = DataFrame({
            "idx": [1, 2, 3],
            "obj": Series([obj] * 3, dtype=dtype)
        })

        df = DataFrame({"idx": [1, 2, 3]})
        df["obj"] = obj

        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize(
        "ea_name",
        [
            dtype.name for dtype in ea_registry.dtypes
            # property would require instantiation
            if not isinstance(dtype.name, property)
        ]
        # mypy doesn't allow adding lists of different types
        # https://github.com/python/mypy/issues/5492
        + ["datetime64[ns, UTC]", "period[D]"],  # type: ignore[list-item]
    )
    def test_setitem_with_ea_name(self, ea_name):
        # GH 38386
        result = DataFrame([0])
        result[ea_name] = [1]
        expected = DataFrame({0: [0], ea_name: [1]})
        tm.assert_frame_equal(result, expected)

    def test_setitem_dt64_ndarray_with_NaT_and_diff_time_units(self):
        # GH#7492
        data_ns = np.array([1, "nat"], dtype="datetime64[ns]")
        result = Series(data_ns).to_frame()
        result["new"] = data_ns
        expected = DataFrame({
            0: [1, None],
            "new": [1, None]
        }, dtype="datetime64[ns]")
        tm.assert_frame_equal(result, expected)

        # OutOfBoundsDatetime error shouldn't occur
        data_s = np.array([1, "nat"], dtype="datetime64[s]")
        result["new"] = data_s
        expected = DataFrame({
            0: [1, None],
            "new": [1e9, None]
        }, dtype="datetime64[ns]")
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"])
    def test_frame_setitem_datetime64_col_other_units(self, unit):
        # Check that non-nano dt64 values get cast to dt64 on setitem
        # into a not-yet-existing column
        n = 100

        dtype = np.dtype(f"M8[{unit}]")
        vals = np.arange(n, dtype=np.int64).view(dtype)
        ex_vals = vals.astype("datetime64[ns]")

        df = DataFrame({"ints": np.arange(n)}, index=np.arange(n))
        df[unit] = vals

        assert df[unit].dtype == np.dtype("M8[ns]")
        assert (df[unit].values == ex_vals).all()

    @pytest.mark.parametrize("unit", ["h", "m", "s", "ms", "D", "M", "Y"])
    def test_frame_setitem_existing_datetime64_col_other_units(self, unit):
        # Check that non-nano dt64 values get cast to dt64 on setitem
        # into an already-existing dt64 column
        n = 100

        dtype = np.dtype(f"M8[{unit}]")
        vals = np.arange(n, dtype=np.int64).view(dtype)
        ex_vals = vals.astype("datetime64[ns]")

        df = DataFrame({"ints": np.arange(n)}, index=np.arange(n))
        df["dates"] = np.arange(n, dtype=np.int64).view("M8[ns]")

        # We overwrite existing dt64 column with new, non-nano dt64 vals
        df["dates"] = vals
        assert (df["dates"].values == ex_vals).all()

    def test_setitem_dt64tz(self, timezone_frame):
        df = timezone_frame
        idx = df["B"].rename("foo")

        # setitem
        df["C"] = idx
        tm.assert_series_equal(df["C"], Series(idx, name="C"))

        df["D"] = "foo"
        df["D"] = idx
        tm.assert_series_equal(df["D"], Series(idx, name="D"))
        del df["D"]

        # assert that A & C are not sharing the same base (e.g. they
        # are copies)
        v1 = df._mgr.arrays[1]
        v2 = df._mgr.arrays[2]
        tm.assert_extension_array_equal(v1, v2)
        v1base = v1._data.base
        v2base = v2._data.base
        assert v1base is None or (id(v1base) != id(v2base))

        # with nan
        df2 = df.copy()
        df2.iloc[1, 1] = NaT
        df2.iloc[1, 2] = NaT
        result = df2["B"]
        tm.assert_series_equal(notna(result),
                               Series([True, False, True], name="B"))
        tm.assert_series_equal(df2.dtypes, df.dtypes)

    def test_setitem_periodindex(self):
        rng = period_range("1/1/2000", periods=5, name="index")
        df = DataFrame(np.random.randn(5, 3), index=rng)

        df["Index"] = rng
        rs = Index(df["Index"])
        tm.assert_index_equal(rs, rng, check_names=False)
        assert rs.name == "Index"
        assert rng.name == "index"

        rs = df.reset_index().set_index("index")
        assert isinstance(rs.index, PeriodIndex)
        tm.assert_index_equal(rs.index, rng)

    def test_setitem_complete_column_with_array(self):
        # GH#37954
        df = DataFrame({"a": ["one", "two", "three"], "b": [1, 2, 3]})
        arr = np.array([[1, 1], [3, 1], [5, 1]])
        df[["c", "d"]] = arr
        expected = DataFrame({
            "a": ["one", "two", "three"],
            "b": [1, 2, 3],
            "c": [1, 3, 5],
            "d": [1, 1, 1],
        })
        expected["c"] = expected["c"].astype(arr.dtype)
        expected["d"] = expected["d"].astype(arr.dtype)
        assert expected["c"].dtype == arr.dtype
        assert expected["d"].dtype == arr.dtype
        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize("dtype", ["f8", "i8", "u8"])
    def test_setitem_bool_with_numeric_index(self, dtype):
        # GH#36319
        cols = Index([1, 2, 3], dtype=dtype)
        df = DataFrame(np.random.randn(3, 3), columns=cols)

        df[False] = ["a", "b", "c"]

        expected_cols = Index([1, 2, 3, False], dtype=object)
        if dtype == "f8":
            expected_cols = Index([1.0, 2.0, 3.0, False], dtype=object)

        tm.assert_index_equal(df.columns, expected_cols)

    @pytest.mark.parametrize("indexer", ["B", ["B"]])
    def test_setitem_frame_length_0_str_key(self, indexer):
        # GH#38831
        df = DataFrame(columns=["A", "B"])
        other = DataFrame({"B": [1, 2]})
        df[indexer] = other
        expected = DataFrame({"A": [np.nan] * 2, "B": [1, 2]})
        expected["A"] = expected["A"].astype("object")
        tm.assert_frame_equal(df, expected)

    def test_setitem_frame_duplicate_columns(self, using_array_manager):
        # GH#15695
        warn = FutureWarning if using_array_manager else None
        msg = "will attempt to set the values inplace"

        cols = ["A", "B", "C"] * 2
        df = DataFrame(index=range(3), columns=cols)
        df.loc[0, "A"] = (0, 3)
        with tm.assert_produces_warning(warn, match=msg):
            df.loc[:, "B"] = (1, 4)
        df["C"] = (2, 5)
        expected = DataFrame(
            [
                [0, 1, 2, 3, 4, 5],
                [np.nan, 1, 2, np.nan, 4, 5],
                [np.nan, 1, 2, np.nan, 4, 5],
            ],
            dtype="object",
        )

        if using_array_manager:
            # setitem replaces column so changes dtype
            expected.columns = cols
            expected["C"] = expected["C"].astype("int64")

            # TODO(ArrayManager) .loc still overwrites
            expected["B"] = expected["B"].astype("int64")
        else:
            # set these with unique columns to be extra-unambiguous
            expected[2] = expected[2].astype(np.int64)
            expected[5] = expected[5].astype(np.int64)
            expected.columns = cols

        tm.assert_frame_equal(df, expected)

    def test_setitem_frame_duplicate_columns_size_mismatch(self):
        # GH#39510
        cols = ["A", "B", "C"] * 2
        df = DataFrame(index=range(3), columns=cols)
        with pytest.raises(ValueError,
                           match="Columns must be same length as key"):
            df[["A"]] = (0, 3, 5)

        df2 = df.iloc[:, :3]  # unique columns
        with pytest.raises(ValueError,
                           match="Columns must be same length as key"):
            df2[["A"]] = (0, 3, 5)

    @pytest.mark.parametrize("cols", [["a", "b", "c"], ["a", "a", "a"]])
    def test_setitem_df_wrong_column_number(self, cols):
        # GH#38604
        df = DataFrame([[1, 2, 3]], columns=cols)
        rhs = DataFrame([[10, 11]], columns=["d", "e"])
        msg = "Columns must be same length as key"
        with pytest.raises(ValueError, match=msg):
            df["a"] = rhs

    def test_setitem_listlike_indexer_duplicate_columns(self):
        # GH#38604
        df = DataFrame([[1, 2, 3]], columns=["a", "b", "b"])
        rhs = DataFrame([[10, 11, 12]], columns=["a", "b", "b"])
        df[["a", "b"]] = rhs
        expected = DataFrame([[10, 11, 12]], columns=["a", "b", "b"])
        tm.assert_frame_equal(df, expected)

        df[["c", "b"]] = rhs
        expected = DataFrame([[10, 11, 12, 10]],
                             columns=["a", "b", "b", "c"])
        tm.assert_frame_equal(df, expected)

    def test_setitem_listlike_indexer_duplicate_columns_not_equal_length(self):
        # GH#39403
        df = DataFrame([[1, 2, 3]], columns=["a", "b", "b"])
        rhs = DataFrame([[10, 11]], columns=["a", "b"])
        msg = "Columns must be same length as key"
        with pytest.raises(ValueError, match=msg):
            df[["a", "b"]] = rhs

    def test_setitem_intervals(self):
        df = DataFrame({"A": range(10)})
        ser = cut(df["A"], 5)
        assert isinstance(ser.cat.categories, IntervalIndex)

        # B & D end up as Categoricals
        # the remainder are converted to in-line objects
        # containing an IntervalIndex.values
        df["B"] = ser
        df["C"] = np.array(ser)
        df["D"] = ser.values
        df["E"] = np.array(ser.values)
        df["F"] = ser.astype(object)

        assert is_categorical_dtype(df["B"].dtype)
        assert is_interval_dtype(df["B"].cat.categories)
        assert is_categorical_dtype(df["D"].dtype)
        assert is_interval_dtype(df["D"].cat.categories)

        # These go through the Series constructor and so get inferred back
        # to IntervalDtype
        assert is_interval_dtype(df["C"])
        assert is_interval_dtype(df["E"])

        # But the Series constructor doesn't do inference on Series objects,
        # so setting df["F"] doesn't get cast back to IntervalDtype
        assert is_object_dtype(df["F"])

        # they compare equal as Index
        # when converted to numpy objects
        c = lambda x: Index(np.array(x))
        tm.assert_index_equal(c(df.B), c(df.B))
        tm.assert_index_equal(c(df.B), c(df.C), check_names=False)
        tm.assert_index_equal(c(df.B), c(df.D), check_names=False)
        tm.assert_index_equal(c(df.C), c(df.D), check_names=False)

        # B & D are the same Series
        tm.assert_series_equal(df["B"], df["B"])
        tm.assert_series_equal(df["B"], df["D"], check_names=False)

        # C & E are the same Series
        tm.assert_series_equal(df["C"], df["C"])
        tm.assert_series_equal(df["C"], df["E"], check_names=False)

    def test_setitem_categorical(self):
        # GH#35369
        df = DataFrame({"h": Series(list("mn")).astype("category")})
        df.h = df.h.cat.reorder_categories(["n", "m"])
        expected = DataFrame(
            {"h": Categorical(["m", "n"]).reorder_categories(["n", "m"])})
        tm.assert_frame_equal(df, expected)

    def test_setitem_with_empty_listlike(self):
        # GH#17101
        index = Index([], name="idx")
        result = DataFrame(columns=["A"], index=index)
        result["A"] = []
        expected = DataFrame(columns=["A"], index=index)
        tm.assert_index_equal(result.index, expected.index)

    @pytest.mark.parametrize(
        "cols, values, expected",
        [
            (["C", "D", "D", "a"], [1, 2, 3, 4], 4),  # with duplicates
            (["D", "C", "D", "a"], [1, 2, 3, 4], 4),  # mixed order
            (["C", "B", "B", "a"], [1, 2, 3, 4], 4),  # other duplicate cols
            (["C", "B", "a"], [1, 2, 3], 3),  # no duplicates
            (["B", "C", "a"], [3, 2, 1], 1),  # alphabetical order
            (["C", "a", "B"], [3, 2, 1], 2),  # in the middle
        ],
    )
    def test_setitem_same_column(self, cols, values, expected):
        # GH#23239
        df = DataFrame([values], columns=cols)
        df["a"] = df["a"]
        result = df["a"].values[0]
        assert result == expected

    def test_setitem_multi_index(self):
        # GH#7655, test that assigning to a sub-frame of a frame
        # with multi-index columns aligns both rows and columns
        it = ["jim", "joe", "jolie"], ["first", "last"], ["left", "center",
                                                          "right"]

        cols = MultiIndex.from_product(it)
        index = date_range("20141006", periods=20)
        vals = np.random.randint(1, 1000, (len(index), len(cols)))
        df = DataFrame(vals, columns=cols, index=index)

        i, j = df.index.values.copy(), it[-1][:]

        np.random.shuffle(i)
        df["jim"] = df["jolie"].loc[i, ::-1]
        tm.assert_frame_equal(df["jim"], df["jolie"])

        np.random.shuffle(j)
        df[("joe", "first")] = df[("jolie", "last")].loc[i, j]
        tm.assert_frame_equal(df[("joe", "first")], df[("jolie", "last")])

        np.random.shuffle(j)
        df[("joe", "last")] = df[("jolie", "first")].loc[i, j]
        tm.assert_frame_equal(df[("joe", "last")], df[("jolie", "first")])

    @pytest.mark.parametrize(
        "columns,box,expected",
        [
            (
                ["A", "B", "C", "D"],
                7,
                DataFrame(
                    [[7, 7, 7, 7], [7, 7, 7, 7], [7, 7, 7, 7]],
                    columns=["A", "B", "C", "D"],
                ),
            ),
            (
                ["C", "D"],
                [7, 8],
                DataFrame(
                    [[1, 2, 7, 8], [3, 4, 7, 8], [5, 6, 7, 8]],
                    columns=["A", "B", "C", "D"],
                ),
            ),
            (
                ["A", "B", "C"],
                np.array([7, 8, 9], dtype=np.int64),
                DataFrame([[7, 8, 9], [7, 8, 9], [7, 8, 9]],
                          columns=["A", "B", "C"]),
            ),
            (
                ["B", "C", "D"],
                [[7, 8, 9], [10, 11, 12], [13, 14, 15]],
                DataFrame(
                    [[1, 7, 8, 9], [3, 10, 11, 12], [5, 13, 14, 15]],
                    columns=["A", "B", "C", "D"],
                ),
            ),
            (
                ["C", "A", "D"],
                np.array([[7, 8, 9], [10, 11, 12], [13, 14, 15]],
                         dtype=np.int64),
                DataFrame(
                    [[8, 2, 7, 9], [11, 4, 10, 12], [14, 6, 13, 15]],
                    columns=["A", "B", "C", "D"],
                ),
            ),
            (
                ["A", "C"],
                DataFrame([[7, 8], [9, 10], [11, 12]], columns=["A", "C"]),
                DataFrame([[7, 2, 8], [9, 4, 10], [11, 6, 12]],
                          columns=["A", "B", "C"]),
            ),
        ],
    )
    def test_setitem_list_missing_columns(self, columns, box, expected):
        # GH#29334
        df = DataFrame([[1, 2], [3, 4], [5, 6]], columns=["A", "B"])
        df[columns] = box
        tm.assert_frame_equal(df, expected)

    def test_setitem_list_of_tuples(self, float_frame):
        tuples = list(zip(float_frame["A"], float_frame["B"]))
        float_frame["tuples"] = tuples

        result = float_frame["tuples"]
        expected = Series(tuples, index=float_frame.index, name="tuples")
        tm.assert_series_equal(result, expected)

    def test_setitem_iloc_generator(self):
        # GH#39614
        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        indexer = (x for x in [1, 2])
        df.iloc[indexer] = 1
        expected = DataFrame({"a": [1, 1, 1], "b": [4, 1, 1]})
        tm.assert_frame_equal(df, expected)

    def test_setitem_iloc_two_dimensional_generator(self):
        df = DataFrame({"a": [1, 2, 3], "b": [4, 5, 6]})
        indexer = (x for x in [1, 2])
        df.iloc[indexer, 1] = 1
        expected = DataFrame({"a": [1, 2, 3], "b": [4, 1, 1]})
        tm.assert_frame_equal(df, expected)

    def test_setitem_dtypes_bytes_type_to_object(self):
        # GH 20734
        index = Series(name="id", dtype="S24")
        df = DataFrame(index=index)
        df["a"] = Series(name="a", index=index, dtype=np.uint32)
        df["b"] = Series(name="b", index=index, dtype="S64")
        df["c"] = Series(name="c", index=index, dtype="S64")
        df["d"] = Series(name="d", index=index, dtype=np.uint8)
        result = df.dtypes
        expected = Series([np.uint32, object, object, np.uint8],
                          index=list("abcd"))
        tm.assert_series_equal(result, expected)

    def test_boolean_mask_nullable_int64(self):
        # GH 28928
        result = DataFrame({
            "a": [3, 4],
            "b": [5, 6]
        }).astype({
            "a": "int64",
            "b": "Int64"
        })
        mask = Series(False, index=result.index)
        result.loc[mask, "a"] = result["a"]
        result.loc[mask, "b"] = result["b"]

        expected = DataFrame({
            "a": [3, 4],
            "b": [5, 6]
        }).astype({
            "a": "int64",
            "b": "Int64"
        })
        tm.assert_frame_equal(result, expected)

    def test_setitem_ea_dtype_rhs_series(self):
        # GH#47425
        df = DataFrame({"a": [1, 2]})
        df["a"] = Series([1, 2], dtype="Int64")
        expected = DataFrame({"a": [1, 2]}, dtype="Int64")
        tm.assert_frame_equal(df, expected)

    # TODO(ArrayManager) set column with 2d column array, see #44788
    @td.skip_array_manager_not_yet_implemented
    def test_setitem_npmatrix_2d(self):
        # GH#42376
        # for use-case df["x"] = sparse.random(10, 10).mean(axis=1)
        expected = DataFrame(
            {
                "np-array": np.ones(10),
                "np-matrix": np.ones(10)
            }, index=np.arange(10))

        a = np.ones((10, 1))
        df = DataFrame(index=np.arange(10))
        df["np-array"] = a

        # Instantiation of `np.matrix` gives PendingDeprecationWarning
        with tm.assert_produces_warning(PendingDeprecationWarning):
            df["np-matrix"] = np.matrix(a)

        tm.assert_frame_equal(df, expected)

    @pytest.mark.parametrize("vals", [{}, {"d": "a"}])
    def test_setitem_aligning_dict_with_index(self, vals):
        # GH#47216
        df = DataFrame({"a": [1, 2], "b": [3, 4], **vals})
        df.loc[:, "a"] = {1: 100, 0: 200}
        df.loc[:, "c"] = {0: 5, 1: 6}
        df.loc[:, "e"] = {1: 5}
        expected = DataFrame({
            "a": [200, 100],
            "b": [3, 4],
            **vals, "c": [5, 6],
            "e": [np.nan, 5]
        })
        tm.assert_frame_equal(df, expected)

    def test_setitem_rhs_dataframe(self):
        # GH#47578
        df = DataFrame({"a": [1, 2]})
        df["a"] = DataFrame({"a": [10, 11]}, index=[1, 2])
        expected = DataFrame({"a": [np.nan, 10]})
        tm.assert_frame_equal(df, expected)

        df = DataFrame({"a": [1, 2]})
        df.isetitem(0, DataFrame({"a": [10, 11]}, index=[1, 2]))
        tm.assert_frame_equal(df, expected)
def test_000constructor_resolution(self): # 2252 t1 = Timestamp((1352934390 * 1000000000) + 1000000 + 1000 + 1) idx = DatetimeIndex([t1]) assert idx.nanosecond[0] == t1.nanosecond
def test_constructor_with_int_tz(self, klass, box, tz, dtype): # GH 20997, 20964 ts = Timestamp("2018-01-01", tz=tz) result = klass(box([ts.value]), dtype=dtype) expected = klass([ts]) assert result == expected
tm.assert_frame_equal(result, expected) # prod result = df.groupby(pd.cut(df["a"], grps), observed=observed)._cython_agg_general( "prod" ) expected = pd.DataFrame( {"a": [1, 1, 1716, 1]}, index=pd.CategoricalIndex(intervals, name="a", ordered=True), ) if observed: expected = expected[expected.a != 1] tm.assert_frame_equal(result, expected) @pytest.mark.parametrize("op", ["first", "last", "max", "min"]) @pytest.mark.parametrize( "data", [Timestamp("2016-10-14 21:00:44.557"), Timedelta("17088 days 21:00:44.557")] ) def test_cython_with_timestamp_and_nat(op, data): # https://github.com/pandas-dev/pandas/issues/19526 df = DataFrame({"a": [0, 1], "b": [data, NaT]}) index = Index([0, 1], name="a") # We will group by a and test the cython aggregations expected = DataFrame({"b": [data, NaT]}, index=index) result = df.groupby("a").aggregate(op) tm.assert_frame_equal(expected, result)
def test_compare_hour13(self): r = Timestamp("2000-08-12T13:00:00").to_julian_date() assert r == 2_451_769.0416666666666666
def test_constructor_with_non_normalized_pytz(self, tz):
    """DatetimeIndex normalizes a non-normalized pytz tz taken from a
    Timestamp back to the canonical zone (GH 18595)."""
    raw_tz = Timestamp("2010", tz=tz).tz
    result = DatetimeIndex(["2010"], tz=raw_tz)
    assert pytz.timezone(tz) is result.tz
class TestTimedeltaMultiplicationDivision:
    """
    Tests for Timedelta methods:

        __mul__, __rmul__,
        __div__, __rdiv__,
        __truediv__, __rtruediv__,
        __floordiv__, __rfloordiv__,
        __mod__, __rmod__,
        __divmod__, __rdivmod__
    """

    # ---------------------------------------------------------------
    # Timedelta.__mul__, __rmul__

    @pytest.mark.parametrize(
        "td_nat", [NaT, np.timedelta64("NaT", "ns"), np.timedelta64("NaT")]
    )
    @pytest.mark.parametrize("op", [operator.mul, ops.rmul])
    def test_td_mul_nat(self, op, td_nat):
        # GH#19819: multiplying by any NaT-like must raise, not return NaT
        td = Timedelta(10, unit="d")
        with pytest.raises(TypeError):
            op(td, td_nat)

    @pytest.mark.parametrize("nan", [np.nan, np.float64("NaN"), float("nan")])
    @pytest.mark.parametrize("op", [operator.mul, ops.rmul])
    def test_td_mul_nan(self, op, nan):
        # np.float64('NaN') has a 'dtype' attr, avoid treating as array
        td = Timedelta(10, unit="d")
        result = op(td, nan)
        assert result is NaT

    @pytest.mark.parametrize("op", [operator.mul, ops.rmul])
    def test_td_mul_scalar(self, op):
        # GH#19738
        td = Timedelta(minutes=3)

        result = op(td, 2)
        assert result == Timedelta(minutes=6)

        result = op(td, 1.5)
        assert result == Timedelta(minutes=4, seconds=30)

        assert op(td, np.nan) is NaT

        assert op(-1, td).value == -1 * td.value
        assert op(-1.0, td).value == -1.0 * td.value

        with pytest.raises(TypeError):
            # timedelta * datetime is gibberish
            op(td, Timestamp(2016, 1, 2))
        with pytest.raises(TypeError):
            # invalid multiply with another timedelta
            op(td, td)

    # ---------------------------------------------------------------
    # Timedelta.__div__, __truediv__

    def test_td_div_timedeltalike_scalar(self):
        # GH#19738: dividing by a timedelta-like yields a plain number
        td = Timedelta(10, unit="d")

        result = td / offsets.Hour(1)
        assert result == 240

        assert td / td == 1
        assert td / np.timedelta64(60, "h") == 4

        assert np.isnan(td / NaT)

    def test_td_div_numeric_scalar(self):
        # GH#19738: dividing by a number scales and stays a Timedelta
        td = Timedelta(10, unit="d")

        result = td / 2
        assert isinstance(result, Timedelta)
        assert result == Timedelta(days=5)

        result = td / 5.0
        assert isinstance(result, Timedelta)
        assert result == Timedelta(days=2)

    @pytest.mark.parametrize("nan", [np.nan, np.float64("NaN"), float("nan")])
    def test_td_div_nan(self, nan):
        # np.float64('NaN') has a 'dtype' attr, avoid treating as array
        td = Timedelta(10, unit="d")
        result = td / nan
        assert result is NaT

        result = td // nan
        assert result is NaT

    # ---------------------------------------------------------------
    # Timedelta.__rdiv__

    def test_td_rdiv_timedeltalike_scalar(self):
        # GH#19738
        td = Timedelta(10, unit="d")
        result = offsets.Hour(1) / td
        assert result == 1 / 240.0

        assert np.timedelta64(60, "h") / td == 0.25

    # ---------------------------------------------------------------
    # Timedelta.__floordiv__

    def test_td_floordiv_timedeltalike_scalar(self):
        # GH#18846
        td = Timedelta(hours=3, minutes=4)
        scalar = Timedelta(hours=3, minutes=3)

        assert td // scalar == 1
        assert -td // scalar.to_pytimedelta() == -2
        assert (2 * td) // scalar.to_timedelta64() == 2

    def test_td_floordiv_null_scalar(self):
        # GH#18846: floordiv by any NaT-like propagates missingness
        td = Timedelta(hours=3, minutes=4)

        assert td // np.nan is NaT
        assert np.isnan(td // NaT)
        assert np.isnan(td // np.timedelta64("NaT"))

    def test_td_floordiv_offsets(self):
        # GH#19738
        td = Timedelta(hours=3, minutes=4)
        assert td // offsets.Hour(1) == 3
        assert td // offsets.Minute(2) == 92

    def test_td_floordiv_invalid_scalar(self):
        # GH#18846: floordiv by a datetime is meaningless and must raise
        td = Timedelta(hours=3, minutes=4)
        with pytest.raises(TypeError):
            td // np.datetime64("2016-01-01", dtype="datetime64[us]")

    def test_td_floordiv_numeric_scalar(self):
        # GH#18846: all numeric scalar flavors behave identically
        td = Timedelta(hours=3, minutes=4)

        expected = Timedelta(hours=1, minutes=32)
        assert td // 2 == expected
        assert td // 2.0 == expected
        assert td // np.float64(2.0) == expected
        assert td // np.int32(2.0) == expected
        assert td // np.uint8(2.0) == expected

    def test_td_floordiv_timedeltalike_array(self):
        # GH#18846
        td = Timedelta(hours=3, minutes=4)
        scalar = Timedelta(hours=3, minutes=3)

        # Array-like others
        assert td // np.array(scalar.to_timedelta64()) == 1

        res = (3 * td) // np.array([scalar.to_timedelta64()])
        expected = np.array([3], dtype=np.int64)
        tm.assert_numpy_array_equal(res, expected)

        # NaT entries floor-divide to nan, upcasting the result to float
        res = (10 * td) // np.array([scalar.to_timedelta64(), np.timedelta64("NaT")])
        expected = np.array([10, np.nan])
        tm.assert_numpy_array_equal(res, expected)

    def test_td_floordiv_numeric_series(self):
        # GH#18846: Timedelta // numeric Series stays timedelta-dtyped
        td = Timedelta(hours=3, minutes=4)
        ser = pd.Series([1], dtype=np.int64)
        res = td // ser
        assert res.dtype.kind == "m"

    # ---------------------------------------------------------------
    # Timedelta.__rfloordiv__

    def test_td_rfloordiv_timedeltalike_scalar(self):
        # GH#18846
        td = Timedelta(hours=3, minutes=3)
        scalar = Timedelta(hours=3, minutes=4)

        # scalar others
        # x // Timedelta is defined only for timedelta-like x. int-like,
        # float-like, and date-like, in particular, should all either
        # a) raise TypeError directly or
        # b) return NotImplemented, following which the reversed
        #    operation will raise TypeError.
        assert td.__rfloordiv__(scalar) == 1
        assert (-td).__rfloordiv__(scalar.to_pytimedelta()) == -2
        assert (2 * td).__rfloordiv__(scalar.to_timedelta64()) == 0

    def test_td_rfloordiv_null_scalar(self):
        # GH#18846
        td = Timedelta(hours=3, minutes=3)

        assert np.isnan(td.__rfloordiv__(NaT))
        assert np.isnan(td.__rfloordiv__(np.timedelta64("NaT")))

    def test_td_rfloordiv_offsets(self):
        # GH#19738
        assert offsets.Hour(1) // Timedelta(minutes=25) == 2

    def test_td_rfloordiv_invalid_scalar(self):
        # GH#18846
        td = Timedelta(hours=3, minutes=3)

        dt64 = np.datetime64("2016-01-01", dtype="datetime64[us]")
        with pytest.raises(TypeError):
            td.__rfloordiv__(dt64)

    def test_td_rfloordiv_numeric_scalar(self):
        # GH#18846: python numbers defer via NotImplemented; numpy
        # scalars raise directly because they do not defer back to us
        td = Timedelta(hours=3, minutes=3)

        assert td.__rfloordiv__(np.nan) is NotImplemented
        assert td.__rfloordiv__(3.5) is NotImplemented
        assert td.__rfloordiv__(2) is NotImplemented

        with pytest.raises(TypeError):
            td.__rfloordiv__(np.float64(2.0))
        with pytest.raises(TypeError):
            td.__rfloordiv__(np.uint8(9))
        with pytest.raises(TypeError, match="Invalid dtype"):
            # deprecated GH#19761, enforced GH#29797
            td.__rfloordiv__(np.int32(2.0))

    def test_td_rfloordiv_timedeltalike_array(self):
        # GH#18846
        td = Timedelta(hours=3, minutes=3)
        scalar = Timedelta(hours=3, minutes=4)

        # Array-like others
        assert td.__rfloordiv__(np.array(scalar.to_timedelta64())) == 1

        res = td.__rfloordiv__(np.array([(3 * scalar).to_timedelta64()]))
        expected = np.array([3], dtype=np.int64)
        tm.assert_numpy_array_equal(res, expected)

        # a NaT entry upcasts the result array to float
        arr = np.array([(10 * scalar).to_timedelta64(), np.timedelta64("NaT")])
        res = td.__rfloordiv__(arr)
        expected = np.array([10, np.nan])
        tm.assert_numpy_array_equal(res, expected)

    def test_td_rfloordiv_numeric_series(self):
        # GH#18846
        td = Timedelta(hours=3, minutes=3)
        ser = pd.Series([1], dtype=np.int64)
        res = td.__rfloordiv__(ser)
        assert res is NotImplemented

        with pytest.raises(TypeError, match="Invalid dtype"):
            # Deprecated GH#19761, enforced GH#29797
            # TODO: GH-19761. Change to TypeError.
            ser // td

    # ----------------------------------------------------------------
    # Timedelta.__mod__, __rmod__

    def test_mod_timedeltalike(self):
        # GH#19365
        td = Timedelta(hours=37)

        # Timedelta-like others
        result = td % Timedelta(hours=6)
        assert isinstance(result, Timedelta)
        assert result == Timedelta(hours=1)

        result = td % timedelta(minutes=60)
        assert isinstance(result, Timedelta)
        assert result == Timedelta(0)

        result = td % NaT
        assert result is NaT

    def test_mod_timedelta64_nat(self):
        # GH#19365
        td = Timedelta(hours=37)

        result = td % np.timedelta64("NaT", "ns")
        assert result is NaT

    def test_mod_timedelta64(self):
        # GH#19365
        td = Timedelta(hours=37)

        result = td % np.timedelta64(2, "h")
        assert isinstance(result, Timedelta)
        assert result == Timedelta(hours=1)

    def test_mod_offset(self):
        # GH#19365
        td = Timedelta(hours=37)

        result = td % offsets.Hour(5)
        assert isinstance(result, Timedelta)
        assert result == Timedelta(hours=2)

    def test_mod_numeric(self):
        # GH#19365: numeric other is interpreted as nanoseconds
        td = Timedelta(hours=37)

        # Numeric Others
        result = td % 2
        assert isinstance(result, Timedelta)
        assert result == Timedelta(0)

        result = td % 1e12
        assert isinstance(result, Timedelta)
        assert result == Timedelta(minutes=3, seconds=20)

        result = td % int(1e12)
        assert isinstance(result, Timedelta)
        assert result == Timedelta(minutes=3, seconds=20)

    def test_mod_invalid(self):
        # GH#19365
        td = Timedelta(hours=37)

        with pytest.raises(TypeError):
            td % Timestamp("2018-01-22")

        with pytest.raises(TypeError):
            td % []

    def test_rmod_pytimedelta(self):
        # GH#19365
        td = Timedelta(minutes=3)

        result = timedelta(minutes=4) % td
        assert isinstance(result, Timedelta)
        assert result == Timedelta(minutes=1)

    def test_rmod_timedelta64(self):
        # GH#19365
        td = Timedelta(minutes=3)
        result = np.timedelta64(5, "m") % td
        assert isinstance(result, Timedelta)
        assert result == Timedelta(minutes=2)

    def test_rmod_invalid(self):
        # GH#19365
        td = Timedelta(minutes=3)

        with pytest.raises(TypeError):
            Timestamp("2018-01-22") % td

        with pytest.raises(TypeError):
            15 % td

        with pytest.raises(TypeError):
            16.0 % td

        with pytest.raises(TypeError):
            np.array([22, 24]) % td

    # ----------------------------------------------------------------
    # Timedelta.__divmod__, __rdivmod__

    def test_divmod_numeric(self):
        # GH#19365: numeric divisor is interpreted as nanoseconds
        td = Timedelta(days=2, hours=6)

        result = divmod(td, 53 * 3600 * 1e9)
        assert result[0] == Timedelta(1, unit="ns")
        assert isinstance(result[1], Timedelta)
        assert result[1] == Timedelta(hours=1)

        # sanity: the divmod tuple is truthy
        assert result
        result = divmod(td, np.nan)
        assert result[0] is NaT
        assert result[1] is NaT

    def test_divmod(self):
        # GH#19365
        td = Timedelta(days=2, hours=6)

        result = divmod(td, timedelta(days=1))
        assert result[0] == 2
        assert isinstance(result[1], Timedelta)
        assert result[1] == Timedelta(hours=6)

        result = divmod(td, 54)
        assert result[0] == Timedelta(hours=1)
        assert isinstance(result[1], Timedelta)
        assert result[1] == Timedelta(0)

        result = divmod(td, NaT)
        assert np.isnan(result[0])
        assert result[1] is NaT

    def test_divmod_offset(self):
        # GH#19365: floor-division semantics with a negative divisor
        td = Timedelta(days=2, hours=6)

        result = divmod(td, offsets.Hour(-4))
        assert result[0] == -14
        assert isinstance(result[1], Timedelta)
        assert result[1] == Timedelta(hours=-2)

    def test_divmod_invalid(self):
        # GH#19365
        td = Timedelta(days=2, hours=6)

        with pytest.raises(TypeError):
            divmod(td, Timestamp("2018-01-22"))

    def test_rdivmod_pytimedelta(self):
        # GH#19365
        result = divmod(timedelta(days=2, hours=6), Timedelta(days=1))
        assert result[0] == 2
        assert isinstance(result[1], Timedelta)
        assert result[1] == Timedelta(hours=6)

    def test_rdivmod_offset(self):
        result = divmod(offsets.Hour(54), Timedelta(hours=-4))
        assert result[0] == -14
        assert isinstance(result[1], Timedelta)
        assert result[1] == Timedelta(hours=-2)

    def test_rdivmod_invalid(self):
        # GH#19365
        td = Timedelta(minutes=3)

        with pytest.raises(TypeError):
            divmod(Timestamp("2018-01-22"), td)

        with pytest.raises(TypeError):
            divmod(15, td)

        with pytest.raises(TypeError):
            divmod(16.0, td)

        with pytest.raises(TypeError):
            divmod(np.array([22, 24]), td)

    # ----------------------------------------------------------------

    @pytest.mark.parametrize(
        "op", [operator.mul, ops.rmul, operator.truediv, ops.rdiv, ops.rsub]
    )
    @pytest.mark.parametrize(
        "arr",
        [
            np.array([Timestamp("20130101 9:01"), Timestamp("20121230 9:02")]),
            np.array([Timestamp.now(), Timedelta("1D")]),
        ],
    )
    def test_td_op_timedelta_timedeltalike_array(self, op, arr):
        # arithmetic between a Timedelta and an object array holding
        # datetimes must raise rather than broadcast
        with pytest.raises(TypeError):
            op(arr, Timedelta("1D"))
class TestContains:
    def test_contains(self):
        # Membership is defined over the values of the index, not over
        # the underlying integer codes.
        idx = CategoricalIndex(list("aabbca"), categories=list("cabdef"), ordered=False)

        assert "a" in idx
        assert "z" not in idx
        assert "e" not in idx
        assert np.nan not in idx

        # the codes are an implementation detail and must not be members
        assert 0 not in idx
        assert 1 not in idx

    def test_contains_nan(self):
        # NaN entries in the data (not the categories) still count as members
        idx = CategoricalIndex(list("aabbca") + [np.nan], categories=list("cabdef"))
        assert np.nan in idx

    @pytest.mark.parametrize("unwrap", [True, False])
    def test_contains_na_dtype(self, unwrap):
        # NA-likes only match when they are valid for the category dtype:
        # np.datetime64("NaT") only for datetime categories,
        # np.timedelta64("NaT") only for timedelta categories.
        dti = pd.date_range("2016-01-01", periods=100).insert(0, pd.NaT)
        pi = dti.to_period("D")
        tdi = dti - dti[-1]

        def build(values):
            # optionally exercise the backing Categorical instead of the index
            ci = CategoricalIndex(values)
            return ci._data if unwrap else ci

        dt_obj = build(dti)
        assert np.nan in dt_obj
        assert None in dt_obj
        assert pd.NaT in dt_obj
        assert np.datetime64("NaT") in dt_obj
        assert np.timedelta64("NaT") not in dt_obj

        td_obj = build(tdi)
        assert np.nan in td_obj
        assert None in td_obj
        assert pd.NaT in td_obj
        assert np.datetime64("NaT") not in td_obj
        assert np.timedelta64("NaT") in td_obj

        per_obj = build(pi)
        assert np.nan in per_obj
        assert None in per_obj
        assert pd.NaT in per_obj
        assert np.datetime64("NaT") not in per_obj
        assert np.timedelta64("NaT") not in per_obj

    @pytest.mark.parametrize(
        "item, expected",
        [
            (pd.Interval(0, 1), True),
            (1.5, True),
            (pd.Interval(0.5, 1.5), False),
            ("a", False),
            (Timestamp(1), False),
            (pd.Timedelta(1), False),
        ],
        ids=str,
    )
    def test_contains_interval(self, item, expected):
        # GH 23705: interval categories use interval containment semantics
        idx = CategoricalIndex(IntervalIndex.from_breaks(range(3)))
        assert (item in idx) is expected

    def test_contains_list(self):
        # GH#21729: unhashable probes must raise rather than return False
        idx = CategoricalIndex([1, 2, 3])

        assert "a" not in idx

        with pytest.raises(TypeError, match="unhashable type"):
            ["a"] in idx

        with pytest.raises(TypeError, match="unhashable type"):
            ["a", "b"] in idx