def test_frame_to_time_stamp(self):
    """Check ``DataFrame.to_timestamp`` on a frame with an annual PeriodIndex.

    The conversion is checked along the index and, after transposing,
    along the columns for the 'D', 'H', 'T' and 'S' targets, followed by
    an invalid ``axis`` and the '5t' / 't' minute spellings.
    """
    n_columns = 5
    periods = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009')
    frame = DataFrame(randn(len(periods), n_columns), index=periods)
    frame['mix'] = 'a'

    one_ns = Timedelta(1, 'ns')

    def _get_with_delta(delta, freq='A-DEC'):
        # Range from 1/1/2001 to 12/31/2009 with both bounds moved by delta.
        lower = to_datetime('1/1/2001') + delta
        upper = to_datetime('12/31/2009') + delta
        return date_range(lower, upper, freq=freq)

    def _year_end_days():
        # Each 'A-DEC' stamp moved to the last nanosecond of that day.
        days = date_range('1/1/2001', end='12/31/2009', freq='A-DEC')
        return days + Timedelta(1, 'D') - one_ns

    def _year_starts():
        return date_range('1/1/2001', end='1/1/2009', freq='AS-JAN')

    # (target freq, Timedelta unit, offset added to the year-end stamps)
    sub_daily = [
        ('H', 'h', timedelta(hours=23)),
        ('T', 'm', timedelta(hours=23, minutes=59)),
        ('S', 's', timedelta(hours=23, minutes=59, seconds=59)),
    ]

    # index
    converted = frame.to_timestamp('D', 'end')
    tm.assert_index_equal(converted.index, _year_end_days())
    tm.assert_numpy_array_equal(converted.values, frame.values)

    converted = frame.to_timestamp('D', 'start')
    tm.assert_index_equal(converted.index, _year_starts())

    for target, unit, delta in sub_daily:
        converted = frame.to_timestamp(target, 'end')
        wanted = _get_with_delta(delta) + Timedelta(1, unit) - one_ns
        tm.assert_index_equal(converted.index, wanted)

    # columns
    frame = frame.T

    converted = frame.to_timestamp('D', 'end', axis=1)
    tm.assert_index_equal(converted.columns, _year_end_days())
    tm.assert_numpy_array_equal(converted.values, frame.values)

    converted = frame.to_timestamp('D', 'start', axis=1)
    tm.assert_index_equal(converted.columns, _year_starts())

    for target, unit, delta in sub_daily:
        converted = frame.to_timestamp(target, 'end', axis=1)
        wanted = _get_with_delta(delta) + Timedelta(1, unit) - one_ns
        tm.assert_index_equal(converted.columns, wanted)

    # invalid axis
    tm.assert_raises_regex(ValueError, 'axis', frame.to_timestamp, axis=2)

    minute_results = (
        frame.to_timestamp('5t', axis=1),
        frame.to_timestamp('t', axis=1),
    )
    year_starts = pd.date_range('2001-01-01', '2009-01-01', freq='AS')

    for converted in minute_results:
        assert isinstance(converted.columns, DatetimeIndex)
    for converted in minute_results:
        tm.assert_numpy_array_equal(converted.columns.asi8,
                                    year_starts.asi8)
    # PeriodIndex.to_timestamp always use 'infer'
    for converted in minute_results:
        assert converted.columns.freqstr == 'AS-JAN'
def test_period_index_length(self):
    """Check lengths and freq handling of ``PeriodIndex`` construction.

    Covers ranges given by start/end, by start or end plus ``periods``,
    several spellings of the weekly frequency, freq inference from
    Period elements, and rejection of invalid or mixed-freq input.
    """
    pi = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009')
    assert len(pi) == 9

    pi = PeriodIndex(freq='Q', start='1/1/2001', end='12/1/2009')
    assert len(pi) == 4 * 9

    pi = PeriodIndex(freq='M', start='1/1/2001', end='12/1/2009')
    assert len(pi) == 12 * 9

    start = Period('02-Apr-2005', 'B')
    i1 = PeriodIndex(start=start, periods=20)
    assert len(i1) == 20
    assert i1.freq == start.freq
    assert i1[0] == start

    end_intv = Period('2006-12-31', 'W')
    i1 = PeriodIndex(end=end_intv, periods=10)
    assert len(i1) == 10
    assert i1.freq == end_intv.freq
    assert i1[-1] == end_intv

    end_intv = Period('2006-12-31', '1w')
    i2 = PeriodIndex(end=end_intv, periods=10)
    assert len(i1) == len(i2)
    assert (i1 == i2).all()
    assert i1.freq == i2.freq

    end_intv = Period('2006-12-31', ('w', 1))
    i2 = PeriodIndex(end=end_intv, periods=10)
    assert len(i1) == len(i2)
    assert (i1 == i2).all()
    assert i1.freq == i2.freq

    # Cannot allow mixed freq for start ('B') and end (weekly)
    with pytest.raises(ValueError):
        PeriodIndex(start=start, end=end_intv)

    # same freq for start and end: construction must simply succeed
    end_intv = Period('2005-05-01', 'B')
    PeriodIndex(start=start, end=end_intv)

    # Must specify periods if missing start or end
    with pytest.raises(ValueError):
        PeriodIndex(start=start)

    # infer freq from first element
    i2 = PeriodIndex([end_intv, Period('2005-05-05', 'B')])
    assert len(i2) == 2
    assert i2[0] == end_intv

    i2 = PeriodIndex(np.array([end_intv, Period('2005-05-05', 'B')]))
    assert len(i2) == 2
    assert i2[0] == end_intv

    # Mixed freq should fail
    vals = [end_intv, Period('2006-12-31', 'w')]
    pytest.raises(ValueError, PeriodIndex, vals)
    vals = np.array(vals)
    pytest.raises(ValueError, PeriodIndex, vals)
def test_shift(self):
    """Check ``PeriodIndex.shift`` for annual, monthly and daily indexes."""
    base_start, base_end = '1/1/2001', '12/1/2009'

    # shifting by zero periods returns an equal index
    annual = PeriodIndex(freq='A', start=base_start, end=base_end)
    tm.assert_index_equal(annual.shift(0), annual)

    # (freq, periods to shift by, start and end of the expected index)
    cases = [
        ('A', 1, '1/1/2002', '12/1/2010'),
        ('A', -1, '1/1/2000', '12/1/2008'),
        ('M', 1, '2/1/2001', '1/1/2010'),
        ('M', -1, '12/1/2000', '11/1/2009'),
        ('D', 1, '1/2/2001', '12/2/2009'),
        ('D', -1, '12/31/2000', '11/30/2009'),
    ]
    for freq, n, shifted_start, shifted_end in cases:
        original = PeriodIndex(freq=freq, start=base_start, end=base_end)
        shifted = PeriodIndex(freq=freq, start=shifted_start,
                              end=shifted_end)
        assert len(original) == len(shifted)
        tm.assert_index_equal(original.shift(n), shifted)
def test_intersection_cases(self, sort):
    """Check ``PeriodIndex.intersection`` results, names and freq.

    ``sort`` is forwarded to ``intersection``; when it is None the
    expected values of the non-monotonic cases are sorted first.
    """
    june = period_range("6/1/2000", "6/30/2000", freq="D", name="idx")
    monotonic_cases = [
        # if target has the same name, it is preserved
        (
            period_range("5/15/2000", "6/20/2000", freq="D", name="idx"),
            period_range("6/1/2000", "6/20/2000", freq="D", name="idx"),
        ),
        # if target name is different, it will be reset
        (
            period_range("5/15/2000", "6/20/2000", freq="D", name="other"),
            period_range("6/1/2000", "6/20/2000", freq="D", name=None),
        ),
        (
            period_range("7/1/2000", "7/31/2000", freq="D", name="idx"),
            PeriodIndex([], name="idx", freq="D"),
        ),
    ]
    for other, wanted in monotonic_cases:
        got = june.intersection(other, sort=sort)
        tm.assert_index_equal(got, wanted)
        assert got.name == wanted.name
        assert got.freq == wanted.freq

    # non-monotonic
    shuffled = PeriodIndex(
        ["2011-01-05", "2011-01-04", "2011-01-02", "2011-01-03"],
        freq="D",
        name="idx",
    )
    overlapping = ["2011-01-04", "2011-01-02", "2011-02-02", "2011-02-03"]
    common = ["2011-01-04", "2011-01-02"]
    non_monotonic_cases = [
        (
            PeriodIndex(overlapping, freq="D", name="idx"),
            PeriodIndex(common, freq="D", name="idx"),
        ),
        (
            PeriodIndex(overlapping, freq="D", name="other"),
            PeriodIndex(common, freq="D", name=None),
        ),
        (
            period_range("7/1/2000", "7/31/2000", freq="D", name="idx"),
            PeriodIndex([], freq="D", name="idx"),
        ),
    ]
    for other, wanted in non_monotonic_cases:
        got = shuffled.intersection(other, sort=sort)
        if sort is None:
            wanted = wanted.sort_values()
        tm.assert_index_equal(got, wanted)
        assert got.name == wanted.name
        assert got.freq == "D"

    # empty same freq
    minutes = date_range("6/1/2000", "6/15/2000", freq="T")
    nothing = minutes[0:0]
    assert len(nothing.intersection(minutes)) == 0
    assert len(minutes.intersection(nothing)) == 0
["datetime64[ns, Asia/Tokyo]", "timedelta"], ), ], ) def test_get_dtype_kinds(index_or_series, to_concat, expected): to_concat_klass = [index_or_series(c) for c in to_concat] result = _concat._get_dtype_kinds(to_concat_klass) assert result == set(expected) @pytest.mark.parametrize( "to_concat, expected", [ ( [ PeriodIndex(["2011-01"], freq="M"), PeriodIndex(["2011-01"], freq="M") ], ["period[M]"], ), ( [ Series([Period("2011-01", freq="M")]), Series([Period("2011-02", freq="M")]), ], ["period[M]"], ), ( [ PeriodIndex(["2011-01"], freq="M"), PeriodIndex(["2011-01"], freq="D")
def test_repeat_freqstr(self, index, use_numpy): # GH10183 expected = PeriodIndex([p for p in index for _ in range(3)]) result = np.repeat(index, 3) if use_numpy else index.repeat(3) tm.assert_index_equal(result, expected) assert result.freqstr == index.freqstr
def test_astype_raises(self, dtype):
    """Casting a PeriodIndex to ``dtype`` must raise TypeError.

    GH#13149, GH#13209. ``dtype`` is supplied from outside this block,
    presumably by a parametrize decorator or fixture.
    """
    # np.nan is the canonical spelling; the np.NaN alias was removed in
    # NumPy 2.0.
    idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.nan], freq="D")
    msg = "Cannot cast PeriodIndex to dtype"
    with pytest.raises(TypeError, match=msg):
        idx.astype(dtype)
def test_recreate_from_data(self, freq): org = period_range(start="2001/04/01", freq=freq, periods=1) idx = PeriodIndex(org.values, freq=freq) tm.assert_index_equal(idx, org)
result = arr.to_numpy(na_value=arr[1].to_numpy()) assert result[0] == result[1] result = arr.to_numpy(na_value=arr[1].to_numpy(copy=False)) assert result[0] == result[1] tm.assert_equal(arr, original) @pytest.mark.parametrize("as_index", [True, False]) @pytest.mark.parametrize( "values", [ pd.to_datetime(["2020-01-01", "2020-02-01"]), TimedeltaIndex([1, 2], unit="D"), PeriodIndex(["2020-01-01", "2020-02-01"], freq="D"), ], ) @pytest.mark.parametrize( "klass", [ list, np.array, pd.array, pd.Series, pd.Index, pd.Categorical, pd.CategoricalIndex, ], ) def test_searchsorted_datetimelike_with_listlike(values, klass, as_index):
def test_constructor_pi_nat(self):
    """Check PeriodIndex construction from input that contains NaT.

    NaT mixed with Periods (list or ndarray) or with strings plus an
    explicit freq is accepted; input holding only NaT and no freq
    raises ValueError.
    """
    jan = Period("2011-01", freq="M")

    nat_inside = [jan, NaT, jan]
    expected = PeriodIndex(["2011-01", "NaT", "2011-01"], freq="M")
    for data in (nat_inside, np.array(nat_inside)):
        tm.assert_index_equal(PeriodIndex(data), expected)

    nat_first = [NaT, NaT, jan, jan]
    expected = PeriodIndex(["NaT", "NaT", "2011-01", "2011-01"], freq="M")
    for data in (nat_first, np.array(nat_first)):
        tm.assert_index_equal(PeriodIndex(data), expected)

    from_strings = PeriodIndex([NaT, NaT, "2011-01", "2011-01"], freq="M")
    tm.assert_index_equal(from_strings, expected)

    only_missing = (
        [NaT, NaT],
        np.array([NaT, NaT]),
        ["NaT", "NaT"],
        np.array(["NaT", "NaT"]),
    )
    for data in only_missing:
        with pytest.raises(ValueError, match="freq not specified"):
            PeriodIndex(data)
def test_constructor(self):
    """Check ``period_range`` lengths and ``PeriodIndex`` freq inference.

    Also checks that Period values of different frequencies are
    rejected with IncompatibleFrequency.
    """
    # (freq, start, end, expected number of periods)
    length_cases = [
        ("A", "1/1/2001", "12/1/2009", 9),
        ("Q", "1/1/2001", "12/1/2009", 4 * 9),
        ("M", "1/1/2001", "12/1/2009", 12 * 9),
        ("D", "1/1/2001", "12/31/2009", 365 * 9 + 2),
        ("B", "1/1/2001", "12/31/2009", 261 * 9),
        ("H", "1/1/2001", "12/31/2001 23:00", 365 * 24),
        ("Min", "1/1/2001", "1/1/2001 23:59", 24 * 60),
        ("S", "1/1/2001", "1/1/2001 23:59:59", 24 * 60 * 60),
    ]
    for freq, first, last, n_expected in length_cases:
        assert len(period_range(freq=freq, start=first, end=last)) == n_expected

    start = Period("02-Apr-2005", "B")
    forward = period_range(start=start, periods=20)
    assert len(forward) == 20
    assert forward.freq == start.freq
    assert forward[0] == start

    week_end = Period("2006-12-31", "W")
    weekly = period_range(end=week_end, periods=10)
    assert len(weekly) == 10
    assert weekly.freq == week_end.freq
    assert weekly[-1] == week_end

    # other spellings of the weekly freq must build the same index
    for alias in ("1w", ("w", 1)):
        same = period_range(end=Period("2006-12-31", alias), periods=10)
        assert len(weekly) == len(same)
        assert (weekly == same).all()
        assert weekly.freq == same.freq

    end_intv = Period("2005-05-01", "B")
    period_range(start=start, end=end_intv)

    # infer freq from first element
    pair = [end_intv, Period("2005-05-05", "B")]
    for data in (pair, np.array(pair)):
        inferred = PeriodIndex(data)
        assert len(inferred) == 2
        assert inferred[0] == end_intv

    # Mixed freq should fail
    mixed = [end_intv, Period("2006-12-31", "w")]
    msg = r"Input has different freq=W-SUN from PeriodIndex\(freq=B\)"
    for data in (mixed, np.array(mixed)):
        with pytest.raises(IncompatibleFrequency, match=msg):
            PeriodIndex(data)
def test_constructor_fromarraylike(self):
    """Check PeriodIndex construction from array-likes and existing indexes.

    Period-valued input carries its own freq; the ``_ndarray_values``
    input needs an explicit freq; passing ``freq`` together with an
    existing index gives an index of that freq.
    """
    idx = period_range("2007-01", periods=20, freq="M")

    # values is an array of Period, thus can retrieve freq
    for period_values in (idx.values, list(idx.values)):
        tm.assert_index_equal(PeriodIndex(period_values), idx)

    msg = "freq not specified and cannot be inferred"
    for without_freq in (idx._ndarray_values, list(idx._ndarray_values)):
        with pytest.raises(ValueError, match=msg):
            PeriodIndex(without_freq)

    msg = "'Period' object is not iterable"
    with pytest.raises(TypeError, match=msg):
        PeriodIndex(data=Period("2007", freq="A"))

    tm.assert_index_equal(PeriodIndex(iter(idx)), idx)
    tm.assert_index_equal(PeriodIndex(idx), idx)
    tm.assert_index_equal(PeriodIndex(idx, freq="M"), idx)

    from_offset = PeriodIndex(idx, freq=offsets.MonthEnd())
    tm.assert_index_equal(from_offset, idx)
    assert from_offset.freq == "M"

    # the two-month freq given as a string and as an offset object
    for two_months in ("2M", offsets.MonthEnd(2)):
        bimonthly = PeriodIndex(idx, freq=two_months)
        tm.assert_index_equal(bimonthly, idx.asfreq("2M"))
        assert bimonthly.freq == "2M"

    daily = PeriodIndex(idx, freq="D")
    tm.assert_index_equal(daily, idx.asfreq("D", "e"))
def test_constructor_invalid_quarters(self):
    """Quarter numbers 0..3 passed as a field array raise ValueError."""
    years = range(2000, 2004)
    quarters = [0, 1, 2, 3]
    with pytest.raises(ValueError, match="Quarter must be 1 <= q <= 4"):
        PeriodIndex(year=years, quarter=quarters, freq="Q-DEC")
def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
    """
    Encode input values as an enumerated type or categorical variable

    Parameters
    ----------
    values : ndarray (1-d)
        Sequence
    sort : boolean, default False
        Sort by values
    order : default None
        Accepted in the signature but not read by this function body
    na_sentinel : int, default -1
        Value to mark "not found"
    size_hint : hint to the hashtable sizer
        When falsy, the number of values is used instead

    Returns
    -------
    labels : the indexer to the original array
    uniques : ndarray (1-d) or Index
        the unique values. Index is returned when passed values is Index or
        Series

    note: an array of Periods will ignore sort as it returns an always sorted
    PeriodIndex
    """
    from pandas import Index, Series, DatetimeIndex, PeriodIndex

    # Datetime-likes are hashed through their int64 view:
    # - extension datetimelikes (period, tz-aware) are boxed into their
    #   Index type so the metadata can be restored from `values` later
    # - plain numpy datetimelikes are viewed as i8 and cast back at the end
    numpy_dtype = None
    if not needs_i8_conversion(values):
        vals = np.asarray(values)
    elif is_period_dtype(values):
        values = PeriodIndex(values)
        vals = values.asi8
    elif is_datetimetz(values):
        values = DatetimeIndex(values)
        vals = values.asi8
    else:
        # numpy dtype
        numpy_dtype = values.dtype
        vals = values.view(np.int64)

    (hash_klass, vec_klass), vals = _get_data_algo(vals, _hashtables)

    n_slots = size_hint or len(vals)
    table = hash_klass(n_slots)
    uniques = vec_klass()
    check_nulls = not is_integer_dtype(values)
    labels = _ensure_platform_int(
        table.get_labels(vals, uniques, 0, na_sentinel, check_nulls))

    uniques = uniques.to_array()

    if sort and len(uniques) > 0:
        uniques, labels = safe_sort(uniques, labels,
                                    na_sentinel=na_sentinel,
                                    assume_unique=True)

    if numpy_dtype is not None:
        uniques = uniques.astype(numpy_dtype)

    # Index input gets an Index of its own kind back; Series input a
    # plain Index.
    if isinstance(values, Index):
        uniques = values._shallow_copy(uniques, name=None)
    elif isinstance(values, Series):
        uniques = Index(uniques)

    return labels, uniques
def test_asfreq(self):
    """Check ``PeriodIndex.asfreq`` between single-period indexes.

    One index is built per frequency, each starting at 1/1/2001; every
    index converted to another frequency with a "start" ``how`` must
    equal the index built for that frequency.
    """
    # (freq, end) -- every index starts at '1/1/2001'
    specs = [
        ('A', '1/1/2001'),
        ('Q', '1/1/2001'),
        ('M', '1/1/2001'),
        ('D', '1/1/2001'),
        ('H', '1/1/2001 00:00'),
        ('Min', '1/1/2001 00:00'),
        ('S', '1/1/2001 00:00:00'),
    ]
    freqs = [freq for freq, _ in specs]
    by_freq = {}
    for freq, end in specs:
        by_freq[freq] = PeriodIndex(freq=freq, start='1/1/2001', end=end)

    # the annual index is converted using several spellings of "start"
    annual = by_freq['A']
    annual_cases = [
        ('Q', 'S'),
        ('Q', 's'),
        ('M', 'start'),
        ('D', 'StarT'),
        ('H', 'beGIN'),
        ('Min', 'S'),
        ('S', 'S'),
    ]
    for target, how in annual_cases:
        assert annual.asfreq(target, how) == by_freq[target]

    # every other index against every other frequency
    for source in freqs[1:]:
        for target in freqs:
            if target == source:
                continue
            assert by_freq[source].asfreq(target, 'S') == by_freq[target]

    with pytest.raises(ValueError):
        by_freq['S'].asfreq('T', 'foo')

    december = PeriodIndex(freq='M', start='2001-12', end='2001-12')
    for target in ('3M', 'M'):
        converted = annual.asfreq(target)
        tm.assert_numpy_array_equal(converted.asi8, december.asi8)
        assert converted.freqstr == target
class TestPeriodArray(SharedTests):
    """PeriodArray tests: the array is compared against the PeriodIndex
    built from it."""

    index_cls = PeriodIndex
    array_cls = PeriodArray
    scalar_type = Period
    example_dtype = PeriodIndex([], freq="W").dtype

    @pytest.fixture
    def arr1d(self, period_index):
        """The array stored in the ``period_index`` fixture's ``_data``."""
        return period_index._data

    def test_from_pi(self, arr1d):
        """The array, its index and ``pd.Index(array)`` hold equal elements."""
        as_index = self.index_cls(arr1d)
        assert list(arr1d) == list(as_index)

        # Check that Index.__new__ knows what to do with PeriodArray
        rebuilt = pd.Index(arr1d)
        assert isinstance(rebuilt, PeriodIndex)
        assert list(rebuilt) == list(arr1d)

    def test_astype_object(self, arr1d):
        """``astype("O")`` gives an object ndarray with the same elements."""
        as_index = self.index_cls(arr1d)
        boxed = arr1d.astype("O")
        assert isinstance(boxed, np.ndarray)
        assert boxed.dtype == "O"
        assert list(boxed) == list(as_index)

    def test_take_fill_valid(self, arr1d):
        """``take`` rejects fill values that are not a Period or NaT."""
        msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got"

        # require NaT, not iNaT, as it could be confused with an integer
        int_nat = NaT.value
        with pytest.raises(TypeError, match=msg):
            arr1d.take([-1, 1], allow_fill=True, fill_value=int_nat)

        # require appropriate-dtype if we have a NA value
        td_nat = np.timedelta64("NaT", "ns")
        with pytest.raises(TypeError, match=msg):
            arr1d.take([-1, 1], allow_fill=True, fill_value=td_nat)

    @pytest.mark.parametrize("how", ["S", "E"])
    def test_to_timestamp(self, how, arr1d):
        """``to_timestamp`` on the array matches the index method."""
        as_index = self.index_cls(arr1d)

        expected = DatetimeArray(as_index.to_timestamp(how=how))
        result = arr1d.to_timestamp(how=how)
        assert isinstance(result, DatetimeArray)

        # placeholder until these become actual EA subclasses and we can use
        #  an EA-specific tm.assert_ function
        tm.assert_index_equal(pd.Index(result), pd.Index(expected))

    def test_to_timestamp_out_of_bounds(self):
        """Periods from year 1500 raise instead of overflowing (GH#19643)."""
        # GH#19643 previously overflowed silently
        early = pd.period_range("1500", freq="Y", periods=3)
        msg = "Out of bounds nanosecond timestamp: 1500-01-01 00:00:00"

        with pytest.raises(OutOfBoundsDatetime, match=msg):
            early.to_timestamp()

        with pytest.raises(OutOfBoundsDatetime, match=msg):
            early._data.to_timestamp()

    @pytest.mark.parametrize("propname", PeriodArray._bool_ops)
    def test_bool_properties(self, arr1d, propname):
        """Each boolean property matches the same property on the index."""
        # in this case _bool_ops is just `is_leap_year`
        as_index = self.index_cls(arr1d)

        result = getattr(arr1d, propname)
        expected = np.array(getattr(as_index, propname))

        tm.assert_numpy_array_equal(result, expected)

    @pytest.mark.parametrize("propname", PeriodArray._field_ops)
    def test_int_properties(self, arr1d, propname):
        """Each field property matches the same property on the index."""
        as_index = self.index_cls(arr1d)

        result = getattr(arr1d, propname)
        expected = np.array(getattr(as_index, propname))

        tm.assert_numpy_array_equal(result, expected)

    def test_array_interface(self, arr1d):
        """Check ``np.asarray`` on the array for several target dtypes."""
        # default asarray gives objects
        as_objects = np.array(list(arr1d), dtype=object)
        tm.assert_numpy_array_equal(np.asarray(arr1d), as_objects)

        # to object dtype (same as default)
        tm.assert_numpy_array_equal(np.asarray(arr1d, dtype=object), as_objects)

        tm.assert_numpy_array_equal(np.asarray(arr1d, dtype="int64"), arr1d.asi8)

        # to other dtypes
        msg = r"float\(\) argument must be a string or a( real)? number, not 'Period'"
        with pytest.raises(TypeError, match=msg):
            np.asarray(arr1d, dtype="float64")

        as_bytes = np.asarray(arr1d, dtype="S20")
        expected = np.asarray(arr1d).astype("S20")
        tm.assert_numpy_array_equal(as_bytes, expected)

    def test_strftime(self, arr1d):
        """Array ``strftime`` equals formatting each element on its own."""
        result = arr1d.strftime("%Y")

        years = []
        for per in arr1d:
            years.append(per.strftime("%Y"))
        expected = np.array(years, dtype=object)

        tm.assert_numpy_array_equal(result, expected)

    def test_strftime_nat(self):
        """A NaT element is formatted as ``np.nan`` (GH 29578)."""
        # GH 29578
        with_nat = PeriodIndex(["2019-01-01", NaT], dtype="period[D]")
        result = PeriodArray(with_nat).strftime("%Y-%m-%d")

        expected = np.array(["2019-01-01", np.nan], dtype=object)
        tm.assert_numpy_array_equal(result, expected)
def test_period_index_length(self):
    """Check ``period_range`` lengths and argument validation.

    Also checks freq inference from Period elements and that Period
    values of different frequencies are rejected.
    """
    # (freq, expected number of periods) for 1/1/2001 through 12/1/2009
    for freq, n_expected in [('A', 9), ('Q', 4 * 9), ('M', 12 * 9)]:
        spanned = period_range(freq=freq, start='1/1/2001', end='12/1/2009')
        assert len(spanned) == n_expected

    start = Period('02-Apr-2005', 'B')
    forward = period_range(start=start, periods=20)
    assert len(forward) == 20
    assert forward.freq == start.freq
    assert forward[0] == start

    week_end = Period('2006-12-31', 'W')
    weekly = period_range(end=week_end, periods=10)
    assert len(weekly) == 10
    assert weekly.freq == week_end.freq
    assert weekly[-1] == week_end

    # other spellings of the weekly freq must build the same index
    for alias in ('1w', ('w', 1)):
        end_intv = Period('2006-12-31', alias)
        same = period_range(end=end_intv, periods=10)
        assert len(weekly) == len(same)
        assert (weekly == same).all()
        assert weekly.freq == same.freq

    # end_intv is weekly here while start is 'B'
    with pytest.raises(ValueError, match="start and end must have same freq"):
        period_range(start=start, end=end_intv)

    end_intv = Period('2005-05-01', 'B')
    period_range(start=start, end=end_intv)

    msg = ("Of the three parameters: start, end, and periods, exactly two"
           " must be specified")
    with pytest.raises(ValueError, match=msg):
        period_range(start=start)

    # infer freq from first element
    pair = [end_intv, Period('2005-05-05', 'B')]
    for data in (pair, np.array(pair)):
        inferred = PeriodIndex(data)
        assert len(inferred) == 2
        assert inferred[0] == end_intv

    # Mixed freq should fail
    mixed = [end_intv, Period('2006-12-31', 'w')]
    msg = r"Input has different freq=W-SUN from PeriodIndex\(freq=B\)"
    with pytest.raises(IncompatibleFrequency, match=msg):
        PeriodIndex(mixed)
    with pytest.raises(ValueError, match=msg):
        PeriodIndex(np.array(mixed))
def test_constructor_pi_nat(self):
    """Check PeriodIndex construction from input that contains NaT.

    NaT mixed with Periods (list or ndarray) or with strings plus an
    explicit freq is accepted; input holding only NaT and no freq
    raises ValueError.
    """
    jan = Period('2011-01', freq='M')

    nat_inside = [jan, pd.NaT, jan]
    expected = PeriodIndex(['2011-01', 'NaT', '2011-01'], freq='M')
    for data in (nat_inside, np.array(nat_inside)):
        tm.assert_index_equal(PeriodIndex(data), expected)

    nat_first = [pd.NaT, pd.NaT, jan, jan]
    expected = PeriodIndex(['NaT', 'NaT', '2011-01', '2011-01'], freq='M')
    for data in (nat_first, np.array(nat_first)):
        tm.assert_index_equal(PeriodIndex(data), expected)

    from_strings = PeriodIndex([pd.NaT, pd.NaT, '2011-01', '2011-01'],
                               freq='M')
    tm.assert_index_equal(from_strings, expected)

    only_missing = (
        [pd.NaT, pd.NaT],
        np.array([pd.NaT, pd.NaT]),
        ['NaT', 'NaT'],
        np.array(['NaT', 'NaT']),
    )
    for data in only_missing:
        with tm.assert_raises_regex(ValueError, 'freq not specified'):
            PeriodIndex(data)
def test_pindex_qaccess(self): pi = PeriodIndex(['2Q05', '3Q05', '4Q05', '1Q06', '2Q06'], freq='Q') s = Series(np.random.rand(len(pi)), index=pi).cumsum() # Todo: fix these accessors! assert s['05Q4'] == s[2]
def test_constructor_simple_new_empty(self):
    """``_simple_new`` on an empty PeriodIndex returns an equal index (GH13079)."""
    # GH13079
    empty = PeriodIndex([], freq='M', name='p')
    rebuilt = empty._simple_new(empty, freq='M', name='p')
    tm.assert_index_equal(rebuilt, empty)
def test_union(self, sort):
    """Check ``PeriodIndex.union`` over several frequencies.

    ``sort`` is forwarded to ``union``; when it is None the expected
    index is sorted before the comparison.
    """
    # Each case is (rng, other, expected), with expected listed in the
    # unsorted order: elements of rng first, then the new ones of other.
    cases = [
        (
            period_range("1/6/2000", freq="D", periods=5),
            period_range("1/1/2000", freq="D", periods=5),
            PeriodIndex(
                [
                    "2000-01-06",
                    "2000-01-07",
                    "2000-01-08",
                    "2000-01-09",
                    "2000-01-10",
                    "2000-01-01",
                    "2000-01-02",
                    "2000-01-03",
                    "2000-01-04",
                    "2000-01-05",
                ],
                freq="D",
            ),
        ),
        (
            period_range("1/1/2000", freq="D", periods=5),
            period_range("1/4/2000", freq="D", periods=5),
            period_range("1/1/2000", freq="D", periods=8),
        ),
        (
            period_range("1/1/2000", freq="D", periods=5),
            PeriodIndex([], freq="D"),
            period_range("1/1/2000", freq="D", periods=5),
        ),
        (
            period_range("2000-01-01 09:00", freq="H", periods=5),
            period_range("2000-01-02 09:00", freq="H", periods=5),
            PeriodIndex(
                [
                    "2000-01-01 09:00",
                    "2000-01-01 10:00",
                    "2000-01-01 11:00",
                    "2000-01-01 12:00",
                    "2000-01-01 13:00",
                    "2000-01-02 09:00",
                    "2000-01-02 10:00",
                    "2000-01-02 11:00",
                    "2000-01-02 12:00",
                    "2000-01-02 13:00",
                ],
                freq="H",
            ),
        ),
        (
            PeriodIndex(
                ["2000-01-01 09:01", "2000-01-01 09:03", "2000-01-01 09:05"],
                freq="T",
            ),
            PeriodIndex(
                ["2000-01-01 09:01", "2000-01-01 09:05", "2000-01-01 09:08"],
                freq="T",
            ),
            PeriodIndex(
                [
                    "2000-01-01 09:01",
                    "2000-01-01 09:03",
                    "2000-01-01 09:05",
                    "2000-01-01 09:08",
                ],
                freq="T",
            ),
        ),
        (
            period_range("2000-01-01", freq="M", periods=7),
            period_range("2000-04-01", freq="M", periods=7),
            period_range("2000-01-01", freq="M", periods=10),
        ),
        (
            period_range("2003-01-01", freq="A", periods=5),
            period_range("1998-01-01", freq="A", periods=8),
            PeriodIndex(
                [
                    "2003",
                    "2004",
                    "2005",
                    "2006",
                    "2007",
                    "1998",
                    "1999",
                    "2000",
                    "2001",
                    "2002",
                ],
                freq="A",
            ),
        ),
        (
            PeriodIndex(
                ["1/3/2000", "1/2/2000", "1/1/2000", "1/5/2000", "1/4/2000"],
                freq="D",
            ),
            period_range("1/6/2000", freq="D", periods=5),
            PeriodIndex(
                [
                    "1/3/2000",
                    "1/2/2000",
                    "1/1/2000",
                    "1/5/2000",
                    "1/4/2000",
                    "1/6/2000",
                    "1/7/2000",
                    "1/8/2000",
                    "1/9/2000",
                    "1/10/2000",
                ],
                freq="D",
            ),
        ),
    ]

    for left, right, wanted in cases:
        combined = left.union(right, sort=sort)
        if sort is None:
            wanted = wanted.sort_values()
        tm.assert_index_equal(combined, wanted)
def test_constructor_use_start_freq(self):
    """A Period passed as ``start`` supplies the freq (GH #1118)."""
    # GH #1118
    first_day = Period('4/2/2012', freq='B')
    from_period = PeriodIndex(start=first_day, periods=10)
    from_string = PeriodIndex(start='4/2/2012', periods=10, freq='B')
    tm.assert_index_equal(from_period, from_string)
def test_difference(self, sort):
    """Check ``PeriodIndex.difference`` over several frequencies.

    ``sort`` is forwarded to ``difference``; the expected index is
    sorted only when ``sort`` is None and the other index is non-empty.
    """
    # Each case is (rng, other, expected).
    cases = []

    unsorted_days = [
        "1/3/2000", "1/2/2000", "1/1/2000", "1/5/2000", "1/4/2000"
    ]

    # no overlap: nothing is removed
    days = PeriodIndex(unsorted_days, freq="D")
    cases.append((days, period_range("1/6/2000", freq="D", periods=5), days))

    days = PeriodIndex(unsorted_days, freq="D")
    cases.append((
        days,
        period_range("1/4/2000", freq="D", periods=5),
        PeriodIndex(["1/3/2000", "1/2/2000", "1/1/2000"], freq="D"),
    ))

    # empty other: nothing is removed
    days = PeriodIndex(unsorted_days, freq="D")
    cases.append((days, PeriodIndex([], freq="D"), days))

    unsorted_hours = [
        "2000-01-01 10:00",
        "2000-01-01 09:00",
        "2000-01-01 12:00",
        "2000-01-01 11:00",
        "2000-01-01 13:00",
    ]
    hours = PeriodIndex(unsorted_hours, freq="H")
    cases.append((
        hours,
        period_range("2000-01-02 09:00", freq="H", periods=5),
        hours,
    ))

    cases.append((
        PeriodIndex(
            ["2000-01-01 09:03", "2000-01-01 09:01", "2000-01-01 09:05"],
            freq="T"),
        PeriodIndex(["2000-01-01 09:01", "2000-01-01 09:05"], freq="T"),
        PeriodIndex(["2000-01-01 09:03"], freq="T"),
    ))

    unsorted_months = [
        "2000-02-01",
        "2000-01-01",
        "2000-06-01",
        "2000-07-01",
        "2000-05-01",
        "2000-03-01",
        "2000-04-01",
    ]
    cases.append((
        PeriodIndex(unsorted_months, freq="M"),
        period_range("2000-04-01", freq="M", periods=7),
        PeriodIndex(["2000-02-01", "2000-01-01", "2000-03-01"], freq="M"),
    ))

    unsorted_years = ["2003", "2007", "2006", "2005", "2004"]
    cases.append((
        PeriodIndex(unsorted_years, freq="A"),
        period_range("1998-01-01", freq="A", periods=8),
        PeriodIndex(["2007", "2006"], freq="A"),
    ))

    for left, right, wanted in cases:
        remaining = left.difference(right, sort=sort)
        if sort is None and len(right):
            # We dont sort (yet?) when empty GH#24959
            wanted = wanted.sort_values()
        tm.assert_index_equal(remaining, wanted)
def test_constructor(self):
    """Check ``PeriodIndex`` lengths, freq inference and mixed-freq rejection."""
    # (freq, start, end, expected number of periods)
    length_cases = [
        ('A', '1/1/2001', '12/1/2009', 9),
        ('Q', '1/1/2001', '12/1/2009', 4 * 9),
        ('M', '1/1/2001', '12/1/2009', 12 * 9),
        ('D', '1/1/2001', '12/31/2009', 365 * 9 + 2),
        ('B', '1/1/2001', '12/31/2009', 261 * 9),
        ('H', '1/1/2001', '12/31/2001 23:00', 365 * 24),
        ('Min', '1/1/2001', '1/1/2001 23:59', 24 * 60),
        ('S', '1/1/2001', '1/1/2001 23:59:59', 24 * 60 * 60),
    ]
    for freq, first, last, n_expected in length_cases:
        assert len(PeriodIndex(freq=freq, start=first, end=last)) == n_expected

    start = Period('02-Apr-2005', 'B')
    forward = PeriodIndex(start=start, periods=20)
    assert len(forward) == 20
    assert forward.freq == start.freq
    assert forward[0] == start

    week_end = Period('2006-12-31', 'W')
    weekly = PeriodIndex(end=week_end, periods=10)
    assert len(weekly) == 10
    assert weekly.freq == week_end.freq
    assert weekly[-1] == week_end

    # other spellings of the weekly freq must build the same index
    for alias in ('1w', ('w', 1)):
        same = PeriodIndex(end=Period('2006-12-31', alias), periods=10)
        assert len(weekly) == len(same)
        assert (weekly == same).all()
        assert weekly.freq == same.freq

    end_intv = Period('2005-05-01', 'B')
    PeriodIndex(start=start, end=end_intv)

    # infer freq from first element
    pair = [end_intv, Period('2005-05-05', 'B')]
    for data in (pair, np.array(pair)):
        inferred = PeriodIndex(data)
        assert len(inferred) == 2
        assert inferred[0] == end_intv

    # Mixed freq should fail
    mixed = [end_intv, Period('2006-12-31', 'w')]
    for data in (mixed, np.array(mixed)):
        with pytest.raises(ValueError):
            PeriodIndex(data)
def test_make_time_series(self):
    """A Series can be built on top of an annual PeriodIndex."""
    years = PeriodIndex(freq='A', start='1/1/2001', end='12/1/2009')
    ones = Series(1, index=years)
    assert isinstance(ones, Series)
def test_recreate_from_data(self):
    """A PeriodIndex rebuilt from its ``.values`` equals the original.

    Checked for each frequency alias in the list below.
    """
    freqs = ('M', 'Q', 'A', 'D', 'B', 'T', 'S', 'L', 'U', 'N', 'H')
    for freq in freqs:
        original = PeriodIndex(start='2001/04/01', freq=freq, periods=1)
        raw_values = original.values
        rebuilt = PeriodIndex(raw_values, freq=freq)
        tm.assert_index_equal(rebuilt, original)
def test_end_time(self):
    """``end_time`` of a monthly PeriodIndex equals the 'M' date_range."""
    span = {'start': '2016-01-01', 'end': '2016-05-31'}
    months = PeriodIndex(freq='M', **span)
    month_ends = date_range(span['start'], end=span['end'], freq='M')
    tm.assert_index_equal(months.end_time, month_ends)
def test_asfreq_nat(self): idx = PeriodIndex(['2011-01', '2011-02', 'NaT', '2011-04'], freq='M') result = idx.asfreq(freq='Q') expected = PeriodIndex(['2011Q1', '2011Q1', 'NaT', '2011Q2'], freq='Q') tm.assert_index_equal(result, expected)
def test_pickle_round_trip(self): for freq in ['D', 'M', 'A']: idx = PeriodIndex(['2016-05-16', 'NaT', NaT, np.NaN], freq=freq) result = tm.round_trip_pickle(idx) tm.assert_index_equal(result, idx)
def test_frame_index_to_string(self): index = PeriodIndex(['2011-1', '2011-2', '2011-3'], freq='M') frame = DataFrame(np.random.randn(3, 4), index=index) # it works! frame.to_string()