def test_take_fill_valid(self, timedelta_index):
    """Check which ``fill_value`` types ``TimedeltaArray.take`` accepts.

    A ``Timedelta`` fill value is used as-is for the ``-1`` position, while a
    ``Timestamp``, a ``Period`` and a datetime64 ``NaT`` must each raise
    ``TypeError`` with the "value should be a ..." message.
    """
    tda = TimedeltaArray(timedelta_index)

    one_day = pd.Timedelta(days=1)
    taken = tda.take([-1, 1], allow_fill=True, fill_value=one_day)
    assert taken[0] == one_day

    msg = f"value should be a '{tda._scalar_type.__name__}' or 'NaT'. Got"
    stamp = Timestamp.now()
    rejected = [
        # fill_value Timestamp invalid
        ([0, 1], stamp),
        # fill_value Period invalid
        ([0, 1], stamp.to_period("D")),
        # require appropriate-dtype if we have a NA value
        ([-1, 1], np.datetime64("NaT", "ns")),
    ]
    for indexer, bad_fill in rejected:
        with pytest.raises(TypeError, match=msg):
            tda.take(indexer, allow_fill=True, fill_value=bad_fill)
class SharedTests:
    """Tests shared between the datetime-like array test classes.

    The tests read ``self.index_cls``, ``self.array_cls``, ``self.scalar_type``
    and ``self.example_dtype``; none of them is assigned in this class, so they
    are presumably supplied by concrete subclasses (confirm against the
    subclasses in this file).
    """

    index_cls: Type[Union[DatetimeIndex, PeriodIndex, TimedeltaIndex]]

    @staticmethod
    def _daily_i8(n=10):
        """Return ``n`` int64 values that are multiples of 24 * 3600 * 10**9.

        The step equals the number of nanoseconds in one day.
        """
        return np.arange(n, dtype="i8") * 24 * 3600 * 10**9

    def _daily_arr(self):
        """Build a fresh 10-element ``self.array_cls`` with ``freq="D"``.

        Each call creates new backing data, so tests that mutate the result
        do not affect one another.
        """
        return self.array_cls(self._daily_i8(), freq="D")

    @staticmethod
    def _xfail_if_tz_names_differ(arr1d, key, request):
        """Mark the requesting test xfail when two entries disagree on tz name.

        Only applies to tz-aware ``DatetimeArray`` inputs; ``key`` must select
        exactly two elements of ``arr1d``.
        """
        if not isinstance(arr1d, DatetimeArray):
            return
        tz = arr1d.tz
        ts1, ts2 = arr1d[key]
        if tz is not None and ts1.tz.tzname(ts1) != ts2.tz.tzname(ts2):
            # If we have e.g. tzutc(), when we cast to string and parse
            #  back we get pytz.UTC, and then consider them different timezones
            #  so incorrectly raise.
            mark = pytest.mark.xfail(reason="timezone comparisons inconsistent")
            request.node.add_marker(mark)

    @pytest.fixture
    def arr1d(self):
        """Default 1D array under test: ten entries with ``freq="D"``."""
        return self._daily_arr()

    def test_compare_len1_raises(self, arr1d):
        """Comparing against a length-1 operand must raise, not broadcast."""
        # make sure we raise when comparing with different lengths, specific
        #  to the case where one has length-1, which numpy would broadcast
        arr = arr1d
        idx = self.index_cls(arr)

        with pytest.raises(ValueError, match="Lengths must match"):
            arr == arr[:1]

        # test the index classes while we're at it, GH#23078
        with pytest.raises(ValueError, match="Lengths must match"):
            idx <= idx[[0]]

    @pytest.mark.parametrize(
        "result",
        [
            pd.date_range("2020", periods=3),
            pd.date_range("2020", periods=3, tz="UTC"),
            pd.timedelta_range("0 days", periods=3),
            pd.period_range("2020Q1", periods=3, freq="Q"),
        ],
    )
    def test_compare_with_Categorical(self, result):
        """An index compares equal element-wise to its own ``Categorical``."""
        expected = pd.Categorical(result)
        assert all(result == expected)
        assert not any(result != expected)

    @pytest.mark.parametrize("reverse", [True, False])
    @pytest.mark.parametrize("as_index", [True, False])
    def test_compare_categorical_dtype(self, arr1d, as_index, reverse, ordered):
        """Compare ``arr1d`` with a categorical wrapping the same values.

        ``ordered`` is a fixture that is not defined in this class.
        """
        other = pd.Categorical(arr1d, ordered=ordered)
        if as_index:
            other = pd.CategoricalIndex(other)

        left, right = arr1d, other
        if reverse:
            left, right = right, left

        ones = np.ones(arr1d.shape, dtype=bool)
        zeros = ~ones

        result = left == right
        tm.assert_numpy_array_equal(result, ones)

        result = left != right
        tm.assert_numpy_array_equal(result, zeros)

        if not reverse and not as_index:
            # Otherwise Categorical raises TypeError bc it is not ordered
            # TODO: we should probably get the same behavior regardless?
            result = left < right
            tm.assert_numpy_array_equal(result, zeros)

            result = left <= right
            tm.assert_numpy_array_equal(result, ones)

            result = left > right
            tm.assert_numpy_array_equal(result, zeros)

            result = left >= right
            tm.assert_numpy_array_equal(result, ones)

    def test_take(self):
        """``take`` on the array matches ``take`` on the equivalent index."""
        data = self._daily_i8(100)
        # Shuffle with a locally seeded generator so that a failure is
        # reproducible and the global numpy RNG state is left untouched.
        np.random.RandomState(0).shuffle(data)

        freq = None if self.array_cls is not PeriodArray else "D"

        arr = self.array_cls(data, freq=freq)
        idx = self.index_cls._simple_new(arr)

        # Exercise both a list indexer and an ndarray indexer.
        for takers in ([1, 4, 94], np.array([1, 4, 94])):
            result = arr.take(takers)
            expected = idx.take(takers)

            tm.assert_index_equal(self.index_cls(result), expected)

    # NOTE(review): ``Timestamp.now().time`` is the bound method, not a
    #  ``datetime.time`` value -- confirm whether ``.time()`` was intended.
    @pytest.mark.parametrize("fill_value", [2, 2.0, Timestamp.now().time])
    def test_take_fill_raises(self, fill_value):
        """Fill values of an unsupported type raise ``TypeError``."""
        arr = self._daily_arr()

        msg = f"value should be a '{arr._scalar_type.__name__}' or 'NaT'. Got"
        with pytest.raises(TypeError, match=msg):
            arr.take([0, 1], allow_fill=True, fill_value=fill_value)

    def test_take_fill(self):
        """``None``, ``np.nan`` and ``NaT`` fill values all produce ``NaT``."""
        arr = self._daily_arr()

        result = arr.take([-1, 1], allow_fill=True, fill_value=None)
        assert result[0] is NaT

        result = arr.take([-1, 1], allow_fill=True, fill_value=np.nan)
        assert result[0] is NaT

        result = arr.take([-1, 1], allow_fill=True, fill_value=NaT)
        assert result[0] is NaT

    def test_take_fill_str(self, arr1d):
        """A parseable str fill value is cast; an unparseable one raises."""
        # Cast str fill_value matching other fill_value-taking methods
        result = arr1d.take([-1, 1], allow_fill=True, fill_value=str(arr1d[-1]))
        expected = arr1d[[-1, 1]]
        tm.assert_equal(result, expected)

        msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got"
        with pytest.raises(TypeError, match=msg):
            arr1d.take([-1, 1], allow_fill=True, fill_value="foo")

    def test_concat_same_type(self, arr1d):
        """``_concat_same_type`` matches concatenating the object values."""
        arr = arr1d
        idx = self.index_cls(arr)
        idx = idx.insert(0, NaT)
        arr = self.array_cls(idx)

        result = arr._concat_same_type([arr[:-1], arr[1:], arr])
        arr2 = arr.astype(object)
        expected = self.index_cls(np.concatenate([arr2[:-1], arr2[1:], arr2]), None)

        tm.assert_index_equal(self.index_cls(result), expected)

    def test_unbox_scalar(self):
        """``_unbox_scalar`` returns the backing dtype's scalar type."""
        arr = self._daily_arr()
        result = arr._unbox_scalar(arr[0])
        expected = arr._data.dtype.type
        assert isinstance(result, expected)

        result = arr._unbox_scalar(NaT)
        assert isinstance(result, expected)

        msg = f"'value' should be a {self.scalar_type.__name__}."
        with pytest.raises(ValueError, match=msg):
            arr._unbox_scalar("foo")

    def test_check_compatible_with(self):
        """``_check_compatible_with`` accepts own scalar, own slice and NaT."""
        arr = self._daily_arr()

        arr._check_compatible_with(arr[0])
        arr._check_compatible_with(arr[:1])
        arr._check_compatible_with(NaT)

    def test_scalar_from_string(self):
        """``_scalar_from_string`` round-trips ``str`` of an element."""
        arr = self._daily_arr()
        result = arr._scalar_from_string(str(arr[0]))
        assert result == arr[0]

    def test_reduce_invalid(self):
        """``_reduce`` with an unknown reduction name raises ``TypeError``."""
        arr = self._daily_arr()

        msg = f"'{type(arr).__name__}' does not implement reduction 'not a method'"
        with pytest.raises(TypeError, match=msg):
            arr._reduce("not a method")

    @pytest.mark.parametrize("method", ["pad", "backfill"])
    def test_fillna_method_doesnt_change_orig(self, method):
        """``fillna(method=...)`` fills the result but not the original."""
        arr = self._daily_arr()
        arr[4] = NaT

        fill_value = arr[3] if method == "pad" else arr[5]

        result = arr.fillna(method=method)
        assert result[4] == fill_value

        # check that the original was not changed
        assert arr[4] is NaT

    def test_searchsorted(self):
        """``searchsorted`` with scalar, own-type array and ``NaT`` inputs."""
        arr = self._daily_arr()

        # scalar
        result = arr.searchsorted(arr[1])
        assert result == 1

        result = arr.searchsorted(arr[2], side="right")
        assert result == 3

        # own-type
        result = arr.searchsorted(arr[1:3])
        expected = np.array([1, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

        result = arr.searchsorted(arr[1:3], side="right")
        expected = np.array([2, 3], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

        # GH#29884 match numpy convention on whether NaT goes
        #  at the end or the beginning
        result = arr.searchsorted(NaT)
        if np_version_under1p18:
            # Following numpy convention, NaT goes at the beginning
            #  (unlike NaN which goes at the end)
            assert result == 0
        else:
            assert result == 10

    @pytest.mark.parametrize("box", [None, "index", "series"])
    def test_searchsorted_castable_strings(self, arr1d, box, request):
        """``searchsorted`` parses castable strings and rejects the rest."""
        self._xfail_if_tz_names_differ(arr1d, slice(1, 3), request)

        arr = arr1d
        if box is None:
            pass
        elif box == "index":
            # Test the equivalent Index.searchsorted method while we're here
            arr = self.index_cls(arr)
        else:
            # Test the equivalent Series.searchsorted method while we're here
            arr = pd.Series(arr)

        # scalar
        result = arr.searchsorted(str(arr[1]))
        assert result == 1

        result = arr.searchsorted(str(arr[2]), side="right")
        assert result == 3

        result = arr.searchsorted([str(x) for x in arr[1:3]])
        expected = np.array([1, 2], dtype=np.intp)
        tm.assert_numpy_array_equal(result, expected)

        with pytest.raises(
            TypeError,
            match=re.escape(
                f"value should be a '{arr1d._scalar_type.__name__}', 'NaT', "
                "or array of those. Got 'str' instead."
            ),
        ):
            arr.searchsorted("foo")

        with pytest.raises(
            TypeError,
            match=re.escape(
                f"value should be a '{arr1d._scalar_type.__name__}', 'NaT', "
                "or array of those. Got 'StringArray' instead."
            ),
        ):
            arr.searchsorted([str(arr[1]), "baz"])

    def test_getitem_near_implementation_bounds(self):
        """Indexing values just above ``NaT.value`` must not raise."""
        # We only check tz-naive for DTA bc the bounds are slightly different
        #  for other tzs
        i8vals = np.asarray([NaT.value + n for n in range(1, 5)], dtype="i8")
        arr = self.array_cls(i8vals, freq="ns")
        arr[0]  # should not raise OutOfBoundsDatetime

        index = pd.Index(arr)
        index[0]  # should not raise OutOfBoundsDatetime

        ser = pd.Series(arr)
        ser[0]  # should not raise OutOfBoundsDatetime

    def test_getitem_2d(self, arr1d):
        """2D slicing, 2D lookup and scalar lookup via ``__getitem__``."""
        # 2d slicing on a 1D array
        expected = type(arr1d)(arr1d._data[:, np.newaxis], dtype=arr1d.dtype)
        result = arr1d[:, np.newaxis]
        tm.assert_equal(result, expected)

        # Lookup on a 2D array
        arr2d = expected
        expected = type(arr2d)(arr2d._data[:3, 0], dtype=arr2d.dtype)
        result = arr2d[:3, 0]
        tm.assert_equal(result, expected)

        # Scalar lookup
        result = arr2d[-1, 0]
        expected = arr1d[-1]
        assert result == expected

    def test_iter_2d(self, arr1d):
        """Iterating a (3, 1) array yields three 1D arrays of the same type."""
        data2d = arr1d._data[:3, np.newaxis]
        arr2d = type(arr1d)._simple_new(data2d, dtype=arr1d.dtype)
        result = list(arr2d)
        assert len(result) == 3
        for x in result:
            assert isinstance(x, type(arr1d))
            assert x.ndim == 1
            assert x.dtype == arr1d.dtype

    def test_repr_2d(self, arr1d):
        """``repr`` of a (3, 1) array lists one bracketed row per element."""
        data2d = arr1d._data[:3, np.newaxis]
        arr2d = type(arr1d)._simple_new(data2d, dtype=arr1d.dtype)

        result = repr(arr2d)

        if isinstance(arr2d, TimedeltaArray):
            expected = (
                f"<{type(arr2d).__name__}>\n"
                "[\n"
                f"['{arr1d[0]._repr_base()}'],\n"
                f"['{arr1d[1]._repr_base()}'],\n"
                f"['{arr1d[2]._repr_base()}']\n"
                "]\n"
                f"Shape: (3, 1), dtype: {arr1d.dtype}"
            )
        else:
            expected = (
                f"<{type(arr2d).__name__}>\n"
                "[\n"
                f"['{arr1d[0]}'],\n"
                f"['{arr1d[1]}'],\n"
                f"['{arr1d[2]}']\n"
                "]\n"
                f"Shape: (3, 1), dtype: {arr1d.dtype}"
            )

        assert result == expected

    def test_setitem(self):
        """Scalar and slice ``__setitem__`` update the ``asi8`` values."""
        arr = self._daily_arr()

        arr[0] = arr[1]
        # Independent copy of the same starting integers to mirror the edits.
        expected = self._daily_i8()
        expected[0] = expected[1]

        tm.assert_numpy_array_equal(arr.asi8, expected)

        arr[:2] = arr[-2:]
        expected[:2] = expected[-2:]
        tm.assert_numpy_array_equal(arr.asi8, expected)

    @pytest.mark.parametrize(
        "box",
        [
            pd.Index,
            pd.Series,
            np.array,
            list,
            PandasArray,
        ],
    )
    def test_setitem_object_dtype(self, box, arr1d):
        """Setting from an object-dtype container of own-type scalars works."""
        expected = arr1d.copy()[::-1]
        if expected.dtype.kind in ["m", "M"]:
            expected = expected._with_freq(None)

        vals = expected
        if box is list:
            vals = list(vals)
        elif box is np.array:
            # if we do np.array(x).astype(object) then dt64 and td64 cast to ints
            vals = np.array(vals.astype(object))
        elif box is PandasArray:
            vals = box(np.asarray(vals, dtype=object))
        else:
            vals = box(vals).astype(object)

        arr1d[:] = vals

        tm.assert_equal(arr1d, expected)

    def test_setitem_strs(self, arr1d, request):
        """``__setitem__`` parses strs in both list-like and scalar form."""
        # Check that we parse strs in both scalar and listlike
        self._xfail_if_tz_names_differ(arr1d, slice(-2, None), request)

        # Setting list-like of strs
        expected = arr1d.copy()
        expected[[0, 1]] = arr1d[-2:]

        result = arr1d.copy()
        result[:2] = [str(x) for x in arr1d[-2:]]
        tm.assert_equal(result, expected)

        # Same thing but now for just a scalar str
        expected = arr1d.copy()
        expected[0] = arr1d[-1]

        result = arr1d.copy()
        result[0] = str(arr1d[-1])
        tm.assert_equal(result, expected)

    @pytest.mark.parametrize("as_index", [True, False])
    def test_setitem_categorical(self, arr1d, as_index):
        """Setting from a reversed ``Categorical``/``CategoricalIndex``."""
        expected = arr1d.copy()[::-1]
        if not isinstance(expected, PeriodArray):
            expected = expected._with_freq(None)

        cat = pd.Categorical(arr1d)
        if as_index:
            cat = pd.CategoricalIndex(cat)

        arr1d[:] = cat[::-1]

        tm.assert_equal(arr1d, expected)

    def test_setitem_raises(self):
        """Out-of-bounds, wrong-type and wrong-length sets each raise."""
        arr = self._daily_arr()
        val = arr[0]

        with pytest.raises(IndexError, match="index 12 is out of bounds"):
            arr[12] = val

        with pytest.raises(TypeError, match="value should be a.* 'object'"):
            arr[0] = object()

        msg = "cannot set using a list-like indexer with a different length"
        with pytest.raises(ValueError, match=msg):
            # GH#36339
            arr[[]] = [arr[1]]

        msg = "cannot set using a slice indexer with a different length than"
        with pytest.raises(ValueError, match=msg):
            # GH#36339
            arr[1:1] = arr[:3]

    @pytest.mark.parametrize("box", [list, np.array, pd.Index, pd.Series])
    def test_setitem_numeric_raises(self, arr1d, box):
        """Integer and float containers are rejected by ``__setitem__``."""
        # We dont case e.g. int64 to our own dtype for setitem

        msg = (
            f"value should be a '{arr1d._scalar_type.__name__}', "
            "'NaT', or array of those. Got"
        )
        with pytest.raises(TypeError, match=msg):
            arr1d[:2] = box([0, 1])

        with pytest.raises(TypeError, match=msg):
            arr1d[:2] = box([0.0, 1.0])

    def test_inplace_arithmetic(self):
        """``+=`` and ``-=`` give the same values as ``+`` and ``-``."""
        # GH#24115 check that iadd and isub are actually in-place
        arr = self._daily_arr()

        expected = arr + pd.Timedelta(days=1)
        arr += pd.Timedelta(days=1)
        tm.assert_equal(arr, expected)

        expected = arr - pd.Timedelta(days=1)
        arr -= pd.Timedelta(days=1)
        tm.assert_equal(arr, expected)

    def test_shift_fill_int_deprecated(self):
        """``shift`` with an integer ``fill_value`` warns but still fills."""
        # GH#31971
        arr = self._daily_arr()

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = arr.shift(1, fill_value=1)

        expected = arr.copy()
        if self.array_cls is PeriodArray:
            fill_val = PeriodArray._scalar_type._from_ordinal(1, freq=arr.freq)
        else:
            fill_val = arr._scalar_type(1)
        expected[0] = fill_val
        expected[1:] = arr[:-1]
        tm.assert_equal(result, expected)

    def test_median(self, arr1d):
        """``median`` for 1D and 2D inputs, with and without ``skipna``."""
        arr = arr1d
        if len(arr) % 2 == 0:
            # make it easier to define `expected`
            arr = arr[:-1]

        expected = arr[len(arr) // 2]

        result = arr.median()
        assert type(result) is type(expected)
        assert result == expected

        arr[len(arr) // 2] = NaT
        if not isinstance(expected, Period):
            expected = arr[len(arr) // 2 - 1 : len(arr) // 2 + 2].mean()

        assert arr.median(skipna=False) is NaT

        result = arr.median()
        assert type(result) is type(expected)
        assert result == expected

        assert arr[:0].median() is NaT
        assert arr[:0].median(skipna=False) is NaT

        # 2d Case
        arr2 = arr.reshape(-1, 1)

        result = arr2.median(axis=None)
        assert type(result) is type(expected)
        assert result == expected

        assert arr2.median(axis=None, skipna=False) is NaT

        result = arr2.median(axis=0)
        expected2 = type(arr)._from_sequence([expected], dtype=arr.dtype)
        tm.assert_equal(result, expected2)

        result = arr2.median(axis=0, skipna=False)
        expected2 = type(arr)._from_sequence([NaT], dtype=arr.dtype)
        tm.assert_equal(result, expected2)

        result = arr2.median(axis=1)
        tm.assert_equal(result, arr)

        result = arr2.median(axis=1, skipna=False)
        tm.assert_equal(result, arr)

    def test_from_integer_array(self):
        """Constructing from an ``Int64`` array matches the int64 ndarray."""
        arr = np.array([1, 2, 3], dtype=np.int64)
        expected = self.array_cls(arr, dtype=self.example_dtype)

        data = pd.array(arr, dtype="Int64")
        result = self.array_cls(data, dtype=self.example_dtype)

        tm.assert_extension_array_equal(result, expected)