def test_tdi_iadd_timedeltalike(self, delta): # only test adding/sub offsets as + is now numeric rng = timedelta_range('1 days', '10 days') expected = timedelta_range('1 days 02:00:00', '10 days 02:00:00', freq='D') rng += delta tm.assert_index_equal(rng, expected)
def test_constructor_coverage(self): rng = timedelta_range('1 days', periods=10.5) exp = timedelta_range('1 days', periods=10) tm.assert_index_equal(rng, exp) msg = 'periods must be a number, got foo' with pytest.raises(TypeError, match=msg): TimedeltaIndex(start='1 days', periods='foo', freq='D') pytest.raises(ValueError, TimedeltaIndex, start='1 days', end='10 days') with pytest.raises(TypeError): TimedeltaIndex('1 days') # generator expression gen = (timedelta(i) for i in range(10)) result = TimedeltaIndex(gen) expected = TimedeltaIndex([timedelta(i) for i in range(10)]) tm.assert_index_equal(result, expected) # NumPy string array strings = np.array(['1 days', '2 days', '3 days']) result = TimedeltaIndex(strings) expected = to_timedelta([1, 2, 3], unit='d') tm.assert_index_equal(result, expected) from_ints = TimedeltaIndex(expected.asi8) tm.assert_index_equal(from_ints, expected) # non-conforming freq pytest.raises(ValueError, TimedeltaIndex, ['1 days', '2 days', '4 days'], freq='D') pytest.raises(ValueError, TimedeltaIndex, periods=10, freq='D')
def test_delete(self): idx = timedelta_range(start='1 Days', periods=5, freq='D', name='idx') # prserve freq expected_0 = timedelta_range(start='2 Days', periods=4, freq='D', name='idx') expected_4 = timedelta_range(start='1 Days', periods=4, freq='D', name='idx') # reset freq to None expected_1 = TimedeltaIndex( ['1 day', '3 day', '4 day', '5 day'], freq=None, name='idx') cases = {0: expected_0, -5: expected_0, -1: expected_4, 4: expected_4, 1: expected_1} for n, expected in compat.iteritems(cases): result = idx.delete(n) tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq == expected.freq with pytest.raises((IndexError, ValueError)): # either depeidnig on numpy version result = idx.delete(5)
def test_delete_slice(self): idx = timedelta_range(start='1 days', periods=10, freq='D', name='idx') # prserve freq expected_0_2 = timedelta_range(start='4 days', periods=7, freq='D', name='idx') expected_7_9 = timedelta_range(start='1 days', periods=7, freq='D', name='idx') # reset freq to None expected_3_5 = TimedeltaIndex(['1 d', '2 d', '3 d', '7 d', '8 d', '9 d', '10d'], freq=None, name='idx') cases = {(0, 1, 2): expected_0_2, (7, 8, 9): expected_7_9, (3, 4, 5): expected_3_5} for n, expected in compat.iteritems(cases): result = idx.delete(n) tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq == expected.freq result = idx.delete(slice(n[0], n[-1] + 1)) tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq == expected.freq
def test_categorical_repr_timedelta_ordered(self): idx = timedelta_range('1 days', periods=5) c = Categorical(idx, ordered=True) exp = """[1 days, 2 days, 3 days, 4 days, 5 days] Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa assert repr(c) == exp c = Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[1 days, 2 days, 3 days, 4 days, 5 days, 1 days, 2 days, 3 days, 4 days, 5 days] Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa assert repr(c) == exp idx = timedelta_range('1 hours', periods=20) c = Categorical(idx, ordered=True) exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] Length: 20 Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < 3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 < 18 days 01:00:00 < 19 days 01:00:00]""" # noqa assert repr(c) == exp c = Categorical(idx.append(idx), categories=idx, ordered=True) exp = """[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, ..., 15 days 01:00:00, 16 days 01:00:00, 17 days 01:00:00, 18 days 01:00:00, 19 days 01:00:00] Length: 40 Categories (20, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < 3 days 01:00:00 ... 16 days 01:00:00 < 17 days 01:00:00 < 18 days 01:00:00 < 19 days 01:00:00]""" # noqa assert repr(c) == exp
def test_categorical_series_repr_timedelta(self): idx = timedelta_range('1 days', periods=5) s = Series(Categorical(idx)) exp = """0 1 days 1 2 days 2 3 days 3 4 days 4 5 days dtype: category Categories (5, timedelta64[ns]): [1 days, 2 days, 3 days, 4 days, 5 days]""" assert repr(s) == exp idx = timedelta_range('1 hours', periods=10) s = Series(Categorical(idx)) exp = """0 0 days 01:00:00 1 1 days 01:00:00 2 2 days 01:00:00 3 3 days 01:00:00 4 4 days 01:00:00 5 5 days 01:00:00 6 6 days 01:00:00 7 7 days 01:00:00 8 8 days 01:00:00 9 9 days 01:00:00 dtype: category Categories (10, timedelta64[ns]): [0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, ..., 6 days 01:00:00, 7 days 01:00:00, 8 days 01:00:00, 9 days 01:00:00]""" # noqa assert repr(s) == exp
def test_getitem(self): idx1 = timedelta_range('1 day', '31 day', freq='D', name='idx') for idx in [idx1]: result = idx[0] assert result == Timedelta('1 day') result = idx[0:5] expected = timedelta_range('1 day', '5 day', freq='D', name='idx') tm.assert_index_equal(result, expected) assert result.freq == expected.freq result = idx[0:10:2] expected = timedelta_range('1 day', '9 day', freq='2D', name='idx') tm.assert_index_equal(result, expected) assert result.freq == expected.freq result = idx[-20:-5:3] expected = timedelta_range('12 day', '24 day', freq='3D', name='idx') tm.assert_index_equal(result, expected) assert result.freq == expected.freq result = idx[4::-1] expected = TimedeltaIndex(['5 day', '4 day', '3 day', '2 day', '1 day'], freq='-1D', name='idx') tm.assert_index_equal(result, expected) assert result.freq == expected.freq
def test_tdi_sub_int(self, one): rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): # GH#22535 result = rng - one expected = timedelta_range('1 days 08:00:00', freq='H', periods=10) tm.assert_index_equal(result, expected)
def test_categorical_series_repr_timedelta_ordered(self): idx = timedelta_range('1 days', periods=5) s = Series(Categorical(idx, ordered=True)) exp = """0 1 days 1 2 days 2 3 days 3 4 days 4 5 days dtype: category Categories (5, timedelta64[ns]): [1 days < 2 days < 3 days < 4 days < 5 days]""" # noqa assert repr(s) == exp idx = timedelta_range('1 hours', periods=10) s = Series(Categorical(idx, ordered=True)) exp = """0 0 days 01:00:00 1 1 days 01:00:00 2 2 days 01:00:00 3 3 days 01:00:00 4 4 days 01:00:00 5 5 days 01:00:00 6 6 days 01:00:00 7 7 days 01:00:00 8 8 days 01:00:00 9 9 days 01:00:00 dtype: category Categories (10, timedelta64[ns]): [0 days 01:00:00 < 1 days 01:00:00 < 2 days 01:00:00 < 3 days 01:00:00 ... 6 days 01:00:00 < 7 days 01:00:00 < 8 days 01:00:00 < 9 days 01:00:00]""" # noqa assert repr(s) == exp
def test_constructor_coverage(self): rng = timedelta_range('1 days', periods=10.5) exp = timedelta_range('1 days', periods=10) self.assertTrue(rng.equals(exp)) self.assertRaises(ValueError, TimedeltaIndex, start='1 days', periods='foo', freq='D') self.assertRaises(ValueError, TimedeltaIndex, start='1 days', end='10 days') self.assertRaises(ValueError, TimedeltaIndex, '1 days') # generator expression gen = (timedelta(i) for i in range(10)) result = TimedeltaIndex(gen) expected = TimedeltaIndex([timedelta(i) for i in range(10)]) self.assertTrue(result.equals(expected)) # NumPy string array strings = np.array(['1 days', '2 days', '3 days']) result = TimedeltaIndex(strings) expected = to_timedelta([1,2,3],unit='d') self.assertTrue(result.equals(expected)) from_ints = TimedeltaIndex(expected.asi8) self.assertTrue(from_ints.equals(expected)) # non-conforming freq self.assertRaises(ValueError, TimedeltaIndex, ['1 days', '2 days', '4 days'], freq='D') self.assertRaises(ValueError, TimedeltaIndex, periods=10, freq='D')
def test_value_counts_unique(self): # GH 7735 idx = timedelta_range("1 days 09:00:00", freq="H", periods=10) # create repeated values, 'n'th element is repeated by n+1 times idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1))) exp_idx = timedelta_range("1 days 18:00:00", freq="-1H", periods=10) expected = Series(range(10, 0, -1), index=exp_idx, dtype="int64") tm.assert_series_equal(idx.value_counts(), expected) expected = timedelta_range("1 days 09:00:00", freq="H", periods=10) tm.assert_index_equal(idx.unique(), expected) idx = TimedeltaIndex( ["1 days 09:00:00", "1 days 09:00:00", "1 days 09:00:00", "1 days 08:00:00", "1 days 08:00:00", pd.NaT] ) exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00"]) expected = Series([3, 2], index=exp_idx) tm.assert_series_equal(idx.value_counts(), expected) exp_idx = TimedeltaIndex(["1 days 09:00:00", "1 days 08:00:00", pd.NaT]) expected = Series([3, 2, 1], index=exp_idx) tm.assert_series_equal(idx.value_counts(dropna=False), expected) tm.assert_index_equal(idx.unique(), exp_idx)
def test_intersection_zero_length(self, period_1, period_2, sort): # GH 24471 test for non overlap the intersection should be zero length index_1 = timedelta_range('1 day', periods=period_1, freq='h') index_2 = timedelta_range('1 day', periods=period_2, freq='h') expected = timedelta_range('1 day', periods=0, freq='h') result = index_1.intersection(index_2, sort=sort) tm.assert_index_equal(result, expected)
def test_difference_sort(self, sort): index = pd.TimedeltaIndex(["5 days", "3 days", "2 days", "4 days", "1 days", "0 days"]) other = timedelta_range("1 days", "4 days", freq="D") idx_diff = index.difference(other, sort) expected = TimedeltaIndex(["5 days", "0 days"], freq=None) if sort is None: expected = expected.sort_values() tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal('freq', idx_diff, expected) other = timedelta_range("2 days", "5 days", freq="D") idx_diff = index.difference(other, sort) expected = TimedeltaIndex(["1 days", "0 days"], freq=None) if sort is None: expected = expected.sort_values() tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal('freq', idx_diff, expected)
def test_value_counts_unique(self): # GH 7735 idx = timedelta_range('1 days 09:00:00', freq='H', periods=10) # create repeated values, 'n'th element is repeated by n+1 times idx = TimedeltaIndex(np.repeat(idx.values, range(1, len(idx) + 1))) exp_idx = timedelta_range('1 days 18:00:00', freq='-1H', periods=10) expected = Series(range(10, 0, -1), index=exp_idx, dtype='int64') for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(), expected) expected = timedelta_range('1 days 09:00:00', freq='H', periods=10) tm.assert_index_equal(idx.unique(), expected) idx = TimedeltaIndex(['1 days 09:00:00', '1 days 09:00:00', '1 days 09:00:00', '1 days 08:00:00', '1 days 08:00:00', pd.NaT]) exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00']) expected = Series([3, 2], index=exp_idx) for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(), expected) exp_idx = TimedeltaIndex(['1 days 09:00:00', '1 days 08:00:00', pd.NaT]) expected = Series([3, 2, 1], index=exp_idx) for obj in [idx, Series(idx)]: tm.assert_series_equal(obj.value_counts(dropna=False), expected) tm.assert_index_equal(idx.unique(), exp_idx)
def test_sub_isub(self): # only test adding/sub offsets as - is now numeric # offset offsets = [pd.offsets.Hour(2), timedelta(hours=2), np.timedelta64(2, 'h'), Timedelta(hours=2)] for delta in offsets: rng = timedelta_range('1 days', '10 days') result = rng - delta expected = timedelta_range('0 days 22:00:00', '9 days 22:00:00') tm.assert_index_equal(result, expected) rng -= delta tm.assert_index_equal(rng, expected) # int rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) result = rng - 1 expected = timedelta_range('1 days 08:00:00', freq='H', periods=10) tm.assert_index_equal(result, expected) rng -= 1 tm.assert_index_equal(rng, expected) idx = TimedeltaIndex(['1 day', '2 day']) msg = "cannot subtract a datelike from a TimedeltaIndex" with tm.assertRaisesRegexp(TypeError, msg): idx - Timestamp('2011-01-01') result = Timestamp('2011-01-01') + idx expected = DatetimeIndex(['2011-01-02', '2011-01-03']) tm.assert_index_equal(result, expected)
def test_constructor_coverage(self): rng = timedelta_range("1 days", periods=10.5) exp = timedelta_range("1 days", periods=10) self.assertTrue(rng.equals(exp)) self.assertRaises(ValueError, TimedeltaIndex, start="1 days", periods="foo", freq="D") self.assertRaises(ValueError, TimedeltaIndex, start="1 days", end="10 days") self.assertRaises(ValueError, TimedeltaIndex, "1 days") # generator expression gen = (timedelta(i) for i in range(10)) result = TimedeltaIndex(gen) expected = TimedeltaIndex([timedelta(i) for i in range(10)]) self.assertTrue(result.equals(expected)) # NumPy string array strings = np.array(["1 days", "2 days", "3 days"]) result = TimedeltaIndex(strings) expected = to_timedelta([1, 2, 3], unit="d") self.assertTrue(result.equals(expected)) from_ints = TimedeltaIndex(expected.asi8) self.assertTrue(from_ints.equals(expected)) # non-conforming freq self.assertRaises(ValueError, TimedeltaIndex, ["1 days", "2 days", "4 days"], freq="D") self.assertRaises(ValueError, TimedeltaIndex, periods=10, freq="D")
def test_tdi_iadd_int(self, one): rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) expected = timedelta_range('1 days 10:00:00', freq='H', periods=10) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): # GH#22535 rng += one tm.assert_index_equal(rng, expected)
def test_zero_length_input_index(self, sort): # GH 24966 test for 0-len intersections are copied index_1 = timedelta_range('1 day', periods=0, freq='h') index_2 = timedelta_range('1 day', periods=3, freq='h') result = index_1.intersection(index_2, sort=sort) assert index_1 is not result assert index_2 is not result tm.assert_copy(result, index_1)
def test_resample_single_period_timedelta(): s = Series(list(range(5)), index=pd.timedelta_range( '1 day', freq='s', periods=5)) result = s.resample('2s').sum() expected = Series([1, 5, 4], index=pd.timedelta_range( '1 day', freq='2s', periods=3)) assert_series_equal(result, expected)
def test_union_bug_1730(self): rng_a = timedelta_range('1 day', periods=4, freq='3H') rng_b = timedelta_range('1 day', periods=4, freq='4H') result = rng_a.union(rng_b) exp = TimedeltaIndex(sorted(set(list(rng_a)) | set(list(rng_b)))) tm.assert_index_equal(result, exp)
def test_union_bug_1730(self): rng_a = timedelta_range("1 day", periods=4, freq="3H") rng_b = timedelta_range("1 day", periods=4, freq="4H") result = rng_a.union(rng_b) exp = TimedeltaIndex(sorted(set(list(rng_a)) | set(list(rng_b)))) self.assertTrue(result.equals(exp))
def test_tdi_add_int(self, one): # Variants of `one` for #19012 rng = timedelta_range('1 days 09:00:00', freq='H', periods=10) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): # GH#22535 result = rng + one expected = timedelta_range('1 days 10:00:00', freq='H', periods=10) tm.assert_index_equal(result, expected)
def test_union(self): i1 = timedelta_range("1day", periods=5) i2 = timedelta_range("3day", periods=5) result = i1.union(i2) expected = timedelta_range("1day", periods=7) self.assert_numpy_array_equal(result, expected) i1 = Int64Index(np.arange(0, 20, 2)) i2 = TimedeltaIndex(start="1 day", periods=10, freq="D") i1.union(i2) # Works i2.union(i1) # Fails with "AttributeError: can't set attribute"
def test_union(self): i1 = timedelta_range('1day', periods=5) i2 = timedelta_range('3day', periods=5) result = i1.union(i2) expected = timedelta_range('1day', periods=7) tm.assert_index_equal(result, expected) i1 = Int64Index(np.arange(0, 20, 2)) i2 = TimedeltaIndex(start='1 day', periods=10, freq='D') i1.union(i2) # Works i2.union(i1) # Fails with "AttributeError: can't set attribute"
def test_from_arrays_index_series_timedelta(): idx1 = pd.timedelta_range('1 days', freq='D', periods=3) idx2 = pd.timedelta_range('2 hours', freq='H', periods=3) result = pd.MultiIndex.from_arrays([idx1, idx2]) tm.assert_index_equal(result.get_level_values(0), idx1) tm.assert_index_equal(result.get_level_values(1), idx2) result2 = pd.MultiIndex.from_arrays([pd.Series(idx1), pd.Series(idx2)]) tm.assert_index_equal(result2.get_level_values(0), idx1) tm.assert_index_equal(result2.get_level_values(1), idx2) tm.assert_index_equal(result, result2)
def test_intersection_bug_1708(self): index_1 = timedelta_range("1 day", periods=4, freq="h") index_2 = index_1 + pd.offsets.Hour(5) result = index_1 & index_2 self.assertEqual(len(result), 0) index_1 = timedelta_range("1 day", periods=4, freq="h") index_2 = index_1 + pd.offsets.Hour(1) result = index_1 & index_2 expected = timedelta_range("1 day 01:00:00", periods=3, freq="h") tm.assert_index_equal(result, expected)
def test_intersection_equal(self, sort): # GH 24471 Test intersection outcome given the sort keyword # for equal indicies intersection should return the original index first = timedelta_range('1 day', periods=4, freq='h') second = timedelta_range('1 day', periods=4, freq='h') intersect = first.intersection(second, sort=sort) if sort is None: tm.assert_index_equal(intersect, second.sort_values()) assert tm.equalContents(intersect, second) # Corner cases inter = first.intersection(first, sort=sort) assert inter is first
def test_intersection_bug_1708(self): index_1 = timedelta_range('1 day', periods=4, freq='h') index_2 = index_1 + pd.offsets.Hour(5) result = index_1 & index_2 assert len(result) == 0 index_1 = timedelta_range('1 day', periods=4, freq='h') index_2 = index_1 + pd.offsets.Hour(1) result = index_1 & index_2 expected = timedelta_range('1 day 01:00:00', periods=3, freq='h') tm.assert_index_equal(result, expected)
def test_insert(self): idx = TimedeltaIndex(['4day', '1day', '2day'], name='idx') result = idx.insert(2, timedelta(days=5)) exp = TimedeltaIndex(['4day', '1day', '5day', '2day'], name='idx') tm.assert_index_equal(result, exp) # insertion of non-datetime should coerce to object index result = idx.insert(1, 'inserted') expected = Index([Timedelta('4day'), 'inserted', Timedelta('1day'), Timedelta('2day')], name='idx') assert not isinstance(result, TimedeltaIndex) tm.assert_index_equal(result, expected) assert result.name == expected.name idx = timedelta_range('1day 00:00:01', periods=3, freq='s', name='idx') # preserve freq expected_0 = TimedeltaIndex(['1day', '1day 00:00:01', '1day 00:00:02', '1day 00:00:03'], name='idx', freq='s') expected_3 = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02', '1day 00:00:03', '1day 00:00:04'], name='idx', freq='s') # reset freq to None expected_1_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:01', '1day 00:00:02', '1day 00:00:03'], name='idx', freq=None) expected_3_nofreq = TimedeltaIndex(['1day 00:00:01', '1day 00:00:02', '1day 00:00:03', '1day 00:00:05'], name='idx', freq=None) cases = [(0, Timedelta('1day'), expected_0), (-3, Timedelta('1day'), expected_0), (3, Timedelta('1day 00:00:04'), expected_3), (1, Timedelta('1day 00:00:01'), expected_1_nofreq), (3, Timedelta('1day 00:00:05'), expected_3_nofreq)] for n, d, expected in cases: result = idx.insert(n, d) tm.assert_index_equal(result, expected) assert result.name == expected.name assert result.freq == expected.freq # GH 18295 (test missing) expected = TimedeltaIndex(['1day', pd.NaT, '2day', '3day']) for na in (np.nan, pd.NaT, None): result = timedelta_range('1day', '3day').insert(1, na) tm.assert_index_equal(result, expected)
def test_categorical_index_repr_timedelta_ordered(self): idx = timedelta_range('1 days', periods=5) i = CategoricalIndex(Categorical(idx, ordered=True)) exp = """CategoricalIndex(['1 days', '2 days', '3 days', '4 days', '5 days'], categories=[1 days 00:00:00, 2 days 00:00:00, 3 days 00:00:00, 4 days 00:00:00, 5 days 00:00:00], ordered=True, dtype='category')""" # noqa assert repr(i) == exp idx = timedelta_range('1 hours', periods=10) i = CategoricalIndex(Categorical(idx, ordered=True)) exp = """CategoricalIndex(['0 days 01:00:00', '1 days 01:00:00', '2 days 01:00:00', '3 days 01:00:00', '4 days 01:00:00', '5 days 01:00:00', '6 days 01:00:00', '7 days 01:00:00', '8 days 01:00:00', '9 days 01:00:00'], categories=[0 days 01:00:00, 1 days 01:00:00, 2 days 01:00:00, 3 days 01:00:00, 4 days 01:00:00, 5 days 01:00:00, 6 days 01:00:00, 7 days 01:00:00, ...], ordered=True, dtype='category')""" # noqa assert repr(i) == exp
class TestIntervalIndex: index = IntervalIndex.from_arrays([0, 1], [1, 2]) def create_index(self, closed="right"): return IntervalIndex.from_breaks(range(11), closed=closed) def create_index_with_nan(self, closed="right"): mask = [True, False] + [True] * 8 return IntervalIndex.from_arrays( np.where(mask, np.arange(10), np.nan), np.where(mask, np.arange(1, 11), np.nan), closed=closed, ) def test_properties(self, closed): index = self.create_index(closed=closed) assert len(index) == 10 assert index.size == 10 assert index.shape == (10, ) tm.assert_index_equal(index.left, Index(np.arange(10))) tm.assert_index_equal(index.right, Index(np.arange(1, 11))) tm.assert_index_equal(index.mid, Index(np.arange(0.5, 10.5))) assert index.closed == closed ivs = [Interval(l, r, closed) for l, r in zip(range(10), range(1, 11))] expected = np.array(ivs, dtype=object) tm.assert_numpy_array_equal(np.asarray(index), expected) # with nans index = self.create_index_with_nan(closed=closed) assert len(index) == 10 assert index.size == 10 assert index.shape == (10, ) expected_left = Index([0, np.nan, 2, 3, 4, 5, 6, 7, 8, 9]) expected_right = expected_left + 1 expected_mid = expected_left + 0.5 tm.assert_index_equal(index.left, expected_left) tm.assert_index_equal(index.right, expected_right) tm.assert_index_equal(index.mid, expected_mid) assert index.closed == closed ivs = [ Interval(l, r, closed) if notna(l) else np.nan for l, r in zip(expected_left, expected_right) ] expected = np.array(ivs, dtype=object) tm.assert_numpy_array_equal(np.asarray(index), expected) @pytest.mark.parametrize( "breaks", [ [1, 1, 2, 5, 15, 53, 217, 1014, 5335, 31240, 201608], [-np.inf, -100, -10, 0.5, 1, 1.5, 3.8, 101, 202, np.inf], pd.to_datetime(["20170101", "20170202", "20170303", "20170404"]), pd.to_timedelta(["1ns", "2ms", "3s", "4M", "5H", "6D"]), ], ) def test_length(self, closed, breaks): # GH 18789 index = IntervalIndex.from_breaks(breaks, closed=closed) result = index.length expected = Index(iv.length for iv in index) tm.assert_index_equal(result, expected) # with NA index = index.insert(1, np.nan) result = index.length expected = Index(iv.length if notna(iv) else iv for iv in index) tm.assert_index_equal(result, expected) def test_with_nans(self, closed): index = self.create_index(closed=closed) assert index.hasnans is False result = index.isna() expected = np.zeros(len(index), dtype=bool) tm.assert_numpy_array_equal(result, expected) result = index.notna() expected = np.ones(len(index), dtype=bool) tm.assert_numpy_array_equal(result, expected) index = self.create_index_with_nan(closed=closed) assert index.hasnans is True result = index.isna() expected = np.array([False, True] + [False] * (len(index) - 2)) tm.assert_numpy_array_equal(result, expected) result = index.notna() expected = np.array([True, False] + [True] * (len(index) - 2)) tm.assert_numpy_array_equal(result, expected) def test_copy(self, closed): expected = self.create_index(closed=closed) result = expected.copy() assert result.equals(expected) result = expected.copy(deep=True) assert result.equals(expected) assert result.left is not expected.left def test_ensure_copied_data(self, closed): # exercise the copy flag in the constructor # not copying index = self.create_index(closed=closed) result = IntervalIndex(index, copy=False) tm.assert_numpy_array_equal(index.left.values, result.left.values, check_same="same") tm.assert_numpy_array_equal(index.right.values, result.right.values, check_same="same") # by-definition make a copy result = IntervalIndex(np.array(index), copy=False) tm.assert_numpy_array_equal(index.left.values, result.left.values, check_same="copy") tm.assert_numpy_array_equal(index.right.values, result.right.values, check_same="copy") def test_delete(self, closed): expected = IntervalIndex.from_breaks(np.arange(1, 11), closed=closed) result = self.create_index(closed=closed).delete(0) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "data", [ interval_range(0, periods=10, closed="neither"), interval_range(1.7, periods=8, freq=2.5, closed="both"), interval_range(Timestamp("20170101"), periods=12, closed="left"), interval_range(Timedelta("1 day"), periods=6, closed="right"), ], ) def test_insert(self, data): item = data[0] idx_item = IntervalIndex([item]) # start expected = idx_item.append(data) result = data.insert(0, item) tm.assert_index_equal(result, expected) # end expected = data.append(idx_item) result = data.insert(len(data), item) tm.assert_index_equal(result, expected) # mid expected = data[:3].append(idx_item).append(data[3:]) result = data.insert(3, item) tm.assert_index_equal(result, expected) # invalid type msg = "can only insert Interval objects and NA into an IntervalIndex" with pytest.raises(ValueError, match=msg): data.insert(1, "foo") # invalid closed msg = "inserted item must be closed on the same side as the index" for closed in {"left", "right", "both", "neither"} - {item.closed}: with pytest.raises(ValueError, match=msg): bad_item = Interval(item.left, item.right, closed=closed) data.insert(1, bad_item) # GH 18295 (test missing) na_idx = IntervalIndex([np.nan], closed=data.closed) for na in (np.nan, pd.NaT, None): expected = data[:1].append(na_idx).append(data[1:]) result = data.insert(1, na) tm.assert_index_equal(result, expected) def test_is_unique_interval(self, closed): """ Interval specific tests for is_unique in addition to base class tests """ # unique overlapping - distinct endpoints idx = IntervalIndex.from_tuples([(0, 1), (0.5, 1.5)], closed=closed) assert idx.is_unique is True # unique overlapping - shared endpoints idx = pd.IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed) assert idx.is_unique is True # unique nested idx = IntervalIndex.from_tuples([(-1, 1), (-2, 2)], closed=closed) assert idx.is_unique is True def test_monotonic(self, closed): # increasing non-overlapping idx = IntervalIndex.from_tuples([(0, 1), (2, 3), (4, 5)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # decreasing non-overlapping idx = IntervalIndex.from_tuples([(4, 5), (2, 3), (1, 2)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True # unordered non-overlapping idx = IntervalIndex.from_tuples([(0, 1), (4, 5), (2, 3)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # increasing overlapping idx = IntervalIndex.from_tuples([(0, 2), (0.5, 2.5), (1, 3)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # decreasing overlapping idx = IntervalIndex.from_tuples([(1, 3), (0.5, 2.5), (0, 2)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True # unordered overlapping idx = IntervalIndex.from_tuples([(0.5, 2.5), (0, 2), (1, 3)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # increasing overlapping shared endpoints idx = pd.IntervalIndex.from_tuples([(1, 2), (1, 3), (2, 3)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is False assert idx._is_strictly_monotonic_decreasing is False # decreasing overlapping shared endpoints idx = pd.IntervalIndex.from_tuples([(2, 3), (1, 3), (1, 2)], closed=closed) assert idx.is_monotonic is False assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True # stationary idx = IntervalIndex.from_tuples([(0, 1), (0, 1)], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is False assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is False # empty idx = IntervalIndex([], closed=closed) assert idx.is_monotonic is True assert idx._is_strictly_monotonic_increasing is True assert idx.is_monotonic_decreasing is True assert idx._is_strictly_monotonic_decreasing is True def test_get_item(self, closed): i = IntervalIndex.from_arrays((0, 1, np.nan), (1, 2, np.nan), closed=closed) assert i[0] == Interval(0.0, 1.0, closed=closed) assert i[1] == Interval(1.0, 2.0, closed=closed) assert isna(i[2]) result = i[0:1] expected = IntervalIndex.from_arrays((0.0, ), (1.0, ), closed=closed) tm.assert_index_equal(result, expected) result = i[0:2] expected = IntervalIndex.from_arrays((0.0, 1), (1.0, 2.0), closed=closed) tm.assert_index_equal(result, expected) result = i[1:3] expected = IntervalIndex.from_arrays((1.0, np.nan), (2.0, np.nan), closed=closed) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "breaks", [ date_range("20180101", periods=4), date_range("20180101", periods=4, tz="US/Eastern"), timedelta_range("0 days", periods=4), ], ids=lambda x: str(x.dtype), ) def test_maybe_convert_i8(self, breaks): # GH 20636 index = IntervalIndex.from_breaks(breaks) # intervalindex result = index._maybe_convert_i8(index) expected = IntervalIndex.from_breaks(breaks.asi8) tm.assert_index_equal(result, expected) # interval interval = Interval(breaks[0], breaks[1]) result = index._maybe_convert_i8(interval) expected = Interval(breaks[0].value, breaks[1].value) assert result == expected # datetimelike index result = index._maybe_convert_i8(breaks) expected = Index(breaks.asi8) tm.assert_index_equal(result, expected) # datetimelike scalar result = index._maybe_convert_i8(breaks[0]) expected = breaks[0].value assert result == expected # list-like of datetimelike scalars result = index._maybe_convert_i8(list(breaks)) expected = Index(breaks.asi8) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "breaks", [ date_range("2018-01-01", periods=5), timedelta_range("0 days", periods=5) ], ) def test_maybe_convert_i8_nat(self, breaks): # GH 20636 index = IntervalIndex.from_breaks(breaks) to_convert = breaks._constructor([pd.NaT] * 3) expected = pd.Float64Index([np.nan] * 3) result = index._maybe_convert_i8(to_convert) tm.assert_index_equal(result, expected) to_convert = to_convert.insert(0, breaks[0]) expected = expected.insert(0, float(breaks[0].value)) result = index._maybe_convert_i8(to_convert) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "breaks", [np.arange(5, dtype="int64"), np.arange(5, dtype="float64")], ids=lambda x: str(x.dtype), ) @pytest.mark.parametrize( "make_key", [ IntervalIndex.from_breaks, lambda breaks: Interval(breaks[0], breaks[1]), lambda breaks: breaks, lambda breaks: breaks[0], list, ], ids=["IntervalIndex", "Interval", "Index", "scalar", "list"], ) def test_maybe_convert_i8_numeric(self, breaks, make_key): # GH 20636 index = IntervalIndex.from_breaks(breaks) key = make_key(breaks) # no conversion occurs for numeric result = index._maybe_convert_i8(key) assert result is key @pytest.mark.parametrize( "breaks1, breaks2", permutations( [ date_range("20180101", periods=4), date_range("20180101", periods=4, tz="US/Eastern"), timedelta_range("0 days", periods=4), ], 2, ), ids=lambda x: str(x.dtype), ) @pytest.mark.parametrize( "make_key", [ IntervalIndex.from_breaks, lambda breaks: Interval(breaks[0], breaks[1]), lambda breaks: breaks, lambda breaks: breaks[0], list, ], ids=["IntervalIndex", "Interval", "Index", "scalar", "list"], ) def test_maybe_convert_i8_errors(self, breaks1, breaks2, make_key): # GH 20636 index = IntervalIndex.from_breaks(breaks1) key = make_key(breaks2) msg = ( f"Cannot index an IntervalIndex of subtype {breaks1.dtype} with " f"values of dtype {breaks2.dtype}") msg = re.escape(msg) with pytest.raises(ValueError, match=msg): index._maybe_convert_i8(key) def test_contains_method(self): # can select values that are IN the range of a value i = IntervalIndex.from_arrays([0, 1], [1, 2]) expected = np.array([False, False], dtype="bool") actual = i.contains(0) tm.assert_numpy_array_equal(actual, expected) actual = i.contains(3) tm.assert_numpy_array_equal(actual, expected) expected = np.array([True, False], dtype="bool") actual = i.contains(0.5) tm.assert_numpy_array_equal(actual, expected) actual = i.contains(1) tm.assert_numpy_array_equal(actual, expected) # __contains__ not implemented for "interval in interval", follow # that for the contains method for now with pytest.raises(NotImplementedError, match="contains not implemented for two"): i.contains(Interval(0, 1)) def test_contains_dunder(self): index = IntervalIndex.from_arrays([0, 1], [1, 2], closed="right") # __contains__ requires perfect matches to intervals. assert 0 not in index assert 1 not in index assert 2 not in index assert Interval(0, 1, closed="right") in index assert Interval(0, 2, closed="right") not in index assert Interval(0, 0.5, closed="right") not in index assert Interval(3, 5, closed="right") not in index assert Interval(-1, 0, closed="left") not in index assert Interval(0, 1, closed="left") not in index assert Interval(0, 1, closed="both") not in index def test_dropna(self, closed): expected = IntervalIndex.from_tuples([(0.0, 1.0), (1.0, 2.0)], closed=closed) ii = IntervalIndex.from_tuples([(0, 1), (1, 2), np.nan], closed=closed) result = ii.dropna() tm.assert_index_equal(result, expected) ii = IntervalIndex.from_arrays([0, 1, np.nan], [1, 2, np.nan], closed=closed) result = ii.dropna() tm.assert_index_equal(result, expected) def test_non_contiguous(self, closed): index = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed) target = [0.5, 1.5, 2.5] actual = index.get_indexer(target) expected = np.array([0, -1, 1], dtype="intp") tm.assert_numpy_array_equal(actual, expected) assert 1.5 not in index def test_isin(self, closed): index = self.create_index(closed=closed) expected = np.array([True] + [False] * (len(index) - 1)) result = index.isin(index[:1]) tm.assert_numpy_array_equal(result, expected) result = index.isin([index[0]]) tm.assert_numpy_array_equal(result, expected) other = IntervalIndex.from_breaks(np.arange(-2, 10), closed=closed) expected = np.array([True] * (len(index) - 1) + [False]) result = index.isin(other) tm.assert_numpy_array_equal(result, expected) result = index.isin(other.tolist()) tm.assert_numpy_array_equal(result, expected) for other_closed in {"right", "left", "both", "neither"}: other = self.create_index(closed=other_closed) expected = np.repeat(closed == other_closed, len(index)) result = index.isin(other) tm.assert_numpy_array_equal(result, expected) result = index.isin(other.tolist()) tm.assert_numpy_array_equal(result, expected) def test_comparison(self): actual = Interval(0, 1) < self.index expected = np.array([False, True]) tm.assert_numpy_array_equal(actual, expected) actual = Interval(0.5, 1.5) < self.index expected = np.array([False, True]) tm.assert_numpy_array_equal(actual, expected) actual = self.index > Interval(0.5, 1.5) tm.assert_numpy_array_equal(actual, expected) actual = self.index == self.index expected = np.array([True, True]) tm.assert_numpy_array_equal(actual, expected) actual = self.index <= self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index >= self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index < self.index expected = np.array([False, False]) tm.assert_numpy_array_equal(actual, expected) actual = self.index > self.index tm.assert_numpy_array_equal(actual, expected) actual = self.index == IntervalIndex.from_breaks([0, 1, 2], "left") tm.assert_numpy_array_equal(actual, expected) actual = self.index == self.index.values tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index.values == self.index tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index <= self.index.values tm.assert_numpy_array_equal(actual, np.array([True, True])) actual = self.index != self.index.values tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index > self.index.values tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index.values > self.index tm.assert_numpy_array_equal(actual, np.array([False, False])) # invalid comparisons actual = self.index == 0 tm.assert_numpy_array_equal(actual, np.array([False, False])) actual = self.index == self.index.left tm.assert_numpy_array_equal(actual, np.array([False, False])) msg = ("not supported between instances of 'int' and " "'pandas._libs.interval.Interval'") with pytest.raises(TypeError, match=msg): self.index > 0 with pytest.raises(TypeError, match=msg): self.index <= 0 with pytest.raises(TypeError, match=msg): self.index > np.arange(2) msg = "Lengths must match to compare" with pytest.raises(ValueError, match=msg): self.index > np.arange(3) def test_missing_values(self, closed): idx = Index([ np.nan, Interval(0, 1, closed=closed), Interval(1, 2, closed=closed) ]) idx2 = IntervalIndex.from_arrays([np.nan, 0, 1], [np.nan, 1, 2], closed=closed) assert idx.equals(idx2) msg = ("missing values must be missing in the same location both left " "and right sides") with pytest.raises(ValueError, match=msg): IntervalIndex.from_arrays([np.nan, 0, 1], np.array([0, 1, 2]), closed=closed) tm.assert_numpy_array_equal(isna(idx), np.array([True, False, False])) def test_sort_values(self, closed): index = self.create_index(closed=closed) result = index.sort_values() tm.assert_index_equal(result, index) result = index.sort_values(ascending=False) tm.assert_index_equal(result, index[::-1]) # with nan index = IntervalIndex([Interval(1, 2), np.nan, Interval(0, 1)]) result = index.sort_values() expected = IntervalIndex([Interval(0, 1), Interval(1, 2), np.nan]) tm.assert_index_equal(result, expected) result = index.sort_values(ascending=False) expected = IntervalIndex([np.nan, Interval(1, 2), Interval(0, 1)]) tm.assert_index_equal(result, expected) @pytest.mark.parametrize("tz", [None, "US/Eastern"]) def test_datetime(self, tz): start = Timestamp("2000-01-01", tz=tz) dates = date_range(start=start, periods=10) index = IntervalIndex.from_breaks(dates) # test mid start = Timestamp("2000-01-01T12:00", tz=tz) expected = date_range(start=start, periods=9) tm.assert_index_equal(index.mid, expected) # __contains__ doesn't check individual points assert Timestamp("2000-01-01", tz=tz) not in index assert Timestamp("2000-01-01T12", tz=tz) not in index assert Timestamp("2000-01-02", tz=tz) not in index iv_true = Interval(Timestamp("2000-01-02", tz=tz), Timestamp("2000-01-03", tz=tz)) iv_false = Interval(Timestamp("1999-12-31", tz=tz), Timestamp("2000-01-01", tz=tz)) assert iv_true in index assert iv_false not in index # .contains does check individual points assert not index.contains(Timestamp("2000-01-01", tz=tz)).any() assert index.contains(Timestamp("2000-01-01T12", tz=tz)).any() assert index.contains(Timestamp("2000-01-02", tz=tz)).any() # test get_indexer start = Timestamp("1999-12-31T12:00", tz=tz) target = date_range(start=start, periods=7, freq="12H") actual = index.get_indexer(target) expected = np.array([-1, -1, 0, 0, 1, 1, 2], dtype="intp") tm.assert_numpy_array_equal(actual, expected) start = Timestamp("2000-01-08T18:00", tz=tz) target = date_range(start=start, periods=7, freq="6H") actual = index.get_indexer(target) expected = np.array([7, 7, 8, 8, 8, 8, -1], dtype="intp") tm.assert_numpy_array_equal(actual, expected) def test_append(self, closed): index1 = IntervalIndex.from_arrays([0, 1], [1, 2], closed=closed) index2 = IntervalIndex.from_arrays([1, 2], [2, 3], closed=closed) result = index1.append(index2) expected = IntervalIndex.from_arrays([0, 1, 1, 2], [1, 2, 2, 3], closed=closed) tm.assert_index_equal(result, expected) result = index1.append([index1, index2]) expected = IntervalIndex.from_arrays([0, 1, 0, 1, 1, 2], [1, 2, 1, 2, 2, 3], closed=closed) tm.assert_index_equal(result, expected) msg = "Intervals must all be closed on the same side" for other_closed in {"left", "right", "both", "neither"} - {closed}: index_other_closed = IntervalIndex.from_arrays([0, 1], [1, 2], closed=other_closed) with pytest.raises(ValueError, match=msg): index1.append(index_other_closed) def test_is_non_overlapping_monotonic(self, closed): # Should be True in all cases tpls = [(0, 1), (2, 3), (4, 5), (6, 7)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is True idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is True # Should be False in all cases (overlapping) tpls = [(0, 2), (1, 3), (4, 5), (6, 7)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is False idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is False # Should be False in all cases (non-monotonic) tpls = [(0, 1), (2, 3), (6, 7), (4, 5)] idx = IntervalIndex.from_tuples(tpls, closed=closed) assert idx.is_non_overlapping_monotonic is False idx = IntervalIndex.from_tuples(tpls[::-1], closed=closed) assert idx.is_non_overlapping_monotonic is False # Should be False for closed='both', otherwise True (GH16560) if closed == "both": idx = IntervalIndex.from_breaks(range(4), closed=closed) assert idx.is_non_overlapping_monotonic is False else: idx = IntervalIndex.from_breaks(range(4), closed=closed) assert idx.is_non_overlapping_monotonic is True @pytest.mark.parametrize( "start, shift, na_value", [ (0, 1, np.nan), (Timestamp("2018-01-01"), Timedelta("1 day"), pd.NaT), (Timedelta("0 days"), Timedelta("1 day"), pd.NaT), ], ) def test_is_overlapping(self, start, shift, na_value, closed): # GH 23309 # see test_interval_tree.py for extensive tests; interface tests here # non-overlapping tuples = [(start + n * shift, start + (n + 1) * shift) for n in (0, 2, 4)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is False # non-overlapping with NA tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is False # overlapping tuples = [(start + n * shift, start + (n + 2) * shift) for n in range(3)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is True # overlapping with NA tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] index = IntervalIndex.from_tuples(tuples, closed=closed) assert index.is_overlapping is True # common endpoints tuples = [(start + n * shift, start + (n + 1) * shift) for n in range(3)] index = IntervalIndex.from_tuples(tuples, closed=closed) result = index.is_overlapping expected = closed == "both" assert result is expected # common endpoints with NA tuples = [(na_value, na_value)] + tuples + [(na_value, na_value)] index = IntervalIndex.from_tuples(tuples, closed=closed) result = index.is_overlapping assert result is expected @pytest.mark.parametrize( "tuples", [ list(zip(range(10), range(1, 11))), list( zip( date_range("20170101", periods=10), date_range("20170101", periods=10), )), list( zip( timedelta_range("0 days", periods=10), timedelta_range("1 day", periods=10), )), ], ) def test_to_tuples(self, tuples): # GH 18756 idx = IntervalIndex.from_tuples(tuples) result = idx.to_tuples() expected = Index(com.asarray_tuplesafe(tuples)) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "tuples", [ list(zip(range(10), range(1, 11))) + [np.nan], list( zip( date_range("20170101", periods=10), date_range("20170101", periods=10), )) + [np.nan], list( zip( timedelta_range("0 days", periods=10), timedelta_range("1 day", periods=10), )) + [np.nan], ], ) @pytest.mark.parametrize("na_tuple", [True, False]) def test_to_tuples_na(self, tuples, na_tuple): # GH 18756 idx = IntervalIndex.from_tuples(tuples) result = idx.to_tuples(na_tuple=na_tuple) # check the non-NA portion expected_notna = Index(com.asarray_tuplesafe(tuples[:-1])) result_notna = result[:-1] tm.assert_index_equal(result_notna, expected_notna) # check the NA portion result_na = result[-1] if na_tuple: assert isinstance(result_na, tuple) assert len(result_na) == 2 assert all(isna(x) for x in result_na) else: assert isna(result_na) def test_nbytes(self): # GH 19209 left = np.arange(0, 4, dtype="i8") right = np.arange(1, 5, dtype="i8") result = IntervalIndex.from_arrays(left, right).nbytes expected = 64 # 4 * 8 * 2 assert result == expected @pytest.mark.parametrize("new_closed", ["left", "right", "both", "neither"]) def test_set_closed(self, name, closed, new_closed): # GH 21670 index = interval_range(0, 5, closed=closed, name=name) result = index.set_closed(new_closed) expected = interval_range(0, 5, closed=new_closed, name=name) tm.assert_index_equal(result, expected) @pytest.mark.parametrize("bad_closed", ["foo", 10, "LEFT", True, False]) def test_set_closed_errors(self, bad_closed): # GH 21670 index = interval_range(0, 5) msg = f"invalid option for 'closed': {bad_closed}" with pytest.raises(ValueError, match=msg): index.set_closed(bad_closed) def test_is_all_dates(self): # GH 23576 year_2017 = pd.Interval(pd.Timestamp("2017-01-01 00:00:00"), pd.Timestamp("2018-01-01 00:00:00")) year_2017_index = pd.IntervalIndex([year_2017]) assert not year_2017_index.is_all_dates @pytest.mark.parametrize("key", [[5], (2, 3)]) def test_get_value_non_scalar_errors(self, key): # GH 31117 idx = IntervalIndex.from_tuples([(1, 3), (2, 4), (3, 5), (7, 10), (3, 10)]) s = pd.Series(range(len(idx)), index=idx) msg = str(key) with pytest.raises(InvalidIndexError, match=msg): with tm.assert_produces_warning(FutureWarning): idx.get_value(s, key)
def test_unknown_attribute(self): # see gh-9680 tdi = pd.timedelta_range(start=0, periods=10, freq='1s') ts = pd.Series(np.random.normal(size=10), index=tdi) assert 'foo' not in ts.__dict__.keys() pytest.raises(AttributeError, lambda: ts.foo)
class TestCategoricalConstructors: def test_validate_ordered(self): # see gh-14058 exp_msg = "'ordered' must either be 'True' or 'False'" exp_err = TypeError # This should be a boolean. ordered = np.array([0, 1, 2]) with pytest.raises(exp_err, match=exp_msg): Categorical([1, 2, 3], ordered=ordered) with pytest.raises(exp_err, match=exp_msg): Categorical.from_codes([0, 0, 1], categories=["a", "b", "c"], ordered=ordered) def test_constructor_empty(self): # GH 17248 c = Categorical([]) expected = Index([]) tm.assert_index_equal(c.categories, expected) c = Categorical([], categories=[1, 2, 3]) expected = pd.Int64Index([1, 2, 3]) tm.assert_index_equal(c.categories, expected) def test_constructor_empty_boolean(self): # see gh-22702 cat = Categorical([], categories=[True, False]) categories = sorted(cat.categories.tolist()) assert categories == [False, True] def test_constructor_tuples(self): values = np.array([(1, ), (1, 2), (1, ), (1, 2)], dtype=object) result = Categorical(values) expected = Index([(1, ), (1, 2)], tupleize_cols=False) tm.assert_index_equal(result.categories, expected) assert result.ordered is False def test_constructor_tuples_datetimes(self): # numpy will auto reshape when all of the tuples are the # same len, so add an extra one with 2 items and slice it off values = np.array( [ (Timestamp("2010-01-01"), ), (Timestamp("2010-01-02"), ), (Timestamp("2010-01-01"), ), (Timestamp("2010-01-02"), ), ("a", "b"), ], dtype=object, )[:-1] result = Categorical(values) expected = Index( [(Timestamp("2010-01-01"), ), (Timestamp("2010-01-02"), )], tupleize_cols=False, ) tm.assert_index_equal(result.categories, expected) def test_constructor_unsortable(self): # it works! arr = np.array([1, 2, 3, datetime.now()], dtype="O") factor = Categorical(arr, ordered=False) assert not factor.ordered # this however will raise as cannot be sorted msg = ("'values' is not ordered, please explicitly specify the " "categories order by passing in a categories argument.") with pytest.raises(TypeError, match=msg): Categorical(arr, ordered=True) def test_constructor_interval(self): result = Categorical( [Interval(1, 2), Interval(2, 3), Interval(3, 6)], ordered=True) ii = IntervalIndex([Interval(1, 2), Interval(2, 3), Interval(3, 6)]) exp = Categorical(ii, ordered=True) tm.assert_categorical_equal(result, exp) tm.assert_index_equal(result.categories, ii) def test_constructor(self): exp_arr = np.array(["a", "b", "c", "a", "b", "c"], dtype=np.object_) c1 = Categorical(exp_arr) tm.assert_numpy_array_equal(c1.__array__(), exp_arr) c2 = Categorical(exp_arr, categories=["a", "b", "c"]) tm.assert_numpy_array_equal(c2.__array__(), exp_arr) c2 = Categorical(exp_arr, categories=["c", "b", "a"]) tm.assert_numpy_array_equal(c2.__array__(), exp_arr) # categories must be unique msg = "Categorical categories must be unique" with pytest.raises(ValueError, match=msg): Categorical([1, 2], [1, 2, 2]) with pytest.raises(ValueError, match=msg): Categorical(["a", "b"], ["a", "b", "b"]) # The default should be unordered c1 = Categorical(["a", "b", "c", "a"]) assert not c1.ordered # Categorical as input c1 = Categorical(["a", "b", "c", "a"]) c2 = Categorical(c1) tm.assert_categorical_equal(c1, c2) c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) c2 = Categorical(c1) tm.assert_categorical_equal(c1, c2) c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) c2 = Categorical(c1) tm.assert_categorical_equal(c1, c2) c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) c2 = Categorical(c1, categories=["a", "b", "c"]) tm.assert_numpy_array_equal(c1.__array__(), c2.__array__()) tm.assert_index_equal(c2.categories, Index(["a", "b", "c"])) # Series of dtype category c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) c2 = Categorical(Series(c1)) tm.assert_categorical_equal(c1, c2) c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) c2 = Categorical(Series(c1)) tm.assert_categorical_equal(c1, c2) # Series c1 = Categorical(["a", "b", "c", "a"]) c2 = Categorical(Series(["a", "b", "c", "a"])) tm.assert_categorical_equal(c1, c2) c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) c2 = Categorical(Series(["a", "b", "c", "a"]), categories=["a", "b", "c", "d"]) tm.assert_categorical_equal(c1, c2) # This should result in integer categories, not float! cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) assert is_integer_dtype(cat.categories) # https://github.com/pandas-dev/pandas/issues/3678 cat = Categorical([np.nan, 1, 2, 3]) assert is_integer_dtype(cat.categories) # this should result in floats cat = Categorical([np.nan, 1, 2.0, 3]) assert is_float_dtype(cat.categories) cat = Categorical([np.nan, 1.0, 2.0, 3.0]) assert is_float_dtype(cat.categories) # This doesn't work -> this would probably need some kind of "remember # the original type" feature to try to cast the array interface result # to... # vals = np.asarray(cat[cat.notna()]) # assert is_integer_dtype(vals) # corner cases cat = Categorical([1]) assert len(cat.categories) == 1 assert cat.categories[0] == 1 assert len(cat.codes) == 1 assert cat.codes[0] == 0 cat = Categorical(["a"]) assert len(cat.categories) == 1 assert cat.categories[0] == "a" assert len(cat.codes) == 1 assert cat.codes[0] == 0 # Scalars should be converted to lists cat = Categorical(1) assert len(cat.categories) == 1 assert cat.categories[0] == 1 assert len(cat.codes) == 1 assert cat.codes[0] == 0 # two arrays # - when the first is an integer dtype and the second is not # - when the resulting codes are all -1/NaN with tm.assert_produces_warning(None): c_old = Categorical([0, 1, 2, 0, 1, 2], categories=["a", "b", "c"]) with tm.assert_produces_warning(None): c_old = Categorical([0, 1, 2, 0, 1, 2], categories=[3, 4, 5]) # noqa # the next one are from the old docs with tm.assert_produces_warning(None): c_old2 = Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3]) # noqa cat = Categorical([1, 2], categories=[1, 2, 3]) # this is a legitimate constructor with tm.assert_produces_warning(None): c = Categorical( # noqa np.array([], dtype="int64"), categories=[3, 2, 1], ordered=True) def test_constructor_with_existing_categories(self): # GH25318: constructing with pd.Series used to bogusly skip recoding # categories c0 = Categorical(["a", "b", "c", "a"]) c1 = Categorical(["a", "b", "c", "a"], categories=["b", "c"]) c2 = Categorical(c0, categories=c1.categories) tm.assert_categorical_equal(c1, c2) c3 = Categorical(Series(c0), categories=c1.categories) tm.assert_categorical_equal(c1, c3) def test_constructor_not_sequence(self): # https://github.com/pandas-dev/pandas/issues/16022 msg = r"^Parameter 'categories' must be list-like, was" with pytest.raises(TypeError, match=msg): Categorical(["a", "b"], categories="a") def test_constructor_with_null(self): # Cannot have NaN in categories msg = "Categorical categories cannot be null" with pytest.raises(ValueError, match=msg): Categorical([np.nan, "a", "b", "c"], categories=[np.nan, "a", "b", "c"]) with pytest.raises(ValueError, match=msg): Categorical([None, "a", "b", "c"], categories=[None, "a", "b", "c"]) with pytest.raises(ValueError, match=msg): Categorical( DatetimeIndex(["nat", "20160101"]), categories=[NaT, Timestamp("20160101")], ) def test_constructor_with_index(self): ci = CategoricalIndex(list("aabbca"), categories=list("cab")) tm.assert_categorical_equal(ci.values, Categorical(ci)) ci = CategoricalIndex(list("aabbca"), categories=list("cab")) tm.assert_categorical_equal( ci.values, Categorical(ci.astype(object), categories=ci.categories)) def test_constructor_with_generator(self): # This was raising an Error in isna(single_val).any() because isna # returned a scalar for a generator exp = Categorical([0, 1, 2]) cat = Categorical(x for x in [0, 1, 2]) tm.assert_categorical_equal(cat, exp) cat = Categorical(range(3)) tm.assert_categorical_equal(cat, exp) MultiIndex.from_product([range(5), ["a", "b", "c"]]) # check that categories accept generators and sequences cat = Categorical([0, 1, 2], categories=(x for x in [0, 1, 2])) tm.assert_categorical_equal(cat, exp) cat = Categorical([0, 1, 2], categories=range(3)) tm.assert_categorical_equal(cat, exp) @pytest.mark.parametrize( "dtl", [ date_range("1995-01-01 00:00:00", periods=5, freq="s"), date_range( "1995-01-01 00:00:00", periods=5, freq="s", tz="US/Eastern"), timedelta_range("1 day", periods=5, freq="s"), ], ) def test_constructor_with_datetimelike(self, dtl): # see gh-12077 # constructor with a datetimelike and NaT s = Series(dtl) c = Categorical(s) expected = type(dtl)(s) expected._data.freq = None tm.assert_index_equal(c.categories, expected) tm.assert_numpy_array_equal(c.codes, np.arange(5, dtype="int8")) # with NaT s2 = s.copy() s2.iloc[-1] = NaT c = Categorical(s2) expected = type(dtl)(s2.dropna()) expected._data.freq = None tm.assert_index_equal(c.categories, expected) exp = np.array([0, 1, 2, 3, -1], dtype=np.int8) tm.assert_numpy_array_equal(c.codes, exp) result = repr(c) assert "NaT" in result def test_constructor_from_index_series_datetimetz(self): idx = date_range("2015-01-01 10:00", freq="D", periods=3, tz="US/Eastern") idx = idx._with_freq(None) # freq not preserved in result.categories result = Categorical(idx) tm.assert_index_equal(result.categories, idx) result = Categorical(Series(idx)) tm.assert_index_equal(result.categories, idx) def test_constructor_from_index_series_timedelta(self): idx = timedelta_range("1 days", freq="D", periods=3) idx = idx._with_freq(None) # freq not preserved in result.categories result = Categorical(idx) tm.assert_index_equal(result.categories, idx) result = Categorical(Series(idx)) tm.assert_index_equal(result.categories, idx) def test_constructor_from_index_series_period(self): idx = period_range("2015-01-01", freq="D", periods=3) result = Categorical(idx) tm.assert_index_equal(result.categories, idx) result = Categorical(Series(idx)) tm.assert_index_equal(result.categories, idx) @pytest.mark.parametrize( "values", [ np.array([1.0, 1.2, 1.8, np.nan]), np.array([1, 2, 3], dtype="int64"), ["a", "b", "c", np.nan], [pd.Period("2014-01"), pd.Period("2014-02"), NaT], [Timestamp("2014-01-01"), Timestamp("2014-01-02"), NaT], [ Timestamp("2014-01-01", tz="US/Eastern"), Timestamp("2014-01-02", tz="US/Eastern"), NaT, ], ], ) def test_constructor_invariant(self, values): # GH 14190 c = Categorical(values) c2 = Categorical(c) tm.assert_categorical_equal(c, c2) @pytest.mark.parametrize("ordered", [True, False]) def test_constructor_with_dtype(self, ordered): categories = ["b", "a", "c"] dtype = CategoricalDtype(categories, ordered=ordered) result = Categorical(["a", "b", "a", "c"], dtype=dtype) expected = Categorical(["a", "b", "a", "c"], categories=categories, ordered=ordered) tm.assert_categorical_equal(result, expected) assert result.ordered is ordered def test_constructor_dtype_and_others_raises(self): dtype = CategoricalDtype(["a", "b"], ordered=True) msg = "Cannot specify `categories` or `ordered` together with `dtype`." with pytest.raises(ValueError, match=msg): Categorical(["a", "b"], categories=["a", "b"], dtype=dtype) with pytest.raises(ValueError, match=msg): Categorical(["a", "b"], ordered=True, dtype=dtype) with pytest.raises(ValueError, match=msg): Categorical(["a", "b"], ordered=False, dtype=dtype) @pytest.mark.parametrize("categories", [None, ["a", "b"], ["a", "c"]]) @pytest.mark.parametrize("ordered", [True, False]) def test_constructor_str_category(self, categories, ordered): result = Categorical(["a", "b"], categories=categories, ordered=ordered, dtype="category") expected = Categorical(["a", "b"], categories=categories, ordered=ordered) tm.assert_categorical_equal(result, expected) def test_constructor_str_unknown(self): with pytest.raises(ValueError, match="Unknown dtype"): Categorical([1, 2], dtype="foo") def test_constructor_np_strs(self): # GH#31499 Hastable.map_locations needs to work on np.str_ objects cat = Categorical(["1", "0", "1"], [np.str_("0"), np.str_("1")]) assert all(isinstance(x, np.str_) for x in cat.categories) def test_constructor_from_categorical_with_dtype(self): dtype = CategoricalDtype(["a", "b", "c"], ordered=True) values = Categorical(["a", "b", "d"]) result = Categorical(values, dtype=dtype) # We use dtype.categories, not values.categories expected = Categorical(["a", "b", "d"], categories=["a", "b", "c"], ordered=True) tm.assert_categorical_equal(result, expected) def test_constructor_from_categorical_with_unknown_dtype(self): dtype = CategoricalDtype(None, ordered=True) values = Categorical(["a", "b", "d"]) result = Categorical(values, dtype=dtype) # We use values.categories, not dtype.categories expected = Categorical(["a", "b", "d"], categories=["a", "b", "d"], ordered=True) tm.assert_categorical_equal(result, expected) def test_constructor_from_categorical_string(self): values = Categorical(["a", "b", "d"]) # use categories, ordered result = Categorical(values, categories=["a", "b", "c"], ordered=True, dtype="category") expected = Categorical(["a", "b", "d"], categories=["a", "b", "c"], ordered=True) tm.assert_categorical_equal(result, expected) # No string result = Categorical(values, categories=["a", "b", "c"], ordered=True) tm.assert_categorical_equal(result, expected) def test_constructor_with_categorical_categories(self): # GH17884 expected = Categorical(["a", "b"], categories=["a", "b", "c"]) result = Categorical(["a", "b"], categories=Categorical(["a", "b", "c"])) tm.assert_categorical_equal(result, expected) result = Categorical(["a", "b"], categories=CategoricalIndex(["a", "b", "c"])) tm.assert_categorical_equal(result, expected) @pytest.mark.parametrize("klass", [lambda x: np.array(x, dtype=object), list]) def test_construction_with_null(self, klass, nulls_fixture): # https://github.com/pandas-dev/pandas/issues/31927 values = klass(["a", nulls_fixture, "b"]) result = Categorical(values) dtype = CategoricalDtype(["a", "b"]) codes = [0, -1, 1] expected = Categorical.from_codes(codes=codes, dtype=dtype) tm.assert_categorical_equal(result, expected) def test_from_codes_empty(self): cat = ["a", "b", "c"] result = Categorical.from_codes([], categories=cat) expected = Categorical([], categories=cat) tm.assert_categorical_equal(result, expected) def test_from_codes_too_few_categories(self): dtype = CategoricalDtype(categories=[1, 2]) msg = "codes need to be between " with pytest.raises(ValueError, match=msg): Categorical.from_codes([1, 2], categories=dtype.categories) with pytest.raises(ValueError, match=msg): Categorical.from_codes([1, 2], dtype=dtype) def test_from_codes_non_int_codes(self): dtype = CategoricalDtype(categories=[1, 2]) msg = "codes need to be array-like integers" with pytest.raises(ValueError, match=msg): Categorical.from_codes(["a"], categories=dtype.categories) with pytest.raises(ValueError, match=msg): Categorical.from_codes(["a"], dtype=dtype) def test_from_codes_non_unique_categories(self): with pytest.raises(ValueError, match="Categorical categories must be unique"): Categorical.from_codes([0, 1, 2], categories=["a", "a", "b"]) def test_from_codes_nan_cat_included(self): with pytest.raises(ValueError, match="Categorical categories cannot be null"): Categorical.from_codes([0, 1, 2], categories=["a", "b", np.nan]) def test_from_codes_too_negative(self): dtype = CategoricalDtype(categories=["a", "b", "c"]) msg = r"codes need to be between -1 and len\(categories\)-1" with pytest.raises(ValueError, match=msg): Categorical.from_codes([-2, 1, 2], categories=dtype.categories) with pytest.raises(ValueError, match=msg): Categorical.from_codes([-2, 1, 2], dtype=dtype) def test_from_codes(self): dtype = CategoricalDtype(categories=["a", "b", "c"]) exp = Categorical(["a", "b", "c"], ordered=False) res = Categorical.from_codes([0, 1, 2], categories=dtype.categories) tm.assert_categorical_equal(exp, res) res = Categorical.from_codes([0, 1, 2], dtype=dtype) tm.assert_categorical_equal(exp, res) @pytest.mark.parametrize("klass", [Categorical, CategoricalIndex]) def test_from_codes_with_categorical_categories(self, klass): # GH17884 expected = Categorical(["a", "b"], categories=["a", "b", "c"]) result = Categorical.from_codes([0, 1], categories=klass(["a", "b", "c"])) tm.assert_categorical_equal(result, expected) @pytest.mark.parametrize("klass", [Categorical, CategoricalIndex]) def test_from_codes_with_non_unique_categorical_categories(self, klass): with pytest.raises(ValueError, match="Categorical categories must be unique"): Categorical.from_codes([0, 1], klass(["a", "b", "a"])) def test_from_codes_with_nan_code(self): # GH21767 codes = [1, 2, np.nan] dtype = CategoricalDtype(categories=["a", "b", "c"]) with pytest.raises(ValueError, match="codes need to be array-like integers"): Categorical.from_codes(codes, categories=dtype.categories) with pytest.raises(ValueError, match="codes need to be array-like integers"): Categorical.from_codes(codes, dtype=dtype) @pytest.mark.parametrize("codes", [[1.0, 2.0, 0], [1.1, 2.0, 0]]) def test_from_codes_with_float(self, codes): # GH21767 # float codes should raise even if values are equal to integers dtype = CategoricalDtype(categories=["a", "b", "c"]) msg = "codes need to be array-like integers" with pytest.raises(ValueError, match=msg): Categorical.from_codes(codes, dtype.categories) with pytest.raises(ValueError, match=msg): Categorical.from_codes(codes, dtype=dtype) def test_from_codes_with_dtype_raises(self): msg = "Cannot specify" with pytest.raises(ValueError, match=msg): Categorical.from_codes([0, 1], categories=["a", "b"], dtype=CategoricalDtype(["a", "b"])) with pytest.raises(ValueError, match=msg): Categorical.from_codes([0, 1], ordered=True, dtype=CategoricalDtype(["a", "b"])) def test_from_codes_neither(self): msg = "Both were None" with pytest.raises(ValueError, match=msg): Categorical.from_codes([0, 1]) def test_from_codes_with_nullable_int(self): codes = pd.array([0, 1], dtype="Int64") categories = ["a", "b"] result = Categorical.from_codes(codes, categories=categories) expected = Categorical.from_codes(codes.to_numpy(int), categories=categories) tm.assert_categorical_equal(result, expected) def test_from_codes_with_nullable_int_na_raises(self): codes = pd.array([0, None], dtype="Int64") categories = ["a", "b"] msg = "codes cannot contain NA values" with pytest.raises(ValueError, match=msg): Categorical.from_codes(codes, categories=categories) @pytest.mark.parametrize("dtype", [None, "category"]) def test_from_inferred_categories(self, dtype): cats = ["a", "b"] codes = np.array([0, 0, 1, 1], dtype="i8") result = Categorical._from_inferred_categories(cats, codes, dtype) expected = Categorical.from_codes(codes, cats) tm.assert_categorical_equal(result, expected) @pytest.mark.parametrize("dtype", [None, "category"]) def test_from_inferred_categories_sorts(self, dtype): cats = ["b", "a"] codes = np.array([0, 1, 1, 1], dtype="i8") result = Categorical._from_inferred_categories(cats, codes, dtype) expected = Categorical.from_codes([1, 0, 0, 0], ["a", "b"]) tm.assert_categorical_equal(result, expected) def test_from_inferred_categories_dtype(self): cats = ["a", "b", "d"] codes = np.array([0, 1, 0, 2], dtype="i8") dtype = CategoricalDtype(["c", "b", "a"], ordered=True) result = Categorical._from_inferred_categories(cats, codes, dtype) expected = Categorical(["a", "b", "a", "d"], categories=["c", "b", "a"], ordered=True) tm.assert_categorical_equal(result, expected) def test_from_inferred_categories_coerces(self): cats = ["1", "2", "bad"] codes = np.array([0, 0, 1, 2], dtype="i8") dtype = CategoricalDtype([1, 2]) result = Categorical._from_inferred_categories(cats, codes, dtype) expected = Categorical([1, 1, 2, np.nan]) tm.assert_categorical_equal(result, expected) @pytest.mark.parametrize("ordered", [None, True, False]) def test_construction_with_ordered(self, ordered): # GH 9347, 9190 cat = Categorical([0, 1, 2], ordered=ordered) assert cat.ordered == bool(ordered) @pytest.mark.xfail(reason="Imaginary values not supported in Categorical") def test_constructor_imaginary(self): values = [1, 2, 3 + 1j] c1 = Categorical(values) tm.assert_index_equal(c1.categories, Index(values)) tm.assert_numpy_array_equal(np.array(c1), np.array(values)) def test_constructor_string_and_tuples(self): # GH 21416 c = Categorical( np.array(["c", ("a", "b"), ("b", "a"), "c"], dtype=object)) expected_index = Index([("a", "b"), ("b", "a"), "c"]) assert c.categories.equals(expected_index) def test_interval(self): idx = pd.interval_range(0, 10, periods=10) cat = Categorical(idx, categories=idx) expected_codes = np.arange(10, dtype="int8") tm.assert_numpy_array_equal(cat.codes, expected_codes) tm.assert_index_equal(cat.categories, idx) # infer categories cat = Categorical(idx) tm.assert_numpy_array_equal(cat.codes, expected_codes) tm.assert_index_equal(cat.categories, idx) # list values cat = Categorical(list(idx)) tm.assert_numpy_array_equal(cat.codes, expected_codes) tm.assert_index_equal(cat.categories, idx) # list values, categories cat = Categorical(list(idx), categories=list(idx)) tm.assert_numpy_array_equal(cat.codes, expected_codes) tm.assert_index_equal(cat.categories, idx) # shuffled values = idx.take([1, 2, 0]) cat = Categorical(values, categories=idx) tm.assert_numpy_array_equal(cat.codes, np.array([1, 2, 0], dtype="int8")) tm.assert_index_equal(cat.categories, idx) # extra values = pd.interval_range(8, 11, periods=3) cat = Categorical(values, categories=idx) expected_codes = np.array([8, 9, -1], dtype="int8") tm.assert_numpy_array_equal(cat.codes, expected_codes) tm.assert_index_equal(cat.categories, idx) # overlapping idx = pd.IntervalIndex([pd.Interval(0, 2), pd.Interval(0, 1)]) cat = Categorical(idx, categories=idx) expected_codes = np.array([0, 1], dtype="int8") tm.assert_numpy_array_equal(cat.codes, expected_codes) tm.assert_index_equal(cat.categories, idx)
class TestGetLoc: @pytest.mark.parametrize("side", ["right", "left", "both", "neither"]) def test_get_loc_interval(self, closed, side): idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed) for bound in [[0, 1], [1, 2], [2, 3], [3, 4], [0, 2], [2.5, 3], [-1, 4]]: # if get_loc is supplied an interval, it should only search # for exact matches, not overlaps or covers, else KeyError. msg = re.escape( f"Interval({bound[0]}, {bound[1]}, closed='{side}')") if closed == side: if bound == [0, 1]: assert idx.get_loc(Interval(0, 1, closed=side)) == 0 elif bound == [2, 3]: assert idx.get_loc(Interval(2, 3, closed=side)) == 1 else: with pytest.raises(KeyError, match=msg): idx.get_loc(Interval(*bound, closed=side)) else: with pytest.raises(KeyError, match=msg): idx.get_loc(Interval(*bound, closed=side)) @pytest.mark.parametrize("scalar", [-0.5, 0, 0.5, 1, 1.5, 2, 2.5, 3, 3.5]) def test_get_loc_scalar(self, closed, scalar): # correct = {side: {query: answer}}. # If query is not in the dict, that query should raise a KeyError correct = { "right": { 0.5: 0, 1: 0, 2.5: 1, 3: 1 }, "left": { 0: 0, 0.5: 0, 2: 1, 2.5: 1 }, "both": { 0: 0, 0.5: 0, 1: 0, 2: 1, 2.5: 1, 3: 1 }, "neither": { 0.5: 0, 2.5: 1 }, } idx = IntervalIndex.from_tuples([(0, 1), (2, 3)], closed=closed) # if get_loc is supplied a scalar, it should return the index of # the interval which contains the scalar, or KeyError. if scalar in correct[closed].keys(): assert idx.get_loc(scalar) == correct[closed][scalar] else: with pytest.raises(KeyError, match=str(scalar)): idx.get_loc(scalar) @pytest.mark.parametrize("scalar", [-1, 0, 0.5, 3, 4.5, 5, 6]) def test_get_loc_length_one_scalar(self, scalar, closed): # GH 20921 index = IntervalIndex.from_tuples([(0, 5)], closed=closed) if scalar in index[0]: result = index.get_loc(scalar) assert result == 0 else: with pytest.raises(KeyError, match=str(scalar)): index.get_loc(scalar) @pytest.mark.parametrize("other_closed", ["left", "right", "both", "neither"]) @pytest.mark.parametrize("left, right", [(0, 5), (-1, 4), (-1, 6), (6, 7)]) def test_get_loc_length_one_interval(self, left, right, closed, other_closed): # GH 20921 index = IntervalIndex.from_tuples([(0, 5)], closed=closed) interval = Interval(left, right, closed=other_closed) if interval == index[0]: result = index.get_loc(interval) assert result == 0 else: with pytest.raises( KeyError, match=re.escape( f"Interval({left}, {right}, closed='{other_closed}')"), ): index.get_loc(interval) # Make consistent with test_interval_new.py (see #16316, #16386) @pytest.mark.parametrize( "breaks", [ date_range("20180101", periods=4), date_range("20180101", periods=4, tz="US/Eastern"), timedelta_range("0 days", periods=4), ], ids=lambda x: str(x.dtype), ) def test_get_loc_datetimelike_nonoverlapping(self, breaks): # GH 20636 # nonoverlapping = IntervalIndex method and no i8 conversion index = IntervalIndex.from_breaks(breaks) value = index[0].mid result = index.get_loc(value) expected = 0 assert result == expected interval = Interval(index[0].left, index[0].right) result = index.get_loc(interval) expected = 0 assert result == expected @pytest.mark.parametrize( "arrays", [ (date_range("20180101", periods=4), date_range("20180103", periods=4)), ( date_range("20180101", periods=4, tz="US/Eastern"), date_range("20180103", periods=4, tz="US/Eastern"), ), ( timedelta_range("0 days", periods=4), timedelta_range("2 days", periods=4), ), ], ids=lambda x: str(x[0].dtype), ) def test_get_loc_datetimelike_overlapping(self, arrays): # GH 20636 index = IntervalIndex.from_arrays(*arrays) value = index[0].mid + Timedelta("12 hours") result = index.get_loc(value) expected = slice(0, 2, None) assert result == expected interval = Interval(index[0].left, index[0].right) result = index.get_loc(interval) expected = 0 assert result == expected @pytest.mark.parametrize( "values", [ date_range("2018-01-04", periods=4, freq="-1D"), date_range("2018-01-04", periods=4, freq="-1D", tz="US/Eastern"), timedelta_range("3 days", periods=4, freq="-1D"), np.arange(3.0, -1.0, -1.0), np.arange(3, -1, -1), ], ids=lambda x: str(x.dtype), ) def test_get_loc_decreasing(self, values): # GH 25860 index = IntervalIndex.from_arrays(values[1:], values[:-1]) result = index.get_loc(index[0]) expected = 0 assert result == expected @pytest.mark.parametrize("key", [[5], (2, 3)]) def test_get_loc_non_scalar_errors(self, key): # GH 31117 idx = IntervalIndex.from_tuples([(1, 3), (2, 4), (3, 5), (7, 10), (3, 10)]) msg = str(key) with pytest.raises(InvalidIndexError, match=msg): idx.get_loc(key) def test_get_indexer_with_nans(self): # GH#41831 index = IntervalIndex([np.nan, Interval(1, 2), np.nan]) expected = np.array([True, False, True]) for key in [None, np.nan, NA]: assert key in index result = index.get_loc(key) tm.assert_numpy_array_equal(result, expected) for key in [ NaT, np.timedelta64("NaT", "ns"), np.datetime64("NaT", "ns") ]: with pytest.raises(KeyError, match=str(key)): index.get_loc(key)
return 'Samsung' return x df.insert(0, 'inc', range(1, len(df) + 1)) df['lastTimeSeen'] = pd.to_datetime(df['lastTimeSeen'], format='%Y-%m-%d %H:%M:%S') df['firstTimeSeen'] = pd.to_datetime(df['firstTimeSeen'], format='%Y-%m-%d %H:%M:%S') df['stationMacVendor'] = df['stationMacVendor'].map(clean_manufacturer) df['date'] = df['firstTimeSeen'].map( lambda x: datetime.strftime(x, '%d-%m-%Y')) df['time'] = df['firstTimeSeen'].map( lambda x: datetime.strftime(x, '%H:%M:%S')) tbins = pd.timedelta_range(0, periods=25, freq='H') tslots = pd.cut(pd.to_timedelta(df['time']), bins=tbins) dfheatmap = df.replace(0, np.nan).groupby([tslots, pd.cut(df['packets'], 1)]).packets.mean().unstack() df['power'] = df['power'].map(lambda x: '{0:,.0f}'.format(x)) df['packets'] = df['packets'].map(lambda x: '{0:,.0f}'.format(x)) df['packetsAvg'] = df['packetsAvg'].map(lambda x: '{0:,.1f}'.format(x)) df['powerAvg'] = df['powerAvg'].apply(lambda x: '{0:,.1f}'.format(x)) df['packetsTotal'] = df['packetsTotal'].map(lambda x: '{0:,.0f}'.format(x)) df['timeSeen'] = df['timeSeen'].map(lambda x: '{0:,.0f}'.format(x)) #df['timeSeenTotal'] = df['timeSeenTotal'].map(lambda x: '{0:,}'.format(x)) df.set_index('lastTimeSeen', inplace=True)
Timedelta, Timestamp, date_range, period_range, timedelta_range, ) import pandas._testing as tm from pandas.core.arrays import IntervalArray @pytest.fixture( params=[ (Index([0, 2, 4, 4]), Index([1, 3, 5, 8])), (Index([0.0, 1.0, 2.0, np.nan]), Index([1.0, 2.0, 3.0, np.nan])), ( timedelta_range("0 days", periods=3).insert(4, pd.NaT), timedelta_range("1 day", periods=3).insert(4, pd.NaT), ), ( date_range("20170101", periods=3).insert(4, pd.NaT), date_range("20170102", periods=3).insert(4, pd.NaT), ), ( date_range("20170101", periods=3, tz="US/Eastern").insert(4, pd.NaT), date_range("20170102", periods=3, tz="US/Eastern").insert(4, pd.NaT), ), ], ids=lambda x: str(x[0].dtype), )
def test_arg_passthru(): # make sure that we are passing thru kwargs # to our agg functions # GH3668 # GH5724 df = DataFrame( { "group": [1, 1, 2], "int": [1, 2, 3], "float": [4.0, 5.0, 6.0], "string": list("abc"), "category_string": Series(list("abc")).astype("category"), "category_int": [7, 8, 9], "datetime": pd.date_range("20130101", periods=3), "datetimetz": pd.date_range("20130101", periods=3, tz="US/Eastern"), "timedelta": pd.timedelta_range("1 s", periods=3, freq="s"), }, columns=[ "group", "int", "float", "string", "category_string", "category_int", "datetime", "datetimetz", "timedelta", ], ) expected_columns_numeric = Index(["int", "float", "category_int"]) # mean / median expected = DataFrame( { "category_int": [7.5, 9], "float": [4.5, 6.0], "timedelta": [pd.Timedelta("1.5s"), pd.Timedelta("3s")], "int": [1.5, 3], "datetime": [ Timestamp("2013-01-01 12:00:00"), Timestamp("2013-01-03 00:00:00"), ], "datetimetz": [ Timestamp("2013-01-01 12:00:00", tz="US/Eastern"), Timestamp("2013-01-03 00:00:00", tz="US/Eastern"), ], }, index=Index([1, 2], name="group"), columns=[ "int", "float", "category_int", "datetime", "datetimetz", "timedelta" ], ) for attr in ["mean", "median"]: result = getattr(df.groupby("group"), attr)() tm.assert_index_equal(result.columns, expected_columns_numeric) result = getattr(df.groupby("group"), attr)(numeric_only=False) tm.assert_frame_equal(result.reindex_like(expected), expected) # TODO: min, max *should* handle # categorical (ordered) dtype expected_columns = Index([ "int", "float", "string", "category_int", "datetime", "datetimetz", "timedelta", ]) for attr in ["min", "max"]: result = getattr(df.groupby("group"), attr)() tm.assert_index_equal(result.columns, expected_columns) result = getattr(df.groupby("group"), attr)(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) expected_columns = Index([ "int", "float", "string", "category_string", "category_int", "datetime", "datetimetz", "timedelta", ]) for attr in ["first", "last"]: result = getattr(df.groupby("group"), attr)() tm.assert_index_equal(result.columns, expected_columns) result = getattr(df.groupby("group"), attr)(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) expected_columns = Index( ["int", "float", "string", "category_int", "timedelta"]) result = df.groupby("group").sum() tm.assert_index_equal(result.columns, expected_columns_numeric) result = df.groupby("group").sum(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) expected_columns = Index(["int", "float", "category_int"]) for attr in ["prod", "cumprod"]: result = getattr(df.groupby("group"), attr)() tm.assert_index_equal(result.columns, expected_columns_numeric) result = getattr(df.groupby("group"), attr)(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) # like min, max, but don't include strings expected_columns = Index([ "int", "float", "category_int", "datetime", "datetimetz", "timedelta" ]) for attr in ["cummin", "cummax"]: result = getattr(df.groupby("group"), attr)() # GH 15561: numeric_only=False set by default like min/max tm.assert_index_equal(result.columns, expected_columns) result = getattr(df.groupby("group"), attr)(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) expected_columns = Index(["int", "float", "category_int", "timedelta"]) result = getattr(df.groupby("group"), "cumsum")() tm.assert_index_equal(result.columns, expected_columns_numeric) result = getattr(df.groupby("group"), "cumsum")(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns)
def test_round(self): t1 = Timedelta('1 days 02:34:56.789123456') t2 = Timedelta('-1 days 02:34:56.789123456') for (freq, s1, s2) in [ ('N', t1, t2), ('U', Timedelta('1 days 02:34:56.789123000'), Timedelta('-1 days 02:34:56.789123000')), ('L', Timedelta('1 days 02:34:56.789000000'), Timedelta('-1 days 02:34:56.789000000')), ('S', Timedelta('1 days 02:34:57'), Timedelta('-1 days 02:34:57')), ('2S', Timedelta('1 days 02:34:56'), Timedelta('-1 days 02:34:56')), ('5S', Timedelta('1 days 02:34:55'), Timedelta('-1 days 02:34:55')), ('T', Timedelta('1 days 02:35:00'), Timedelta('-1 days 02:35:00')), ('12T', Timedelta('1 days 02:36:00'), Timedelta('-1 days 02:36:00')), ('H', Timedelta('1 days 03:00:00'), Timedelta('-1 days 03:00:00')), ('d', Timedelta('1 days'), Timedelta('-1 days')) ]: r1 = t1.round(freq) assert r1 == s1 r2 = t2.round(freq) assert r2 == s2 # invalid for freq in ['Y', 'M', 'foobar']: pytest.raises(ValueError, lambda: t1.round(freq)) t1 = timedelta_range('1 days', periods=3, freq='1 min 2 s 3 us') t2 = -1 * t1 t1a = timedelta_range('1 days', periods=3, freq='1 min 2 s') t1c = pd.TimedeltaIndex([1, 1, 1], unit='D') # note that negative times round DOWN! so don't give whole numbers for (freq, s1, s2) in [ ('N', t1, t2), ('U', t1, t2), ('L', t1a, TimedeltaIndex([ '-1 days +00:00:00', '-2 days +23:58:58', '-2 days +23:57:56' ], dtype='timedelta64[ns]', freq=None)), ('S', t1a, TimedeltaIndex([ '-1 days +00:00:00', '-2 days +23:58:58', '-2 days +23:57:56' ], dtype='timedelta64[ns]', freq=None)), ('12T', t1c, TimedeltaIndex(['-1 days', '-1 days', '-1 days'], dtype='timedelta64[ns]', freq=None)), ('H', t1c, TimedeltaIndex(['-1 days', '-1 days', '-1 days'], dtype='timedelta64[ns]', freq=None)), ('d', t1c, pd.TimedeltaIndex([-1, -1, -1], unit='D')) ]: r1 = t1.round(freq) tm.assert_index_equal(r1, s1) r2 = t2.round(freq) tm.assert_index_equal(r2, s2) # invalid for freq in ['Y', 'M', 'foobar']: pytest.raises(ValueError, lambda: t1.round(freq))
class SharedTests: index_cls: type[DatetimeIndex | PeriodIndex | TimedeltaIndex] @pytest.fixture def arr1d(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 arr = self.array_cls(data, freq="D") return arr def test_compare_len1_raises(self, arr1d): # make sure we raise when comparing with different lengths, specific # to the case where one has length-1, which numpy would broadcast arr = arr1d idx = self.index_cls(arr) with pytest.raises(ValueError, match="Lengths must match"): arr == arr[:1] # test the index classes while we're at it, GH#23078 with pytest.raises(ValueError, match="Lengths must match"): idx <= idx[[0]] @pytest.mark.parametrize( "result", [ pd.date_range("2020", periods=3), pd.date_range("2020", periods=3, tz="UTC"), pd.timedelta_range("0 days", periods=3), pd.period_range("2020Q1", periods=3, freq="Q"), ], ) def test_compare_with_Categorical(self, result): expected = pd.Categorical(result) assert all(result == expected) assert not any(result != expected) @pytest.mark.parametrize("reverse", [True, False]) @pytest.mark.parametrize("as_index", [True, False]) def test_compare_categorical_dtype(self, arr1d, as_index, reverse, ordered): other = pd.Categorical(arr1d, ordered=ordered) if as_index: other = pd.CategoricalIndex(other) left, right = arr1d, other if reverse: left, right = right, left ones = np.ones(arr1d.shape, dtype=bool) zeros = ~ones result = left == right tm.assert_numpy_array_equal(result, ones) result = left != right tm.assert_numpy_array_equal(result, zeros) if not reverse and not as_index: # Otherwise Categorical raises TypeError bc it is not ordered # TODO: we should probably get the same behavior regardless? result = left < right tm.assert_numpy_array_equal(result, zeros) result = left <= right tm.assert_numpy_array_equal(result, ones) result = left > right tm.assert_numpy_array_equal(result, zeros) result = left >= right tm.assert_numpy_array_equal(result, ones) def test_take(self): data = np.arange(100, dtype="i8") * 24 * 3600 * 10**9 np.random.shuffle(data) freq = None if self.array_cls is not PeriodArray else "D" arr = self.array_cls(data, freq=freq) idx = self.index_cls._simple_new(arr) takers = [1, 4, 94] result = arr.take(takers) expected = idx.take(takers) tm.assert_index_equal(self.index_cls(result), expected) takers = np.array([1, 4, 94]) result = arr.take(takers) expected = idx.take(takers) tm.assert_index_equal(self.index_cls(result), expected) @pytest.mark.parametrize("fill_value", [2, 2.0, Timestamp(2021, 1, 1, 12).time]) def test_take_fill_raises(self, fill_value): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 arr = self.array_cls(data, freq="D") msg = f"value should be a '{arr._scalar_type.__name__}' or 'NaT'. Got" with pytest.raises(TypeError, match=msg): arr.take([0, 1], allow_fill=True, fill_value=fill_value) def test_take_fill(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 arr = self.array_cls(data, freq="D") result = arr.take([-1, 1], allow_fill=True, fill_value=None) assert result[0] is NaT result = arr.take([-1, 1], allow_fill=True, fill_value=np.nan) assert result[0] is NaT result = arr.take([-1, 1], allow_fill=True, fill_value=NaT) assert result[0] is NaT def test_take_fill_str(self, arr1d): # Cast str fill_value matching other fill_value-taking methods result = arr1d.take([-1, 1], allow_fill=True, fill_value=str(arr1d[-1])) expected = arr1d[[-1, 1]] tm.assert_equal(result, expected) msg = f"value should be a '{arr1d._scalar_type.__name__}' or 'NaT'. Got" with pytest.raises(TypeError, match=msg): arr1d.take([-1, 1], allow_fill=True, fill_value="foo") def test_concat_same_type(self, arr1d): arr = arr1d idx = self.index_cls(arr) idx = idx.insert(0, NaT) arr = self.array_cls(idx) result = arr._concat_same_type([arr[:-1], arr[1:], arr]) arr2 = arr.astype(object) expected = self.index_cls(np.concatenate([arr2[:-1], arr2[1:], arr2]), None) tm.assert_index_equal(self.index_cls(result), expected) def test_unbox_scalar(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 arr = self.array_cls(data, freq="D") result = arr._unbox_scalar(arr[0]) expected = arr._data.dtype.type assert isinstance(result, expected) result = arr._unbox_scalar(NaT) assert isinstance(result, expected) msg = f"'value' should be a {self.scalar_type.__name__}." with pytest.raises(ValueError, match=msg): arr._unbox_scalar("foo") def test_check_compatible_with(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 arr = self.array_cls(data, freq="D") arr._check_compatible_with(arr[0]) arr._check_compatible_with(arr[:1]) arr._check_compatible_with(NaT) def test_scalar_from_string(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 arr = self.array_cls(data, freq="D") result = arr._scalar_from_string(str(arr[0])) assert result == arr[0] def test_reduce_invalid(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 arr = self.array_cls(data, freq="D") msg = f"'{type(arr).__name__}' does not implement reduction 'not a method'" with pytest.raises(TypeError, match=msg): arr._reduce("not a method") @pytest.mark.parametrize("method", ["pad", "backfill"]) def test_fillna_method_doesnt_change_orig(self, method): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 arr = self.array_cls(data, freq="D") arr[4] = NaT fill_value = arr[3] if method == "pad" else arr[5] result = arr.fillna(method=method) assert result[4] == fill_value # check that the original was not changed assert arr[4] is NaT def test_searchsorted(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 arr = self.array_cls(data, freq="D") # scalar result = arr.searchsorted(arr[1]) assert result == 1 result = arr.searchsorted(arr[2], side="right") assert result == 3 # own-type result = arr.searchsorted(arr[1:3]) expected = np.array([1, 2], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) result = arr.searchsorted(arr[1:3], side="right") expected = np.array([2, 3], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) # GH#29884 match numpy convention on whether NaT goes # at the end or the beginning result = arr.searchsorted(NaT) assert result == 10 @pytest.mark.parametrize("box", [None, "index", "series"]) def test_searchsorted_castable_strings(self, arr1d, box, request, string_storage): if isinstance(arr1d, DatetimeArray): tz = arr1d.tz ts1, ts2 = arr1d[1:3] if tz is not None and ts1.tz.tzname(ts1) != ts2.tz.tzname(ts2): # If we have e.g. tzutc(), when we cast to string and parse # back we get pytz.UTC, and then consider them different timezones # so incorrectly raise. mark = pytest.mark.xfail( raises=TypeError, reason="timezone comparisons inconsistent") request.node.add_marker(mark) arr = arr1d if box is None: pass elif box == "index": # Test the equivalent Index.searchsorted method while we're here arr = self.index_cls(arr) else: # Test the equivalent Series.searchsorted method while we're here arr = pd.Series(arr) # scalar result = arr.searchsorted(str(arr[1])) assert result == 1 result = arr.searchsorted(str(arr[2]), side="right") assert result == 3 result = arr.searchsorted([str(x) for x in arr[1:3]]) expected = np.array([1, 2], dtype=np.intp) tm.assert_numpy_array_equal(result, expected) with pytest.raises( TypeError, match=re.escape( f"value should be a '{arr1d._scalar_type.__name__}', 'NaT', " "or array of those. Got 'str' instead."), ): arr.searchsorted("foo") arr_type = "StringArray" if string_storage == "python" else "ArrowStringArray" with pd.option_context("string_storage", string_storage): with pytest.raises( TypeError, match=re.escape( f"value should be a '{arr1d._scalar_type.__name__}', 'NaT', " f"or array of those. Got '{arr_type}' instead."), ): arr.searchsorted([str(arr[1]), "baz"]) def test_getitem_near_implementation_bounds(self): # We only check tz-naive for DTA bc the bounds are slightly different # for other tzs i8vals = np.asarray([NaT.value + n for n in range(1, 5)], dtype="i8") arr = self.array_cls(i8vals, freq="ns") arr[0] # should not raise OutOfBoundsDatetime index = pd.Index(arr) index[0] # should not raise OutOfBoundsDatetime ser = pd.Series(arr) ser[0] # should not raise OutOfBoundsDatetime def test_getitem_2d(self, arr1d): # 2d slicing on a 1D array expected = type(arr1d)(arr1d._data[:, np.newaxis], dtype=arr1d.dtype) result = arr1d[:, np.newaxis] tm.assert_equal(result, expected) # Lookup on a 2D array arr2d = expected expected = type(arr2d)(arr2d._data[:3, 0], dtype=arr2d.dtype) result = arr2d[:3, 0] tm.assert_equal(result, expected) # Scalar lookup result = arr2d[-1, 0] expected = arr1d[-1] assert result == expected def test_iter_2d(self, arr1d): data2d = arr1d._data[:3, np.newaxis] arr2d = type(arr1d)._simple_new(data2d, dtype=arr1d.dtype) result = list(arr2d) assert len(result) == 3 for x in result: assert isinstance(x, type(arr1d)) assert x.ndim == 1 assert x.dtype == arr1d.dtype def test_repr_2d(self, arr1d): data2d = arr1d._data[:3, np.newaxis] arr2d = type(arr1d)._simple_new(data2d, dtype=arr1d.dtype) result = repr(arr2d) if isinstance(arr2d, TimedeltaArray): expected = (f"<{type(arr2d).__name__}>\n" "[\n" f"['{arr1d[0]._repr_base()}'],\n" f"['{arr1d[1]._repr_base()}'],\n" f"['{arr1d[2]._repr_base()}']\n" "]\n" f"Shape: (3, 1), dtype: {arr1d.dtype}") else: expected = (f"<{type(arr2d).__name__}>\n" "[\n" f"['{arr1d[0]}'],\n" f"['{arr1d[1]}'],\n" f"['{arr1d[2]}']\n" "]\n" f"Shape: (3, 1), dtype: {arr1d.dtype}") assert result == expected def test_setitem(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 arr = self.array_cls(data, freq="D") arr[0] = arr[1] expected = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 expected[0] = expected[1] tm.assert_numpy_array_equal(arr.asi8, expected) arr[:2] = arr[-2:] expected[:2] = expected[-2:] tm.assert_numpy_array_equal(arr.asi8, expected) @pytest.mark.parametrize( "box", [ pd.Index, pd.Series, np.array, list, PandasArray, ], ) def test_setitem_object_dtype(self, box, arr1d): expected = arr1d.copy()[::-1] if expected.dtype.kind in ["m", "M"]: expected = expected._with_freq(None) vals = expected if box is list: vals = list(vals) elif box is np.array: # if we do np.array(x).astype(object) then dt64 and td64 cast to ints vals = np.array(vals.astype(object)) elif box is PandasArray: vals = box(np.asarray(vals, dtype=object)) else: vals = box(vals).astype(object) arr1d[:] = vals tm.assert_equal(arr1d, expected) def test_setitem_strs(self, arr1d, request): # Check that we parse strs in both scalar and listlike if isinstance(arr1d, DatetimeArray): tz = arr1d.tz ts1, ts2 = arr1d[-2:] if tz is not None and ts1.tz.tzname(ts1) != ts2.tz.tzname(ts2): # If we have e.g. tzutc(), when we cast to string and parse # back we get pytz.UTC, and then consider them different timezones # so incorrectly raise. mark = pytest.mark.xfail( raises=TypeError, reason="timezone comparisons inconsistent") request.node.add_marker(mark) # Setting list-like of strs expected = arr1d.copy() expected[[0, 1]] = arr1d[-2:] result = arr1d.copy() result[:2] = [str(x) for x in arr1d[-2:]] tm.assert_equal(result, expected) # Same thing but now for just a scalar str expected = arr1d.copy() expected[0] = arr1d[-1] result = arr1d.copy() result[0] = str(arr1d[-1]) tm.assert_equal(result, expected) @pytest.mark.parametrize("as_index", [True, False]) def test_setitem_categorical(self, arr1d, as_index): expected = arr1d.copy()[::-1] if not isinstance(expected, PeriodArray): expected = expected._with_freq(None) cat = pd.Categorical(arr1d) if as_index: cat = pd.CategoricalIndex(cat) arr1d[:] = cat[::-1] tm.assert_equal(arr1d, expected) def test_setitem_raises(self): data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 arr = self.array_cls(data, freq="D") val = arr[0] with pytest.raises(IndexError, match="index 12 is out of bounds"): arr[12] = val with pytest.raises(TypeError, match="value should be a.* 'object'"): arr[0] = object() msg = "cannot set using a list-like indexer with a different length" with pytest.raises(ValueError, match=msg): # GH#36339 arr[[]] = [arr[1]] msg = "cannot set using a slice indexer with a different length than" with pytest.raises(ValueError, match=msg): # GH#36339 arr[1:1] = arr[:3] @pytest.mark.parametrize("box", [list, np.array, pd.Index, pd.Series]) def test_setitem_numeric_raises(self, arr1d, box): # We dont case e.g. int64 to our own dtype for setitem msg = (f"value should be a '{arr1d._scalar_type.__name__}', " "'NaT', or array of those. Got") with pytest.raises(TypeError, match=msg): arr1d[:2] = box([0, 1]) with pytest.raises(TypeError, match=msg): arr1d[:2] = box([0.0, 1.0]) def test_inplace_arithmetic(self): # GH#24115 check that iadd and isub are actually in-place data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 arr = self.array_cls(data, freq="D") expected = arr + pd.Timedelta(days=1) arr += pd.Timedelta(days=1) tm.assert_equal(arr, expected) expected = arr - pd.Timedelta(days=1) arr -= pd.Timedelta(days=1) tm.assert_equal(arr, expected) def test_shift_fill_int_deprecated(self): # GH#31971 data = np.arange(10, dtype="i8") * 24 * 3600 * 10**9 arr = self.array_cls(data, freq="D") msg = "Passing <class 'int'> to shift" with tm.assert_produces_warning(FutureWarning, match=msg): result = arr.shift(1, fill_value=1) expected = arr.copy() if self.array_cls is PeriodArray: fill_val = PeriodArray._scalar_type._from_ordinal(1, freq=arr.freq) else: fill_val = arr._scalar_type(1) expected[0] = fill_val expected[1:] = arr[:-1] tm.assert_equal(result, expected) def test_median(self, arr1d): arr = arr1d if len(arr) % 2 == 0: # make it easier to define `expected` arr = arr[:-1] expected = arr[len(arr) // 2] result = arr.median() assert type(result) is type(expected) assert result == expected arr[len(arr) // 2] = NaT if not isinstance(expected, Period): expected = arr[len(arr) // 2 - 1:len(arr) // 2 + 2].mean() assert arr.median(skipna=False) is NaT result = arr.median() assert type(result) is type(expected) assert result == expected assert arr[:0].median() is NaT assert arr[:0].median(skipna=False) is NaT # 2d Case arr2 = arr.reshape(-1, 1) result = arr2.median(axis=None) assert type(result) is type(expected) assert result == expected assert arr2.median(axis=None, skipna=False) is NaT result = arr2.median(axis=0) expected2 = type(arr)._from_sequence([expected], dtype=arr.dtype) tm.assert_equal(result, expected2) result = arr2.median(axis=0, skipna=False) expected2 = type(arr)._from_sequence([NaT], dtype=arr.dtype) tm.assert_equal(result, expected2) result = arr2.median(axis=1) tm.assert_equal(result, arr) result = arr2.median(axis=1, skipna=False) tm.assert_equal(result, arr) def test_from_integer_array(self): arr = np.array([1, 2, 3], dtype=np.int64) expected = self.array_cls(arr, dtype=self.example_dtype) data = pd.array(arr, dtype="Int64") result = self.array_cls(data, dtype=self.example_dtype) tm.assert_extension_array_equal(result, expected)
""" def test_slice_with_zero_step_raises(index, frame_or_series, indexer_sli): ts = frame_or_series(np.arange(len(index)), index=index) with pytest.raises(ValueError, match="slice step cannot be zero"): indexer_sli(ts)[::0] @pytest.mark.parametrize( "index", [ date_range("2014-01-01", periods=20, freq="MS"), period_range("2014-01", periods=20, freq="M"), timedelta_range("0", periods=20, freq="H"), ], ) def test_slice_with_negative_step(index): keystr1 = str(index[9]) keystr2 = str(index[13]) ser = Series(np.arange(20), index) SLC = IndexSlice for key in [keystr1, index[9]]: tm.assert_indexing_slices_equivalent(ser, SLC[key::-1], SLC[9::-1]) tm.assert_indexing_slices_equivalent(ser, SLC[:key:-1], SLC[:8:-1]) for key2 in [keystr2, index[13]]: tm.assert_indexing_slices_equivalent(ser, SLC[key2:key:-1],
def test_dt_namespace_accessor(self): # GH 7207, 11128 # test .dt namespace accessor ok_for_period = PeriodIndex._datetimelike_ops ok_for_period_methods = ['strftime', 'to_timestamp', 'asfreq'] ok_for_dt = DatetimeIndex._datetimelike_ops ok_for_dt_methods = [ 'to_period', 'to_pydatetime', 'tz_localize', 'tz_convert', 'normalize', 'strftime', 'round', 'floor', 'ceil', 'weekday_name' ] ok_for_td = TimedeltaIndex._datetimelike_ops ok_for_td_methods = [ 'components', 'to_pytimedelta', 'total_seconds', 'round', 'floor', 'ceil' ] def get_expected(s, name): result = getattr(Index(s._values), prop) if isinstance(result, np.ndarray): if is_integer_dtype(result): result = result.astype('int64') elif not is_list_like(result): return result return Series(result, index=s.index, name=s.name) def compare(s, name): a = getattr(s.dt, prop) b = get_expected(s, prop) if not (is_list_like(a) and is_list_like(b)): self.assertEqual(a, b) else: tm.assert_series_equal(a, b) # datetimeindex cases = [ Series(date_range('20130101', periods=5), name='xxx'), Series(date_range('20130101', periods=5, freq='s'), name='xxx'), Series(date_range('20130101 00:00:00', periods=5, freq='ms'), name='xxx') ] for s in cases: for prop in ok_for_dt: # we test freq below if prop != 'freq': compare(s, prop) for prop in ok_for_dt_methods: getattr(s.dt, prop) result = s.dt.to_pydatetime() self.assertIsInstance(result, np.ndarray) self.assertTrue(result.dtype == object) result = s.dt.tz_localize('US/Eastern') exp_values = DatetimeIndex(s.values).tz_localize('US/Eastern') expected = Series(exp_values, index=s.index, name='xxx') tm.assert_series_equal(result, expected) tz_result = result.dt.tz self.assertEqual(str(tz_result), 'US/Eastern') freq_result = s.dt.freq self.assertEqual(freq_result, DatetimeIndex(s.values, freq='infer').freq) # let's localize, then convert result = s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern') exp_values = (DatetimeIndex( s.values).tz_localize('UTC').tz_convert('US/Eastern')) expected = Series(exp_values, index=s.index, name='xxx') tm.assert_series_equal(result, expected) # round s = Series(pd.to_datetime([ '2012-01-01 13:00:00', '2012-01-01 12:01:00', '2012-01-01 08:00:00' ]), name='xxx') result = s.dt.round('D') expected = Series(pd.to_datetime( ['2012-01-02', '2012-01-02', '2012-01-01']), name='xxx') tm.assert_series_equal(result, expected) # round with tz result = ( s.dt.tz_localize('UTC').dt.tz_convert('US/Eastern').dt.round('D')) exp_values = pd.to_datetime(['2012-01-01', '2012-01-01', '2012-01-01']).tz_localize('US/Eastern') expected = Series(exp_values, name='xxx') tm.assert_series_equal(result, expected) # floor s = Series(pd.to_datetime([ '2012-01-01 13:00:00', '2012-01-01 12:01:00', '2012-01-01 08:00:00' ]), name='xxx') result = s.dt.floor('D') expected = Series(pd.to_datetime( ['2012-01-01', '2012-01-01', '2012-01-01']), name='xxx') tm.assert_series_equal(result, expected) # ceil s = Series(pd.to_datetime([ '2012-01-01 13:00:00', '2012-01-01 12:01:00', '2012-01-01 08:00:00' ]), name='xxx') result = s.dt.ceil('D') expected = Series(pd.to_datetime( ['2012-01-02', '2012-01-02', '2012-01-02']), name='xxx') tm.assert_series_equal(result, expected) # datetimeindex with tz s = Series(date_range('20130101', periods=5, tz='US/Eastern'), name='xxx') for prop in ok_for_dt: # we test freq below if prop != 'freq': compare(s, prop) for prop in ok_for_dt_methods: getattr(s.dt, prop) result = s.dt.to_pydatetime() self.assertIsInstance(result, np.ndarray) self.assertTrue(result.dtype == object) result = s.dt.tz_convert('CET') expected = Series(s._values.tz_convert('CET'), index=s.index, name='xxx') tm.assert_series_equal(result, expected) tz_result = result.dt.tz self.assertEqual(str(tz_result), 'CET') freq_result = s.dt.freq self.assertEqual(freq_result, DatetimeIndex(s.values, freq='infer').freq) # timedeltaindex cases = [ Series(timedelta_range('1 day', periods=5), index=list('abcde'), name='xxx'), Series(timedelta_range('1 day 01:23:45', periods=5, freq='s'), name='xxx'), Series(timedelta_range('2 days 01:23:45.012345', periods=5, freq='ms'), name='xxx') ] for s in cases: for prop in ok_for_td: # we test freq below if prop != 'freq': compare(s, prop) for prop in ok_for_td_methods: getattr(s.dt, prop) result = s.dt.components self.assertIsInstance(result, DataFrame) tm.assert_index_equal(result.index, s.index) result = s.dt.to_pytimedelta() self.assertIsInstance(result, np.ndarray) self.assertTrue(result.dtype == object) result = s.dt.total_seconds() self.assertIsInstance(result, pd.Series) self.assertTrue(result.dtype == 'float64') freq_result = s.dt.freq self.assertEqual(freq_result, TimedeltaIndex(s.values, freq='infer').freq) # both index = date_range('20130101', periods=3, freq='D') s = Series(date_range('20140204', periods=3, freq='s'), index=index, name='xxx') exp = Series(np.array([2014, 2014, 2014], dtype='int64'), index=index, name='xxx') tm.assert_series_equal(s.dt.year, exp) exp = Series(np.array([2, 2, 2], dtype='int64'), index=index, name='xxx') tm.assert_series_equal(s.dt.month, exp) exp = Series(np.array([0, 1, 2], dtype='int64'), index=index, name='xxx') tm.assert_series_equal(s.dt.second, exp) exp = pd.Series([s[0]] * 3, index=index, name='xxx') tm.assert_series_equal(s.dt.normalize(), exp) # periodindex cases = [ Series(period_range('20130101', periods=5, freq='D'), name='xxx') ] for s in cases: for prop in ok_for_period: # we test freq below if prop != 'freq': compare(s, prop) for prop in ok_for_period_methods: getattr(s.dt, prop) freq_result = s.dt.freq self.assertEqual(freq_result, PeriodIndex(s.values).freq) # test limited display api def get_dir(s): results = [r for r in s.dt.__dir__() if not r.startswith('_')] return list(sorted(set(results))) s = Series(date_range('20130101', periods=5, freq='D'), name='xxx') results = get_dir(s) tm.assert_almost_equal( results, list(sorted(set(ok_for_dt + ok_for_dt_methods)))) s = Series( period_range('20130101', periods=5, freq='D', name='xxx').asobject) results = get_dir(s) tm.assert_almost_equal( results, list(sorted(set(ok_for_period + ok_for_period_methods)))) # 11295 # ambiguous time error on the conversions s = Series(pd.date_range('2015-01-01', '2016-01-01', freq='T'), name='xxx') s = s.dt.tz_localize('UTC').dt.tz_convert('America/Chicago') results = get_dir(s) tm.assert_almost_equal( results, list(sorted(set(ok_for_dt + ok_for_dt_methods)))) exp_values = pd.date_range('2015-01-01', '2016-01-01', freq='T', tz='UTC').tz_convert('America/Chicago') expected = Series(exp_values, name='xxx') tm.assert_series_equal(s, expected) # no setting allowed s = Series(date_range('20130101', periods=5, freq='D'), name='xxx') with tm.assertRaisesRegexp(ValueError, "modifications"): s.dt.hour = 5 # trying to set a copy with pd.option_context('chained_assignment', 'raise'): def f(): s.dt.hour[0] = 5 self.assertRaises(com.SettingWithCopyError, f)
def test_timedelta_property(attr): s = pd.Series(pd.timedelta_range("2000", periods=4)) s.attrs = {"a": 1} result = getattr(s.dt, attr) assert result.attrs == {"a": 1}
def test_dt_namespace_accessor(self): # GH 7207, 11128 # test .dt namespace accessor ok_for_period = PeriodArray._datetimelike_ops ok_for_period_methods = ["strftime", "to_timestamp", "asfreq"] ok_for_dt = DatetimeIndex._datetimelike_ops ok_for_dt_methods = [ "to_period", "to_pydatetime", "tz_localize", "tz_convert", "normalize", "strftime", "round", "floor", "ceil", "day_name", "month_name", "isocalendar", ] ok_for_td = TimedeltaArray._datetimelike_ops ok_for_td_methods = [ "components", "to_pytimedelta", "total_seconds", "round", "floor", "ceil", ] def get_expected(s, name): result = getattr(Index(s._values), prop) if isinstance(result, np.ndarray): if is_integer_dtype(result): result = result.astype("int64") elif not is_list_like(result) or isinstance(result, DataFrame): return result return Series(result, index=s.index, name=s.name) def compare(s, name): a = getattr(s.dt, prop) b = get_expected(s, prop) if not (is_list_like(a) and is_list_like(b)): assert a == b elif isinstance(a, DataFrame): tm.assert_frame_equal(a, b) else: tm.assert_series_equal(a, b) # datetimeindex cases = [ Series(date_range("20130101", periods=5), name="xxx"), Series(date_range("20130101", periods=5, freq="s"), name="xxx"), Series(date_range("20130101 00:00:00", periods=5, freq="ms"), name="xxx"), ] for s in cases: for prop in ok_for_dt: # we test freq below # we ignore week and weekofyear because they are deprecated if prop not in ["freq", "week", "weekofyear"]: compare(s, prop) for prop in ok_for_dt_methods: getattr(s.dt, prop) result = s.dt.to_pydatetime() assert isinstance(result, np.ndarray) assert result.dtype == object result = s.dt.tz_localize("US/Eastern") exp_values = DatetimeIndex(s.values).tz_localize("US/Eastern") expected = Series(exp_values, index=s.index, name="xxx") tm.assert_series_equal(result, expected) tz_result = result.dt.tz assert str(tz_result) == "US/Eastern" freq_result = s.dt.freq assert freq_result == DatetimeIndex(s.values, freq="infer").freq # let's localize, then convert result = s.dt.tz_localize("UTC").dt.tz_convert("US/Eastern") exp_values = (DatetimeIndex( s.values).tz_localize("UTC").tz_convert("US/Eastern")) expected = Series(exp_values, index=s.index, name="xxx") tm.assert_series_equal(result, expected) # datetimeindex with tz s = Series(date_range("20130101", periods=5, tz="US/Eastern"), name="xxx") for prop in ok_for_dt: # we test freq below # we ignore week and weekofyear because they are deprecated if prop not in ["freq", "week", "weekofyear"]: compare(s, prop) for prop in ok_for_dt_methods: getattr(s.dt, prop) result = s.dt.to_pydatetime() assert isinstance(result, np.ndarray) assert result.dtype == object result = s.dt.tz_convert("CET") expected = Series(s._values.tz_convert("CET"), index=s.index, name="xxx") tm.assert_series_equal(result, expected) tz_result = result.dt.tz assert str(tz_result) == "CET" freq_result = s.dt.freq assert freq_result == DatetimeIndex(s.values, freq="infer").freq # timedelta index cases = [ Series(timedelta_range("1 day", periods=5), index=list("abcde"), name="xxx"), Series(timedelta_range("1 day 01:23:45", periods=5, freq="s"), name="xxx"), Series( timedelta_range("2 days 01:23:45.012345", periods=5, freq="ms"), name="xxx", ), ] for s in cases: for prop in ok_for_td: # we test freq below if prop != "freq": compare(s, prop) for prop in ok_for_td_methods: getattr(s.dt, prop) result = s.dt.components assert isinstance(result, DataFrame) tm.assert_index_equal(result.index, s.index) result = s.dt.to_pytimedelta() assert isinstance(result, np.ndarray) assert result.dtype == object result = s.dt.total_seconds() assert isinstance(result, Series) assert result.dtype == "float64" freq_result = s.dt.freq assert freq_result == TimedeltaIndex(s.values, freq="infer").freq # both index = date_range("20130101", periods=3, freq="D") s = Series(date_range("20140204", periods=3, freq="s"), index=index, name="xxx") exp = Series(np.array([2014, 2014, 2014], dtype="int64"), index=index, name="xxx") tm.assert_series_equal(s.dt.year, exp) exp = Series(np.array([2, 2, 2], dtype="int64"), index=index, name="xxx") tm.assert_series_equal(s.dt.month, exp) exp = Series(np.array([0, 1, 2], dtype="int64"), index=index, name="xxx") tm.assert_series_equal(s.dt.second, exp) exp = Series([s[0]] * 3, index=index, name="xxx") tm.assert_series_equal(s.dt.normalize(), exp) # periodindex cases = [ Series(period_range("20130101", periods=5, freq="D"), name="xxx") ] for s in cases: for prop in ok_for_period: # we test freq below if prop != "freq": compare(s, prop) for prop in ok_for_period_methods: getattr(s.dt, prop) freq_result = s.dt.freq assert freq_result == PeriodIndex(s.values).freq # test limited display api def get_dir(s): results = [r for r in s.dt.__dir__() if not r.startswith("_")] return sorted(set(results)) s = Series(date_range("20130101", periods=5, freq="D"), name="xxx") results = get_dir(s) tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods))) s = Series( period_range("20130101", periods=5, freq="D", name="xxx").astype(object)) results = get_dir(s) tm.assert_almost_equal( results, sorted(set(ok_for_period + ok_for_period_methods))) # 11295 # ambiguous time error on the conversions s = Series(date_range("2015-01-01", "2016-01-01", freq="T"), name="xxx") s = s.dt.tz_localize("UTC").dt.tz_convert("America/Chicago") results = get_dir(s) tm.assert_almost_equal(results, sorted(set(ok_for_dt + ok_for_dt_methods))) exp_values = date_range("2015-01-01", "2016-01-01", freq="T", tz="UTC").tz_convert("America/Chicago") # freq not preserved by tz_localize above exp_values = exp_values._with_freq(None) expected = Series(exp_values, name="xxx") tm.assert_series_equal(s, expected) # no setting allowed s = Series(date_range("20130101", periods=5, freq="D"), name="xxx") with pytest.raises(ValueError, match="modifications"): s.dt.hour = 5 # trying to set a copy msg = "modifications to a property of a datetimelike.+not supported" with pd.option_context("chained_assignment", "raise"): with pytest.raises(com.SettingWithCopyError, match=msg): s.dt.hour[0] = 5
Interval, IntervalIndex, Timedelta, Timestamp, date_range, timedelta_range, ) import pandas._testing as tm from pandas.core.arrays import IntervalArray @pytest.fixture( params=[ (Index([0, 2, 4]), Index([1, 3, 5])), (Index([0.0, 1.0, 2.0]), Index([1.0, 2.0, 3.0])), (timedelta_range("0 days", periods=3), timedelta_range("1 day", periods=3)), (date_range("20170101", periods=3), date_range("20170102", periods=3)), ( date_range("20170101", periods=3, tz="US/Eastern"), date_range("20170102", periods=3, tz="US/Eastern"), ), ], ids=lambda x: str(x[0].dtype), ) def left_right_dtypes(request): """ Fixture for building an IntervalArray from various dtypes """ return request.param
def _nonempty_index(idx): typ = type(idx) if typ is pd.RangeIndex: return pd.RangeIndex(2, name=idx.name) elif typ in _numeric_index_types: return typ([1, 2], name=idx.name) elif typ is pd.Index: return pd.Index(["a", "b"], name=idx.name) elif typ is pd.DatetimeIndex: start = "1970-01-01" # Need a non-monotonic decreasing index to avoid issues with # partial string indexing see https://github.com/dask/dask/issues/2389 # and https://github.com/pandas-dev/pandas/issues/16515 # This doesn't mean `_meta_nonempty` should ever rely on # `self.monotonic_increasing` or `self.monotonic_decreasing` try: return pd.date_range(start=start, periods=2, freq=idx.freq, tz=idx.tz, name=idx.name) except ValueError: # older pandas versions data = [start, "1970-01-02"] if idx.freq is None else None return pd.DatetimeIndex(data, start=start, periods=2, freq=idx.freq, tz=idx.tz, name=idx.name) elif typ is pd.PeriodIndex: return pd.period_range(start="1970-01-01", periods=2, freq=idx.freq, name=idx.name) elif typ is pd.TimedeltaIndex: start = np.timedelta64(1, "D") try: return pd.timedelta_range(start=start, periods=2, freq=idx.freq, name=idx.name) except ValueError: # older pandas versions start = np.timedelta64(1, "D") data = [start, start + 1] if idx.freq is None else None return pd.TimedeltaIndex(data, start=start, periods=2, freq=idx.freq, name=idx.name) elif typ is pd.CategoricalIndex: if len(idx.categories) == 0: data = pd.Categorical(_nonempty_index(idx.categories), ordered=idx.ordered) else: data = pd.Categorical.from_codes([-1, 0], categories=idx.categories, ordered=idx.ordered) return pd.CategoricalIndex(data, name=idx.name) elif typ is pd.MultiIndex: levels = [_nonempty_index(l) for l in idx.levels] codes = [[0, 0] for i in idx.levels] try: return pd.MultiIndex(levels=levels, codes=codes, names=idx.names) except TypeError: # older pandas versions return pd.MultiIndex(levels=levels, labels=codes, names=idx.names) raise TypeError("Don't know how to handle index of " "type {0}".format(typename(type(idx))))
def test_arg_passthru(): # make sure that we are passing thru kwargs # to our agg functions # GH3668 # GH5724 df = pd.DataFrame( {'group': [1, 1, 2], 'int': [1, 2, 3], 'float': [4., 5., 6.], 'string': list('abc'), 'category_string': pd.Series(list('abc')).astype('category'), 'category_int': [7, 8, 9], 'datetime': pd.date_range('20130101', periods=3), 'datetimetz': pd.date_range('20130101', periods=3, tz='US/Eastern'), 'timedelta': pd.timedelta_range('1 s', periods=3, freq='s')}, columns=['group', 'int', 'float', 'string', 'category_string', 'category_int', 'datetime', 'datetimetz', 'timedelta']) expected_columns_numeric = Index(['int', 'float', 'category_int']) # mean / median expected = pd.DataFrame( {'category_int': [7.5, 9], 'float': [4.5, 6.], 'timedelta': [pd.Timedelta('1.5s'), pd.Timedelta('3s')], 'int': [1.5, 3], 'datetime': [pd.Timestamp('2013-01-01 12:00:00'), pd.Timestamp('2013-01-03 00:00:00')], 'datetimetz': [ pd.Timestamp('2013-01-01 12:00:00', tz='US/Eastern'), pd.Timestamp('2013-01-03 00:00:00', tz='US/Eastern')]}, index=Index([1, 2], name='group'), columns=['int', 'float', 'category_int', 'datetime', 'datetimetz', 'timedelta']) for attr in ['mean', 'median']: f = getattr(df.groupby('group'), attr) result = f() tm.assert_index_equal(result.columns, expected_columns_numeric) result = f(numeric_only=False) tm.assert_frame_equal(result.reindex_like(expected), expected) # TODO: min, max *should* handle # categorical (ordered) dtype expected_columns = Index(['int', 'float', 'string', 'category_int', 'datetime', 'datetimetz', 'timedelta']) for attr in ['min', 'max']: f = getattr(df.groupby('group'), attr) result = f() tm.assert_index_equal(result.columns, expected_columns) result = f(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) expected_columns = Index(['int', 'float', 'string', 'category_string', 'category_int', 'datetime', 'datetimetz', 'timedelta']) for attr in ['first', 'last']: f = getattr(df.groupby('group'), attr) result = f() tm.assert_index_equal(result.columns, expected_columns) result = f(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) expected_columns = Index(['int', 'float', 'string', 'category_int', 'timedelta']) for attr in ['sum']: f = getattr(df.groupby('group'), attr) result = f() tm.assert_index_equal(result.columns, expected_columns_numeric) result = f(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) expected_columns = Index(['int', 'float', 'category_int']) for attr in ['prod', 'cumprod']: f = getattr(df.groupby('group'), attr) result = f() tm.assert_index_equal(result.columns, expected_columns_numeric) result = f(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) # like min, max, but don't include strings expected_columns = Index(['int', 'float', 'category_int', 'datetime', 'datetimetz', 'timedelta']) for attr in ['cummin', 'cummax']: f = getattr(df.groupby('group'), attr) result = f() # GH 15561: numeric_only=False set by default like min/max tm.assert_index_equal(result.columns, expected_columns) result = f(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns) expected_columns = Index(['int', 'float', 'category_int', 'timedelta']) for attr in ['cumsum']: f = getattr(df.groupby('group'), attr) result = f() tm.assert_index_equal(result.columns, expected_columns_numeric) result = f(numeric_only=False) tm.assert_index_equal(result.columns, expected_columns)
class TestComparison: @pytest.fixture(params=[operator.eq, operator.ne]) def op(self, request): return request.param @pytest.fixture( params=[ IntervalArray.from_arrays, IntervalIndex.from_arrays, create_categorical_intervals, create_series_intervals, create_series_categorical_intervals, ], ids=[ "IntervalArray", "IntervalIndex", "Categorical[Interval]", "Series[Interval]", "Series[Categorical[Interval]]", ], ) def interval_constructor(self, request): """ Fixture for all pandas native interval constructors. To be used as the LHS of IntervalArray comparisons. """ return request.param def elementwise_comparison(self, op, array, other): """ Helper that performs elementwise comparisons between `array` and `other` """ other = other if is_list_like(other) else [other] * len(array) expected = np.array([op(x, y) for x, y in zip(array, other)]) if isinstance(other, Series): return Series(expected, index=other.index) return expected def test_compare_scalar_interval(self, op, array): # matches first interval other = array[0] result = op(array, other) expected = self.elementwise_comparison(op, array, other) tm.assert_numpy_array_equal(result, expected) # matches on a single endpoint but not both other = Interval(array.left[0], array.right[1]) result = op(array, other) expected = self.elementwise_comparison(op, array, other) tm.assert_numpy_array_equal(result, expected) def test_compare_scalar_interval_mixed_closed(self, op, closed, other_closed): array = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed) other = Interval(0, 1, closed=other_closed) result = op(array, other) expected = self.elementwise_comparison(op, array, other) tm.assert_numpy_array_equal(result, expected) def test_compare_scalar_na(self, op, array, nulls_fixture, request): result = op(array, nulls_fixture) expected = self.elementwise_comparison(op, array, nulls_fixture) if nulls_fixture is pd.NA and array.dtype.subtype != "int64": mark = pytest.mark.xfail( reason="broken for non-integer IntervalArray; see GH 31882") request.node.add_marker(mark) tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize( "other", [ 0, 1.0, True, "foo", Timestamp("2017-01-01"), Timestamp("2017-01-01", tz="US/Eastern"), Timedelta("0 days"), Period("2017-01-01", "D"), ], ) def test_compare_scalar_other(self, op, array, other): result = op(array, other) expected = self.elementwise_comparison(op, array, other) tm.assert_numpy_array_equal(result, expected) def test_compare_list_like_interval(self, op, array, interval_constructor): # same endpoints other = interval_constructor(array.left, array.right) result = op(array, other) expected = self.elementwise_comparison(op, array, other) tm.assert_equal(result, expected) # different endpoints other = interval_constructor(array.left[::-1], array.right[::-1]) result = op(array, other) expected = self.elementwise_comparison(op, array, other) tm.assert_equal(result, expected) # all nan endpoints other = interval_constructor([np.nan] * 4, [np.nan] * 4) result = op(array, other) expected = self.elementwise_comparison(op, array, other) tm.assert_equal(result, expected) def test_compare_list_like_interval_mixed_closed(self, op, interval_constructor, closed, other_closed): array = IntervalArray.from_arrays(range(2), range(1, 3), closed=closed) other = interval_constructor(range(2), range(1, 3), closed=other_closed) result = op(array, other) expected = self.elementwise_comparison(op, array, other) tm.assert_equal(result, expected) @pytest.mark.parametrize( "other", [ ( Interval(0, 1), Interval(Timedelta("1 day"), Timedelta("2 days")), Interval(4, 5, "both"), Interval(10, 20, "neither"), ), (0, 1.5, Timestamp("20170103"), np.nan), ( Timestamp("20170102", tz="US/Eastern"), Timedelta("2 days"), "baz", pd.NaT, ), ], ) def test_compare_list_like_object(self, op, array, other): result = op(array, other) expected = self.elementwise_comparison(op, array, other) tm.assert_numpy_array_equal(result, expected) def test_compare_list_like_nan(self, op, array, nulls_fixture, request): other = [nulls_fixture] * 4 result = op(array, other) expected = self.elementwise_comparison(op, array, other) if nulls_fixture is pd.NA and array.dtype.subtype != "i8": reason = "broken for non-integer IntervalArray; see GH 31882" mark = pytest.mark.xfail(reason=reason) request.node.add_marker(mark) tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize( "other", [ np.arange(4, dtype="int64"), np.arange(4, dtype="float64"), date_range("2017-01-01", periods=4), date_range("2017-01-01", periods=4, tz="US/Eastern"), timedelta_range("0 days", periods=4), period_range("2017-01-01", periods=4, freq="D"), Categorical(list("abab")), Categorical(date_range("2017-01-01", periods=4)), pd.array(list("abcd")), pd.array(["foo", 3.14, None, object()]), ], ids=lambda x: str(x.dtype), ) def test_compare_list_like_other(self, op, array, other): result = op(array, other) expected = self.elementwise_comparison(op, array, other) tm.assert_numpy_array_equal(result, expected) @pytest.mark.parametrize("length", [1, 3, 5]) @pytest.mark.parametrize("other_constructor", [IntervalArray, list]) def test_compare_length_mismatch_errors(self, op, other_constructor, length): array = IntervalArray.from_arrays(range(4), range(1, 5)) other = other_constructor([Interval(0, 1)] * length) with pytest.raises(ValueError, match="Lengths must match to compare"): op(array, other) @pytest.mark.parametrize( "constructor, expected_type, assert_func", [ (IntervalIndex, np.array, tm.assert_numpy_array_equal), (Series, Series, tm.assert_series_equal), ], ) def test_index_series_compat(self, op, constructor, expected_type, assert_func): # IntervalIndex/Series that rely on IntervalArray for comparisons breaks = range(4) index = constructor(IntervalIndex.from_breaks(breaks)) # scalar comparisons other = index[0] result = op(index, other) expected = expected_type(self.elementwise_comparison(op, index, other)) assert_func(result, expected) other = breaks[0] result = op(index, other) expected = expected_type(self.elementwise_comparison(op, index, other)) assert_func(result, expected) # list-like comparisons other = IntervalArray.from_breaks(breaks) result = op(index, other) expected = expected_type(self.elementwise_comparison(op, index, other)) assert_func(result, expected) other = [index[0], breaks[0], "foo"] result = op(index, other) expected = expected_type(self.elementwise_comparison(op, index, other)) assert_func(result, expected) @pytest.mark.parametrize("scalars", ["a", False, 1, 1.0, None]) def test_comparison_operations(self, scalars): # GH #28981 expected = Series([False, False]) s = Series([Interval(0, 1), Interval(1, 2)], dtype="interval") result = s == scalars tm.assert_series_equal(result, expected)
def test_basic(self, fp, df_full): df = df_full df["datetime_tz"] = pd.date_range("20130101", periods=3, tz="US/Eastern") df["timedelta"] = pd.timedelta_range("1 day", periods=3) check_round_trip(df, fp)
class TestPartialSetting: def test_partial_setting(self): # GH2578, allow ix and friends to partially set # series s_orig = Series([1, 2, 3]) s = s_orig.copy() s[5] = 5 expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) tm.assert_series_equal(s, expected) s = s_orig.copy() s.loc[5] = 5 expected = Series([1, 2, 3, 5], index=[0, 1, 2, 5]) tm.assert_series_equal(s, expected) s = s_orig.copy() s[5] = 5.0 expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5]) tm.assert_series_equal(s, expected) s = s_orig.copy() s.loc[5] = 5.0 expected = Series([1, 2, 3, 5.0], index=[0, 1, 2, 5]) tm.assert_series_equal(s, expected) # iloc/iat raise s = s_orig.copy() msg = "iloc cannot enlarge its target object" with pytest.raises(IndexError, match=msg): s.iloc[3] = 5.0 msg = "index 3 is out of bounds for axis 0 with size 3" with pytest.raises(IndexError, match=msg): s.iat[3] = 5.0 # ## frame ## df_orig = DataFrame(np.arange(6).reshape(3, 2), columns=["A", "B"], dtype="int64") # iloc/iat raise df = df_orig.copy() msg = "iloc cannot enlarge its target object" with pytest.raises(IndexError, match=msg): df.iloc[4, 2] = 5.0 msg = "index 2 is out of bounds for axis 0 with size 2" with pytest.raises(IndexError, match=msg): df.iat[4, 2] = 5.0 # row setting where it exists expected = DataFrame(dict({"A": [0, 4, 4], "B": [1, 5, 5]})) df = df_orig.copy() df.iloc[1] = df.iloc[2] tm.assert_frame_equal(df, expected) expected = DataFrame(dict({"A": [0, 4, 4], "B": [1, 5, 5]})) df = df_orig.copy() df.loc[1] = df.loc[2] tm.assert_frame_equal(df, expected) # like 2578, partial setting with dtype preservation expected = DataFrame(dict({"A": [0, 2, 4, 4], "B": [1, 3, 5, 5]})) df = df_orig.copy() df.loc[3] = df.loc[2] tm.assert_frame_equal(df, expected) # single dtype frame, overwrite expected = DataFrame(dict({"A": [0, 2, 4], "B": [0, 2, 4]})) df = df_orig.copy() df.loc[:, "B"] = df.loc[:, "A"] tm.assert_frame_equal(df, expected) # mixed dtype frame, overwrite expected = DataFrame(dict({"A": [0, 2, 4], "B": Series([0, 2, 4])})) df = df_orig.copy() df["B"] = df["B"].astype(np.float64) df.loc[:, "B"] = df.loc[:, "A"] tm.assert_frame_equal(df, expected) # single dtype frame, partial setting expected = df_orig.copy() expected["C"] = df["A"] df = df_orig.copy() df.loc[:, "C"] = df.loc[:, "A"] tm.assert_frame_equal(df, expected) # mixed frame, partial setting expected = df_orig.copy() expected["C"] = df["A"] df = df_orig.copy() df.loc[:, "C"] = df.loc[:, "A"] tm.assert_frame_equal(df, expected) def test_partial_setting2(self): # GH 8473 dates = date_range("1/1/2000", periods=8) df_orig = DataFrame(np.random.randn(8, 4), index=dates, columns=["A", "B", "C", "D"]) expected = pd.concat( [df_orig, DataFrame({"A": 7}, index=dates[-1:] + dates.freq)], sort=True) df = df_orig.copy() df.loc[dates[-1] + dates.freq, "A"] = 7 tm.assert_frame_equal(df, expected) df = df_orig.copy() df.at[dates[-1] + dates.freq, "A"] = 7 tm.assert_frame_equal(df, expected) exp_other = DataFrame({0: 7}, index=dates[-1:] + dates.freq) expected = pd.concat([df_orig, exp_other], axis=1) df = df_orig.copy() df.loc[dates[-1] + dates.freq, 0] = 7 tm.assert_frame_equal(df, expected) df = df_orig.copy() df.at[dates[-1] + dates.freq, 0] = 7 tm.assert_frame_equal(df, expected) # TODO(ArrayManager) # df.loc[0] = Series(1, index=range(4)) case creates float columns # instead of object dtype @td.skip_array_manager_not_yet_implemented def test_partial_setting_mixed_dtype(self): # in a mixed dtype environment, try to preserve dtypes # by appending df = DataFrame([[True, 1], [False, 2]], columns=["female", "fitness"]) s = df.loc[1].copy() s.name = 2 expected = df.append(s) df.loc[2] = df.loc[1] tm.assert_frame_equal(df, expected) # columns will align df = DataFrame(columns=["A", "B"]) df.loc[0] = Series(1, index=range(4)) expected = DataFrame(columns=["A", "B"], index=[0], dtype=np.float64) tm.assert_frame_equal(df, expected) # columns will align # TODO: it isn't great that this behavior depends on consolidation df = DataFrame(columns=["A", "B"])._consolidate() df.loc[0] = Series(1, index=["B"]) exp = DataFrame([[np.nan, 1]], columns=["A", "B"], index=[0], dtype="float64") tm.assert_frame_equal(df, exp) # list-like must conform df = DataFrame(columns=["A", "B"]) msg = "cannot set a row with mismatched columns" with pytest.raises(ValueError, match=msg): df.loc[0] = [1, 2, 3] df = DataFrame(columns=["A", "B"]) df.loc[3] = [6, 7] exp = DataFrame([[6, 7]], index=[3], columns=["A", "B"], dtype=np.int64) tm.assert_frame_equal(df, exp) def test_series_partial_set(self): # partial set with new index # Regression from GH4825 ser = Series([0.1, 0.2], index=[1, 2]) # loc equiv to .reindex expected = Series([np.nan, 0.2, np.nan], index=[3, 2, 3]) with pytest.raises(KeyError, match=r"not in index"): ser.loc[[3, 2, 3]] result = ser.reindex([3, 2, 3]) tm.assert_series_equal(result, expected, check_index_type=True) expected = Series([np.nan, 0.2, np.nan, np.nan], index=[3, 2, 3, "x"]) with pytest.raises(KeyError, match="not in index"): ser.loc[[3, 2, 3, "x"]] result = ser.reindex([3, 2, 3, "x"]) tm.assert_series_equal(result, expected, check_index_type=True) expected = Series([0.2, 0.2, 0.1], index=[2, 2, 1]) result = ser.loc[[2, 2, 1]] tm.assert_series_equal(result, expected, check_index_type=True) expected = Series([0.2, 0.2, np.nan, 0.1], index=[2, 2, "x", 1]) with pytest.raises(KeyError, match="not in index"): ser.loc[[2, 2, "x", 1]] result = ser.reindex([2, 2, "x", 1]) tm.assert_series_equal(result, expected, check_index_type=True) # raises as nothing is in the index msg = (r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64'\)\] are " r"in the \[index\]\"") with pytest.raises(KeyError, match=msg): ser.loc[[3, 3, 3]] expected = Series([0.2, 0.2, np.nan], index=[2, 2, 3]) with pytest.raises(KeyError, match="not in index"): ser.loc[[2, 2, 3]] result = ser.reindex([2, 2, 3]) tm.assert_series_equal(result, expected, check_index_type=True) s = Series([0.1, 0.2, 0.3], index=[1, 2, 3]) expected = Series([0.3, np.nan, np.nan], index=[3, 4, 4]) with pytest.raises(KeyError, match="not in index"): s.loc[[3, 4, 4]] result = s.reindex([3, 4, 4]) tm.assert_series_equal(result, expected, check_index_type=True) s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) expected = Series([np.nan, 0.3, 0.3], index=[5, 3, 3]) with pytest.raises(KeyError, match="not in index"): s.loc[[5, 3, 3]] result = s.reindex([5, 3, 3]) tm.assert_series_equal(result, expected, check_index_type=True) s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) expected = Series([np.nan, 0.4, 0.4], index=[5, 4, 4]) with pytest.raises(KeyError, match="not in index"): s.loc[[5, 4, 4]] result = s.reindex([5, 4, 4]) tm.assert_series_equal(result, expected, check_index_type=True) s = Series([0.1, 0.2, 0.3, 0.4], index=[4, 5, 6, 7]) expected = Series([0.4, np.nan, np.nan], index=[7, 2, 2]) with pytest.raises(KeyError, match="not in index"): s.loc[[7, 2, 2]] result = s.reindex([7, 2, 2]) tm.assert_series_equal(result, expected, check_index_type=True) s = Series([0.1, 0.2, 0.3, 0.4], index=[1, 2, 3, 4]) expected = Series([0.4, np.nan, np.nan], index=[4, 5, 5]) with pytest.raises(KeyError, match="not in index"): s.loc[[4, 5, 5]] result = s.reindex([4, 5, 5]) tm.assert_series_equal(result, expected, check_index_type=True) # iloc expected = Series([0.2, 0.2, 0.1, 0.1], index=[2, 2, 1, 1]) result = ser.iloc[[1, 1, 0, 0]] tm.assert_series_equal(result, expected, check_index_type=True) def test_series_partial_set_with_name(self): # GH 11497 idx = Index([1, 2], dtype="int64", name="idx") ser = Series([0.1, 0.2], index=idx, name="s") # loc with pytest.raises(KeyError, match=r"\[3\] not in index"): ser.loc[[3, 2, 3]] with pytest.raises(KeyError, match=r"not in index"): ser.loc[[3, 2, 3, "x"]] exp_idx = Index([2, 2, 1], dtype="int64", name="idx") expected = Series([0.2, 0.2, 0.1], index=exp_idx, name="s") result = ser.loc[[2, 2, 1]] tm.assert_series_equal(result, expected, check_index_type=True) with pytest.raises(KeyError, match=r"\['x'\] not in index"): ser.loc[[2, 2, "x", 1]] # raises as nothing is in the index msg = (r"\"None of \[Int64Index\(\[3, 3, 3\], dtype='int64', " r"name='idx'\)\] are in the \[index\]\"") with pytest.raises(KeyError, match=msg): ser.loc[[3, 3, 3]] with pytest.raises(KeyError, match="not in index"): ser.loc[[2, 2, 3]] idx = Index([1, 2, 3], dtype="int64", name="idx") with pytest.raises(KeyError, match="not in index"): Series([0.1, 0.2, 0.3], index=idx, name="s").loc[[3, 4, 4]] idx = Index([1, 2, 3, 4], dtype="int64", name="idx") with pytest.raises(KeyError, match="not in index"): Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 3, 3]] idx = Index([1, 2, 3, 4], dtype="int64", name="idx") with pytest.raises(KeyError, match="not in index"): Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[5, 4, 4]] idx = Index([4, 5, 6, 7], dtype="int64", name="idx") with pytest.raises(KeyError, match="not in index"): Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[7, 2, 2]] idx = Index([1, 2, 3, 4], dtype="int64", name="idx") with pytest.raises(KeyError, match="not in index"): Series([0.1, 0.2, 0.3, 0.4], index=idx, name="s").loc[[4, 5, 5]] # iloc exp_idx = Index([2, 2, 1, 1], dtype="int64", name="idx") expected = Series([0.2, 0.2, 0.1, 0.1], index=exp_idx, name="s") result = ser.iloc[[1, 1, 0, 0]] tm.assert_series_equal(result, expected, check_index_type=True) @pytest.mark.parametrize("key", [100, 100.0]) def test_setitem_with_expansion_numeric_into_datetimeindex(self, key): # GH#4940 inserting non-strings orig = tm.makeTimeDataFrame() df = orig.copy() df.loc[key, :] = df.iloc[0] ex_index = Index(list(orig.index) + [key], dtype=object, name=orig.index.name) ex_data = np.concatenate([orig.values, df.iloc[[0]].values], axis=0) expected = DataFrame(ex_data, index=ex_index, columns=orig.columns) tm.assert_frame_equal(df, expected) def test_partial_set_invalid(self): # GH 4940 # allow only setting of 'valid' values orig = tm.makeTimeDataFrame() # allow object conversion here df = orig.copy() df.loc["a", :] = df.iloc[0] exp = orig.append(Series(df.iloc[0], name="a")) tm.assert_frame_equal(df, exp) tm.assert_index_equal(df.index, Index(orig.index.tolist() + ["a"])) assert df.index.dtype == "object" def test_partial_set_empty_frame(self): # partially set with an empty object # frame df = DataFrame() msg = "cannot set a frame with no defined columns" with pytest.raises(ValueError, match=msg): df.loc[1] = 1 with pytest.raises(ValueError, match=msg): df.loc[1] = Series([1], index=["foo"]) msg = "cannot set a frame with no defined index and a scalar" with pytest.raises(ValueError, match=msg): df.loc[:, 1] = 1 def test_partial_set_empty_frame2(self): # these work as they don't really change # anything but the index # GH5632 expected = DataFrame(columns=["foo"], index=Index([], dtype="object")) df = DataFrame(index=Index([], dtype="object")) df["foo"] = Series([], dtype="object") tm.assert_frame_equal(df, expected) df = DataFrame() df["foo"] = Series(df.index) tm.assert_frame_equal(df, expected) df = DataFrame() df["foo"] = df.index tm.assert_frame_equal(df, expected) def test_partial_set_empty_frame3(self): expected = DataFrame(columns=["foo"], index=Index([], dtype="int64")) expected["foo"] = expected["foo"].astype("float64") df = DataFrame(index=Index([], dtype="int64")) df["foo"] = [] tm.assert_frame_equal(df, expected) df = DataFrame(index=Index([], dtype="int64")) df["foo"] = Series(np.arange(len(df)), dtype="float64") tm.assert_frame_equal(df, expected) def test_partial_set_empty_frame4(self): df = DataFrame(index=Index([], dtype="int64")) df["foo"] = range(len(df)) expected = DataFrame(columns=["foo"], index=Index([], dtype="int64")) # range is int-dtype-like, so we get int64 dtype expected["foo"] = expected["foo"].astype("int64") tm.assert_frame_equal(df, expected) def test_partial_set_empty_frame5(self): df = DataFrame() tm.assert_index_equal(df.columns, Index([], dtype=object)) df2 = DataFrame() df2[1] = Series([1], index=["foo"]) df.loc[:, 1] = Series([1], index=["foo"]) tm.assert_frame_equal(df, DataFrame([[1]], index=["foo"], columns=[1])) tm.assert_frame_equal(df, df2) def test_partial_set_empty_frame_no_index(self): # no index to start expected = DataFrame({0: Series(1, index=range(4))}, columns=["A", "B", 0]) df = DataFrame(columns=["A", "B"]) df[0] = Series(1, index=range(4)) df.dtypes str(df) tm.assert_frame_equal(df, expected) df = DataFrame(columns=["A", "B"]) df.loc[:, 0] = Series(1, index=range(4)) df.dtypes str(df) tm.assert_frame_equal(df, expected) def test_partial_set_empty_frame_row(self): # GH5720, GH5744 # don't create rows when empty expected = DataFrame(columns=["A", "B", "New"], index=Index([], dtype="int64")) expected["A"] = expected["A"].astype("int64") expected["B"] = expected["B"].astype("float64") expected["New"] = expected["New"].astype("float64") df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) y = df[df.A > 5] y["New"] = np.nan tm.assert_frame_equal(y, expected) # tm.assert_frame_equal(y,expected) expected = DataFrame(columns=["a", "b", "c c", "d"]) expected["d"] = expected["d"].astype("int64") df = DataFrame(columns=["a", "b", "c c"]) df["d"] = 3 tm.assert_frame_equal(df, expected) tm.assert_series_equal(df["c c"], Series(name="c c", dtype=object)) # reindex columns is ok df = DataFrame({"A": [1, 2, 3], "B": [1.2, 4.2, 5.2]}) y = df[df.A > 5] result = y.reindex(columns=["A", "B", "C"]) expected = DataFrame(columns=["A", "B", "C"], index=Index([], dtype="int64")) expected["A"] = expected["A"].astype("int64") expected["B"] = expected["B"].astype("float64") expected["C"] = expected["C"].astype("float64") tm.assert_frame_equal(result, expected) def test_partial_set_empty_frame_set_series(self): # GH 5756 # setting with empty Series df = DataFrame(Series(dtype=object)) expected = DataFrame({0: Series(dtype=object)}) tm.assert_frame_equal(df, expected) df = DataFrame(Series(name="foo", dtype=object)) expected = DataFrame({"foo": Series(dtype=object)}) tm.assert_frame_equal(df, expected) def test_partial_set_empty_frame_empty_copy_assignment(self): # GH 5932 # copy on empty with assignment fails df = DataFrame(index=[0]) df = df.copy() df["a"] = 0 expected = DataFrame(0, index=[0], columns=["a"]) tm.assert_frame_equal(df, expected) def test_partial_set_empty_frame_empty_consistencies(self): # GH 6171 # consistency on empty frames df = DataFrame(columns=["x", "y"]) df["x"] = [1, 2] expected = DataFrame({"x": [1, 2], "y": [np.nan, np.nan]}) tm.assert_frame_equal(df, expected, check_dtype=False) df = DataFrame(columns=["x", "y"]) df["x"] = ["1", "2"] expected = DataFrame({ "x": ["1", "2"], "y": [np.nan, np.nan] }, dtype=object) tm.assert_frame_equal(df, expected) df = DataFrame(columns=["x", "y"]) df.loc[0, "x"] = 1 expected = DataFrame({"x": [1], "y": [np.nan]}) tm.assert_frame_equal(df, expected, check_dtype=False) @pytest.mark.parametrize( "idx,labels,expected_idx", [ ( period_range(start="2000", periods=20, freq="D"), ["2000-01-04", "2000-01-08", "2000-01-12"], [ Period("2000-01-04", freq="D"), Period("2000-01-08", freq="D"), Period("2000-01-12", freq="D"), ], ), ( date_range(start="2000", periods=20, freq="D"), ["2000-01-04", "2000-01-08", "2000-01-12"], [ Timestamp("2000-01-04"), Timestamp("2000-01-08"), Timestamp("2000-01-12"), ], ), ( pd.timedelta_range(start="1 day", periods=20), ["4D", "8D", "12D"], [ pd.Timedelta("4 day"), pd.Timedelta("8 day"), pd.Timedelta("12 day") ], ), ], ) def test_loc_with_list_of_strings_representing_datetimes( self, idx, labels, expected_idx, frame_or_series): # GH 11278 obj = frame_or_series(range(20), index=idx) expected_value = [3, 7, 11] expected = frame_or_series(expected_value, expected_idx) tm.assert_equal(expected, obj.loc[labels]) if frame_or_series is Series: tm.assert_series_equal(expected, obj[labels]) @pytest.mark.parametrize( "idx,labels", [ ( period_range(start="2000", periods=20, freq="D"), ["2000-01-04", "2000-01-30"], ), ( date_range(start="2000", periods=20, freq="D"), ["2000-01-04", "2000-01-30"], ), (pd.timedelta_range(start="1 day", periods=20), ["3 day", "30 day"]), ], ) def test_loc_with_list_of_strings_representing_datetimes_missing_value( self, idx, labels): # GH 11278 s = Series(range(20), index=idx) df = DataFrame(range(20), index=idx) msg = r"not in index" with pytest.raises(KeyError, match=msg): s.loc[labels] with pytest.raises(KeyError, match=msg): s[labels] with pytest.raises(KeyError, match=msg): df.loc[labels] @pytest.mark.parametrize( "idx,labels,msg", [ ( period_range(start="2000", periods=20, freq="D"), ["4D", "8D"], (r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] " r"are in the \[index\]"), ), ( date_range(start="2000", periods=20, freq="D"), ["4D", "8D"], (r"None of \[Index\(\['4D', '8D'\], dtype='object'\)\] " r"are in the \[index\]"), ), ( pd.timedelta_range(start="1 day", periods=20), ["2000-01-04", "2000-01-08"], (r"None of \[Index\(\['2000-01-04', '2000-01-08'\], " r"dtype='object'\)\] are in the \[index\]"), ), ], ) def test_loc_with_list_of_strings_representing_datetimes_not_matched_type( self, idx, labels, msg): # GH 11278 s = Series(range(20), index=idx) df = DataFrame(range(20), index=idx) with pytest.raises(KeyError, match=msg): s.loc[labels] with pytest.raises(KeyError, match=msg): s[labels] with pytest.raises(KeyError, match=msg): df.loc[labels] def test_index_name_empty(self): # GH 31368 df = DataFrame({}, index=pd.RangeIndex(0, name="df_index")) series = Series(1.23, index=pd.RangeIndex(4, name="series_index")) df["series"] = series expected = DataFrame({"series": [1.23] * 4}, index=pd.RangeIndex(4, name="df_index")) tm.assert_frame_equal(df, expected) # GH 36527 df = DataFrame() series = Series(1.23, index=pd.RangeIndex(4, name="series_index")) df["series"] = series expected = DataFrame({"series": [1.23] * 4}, index=pd.RangeIndex(4, name="series_index")) tm.assert_frame_equal(df, expected) def test_slice_irregular_datetime_index_with_nan(self): # GH36953 index = pd.to_datetime( ["2012-01-01", "2012-01-02", "2012-01-03", None]) df = DataFrame(range(len(index)), index=index) expected = DataFrame(range(len(index[:3])), index=index[:3]) result = df["2012-01-01":"2012-01-04"] tm.assert_frame_equal(result, expected)
def test_count_nonnumeric_types(): # GH12541 cols = [ "int", "float", "string", "datetime", "timedelta", "periods", "fl_inf", "fl_nan", "str_nan", "dt_nat", "periods_nat", ] dt_nat_col = [ Timestamp("20170101"), Timestamp("20170203"), Timestamp(None) ] df = DataFrame( { "int": [1, 2, 3], "float": [4.0, 5.0, 6.0], "string": list("abc"), "datetime": date_range("20170101", periods=3), "timedelta": timedelta_range("1 s", periods=3, freq="s"), "periods": [ Period("2012-01"), Period("2012-02"), Period("2012-03"), ], "fl_inf": [1.0, 2.0, np.Inf], "fl_nan": [1.0, 2.0, np.NaN], "str_nan": ["aa", "bb", np.NaN], "dt_nat": dt_nat_col, "periods_nat": [ Period("2012-01"), Period("2012-02"), Period(None), ], }, columns=cols, ) expected = DataFrame( { "int": [1.0, 2.0, 2.0], "float": [1.0, 2.0, 2.0], "string": [1.0, 2.0, 2.0], "datetime": [1.0, 2.0, 2.0], "timedelta": [1.0, 2.0, 2.0], "periods": [1.0, 2.0, 2.0], "fl_inf": [1.0, 2.0, 2.0], "fl_nan": [1.0, 2.0, 1.0], "str_nan": [1.0, 2.0, 1.0], "dt_nat": [1.0, 2.0, 1.0], "periods_nat": [1.0, 2.0, 1.0], }, columns=cols, ) result = df.rolling(window=2, min_periods=0).count() tm.assert_frame_equal(result, expected) result = df.rolling(1, min_periods=0).count() expected = df.notna().astype(float) tm.assert_frame_equal(result, expected)
class Base: """ Common tests for all variations of IntervalIndex construction. Input data to be supplied in breaks format, then converted by the subclass method get_kwargs_from_breaks to the expected format. """ @pytest.mark.parametrize( "breaks", [ [3, 14, 15, 92, 653], np.arange(10, dtype="int64"), Int64Index(range(-10, 11)), Float64Index(np.arange(20, 30, 0.5)), date_range("20180101", periods=10), date_range("20180101", periods=10, tz="US/Eastern"), timedelta_range("1 day", periods=10), ], ) def test_constructor(self, constructor, breaks, closed, name): result_kwargs = self.get_kwargs_from_breaks(breaks, closed) result = constructor(closed=closed, name=name, **result_kwargs) assert result.closed == closed assert result.name == name assert result.dtype.subtype == getattr(breaks, "dtype", "int64") tm.assert_index_equal(result.left, Index(breaks[:-1])) tm.assert_index_equal(result.right, Index(breaks[1:])) @pytest.mark.parametrize( "breaks, subtype", [ (Int64Index([0, 1, 2, 3, 4]), "float64"), (Int64Index([0, 1, 2, 3, 4]), "datetime64[ns]"), (Int64Index([0, 1, 2, 3, 4]), "timedelta64[ns]"), (Float64Index([0, 1, 2, 3, 4]), "int64"), (date_range("2017-01-01", periods=5), "int64"), (timedelta_range("1 day", periods=5), "int64"), ], ) def test_constructor_dtype(self, constructor, breaks, subtype): # GH 19262: conversion via dtype parameter expected_kwargs = self.get_kwargs_from_breaks(breaks.astype(subtype)) expected = constructor(**expected_kwargs) result_kwargs = self.get_kwargs_from_breaks(breaks) iv_dtype = IntervalDtype(subtype) for dtype in (iv_dtype, str(iv_dtype)): result = constructor(dtype=dtype, **result_kwargs) tm.assert_index_equal(result, expected) @pytest.mark.parametrize("breaks", [[np.nan] * 2, [np.nan] * 4, [np.nan] * 50]) def test_constructor_nan(self, constructor, breaks, closed): # GH 18421 result_kwargs = self.get_kwargs_from_breaks(breaks) result = constructor(closed=closed, **result_kwargs) expected_subtype = np.float64 expected_values = np.array(breaks[:-1], dtype=object) assert result.closed == closed assert result.dtype.subtype == expected_subtype tm.assert_numpy_array_equal(result._ndarray_values, expected_values) @pytest.mark.parametrize( "breaks", [ [], np.array([], dtype="int64"), np.array([], dtype="float64"), np.array([], dtype="datetime64[ns]"), np.array([], dtype="timedelta64[ns]"), ], ) def test_constructor_empty(self, constructor, breaks, closed): # GH 18421 result_kwargs = self.get_kwargs_from_breaks(breaks) result = constructor(closed=closed, **result_kwargs) expected_values = np.array([], dtype=object) expected_subtype = getattr(breaks, "dtype", np.int64) assert result.empty assert result.closed == closed assert result.dtype.subtype == expected_subtype tm.assert_numpy_array_equal(result._ndarray_values, expected_values) @pytest.mark.parametrize( "breaks", [ tuple("0123456789"), list("abcdefghij"), np.array(list("abcdefghij"), dtype=object), np.array(list("abcdefghij"), dtype="<U1"), ], ) def test_constructor_string(self, constructor, breaks): # GH 19016 msg = ("category, object, and string subtypes are not supported " "for IntervalIndex") with pytest.raises(TypeError, match=msg): constructor(**self.get_kwargs_from_breaks(breaks)) @pytest.mark.parametrize("cat_constructor", [Categorical, CategoricalIndex]) def test_constructor_categorical_valid(self, constructor, cat_constructor): # GH 21243/21253 if isinstance(constructor, partial) and constructor.func is Index: # Index is defined to create CategoricalIndex from categorical data pytest.skip() breaks = np.arange(10, dtype="int64") expected = IntervalIndex.from_breaks(breaks) cat_breaks = cat_constructor(breaks) result_kwargs = self.get_kwargs_from_breaks(cat_breaks) result = constructor(**result_kwargs) tm.assert_index_equal(result, expected) def test_generic_errors(self, constructor): # filler input data to be used when supplying invalid kwargs filler = self.get_kwargs_from_breaks(range(10)) # invalid closed msg = "invalid option for 'closed': invalid" with pytest.raises(ValueError, match=msg): constructor(closed="invalid", **filler) # unsupported dtype msg = "dtype must be an IntervalDtype, got int64" with pytest.raises(TypeError, match=msg): constructor(dtype="int64", **filler) # invalid dtype msg = "data type [\"']invalid[\"'] not understood" with pytest.raises(TypeError, match=msg): constructor(dtype="invalid", **filler) # no point in nesting periods in an IntervalIndex periods = period_range("2000-01-01", periods=10) periods_kwargs = self.get_kwargs_from_breaks(periods) msg = "Period dtypes are not supported, use a PeriodIndex instead" with pytest.raises(ValueError, match=msg): constructor(**periods_kwargs) # decreasing values decreasing_kwargs = self.get_kwargs_from_breaks(range(10, -1, -1)) msg = "left side of interval must be <= right side" with pytest.raises(ValueError, match=msg): constructor(**decreasing_kwargs)
def test_dt_accessor_api_for_categorical(self): # https://github.com/pandas-dev/pandas/issues/10661 s_dr = Series(date_range("1/1/2015", periods=5, tz="MET")) c_dr = s_dr.astype("category") s_pr = Series(period_range("1/1/2015", freq="D", periods=5)) c_pr = s_pr.astype("category") s_tdr = Series(timedelta_range("1 days", "10 days")) c_tdr = s_tdr.astype("category") # only testing field (like .day) # and bool (is_month_start) get_ops = lambda x: x._datetimelike_ops test_data = [ ("Datetime", get_ops(DatetimeIndex), s_dr, c_dr), ("Period", get_ops(PeriodArray), s_pr, c_pr), ("Timedelta", get_ops(TimedeltaIndex), s_tdr, c_tdr), ] assert isinstance(c_dr.dt, Properties) special_func_defs = [ ("strftime", ("%Y-%m-%d",), {}), ("tz_convert", ("EST",), {}), ("round", ("D",), {}), ("floor", ("D",), {}), ("ceil", ("D",), {}), ("asfreq", ("D",), {}), # FIXME: don't leave commented-out # ('tz_localize', ("UTC",), {}), ] _special_func_names = [f[0] for f in special_func_defs] # the series is already localized _ignore_names = ["tz_localize", "components"] for name, attr_names, s, c in test_data: func_names = [ f for f in dir(s.dt) if not ( f.startswith("_") or f in attr_names or f in _special_func_names or f in _ignore_names ) ] func_defs = [(f, (), {}) for f in func_names] for f_def in special_func_defs: if f_def[0] in dir(s.dt): func_defs.append(f_def) for func, args, kwargs in func_defs: with warnings.catch_warnings(): if func == "to_period": # dropping TZ warnings.simplefilter("ignore", UserWarning) res = getattr(c.dt, func)(*args, **kwargs) exp = getattr(s.dt, func)(*args, **kwargs) tm.assert_equal(res, exp) for attr in attr_names: if attr in ["week", "weekofyear"]: # GH#33595 Deprecate week and weekofyear continue res = getattr(c.dt, attr) exp = getattr(s.dt, attr) if isinstance(res, DataFrame): tm.assert_frame_equal(res, exp) elif isinstance(res, Series): tm.assert_series_equal(res, exp) else: tm.assert_almost_equal(res, exp) invalid = Series([1, 2, 3]).astype("category") msg = "Can only use .dt accessor with datetimelike" with pytest.raises(AttributeError, match=msg): invalid.dt assert not hasattr(invalid, "str")
) def test_invalid_nat_setitem_array(arr, non_casting_nats): msg = ( "value should be a '(Timestamp|Timedelta|Period)', 'NaT', or array of those. " "Got '(timedelta64|datetime64|int)' instead.") for nat in non_casting_nats: with pytest.raises(TypeError, match=msg): arr[0] = nat @pytest.mark.parametrize( "arr", [ pd.date_range("2000", periods=4).array, pd.timedelta_range("2000", periods=4).array, ], ) def test_to_numpy_extra(arr): arr[0] = NaT original = arr.copy() result = arr.to_numpy() assert np.isnan(result[0]) result = arr.to_numpy(dtype="int64") assert result[0] == -9223372036854775808 result = arr.to_numpy(dtype="int64", na_value=0) assert result[0] == 0
class TestSeriesConstructors(TestData): def test_invalid_dtype(self): # GH15520 msg = 'not understood' invalid_list = [pd.Timestamp, 'pd.Timestamp', list] for dtype in invalid_list: with tm.assert_raises_regex(TypeError, msg): Series([], name='time', dtype=dtype) def test_scalar_conversion(self): # Pass in scalar is disabled scalar = Series(0.5) assert not isinstance(scalar, float) # Coercion assert float(Series([1.])) == 1.0 assert int(Series([1.])) == 1 assert long(Series([1.])) == 1 def test_constructor(self): assert self.ts.index.is_all_dates # Pass in Series derived = Series(self.ts) assert derived.index.is_all_dates assert tm.equalContents(derived.index, self.ts.index) # Ensure new index is not created assert id(self.ts.index) == id(derived.index) # Mixed type Series mixed = Series(['hello', np.NaN], index=[0, 1]) assert mixed.dtype == np.object_ assert mixed[1] is np.NaN assert not self.empty.index.is_all_dates assert not Series({}).index.is_all_dates pytest.raises(Exception, Series, np.random.randn(3, 3), index=np.arange(3)) mixed.name = 'Series' rs = Series(mixed).name xp = 'Series' assert rs == xp # raise on MultiIndex GH4187 m = MultiIndex.from_arrays([[1, 2], [3, 4]]) pytest.raises(NotImplementedError, Series, m) @pytest.mark.parametrize('input_class', [list, dict, OrderedDict]) def test_constructor_empty(self, input_class): empty = Series() empty2 = Series(input_class()) # these are Index() and RangeIndex() which don't compare type equal # but are just .equals assert_series_equal(empty, empty2, check_index_type=False) # With explicit dtype: empty = Series(dtype='float64') empty2 = Series(input_class(), dtype='float64') assert_series_equal(empty, empty2, check_index_type=False) # GH 18515 : with dtype=category: empty = Series(dtype='category') empty2 = Series(input_class(), dtype='category') assert_series_equal(empty, empty2, check_index_type=False) if input_class is not list: # With index: empty = Series(index=lrange(10)) empty2 = Series(input_class(), index=lrange(10)) assert_series_equal(empty, empty2) # With index and dtype float64: empty = Series(np.nan, index=lrange(10)) empty2 = Series(input_class(), index=lrange(10), dtype='float64') assert_series_equal(empty, empty2) # GH 19853 : with empty string, index and dtype str empty = Series('', dtype=str, index=range(3)) empty2 = Series('', index=range(3)) assert_series_equal(empty, empty2) @pytest.mark.parametrize('input_arg', [np.nan, float('nan')]) def test_constructor_nan(self, input_arg): empty = Series(dtype='float64', index=lrange(10)) empty2 = Series(input_arg, index=lrange(10)) assert_series_equal(empty, empty2, check_index_type=False) @pytest.mark.parametrize('dtype', [ 'f8', 'i8', 'M8[ns]', 'm8[ns]', 'category', 'object', 'datetime64[ns, UTC]', ]) @pytest.mark.parametrize('index', [None, pd.Index([])]) def test_constructor_dtype_only(self, dtype, index): # GH-20865 result = pd.Series(dtype=dtype, index=index) assert result.dtype == dtype assert len(result) == 0 def test_constructor_no_data_index_order(self): result = pd.Series(index=['b', 'a', 'c']) assert result.index.tolist() == ['b', 'a', 'c'] def test_constructor_dtype_str_na_values(self, string_dtype): # https://github.com/pandas-dev/pandas/issues/21083 ser = Series(['x', None], dtype=string_dtype) result = ser.isna() expected = Series([False, True]) tm.assert_series_equal(result, expected) assert ser.iloc[1] is None ser = Series(['x', np.nan], dtype=string_dtype) assert np.isnan(ser.iloc[1]) def test_constructor_series(self): index1 = ['d', 'b', 'a', 'c'] index2 = sorted(index1) s1 = Series([4, 7, -5, 3], index=index1) s2 = Series(s1, index=index2) assert_series_equal(s2, s1.sort_index()) def test_constructor_iterator(self): expected = Series(list(range(10)), dtype='int64') result = Series(range(10), dtype='int64') assert_series_equal(result, expected) def test_constructor_list_like(self): # make sure that we are coercing different # list-likes to standard dtypes and not # platform specific expected = Series([1, 2, 3], dtype='int64') for obj in [[1, 2, 3], (1, 2, 3), np.array([1, 2, 3], dtype='int64')]: result = Series(obj, index=[0, 1, 2]) assert_series_equal(result, expected) @pytest.mark.parametrize('input_vals', [ ([1, 2]), (['1', '2']), (list(pd.date_range('1/1/2011', periods=2, freq='H'))), (list(pd.date_range('1/1/2011', periods=2, freq='H', tz='US/Eastern'))), ([pd.Interval(left=0, right=5)]), ]) def test_constructor_list_str(self, input_vals, string_dtype): # GH 16605 # Ensure that data elements from a list are converted to strings # when dtype is str, 'str', or 'U' result = Series(input_vals, dtype=string_dtype) expected = Series(input_vals).astype(string_dtype) assert_series_equal(result, expected) def test_constructor_list_str_na(self, string_dtype): result = Series([1.0, 2.0, np.nan], dtype=string_dtype) expected = Series(['1.0', '2.0', np.nan], dtype=object) assert_series_equal(result, expected) assert np.isnan(result[2]) def test_constructor_generator(self): gen = (i for i in range(10)) result = Series(gen) exp = Series(lrange(10)) assert_series_equal(result, exp) gen = (i for i in range(10)) result = Series(gen, index=lrange(10, 20)) exp.index = lrange(10, 20) assert_series_equal(result, exp) def test_constructor_map(self): # GH8909 m = map(lambda x: x, range(10)) result = Series(m) exp = Series(lrange(10)) assert_series_equal(result, exp) m = map(lambda x: x, range(10)) result = Series(m, index=lrange(10, 20)) exp.index = lrange(10, 20) assert_series_equal(result, exp) def test_constructor_categorical(self): cat = pd.Categorical([0, 1, 2, 0, 1, 2], ['a', 'b', 'c'], fastpath=True) res = Series(cat) tm.assert_categorical_equal(res.values, cat) # GH12574 pytest.raises(ValueError, lambda: Series(pd.Categorical([1, 2, 3]), dtype='int64')) cat = Series(pd.Categorical([1, 2, 3]), dtype='category') assert is_categorical_dtype(cat) assert is_categorical_dtype(cat.dtype) s = Series([1, 2, 3], dtype='category') assert is_categorical_dtype(s) assert is_categorical_dtype(s.dtype) def test_constructor_categorical_with_coercion(self): factor = Categorical(['a', 'b', 'b', 'a', 'a', 'c', 'c', 'c']) # test basic creation / coercion of categoricals s = Series(factor, name='A') assert s.dtype == 'category' assert len(s) == len(factor) str(s.values) str(s) # in a frame df = DataFrame({'A': factor}) result = df['A'] tm.assert_series_equal(result, s) result = df.iloc[:, 0] tm.assert_series_equal(result, s) assert len(df) == len(factor) str(df.values) str(df) df = DataFrame({'A': s}) result = df['A'] tm.assert_series_equal(result, s) assert len(df) == len(factor) str(df.values) str(df) # multiples df = DataFrame({'A': s, 'B': s, 'C': 1}) result1 = df['A'] result2 = df['B'] tm.assert_series_equal(result1, s) tm.assert_series_equal(result2, s, check_names=False) assert result2.name == 'B' assert len(df) == len(factor) str(df.values) str(df) # GH8623 x = DataFrame( [[1, 'John P. Doe'], [2, 'Jane Dove'], [1, 'John P. Doe']], columns=['person_id', 'person_name']) x['person_name'] = Categorical( x.person_name) # doing this breaks transform expected = x.iloc[0].person_name result = x.person_name.iloc[0] assert result == expected result = x.person_name[0] assert result == expected result = x.person_name.loc[0] assert result == expected def test_constructor_categorical_dtype(self): result = pd.Series(['a', 'b'], dtype=CategoricalDtype(['a', 'b', 'c'], ordered=True)) assert is_categorical_dtype(result) is True tm.assert_index_equal(result.cat.categories, pd.Index(['a', 'b', 'c'])) assert result.cat.ordered result = pd.Series(['a', 'b'], dtype=CategoricalDtype(['b', 'a'])) assert is_categorical_dtype(result) tm.assert_index_equal(result.cat.categories, pd.Index(['b', 'a'])) assert result.cat.ordered is False # GH 19565 - Check broadcasting of scalar with Categorical dtype result = Series('a', index=[0, 1], dtype=CategoricalDtype(['a', 'b'], ordered=True)) expected = Series(['a', 'a'], index=[0, 1], dtype=CategoricalDtype(['a', 'b'], ordered=True)) tm.assert_series_equal(result, expected, check_categorical=True) def test_categorical_sideeffects_free(self): # Passing a categorical to a Series and then changing values in either # the series or the categorical should not change the values in the # other one, IF you specify copy! cat = Categorical(["a", "b", "c", "a"]) s = Series(cat, copy=True) assert s.cat is not cat s.cat.categories = [1, 2, 3] exp_s = np.array([1, 2, 3, 1], dtype=np.int64) exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_) tm.assert_numpy_array_equal(s.__array__(), exp_s) tm.assert_numpy_array_equal(cat.__array__(), exp_cat) # setting s[0] = 2 exp_s2 = np.array([2, 2, 3, 1], dtype=np.int64) tm.assert_numpy_array_equal(s.__array__(), exp_s2) tm.assert_numpy_array_equal(cat.__array__(), exp_cat) # however, copy is False by default # so this WILL change values cat = Categorical(["a", "b", "c", "a"]) s = Series(cat) assert s.values is cat s.cat.categories = [1, 2, 3] exp_s = np.array([1, 2, 3, 1], dtype=np.int64) tm.assert_numpy_array_equal(s.__array__(), exp_s) tm.assert_numpy_array_equal(cat.__array__(), exp_s) s[0] = 2 exp_s2 = np.array([2, 2, 3, 1], dtype=np.int64) tm.assert_numpy_array_equal(s.__array__(), exp_s2) tm.assert_numpy_array_equal(cat.__array__(), exp_s2) def test_unordered_compare_equal(self): left = pd.Series(['a', 'b', 'c'], dtype=CategoricalDtype(['a', 'b'])) right = pd.Series( pd.Categorical(['a', 'b', np.nan], categories=['a', 'b'])) tm.assert_series_equal(left, right) def test_constructor_maskedarray(self): data = ma.masked_all((3, ), dtype=float) result = Series(data) expected = Series([nan, nan, nan]) assert_series_equal(result, expected) data[0] = 0.0 data[2] = 2.0 index = ['a', 'b', 'c'] result = Series(data, index=index) expected = Series([0.0, nan, 2.0], index=index) assert_series_equal(result, expected) data[1] = 1.0 result = Series(data, index=index) expected = Series([0.0, 1.0, 2.0], index=index) assert_series_equal(result, expected) data = ma.masked_all((3, ), dtype=int) result = Series(data) expected = Series([nan, nan, nan], dtype=float) assert_series_equal(result, expected) data[0] = 0 data[2] = 2 index = ['a', 'b', 'c'] result = Series(data, index=index) expected = Series([0, nan, 2], index=index, dtype=float) assert_series_equal(result, expected) data[1] = 1 result = Series(data, index=index) expected = Series([0, 1, 2], index=index, dtype=int) assert_series_equal(result, expected) data = ma.masked_all((3, ), dtype=bool) result = Series(data) expected = Series([nan, nan, nan], dtype=object) assert_series_equal(result, expected) data[0] = True data[2] = False index = ['a', 'b', 'c'] result = Series(data, index=index) expected = Series([True, nan, False], index=index, dtype=object) assert_series_equal(result, expected) data[1] = True result = Series(data, index=index) expected = Series([True, True, False], index=index, dtype=bool) assert_series_equal(result, expected) data = ma.masked_all((3, ), dtype='M8[ns]') result = Series(data) expected = Series([iNaT, iNaT, iNaT], dtype='M8[ns]') assert_series_equal(result, expected) data[0] = datetime(2001, 1, 1) data[2] = datetime(2001, 1, 3) index = ['a', 'b', 'c'] result = Series(data, index=index) expected = Series([datetime(2001, 1, 1), iNaT, datetime(2001, 1, 3)], index=index, dtype='M8[ns]') assert_series_equal(result, expected) data[1] = datetime(2001, 1, 2) result = Series(data, index=index) expected = Series( [datetime(2001, 1, 1), datetime(2001, 1, 2), datetime(2001, 1, 3)], index=index, dtype='M8[ns]') assert_series_equal(result, expected) def test_series_ctor_plus_datetimeindex(self): rng = date_range('20090415', '20090519', freq='B') data = {k: 1 for k in rng} result = Series(data, index=rng) assert result.index is rng def test_constructor_default_index(self): s = Series([0, 1, 2]) tm.assert_index_equal(s.index, pd.Index(np.arange(3))) @pytest.mark.parametrize('input', [[1, 2, 3], (1, 2, 3), list(range(3)), pd.Categorical(['a', 'b', 'a']), (i for i in range(3)), map(lambda x: x, range(3))]) def test_constructor_index_mismatch(self, input): # GH 19342 # test that construction of a Series with an index of different length # raises an error msg = 'Length of passed values is 3, index implies 4' with pytest.raises(ValueError, message=msg): Series(input, index=np.arange(4)) def test_constructor_numpy_scalar(self): # GH 19342 # construction with a numpy scalar # should not raise result = Series(np.array(100), index=np.arange(4), dtype='int64') expected = Series(100, index=np.arange(4), dtype='int64') tm.assert_series_equal(result, expected) def test_constructor_broadcast_list(self): # GH 19342 # construction with single-element container and index # should raise pytest.raises(ValueError, Series, ['foo'], index=['a', 'b', 'c']) def test_constructor_corner(self): df = tm.makeTimeDataFrame() objs = [df, df] s = Series(objs, index=[0, 1]) assert isinstance(s, Series) def test_constructor_sanitize(self): s = Series(np.array([1., 1., 8.]), dtype='i8') assert s.dtype == np.dtype('i8') s = Series(np.array([1., 1., np.nan]), copy=True, dtype='i8') assert s.dtype == np.dtype('f8') def test_constructor_copy(self): # GH15125 # test dtype parameter has no side effects on copy=True for data in [[1.], np.array([1.])]: x = Series(data) y = pd.Series(x, copy=True, dtype=float) # copy=True maintains original data in Series tm.assert_series_equal(x, y) # changes to origin of copy does not affect the copy x[0] = 2. assert not x.equals(y) assert x[0] == 2. assert y[0] == 1. @pytest.mark.parametrize("index", [ pd.date_range('20170101', periods=3, tz='US/Eastern'), pd.date_range('20170101', periods=3), pd.timedelta_range('1 day', periods=3), pd.period_range('2012Q1', periods=3, freq='Q'), pd.Index(list('abc')), pd.Int64Index([1, 2, 3]), pd.RangeIndex(0, 3) ], ids=lambda x: type(x).__name__) def test_constructor_limit_copies(self, index): # GH 17449 # limit copies of input s = pd.Series(index) # we make 1 copy; this is just a smoke test here assert s._data.blocks[0].values is not index def test_constructor_pass_none(self): s = Series(None, index=lrange(5)) assert s.dtype == np.float64 s = Series(None, index=lrange(5), dtype=object) assert s.dtype == np.object_ # GH 7431 # inference on the index s = Series(index=np.array([None])) expected = Series(index=Index([None])) assert_series_equal(s, expected) def test_constructor_pass_nan_nat(self): # GH 13467 exp = Series([np.nan, np.nan], dtype=np.float64) assert exp.dtype == np.float64 tm.assert_series_equal(Series([np.nan, np.nan]), exp) tm.assert_series_equal(Series(np.array([np.nan, np.nan])), exp) exp = Series([pd.NaT, pd.NaT]) assert exp.dtype == 'datetime64[ns]' tm.assert_series_equal(Series([pd.NaT, pd.NaT]), exp) tm.assert_series_equal(Series(np.array([pd.NaT, pd.NaT])), exp) tm.assert_series_equal(Series([pd.NaT, np.nan]), exp) tm.assert_series_equal(Series(np.array([pd.NaT, np.nan])), exp) tm.assert_series_equal(Series([np.nan, pd.NaT]), exp) tm.assert_series_equal(Series(np.array([np.nan, pd.NaT])), exp) def test_constructor_cast(self): pytest.raises(ValueError, Series, ['a', 'b', 'c'], dtype=float) def test_constructor_dtype_nocast(self): # 1572 s = Series([1, 2, 3]) s2 = Series(s, dtype=np.int64) s2[1] = 5 assert s[1] == 5 def test_constructor_datelike_coercion(self): # GH 9477 # incorrectly inferring on dateimelike looking when object dtype is # specified s = Series([Timestamp('20130101'), 'NOV'], dtype=object) assert s.iloc[0] == Timestamp('20130101') assert s.iloc[1] == 'NOV' assert s.dtype == object # the dtype was being reset on the slicing and re-inferred to datetime # even thought the blocks are mixed belly = '216 3T19'.split() wing1 = '2T15 4H19'.split() wing2 = '416 4T20'.split() mat = pd.to_datetime('2016-01-22 2019-09-07'.split()) df = pd.DataFrame({ 'wing1': wing1, 'wing2': wing2, 'mat': mat }, index=belly) result = df.loc['3T19'] assert result.dtype == object result = df.loc['216'] assert result.dtype == object def test_constructor_datetimes_with_nulls(self): # gh-15869 for arr in [ np.array([None, None, None, None, datetime.now(), None]), np.array([None, None, datetime.now(), None]) ]: result = Series(arr) assert result.dtype == 'M8[ns]' def test_constructor_dtype_datetime64(self): s = Series(iNaT, dtype='M8[ns]', index=lrange(5)) assert isna(s).all() # in theory this should be all nulls, but since # we are not specifying a dtype is ambiguous s = Series(iNaT, index=lrange(5)) assert not isna(s).all() s = Series(nan, dtype='M8[ns]', index=lrange(5)) assert isna(s).all() s = Series([datetime(2001, 1, 2, 0, 0), iNaT], dtype='M8[ns]') assert isna(s[1]) assert s.dtype == 'M8[ns]' s = Series([datetime(2001, 1, 2, 0, 0), nan], dtype='M8[ns]') assert isna(s[1]) assert s.dtype == 'M8[ns]' # GH3416 dates = [ np.datetime64(datetime(2013, 1, 1)), np.datetime64(datetime(2013, 1, 2)), np.datetime64(datetime(2013, 1, 3)), ] s = Series(dates) assert s.dtype == 'M8[ns]' s.iloc[0] = np.nan assert s.dtype == 'M8[ns]' # GH3414 related pytest.raises( TypeError, lambda x: Series(Series(dates).astype('int') / 1000000, dtype='M8[ms]')) pytest.raises(TypeError, lambda x: Series(dates, dtype='datetime64')) # invalid dates can be help as object result = Series([datetime(2, 1, 1)]) assert result[0] == datetime(2, 1, 1, 0, 0) result = Series([datetime(3000, 1, 1)]) assert result[0] == datetime(3000, 1, 1, 0, 0) # don't mix types result = Series([Timestamp('20130101'), 1], index=['a', 'b']) assert result['a'] == Timestamp('20130101') assert result['b'] == 1 # GH6529 # coerce datetime64 non-ns properly dates = date_range('01-Jan-2015', '01-Dec-2015', freq='M') values2 = dates.view(np.ndarray).astype('datetime64[ns]') expected = Series(values2, index=dates) for dtype in ['s', 'D', 'ms', 'us', 'ns']: values1 = dates.view(np.ndarray).astype('M8[{0}]'.format(dtype)) result = Series(values1, dates) assert_series_equal(result, expected) # GH 13876 # coerce to non-ns to object properly expected = Series(values2, index=dates, dtype=object) for dtype in ['s', 'D', 'ms', 'us', 'ns']: values1 = dates.view(np.ndarray).astype('M8[{0}]'.format(dtype)) result = Series(values1, index=dates, dtype=object) assert_series_equal(result, expected) # leave datetime.date alone dates2 = np.array([d.date() for d in dates.to_pydatetime()], dtype=object) series1 = Series(dates2, dates) tm.assert_numpy_array_equal(series1.values, dates2) assert series1.dtype == object # these will correctly infer a datetime s = Series([None, pd.NaT, '2013-08-05 15:30:00.000001']) assert s.dtype == 'datetime64[ns]' s = Series([np.nan, pd.NaT, '2013-08-05 15:30:00.000001']) assert s.dtype == 'datetime64[ns]' s = Series([pd.NaT, None, '2013-08-05 15:30:00.000001']) assert s.dtype == 'datetime64[ns]' s = Series([pd.NaT, np.nan, '2013-08-05 15:30:00.000001']) assert s.dtype == 'datetime64[ns]' # tz-aware (UTC and other tz's) # GH 8411 dr = date_range('20130101', periods=3) assert Series(dr).iloc[0].tz is None dr = date_range('20130101', periods=3, tz='UTC') assert str(Series(dr).iloc[0].tz) == 'UTC' dr = date_range('20130101', periods=3, tz='US/Eastern') assert str(Series(dr).iloc[0].tz) == 'US/Eastern' # non-convertible s = Series([1479596223000, -1479590, pd.NaT]) assert s.dtype == 'object' assert s[2] is pd.NaT assert 'NaT' in str(s) # if we passed a NaT it remains s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), pd.NaT]) assert s.dtype == 'object' assert s[2] is pd.NaT assert 'NaT' in str(s) # if we passed a nan it remains s = Series([datetime(2010, 1, 1), datetime(2, 1, 1), np.nan]) assert s.dtype == 'object' assert s[2] is np.nan assert 'NaN' in str(s) def test_constructor_with_datetime_tz(self): # 8260 # support datetime64 with tz dr = date_range('20130101', periods=3, tz='US/Eastern') s = Series(dr) assert s.dtype.name == 'datetime64[ns, US/Eastern]' assert s.dtype == 'datetime64[ns, US/Eastern]' assert is_datetime64tz_dtype(s.dtype) assert 'datetime64[ns, US/Eastern]' in str(s) # export result = s.values assert isinstance(result, np.ndarray) assert result.dtype == 'datetime64[ns]' exp = pd.DatetimeIndex(result) exp = exp.tz_localize('UTC').tz_convert(tz=s.dt.tz) tm.assert_index_equal(dr, exp) # indexing result = s.iloc[0] assert result == Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern', freq='D') result = s[0] assert result == Timestamp('2013-01-01 00:00:00-0500', tz='US/Eastern', freq='D') result = s[Series([True, True, False], index=s.index)] assert_series_equal(result, s[0:2]) result = s.iloc[0:1] assert_series_equal(result, Series(dr[0:1])) # concat result = pd.concat([s.iloc[0:1], s.iloc[1:]]) assert_series_equal(result, s) # short str assert 'datetime64[ns, US/Eastern]' in str(s) # formatting with NaT result = s.shift() assert 'datetime64[ns, US/Eastern]' in str(result) assert 'NaT' in str(result) # long str t = Series(date_range('20130101', periods=1000, tz='US/Eastern')) assert 'datetime64[ns, US/Eastern]' in str(t) result = pd.DatetimeIndex(s, freq='infer') tm.assert_index_equal(result, dr) # inference s = Series([ pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'), pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Pacific') ]) assert s.dtype == 'datetime64[ns, US/Pacific]' assert lib.infer_dtype(s) == 'datetime64' s = Series([ pd.Timestamp('2013-01-01 13:00:00-0800', tz='US/Pacific'), pd.Timestamp('2013-01-02 14:00:00-0800', tz='US/Eastern') ]) assert s.dtype == 'object' assert lib.infer_dtype(s) == 'datetime' # with all NaT s = Series(pd.NaT, index=[0, 1], dtype='datetime64[ns, US/Eastern]') expected = Series(pd.DatetimeIndex(['NaT', 'NaT'], tz='US/Eastern')) assert_series_equal(s, expected) @pytest.mark.parametrize("arr_dtype", [np.int64, np.float64]) @pytest.mark.parametrize("dtype", ["M8", "m8"]) @pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D']) def test_construction_to_datetimelike_unit(self, arr_dtype, dtype, unit): # tests all units # gh-19223 dtype = "{}[{}]".format(dtype, unit) arr = np.array([1, 2, 3], dtype=arr_dtype) s = Series(arr) result = s.astype(dtype) expected = Series(arr.astype(dtype)) tm.assert_series_equal(result, expected) @pytest.mark.parametrize('arg', ['2013-01-01 00:00:00', pd.NaT, np.nan, None]) def test_constructor_with_naive_string_and_datetimetz_dtype(self, arg): # GH 17415: With naive string result = Series([arg], dtype='datetime64[ns, CET]') expected = Series(pd.Timestamp(arg)).dt.tz_localize('CET') assert_series_equal(result, expected) def test_construction_interval(self): # construction from interval & array of intervals index = IntervalIndex.from_breaks(np.arange(3), closed='right') result = Series(index) repr(result) str(result) tm.assert_index_equal(Index(result.values), index) result = Series(index.values) tm.assert_index_equal(Index(result.values), index) def test_construction_consistency(self): # make sure that we are not re-localizing upon construction # GH 14928 s = Series(pd.date_range('20130101', periods=3, tz='US/Eastern')) result = Series(s, dtype=s.dtype) tm.assert_series_equal(result, s) result = Series(s.dt.tz_convert('UTC'), dtype=s.dtype) tm.assert_series_equal(result, s) result = Series(s.values, dtype=s.dtype) tm.assert_series_equal(result, s) def test_constructor_periodindex(self): # GH7932 # converting a PeriodIndex when put in a Series pi = period_range('20130101', periods=5, freq='D') s = Series(pi) expected = Series(pi.astype(object)) assert_series_equal(s, expected) assert s.dtype == 'object' def test_constructor_dict(self): d = {'a': 0., 'b': 1., 'c': 2.} result = Series(d, index=['b', 'c', 'd', 'a']) expected = Series([1, 2, nan, 0], index=['b', 'c', 'd', 'a']) assert_series_equal(result, expected) pidx = tm.makePeriodIndex(100) d = {pidx[0]: 0, pidx[1]: 1} result = Series(d, index=pidx) expected = Series(np.nan, pidx) expected.iloc[0] = 0 expected.iloc[1] = 1 assert_series_equal(result, expected) def test_constructor_dict_order(self): # GH19018 # initialization ordering: by insertion order if python>= 3.6, else # order by value d = {'b': 1, 'a': 0, 'c': 2} result = Series(d) if PY36: expected = Series([1, 0, 2], index=list('bac')) else: expected = Series([0, 1, 2], index=list('abc')) tm.assert_series_equal(result, expected) @pytest.mark.parametrize("value", [2, np.nan, None, float('nan')]) def test_constructor_dict_nan_key(self, value): # GH 18480 d = {1: 'a', value: 'b', float('nan'): 'c', 4: 'd'} result = Series(d).sort_values() expected = Series(['a', 'b', 'c', 'd'], index=[1, value, np.nan, 4]) assert_series_equal(result, expected) # MultiIndex: d = {(1, 1): 'a', (2, np.nan): 'b', (3, value): 'c'} result = Series(d).sort_values() expected = Series(['a', 'b', 'c'], index=Index([(1, 1), (2, np.nan), (3, value)])) assert_series_equal(result, expected) def test_constructor_dict_datetime64_index(self): # GH 9456 dates_as_str = ['1984-02-19', '1988-11-06', '1989-12-03', '1990-03-15'] values = [42544017.198965244, 1234565, 40512335.181958228, -1] def create_data(constructor): return dict(zip((constructor(x) for x in dates_as_str), values)) data_datetime64 = create_data(np.datetime64) data_datetime = create_data(lambda x: datetime.strptime(x, '%Y-%m-%d')) data_Timestamp = create_data(Timestamp) expected = Series(values, (Timestamp(x) for x in dates_as_str)) result_datetime64 = Series(data_datetime64) result_datetime = Series(data_datetime) result_Timestamp = Series(data_Timestamp) assert_series_equal(result_datetime64, expected) assert_series_equal(result_datetime, expected) assert_series_equal(result_Timestamp, expected) def test_constructor_list_of_tuples(self): data = [(1, 1), (2, 2), (2, 3)] s = Series(data) assert list(s) == data def test_constructor_tuple_of_tuples(self): data = ((1, 1), (2, 2), (2, 3)) s = Series(data) assert tuple(s) == data def test_constructor_dict_of_tuples(self): data = {(1, 2): 3, (None, 5): 6} result = Series(data).sort_values() expected = Series([3, 6], index=MultiIndex.from_tuples([(1, 2), (None, 5)])) tm.assert_series_equal(result, expected) def test_constructor_set(self): values = set([1, 2, 3, 4, 5]) pytest.raises(TypeError, Series, values) values = frozenset(values) pytest.raises(TypeError, Series, values) def test_fromDict(self): data = {'a': 0, 'b': 1, 'c': 2, 'd': 3} series = Series(data) assert tm.is_sorted(series.index) data = {'a': 0, 'b': '1', 'c': '2', 'd': datetime.now()} series = Series(data) assert series.dtype == np.object_ data = {'a': 0, 'b': '1', 'c': '2', 'd': '3'} series = Series(data) assert series.dtype == np.object_ data = {'a': '0', 'b': '1'} series = Series(data, dtype=float) assert series.dtype == np.float64 def test_fromValue(self): nans = Series(np.NaN, index=self.ts.index) assert nans.dtype == np.float_ assert len(nans) == len(self.ts) strings = Series('foo', index=self.ts.index) assert strings.dtype == np.object_ assert len(strings) == len(self.ts) d = datetime.now() dates = Series(d, index=self.ts.index) assert dates.dtype == 'M8[ns]' assert len(dates) == len(self.ts) # GH12336 # POJO.Test construction of categorical series from value categorical = Series(0, index=self.ts.index, dtype="category") expected = Series(0, index=self.ts.index).astype("category") assert categorical.dtype == 'category' assert len(categorical) == len(self.ts) tm.assert_series_equal(categorical, expected) def test_constructor_dtype_timedelta64(self): # basic td = Series([timedelta(days=i) for i in range(3)]) assert td.dtype == 'timedelta64[ns]' td = Series([timedelta(days=1)]) assert td.dtype == 'timedelta64[ns]' td = Series( [timedelta(days=1), timedelta(days=2), np.timedelta64(1, 's')]) assert td.dtype == 'timedelta64[ns]' # mixed with NaT td = Series([timedelta(days=1), NaT], dtype='m8[ns]') assert td.dtype == 'timedelta64[ns]' td = Series([timedelta(days=1), np.nan], dtype='m8[ns]') assert td.dtype == 'timedelta64[ns]' td = Series([np.timedelta64(300000000), pd.NaT], dtype='m8[ns]') assert td.dtype == 'timedelta64[ns]' # improved inference # GH5689 td = Series([np.timedelta64(300000000), NaT]) assert td.dtype == 'timedelta64[ns]' # because iNaT is int, not coerced to timedelta td = Series([np.timedelta64(300000000), iNaT]) assert td.dtype == 'object' td = Series([np.timedelta64(300000000), np.nan]) assert td.dtype == 'timedelta64[ns]' td = Series([pd.NaT, np.timedelta64(300000000)]) assert td.dtype == 'timedelta64[ns]' td = Series([np.timedelta64(1, 's')]) assert td.dtype == 'timedelta64[ns]' # these are frequency conversion astypes # for t in ['s', 'D', 'us', 'ms']: # pytest.raises(TypeError, td.astype, 'm8[%s]' % t) # valid astype td.astype('int64') # invalid casting pytest.raises(TypeError, td.astype, 'int32') # this is an invalid casting def f(): Series([timedelta(days=1), 'foo'], dtype='m8[ns]') pytest.raises(Exception, f) # leave as object here td = Series([timedelta(days=i) for i in range(3)] + ['foo']) assert td.dtype == 'object' # these will correctly infer a timedelta s = Series([None, pd.NaT, '1 Day']) assert s.dtype == 'timedelta64[ns]' s = Series([np.nan, pd.NaT, '1 Day']) assert s.dtype == 'timedelta64[ns]' s = Series([pd.NaT, None, '1 Day']) assert s.dtype == 'timedelta64[ns]' s = Series([pd.NaT, np.nan, '1 Day']) assert s.dtype == 'timedelta64[ns]' # GH 16406 def test_constructor_mixed_tz(self): s = Series( [Timestamp('20130101'), Timestamp('20130101', tz='US/Eastern')]) expected = Series( [Timestamp('20130101'), Timestamp('20130101', tz='US/Eastern')], dtype='object') assert_series_equal(s, expected) def test_NaT_scalar(self): series = Series([0, 1000, 2000, iNaT], dtype='M8[ns]') val = series[3] assert isna(val) series[2] = val assert isna(series[2]) def test_NaT_cast(self): # GH10747 result = Series([np.nan]).astype('M8[ns]') expected = Series([NaT]) assert_series_equal(result, expected) def test_constructor_name_hashable(self): for n in [777, 777., 'name', datetime(2001, 11, 11), (1, ), u"\u05D0"]: for data in [[1, 2, 3], np.ones(3), {'a': 0, 'b': 1}]: s = Series(data, name=n) assert s.name == n def test_constructor_name_unhashable(self): for n in [['name_list'], np.ones(2), {1: 2}]: for data in [['name_list'], np.ones(2), {1: 2}]: pytest.raises(TypeError, Series, data, name=n) def test_auto_conversion(self): series = Series(list(date_range('1/1/2000', periods=10))) assert series.dtype == 'M8[ns]' def test_convert_non_ns(self): # convert from a numpy array of non-ns timedelta64 arr = np.array([1, 2, 3], dtype='timedelta64[s]') s = Series(arr) expected = Series(pd.timedelta_range('00:00:01', periods=3, freq='s')) assert_series_equal(s, expected) # convert from a numpy array of non-ns datetime64 # note that creating a numpy datetime64 is in LOCAL time!!!! # seems to work for M8[D], but not for M8[s] s = Series( np.array(['2013-01-01', '2013-01-02', '2013-01-03'], dtype='datetime64[D]')) assert_series_equal( s, Series(date_range('20130101', periods=3, freq='D'))) # s = Series(np.array(['2013-01-01 00:00:01','2013-01-01 # 00:00:02','2013-01-01 00:00:03'],dtype='datetime64[s]')) # assert_series_equal(s,date_range('20130101 # 00:00:01',period=3,freq='s')) @pytest.mark.parametrize("index", [ date_range('1/1/2000', periods=10), timedelta_range('1 day', periods=10), period_range('2000-Q1', periods=10, freq='Q') ], ids=lambda x: type(x).__name__) def test_constructor_cant_cast_datetimelike(self, index): # floats are not ok msg = "Cannot cast {} to ".format(type(index).__name__) with tm.assert_raises_regex(TypeError, msg): Series(index, dtype=float) # ints are ok # we test with np.int64 to get similar results on # windows / 32-bit platforms result = Series(index, dtype=np.int64) expected = Series(index.astype(np.int64)) tm.assert_series_equal(result, expected) @pytest.mark.parametrize("index", [ date_range('1/1/2000', periods=10), timedelta_range('1 day', periods=10), period_range('2000-Q1', periods=10, freq='Q') ], ids=lambda x: type(x).__name__) def test_constructor_cast_object(self, index): s = Series(index, dtype=object) exp = Series(index).astype(object) tm.assert_series_equal(s, exp) s = Series(pd.Index(index, dtype=object), dtype=object) exp = Series(index).astype(object) tm.assert_series_equal(s, exp) s = Series(index.astype(object), dtype=object) exp = Series(index).astype(object) tm.assert_series_equal(s, exp) def test_constructor_generic_timestamp_deprecated(self): # see gh-15524 with tm.assert_produces_warning(FutureWarning): dtype = np.timedelta64 s = Series([], dtype=dtype) assert s.empty assert s.dtype == 'm8[ns]' with tm.assert_produces_warning(FutureWarning): dtype = np.datetime64 s = Series([], dtype=dtype) assert s.empty assert s.dtype == 'M8[ns]' # These timestamps have the wrong frequencies, # so an Exception should be raised now. msg = "cannot convert timedeltalike" with tm.assert_raises_regex(TypeError, msg): Series([], dtype='m8[ps]') msg = "cannot convert datetimelike" with tm.assert_raises_regex(TypeError, msg): Series([], dtype='M8[ps]') @pytest.mark.parametrize('dtype', [None, 'uint8', 'category']) def test_constructor_range_dtype(self, dtype): # GH 16804 expected = Series([0, 1, 2, 3, 4], dtype=dtype or 'int64') result = Series(range(5), dtype=dtype) tm.assert_series_equal(result, expected)
def test_infer_freq(self, freq): # GH#11018 idx = pd.timedelta_range('1', freq=freq, periods=10) result = pd.TimedeltaIndex(idx.asi8, freq='infer') tm.assert_index_equal(idx, result) assert result.freq == freq
class TestTableOrientReader: @pytest.mark.parametrize( "index_nm", [ None, "idx", pytest.param("index", marks=pytest.mark.xfail), "level_0" ], ) @pytest.mark.parametrize( "vals", [ { "ints": [1, 2, 3, 4] }, { "objects": ["a", "b", "c", "d"] }, { "objects": ["1", "2", "3", "4"] }, { "date_ranges": pd.date_range("2016-01-01", freq="d", periods=4) }, { "categoricals": pd.Series(pd.Categorical(["a", "b", "c", "c"])) }, { "ordered_cats": pd.Series(pd.Categorical(["a", "b", "c", "c"], ordered=True)) }, { "floats": [1.0, 2.0, 3.0, 4.0] }, { "floats": [1.1, 2.2, 3.3, 4.4] }, { "bools": [True, False, False, True] }, ], ) def test_read_json_table_orient(self, index_nm, vals, recwarn): df = DataFrame(vals, index=pd.Index(range(4), name=index_nm)) out = df.to_json(orient="table") result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result) @pytest.mark.parametrize("index_nm", [None, "idx", "index"]) @pytest.mark.parametrize( "vals", [ { "timedeltas": pd.timedelta_range("1H", periods=4, freq="T") }, { "timezones": pd.date_range( "2016-01-01", freq="d", periods=4, tz="US/Central") }, ], ) def test_read_json_table_orient_raises(self, index_nm, vals, recwarn): df = DataFrame(vals, index=pd.Index(range(4), name=index_nm)) out = df.to_json(orient="table") with pytest.raises(NotImplementedError, match="can not yet read "): pd.read_json(out, orient="table") def test_comprehensive(self): df = DataFrame( { "A": [1, 2, 3, 4], "B": ["a", "b", "c", "c"], "C": pd.date_range("2016-01-01", freq="d", periods=4), # 'D': pd.timedelta_range('1H', periods=4, freq='T'), "E": pd.Series(pd.Categorical(["a", "b", "c", "c"])), "F": pd.Series( pd.Categorical(["a", "b", "c", "c"], ordered=True)), "G": [1.1, 2.2, 3.3, 4.4], # 'H': pd.date_range('2016-01-01', freq='d', periods=4, # tz='US/Central'), "I": [True, False, False, True], }, index=pd.Index(range(4), name="idx"), ) out = df.to_json(orient="table") result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result) @pytest.mark.parametrize( "index_names", [[None, None], ["foo", "bar"], ["foo", None], [None, "foo"], ["index", "foo"]], ) def test_multiindex(self, index_names): # GH 18912 df = pd.DataFrame( [["Arr", "alpha", [1, 2, 3, 4]], ["Bee", "Beta", [10, 20, 30, 40]] ], index=[["A", "B"], ["Null", "Eins"]], columns=["Aussprache", "Griechisch", "Args"], ) df.index.names = index_names out = df.to_json(orient="table") result = pd.read_json(out, orient="table") tm.assert_frame_equal(df, result) def test_empty_frame_roundtrip(self): # GH 21287 df = pd.DataFrame(columns=["a", "b", "c"]) expected = df.copy() out = df.to_json(orient="table") result = pd.read_json(out, orient="table") tm.assert_frame_equal(expected, result)
def create_data(): """ create the pickle/msgpack data """ data = { u'A': [0., 1., 2., 3., np.nan], u'B': [0, 1, 0, 1, 0], u'C': [u'foo1', u'foo2', u'foo3', u'foo4', u'foo5'], u'D': date_range('1/1/2009', periods=5), u'E': [0., 1, Timestamp('20100101'), u'foo', 2.] } scalars = dict(timestamp=Timestamp('20130101'), period=Period('2012', 'M')) index = dict(int=Index(np.arange(10)), date=date_range('20130101', periods=10), period=period_range('2013-01-01', freq='M', periods=10), float=Index(np.arange(10, dtype=np.float64)), uint=Index(np.arange(10, dtype=np.uint64)), timedelta=timedelta_range('00:00:00', freq='30T', periods=10)) if _loose_version >= LooseVersion('0.18'): from pandas import RangeIndex index['range'] = RangeIndex(10) if _loose_version >= LooseVersion('0.21'): from pandas import interval_range index['interval'] = interval_range(0, periods=10) mi = dict(reg2=MultiIndex.from_tuples( tuple(zip(*[[u'bar', u'bar', u'baz', u'baz', u'foo', u'foo', u'qux', u'qux'], [u'one', u'two', u'one', u'two', u'one', u'two', u'one', u'two']])), names=[u'first', u'second'])) series = dict(float=Series(data[u'A']), int=Series(data[u'B']), mixed=Series(data[u'E']), ts=Series(np.arange(10).astype(np.int64), index=date_range('20130101', periods=10)), mi=Series(np.arange(5).astype(np.float64), index=MultiIndex.from_tuples( tuple(zip(*[[1, 1, 2, 2, 2], [3, 4, 3, 4, 5]])), names=[u'one', u'two'])), dup=Series(np.arange(5).astype(np.float64), index=[u'A', u'B', u'C', u'D', u'A']), cat=Series(Categorical([u'foo', u'bar', u'baz'])), dt=Series(date_range('20130101', periods=5)), dt_tz=Series(date_range('20130101', periods=5, tz='US/Eastern')), period=Series([Period('2000Q1')] * 5)) mixed_dup_df = DataFrame(data) mixed_dup_df.columns = list(u"ABCDA") frame = dict(float=DataFrame({u'A': series[u'float'], u'B': series[u'float'] + 1}), int=DataFrame({u'A': series[u'int'], u'B': series[u'int'] + 1}), mixed=DataFrame({k: data[k] for k in [u'A', u'B', u'C', u'D']}), mi=DataFrame({u'A': np.arange(5).astype(np.float64), u'B': np.arange(5).astype(np.int64)}, index=MultiIndex.from_tuples( tuple(zip(*[[u'bar', u'bar', u'baz', u'baz', u'baz'], [u'one', u'two', u'one', u'two', u'three']])), names=[u'first', u'second'])), dup=DataFrame(np.arange(15).reshape(5, 3).astype(np.float64), columns=[u'A', u'B', u'A']), cat_onecol=DataFrame({u'A': Categorical([u'foo', u'bar'])}), cat_and_float=DataFrame({ u'A': Categorical([u'foo', u'bar', u'baz']), u'B': np.arange(3).astype(np.int64)}), mixed_dup=mixed_dup_df, dt_mixed_tzs=DataFrame({ u'A': Timestamp('20130102', tz='US/Eastern'), u'B': Timestamp('20130603', tz='CET')}, index=range(5)), dt_mixed2_tzs=DataFrame({ u'A': Timestamp('20130102', tz='US/Eastern'), u'B': Timestamp('20130603', tz='CET'), u'C': Timestamp('20130603', tz='UTC')}, index=range(5)) ) with catch_warnings(record=True): mixed_dup_panel = Panel({u'ItemA': frame[u'float'], u'ItemB': frame[u'int']}) mixed_dup_panel.items = [u'ItemA', u'ItemA'] panel = dict(float=Panel({u'ItemA': frame[u'float'], u'ItemB': frame[u'float'] + 1}), dup=Panel( np.arange(30).reshape(3, 5, 2).astype(np.float64), items=[u'A', u'B', u'A']), mixed_dup=mixed_dup_panel) cat = dict(int8=Categorical(list('abcdefg')), int16=Categorical(np.arange(1000)), int32=Categorical(np.arange(10000))) timestamp = dict(normal=Timestamp('2011-01-01'), nat=NaT, tz=Timestamp('2011-01-01', tz='US/Eastern')) if _loose_version < LooseVersion('0.19.2'): timestamp['freq'] = Timestamp('2011-01-01', offset='D') timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', offset='M') else: timestamp['freq'] = Timestamp('2011-01-01', freq='D') timestamp['both'] = Timestamp('2011-01-01', tz='Asia/Tokyo', freq='M') off = {'DateOffset': DateOffset(years=1), 'DateOffset_h_ns': DateOffset(hour=6, nanoseconds=5824), 'BusinessDay': BusinessDay(offset=timedelta(seconds=9)), 'BusinessHour': BusinessHour(normalize=True, n=6, end='15:14'), 'CustomBusinessDay': CustomBusinessDay(weekmask='Mon Fri'), 'SemiMonthBegin': SemiMonthBegin(day_of_month=9), 'SemiMonthEnd': SemiMonthEnd(day_of_month=24), 'MonthBegin': MonthBegin(1), 'MonthEnd': MonthEnd(1), 'QuarterBegin': QuarterBegin(1), 'QuarterEnd': QuarterEnd(1), 'Day': Day(1), 'YearBegin': YearBegin(1), 'YearEnd': YearEnd(1), 'Week': Week(1), 'Week_Tues': Week(2, normalize=False, weekday=1), 'WeekOfMonth': WeekOfMonth(week=3, weekday=4), 'LastWeekOfMonth': LastWeekOfMonth(n=1, weekday=3), 'FY5253': FY5253(n=2, weekday=6, startingMonth=7, variation="last"), 'Easter': Easter(), 'Hour': Hour(1), 'Minute': Minute(1)} return dict(series=series, frame=frame, panel=panel, index=index, scalars=scalars, mi=mi, sp_series=dict(float=_create_sp_series(), ts=_create_sp_tsseries()), sp_frame=dict(float=_create_sp_frame()), cat=cat, timestamp=timestamp, offsets=off)
def test_timedelta_methods(method): s = pd.Series(pd.timedelta_range("2000", periods=4)) s.attrs = {"a": 1} result = method(s.dt) assert result.attrs == {"a": 1}
def test_linspace_behavior(self, periods, freq): # GH 20976 result = timedelta_range(start="0 days", end="4 days", periods=periods) expected = timedelta_range(start="0 days", end="4 days", freq=freq) tm.assert_index_equal(result, expected)
def test_constructor_timedelta64_values_mismatched_dtype(self): # check we don't silently ignore the dtype keyword tdi = timedelta_range("4 Days", periods=5) result = Index(tdi, dtype="category") expected = CategoricalIndex(tdi) tm.assert_index_equal(result, expected)