def test_hash_pandas_object(self): for obj in [Series([1, 2, 3]), Series([1.0, 1.5, 3.2]), Series([1.0, 1.5, np.nan]), Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]), Series(['a', 'b', 'c']), Series(['a', np.nan, 'c']), Series(['a', None, 'c']), Series([True, False, True]), Series(), Index([1, 2, 3]), Index([True, False, True]), DataFrame({'x': ['a', 'b', 'c'], 'y': [1, 2, 3]}), DataFrame(), tm.makeMissingDataframe(), tm.makeMixedDataFrame(), tm.makeTimeDataFrame(), tm.makeTimeSeries(), tm.makeTimedeltaIndex(), tm.makePeriodIndex(), Series(tm.makePeriodIndex()), Series(pd.date_range('20130101', periods=3, tz='US/Eastern')), MultiIndex.from_product( [range(5), ['foo', 'bar', 'baz'], pd.date_range('20130101', periods=2)]), MultiIndex.from_product( [pd.CategoricalIndex(list('aabc')), range(3)])]: self.check_equal(obj) self.check_not_equal_with_index(obj)
def setup_method(self, method): self.bool_index = tm.makeBoolIndex(10, name='a') self.int_index = tm.makeIntIndex(10, name='a') self.float_index = tm.makeFloatIndex(10, name='a') self.dt_index = tm.makeDateIndex(10, name='a') self.dt_tz_index = tm.makeDateIndex( 10, name='a').tz_localize(tz='US/Eastern') self.period_index = tm.makePeriodIndex(10, name='a') self.string_index = tm.makeStringIndex(10, name='a') self.unicode_index = tm.makeUnicodeIndex(10, name='a') arr = np.random.randn(10) self.bool_series = Series(arr, index=self.bool_index, name='a') self.int_series = Series(arr, index=self.int_index, name='a') self.float_series = Series(arr, index=self.float_index, name='a') self.dt_series = Series(arr, index=self.dt_index, name='a') self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True) self.period_series = Series(arr, index=self.period_index, name='a') self.string_series = Series(arr, index=self.string_index, name='a') self.unicode_series = Series(arr, index=self.unicode_index, name='a') types = [ 'bool', 'int', 'float', 'dt', 'dt_tz', 'period', 'string', 'unicode' ] self.indexes = [getattr(self, '{}_index'.format(t)) for t in types] self.series = [getattr(self, '{}_series'.format(t)) for t in types] self.objs = self.indexes + self.series
def setup_method(self, method): self.bool_index = tm.makeBoolIndex(10, name='a') self.int_index = tm.makeIntIndex(10, name='a') self.float_index = tm.makeFloatIndex(10, name='a') self.dt_index = tm.makeDateIndex(10, name='a') self.dt_tz_index = tm.makeDateIndex(10, name='a').tz_localize( tz='US/Eastern') self.period_index = tm.makePeriodIndex(10, name='a') self.string_index = tm.makeStringIndex(10, name='a') self.unicode_index = tm.makeUnicodeIndex(10, name='a') arr = np.random.randn(10) self.bool_series = Series(arr, index=self.bool_index, name='a') self.int_series = Series(arr, index=self.int_index, name='a') self.float_series = Series(arr, index=self.float_index, name='a') self.dt_series = Series(arr, index=self.dt_index, name='a') self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True) self.period_series = Series(arr, index=self.period_index, name='a') self.string_series = Series(arr, index=self.string_index, name='a') self.unicode_series = Series(arr, index=self.unicode_index, name='a') types = ['bool', 'int', 'float', 'dt', 'dt_tz', 'period', 'string', 'unicode'] self.indexes = [getattr(self, '{}_index'.format(t)) for t in types] self.series = [getattr(self, '{}_series'.format(t)) for t in types] self.objs = self.indexes + self.series
def setUp(self): self.bool_index = tm.makeBoolIndex(10, name='a') self.int_index = tm.makeIntIndex(10, name='a') self.float_index = tm.makeFloatIndex(10, name='a') self.dt_index = tm.makeDateIndex(10, name='a') self.dt_tz_index = tm.makeDateIndex(10, name='a').tz_localize( tz='US/Eastern') self.period_index = tm.makePeriodIndex(10, name='a') self.string_index = tm.makeStringIndex(10, name='a') self.unicode_index = tm.makeUnicodeIndex(10, name='a') arr = np.random.randn(10) self.int_series = Series(arr, index=self.int_index, name='a') self.float_series = Series(arr, index=self.float_index, name='a') self.dt_series = Series(arr, index=self.dt_index, name='a') self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True) self.period_series = Series(arr, index=self.period_index, name='a') self.string_series = Series(arr, index=self.string_index, name='a') types = ['bool', 'int', 'float', 'dt', 'dt_tz', 'period', 'string', 'unicode'] fmts = ["{0}_{1}".format(t, f) for t in types for f in ['index', 'series']] self.objs = [getattr(self, f) for f in fmts if getattr(self, f, None) is not None]
def test_invalid_index_types(self): # test all index types for i in [tm.makeIntIndex(10), tm.makeFloatIndex(10), tm.makePeriodIndex(10)]: self.assertRaises(TypeError, lambda: infer_freq(i)) for i in [tm.makeStringIndex(10), tm.makeUnicodeIndex(10)]: self.assertRaises(ValueError, lambda: infer_freq(i))
def test_invalid_index_types(self): # test all index types for i in [tm.makeIntIndex(10), tm.makeFloatIndex(10), tm.makePeriodIndex(10)]: self.assertRaises(TypeError, lambda: frequencies.infer_freq(i)) # GH 10822 # odd error message on conversions to datetime for unicode if not is_platform_windows(): for i in [tm.makeStringIndex(10), tm.makeUnicodeIndex(10)]: self.assertRaises(ValueError, lambda: frequencies.infer_freq(i))
def test_invalid_index_types(self): # test all index types for i in [ tm.makeIntIndex(10), tm.makeFloatIndex(10), tm.makePeriodIndex(10) ]: self.assertRaises(TypeError, lambda : infer_freq(i)) for i in [ tm.makeStringIndex(10), tm.makeUnicodeIndex(10) ]: self.assertRaises(ValueError, lambda : infer_freq(i))
def test_invalid_index_types(self): # test all index types for i in [tm.makeIntIndex(10), tm.makeFloatIndex(10), tm.makePeriodIndex(10)]: pytest.raises(TypeError, lambda: frequencies.infer_freq(i)) # GH 10822 # odd error message on conversions to datetime for unicode if not is_platform_windows(): for i in [tm.makeStringIndex(10), tm.makeUnicodeIndex(10)]: pytest.raises(ValueError, lambda: frequencies.infer_freq(i))
def test_constructor_dict(self): d = {'a': 0., 'b': 1., 'c': 2.} result = Series(d, index=['b', 'c', 'd', 'a']) expected = Series([1, 2, nan, 0], index=['b', 'c', 'd', 'a']) assert_series_equal(result, expected) pidx = tm.makePeriodIndex(100) d = {pidx[0]: 0, pidx[1]: 1} result = Series(d, index=pidx) expected = Series(np.nan, pidx) expected.iloc[0] = 0 expected.iloc[1] = 1 assert_series_equal(result, expected)
def test_constructor_dict(self): d = {'a': 0., 'b': 1., 'c': 2.} result = Series(d, index=['b', 'c', 'd', 'a']) expected = Series([1, 2, nan, 0], index=['b', 'c', 'd', 'a']) assert_series_equal(result, expected) pidx = tm.makePeriodIndex(100) d = {pidx[0]: 0, pidx[1]: 1} result = Series(d, index=pidx) expected = Series(np.nan, pidx) expected.ix[0] = 0 expected.ix[1] = 1 assert_series_equal(result, expected)
def test_constructor_dict(self): d = {"a": 0.0, "b": 1.0, "c": 2.0} result = Series(d, index=["b", "c", "d", "a"]) expected = Series([1, 2, np.nan, 0], index=["b", "c", "d", "a"]) tm.assert_series_equal(result, expected) pidx = tm.makePeriodIndex(100) d = {pidx[0]: 0, pidx[1]: 1} result = Series(d, index=pidx) expected = Series(np.nan, pidx) expected.iloc[0] = 0 expected.iloc[1] = 1 tm.assert_series_equal(result, expected)
def test_hash_pandas_object(self): for obj in [ Series([1, 2, 3]), Series([1.0, 1.5, 3.2]), Series([1.0, 1.5, np.nan]), Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]), Series(['a', 'b', 'c']), Series(['a', np.nan, 'c']), Series(['a', None, 'c']), Series([True, False, True]), Series(), Index([1, 2, 3]), Index([True, False, True]), DataFrame({ 'x': ['a', 'b', 'c'], 'y': [1, 2, 3] }), DataFrame(), tm.makeMissingDataframe(), tm.makeMixedDataFrame(), tm.makeTimeDataFrame(), tm.makeTimeSeries(), tm.makeTimedeltaIndex(), tm.makePeriodIndex(), Series(tm.makePeriodIndex()), Series(pd.date_range('20130101', periods=3, tz='US/Eastern')), MultiIndex.from_product([ range(5), ['foo', 'bar', 'baz'], pd.date_range('20130101', periods=2) ]), MultiIndex.from_product( [pd.CategoricalIndex(list('aabc')), range(3)]) ]: self.check_equal(obj) self.check_not_equal_with_index(obj)
def setUp(self): self.int_index = tm.makeIntIndex(10) self.float_index = tm.makeFloatIndex(10) self.dt_index = tm.makeDateIndex(10) self.period_index = tm.makePeriodIndex(10) self.string_index = tm.makeStringIndex(10) arr = np.random.randn(10) self.int_series = Series(arr, index=self.int_index) self.float_series = Series(arr, index=self.int_index) self.dt_series = Series(arr, index=self.dt_index) self.period_series = Series(arr, index=self.period_index) self.string_series = Series(arr, index=self.string_index) self.objs = [ getattr(self,"{0}_{1}".format(t,f)) for t in ['int','float','dt','period','string'] for f in ['index','series'] ]
def get_objs(): indexes = [ tm.makeBoolIndex(10, name="a"), tm.makeIntIndex(10, name="a"), tm.makeFloatIndex(10, name="a"), tm.makeDateIndex(10, name="a"), tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern"), tm.makePeriodIndex(10, name="a"), tm.makeStringIndex(10, name="a"), tm.makeUnicodeIndex(10, name="a"), ] arr = np.random.randn(10) series = [Series(arr, index=idx, name="a") for idx in indexes] objs = indexes + series return objs
def get_objs(): indexes = [ tm.makeBoolIndex(10, name='a'), tm.makeIntIndex(10, name='a'), tm.makeFloatIndex(10, name='a'), tm.makeDateIndex(10, name='a'), tm.makeDateIndex(10, name='a').tz_localize(tz='US/Eastern'), tm.makePeriodIndex(10, name='a'), tm.makeStringIndex(10, name='a'), tm.makeUnicodeIndex(10, name='a') ] arr = np.random.randn(10) series = [Series(arr, index=idx, name='a') for idx in indexes] objs = indexes + series return objs
def test_tseries_indices_series(self): idx = tm.makeDateIndex(10) ser = Series(np.random.randn(len(idx)), idx) self.store['a'] = ser result = self.store['a'] assert_series_equal(result, ser) self.assertEquals(type(result.index), type(ser.index)) self.assertEquals(result.index.freq, ser.index.freq) idx = tm.makePeriodIndex(10) ser = Series(np.random.randn(len(idx)), idx) self.store['a'] = ser result = self.store['a'] assert_series_equal(result, ser) self.assertEquals(type(result.index), type(ser.index)) self.assertEquals(result.index.freq, ser.index.freq)
def setUp(self): self.int_index = tm.makeIntIndex(10) self.float_index = tm.makeFloatIndex(10) self.dt_index = tm.makeDateIndex(10) self.dt_tz_index = tm.makeDateIndex(10).tz_localize(tz='US/Eastern') self.period_index = tm.makePeriodIndex(10) self.string_index = tm.makeStringIndex(10) arr = np.random.randn(10) self.int_series = Series(arr, index=self.int_index) self.float_series = Series(arr, index=self.int_index) self.dt_series = Series(arr, index=self.dt_index) self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True) self.period_series = Series(arr, index=self.period_index) self.string_series = Series(arr, index=self.string_index) types = ['int','float','dt', 'dt_tz', 'period','string'] self.objs = [ getattr(self,"{0}_{1}".format(t,f)) for t in types for f in ['index','series'] ]
def test_tseries_indices_frame(self): idx = tm.makeDateIndex(10) df = DataFrame(np.random.randn(len(idx), 3), index=idx) self.store['a'] = df result = self.store['a'] assert_frame_equal(result, df) self.assertEquals(type(result.index), type(df.index)) self.assertEquals(result.index.freq, df.index.freq) idx = tm.makePeriodIndex(10) df = DataFrame(np.random.randn(len(idx), 3), idx) self.store['a'] = df result = self.store['a'] assert_frame_equal(result, df) self.assertEquals(type(result.index), type(df.index)) self.assertEquals(result.index.freq, df.index.freq)
def setUp(self): self.int_index = tm.makeIntIndex(10) self.float_index = tm.makeFloatIndex(10) self.dt_index = tm.makeDateIndex(10) self.dt_tz_index = tm.makeDateIndex(10).tz_localize(tz="US/Eastern") self.period_index = tm.makePeriodIndex(10) self.string_index = tm.makeStringIndex(10) arr = np.random.randn(10) self.int_series = Series(arr, index=self.int_index) self.float_series = Series(arr, index=self.int_index) self.dt_series = Series(arr, index=self.dt_index) self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True) self.period_series = Series(arr, index=self.period_index) self.string_series = Series(arr, index=self.string_index) types = ["int", "float", "dt", "dt_tz", "period", "string"] self.objs = [getattr(self, "{0}_{1}".format(t, f)) for t in types for f in ["index", "series"]]
def setUp(self): self.int_index = tm.makeIntIndex(10) self.float_index = tm.makeFloatIndex(10) self.dt_index = tm.makeDateIndex(10) self.period_index = tm.makePeriodIndex(10) self.string_index = tm.makeStringIndex(10) arr = np.random.randn(10) self.int_series = Series(arr, index=self.int_index) self.float_series = Series(arr, index=self.int_index) self.dt_series = Series(arr, index=self.dt_index) self.period_series = Series(arr, index=self.period_index) self.string_series = Series(arr, index=self.string_index) self.objs = [ getattr(self, "{0}_{1}".format(t, f)) for t in ['int', 'float', 'dt', 'period', 'string'] for f in ['index', 'series'] ]
def setUp(self): self.bool_index = tm.makeBoolIndex(10, name="a") self.int_index = tm.makeIntIndex(10, name="a") self.float_index = tm.makeFloatIndex(10, name="a") self.dt_index = tm.makeDateIndex(10, name="a") self.dt_tz_index = tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern") self.period_index = tm.makePeriodIndex(10, name="a") self.string_index = tm.makeStringIndex(10, name="a") self.unicode_index = tm.makeUnicodeIndex(10, name="a") arr = np.random.randn(10) self.int_series = Series(arr, index=self.int_index, name="a") self.float_series = Series(arr, index=self.float_index, name="a") self.dt_series = Series(arr, index=self.dt_index, name="a") self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True) self.period_series = Series(arr, index=self.period_index, name="a") self.string_series = Series(arr, index=self.string_index, name="a") types = ["bool", "int", "float", "dt", "dt_tz", "period", "string", "unicode"] fmts = ["{0}_{1}".format(t, f) for t in types for f in ["index", "series"]] self.objs = [getattr(self, f) for f in fmts if getattr(self, f, None) is not None]
def setUp(self): self.int_index = tm.makeIntIndex(10) self.float_index = tm.makeFloatIndex(10) self.dt_index = tm.makeDateIndex(10) self.dt_tz_index = tm.makeDateIndex(10).tz_localize(tz='US/Eastern') self.period_index = tm.makePeriodIndex(10) self.string_index = tm.makeStringIndex(10) arr = np.random.randn(10) self.int_series = Series(arr, index=self.int_index) self.float_series = Series(arr, index=self.int_index) self.dt_series = Series(arr, index=self.dt_index) self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True) self.period_series = Series(arr, index=self.period_index) self.string_series = Series(arr, index=self.string_index) types = ['int', 'float', 'dt', 'dt_tz', 'period', 'string'] self.objs = [ getattr(self, "{0}_{1}".format(t, f)) for t in types for f in ['index', 'series'] ]
class TestSeriesMisc(TestData, SharedWithSparse): series_klass = Series # SharedWithSparse tests use generic, series_klass-agnostic assertion _assert_series_equal = staticmethod(tm.assert_series_equal) def test_tab_completion(self): # GH 9910 s = Series(list('abcd')) # Series of str values should have .str but not .dt/.cat in __dir__ assert 'str' in dir(s) assert 'dt' not in dir(s) assert 'cat' not in dir(s) # similarly for .dt s = Series(date_range('1/1/2015', periods=5)) assert 'dt' in dir(s) assert 'str' not in dir(s) assert 'cat' not in dir(s) # Similarly for .cat, but with the twist that str and dt should be # there if the categories are of that type first cat and str. s = Series(list('abbcd'), dtype="category") assert 'cat' in dir(s) assert 'str' in dir(s) # as it is a string categorical assert 'dt' not in dir(s) # similar to cat and str s = Series(date_range('1/1/2015', periods=5)).astype("category") assert 'cat' in dir(s) assert 'str' not in dir(s) assert 'dt' in dir(s) # as it is a datetime categorical def test_tab_completion_with_categorical(self): # test the tab completion display ok_for_cat = [ 'categories', 'codes', 'ordered', 'set_categories', 'add_categories', 'remove_categories', 'rename_categories', 'reorder_categories', 'remove_unused_categories', 'as_ordered', 'as_unordered' ] def get_dir(s): results = [r for r in s.cat.__dir__() if not r.startswith('_')] return list(sorted(set(results))) s = Series(list('aabbcde')).astype('category') results = get_dir(s) tm.assert_almost_equal(results, list(sorted(set(ok_for_cat)))) @pytest.mark.parametrize("index", [ tm.makeUnicodeIndex(10), tm.makeStringIndex(10), tm.makeCategoricalIndex(10), Index(['foo', 'bar', 'baz'] * 2), tm.makeDateIndex(10), tm.makePeriodIndex(10), tm.makeTimedeltaIndex(10), tm.makeIntIndex(10), tm.makeUIntIndex(10), tm.makeIntIndex(10), tm.makeFloatIndex(10), Index([True, False]), Index(['a{}'.format(i) for i in range(101)]), pd.MultiIndex.from_tuples(lzip('ABCD', 'EFGH')), pd.MultiIndex.from_tuples(lzip([0, 1, 2, 3], 'EFGH')), ]) def test_index_tab_completion(self, index): # dir contains string-like values of the Index. s = pd.Series(index=index) dir_s = dir(s) for i, x in enumerate(s.index.unique(level=0)): if i < 100: assert (not isinstance(x, string_types) or not isidentifier(x) or x in dir_s) else: assert x not in dir_s def test_not_hashable(self): s_empty = Series() s = Series([1]) pytest.raises(TypeError, hash, s_empty) pytest.raises(TypeError, hash, s) def test_contains(self): tm.assert_contains_all(self.ts.index, self.ts) def test_iter(self): for i, val in enumerate(self.series): assert val == self.series[i] for i, val in enumerate(self.ts): assert val == self.ts[i] def test_keys(self): # HACK: By doing this in two stages, we avoid 2to3 wrapping the call # to .keys() in a list() getkeys = self.ts.keys assert getkeys() is self.ts.index def test_values(self): tm.assert_almost_equal(self.ts.values, self.ts, check_dtype=False) def test_iteritems(self): for idx, val in compat.iteritems(self.series): assert val == self.series[idx] for idx, val in compat.iteritems(self.ts): assert val == self.ts[idx] # assert is lazy (genrators don't define reverse, lists do) assert not hasattr(self.series.iteritems(), 'reverse') def test_items(self): for idx, val in self.series.items(): assert val == self.series[idx] for idx, val in self.ts.items(): assert val == self.ts[idx] # assert is lazy (genrators don't define reverse, lists do) assert not hasattr(self.series.items(), 'reverse') def test_raise_on_info(self): s = Series(np.random.randn(10)) with pytest.raises(AttributeError): s.info() def test_copy(self): for deep in [None, False, True]: s = Series(np.arange(10), dtype='float64') # default deep is True if deep is None: s2 = s.copy() else: s2 = s.copy(deep=deep) s2[::2] = np.NaN if deep is None or deep is True: # Did not modify original Series assert np.isnan(s2[0]) assert not np.isnan(s[0]) else: # we DID modify the original Series assert np.isnan(s2[0]) assert np.isnan(s[0]) # GH 11794 # copy of tz-aware expected = Series([Timestamp('2012/01/01', tz='UTC')]) expected2 = Series([Timestamp('1999/01/01', tz='UTC')]) for deep in [None, False, True]: s = Series([Timestamp('2012/01/01', tz='UTC')]) if deep is None: s2 = s.copy() else: s2 = s.copy(deep=deep) s2[0] = pd.Timestamp('1999/01/01', tz='UTC') # default deep is True if deep is None or deep is True: # Did not modify original Series assert_series_equal(s2, expected2) assert_series_equal(s, expected) else: # we DID modify the original Series assert_series_equal(s2, expected2) assert_series_equal(s, expected2) def test_axis_alias(self): s = Series([1, 2, np.nan]) assert_series_equal(s.dropna(axis='rows'), s.dropna(axis='index')) assert s.dropna().sum('rows') == 3 assert s._get_axis_number('rows') == 0 assert s._get_axis_name('rows') == 'index' def test_class_axis(self): # https://github.com/pandas-dev/pandas/issues/18147 # no exception and no empty docstring assert pydoc.getdoc(Series.index) def test_numpy_unique(self): # it works! np.unique(self.ts) def test_ndarray_compat(self): # test numpy compat with Series as sub-class of NDFrame tsdf = DataFrame(np.random.randn(1000, 3), columns=['A', 'B', 'C'], index=date_range('1/1/2000', periods=1000)) def f(x): return x[x.idxmax()] result = tsdf.apply(f) expected = tsdf.max() tm.assert_series_equal(result, expected) # .item() s = Series([1]) result = s.item() assert result == 1 assert s.item() == s.iloc[0] # using an ndarray like function s = Series(np.random.randn(10)) result = Series(np.ones_like(s)) expected = Series(1, index=range(10), dtype='float64') tm.assert_series_equal(result, expected) # ravel s = Series(np.random.randn(10)) tm.assert_almost_equal(s.ravel(order='F'), s.values.ravel(order='F')) # compress # GH 6658 s = Series([0, 1., -1], index=list('abc')) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s > 0, s) tm.assert_series_equal(result, Series([1.], index=['b'])) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s < -1, s) # result empty Index(dtype=object) as the same as original exp = Series([], dtype='float64', index=Index([], dtype='object')) tm.assert_series_equal(result, exp) s = Series([0, 1., -1], index=[.1, .2, .3]) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s > 0, s) tm.assert_series_equal(result, Series([1.], index=[.2])) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s < -1, s) # result empty Float64Index as the same as original exp = Series([], dtype='float64', index=Index([], dtype='float64')) tm.assert_series_equal(result, exp) def test_str_attribute(self): # GH9068 methods = ['strip', 'rstrip', 'lstrip'] s = Series([' jack', 'jill ', ' jesse ', 'frank']) for method in methods: expected = Series([getattr(str, method)(x) for x in s.values]) assert_series_equal(getattr(Series.str, method)(s.str), expected) # str accessor only valid with string values s = Series(range(5)) with tm.assert_raises_regex(AttributeError, 'only use .str accessor'): s.str.repeat(2) def test_empty_method(self): s_empty = pd.Series() assert s_empty.empty for full_series in [pd.Series([1]), pd.Series(index=[1])]: assert not full_series.empty def test_tab_complete_warning(self, ip): # https://github.com/pandas-dev/pandas/issues/16409 pytest.importorskip('IPython', minversion="6.0.0") from IPython.core.completer import provisionalcompleter code = "import pandas as pd; s = pd.Series()" ip.run_code(code) with tm.assert_produces_warning(None): with provisionalcompleter('ignore'): list(ip.Completer.completions('s.', 1))
class TestSeriesMisc: def test_scalarop_preserve_name(self, datetime_series): result = datetime_series * 2 assert result.name == datetime_series.name def test_copy_name(self, datetime_series): result = datetime_series.copy() assert result.name == datetime_series.name def test_copy_index_name_checking(self, datetime_series): # don't want to be able to modify the index stored elsewhere after # making a copy datetime_series.index.name = None assert datetime_series.index.name is None assert datetime_series is datetime_series cp = datetime_series.copy() cp.index.name = "foo" printing.pprint_thing(datetime_series.index.name) assert datetime_series.index.name is None def test_append_preserve_name(self, datetime_series): result = datetime_series[:5].append(datetime_series[5:]) assert result.name == datetime_series.name def test_binop_maybe_preserve_name(self, datetime_series): # names match, preserve result = datetime_series * datetime_series assert result.name == datetime_series.name result = datetime_series.mul(datetime_series) assert result.name == datetime_series.name result = datetime_series * datetime_series[:-2] assert result.name == datetime_series.name # names don't match, don't preserve cp = datetime_series.copy() cp.name = "something else" result = datetime_series + cp assert result.name is None result = datetime_series.add(cp) assert result.name is None ops = ["add", "sub", "mul", "div", "truediv", "floordiv", "mod", "pow"] ops = ops + ["r" + op for op in ops] for op in ops: # names match, preserve s = datetime_series.copy() result = getattr(s, op)(s) assert result.name == datetime_series.name # names don't match, don't preserve cp = datetime_series.copy() cp.name = "changed" result = getattr(s, op)(cp) assert result.name is None def test_combine_first_name(self, datetime_series): result = datetime_series.combine_first(datetime_series[:5]) assert result.name == datetime_series.name def test_getitem_preserve_name(self, datetime_series): result = datetime_series[datetime_series > 0] assert result.name == datetime_series.name result = datetime_series[[0, 2, 4]] assert result.name == datetime_series.name result = datetime_series[5:10] assert result.name == datetime_series.name def test_pickle_datetimes(self, datetime_series): unp_ts = self._pickle_roundtrip(datetime_series) tm.assert_series_equal(unp_ts, datetime_series) def test_pickle_strings(self, string_series): unp_series = self._pickle_roundtrip(string_series) tm.assert_series_equal(unp_series, string_series) def _pickle_roundtrip(self, obj): with tm.ensure_clean() as path: obj.to_pickle(path) unpickled = pd.read_pickle(path) return unpickled def test_argsort_preserve_name(self, datetime_series): result = datetime_series.argsort() assert result.name == datetime_series.name def test_sort_index_name(self, datetime_series): result = datetime_series.sort_index(ascending=False) assert result.name == datetime_series.name def test_constructor_dict(self): d = {"a": 0.0, "b": 1.0, "c": 2.0} result = Series(d) expected = Series(d, index=sorted(d.keys())) tm.assert_series_equal(result, expected) result = Series(d, index=["b", "c", "d", "a"]) expected = Series([1, 2, np.nan, 0], index=["b", "c", "d", "a"]) tm.assert_series_equal(result, expected) def test_constructor_subclass_dict(self): data = tm.TestSubDict((x, 10.0 * x) for x in range(10)) series = Series(data) expected = Series(dict(data.items())) tm.assert_series_equal(series, expected) def test_constructor_ordereddict(self): # GH3283 data = OrderedDict( ("col{i}".format(i=i), np.random.random()) for i in range(12) ) series = Series(data) expected = Series(list(data.values()), list(data.keys())) tm.assert_series_equal(series, expected) # Test with subclass class A(OrderedDict): pass series = Series(A(data)) tm.assert_series_equal(series, expected) def test_constructor_dict_multiindex(self): d = {("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0} _d = sorted(d.items()) result = Series(d) expected = Series( [x[1] for x in _d], index=pd.MultiIndex.from_tuples([x[0] for x in _d]) ) tm.assert_series_equal(result, expected) d["z"] = 111.0 _d.insert(0, ("z", d["z"])) result = Series(d) expected = Series( [x[1] for x in _d], index=pd.Index([x[0] for x in _d], tupleize_cols=False) ) result = result.reindex(index=expected.index) tm.assert_series_equal(result, expected) def test_constructor_dict_timedelta_index(self): # GH #12169 : Resample category data with timedelta index # construct Series from dict as data and TimedeltaIndex as index # will result NaN in result Series data expected = Series( data=["A", "B", "C"], index=pd.to_timedelta([0, 10, 20], unit="s") ) result = Series( data={ pd.to_timedelta(0, unit="s"): "A", pd.to_timedelta(10, unit="s"): "B", pd.to_timedelta(20, unit="s"): "C", }, index=pd.to_timedelta([0, 10, 20], unit="s"), ) tm.assert_series_equal(result, expected) def test_sparse_accessor_updates_on_inplace(self): s = pd.Series([1, 1, 2, 3], dtype="Sparse[int]") s.drop([0, 1], inplace=True) assert s.sparse.density == 1.0 def test_tab_completion(self): # GH 9910 s = Series(list("abcd")) # Series of str values should have .str but not .dt/.cat in __dir__ assert "str" in dir(s) assert "dt" not in dir(s) assert "cat" not in dir(s) # similarly for .dt s = Series(date_range("1/1/2015", periods=5)) assert "dt" in dir(s) assert "str" not in dir(s) assert "cat" not in dir(s) # Similarly for .cat, but with the twist that str and dt should be # there if the categories are of that type first cat and str. s = Series(list("abbcd"), dtype="category") assert "cat" in dir(s) assert "str" in dir(s) # as it is a string categorical assert "dt" not in dir(s) # similar to cat and str s = Series(date_range("1/1/2015", periods=5)).astype("category") assert "cat" in dir(s) assert "str" not in dir(s) assert "dt" in dir(s) # as it is a datetime categorical def test_tab_completion_with_categorical(self): # test the tab completion display ok_for_cat = [ "categories", "codes", "ordered", "set_categories", "add_categories", "remove_categories", "rename_categories", "reorder_categories", "remove_unused_categories", "as_ordered", "as_unordered", ] def get_dir(s): results = [r for r in s.cat.__dir__() if not r.startswith("_")] return sorted(set(results)) s = Series(list("aabbcde")).astype("category") results = get_dir(s) tm.assert_almost_equal(results, sorted(set(ok_for_cat))) @pytest.mark.parametrize( "index", [ tm.makeUnicodeIndex(10), tm.makeStringIndex(10), tm.makeCategoricalIndex(10), Index(["foo", "bar", "baz"] * 2), tm.makeDateIndex(10), tm.makePeriodIndex(10), tm.makeTimedeltaIndex(10), tm.makeIntIndex(10), tm.makeUIntIndex(10), tm.makeIntIndex(10), tm.makeFloatIndex(10), Index([True, False]), Index(["a{}".format(i) for i in range(101)]), pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")), pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")), ], ) def test_index_tab_completion(self, index): # dir contains string-like values of the Index. s = pd.Series(index=index) dir_s = dir(s) for i, x in enumerate(s.index.unique(level=0)): if i < 100: assert not isinstance(x, str) or not x.isidentifier() or x in dir_s else: assert x not in dir_s def test_not_hashable(self): s_empty = Series() s = Series([1]) msg = "'Series' objects are mutable, thus they cannot be hashed" with pytest.raises(TypeError, match=msg): hash(s_empty) with pytest.raises(TypeError, match=msg): hash(s) def test_contains(self, datetime_series): tm.assert_contains_all(datetime_series.index, datetime_series) def test_iter_datetimes(self, datetime_series): for i, val in enumerate(datetime_series): assert val == datetime_series[i] def test_iter_strings(self, string_series): for i, val in enumerate(string_series): assert val == string_series[i] def test_keys(self, datetime_series): # HACK: By doing this in two stages, we avoid 2to3 wrapping the call # to .keys() in a list() getkeys = datetime_series.keys assert getkeys() is datetime_series.index def test_values(self, datetime_series): tm.assert_almost_equal( datetime_series.values, datetime_series, check_dtype=False ) def test_iteritems_datetimes(self, datetime_series): for idx, val in datetime_series.iteritems(): assert val == datetime_series[idx] def test_iteritems_strings(self, string_series): for idx, val in string_series.iteritems(): assert val == string_series[idx] # assert is lazy (genrators don't define reverse, lists do) assert not hasattr(string_series.iteritems(), "reverse") def test_items_datetimes(self, datetime_series): for idx, val in datetime_series.items(): assert val == datetime_series[idx] def test_items_strings(self, string_series): for idx, val in string_series.items(): assert val == string_series[idx] # assert is lazy (genrators don't define reverse, lists do) assert not hasattr(string_series.items(), "reverse") def test_raise_on_info(self): s = Series(np.random.randn(10)) msg = "'Series' object has no attribute 'info'" with pytest.raises(AttributeError, match=msg): s.info() def test_copy(self): for deep in [None, False, True]: s = Series(np.arange(10), dtype="float64") # default deep is True if deep is None: s2 = s.copy() else: s2 = s.copy(deep=deep) s2[::2] = np.NaN if deep is None or deep is True: # Did not modify original Series assert np.isnan(s2[0]) assert not np.isnan(s[0]) else: # we DID modify the original Series assert np.isnan(s2[0]) assert np.isnan(s[0]) def test_copy_tzaware(self): # GH#11794 # copy of tz-aware expected = Series([Timestamp("2012/01/01", tz="UTC")]) expected2 = Series([Timestamp("1999/01/01", tz="UTC")]) for deep in [None, False, True]: s = Series([Timestamp("2012/01/01", tz="UTC")]) if deep is None: s2 = s.copy() else: s2 = s.copy(deep=deep) s2[0] = pd.Timestamp("1999/01/01", tz="UTC") # default deep is True if deep is None or deep is True: # Did not modify original Series tm.assert_series_equal(s2, expected2) tm.assert_series_equal(s, expected) else: # we DID modify the original Series tm.assert_series_equal(s2, expected2) tm.assert_series_equal(s, expected2) def test_axis_alias(self): s = Series([1, 2, np.nan]) tm.assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index")) assert s.dropna().sum("rows") == 3 assert s._get_axis_number("rows") == 0 assert s._get_axis_name("rows") == "index" def test_class_axis(self): # https://github.com/pandas-dev/pandas/issues/18147 # no exception and no empty docstring assert pydoc.getdoc(Series.index) def test_numpy_unique(self, datetime_series): # it works! np.unique(datetime_series) def test_ndarray_compat(self): # test numpy compat with Series as sub-class of NDFrame tsdf = DataFrame( np.random.randn(1000, 3), columns=["A", "B", "C"], index=date_range("1/1/2000", periods=1000), ) def f(x): return x[x.idxmax()] result = tsdf.apply(f) expected = tsdf.max() tm.assert_series_equal(result, expected) # .item() with tm.assert_produces_warning(FutureWarning): s = Series([1]) result = s.item() assert result == 1 assert s.item() == s.iloc[0] # using an ndarray like function s = Series(np.random.randn(10)) result = Series(np.ones_like(s)) expected = Series(1, index=range(10), dtype="float64") tm.assert_series_equal(result, expected) # ravel s = Series(np.random.randn(10)) tm.assert_almost_equal(s.ravel(order="F"), s.values.ravel(order="F")) # compress # GH 6658 s = Series([0, 1.0, -1], index=list("abc")) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s > 0, s) tm.assert_series_equal(result, Series([1.0], index=["b"])) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s < -1, s) # result empty Index(dtype=object) as the same as original exp = Series([], dtype="float64", index=Index([], dtype="object")) tm.assert_series_equal(result, exp) s = Series([0, 1.0, -1], index=[0.1, 0.2, 0.3]) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s > 0, s) tm.assert_series_equal(result, Series([1.0], index=[0.2])) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s < -1, s) # result empty Float64Index as the same as original exp = Series([], dtype="float64", index=Index([], dtype="float64")) tm.assert_series_equal(result, exp) def test_str_accessor_updates_on_inplace(self): s = pd.Series(list("abc")) s.drop([0], inplace=True) assert len(s.str.lower()) == 2 def test_str_attribute(self): # GH9068 methods = ["strip", "rstrip", "lstrip"] s = Series([" jack", "jill ", " jesse ", "frank"]) for method in methods: expected = Series([getattr(str, method)(x) for x in s.values]) tm.assert_series_equal(getattr(Series.str, method)(s.str), expected) # str accessor only valid with string values s = Series(range(5)) with pytest.raises(AttributeError, match="only use .str accessor"): s.str.repeat(2) def test_empty_method(self): s_empty = pd.Series() assert s_empty.empty for full_series in [pd.Series([1]), pd.Series(index=[1])]: assert not full_series.empty def test_tab_complete_warning(self, ip): # https://github.com/pandas-dev/pandas/issues/16409 pytest.importorskip("IPython", minversion="6.0.0") from IPython.core.completer import provisionalcompleter code = "import pandas as pd; s = pd.Series()" ip.run_code(code) with tm.assert_produces_warning(None): with provisionalcompleter("ignore"): list(ip.Completer.completions("s.", 1)) def test_integer_series_size(self): # GH 25580 s = Series(range(9)) assert s.size == 9 s = Series(range(9), dtype="Int64") assert s.size == 9 def test_get_values_deprecation(self): s = Series(range(9)) with tm.assert_produces_warning(FutureWarning): res = s.get_values() tm.assert_numpy_array_equal(res, s.values)
import numpy as np import pytest import pandas as pd from pandas.core.indexes.api import Index, MultiIndex import pandas.util.testing as tm @pytest.fixture(params=[tm.makeUnicodeIndex(100), tm.makeStringIndex(100), tm.makeDateIndex(100), tm.makePeriodIndex(100), tm.makeTimedeltaIndex(100), tm.makeIntIndex(100), tm.makeUIntIndex(100), tm.makeRangeIndex(100), tm.makeFloatIndex(100), Index([True, False]), tm.makeCategoricalIndex(100), Index([]), MultiIndex.from_tuples(zip( ['foo', 'bar', 'baz'], [1, 2, 3])), Index([0, 0, 1, 1, 2, 2])], ids=lambda x: type(x).__name__) def indices(request): return request.param @pytest.fixture(params=[1, np.array(1, dtype=np.int64)]) def one(request): # zero-dim integer array behaves like an integer
class TestSeriesMisc(TestData, SharedWithSparse): series_klass = Series # SharedWithSparse tests use generic, series_klass-agnostic assertion _assert_series_equal = staticmethod(tm.assert_series_equal) def test_tab_completion(self): # GH 9910 s = Series(list("abcd")) # Series of str values should have .str but not .dt/.cat in __dir__ assert "str" in dir(s) assert "dt" not in dir(s) assert "cat" not in dir(s) # similarly for .dt s = Series(date_range("1/1/2015", periods=5)) assert "dt" in dir(s) assert "str" not in dir(s) assert "cat" not in dir(s) # Similarly for .cat, but with the twist that str and dt should be # there if the categories are of that type first cat and str. s = Series(list("abbcd"), dtype="category") assert "cat" in dir(s) assert "str" in dir(s) # as it is a string categorical assert "dt" not in dir(s) # similar to cat and str s = Series(date_range("1/1/2015", periods=5)).astype("category") assert "cat" in dir(s) assert "str" not in dir(s) assert "dt" in dir(s) # as it is a datetime categorical def test_tab_completion_with_categorical(self): # test the tab completion display ok_for_cat = [ "name", "index", "categorical", "categories", "codes", "ordered", "set_categories", "add_categories", "remove_categories", "rename_categories", "reorder_categories", "remove_unused_categories", "as_ordered", "as_unordered", ] def get_dir(s): results = [r for r in s.cat.__dir__() if not r.startswith("_")] return list(sorted(set(results))) s = Series(list("aabbcde")).astype("category") results = get_dir(s) tm.assert_almost_equal(results, list(sorted(set(ok_for_cat)))) @pytest.mark.parametrize( "index", [ tm.makeUnicodeIndex(10), tm.makeStringIndex(10), tm.makeCategoricalIndex(10), Index(["foo", "bar", "baz"] * 2), tm.makeDateIndex(10), tm.makePeriodIndex(10), tm.makeTimedeltaIndex(10), tm.makeIntIndex(10), tm.makeUIntIndex(10), tm.makeIntIndex(10), tm.makeFloatIndex(10), Index([True, False]), Index(["a{}".format(i) for i in range(101)]), pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")), pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")), ], ) def test_index_tab_completion(self, index): # dir contains string-like values of the Index. s = pd.Series(index=index) dir_s = dir(s) for i, x in enumerate(s.index.unique(level=0)): if i < 100: assert not isinstance( x, str) or not x.isidentifier() or x in dir_s else: assert x not in dir_s def test_not_hashable(self): s_empty = Series() s = Series([1]) msg = "'Series' objects are mutable, thus they cannot be hashed" with pytest.raises(TypeError, match=msg): hash(s_empty) with pytest.raises(TypeError, match=msg): hash(s) def test_contains(self): tm.assert_contains_all(self.ts.index, self.ts) def test_iter(self): for i, val in enumerate(self.series): assert val == self.series[i] for i, val in enumerate(self.ts): assert val == self.ts[i] def test_keys(self): # HACK: By doing this in two stages, we avoid 2to3 wrapping the call # to .keys() in a list() getkeys = self.ts.keys assert getkeys() is self.ts.index def test_values(self): tm.assert_almost_equal(self.ts.values, self.ts, check_dtype=False) def test_iteritems(self): for idx, val in self.series.iteritems(): assert val == self.series[idx] for idx, val in self.ts.iteritems(): assert val == self.ts[idx] # assert is lazy (genrators don't define reverse, lists do) assert not hasattr(self.series.iteritems(), "reverse") def test_items(self): for idx, val in self.series.items(): assert val == self.series[idx] for idx, val in self.ts.items(): assert val == self.ts[idx] # assert is lazy (genrators don't define reverse, lists do) assert not hasattr(self.series.items(), "reverse") def test_raise_on_info(self): s = Series(np.random.randn(10)) msg = "'Series' object has no attribute 'info'" with pytest.raises(AttributeError, match=msg): s.info() def test_copy(self): for deep in [None, False, True]: s = Series(np.arange(10), dtype="float64") # default deep is True if deep is None: s2 = s.copy() else: s2 = s.copy(deep=deep) s2[::2] = np.NaN if deep is None or deep is True: # Did not modify original Series assert np.isnan(s2[0]) assert not np.isnan(s[0]) else: # we DID modify the original Series assert np.isnan(s2[0]) assert np.isnan(s[0]) def test_copy_tzaware(self): # GH#11794 # copy of tz-aware expected = Series([Timestamp("2012/01/01", tz="UTC")]) expected2 = Series([Timestamp("1999/01/01", tz="UTC")]) for deep in [None, False, True]: s = Series([Timestamp("2012/01/01", tz="UTC")]) if deep is None: s2 = s.copy() else: s2 = s.copy(deep=deep) s2[0] = pd.Timestamp("1999/01/01", tz="UTC") # default deep is True if deep is None or deep is True: # Did not modify original Series assert_series_equal(s2, expected2) assert_series_equal(s, expected) else: # we DID modify the original Series assert_series_equal(s2, expected2) assert_series_equal(s, expected2) def test_axis_alias(self): s = Series([1, 2, np.nan]) assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index")) assert s.dropna().sum("rows") == 3 assert s._get_axis_number("rows") == 0 assert s._get_axis_name("rows") == "index" def test_class_axis(self): # https://github.com/pandas-dev/pandas/issues/18147 # no exception and no empty docstring assert pydoc.getdoc(Series.index) def test_numpy_unique(self): # it works! np.unique(self.ts) def test_ndarray_compat(self): # test numpy compat with Series as sub-class of NDFrame tsdf = DataFrame( np.random.randn(1000, 3), columns=["A", "B", "C"], index=date_range("1/1/2000", periods=1000), ) def f(x): return x[x.idxmax()] result = tsdf.apply(f) expected = tsdf.max() tm.assert_series_equal(result, expected) # .item() with tm.assert_produces_warning(FutureWarning): s = Series([1]) result = s.item() assert result == 1 assert s.item() == s.iloc[0] # using an ndarray like function s = Series(np.random.randn(10)) result = Series(np.ones_like(s)) expected = Series(1, index=range(10), dtype="float64") tm.assert_series_equal(result, expected) # ravel s = Series(np.random.randn(10)) tm.assert_almost_equal(s.ravel(order="F"), s.values.ravel(order="F")) # compress # GH 6658 s = Series([0, 1.0, -1], index=list("abc")) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s > 0, s) tm.assert_series_equal(result, Series([1.0], index=["b"])) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s < -1, s) # result empty Index(dtype=object) as the same as original exp = Series([], dtype="float64", index=Index([], dtype="object")) tm.assert_series_equal(result, exp) s = Series([0, 1.0, -1], index=[0.1, 0.2, 0.3]) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s > 0, s) tm.assert_series_equal(result, Series([1.0], index=[0.2])) with tm.assert_produces_warning(FutureWarning, check_stacklevel=False): result = np.compress(s < -1, s) # result empty Float64Index as the same as original exp = Series([], dtype="float64", index=Index([], dtype="float64")) tm.assert_series_equal(result, exp) def test_str_accessor_updates_on_inplace(self): s = pd.Series(list("abc")) s.drop([0], inplace=True) assert len(s.str.lower()) == 2 def test_str_attribute(self): # GH9068 methods = ["strip", "rstrip", "lstrip"] s = Series([" jack", "jill ", " jesse ", "frank"]) for method in methods: expected = Series([getattr(str, method)(x) for x in s.values]) assert_series_equal(getattr(Series.str, method)(s.str), expected) # str accessor only valid with string values s = Series(range(5)) with pytest.raises(AttributeError, match="only use .str accessor"): s.str.repeat(2) def test_empty_method(self): s_empty = pd.Series() assert s_empty.empty for full_series in [pd.Series([1]), pd.Series(index=[1])]: assert not full_series.empty def test_tab_complete_warning(self, ip): # https://github.com/pandas-dev/pandas/issues/16409 pytest.importorskip("IPython", minversion="6.0.0") from IPython.core.completer import provisionalcompleter code = "import pandas as pd; s = pd.Series()" ip.run_code(code) with tm.assert_produces_warning(None): with provisionalcompleter("ignore"): list(ip.Completer.completions("s.", 1)) def test_integer_series_size(self): # GH 25580 s = Series(range(9)) assert s.size == 9 s = Series(range(9), dtype="Int64") assert s.size == 9 def test_get_values_deprecation(self): s = Series(range(9)) with tm.assert_produces_warning(FutureWarning): res = s.get_values() tm.assert_numpy_array_equal(res, s.values)
def setup_method(self, method): self.indices = dict(index=tm.makePeriodIndex(10), index_dec=period_range('20130101', periods=10, freq='D')[::-1]) self.setup_indices()
assert rng.inferred_freq == "-1A-JAN" def test_non_datetime_index2(): rng = DatetimeIndex(["1/31/2000", "1/31/2001", "1/31/2002"]) vals = rng.to_pydatetime() result = frequencies.infer_freq(vals) assert result == rng.inferred_freq @pytest.mark.parametrize( "idx", [tm.makeIntIndex(10), tm.makeFloatIndex(10), tm.makePeriodIndex(10)]) def test_invalid_index_types(idx): msg = ("(cannot infer freq from a non-convertible)|" "(Check the `freq` attribute instead of using infer_freq)") with pytest.raises(TypeError, match=msg): frequencies.infer_freq(idx) @pytest.mark.skipif(is_platform_windows(), reason="see gh-10822: Windows issue") @pytest.mark.parametrize( "idx", [tm.makeStringIndex(10), tm.makeUnicodeIndex(10)]) def test_invalid_index_types_unicode(idx): # see gh-10822
class TestPeriodIndex(DatetimeLike): _holder = PeriodIndex @pytest.fixture( params=[ tm.makePeriodIndex(10), period_range("20130101", periods=10, freq="D")[::-1], ], ids=["index_inc", "index_dec"], ) def indices(self, request): return request.param def create_index(self): return period_range("20130101", periods=5, freq="D") def test_pickle_compat_construction(self): pass @pytest.mark.parametrize("freq", ["D", "M", "A"]) def test_pickle_round_trip(self, freq): idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.NaN], freq=freq) result = tm.round_trip_pickle(idx) tm.assert_index_equal(result, idx) def test_where(self): # This is handled in test_indexing pass @pytest.mark.parametrize("use_numpy", [True, False]) @pytest.mark.parametrize( "index", [ pd.period_range("2000-01-01", periods=3, freq="D"), pd.period_range("2001-01-01", periods=3, freq="2D"), pd.PeriodIndex(["2001-01", "NaT", "2003-01"], freq="M"), ], ) def test_repeat_freqstr(self, index, use_numpy): # GH10183 expected = PeriodIndex([p for p in index for _ in range(3)]) result = np.repeat(index, 3) if use_numpy else index.repeat(3) tm.assert_index_equal(result, expected) assert result.freqstr == index.freqstr def test_fillna_period(self): # GH 11343 idx = pd.PeriodIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"], freq="H") exp = pd.PeriodIndex( ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"], freq="H") tm.assert_index_equal( idx.fillna(pd.Period("2011-01-01 10:00", freq="H")), exp) exp = pd.Index( [ pd.Period("2011-01-01 09:00", freq="H"), "x", pd.Period("2011-01-01 11:00", freq="H"), ], dtype=object, ) tm.assert_index_equal(idx.fillna("x"), exp) exp = pd.Index( [ pd.Period("2011-01-01 09:00", freq="H"), pd.Period("2011-01-01", freq="D"), pd.Period("2011-01-01 11:00", freq="H"), ], dtype=object, ) tm.assert_index_equal(idx.fillna(pd.Period("2011-01-01", freq="D")), exp) def test_no_millisecond_field(self): msg = "type object 'DatetimeIndex' has no attribute 'millisecond'" with pytest.raises(AttributeError, match=msg): DatetimeIndex.millisecond msg = "'DatetimeIndex' object has no attribute 'millisecond'" with pytest.raises(AttributeError, match=msg): DatetimeIndex([]).millisecond @pytest.mark.parametrize("sort", [None, False]) def test_difference_freq(self, sort): # GH14323: difference of Period MUST preserve frequency # but the ability to union results must be preserved index = period_range("20160920", "20160925", freq="D") other = period_range("20160921", "20160924", freq="D") expected = PeriodIndex(["20160920", "20160925"], freq="D") idx_diff = index.difference(other, sort) tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal("freq", idx_diff, expected) other = period_range("20160922", "20160925", freq="D") idx_diff = index.difference(other, sort) expected = PeriodIndex(["20160920", "20160921"], freq="D") tm.assert_index_equal(idx_diff, expected) tm.assert_attr_equal("freq", idx_diff, expected) def test_hash_error(self): index = period_range("20010101", periods=10) msg = "unhashable type: '{}'".format(type(index).__name__) with pytest.raises(TypeError, match=msg): hash(index) def test_make_time_series(self): index = period_range(freq="A", start="1/1/2001", end="12/1/2009") series = Series(1, index=index) assert isinstance(series, Series) def test_shallow_copy_empty(self): # GH13067 idx = PeriodIndex([], freq="M") result = idx._shallow_copy() expected = idx tm.assert_index_equal(result, expected) def test_shallow_copy_i8(self): # GH-24391 pi = period_range("2018-01-01", periods=3, freq="2D") result = pi._shallow_copy(pi.asi8, freq=pi.freq) tm.assert_index_equal(result, pi) def test_shallow_copy_changing_freq_raises(self): pi = period_range("2018-01-01", periods=3, freq="2D") msg = "specified freq and dtype are different" with pytest.raises(IncompatibleFrequency, match=msg): pi._shallow_copy(pi, freq="H") def test_view_asi8(self): idx = pd.PeriodIndex([], freq="M") exp = np.array([], dtype=np.int64) tm.assert_numpy_array_equal(idx.view("i8"), exp) tm.assert_numpy_array_equal(idx.asi8, exp) idx = pd.PeriodIndex(["2011-01", pd.NaT], freq="M") exp = np.array([492, -9223372036854775808], dtype=np.int64) tm.assert_numpy_array_equal(idx.view("i8"), exp) tm.assert_numpy_array_equal(idx.asi8, exp) exp = np.array([14975, -9223372036854775808], dtype=np.int64) idx = pd.PeriodIndex(["2011-01-01", pd.NaT], freq="D") tm.assert_numpy_array_equal(idx.view("i8"), exp) tm.assert_numpy_array_equal(idx.asi8, exp) def test_values(self): idx = pd.PeriodIndex([], freq="M") exp = np.array([], dtype=np.object) tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.to_numpy(), exp) exp = np.array([], dtype=np.int64) tm.assert_numpy_array_equal(idx._ndarray_values, exp) idx = pd.PeriodIndex(["2011-01", pd.NaT], freq="M") exp = np.array([pd.Period("2011-01", freq="M"), pd.NaT], dtype=object) tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.to_numpy(), exp) exp = np.array([492, -9223372036854775808], dtype=np.int64) tm.assert_numpy_array_equal(idx._ndarray_values, exp) idx = pd.PeriodIndex(["2011-01-01", pd.NaT], freq="D") exp = np.array([pd.Period("2011-01-01", freq="D"), pd.NaT], dtype=object) tm.assert_numpy_array_equal(idx.values, exp) tm.assert_numpy_array_equal(idx.to_numpy(), exp) exp = np.array([14975, -9223372036854775808], dtype=np.int64) tm.assert_numpy_array_equal(idx._ndarray_values, exp) def test_period_index_length(self): pi = period_range(freq="A", start="1/1/2001", end="12/1/2009") assert len(pi) == 9 pi = period_range(freq="Q", start="1/1/2001", end="12/1/2009") assert len(pi) == 4 * 9 pi = period_range(freq="M", start="1/1/2001", end="12/1/2009") assert len(pi) == 12 * 9 start = Period("02-Apr-2005", "B") i1 = period_range(start=start, periods=20) assert len(i1) == 20 assert i1.freq == start.freq assert i1[0] == start end_intv = Period("2006-12-31", "W") i1 = period_range(end=end_intv, periods=10) assert len(i1) == 10 assert i1.freq == end_intv.freq assert i1[-1] == end_intv end_intv = Period("2006-12-31", "1w") i2 = period_range(end=end_intv, periods=10) assert len(i1) == len(i2) assert (i1 == i2).all() assert i1.freq == i2.freq end_intv = Period("2006-12-31", ("w", 1)) i2 = period_range(end=end_intv, periods=10) assert len(i1) == len(i2) assert (i1 == i2).all() assert i1.freq == i2.freq msg = "start and end must have same freq" with pytest.raises(ValueError, match=msg): period_range(start=start, end=end_intv) end_intv = Period("2005-05-01", "B") i1 = period_range(start=start, end=end_intv) msg = ("Of the three parameters: start, end, and periods, exactly two" " must be specified") with pytest.raises(ValueError, match=msg): period_range(start=start) # infer freq from first element i2 = PeriodIndex([end_intv, Period("2005-05-05", "B")]) assert len(i2) == 2 assert i2[0] == end_intv i2 = PeriodIndex(np.array([end_intv, Period("2005-05-05", "B")])) assert len(i2) == 2 assert i2[0] == end_intv # Mixed freq should fail vals = [end_intv, Period("2006-12-31", "w")] msg = r"Input has different freq=W-SUN from PeriodIndex\(freq=B\)" with pytest.raises(IncompatibleFrequency, match=msg): PeriodIndex(vals) vals = np.array(vals) with pytest.raises(ValueError, match=msg): PeriodIndex(vals) def test_fields(self): # year, month, day, hour, minute # second, weekofyear, week, dayofweek, weekday, dayofyear, quarter # qyear pi = period_range(freq="A", start="1/1/2001", end="12/1/2005") self._check_all_fields(pi) pi = period_range(freq="Q", start="1/1/2001", end="12/1/2002") self._check_all_fields(pi) pi = period_range(freq="M", start="1/1/2001", end="1/1/2002") self._check_all_fields(pi) pi = period_range(freq="D", start="12/1/2001", end="6/1/2001") self._check_all_fields(pi) pi = period_range(freq="B", start="12/1/2001", end="6/1/2001") self._check_all_fields(pi) pi = period_range(freq="H", start="12/31/2001", end="1/1/2002 23:00") self._check_all_fields(pi) pi = period_range(freq="Min", start="12/31/2001", end="1/1/2002 00:20") self._check_all_fields(pi) pi = period_range(freq="S", start="12/31/2001 00:00:00", end="12/31/2001 00:05:00") self._check_all_fields(pi) end_intv = Period("2006-12-31", "W") i1 = period_range(end=end_intv, periods=10) self._check_all_fields(i1) def _check_all_fields(self, periodindex): fields = [ "year", "month", "day", "hour", "minute", "second", "weekofyear", "week", "dayofweek", "dayofyear", "quarter", "qyear", "days_in_month", ] periods = list(periodindex) s = pd.Series(periodindex) for field in fields: field_idx = getattr(periodindex, field) assert len(periodindex) == len(field_idx) for x, val in zip(periods, field_idx): assert getattr(x, field) == val if len(s) == 0: continue field_s = getattr(s.dt, field) assert len(periodindex) == len(field_s) for x, val in zip(periods, field_s): assert getattr(x, field) == val def test_period_set_index_reindex(self): # GH 6631 df = DataFrame(np.random.random(6)) idx1 = period_range("2011/01/01", periods=6, freq="M") idx2 = period_range("2013", periods=6, freq="A") df = df.set_index(idx1) tm.assert_index_equal(df.index, idx1) df = df.set_index(idx2) tm.assert_index_equal(df.index, idx2) @pytest.mark.parametrize( "p_values, o_values, values, expected_values", [ ( [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")], [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC"), "All"], [1.0, 1.0], [1.0, 1.0, np.nan], ), ( [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")], [Period("2019Q1", "Q-DEC"), Period("2019Q2", "Q-DEC")], [1.0, 1.0], [1.0, 1.0], ), ], ) def test_period_reindex_with_object(self, p_values, o_values, values, expected_values): # GH 28337 period_index = PeriodIndex(p_values) object_index = Index(o_values) s = pd.Series(values, index=period_index) result = s.reindex(object_index) expected = pd.Series(expected_values, index=object_index) tm.assert_series_equal(result, expected) def test_factorize(self): idx1 = PeriodIndex( ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"], freq="M") exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp) exp_idx = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M") arr, idx = idx1.factorize() tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) arr, idx = idx1.factorize(sort=True) tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) idx2 = pd.PeriodIndex( ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"], freq="M") exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp) arr, idx = idx2.factorize(sort=True) tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp) exp_idx = PeriodIndex(["2014-03", "2014-02", "2014-01"], freq="M") arr, idx = idx2.factorize() tm.assert_numpy_array_equal(arr, exp_arr) tm.assert_index_equal(idx, exp_idx) def test_is_(self): create_index = lambda: period_range( freq="A", start="1/1/2001", end="12/1/2009") index = create_index() assert index.is_(index) assert not index.is_(create_index()) assert index.is_(index.view()) assert index.is_(index.view().view().view().view().view()) assert index.view().is_(index) ind2 = index.view() index.name = "Apple" assert ind2.is_(index) assert not index.is_(index[:]) assert not index.is_(index.asfreq("M")) assert not index.is_(index.asfreq("A")) assert not index.is_(index - 2) assert not index.is_(index - 0) def test_contains(self): rng = period_range("2007-01", freq="M", periods=10) assert Period("2007-01", freq="M") in rng assert not Period("2007-01", freq="D") in rng assert not Period("2007-01", freq="2M") in rng def test_contains_nat(self): # see gh-13582 idx = period_range("2007-01", freq="M", periods=10) assert pd.NaT not in idx assert None not in idx assert float("nan") not in idx assert np.nan not in idx idx = pd.PeriodIndex(["2011-01", "NaT", "2011-02"], freq="M") assert pd.NaT in idx assert None in idx assert float("nan") in idx assert np.nan in idx def test_periods_number_check(self): msg = ("Of the three parameters: start, end, and periods, exactly two" " must be specified") with pytest.raises(ValueError, match=msg): period_range("2011-1-1", "2012-1-1", "B") def test_start_time(self): # GH 17157 index = period_range(freq="M", start="2016-01-01", end="2016-05-31") expected_index = date_range("2016-01-01", end="2016-05-31", freq="MS") tm.assert_index_equal(index.start_time, expected_index) def test_end_time(self): # GH 17157 index = period_range(freq="M", start="2016-01-01", end="2016-05-31") expected_index = date_range("2016-01-01", end="2016-05-31", freq="M") expected_index = expected_index.shift(1, freq="D").shift(-1, freq="ns") tm.assert_index_equal(index.end_time, expected_index) def test_index_duplicate_periods(self): # monotonic idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN") ts = Series(np.random.randn(len(idx)), index=idx) result = ts[2007] expected = ts[1:3] tm.assert_series_equal(result, expected) result[:] = 1 assert (ts[1:3] == 1).all() # not monotonic idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN") ts = Series(np.random.randn(len(idx)), index=idx) result = ts[2007] expected = ts[idx == 2007] tm.assert_series_equal(result, expected) def test_index_unique(self): idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN") expected = PeriodIndex([2000, 2007, 2009], freq="A-JUN") tm.assert_index_equal(idx.unique(), expected) assert idx.nunique() == 3 idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN", tz="US/Eastern") expected = PeriodIndex([2000, 2007, 2009], freq="A-JUN", tz="US/Eastern") tm.assert_index_equal(idx.unique(), expected) assert idx.nunique() == 3 def test_shift(self): # This is tested in test_arithmetic pass @td.skip_if_32bit def test_ndarray_compat_properties(self): super().test_ndarray_compat_properties() def test_negative_ordinals(self): Period(ordinal=-1000, freq="A") Period(ordinal=0, freq="A") idx1 = PeriodIndex(ordinal=[-1, 0, 1], freq="A") idx2 = PeriodIndex(ordinal=np.array([-1, 0, 1]), freq="A") tm.assert_index_equal(idx1, idx2) def test_pindex_fieldaccessor_nat(self): idx = PeriodIndex(["2011-01", "2011-02", "NaT", "2012-03", "2012-04"], freq="D", name="name") exp = Index([2011, 2011, -1, 2012, 2012], dtype=np.int64, name="name") tm.assert_index_equal(idx.year, exp) exp = Index([1, 2, -1, 3, 4], dtype=np.int64, name="name") tm.assert_index_equal(idx.month, exp) def test_pindex_qaccess(self): pi = PeriodIndex(["2Q05", "3Q05", "4Q05", "1Q06", "2Q06"], freq="Q") s = Series(np.random.rand(len(pi)), index=pi).cumsum() # Todo: fix these accessors! assert s["05Q4"] == s[2] def test_pindex_multiples(self): expected = PeriodIndex( ["2011-01", "2011-03", "2011-05", "2011-07", "2011-09", "2011-11"], freq="2M", ) pi = period_range(start="1/1/11", end="12/31/11", freq="2M") tm.assert_index_equal(pi, expected) assert pi.freq == offsets.MonthEnd(2) assert pi.freqstr == "2M" pi = period_range(start="1/1/11", periods=6, freq="2M") tm.assert_index_equal(pi, expected) assert pi.freq == offsets.MonthEnd(2) assert pi.freqstr == "2M" def test_iteration(self): index = period_range(start="1/1/10", periods=4, freq="B") result = list(index) assert isinstance(result[0], Period) assert result[0].freq == index.freq def test_is_full(self): index = PeriodIndex([2005, 2007, 2009], freq="A") assert not index.is_full index = PeriodIndex([2005, 2006, 2007], freq="A") assert index.is_full index = PeriodIndex([2005, 2005, 2007], freq="A") assert not index.is_full index = PeriodIndex([2005, 2005, 2006], freq="A") assert index.is_full index = PeriodIndex([2006, 2005, 2005], freq="A") with pytest.raises(ValueError, match="Index is not monotonic"): index.is_full assert index[:0].is_full def test_with_multi_index(self): # #1705 index = date_range("1/1/2012", periods=4, freq="12H") index_as_arrays = [index.to_period(freq="D"), index.hour] s = Series([0, 1, 2, 3], index_as_arrays) assert isinstance(s.index.levels[0], PeriodIndex) assert isinstance(s.index.values[0][0], Period) def test_convert_array_of_periods(self): rng = period_range("1/1/2000", periods=20, freq="D") periods = list(rng) result = pd.Index(periods) assert isinstance(result, PeriodIndex) def test_append_concat(self): # #1815 d1 = date_range("12/31/1990", "12/31/1999", freq="A-DEC") d2 = date_range("12/31/2000", "12/31/2009", freq="A-DEC") s1 = Series(np.random.randn(10), d1) s2 = Series(np.random.randn(10), d2) s1 = s1.to_period() s2 = s2.to_period() # drops index result = pd.concat([s1, s2]) assert isinstance(result.index, PeriodIndex) assert result.index[0] == s1.index[0] def test_pickle_freq(self): # GH2891 prng = period_range("1/1/2011", "1/1/2012", freq="M") new_prng = tm.round_trip_pickle(prng) assert new_prng.freq == offsets.MonthEnd() assert new_prng.freqstr == "M" def test_map(self): # test_map_dictlike generally tests index = PeriodIndex([2005, 2007, 2009], freq="A") result = index.map(lambda x: x.ordinal) exp = Index([x.ordinal for x in index]) tm.assert_index_equal(result, exp) def test_join_self(self, join_type): index = period_range("1/1/2000", periods=10) joined = index.join(index, how=join_type) assert index is joined def test_insert(self): # GH 18295 (test missing) expected = PeriodIndex( ["2017Q1", pd.NaT, "2017Q2", "2017Q3", "2017Q4"], freq="Q") for na in (np.nan, pd.NaT, None): result = period_range("2017Q1", periods=4, freq="Q").insert(1, na) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "msg, key", [ (r"Period\('2019', 'A-DEC'\), 'foo', 'bar'", (Period(2019), "foo", "bar")), (r"Period\('2019', 'A-DEC'\), 'y1', 'bar'", (Period(2019), "y1", "bar")), (r"Period\('2019', 'A-DEC'\), 'foo', 'z1'", (Period(2019), "foo", "z1")), ( r"Period\('2018', 'A-DEC'\), Period\('2016', 'A-DEC'\), 'bar'", (Period(2018), Period(2016), "bar"), ), (r"Period\('2018', 'A-DEC'\), 'foo', 'y1'", (Period(2018), "foo", "y1")), ( r"Period\('2017', 'A-DEC'\), 'foo', Period\('2015', 'A-DEC'\)", (Period(2017), "foo", Period(2015)), ), (r"Period\('2017', 'A-DEC'\), 'z1', 'bar'", (Period(2017), "z1", "bar")), ], ) def test_contains_raise_error_if_period_index_is_in_multi_index( self, msg, key): # issue 20684 """ parse_time_string return parameter if type not matched. PeriodIndex.get_loc takes returned value from parse_time_string as a tuple. If first argument is Period and a tuple has 3 items, process go on not raise exception """ df = DataFrame({ "A": [Period(2019), "x1", "x2"], "B": [Period(2018), Period(2016), "y1"], "C": [Period(2017), "z1", Period(2015)], "V1": [1, 2, 3], "V2": [10, 20, 30], }).set_index(["A", "B", "C"]) with pytest.raises(KeyError, match=msg): df.loc[key]
pd.Series(range(10),index=pd.date_range("2000",freq="D",periods=10)) start=pd.Timestamp("2018-01-06 00:00:00") pd.date_range(start, periods=10,freq="2h20min") rng=pd.date_range(start,end,freq="D") ts=pd.Series(np.random.randn(len(rng)),index=rng) ts.shift(2)[1] # dummy datasets with dates import pandas.util.testing as tm tm.N, tm.K = 5,3 tm.makeTimedeltaIndex(), tm.makeTimeSeries(), tm.makePeriodSeries() tm.makeDateIndex(), tm.makePeriodIndex(), tm.makeObjectSeries() from itertools import product datecols = ['year', 'month', 'day'] df = pd.DataFrame(list(product([2016,2017],[1,2],[1,2,3])),columns = datecols) df['data']=np.random.randn(len(df)) df.index = pd.to_datetime(df[datecols]) # Align 2 datetime series and interpolate if necessary from datetime import date, time import numpy as np from ..errors import MqValueError
import numpy as np import pytest from pandas.compat import long, lzip import pandas as pd from pandas.core.indexes.api import Index, MultiIndex import pandas.util.testing as tm @pytest.fixture(params=[tm.makeUnicodeIndex(100), tm.makeStringIndex(100), tm.makeDateIndex(100), tm.makePeriodIndex(100), tm.makeTimedeltaIndex(100), tm.makeIntIndex(100), tm.makeUIntIndex(100), tm.makeRangeIndex(100), tm.makeFloatIndex(100), Index([True, False]), tm.makeCategoricalIndex(100), Index([]), MultiIndex.from_tuples(lzip( ['foo', 'bar', 'baz'], [1, 2, 3])), Index([0, 0, 1, 1, 2, 2])], ids=lambda x: type(x).__name__) def indices(request): return request.param @pytest.fixture(params=[1, np.array(1, dtype=np.int64)])
def setUp(self): self.indices = dict(index=tm.makePeriodIndex(10)) self.setup_indices()
class TestHashing(object): @pytest.fixture(params=[ Series([1, 2, 3] * 3, dtype='int32'), Series([None, 2.5, 3.5] * 3, dtype='float32'), Series(['a', 'b', 'c'] * 3, dtype='category'), Series(['d', 'e', 'f'] * 3), Series([True, False, True] * 3), Series(pd.date_range('20130101', periods=9)), Series(pd.date_range('20130101', periods=9, tz='US/Eastern')), Series(pd.timedelta_range('2000', periods=9)) ]) def series(self, request): return request.param def test_consistency(self): # check that our hash doesn't change because of a mistake # in the actual code; this is the ground truth result = hash_pandas_object(Index(['foo', 'bar', 'baz'])) expected = Series(np.array( [3600424527151052760, 1374399572096150070, 477881037637427054], dtype='uint64'), index=['foo', 'bar', 'baz']) tm.assert_series_equal(result, expected) def test_hash_array(self, series): a = series.values tm.assert_numpy_array_equal(hash_array(a), hash_array(a)) def test_hash_array_mixed(self): result1 = hash_array(np.array([3, 4, 'All'])) result2 = hash_array(np.array(['3', '4', 'All'])) result3 = hash_array(np.array([3, 4, 'All'], dtype=object)) tm.assert_numpy_array_equal(result1, result2) tm.assert_numpy_array_equal(result1, result3) @pytest.mark.parametrize('val', [5, 'foo', pd.Timestamp('20130101')]) def test_hash_array_errors(self, val): msg = 'must pass a ndarray-like' with tm.assert_raises_regex(TypeError, msg): hash_array(val) def check_equal(self, obj, **kwargs): a = hash_pandas_object(obj, **kwargs) b = hash_pandas_object(obj, **kwargs) tm.assert_series_equal(a, b) kwargs.pop('index', None) a = hash_pandas_object(obj, **kwargs) b = hash_pandas_object(obj, **kwargs) tm.assert_series_equal(a, b) def check_not_equal_with_index(self, obj): # check that we are not hashing the same if # we include the index if not isinstance(obj, Index): a = hash_pandas_object(obj, index=True) b = hash_pandas_object(obj, index=False) if len(obj): assert not (a == b).all() def test_hash_tuples(self): tups = [(1, 'one'), (1, 'two'), (2, 'one')] result = hash_tuples(tups) expected = hash_pandas_object(MultiIndex.from_tuples(tups)).values tm.assert_numpy_array_equal(result, expected) result = hash_tuples(tups[0]) assert result == expected[0] @pytest.mark.parametrize('tup', [(1, 'one'), (1, np.nan), (1.0, pd.NaT, 'A'), ('A', pd.Timestamp("2012-01-01"))]) def test_hash_tuple(self, tup): # test equivalence between hash_tuples and hash_tuple result = hash_tuple(tup) expected = hash_tuples([tup])[0] assert result == expected @pytest.mark.parametrize('val', [ 1, 1.4, 'A', b'A', u'A', pd.Timestamp("2012-01-01"), pd.Timestamp("2012-01-01", tz='Europe/Brussels'), datetime.datetime(2012, 1, 1), pd.Timestamp("2012-01-01", tz='EST').to_pydatetime(), pd.Timedelta('1 days'), datetime.timedelta(1), pd.Period('2012-01-01', freq='D'), pd.Interval(0, 1), np.nan, pd.NaT, None ]) def test_hash_scalar(self, val): result = _hash_scalar(val) expected = hash_array(np.array([val], dtype=object), categorize=True) assert result[0] == expected[0] @pytest.mark.parametrize('val', [5, 'foo', pd.Timestamp('20130101')]) def test_hash_tuples_err(self, val): msg = 'must be convertible to a list-of-tuples' with tm.assert_raises_regex(TypeError, msg): hash_tuples(val) def test_multiindex_unique(self): mi = MultiIndex.from_tuples([(118, 472), (236, 118), (51, 204), (102, 51)]) assert mi.is_unique is True result = hash_pandas_object(mi) assert result.is_unique is True def test_multiindex_objects(self): mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]], labels=[[0, 1, 0, 2], [2, 0, 0, 1]], names=['col1', 'col2']) recons = mi._sort_levels_monotonic() # these are equal assert mi.equals(recons) assert Index(mi.values).equals(Index(recons.values)) # _hashed_values and hash_pandas_object(..., index=False) # equivalency expected = hash_pandas_object(mi, index=False).values result = mi._hashed_values tm.assert_numpy_array_equal(result, expected) expected = hash_pandas_object(recons, index=False).values result = recons._hashed_values tm.assert_numpy_array_equal(result, expected) expected = mi._hashed_values result = recons._hashed_values # values should match, but in different order tm.assert_numpy_array_equal(np.sort(result), np.sort(expected)) @pytest.mark.parametrize('obj', [ Series([1, 2, 3]), Series([1.0, 1.5, 3.2]), Series([1.0, 1.5, np.nan]), Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]), Series(['a', 'b', 'c']), Series(['a', np.nan, 'c']), Series(['a', None, 'c']), Series([True, False, True]), Series(), Index([1, 2, 3]), Index([True, False, True]), DataFrame({ 'x': ['a', 'b', 'c'], 'y': [1, 2, 3] }), DataFrame(), tm.makeMissingDataframe(), tm.makeMixedDataFrame(), tm.makeTimeDataFrame(), tm.makeTimeSeries(), tm.makeTimedeltaIndex(), tm.makePeriodIndex(), Series(tm.makePeriodIndex()), Series(pd.date_range('20130101', periods=3, tz='US/Eastern')), MultiIndex.from_product([ range(5), ['foo', 'bar', 'baz'], pd.date_range('20130101', periods=2) ]), MultiIndex.from_product([pd.CategoricalIndex(list('aabc')), range(3)]) ]) def test_hash_pandas_object(self, obj): self.check_equal(obj) self.check_not_equal_with_index(obj) def test_hash_pandas_object2(self, series): self.check_equal(series) self.check_not_equal_with_index(series) @pytest.mark.parametrize( 'obj', [Series([], dtype='float64'), Series([], dtype='object'), Index([])]) def test_hash_pandas_empty_object(self, obj): # these are by-definition the same with # or w/o the index as the data is empty self.check_equal(obj) @pytest.mark.parametrize('s1', [ Series(['a', 'b', 'c', 'd']), Series([1000, 2000, 3000, 4000]), Series(pd.date_range(0, periods=4)) ]) @pytest.mark.parametrize('categorize', [True, False]) def test_categorical_consistency(self, s1, categorize): # GH15143 # Check that categoricals hash consistent with their values, not codes # This should work for categoricals of any dtype s2 = s1.astype('category').cat.set_categories(s1) s3 = s2.cat.set_categories(list(reversed(s1))) # These should all hash identically h1 = hash_pandas_object(s1, categorize=categorize) h2 = hash_pandas_object(s2, categorize=categorize) h3 = hash_pandas_object(s3, categorize=categorize) tm.assert_series_equal(h1, h2) tm.assert_series_equal(h1, h3) def test_categorical_with_nan_consistency(self): c = pd.Categorical.from_codes([-1, 0, 1, 2, 3, 4], categories=pd.date_range('2012-01-01', periods=5, name='B')) expected = hash_array(c, categorize=False) c = pd.Categorical.from_codes([-1, 0], categories=[pd.Timestamp('2012-01-01')]) result = hash_array(c, categorize=False) assert result[0] in expected assert result[1] in expected @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning") def test_pandas_errors(self): with pytest.raises(TypeError): hash_pandas_object(pd.Timestamp('20130101')) obj = tm.makePanel() with pytest.raises(TypeError): hash_pandas_object(obj) def test_hash_keys(self): # using different hash keys, should have different hashes # for the same data # this only matters for object dtypes obj = Series(list('abc')) a = hash_pandas_object(obj, hash_key='9876543210123456') b = hash_pandas_object(obj, hash_key='9876543210123465') assert (a != b).all() def test_invalid_key(self): # this only matters for object dtypes msg = 'key should be a 16-byte string encoded' with tm.assert_raises_regex(ValueError, msg): hash_pandas_object(Series(list('abc')), hash_key='foo') def test_alread_encoded(self): # if already encoded then ok obj = Series(list('abc')).str.encode('utf8') self.check_equal(obj) def test_alternate_encoding(self): obj = Series(list('abc')) self.check_equal(obj, encoding='ascii') @pytest.mark.parametrize('l_exp', range(8)) @pytest.mark.parametrize('l_add', [0, 1]) def test_same_len_hash_collisions(self, l_exp, l_add): length = 2**(l_exp + 8) + l_add s = tm.rands_array(length, 2) result = hash_array(s, 'utf8') assert not result[0] == result[1] def test_hash_collisions(self): # hash collisions are bad # https://github.com/pandas-dev/pandas/issues/14711#issuecomment-264885726 L = [ 'Ingrid-9Z9fKIZmkO7i7Cn51Li34pJm44fgX6DYGBNj3VPlOH50m7HnBlPxfIwFMrcNJNMP6PSgLmwWnInciMWrCSAlLEvt7JkJl4IxiMrVbXSa8ZQoVaq5xoQPjltuJEfwdNlO6jo8qRRHvD8sBEBMQASrRa6TsdaPTPCBo3nwIBpE7YzzmyH0vMBhjQZLx1aCT7faSEx7PgFxQhHdKFWROcysamgy9iVj8DO2Fmwg1NNl93rIAqC3mdqfrCxrzfvIY8aJdzin2cHVzy3QUJxZgHvtUtOLxoqnUHsYbNTeq0xcLXpTZEZCxD4PGubIuCNf32c33M7HFsnjWSEjE2yVdWKhmSVodyF8hFYVmhYnMCztQnJrt3O8ZvVRXd5IKwlLexiSp4h888w7SzAIcKgc3g5XQJf6MlSMftDXm9lIsE1mJNiJEv6uY6pgvC3fUPhatlR5JPpVAHNSbSEE73MBzJrhCAbOLXQumyOXigZuPoME7QgJcBalliQol7YZ9', # noqa 'Tim-b9MddTxOWW2AT1Py6vtVbZwGAmYCjbp89p8mxsiFoVX4FyDOF3wFiAkyQTUgwg9sVqVYOZo09Dh1AzhFHbgij52ylF0SEwgzjzHH8TGY8Lypart4p4onnDoDvVMBa0kdthVGKl6K0BDVGzyOXPXKpmnMF1H6rJzqHJ0HywfwS4XYpVwlAkoeNsiicHkJUFdUAhG229INzvIAiJuAHeJDUoyO4DCBqtoZ5TDend6TK7Y914yHlfH3g1WZu5LksKv68VQHJriWFYusW5e6ZZ6dKaMjTwEGuRgdT66iU5nqWTHRH8WSzpXoCFwGcTOwyuqPSe0fTe21DVtJn1FKj9F9nEnR9xOvJUO7E0piCIF4Ad9yAIDY4DBimpsTfKXCu1vdHpKYerzbndfuFe5AhfMduLYZJi5iAw8qKSwR5h86ttXV0Mc0QmXz8dsRvDgxjXSmupPxBggdlqUlC828hXiTPD7am0yETBV0F3bEtvPiNJfremszcV8NcqAoARMe' ] # noqa # these should be different! result1 = hash_array(np.asarray(L[0:1], dtype=object), 'utf8') expected1 = np.array([14963968704024874985], dtype=np.uint64) tm.assert_numpy_array_equal(result1, expected1) result2 = hash_array(np.asarray(L[1:2], dtype=object), 'utf8') expected2 = np.array([16428432627716348016], dtype=np.uint64) tm.assert_numpy_array_equal(result2, expected2) result = hash_array(np.asarray(L, dtype=object), 'utf8') tm.assert_numpy_array_equal( result, np.concatenate([expected1, expected2], axis=0))
def setup_method(self, method): self.indices = dict(index=tm.makePeriodIndex(10)) self.setup_indices()
rng = DatetimeIndex(["1/31/2000", "1/31/2001", "1/31/2002"]) rng = rng[::-1] assert rng.inferred_freq == "-1A-JAN" def test_non_datetime_index2(): rng = DatetimeIndex(["1/31/2000", "1/31/2001", "1/31/2002"]) vals = rng.to_pydatetime() result = frequencies.infer_freq(vals) assert result == rng.inferred_freq @pytest.mark.parametrize("idx", [ tm.makeIntIndex(10), tm.makeFloatIndex(10), tm.makePeriodIndex(10) ]) def test_invalid_index_types(idx): msg = ("(cannot infer freq from a non-convertible)|" "(Check the `freq` attribute instead of using infer_freq)") with pytest.raises(TypeError, match=msg): frequencies.infer_freq(idx) @pytest.mark.skipif(is_platform_windows(), reason="see gh-10822: Windows issue") @pytest.mark.parametrize("idx", [tm.makeStringIndex(10), tm.makeUnicodeIndex(10)]) def test_invalid_index_types_unicode(idx): # see gh-10822
Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]), Series(["a", "b", "c"]), Series(["a", np.nan, "c"]), Series(["a", None, "c"]), Series([True, False, True]), Series(), Index([1, 2, 3]), Index([True, False, True]), DataFrame({"x": ["a", "b", "c"], "y": [1, 2, 3]}), DataFrame(), tm.makeMissingDataframe(), tm.makeMixedDataFrame(), tm.makeTimeDataFrame(), tm.makeTimeSeries(), tm.makeTimedeltaIndex(), tm.makePeriodIndex(), Series(tm.makePeriodIndex()), Series(pd.date_range("20130101", periods=3, tz="US/Eastern")), MultiIndex.from_product([range(5), ["foo", "bar", "baz"], pd.date_range("20130101", periods=2)]), MultiIndex.from_product([pd.CategoricalIndex(list("aabc")), range(3)]) ]) def test_hash_pandas_object(obj, index): _check_equal(obj, index=index) _check_not_equal_with_index(obj) def test_hash_pandas_object2(series, index): _check_equal(series, index=index) _check_not_equal_with_index(series)
Series(["a", None, "c"]), Series([True, False, True]), Series(), Index([1, 2, 3]), Index([True, False, True]), DataFrame({ "x": ["a", "b", "c"], "y": [1, 2, 3] }), DataFrame(), tm.makeMissingDataframe(), tm.makeMixedDataFrame(), tm.makeTimeDataFrame(), tm.makeTimeSeries(), tm.makeTimedeltaIndex(), tm.makePeriodIndex(), Series(tm.makePeriodIndex()), Series(pd.date_range("20130101", periods=3, tz="US/Eastern")), MultiIndex.from_product([ range(5), ["foo", "bar", "baz"], pd.date_range("20130101", periods=2) ]), MultiIndex.from_product([pd.CategoricalIndex(list("aabc")), range(3)]), ], ) def test_hash_pandas_object(obj, index): _check_equal(obj, index=index) _check_not_equal_with_index(obj)
import numpy as np import pytest import pandas as pd from pandas.core.indexes.api import Index, MultiIndex import pandas.util.testing as tm indices_dict = { "unicode": tm.makeUnicodeIndex(100), "string": tm.makeStringIndex(100), "datetime": tm.makeDateIndex(100), "period": tm.makePeriodIndex(100), "timedelta": tm.makeTimedeltaIndex(100), "int": tm.makeIntIndex(100), "uint": tm.makeUIntIndex(100), "range": tm.makeRangeIndex(100), "float": tm.makeFloatIndex(100), "bool": Index([True, False]), "categorical": tm.makeCategoricalIndex(100), "interval": tm.makeIntervalIndex(100), "empty": Index([]), "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])), "repeats": Index([0, 0, 1, 1, 2, 2]), } @pytest.fixture(params=indices_dict.keys()) def indices(request): # copy to avoid mutation, e.g. setting .name return indices_dict[request.param].copy()
def setup_method(self, method): self.bool_index = tm.makeBoolIndex(10, name="a") self.int_index = tm.makeIntIndex(10, name="a") self.float_index = tm.makeFloatIndex(10, name="a") self.dt_index = tm.makeDateIndex(10, name="a") self.dt_tz_index = tm.makeDateIndex( 10, name="a").tz_localize(tz="US/Eastern") self.period_index = tm.makePeriodIndex(10, name="a") self.string_index = tm.makeStringIndex(10, name="a") self.unicode_index = tm.makeUnicodeIndex(10, name="a") arr = np.random.randn(10) self.bool_series = Series(arr, index=self.bool_index, name="a") self.int_series = Series(arr, index=self.int_index, name="a") self.float_series = Series(arr, index=self.float_index, name="a") self.dt_series = Series(arr, index=self.dt_index, name="a") self.dt_tz_series = self.dt_tz_index.to_series() self.period_series = Series(arr, index=self.period_index, name="a") self.string_series = Series(arr, index=self.string_index, name="a") self.unicode_series = Series(arr, index=self.unicode_index, name="a") types = [ "bool", "int", "float", "dt", "dt_tz", "period", "string", "unicode" ] self.indexes = [getattr(self, "{}_index".format(t)) for t in types] self.series = [getattr(self, "{}_series".format(t)) for t in types] # To test narrow dtypes, we use narrower *data* elements, not *index* elements index = self.int_index self.float32_series = Series(arr.astype(np.float32), index=index, name="a") arr_int = np.random.choice(10, size=10, replace=False) self.int8_series = Series(arr_int.astype(np.int8), index=index, name="a") self.int16_series = Series(arr_int.astype(np.int16), index=index, name="a") self.int32_series = Series(arr_int.astype(np.int32), index=index, name="a") self.uint8_series = Series(arr_int.astype(np.uint8), index=index, name="a") self.uint16_series = Series(arr_int.astype(np.uint16), index=index, name="a") self.uint32_series = Series(arr_int.astype(np.uint32), index=index, name="a") nrw_types = [ "float32", "int8", "int16", "int32", "uint8", "uint16", "uint32" ] self.narrow_series = [ getattr(self, "{}_series".format(t)) for t in nrw_types ] self.objs = self.indexes + self.series + self.narrow_series