Пример #1
0
    def test_hash_pandas_object(self):

        for obj in [Series([1, 2, 3]),
                    Series([1.0, 1.5, 3.2]),
                    Series([1.0, 1.5, np.nan]),
                    Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]),
                    Series(['a', 'b', 'c']),
                    Series(['a', np.nan, 'c']),
                    Series(['a', None, 'c']),
                    Series([True, False, True]),
                    Series(),
                    Index([1, 2, 3]),
                    Index([True, False, True]),
                    DataFrame({'x': ['a', 'b', 'c'], 'y': [1, 2, 3]}),
                    DataFrame(),
                    tm.makeMissingDataframe(),
                    tm.makeMixedDataFrame(),
                    tm.makeTimeDataFrame(),
                    tm.makeTimeSeries(),
                    tm.makeTimedeltaIndex(),
                    tm.makePeriodIndex(),
                    Series(tm.makePeriodIndex()),
                    Series(pd.date_range('20130101',
                                         periods=3, tz='US/Eastern')),
                    MultiIndex.from_product(
                        [range(5),
                         ['foo', 'bar', 'baz'],
                         pd.date_range('20130101', periods=2)]),
                    MultiIndex.from_product(
                        [pd.CategoricalIndex(list('aabc')),
                         range(3)])]:
            self.check_equal(obj)
            self.check_not_equal_with_index(obj)
Пример #2
0
    def setup_method(self, method):
        self.bool_index = tm.makeBoolIndex(10, name='a')
        self.int_index = tm.makeIntIndex(10, name='a')
        self.float_index = tm.makeFloatIndex(10, name='a')
        self.dt_index = tm.makeDateIndex(10, name='a')
        self.dt_tz_index = tm.makeDateIndex(
            10, name='a').tz_localize(tz='US/Eastern')
        self.period_index = tm.makePeriodIndex(10, name='a')
        self.string_index = tm.makeStringIndex(10, name='a')
        self.unicode_index = tm.makeUnicodeIndex(10, name='a')

        arr = np.random.randn(10)
        self.bool_series = Series(arr, index=self.bool_index, name='a')
        self.int_series = Series(arr, index=self.int_index, name='a')
        self.float_series = Series(arr, index=self.float_index, name='a')
        self.dt_series = Series(arr, index=self.dt_index, name='a')
        self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True)
        self.period_series = Series(arr, index=self.period_index, name='a')
        self.string_series = Series(arr, index=self.string_index, name='a')
        self.unicode_series = Series(arr, index=self.unicode_index, name='a')

        types = [
            'bool', 'int', 'float', 'dt', 'dt_tz', 'period', 'string',
            'unicode'
        ]
        self.indexes = [getattr(self, '{}_index'.format(t)) for t in types]
        self.series = [getattr(self, '{}_series'.format(t)) for t in types]
        self.objs = self.indexes + self.series
Пример #3
0
    def setup_method(self, method):
        self.bool_index = tm.makeBoolIndex(10, name='a')
        self.int_index = tm.makeIntIndex(10, name='a')
        self.float_index = tm.makeFloatIndex(10, name='a')
        self.dt_index = tm.makeDateIndex(10, name='a')
        self.dt_tz_index = tm.makeDateIndex(10, name='a').tz_localize(
            tz='US/Eastern')
        self.period_index = tm.makePeriodIndex(10, name='a')
        self.string_index = tm.makeStringIndex(10, name='a')
        self.unicode_index = tm.makeUnicodeIndex(10, name='a')

        arr = np.random.randn(10)
        self.bool_series = Series(arr, index=self.bool_index, name='a')
        self.int_series = Series(arr, index=self.int_index, name='a')
        self.float_series = Series(arr, index=self.float_index, name='a')
        self.dt_series = Series(arr, index=self.dt_index, name='a')
        self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True)
        self.period_series = Series(arr, index=self.period_index, name='a')
        self.string_series = Series(arr, index=self.string_index, name='a')
        self.unicode_series = Series(arr, index=self.unicode_index, name='a')

        types = ['bool', 'int', 'float', 'dt', 'dt_tz', 'period', 'string',
                 'unicode']
        self.indexes = [getattr(self, '{}_index'.format(t)) for t in types]
        self.series = [getattr(self, '{}_series'.format(t)) for t in types]
        self.objs = self.indexes + self.series
Пример #4
0
    def setUp(self):
        self.bool_index = tm.makeBoolIndex(10, name='a')
        self.int_index = tm.makeIntIndex(10, name='a')
        self.float_index = tm.makeFloatIndex(10, name='a')
        self.dt_index = tm.makeDateIndex(10, name='a')
        self.dt_tz_index = tm.makeDateIndex(10, name='a').tz_localize(
            tz='US/Eastern')
        self.period_index = tm.makePeriodIndex(10, name='a')
        self.string_index = tm.makeStringIndex(10, name='a')
        self.unicode_index = tm.makeUnicodeIndex(10, name='a')

        arr = np.random.randn(10)
        self.int_series = Series(arr, index=self.int_index, name='a')
        self.float_series = Series(arr, index=self.float_index, name='a')
        self.dt_series = Series(arr, index=self.dt_index, name='a')
        self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True)
        self.period_series = Series(arr, index=self.period_index, name='a')
        self.string_series = Series(arr, index=self.string_index, name='a')

        types = ['bool', 'int', 'float', 'dt', 'dt_tz', 'period', 'string',
                 'unicode']
        fmts = ["{0}_{1}".format(t, f)
                for t in types for f in ['index', 'series']]
        self.objs = [getattr(self, f)
                     for f in fmts if getattr(self, f, None) is not None]
Пример #5
0
    def test_invalid_index_types(self):

        # test all index types
        for i in [tm.makeIntIndex(10), tm.makeFloatIndex(10), tm.makePeriodIndex(10)]:
            self.assertRaises(TypeError, lambda: infer_freq(i))

        for i in [tm.makeStringIndex(10), tm.makeUnicodeIndex(10)]:
            self.assertRaises(ValueError, lambda: infer_freq(i))
Пример #6
0
    def test_invalid_index_types(self):

        # test all index types
        for i in [tm.makeIntIndex(10), tm.makeFloatIndex(10), tm.makePeriodIndex(10)]:
            self.assertRaises(TypeError, lambda: frequencies.infer_freq(i))

        # GH 10822
        # odd error message on conversions to datetime for unicode
        if not is_platform_windows():
            for i in [tm.makeStringIndex(10), tm.makeUnicodeIndex(10)]:
                self.assertRaises(ValueError, lambda: frequencies.infer_freq(i))
Пример #7
0
    def test_invalid_index_types(self):

        # test all index types
        for i in [ tm.makeIntIndex(10),
                   tm.makeFloatIndex(10),
                   tm.makePeriodIndex(10) ]:
            self.assertRaises(TypeError, lambda : infer_freq(i))

        for i in [ tm.makeStringIndex(10),
                   tm.makeUnicodeIndex(10) ]:
            self.assertRaises(ValueError, lambda : infer_freq(i))
Пример #8
0
    def test_invalid_index_types(self):

        # test all index types
        for i in [tm.makeIntIndex(10), tm.makeFloatIndex(10),
                  tm.makePeriodIndex(10)]:
            pytest.raises(TypeError, lambda: frequencies.infer_freq(i))

        # GH 10822
        # odd error message on conversions to datetime for unicode
        if not is_platform_windows():
            for i in [tm.makeStringIndex(10), tm.makeUnicodeIndex(10)]:
                pytest.raises(ValueError, lambda: frequencies.infer_freq(i))
Пример #9
0
    def test_constructor_dict(self):
        d = {'a': 0., 'b': 1., 'c': 2.}
        result = Series(d, index=['b', 'c', 'd', 'a'])
        expected = Series([1, 2, nan, 0], index=['b', 'c', 'd', 'a'])
        assert_series_equal(result, expected)

        pidx = tm.makePeriodIndex(100)
        d = {pidx[0]: 0, pidx[1]: 1}
        result = Series(d, index=pidx)
        expected = Series(np.nan, pidx)
        expected.iloc[0] = 0
        expected.iloc[1] = 1
        assert_series_equal(result, expected)
Пример #10
0
    def test_constructor_dict(self):
        d = {'a': 0., 'b': 1., 'c': 2.}
        result = Series(d, index=['b', 'c', 'd', 'a'])
        expected = Series([1, 2, nan, 0], index=['b', 'c', 'd', 'a'])
        assert_series_equal(result, expected)

        pidx = tm.makePeriodIndex(100)
        d = {pidx[0]: 0, pidx[1]: 1}
        result = Series(d, index=pidx)
        expected = Series(np.nan, pidx)
        expected.ix[0] = 0
        expected.ix[1] = 1
        assert_series_equal(result, expected)
Пример #11
0
    def test_constructor_dict(self):
        d = {"a": 0.0, "b": 1.0, "c": 2.0}
        result = Series(d, index=["b", "c", "d", "a"])
        expected = Series([1, 2, np.nan, 0], index=["b", "c", "d", "a"])
        tm.assert_series_equal(result, expected)

        pidx = tm.makePeriodIndex(100)
        d = {pidx[0]: 0, pidx[1]: 1}
        result = Series(d, index=pidx)
        expected = Series(np.nan, pidx)
        expected.iloc[0] = 0
        expected.iloc[1] = 1
        tm.assert_series_equal(result, expected)
    def test_hash_pandas_object(self):

        for obj in [
                Series([1, 2, 3]),
                Series([1.0, 1.5, 3.2]),
                Series([1.0, 1.5, np.nan]),
                Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]),
                Series(['a', 'b', 'c']),
                Series(['a', np.nan, 'c']),
                Series(['a', None, 'c']),
                Series([True, False, True]),
                Series(),
                Index([1, 2, 3]),
                Index([True, False, True]),
                DataFrame({
                    'x': ['a', 'b', 'c'],
                    'y': [1, 2, 3]
                }),
                DataFrame(),
                tm.makeMissingDataframe(),
                tm.makeMixedDataFrame(),
                tm.makeTimeDataFrame(),
                tm.makeTimeSeries(),
                tm.makeTimedeltaIndex(),
                tm.makePeriodIndex(),
                Series(tm.makePeriodIndex()),
                Series(pd.date_range('20130101', periods=3, tz='US/Eastern')),
                MultiIndex.from_product([
                    range(5), ['foo', 'bar', 'baz'],
                    pd.date_range('20130101', periods=2)
                ]),
                MultiIndex.from_product(
                    [pd.CategoricalIndex(list('aabc')),
                     range(3)])
        ]:
            self.check_equal(obj)
            self.check_not_equal_with_index(obj)
Пример #13
0
    def setUp(self):
        self.int_index     = tm.makeIntIndex(10)
        self.float_index   = tm.makeFloatIndex(10)
        self.dt_index      = tm.makeDateIndex(10)
        self.period_index  = tm.makePeriodIndex(10)
        self.string_index  = tm.makeStringIndex(10)

        arr = np.random.randn(10)
        self.int_series    = Series(arr, index=self.int_index)
        self.float_series  = Series(arr, index=self.int_index)
        self.dt_series     = Series(arr, index=self.dt_index)
        self.period_series = Series(arr, index=self.period_index)
        self.string_series = Series(arr, index=self.string_index)

        self.objs = [ getattr(self,"{0}_{1}".format(t,f)) for t in ['int','float','dt','period','string'] for f in ['index','series'] ]
Пример #14
0
def get_objs():
    indexes = [
        tm.makeBoolIndex(10, name="a"),
        tm.makeIntIndex(10, name="a"),
        tm.makeFloatIndex(10, name="a"),
        tm.makeDateIndex(10, name="a"),
        tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern"),
        tm.makePeriodIndex(10, name="a"),
        tm.makeStringIndex(10, name="a"),
        tm.makeUnicodeIndex(10, name="a"),
    ]

    arr = np.random.randn(10)
    series = [Series(arr, index=idx, name="a") for idx in indexes]

    objs = indexes + series
    return objs
Пример #15
0
def get_objs():
    indexes = [
        tm.makeBoolIndex(10, name='a'),
        tm.makeIntIndex(10, name='a'),
        tm.makeFloatIndex(10, name='a'),
        tm.makeDateIndex(10, name='a'),
        tm.makeDateIndex(10, name='a').tz_localize(tz='US/Eastern'),
        tm.makePeriodIndex(10, name='a'),
        tm.makeStringIndex(10, name='a'),
        tm.makeUnicodeIndex(10, name='a')
    ]

    arr = np.random.randn(10)
    series = [Series(arr, index=idx, name='a') for idx in indexes]

    objs = indexes + series
    return objs
Пример #16
0
    def test_tseries_indices_series(self):
        idx = tm.makeDateIndex(10)
        ser = Series(np.random.randn(len(idx)), idx)
        self.store['a'] = ser
        result = self.store['a']

        assert_series_equal(result, ser)
        self.assertEquals(type(result.index), type(ser.index))
        self.assertEquals(result.index.freq, ser.index.freq)

        idx = tm.makePeriodIndex(10)
        ser = Series(np.random.randn(len(idx)), idx)
        self.store['a'] = ser
        result = self.store['a']

        assert_series_equal(result, ser)
        self.assertEquals(type(result.index), type(ser.index))
        self.assertEquals(result.index.freq, ser.index.freq)
Пример #17
0
    def setUp(self):
        self.int_index     = tm.makeIntIndex(10)
        self.float_index   = tm.makeFloatIndex(10)
        self.dt_index      = tm.makeDateIndex(10)
        self.dt_tz_index   = tm.makeDateIndex(10).tz_localize(tz='US/Eastern')
        self.period_index  = tm.makePeriodIndex(10)
        self.string_index  = tm.makeStringIndex(10)

        arr = np.random.randn(10)
        self.int_series    = Series(arr, index=self.int_index)
        self.float_series  = Series(arr, index=self.int_index)
        self.dt_series     = Series(arr, index=self.dt_index)
        self.dt_tz_series  = self.dt_tz_index.to_series(keep_tz=True)
        self.period_series = Series(arr, index=self.period_index)
        self.string_series = Series(arr, index=self.string_index)

        types = ['int','float','dt', 'dt_tz', 'period','string']
        self.objs = [ getattr(self,"{0}_{1}".format(t,f)) for t in types for f in ['index','series'] ]
Пример #18
0
    def test_tseries_indices_series(self):
        idx = tm.makeDateIndex(10)
        ser = Series(np.random.randn(len(idx)), idx)
        self.store['a'] = ser
        result = self.store['a']

        assert_series_equal(result, ser)
        self.assertEquals(type(result.index), type(ser.index))
        self.assertEquals(result.index.freq, ser.index.freq)

        idx = tm.makePeriodIndex(10)
        ser = Series(np.random.randn(len(idx)), idx)
        self.store['a'] = ser
        result = self.store['a']

        assert_series_equal(result, ser)
        self.assertEquals(type(result.index), type(ser.index))
        self.assertEquals(result.index.freq, ser.index.freq)
Пример #19
0
    def test_tseries_indices_frame(self):
        idx = tm.makeDateIndex(10)
        df = DataFrame(np.random.randn(len(idx), 3), index=idx)
        self.store['a'] = df
        result = self.store['a']

        assert_frame_equal(result, df)
        self.assertEquals(type(result.index), type(df.index))
        self.assertEquals(result.index.freq, df.index.freq)

        idx = tm.makePeriodIndex(10)
        df = DataFrame(np.random.randn(len(idx), 3), idx)
        self.store['a'] = df
        result = self.store['a']

        assert_frame_equal(result, df)
        self.assertEquals(type(result.index), type(df.index))
        self.assertEquals(result.index.freq, df.index.freq)
Пример #20
0
    def setUp(self):
        self.int_index = tm.makeIntIndex(10)
        self.float_index = tm.makeFloatIndex(10)
        self.dt_index = tm.makeDateIndex(10)
        self.dt_tz_index = tm.makeDateIndex(10).tz_localize(tz="US/Eastern")
        self.period_index = tm.makePeriodIndex(10)
        self.string_index = tm.makeStringIndex(10)

        arr = np.random.randn(10)
        self.int_series = Series(arr, index=self.int_index)
        self.float_series = Series(arr, index=self.int_index)
        self.dt_series = Series(arr, index=self.dt_index)
        self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True)
        self.period_series = Series(arr, index=self.period_index)
        self.string_series = Series(arr, index=self.string_index)

        types = ["int", "float", "dt", "dt_tz", "period", "string"]
        self.objs = [getattr(self, "{0}_{1}".format(t, f)) for t in types for f in ["index", "series"]]
Пример #21
0
    def test_tseries_indices_frame(self):
        idx = tm.makeDateIndex(10)
        df = DataFrame(np.random.randn(len(idx), 3), index=idx)
        self.store['a'] = df
        result = self.store['a']

        assert_frame_equal(result, df)
        self.assertEquals(type(result.index), type(df.index))
        self.assertEquals(result.index.freq, df.index.freq)

        idx = tm.makePeriodIndex(10)
        df = DataFrame(np.random.randn(len(idx), 3), idx)
        self.store['a'] = df
        result = self.store['a']

        assert_frame_equal(result, df)
        self.assertEquals(type(result.index), type(df.index))
        self.assertEquals(result.index.freq, df.index.freq)
Пример #22
0
    def setUp(self):
        self.int_index = tm.makeIntIndex(10)
        self.float_index = tm.makeFloatIndex(10)
        self.dt_index = tm.makeDateIndex(10)
        self.period_index = tm.makePeriodIndex(10)
        self.string_index = tm.makeStringIndex(10)

        arr = np.random.randn(10)
        self.int_series = Series(arr, index=self.int_index)
        self.float_series = Series(arr, index=self.int_index)
        self.dt_series = Series(arr, index=self.dt_index)
        self.period_series = Series(arr, index=self.period_index)
        self.string_series = Series(arr, index=self.string_index)

        self.objs = [
            getattr(self, "{0}_{1}".format(t, f))
            for t in ['int', 'float', 'dt', 'period', 'string']
            for f in ['index', 'series']
        ]
Пример #23
0
    def setUp(self):
        self.bool_index = tm.makeBoolIndex(10, name="a")
        self.int_index = tm.makeIntIndex(10, name="a")
        self.float_index = tm.makeFloatIndex(10, name="a")
        self.dt_index = tm.makeDateIndex(10, name="a")
        self.dt_tz_index = tm.makeDateIndex(10, name="a").tz_localize(tz="US/Eastern")
        self.period_index = tm.makePeriodIndex(10, name="a")
        self.string_index = tm.makeStringIndex(10, name="a")
        self.unicode_index = tm.makeUnicodeIndex(10, name="a")

        arr = np.random.randn(10)
        self.int_series = Series(arr, index=self.int_index, name="a")
        self.float_series = Series(arr, index=self.float_index, name="a")
        self.dt_series = Series(arr, index=self.dt_index, name="a")
        self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True)
        self.period_series = Series(arr, index=self.period_index, name="a")
        self.string_series = Series(arr, index=self.string_index, name="a")

        types = ["bool", "int", "float", "dt", "dt_tz", "period", "string", "unicode"]
        fmts = ["{0}_{1}".format(t, f) for t in types for f in ["index", "series"]]
        self.objs = [getattr(self, f) for f in fmts if getattr(self, f, None) is not None]
Пример #24
0
    def setUp(self):
        self.int_index = tm.makeIntIndex(10)
        self.float_index = tm.makeFloatIndex(10)
        self.dt_index = tm.makeDateIndex(10)
        self.dt_tz_index = tm.makeDateIndex(10).tz_localize(tz='US/Eastern')
        self.period_index = tm.makePeriodIndex(10)
        self.string_index = tm.makeStringIndex(10)

        arr = np.random.randn(10)
        self.int_series = Series(arr, index=self.int_index)
        self.float_series = Series(arr, index=self.int_index)
        self.dt_series = Series(arr, index=self.dt_index)
        self.dt_tz_series = self.dt_tz_index.to_series(keep_tz=True)
        self.period_series = Series(arr, index=self.period_index)
        self.string_series = Series(arr, index=self.string_index)

        types = ['int', 'float', 'dt', 'dt_tz', 'period', 'string']
        self.objs = [
            getattr(self, "{0}_{1}".format(t, f)) for t in types
            for f in ['index', 'series']
        ]
Пример #25
0
class TestSeriesMisc(TestData, SharedWithSparse):

    series_klass = Series
    # SharedWithSparse tests use generic, series_klass-agnostic assertion
    _assert_series_equal = staticmethod(tm.assert_series_equal)

    def test_tab_completion(self):
        # GH 9910
        s = Series(list('abcd'))
        # Series of str values should have .str but not .dt/.cat in __dir__
        assert 'str' in dir(s)
        assert 'dt' not in dir(s)
        assert 'cat' not in dir(s)

        # similarly for .dt
        s = Series(date_range('1/1/2015', periods=5))
        assert 'dt' in dir(s)
        assert 'str' not in dir(s)
        assert 'cat' not in dir(s)

        # Similarly for .cat, but with the twist that str and dt should be
        # there if the categories are of that type first cat and str.
        s = Series(list('abbcd'), dtype="category")
        assert 'cat' in dir(s)
        assert 'str' in dir(s)  # as it is a string categorical
        assert 'dt' not in dir(s)

        # similar to cat and str
        s = Series(date_range('1/1/2015', periods=5)).astype("category")
        assert 'cat' in dir(s)
        assert 'str' not in dir(s)
        assert 'dt' in dir(s)  # as it is a datetime categorical

    def test_tab_completion_with_categorical(self):
        # test the tab completion display
        ok_for_cat = [
            'categories', 'codes', 'ordered', 'set_categories',
            'add_categories', 'remove_categories', 'rename_categories',
            'reorder_categories', 'remove_unused_categories', 'as_ordered',
            'as_unordered'
        ]

        def get_dir(s):
            results = [r for r in s.cat.__dir__() if not r.startswith('_')]
            return list(sorted(set(results)))

        s = Series(list('aabbcde')).astype('category')
        results = get_dir(s)
        tm.assert_almost_equal(results, list(sorted(set(ok_for_cat))))

    @pytest.mark.parametrize("index", [
        tm.makeUnicodeIndex(10),
        tm.makeStringIndex(10),
        tm.makeCategoricalIndex(10),
        Index(['foo', 'bar', 'baz'] * 2),
        tm.makeDateIndex(10),
        tm.makePeriodIndex(10),
        tm.makeTimedeltaIndex(10),
        tm.makeIntIndex(10),
        tm.makeUIntIndex(10),
        tm.makeIntIndex(10),
        tm.makeFloatIndex(10),
        Index([True, False]),
        Index(['a{}'.format(i) for i in range(101)]),
        pd.MultiIndex.from_tuples(lzip('ABCD', 'EFGH')),
        pd.MultiIndex.from_tuples(lzip([0, 1, 2, 3], 'EFGH')),
    ])
    def test_index_tab_completion(self, index):
        # dir contains string-like values of the Index.
        s = pd.Series(index=index)
        dir_s = dir(s)
        for i, x in enumerate(s.index.unique(level=0)):
            if i < 100:
                assert (not isinstance(x, string_types) or not isidentifier(x)
                        or x in dir_s)
            else:
                assert x not in dir_s

    def test_not_hashable(self):
        s_empty = Series()
        s = Series([1])
        pytest.raises(TypeError, hash, s_empty)
        pytest.raises(TypeError, hash, s)

    def test_contains(self):
        tm.assert_contains_all(self.ts.index, self.ts)

    def test_iter(self):
        for i, val in enumerate(self.series):
            assert val == self.series[i]

        for i, val in enumerate(self.ts):
            assert val == self.ts[i]

    def test_keys(self):
        # HACK: By doing this in two stages, we avoid 2to3 wrapping the call
        # to .keys() in a list()
        getkeys = self.ts.keys
        assert getkeys() is self.ts.index

    def test_values(self):
        tm.assert_almost_equal(self.ts.values, self.ts, check_dtype=False)

    def test_iteritems(self):
        for idx, val in compat.iteritems(self.series):
            assert val == self.series[idx]

        for idx, val in compat.iteritems(self.ts):
            assert val == self.ts[idx]

        # assert is lazy (genrators don't define reverse, lists do)
        assert not hasattr(self.series.iteritems(), 'reverse')

    def test_items(self):
        for idx, val in self.series.items():
            assert val == self.series[idx]

        for idx, val in self.ts.items():
            assert val == self.ts[idx]

        # assert is lazy (genrators don't define reverse, lists do)
        assert not hasattr(self.series.items(), 'reverse')

    def test_raise_on_info(self):
        s = Series(np.random.randn(10))
        with pytest.raises(AttributeError):
            s.info()

    def test_copy(self):

        for deep in [None, False, True]:
            s = Series(np.arange(10), dtype='float64')

            # default deep is True
            if deep is None:
                s2 = s.copy()
            else:
                s2 = s.copy(deep=deep)

            s2[::2] = np.NaN

            if deep is None or deep is True:
                # Did not modify original Series
                assert np.isnan(s2[0])
                assert not np.isnan(s[0])
            else:
                # we DID modify the original Series
                assert np.isnan(s2[0])
                assert np.isnan(s[0])

        # GH 11794
        # copy of tz-aware
        expected = Series([Timestamp('2012/01/01', tz='UTC')])
        expected2 = Series([Timestamp('1999/01/01', tz='UTC')])

        for deep in [None, False, True]:

            s = Series([Timestamp('2012/01/01', tz='UTC')])

            if deep is None:
                s2 = s.copy()
            else:
                s2 = s.copy(deep=deep)

            s2[0] = pd.Timestamp('1999/01/01', tz='UTC')

            # default deep is True
            if deep is None or deep is True:
                # Did not modify original Series
                assert_series_equal(s2, expected2)
                assert_series_equal(s, expected)
            else:
                # we DID modify the original Series
                assert_series_equal(s2, expected2)
                assert_series_equal(s, expected2)

    def test_axis_alias(self):
        s = Series([1, 2, np.nan])
        assert_series_equal(s.dropna(axis='rows'), s.dropna(axis='index'))
        assert s.dropna().sum('rows') == 3
        assert s._get_axis_number('rows') == 0
        assert s._get_axis_name('rows') == 'index'

    def test_class_axis(self):
        # https://github.com/pandas-dev/pandas/issues/18147
        # no exception and no empty docstring
        assert pydoc.getdoc(Series.index)

    def test_numpy_unique(self):
        # it works!
        np.unique(self.ts)

    def test_ndarray_compat(self):

        # test numpy compat with Series as sub-class of NDFrame
        tsdf = DataFrame(np.random.randn(1000, 3),
                         columns=['A', 'B', 'C'],
                         index=date_range('1/1/2000', periods=1000))

        def f(x):
            return x[x.idxmax()]

        result = tsdf.apply(f)
        expected = tsdf.max()
        tm.assert_series_equal(result, expected)

        # .item()
        s = Series([1])
        result = s.item()
        assert result == 1
        assert s.item() == s.iloc[0]

        # using an ndarray like function
        s = Series(np.random.randn(10))
        result = Series(np.ones_like(s))
        expected = Series(1, index=range(10), dtype='float64')
        tm.assert_series_equal(result, expected)

        # ravel
        s = Series(np.random.randn(10))
        tm.assert_almost_equal(s.ravel(order='F'), s.values.ravel(order='F'))

        # compress
        # GH 6658
        s = Series([0, 1., -1], index=list('abc'))
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s > 0, s)
        tm.assert_series_equal(result, Series([1.], index=['b']))

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s < -1, s)
        # result empty Index(dtype=object) as the same as original
        exp = Series([], dtype='float64', index=Index([], dtype='object'))
        tm.assert_series_equal(result, exp)

        s = Series([0, 1., -1], index=[.1, .2, .3])
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s > 0, s)
        tm.assert_series_equal(result, Series([1.], index=[.2]))

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s < -1, s)
        # result empty Float64Index as the same as original
        exp = Series([], dtype='float64', index=Index([], dtype='float64'))
        tm.assert_series_equal(result, exp)

    def test_str_attribute(self):
        # GH9068
        methods = ['strip', 'rstrip', 'lstrip']
        s = Series([' jack', 'jill ', ' jesse ', 'frank'])
        for method in methods:
            expected = Series([getattr(str, method)(x) for x in s.values])
            assert_series_equal(getattr(Series.str, method)(s.str), expected)

        # str accessor only valid with string values
        s = Series(range(5))
        with tm.assert_raises_regex(AttributeError, 'only use .str accessor'):
            s.str.repeat(2)

    def test_empty_method(self):
        s_empty = pd.Series()
        assert s_empty.empty

        for full_series in [pd.Series([1]), pd.Series(index=[1])]:
            assert not full_series.empty

    def test_tab_complete_warning(self, ip):
        # https://github.com/pandas-dev/pandas/issues/16409
        pytest.importorskip('IPython', minversion="6.0.0")
        from IPython.core.completer import provisionalcompleter

        code = "import pandas as pd; s = pd.Series()"
        ip.run_code(code)
        with tm.assert_produces_warning(None):
            with provisionalcompleter('ignore'):
                list(ip.Completer.completions('s.', 1))
Пример #26
0
class TestSeriesMisc:
    def test_scalarop_preserve_name(self, datetime_series):
        result = datetime_series * 2
        assert result.name == datetime_series.name

    def test_copy_name(self, datetime_series):
        result = datetime_series.copy()
        assert result.name == datetime_series.name

    def test_copy_index_name_checking(self, datetime_series):
        # don't want to be able to modify the index stored elsewhere after
        # making a copy

        datetime_series.index.name = None
        assert datetime_series.index.name is None
        assert datetime_series is datetime_series

        cp = datetime_series.copy()
        cp.index.name = "foo"
        printing.pprint_thing(datetime_series.index.name)
        assert datetime_series.index.name is None

    def test_append_preserve_name(self, datetime_series):
        result = datetime_series[:5].append(datetime_series[5:])
        assert result.name == datetime_series.name

    def test_binop_maybe_preserve_name(self, datetime_series):
        # names match, preserve
        result = datetime_series * datetime_series
        assert result.name == datetime_series.name
        result = datetime_series.mul(datetime_series)
        assert result.name == datetime_series.name

        result = datetime_series * datetime_series[:-2]
        assert result.name == datetime_series.name

        # names don't match, don't preserve
        cp = datetime_series.copy()
        cp.name = "something else"
        result = datetime_series + cp
        assert result.name is None
        result = datetime_series.add(cp)
        assert result.name is None

        ops = ["add", "sub", "mul", "div", "truediv", "floordiv", "mod", "pow"]
        ops = ops + ["r" + op for op in ops]
        for op in ops:
            # names match, preserve
            s = datetime_series.copy()
            result = getattr(s, op)(s)
            assert result.name == datetime_series.name

            # names don't match, don't preserve
            cp = datetime_series.copy()
            cp.name = "changed"
            result = getattr(s, op)(cp)
            assert result.name is None

    def test_combine_first_name(self, datetime_series):
        result = datetime_series.combine_first(datetime_series[:5])
        assert result.name == datetime_series.name

    def test_getitem_preserve_name(self, datetime_series):
        result = datetime_series[datetime_series > 0]
        assert result.name == datetime_series.name

        result = datetime_series[[0, 2, 4]]
        assert result.name == datetime_series.name

        result = datetime_series[5:10]
        assert result.name == datetime_series.name

    def test_pickle_datetimes(self, datetime_series):
        unp_ts = self._pickle_roundtrip(datetime_series)
        tm.assert_series_equal(unp_ts, datetime_series)

    def test_pickle_strings(self, string_series):
        unp_series = self._pickle_roundtrip(string_series)
        tm.assert_series_equal(unp_series, string_series)

    def _pickle_roundtrip(self, obj):

        with tm.ensure_clean() as path:
            obj.to_pickle(path)
            unpickled = pd.read_pickle(path)
            return unpickled

    def test_argsort_preserve_name(self, datetime_series):
        result = datetime_series.argsort()
        assert result.name == datetime_series.name

    def test_sort_index_name(self, datetime_series):
        result = datetime_series.sort_index(ascending=False)
        assert result.name == datetime_series.name

    def test_constructor_dict(self):
        d = {"a": 0.0, "b": 1.0, "c": 2.0}
        result = Series(d)
        expected = Series(d, index=sorted(d.keys()))
        tm.assert_series_equal(result, expected)

        result = Series(d, index=["b", "c", "d", "a"])
        expected = Series([1, 2, np.nan, 0], index=["b", "c", "d", "a"])
        tm.assert_series_equal(result, expected)

    def test_constructor_subclass_dict(self):
        data = tm.TestSubDict((x, 10.0 * x) for x in range(10))
        series = Series(data)
        expected = Series(dict(data.items()))
        tm.assert_series_equal(series, expected)

    def test_constructor_ordereddict(self):
        # GH3283
        data = OrderedDict(
            ("col{i}".format(i=i), np.random.random()) for i in range(12)
        )

        series = Series(data)
        expected = Series(list(data.values()), list(data.keys()))
        tm.assert_series_equal(series, expected)

        # Test with subclass
        class A(OrderedDict):
            pass

        series = Series(A(data))
        tm.assert_series_equal(series, expected)

    def test_constructor_dict_multiindex(self):
        d = {("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}
        _d = sorted(d.items())
        result = Series(d)
        expected = Series(
            [x[1] for x in _d], index=pd.MultiIndex.from_tuples([x[0] for x in _d])
        )
        tm.assert_series_equal(result, expected)

        d["z"] = 111.0
        _d.insert(0, ("z", d["z"]))
        result = Series(d)
        expected = Series(
            [x[1] for x in _d], index=pd.Index([x[0] for x in _d], tupleize_cols=False)
        )
        result = result.reindex(index=expected.index)
        tm.assert_series_equal(result, expected)

    def test_constructor_dict_timedelta_index(self):
        # GH #12169 : Resample category data with timedelta index
        # construct Series from dict as data and TimedeltaIndex as index
        # will result NaN in result Series data
        expected = Series(
            data=["A", "B", "C"], index=pd.to_timedelta([0, 10, 20], unit="s")
        )

        result = Series(
            data={
                pd.to_timedelta(0, unit="s"): "A",
                pd.to_timedelta(10, unit="s"): "B",
                pd.to_timedelta(20, unit="s"): "C",
            },
            index=pd.to_timedelta([0, 10, 20], unit="s"),
        )
        tm.assert_series_equal(result, expected)

    def test_sparse_accessor_updates_on_inplace(self):
        s = pd.Series([1, 1, 2, 3], dtype="Sparse[int]")
        s.drop([0, 1], inplace=True)
        assert s.sparse.density == 1.0

    def test_tab_completion(self):
        # GH 9910
        s = Series(list("abcd"))
        # Series of str values should have .str but not .dt/.cat in __dir__
        assert "str" in dir(s)
        assert "dt" not in dir(s)
        assert "cat" not in dir(s)

        # similarly for .dt
        s = Series(date_range("1/1/2015", periods=5))
        assert "dt" in dir(s)
        assert "str" not in dir(s)
        assert "cat" not in dir(s)

        # Similarly for .cat, but with the twist that str and dt should be
        # there if the categories are of that type first cat and str.
        s = Series(list("abbcd"), dtype="category")
        assert "cat" in dir(s)
        assert "str" in dir(s)  # as it is a string categorical
        assert "dt" not in dir(s)

        # similar to cat and str
        s = Series(date_range("1/1/2015", periods=5)).astype("category")
        assert "cat" in dir(s)
        assert "str" not in dir(s)
        assert "dt" in dir(s)  # as it is a datetime categorical

    def test_tab_completion_with_categorical(self):
        # test the tab completion display
        ok_for_cat = [
            "categories",
            "codes",
            "ordered",
            "set_categories",
            "add_categories",
            "remove_categories",
            "rename_categories",
            "reorder_categories",
            "remove_unused_categories",
            "as_ordered",
            "as_unordered",
        ]

        def get_dir(s):
            results = [r for r in s.cat.__dir__() if not r.startswith("_")]
            return sorted(set(results))

        s = Series(list("aabbcde")).astype("category")
        results = get_dir(s)
        tm.assert_almost_equal(results, sorted(set(ok_for_cat)))

    @pytest.mark.parametrize(
        "index",
        [
            tm.makeUnicodeIndex(10),
            tm.makeStringIndex(10),
            tm.makeCategoricalIndex(10),
            Index(["foo", "bar", "baz"] * 2),
            tm.makeDateIndex(10),
            tm.makePeriodIndex(10),
            tm.makeTimedeltaIndex(10),
            tm.makeIntIndex(10),
            tm.makeUIntIndex(10),
            tm.makeIntIndex(10),
            tm.makeFloatIndex(10),
            Index([True, False]),
            Index(["a{}".format(i) for i in range(101)]),
            pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")),
            pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")),
        ],
    )
    def test_index_tab_completion(self, index):
        # dir contains string-like values of the Index.
        s = pd.Series(index=index)
        dir_s = dir(s)
        for i, x in enumerate(s.index.unique(level=0)):
            if i < 100:
                assert not isinstance(x, str) or not x.isidentifier() or x in dir_s
            else:
                assert x not in dir_s

    def test_not_hashable(self):
        s_empty = Series()
        s = Series([1])
        msg = "'Series' objects are mutable, thus they cannot be hashed"
        with pytest.raises(TypeError, match=msg):
            hash(s_empty)
        with pytest.raises(TypeError, match=msg):
            hash(s)

    def test_contains(self, datetime_series):
        tm.assert_contains_all(datetime_series.index, datetime_series)

    def test_iter_datetimes(self, datetime_series):
        for i, val in enumerate(datetime_series):
            assert val == datetime_series[i]

    def test_iter_strings(self, string_series):
        for i, val in enumerate(string_series):
            assert val == string_series[i]

    def test_keys(self, datetime_series):
        # HACK: By doing this in two stages, we avoid 2to3 wrapping the call
        # to .keys() in a list()
        getkeys = datetime_series.keys
        assert getkeys() is datetime_series.index

    def test_values(self, datetime_series):
        tm.assert_almost_equal(
            datetime_series.values, datetime_series, check_dtype=False
        )

    def test_iteritems_datetimes(self, datetime_series):
        for idx, val in datetime_series.iteritems():
            assert val == datetime_series[idx]

    def test_iteritems_strings(self, string_series):
        for idx, val in string_series.iteritems():
            assert val == string_series[idx]

        # assert is lazy (genrators don't define reverse, lists do)
        assert not hasattr(string_series.iteritems(), "reverse")

    def test_items_datetimes(self, datetime_series):
        for idx, val in datetime_series.items():
            assert val == datetime_series[idx]

    def test_items_strings(self, string_series):
        for idx, val in string_series.items():
            assert val == string_series[idx]

        # assert is lazy (genrators don't define reverse, lists do)
        assert not hasattr(string_series.items(), "reverse")

    def test_raise_on_info(self):
        s = Series(np.random.randn(10))
        msg = "'Series' object has no attribute 'info'"
        with pytest.raises(AttributeError, match=msg):
            s.info()

    def test_copy(self):

        for deep in [None, False, True]:
            s = Series(np.arange(10), dtype="float64")

            # default deep is True
            if deep is None:
                s2 = s.copy()
            else:
                s2 = s.copy(deep=deep)

            s2[::2] = np.NaN

            if deep is None or deep is True:
                # Did not modify original Series
                assert np.isnan(s2[0])
                assert not np.isnan(s[0])
            else:
                # we DID modify the original Series
                assert np.isnan(s2[0])
                assert np.isnan(s[0])

    def test_copy_tzaware(self):
        # GH#11794
        # copy of tz-aware
        expected = Series([Timestamp("2012/01/01", tz="UTC")])
        expected2 = Series([Timestamp("1999/01/01", tz="UTC")])

        for deep in [None, False, True]:

            s = Series([Timestamp("2012/01/01", tz="UTC")])

            if deep is None:
                s2 = s.copy()
            else:
                s2 = s.copy(deep=deep)

            s2[0] = pd.Timestamp("1999/01/01", tz="UTC")

            # default deep is True
            if deep is None or deep is True:
                # Did not modify original Series
                tm.assert_series_equal(s2, expected2)
                tm.assert_series_equal(s, expected)
            else:
                # we DID modify the original Series
                tm.assert_series_equal(s2, expected2)
                tm.assert_series_equal(s, expected2)

    def test_axis_alias(self):
        s = Series([1, 2, np.nan])
        tm.assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index"))
        assert s.dropna().sum("rows") == 3
        assert s._get_axis_number("rows") == 0
        assert s._get_axis_name("rows") == "index"

    def test_class_axis(self):
        # https://github.com/pandas-dev/pandas/issues/18147
        # no exception and no empty docstring
        assert pydoc.getdoc(Series.index)

    def test_numpy_unique(self, datetime_series):
        # it works!
        np.unique(datetime_series)

    def test_ndarray_compat(self):

        # test numpy compat with Series as sub-class of NDFrame
        tsdf = DataFrame(
            np.random.randn(1000, 3),
            columns=["A", "B", "C"],
            index=date_range("1/1/2000", periods=1000),
        )

        def f(x):
            return x[x.idxmax()]

        result = tsdf.apply(f)
        expected = tsdf.max()
        tm.assert_series_equal(result, expected)

        # .item()
        with tm.assert_produces_warning(FutureWarning):
            s = Series([1])
            result = s.item()
            assert result == 1
            assert s.item() == s.iloc[0]

        # using an ndarray like function
        s = Series(np.random.randn(10))
        result = Series(np.ones_like(s))
        expected = Series(1, index=range(10), dtype="float64")
        tm.assert_series_equal(result, expected)

        # ravel
        s = Series(np.random.randn(10))
        tm.assert_almost_equal(s.ravel(order="F"), s.values.ravel(order="F"))

        # compress
        # GH 6658
        s = Series([0, 1.0, -1], index=list("abc"))
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s > 0, s)
        tm.assert_series_equal(result, Series([1.0], index=["b"]))

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s < -1, s)
        # result empty Index(dtype=object) as the same as original
        exp = Series([], dtype="float64", index=Index([], dtype="object"))
        tm.assert_series_equal(result, exp)

        s = Series([0, 1.0, -1], index=[0.1, 0.2, 0.3])
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s > 0, s)
        tm.assert_series_equal(result, Series([1.0], index=[0.2]))

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s < -1, s)
        # result empty Float64Index as the same as original
        exp = Series([], dtype="float64", index=Index([], dtype="float64"))
        tm.assert_series_equal(result, exp)

    def test_str_accessor_updates_on_inplace(self):
        s = pd.Series(list("abc"))
        s.drop([0], inplace=True)
        assert len(s.str.lower()) == 2

    def test_str_attribute(self):
        # GH9068
        methods = ["strip", "rstrip", "lstrip"]
        s = Series([" jack", "jill ", " jesse ", "frank"])
        for method in methods:
            expected = Series([getattr(str, method)(x) for x in s.values])
            tm.assert_series_equal(getattr(Series.str, method)(s.str), expected)

        # str accessor only valid with string values
        s = Series(range(5))
        with pytest.raises(AttributeError, match="only use .str accessor"):
            s.str.repeat(2)

    def test_empty_method(self):
        s_empty = pd.Series()
        assert s_empty.empty

        for full_series in [pd.Series([1]), pd.Series(index=[1])]:
            assert not full_series.empty

    def test_tab_complete_warning(self, ip):
        # https://github.com/pandas-dev/pandas/issues/16409
        pytest.importorskip("IPython", minversion="6.0.0")
        from IPython.core.completer import provisionalcompleter

        code = "import pandas as pd; s = pd.Series()"
        ip.run_code(code)
        with tm.assert_produces_warning(None):
            with provisionalcompleter("ignore"):
                list(ip.Completer.completions("s.", 1))

    def test_integer_series_size(self):
        # GH 25580
        s = Series(range(9))
        assert s.size == 9
        s = Series(range(9), dtype="Int64")
        assert s.size == 9

    def test_get_values_deprecation(self):
        s = Series(range(9))
        with tm.assert_produces_warning(FutureWarning):
            res = s.get_values()
        tm.assert_numpy_array_equal(res, s.values)
Пример #27
0
import numpy as np
import pytest

import pandas as pd
from pandas.core.indexes.api import Index, MultiIndex
import pandas.util.testing as tm


@pytest.fixture(params=[tm.makeUnicodeIndex(100),
                        tm.makeStringIndex(100),
                        tm.makeDateIndex(100),
                        tm.makePeriodIndex(100),
                        tm.makeTimedeltaIndex(100),
                        tm.makeIntIndex(100),
                        tm.makeUIntIndex(100),
                        tm.makeRangeIndex(100),
                        tm.makeFloatIndex(100),
                        Index([True, False]),
                        tm.makeCategoricalIndex(100),
                        Index([]),
                        MultiIndex.from_tuples(zip(
                            ['foo', 'bar', 'baz'], [1, 2, 3])),
                        Index([0, 0, 1, 1, 2, 2])],
                ids=lambda x: type(x).__name__)
def indices(request):
    return request.param


@pytest.fixture(params=[1, np.array(1, dtype=np.int64)])
def one(request):
    # zero-dim integer array behaves like an integer
Пример #28
0
class TestSeriesMisc(TestData, SharedWithSparse):

    series_klass = Series
    # SharedWithSparse tests use generic, series_klass-agnostic assertion
    _assert_series_equal = staticmethod(tm.assert_series_equal)

    def test_tab_completion(self):
        # GH 9910
        s = Series(list("abcd"))
        # Series of str values should have .str but not .dt/.cat in __dir__
        assert "str" in dir(s)
        assert "dt" not in dir(s)
        assert "cat" not in dir(s)

        # similarly for .dt
        s = Series(date_range("1/1/2015", periods=5))
        assert "dt" in dir(s)
        assert "str" not in dir(s)
        assert "cat" not in dir(s)

        # Similarly for .cat, but with the twist that str and dt should be
        # there if the categories are of that type first cat and str.
        s = Series(list("abbcd"), dtype="category")
        assert "cat" in dir(s)
        assert "str" in dir(s)  # as it is a string categorical
        assert "dt" not in dir(s)

        # similar to cat and str
        s = Series(date_range("1/1/2015", periods=5)).astype("category")
        assert "cat" in dir(s)
        assert "str" not in dir(s)
        assert "dt" in dir(s)  # as it is a datetime categorical

    def test_tab_completion_with_categorical(self):
        # test the tab completion display
        ok_for_cat = [
            "name",
            "index",
            "categorical",
            "categories",
            "codes",
            "ordered",
            "set_categories",
            "add_categories",
            "remove_categories",
            "rename_categories",
            "reorder_categories",
            "remove_unused_categories",
            "as_ordered",
            "as_unordered",
        ]

        def get_dir(s):
            results = [r for r in s.cat.__dir__() if not r.startswith("_")]
            return list(sorted(set(results)))

        s = Series(list("aabbcde")).astype("category")
        results = get_dir(s)
        tm.assert_almost_equal(results, list(sorted(set(ok_for_cat))))

    @pytest.mark.parametrize(
        "index",
        [
            tm.makeUnicodeIndex(10),
            tm.makeStringIndex(10),
            tm.makeCategoricalIndex(10),
            Index(["foo", "bar", "baz"] * 2),
            tm.makeDateIndex(10),
            tm.makePeriodIndex(10),
            tm.makeTimedeltaIndex(10),
            tm.makeIntIndex(10),
            tm.makeUIntIndex(10),
            tm.makeIntIndex(10),
            tm.makeFloatIndex(10),
            Index([True, False]),
            Index(["a{}".format(i) for i in range(101)]),
            pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")),
            pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")),
        ],
    )
    def test_index_tab_completion(self, index):
        # dir contains string-like values of the Index.
        s = pd.Series(index=index)
        dir_s = dir(s)
        for i, x in enumerate(s.index.unique(level=0)):
            if i < 100:
                assert not isinstance(
                    x, str) or not x.isidentifier() or x in dir_s
            else:
                assert x not in dir_s

    def test_not_hashable(self):
        s_empty = Series()
        s = Series([1])
        msg = "'Series' objects are mutable, thus they cannot be hashed"
        with pytest.raises(TypeError, match=msg):
            hash(s_empty)
        with pytest.raises(TypeError, match=msg):
            hash(s)

    def test_contains(self):
        tm.assert_contains_all(self.ts.index, self.ts)

    def test_iter(self):
        for i, val in enumerate(self.series):
            assert val == self.series[i]

        for i, val in enumerate(self.ts):
            assert val == self.ts[i]

    def test_keys(self):
        # HACK: By doing this in two stages, we avoid 2to3 wrapping the call
        # to .keys() in a list()
        getkeys = self.ts.keys
        assert getkeys() is self.ts.index

    def test_values(self):
        tm.assert_almost_equal(self.ts.values, self.ts, check_dtype=False)

    def test_iteritems(self):
        for idx, val in self.series.iteritems():
            assert val == self.series[idx]

        for idx, val in self.ts.iteritems():
            assert val == self.ts[idx]

        # assert is lazy (genrators don't define reverse, lists do)
        assert not hasattr(self.series.iteritems(), "reverse")

    def test_items(self):
        for idx, val in self.series.items():
            assert val == self.series[idx]

        for idx, val in self.ts.items():
            assert val == self.ts[idx]

        # assert is lazy (genrators don't define reverse, lists do)
        assert not hasattr(self.series.items(), "reverse")

    def test_raise_on_info(self):
        s = Series(np.random.randn(10))
        msg = "'Series' object has no attribute 'info'"
        with pytest.raises(AttributeError, match=msg):
            s.info()

    def test_copy(self):

        for deep in [None, False, True]:
            s = Series(np.arange(10), dtype="float64")

            # default deep is True
            if deep is None:
                s2 = s.copy()
            else:
                s2 = s.copy(deep=deep)

            s2[::2] = np.NaN

            if deep is None or deep is True:
                # Did not modify original Series
                assert np.isnan(s2[0])
                assert not np.isnan(s[0])
            else:
                # we DID modify the original Series
                assert np.isnan(s2[0])
                assert np.isnan(s[0])

    def test_copy_tzaware(self):
        # GH#11794
        # copy of tz-aware
        expected = Series([Timestamp("2012/01/01", tz="UTC")])
        expected2 = Series([Timestamp("1999/01/01", tz="UTC")])

        for deep in [None, False, True]:

            s = Series([Timestamp("2012/01/01", tz="UTC")])

            if deep is None:
                s2 = s.copy()
            else:
                s2 = s.copy(deep=deep)

            s2[0] = pd.Timestamp("1999/01/01", tz="UTC")

            # default deep is True
            if deep is None or deep is True:
                # Did not modify original Series
                assert_series_equal(s2, expected2)
                assert_series_equal(s, expected)
            else:
                # we DID modify the original Series
                assert_series_equal(s2, expected2)
                assert_series_equal(s, expected2)

    def test_axis_alias(self):
        s = Series([1, 2, np.nan])
        assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index"))
        assert s.dropna().sum("rows") == 3
        assert s._get_axis_number("rows") == 0
        assert s._get_axis_name("rows") == "index"

    def test_class_axis(self):
        # https://github.com/pandas-dev/pandas/issues/18147
        # no exception and no empty docstring
        assert pydoc.getdoc(Series.index)

    def test_numpy_unique(self):
        # it works!
        np.unique(self.ts)

    def test_ndarray_compat(self):

        # test numpy compat with Series as sub-class of NDFrame
        tsdf = DataFrame(
            np.random.randn(1000, 3),
            columns=["A", "B", "C"],
            index=date_range("1/1/2000", periods=1000),
        )

        def f(x):
            return x[x.idxmax()]

        result = tsdf.apply(f)
        expected = tsdf.max()
        tm.assert_series_equal(result, expected)

        # .item()
        with tm.assert_produces_warning(FutureWarning):
            s = Series([1])
            result = s.item()
            assert result == 1
            assert s.item() == s.iloc[0]

        # using an ndarray like function
        s = Series(np.random.randn(10))
        result = Series(np.ones_like(s))
        expected = Series(1, index=range(10), dtype="float64")
        tm.assert_series_equal(result, expected)

        # ravel
        s = Series(np.random.randn(10))
        tm.assert_almost_equal(s.ravel(order="F"), s.values.ravel(order="F"))

        # compress
        # GH 6658
        s = Series([0, 1.0, -1], index=list("abc"))
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s > 0, s)
        tm.assert_series_equal(result, Series([1.0], index=["b"]))

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s < -1, s)
        # result empty Index(dtype=object) as the same as original
        exp = Series([], dtype="float64", index=Index([], dtype="object"))
        tm.assert_series_equal(result, exp)

        s = Series([0, 1.0, -1], index=[0.1, 0.2, 0.3])
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s > 0, s)
        tm.assert_series_equal(result, Series([1.0], index=[0.2]))

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s < -1, s)
        # result empty Float64Index as the same as original
        exp = Series([], dtype="float64", index=Index([], dtype="float64"))
        tm.assert_series_equal(result, exp)

    def test_str_accessor_updates_on_inplace(self):
        s = pd.Series(list("abc"))
        s.drop([0], inplace=True)
        assert len(s.str.lower()) == 2

    def test_str_attribute(self):
        # GH9068
        methods = ["strip", "rstrip", "lstrip"]
        s = Series([" jack", "jill ", " jesse ", "frank"])
        for method in methods:
            expected = Series([getattr(str, method)(x) for x in s.values])
            assert_series_equal(getattr(Series.str, method)(s.str), expected)

        # str accessor only valid with string values
        s = Series(range(5))
        with pytest.raises(AttributeError, match="only use .str accessor"):
            s.str.repeat(2)

    def test_empty_method(self):
        s_empty = pd.Series()
        assert s_empty.empty

        for full_series in [pd.Series([1]), pd.Series(index=[1])]:
            assert not full_series.empty

    def test_tab_complete_warning(self, ip):
        # https://github.com/pandas-dev/pandas/issues/16409
        pytest.importorskip("IPython", minversion="6.0.0")
        from IPython.core.completer import provisionalcompleter

        code = "import pandas as pd; s = pd.Series()"
        ip.run_code(code)
        with tm.assert_produces_warning(None):
            with provisionalcompleter("ignore"):
                list(ip.Completer.completions("s.", 1))

    def test_integer_series_size(self):
        # GH 25580
        s = Series(range(9))
        assert s.size == 9
        s = Series(range(9), dtype="Int64")
        assert s.size == 9

    def test_get_values_deprecation(self):
        s = Series(range(9))
        with tm.assert_produces_warning(FutureWarning):
            res = s.get_values()
        tm.assert_numpy_array_equal(res, s.values)
Пример #29
0
 def setup_method(self, method):
     self.indices = dict(index=tm.makePeriodIndex(10),
                         index_dec=period_range('20130101', periods=10,
                                                freq='D')[::-1])
     self.setup_indices()
Пример #30
0
    assert rng.inferred_freq == "-1A-JAN"


def test_non_datetime_index2():
    rng = DatetimeIndex(["1/31/2000", "1/31/2001", "1/31/2002"])
    vals = rng.to_pydatetime()

    result = frequencies.infer_freq(vals)
    assert result == rng.inferred_freq


@pytest.mark.parametrize(
    "idx",
    [tm.makeIntIndex(10),
     tm.makeFloatIndex(10),
     tm.makePeriodIndex(10)])
def test_invalid_index_types(idx):
    msg = ("(cannot infer freq from a non-convertible)|"
           "(Check the `freq` attribute instead of using infer_freq)")

    with pytest.raises(TypeError, match=msg):
        frequencies.infer_freq(idx)


@pytest.mark.skipif(is_platform_windows(),
                    reason="see gh-10822: Windows issue")
@pytest.mark.parametrize(
    "idx",
    [tm.makeStringIndex(10), tm.makeUnicodeIndex(10)])
def test_invalid_index_types_unicode(idx):
    # see gh-10822
Пример #31
0
class TestPeriodIndex(DatetimeLike):
    _holder = PeriodIndex

    @pytest.fixture(
        params=[
            tm.makePeriodIndex(10),
            period_range("20130101", periods=10, freq="D")[::-1],
        ],
        ids=["index_inc", "index_dec"],
    )
    def indices(self, request):
        return request.param

    def create_index(self):
        return period_range("20130101", periods=5, freq="D")

    def test_pickle_compat_construction(self):
        pass

    @pytest.mark.parametrize("freq", ["D", "M", "A"])
    def test_pickle_round_trip(self, freq):
        idx = PeriodIndex(["2016-05-16", "NaT", NaT, np.NaN], freq=freq)
        result = tm.round_trip_pickle(idx)
        tm.assert_index_equal(result, idx)

    def test_where(self):
        # This is handled in test_indexing
        pass

    @pytest.mark.parametrize("use_numpy", [True, False])
    @pytest.mark.parametrize(
        "index",
        [
            pd.period_range("2000-01-01", periods=3, freq="D"),
            pd.period_range("2001-01-01", periods=3, freq="2D"),
            pd.PeriodIndex(["2001-01", "NaT", "2003-01"], freq="M"),
        ],
    )
    def test_repeat_freqstr(self, index, use_numpy):
        # GH10183
        expected = PeriodIndex([p for p in index for _ in range(3)])
        result = np.repeat(index, 3) if use_numpy else index.repeat(3)
        tm.assert_index_equal(result, expected)
        assert result.freqstr == index.freqstr

    def test_fillna_period(self):
        # GH 11343
        idx = pd.PeriodIndex(["2011-01-01 09:00", pd.NaT, "2011-01-01 11:00"],
                             freq="H")

        exp = pd.PeriodIndex(
            ["2011-01-01 09:00", "2011-01-01 10:00", "2011-01-01 11:00"],
            freq="H")
        tm.assert_index_equal(
            idx.fillna(pd.Period("2011-01-01 10:00", freq="H")), exp)

        exp = pd.Index(
            [
                pd.Period("2011-01-01 09:00", freq="H"),
                "x",
                pd.Period("2011-01-01 11:00", freq="H"),
            ],
            dtype=object,
        )
        tm.assert_index_equal(idx.fillna("x"), exp)

        exp = pd.Index(
            [
                pd.Period("2011-01-01 09:00", freq="H"),
                pd.Period("2011-01-01", freq="D"),
                pd.Period("2011-01-01 11:00", freq="H"),
            ],
            dtype=object,
        )
        tm.assert_index_equal(idx.fillna(pd.Period("2011-01-01", freq="D")),
                              exp)

    def test_no_millisecond_field(self):
        msg = "type object 'DatetimeIndex' has no attribute 'millisecond'"
        with pytest.raises(AttributeError, match=msg):
            DatetimeIndex.millisecond

        msg = "'DatetimeIndex' object has no attribute 'millisecond'"
        with pytest.raises(AttributeError, match=msg):
            DatetimeIndex([]).millisecond

    @pytest.mark.parametrize("sort", [None, False])
    def test_difference_freq(self, sort):
        # GH14323: difference of Period MUST preserve frequency
        # but the ability to union results must be preserved

        index = period_range("20160920", "20160925", freq="D")

        other = period_range("20160921", "20160924", freq="D")
        expected = PeriodIndex(["20160920", "20160925"], freq="D")
        idx_diff = index.difference(other, sort)
        tm.assert_index_equal(idx_diff, expected)
        tm.assert_attr_equal("freq", idx_diff, expected)

        other = period_range("20160922", "20160925", freq="D")
        idx_diff = index.difference(other, sort)
        expected = PeriodIndex(["20160920", "20160921"], freq="D")
        tm.assert_index_equal(idx_diff, expected)
        tm.assert_attr_equal("freq", idx_diff, expected)

    def test_hash_error(self):
        index = period_range("20010101", periods=10)
        msg = "unhashable type: '{}'".format(type(index).__name__)
        with pytest.raises(TypeError, match=msg):
            hash(index)

    def test_make_time_series(self):
        index = period_range(freq="A", start="1/1/2001", end="12/1/2009")
        series = Series(1, index=index)
        assert isinstance(series, Series)

    def test_shallow_copy_empty(self):

        # GH13067
        idx = PeriodIndex([], freq="M")
        result = idx._shallow_copy()
        expected = idx

        tm.assert_index_equal(result, expected)

    def test_shallow_copy_i8(self):
        # GH-24391
        pi = period_range("2018-01-01", periods=3, freq="2D")
        result = pi._shallow_copy(pi.asi8, freq=pi.freq)
        tm.assert_index_equal(result, pi)

    def test_shallow_copy_changing_freq_raises(self):
        pi = period_range("2018-01-01", periods=3, freq="2D")
        msg = "specified freq and dtype are different"
        with pytest.raises(IncompatibleFrequency, match=msg):
            pi._shallow_copy(pi, freq="H")

    def test_view_asi8(self):
        idx = pd.PeriodIndex([], freq="M")

        exp = np.array([], dtype=np.int64)
        tm.assert_numpy_array_equal(idx.view("i8"), exp)
        tm.assert_numpy_array_equal(idx.asi8, exp)

        idx = pd.PeriodIndex(["2011-01", pd.NaT], freq="M")

        exp = np.array([492, -9223372036854775808], dtype=np.int64)
        tm.assert_numpy_array_equal(idx.view("i8"), exp)
        tm.assert_numpy_array_equal(idx.asi8, exp)

        exp = np.array([14975, -9223372036854775808], dtype=np.int64)
        idx = pd.PeriodIndex(["2011-01-01", pd.NaT], freq="D")
        tm.assert_numpy_array_equal(idx.view("i8"), exp)
        tm.assert_numpy_array_equal(idx.asi8, exp)

    def test_values(self):
        idx = pd.PeriodIndex([], freq="M")

        exp = np.array([], dtype=np.object)
        tm.assert_numpy_array_equal(idx.values, exp)
        tm.assert_numpy_array_equal(idx.to_numpy(), exp)

        exp = np.array([], dtype=np.int64)
        tm.assert_numpy_array_equal(idx._ndarray_values, exp)

        idx = pd.PeriodIndex(["2011-01", pd.NaT], freq="M")

        exp = np.array([pd.Period("2011-01", freq="M"), pd.NaT], dtype=object)
        tm.assert_numpy_array_equal(idx.values, exp)
        tm.assert_numpy_array_equal(idx.to_numpy(), exp)
        exp = np.array([492, -9223372036854775808], dtype=np.int64)
        tm.assert_numpy_array_equal(idx._ndarray_values, exp)

        idx = pd.PeriodIndex(["2011-01-01", pd.NaT], freq="D")

        exp = np.array([pd.Period("2011-01-01", freq="D"), pd.NaT],
                       dtype=object)
        tm.assert_numpy_array_equal(idx.values, exp)
        tm.assert_numpy_array_equal(idx.to_numpy(), exp)
        exp = np.array([14975, -9223372036854775808], dtype=np.int64)
        tm.assert_numpy_array_equal(idx._ndarray_values, exp)

    def test_period_index_length(self):
        pi = period_range(freq="A", start="1/1/2001", end="12/1/2009")
        assert len(pi) == 9

        pi = period_range(freq="Q", start="1/1/2001", end="12/1/2009")
        assert len(pi) == 4 * 9

        pi = period_range(freq="M", start="1/1/2001", end="12/1/2009")
        assert len(pi) == 12 * 9

        start = Period("02-Apr-2005", "B")
        i1 = period_range(start=start, periods=20)
        assert len(i1) == 20
        assert i1.freq == start.freq
        assert i1[0] == start

        end_intv = Period("2006-12-31", "W")
        i1 = period_range(end=end_intv, periods=10)
        assert len(i1) == 10
        assert i1.freq == end_intv.freq
        assert i1[-1] == end_intv

        end_intv = Period("2006-12-31", "1w")
        i2 = period_range(end=end_intv, periods=10)
        assert len(i1) == len(i2)
        assert (i1 == i2).all()
        assert i1.freq == i2.freq

        end_intv = Period("2006-12-31", ("w", 1))
        i2 = period_range(end=end_intv, periods=10)
        assert len(i1) == len(i2)
        assert (i1 == i2).all()
        assert i1.freq == i2.freq

        msg = "start and end must have same freq"
        with pytest.raises(ValueError, match=msg):
            period_range(start=start, end=end_intv)

        end_intv = Period("2005-05-01", "B")
        i1 = period_range(start=start, end=end_intv)

        msg = ("Of the three parameters: start, end, and periods, exactly two"
               " must be specified")
        with pytest.raises(ValueError, match=msg):
            period_range(start=start)

        # infer freq from first element
        i2 = PeriodIndex([end_intv, Period("2005-05-05", "B")])
        assert len(i2) == 2
        assert i2[0] == end_intv

        i2 = PeriodIndex(np.array([end_intv, Period("2005-05-05", "B")]))
        assert len(i2) == 2
        assert i2[0] == end_intv

        # Mixed freq should fail
        vals = [end_intv, Period("2006-12-31", "w")]
        msg = r"Input has different freq=W-SUN from PeriodIndex\(freq=B\)"
        with pytest.raises(IncompatibleFrequency, match=msg):
            PeriodIndex(vals)
        vals = np.array(vals)
        with pytest.raises(ValueError, match=msg):
            PeriodIndex(vals)

    def test_fields(self):
        # year, month, day, hour, minute
        # second, weekofyear, week, dayofweek, weekday, dayofyear, quarter
        # qyear
        pi = period_range(freq="A", start="1/1/2001", end="12/1/2005")
        self._check_all_fields(pi)

        pi = period_range(freq="Q", start="1/1/2001", end="12/1/2002")
        self._check_all_fields(pi)

        pi = period_range(freq="M", start="1/1/2001", end="1/1/2002")
        self._check_all_fields(pi)

        pi = period_range(freq="D", start="12/1/2001", end="6/1/2001")
        self._check_all_fields(pi)

        pi = period_range(freq="B", start="12/1/2001", end="6/1/2001")
        self._check_all_fields(pi)

        pi = period_range(freq="H", start="12/31/2001", end="1/1/2002 23:00")
        self._check_all_fields(pi)

        pi = period_range(freq="Min", start="12/31/2001", end="1/1/2002 00:20")
        self._check_all_fields(pi)

        pi = period_range(freq="S",
                          start="12/31/2001 00:00:00",
                          end="12/31/2001 00:05:00")
        self._check_all_fields(pi)

        end_intv = Period("2006-12-31", "W")
        i1 = period_range(end=end_intv, periods=10)
        self._check_all_fields(i1)

    def _check_all_fields(self, periodindex):
        fields = [
            "year",
            "month",
            "day",
            "hour",
            "minute",
            "second",
            "weekofyear",
            "week",
            "dayofweek",
            "dayofyear",
            "quarter",
            "qyear",
            "days_in_month",
        ]

        periods = list(periodindex)
        s = pd.Series(periodindex)

        for field in fields:
            field_idx = getattr(periodindex, field)
            assert len(periodindex) == len(field_idx)
            for x, val in zip(periods, field_idx):
                assert getattr(x, field) == val

            if len(s) == 0:
                continue

            field_s = getattr(s.dt, field)
            assert len(periodindex) == len(field_s)
            for x, val in zip(periods, field_s):
                assert getattr(x, field) == val

    def test_period_set_index_reindex(self):
        # GH 6631
        df = DataFrame(np.random.random(6))
        idx1 = period_range("2011/01/01", periods=6, freq="M")
        idx2 = period_range("2013", periods=6, freq="A")

        df = df.set_index(idx1)
        tm.assert_index_equal(df.index, idx1)
        df = df.set_index(idx2)
        tm.assert_index_equal(df.index, idx2)

    @pytest.mark.parametrize(
        "p_values, o_values, values, expected_values",
        [
            (
                [Period("2019Q1", "Q-DEC"),
                 Period("2019Q2", "Q-DEC")],
                [Period("2019Q1", "Q-DEC"),
                 Period("2019Q2", "Q-DEC"), "All"],
                [1.0, 1.0],
                [1.0, 1.0, np.nan],
            ),
            (
                [Period("2019Q1", "Q-DEC"),
                 Period("2019Q2", "Q-DEC")],
                [Period("2019Q1", "Q-DEC"),
                 Period("2019Q2", "Q-DEC")],
                [1.0, 1.0],
                [1.0, 1.0],
            ),
        ],
    )
    def test_period_reindex_with_object(self, p_values, o_values, values,
                                        expected_values):
        # GH 28337
        period_index = PeriodIndex(p_values)
        object_index = Index(o_values)

        s = pd.Series(values, index=period_index)
        result = s.reindex(object_index)
        expected = pd.Series(expected_values, index=object_index)
        tm.assert_series_equal(result, expected)

    def test_factorize(self):
        idx1 = PeriodIndex(
            ["2014-01", "2014-01", "2014-02", "2014-02", "2014-03", "2014-03"],
            freq="M")

        exp_arr = np.array([0, 0, 1, 1, 2, 2], dtype=np.intp)
        exp_idx = PeriodIndex(["2014-01", "2014-02", "2014-03"], freq="M")

        arr, idx = idx1.factorize()
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)

        arr, idx = idx1.factorize(sort=True)
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)

        idx2 = pd.PeriodIndex(
            ["2014-03", "2014-03", "2014-02", "2014-01", "2014-03", "2014-01"],
            freq="M")

        exp_arr = np.array([2, 2, 1, 0, 2, 0], dtype=np.intp)
        arr, idx = idx2.factorize(sort=True)
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)

        exp_arr = np.array([0, 0, 1, 2, 0, 2], dtype=np.intp)
        exp_idx = PeriodIndex(["2014-03", "2014-02", "2014-01"], freq="M")
        arr, idx = idx2.factorize()
        tm.assert_numpy_array_equal(arr, exp_arr)
        tm.assert_index_equal(idx, exp_idx)

    def test_is_(self):
        create_index = lambda: period_range(
            freq="A", start="1/1/2001", end="12/1/2009")
        index = create_index()
        assert index.is_(index)
        assert not index.is_(create_index())
        assert index.is_(index.view())
        assert index.is_(index.view().view().view().view().view())
        assert index.view().is_(index)
        ind2 = index.view()
        index.name = "Apple"
        assert ind2.is_(index)
        assert not index.is_(index[:])
        assert not index.is_(index.asfreq("M"))
        assert not index.is_(index.asfreq("A"))

        assert not index.is_(index - 2)
        assert not index.is_(index - 0)

    def test_contains(self):
        rng = period_range("2007-01", freq="M", periods=10)

        assert Period("2007-01", freq="M") in rng
        assert not Period("2007-01", freq="D") in rng
        assert not Period("2007-01", freq="2M") in rng

    def test_contains_nat(self):
        # see gh-13582
        idx = period_range("2007-01", freq="M", periods=10)
        assert pd.NaT not in idx
        assert None not in idx
        assert float("nan") not in idx
        assert np.nan not in idx

        idx = pd.PeriodIndex(["2011-01", "NaT", "2011-02"], freq="M")
        assert pd.NaT in idx
        assert None in idx
        assert float("nan") in idx
        assert np.nan in idx

    def test_periods_number_check(self):
        msg = ("Of the three parameters: start, end, and periods, exactly two"
               " must be specified")
        with pytest.raises(ValueError, match=msg):
            period_range("2011-1-1", "2012-1-1", "B")

    def test_start_time(self):
        # GH 17157
        index = period_range(freq="M", start="2016-01-01", end="2016-05-31")
        expected_index = date_range("2016-01-01", end="2016-05-31", freq="MS")
        tm.assert_index_equal(index.start_time, expected_index)

    def test_end_time(self):
        # GH 17157
        index = period_range(freq="M", start="2016-01-01", end="2016-05-31")
        expected_index = date_range("2016-01-01", end="2016-05-31", freq="M")
        expected_index = expected_index.shift(1, freq="D").shift(-1, freq="ns")
        tm.assert_index_equal(index.end_time, expected_index)

    def test_index_duplicate_periods(self):
        # monotonic
        idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN")
        ts = Series(np.random.randn(len(idx)), index=idx)

        result = ts[2007]
        expected = ts[1:3]
        tm.assert_series_equal(result, expected)
        result[:] = 1
        assert (ts[1:3] == 1).all()

        # not monotonic
        idx = PeriodIndex([2000, 2007, 2007, 2009, 2007], freq="A-JUN")
        ts = Series(np.random.randn(len(idx)), index=idx)

        result = ts[2007]
        expected = ts[idx == 2007]
        tm.assert_series_equal(result, expected)

    def test_index_unique(self):
        idx = PeriodIndex([2000, 2007, 2007, 2009, 2009], freq="A-JUN")
        expected = PeriodIndex([2000, 2007, 2009], freq="A-JUN")
        tm.assert_index_equal(idx.unique(), expected)
        assert idx.nunique() == 3

        idx = PeriodIndex([2000, 2007, 2007, 2009, 2007],
                          freq="A-JUN",
                          tz="US/Eastern")
        expected = PeriodIndex([2000, 2007, 2009],
                               freq="A-JUN",
                               tz="US/Eastern")
        tm.assert_index_equal(idx.unique(), expected)
        assert idx.nunique() == 3

    def test_shift(self):
        # This is tested in test_arithmetic
        pass

    @td.skip_if_32bit
    def test_ndarray_compat_properties(self):
        super().test_ndarray_compat_properties()

    def test_negative_ordinals(self):
        Period(ordinal=-1000, freq="A")
        Period(ordinal=0, freq="A")

        idx1 = PeriodIndex(ordinal=[-1, 0, 1], freq="A")
        idx2 = PeriodIndex(ordinal=np.array([-1, 0, 1]), freq="A")
        tm.assert_index_equal(idx1, idx2)

    def test_pindex_fieldaccessor_nat(self):
        idx = PeriodIndex(["2011-01", "2011-02", "NaT", "2012-03", "2012-04"],
                          freq="D",
                          name="name")

        exp = Index([2011, 2011, -1, 2012, 2012], dtype=np.int64, name="name")
        tm.assert_index_equal(idx.year, exp)
        exp = Index([1, 2, -1, 3, 4], dtype=np.int64, name="name")
        tm.assert_index_equal(idx.month, exp)

    def test_pindex_qaccess(self):
        pi = PeriodIndex(["2Q05", "3Q05", "4Q05", "1Q06", "2Q06"], freq="Q")
        s = Series(np.random.rand(len(pi)), index=pi).cumsum()
        # Todo: fix these accessors!
        assert s["05Q4"] == s[2]

    def test_pindex_multiples(self):
        expected = PeriodIndex(
            ["2011-01", "2011-03", "2011-05", "2011-07", "2011-09", "2011-11"],
            freq="2M",
        )

        pi = period_range(start="1/1/11", end="12/31/11", freq="2M")
        tm.assert_index_equal(pi, expected)
        assert pi.freq == offsets.MonthEnd(2)
        assert pi.freqstr == "2M"

        pi = period_range(start="1/1/11", periods=6, freq="2M")
        tm.assert_index_equal(pi, expected)
        assert pi.freq == offsets.MonthEnd(2)
        assert pi.freqstr == "2M"

    def test_iteration(self):
        index = period_range(start="1/1/10", periods=4, freq="B")

        result = list(index)
        assert isinstance(result[0], Period)
        assert result[0].freq == index.freq

    def test_is_full(self):
        index = PeriodIndex([2005, 2007, 2009], freq="A")
        assert not index.is_full

        index = PeriodIndex([2005, 2006, 2007], freq="A")
        assert index.is_full

        index = PeriodIndex([2005, 2005, 2007], freq="A")
        assert not index.is_full

        index = PeriodIndex([2005, 2005, 2006], freq="A")
        assert index.is_full

        index = PeriodIndex([2006, 2005, 2005], freq="A")
        with pytest.raises(ValueError, match="Index is not monotonic"):
            index.is_full

        assert index[:0].is_full

    def test_with_multi_index(self):
        # #1705
        index = date_range("1/1/2012", periods=4, freq="12H")
        index_as_arrays = [index.to_period(freq="D"), index.hour]

        s = Series([0, 1, 2, 3], index_as_arrays)

        assert isinstance(s.index.levels[0], PeriodIndex)

        assert isinstance(s.index.values[0][0], Period)

    def test_convert_array_of_periods(self):
        rng = period_range("1/1/2000", periods=20, freq="D")
        periods = list(rng)

        result = pd.Index(periods)
        assert isinstance(result, PeriodIndex)

    def test_append_concat(self):
        # #1815
        d1 = date_range("12/31/1990", "12/31/1999", freq="A-DEC")
        d2 = date_range("12/31/2000", "12/31/2009", freq="A-DEC")

        s1 = Series(np.random.randn(10), d1)
        s2 = Series(np.random.randn(10), d2)

        s1 = s1.to_period()
        s2 = s2.to_period()

        # drops index
        result = pd.concat([s1, s2])
        assert isinstance(result.index, PeriodIndex)
        assert result.index[0] == s1.index[0]

    def test_pickle_freq(self):
        # GH2891
        prng = period_range("1/1/2011", "1/1/2012", freq="M")
        new_prng = tm.round_trip_pickle(prng)
        assert new_prng.freq == offsets.MonthEnd()
        assert new_prng.freqstr == "M"

    def test_map(self):
        # test_map_dictlike generally tests

        index = PeriodIndex([2005, 2007, 2009], freq="A")
        result = index.map(lambda x: x.ordinal)
        exp = Index([x.ordinal for x in index])
        tm.assert_index_equal(result, exp)

    def test_join_self(self, join_type):
        index = period_range("1/1/2000", periods=10)
        joined = index.join(index, how=join_type)
        assert index is joined

    def test_insert(self):
        # GH 18295 (test missing)
        expected = PeriodIndex(
            ["2017Q1", pd.NaT, "2017Q2", "2017Q3", "2017Q4"], freq="Q")
        for na in (np.nan, pd.NaT, None):
            result = period_range("2017Q1", periods=4, freq="Q").insert(1, na)
            tm.assert_index_equal(result, expected)

    @pytest.mark.parametrize(
        "msg, key",
        [
            (r"Period\('2019', 'A-DEC'\), 'foo', 'bar'",
             (Period(2019), "foo", "bar")),
            (r"Period\('2019', 'A-DEC'\), 'y1', 'bar'",
             (Period(2019), "y1", "bar")),
            (r"Period\('2019', 'A-DEC'\), 'foo', 'z1'",
             (Period(2019), "foo", "z1")),
            (
                r"Period\('2018', 'A-DEC'\), Period\('2016', 'A-DEC'\), 'bar'",
                (Period(2018), Period(2016), "bar"),
            ),
            (r"Period\('2018', 'A-DEC'\), 'foo', 'y1'",
             (Period(2018), "foo", "y1")),
            (
                r"Period\('2017', 'A-DEC'\), 'foo', Period\('2015', 'A-DEC'\)",
                (Period(2017), "foo", Period(2015)),
            ),
            (r"Period\('2017', 'A-DEC'\), 'z1', 'bar'",
             (Period(2017), "z1", "bar")),
        ],
    )
    def test_contains_raise_error_if_period_index_is_in_multi_index(
            self, msg, key):
        # issue 20684
        """
        parse_time_string return parameter if type not matched.
        PeriodIndex.get_loc takes returned value from parse_time_string as a tuple.
        If first argument is Period and a tuple has 3 items,
        process go on not raise exception
        """
        df = DataFrame({
            "A": [Period(2019), "x1", "x2"],
            "B": [Period(2018), Period(2016), "y1"],
            "C": [Period(2017), "z1", Period(2015)],
            "V1": [1, 2, 3],
            "V2": [10, 20, 30],
        }).set_index(["A", "B", "C"])
        with pytest.raises(KeyError, match=msg):
            df.loc[key]
Пример #32
0
	pd.Series(range(10),index=pd.date_range("2000",freq="D",periods=10))

    start=pd.Timestamp("2018-01-06 00:00:00")
    pd.date_range(start, periods=10,freq="2h20min")


    rng=pd.date_range(start,end,freq="D")
    ts=pd.Series(np.random.randn(len(rng)),index=rng)
    ts.shift(2)[1]

# dummy datasets with dates
	import pandas.util.testing as tm
	tm.N, tm.K = 5,3
	tm.makeTimedeltaIndex(), tm.makeTimeSeries(), tm.makePeriodSeries()
	tm.makeDateIndex(), tm.makePeriodIndex(), tm.makeObjectSeries()


	from itertools import product
	datecols = ['year', 'month', 'day']
	df = pd.DataFrame(list(product([2016,2017],[1,2],[1,2,3])),columns = datecols)
	df['data']=np.random.randn(len(df))
	df.index = pd.to_datetime(df[datecols])


# Align 2 datetime series and interpolate if necessary

    from datetime import date, time
    import numpy as np
    from ..errors import MqValueError
Пример #33
0
import numpy as np
import pytest

from pandas.compat import long, lzip

import pandas as pd
from pandas.core.indexes.api import Index, MultiIndex
import pandas.util.testing as tm


@pytest.fixture(params=[tm.makeUnicodeIndex(100),
                        tm.makeStringIndex(100),
                        tm.makeDateIndex(100),
                        tm.makePeriodIndex(100),
                        tm.makeTimedeltaIndex(100),
                        tm.makeIntIndex(100),
                        tm.makeUIntIndex(100),
                        tm.makeRangeIndex(100),
                        tm.makeFloatIndex(100),
                        Index([True, False]),
                        tm.makeCategoricalIndex(100),
                        Index([]),
                        MultiIndex.from_tuples(lzip(
                            ['foo', 'bar', 'baz'], [1, 2, 3])),
                        Index([0, 0, 1, 1, 2, 2])],
                ids=lambda x: type(x).__name__)
def indices(request):
    return request.param


@pytest.fixture(params=[1, np.array(1, dtype=np.int64)])
Пример #34
0
 def setUp(self):
     self.indices = dict(index=tm.makePeriodIndex(10))
     self.setup_indices()
Пример #35
0
class TestHashing(object):
    @pytest.fixture(params=[
        Series([1, 2, 3] * 3, dtype='int32'),
        Series([None, 2.5, 3.5] * 3, dtype='float32'),
        Series(['a', 'b', 'c'] * 3, dtype='category'),
        Series(['d', 'e', 'f'] * 3),
        Series([True, False, True] * 3),
        Series(pd.date_range('20130101', periods=9)),
        Series(pd.date_range('20130101', periods=9, tz='US/Eastern')),
        Series(pd.timedelta_range('2000', periods=9))
    ])
    def series(self, request):
        return request.param

    def test_consistency(self):
        # check that our hash doesn't change because of a mistake
        # in the actual code; this is the ground truth
        result = hash_pandas_object(Index(['foo', 'bar', 'baz']))
        expected = Series(np.array(
            [3600424527151052760, 1374399572096150070, 477881037637427054],
            dtype='uint64'),
                          index=['foo', 'bar', 'baz'])
        tm.assert_series_equal(result, expected)

    def test_hash_array(self, series):
        a = series.values
        tm.assert_numpy_array_equal(hash_array(a), hash_array(a))

    def test_hash_array_mixed(self):
        result1 = hash_array(np.array([3, 4, 'All']))
        result2 = hash_array(np.array(['3', '4', 'All']))
        result3 = hash_array(np.array([3, 4, 'All'], dtype=object))
        tm.assert_numpy_array_equal(result1, result2)
        tm.assert_numpy_array_equal(result1, result3)

    @pytest.mark.parametrize('val', [5, 'foo', pd.Timestamp('20130101')])
    def test_hash_array_errors(self, val):
        msg = 'must pass a ndarray-like'
        with tm.assert_raises_regex(TypeError, msg):
            hash_array(val)

    def check_equal(self, obj, **kwargs):
        a = hash_pandas_object(obj, **kwargs)
        b = hash_pandas_object(obj, **kwargs)
        tm.assert_series_equal(a, b)

        kwargs.pop('index', None)
        a = hash_pandas_object(obj, **kwargs)
        b = hash_pandas_object(obj, **kwargs)
        tm.assert_series_equal(a, b)

    def check_not_equal_with_index(self, obj):

        # check that we are not hashing the same if
        # we include the index
        if not isinstance(obj, Index):
            a = hash_pandas_object(obj, index=True)
            b = hash_pandas_object(obj, index=False)
            if len(obj):
                assert not (a == b).all()

    def test_hash_tuples(self):
        tups = [(1, 'one'), (1, 'two'), (2, 'one')]
        result = hash_tuples(tups)
        expected = hash_pandas_object(MultiIndex.from_tuples(tups)).values
        tm.assert_numpy_array_equal(result, expected)

        result = hash_tuples(tups[0])
        assert result == expected[0]

    @pytest.mark.parametrize('tup', [(1, 'one'), (1, np.nan),
                                     (1.0, pd.NaT, 'A'),
                                     ('A', pd.Timestamp("2012-01-01"))])
    def test_hash_tuple(self, tup):
        # test equivalence between hash_tuples and hash_tuple
        result = hash_tuple(tup)
        expected = hash_tuples([tup])[0]
        assert result == expected

    @pytest.mark.parametrize('val', [
        1, 1.4, 'A', b'A', u'A',
        pd.Timestamp("2012-01-01"),
        pd.Timestamp("2012-01-01", tz='Europe/Brussels'),
        datetime.datetime(2012, 1, 1),
        pd.Timestamp("2012-01-01", tz='EST').to_pydatetime(),
        pd.Timedelta('1 days'),
        datetime.timedelta(1),
        pd.Period('2012-01-01', freq='D'),
        pd.Interval(0, 1), np.nan, pd.NaT, None
    ])
    def test_hash_scalar(self, val):
        result = _hash_scalar(val)
        expected = hash_array(np.array([val], dtype=object), categorize=True)
        assert result[0] == expected[0]

    @pytest.mark.parametrize('val', [5, 'foo', pd.Timestamp('20130101')])
    def test_hash_tuples_err(self, val):
        msg = 'must be convertible to a list-of-tuples'
        with tm.assert_raises_regex(TypeError, msg):
            hash_tuples(val)

    def test_multiindex_unique(self):
        mi = MultiIndex.from_tuples([(118, 472), (236, 118), (51, 204),
                                     (102, 51)])
        assert mi.is_unique is True
        result = hash_pandas_object(mi)
        assert result.is_unique is True

    def test_multiindex_objects(self):
        mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]],
                        labels=[[0, 1, 0, 2], [2, 0, 0, 1]],
                        names=['col1', 'col2'])
        recons = mi._sort_levels_monotonic()

        # these are equal
        assert mi.equals(recons)
        assert Index(mi.values).equals(Index(recons.values))

        # _hashed_values and hash_pandas_object(..., index=False)
        # equivalency
        expected = hash_pandas_object(mi, index=False).values
        result = mi._hashed_values
        tm.assert_numpy_array_equal(result, expected)

        expected = hash_pandas_object(recons, index=False).values
        result = recons._hashed_values
        tm.assert_numpy_array_equal(result, expected)

        expected = mi._hashed_values
        result = recons._hashed_values

        # values should match, but in different order
        tm.assert_numpy_array_equal(np.sort(result), np.sort(expected))

    @pytest.mark.parametrize('obj', [
        Series([1, 2, 3]),
        Series([1.0, 1.5, 3.2]),
        Series([1.0, 1.5, np.nan]),
        Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]),
        Series(['a', 'b', 'c']),
        Series(['a', np.nan, 'c']),
        Series(['a', None, 'c']),
        Series([True, False, True]),
        Series(),
        Index([1, 2, 3]),
        Index([True, False, True]),
        DataFrame({
            'x': ['a', 'b', 'c'],
            'y': [1, 2, 3]
        }),
        DataFrame(),
        tm.makeMissingDataframe(),
        tm.makeMixedDataFrame(),
        tm.makeTimeDataFrame(),
        tm.makeTimeSeries(),
        tm.makeTimedeltaIndex(),
        tm.makePeriodIndex(),
        Series(tm.makePeriodIndex()),
        Series(pd.date_range('20130101', periods=3, tz='US/Eastern')),
        MultiIndex.from_product([
            range(5), ['foo', 'bar', 'baz'],
            pd.date_range('20130101', periods=2)
        ]),
        MultiIndex.from_product([pd.CategoricalIndex(list('aabc')),
                                 range(3)])
    ])
    def test_hash_pandas_object(self, obj):
        self.check_equal(obj)
        self.check_not_equal_with_index(obj)

    def test_hash_pandas_object2(self, series):
        self.check_equal(series)
        self.check_not_equal_with_index(series)

    @pytest.mark.parametrize(
        'obj',
        [Series([], dtype='float64'),
         Series([], dtype='object'),
         Index([])])
    def test_hash_pandas_empty_object(self, obj):
        # these are by-definition the same with
        # or w/o the index as the data is empty
        self.check_equal(obj)

    @pytest.mark.parametrize('s1', [
        Series(['a', 'b', 'c', 'd']),
        Series([1000, 2000, 3000, 4000]),
        Series(pd.date_range(0, periods=4))
    ])
    @pytest.mark.parametrize('categorize', [True, False])
    def test_categorical_consistency(self, s1, categorize):
        # GH15143
        # Check that categoricals hash consistent with their values, not codes
        # This should work for categoricals of any dtype
        s2 = s1.astype('category').cat.set_categories(s1)
        s3 = s2.cat.set_categories(list(reversed(s1)))

        # These should all hash identically
        h1 = hash_pandas_object(s1, categorize=categorize)
        h2 = hash_pandas_object(s2, categorize=categorize)
        h3 = hash_pandas_object(s3, categorize=categorize)
        tm.assert_series_equal(h1, h2)
        tm.assert_series_equal(h1, h3)

    def test_categorical_with_nan_consistency(self):
        c = pd.Categorical.from_codes([-1, 0, 1, 2, 3, 4],
                                      categories=pd.date_range('2012-01-01',
                                                               periods=5,
                                                               name='B'))
        expected = hash_array(c, categorize=False)
        c = pd.Categorical.from_codes([-1, 0],
                                      categories=[pd.Timestamp('2012-01-01')])
        result = hash_array(c, categorize=False)
        assert result[0] in expected
        assert result[1] in expected

    @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
    def test_pandas_errors(self):
        with pytest.raises(TypeError):
            hash_pandas_object(pd.Timestamp('20130101'))

        obj = tm.makePanel()

        with pytest.raises(TypeError):
            hash_pandas_object(obj)

    def test_hash_keys(self):
        # using different hash keys, should have different hashes
        # for the same data

        # this only matters for object dtypes
        obj = Series(list('abc'))
        a = hash_pandas_object(obj, hash_key='9876543210123456')
        b = hash_pandas_object(obj, hash_key='9876543210123465')
        assert (a != b).all()

    def test_invalid_key(self):
        # this only matters for object dtypes
        msg = 'key should be a 16-byte string encoded'
        with tm.assert_raises_regex(ValueError, msg):
            hash_pandas_object(Series(list('abc')), hash_key='foo')

    def test_alread_encoded(self):
        # if already encoded then ok

        obj = Series(list('abc')).str.encode('utf8')
        self.check_equal(obj)

    def test_alternate_encoding(self):

        obj = Series(list('abc'))
        self.check_equal(obj, encoding='ascii')

    @pytest.mark.parametrize('l_exp', range(8))
    @pytest.mark.parametrize('l_add', [0, 1])
    def test_same_len_hash_collisions(self, l_exp, l_add):
        length = 2**(l_exp + 8) + l_add
        s = tm.rands_array(length, 2)
        result = hash_array(s, 'utf8')
        assert not result[0] == result[1]

    def test_hash_collisions(self):

        # hash collisions are bad
        # https://github.com/pandas-dev/pandas/issues/14711#issuecomment-264885726
        L = [
            'Ingrid-9Z9fKIZmkO7i7Cn51Li34pJm44fgX6DYGBNj3VPlOH50m7HnBlPxfIwFMrcNJNMP6PSgLmwWnInciMWrCSAlLEvt7JkJl4IxiMrVbXSa8ZQoVaq5xoQPjltuJEfwdNlO6jo8qRRHvD8sBEBMQASrRa6TsdaPTPCBo3nwIBpE7YzzmyH0vMBhjQZLx1aCT7faSEx7PgFxQhHdKFWROcysamgy9iVj8DO2Fmwg1NNl93rIAqC3mdqfrCxrzfvIY8aJdzin2cHVzy3QUJxZgHvtUtOLxoqnUHsYbNTeq0xcLXpTZEZCxD4PGubIuCNf32c33M7HFsnjWSEjE2yVdWKhmSVodyF8hFYVmhYnMCztQnJrt3O8ZvVRXd5IKwlLexiSp4h888w7SzAIcKgc3g5XQJf6MlSMftDXm9lIsE1mJNiJEv6uY6pgvC3fUPhatlR5JPpVAHNSbSEE73MBzJrhCAbOLXQumyOXigZuPoME7QgJcBalliQol7YZ9',  # noqa
            'Tim-b9MddTxOWW2AT1Py6vtVbZwGAmYCjbp89p8mxsiFoVX4FyDOF3wFiAkyQTUgwg9sVqVYOZo09Dh1AzhFHbgij52ylF0SEwgzjzHH8TGY8Lypart4p4onnDoDvVMBa0kdthVGKl6K0BDVGzyOXPXKpmnMF1H6rJzqHJ0HywfwS4XYpVwlAkoeNsiicHkJUFdUAhG229INzvIAiJuAHeJDUoyO4DCBqtoZ5TDend6TK7Y914yHlfH3g1WZu5LksKv68VQHJriWFYusW5e6ZZ6dKaMjTwEGuRgdT66iU5nqWTHRH8WSzpXoCFwGcTOwyuqPSe0fTe21DVtJn1FKj9F9nEnR9xOvJUO7E0piCIF4Ad9yAIDY4DBimpsTfKXCu1vdHpKYerzbndfuFe5AhfMduLYZJi5iAw8qKSwR5h86ttXV0Mc0QmXz8dsRvDgxjXSmupPxBggdlqUlC828hXiTPD7am0yETBV0F3bEtvPiNJfremszcV8NcqAoARMe'
        ]  # noqa

        # these should be different!
        result1 = hash_array(np.asarray(L[0:1], dtype=object), 'utf8')
        expected1 = np.array([14963968704024874985], dtype=np.uint64)
        tm.assert_numpy_array_equal(result1, expected1)

        result2 = hash_array(np.asarray(L[1:2], dtype=object), 'utf8')
        expected2 = np.array([16428432627716348016], dtype=np.uint64)
        tm.assert_numpy_array_equal(result2, expected2)

        result = hash_array(np.asarray(L, dtype=object), 'utf8')
        tm.assert_numpy_array_equal(
            result, np.concatenate([expected1, expected2], axis=0))
Пример #36
0
 def setup_method(self, method):
     self.indices = dict(index=tm.makePeriodIndex(10),
                         index_dec=period_range('20130101', periods=10,
                                                freq='D')[::-1])
     self.setup_indices()
Пример #37
0
 def setup_method(self, method):
     self.indices = dict(index=tm.makePeriodIndex(10))
     self.setup_indices()
Пример #38
0
    rng = DatetimeIndex(["1/31/2000", "1/31/2001", "1/31/2002"])
    rng = rng[::-1]

    assert rng.inferred_freq == "-1A-JAN"


def test_non_datetime_index2():
    rng = DatetimeIndex(["1/31/2000", "1/31/2001", "1/31/2002"])
    vals = rng.to_pydatetime()

    result = frequencies.infer_freq(vals)
    assert result == rng.inferred_freq


@pytest.mark.parametrize("idx", [
    tm.makeIntIndex(10), tm.makeFloatIndex(10), tm.makePeriodIndex(10)
])
def test_invalid_index_types(idx):
    msg = ("(cannot infer freq from a non-convertible)|"
           "(Check the `freq` attribute instead of using infer_freq)")

    with pytest.raises(TypeError, match=msg):
        frequencies.infer_freq(idx)


@pytest.mark.skipif(is_platform_windows(),
                    reason="see gh-10822: Windows issue")
@pytest.mark.parametrize("idx", [tm.makeStringIndex(10),
                                 tm.makeUnicodeIndex(10)])
def test_invalid_index_types_unicode(idx):
    # see gh-10822
Пример #39
0
 def setUp(self):
     self.indices = dict(index=tm.makePeriodIndex(10))
     self.setup_indices()
Пример #40
0
    Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]),
    Series(["a", "b", "c"]),
    Series(["a", np.nan, "c"]),
    Series(["a", None, "c"]),
    Series([True, False, True]),
    Series(),
    Index([1, 2, 3]),
    Index([True, False, True]),
    DataFrame({"x": ["a", "b", "c"], "y": [1, 2, 3]}),
    DataFrame(),
    tm.makeMissingDataframe(),
    tm.makeMixedDataFrame(),
    tm.makeTimeDataFrame(),
    tm.makeTimeSeries(),
    tm.makeTimedeltaIndex(),
    tm.makePeriodIndex(),
    Series(tm.makePeriodIndex()),
    Series(pd.date_range("20130101", periods=3, tz="US/Eastern")),
    MultiIndex.from_product([range(5), ["foo", "bar", "baz"],
                             pd.date_range("20130101", periods=2)]),
    MultiIndex.from_product([pd.CategoricalIndex(list("aabc")), range(3)])
])
def test_hash_pandas_object(obj, index):
    _check_equal(obj, index=index)
    _check_not_equal_with_index(obj)


def test_hash_pandas_object2(series, index):
    _check_equal(series, index=index)
    _check_not_equal_with_index(series)
Пример #41
0
        Series(["a", None, "c"]),
        Series([True, False, True]),
        Series(),
        Index([1, 2, 3]),
        Index([True, False, True]),
        DataFrame({
            "x": ["a", "b", "c"],
            "y": [1, 2, 3]
        }),
        DataFrame(),
        tm.makeMissingDataframe(),
        tm.makeMixedDataFrame(),
        tm.makeTimeDataFrame(),
        tm.makeTimeSeries(),
        tm.makeTimedeltaIndex(),
        tm.makePeriodIndex(),
        Series(tm.makePeriodIndex()),
        Series(pd.date_range("20130101", periods=3, tz="US/Eastern")),
        MultiIndex.from_product([
            range(5), ["foo", "bar", "baz"],
            pd.date_range("20130101", periods=2)
        ]),
        MultiIndex.from_product([pd.CategoricalIndex(list("aabc")),
                                 range(3)]),
    ],
)
def test_hash_pandas_object(obj, index):
    _check_equal(obj, index=index)
    _check_not_equal_with_index(obj)

Пример #42
0
import numpy as np
import pytest

import pandas as pd
from pandas.core.indexes.api import Index, MultiIndex
import pandas.util.testing as tm

indices_dict = {
    "unicode": tm.makeUnicodeIndex(100),
    "string": tm.makeStringIndex(100),
    "datetime": tm.makeDateIndex(100),
    "period": tm.makePeriodIndex(100),
    "timedelta": tm.makeTimedeltaIndex(100),
    "int": tm.makeIntIndex(100),
    "uint": tm.makeUIntIndex(100),
    "range": tm.makeRangeIndex(100),
    "float": tm.makeFloatIndex(100),
    "bool": Index([True, False]),
    "categorical": tm.makeCategoricalIndex(100),
    "interval": tm.makeIntervalIndex(100),
    "empty": Index([]),
    "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])),
    "repeats": Index([0, 0, 1, 1, 2, 2]),
}


@pytest.fixture(params=indices_dict.keys())
def indices(request):
    # copy to avoid mutation, e.g. setting .name
    return indices_dict[request.param].copy()
Пример #43
0
    def setup_method(self, method):
        self.bool_index = tm.makeBoolIndex(10, name="a")
        self.int_index = tm.makeIntIndex(10, name="a")
        self.float_index = tm.makeFloatIndex(10, name="a")
        self.dt_index = tm.makeDateIndex(10, name="a")
        self.dt_tz_index = tm.makeDateIndex(
            10, name="a").tz_localize(tz="US/Eastern")
        self.period_index = tm.makePeriodIndex(10, name="a")
        self.string_index = tm.makeStringIndex(10, name="a")
        self.unicode_index = tm.makeUnicodeIndex(10, name="a")

        arr = np.random.randn(10)
        self.bool_series = Series(arr, index=self.bool_index, name="a")
        self.int_series = Series(arr, index=self.int_index, name="a")
        self.float_series = Series(arr, index=self.float_index, name="a")
        self.dt_series = Series(arr, index=self.dt_index, name="a")
        self.dt_tz_series = self.dt_tz_index.to_series()
        self.period_series = Series(arr, index=self.period_index, name="a")
        self.string_series = Series(arr, index=self.string_index, name="a")
        self.unicode_series = Series(arr, index=self.unicode_index, name="a")

        types = [
            "bool", "int", "float", "dt", "dt_tz", "period", "string",
            "unicode"
        ]
        self.indexes = [getattr(self, "{}_index".format(t)) for t in types]
        self.series = [getattr(self, "{}_series".format(t)) for t in types]

        # To test narrow dtypes, we use narrower *data* elements, not *index* elements
        index = self.int_index
        self.float32_series = Series(arr.astype(np.float32),
                                     index=index,
                                     name="a")

        arr_int = np.random.choice(10, size=10, replace=False)
        self.int8_series = Series(arr_int.astype(np.int8),
                                  index=index,
                                  name="a")
        self.int16_series = Series(arr_int.astype(np.int16),
                                   index=index,
                                   name="a")
        self.int32_series = Series(arr_int.astype(np.int32),
                                   index=index,
                                   name="a")

        self.uint8_series = Series(arr_int.astype(np.uint8),
                                   index=index,
                                   name="a")
        self.uint16_series = Series(arr_int.astype(np.uint16),
                                    index=index,
                                    name="a")
        self.uint32_series = Series(arr_int.astype(np.uint32),
                                    index=index,
                                    name="a")

        nrw_types = [
            "float32", "int8", "int16", "int32", "uint8", "uint16", "uint32"
        ]
        self.narrow_series = [
            getattr(self, "{}_series".format(t)) for t in nrw_types
        ]

        self.objs = self.indexes + self.series + self.narrow_series