示例#1
0
    def setup_method(self, method):
        """Create the index collections stored on the test instance.

        Calls the parent ``setup_method`` and then populates ``self.d`` with
        one flat index per label and ``self.mi`` with a labelled
        ``MultiIndex``.
        """
        super(TestIndex, self).setup_method(method)

        # Flat indexes, created one at a time in a fixed order.
        flat = {}
        flat['string'] = tm.makeStringIndex(100)
        flat['date'] = tm.makeDateIndex(100)
        flat['int'] = tm.makeIntIndex(100)
        flat['rng'] = tm.makeRangeIndex(100)
        flat['float'] = tm.makeFloatIndex(100)
        flat['empty'] = Index([])
        flat['tuple'] = Index(zip(['foo', 'bar', 'baz'], [1, 2, 3]))
        flat['period'] = Index(period_range('2012-1-1', freq='M', periods=3))
        flat['date2'] = Index(date_range('2013-01-1', periods=10))
        flat['bdate'] = Index(bdate_range('2013-01-02', periods=10))
        flat['cat'] = tm.makeCategoricalIndex(100)
        flat['interval'] = tm.makeIntervalIndex(100)
        flat['timedelta'] = tm.makeTimedeltaIndex(100, 'H')
        self.d = flat

        # A single two-level MultiIndex stored under the 'reg' key.
        pairs = [
            ('bar', 'one'), ('baz', 'two'), ('foo', 'two'),
            ('qux', 'one'), ('qux', 'two'),
        ]
        self.mi = {
            'reg': MultiIndex.from_tuples(pairs, names=['first', 'second']),
        }
示例#2
0
    def test_hash_pandas_object(self):
        """Apply both hashing checks to a variety of pandas objects."""
        samples = [
            Series([1, 2, 3]),
            Series([1.0, 1.5, 3.2]),
            Series([1.0, 1.5, np.nan]),
            Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]),
            Series(['a', 'b', 'c']),
            Series(['a', np.nan, 'c']),
            Series([True, False, True]),
            Index([1, 2, 3]),
            Index([True, False, True]),
            DataFrame({'x': ['a', 'b', 'c'], 'y': [1, 2, 3]}),
            tm.makeMissingDataframe(),
            tm.makeMixedDataFrame(),
            tm.makeTimeDataFrame(),
            tm.makeTimeSeries(),
            tm.makeTimedeltaIndex(),
            Series(
                [1, 2, 3],
                index=pd.MultiIndex.from_tuples(
                    [('a', 1), ('a', 2), ('b', 1)])),
        ]

        # Every sample goes through the same pair of helper checks.
        for sample in samples:
            self.check_equal(sample)
            self.check_not_equal_with_index(sample)
示例#3
0
    def test_hash_pandas_object(self):
        """Apply both hashing checks to a variety of pandas objects.

        The samples include empty containers, tz-aware data, period data and
        ``MultiIndex`` instances.
        """
        samples = [
            Series([1, 2, 3]),
            Series([1.0, 1.5, 3.2]),
            Series([1.0, 1.5, np.nan]),
            Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]),
            Series(['a', 'b', 'c']),
            Series(['a', np.nan, 'c']),
            Series(['a', None, 'c']),
            Series([True, False, True]),
            Series(),
            Index([1, 2, 3]),
            Index([True, False, True]),
            DataFrame({'x': ['a', 'b', 'c'], 'y': [1, 2, 3]}),
            DataFrame(),
            tm.makeMissingDataframe(),
            tm.makeMixedDataFrame(),
            tm.makeTimeDataFrame(),
            tm.makeTimeSeries(),
            tm.makeTimedeltaIndex(),
            tm.makePeriodIndex(),
            Series(tm.makePeriodIndex()),
            Series(pd.date_range('20130101', periods=3, tz='US/Eastern')),
            MultiIndex.from_product([
                range(5),
                ['foo', 'bar', 'baz'],
                pd.date_range('20130101', periods=2),
            ]),
            MultiIndex.from_product([
                pd.CategoricalIndex(list('aabc')),
                range(3),
            ]),
        ]

        # Every sample goes through the same pair of helper checks.
        for sample in samples:
            self.check_equal(sample)
            self.check_not_equal_with_index(sample)
示例#4
0
    def test_isin(self):
        """``isin`` is all-True against itself and selective otherwise."""
        tdi = tm.makeTimedeltaIndex(4)

        # Membership against the index itself, then against a list copy.
        self.assertTrue(tdi.isin(tdi).all())
        self.assertTrue(tdi.isin(list(tdi)).all())

        # Only the element taken from position 2 is expected to match.
        mask = tdi.isin([tdi[2], 5])
        assert_almost_equal(mask, [False, False, True, False])
示例#5
0
    def test_isin(self):
        """``isin`` is all-True against itself and selective otherwise."""
        tdi = tm.makeTimedeltaIndex(4)

        # Membership against the index itself, then against a list copy.
        assert tdi.isin(tdi).all()
        assert tdi.isin(list(tdi)).all()

        # Only the element taken from position 2 is expected to match.
        mask = tdi.isin([tdi[2], 5])
        expected = np.array([False, False, True, False])
        assert_almost_equal(mask, expected)
示例#6
0
 def setup_method(self, method):
     """Store a 10-element timedelta index, then call ``setup_indices``."""
     timedelta_index = tm.makeTimedeltaIndex(10)
     self.indices = {'index': timedelta_index}
     self.setup_indices()
示例#7
0
    Series([1.0, 1.5, np.nan]),
    Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]),
    Series(["a", "b", "c"]),
    Series(["a", np.nan, "c"]),
    Series(["a", None, "c"]),
    Series([True, False, True]),
    Series(),
    Index([1, 2, 3]),
    Index([True, False, True]),
    DataFrame({"x": ["a", "b", "c"], "y": [1, 2, 3]}),
    DataFrame(),
    tm.makeMissingDataframe(),
    tm.makeMixedDataFrame(),
    tm.makeTimeDataFrame(),
    tm.makeTimeSeries(),
    tm.makeTimedeltaIndex(),
    tm.makePeriodIndex(),
    Series(tm.makePeriodIndex()),
    Series(pd.date_range("20130101", periods=3, tz="US/Eastern")),
    MultiIndex.from_product([range(5), ["foo", "bar", "baz"],
                             pd.date_range("20130101", periods=2)]),
    MultiIndex.from_product([pd.CategoricalIndex(list("aabc")), range(3)])
])
def test_hash_pandas_object(obj, index):
    """Run the two module-level hashing checks on ``obj``.

    ``index`` is forwarded to ``_check_equal`` as a keyword argument;
    ``_check_not_equal_with_index`` receives only ``obj``.
    NOTE(review): ``obj`` presumably comes from the parametrize list that
    ends just above, and ``index`` from a fixture -- confirm.
    """
    _check_equal(obj, index=index)
    _check_not_equal_with_index(obj)


def test_hash_pandas_object2(series, index):
    """Run the two module-level hashing checks on ``series``.

    ``index`` is forwarded to ``_check_equal`` as a keyword argument;
    ``_check_not_equal_with_index`` receives only ``series``.
    NOTE(review): ``series`` and ``index`` are presumably pytest fixtures
    defined elsewhere -- confirm.
    """
    _check_equal(series, index=index)
    _check_not_equal_with_index(series)
示例#8
0
    Series(["a", np.nan, "c"]),
    Series(["a", None, "c"]),
    Series([True, False, True]),
    Series(),
    Index([1, 2, 3]),
    Index([True, False, True]),
    DataFrame({
        "x": ["a", "b", "c"],
        "y": [1, 2, 3]
    }),
    DataFrame(),
    tm.makeMissingDataframe(),
    tm.makeMixedDataFrame(),
    tm.makeTimeDataFrame(),
    tm.makeTimeSeries(),
    tm.makeTimedeltaIndex(),
    tm.makePeriodIndex(),
    Series(tm.makePeriodIndex()),
    Series(pd.date_range("20130101", periods=3, tz="US/Eastern")),
    MultiIndex.from_product([
        range(5), ["foo", "bar", "baz"],
        pd.date_range("20130101", periods=2)
    ]),
    MultiIndex.from_product([pd.CategoricalIndex(list("aabc")),
                             range(3)])
])
def test_hash_pandas_object(obj, index):
    """Run the two module-level hashing checks on ``obj``.

    ``index`` is forwarded to ``_check_equal`` as a keyword argument;
    ``_check_not_equal_with_index`` receives only ``obj``.
    NOTE(review): ``obj`` presumably comes from the parametrize list that
    ends just above, and ``index`` from a fixture -- confirm.
    """
    _check_equal(obj, index=index)
    _check_not_equal_with_index(obj)

示例#9
0
import numpy as np
import pytest

from pandas.compat import long, lzip

import pandas as pd
from pandas.core.indexes.api import Index, MultiIndex
import pandas.util.testing as tm


def _param_type_name(param):
    """Return the class name of *param*, used as the pytest id."""
    return type(param).__name__


@pytest.fixture(
    params=[
        tm.makeUnicodeIndex(100),
        tm.makeStringIndex(100),
        tm.makeDateIndex(100),
        tm.makePeriodIndex(100),
        tm.makeTimedeltaIndex(100),
        tm.makeIntIndex(100),
        tm.makeUIntIndex(100),
        tm.makeFloatIndex(100),
        Index([True, False]),
        tm.makeCategoricalIndex(100),
        Index([]),
        MultiIndex.from_tuples(lzip(['foo', 'bar', 'baz'], [1, 2, 3])),
        Index([0, 0, 1, 1, 2, 2]),
    ],
    ids=_param_type_name,
)
def indices(request):
    """Fixture yielding each of the index objects listed in ``params``."""
    return request.param


@pytest.fixture(params=[1, np.array(1, dtype=np.int64)])
示例#10
0
class TestSeriesMisc(SharedWithSparse):
    """Miscellaneous API tests for ``Series``.

    Covers tab completion, iteration helpers, copying, numpy interop and a
    handful of accessor and deprecation checks.
    """

    series_klass = Series
    # SharedWithSparse tests use generic, series_klass-agnostic assertion
    _assert_series_equal = staticmethod(tm.assert_series_equal)

    def test_tab_completion(self):
        """Accessors show up in ``dir(Series)`` only when they apply."""
        # GH 9910
        s = Series(list("abcd"))
        # Series of str values should have .str but not .dt/.cat in __dir__
        assert "str" in dir(s)
        assert "dt" not in dir(s)
        assert "cat" not in dir(s)

        # similarly for .dt
        s = Series(date_range("1/1/2015", periods=5))
        assert "dt" in dir(s)
        assert "str" not in dir(s)
        assert "cat" not in dir(s)

        # Similarly for .cat, but with the twist that str and dt should be
        # there if the categories are of that type first cat and str.
        s = Series(list("abbcd"), dtype="category")
        assert "cat" in dir(s)
        assert "str" in dir(s)  # as it is a string categorical
        assert "dt" not in dir(s)

        # similar to cat and str
        s = Series(date_range("1/1/2015", periods=5)).astype("category")
        assert "cat" in dir(s)
        assert "str" not in dir(s)
        assert "dt" in dir(s)  # as it is a datetime categorical

    def test_tab_completion_with_categorical(self):
        """The public names of ``.cat`` match the expected list."""
        # test the tab completion display
        ok_for_cat = [
            "categories",
            "codes",
            "ordered",
            "set_categories",
            "add_categories",
            "remove_categories",
            "rename_categories",
            "reorder_categories",
            "remove_unused_categories",
            "as_ordered",
            "as_unordered",
        ]

        def get_dir(s):
            # Public (non-underscore) names only, de-duplicated and sorted.
            results = [r for r in s.cat.__dir__() if not r.startswith("_")]
            return sorted(set(results))

        s = Series(list("aabbcde")).astype("category")
        results = get_dir(s)
        tm.assert_almost_equal(results, sorted(set(ok_for_cat)))

    @pytest.mark.parametrize(
        "index",
        [
            tm.makeUnicodeIndex(10),
            tm.makeStringIndex(10),
            tm.makeCategoricalIndex(10),
            Index(["foo", "bar", "baz"] * 2),
            tm.makeDateIndex(10),
            tm.makePeriodIndex(10),
            tm.makeTimedeltaIndex(10),
            tm.makeIntIndex(10),
            tm.makeUIntIndex(10),
            tm.makeFloatIndex(10),
            Index([True, False]),
            Index(["a{}".format(i) for i in range(101)]),
            pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")),
            pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")),
        ],
    )
    def test_index_tab_completion(self, index):
        """Identifier-like labels among the first 100 appear in ``dir``.

        Labels at position 100 or later (in the unique level-0 values) must
        not appear.
        """
        # dir contains string-like values of the Index.
        s = Series(index=index)
        dir_s = dir(s)
        for i, x in enumerate(s.index.unique(level=0)):
            if i < 100:
                assert not isinstance(x, str) or not x.isidentifier() or x in dir_s
            else:
                assert x not in dir_s

    def test_not_hashable(self):
        """Hashing a ``Series`` (empty or not) raises ``TypeError``."""
        s_empty = Series()
        s = Series([1])
        msg = "'Series' objects are mutable, thus they cannot be hashed"
        with pytest.raises(TypeError, match=msg):
            hash(s_empty)
        with pytest.raises(TypeError, match=msg):
            hash(s)

    def test_contains(self, datetime_series):
        """Every index label is reported as contained in the series."""
        tm.assert_contains_all(datetime_series.index, datetime_series)

    def test_iter_datetimes(self, datetime_series):
        """Iterating yields the same values as positional lookup."""
        for i, val in enumerate(datetime_series):
            assert val == datetime_series[i]

    def test_iter_strings(self, string_series):
        """Iterating yields the same values as positional lookup."""
        for i, val in enumerate(string_series):
            assert val == string_series[i]

    def test_keys(self, datetime_series):
        """``keys()`` returns the index object itself."""
        # HACK: By doing this in two stages, we avoid 2to3 wrapping the call
        # to .keys() in a list()
        getkeys = datetime_series.keys
        assert getkeys() is datetime_series.index

    def test_values(self, datetime_series):
        """``.values`` compares almost-equal to the series, ignoring dtype."""
        tm.assert_almost_equal(
            datetime_series.values, datetime_series, check_dtype=False
        )

    def test_iteritems_datetimes(self, datetime_series):
        """``iteritems`` pairs agree with label-based lookup."""
        for idx, val in datetime_series.iteritems():
            assert val == datetime_series[idx]

    def test_iteritems_strings(self, string_series):
        """``iteritems`` pairs agree with lookup and the result is lazy."""
        for idx, val in string_series.iteritems():
            assert val == string_series[idx]

        # assert is lazy (generators don't define reverse, lists do)
        assert not hasattr(string_series.iteritems(), "reverse")

    def test_items_datetimes(self, datetime_series):
        """``items`` pairs agree with label-based lookup."""
        for idx, val in datetime_series.items():
            assert val == datetime_series[idx]

    def test_items_strings(self, string_series):
        """``items`` pairs agree with lookup and the result is lazy."""
        for idx, val in string_series.items():
            assert val == string_series[idx]

        # assert is lazy (generators don't define reverse, lists do)
        assert not hasattr(string_series.items(), "reverse")

    def test_raise_on_info(self):
        """``Series`` has no ``info`` attribute."""
        s = Series(np.random.randn(10))
        msg = "'Series' object has no attribute 'info'"
        with pytest.raises(AttributeError, match=msg):
            s.info()

    @pytest.mark.parametrize("deep", [None, False, True])
    def test_copy(self, deep):
        """Only a shallow copy shares its data with the original.

        ``deep=None`` stands for calling ``copy()`` with no argument.
        """
        s = Series(np.arange(10), dtype="float64")

        # default deep is True
        s2 = s.copy() if deep is None else s.copy(deep=deep)

        s2[::2] = np.NaN

        # The copy always sees its own modification.
        assert np.isnan(s2[0])
        if deep is None or deep is True:
            # Did not modify original Series
            assert not np.isnan(s[0])
        else:
            # we DID modify the original Series
            assert np.isnan(s[0])

    @pytest.mark.parametrize("deep", [None, False, True])
    def test_copy_tzaware(self, deep):
        """Same copy semantics as ``test_copy`` for tz-aware values.

        ``deep=None`` stands for calling ``copy()`` with no argument.
        """
        # GH#11794
        # copy of tz-aware
        expected = Series([Timestamp("2012/01/01", tz="UTC")])
        expected2 = Series([Timestamp("1999/01/01", tz="UTC")])

        s = Series([Timestamp("2012/01/01", tz="UTC")])

        # default deep is True
        s2 = s.copy() if deep is None else s.copy(deep=deep)

        s2[0] = Timestamp("1999/01/01", tz="UTC")

        # The copy always sees its own modification.
        tm.assert_series_equal(s2, expected2)
        if deep is None or deep is True:
            # Did not modify original Series
            tm.assert_series_equal(s, expected)
        else:
            # we DID modify the original Series
            tm.assert_series_equal(s, expected2)

    def test_axis_alias(self):
        """``"rows"`` is accepted as an alias for the ``"index"`` axis."""
        s = Series([1, 2, np.nan])
        tm.assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index"))
        assert s.dropna().sum("rows") == 3
        assert s._get_axis_number("rows") == 0
        assert s._get_axis_name("rows") == "index"

    def test_class_axis(self):
        """``Series.index`` exposes a non-empty docstring via ``pydoc``."""
        # https://github.com/pandas-dev/pandas/issues/18147
        # no exception and no empty docstring
        assert pydoc.getdoc(Series.index)

    def test_numpy_unique(self, datetime_series):
        """``np.unique`` accepts a ``Series`` without raising."""
        # it works!
        np.unique(datetime_series)

    def test_ndarray_compat(self):
        """Assorted numpy-compatibility checks for ``Series``."""

        # test numpy compat with Series as sub-class of NDFrame
        tsdf = DataFrame(
            np.random.randn(1000, 3),
            columns=["A", "B", "C"],
            index=date_range("1/1/2000", periods=1000),
        )

        def f(x):
            return x[x.idxmax()]

        result = tsdf.apply(f)
        expected = tsdf.max()
        tm.assert_series_equal(result, expected)

        # .item()
        with tm.assert_produces_warning(FutureWarning):
            s = Series([1])
            result = s.item()
            assert result == 1
            assert s.item() == s.iloc[0]

        # using an ndarray like function
        s = Series(np.random.randn(10))
        result = Series(np.ones_like(s))
        expected = Series(1, index=range(10), dtype="float64")
        tm.assert_series_equal(result, expected)

        # ravel
        s = Series(np.random.randn(10))
        tm.assert_almost_equal(s.ravel(order="F"), s.values.ravel(order="F"))

        # compress
        # GH 6658
        s = Series([0, 1.0, -1], index=list("abc"))
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s > 0, s)
        tm.assert_series_equal(result, Series([1.0], index=["b"]))

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s < -1, s)
        # result empty Index(dtype=object) as the same as original
        exp = Series([], dtype="float64", index=Index([], dtype="object"))
        tm.assert_series_equal(result, exp)

        s = Series([0, 1.0, -1], index=[0.1, 0.2, 0.3])
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s > 0, s)
        tm.assert_series_equal(result, Series([1.0], index=[0.2]))

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s < -1, s)
        # result empty Float64Index as the same as original
        exp = Series([], dtype="float64", index=Index([], dtype="float64"))
        tm.assert_series_equal(result, exp)

    def test_str_accessor_updates_on_inplace(self):
        """``.str`` reflects an in-place ``drop`` on the series."""
        s = Series(list("abc"))
        s.drop([0], inplace=True)
        assert len(s.str.lower()) == 2

    def test_str_attribute(self):
        """Strip methods on ``.str`` match ``str``; non-strings raise."""
        # GH9068
        methods = ["strip", "rstrip", "lstrip"]
        s = Series([" jack", "jill ", " jesse ", "frank"])
        for method in methods:
            expected = Series([getattr(str, method)(x) for x in s.values])
            tm.assert_series_equal(getattr(Series.str, method)(s.str), expected)

        # str accessor only valid with string values
        s = Series(range(5))
        with pytest.raises(AttributeError, match="only use .str accessor"):
            s.str.repeat(2)

    def test_empty_method(self):
        """``.empty`` is True only for a series with no entries."""
        s_empty = Series()
        assert s_empty.empty

        for full_series in [Series([1]), Series(index=[1])]:
            assert not full_series.empty

    def test_tab_complete_warning(self, ip):
        """IPython completion on a ``Series`` produces no warning."""
        # https://github.com/pandas-dev/pandas/issues/16409
        pytest.importorskip("IPython", minversion="6.0.0")
        from IPython.core.completer import provisionalcompleter

        code = "import pandas as pd; s = pd.Series()"
        ip.run_code(code)
        with tm.assert_produces_warning(None):
            with provisionalcompleter("ignore"):
                list(ip.Completer.completions("s.", 1))

    def test_integer_series_size(self):
        """``.size`` is 9 for both default and ``Int64`` integer series."""
        # GH 25580
        s = Series(range(9))
        assert s.size == 9
        s = Series(range(9), dtype="Int64")
        assert s.size == 9

    def test_get_values_deprecation(self):
        """``get_values`` warns and returns the same data as ``.values``."""
        s = Series(range(9))
        with tm.assert_produces_warning(FutureWarning):
            res = s.get_values()
        tm.assert_numpy_array_equal(res, s.values)
示例#11
0
 def setUp(self):
     """Store a 10-element timedelta index, then call ``setup_indices``."""
     timedelta_index = tm.makeTimedeltaIndex(10)
     self.indices = {'index': timedelta_index}
     self.setup_indices()
示例#12
0
 def setup_method(self, method):
     """Store a 10-element timedelta index, then call ``setup_indices``."""
     timedelta_index = tm.makeTimedeltaIndex(10)
     self.indices = {'index': timedelta_index}
     self.setup_indices()
示例#13
0
class TestHashing(object):
    """Tests for the pandas hashing helpers.

    Exercises ``hash_pandas_object``, ``hash_array``, ``hash_tuples``,
    ``hash_tuple`` and ``_hash_scalar``.
    """

    @pytest.fixture(params=[
        Series([1, 2, 3] * 3, dtype='int32'),
        Series([None, 2.5, 3.5] * 3, dtype='float32'),
        Series(['a', 'b', 'c'] * 3, dtype='category'),
        Series(['d', 'e', 'f'] * 3),
        Series([True, False, True] * 3),
        Series(pd.date_range('20130101', periods=9)),
        Series(pd.date_range('20130101', periods=9, tz='US/Eastern')),
        Series(pd.timedelta_range('2000', periods=9))
    ])
    def series(self, request):
        """Fixture yielding each of the ``Series`` listed in ``params``."""
        return request.param

    def test_consistency(self):
        """Hashes of a fixed string index match the recorded values."""
        # check that our hash doesn't change because of a mistake
        # in the actual code; this is the ground truth
        result = hash_pandas_object(Index(['foo', 'bar', 'baz']))
        expected = Series(np.array(
            [3600424527151052760, 1374399572096150070, 477881037637427054],
            dtype='uint64'),
                          index=['foo', 'bar', 'baz'])
        tm.assert_series_equal(result, expected)

    def test_hash_array(self, series):
        """Hashing the same array twice gives identical results."""
        a = series.values
        tm.assert_numpy_array_equal(hash_array(a), hash_array(a))

    def test_hash_array_mixed(self):
        """Mixed int/str arrays hash like their all-string equivalent."""
        result1 = hash_array(np.array([3, 4, 'All']))
        result2 = hash_array(np.array(['3', '4', 'All']))
        result3 = hash_array(np.array([3, 4, 'All'], dtype=object))
        tm.assert_numpy_array_equal(result1, result2)
        tm.assert_numpy_array_equal(result1, result3)

    @pytest.mark.parametrize('val', [5, 'foo', pd.Timestamp('20130101')])
    def test_hash_array_errors(self, val):
        """``hash_array`` rejects scalar input with ``TypeError``."""
        msg = 'must pass a ndarray-like'
        with pytest.raises(TypeError, match=msg):
            hash_array(val)

    def check_equal(self, obj, **kwargs):
        """Assert hashing ``obj`` is repeatable.

        Checked once with ``kwargs`` as given and once more after dropping
        any ``index`` entry from ``kwargs``.
        """
        a = hash_pandas_object(obj, **kwargs)
        b = hash_pandas_object(obj, **kwargs)
        tm.assert_series_equal(a, b)

        kwargs.pop('index', None)
        a = hash_pandas_object(obj, **kwargs)
        b = hash_pandas_object(obj, **kwargs)
        tm.assert_series_equal(a, b)

    def check_not_equal_with_index(self, obj):
        """Assert ``index=True`` and ``index=False`` hashes differ.

        Skipped for ``Index`` inputs; the comparison is only asserted when
        ``obj`` is non-empty.
        """

        # check that we are not hashing the same if
        # we include the index
        if not isinstance(obj, Index):
            a = hash_pandas_object(obj, index=True)
            b = hash_pandas_object(obj, index=False)
            if len(obj):
                assert not (a == b).all()

    def test_hash_tuples(self):
        """``hash_tuples`` agrees with hashing the equivalent MultiIndex."""
        tups = [(1, 'one'), (1, 'two'), (2, 'one')]
        result = hash_tuples(tups)
        expected = hash_pandas_object(MultiIndex.from_tuples(tups)).values
        tm.assert_numpy_array_equal(result, expected)

        # A single tuple hashes to the first element of the list result.
        result = hash_tuples(tups[0])
        assert result == expected[0]

    @pytest.mark.parametrize('tup', [(1, 'one'), (1, np.nan),
                                     (1.0, pd.NaT, 'A'),
                                     ('A', pd.Timestamp("2012-01-01"))])
    def test_hash_tuple(self, tup):
        """``hash_tuple`` agrees with ``hash_tuples`` on one tuple."""
        # test equivalence between hash_tuples and hash_tuple
        result = hash_tuple(tup)
        expected = hash_tuples([tup])[0]
        assert result == expected

    @pytest.mark.parametrize('val', [
        1, 1.4, 'A', b'A', u'A',
        pd.Timestamp("2012-01-01"),
        pd.Timestamp("2012-01-01", tz='Europe/Brussels'),
        datetime.datetime(2012, 1, 1),
        pd.Timestamp("2012-01-01", tz='EST').to_pydatetime(),
        pd.Timedelta('1 days'),
        datetime.timedelta(1),
        pd.Period('2012-01-01', freq='D'),
        pd.Interval(0, 1), np.nan, pd.NaT, None
    ])
    def test_hash_scalar(self, val):
        """``_hash_scalar`` agrees with hashing a one-element object array."""
        result = _hash_scalar(val)
        expected = hash_array(np.array([val], dtype=object), categorize=True)
        assert result[0] == expected[0]

    @pytest.mark.parametrize('val', [5, 'foo', pd.Timestamp('20130101')])
    def test_hash_tuples_err(self, val):
        """``hash_tuples`` rejects scalar input with ``TypeError``."""
        msg = 'must be convertible to a list-of-tuples'
        with pytest.raises(TypeError, match=msg):
            hash_tuples(val)

    def test_multiindex_unique(self):
        """A unique MultiIndex produces unique hashes."""
        mi = MultiIndex.from_tuples([(118, 472), (236, 118), (51, 204),
                                     (102, 51)])
        assert mi.is_unique
        result = hash_pandas_object(mi)
        assert result.is_unique

    def test_multiindex_objects(self):
        """``_hashed_values`` matches ``hash_pandas_object(index=False)``.

        Checked for a MultiIndex and for the result of its
        ``_sort_levels_monotonic()`` call.
        """
        mi = MultiIndex(levels=[['b', 'd', 'a'], [1, 2, 3]],
                        labels=[[0, 1, 0, 2], [2, 0, 0, 1]],
                        names=['col1', 'col2'])
        recons = mi._sort_levels_monotonic()

        # these are equal
        assert mi.equals(recons)
        assert Index(mi.values).equals(Index(recons.values))

        # _hashed_values and hash_pandas_object(..., index=False)
        # equivalency
        expected = hash_pandas_object(mi, index=False).values
        result = mi._hashed_values
        tm.assert_numpy_array_equal(result, expected)

        expected = hash_pandas_object(recons, index=False).values
        result = recons._hashed_values
        tm.assert_numpy_array_equal(result, expected)

        expected = mi._hashed_values
        result = recons._hashed_values

        # values should match, but in different order
        tm.assert_numpy_array_equal(np.sort(result), np.sort(expected))

    @pytest.mark.parametrize('obj', [
        Series([1, 2, 3]),
        Series([1.0, 1.5, 3.2]),
        Series([1.0, 1.5, np.nan]),
        Series([1.0, 1.5, 3.2], index=[1.5, 1.1, 3.3]),
        Series(['a', 'b', 'c']),
        Series(['a', np.nan, 'c']),
        Series(['a', None, 'c']),
        Series([True, False, True]),
        Series(),
        Index([1, 2, 3]),
        Index([True, False, True]),
        DataFrame({
            'x': ['a', 'b', 'c'],
            'y': [1, 2, 3]
        }),
        DataFrame(),
        tm.makeMissingDataframe(),
        tm.makeMixedDataFrame(),
        tm.makeTimeDataFrame(),
        tm.makeTimeSeries(),
        tm.makeTimedeltaIndex(),
        tm.makePeriodIndex(),
        Series(tm.makePeriodIndex()),
        Series(pd.date_range('20130101', periods=3, tz='US/Eastern')),
        MultiIndex.from_product([
            range(5), ['foo', 'bar', 'baz'],
            pd.date_range('20130101', periods=2)
        ]),
        MultiIndex.from_product([pd.CategoricalIndex(list('aabc')),
                                 range(3)])
    ])
    def test_hash_pandas_object(self, obj):
        """Run both helper checks on each parametrized object."""
        self.check_equal(obj)
        self.check_not_equal_with_index(obj)

    def test_hash_pandas_object2(self, series):
        """Run both helper checks on each ``series`` fixture value."""
        self.check_equal(series)
        self.check_not_equal_with_index(series)

    @pytest.mark.parametrize(
        'obj',
        [Series([], dtype='float64'),
         Series([], dtype='object'),
         Index([])])
    def test_hash_pandas_empty_object(self, obj):
        """Empty objects hash repeatably."""
        # these are by-definition the same with
        # or w/o the index as the data is empty
        self.check_equal(obj)

    @pytest.mark.parametrize('s1', [
        Series(['a', 'b', 'c', 'd']),
        Series([1000, 2000, 3000, 4000]),
        Series(pd.date_range(0, periods=4))
    ])
    @pytest.mark.parametrize('categorize', [True, False])
    def test_categorical_consistency(self, s1, categorize):
        """Categoricals hash by value, regardless of category order."""
        # GH15143
        # Check that categoricals hash consistent with their values, not codes
        # This should work for categoricals of any dtype
        s2 = s1.astype('category').cat.set_categories(s1)
        s3 = s2.cat.set_categories(list(reversed(s1)))

        # These should all hash identically
        h1 = hash_pandas_object(s1, categorize=categorize)
        h2 = hash_pandas_object(s2, categorize=categorize)
        h3 = hash_pandas_object(s3, categorize=categorize)
        tm.assert_series_equal(h1, h2)
        tm.assert_series_equal(h1, h3)

    def test_categorical_with_nan_consistency(self):
        """Missing and shared values hash the same across categoricals."""
        c = pd.Categorical.from_codes([-1, 0, 1, 2, 3, 4],
                                      categories=pd.date_range('2012-01-01',
                                                               periods=5,
                                                               name='B'))
        expected = hash_array(c, categorize=False)
        c = pd.Categorical.from_codes([-1, 0],
                                      categories=[pd.Timestamp('2012-01-01')])
        result = hash_array(c, categorize=False)
        assert result[0] in expected
        assert result[1] in expected

    @pytest.mark.filterwarnings("ignore:\\nPanel:FutureWarning")
    def test_pandas_errors(self):
        """A ``Timestamp`` and a Panel are both rejected with ``TypeError``."""
        with pytest.raises(TypeError):
            hash_pandas_object(pd.Timestamp('20130101'))

        obj = tm.makePanel()

        with pytest.raises(TypeError):
            hash_pandas_object(obj)

    def test_hash_keys(self):
        """Different ``hash_key`` values give different hashes."""
        # using different hash keys, should have different hashes
        # for the same data

        # this only matters for object dtypes
        obj = Series(list('abc'))
        a = hash_pandas_object(obj, hash_key='9876543210123456')
        b = hash_pandas_object(obj, hash_key='9876543210123465')
        assert (a != b).all()

    def test_invalid_key(self):
        """A ``hash_key`` of the wrong length raises ``ValueError``."""
        # this only matters for object dtypes
        msg = 'key should be a 16-byte string encoded'
        with pytest.raises(ValueError, match=msg):
            hash_pandas_object(Series(list('abc')), hash_key='foo')

    def test_alread_encoded(self):
        """Already-encoded (bytes) values hash repeatably."""
        # if already encoded then ok

        obj = Series(list('abc')).str.encode('utf8')
        self.check_equal(obj)

    def test_alternate_encoding(self):
        """Hashing with ``encoding='ascii'`` is repeatable."""

        obj = Series(list('abc'))
        self.check_equal(obj, encoding='ascii')

    @pytest.mark.parametrize('l_exp', range(8))
    @pytest.mark.parametrize('l_add', [0, 1])
    def test_same_len_hash_collisions(self, l_exp, l_add):
        """Two random strings of equal length hash differently.

        The string length is ``2**(l_exp + 8) + l_add``.
        """
        length = 2**(l_exp + 8) + l_add
        s = tm.rands_array(length, 2)
        result = hash_array(s, 'utf8')
        assert result[0] != result[1]

    def test_hash_collisions(self):
        """Two known long strings hash to their recorded, distinct values."""

        # hash collisions are bad
        # https://github.com/pandas-dev/pandas/issues/14711#issuecomment-264885726
        L = [
            'Ingrid-9Z9fKIZmkO7i7Cn51Li34pJm44fgX6DYGBNj3VPlOH50m7HnBlPxfIwFMrcNJNMP6PSgLmwWnInciMWrCSAlLEvt7JkJl4IxiMrVbXSa8ZQoVaq5xoQPjltuJEfwdNlO6jo8qRRHvD8sBEBMQASrRa6TsdaPTPCBo3nwIBpE7YzzmyH0vMBhjQZLx1aCT7faSEx7PgFxQhHdKFWROcysamgy9iVj8DO2Fmwg1NNl93rIAqC3mdqfrCxrzfvIY8aJdzin2cHVzy3QUJxZgHvtUtOLxoqnUHsYbNTeq0xcLXpTZEZCxD4PGubIuCNf32c33M7HFsnjWSEjE2yVdWKhmSVodyF8hFYVmhYnMCztQnJrt3O8ZvVRXd5IKwlLexiSp4h888w7SzAIcKgc3g5XQJf6MlSMftDXm9lIsE1mJNiJEv6uY6pgvC3fUPhatlR5JPpVAHNSbSEE73MBzJrhCAbOLXQumyOXigZuPoME7QgJcBalliQol7YZ9',  # noqa
            'Tim-b9MddTxOWW2AT1Py6vtVbZwGAmYCjbp89p8mxsiFoVX4FyDOF3wFiAkyQTUgwg9sVqVYOZo09Dh1AzhFHbgij52ylF0SEwgzjzHH8TGY8Lypart4p4onnDoDvVMBa0kdthVGKl6K0BDVGzyOXPXKpmnMF1H6rJzqHJ0HywfwS4XYpVwlAkoeNsiicHkJUFdUAhG229INzvIAiJuAHeJDUoyO4DCBqtoZ5TDend6TK7Y914yHlfH3g1WZu5LksKv68VQHJriWFYusW5e6ZZ6dKaMjTwEGuRgdT66iU5nqWTHRH8WSzpXoCFwGcTOwyuqPSe0fTe21DVtJn1FKj9F9nEnR9xOvJUO7E0piCIF4Ad9yAIDY4DBimpsTfKXCu1vdHpKYerzbndfuFe5AhfMduLYZJi5iAw8qKSwR5h86ttXV0Mc0QmXz8dsRvDgxjXSmupPxBggdlqUlC828hXiTPD7am0yETBV0F3bEtvPiNJfremszcV8NcqAoARMe',  # noqa
        ]

        # these should be different!
        result1 = hash_array(np.asarray(L[0:1], dtype=object), 'utf8')
        expected1 = np.array([14963968704024874985], dtype=np.uint64)
        tm.assert_numpy_array_equal(result1, expected1)

        result2 = hash_array(np.asarray(L[1:2], dtype=object), 'utf8')
        expected2 = np.array([16428432627716348016], dtype=np.uint64)
        tm.assert_numpy_array_equal(result2, expected2)

        result = hash_array(np.asarray(L, dtype=object), 'utf8')
        tm.assert_numpy_array_equal(
            result, np.concatenate([expected1, expected2], axis=0))
示例#14
0
class TestSeriesMisc(TestData, SharedWithSparse):
    """Miscellaneous ``Series`` API tests.

    Covers tab completion, hashing, iteration helpers, copying, axis
    aliases, NumPy interoperability and the ``.str`` accessor.

    NOTE(review): ``self.ts`` and ``self.series`` are not defined in this
    class; presumably they are provided by ``TestData`` -- confirm.
    """

    series_klass = Series
    # SharedWithSparse tests use generic, series_klass-agnostic assertion
    _assert_series_equal = staticmethod(tm.assert_series_equal)

    def test_tab_completion(self):
        """Check which of ``str``/``dt``/``cat`` appear in ``dir(Series)``."""
        # GH 9910
        s = Series(list('abcd'))
        # Series of str values should have .str but not .dt/.cat in __dir__
        assert 'str' in dir(s)
        assert 'dt' not in dir(s)
        assert 'cat' not in dir(s)

        # similarly for .dt
        s = Series(date_range('1/1/2015', periods=5))
        assert 'dt' in dir(s)
        assert 'str' not in dir(s)
        assert 'cat' not in dir(s)

        # Similarly for .cat, but with the twist that str and dt should be
        # there if the categories are of that type first cat and str.
        s = Series(list('abbcd'), dtype="category")
        assert 'cat' in dir(s)
        assert 'str' in dir(s)  # as it is a string categorical
        assert 'dt' not in dir(s)

        # similar to cat and str
        s = Series(date_range('1/1/2015', periods=5)).astype("category")
        assert 'cat' in dir(s)
        assert 'str' not in dir(s)
        assert 'dt' in dir(s)  # as it is a datetime categorical

    def test_tab_completion_with_categorical(self):
        """Check the public names exposed by ``Series.cat.__dir__()``."""
        # test the tab completion display
        ok_for_cat = [
            'categories', 'codes', 'ordered', 'set_categories',
            'add_categories', 'remove_categories', 'rename_categories',
            'reorder_categories', 'remove_unused_categories', 'as_ordered',
            'as_unordered'
        ]

        def get_dir(s):
            # Public (non-underscore) names only, de-duplicated and sorted.
            results = [r for r in s.cat.__dir__() if not r.startswith('_')]
            return sorted(set(results))

        s = Series(list('aabbcde')).astype('category')
        results = get_dir(s)
        tm.assert_almost_equal(results, sorted(set(ok_for_cat)))

    @pytest.mark.parametrize("index", [
        tm.makeUnicodeIndex(10),
        tm.makeStringIndex(10),
        tm.makeCategoricalIndex(10),
        Index(['foo', 'bar', 'baz'] * 2),
        tm.makeDateIndex(10),
        tm.makePeriodIndex(10),
        tm.makeTimedeltaIndex(10),
        tm.makeIntIndex(10),
        tm.makeUIntIndex(10),
        tm.makeRangeIndex(10),
        tm.makeFloatIndex(10),
        Index([True, False]),
        Index(['a{}'.format(i) for i in range(101)]),
        pd.MultiIndex.from_tuples(lzip('ABCD', 'EFGH')),
        pd.MultiIndex.from_tuples(lzip([0, 1, 2, 3], 'EFGH')),
    ])
    def test_index_tab_completion(self, index):
        """Check that identifier-like index labels show up in ``dir(Series)``.

        Only the first 100 unique first-level labels are expected to be
        listed; any label beyond that must be absent.
        """
        # dir contains string-like values of the Index.
        s = pd.Series(index=index)
        dir_s = dir(s)
        for i, x in enumerate(s.index.unique(level=0)):
            if i < 100:
                assert (not isinstance(x, string_types) or not isidentifier(x)
                        or x in dir_s)
            else:
                assert x not in dir_s

    def test_not_hashable(self):
        """``hash()`` on a Series (empty or not) raises ``TypeError``."""
        s_empty = Series()
        s = Series([1])
        with pytest.raises(TypeError):
            hash(s_empty)
        with pytest.raises(TypeError):
            hash(s)

    def test_contains(self):
        """Every index label of ``self.ts`` is contained in ``self.ts``."""
        tm.assert_contains_all(self.ts.index, self.ts)

    def test_iter(self):
        """Iterating a Series yields the same values as positional lookup."""
        for i, val in enumerate(self.series):
            assert val == self.series[i]

        for i, val in enumerate(self.ts):
            assert val == self.ts[i]

    def test_keys(self):
        """``Series.keys()`` returns the index object itself."""
        # HACK: By doing this in two stages, we avoid 2to3 wrapping the call
        # to .keys() in a list()
        getkeys = self.ts.keys
        assert getkeys() is self.ts.index

    def test_values(self):
        """``Series.values`` compares almost-equal to the Series itself."""
        tm.assert_almost_equal(self.ts.values, self.ts, check_dtype=False)

    def test_iteritems(self):
        """``iteritems`` yields (label, value) pairs and is lazy."""
        for idx, val in compat.iteritems(self.series):
            assert val == self.series[idx]

        for idx, val in compat.iteritems(self.ts):
            assert val == self.ts[idx]

        # assert is lazy (generators don't define reverse, lists do)
        assert not hasattr(self.series.iteritems(), 'reverse')

    def test_items(self):
        """``items`` yields (label, value) pairs and is lazy."""
        for idx, val in self.series.items():
            assert val == self.series[idx]

        for idx, val in self.ts.items():
            assert val == self.ts[idx]

        # assert is lazy (generators don't define reverse, lists do)
        assert not hasattr(self.series.items(), 'reverse')

    def test_raise_on_info(self):
        """Calling ``Series.info()`` raises ``AttributeError``."""
        s = Series(np.random.randn(10))
        with pytest.raises(AttributeError):
            s.info()

    def test_copy(self):
        """Deep copies are independent of the original; shallow ones are not.

        Exercised for a float Series and for a tz-aware datetime Series,
        with ``deep`` left at its default, ``False`` and ``True``.
        """

        for deep in [None, False, True]:
            s = Series(np.arange(10), dtype='float64')

            # default deep is True
            if deep is None:
                s2 = s.copy()
            else:
                s2 = s.copy(deep=deep)

            s2[::2] = np.nan

            if deep is None or deep is True:
                # Did not modify original Series
                assert np.isnan(s2[0])
                assert not np.isnan(s[0])
            else:
                # we DID modify the original Series
                assert np.isnan(s2[0])
                assert np.isnan(s[0])

        # GH 11794
        # copy of tz-aware
        expected = Series([Timestamp('2012/01/01', tz='UTC')])
        expected2 = Series([Timestamp('1999/01/01', tz='UTC')])

        for deep in [None, False, True]:

            s = Series([Timestamp('2012/01/01', tz='UTC')])

            if deep is None:
                s2 = s.copy()
            else:
                s2 = s.copy(deep=deep)

            s2[0] = pd.Timestamp('1999/01/01', tz='UTC')

            # default deep is True
            if deep is None or deep is True:
                # Did not modify original Series
                assert_series_equal(s2, expected2)
                assert_series_equal(s, expected)
            else:
                # we DID modify the original Series
                assert_series_equal(s2, expected2)
                assert_series_equal(s, expected2)

    def test_axis_alias(self):
        """``'rows'`` is accepted as an alias for the ``'index'`` axis."""
        s = Series([1, 2, np.nan])
        assert_series_equal(s.dropna(axis='rows'), s.dropna(axis='index'))
        assert s.dropna().sum('rows') == 3
        assert s._get_axis_number('rows') == 0
        assert s._get_axis_name('rows') == 'index'

    def test_class_axis(self):
        """``Series.index`` accessed on the class has a non-empty docstring."""
        # https://github.com/pandas-dev/pandas/issues/18147
        # no exception and no empty docstring
        assert pydoc.getdoc(Series.index)

    def test_numpy_unique(self):
        """Smoke test: ``np.unique`` accepts a Series without raising."""
        # it works!
        np.unique(self.ts)

    def test_ndarray_compat(self):
        """Check a handful of ndarray-style operations on Series."""

        # test numpy compat with Series as sub-class of NDFrame
        tsdf = DataFrame(np.random.randn(1000, 3),
                         columns=['A', 'B', 'C'],
                         index=date_range('1/1/2000', periods=1000))

        def f(x):
            return x[x.idxmax()]

        result = tsdf.apply(f)
        expected = tsdf.max()
        tm.assert_series_equal(result, expected)

        # .item()
        s = Series([1])
        result = s.item()
        assert result == 1
        assert s.item() == s.iloc[0]

        # using an ndarray like function
        s = Series(np.random.randn(10))
        result = Series(np.ones_like(s))
        expected = Series(1, index=range(10), dtype='float64')
        tm.assert_series_equal(result, expected)

        # ravel
        s = Series(np.random.randn(10))
        tm.assert_almost_equal(s.ravel(order='F'), s.values.ravel(order='F'))

        # compress
        # GH 6658
        s = Series([0, 1., -1], index=list('abc'))
        result = np.compress(s > 0, s)
        tm.assert_series_equal(result, Series([1.], index=['b']))

        result = np.compress(s < -1, s)
        # result empty Index(dtype=object) as the same as original
        exp = Series([], dtype='float64', index=Index([], dtype='object'))
        tm.assert_series_equal(result, exp)

        s = Series([0, 1., -1], index=[.1, .2, .3])
        result = np.compress(s > 0, s)
        tm.assert_series_equal(result, Series([1.], index=[.2]))

        result = np.compress(s < -1, s)
        # result empty Float64Index as the same as original
        exp = Series([], dtype='float64', index=Index([], dtype='float64'))
        tm.assert_series_equal(result, exp)

    def test_str_attribute(self):
        """``.str`` strip methods match ``str``; non-strings reject ``.str``."""
        # GH9068
        methods = ['strip', 'rstrip', 'lstrip']
        s = Series([' jack', 'jill ', ' jesse ', 'frank'])
        for method in methods:
            expected = Series([getattr(str, method)(x) for x in s.values])
            assert_series_equal(getattr(Series.str, method)(s.str), expected)

        # str accessor only valid with string values
        s = Series(range(5))
        with tm.assert_raises_regex(AttributeError, 'only use .str accessor'):
            s.str.repeat(2)

    def test_empty_method(self):
        """``Series.empty`` is True only when the Series has no elements."""
        s_empty = pd.Series()
        assert s_empty.empty

        for full_series in [pd.Series([1]), pd.Series(index=[1])]:
            assert not full_series.empty

    def test_tab_complete_warning(self, ip):
        """IPython completion on a Series must not emit any warning.

        Skipped unless IPython >= 6.0.0 is importable.
        """
        # https://github.com/pandas-dev/pandas/issues/16409
        pytest.importorskip('IPython', minversion="6.0.0")
        from IPython.core.completer import provisionalcompleter

        code = "import pandas as pd; s = pd.Series()"
        ip.run_code(code)
        with tm.assert_produces_warning(None):
            with provisionalcompleter('ignore'):
                list(ip.Completer.completions('s.', 1))
示例#15
0
import numpy as np
import pytest

from pandas.compat import long, lzip

import pandas as pd
from pandas.core.indexes.api import Index, MultiIndex
import pandas.util.testing as tm


@pytest.fixture(
    params=[
        tm.makeUnicodeIndex(100),
        tm.makeStringIndex(100),
        tm.makeDateIndex(100),
        tm.makePeriodIndex(100),
        tm.makeTimedeltaIndex(100),
        tm.makeIntIndex(100),
        tm.makeUIntIndex(100),
        tm.makeRangeIndex(100),
        tm.makeFloatIndex(100),
        Index([True, False]),
        tm.makeCategoricalIndex(100),
        Index([]),
        MultiIndex.from_tuples(lzip(["foo", "bar", "baz"], [1, 2, 3])),
        Index([0, 0, 1, 1, 2, 2]),
    ],
    ids=lambda param: type(param).__name__,
)
def indices(request):
    """Provide one of the parametrized index objects per test run.

    Each parameter is an index instance built at import time; the test id
    for a case is the class name of that instance.
    """
    return request.param


@pytest.fixture(params=[1, np.array(1, dtype=np.int64)])
示例#16
0
class TestSeriesMisc:
    """Miscellaneous ``Series`` API tests.

    Covers name preservation, pickling, dict construction, tab completion,
    iteration helpers, copying, NumPy interoperability and accessors.

    NOTE(review): ``datetime_series``, ``string_series`` and ``ip`` are
    pytest fixtures defined outside this class -- confirm their source.
    """

    def test_scalarop_preserve_name(self, datetime_series):
        """A scalar arithmetic op keeps the Series name."""
        result = datetime_series * 2
        assert result.name == datetime_series.name

    def test_copy_name(self, datetime_series):
        """``copy()`` keeps the Series name."""
        result = datetime_series.copy()
        assert result.name == datetime_series.name

    def test_copy_index_name_checking(self, datetime_series):
        """Renaming a copy's index must not rename the original's index."""
        # don't want to be able to modify the index stored elsewhere after
        # making a copy

        datetime_series.index.name = None
        assert datetime_series.index.name is None

        cp = datetime_series.copy()
        cp.index.name = "foo"
        assert datetime_series.index.name is None

    def test_append_preserve_name(self, datetime_series):
        """Appending two slices of the same Series keeps its name."""
        result = datetime_series[:5].append(datetime_series[5:])
        assert result.name == datetime_series.name

    def test_binop_maybe_preserve_name(self, datetime_series):
        """Binary ops keep the name only when both operands share it."""
        # names match, preserve
        result = datetime_series * datetime_series
        assert result.name == datetime_series.name
        result = datetime_series.mul(datetime_series)
        assert result.name == datetime_series.name

        result = datetime_series * datetime_series[:-2]
        assert result.name == datetime_series.name

        # names don't match, don't preserve
        cp = datetime_series.copy()
        cp.name = "something else"
        result = datetime_series + cp
        assert result.name is None
        result = datetime_series.add(cp)
        assert result.name is None

        # Same checks for every flexible arithmetic method and its
        # reflected ("r"-prefixed) counterpart.
        ops = ["add", "sub", "mul", "div", "truediv", "floordiv", "mod", "pow"]
        ops = ops + ["r" + op for op in ops]
        for op in ops:
            # names match, preserve
            s = datetime_series.copy()
            result = getattr(s, op)(s)
            assert result.name == datetime_series.name

            # names don't match, don't preserve
            cp = datetime_series.copy()
            cp.name = "changed"
            result = getattr(s, op)(cp)
            assert result.name is None

    def test_combine_first_name(self, datetime_series):
        """``combine_first`` keeps the Series name."""
        result = datetime_series.combine_first(datetime_series[:5])
        assert result.name == datetime_series.name

    def test_getitem_preserve_name(self, datetime_series):
        """Boolean, list and slice indexing all keep the Series name."""
        result = datetime_series[datetime_series > 0]
        assert result.name == datetime_series.name

        result = datetime_series[[0, 2, 4]]
        assert result.name == datetime_series.name

        result = datetime_series[5:10]
        assert result.name == datetime_series.name

    def test_pickle_datetimes(self, datetime_series):
        """A pickle round trip reproduces ``datetime_series``."""
        unp_ts = self._pickle_roundtrip(datetime_series)
        tm.assert_series_equal(unp_ts, datetime_series)

    def test_pickle_strings(self, string_series):
        """A pickle round trip reproduces ``string_series``."""
        unp_series = self._pickle_roundtrip(string_series)
        tm.assert_series_equal(unp_series, string_series)

    def _pickle_roundtrip(self, obj):
        """Write ``obj`` with ``to_pickle`` and return ``pd.read_pickle`` of it.

        The file path comes from the ``tm.ensure_clean()`` context manager.
        """
        with tm.ensure_clean() as path:
            obj.to_pickle(path)
            return pd.read_pickle(path)

    def test_argsort_preserve_name(self, datetime_series):
        """``argsort`` keeps the Series name."""
        result = datetime_series.argsort()
        assert result.name == datetime_series.name

    def test_sort_index_name(self, datetime_series):
        """``sort_index`` keeps the Series name."""
        result = datetime_series.sort_index(ascending=False)
        assert result.name == datetime_series.name

    def test_constructor_dict(self):
        """Build a Series from a dict, with and without an explicit index."""
        d = {"a": 0.0, "b": 1.0, "c": 2.0}
        result = Series(d)
        expected = Series(d, index=sorted(d))
        tm.assert_series_equal(result, expected)

        # A label missing from the dict ("d") is expected to yield NaN.
        result = Series(d, index=["b", "c", "d", "a"])
        expected = Series([1, 2, np.nan, 0], index=["b", "c", "d", "a"])
        tm.assert_series_equal(result, expected)

    def test_constructor_subclass_dict(self):
        """A ``tm.TestSubDict`` behaves like a plain dict in the constructor."""
        data = tm.TestSubDict((x, 10.0 * x) for x in range(10))
        series = Series(data)
        expected = Series(dict(data.items()))
        tm.assert_series_equal(series, expected)

    def test_constructor_ordereddict(self):
        """An ``OrderedDict`` (or subclass) keeps its key order."""
        # GH3283
        data = OrderedDict(
            ("col{i}".format(i=i), np.random.random()) for i in range(12))

        series = Series(data)
        expected = Series(list(data.values()), list(data.keys()))
        tm.assert_series_equal(series, expected)

        # Test with subclass
        class A(OrderedDict):
            pass

        series = Series(A(data))
        tm.assert_series_equal(series, expected)

    def test_constructor_dict_multiindex(self):
        """Tuple keys give a MultiIndex; mixed keys give a flat Index."""
        d = {("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}
        _d = sorted(d.items())
        result = Series(d)
        expected = Series([x[1] for x in _d],
                          index=pd.MultiIndex.from_tuples([x[0] for x in _d]))
        tm.assert_series_equal(result, expected)

        # Adding a non-tuple key: compare against an Index of raw keys.
        d["z"] = 111.0
        _d.insert(0, ("z", d["z"]))
        result = Series(d)
        expected = Series([x[1] for x in _d],
                          index=pd.Index([x[0] for x in _d],
                                         tupleize_cols=False))
        result = result.reindex(index=expected.index)
        tm.assert_series_equal(result, expected)

    def test_constructor_dict_timedelta_index(self):
        """Dict data keyed by timedeltas aligns with a TimedeltaIndex."""
        # GH #12169 : Resample category data with timedelta index
        # construct Series from dict as data and TimedeltaIndex as index
        # will result NaN in result Series data
        expected = Series(data=["A", "B", "C"],
                          index=pd.to_timedelta([0, 10, 20], unit="s"))

        result = Series(
            data={
                pd.to_timedelta(0, unit="s"): "A",
                pd.to_timedelta(10, unit="s"): "B",
                pd.to_timedelta(20, unit="s"): "C",
            },
            index=pd.to_timedelta([0, 10, 20], unit="s"),
        )
        tm.assert_series_equal(result, expected)

    def test_sparse_accessor_updates_on_inplace(self):
        """``.sparse`` reflects the data after an in-place ``drop``."""
        s = pd.Series([1, 1, 2, 3], dtype="Sparse[int]")
        s.drop([0, 1], inplace=True)
        assert s.sparse.density == 1.0

    def test_tab_completion(self):
        """Check which of ``str``/``dt``/``cat`` appear in ``dir(Series)``."""
        # GH 9910
        s = Series(list("abcd"))
        # Series of str values should have .str but not .dt/.cat in __dir__
        assert "str" in dir(s)
        assert "dt" not in dir(s)
        assert "cat" not in dir(s)

        # similarly for .dt
        s = Series(date_range("1/1/2015", periods=5))
        assert "dt" in dir(s)
        assert "str" not in dir(s)
        assert "cat" not in dir(s)

        # Similarly for .cat, but with the twist that str and dt should be
        # there if the categories are of that type first cat and str.
        s = Series(list("abbcd"), dtype="category")
        assert "cat" in dir(s)
        assert "str" in dir(s)  # as it is a string categorical
        assert "dt" not in dir(s)

        # similar to cat and str
        s = Series(date_range("1/1/2015", periods=5)).astype("category")
        assert "cat" in dir(s)
        assert "str" not in dir(s)
        assert "dt" in dir(s)  # as it is a datetime categorical

    def test_tab_completion_with_categorical(self):
        """Check the public names exposed by ``Series.cat.__dir__()``."""
        # test the tab completion display
        ok_for_cat = [
            "categories",
            "codes",
            "ordered",
            "set_categories",
            "add_categories",
            "remove_categories",
            "rename_categories",
            "reorder_categories",
            "remove_unused_categories",
            "as_ordered",
            "as_unordered",
        ]

        def get_dir(s):
            # Public (non-underscore) names only, de-duplicated and sorted.
            results = [r for r in s.cat.__dir__() if not r.startswith("_")]
            return sorted(set(results))

        s = Series(list("aabbcde")).astype("category")
        results = get_dir(s)
        tm.assert_almost_equal(results, sorted(set(ok_for_cat)))

    @pytest.mark.parametrize(
        "index",
        [
            tm.makeUnicodeIndex(10),
            tm.makeStringIndex(10),
            tm.makeCategoricalIndex(10),
            Index(["foo", "bar", "baz"] * 2),
            tm.makeDateIndex(10),
            tm.makePeriodIndex(10),
            tm.makeTimedeltaIndex(10),
            tm.makeIntIndex(10),
            tm.makeUIntIndex(10),
            tm.makeRangeIndex(10),
            tm.makeFloatIndex(10),
            Index([True, False]),
            Index(["a{}".format(i) for i in range(101)]),
            pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")),
            pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")),
        ],
    )
    def test_index_tab_completion(self, index):
        """Check that identifier-like index labels show up in ``dir(Series)``.

        Only the first 100 unique first-level labels are expected to be
        listed; any label beyond that must be absent.
        """
        # dir contains string-like values of the Index.
        s = pd.Series(index=index)
        dir_s = dir(s)
        for i, x in enumerate(s.index.unique(level=0)):
            if i < 100:
                assert (
                    not isinstance(x, str)
                    or not x.isidentifier()
                    or x in dir_s
                )
            else:
                assert x not in dir_s

    def test_not_hashable(self):
        """``hash()`` on a Series raises ``TypeError`` with a fixed message."""
        s_empty = Series()
        s = Series([1])
        msg = "'Series' objects are mutable, thus they cannot be hashed"
        with pytest.raises(TypeError, match=msg):
            hash(s_empty)
        with pytest.raises(TypeError, match=msg):
            hash(s)

    def test_contains(self, datetime_series):
        """Every index label of the Series is contained in the Series."""
        tm.assert_contains_all(datetime_series.index, datetime_series)

    def test_iter_datetimes(self, datetime_series):
        """Iteration yields the same values as positional lookup."""
        for i, val in enumerate(datetime_series):
            assert val == datetime_series[i]

    def test_iter_strings(self, string_series):
        """Iteration yields the same values as positional lookup."""
        for i, val in enumerate(string_series):
            assert val == string_series[i]

    def test_keys(self, datetime_series):
        """``Series.keys()`` returns the index object itself."""
        # HACK: By doing this in two stages, we avoid 2to3 wrapping the call
        # to .keys() in a list()
        getkeys = datetime_series.keys
        assert getkeys() is datetime_series.index

    def test_values(self, datetime_series):
        """``Series.values`` compares almost-equal to the Series itself."""
        tm.assert_almost_equal(datetime_series.values,
                               datetime_series,
                               check_dtype=False)

    def test_iteritems_datetimes(self, datetime_series):
        """``iteritems`` yields (label, value) pairs matching label lookup."""
        for idx, val in datetime_series.iteritems():
            assert val == datetime_series[idx]

    def test_iteritems_strings(self, string_series):
        """``iteritems`` yields (label, value) pairs and is lazy."""
        for idx, val in string_series.iteritems():
            assert val == string_series[idx]

        # assert is lazy (generators don't define reverse, lists do)
        assert not hasattr(string_series.iteritems(), "reverse")

    def test_items_datetimes(self, datetime_series):
        """``items`` yields (label, value) pairs matching label lookup."""
        for idx, val in datetime_series.items():
            assert val == datetime_series[idx]

    def test_items_strings(self, string_series):
        """``items`` yields (label, value) pairs and is lazy."""
        for idx, val in string_series.items():
            assert val == string_series[idx]

        # assert is lazy (generators don't define reverse, lists do)
        assert not hasattr(string_series.items(), "reverse")

    def test_raise_on_info(self):
        """Calling ``Series.info()`` raises ``AttributeError``."""
        s = Series(np.random.randn(10))
        msg = "'Series' object has no attribute 'info'"
        with pytest.raises(AttributeError, match=msg):
            s.info()

    def test_copy(self):
        """Deep copies are independent of the original; shallow ones are not."""

        for deep in [None, False, True]:
            s = Series(np.arange(10), dtype="float64")

            # default deep is True
            if deep is None:
                s2 = s.copy()
            else:
                s2 = s.copy(deep=deep)

            s2[::2] = np.nan

            if deep is None or deep is True:
                # Did not modify original Series
                assert np.isnan(s2[0])
                assert not np.isnan(s[0])
            else:
                # we DID modify the original Series
                assert np.isnan(s2[0])
                assert np.isnan(s[0])

    def test_copy_tzaware(self):
        """Same deep/shallow copy check for a tz-aware datetime Series."""
        # GH#11794
        # copy of tz-aware
        expected = Series([Timestamp("2012/01/01", tz="UTC")])
        expected2 = Series([Timestamp("1999/01/01", tz="UTC")])

        for deep in [None, False, True]:

            s = Series([Timestamp("2012/01/01", tz="UTC")])

            if deep is None:
                s2 = s.copy()
            else:
                s2 = s.copy(deep=deep)

            s2[0] = pd.Timestamp("1999/01/01", tz="UTC")

            # default deep is True
            if deep is None or deep is True:
                # Did not modify original Series
                tm.assert_series_equal(s2, expected2)
                tm.assert_series_equal(s, expected)
            else:
                # we DID modify the original Series
                tm.assert_series_equal(s2, expected2)
                tm.assert_series_equal(s, expected2)

    def test_axis_alias(self):
        """``"rows"`` is accepted as an alias for the ``"index"`` axis."""
        s = Series([1, 2, np.nan])
        tm.assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index"))
        assert s.dropna().sum("rows") == 3
        assert s._get_axis_number("rows") == 0
        assert s._get_axis_name("rows") == "index"

    def test_class_axis(self):
        """``Series.index`` accessed on the class has a non-empty docstring."""
        # https://github.com/pandas-dev/pandas/issues/18147
        # no exception and no empty docstring
        assert pydoc.getdoc(Series.index)

    def test_numpy_unique(self, datetime_series):
        """Smoke test: ``np.unique`` accepts a Series without raising."""
        # it works!
        np.unique(datetime_series)

    def test_ndarray_compat(self):
        """Check a handful of ndarray-style operations on Series.

        ``Series.item`` and ``np.compress`` on a Series are expected to
        emit ``FutureWarning`` here.
        """

        # test numpy compat with Series as sub-class of NDFrame
        tsdf = DataFrame(
            np.random.randn(1000, 3),
            columns=["A", "B", "C"],
            index=date_range("1/1/2000", periods=1000),
        )

        def f(x):
            return x[x.idxmax()]

        result = tsdf.apply(f)
        expected = tsdf.max()
        tm.assert_series_equal(result, expected)

        # .item()
        with tm.assert_produces_warning(FutureWarning):
            s = Series([1])
            result = s.item()
            assert result == 1
            assert s.item() == s.iloc[0]

        # using an ndarray like function
        s = Series(np.random.randn(10))
        result = Series(np.ones_like(s))
        expected = Series(1, index=range(10), dtype="float64")
        tm.assert_series_equal(result, expected)

        # ravel
        s = Series(np.random.randn(10))
        tm.assert_almost_equal(s.ravel(order="F"), s.values.ravel(order="F"))

        # compress
        # GH 6658
        s = Series([0, 1.0, -1], index=list("abc"))
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s > 0, s)
        tm.assert_series_equal(result, Series([1.0], index=["b"]))

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s < -1, s)
        # result empty Index(dtype=object) as the same as original
        exp = Series([], dtype="float64", index=Index([], dtype="object"))
        tm.assert_series_equal(result, exp)

        s = Series([0, 1.0, -1], index=[0.1, 0.2, 0.3])
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s > 0, s)
        tm.assert_series_equal(result, Series([1.0], index=[0.2]))

        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            result = np.compress(s < -1, s)
        # result empty Float64Index as the same as original
        exp = Series([], dtype="float64", index=Index([], dtype="float64"))
        tm.assert_series_equal(result, exp)

    def test_str_accessor_updates_on_inplace(self):
        """``.str`` reflects the data after an in-place ``drop``."""
        s = pd.Series(list("abc"))
        s.drop([0], inplace=True)
        assert len(s.str.lower()) == 2

    def test_str_attribute(self):
        """``.str`` strip methods match ``str``; non-strings reject ``.str``."""
        # GH9068
        methods = ["strip", "rstrip", "lstrip"]
        s = Series([" jack", "jill ", " jesse ", "frank"])
        for method in methods:
            expected = Series([getattr(str, method)(x) for x in s.values])
            tm.assert_series_equal(
                getattr(Series.str, method)(s.str), expected)

        # str accessor only valid with string values
        s = Series(range(5))
        with pytest.raises(AttributeError, match="only use .str accessor"):
            s.str.repeat(2)

    def test_empty_method(self):
        """``Series.empty`` is True only when the Series has no elements."""
        s_empty = pd.Series()
        assert s_empty.empty

        for full_series in [pd.Series([1]), pd.Series(index=[1])]:
            assert not full_series.empty

    def test_tab_complete_warning(self, ip):
        """IPython completion on a Series must not emit any warning.

        Skipped unless IPython >= 6.0.0 is importable.
        """
        # https://github.com/pandas-dev/pandas/issues/16409
        pytest.importorskip("IPython", minversion="6.0.0")
        from IPython.core.completer import provisionalcompleter

        code = "import pandas as pd; s = pd.Series()"
        ip.run_code(code)
        with tm.assert_produces_warning(None):
            with provisionalcompleter("ignore"):
                list(ip.Completer.completions("s.", 1))

    def test_integer_series_size(self):
        """``Series.size`` is 9 for ``range(9)``, default and ``Int64`` dtype."""
        # GH 25580
        s = Series(range(9))
        assert s.size == 9
        s = Series(range(9), dtype="Int64")
        assert s.size == 9
示例#17
0
 def setUp(self):
     """Populate ``self.indices`` and then run ``setup_indices``."""
     # Single entry keyed "index", built by tm.makeTimedeltaIndex(10).
     timedelta_index = tm.makeTimedeltaIndex(10)
     self.indices = {"index": timedelta_index}
     self.setup_indices()
示例#18
0
 def indices(self):
     """Return the index built by ``tm.makeTimedeltaIndex(10)``."""
     timedelta_index = tm.makeTimedeltaIndex(10)
     return timedelta_index