Python makeCategoricalIndex示例，pandas._testing.makeCategoricalIndex Python示例

示例#1

0

显示文件

文件： categoricals.py 项目： ygene2/pandas

    def setup(self):
        """Build string and integer Series as plain, categorical and ordered-categorical."""
        n_rows = 10**5
        n_categories = 100

        # String data: a CategoricalIndex from the testing helper, cast to str.
        raw_strings = pd.Series(tm.makeCategoricalIndex(n_rows, n_categories))
        self.s_str = raw_strings.astype(str)
        self.s_str_cat = pd.Series(self.s_str, dtype="category")
        with warnings.catch_warnings(record=True):
            ordered_str_dtype = pd.CategoricalDtype(set(self.s_str), ordered=True)
            self.s_str_cat_ordered = self.s_str.astype(ordered_str_dtype)

        # Integer data: random integers in [0, n_categories).
        raw_ints = np.random.randint(0, n_categories, size=n_rows)
        self.s_int = pd.Series(raw_ints)
        self.s_int_cat = pd.Series(self.s_int, dtype="category")
        with warnings.catch_warnings(record=True):
            ordered_int_dtype = pd.CategoricalDtype(set(self.s_int), ordered=True)
            self.s_int_cat_ordered = self.s_int.astype(ordered_int_dtype)

示例#2

0

显示文件

    def test_construction_with_categorical_index(self):
        """set_index on a categorical column gives back the named CategoricalIndex."""
        cat_idx = tm.makeCategoricalIndex(10)
        cat_idx.name = "B"

        # column built from the index's underlying values
        frame = DataFrame({"A": np.random.randn(10), "B": cat_idx.values})
        indexed = frame.set_index("B")
        tm.assert_index_equal(indexed.index, cat_idx)

        # column built from the CategoricalIndex itself
        frame = DataFrame({"A": np.random.randn(10), "B": cat_idx})
        indexed = frame.set_index("B")
        tm.assert_index_equal(indexed.index, cat_idx)

        # reset_index followed by set_index round-trips
        round_tripped = indexed.reset_index().set_index("B")
        tm.assert_index_equal(round_tripped.index, cat_idx)

示例#3

0

显示文件

文件： test_category.py 项目： tnir/pandas

    def test_view_i8(self):
        """view("i8") raises at length 100 and equals the codes' view once trimmed."""
        # GH#25464
        cat_index = tm.makeCategoricalIndex(100)
        msg = "When changing to a larger dtype, its size must be a divisor"
        for obj in (cat_index, cat_index._data):
            with pytest.raises(ValueError, match=msg):
                obj.view("i8")

        cat_index = cat_index[:-4]  # length divisible by 8
        categorical = cat_index._data
        expected = categorical.codes.view("i8")

        tm.assert_numpy_array_equal(cat_index.view("i8"), expected)
        tm.assert_numpy_array_equal(categorical.view("i8"), expected)

示例#4

0

显示文件

文件： test_api.py 项目： zer0kg/pandas

class TestSeriesMisc:
    def test_scalarop_preserve_name(self, datetime_series):
        """Multiplying by a scalar keeps the Series name."""
        doubled = datetime_series * 2
        assert doubled.name == datetime_series.name

    def test_copy_name(self, datetime_series):
        """copy() keeps the Series name."""
        duplicate = datetime_series.copy()
        assert duplicate.name == datetime_series.name

    def test_copy_index_name_checking(self, datetime_series):
        """Renaming the index of a copy must leave the original index name unset."""
        # don't want to be able to modify the index stored elsewhere after
        # making a copy
        datetime_series.index.name = None
        assert datetime_series.index.name is None

        cp = datetime_series.copy()
        cp.index.name = "foo"
        assert datetime_series.index.name is None

    def test_append_preserve_name(self, datetime_series):
        """Appending two slices of the same Series keeps its name."""
        head = datetime_series[:5]
        tail = datetime_series[5:]
        combined = head.append(tail)
        assert combined.name == datetime_series.name

    def test_binop_maybe_preserve_name(self, datetime_series):
        """Binary ops keep the name when operands share it and drop it otherwise."""
        # names match, preserve
        product = datetime_series * datetime_series
        assert product.name == datetime_series.name
        product = datetime_series.mul(datetime_series)
        assert product.name == datetime_series.name

        shorter_product = datetime_series * datetime_series[:-2]
        assert shorter_product.name == datetime_series.name

        # names don't match, don't preserve
        renamed = datetime_series.copy()
        renamed.name = "something else"
        assert (datetime_series + renamed).name is None
        assert datetime_series.add(renamed).name is None

        base_ops = ["add", "sub", "mul", "div", "truediv", "floordiv", "mod", "pow"]
        all_ops = base_ops + ["r" + name for name in base_ops]
        for op_name in all_ops:
            left = datetime_series.copy()

            # names match, preserve
            same_name = getattr(left, op_name)(left)
            assert same_name.name == datetime_series.name

            # names don't match, don't preserve
            other = datetime_series.copy()
            other.name = "changed"
            different_name = getattr(left, op_name)(other)
            assert different_name.name is None

    def test_getitem_preserve_name(self, datetime_series):
        """Selecting by boolean mask, integer list or slice keeps the Series name."""
        by_mask = datetime_series[datetime_series > 0]
        assert by_mask.name == datetime_series.name

        by_list = datetime_series[[0, 2, 4]]
        assert by_list.name == datetime_series.name

        by_slice = datetime_series[5:10]
        assert by_slice.name == datetime_series.name

    def test_pickle_datetimes(self, datetime_series):
        """A datetime Series survives the pickle round trip unchanged."""
        restored = self._pickle_roundtrip(datetime_series)
        tm.assert_series_equal(restored, datetime_series)

    def test_pickle_strings(self, string_series):
        """A string Series survives the pickle round trip unchanged."""
        restored = self._pickle_roundtrip(string_series)
        tm.assert_series_equal(restored, string_series)

    def _pickle_roundtrip(self, obj):
        """Pickle ``obj`` to the path from ``tm.ensure_clean()`` and return it read back."""
        with tm.ensure_clean() as path:
            obj.to_pickle(path)
            return pd.read_pickle(path)

    def test_constructor_dict(self):
        """A dict builds a Series keyed by its keys; an index key not in the dict gives NaN."""
        mapping = {"a": 0.0, "b": 1.0, "c": 2.0}
        from_dict = Series(mapping)
        with_sorted_index = Series(mapping, index=sorted(mapping.keys()))
        tm.assert_series_equal(from_dict, with_sorted_index)

        # "d" is not a key of the dict, so its slot is expected to be NaN
        reordered = Series(mapping, index=["b", "c", "d", "a"])
        explicit = Series([1, 2, np.nan, 0], index=["b", "c", "d", "a"])
        tm.assert_series_equal(reordered, explicit)

    def test_constructor_subclass_dict(self, dict_subclass):
        """A Series from a dict subclass equals one built from the plain dict."""
        mapping = dict_subclass((key, 10.0 * key) for key in range(10))
        from_subclass = Series(mapping)
        from_plain_dict = Series(dict(mapping.items()))
        tm.assert_series_equal(from_subclass, from_plain_dict)

    def test_constructor_ordereddict(self):
        """A Series built from an OrderedDict (or a subclass) keeps the dict's order."""
        # GH3283
        ordered = OrderedDict((f"col{i}", np.random.random()) for i in range(12))
        expected = Series(list(ordered.values()), list(ordered.keys()))

        tm.assert_series_equal(Series(ordered), expected)

        # Test with subclass
        class OrderedDictSubclass(OrderedDict):
            pass

        tm.assert_series_equal(Series(OrderedDictSubclass(ordered)), expected)

    def test_constructor_dict_multiindex(self):
        """Tuple keys build a MultiIndex; adding a scalar key gives a flat Index of keys."""
        mapping = {("a", "a"): 0.0, ("b", "a"): 1.0, ("b", "c"): 2.0}
        pairs = sorted(mapping.items())

        expected_index = pd.MultiIndex.from_tuples([key for key, _ in pairs])
        expected = Series([value for _, value in pairs], index=expected_index)
        tm.assert_series_equal(Series(mapping), expected)

        # mix a plain string key in with the tuple keys
        mapping["z"] = 111.0
        pairs.insert(0, ("z", mapping["z"]))

        expected_index = pd.Index([key for key, _ in pairs], tupleize_cols=False)
        expected = Series([value for _, value in pairs], index=expected_index)
        reindexed = Series(mapping).reindex(index=expected.index)
        tm.assert_series_equal(reindexed, expected)

    def test_constructor_dict_timedelta_index(self):
        """A dict keyed by Timedeltas aligns with a matching TimedeltaIndex."""
        # GH #12169 : Resample category data with timedelta index
        # construct Series from dict as data and TimedeltaIndex as index
        # will result NaN in result Series data
        td_index = pd.to_timedelta([0, 10, 20], unit="s")
        expected = Series(data=["A", "B", "C"], index=td_index)

        by_timedelta = {
            pd.to_timedelta(0, unit="s"): "A",
            pd.to_timedelta(10, unit="s"): "B",
            pd.to_timedelta(20, unit="s"): "C",
        }
        result = Series(data=by_timedelta, index=td_index)
        tm.assert_series_equal(result, expected)

    def test_sparse_accessor_updates_on_inplace(self):
        """After an in-place drop, ``.sparse`` reflects the remaining values."""
        sparse_ser = pd.Series([1, 1, 2, 3], dtype="Sparse[int]")
        dropped = sparse_ser.drop([0, 1], inplace=True)
        # in-place drop returns nothing
        assert dropped is None
        assert sparse_ser.sparse.density == 1.0

    def test_tab_completion(self):
        """dir(Series) lists .str/.dt/.cat only for values that support them."""
        # GH 9910

        def check_accessors(ser, present, absent):
            names = dir(ser)
            for accessor in present:
                assert accessor in names
            for accessor in absent:
                assert accessor not in names

        # Series of str values should have .str but not .dt/.cat in __dir__
        check_accessors(Series(list("abcd")), ["str"], ["dt", "cat"])

        # similarly for .dt
        check_accessors(
            Series(date_range("1/1/2015", periods=5)), ["dt"], ["str", "cat"]
        )

        # Similarly for .cat, but with the twist that str and dt should be
        # there if the categories are of that type: here a string categorical.
        check_accessors(
            Series(list("abbcd"), dtype="category"), ["cat", "str"], ["dt"]
        )

        # and here a datetime categorical
        check_accessors(
            Series(date_range("1/1/2015", periods=5)).astype("category"),
            ["cat", "dt"],
            ["str"],
        )

    def test_tab_completion_with_categorical(self):
        """The public names listed by ``.cat.__dir__()`` are exactly the expected set."""
        # test the tab completion display
        ok_for_cat = [
            "categories",
            "codes",
            "ordered",
            "set_categories",
            "add_categories",
            "remove_categories",
            "rename_categories",
            "reorder_categories",
            "remove_unused_categories",
            "as_ordered",
            "as_unordered",
        ]

        cat_series = Series(list("aabbcde")).astype("category")
        # keep only names without a leading underscore, de-duplicated and sorted
        public_names = sorted(
            {name for name in cat_series.cat.__dir__() if not name.startswith("_")}
        )
        tm.assert_almost_equal(public_names, sorted(set(ok_for_cat)))

    @pytest.mark.parametrize(
        "index",
        [
            tm.makeUnicodeIndex(10),
            tm.makeStringIndex(10),
            tm.makeCategoricalIndex(10),
            Index(["foo", "bar", "baz"] * 2),
            tm.makeDateIndex(10),
            tm.makePeriodIndex(10),
            tm.makeTimedeltaIndex(10),
            tm.makeIntIndex(10),
            tm.makeUIntIndex(10),
            # previously a second makeIntIndex(10), which only repeated the
            # int case; a range index is exercised here instead
            tm.makeRangeIndex(10),
            tm.makeFloatIndex(10),
            Index([True, False]),
            Index([f"a{i}" for i in range(101)]),
            pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")),
            pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")),
        ],
    )
    def test_index_tab_completion(self, index):
        """String labels that are identifiers show up in dir(), for the first 100 only."""
        # dir contains string-like values of the Index.
        s = pd.Series(index=index, dtype=object)
        dir_s = dir(s)
        for i, x in enumerate(s.index.unique(level=0)):
            if i < 100:
                # only str labels that are valid identifiers must be listed
                assert not isinstance(x, str) or not x.isidentifier() or x in dir_s
            else:
                # labels beyond the first 100 unique ones are left out
                assert x not in dir_s

    def test_not_hashable(self):
        """hash() raises TypeError for both an empty and a non-empty Series."""
        msg = "'Series' objects are mutable, thus they cannot be hashed"
        for ser in (Series(dtype=object), Series([1])):
            with pytest.raises(TypeError, match=msg):
                hash(ser)

    def test_contains(self, datetime_series):
        """Every index label passes the ``tm.assert_contains_all`` check on the Series."""
        labels = datetime_series.index
        tm.assert_contains_all(labels, datetime_series)

    def test_iter_datetimes(self, datetime_series):
        """Iterating yields the same values as indexing with 0, 1, 2, ..."""
        for position, value in enumerate(datetime_series):
            looked_up = datetime_series[position]
            assert value == looked_up

    def test_iter_strings(self, string_series):
        """Iterating yields the same values as indexing with 0, 1, 2, ..."""
        for position, value in enumerate(string_series):
            looked_up = string_series[position]
            assert value == looked_up

    def test_keys(self, datetime_series):
        """keys() returns the very same object as ``.index``."""
        # HACK: By doing this in two stages, we avoid 2to3 wrapping the call
        # to .keys() in a list()
        keys_method = datetime_series.keys
        returned = keys_method()
        assert returned is datetime_series.index

    def test_values(self, datetime_series):
        """``.values`` is almost-equal to the Series itself when dtype is ignored."""
        raw_values = datetime_series.values
        tm.assert_almost_equal(raw_values, datetime_series, check_dtype=False)

    def test_iteritems_datetimes(self, datetime_series):
        """iteritems() pairs each label with the value found under that label."""
        for label, value in datetime_series.iteritems():
            looked_up = datetime_series[label]
            assert value == looked_up

    def test_iteritems_strings(self, string_series):
        """iteritems() pairs labels with their values and is not a list."""
        for label, value in string_series.iteritems():
            looked_up = string_series[label]
            assert value == looked_up

        # assert is lazy (generators don't define reverse, lists do)
        pairs = string_series.iteritems()
        assert not hasattr(pairs, "reverse")

    def test_items_datetimes(self, datetime_series):
        """items() pairs each label with the value found under that label."""
        for label, value in datetime_series.items():
            looked_up = datetime_series[label]
            assert value == looked_up

    def test_items_strings(self, string_series):
        """items() pairs labels with their values and is not a list."""
        for label, value in string_series.items():
            looked_up = string_series[label]
            assert value == looked_up

        # assert is lazy (generators don't define reverse, lists do)
        pairs = string_series.items()
        assert not hasattr(pairs, "reverse")

    def test_raise_on_info(self):
        """Calling ``.info()`` on a Series raises AttributeError."""
        random_ser = Series(np.random.randn(10))
        expected_message = "'Series' object has no attribute 'info'"
        with pytest.raises(AttributeError, match=expected_message):
            random_ser.info()

    def test_copy(self):
        """copy() is deep by default; only ``deep=False`` shares data with the original."""
        for deep in [None, False, True]:
            s = Series(np.arange(10), dtype="float64")

            # default deep is True
            if deep is None:
                s2 = s.copy()
            else:
                s2 = s.copy(deep=deep)

            # np.nan, not the np.NaN alias, which NumPy 2.0 removed
            s2[::2] = np.nan

            if deep is None or deep is True:
                # Did not modify original Series
                assert np.isnan(s2[0])
                assert not np.isnan(s[0])
            else:
                # we DID modify the original Series
                assert np.isnan(s2[0])
                assert np.isnan(s[0])

    def test_copy_tzaware(self):
        """Copy semantics for a tz-aware Series: only ``deep=False`` shares data."""
        # GH#11794
        # copy of tz-aware
        untouched = Series([Timestamp("2012/01/01", tz="UTC")])
        overwritten = Series([Timestamp("1999/01/01", tz="UTC")])

        for deep in [None, False, True]:
            original = Series([Timestamp("2012/01/01", tz="UTC")])

            # default deep is True
            copied = original.copy() if deep is None else original.copy(deep=deep)
            copied[0] = pd.Timestamp("1999/01/01", tz="UTC")

            # the copy always carries the new value
            tm.assert_series_equal(copied, overwritten)

            if deep is None or deep is True:
                # Did not modify original Series
                tm.assert_series_equal(original, untouched)
            else:
                # we DID modify the original Series
                tm.assert_series_equal(original, overwritten)

    def test_axis_alias(self):
        """The axis name "rows" is accepted as an alias for "index"."""
        ser = Series([1, 2, np.nan])

        dropped_by_rows = ser.dropna(axis="rows")
        dropped_by_index = ser.dropna(axis="index")
        tm.assert_series_equal(dropped_by_rows, dropped_by_index)

        total = ser.dropna().sum("rows")
        assert total == 3
        assert ser._get_axis_number("rows") == 0
        assert ser._get_axis_name("rows") == "index"

    def test_class_axis(self):
        """``Series.index`` accessed on the class has a non-empty docstring."""
        # https://github.com/pandas-dev/pandas/issues/18147
        # no exception and no empty docstring
        index_doc = pydoc.getdoc(Series.index)
        assert index_doc

    def test_numpy_unique(self, datetime_series):
        """np.unique accepts a Series; the result itself is not inspected."""
        # it works!
        _ = np.unique(datetime_series)

    def test_item(self):
        """item() returns a scalar for length-1 objects and raises for longer ones."""
        int_ser = Series([1])
        scalar = int_ser.item()
        assert scalar == 1
        assert scalar == int_ser.iloc[0]
        assert isinstance(scalar, int)  # i.e. not np.int64

        float_ser = Series([0.5], index=[3])
        scalar = float_ser.item()
        assert isinstance(scalar, float)
        assert scalar == 0.5

        two_values = Series([1, 2])
        msg = "can only convert an array of size 1"
        with pytest.raises(ValueError, match=msg):
            two_values.item()

        dti = pd.date_range("2016-01-01", periods=2)
        tdi = dti - dti
        # same checks for the datetime index and the timedelta index from it
        for idx, scalar_type in ((dti, Timestamp), (tdi, Timedelta)):
            with pytest.raises(ValueError, match=msg):
                idx.item()
            with pytest.raises(ValueError, match=msg):
                Series(idx).item()

            assert isinstance(idx[:1].item(), scalar_type)
            assert isinstance(Series(idx)[:1].item(), scalar_type)

        # Case where ser[0] would not work
        offset_ser = Series(dti, index=[5, 6])
        first = offset_ser[:1].item()
        assert first == dti[0]

    def test_ndarray_compat(self):
        """Series works with DataFrame.apply, np.ones_like and ravel."""
        # test numpy compat with Series as sub-class of NDFrame
        frame = DataFrame(
            np.random.randn(1000, 3),
            columns=["A", "B", "C"],
            index=date_range("1/1/2000", periods=1000),
        )

        # the value at each column's idxmax must equal the column max
        picked = frame.apply(lambda col: col[col.idxmax()])
        tm.assert_series_equal(picked, frame.max())

        # using an ndarray like function
        random_ser = Series(np.random.randn(10))
        ones = Series(np.ones_like(random_ser))
        all_ones = Series(1, index=range(10), dtype="float64")
        tm.assert_series_equal(ones, all_ones)

        # ravel
        random_ser = Series(np.random.randn(10))
        flattened = random_ser.ravel(order="F")
        tm.assert_almost_equal(flattened, random_ser.values.ravel(order="F"))

    def test_str_accessor_updates_on_inplace(self):
        """After an in-place drop, ``.str`` operates on the remaining values."""
        letters = pd.Series(list("abc"))
        dropped = letters.drop([0], inplace=True)
        # in-place drop returns nothing
        assert dropped is None
        lowered = letters.str.lower()
        assert len(lowered) == 2

    def test_str_attribute(self):
        """``Series.str`` strip methods match ``str``'s; ``.str`` rejects non-strings."""
        # GH9068
        padded = Series([" jack", "jill ", " jesse ", "frank"])
        for name in ["strip", "rstrip", "lstrip"]:
            plain_method = getattr(str, name)
            expected = Series([plain_method(value) for value in padded.values])

            accessor_method = getattr(Series.str, name)
            tm.assert_series_equal(accessor_method(padded.str), expected)

        # str accessor only valid with string values
        numbers = Series(range(5))
        with pytest.raises(AttributeError, match="only use .str accessor"):
            numbers.str.repeat(2)

    def test_empty_method(self):
        """``.empty`` is True only for a Series with no entries."""
        no_entries = pd.Series(dtype=object)
        assert no_entries.empty

        index_only = pd.Series(index=[1], dtype=object)
        with_value = pd.Series([1])
        for candidate in (with_value, index_only):
            assert not candidate.empty

    @async_mark()
    @td.check_file_leaks
    async def test_tab_complete_warning(self, ip):
        """Tab-completing on a Series in IPython raises only the expected warning."""
        # https://github.com/pandas-dev/pandas/issues/16409
        pytest.importorskip("IPython", minversion="6.0.0")
        from IPython.core.completer import provisionalcompleter

        code = "import pandas as pd; s = pd.Series()"
        await ip.run_code(code)

        # TODO: remove it when Ipython updates
        # GH 33567, jedi version raises Deprecation warning in Ipython
        import jedi

        # Compare the version numerically; as plain strings "0.9.0" would
        # sort after "0.17.0" and select the wrong branch.
        jedi_major_minor = tuple(
            int(part) for part in jedi.__version__.split(".")[:2]
        )
        if jedi_major_minor < (0, 17):
            warning = tm.assert_produces_warning(None)
        else:
            warning = tm.assert_produces_warning(DeprecationWarning,
                                                 check_stacklevel=False)
        with warning:
            with provisionalcompleter("ignore"):
                list(ip.Completer.completions("s.", 1))

    def test_integer_series_size(self):
        """``.size`` is 9 for nine values, with default and "Int64" dtype alike."""
        # GH 25580
        default_dtype = Series(range(9))
        assert default_dtype.size == 9

        nullable_dtype = Series(range(9), dtype="Int64")
        assert nullable_dtype.size == 9

    def test_attrs(self):
        """``attrs`` starts empty and is carried over to the result of ``s + 1``."""
        named = pd.Series([0, 1], name="abc")
        assert named.attrs == {}

        named.attrs["version"] = 1
        incremented = named + 1
        assert incremented.attrs == {"version": 1}

    @pytest.mark.parametrize("allows_duplicate_labels", [True, False, None])
    def test_set_flags(self, allows_duplicate_labels):
        """set_flags returns a new Series that shares data unless ``copy=True``."""
        ser = pd.Series([1, 2])
        flagged = ser.set_flags(allows_duplicate_labels=allows_duplicate_labels)

        # We don't update when it's not provided
        if allows_duplicate_labels is None:
            expected_flag = True
        else:
            expected_flag = allows_duplicate_labels
        assert flagged.flags.allows_duplicate_labels is expected_flag

        # We made a copy
        assert ser is not flagged
        # We didn't mutate the original
        assert ser.flags.allows_duplicate_labels is True

        # But we didn't copy data
        flagged.iloc[0] = 0
        assert ser.iloc[0] == 0

        # Now we do copy.
        copied = ser.set_flags(
            copy=True, allows_duplicate_labels=allows_duplicate_labels
        )
        copied.iloc[0] = 10
        assert ser.iloc[0] == 0

示例#5

0

显示文件

    )


# Sample indexes keyed by a short descriptive name. The keys are used as the
# params of the ``index`` fixture defined right after this dict.
indices_dict = {
    "unicode": tm.makeUnicodeIndex(100),
    "string": tm.makeStringIndex(100),
    "datetime": tm.makeDateIndex(100),
    "datetime-tz": tm.makeDateIndex(100, tz="US/Pacific"),
    "period": tm.makePeriodIndex(100),
    "timedelta": tm.makeTimedeltaIndex(100),
    "int": tm.makeIntIndex(100),
    "uint": tm.makeUIntIndex(100),
    "range": tm.makeRangeIndex(100),
    "float": tm.makeFloatIndex(100),
    # NOTE(review): built with 10 while the other helpers get 100 - confirm intent
    "bool": tm.makeBoolIndex(10),
    "categorical": tm.makeCategoricalIndex(100),
    "interval": tm.makeIntervalIndex(100),
    "empty": Index([]),
    "tuples": MultiIndex.from_tuples(zip(["foo", "bar", "baz"], [1, 2, 3])),
    # the two entries below come from helpers defined elsewhere in this file
    "mi-with-dt64tz-level": _create_mi_with_dt64tz_level(),
    "multi": _create_multiindex(),
    "repeats": Index([0, 0, 1, 1, 2, 2]),
}


@pytest.fixture(params=indices_dict.keys())
def index(request):
    """
    Fixture for many "simple" kinds of indices.

    These indices are unlikely to cover corner cases, e.g.

示例#6

0

显示文件

文件： categoricals.py 项目： ygene2/pandas

 def setup(self):
     """Store a sorted CategoricalIndex, its values and its second category."""
     size = 10**5
     sorted_index = tm.makeCategoricalIndex(size).sort_values()
     self.ci = sorted_index
     self.c = sorted_index.values
     self.key = sorted_index.categories[1]

示例#7

0

显示文件

 def index(self, request):
     """Return ``tm.makeCategoricalIndex(100)``; ``request`` is not used."""
     categorical_index = tm.makeCategoricalIndex(100)
     return categorical_index

示例#8

0

显示文件

class TestSeriesMisc:
    def test_tab_completion(self):
        """dir(Series) lists .str/.dt/.cat only for values that support them."""
        # GH 9910

        def check_accessors(ser, present, absent):
            names = dir(ser)
            for accessor in present:
                assert accessor in names
            for accessor in absent:
                assert accessor not in names

        # Series of str values should have .str but not .dt/.cat in __dir__
        check_accessors(Series(list("abcd")), ["str"], ["dt", "cat"])

        # similarly for .dt
        check_accessors(
            Series(date_range("1/1/2015", periods=5)), ["dt"], ["str", "cat"]
        )

        # Similarly for .cat, but with the twist that str and dt should be
        # there if the categories are of that type: here a string categorical.
        check_accessors(
            Series(list("abbcd"), dtype="category"), ["cat", "str"], ["dt"]
        )

        # and here a datetime categorical
        check_accessors(
            Series(date_range("1/1/2015", periods=5)).astype("category"),
            ["cat", "dt"],
            ["str"],
        )

    def test_tab_completion_with_categorical(self):
        """The public names listed by ``.cat.__dir__()`` are exactly the expected set."""
        # test the tab completion display
        ok_for_cat = [
            "categories",
            "codes",
            "ordered",
            "set_categories",
            "add_categories",
            "remove_categories",
            "rename_categories",
            "reorder_categories",
            "remove_unused_categories",
            "as_ordered",
            "as_unordered",
        ]

        cat_series = Series(list("aabbcde")).astype("category")
        # keep only names without a leading underscore, de-duplicated and sorted
        public_names = sorted(
            {name for name in cat_series.cat.__dir__() if not name.startswith("_")}
        )
        tm.assert_almost_equal(public_names, sorted(set(ok_for_cat)))

    @pytest.mark.parametrize(
        "index",
        [
            tm.makeUnicodeIndex(10),
            tm.makeStringIndex(10),
            tm.makeCategoricalIndex(10),
            Index(["foo", "bar", "baz"] * 2),
            tm.makeDateIndex(10),
            tm.makePeriodIndex(10),
            tm.makeTimedeltaIndex(10),
            tm.makeIntIndex(10),
            tm.makeUIntIndex(10),
            # previously a second makeIntIndex(10), which only repeated the
            # int case; a range index is exercised here instead
            tm.makeRangeIndex(10),
            tm.makeFloatIndex(10),
            Index([True, False]),
            Index([f"a{i}" for i in range(101)]),
            pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")),
            pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")),
        ],
    )
    def test_index_tab_completion(self, index):
        """String labels that are identifiers show up in dir(), for the first 100 only."""
        # dir contains string-like values of the Index.
        s = Series(index=index, dtype=object)
        dir_s = dir(s)
        for i, x in enumerate(s.index.unique(level=0)):
            if i < 100:
                # only str labels that are valid identifiers must be listed
                assert not isinstance(x, str) or not x.isidentifier() or x in dir_s
            else:
                # labels beyond the first 100 unique ones are left out
                assert x not in dir_s

    def test_not_hashable(self):
        """hash() raises TypeError for both an empty and a non-empty Series."""
        msg = "unhashable type: 'Series'"
        for ser in (Series(dtype=object), Series([1])):
            with pytest.raises(TypeError, match=msg):
                hash(ser)

    def test_contains(self, datetime_series):
        """Every index label passes the ``tm.assert_contains_all`` check on the Series."""
        labels = datetime_series.index
        tm.assert_contains_all(labels, datetime_series)

    def test_raise_on_info(self):
        """Calling ``.info()`` on a Series raises AttributeError."""
        random_ser = Series(np.random.randn(10))
        expected_message = "'Series' object has no attribute 'info'"
        with pytest.raises(AttributeError, match=expected_message):
            random_ser.info()

    def test_axis_alias(self):
        """The axis name "rows" is accepted as an alias for "index"."""
        ser = Series([1, 2, np.nan])

        dropped_by_rows = ser.dropna(axis="rows")
        dropped_by_index = ser.dropna(axis="index")
        tm.assert_series_equal(dropped_by_rows, dropped_by_index)

        total = ser.dropna().sum("rows")
        assert total == 3
        assert ser._get_axis_number("rows") == 0
        assert ser._get_axis_name("rows") == "index"

    def test_class_axis(self):
        """``Series.index`` accessed on the class has a non-empty docstring."""
        # https://github.com/pandas-dev/pandas/issues/18147
        # no exception and no empty docstring
        index_doc = pydoc.getdoc(Series.index)
        assert index_doc

    def test_ndarray_compat(self):
        """Series works with DataFrame.apply, np.ones_like and ravel."""
        # test numpy compat with Series as sub-class of NDFrame
        frame = DataFrame(
            np.random.randn(1000, 3),
            columns=["A", "B", "C"],
            index=date_range("1/1/2000", periods=1000),
        )

        # the value at each column's idxmax must equal the column max
        picked = frame.apply(lambda col: col[col.idxmax()])
        tm.assert_series_equal(picked, frame.max())

        # using an ndarray like function
        random_ser = Series(np.random.randn(10))
        ones = Series(np.ones_like(random_ser))
        all_ones = Series(1, index=range(10), dtype="float64")
        tm.assert_series_equal(ones, all_ones)

        # ravel
        random_ser = Series(np.random.randn(10))
        flattened = random_ser.ravel(order="F")
        tm.assert_almost_equal(flattened, random_ser.values.ravel(order="F"))

    def test_empty_method(self):
        """``.empty`` is True only for a Series with no entries."""
        no_entries = Series(dtype=object)
        assert no_entries.empty

        index_only = Series(index=[1], dtype=object)
        with_value = Series([1])
        for candidate in (with_value, index_only):
            assert not candidate.empty

    def test_integer_series_size(self):
        """``.size`` is 9 for nine values, with default and "Int64" dtype alike."""
        # GH 25580
        default_dtype = Series(range(9))
        assert default_dtype.size == 9

        nullable_dtype = Series(range(9), dtype="Int64")
        assert nullable_dtype.size == 9

    def test_attrs(self):
        """``attrs`` starts empty and is carried over to the result of ``s + 1``."""
        named = Series([0, 1], name="abc")
        assert named.attrs == {}

        named.attrs["version"] = 1
        incremented = named + 1
        assert incremented.attrs == {"version": 1}

    @skip_if_no("jinja2")
    def test_inspect_getmembers(self):
        """inspect.getmembers on an empty Series must not emit any warning."""
        # GH38782
        empty_ser = Series(dtype=object)
        no_warning = tm.assert_produces_warning(None)
        with no_warning:
            inspect.getmembers(empty_ser)

    def test_unknown_attribute(self):
        """Accessing an attribute that does not exist raises AttributeError."""
        # GH#9680
        td_index = pd.timedelta_range(start=0, periods=10, freq="1s")
        ser = Series(np.random.normal(size=10), index=td_index)
        assert "foo" not in ser.__dict__

        expected_message = "'Series' object has no attribute 'foo'"
        with pytest.raises(AttributeError, match=expected_message):
            getattr(ser, "foo")

示例#9

0

显示文件

文件： test_api.py 项目： wlgranados/pandas

class TestSeriesMisc:
    """Assorted checks of general-purpose ``Series`` behaviour."""

    def test_getitem_preserve_name(self, datetime_series):
        """Indexing by boolean mask, integer list or slice keeps ``name``."""
        result = datetime_series[datetime_series > 0]
        assert result.name == datetime_series.name

        result = datetime_series[[0, 2, 4]]
        assert result.name == datetime_series.name

        result = datetime_series[5:10]
        assert result.name == datetime_series.name

    def test_tab_completion(self):
        """``dir()`` lists only the accessors that match the Series' values."""
        # GH 9910
        s = Series(list("abcd"))
        # Series of str values should have .str but not .dt/.cat in __dir__
        assert "str" in dir(s)
        assert "dt" not in dir(s)
        assert "cat" not in dir(s)

        # similarly for .dt
        s = Series(date_range("1/1/2015", periods=5))
        assert "dt" in dir(s)
        assert "str" not in dir(s)
        assert "cat" not in dir(s)

        # Similarly for .cat, but with the twist that str and dt should be
        # there if the categories are of that type first cat and str.
        s = Series(list("abbcd"), dtype="category")
        assert "cat" in dir(s)
        assert "str" in dir(s)  # as it is a string categorical
        assert "dt" not in dir(s)

        # similar to cat and str
        s = Series(date_range("1/1/2015", periods=5)).astype("category")
        assert "cat" in dir(s)
        assert "str" not in dir(s)
        assert "dt" in dir(s)  # as it is a datetime categorical

    def test_tab_completion_with_categorical(self):
        """The public names on ``.cat`` are exactly the expected set."""
        # test the tab completion display
        ok_for_cat = [
            "categories",
            "codes",
            "ordered",
            "set_categories",
            "add_categories",
            "remove_categories",
            "rename_categories",
            "reorder_categories",
            "remove_unused_categories",
            "as_ordered",
            "as_unordered",
        ]

        def get_dir(s):
            # Public (non-underscore) names of the accessor, de-duplicated.
            results = [r for r in s.cat.__dir__() if not r.startswith("_")]
            return sorted(set(results))

        s = Series(list("aabbcde")).astype("category")
        results = get_dir(s)
        tm.assert_almost_equal(results, sorted(set(ok_for_cat)))

    @pytest.mark.parametrize(
        "index",
        [
            tm.makeUnicodeIndex(10),
            tm.makeStringIndex(10),
            tm.makeCategoricalIndex(10),
            Index(["foo", "bar", "baz"] * 2),
            tm.makeDateIndex(10),
            tm.makePeriodIndex(10),
            tm.makeTimedeltaIndex(10),
            tm.makeIntIndex(10),
            tm.makeUIntIndex(10),
            tm.makeFloatIndex(10),
            Index([True, False]),
            Index([f"a{i}" for i in range(101)]),
            pd.MultiIndex.from_tuples(zip("ABCD", "EFGH")),
            pd.MultiIndex.from_tuples(zip([0, 1, 2, 3], "EFGH")),
        ],
    )
    def test_index_tab_completion(self, index):
        """Identifier-like string labels show up in ``dir()``, up to a cap.

        For the first 100 unique level-0 labels, every label that is a string
        and a valid identifier must be in ``dir(s)``; labels past that point
        must not be.
        """
        # dir contains string-like values of the Index.
        s = Series(index=index, dtype=object)
        dir_s = dir(s)
        for i, x in enumerate(s.index.unique(level=0)):
            if i < 100:
                assert not isinstance(
                    x, str) or not x.isidentifier() or x in dir_s
            else:
                assert x not in dir_s

    def test_not_hashable(self):
        """Hashing a Series, empty or not, raises ``TypeError``."""
        s_empty = Series(dtype=object)
        s = Series([1])
        msg = "'Series' objects are mutable, thus they cannot be hashed"
        with pytest.raises(TypeError, match=msg):
            hash(s_empty)
        with pytest.raises(TypeError, match=msg):
            hash(s)

    def test_contains(self, datetime_series):
        """Check the index labels against the Series with ``assert_contains_all``."""
        tm.assert_contains_all(datetime_series.index, datetime_series)

    def test_values(self, datetime_series):
        """``values`` compares almost-equal to the Series, ignoring dtype."""
        tm.assert_almost_equal(datetime_series.values,
                               datetime_series,
                               check_dtype=False)

    def test_raise_on_info(self):
        """Calling ``info`` on a Series raises ``AttributeError``."""
        s = Series(np.random.randn(10))
        msg = "'Series' object has no attribute 'info'"
        with pytest.raises(AttributeError, match=msg):
            s.info()

    def test_axis_alias(self):
        """``"rows"`` is accepted as an alias for the ``"index"`` axis."""
        s = Series([1, 2, np.nan])
        tm.assert_series_equal(s.dropna(axis="rows"), s.dropna(axis="index"))
        assert s.dropna().sum("rows") == 3
        assert s._get_axis_number("rows") == 0
        assert s._get_axis_name("rows") == "index"

    def test_class_axis(self):
        """``Series.index`` exposes a non-empty docstring through ``pydoc``."""
        # https://github.com/pandas-dev/pandas/issues/18147
        # no exception and no empty docstring
        assert pydoc.getdoc(Series.index)

    def test_item(self):
        """``item()`` returns a scalar for length-1 data and raises otherwise."""
        s = Series([1])
        result = s.item()
        assert result == 1
        assert result == s.iloc[0]
        assert isinstance(result, int)  # i.e. not np.int64

        ser = Series([0.5], index=[3])
        result = ser.item()
        assert isinstance(result, float)
        assert result == 0.5

        ser = Series([1, 2])
        msg = "can only convert an array of size 1"
        with pytest.raises(ValueError, match=msg):
            ser.item()

        # datetime-like data: two elements raise, one element gives Timestamp
        dti = pd.date_range("2016-01-01", periods=2)
        with pytest.raises(ValueError, match=msg):
            dti.item()
        with pytest.raises(ValueError, match=msg):
            Series(dti).item()

        val = dti[:1].item()
        assert isinstance(val, Timestamp)
        val = Series(dti)[:1].item()
        assert isinstance(val, Timestamp)

        # timedelta-like data: same contract, scalar type is Timedelta
        tdi = dti - dti
        with pytest.raises(ValueError, match=msg):
            tdi.item()
        with pytest.raises(ValueError, match=msg):
            Series(tdi).item()

        val = tdi[:1].item()
        assert isinstance(val, Timedelta)
        val = Series(tdi)[:1].item()
        assert isinstance(val, Timedelta)

        # Case where ser[0] would not work
        ser = Series(dti, index=[5, 6])
        val = ser[:1].item()
        assert val == dti[0]

    def test_ndarray_compat(self):
        """Exercise numpy-style usage of Series and column-wise apply."""

        # test numpy compat with Series as sub-class of NDFrame
        tsdf = DataFrame(
            np.random.randn(1000, 3),
            columns=["A", "B", "C"],
            index=date_range("1/1/2000", periods=1000),
        )

        def f(x):
            # Value of the column at the label of its maximum.
            return x[x.idxmax()]

        result = tsdf.apply(f)
        expected = tsdf.max()
        tm.assert_series_equal(result, expected)

        # using an ndarray like function
        s = Series(np.random.randn(10))
        result = Series(np.ones_like(s))
        expected = Series(1, index=range(10), dtype="float64")
        tm.assert_series_equal(result, expected)

        # ravel
        s = Series(np.random.randn(10))
        tm.assert_almost_equal(s.ravel(order="F"), s.values.ravel(order="F"))

    def test_empty_method(self):
        """Check ``Series.empty`` for an empty Series and two non-empty ones."""
        s_empty = Series(dtype=object)
        assert s_empty.empty

        s2 = Series(index=[1], dtype=object)
        for full_series in [Series([1]), s2]:
            assert not full_series.empty

    @async_mark()
    @td.check_file_leaks
    async def test_tab_complete_warning(self, ip):
        """Tab-completing on a Series in IPython emits only the expected warning.

        No warning is expected with jedi older than 0.17.0; from 0.17.0 on a
        ``DeprecationWarning`` is expected (GH 33567).
        """
        # https://github.com/pandas-dev/pandas/issues/16409
        pytest.importorskip("IPython", minversion="6.0.0")
        from IPython.core.completer import provisionalcompleter

        # The snippet itself only binds ``pd``, so the constructor has to be
        # reached through it; a bare ``Series`` would leave ``s`` undefined
        # (assuming the ``ip`` fixture does not pre-bind ``Series``).
        code = "import pandas as pd; s = pd.Series(dtype=object)"
        await ip.run_code(code)

        # TODO: remove it when Ipython updates
        # GH 33567, jedi version raises Deprecation warning in Ipython
        import re

        import jedi

        # Compare numeric release components: comparing the raw strings is
        # lexicographic and would order e.g. "0.9.0" after "0.17.0".
        version_numbers = re.findall(r"\d+", jedi.__version__)
        jedi_release = tuple(
            int(number) for number in (version_numbers + ["0", "0", "0"])[:3]
        )

        if jedi_release < (0, 17, 0):
            warning = tm.assert_produces_warning(None)
        else:
            warning = tm.assert_produces_warning(DeprecationWarning,
                                                 check_stacklevel=False)
        with warning:
            with provisionalcompleter("ignore"):
                list(ip.Completer.completions("s.", 1))

    def test_integer_series_size(self):
        """``size`` is 9 for a 9-element Series, default and ``Int64`` dtype."""
        # GH 25580
        s = Series(range(9))
        assert s.size == 9
        s = Series(range(9), dtype="Int64")
        assert s.size == 9

    def test_attrs(self):
        """``attrs`` starts empty and is carried over to an arithmetic result."""
        s = Series([0, 1], name="abc")
        assert s.attrs == {}
        s.attrs["version"] = 1
        result = s + 1
        assert result.attrs == {"version": 1}

    @pytest.mark.parametrize("allows_duplicate_labels", [True, False, None])
    def test_set_flags(self, allows_duplicate_labels):
        """``set_flags`` returns a new object and copies data only on request."""
        ser = Series([1, 2])
        result = ser.set_flags(allows_duplicate_labels=allows_duplicate_labels)
        if allows_duplicate_labels is None:
            # We don't update when it's not provided
            assert result.flags.allows_duplicate_labels is True
        else:
            assert result.flags.allows_duplicate_labels is allows_duplicate_labels

        # We made a copy
        assert ser is not result
        # We didn't mutate ser
        assert ser.flags.allows_duplicate_labels is True

        # But we didn't copy data
        result.iloc[0] = 0
        assert ser.iloc[0] == 0

        # Now we do copy.
        result = ser.set_flags(copy=True,
                               allows_duplicate_labels=allows_duplicate_labels)
        result.iloc[0] = 10
        assert ser.iloc[0] == 0

示例#10

0

显示文件

文件： test_category.py 项目： tnir/pandas

 def index(self):
     """Return the result of ``tm.makeCategoricalIndex(100)``."""
     categorical_index = tm.makeCategoricalIndex(100)
     return categorical_index