示例#1
0
    def test_loc_scalar(self):
        """Exercise scalar-label ``.loc`` reads and writes on ``self.df``.

        Covers selecting an existing label, broadcasting an assignment to it,
        and the errors expected for labels that are not among the categories.
        """
        result = self.df.loc["a"]
        expected = DataFrame({
            "A": [0, 1, 5],
            "B": (Series(list("aaa")).astype(CDT(list("cab"))))
        }).set_index("B")
        tm.assert_frame_equal(result, expected)

        # Work on a copy so the assignment does not leak into ``self.df``.
        df = self.df.copy()
        df.loc["a"] = 20
        expected = DataFrame({
            "A": [20, 20, 2, 3, 4, 20],
            "B": (Series(list("aabbca")).astype(CDT(list("cab")))),
        }).set_index("B")
        tm.assert_frame_equal(df, expected)

        # value not in the categories
        with pytest.raises(KeyError, match=r"^'d'$"):
            df.loc["d"]

        msg = "cannot append a non-category item to a CategoricalIndex"
        with pytest.raises(TypeError, match=msg):
            df.loc["d"] = 10

        # Setting a single cell under an unknown label must fail with this
        # message, whether or not the column already exists.
        msg = "'fill_value=d' is not present in this Categorical's categories"
        with pytest.raises(ValueError, match=msg):
            df.loc["d", "A"] = 10
        with pytest.raises(ValueError, match=msg):
            df.loc["d", "C"] = 10

        # An integer label is not a category either.
        with pytest.raises(KeyError, match="^1$"):
            df.loc[1]
示例#2
0
    def setup_method(self, method):
        """Build the four categorical-indexed frames used by the tests.

        Every frame has an int64 column 'A' holding 0..5 and is indexed by a
        categorical column 'B'; they differ in labels, categories and order.
        """

        def _indexed(labels, dtype):
            # One frame: fixed 'A' column, 'B' cast to ``dtype`` then indexed.
            return DataFrame({
                'A': np.arange(6, dtype='int64'),
                'B': Series(labels).astype(dtype),
            }).set_index('B')

        letters = list('aabbca')
        numbers = [1, 1, 2, 1, 3, 2]

        self.df = _indexed(letters, CDT(list('cab')))
        self.df2 = _indexed(letters, CDT(list('cabe')))
        self.df3 = _indexed(numbers, CDT([3, 2, 1], ordered=True))
        self.df4 = _indexed(numbers, CDT([3, 2, 1], ordered=False))
示例#3
0
    def test_loc_scalar(self):
        """Scalar ``.loc`` lookups and assignments against ``self.df``."""
        dtype = CDT(list('cab'))

        # Selecting label 'a' is expected to give the rows with A = 0, 1, 5.
        picked = self.df.loc['a']
        index_col = Series(list('aaa')).astype(dtype)
        wanted = DataFrame({'A': [0, 1, 5], 'B': index_col}).set_index('B')
        assert_frame_equal(picked, wanted)

        # Assigning through the label rewrites those rows on a copy.
        df = self.df.copy()
        df.loc['a'] = 20
        index_col = Series(list('aabbca')).astype(dtype)
        wanted = DataFrame({
            'A': [20, 20, 2, 3, 4, 20],
            'B': index_col,
        }).set_index('B')
        assert_frame_equal(df, wanted)

        # value not in the categories
        with pytest.raises(KeyError, match=r"^'d'$"):
            df.loc['d']

        append_msg = "cannot append a non-category item to a CategoricalIndex"
        with pytest.raises(TypeError, match=append_msg):
            df.loc['d'] = 10

        insert_msg = ("cannot insert an item into a CategoricalIndex that is not"
                      " already an existing category")
        # Same failure whether the target column exists ('A') or not ('C').
        with pytest.raises(TypeError, match=insert_msg):
            df.loc['d', 'A'] = 10
        with pytest.raises(TypeError, match=insert_msg):
            df.loc['d', 'C'] = 10
示例#4
0
    def test_loc_scalar(self):
        """Exercise scalar-label ``.loc`` reads and writes on ``self.df``.

        Checks selection of an existing label, assignment through it, and
        that a label outside the categories raises on read and on write.
        """
        result = self.df.loc['a']
        expected = (DataFrame({
            'A': [0, 1, 5],
            'B': (Series(list('aaa')).astype(CDT(list('cab'))))
        }).set_index('B'))
        assert_frame_equal(result, expected)

        # Work on a copy so the assignment does not leak into ``self.df``.
        df = self.df.copy()
        df.loc['a'] = 20
        expected = (DataFrame({
            'A': [20, 20, 2, 3, 4, 20],
            'B': (Series(list('aabbca')).astype(CDT(list('cab'))))
        }).set_index('B'))
        assert_frame_equal(df, expected)

        # value not in the categories
        with pytest.raises(KeyError):
            df.loc['d']

        # Context managers replace the repeatedly redefined helper ``f``;
        # each statement is still checked on its own.
        with pytest.raises(TypeError):
            df.loc['d'] = 10

        with pytest.raises(TypeError):
            df.loc['d', 'A'] = 10

        with pytest.raises(TypeError):
            df.loc['d', 'C'] = 10
示例#5
0
    def test_loc_scalar(self):
        """Scalar ``.loc`` lookups and assignments against ``self.df``."""
        dtype = CDT(list("cab"))

        def _indexed(values, labels):
            # Expected frame: column "A" indexed by categorical column "B".
            col = Series(list(labels)).astype(dtype)
            return DataFrame({"A": values, "B": col}).set_index("B")

        picked = self.df.loc["a"]
        tm.assert_frame_equal(picked, _indexed([0, 1, 5], "aaa"))

        df = self.df.copy()
        df.loc["a"] = 20
        tm.assert_frame_equal(df, _indexed([20, 20, 2, 3, 4, 20], "aabbca"))

        # value not in the categories
        with pytest.raises(KeyError, match=r"^'d'$"):
            df.loc["d"]

        # Adding a whole row under a new label is compared against the same
        # operation performed on an object-dtype index.
        grown = df.copy()
        reference = grown.copy()
        reference.index = reference.index.astype(object)
        reference.loc["d"] = 10
        grown.loc["d"] = 10
        tm.assert_frame_equal(grown, reference)

        cell_msg = "'fill_value=d' is not present in this Categorical's categories"
        for column in ("A", "C"):
            with pytest.raises(TypeError, match=cell_msg):
                df.loc["d", column] = 10

        with pytest.raises(KeyError, match="^1$"):
            df.loc[1]
示例#6
0
    def setup_method(self, method):
        """Attach four categorical-indexed frames (df, df2, df3, df4) to self.

        Each frame has an int64 column "A" holding 0..5 and uses a
        categorical column "B" as its index.
        """
        letters = list("aabbca")
        numbers = [1, 1, 2, 1, 3, 2]

        # attribute name -> (index labels, categorical dtype for the index)
        layouts = [
            ("df", letters, CDT(list("cab"))),
            ("df2", letters, CDT(list("cabe"))),
            ("df3", numbers, CDT([3, 2, 1], ordered=True)),
            ("df4", numbers, CDT([3, 2, 1], ordered=False)),
        ]
        for attr, labels, dtype in layouts:
            columns = {
                "A": np.arange(6, dtype="int64"),
                "B": Series(labels).astype(dtype),
            }
            setattr(self, attr, DataFrame(columns).set_index("B"))
示例#7
0
    def setup_method(self, method):
        """Attach two frames indexed by a CategoricalIndex named "B".

        ``self.df`` uses categories c, a, b; ``self.df2`` adds the unused
        category e. Both carry an int64 column "A" holding 0..5.
        """
        labels = list("aabbca")

        narrow_index = CategoricalIndex(labels, dtype=CDT(list("cab")), name="B")
        self.df = DataFrame({"A": np.arange(6, dtype="int64")}, index=narrow_index)

        wide_index = CategoricalIndex(labels, dtype=CDT(list("cabe")), name="B")
        self.df2 = DataFrame({"A": np.arange(6, dtype="int64")}, index=wide_index)
示例#8
0
    def test_series_retbins(self):
        # GH 8589
        s = Series(np.arange(4))
        result, bins = cut(s, 2, retbins=True)
        expected = Series(IntervalIndex.from_breaks(
            [-0.003, 1.5, 3], closed='right').repeat(2)).astype(
            CDT(ordered=True))
        tm.assert_series_equal(result, expected)

        result, bins = qcut(s, 2, retbins=True)
        expected = Series(IntervalIndex.from_breaks(
            [-0.001, 1.5, 3], closed='right').repeat(2)).astype(
            CDT(ordered=True))
        tm.assert_series_equal(result, expected)
示例#9
0
def test_datetime_tz_cut(bins, box):
    """Cut a tz-aware datetime Series and compare with fixed intervals.

    ``bins`` that are not a plain ``int`` are wrapped with ``box`` first.
    """
    # see gh-19872
    tz = "US/Eastern"
    s = Series(date_range("20130101", periods=3, tz=tz))

    bins = bins if isinstance(bins, int) else box(bins)
    result = cut(s, bins)

    # Consecutive edges pair up into the three expected intervals.
    edges = [
        Timestamp(stamp, tz=tz)
        for stamp in (
            "2012-12-31 23:57:07.200000",
            "2013-01-01 16:00:00",
            "2013-01-02 08:00:00",
            "2013-01-03 00:00:00",
        )
    ]
    intervals = [Interval(left, right) for left, right in zip(edges, edges[1:])]
    expected = Series(IntervalIndex(intervals)).astype(CDT(ordered=True))
    tm.assert_series_equal(result, expected)
示例#10
0
def test_datetime_tz_qcut(bins):
    """Quantile-cut a tz-aware datetime Series into fixed intervals."""
    # see gh-19872
    tz = "US/Eastern"
    ser = Series(date_range("20130101", periods=3, tz=tz))

    result = qcut(ser, bins)

    # Consecutive edges pair up into the three expected intervals.
    stamps = (
        "2012-12-31 23:59:59.999999999",
        "2013-01-01 16:00:00",
        "2013-01-02 08:00:00",
        "2013-01-03 00:00:00",
    )
    edges = [Timestamp(stamp, tz=tz) for stamp in stamps]
    intervals = [Interval(left, right) for left, right in zip(edges, edges[1:])]
    expected = Series(IntervalIndex(intervals)).astype(CDT(ordered=True))
    tm.assert_series_equal(result, expected)
    def test_loc_scalar(self):
        """Scalar ``.loc`` lookups and assignments against ``self.df``."""
        dtype = CDT(list("cab"))

        def _expected(values, labels):
            # Frame with column "A" and an Index built from a categorical
            # Series named "B"; the cast is verified before use.
            bidx = Series(list(labels), name="B").astype(dtype)
            assert bidx.dtype == dtype
            return DataFrame({"A": values}, index=Index(bidx))

        picked = self.df.loc["a"]
        tm.assert_frame_equal(picked, _expected([0, 1, 5], "aaa"))

        df = self.df.copy()
        df.loc["a"] = 20
        tm.assert_frame_equal(df, _expected([20, 20, 2, 3, 4, 20], "aabbca"))

        # value not in the categories
        with pytest.raises(KeyError, match=r"^'d'$"):
            df.loc["d"]

        # Adding a row under a new label is compared against the same
        # operation performed on an object-dtype index.
        grown = df.copy()
        reference = grown.copy()
        reference.index = reference.index.astype(object)
        reference.loc["d"] = 10
        grown.loc["d"] = 10
        tm.assert_frame_equal(grown, reference)
示例#12
0
    def test_datetime_cut(self):
        """``cut`` on the same three dates supplied in different containers."""
        # GH 14714
        days = ['2013-01-01', '2013-01-02', '2013-01-03']

        edges = [
            Timestamp('2012-12-31 23:57:07.200000'),
            Timestamp('2013-01-01 16:00:00'),
            Timestamp('2013-01-02 08:00:00'),
            Timestamp('2013-01-03 00:00:00'),
        ]
        intervals = [Interval(lo, hi) for lo, hi in zip(edges, edges[1:])]
        expected = Series(IntervalIndex(intervals)).astype(CDT(ordered=True))

        # testing for time data to be present as series
        result, bins = cut(to_datetime(Series(days)), 3, retbins=True)
        tm.assert_series_equal(result, expected)

        # testing for time data to be present as list, ndarray and
        # datetime index; these results are wrapped in a Series to compare
        as_dt64 = [np.datetime64(day) for day in days]
        for data in (as_dt64, np.array(as_dt64), DatetimeIndex(days)):
            result, bins = cut(data, 3, retbins=True)
            tm.assert_series_equal(Series(result), expected)
示例#13
0
def test_series_ret_bins():
    """``cut`` with ``retbins=True`` on a Series gives the expected bins."""
    # see gh-8589
    ser = Series(np.arange(4))
    result, bins = cut(ser, 2, retbins=True)

    # Two right-closed intervals, each covering two of the four values.
    breaks = IntervalIndex.from_breaks([-0.003, 1.5, 3], closed="right")
    expected = Series(breaks.repeat(2)).astype(CDT(ordered=True))
    tm.assert_series_equal(result, expected)
示例#14
0
 def test_qcut_return_intervals(self):
     s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8])
     res = qcut(s, [0, 0.333, 0.666, 1])
     exp_levels = np.array([Interval(-0.001, 2.664),
                            Interval(2.664, 5.328), Interval(5.328, 8)])
     exp = Series(exp_levels.take([0, 0, 0, 1, 1, 1, 2, 2, 2])).astype(
         CDT(ordered=True))
     tm.assert_series_equal(res, exp)
示例#15
0
 def test_cut_return_intervals(self):
     s = Series([0, 1, 2, 3, 4, 5, 6, 7, 8])
     res = cut(s, 3)
     exp_bins = np.linspace(0, 8, num=4).round(3)
     exp_bins[0] -= 0.008
     exp = Series(IntervalIndex.from_breaks(exp_bins, closed='right').take(
         [0, 0, 0, 1, 1, 1, 2, 2, 2])).astype(CDT(ordered=True))
     tm.assert_series_equal(res, exp)
示例#16
0
def df2():
    """Return a frame with int64 column "A" (0..5) on a CategoricalIndex "B".

    The index labels are a, a, b, b, c, a with categories c, a, b, e.
    """
    labels = CategoricalIndex(list("aabbca"), dtype=CDT(list("cabe")), name="B")
    values = np.arange(6, dtype="int64")
    return DataFrame({"A": values}, index=labels)
示例#17
0
def test_single_quantile(data, start, end, length, labels):
    # see gh-15431
    ser = Series([data] * length)
    result = qcut(ser, 1, labels=labels)

    if labels is None:
        intervals = IntervalIndex([Interval(start, end)] * length, closed="right")
        expected = Series(intervals).astype(CDT(ordered=True))
    else:
        expected = Series([0] * length)

    tm.assert_series_equal(result, expected)
示例#18
0
def test_datetime_bin(conv):
    """``cut`` with datetime bin edges converted by ``conv``."""
    data = [np.datetime64("2012-12-13"), np.datetime64("2012-12-15")]
    bin_data = ["2012-12-12", "2012-12-14", "2012-12-16"]

    # Expected intervals always use Timestamp edges, whatever ``conv`` is.
    edges = [Timestamp(bin_data[0]), Timestamp(bin_data[1]), Timestamp(bin_data[2])]
    intervals = [Interval(edges[0], edges[1]), Interval(edges[1], edges[2])]
    expected = Series(IntervalIndex(intervals)).astype(CDT(ordered=True))

    bins = [conv(raw) for raw in bin_data]
    result = Series(cut(data, bins=bins))
    tm.assert_series_equal(result, expected)
    def test_loc_scalar(self):
        """Scalar ``.loc`` lookups and assignments against ``self.df``."""
        dtype = CDT(list("cab"))

        def _indexed(values, labels):
            # Expected frame: column "A" indexed by categorical column "B".
            col = Series(list(labels)).astype(dtype)
            return DataFrame({"A": values, "B": col}).set_index("B")

        picked = self.df.loc["a"]
        tm.assert_frame_equal(picked, _indexed([0, 1, 5], "aaa"))

        df = self.df.copy()
        df.loc["a"] = 20
        tm.assert_frame_equal(df, _indexed([20, 20, 2, 3, 4, 20], "aabbca"))

        # value not in the categories
        with pytest.raises(KeyError, match=r"^'d'$"):
            df.loc["d"]

        append_msg = "cannot append a non-category item to a CategoricalIndex"
        with pytest.raises(TypeError, match=append_msg):
            df.loc["d"] = 10

        insert_msg = (
            "cannot insert an item into a CategoricalIndex that is not "
            "already an existing category"
        )
        # Same failure whether the target column exists ("A") or not ("C").
        for column in ("A", "C"):
            with pytest.raises(TypeError, match=insert_msg):
                df.loc["d", column] = 10

        int_label_msg = (
            r"cannot do label indexing on <class 'pandas\.core\.indexes\.category"
            r"\.CategoricalIndex'> with these indexers \[1\] of <class 'int'>"
        )
        with pytest.raises(TypeError, match=int_label_msg):
            df.loc[1]
示例#20
0
    def test_single_quantile(self):
        # issue 15431
        expected = Series([0, 0])

        s = Series([9., 9.])
        result = qcut(s, 1, labels=False)
        tm.assert_series_equal(result, expected)
        result = qcut(s, 1)
        intervals = IntervalIndex([Interval(8.999, 9.0),
                                   Interval(8.999, 9.0)], closed='right')
        expected = Series(intervals).astype(CDT(ordered=True))
        tm.assert_series_equal(result, expected)

        s = Series([-9., -9.])
        expected = Series([0, 0])
        result = qcut(s, 1, labels=False)
        tm.assert_series_equal(result, expected)
        result = qcut(s, 1)
        intervals = IntervalIndex([Interval(-9.001, -9.0),
                                   Interval(-9.001, -9.0)], closed='right')
        expected = Series(intervals).astype(CDT(ordered=True))
        tm.assert_series_equal(result, expected)

        s = Series([0., 0.])
        expected = Series([0, 0])
        result = qcut(s, 1, labels=False)
        tm.assert_series_equal(result, expected)
        result = qcut(s, 1)
        intervals = IntervalIndex([Interval(-0.001, 0.0),
                                   Interval(-0.001, 0.0)], closed='right')
        expected = Series(intervals).astype(CDT(ordered=True))
        tm.assert_series_equal(result, expected)

        s = Series([9])
        expected = Series([0])
        result = qcut(s, 1, labels=False)
        tm.assert_series_equal(result, expected)
        result = qcut(s, 1)
        intervals = IntervalIndex([Interval(8.999, 9.0)], closed='right')
        expected = Series(intervals).astype(CDT(ordered=True))
        tm.assert_series_equal(result, expected)

        s = Series([-9])
        expected = Series([0])
        result = qcut(s, 1, labels=False)
        tm.assert_series_equal(result, expected)
        result = qcut(s, 1)
        intervals = IntervalIndex([Interval(-9.001, -9.0)], closed='right')
        expected = Series(intervals).astype(CDT(ordered=True))
        tm.assert_series_equal(result, expected)

        s = Series([0])
        expected = Series([0])
        result = qcut(s, 1, labels=False)
        tm.assert_series_equal(result, expected)
        result = qcut(s, 1)
        intervals = IntervalIndex([Interval(-0.001, 0.0)], closed='right')
        expected = Series(intervals).astype(CDT(ordered=True))
        tm.assert_series_equal(result, expected)
示例#21
0
def test_datetime_cut(data):
    """``cut`` datetime ``data`` into three bins with fixed expected edges."""
    # see gh-14714
    #
    # Testing time data when it comes in various collection types.
    result, _ = cut(data, 3, retbins=True)

    # Consecutive edges pair up into the three expected intervals.
    edges = [
        Timestamp("2012-12-31 23:57:07.200000"),
        Timestamp("2013-01-01 16:00:00"),
        Timestamp("2013-01-02 08:00:00"),
        Timestamp("2013-01-03 00:00:00"),
    ]
    intervals = [Interval(lo, hi) for lo, hi in zip(edges, edges[1:])]
    expected = Series(IntervalIndex(intervals)).astype(CDT(ordered=True))
    tm.assert_series_equal(Series(result), expected)
示例#22
0
文件: test_cut.py 项目: tnir/pandas
def test_cut_return_intervals():
    """``cut`` into three bins returns ordered interval categories."""
    ser = Series([0, 1, 2, 3, 4, 5, 6, 7, 8])
    result = cut(ser, 3)

    # Evenly spaced breaks over 0..8, with the lowest one lowered by 0.008.
    exp_bins = np.linspace(0, 8, num=4).round(3)
    exp_bins[0] -= 0.008

    # ``closed`` is the keyword documented for IntervalIndex.from_breaks.
    # NOTE(review): the previous spelling was ``inclusive="right"`` - confirm
    # the targeted pandas version accepts ``closed`` without a warning.
    expected = Series(
        IntervalIndex.from_breaks(exp_bins, closed="right").take(
            [0, 0, 0, 1, 1, 1, 2, 2, 2]
        )
    ).astype(CDT(ordered=True))
    tm.assert_series_equal(result, expected)
示例#23
0
 def test_datetimetz_qcut(self, bins):
     """Quantile-cut a tz-aware datetime Series into fixed intervals."""
     # GH 19872
     tz = 'US/Eastern'
     s = Series(date_range('20130101', periods=3, tz=tz))
     result = qcut(s, bins)

     # Consecutive edges pair up into the three expected intervals.
     stamps = ('2012-12-31 23:59:59.999999999',
               '2013-01-01 16:00:00',
               '2013-01-02 08:00:00',
               '2013-01-03 00:00:00')
     edges = [Timestamp(stamp, tz=tz) for stamp in stamps]
     intervals = [Interval(lo, hi) for lo, hi in zip(edges, edges[1:])]
     expected = Series(IntervalIndex(intervals)).astype(CDT(ordered=True))
     tm.assert_series_equal(result, expected)
示例#24
0
 def test_datetimetz_cut(self, bins, box):
     """Cut a tz-aware datetime Series and compare with fixed intervals.

     ``bins`` that are not a plain ``int`` are wrapped with ``box`` first.
     """
     # GH 19872
     tz = 'US/Eastern'
     s = Series(date_range('20130101', periods=3, tz=tz))
     bins = bins if isinstance(bins, int) else box(bins)
     result = cut(s, bins)

     # Consecutive edges pair up into the three expected intervals.
     stamps = ('2012-12-31 23:57:07.200000',
               '2013-01-01 16:00:00',
               '2013-01-02 08:00:00',
               '2013-01-03 00:00:00')
     edges = [Timestamp(stamp, tz=tz) for stamp in stamps]
     intervals = [Interval(lo, hi) for lo, hi in zip(edges, edges[1:])]
     expected = Series(IntervalIndex(intervals)).astype(CDT(ordered=True))
     tm.assert_series_equal(result, expected)
示例#25
0
    def test_datetime_bin(self):
        """Check ``cut`` with datetime bin edges supplied in several forms.

        The same two data points are binned with edges given as Timestamp,
        ``np.datetime64``, ``datetime.datetime`` and the result of
        ``to_datetime``; every variant must give the same two intervals.
        """
        data = [np.datetime64('2012-12-13'), np.datetime64('2012-12-15')]
        bin_data = ['2012-12-12', '2012-12-14', '2012-12-16']
        # Plain constructor, as the sibling datetime cut test does, instead
        # of the ``from_intervals`` alternate constructor.
        expected = (Series(
            IntervalIndex([
                Interval(Timestamp(bin_data[0]), Timestamp(bin_data[1])),
                Interval(Timestamp(bin_data[1]), Timestamp(bin_data[2]))
            ])).astype(CDT(ordered=True)))

        for conv in [Timestamp, np.datetime64]:
            bins = [conv(v) for v in bin_data]
            result = cut(data, bins=bins)
            tm.assert_series_equal(Series(result), expected)

        bin_pydatetime = [Timestamp(v).to_pydatetime() for v in bin_data]
        result = cut(data, bins=bin_pydatetime)
        tm.assert_series_equal(Series(result), expected)

        # Pass the ``to_datetime`` result itself; reusing ``bin_pydatetime``
        # here would leave this input form untested.
        bins = to_datetime(bin_data)
        result = cut(data, bins=bins)
        tm.assert_series_equal(Series(result), expected)
示例#26
0
    def test_reindexing(self):
        """Reindex a categorical-indexed frame with lists and Categoricals.

        List indexers are expected to give a regular index; Categorical
        indexers are expected to give back an index of the same dtype.
        """
        nan = np.nan
        df = DataFrame(
            {
                "A": np.arange(3, dtype="int64"),
                "B": Series(list("abc")).astype(CDT(list("cabe"))),
            }
        ).set_index("B")

        def check(indexer, values, labels, dtype=None):
            # Reindex ``df`` and compare with a frame indexed by ``labels``,
            # cast to ``dtype`` when a categorical result is expected.
            result = df.reindex(indexer)
            col = Series(labels)
            if dtype is not None:
                col = col.astype(dtype)
            expected = DataFrame({"A": values, "B": col}).set_index("B")
            tm.assert_frame_equal(result, expected, check_index_type=True)

        # reindexing
        # convert to a regular index
        plain_cases = [
            (["a", "b", "e"], [0, 1, nan]),
            (["a", "b"], [0, 1]),
            (["e"], [nan]),
        ]
        for labels, values in plain_cases:
            check(labels, values, labels)
        check(["d"], [nan], ["d"])

        # since we are actually reindexing with a Categorical
        # then return a Categorical
        cats = list("cabe")
        check(Categorical(["a", "e"], categories=cats), [0, nan], list("ae"), CDT(cats))
        check(Categorical(["a"], categories=cats), [0], list("a"), CDT(cats))

        # the list-based cases are checked a second time
        for labels, values in plain_cases:
            check(labels, values, labels)

        # give back the type of categorical that we received
        check(
            Categorical(["a", "e"], categories=cats, ordered=True),
            [0, nan],
            list("ae"),
            CDT(cats, ordered=True),
        )
        check(
            Categorical(["a", "d"], categories=["a", "d"]),
            [0, nan],
            list("ad"),
            CDT(["a", "d"]),
        )

        # passed duplicate indexers are not allowed
        msg = "cannot reindex from a duplicate axis"
        with pytest.raises(ValueError, match=msg):
            self.df2.reindex(["a", "b"])

        # args NotImplemented ATM
        msg = r"argument {} is not implemented for CategoricalIndex\.reindex"
        unsupported = [("method", "ffill"), ("level", 1), ("limit", 2)]
        for keyword, value in unsupported:
            with pytest.raises(NotImplementedError, match=msg.format(keyword)):
                df.reindex(["a"], **{keyword: value})
示例#27
0
    def test_getitem_bool_mask_categorical_index(self):
        """Boolean masks built by comparing a CategoricalIndex to scalars."""

        def build(ordered):
            # Index values [1, 1, 2, 1, 3, 2] with categories [3, 2, 1],
            # named "B"; only the ``ordered`` flag varies.
            idx = CategoricalIndex(
                [1, 1, 2, 1, 3, 2], dtype=CDT([3, 2, 1], ordered=ordered), name="B"
            )
            return DataFrame({"A": np.arange(6, dtype="int64")}, index=idx)

        df3 = build(True)
        df4 = build(False)

        # Equality masks are checked on both the ordered and unordered frame.
        for value, rows in (("a", []), (1, [0, 1, 3])):
            for frame in (df3, df4):
                result = frame[frame.index == value]
                expected = frame.iloc[rows]
                tm.assert_frame_equal(result, expected)

        # since we have an ordered categorical

        # CategoricalIndex([1, 1, 2, 1, 3, 2],
        #         categories=[3, 2, 1],
        #         ordered=True,
        #         name='B')
        below_two = df3[df3.index < 2]
        tm.assert_frame_equal(below_two, df3.iloc[[4]])

        above_one = df3[df3.index > 1]
        tm.assert_frame_equal(above_one, df3.iloc[[]])

        # unordered
        # cannot be compared

        # CategoricalIndex([1, 1, 2, 1, 3, 2],
        #         categories=[3, 2, 1],
        #         ordered=False,
        #         name='B')
        msg = "Unordered Categoricals can only compare equality or not"
        with pytest.raises(TypeError, match=msg):
            df4[df4.index < 2]
        with pytest.raises(TypeError, match=msg):
            df4[df4.index > 1]
0
    def test_reindexing(self):
        """Reindex ``self.df2`` with plain lists and with Categoricals.

        List indexers are expected to give a regular index; Categorical
        indexers are expected to give back an index of the same dtype.
        """
        nan = np.nan
        frame = self.df2

        def check(indexer, values, labels, dtype=None):
            # Reindex and compare with a frame indexed by ``labels``, cast
            # to ``dtype`` when a categorical result is expected.
            result = frame.reindex(indexer)
            col = Series(labels)
            if dtype is not None:
                col = col.astype(dtype)
            expected = DataFrame({'A': values, 'B': col}).set_index('B')
            assert_frame_equal(result, expected, check_index_type=True)

        # reindexing
        # convert to a regular index
        plain_cases = [
            (['a', 'b', 'e'], [0, 1, 5, 2, 3, nan], list('aaabbe')),
            (['a', 'b'], [0, 1, 5, 2, 3], list('aaabb')),
            (['e'], [nan], ['e']),
        ]
        for indexer, values, labels in plain_cases:
            check(indexer, values, labels)
        check(['d'], [nan], ['d'])

        # since we are actually reindexing with a Categorical
        # then return a Categorical
        cats = list('cabe')
        check(Categorical(['a', 'd'], categories=cats),
              [0, 1, 5, nan], list('aaad'), CDT(cats))
        check(Categorical(['a'], categories=cats),
              [0, 1, 5], list('aaa'), CDT(cats))

        # the list-based cases are checked a second time
        for indexer, values, labels in plain_cases:
            check(indexer, values, labels)

        # give back the type of categorical that we received
        check(Categorical(['a', 'd'], categories=cats, ordered=True),
              [0, 1, 5, nan], list('aaad'), CDT(cats, ordered=True))
        check(Categorical(['a', 'd'], categories=['a', 'd']),
              [0, 1, 5, nan], list('aaad'), CDT(['a', 'd']))

        # passed duplicate indexers are not allowed
        msg = "cannot reindex with a non-unique indexer"
        with pytest.raises(ValueError, match=msg):
            frame.reindex(['a', 'a'])

        # args NotImplemented ATM
        msg = r"argument {} is not implemented for CategoricalIndex\.reindex"
        unsupported = [('method', 'ffill'), ('level', 1), ('limit', 2)]
        for keyword, value in unsupported:
            with pytest.raises(NotImplementedError, match=msg.format(keyword)):
                frame.reindex(['a'], **{keyword: value})