Пример #1
0
 def test_constructor_invariant(self, values):
     # GH 14190
     c = Categorical(values)
     c2 = Categorical(c)
     tm.assert_categorical_equal(c, c2)
Пример #2
0
def test_qcut_index():
    """Check the bins ``qcut`` produces when splitting ``[0, 2]`` in two.

    The result is compared against an ordered ``Categorical`` built from
    ``Interval(-0.001, 1)`` and ``Interval(1, 2)``.
    """
    expected = Categorical(
        [Interval(-0.001, 1), Interval(1, 2)],
        ordered=True,
    )
    tm.assert_categorical_equal(qcut([0, 2], 2), expected)
Пример #3
0
    def test_constructor(self):
        """Broad walk through the ``Categorical`` constructor.

        Covers array round-tripping, the uniqueness check on categories, the
        default ``ordered`` flag, construction from an existing Categorical or
        a Series, the dtype inferred for the categories, single-element
        inputs, and a handful of calls that must not emit any warning.
        """

        def assert_single(cat, value):
            # A one-element Categorical: one category equal to ``value``,
            # and a single code pointing at position 0.
            assert len(cat.categories) == 1
            assert cat.categories[0] == value
            assert len(cat.codes) == 1
            assert cat.codes[0] == 0

        raw = np.array(["a", "b", "c", "a", "b", "c"], dtype=np.object_)

        # __array__ gives back the input values, with or without an explicit
        # category list and whatever the order of that list.
        for extra in (
            {},
            {"categories": ["a", "b", "c"]},
            {"categories": ["c", "b", "a"]},
        ):
            tm.assert_numpy_array_equal(Categorical(raw, **extra).__array__(), raw)

        # categories must be unique
        msg = "Categorical categories must be unique"
        for vals, dup_cats in (
            ([1, 2], [1, 2, 2]),
            (["a", "b"], ["a", "b", "b"]),
        ):
            with pytest.raises(ValueError, match=msg):
                Categorical(vals, dup_cats)

        # The default should be unordered
        assert not Categorical(["a", "b", "c", "a"]).ordered

        # Categorical as input
        source = Categorical(["a", "b", "c", "a"])
        tm.assert_categorical_equal(source, Categorical(source))

        for cats in (["a", "b", "c", "d"], ["a", "c", "b"]):
            source = Categorical(["a", "b", "c", "a"], categories=cats)
            tm.assert_categorical_equal(source, Categorical(source))

        # Re-specifying the categories keeps the values and adopts the new list.
        source = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"])
        recoded = Categorical(source, categories=["a", "b", "c"])
        tm.assert_numpy_array_equal(source.__array__(), recoded.__array__())
        tm.assert_index_equal(recoded.categories, Index(["a", "b", "c"]))

        # Series of dtype category
        for cats in (["a", "b", "c", "d"], ["a", "c", "b"]):
            source = Categorical(["a", "b", "c", "a"], categories=cats)
            tm.assert_categorical_equal(source, Categorical(Series(source)))

        # Series
        tm.assert_categorical_equal(
            Categorical(["a", "b", "c", "a"]),
            Categorical(Series(["a", "b", "c", "a"])),
        )
        tm.assert_categorical_equal(
            Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]),
            Categorical(
                Series(["a", "b", "c", "a"]), categories=["a", "b", "c", "d"]
            ),
        )

        # This should result in integer categories, not float!
        with_nan = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3])
        assert is_integer_dtype(with_nan.categories)

        # https://github.com/pandas-dev/pandas/issues/3678
        with_nan = Categorical([np.nan, 1, 2, 3])
        assert is_integer_dtype(with_nan.categories)

        # this should result in floats
        for float_values in ([np.nan, 1, 2.0, 3], [np.nan, 1.0, 2.0, 3.0]):
            assert is_float_dtype(Categorical(float_values).categories)

        # NOTE: turning the non-missing values back into an integer ndarray
        # (np.asarray(cat[cat.notna()])) is deliberately not asserted here; it
        # would probably need some kind of "remember the original type"
        # feature to cast the array interface result to.

        # corner cases
        for lone in (1, "a"):
            assert_single(Categorical([lone]), lone)

        with tm.assert_produces_warning(FutureWarning):
            # GH#38433
            from_scalar = Categorical(1)
        assert_single(from_scalar, 1)

        # two arrays
        #  - when the first is an integer dtype and the second is not
        #  - when the resulting codes are all -1/NaN
        for cats in (["a", "b", "c"], [3, 4, 5]):
            with tm.assert_produces_warning(None):
                Categorical([0, 1, 2, 0, 1, 2], categories=cats)

        # the next ones are from the old docs
        with tm.assert_produces_warning(None):
            Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3])
            Categorical([1, 2], categories=[1, 2, 3])

        # this is a legitimate constructor
        with tm.assert_produces_warning(None):
            Categorical(
                np.array([], dtype="int64"), categories=[3, 2, 1], ordered=True
            )
Пример #4
0
def test_qcut_specify_quantiles():
    """Explicit quartile edges must bin exactly like asking for 4 quantiles."""
    data = np.random.randn(100)

    by_count = qcut(data, 4)
    by_edges = qcut(data, [0, 0.25, 0.5, 0.75, 1.0])
    tm.assert_categorical_equal(by_edges, by_count)
Пример #5
0
def test_qcut_list_like_labels(labels, expected):
    # GH 13318
    values = range(3)
    result = qcut(values, 3, labels=labels)
    tm.assert_categorical_equal(result, expected)
Пример #6
0
    def test_union_categoricals_sort_false(self):
        # GH 13846
        c1 = Categorical(["x", "y", "z"])
        c2 = Categorical(["a", "b", "c"])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical(["x", "y", "z", "a", "b", "c"],
                               categories=["x", "y", "z", "a", "b", "c"])
        tm.assert_categorical_equal(result, expected)

        # fastpath
        c1 = Categorical(["a", "b"], categories=["b", "a", "c"])
        c2 = Categorical(["b", "c"], categories=["b", "a", "c"])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical(["a", "b", "b", "c"],
                               categories=["b", "a", "c"])
        tm.assert_categorical_equal(result, expected)

        # fastpath - skip resort
        c1 = Categorical(["a", "b"], categories=["a", "b", "c"])
        c2 = Categorical(["b", "c"], categories=["a", "b", "c"])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical(["a", "b", "b", "c"],
                               categories=["a", "b", "c"])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical(["x", np.nan])
        c2 = Categorical([np.nan, "b"])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical(["x", np.nan, np.nan, "b"],
                               categories=["x", "b"])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical([np.nan])
        c2 = Categorical([np.nan])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical([np.nan, np.nan])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical([])
        c2 = Categorical([])
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical([])
        tm.assert_categorical_equal(result, expected)

        c1 = Categorical(["b", "a"], categories=["b", "a", "c"], ordered=True)
        c2 = Categorical(["a", "c"], categories=["b", "a", "c"], ordered=True)
        result = union_categoricals([c1, c2], sort_categories=False)
        expected = Categorical(["b", "a", "a", "c"],
                               categories=["b", "a", "c"],
                               ordered=True)
        tm.assert_categorical_equal(result, expected)
Пример #7
0
    def test_union_categoricals_ignore_order(self):
        """Exercise the ``ignore_order`` flag of ``union_categoricals``.

        Regression test for GH 15219.
        """
        ordered_cat = Categorical([1, 2, 3], ordered=True)
        unordered_cat = Categorical([1, 2, 3], ordered=False)

        # Mixed ordered/unordered inputs are accepted only with ignore_order=True.
        tm.assert_categorical_equal(
            union_categoricals([ordered_cat, unordered_cat], ignore_order=True),
            Categorical([1, 2, 3, 1, 2, 3]),
        )

        with pytest.raises(TypeError, match="Categorical.ordered must be the same"):
            union_categoricals([ordered_cat, unordered_cat], ignore_order=False)

        # Two ordered inputs: ignore_order=True is compared against an
        # unordered expectation ...
        tm.assert_categorical_equal(
            union_categoricals([ordered_cat, ordered_cat], ignore_order=True),
            Categorical([1, 2, 3, 1, 2, 3]),
        )

        # ... while ignore_order=False is compared against an ordered one.
        tm.assert_categorical_equal(
            union_categoricals([ordered_cat, ordered_cat], ignore_order=False),
            Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3], ordered=True),
        )

        # Ordered inputs where one of them contains a missing value.
        with_nan = Categorical([1, 2, 3, np.nan], ordered=True)
        partial = Categorical([3, 2], categories=[1, 2, 3], ordered=True)
        tm.assert_categorical_equal(
            union_categoricals([with_nan, partial], ignore_order=True),
            Categorical([1, 2, 3, np.nan, 3, 2]),
        )

        # Ordered inputs whose category lists are in opposite order.
        ascending = Categorical([1, 2, 3], ordered=True)
        descending = Categorical([1, 2, 3], categories=[3, 2, 1], ordered=True)
        tm.assert_categorical_equal(
            union_categoricals([ascending, descending], ignore_order=True),
            Categorical([1, 2, 3, 1, 2, 3]),
        )
        tm.assert_categorical_equal(
            union_categoricals(
                [descending, ascending], ignore_order=True, sort_categories=True
            ),
            Categorical([1, 2, 3, 1, 2, 3], categories=[1, 2, 3]),
        )

        # Ordered inputs with completely different categories.
        low = Categorical([1, 2, 3], ordered=True)
        high = Categorical([4, 5, 6], ordered=True)
        tm.assert_categorical_equal(
            union_categoricals([low, high], ignore_order=True),
            Categorical([1, 2, 3, 4, 5, 6]),
        )

        # The same union is rejected with ignore_order=False, whether that is
        # passed explicitly or left out of the call.
        msg = "to union ordered Categoricals, all categories must be the same"
        for options in ({"ignore_order": False}, {}):
            with pytest.raises(TypeError, match=msg):
                union_categoricals([low, high], **options)
Пример #8
0
    def test_take_fill_value(self):
        """Exercise ``CategoricalIndex.take`` with and without ``fill_value``.

        Regression test for GH 12631.  The same three scenarios (plain take,
        ``fill_value=True``, and ``allow_fill=False``) are run against a
        numeric index and an ordered object index, followed by the error
        cases for invalid positions.
        """

        def check(result, expected):
            # Compare both the index and the Categorical that backs it.
            tm.assert_index_equal(result, expected)
            tm.assert_categorical_equal(result.values, expected.values)

        # numeric category
        idx = CategoricalIndex([1, 2, 3], name="xxx")

        # Without fill_value, position -1 selects the last element.
        check(
            idx.take(np.array([1, 0, -1])),
            CategoricalIndex([2, 1, 3], name="xxx"),
        )

        # fill_value: position -1 becomes a missing value instead.
        check(
            idx.take(np.array([1, 0, -1]), fill_value=True),
            CategoricalIndex([2, 1, np.nan], categories=[1, 2, 3], name="xxx"),
        )

        # allow_fill=False: fill_value has no effect on the result.
        check(
            idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True),
            CategoricalIndex([2, 1, 3], name="xxx"),
        )

        # object category
        idx = CategoricalIndex(
            list("CBA"), categories=list("ABC"), ordered=True, name="xxx"
        )

        check(
            idx.take(np.array([1, 0, -1])),
            CategoricalIndex(
                list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
            ),
        )

        # fill_value
        check(
            idx.take(np.array([1, 0, -1]), fill_value=True),
            CategoricalIndex(
                ["B", "C", np.nan],
                categories=list("ABC"),
                ordered=True,
                name="xxx",
            ),
        )

        # allow_fill=False
        check(
            idx.take(np.array([1, 0, -1]), allow_fill=False, fill_value=True),
            CategoricalIndex(
                list("BCA"), categories=list("ABC"), ordered=True, name="xxx"
            ),
        )

        # With a fill_value, any position below -1 is rejected.
        msg = (
            "When allow_fill=True and fill_value is not None, "
            "all indices must be >= -1"
        )
        for bad_position in (-2, -5):
            with pytest.raises(ValueError, match=msg):
                idx.take(np.array([1, 0, bad_position]), fill_value=True)

        # Without a fill_value, an out-of-range position raises IndexError.
        msg = "index -5 is out of bounds for (axis 0 with )?size 3"
        with pytest.raises(IndexError, match=msg):
            idx.take(np.array([1, -5]))