def test_categorical_sideeffects_free(self): # Passing a categorical to a Series and then changing values in either # the series or the categorical should not change the values in the # other one, IF you specify copy! cat = Categorical(["a", "b", "c", "a"]) s = Series(cat, copy=True) assert s.cat is not cat s.cat.categories = [1, 2, 3] exp_s = np.array([1, 2, 3, 1], dtype=np.int64) exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_) tm.assert_numpy_array_equal(s.__array__(), exp_s) tm.assert_numpy_array_equal(cat.__array__(), exp_cat) # setting s[0] = 2 exp_s2 = np.array([2, 2, 3, 1], dtype=np.int64) tm.assert_numpy_array_equal(s.__array__(), exp_s2) tm.assert_numpy_array_equal(cat.__array__(), exp_cat) # however, copy is False by default # so this WILL change values cat = Categorical(["a", "b", "c", "a"]) s = Series(cat) assert s.values is cat s.cat.categories = [1, 2, 3] exp_s = np.array([1, 2, 3, 1], dtype=np.int64) tm.assert_numpy_array_equal(s.__array__(), exp_s) tm.assert_numpy_array_equal(cat.__array__(), exp_s) s[0] = 2 exp_s2 = np.array([2, 2, 3, 1], dtype=np.int64) tm.assert_numpy_array_equal(s.__array__(), exp_s2) tm.assert_numpy_array_equal(cat.__array__(), exp_s2)
def test_categorical_sideeffects_free(self): # Passing a categorical to a Series and then changing values in either # the series or the categorical should not change the values in the # other one, IF you specify copy! cat = Categorical(["a", "b", "c", "a"]) s = Series(cat, copy=True) assert s.cat is not cat s.cat.categories = [1, 2, 3] exp_s = np.array([1, 2, 3, 1], dtype=np.int64) exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_) tm.assert_numpy_array_equal(s.__array__(), exp_s) tm.assert_numpy_array_equal(cat.__array__(), exp_cat) # setting s[0] = 2 exp_s2 = np.array([2, 2, 3, 1], dtype=np.int64) tm.assert_numpy_array_equal(s.__array__(), exp_s2) tm.assert_numpy_array_equal(cat.__array__(), exp_cat) # however, copy is False by default # so this WILL change values cat = Categorical(["a", "b", "c", "a"]) s = Series(cat) assert s.values is cat s.cat.categories = [1, 2, 3] exp_s = np.array([1, 2, 3, 1], dtype=np.int64) tm.assert_numpy_array_equal(s.__array__(), exp_s) tm.assert_numpy_array_equal(cat.__array__(), exp_s) s[0] = 2 exp_s2 = np.array([2, 2, 3, 1], dtype=np.int64) tm.assert_numpy_array_equal(s.__array__(), exp_s2) tm.assert_numpy_array_equal(cat.__array__(), exp_s2)
def test_rename_categories(self): cat = Categorical(["a", "b", "c", "a"]) # inplace=False: the old one must not be changed res = cat.rename_categories([1, 2, 3]) tm.assert_numpy_array_equal(res.__array__(), np.array([1, 2, 3, 1], dtype=np.int64)) tm.assert_index_equal(res.categories, Index([1, 2, 3])) exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_) tm.assert_numpy_array_equal(cat.__array__(), exp_cat) exp_cat = Index(["a", "b", "c"]) tm.assert_index_equal(cat.categories, exp_cat) # GH18862 (let rename_categories take callables) result = cat.rename_categories(lambda x: x.upper()) expected = Categorical(["A", "B", "C", "A"]) tm.assert_categorical_equal(result, expected) # and now inplace res = cat.rename_categories([1, 2, 3], inplace=True) assert res is None tm.assert_numpy_array_equal(cat.__array__(), np.array([1, 2, 3, 1], dtype=np.int64)) tm.assert_index_equal(cat.categories, Index([1, 2, 3])) # Lengthen with pytest.raises(ValueError): cat.rename_categories([1, 2, 3, 4]) # Shorten with pytest.raises(ValueError): cat.rename_categories([1, 2])
def test_rename_categories(self): cat = Categorical(["a", "b", "c", "a"]) # inplace=False: the old one must not be changed res = cat.rename_categories([1, 2, 3]) tm.assert_numpy_array_equal(res.__array__(), np.array([1, 2, 3, 1], dtype=np.int64)) tm.assert_index_equal(res.categories, Index([1, 2, 3])) exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_) tm.assert_numpy_array_equal(cat.__array__(), exp_cat) exp_cat = Index(["a", "b", "c"]) tm.assert_index_equal(cat.categories, exp_cat) res = cat.rename_categories([1, 2, 3], inplace=True) # and now inplace assert res is None tm.assert_numpy_array_equal(cat.__array__(), np.array([1, 2, 3, 1], dtype=np.int64)) tm.assert_index_equal(cat.categories, Index([1, 2, 3])) # Lengthen with pytest.raises(ValueError): cat.rename_categories([1, 2, 3, 4]) # Shorten with pytest.raises(ValueError): cat.rename_categories([1, 2])
def test_rename_categories(self): cat = Categorical(["a", "b", "c", "a"]) # inplace=False: the old one must not be changed res = cat.rename_categories([1, 2, 3]) tm.assert_numpy_array_equal(res.__array__(), np.array([1, 2, 3, 1], dtype=np.int64)) tm.assert_index_equal(res.categories, Index([1, 2, 3])) exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_) tm.assert_numpy_array_equal(cat.__array__(), exp_cat) exp_cat = Index(["a", "b", "c"]) tm.assert_index_equal(cat.categories, exp_cat) # GH18862 (let rename_categories take callables) result = cat.rename_categories(lambda x: x.upper()) expected = Categorical(["A", "B", "C", "A"]) tm.assert_categorical_equal(result, expected) # and now inplace res = cat.rename_categories([1, 2, 3], inplace=True) assert res is None tm.assert_numpy_array_equal(cat.__array__(), np.array([1, 2, 3, 1], dtype=np.int64)) tm.assert_index_equal(cat.categories, Index([1, 2, 3])) # Lengthen with pytest.raises(ValueError): cat.rename_categories([1, 2, 3, 4]) # Shorten with pytest.raises(ValueError): cat.rename_categories([1, 2])
def test_rename_categories(self): cat = Categorical(["a", "b", "c", "a"]) # inplace=False: the old one must not be changed res = cat.rename_categories([1, 2, 3]) tm.assert_numpy_array_equal(res.__array__(), np.array([1, 2, 3, 1], dtype=np.int64)) tm.assert_index_equal(res.categories, Index([1, 2, 3])) exp_cat = np.array(["a", "b", "c", "a"], dtype=np.object_) tm.assert_numpy_array_equal(cat.__array__(), exp_cat) exp_cat = Index(["a", "b", "c"]) tm.assert_index_equal(cat.categories, exp_cat) # GH18862 (let rename_categories take callables) result = cat.rename_categories(lambda x: x.upper()) expected = Categorical(["A", "B", "C", "A"]) tm.assert_categorical_equal(result, expected) # and now inplace with tm.assert_produces_warning(FutureWarning): # issue #37643 inplace kwarg deprecated res = cat.rename_categories([1, 2, 3], inplace=True) assert res is None tm.assert_numpy_array_equal(cat.__array__(), np.array([1, 2, 3, 1], dtype=np.int64)) tm.assert_index_equal(cat.categories, Index([1, 2, 3]))
def test_categories_assigments(self): s = Categorical(["a", "b", "c", "a"]) exp = np.array([1, 2, 3, 1], dtype=np.int64) s.categories = [1, 2, 3] tm.assert_numpy_array_equal(s.__array__(), exp) tm.assert_index_equal(s.categories, Index([1, 2, 3])) # lengthen with pytest.raises(ValueError): s.categories = [1, 2, 3, 4] # shorten with pytest.raises(ValueError): s.categories = [1, 2]
def test_categories_assigments(self): s = Categorical(["a", "b", "c", "a"]) exp = np.array([1, 2, 3, 1], dtype=np.int64) s.categories = [1, 2, 3] tm.assert_numpy_array_equal(s.__array__(), exp) tm.assert_index_equal(s.categories, Index([1, 2, 3])) # lengthen with pytest.raises(ValueError): s.categories = [1, 2, 3, 4] # shorten with pytest.raises(ValueError): s.categories = [1, 2]
def test_constructor(self): exp_arr = np.array(["a", "b", "c", "a", "b", "c"], dtype=np.object_) c1 = Categorical(exp_arr) tm.assert_numpy_array_equal(c1.__array__(), exp_arr) c2 = Categorical(exp_arr, categories=["a", "b", "c"]) tm.assert_numpy_array_equal(c2.__array__(), exp_arr) c2 = Categorical(exp_arr, categories=["c", "b", "a"]) tm.assert_numpy_array_equal(c2.__array__(), exp_arr) # categories must be unique def f(): Categorical([1, 2], [1, 2, 2]) pytest.raises(ValueError, f) def f(): Categorical(["a", "b"], ["a", "b", "b"]) pytest.raises(ValueError, f) # The default should be unordered c1 = Categorical(["a", "b", "c", "a"]) assert not c1.ordered # Categorical as input c1 = Categorical(["a", "b", "c", "a"]) c2 = Categorical(c1) tm.assert_categorical_equal(c1, c2) c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) c2 = Categorical(c1) tm.assert_categorical_equal(c1, c2) c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) c2 = Categorical(c1) tm.assert_categorical_equal(c1, c2) c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) c2 = Categorical(c1, categories=["a", "b", "c"]) tm.assert_numpy_array_equal(c1.__array__(), c2.__array__()) tm.assert_index_equal(c2.categories, Index(["a", "b", "c"])) # Series of dtype category c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) c2 = Categorical(Series(c1)) tm.assert_categorical_equal(c1, c2) c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) c2 = Categorical(Series(c1)) tm.assert_categorical_equal(c1, c2) # Series c1 = Categorical(["a", "b", "c", "a"]) c2 = Categorical(Series(["a", "b", "c", "a"])) tm.assert_categorical_equal(c1, c2) c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) c2 = Categorical(Series(["a", "b", "c", "a"]), categories=["a", "b", "c", "d"]) tm.assert_categorical_equal(c1, c2) # This should result in integer categories, not float! cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) assert is_integer_dtype(cat.categories) # https://github.com/pandas-dev/pandas/issues/3678 cat = Categorical([np.nan, 1, 2, 3]) assert is_integer_dtype(cat.categories) # this should result in floats cat = Categorical([np.nan, 1, 2., 3]) assert is_float_dtype(cat.categories) cat = Categorical([np.nan, 1., 2., 3.]) assert is_float_dtype(cat.categories) # This doesn't work -> this would probably need some kind of "remember # the original type" feature to try to cast the array interface result # to... # vals = np.asarray(cat[cat.notna()]) # assert is_integer_dtype(vals) # corner cases cat = Categorical([1]) assert len(cat.categories) == 1 assert cat.categories[0] == 1 assert len(cat.codes) == 1 assert cat.codes[0] == 0 cat = Categorical(["a"]) assert len(cat.categories) == 1 assert cat.categories[0] == "a" assert len(cat.codes) == 1 assert cat.codes[0] == 0 # Scalars should be converted to lists cat = Categorical(1) assert len(cat.categories) == 1 assert cat.categories[0] == 1 assert len(cat.codes) == 1 assert cat.codes[0] == 0 # two arrays # - when the first is an integer dtype and the second is not # - when the resulting codes are all -1/NaN with tm.assert_produces_warning(None): c_old = Categorical([0, 1, 2, 0, 1, 2], categories=["a", "b", "c"]) # noqa with tm.assert_produces_warning(None): c_old = Categorical([0, 1, 2, 0, 1, 2], # noqa categories=[3, 4, 5]) # the next one are from the old docs with tm.assert_produces_warning(None): c_old2 = Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3]) # noqa cat = Categorical([1, 2], categories=[1, 2, 3]) # this is a legitimate constructor with tm.assert_produces_warning(None): c = Categorical(np.array([], dtype='int64'), # noqa categories=[3, 2, 1], ordered=True)
def test_set_categories(self): cat = Categorical(["a", "b", "c", "a"], ordered=True) exp_categories = Index(["c", "b", "a"]) exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_) res = cat.set_categories(["c", "b", "a"], inplace=True) tm.assert_index_equal(cat.categories, exp_categories) tm.assert_numpy_array_equal(cat.__array__(), exp_values) assert res is None res = cat.set_categories(["a", "b", "c"]) # cat must be the same as before tm.assert_index_equal(cat.categories, exp_categories) tm.assert_numpy_array_equal(cat.__array__(), exp_values) # only res is changed exp_categories_back = Index(["a", "b", "c"]) tm.assert_index_equal(res.categories, exp_categories_back) tm.assert_numpy_array_equal(res.__array__(), exp_values) # not all "old" included in "new" -> all not included ones are now # np.nan cat = Categorical(["a", "b", "c", "a"], ordered=True) res = cat.set_categories(["a"]) tm.assert_numpy_array_equal(res.codes, np.array([0, -1, -1, 0], dtype=np.int8)) # still not all "old" in "new" res = cat.set_categories(["a", "b", "d"]) tm.assert_numpy_array_equal(res.codes, np.array([0, 1, -1, 0], dtype=np.int8)) tm.assert_index_equal(res.categories, Index(["a", "b", "d"])) # all "old" included in "new" cat = cat.set_categories(["a", "b", "c", "d"]) exp_categories = Index(["a", "b", "c", "d"]) tm.assert_index_equal(cat.categories, exp_categories) # internals... c = Categorical([1, 2, 3, 4, 1], categories=[1, 2, 3, 4], ordered=True) tm.assert_numpy_array_equal(c._codes, np.array([0, 1, 2, 3, 0], dtype=np.int8)) tm.assert_index_equal(c.categories, Index([1, 2, 3, 4])) exp = np.array([1, 2, 3, 4, 1], dtype=np.int64) tm.assert_numpy_array_equal(c.get_values(), exp) # all "pointers" to '4' must be changed from 3 to 0,... c = c.set_categories([4, 3, 2, 1]) # positions are changed tm.assert_numpy_array_equal(c._codes, np.array([3, 2, 1, 0, 3], dtype=np.int8)) # categories are now in new order tm.assert_index_equal(c.categories, Index([4, 3, 2, 1])) # output is the same exp = np.array([1, 2, 3, 4, 1], dtype=np.int64) tm.assert_numpy_array_equal(c.get_values(), exp) assert c.min() == 4 assert c.max() == 1 # set_categories should set the ordering if specified c2 = c.set_categories([4, 3, 2, 1], ordered=False) assert not c2.ordered tm.assert_numpy_array_equal(c.get_values(), c2.get_values()) # set_categories should pass thru the ordering c2 = c.set_ordered(False).set_categories([4, 3, 2, 1]) assert not c2.ordered tm.assert_numpy_array_equal(c.get_values(), c2.get_values())
def test_categories_assignments(self): s = Categorical(["a", "b", "c", "a"]) exp = np.array([1, 2, 3, 1], dtype=np.int64) s.categories = [1, 2, 3] tm.assert_numpy_array_equal(s.__array__(), exp) tm.assert_index_equal(s.categories, Index([1, 2, 3]))
def test_constructor(self): exp_arr = np.array(["a", "b", "c", "a", "b", "c"], dtype=np.object_) c1 = Categorical(exp_arr) tm.assert_numpy_array_equal(c1.__array__(), exp_arr) c2 = Categorical(exp_arr, categories=["a", "b", "c"]) tm.assert_numpy_array_equal(c2.__array__(), exp_arr) c2 = Categorical(exp_arr, categories=["c", "b", "a"]) tm.assert_numpy_array_equal(c2.__array__(), exp_arr) # categories must be unique msg = "Categorical categories must be unique" with pytest.raises(ValueError, match=msg): Categorical([1, 2], [1, 2, 2]) with pytest.raises(ValueError, match=msg): Categorical(["a", "b"], ["a", "b", "b"]) # The default should be unordered c1 = Categorical(["a", "b", "c", "a"]) assert not c1.ordered # Categorical as input c1 = Categorical(["a", "b", "c", "a"]) c2 = Categorical(c1) tm.assert_categorical_equal(c1, c2) c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) c2 = Categorical(c1) tm.assert_categorical_equal(c1, c2) c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) c2 = Categorical(c1) tm.assert_categorical_equal(c1, c2) c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) c2 = Categorical(c1, categories=["a", "b", "c"]) tm.assert_numpy_array_equal(c1.__array__(), c2.__array__()) tm.assert_index_equal(c2.categories, Index(["a", "b", "c"])) # Series of dtype category c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) c2 = Categorical(Series(c1)) tm.assert_categorical_equal(c1, c2) c1 = Categorical(["a", "b", "c", "a"], categories=["a", "c", "b"]) c2 = Categorical(Series(c1)) tm.assert_categorical_equal(c1, c2) # Series c1 = Categorical(["a", "b", "c", "a"]) c2 = Categorical(Series(["a", "b", "c", "a"])) tm.assert_categorical_equal(c1, c2) c1 = Categorical(["a", "b", "c", "a"], categories=["a", "b", "c", "d"]) c2 = Categorical(Series(["a", "b", "c", "a"]), categories=["a", "b", "c", "d"]) tm.assert_categorical_equal(c1, c2) # This should result in integer categories, not float! cat = Categorical([1, 2, 3, np.nan], categories=[1, 2, 3]) assert is_integer_dtype(cat.categories) # https://github.com/pandas-dev/pandas/issues/3678 cat = Categorical([np.nan, 1, 2, 3]) assert is_integer_dtype(cat.categories) # this should result in floats cat = Categorical([np.nan, 1, 2.0, 3]) assert is_float_dtype(cat.categories) cat = Categorical([np.nan, 1.0, 2.0, 3.0]) assert is_float_dtype(cat.categories) # This doesn't work -> this would probably need some kind of "remember # the original type" feature to try to cast the array interface result # to... # vals = np.asarray(cat[cat.notna()]) # assert is_integer_dtype(vals) # corner cases cat = Categorical([1]) assert len(cat.categories) == 1 assert cat.categories[0] == 1 assert len(cat.codes) == 1 assert cat.codes[0] == 0 cat = Categorical(["a"]) assert len(cat.categories) == 1 assert cat.categories[0] == "a" assert len(cat.codes) == 1 assert cat.codes[0] == 0 # Scalars should be converted to lists cat = Categorical(1) assert len(cat.categories) == 1 assert cat.categories[0] == 1 assert len(cat.codes) == 1 assert cat.codes[0] == 0 # two arrays # - when the first is an integer dtype and the second is not # - when the resulting codes are all -1/NaN with tm.assert_produces_warning(None): c_old = Categorical([0, 1, 2, 0, 1, 2], categories=["a", "b", "c"]) # noqa with tm.assert_produces_warning(None): c_old = Categorical([0, 1, 2, 0, 1, 2], categories=[3, 4, 5]) # noqa # the next one are from the old docs with tm.assert_produces_warning(None): c_old2 = Categorical([0, 1, 2, 0, 1, 2], [1, 2, 3]) # noqa cat = Categorical([1, 2], categories=[1, 2, 3]) # this is a legitimate constructor with tm.assert_produces_warning(None): c = Categorical( # noqa np.array([], dtype="int64"), categories=[3, 2, 1], ordered=True )
def test_set_categories(self): cat = Categorical(["a", "b", "c", "a"], ordered=True) exp_categories = Index(["c", "b", "a"]) exp_values = np.array(["a", "b", "c", "a"], dtype=np.object_) res = cat.set_categories(["c", "b", "a"], inplace=True) tm.assert_index_equal(cat.categories, exp_categories) tm.assert_numpy_array_equal(cat.__array__(), exp_values) assert res is None res = cat.set_categories(["a", "b", "c"]) # cat must be the same as before tm.assert_index_equal(cat.categories, exp_categories) tm.assert_numpy_array_equal(cat.__array__(), exp_values) # only res is changed exp_categories_back = Index(["a", "b", "c"]) tm.assert_index_equal(res.categories, exp_categories_back) tm.assert_numpy_array_equal(res.__array__(), exp_values) # not all "old" included in "new" -> all not included ones are now # np.nan cat = Categorical(["a", "b", "c", "a"], ordered=True) res = cat.set_categories(["a"]) tm.assert_numpy_array_equal(res.codes, np.array([0, -1, -1, 0], dtype=np.int8)) # still not all "old" in "new" res = cat.set_categories(["a", "b", "d"]) tm.assert_numpy_array_equal(res.codes, np.array([0, 1, -1, 0], dtype=np.int8)) tm.assert_index_equal(res.categories, Index(["a", "b", "d"])) # all "old" included in "new" cat = cat.set_categories(["a", "b", "c", "d"]) exp_categories = Index(["a", "b", "c", "d"]) tm.assert_index_equal(cat.categories, exp_categories) # internals... c = Categorical([1, 2, 3, 4, 1], categories=[1, 2, 3, 4], ordered=True) tm.assert_numpy_array_equal(c._codes, np.array([0, 1, 2, 3, 0], dtype=np.int8)) tm.assert_index_equal(c.categories, Index([1, 2, 3, 4])) exp = np.array([1, 2, 3, 4, 1], dtype=np.int64) tm.assert_numpy_array_equal(c.get_values(), exp) # all "pointers" to '4' must be changed from 3 to 0,... c = c.set_categories([4, 3, 2, 1]) # positions are changed tm.assert_numpy_array_equal(c._codes, np.array([3, 2, 1, 0, 3], dtype=np.int8)) # categories are now in new order tm.assert_index_equal(c.categories, Index([4, 3, 2, 1])) # output is the same exp = np.array([1, 2, 3, 4, 1], dtype=np.int64) tm.assert_numpy_array_equal(c.get_values(), exp) assert c.min() == 4 assert c.max() == 1 # set_categories should set the ordering if specified c2 = c.set_categories([4, 3, 2, 1], ordered=False) assert not c2.ordered tm.assert_numpy_array_equal(c.get_values(), c2.get_values()) # set_categories should pass thru the ordering c2 = c.set_ordered(False).set_categories([4, 3, 2, 1]) assert not c2.ordered tm.assert_numpy_array_equal(c.get_values(), c2.get_values())