def test_construction_with_dtype(self):
    # Building through Index(..., dtype='category') must give back an
    # index equal to the CategoricalIndex the raw values came from.

    # explicit dtype, from an ndarray and from a plain list
    abc_index = self.create_index(categories=list('abc'))
    for raw in (np.array(abc_index), np.array(abc_index).tolist()):
        built = Index(raw, dtype='category')
        tm.assert_index_equal(built, abc_index, exact=True)

    # these are generally only equal when the categories are reordered
    default_index = self.create_index()
    built = Index(np.array(default_index), dtype='category')
    reordered = built.reorder_categories(default_index.categories)
    tm.assert_index_equal(reordered, default_index, exact=True)

    # an Index can be passed both as the data and as the categories
    int_index = Index(range(3))
    from_index = CategoricalIndex(int_index, categories=int_index,
                                  ordered=True)
    wanted = CategoricalIndex([0, 1, 2], categories=[0, 1, 2],
                              ordered=True)
    tm.assert_index_equal(from_index, wanted, exact=True)
def test_append(self):
    # Exercise CategoricalIndex.append with compatible and incompatible
    # inputs.
    base = self.create_index()
    cats = base.categories

    # two slices carrying the same categories
    head, tail = base[:3], base[3:]
    tm.assert_index_equal(head.append(tail), base, exact=True)

    # a list of slices carrying the same categories
    pieces = [base[:1], base[1:3], base[3:]]
    tm.assert_index_equal(pieces[0].append(pieces[1:]), base, exact=True)

    # appending nothing leaves the index unchanged
    tm.assert_index_equal(base.append([]), base, exact=True)

    # appending with different or reordered categories is not ok
    with self.assertRaises(TypeError):
        base.append(base.values.set_categories(list('abcd')))
    with self.assertRaises(TypeError):
        base.append(base.values.reorder_categories(list('abc')))

    # appending a non-categorical Index
    combined = base.append(Index(['c', 'a']))
    wanted = CategoricalIndex(list('aabbcaca'), categories=cats)
    tm.assert_index_equal(combined, wanted, exact=True)

    # invalid objects
    with self.assertRaises(TypeError):
        base.append(Index(['a', 'd']))
def test_reindex_dtype(self):
    # reindex must return the expected index type for list-like and
    # Categorical targets, together with an int64 indexer.
    labels = ['a', 'b', 'c', 'a']
    wide_categories = ['a', 'b', 'c', 'd']

    def check(source, target, wanted):
        # run one reindex and compare both returned pieces
        res, indexer = source.reindex(target)
        tm.assert_index_equal(res, wanted, exact=True)
        tm.assert_numpy_array_equal(
            indexer, np.array([0, 3, 2], dtype=np.int64))

    # categories inferred from the data
    check(CategoricalIndex(labels),
          ['a', 'c'],
          Index(['a', 'a', 'c']))
    check(CategoricalIndex(labels),
          Categorical(['a', 'c']),
          CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c']))

    # explicit categories that include an unused value
    check(CategoricalIndex(labels, categories=wide_categories),
          ['a', 'c'],
          Index(['a', 'a', 'c'], dtype='object'))
    check(CategoricalIndex(labels, categories=wide_categories),
          Categorical(['a', 'c']),
          CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c']))
def test_reindex_dtype(self):
    # reindex must return the expected index type for list-like and
    # Categorical targets, together with the matching positions.
    cases = [
        # (constructor kwargs, reindex target, expected result)
        ({},
         ['a', 'c'],
         Index(['a', 'a', 'c'])),
        ({},
         Categorical(['a', 'c']),
         CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c'])),
        ({'categories': ['a', 'b', 'c', 'd']},
         ['a', 'c'],
         Index(['a', 'a', 'c'], dtype='object')),
        ({'categories': ['a', 'b', 'c', 'd']},
         Categorical(['a', 'c']),
         CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c'])),
    ]

    for kwargs, target, wanted in cases:
        # a fresh source index is built for every case
        source = CategoricalIndex(['a', 'b', 'c', 'a'], **kwargs)
        res, indexer = source.reindex(target)
        tm.assert_index_equal(res, wanted, exact=True)
        tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2]))
def test_equals_categorical(self):
    # Check equals() and the element-wise comparison operators on
    # CategoricalIndex, including the comparisons that must raise.
    narrow = CategoricalIndex(['a', 'b'], categories=['a', 'b'],
                              ordered=True)
    wide = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'],
                            ordered=True)

    # equals()
    self.assertTrue(narrow.equals(narrow))
    self.assertFalse(narrow.equals(wide))
    self.assertTrue(narrow.equals(narrow.astype(object)))
    self.assertTrue(narrow.astype(object).equals(narrow))

    # element-wise comparisons of the index with itself
    same = narrow == narrow
    self.assertTrue(same.all())
    different = narrow != narrow
    self.assertFalse(different.all())
    greater = narrow > narrow
    self.assertFalse(greater.all())
    less = narrow < narrow
    self.assertFalse(less.all())
    less_equal = narrow <= narrow
    self.assertTrue(less_equal.all())
    greater_equal = narrow >= narrow
    self.assertTrue(greater_equal.all())

    # element-wise comparisons with other operands
    self.assertFalse((narrow == 1).all())
    self.assertTrue((narrow == Index(['a', 'b'])).all())
    self.assertTrue((narrow == narrow.values).all())

    # invalid comparisons
    with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
        narrow == Index(['a', 'b', 'c'])

    with self.assertRaises(TypeError):
        narrow == wide
    with self.assertRaises(TypeError):
        narrow == Categorical(narrow.values, ordered=False)
    with self.assertRaises(TypeError):
        narrow == Categorical(narrow.values, categories=list('abc'))

    # make sure that we are testing for category inclusion properly
    subject = CategoricalIndex(list('aabca'), categories=['c', 'a', 'b'])
    self.assertFalse(subject.equals(list('aabca')))
    self.assertFalse(subject.equals(CategoricalIndex(list('aabca'))))
    self.assertTrue(subject.equals(subject.copy()))

    # NaN listed among the categories: construction and copy are both
    # expected to emit a FutureWarning
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        subject = CategoricalIndex(list('aabca'),
                                   categories=['c', 'a', 'b', np.nan])
    self.assertFalse(subject.equals(list('aabca')))
    self.assertFalse(subject.equals(CategoricalIndex(list('aabca'))))
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        self.assertTrue(subject.equals(subject.copy()))

    # NaN among the values, compared against data without NaN
    subject = CategoricalIndex(list('aabca') + [np.nan],
                               categories=['c', 'a', 'b'])
    self.assertFalse(subject.equals(list('aabca')))
    self.assertFalse(subject.equals(CategoricalIndex(list('aabca'))))
    self.assertTrue(subject.equals(subject.copy()))

    # NaN among the values, compared against data with NaN
    subject = CategoricalIndex(list('aabca') + [np.nan],
                               categories=['c', 'a', 'b'])
    self.assertFalse(subject.equals(list('aabca') + [np.nan]))
    self.assertFalse(
        subject.equals(CategoricalIndex(list('aabca') + [np.nan])))
    self.assertTrue(subject.equals(subject.copy()))
def test_reindexing(self):
    # get_indexer on the categorical index is compared with
    # get_indexer_non_unique on a plain Index built from the same values.
    cat_index = self.create_index()
    plain_index = Index(np.array(cat_index))
    total = len(cat_index)

    for sample_size in (1, 2, 5, total):
        # random lookup labels drawn from the index itself
        positions = np.random.randint(0, total, size=sample_size)
        lookup = plain_index[positions]

        wanted, _ = plain_index.get_indexer_non_unique(lookup)
        got = cat_index.get_indexer(lookup)
        tm.assert_numpy_array_equal(wanted, got)
def test_reindexing(self):
    # get_indexer on the categorical index is compared with the values
    # of get_indexer_non_unique on a plain Index; dtypes are not compared.
    categorical = self.create_index()
    as_objects = Index(np.array(categorical))
    length = len(categorical)

    for count in [1, 2, 5, length]:
        # random lookup labels drawn from the index itself
        picks = np.random.randint(0, length, size=count)
        targets = as_objects[picks]

        reference = as_objects.get_indexer_non_unique(targets)[0]
        observed = categorical.get_indexer(targets)
        tm.assert_numpy_array_equal(reference.values, observed,
                                    check_dtype=False)
def test_astype(self):
    # astype to 'category', to object and to 'interval'.
    source = self.create_index()

    # casting to category gives back an equal CategoricalIndex
    tm.assert_index_equal(source.astype('category'), source, exact=True)

    # casting to object gives a plain Index with the same values
    as_object = source.astype(object)
    self.assert_index_equal(as_object, Index(np.array(source)))

    # this IS equal, but not the same class
    self.assertTrue(as_object.equals(source))
    self.assertIsInstance(as_object, Index)
    self.assertNotIsInstance(as_object, CategoricalIndex)

    # interval categories; code -1 marks a missing entry
    intervals = IntervalIndex.from_arrays(left=[-0.001, 2.0],
                                          right=[2, 4],
                                          closed='right')
    coded = Categorical.from_codes([0, 1, -1], categories=intervals,
                                   ordered=True)
    interval_ci = CategoricalIndex(coded)

    wanted = intervals.take([0, 1, -1])
    as_interval = interval_ci.astype('interval')
    tm.assert_index_equal(as_interval, wanted)

    # rebuilding from the values must give the same index
    rebuilt = IntervalIndex.from_intervals(as_interval.values)
    tm.assert_index_equal(rebuilt, wanted)
def test_astype(self):
    # astype to 'category' and to object.
    original = self.create_index()

    # casting to category gives back an equal CategoricalIndex
    recast = original.astype('category')
    tm.assert_index_equal(recast, original, exact=True)

    # casting to object gives a plain Index with the same values
    plain = original.astype(object)
    wanted = Index(np.array(original))
    self.assert_index_equal(plain, wanted)

    # this IS equal, but not the same class
    self.assertTrue(plain.equals(original))
    self.assertIsInstance(plain, Index)
    self.assertNotIsInstance(plain, CategoricalIndex)
def test_get_indexer(self):
    # get_indexer must return the same positions whether the target is a
    # CategoricalIndex, a plain list or an object Index, and it must
    # reject every fill method.
    idx1 = CategoricalIndex(list('aabcde'), categories=list('edabc'))
    idx2 = CategoricalIndex(list('abf'))

    for indexer in [idx2, list('abf'), Index(list('abf'))]:
        # Pass the loop variable: the previous code always passed idx2,
        # so the list and Index targets were never exercised.
        r1 = idx1.get_indexer(indexer)
        # NOTE(review): four positions for three targets, presumably
        # because 'a' occurs twice in idx1 -- confirm.
        assert_almost_equal(r1, np.array([0, 1, 2, -1], dtype=np.intp))

    # fill methods are expected to raise NotImplementedError
    for method in ['pad', 'backfill', 'nearest']:
        self.assertRaises(NotImplementedError,
                          idx2.get_indexer, idx1, method=method)
def test_get_loc(self):
    # GH 12531
    # CategoricalIndex.get_loc is compared with a plain Index holding
    # the same values.

    # unique values: scalar positions
    cat_unique = CategoricalIndex(list('abcde'), categories=list('edabc'))
    obj_unique = Index(list('abcde'))
    for key in ('a', 'e'):
        self.assertEqual(cat_unique.get_loc(key), obj_unique.get_loc(key))

    for index in (cat_unique, obj_unique):
        with tm.assertRaises(KeyError):
            index.get_loc('NOT-EXIST')

    # non-unique
    cat_dupes = CategoricalIndex(list('aacded'), categories=list('edabc'))
    obj_dupes = Index(list('aacded'))

    # results in bool array
    mask = cat_dupes.get_loc('d')
    self.assert_numpy_array_equal(mask, obj_dupes.get_loc('d'))
    wanted_mask = np.array([False, False, False, True, False, True])
    self.assert_numpy_array_equal(mask, wanted_mask)

    # unique element results in scalar
    position = cat_dupes.get_loc('e')
    self.assertEqual(position, obj_dupes.get_loc('e'))
    self.assertEqual(position, 4)

    for index in (cat_dupes, obj_dupes):
        with tm.assertRaises(KeyError):
            index.get_loc('NOT-EXIST')

    # non-unique, slicable
    cat_runs = CategoricalIndex(list('aabbb'), categories=list('abc'))
    obj_runs = Index(list('aabbb'))

    # results in slice
    for key, wanted in (('a', slice(0, 2, None)), ('b', slice(2, 5, None))):
        found = cat_runs.get_loc(key)
        self.assertEqual(found, obj_runs.get_loc(key))
        self.assertEqual(found, wanted)

    # 'c' is a category but not a value
    for index in (cat_runs, obj_runs):
        with tm.assertRaises(KeyError):
            index.get_loc('c')
def test_map(self):
    # CategoricalIndex.map must carry over ordering and name, and must
    # return an Index rather than an array.

    def lower(value):
        return value.lower()

    # ordered index
    ordered_ci = pd.CategoricalIndex(list('ABABC'), categories=list('CBA'),
                                     ordered=True)
    wanted = pd.CategoricalIndex(list('ababc'), categories=list('cba'),
                                 ordered=True)
    tm.assert_index_equal(ordered_ci.map(lower), wanted)

    # unordered, named index
    named_ci = pd.CategoricalIndex(list('ABABC'), categories=list('BAC'),
                                   ordered=False, name='XXX')
    wanted = pd.CategoricalIndex(list('ababc'), categories=list('bac'),
                                 ordered=False, name='XXX')
    tm.assert_index_equal(named_ci.map(lower), wanted)

    # GH 12766: Return an index not an array
    constant = named_ci.map(lambda x: 1)
    wanted = Index(np.array([1, 1, 1, 1, 1], dtype=np.int64), name='XXX')
    tm.assert_index_equal(constant, wanted)

    # change categories dtype
    unnamed_ci = pd.CategoricalIndex(list('ABABC'), categories=list('BAC'),
                                     ordered=False)

    def to_number(letter):
        lookup = {'A': 10, 'B': 20, 'C': 30}
        return lookup.get(letter)

    wanted = pd.CategoricalIndex([10, 20, 10, 20, 30],
                                 categories=[20, 10, 30],
                                 ordered=False)
    tm.assert_index_equal(unnamed_ci.map(to_number), wanted)
def test_get_loc(self):
    # GH 12531
    # CategoricalIndex.get_loc is compared with a plain Index holding
    # the same values.

    def assert_missing(key, *indexes):
        # every given index must raise KeyError for ``key``
        for candidate in indexes:
            with tm.assertRaises(KeyError):
                candidate.get_loc(key)

    # unique values
    categorical = CategoricalIndex(list('abcde'), categories=list('edabc'))
    plain = Index(list('abcde'))
    self.assertEqual(categorical.get_loc('a'), plain.get_loc('a'))
    self.assertEqual(categorical.get_loc('e'), plain.get_loc('e'))
    assert_missing('NOT-EXIST', categorical, plain)

    # non-unique
    categorical = CategoricalIndex(list('aacded'),
                                   categories=list('edabc'))
    plain = Index(list('aacded'))

    # results in bool array
    located = categorical.get_loc('d')
    self.assert_numpy_array_equal(located, plain.get_loc('d'))
    self.assert_numpy_array_equal(
        located, np.array([False, False, False, True, False, True]))

    # unique element results in scalar
    located = categorical.get_loc('e')
    self.assertEqual(located, plain.get_loc('e'))
    self.assertEqual(located, 4)
    assert_missing('NOT-EXIST', categorical, plain)

    # non-unique, slicable
    categorical = CategoricalIndex(list('aabbb'), categories=list('abc'))
    plain = Index(list('aabbb'))

    # results in slice
    located = categorical.get_loc('a')
    self.assertEqual(located, plain.get_loc('a'))
    self.assertEqual(located, slice(0, 2, None))

    located = categorical.get_loc('b')
    self.assertEqual(located, plain.get_loc('b'))
    self.assertEqual(located, slice(2, 5, None))

    # 'c' is a category but not a value
    assert_missing('c', categorical, plain)
def test_construction(self):
    # Build CategoricalIndex objects from several kinds of input and
    # check the resulting categories, codes and ordered flag.
    source = self.create_index(categories=list('abcd'))
    categories = source.categories

    def check_layout(index, wanted_categories, wanted_codes):
        # compare the categories and the int8 codes of ``index``
        self.assert_index_equal(index.categories, Index(wanted_categories))
        tm.assert_numpy_array_equal(
            index.codes, np.array(wanted_codes, dtype='int8'))

    # Index() on a CategoricalIndex or on its values keeps the type
    for data in (source, source.values):
        built = Index(data)
        tm.assert_index_equal(built, source, exact=True)
        self.assertFalse(built.ordered)

    # empty
    built = CategoricalIndex(categories=categories)
    check_layout(built, categories, [])
    self.assertFalse(built.ordered)

    # passing categories
    built = CategoricalIndex(list('aabbca'), categories=categories)
    check_layout(built, categories, [0, 0, 1, 1, 2, 0])

    # from a Categorical, without and with explicit categories
    cat = pd.Categorical(list('aabbca'))
    built = CategoricalIndex(cat)
    check_layout(built, list('abc'), [0, 0, 1, 1, 2, 0])
    self.assertFalse(built.ordered)

    built = CategoricalIndex(cat, categories=categories)
    check_layout(built, categories, [0, 0, 1, 1, 2, 0])
    self.assertFalse(built.ordered)

    # from another CategoricalIndex
    source = CategoricalIndex(cat, categories=list('abcd'))
    built = CategoricalIndex(source)
    check_layout(built, categories, [0, 0, 1, 1, 2, 0])
    self.assertFalse(built.ordered)

    # narrowing the categories: the dropped value gets code -1
    built = CategoricalIndex(source, categories=list('ab'))
    check_layout(built, list('ab'), [0, 0, 1, 1, -1, 0])
    self.assertFalse(built.ordered)

    built = CategoricalIndex(source, categories=list('ab'), ordered=True)
    check_layout(built, list('ab'), [0, 0, 1, 1, -1, 0])
    self.assertTrue(built.ordered)

    # turn me to an Index
    built = Index(np.array(source))
    self.assertIsInstance(built, Index)
    self.assertNotIsInstance(built, CategoricalIndex)