def test_construction_with_dtype(self):
    # Index(..., dtype='category') should rebuild the categorical index.

    # specify dtype, from an ndarray and from a plain list
    base = self.create_index(categories=list('abc'))

    from_array = Index(np.array(base), dtype='category')
    tm.assert_index_equal(from_array, base, exact=True)

    from_list = Index(np.array(base).tolist(), dtype='category')
    tm.assert_index_equal(from_list, base, exact=True)

    # these are generally only equal when the categories are reordered
    base = self.create_index()
    rebuilt = Index(np.array(base), dtype='category')
    rebuilt = rebuilt.reorder_categories(base.categories)
    tm.assert_index_equal(rebuilt, base, exact=True)

    # make sure indexes are handled, both as data and as categories
    int_idx = Index(range(3))
    built = CategoricalIndex(int_idx, categories=int_idx, ordered=True)
    wanted = CategoricalIndex([0, 1, 2], categories=[0, 1, 2],
                              ordered=True)
    tm.assert_index_equal(built, wanted, exact=True)
def test_astype(self):
    # astype to 'category', to object, and to 'interval'.
    base = self.create_index()

    as_cat = base.astype('category')
    tm.assert_index_equal(as_cat, base, exact=True)

    as_obj = base.astype(object)
    self.assert_index_equal(as_obj, Index(np.array(base)))

    # this IS equal, but not the same class
    self.assertTrue(as_obj.equals(base))
    self.assertIsInstance(as_obj, Index)
    self.assertNotIsInstance(as_obj, CategoricalIndex)

    # interval
    intervals = IntervalIndex.from_arrays(left=[-0.001, 2.0],
                                          right=[2, 4],
                                          closed='right')
    cat = Categorical.from_codes([0, 1, -1], categories=intervals,
                                 ordered=True)
    base = CategoricalIndex(cat)

    as_interval = base.astype('interval')
    wanted = intervals.take([0, 1, -1])
    tm.assert_index_equal(as_interval, wanted)

    # rebuilding from the raw interval values gives the same index
    rebuilt = IntervalIndex.from_intervals(as_interval.values)
    tm.assert_index_equal(rebuilt, wanted)
def test_duplicates(self):
    # A repeated value is reported as a duplicate and collapses to a
    # single entry under drop_duplicates (name preserved).
    repeated = CategoricalIndex([0, 0, 0], name='foo')
    self.assertFalse(repeated.is_unique)
    self.assertTrue(repeated.has_duplicates)

    deduped = repeated.drop_duplicates()
    self.assert_index_equal(deduped, CategoricalIndex([0], name='foo'))
def test_fillna_categorical(self):
    # GH 11343
    with_gap = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name='x')

    # fill by value in categories
    filled = with_gap.fillna(1.0)
    self.assert_index_equal(
        filled, CategoricalIndex([1.0, 1.0, 3.0, 1.0], name='x'))

    # fill by value not in categories raises ValueError
    msg = 'fill value must be in categories'
    with tm.assertRaisesRegexp(ValueError, msg):
        with_gap.fillna(2.0)
def test_ensure_copied_data(self):
    # Check the "copy" argument of each Index.__new__ is honoured
    # GH12309
    # Must be tested separately from other indexes because
    # self.value is not an ndarray
    def root(arr):
        # arr itself when arr.base is None, otherwise arr.base
        if arr.base is None:
            return arr
        return arr.base

    for index in self.indices.values():
        # copy=True: equal contents, different underlying object
        copied = CategoricalIndex(index.values, copy=True)
        tm.assert_index_equal(index, copied)
        self.assertIsNot(root(index.values), root(copied.values))

        # copy=False: the very same underlying object
        shared = CategoricalIndex(index.values, copy=False)
        self.assertIs(root(index.values), root(shared.values))
def test_reindex_dtype(self):
    # reindex returns a plain Index for a list target and a
    # CategoricalIndex for a Categorical target; the indexer is int64.

    # list target
    new_index, positions = CategoricalIndex(
        ['a', 'b', 'c', 'a']).reindex(['a', 'c'])
    tm.assert_index_equal(new_index, Index(['a', 'a', 'c']), exact=True)
    tm.assert_numpy_array_equal(positions,
                                np.array([0, 3, 2], dtype=np.int64))

    # Categorical target
    new_index, positions = CategoricalIndex(
        ['a', 'b', 'c', 'a']).reindex(Categorical(['a', 'c']))
    tm.assert_index_equal(
        new_index,
        CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c']),
        exact=True)
    tm.assert_numpy_array_equal(positions,
                                np.array([0, 3, 2], dtype=np.int64))

    # list target, source has an unused category
    new_index, positions = CategoricalIndex(
        ['a', 'b', 'c', 'a'],
        categories=['a', 'b', 'c', 'd']).reindex(['a', 'c'])
    tm.assert_index_equal(
        new_index, Index(['a', 'a', 'c'], dtype='object'), exact=True)
    tm.assert_numpy_array_equal(positions,
                                np.array([0, 3, 2], dtype=np.int64))

    # Categorical target, source has an unused category
    new_index, positions = CategoricalIndex(
        ['a', 'b', 'c', 'a'],
        categories=['a', 'b', 'c', 'd']).reindex(Categorical(['a', 'c']))
    tm.assert_index_equal(
        new_index,
        CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c']),
        exact=True)
    tm.assert_numpy_array_equal(positions,
                                np.array([0, 3, 2], dtype=np.int64))
def test_delete(self):
    # delete() by position keeps the categories; a position past the end
    # raises.
    ci = self.create_index()
    cats = ci.categories

    without_first = ci.delete(0)
    tm.assert_index_equal(
        without_first,
        CategoricalIndex(list('abbca'), categories=cats),
        exact=True)

    without_last = ci.delete(-1)
    tm.assert_index_equal(
        without_last,
        CategoricalIndex(list('aabbc'), categories=cats),
        exact=True)

    with tm.assertRaises((IndexError, ValueError)):
        # either, depending on numpy version
        ci.delete(10)
def test_append(self):
    ci = self.create_index()
    cats = ci.categories

    # append cats with the same categories
    joined = ci[:3].append(ci[3:])
    tm.assert_index_equal(joined, ci, exact=True)

    # append a list of pieces in one call
    pieces = [ci[:1], ci[1:3], ci[3:]]
    first, rest = pieces[0], pieces[1:]
    joined = first.append(rest)
    tm.assert_index_equal(joined, ci, exact=True)

    # empty
    joined = ci.append([])
    tm.assert_index_equal(joined, ci, exact=True)

    # appending with different categories or reordered is not ok
    with self.assertRaises(TypeError):
        ci.append(ci.values.set_categories(list('abcd')))
    with self.assertRaises(TypeError):
        ci.append(ci.values.reorder_categories(list('abc')))

    # with objects
    joined = ci.append(['c', 'a'])
    wanted = CategoricalIndex(list('aabbcaca'), categories=cats)
    tm.assert_index_equal(joined, wanted, exact=True)

    # invalid objects
    with self.assertRaises(TypeError):
        ci.append(['a', 'd'])
def test_identical(self):
    # identical() holds against itself and a copy, but not against an
    # index built with an extra category.
    two_cats = CategoricalIndex(['a', 'b'], categories=['a', 'b'],
                                ordered=True)
    three_cats = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'],
                                  ordered=True)

    self.assertTrue(two_cats.identical(two_cats))
    duplicate = two_cats.copy()
    self.assertTrue(two_cats.identical(duplicate))
    self.assertFalse(two_cats.identical(three_cats))
def test_repr_roundtrip(self):
    # Text conversion used for the formatting checks: str on Python 3,
    # compat.text_type otherwise.
    to_text = str if PY3 else compat.text_type

    small = CategoricalIndex(['a', 'b'], categories=['a', 'b'],
                             ordered=True)
    str(small)
    # repr() of a short index evaluates back to an equal index
    tm.assert_index_equal(eval(repr(small)), small, exact=True)

    # formatting
    to_text(small)

    # long format
    # this is not reprable
    large = CategoricalIndex(np.random.randint(0, 5, size=100))
    to_text(large)
def test_contains(self):
    # Membership is decided by the values present, not by the declared
    # categories or the integer codes.
    ci = self.create_index(categories=list('cabdef'))

    self.assertTrue('a' in ci)
    for absent in ('z', 'e', np.nan):
        self.assertTrue(absent not in ci)

    # assert codes NOT in index
    for code in (0, 1):
        self.assertFalse(code in ci)

    # NaN listed as a category (construction warns) is still not a member
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        ci = CategoricalIndex(list('aabbca'),
                              categories=list('cabdef') + [np.nan])
    self.assertFalse(np.nan in ci)

    # NaN present among the values is a member
    ci = CategoricalIndex(list('aabbca') + [np.nan],
                          categories=list('cabdef'))
    self.assertTrue(np.nan in ci)
def test_reindex_dtype(self):
    # reindex returns a plain Index for a list target and a
    # CategoricalIndex for a Categorical target.

    # list target
    source = CategoricalIndex(['a', 'b', 'c', 'a'])
    res, indexer = source.reindex(['a', 'c'])
    wanted = Index(['a', 'a', 'c'])
    tm.assert_index_equal(res, wanted, exact=True)
    tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2]))

    # Categorical target
    source = CategoricalIndex(['a', 'b', 'c', 'a'])
    res, indexer = source.reindex(Categorical(['a', 'c']))
    wanted = CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c'])
    tm.assert_index_equal(res, wanted, exact=True)
    tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2]))

    # list target, source has an unused category
    source = CategoricalIndex(['a', 'b', 'c', 'a'],
                              categories=['a', 'b', 'c', 'd'])
    res, indexer = source.reindex(['a', 'c'])
    wanted = Index(['a', 'a', 'c'], dtype='object')
    tm.assert_index_equal(res, wanted, exact=True)
    tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2]))

    # Categorical target, source has an unused category
    source = CategoricalIndex(['a', 'b', 'c', 'a'],
                              categories=['a', 'b', 'c', 'd'])
    res, indexer = source.reindex(Categorical(['a', 'c']))
    wanted = CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c'])
    tm.assert_index_equal(res, wanted, exact=True)
    tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2]))
def test_isin(self):
    # isin() yields one boolean per element, NaN included.
    ci = CategoricalIndex(list('aabca') + [np.nan],
                          categories=['c', 'a', 'b'])

    mask = ci.isin(['c'])
    tm.assert_numpy_array_equal(
        mask, np.array([False, False, False, True, False, False]))

    mask = ci.isin(['c', 'a', 'b'])
    tm.assert_numpy_array_equal(mask, np.array([True] * 5 + [False]))

    mask = ci.isin(['c', 'a', 'b', np.nan])
    tm.assert_numpy_array_equal(mask, np.array([True] * 6))

    # mismatched categorical -> coerced to ndarray so doesn't matter
    mask = ci.isin(ci.set_categories(list('abcdefghi')))
    tm.assert_numpy_array_equal(mask, np.array([True] * 6))

    mask = ci.isin(ci.set_categories(list('defghi')))
    tm.assert_numpy_array_equal(mask, np.array([False] * 5 + [True]))
def test_insert(self):
    ci = self.create_index()
    cats = ci.categories

    # test 0th element
    grown = ci.insert(0, 'a')
    tm.assert_index_equal(
        grown,
        CategoricalIndex(list('aaabbca'), categories=cats),
        exact=True)

    # test Nth element that follows Python list behavior
    grown = ci.insert(-1, 'a')
    tm.assert_index_equal(
        grown,
        CategoricalIndex(list('aabbcaa'), categories=cats),
        exact=True)

    # test empty
    blank = CategoricalIndex(categories=cats)
    grown = blank.insert(0, 'a')
    tm.assert_index_equal(
        grown, CategoricalIndex(['a'], categories=cats), exact=True)

    # invalid: 'd' is not one of the categories
    with self.assertRaises(TypeError):
        ci.insert(0, 'd')
def test_get_indexer(self):
    # get_indexer must give the same positions whether the target is a
    # CategoricalIndex, a plain list or an Index; fill methods are not
    # implemented.
    idx1 = CategoricalIndex(list('aabcde'), categories=list('edabc'))
    idx2 = CategoricalIndex(list('abf'))

    # Pass each target representation through get_indexer. The loop used
    # to ignore its variable and looked up idx2 on every iteration, so the
    # list and Index targets were never exercised.
    for indexer in [idx2, list('abf'), Index(list('abf'))]:
        r1 = idx1.get_indexer(indexer)
        assert_almost_equal(r1, np.array([0, 1, 2, -1], dtype=np.intp))

    # none of the fill methods is supported
    self.assertRaises(NotImplementedError,
                      lambda: idx2.get_indexer(idx1, method='pad'))
    self.assertRaises(NotImplementedError,
                      lambda: idx2.get_indexer(idx1, method='backfill'))
    self.assertRaises(NotImplementedError,
                      lambda: idx2.get_indexer(idx1, method='nearest'))
def test_get_loc(self):
    # GH 12531
    # get_loc on a CategoricalIndex must agree with a plain Index
    cat_unique = CategoricalIndex(list('abcde'), categories=list('edabc'))
    obj_unique = Index(list('abcde'))
    for key in ('a', 'e'):
        self.assertEqual(cat_unique.get_loc(key), obj_unique.get_loc(key))

    for each in [cat_unique, obj_unique]:
        with tm.assertRaises(KeyError):
            each.get_loc('NOT-EXIST')

    # non-unique
    cat_dup = CategoricalIndex(list('aacded'), categories=list('edabc'))
    obj_dup = Index(list('aacded'))

    # results in bool array
    found = cat_dup.get_loc('d')
    self.assert_numpy_array_equal(found, obj_dup.get_loc('d'))
    self.assert_numpy_array_equal(
        found, np.array([False, False, False, True, False, True]))

    # unique element results in scalar
    found = cat_dup.get_loc('e')
    self.assertEqual(found, obj_dup.get_loc('e'))
    self.assertEqual(found, 4)

    for each in [cat_dup, obj_dup]:
        with tm.assertRaises(KeyError):
            each.get_loc('NOT-EXIST')

    # non-unique, slicable
    cat_runs = CategoricalIndex(list('aabbb'), categories=list('abc'))
    obj_runs = Index(list('aabbb'))

    # results in slice
    found = cat_runs.get_loc('a')
    self.assertEqual(found, obj_runs.get_loc('a'))
    self.assertEqual(found, slice(0, 2, None))

    found = cat_runs.get_loc('b')
    self.assertEqual(found, obj_runs.get_loc('b'))
    self.assertEqual(found, slice(2, 5, None))

    # 'c' is a category but never occurs as a value
    for each in [cat_runs, obj_runs]:
        with tm.assertRaises(KeyError):
            each.get_loc('c')
def test_contains(self):
    # Membership is decided by the values present, not by the declared
    # categories or the integer codes.
    ci = self.create_index(categories=list('cabdef'))

    self.assertTrue('a' in ci)
    for absent in ('z', 'e', np.nan):
        self.assertTrue(absent not in ci)

    # assert codes NOT in index
    for code in (0, 1):
        self.assertFalse(code in ci)

    # NaN present among the values is a member
    with_nan = CategoricalIndex(list('aabbca') + [np.nan],
                                categories=list('cabdef'))
    self.assertTrue(np.nan in with_nan)
def test_isin(self):
    # isin() yields one boolean per element, NaN included.
    ci = CategoricalIndex(list('aabca') + [np.nan],
                          categories=['c', 'a', 'b'])

    def check(values, flags):
        # compare ci.isin(values) with the expected boolean flags
        tm.assert_numpy_array_equal(ci.isin(values), np.array(flags))

    check(['c'], [False, False, False, True, False, False])
    check(['c', 'a', 'b'], [True] * 5 + [False])
    check(['c', 'a', 'b', np.nan], [True] * 6)

    # mismatched categorical -> coerced to ndarray so doesn't matter
    check(ci.set_categories(list('abcdefghi')), [True] * 6)
    check(ci.set_categories(list('defghi')), [False] * 5 + [True])
def test_get_indexer(self):
    # get_indexer must give the same positions whether the target is a
    # CategoricalIndex, a plain list or an Index; fill methods are not
    # implemented.
    idx1 = CategoricalIndex(list('aabcde'), categories=list('edabc'))
    idx2 = CategoricalIndex(list('abf'))

    # Pass each target representation through get_indexer. The loop used
    # to ignore its variable and looked up idx2 on every iteration, so the
    # list and Index targets were never exercised.
    for indexer in [idx2, list('abf'), Index(list('abf'))]:
        r1 = idx1.get_indexer(indexer)
        assert_almost_equal(r1, np.array([0, 1, 2, -1]))

    # none of the fill methods is supported
    self.assertRaises(NotImplementedError,
                      lambda: idx2.get_indexer(idx1, method='pad'))
    self.assertRaises(NotImplementedError,
                      lambda: idx2.get_indexer(idx1, method='backfill'))
    self.assertRaises(NotImplementedError,
                      lambda: idx2.get_indexer(idx1, method='nearest'))
def test_equals(self):
    # equals() and the element-wise comparison operators.
    ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True)
    ci2 = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'],
                           ordered=True)

    self.assertTrue(ci1.equals(ci1))
    self.assertFalse(ci1.equals(ci2))

    # equality with the object-dtype version holds in both directions
    as_object = ci1.astype(object)
    self.assertTrue(ci1.equals(as_object))
    self.assertTrue(ci1.astype(object).equals(ci1))

    # element-wise operators against itself
    self.assertTrue((ci1 == ci1).all())
    self.assertFalse((ci1 != ci1).all())
    self.assertFalse((ci1 > ci1).all())
    self.assertFalse((ci1 < ci1).all())
    self.assertTrue((ci1 <= ci1).all())
    self.assertTrue((ci1 >= ci1).all())

    # against a scalar, a plain Index and the underlying values
    self.assertFalse((ci1 == 1).all())
    self.assertTrue((ci1 == Index(['a', 'b'])).all())
    self.assertTrue((ci1 == ci1.values).all())

    # invalid comparisons
    with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
        ci1 == Index(['a', 'b', 'c'])
    with self.assertRaises(TypeError):
        ci1 == ci2
    with self.assertRaises(TypeError):
        ci1 == Categorical(ci1.values, ordered=False)
    with self.assertRaises(TypeError):
        ci1 == Categorical(ci1.values, categories=list('abc'))

    # tests
    # make sure that we are testing for category inclusion properly
    plain = CategoricalIndex(list('aabca'), categories=['c', 'a', 'b'])
    self.assertTrue(plain.equals(list('aabca')))

    # NaN listed as a category: construction warns
    with tm.assert_produces_warning(FutureWarning,
                                    check_stacklevel=False):
        self.assertTrue(CategoricalIndex(
            list('aabca'),
            categories=['c', 'a', 'b', np.nan]).equals(list('aabca')))

    # NaN among the values
    with_nan = CategoricalIndex(list('aabca') + [np.nan],
                                categories=['c', 'a', 'b'])
    self.assertFalse(with_nan.equals(list('aabca')))

    with_nan = CategoricalIndex(list('aabca') + [np.nan],
                                categories=['c', 'a', 'b'])
    self.assertTrue(with_nan.equals(list('aabca') + [np.nan]))
def test_method_delegation(self):
    # Category-manipulation methods called on the index return a
    # CategoricalIndex; inplace=True is rejected.

    # set_categories
    ci = CategoricalIndex(list('aabbca'), categories=list('cabdef'))
    expected = CategoricalIndex(list('aabbca'), categories=list('cab'))
    tm.assert_index_equal(ci.set_categories(list('cab')), expected)

    # rename_categories
    ci = CategoricalIndex(list('aabbca'), categories=list('cab'))
    expected = CategoricalIndex(list('ffggef'), categories=list('efg'))
    tm.assert_index_equal(ci.rename_categories(list('efg')), expected)

    # add_categories
    ci = CategoricalIndex(list('aabbca'), categories=list('cab'))
    expected = CategoricalIndex(list('aabbca'), categories=list('cabd'))
    tm.assert_index_equal(ci.add_categories(['d']), expected)

    # remove_categories: values of the removed category become NaN
    ci = CategoricalIndex(list('aabbca'), categories=list('cab'))
    expected = CategoricalIndex(list('aabb') + [np.nan] + ['a'],
                                categories=list('ab'))
    tm.assert_index_equal(ci.remove_categories(['c']), expected)

    # as_unordered
    ci = CategoricalIndex(list('aabbca'), categories=list('cabdef'))
    tm.assert_index_equal(ci.as_unordered(), ci)

    # as_ordered
    ci = CategoricalIndex(list('aabbca'), categories=list('cabdef'))
    expected = CategoricalIndex(list('aabbca'),
                                categories=list('cabdef'),
                                ordered=True)
    tm.assert_index_equal(ci.as_ordered(), expected)

    # invalid
    with self.assertRaises(ValueError):
        ci.set_categories(list('cab'), inplace=True)
def test_equals_categorical(self):
    # equals() and the element-wise comparison operators.
    ci1 = CategoricalIndex(['a', 'b'], categories=['a', 'b'], ordered=True)
    ci2 = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'],
                           ordered=True)

    self.assertTrue(ci1.equals(ci1))
    self.assertFalse(ci1.equals(ci2))

    # equality with the object-dtype version holds in both directions
    as_object = ci1.astype(object)
    self.assertTrue(ci1.equals(as_object))
    self.assertTrue(ci1.astype(object).equals(ci1))

    # element-wise operators against itself
    self.assertTrue((ci1 == ci1).all())
    self.assertFalse((ci1 != ci1).all())
    self.assertFalse((ci1 > ci1).all())
    self.assertFalse((ci1 < ci1).all())
    self.assertTrue((ci1 <= ci1).all())
    self.assertTrue((ci1 >= ci1).all())

    # against a scalar, a plain Index and the underlying values
    self.assertFalse((ci1 == 1).all())
    self.assertTrue((ci1 == Index(['a', 'b'])).all())
    self.assertTrue((ci1 == ci1.values).all())

    # invalid comparisons
    with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
        ci1 == Index(['a', 'b', 'c'])
    with self.assertRaises(TypeError):
        ci1 == ci2
    with self.assertRaises(TypeError):
        ci1 == Categorical(ci1.values, ordered=False)
    with self.assertRaises(TypeError):
        ci1 == Categorical(ci1.values, categories=list('abc'))

    # tests
    # make sure that we are testing for category inclusion properly
    probe = CategoricalIndex(list('aabca'), categories=['c', 'a', 'b'])
    self.assertFalse(probe.equals(list('aabca')))
    self.assertFalse(probe.equals(CategoricalIndex(list('aabca'))))
    self.assertTrue(probe.equals(probe.copy()))

    # NaN among the values, compared against NaN-free data
    probe = CategoricalIndex(list('aabca') + [np.nan],
                             categories=['c', 'a', 'b'])
    self.assertFalse(probe.equals(list('aabca')))
    self.assertFalse(probe.equals(CategoricalIndex(list('aabca'))))
    self.assertTrue(probe.equals(probe.copy()))

    # NaN among the values, compared against data that also has NaN
    probe = CategoricalIndex(list('aabca') + [np.nan],
                             categories=['c', 'a', 'b'])
    self.assertFalse(probe.equals(list('aabca') + [np.nan]))
    self.assertFalse(
        probe.equals(CategoricalIndex(list('aabca') + [np.nan])))
    self.assertTrue(probe.equals(probe.copy()))
def test_get_loc(self):
    # GH 12531
    # get_loc on a CategoricalIndex must agree with a plain Index
    cat_a = CategoricalIndex(list('abcde'), categories=list('edabc'))
    obj_a = Index(list('abcde'))
    self.assertEqual(cat_a.get_loc('a'), obj_a.get_loc('a'))
    self.assertEqual(cat_a.get_loc('e'), obj_a.get_loc('e'))

    for candidate in [cat_a, obj_a]:
        with tm.assertRaises(KeyError):
            candidate.get_loc('NOT-EXIST')

    # non-unique
    cat_b = CategoricalIndex(list('aacded'), categories=list('edabc'))
    obj_b = Index(list('aacded'))

    # results in bool array
    loc = cat_b.get_loc('d')
    self.assert_numpy_array_equal(loc, obj_b.get_loc('d'))
    wanted_mask = np.array([False, False, False, True, False, True])
    self.assert_numpy_array_equal(loc, wanted_mask)

    # unique element results in scalar
    loc = cat_b.get_loc('e')
    self.assertEqual(loc, obj_b.get_loc('e'))
    self.assertEqual(loc, 4)

    for candidate in [cat_b, obj_b]:
        with tm.assertRaises(KeyError):
            candidate.get_loc('NOT-EXIST')

    # non-unique, slicable
    cat_c = CategoricalIndex(list('aabbb'), categories=list('abc'))
    obj_c = Index(list('aabbb'))

    # results in slice
    loc = cat_c.get_loc('a')
    self.assertEqual(loc, obj_c.get_loc('a'))
    self.assertEqual(loc, slice(0, 2, None))

    loc = cat_c.get_loc('b')
    self.assertEqual(loc, obj_c.get_loc('b'))
    self.assertEqual(loc, slice(2, 5, None))

    # 'c' is a category but never occurs as a value
    for candidate in [cat_c, obj_c]:
        with tm.assertRaises(KeyError):
            candidate.get_loc('c')
def test_method_delegation(self):
    # Category-manipulation methods called on the index return a
    # CategoricalIndex; inplace=True is rejected.
    def build(cats):
        # fresh index over the fixed values 'aabbca' with the given categories
        return CategoricalIndex(list('aabbca'), categories=cats)

    # set_categories
    changed = build(list('cabdef')).set_categories(list('cab'))
    tm.assert_index_equal(changed, build(list('cab')))

    # rename_categories
    changed = build(list('cab')).rename_categories(list('efg'))
    tm.assert_index_equal(
        changed,
        CategoricalIndex(list('ffggef'), categories=list('efg')))

    # add_categories
    changed = build(list('cab')).add_categories(['d'])
    tm.assert_index_equal(changed, build(list('cabd')))

    # remove_categories: values of the removed category become NaN
    changed = build(list('cab')).remove_categories(['c'])
    tm.assert_index_equal(
        changed,
        CategoricalIndex(list('aabb') + [np.nan] + ['a'],
                         categories=list('ab')))

    # as_unordered
    ci = build(list('cabdef'))
    tm.assert_index_equal(ci.as_unordered(), ci)

    # as_ordered
    ci = build(list('cabdef'))
    tm.assert_index_equal(
        ci.as_ordered(),
        CategoricalIndex(list('aabbca'), categories=list('cabdef'),
                         ordered=True))

    # invalid
    with self.assertRaises(ValueError):
        ci.set_categories(list('cab'), inplace=True)
def test_construction(self):
    # Constructing from an existing index, raw values, a Categorical, and
    # with explicit categories / ordered arguments.
    def check(index, cats, codes):
        # categories and int8 codes of a constructed index
        self.assert_index_equal(index.categories, Index(cats))
        tm.assert_numpy_array_equal(index.codes,
                                    np.array(codes, dtype='int8'))

    ci = self.create_index(categories=list('abcd'))
    categories = ci.categories

    built = Index(ci)
    tm.assert_index_equal(built, ci, exact=True)
    self.assertFalse(built.ordered)

    built = Index(ci.values)
    tm.assert_index_equal(built, ci, exact=True)
    self.assertFalse(built.ordered)

    # empty
    built = CategoricalIndex(categories=categories)
    check(built, categories, [])
    self.assertFalse(built.ordered)

    # passing categories
    built = CategoricalIndex(list('aabbca'), categories=categories)
    check(built, categories, [0, 0, 1, 1, 2, 0])

    # from a Categorical, categories inferred
    c = pd.Categorical(list('aabbca'))
    built = CategoricalIndex(c)
    check(built, list('abc'), [0, 0, 1, 1, 2, 0])
    self.assertFalse(built.ordered)

    # from a Categorical, categories given
    built = CategoricalIndex(c, categories=categories)
    check(built, categories, [0, 0, 1, 1, 2, 0])
    self.assertFalse(built.ordered)

    # from another CategoricalIndex
    ci = CategoricalIndex(c, categories=list('abcd'))
    built = CategoricalIndex(ci)
    check(built, categories, [0, 0, 1, 1, 2, 0])
    self.assertFalse(built.ordered)

    # narrowing the categories: 'c' is no longer a category, code -1
    built = CategoricalIndex(ci, categories=list('ab'))
    check(built, list('ab'), [0, 0, 1, 1, -1, 0])
    self.assertFalse(built.ordered)

    built = CategoricalIndex(ci, categories=list('ab'), ordered=True)
    check(built, list('ab'), [0, 0, 1, 1, -1, 0])
    self.assertTrue(built.ordered)

    # turn me to an Index
    built = Index(np.array(ci))
    self.assertIsInstance(built, Index)
    self.assertNotIsInstance(built, CategoricalIndex)
def create_index(self, categories=None, ordered=False):
    # Fixture factory: a CategoricalIndex over the values 'aabbca'.
    # categories defaults to ['c', 'a', 'b'] when not supplied; ordered is
    # passed straight through.
    chosen = list('cab') if categories is None else categories
    return CategoricalIndex(list('aabbca'), categories=chosen,
                            ordered=ordered)