Exemplo n.º 1
0
    def test_construction_with_dtype(self):
        # Index(..., dtype='category') is checked against the fixture index.

        # explicit dtype, fed from an ndarray and from a plain list
        abc_index = self.create_index(categories=list('abc'))

        from_array = Index(np.array(abc_index), dtype='category')
        tm.assert_index_equal(from_array, abc_index, exact=True)

        from_list = Index(np.array(abc_index).tolist(), dtype='category')
        tm.assert_index_equal(from_list, abc_index, exact=True)

        # with the fixture's default categories the result is generally only
        # equal once its categories are reordered to match
        default_index = self.create_index()
        inferred = Index(np.array(default_index), dtype='category')
        reordered = inferred.reorder_categories(default_index.categories)
        tm.assert_index_equal(reordered, default_index, exact=True)

        # an Index may be passed both as the data and as the categories
        positions = Index(range(3))
        from_index = CategoricalIndex(positions, categories=positions,
                                      ordered=True)
        wanted = CategoricalIndex([0, 1, 2], categories=[0, 1, 2],
                                  ordered=True)
        tm.assert_index_equal(from_index, wanted, exact=True)
Exemplo n.º 2
0
    def test_astype(self):
        # Exercise astype() towards 'category', object and 'interval'.
        cat_index = self.create_index()

        # casting to 'category' gives back an equal index
        tm.assert_index_equal(cat_index.astype('category'), cat_index,
                              exact=True)

        as_object = cat_index.astype(object)
        self.assert_index_equal(as_object, Index(np.array(cat_index)))

        # equal in content, yet a plain Index rather than a CategoricalIndex
        self.assertTrue(as_object.equals(cat_index))
        self.assertIsInstance(as_object, Index)
        self.assertNotIsInstance(as_object, CategoricalIndex)

        # categories that are intervals; code -1 marks a missing entry
        intervals = IntervalIndex.from_arrays(
            left=[-0.001, 2.0], right=[2, 4], closed='right')
        coded = Categorical.from_codes(
            [0, 1, -1], categories=intervals, ordered=True)
        interval_cat = CategoricalIndex(coded)

        as_interval = interval_cat.astype('interval')
        wanted = intervals.take([0, 1, -1])
        tm.assert_index_equal(as_interval, wanted)

        # rebuilding from the raw values gives the same index again
        rebuilt = IntervalIndex.from_intervals(as_interval.values)
        tm.assert_index_equal(rebuilt, wanted)
Exemplo n.º 3
0
    def test_duplicates(self):
        # An index made of one repeated value is flagged as having
        # duplicates, and drop_duplicates() keeps a single entry.
        repeated = CategoricalIndex([0, 0, 0], name='foo')

        self.assertFalse(repeated.is_unique)
        self.assertTrue(repeated.has_duplicates)

        deduplicated = repeated.drop_duplicates()
        self.assert_index_equal(deduplicated,
                                CategoricalIndex([0], name='foo'))
Exemplo n.º 4
0
    def test_duplicates(self):
        # Three copies of the same value: not unique, has duplicates.
        triple = CategoricalIndex([0, 0, 0], name='foo')
        self.assertFalse(triple.is_unique)
        self.assertTrue(triple.has_duplicates)

        # dropping the duplicates leaves one value and keeps the name
        single = CategoricalIndex([0], name='foo')
        without_repeats = triple.drop_duplicates()
        self.assert_index_equal(without_repeats, single)
Exemplo n.º 5
0
    def test_fillna_categorical(self):
        # GH 11343
        with_gap = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name='x')

        # a fill value that is already a category replaces the missing entry
        filled = with_gap.fillna(1.0)
        self.assert_index_equal(
            filled, CategoricalIndex([1.0, 1.0, 3.0, 1.0], name='x'))

        # a fill value outside the categories raises ValueError
        message = 'fill value must be in categories'
        with tm.assertRaisesRegexp(ValueError, message):
            with_gap.fillna(2.0)
Exemplo n.º 6
0
    def test_ensure_copied_data(self):
        # Check the "copy" argument of each Index.__new__ is honoured
        # GH12309
        # Kept apart from the other index tests because the values here
        # are not an ndarray.

        def owner(arr):
            # the object itself when it has no base, otherwise its base
            return arr.base if arr.base is not None else arr

        for index in self.indices.values():
            # copy=True: equal content that does not share the same owner
            copied = CategoricalIndex(index.values, copy=True)
            tm.assert_index_equal(index, copied)
            self.assertIsNot(owner(index.values), owner(copied.values))

            # copy=False: the very same owner is reused
            shared = CategoricalIndex(index.values, copy=False)
            self.assertIs(owner(index.values), owner(shared.values))
Exemplo n.º 7
0
    def test_reindex_dtype(self):
        # A list target is expected to give a plain Index, a Categorical
        # target a CategoricalIndex; the positions are the same throughout.

        def check(source, target, expected):
            res, indexer = source.reindex(target)
            tm.assert_index_equal(res, expected, exact=True)
            tm.assert_numpy_array_equal(
                indexer, np.array([0, 3, 2], dtype=np.int64))

        # categories inferred from the data
        check(CategoricalIndex(['a', 'b', 'c', 'a']),
              ['a', 'c'],
              Index(['a', 'a', 'c']))
        check(CategoricalIndex(['a', 'b', 'c', 'a']),
              Categorical(['a', 'c']),
              CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c']))

        # explicit categories that include the unused 'd'
        check(CategoricalIndex(['a', 'b', 'c', 'a'],
                               categories=['a', 'b', 'c', 'd']),
              ['a', 'c'],
              Index(['a', 'a', 'c'], dtype='object'))
        check(CategoricalIndex(['a', 'b', 'c', 'a'],
                               categories=['a', 'b', 'c', 'd']),
              Categorical(['a', 'c']),
              CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c']))
Exemplo n.º 8
0
    def test_delete(self):
        # delete() removes one position and keeps the categories.
        ci = self.create_index()
        cats = ci.categories

        # (position to delete, letters expected to remain)
        for position, remaining in ((0, 'abbca'), (-1, 'aabbc')):
            expected = CategoricalIndex(list(remaining), categories=cats)
            tm.assert_index_equal(ci.delete(position), expected, exact=True)

        # an out-of-range position raises IndexError or ValueError,
        # depending on the numpy version
        with tm.assertRaises((IndexError, ValueError)):
            ci.delete(10)
Exemplo n.º 9
0
    def test_append(self):
        # append() with matching categories, with plain objects, and with
        # inputs that must be rejected.
        ci = self.create_index()
        cats = ci.categories

        # two pieces with the same categories join back into the original
        head, tail = ci[:3], ci[3:]
        tm.assert_index_equal(head.append(tail), ci, exact=True)

        # several pieces handed over as a list
        first, middle, last = ci[:1], ci[1:3], ci[3:]
        tm.assert_index_equal(first.append([middle, last]), ci, exact=True)

        # empty
        tm.assert_index_equal(ci.append([]), ci, exact=True)

        # appending with different categories or reordered is not ok
        with self.assertRaises(TypeError):
            ci.append(ci.values.set_categories(list('abcd')))
        with self.assertRaises(TypeError):
            ci.append(ci.values.reorder_categories(list('abc')))

        # with objects
        extended = ci.append(['c', 'a'])
        wanted = CategoricalIndex(list('aabbcaca'), categories=cats)
        tm.assert_index_equal(extended, wanted, exact=True)

        # invalid objects
        with self.assertRaises(TypeError):
            ci.append(['a', 'd'])
Exemplo n.º 10
0
    def test_identical(self):
        # identical() holds for the object itself and for a copy of it, but
        # not for an index that declares an extra category.
        base = CategoricalIndex(['a', 'b'], categories=['a', 'b'],
                                ordered=True)
        extra_category = CategoricalIndex(['a', 'b'],
                                          categories=['a', 'b', 'c'],
                                          ordered=True)

        for same in (base, base.copy()):
            self.assertTrue(base.identical(same))
        self.assertFalse(base.identical(extra_category))
Exemplo n.º 11
0
    def test_repr_roundtrip(self):
        # The repr of a short index must evaluate back to an equal index;
        # the long index only has to format without raising.
        short = CategoricalIndex(['a', 'b'], categories=['a', 'b'],
                                 ordered=True)
        str(short)
        rebuilt = eval(repr(short))
        tm.assert_index_equal(rebuilt, short, exact=True)

        # formatting: builtin str on Python 3, the compat text type otherwise
        to_text = str if PY3 else compat.text_type
        to_text(short)

        # long format
        # this is not reprable
        long_index = CategoricalIndex(np.random.randint(0, 5, size=100))
        to_text(long_index)
Exemplo n.º 12
0
    def test_contains(self):
        # `in` is checked for values, unused categories, codes and NaN.
        ci = self.create_index(categories=list('cabdef'))

        self.assertTrue('a' in ci)
        # neither 'z', 'e' nor NaN is reported as contained
        for absent in ('z', 'e', np.nan):
            self.assertTrue(absent not in ci)

        # assert codes NOT in index
        for code in (0, 1):
            self.assertFalse(code in ci)

        # NaN listed among the categories (construction warns) but not
        # present in the data
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            nan_category = CategoricalIndex(
                list('aabbca'), categories=list('cabdef') + [np.nan])
        self.assertFalse(np.nan in nan_category)

        # NaN present in the data itself
        nan_value = CategoricalIndex(
            list('aabbca') + [np.nan], categories=list('cabdef'))
        self.assertTrue(np.nan in nan_value)
Exemplo n.º 13
0
    def test_reindex_dtype(self):
        # reindex() hands back the new index together with an indexer; the
        # kind of index expected depends on the kind of target.

        # inferred categories, list target
        source = CategoricalIndex(['a', 'b', 'c', 'a'])
        res, indexer = source.reindex(['a', 'c'])
        tm.assert_index_equal(res, Index(['a', 'a', 'c']), exact=True)
        tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2]))

        # inferred categories, Categorical target
        source = CategoricalIndex(['a', 'b', 'c', 'a'])
        res, indexer = source.reindex(Categorical(['a', 'c']))
        expected = CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c'])
        tm.assert_index_equal(res, expected, exact=True)
        tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2]))

        # explicit categories, list target
        source = CategoricalIndex(['a', 'b', 'c', 'a'],
                                  categories=['a', 'b', 'c', 'd'])
        res, indexer = source.reindex(['a', 'c'])
        expected = Index(['a', 'a', 'c'], dtype='object')
        tm.assert_index_equal(res, expected, exact=True)
        tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2]))

        # explicit categories, Categorical target
        source = CategoricalIndex(['a', 'b', 'c', 'a'],
                                  categories=['a', 'b', 'c', 'd'])
        res, indexer = source.reindex(Categorical(['a', 'c']))
        expected = CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c'])
        tm.assert_index_equal(res, expected, exact=True)
        tm.assert_numpy_array_equal(indexer, np.array([0, 3, 2]))
Exemplo n.º 14
0
    def test_isin(self):
        # isin() against plain lists and against re-categorised indexes.
        ci = CategoricalIndex(list('aabca') + [np.nan],
                              categories=['c', 'a', 'b'])

        only_c = ci.isin(['c'])
        tm.assert_numpy_array_equal(
            only_c, np.array([False, False, False, True, False, False]))

        all_letters = ci.isin(['c', 'a', 'b'])
        tm.assert_numpy_array_equal(all_letters,
                                    np.array([True] * 5 + [False]))

        letters_and_nan = ci.isin(['c', 'a', 'b', np.nan])
        tm.assert_numpy_array_equal(letters_and_nan, np.array([True] * 6))

        # mismatched categorical -> coerced to ndarray so doesn't matter
        superset = ci.set_categories(list('abcdefghi'))
        tm.assert_numpy_array_equal(ci.isin(superset), np.array([True] * 6))

        disjoint = ci.set_categories(list('defghi'))
        tm.assert_numpy_array_equal(ci.isin(disjoint),
                                    np.array([False] * 5 + [True]))
Exemplo n.º 15
0
    def test_insert(self):
        # insert() of a known category at several positions, and of an
        # unknown value.
        ci = self.create_index()
        cats = ci.categories

        # (position, expected letters): the 0th element first, then -1,
        # which follows Python list behavior
        for position, letters in ((0, 'aaabbca'), (-1, 'aabbcaa')):
            expected = CategoricalIndex(list(letters), categories=cats)
            tm.assert_index_equal(ci.insert(position, 'a'), expected,
                                  exact=True)

        # test empty
        grown = CategoricalIndex(categories=cats).insert(0, 'a')
        tm.assert_index_equal(grown,
                              CategoricalIndex(['a'], categories=cats),
                              exact=True)

        # invalid
        with self.assertRaises(TypeError):
            ci.insert(0, 'd')
Exemplo n.º 16
0
    def test_get_indexer(self):
        # get_indexer() is exercised with a CategoricalIndex, a list and a
        # plain Index as the target; fill methods must be rejected.
        idx1 = CategoricalIndex(list('aabcde'), categories=list('edabc'))
        idx2 = CategoricalIndex(list('abf'))

        # NOTE(review): four positions for three labels -- presumably
        # because 'a' occurs twice in idx1; confirm against get_indexer.
        expected = np.array([0, 1, 2, -1], dtype=np.intp)

        # Pass the loop variable itself so that every target flavour is
        # really looked up, not idx2 three times over.
        for indexer in [idx2, list('abf'), Index(list('abf'))]:
            r1 = idx1.get_indexer(indexer)
            assert_almost_equal(r1, expected)

        # none of the fill methods is implemented
        for method in ('pad', 'backfill', 'nearest'):
            with self.assertRaises(NotImplementedError):
                idx2.get_indexer(idx1, method=method)
Exemplo n.º 17
0
    def test_get_loc(self):
        # GH 12531
        # get_loc() on a CategoricalIndex is compared with a plain Index
        # holding the same labels.
        cat_unique = CategoricalIndex(list('abcde'), categories=list('edabc'))
        plain_unique = Index(list('abcde'))
        for label in ('a', 'e'):
            self.assertEqual(cat_unique.get_loc(label),
                             plain_unique.get_loc(label))

        for index in (cat_unique, plain_unique):
            with tm.assertRaises(KeyError):
                index.get_loc('NOT-EXIST')

        # non-unique
        cat_dupes = CategoricalIndex(list('aacded'), categories=list('edabc'))
        plain_dupes = Index(list('aacded'))

        # results in bool array
        mask = cat_dupes.get_loc('d')
        self.assert_numpy_array_equal(mask, plain_dupes.get_loc('d'))
        self.assert_numpy_array_equal(
            mask, np.array([False, False, False, True, False, True]))

        # unique element results in scalar
        position = cat_dupes.get_loc('e')
        self.assertEqual(position, plain_dupes.get_loc('e'))
        self.assertEqual(position, 4)

        for index in (cat_dupes, plain_dupes):
            with tm.assertRaises(KeyError):
                index.get_loc('NOT-EXIST')

        # non-unique, slicable
        cat_runs = CategoricalIndex(list('aabbb'), categories=list('abc'))
        plain_runs = Index(list('aabbb'))

        # results in slice
        for label, span in (('a', slice(0, 2, None)),
                            ('b', slice(2, 5, None))):
            found = cat_runs.get_loc(label)
            self.assertEqual(found, plain_runs.get_loc(label))
            self.assertEqual(found, span)

        # 'c' is a category of cat_runs but none of its values
        for index in (cat_runs, plain_runs):
            with tm.assertRaises(KeyError):
                index.get_loc('c')
Exemplo n.º 18
0
    def test_contains(self):
        # `in` is checked for values, unused categories, codes and NaN.
        letters = self.create_index(categories=list('cabdef'))

        self.assertTrue('a' in letters)
        for missing in ('z', 'e', np.nan):
            self.assertTrue(missing not in letters)

        # assert codes NOT in index
        for code in (0, 1):
            self.assertFalse(code in letters)

        # NaN becomes a member once it is part of the data
        with_nan = CategoricalIndex(list('aabbca') + [np.nan],
                                    categories=list('cabdef'))
        self.assertTrue(np.nan in with_nan)
Exemplo n.º 19
0
    def test_fillna_categorical(self):
        # GH 11343
        source = CategoricalIndex([1.0, np.nan, 3.0, 1.0], name='x')

        # fill by value in categories
        wanted = CategoricalIndex([1.0, 1.0, 3.0, 1.0], name='x')
        result = source.fillna(1.0)
        self.assert_index_equal(result, wanted)

        # fill by value not in categories raises ValueError
        expected_message = 'fill value must be in categories'
        with tm.assertRaisesRegexp(ValueError, expected_message):
            source.fillna(2.0)
Exemplo n.º 20
0
    def test_isin(self):
        # isin() against plain lists and against re-categorised indexes.
        ci = CategoricalIndex(
            list('aabca') + [np.nan], categories=['c', 'a', 'b'])

        def expect(values, mask):
            # compare ci.isin(values) with the expected boolean mask
            tm.assert_numpy_array_equal(ci.isin(values), np.array(mask))

        expect(['c'], [False, False, False, True, False, False])
        expect(['c', 'a', 'b'], [True] * 5 + [False])
        expect(['c', 'a', 'b', np.nan], [True] * 6)

        # mismatched categorical -> coerced to ndarray so doesn't matter
        expect(ci.set_categories(list('abcdefghi')), [True] * 6)
        expect(ci.set_categories(list('defghi')), [False] * 5 + [True])
Exemplo n.º 21
0
    def test_get_indexer(self):
        # get_indexer() is exercised with a CategoricalIndex, a list and a
        # plain Index as the target; fill methods must be rejected.
        idx1 = CategoricalIndex(list('aabcde'), categories=list('edabc'))
        idx2 = CategoricalIndex(list('abf'))

        # Use the loop variable as the target so that each of the three
        # target flavours is really looked up in idx1.
        for indexer in [idx2, list('abf'), Index(list('abf'))]:
            r1 = idx1.get_indexer(indexer)
            # NOTE(review): four positions for three labels -- presumably
            # because 'a' occurs twice in idx1; confirm.
            assert_almost_equal(r1, np.array([0, 1, 2, -1]))

        # none of the fill methods is implemented
        for method in ('pad', 'backfill', 'nearest'):
            with self.assertRaises(NotImplementedError):
                idx2.get_indexer(idx1, method=method)
Exemplo n.º 22
0
    def test_equals(self):
        # equals() and the element-wise comparison operators.
        two_cats = CategoricalIndex(['a', 'b'], categories=['a', 'b'],
                                    ordered=True)
        three_cats = CategoricalIndex(['a', 'b'],
                                      categories=['a', 'b', 'c'],
                                      ordered=True)

        self.assertTrue(two_cats.equals(two_cats))
        self.assertFalse(two_cats.equals(three_cats))
        # equality with the object-dtype form holds in both directions
        self.assertTrue(two_cats.equals(two_cats.astype(object)))
        self.assertTrue(two_cats.astype(object).equals(two_cats))

        # element-wise comparison of the index with itself
        self.assertTrue((two_cats == two_cats).all())
        self.assertFalse((two_cats != two_cats).all())
        self.assertFalse((two_cats > two_cats).all())
        self.assertFalse((two_cats < two_cats).all())
        self.assertTrue((two_cats <= two_cats).all())
        self.assertTrue((two_cats >= two_cats).all())

        # comparison with a scalar, a plain Index and the raw values
        self.assertFalse((two_cats == 1).all())
        self.assertTrue((two_cats == Index(['a', 'b'])).all())
        self.assertTrue((two_cats == two_cats.values).all())

        # invalid comparisons
        with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
            two_cats == Index(['a', 'b', 'c'])
        with self.assertRaises(TypeError):
            two_cats == three_cats
        with self.assertRaises(TypeError):
            two_cats == Categorical(two_cats.values, ordered=False)
        with self.assertRaises(TypeError):
            two_cats == Categorical(two_cats.values, categories=list('abc'))

        # tests
        # make sure that we are testing for category inclusion properly
        plain = CategoricalIndex(list('aabca'), categories=['c', 'a', 'b'])
        self.assertTrue(plain.equals(list('aabca')))

        # NaN listed as a category: a FutureWarning is expected in here
        with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
            nan_category = CategoricalIndex(
                list('aabca'), categories=['c', 'a', 'b', np.nan])
            self.assertTrue(nan_category.equals(list('aabca')))

        # NaN in the data only matches a list that carries the NaN as well
        nan_value = CategoricalIndex(list('aabca') + [np.nan],
                                     categories=['c', 'a', 'b'])
        self.assertFalse(nan_value.equals(list('aabca')))

        nan_value = CategoricalIndex(list('aabca') + [np.nan],
                                     categories=['c', 'a', 'b'])
        self.assertTrue(nan_value.equals(list('aabca') + [np.nan]))
Exemplo n.º 23
0
    def test_method_delegation(self):
        # Category-manipulating methods called on the index are compared
        # with a CategoricalIndex built directly with the resulting layout.

        def build(cats, **kwargs):
            # fresh index over the fixed letters with the given categories
            return CategoricalIndex(list('aabbca'), categories=list(cats),
                                    **kwargs)

        narrowed = build('cabdef').set_categories(list('cab'))
        tm.assert_index_equal(narrowed, build('cab'))

        renamed = build('cab').rename_categories(list('efg'))
        tm.assert_index_equal(
            renamed, CategoricalIndex(list('ffggef'), categories=list('efg')))

        widened = build('cab').add_categories(['d'])
        tm.assert_index_equal(widened, build('cabd'))

        # values of a removed category turn into NaN
        trimmed = build('cab').remove_categories(['c'])
        tm.assert_index_equal(
            trimmed,
            CategoricalIndex(list('aabb') + [np.nan] + ['a'],
                             categories=list('ab')))

        ci = build('cabdef')
        tm.assert_index_equal(ci.as_unordered(), ci)

        ci = build('cabdef')
        tm.assert_index_equal(ci.as_ordered(), build('cabdef', ordered=True))

        # invalid
        with self.assertRaises(ValueError):
            ci.set_categories(list('cab'), inplace=True)
Exemplo n.º 24
0
    def test_equals_categorical(self):
        # equals() and the element-wise comparison operators.
        two_cats = CategoricalIndex(['a', 'b'], categories=['a', 'b'],
                                    ordered=True)
        three_cats = CategoricalIndex(['a', 'b'], categories=['a', 'b', 'c'],
                                      ordered=True)

        self.assertTrue(two_cats.equals(two_cats))
        self.assertFalse(two_cats.equals(three_cats))
        # equality with the object-dtype form holds in both directions
        self.assertTrue(two_cats.equals(two_cats.astype(object)))
        self.assertTrue(two_cats.astype(object).equals(two_cats))

        # element-wise comparison of the index with itself
        self.assertTrue((two_cats == two_cats).all())
        self.assertFalse((two_cats != two_cats).all())
        self.assertFalse((two_cats > two_cats).all())
        self.assertFalse((two_cats < two_cats).all())
        self.assertTrue((two_cats <= two_cats).all())
        self.assertTrue((two_cats >= two_cats).all())

        # comparison with a scalar, a plain Index and the raw values
        self.assertFalse((two_cats == 1).all())
        self.assertTrue((two_cats == Index(['a', 'b'])).all())
        self.assertTrue((two_cats == two_cats.values).all())

        # invalid comparisons
        with tm.assertRaisesRegexp(ValueError, "Lengths must match"):
            two_cats == Index(['a', 'b', 'c'])
        with self.assertRaises(TypeError):
            two_cats == three_cats
        with self.assertRaises(TypeError):
            two_cats == Categorical(two_cats.values, ordered=False)
        with self.assertRaises(TypeError):
            two_cats == Categorical(two_cats.values, categories=list('abc'))

        # tests
        # make sure that we are testing for category inclusion properly
        letters = list('aabca')
        letters_nan = list('aabca') + [np.nan]
        # (data of the index under test, data it is compared against)
        scenarios = ((letters, letters),
                     (letters_nan, letters),
                     (letters_nan, letters_nan))
        for data, other in scenarios:
            ci = CategoricalIndex(data, categories=['c', 'a', 'b'])
            self.assertFalse(ci.equals(other))
            self.assertFalse(ci.equals(CategoricalIndex(other)))
            self.assertTrue(ci.equals(ci.copy()))
Exemplo n.º 25
0
    def test_get_loc(self):
        # GH 12531
        # Each CategoricalIndex is paired with a plain Index over the same
        # labels and both are expected to answer get_loc() alike.
        cat_a = CategoricalIndex(list('abcde'), categories=list('edabc'))
        ref_a = Index(list('abcde'))
        self.assertEqual(cat_a.get_loc('a'), ref_a.get_loc('a'))
        self.assertEqual(cat_a.get_loc('e'), ref_a.get_loc('e'))

        for candidate in (cat_a, ref_a):
            with tm.assertRaises(KeyError):
                candidate.get_loc('NOT-EXIST')

        # non-unique
        cat_b = CategoricalIndex(list('aacded'), categories=list('edabc'))
        ref_b = Index(list('aacded'))

        # results in bool array
        d_mask = cat_b.get_loc('d')
        wanted_mask = np.array([False, False, False, True, False, True])
        self.assert_numpy_array_equal(d_mask, ref_b.get_loc('d'))
        self.assert_numpy_array_equal(d_mask, wanted_mask)

        # unique element results in scalar
        e_position = cat_b.get_loc('e')
        self.assertEqual(e_position, ref_b.get_loc('e'))
        self.assertEqual(e_position, 4)

        for candidate in (cat_b, ref_b):
            with tm.assertRaises(KeyError):
                candidate.get_loc('NOT-EXIST')

        # non-unique, slicable
        cat_c = CategoricalIndex(list('aabbb'), categories=list('abc'))
        ref_c = Index(list('aabbb'))

        # results in slice
        a_span = cat_c.get_loc('a')
        self.assertEqual(a_span, ref_c.get_loc('a'))
        self.assertEqual(a_span, slice(0, 2, None))

        b_span = cat_c.get_loc('b')
        self.assertEqual(b_span, ref_c.get_loc('b'))
        self.assertEqual(b_span, slice(2, 5, None))

        # 'c' is a category of cat_c but none of its values
        for candidate in (cat_c, ref_c):
            with tm.assertRaises(KeyError):
                candidate.get_loc('c')
Exemplo n.º 26
0
    def test_method_delegation(self):
        # Category-manipulating methods called on the index are compared
        # with a CategoricalIndex built directly with the resulting layout.

        # set_categories
        source = CategoricalIndex(list('aabbca'), categories=list('cabdef'))
        wanted = CategoricalIndex(list('aabbca'), categories=list('cab'))
        tm.assert_index_equal(source.set_categories(list('cab')), wanted)

        # rename_categories
        source = CategoricalIndex(list('aabbca'), categories=list('cab'))
        wanted = CategoricalIndex(list('ffggef'), categories=list('efg'))
        tm.assert_index_equal(source.rename_categories(list('efg')), wanted)

        # add_categories
        source = CategoricalIndex(list('aabbca'), categories=list('cab'))
        wanted = CategoricalIndex(list('aabbca'), categories=list('cabd'))
        tm.assert_index_equal(source.add_categories(['d']), wanted)

        # remove_categories: values of the removed category turn into NaN
        source = CategoricalIndex(list('aabbca'), categories=list('cab'))
        wanted = CategoricalIndex(list('aabb') + [np.nan] + ['a'],
                                  categories=list('ab'))
        tm.assert_index_equal(source.remove_categories(['c']), wanted)

        # as_unordered
        source = CategoricalIndex(list('aabbca'), categories=list('cabdef'))
        tm.assert_index_equal(source.as_unordered(), source)

        # as_ordered
        source = CategoricalIndex(list('aabbca'), categories=list('cabdef'))
        wanted = CategoricalIndex(list('aabbca'), categories=list('cabdef'),
                                  ordered=True)
        tm.assert_index_equal(source.as_ordered(), wanted)

        # invalid
        with self.assertRaises(ValueError):
            source.set_categories(list('cab'), inplace=True)
Exemplo n.º 27
0
    def test_reindex_dtype(self):
        # A list target is expected to give a plain Index, a Categorical
        # target a CategoricalIndex; the positions are the same throughout.

        # inferred categories, list target
        inferred = CategoricalIndex(['a', 'b', 'c', 'a'])
        new_index, positions = inferred.reindex(['a', 'c'])
        tm.assert_index_equal(new_index, Index(['a', 'a', 'c']), exact=True)
        tm.assert_numpy_array_equal(
            positions, np.array([0, 3, 2], dtype=np.int64))

        # inferred categories, Categorical target
        inferred = CategoricalIndex(['a', 'b', 'c', 'a'])
        new_index, positions = inferred.reindex(Categorical(['a', 'c']))
        wanted = CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c'])
        tm.assert_index_equal(new_index, wanted, exact=True)
        tm.assert_numpy_array_equal(
            positions, np.array([0, 3, 2], dtype=np.int64))

        # explicit categories, list target
        explicit = CategoricalIndex(['a', 'b', 'c', 'a'],
                                    categories=['a', 'b', 'c', 'd'])
        new_index, positions = explicit.reindex(['a', 'c'])
        wanted = Index(['a', 'a', 'c'], dtype='object')
        tm.assert_index_equal(new_index, wanted, exact=True)
        tm.assert_numpy_array_equal(
            positions, np.array([0, 3, 2], dtype=np.int64))

        # explicit categories, Categorical target
        explicit = CategoricalIndex(['a', 'b', 'c', 'a'],
                                    categories=['a', 'b', 'c', 'd'])
        new_index, positions = explicit.reindex(Categorical(['a', 'c']))
        wanted = CategoricalIndex(['a', 'a', 'c'], categories=['a', 'c'])
        tm.assert_index_equal(new_index, wanted, exact=True)
        tm.assert_numpy_array_equal(
            positions, np.array([0, 3, 2], dtype=np.int64))
Exemplo n.º 28
0
    def test_construction(self):
        # Construction from an index, raw values, a Categorical and from
        # nothing but categories.
        ci = self.create_index(categories=list('abcd'))
        categories = ci.categories

        def check_layout(built, cats, codes):
            # compare the categories and the int8 codes of a built index
            self.assert_index_equal(built.categories, Index(cats))
            tm.assert_numpy_array_equal(built.codes,
                                        np.array(codes, dtype='int8'))

        # Index() given a CategoricalIndex or its values keeps the type
        from_index = Index(ci)
        tm.assert_index_equal(from_index, ci, exact=True)
        self.assertFalse(from_index.ordered)

        from_values = Index(ci.values)
        tm.assert_index_equal(from_values, ci, exact=True)
        self.assertFalse(from_values.ordered)

        # empty
        empty = CategoricalIndex(categories=categories)
        check_layout(empty, categories, [])
        self.assertFalse(empty.ordered)

        # passing categories
        from_list = CategoricalIndex(list('aabbca'), categories=categories)
        check_layout(from_list, categories, [0, 0, 1, 1, 2, 0])

        # from a Categorical, without and with explicit categories
        c = pd.Categorical(list('aabbca'))
        from_cat = CategoricalIndex(c)
        check_layout(from_cat, list('abc'), [0, 0, 1, 1, 2, 0])
        self.assertFalse(from_cat.ordered)

        from_cat = CategoricalIndex(c, categories=categories)
        check_layout(from_cat, categories, [0, 0, 1, 1, 2, 0])
        self.assertFalse(from_cat.ordered)

        # from another CategoricalIndex
        ci = CategoricalIndex(c, categories=list('abcd'))
        from_ci = CategoricalIndex(ci)
        check_layout(from_ci, categories, [0, 0, 1, 1, 2, 0])
        self.assertFalse(from_ci.ordered)

        # narrowing the categories: the dropped value gets code -1
        narrowed = CategoricalIndex(ci, categories=list('ab'))
        check_layout(narrowed, list('ab'), [0, 0, 1, 1, -1, 0])
        self.assertFalse(narrowed.ordered)

        narrowed = CategoricalIndex(ci, categories=list('ab'), ordered=True)
        check_layout(narrowed, list('ab'), [0, 0, 1, 1, -1, 0])
        self.assertTrue(narrowed.ordered)

        # turn me to an Index
        plain = Index(np.array(ci))
        self.assertIsInstance(plain, Index)
        self.assertNotIsInstance(plain, CategoricalIndex)
Exemplo n.º 29
0
 def create_index(self, categories=None, ordered=False):
     # Build the fixture index over the letters 'aabbca'; the categories
     # fall back to ['c', 'a', 'b'] when none are supplied.
     chosen = list('cab') if categories is None else categories
     values = list('aabbca')
     return CategoricalIndex(values, categories=chosen, ordered=ordered)