def test_categorical_dtype(self):
    """Check ``read_csv`` results when columns are requested as categorical.

    Covers four situations: every column categorical (requested three
    different ways), a single categorical column (requested by label and
    by integer key), categories appearing in unsorted order, and a
    ``nan`` field in a categorical column.
    """
    # GH 10153
    # Rows are joined with explicit "\n" escapes so the fixture keeps its
    # line structure regardless of how the source file is re-flowed.
    data = "a,b,c\n1,a,3.4\n1,a,3.4\n2,b,4.5"
    expected = pd.DataFrame({'a': Categorical(['1', '1', '2']),
                             'b': Categorical(['a', 'a', 'b']),
                             'c': Categorical(['3.4', '3.4', '4.5'])})

    # The same all-categorical frame is expected whether the dtype is given
    # as the string alias, a CategoricalDtype instance, or a per-column dict.
    all_category_specs = [
        'category',
        CategoricalDtype(),
        {'a': 'category', 'b': 'category', 'c': CategoricalDtype()},
    ]
    for dtype in all_category_specs:
        actual = self.read_csv(StringIO(data), dtype=dtype)
        tm.assert_frame_equal(actual, expected)

    # Only column ``b`` is requested as categorical; the expected frame keeps
    # ``a`` and ``c`` as plain ints and floats.  The integer key ``1`` is
    # expected to produce the same frame as the label ``'b'``.
    expected = pd.DataFrame({'a': [1, 1, 2],
                             'b': Categorical(['a', 'a', 'b']),
                             'c': [3.4, 3.4, 4.5]})
    for dtype in ({'b': 'category'}, {1: 'category'}):
        actual = self.read_csv(StringIO(data), dtype=dtype)
        tm.assert_frame_equal(actual, expected)

    # unsorted
    data = "a,b,c\n1,b,3.4\n1,b,3.4\n2,a,4.5"
    expected = pd.DataFrame({'a': Categorical(['1', '1', '2']),
                             'b': Categorical(['b', 'b', 'a']),
                             'c': Categorical(['3.4', '3.4', '4.5'])})
    actual = self.read_csv(StringIO(data), dtype='category')
    tm.assert_frame_equal(actual, expected)

    # missing
    data = "a,b,c\n1,b,3.4\n1,nan,3.4\n2,a,4.5"
    expected = pd.DataFrame({'a': Categorical(['1', '1', '2']),
                             'b': Categorical(['b', np.nan, 'a']),
                             'c': Categorical(['3.4', '3.4', '4.5'])})
    actual = self.read_csv(StringIO(data), dtype='category')
    tm.assert_frame_equal(actual, expected)
def test_pandas_dtype():
    """Check that ``pandas_dtype`` resolves string aliases to dtype objects.

    Covers a timezone-aware datetime alias, the ``'category'`` alias, and
    several aliases that should resolve to the matching NumPy dtype.
    """
    # Build the expected tz-aware dtype from an explicit (unit, tz) pair.
    # Passing the full 'datetime64[ns, tz]' alias as the constructor's first
    # argument was deprecated and is rejected by later pandas releases.
    assert pandas_dtype('datetime64[ns, US/Eastern]') == DatetimeTZDtype(
        'ns', 'US/Eastern')
    assert pandas_dtype('category') == CategoricalDtype()
    for dtype in ['M8[ns]', 'm8[ns]', 'object', 'float64', 'int64']:
        assert pandas_dtype(dtype) == np.dtype(dtype)
def test_hash_vs_equality(self):
    """The fixture dtype and a new ``CategoricalDtype()`` must be equal,
    identical (``is``) and hash to the same value."""
    # make sure that we satisfy is semantics
    fixture_dtype = self.dtype
    fresh_dtype = CategoricalDtype()
    # Each check is made in both operand orders.
    both_orders = ((fixture_dtype, fresh_dtype),
                   (fresh_dtype, fixture_dtype))
    for left, right in both_orders:
        self.assertTrue(left == right)
    for left, right in both_orders:
        self.assertTrue(left is right)
    self.assertTrue(hash(fixture_dtype) == hash(fresh_dtype))
def test_is_dtype(self):
    """``CategoricalDtype.is_dtype`` is true for categorical inputs and
    false for the unrelated inputs checked here."""
    is_categorical = CategoricalDtype.is_dtype
    # The fixture dtype, the string alias and a new instance are accepted.
    for candidate in (self.dtype, 'category', CategoricalDtype()):
        self.assertTrue(is_categorical(candidate))
    # An unknown string and a NumPy float type are rejected.
    for candidate in ('foo', np.float64):
        self.assertFalse(is_categorical(candidate))
def test_equality(self):
    """``is_dtype_equal`` matches the fixture dtype against ``'category'``
    and a new ``CategoricalDtype()``, but not against ``'foo'``."""
    subject = self.dtype
    for equivalent in ('category', CategoricalDtype()):
        self.assertTrue(is_dtype_equal(subject, equivalent))
    self.assertFalse(is_dtype_equal(subject, 'foo'))
def setUp(self):
    """Store a default-constructed ``CategoricalDtype`` on ``self.dtype``."""
    categorical = CategoricalDtype()
    self.dtype = categorical
def test_categorical_dtype(self):
    """Check ``find_common_type`` on lists containing a ``CategoricalDtype``.

    One or two categorical dtypes are expected to compare equal to
    ``'category'``; mixing in ``object`` is expected to give ``object``.
    """
    dtype = CategoricalDtype()
    self.assertEqual(find_common_type([dtype]), 'category')
    self.assertEqual(find_common_type([dtype, dtype]), 'category')
    # ``np.object`` was only an alias of the builtin ``object``; the alias
    # has been deprecated and removed from NumPy, so use the builtin.
    self.assertEqual(find_common_type([object, dtype]), object)
def test_categorical_dtype(self):
    """``pandas_dtype('category')`` must equal a new ``CategoricalDtype()``."""
    resolved = pandas_dtype('category')
    self.assertEqual(resolved, CategoricalDtype())
def test_as_json_table_type_categorical_dtypes(self):
    """``as_json_table_type`` must return ``'any'`` for both the
    ``pd.Categorical`` class and a ``CategoricalDtype`` instance."""
    for categorical_like in (pd.Categorical, CategoricalDtype()):
        self.assertEqual(as_json_table_type(categorical_like), 'any')