示例#1
0
 def test_update_dtype_string(self, ordered):
     dtype = CategoricalDtype(list('abc'), ordered)
     expected_categories = dtype.categories
     expected_ordered = dtype.ordered
     result = dtype.update_dtype('category')
     tm.assert_index_equal(result.categories, expected_categories)
     assert result.ordered is expected_ordered
示例#2
0
    def test_update_dtype(self, ordered, new_categories, new_ordered):
        dtype = CategoricalDtype(list('abc'), ordered)
        new_dtype = CategoricalDtype(new_categories, new_ordered)

        expected_categories = new_dtype.categories
        if expected_categories is None:
            expected_categories = dtype.categories

        expected_ordered = new_dtype.ordered
        if expected_ordered is None:
            expected_ordered = dtype.ordered

        result = dtype.update_dtype(new_dtype)
        tm.assert_index_equal(result.categories, expected_categories)
        assert result.ordered is expected_ordered
示例#3
0
文件: category.py 项目: pydata/pandas
    def __new__(cls, data=None, categories=None, ordered=None, dtype=None,
                copy=False, name=None, fastpath=None):
        """Build a new instance from ``data`` and categorical dtype options.

        Parameters
        ----------
        data : optional
            Values to wrap. A scalar is only tolerated when it is ``None``
            and ``categories`` is given; otherwise it is reported through
            ``cls._scalar_data_error``.
        categories, ordered, dtype : optional
            Forwarded to ``CategoricalDtype._from_values_or_dtype`` to
            resolve the dtype used for the categorical.
        copy : bool, default False
            Copy the categorical built from ``data`` before wrapping it.
        name : optional
            Name of the result; defaults to ``data.name`` when ``data`` has
            that attribute.
        fastpath : optional
            Deprecated. Any non-None value emits a ``FutureWarning``; a
            truthy value short-circuits straight to ``cls._simple_new``.
        """
        if fastpath is not None:
            warnings.warn("The 'fastpath' keyword is deprecated, and will be "
                          "removed in a future version.",
                          FutureWarning, stacklevel=2)
            if fastpath:
                return cls._simple_new(data, name=name, dtype=dtype)

        dtype = CategoricalDtype._from_values_or_dtype(data, categories,
                                                       ordered, dtype)

        # inherit the name from the input when none was given explicitly
        if name is None and hasattr(data, 'name'):
            name = data.name

        # don't allow scalars; if data is None, then categories must be
        # provided
        if not is_categorical_dtype(data) and is_scalar(data):
            if data is not None or categories is None:
                cls._scalar_data_error(data)
            data = []

        data = cls._create_categorical(data, dtype=dtype)
        if copy:
            data = data.copy()

        return cls._simple_new(data, name=name)
示例#4
0
 def test_is_dtype(self):
     self.assertTrue(CategoricalDtype.is_dtype(self.dtype))
     self.assertTrue(CategoricalDtype.is_dtype('category'))
     self.assertTrue(CategoricalDtype.is_dtype(CategoricalDtype()))
     self.assertFalse(CategoricalDtype.is_dtype('foo'))
     self.assertFalse(CategoricalDtype.is_dtype(np.float64))
示例#5
0
 def test_from_values_or_dtype_invalid_dtype(self):
     """Passing ``object`` as the dtype is rejected with a ValueError."""
     expected_error = (
         "Cannot not construct CategoricalDtype from <class 'object'>")
     no_values = no_categories = no_ordered = None
     with pytest.raises(ValueError, match=expected_error):
         CategoricalDtype._from_values_or_dtype(
             no_values, no_categories, no_ordered, object)
示例#6
0
 def test_from_values_or_dtype(self, values, categories, ordered, dtype,
                               expected):
     result = CategoricalDtype._from_values_or_dtype(
         values, categories, ordered, dtype)
     assert result == expected
示例#7
0
class TestDataFrameDataTypes(TestData):
    def test_concat_empty_dataframe_dtypes(self):
        """Concatenating empty frames keeps matching dtypes and upcasts
        columns whose dtypes differ between the inputs."""
        typed = [('a', np.bool_), ('b', np.int32), ('c', np.float64)]
        empty = DataFrame(columns=list("abc"))
        for label, col_dtype in typed:
            empty[label] = empty[label].astype(col_dtype)

        # identical dtypes on both sides come through unchanged
        same = pd.concat([empty, empty])
        for label, col_dtype in typed:
            assert same[label].dtype == col_dtype

        # against an all-float64 frame: bool -> object, int32 -> float64
        mixed = pd.concat([empty, empty.astype(np.float64)])
        upcast = [('a', np.object_), ('b', np.float64), ('c', np.float64)]
        for label, col_dtype in upcast:
            assert mixed[label].dtype == col_dtype

    def test_empty_frame_dtypes_ftypes(self):
        """``dtypes`` and ``ftypes`` of empty and degenerate frames.

        Covers a completely empty frame, a frame with an index but no
        columns, frames with columns but no rows, and an empty slice of a
        populated frame.

        The builtins ``object`` and ``bool`` are used instead of the
        ``np.object`` / ``np.bool`` aliases: the aliases were the very same
        objects, and NumPy has deprecated and later removed them.
        """
        empty_df = pd.DataFrame()
        assert_series_equal(empty_df.dtypes, pd.Series(dtype=object))
        assert_series_equal(empty_df.ftypes, pd.Series(dtype=object))

        nocols_df = pd.DataFrame(index=[1, 2, 3])
        assert_series_equal(nocols_df.dtypes, pd.Series(dtype=object))
        assert_series_equal(nocols_df.ftypes, pd.Series(dtype=object))

        norows_df = pd.DataFrame(columns=list("abc"))
        assert_series_equal(norows_df.dtypes,
                            pd.Series(object, index=list("abc")))
        assert_series_equal(norows_df.ftypes,
                            pd.Series('object:dense', index=list("abc")))

        norows_int_df = pd.DataFrame(columns=list("abc")).astype(np.int32)
        assert_series_equal(norows_int_df.dtypes,
                            pd.Series(np.dtype('int32'), index=list("abc")))
        assert_series_equal(norows_int_df.ftypes,
                            pd.Series('int32:dense', index=list("abc")))

        odict = OrderedDict
        df = pd.DataFrame(odict([('a', 1), ('b', True), ('c', 1.0)]),
                          index=[1, 2, 3])
        ex_dtypes = pd.Series(
            odict([('a', np.int64), ('b', bool), ('c', np.float64)]))
        ex_ftypes = pd.Series(
            odict([('a', 'int64:dense'), ('b', 'bool:dense'),
                   ('c', 'float64:dense')]))
        assert_series_equal(df.dtypes, ex_dtypes)
        assert_series_equal(df.ftypes, ex_ftypes)

        # same but for empty slice of df
        assert_series_equal(df[:0].dtypes, ex_dtypes)
        assert_series_equal(df[:0].ftypes, ex_ftypes)

    def test_datetime_with_tz_dtypes(self):
        """Naive and tz-aware datetime columns report their own dtypes, also
        after ``NaT`` has been written into the tz-aware columns."""
        naive = date_range('20130101', periods=3)
        eastern = date_range('20130101', periods=3, tz='US/Eastern')
        cet = date_range('20130101', periods=3, tz='CET')
        tzframe = DataFrame({'A': naive, 'B': eastern, 'C': cet})

        # blank out the middle row of both tz-aware columns
        for column_position in (1, 2):
            tzframe.iloc[1, column_position] = pd.NaT

        result = tzframe.dtypes.sort_index()
        expected_dtypes = [
            np.dtype('datetime64[ns]'),
            DatetimeTZDtype('ns', 'US/Eastern'),
            DatetimeTZDtype('ns', 'CET'),
        ]
        expected = Series(expected_dtypes, ['A', 'B', 'C'])

        assert_series_equal(result, expected)

    def test_dtypes_are_correct_after_column_slice(self):
        """Taking a column slice must not disturb the frame's dtypes.

        ``np.float64`` is spelled out instead of the ``np.float_`` alias; the
        alias named the same type and is no longer available in NumPy 2.0.
        """
        # GH6525
        df = pd.DataFrame(index=range(5), columns=list("abc"),
                          dtype=np.float64)
        odict = OrderedDict
        assert_series_equal(
            df.dtypes,
            pd.Series(
                odict([('a', np.float64), ('b', np.float64),
                       ('c', np.float64)])))
        assert_series_equal(df.iloc[:, 2:].dtypes,
                            pd.Series(odict([('c', np.float64)])))
        # the original frame still reports the same dtypes after slicing
        assert_series_equal(
            df.dtypes,
            pd.Series(
                odict([('a', np.float64), ('b', np.float64),
                       ('c', np.float64)])))

    def test_select_dtypes_include_using_list_like(self):
        """``select_dtypes`` with list-like ``include``/``exclude`` keeps the
        expected columns of a frame holding one column per dtype family."""
        df = DataFrame({
            'a': list('abc'),
            'b': list(range(1, 4)),
            'c': np.arange(3, 6).astype('u1'),
            'd': np.arange(4.0, 7.0, dtype='float64'),
            'e': [True, False, True],
            'f': pd.Categorical(list('abc')),
            'g': pd.date_range('20130101', periods=3),
            'h': pd.date_range('20130101', periods=3, tz='US/Eastern'),
            'i': pd.date_range('20130101', periods=3, tz='CET'),
            'j': pd.period_range('2013-01', periods=3, freq='M'),
            'k': pd.timedelta_range('1 day', periods=3)
        })

        # (select_dtypes keyword arguments, columns expected to be kept)
        cases = [
            ({'include': [np.number]},
             ['b', 'c', 'd', 'k']),
            ({'include': [np.number], 'exclude': ['timedelta']},
             ['b', 'c', 'd']),
            ({'include': [np.number, 'category'], 'exclude': ['timedelta']},
             ['b', 'c', 'd', 'f']),
            ({'include': ['datetime']}, ['g']),
            ({'include': ['datetime64']}, ['g']),
            ({'include': ['datetimetz']}, ['h', 'i']),
        ]
        for selector, kept in cases:
            assert_frame_equal(df.select_dtypes(**selector), df[kept])

        # selecting periods must raise NotImplementedError with no message
        with pytest.raises(NotImplementedError, match=r"^$"):
            df.select_dtypes(include=['period'])

    def test_select_dtypes_exclude_using_list_like(self):
        """Excluding ``np.number`` through a list drops the numeric columns
        and keeps columns 'a' and 'e'."""
        columns = {
            'a': list('abc'),
            'b': list(range(1, 4)),
            'c': np.arange(3, 6).astype('u1'),
            'd': np.arange(4.0, 7.0, dtype='float64'),
            'e': [True, False, True]
        }
        df = DataFrame(columns)

        result = df.select_dtypes(exclude=[np.number])
        expected = df[['a', 'e']]
        assert_frame_equal(result, expected)

    def test_select_dtypes_exclude_include_using_list_like(self):
        """``include`` and ``exclude`` passed together as tuples, first
        mixing type objects with names, then using dtype names only."""
        df = DataFrame({
            'a': list('abc'),
            'b': list(range(1, 4)),
            'c': np.arange(3, 6).astype('u1'),
            'd': np.arange(4.0, 7.0, dtype='float64'),
            'e': [True, False, True],
            'f': pd.date_range('now', periods=3).values
        })

        # (include, exclude, columns expected to be kept)
        cases = [
            ((np.bool_, 'integer'), (np.datetime64,), ['b', 'c', 'e']),
            (('bool', 'int64', 'int32'), ('datetime',), ['b', 'e']),
        ]
        for include, exclude, kept in cases:
            selected = df.select_dtypes(include=include, exclude=exclude)
            assert_frame_equal(selected, df[kept])

    def test_select_dtypes_include_using_scalars(self):
        """``include`` may be a single scalar instead of a list."""
        df = DataFrame({
            'a': list('abc'),
            'b': list(range(1, 4)),
            'c': np.arange(3, 6).astype('u1'),
            'd': np.arange(4.0, 7.0, dtype='float64'),
            'e': [True, False, True],
            'f': pd.Categorical(list('abc')),
            'g': pd.date_range('20130101', periods=3),
            'h': pd.date_range('20130101', periods=3, tz='US/Eastern'),
            'i': pd.date_range('20130101', periods=3, tz='CET'),
            'j': pd.period_range('2013-01', periods=3, freq='M'),
            'k': pd.timedelta_range('1 day', periods=3)
        })

        # (scalar passed as include, columns expected to be kept)
        cases = [
            (np.number, ['b', 'c', 'd', 'k']),
            ('datetime', ['g']),
            ('datetime64', ['g']),
            ('category', ['f']),
        ]
        for wanted, kept in cases:
            assert_frame_equal(df.select_dtypes(include=wanted), df[kept])

        # selecting periods must raise NotImplementedError with no message
        with pytest.raises(NotImplementedError, match=r"^$"):
            df.select_dtypes(include='period')

    def test_select_dtypes_exclude_using_scalars(self):
        """``exclude`` may be a single scalar instead of a list."""
        df = DataFrame({
            'a': list('abc'),
            'b': list(range(1, 4)),
            'c': np.arange(3, 6).astype('u1'),
            'd': np.arange(4.0, 7.0, dtype='float64'),
            'e': [True, False, True],
            'f': pd.Categorical(list('abc')),
            'g': pd.date_range('20130101', periods=3),
            'h': pd.date_range('20130101', periods=3, tz='US/Eastern'),
            'i': pd.date_range('20130101', periods=3, tz='CET'),
            'j': pd.period_range('2013-01', periods=3, freq='M'),
            'k': pd.timedelta_range('1 day', periods=3)
        })

        # (scalar passed as exclude, columns expected to remain)
        cases = [
            (np.number, ['a', 'e', 'f', 'g', 'h', 'i', 'j']),
            ('category',
             ['a', 'b', 'c', 'd', 'e', 'g', 'h', 'i', 'j', 'k']),
        ]
        for unwanted, kept in cases:
            assert_frame_equal(df.select_dtypes(exclude=unwanted), df[kept])

        # excluding periods must raise NotImplementedError with no message
        with pytest.raises(NotImplementedError, match=r"^$"):
            df.select_dtypes(exclude='period')

    def test_select_dtypes_include_exclude_using_scalars(self):
        """Scalar ``include`` and scalar ``exclude`` can be combined."""
        columns = {
            'a': list('abc'),
            'b': list(range(1, 4)),
            'c': np.arange(3, 6).astype('u1'),
            'd': np.arange(4.0, 7.0, dtype='float64'),
            'e': [True, False, True],
            'f': pd.Categorical(list('abc')),
            'g': pd.date_range('20130101', periods=3),
            'h': pd.date_range('20130101', periods=3, tz='US/Eastern'),
            'i': pd.date_range('20130101', periods=3, tz='CET'),
            'j': pd.period_range('2013-01', periods=3, freq='M'),
            'k': pd.timedelta_range('1 day', periods=3)
        }
        df = DataFrame(columns)

        # numbers minus floating point leaves 'b', 'c' and 'k'
        result = df.select_dtypes(include=np.number, exclude='floating')
        expected = df[['b', 'c', 'k']]
        assert_frame_equal(result, expected)

    def test_select_dtypes_include_exclude_mixed_scalars_lists(self):
        """One of ``include``/``exclude`` may be a scalar while the other is
        a list."""
        df = DataFrame({
            'a': list('abc'),
            'b': list(range(1, 4)),
            'c': np.arange(3, 6).astype('u1'),
            'd': np.arange(4.0, 7.0, dtype='float64'),
            'e': [True, False, True],
            'f': pd.Categorical(list('abc')),
            'g': pd.date_range('20130101', periods=3),
            'h': pd.date_range('20130101', periods=3, tz='US/Eastern'),
            'i': pd.date_range('20130101', periods=3, tz='CET'),
            'j': pd.period_range('2013-01', periods=3, freq='M'),
            'k': pd.timedelta_range('1 day', periods=3)
        })

        # (include, exclude, columns expected to be kept)
        cases = [
            (np.number, ['floating', 'timedelta'], ['b', 'c']),
            ([np.number, 'category'], 'floating', ['b', 'c', 'f', 'k']),
        ]
        for include, exclude, kept in cases:
            selected = df.select_dtypes(include=include, exclude=exclude)
            assert_frame_equal(selected, df[kept])

    def test_select_dtypes_duplicate_columns(self):
        """``select_dtypes`` works on a frame with repeated column labels."""
        # GH20839
        data = OrderedDict()
        data['a'] = list('abc')
        data['b'] = list(range(1, 4))
        data['c'] = np.arange(3, 6).astype('u1')
        data['d'] = np.arange(4.0, 7.0, dtype='float64')
        data['e'] = [True, False, True]
        data['f'] = pd.date_range('now', periods=3).values
        df = DataFrame(data)
        # relabel so that several columns share the same name
        df.columns = ['a', 'a', 'b', 'b', 'b', 'c']

        expected = DataFrame({
            'a': list(range(1, 4)),
            'b': np.arange(3, 6).astype('u1')
        })

        selected = df.select_dtypes(include=[np.number],
                                    exclude=['floating'])
        assert_frame_equal(selected, expected)

    def test_select_dtypes_not_an_attr_but_still_valid_dtype(self):
        """Dtype codes such as 'i8' are accepted by ``select_dtypes`` even
        though they are not attributes of the ``numpy`` module."""
        df = DataFrame({
            'a': list('abc'),
            'b': list(range(1, 4)),
            'c': np.arange(3, 6).astype('u1'),
            'd': np.arange(4.0, 7.0, dtype='float64'),
            'e': [True, False, True],
            'f': pd.date_range('now', periods=3).values
        })
        # differences of the datetime column 'f'
        df['g'] = df['f'].diff()
        assert not hasattr(np, 'u8')

        # (select_dtypes keyword arguments, columns expected to be kept)
        cases = [
            ({'include': ['i8', 'O'], 'exclude': ['timedelta']},
             ['a', 'b']),
            ({'include': ['i8', 'O', 'timedelta64[ns]']},
             ['a', 'b', 'g']),
        ]
        for selector, kept in cases:
            assert_frame_equal(df.select_dtypes(**selector), df[kept])

    def test_select_dtypes_empty(self):
        """Calling ``select_dtypes`` with neither argument is a ValueError."""
        frame = DataFrame({'a': list('abc'), 'b': list(range(1, 4))})
        with pytest.raises(
                ValueError,
                match='at least one of include or exclude must be nonempty'):
            frame.select_dtypes()

    def test_select_dtypes_bad_datetime64(self):
        """Unit-qualified datetime64 dtypes are rejected as too specific,
        both as ``include`` and as ``exclude``."""
        df = DataFrame({
            'a': list('abc'),
            'b': list(range(1, 4)),
            'c': np.arange(3, 6).astype('u1'),
            'd': np.arange(4.0, 7.0, dtype='float64'),
            'e': [True, False, True],
            'f': pd.date_range('now', periods=3).values
        })

        too_specific = [
            {'include': ['datetime64[D]']},
            {'exclude': ['datetime64[as]']},
        ]
        for selector in too_specific:
            with pytest.raises(ValueError, match='.+ is too specific'):
                df.select_dtypes(**selector)

    def test_select_dtypes_datetime_with_tz(self):
        """Including 'datetime64[ns]' selects no tz-aware columns."""
        eastern = Timestamp('20130102', tz='US/Eastern')
        cet = Timestamp('20130603', tz='CET')
        df2 = DataFrame({'A': eastern, 'B': cet}, index=range(5))
        df3 = pd.concat([df2[label].to_frame() for label in ('A', 'B')],
                        axis=1)

        selected = df3.select_dtypes(include=['datetime64[ns]'])
        # same rows, no columns
        no_columns = df3.reindex(columns=[])
        assert_frame_equal(selected, no_columns)

    @pytest.mark.parametrize(
        "dtype", [str, "str", np.string_, "S1", "unicode", np.unicode_, "U1"])
    @pytest.mark.parametrize("arg", ["include", "exclude"])
    def test_select_dtypes_str_raises(self, dtype, arg):
        """String-like dtypes are refused with a TypeError, whether they are
        passed through ``include`` or ``exclude`` (``arg`` names which)."""
        columns = {
            "a": list("abc"),
            "g": list("abc"),
            "b": list(range(1, 4)),
            "c": np.arange(3, 6).astype("u1"),
            "d": np.arange(4.0, 7.0, dtype="float64"),
            "e": [True, False, True],
            "f": pd.date_range("now", periods=3).values
        }
        df = DataFrame(columns)
        selector = {arg: [dtype]}

        with pytest.raises(TypeError, match="string dtypes are not allowed"):
            df.select_dtypes(**selector)

    def test_select_dtypes_bad_arg_raises(self):
        """A string that names no dtype makes ``select_dtypes`` raise
        TypeError."""
        columns = {
            'a': list('abc'),
            'g': list('abc'),
            'b': list(range(1, 4)),
            'c': np.arange(3, 6).astype('u1'),
            'd': np.arange(4.0, 7.0, dtype='float64'),
            'e': [True, False, True],
            'f': pd.date_range('now', periods=3).values
        }
        df = DataFrame(columns)

        with pytest.raises(TypeError, match='data type.*not understood'):
            df.select_dtypes(['blargy, blarg, blarg'])

    def test_select_dtypes_typecodes(self):
        """Passing every NumPy float typecode selects the whole of a frame
        filled from ``np.random.random()``."""
        # GH 11990
        def _random_cell(x, y):
            return np.random.random()

        df = mkdf(30, 3, data_gen_f=_random_cell)
        float_codes = list(np.typecodes['AllFloat'])
        assert_frame_equal(df.select_dtypes(float_codes), df)

    def test_dtypes_gh8722(self):
        """``DataFrame.dtypes`` agrees with the per-column dtypes, and still
        works while the 'use_inf_as_na' option is switched on."""
        frame = self.mixed_frame
        frame['bool'] = frame['A'] > 0

        result = frame.dtypes
        per_column = {}
        for label, column in frame.items():
            per_column[label] = column.dtype
        expected = Series(per_column, index=result.index)
        assert_series_equal(result, expected)

        # compat, GH 8722
        with option_context('use_inf_as_na', True):
            single = DataFrame([[1]])
            assert_series_equal(single.dtypes,
                                Series({0: np.dtype('int64')}))

    def test_ftypes(self):
        """``ftypes`` of the ``self.mixed_float`` fixture, compared after
        sorting both sides by value."""
        expected_by_column = {
            'A': 'float32:dense',
            'B': 'float32:dense',
            'C': 'float16:dense',
            'D': 'float64:dense',
        }
        expected = Series(expected_by_column).sort_values()
        result = self.mixed_float.ftypes.sort_values()
        assert_series_equal(result, expected)

    def test_astype(self):
        """Whole-frame ``astype`` to a single target dtype.

        First casts the ``self.frame`` fixture to integer dtypes (also after
        a column holding the string '5' has been added to it), then casts a
        numeric fixture with extra float16/float64 columns to several float
        and integer widths and to object.
        """
        casted = self.frame.astype(int)
        expected = DataFrame(self.frame.values.astype(int),
                             index=self.frame.index,
                             columns=self.frame.columns)
        assert_frame_equal(casted, expected)

        casted = self.frame.astype(np.int32)
        expected = DataFrame(self.frame.values.astype(np.int32),
                             index=self.frame.index,
                             columns=self.frame.columns)
        assert_frame_equal(casted, expected)

        # note: this adds a column to the shared fixture itself
        self.frame['foo'] = '5'
        casted = self.frame.astype(int)
        expected = DataFrame(self.frame.values.astype(int),
                             index=self.frame.index,
                             columns=self.frame.columns)
        assert_frame_equal(casted, expected)

        # mixed casting
        def _check_cast(df, v):
            """Assert that every column of ``df`` has dtype name ``v``."""
            # Check all columns: looking only at one element of the set of
            # dtype names would let a frame with mixed dtypes slip through.
            assert all(s.dtype.name == v for _, s in df.items())

        mn = self.all_mixed._get_numeric_data().copy()
        mn['little_float'] = np.array(12345., dtype='float16')
        mn['big_float'] = np.array(123456789101112., dtype='float64')

        casted = mn.astype('float64')
        _check_cast(casted, 'float64')

        casted = mn.astype('int64')
        _check_cast(casted, 'int64')

        casted = self.mixed_float.reindex(columns=['A', 'B']).astype('float32')
        _check_cast(casted, 'float32')

        casted = mn.reindex(columns=['little_float']).astype('float16')
        _check_cast(casted, 'float16')

        casted = self.mixed_float.reindex(columns=['A', 'B']).astype('float16')
        _check_cast(casted, 'float16')

        casted = mn.astype('float32')
        _check_cast(casted, 'float32')

        casted = mn.astype('int32')
        _check_cast(casted, 'int32')

        # to object
        casted = mn.astype('O')
        _check_cast(casted, 'object')

    def test_astype_with_exclude_string(self):
        """With ``errors='ignore'`` a string column is left as it is while
        the remaining columns are cast."""
        for target in (int, np.int32):
            df = self.frame.copy()
            expected = self.frame.astype(target)
            df['string'] = 'foo'
            casted = df.astype(target, errors='ignore')

            # the uncastable column is expected to come through unchanged
            expected['string'] = 'foo'
            assert_frame_equal(casted, expected)

    def test_astype_with_view(self):
        """Smoke test: run a handful of ``astype`` calls, with and without
        ``copy=False``. Nothing is asserted about the results."""
        tf = self.mixed_float.reindex(columns=['A', 'B', 'C'])
        tf.astype(np.int64)
        tf.astype(np.float32)

        # this is the only real reason to do it this way
        tf = np.round(self.frame).astype(np.int32)
        tf.astype(np.float32, copy=False)

        # TODO(wesm): verification?
        tf = self.frame.astype(np.float64)
        tf.astype(np.int64, copy=False)

    @pytest.mark.parametrize("dtype", [np.int32, np.int64])
    @pytest.mark.parametrize("val", [np.nan, np.inf])
    def test_astype_cast_nan_inf_int(self, val, dtype):
        """Casting a frame holding NaN or inf to an integer dtype raises
        ValueError."""
        # see gh-14265
        expected_error = (
            "Cannot convert non-finite values \\(NA or inf\\) to integer")
        frame = DataFrame([val])

        with pytest.raises(ValueError, match=expected_error):
            frame.astype(dtype)

    def test_astype_str(self):
        """``astype(str)`` on a frame of datetime, tz-aware datetime,
        timedelta, integer and float columns."""
        # see gh-9757
        a = Series(date_range("2010-01-04", periods=5))
        b = Series(date_range("3/6/2012 00:00", periods=5, tz="US/Eastern"))
        c = Series([Timedelta(x, unit="d") for x in range(5)])
        d = Series(range(5))
        e = Series([0.0, 0.2, 0.4, 0.6, 0.8])
        df = DataFrame({"a": a, "b": b, "c": c, "d": d, "e": e})

        # Datetime-like
        result = df.astype(str)

        # build the expected text column by column from the raw values
        expected = DataFrame({
            "a": [str(Timestamp(x)._date_repr) for x in a._values],
            "b": [str(Timestamp(x)) for x in b._values],
            "c": [str(Timedelta(x)._repr_base(format="all"))
                  for x in c._values],
            "d": [str(x) for x in d._values],
            "e": [str(x) for x in e._values],
        })

        assert_frame_equal(result, expected)

    def test_astype_str_float(self):
        """``astype(str)`` on floats: NaN becomes "nan" and a long float is
        rendered according to the installed NumPy version.

        Uses ``np.nan`` (as the rest of the file does) rather than the
        ``np.NaN`` alias, which NumPy 2.0 no longer provides.
        """
        # see gh-11302
        result = DataFrame([np.nan]).astype(str)
        expected = DataFrame(["nan"])

        assert_frame_equal(result, expected)
        result = DataFrame([1.12345678901234567890]).astype(str)

        # < 1.14 truncates
        # >= 1.14 preserves the full repr
        val = ("1.12345678901"
               if _np_version_under1p14 else "1.1234567890123457")
        expected = DataFrame([val])
        assert_frame_equal(result, expected)

    @pytest.mark.parametrize("dtype_class", [dict, Series])
    def test_astype_dict_like(self, dtype_class):
        """``astype`` with a column -> dtype mapping.

        ``dtype_class`` is the mapping type used to build the argument (the
        parametrization supplies ``dict`` and ``Series``). After every step
        the source frame is compared with a deep copy taken up front, so
        that ``astype`` is shown not to modify its input.
        """
        # GH7271 & GH16717
        a = Series(date_range('2010-01-04', periods=5))
        b = Series(range(5))
        c = Series([0.0, 0.2, 0.4, 0.6, 0.8])
        d = Series(['1.0', '2', '3.14', '4', '5.4'])
        df = DataFrame({'a': a, 'b': b, 'c': c, 'd': d})
        original = df.copy(deep=True)

        # change type of a subset of columns
        dt1 = dtype_class({'b': 'str', 'd': 'float32'})
        result = df.astype(dt1)
        expected = DataFrame({
            'a':
            a,
            'b':
            Series(['0', '1', '2', '3', '4']),
            'c':
            c,
            'd':
            Series([1.0, 2.0, 3.14, 4.0, 5.4], dtype='float32')
        })
        assert_frame_equal(result, expected)
        assert_frame_equal(df, original)

        dt2 = dtype_class({'b': np.float32, 'c': 'float32', 'd': np.float64})
        result = df.astype(dt2)
        expected = DataFrame({
            'a':
            a,
            'b':
            Series([0.0, 1.0, 2.0, 3.0, 4.0], dtype='float32'),
            'c':
            Series([0.0, 0.2, 0.4, 0.6, 0.8], dtype='float32'),
            'd':
            Series([1.0, 2.0, 3.14, 4.0, 5.4], dtype='float64')
        })
        assert_frame_equal(result, expected)
        assert_frame_equal(df, original)

        # change all columns
        dt3 = dtype_class({'a': str, 'b': str, 'c': str, 'd': str})
        assert_frame_equal(df.astype(dt3), df.astype(str))
        assert_frame_equal(df, original)

        # error should be raised when using something other than column labels
        # in the keys of the dtype dict
        dt4 = dtype_class({'b': str, 2: str})
        dt5 = dtype_class({'e': str})
        msg = ("Only a column name can be used for the key in a dtype mappings"
               " argument")
        with pytest.raises(KeyError, match=msg):
            df.astype(dt4)
        with pytest.raises(KeyError, match=msg):
            df.astype(dt5)
        assert_frame_equal(df, original)

        # if the dtypes provided are the same as the original dtypes, the
        # resulting DataFrame should be the same as the original DataFrame
        dt6 = dtype_class({col: df[col].dtype for col in df.columns})
        equiv = df.astype(dt6)
        assert_frame_equal(df, equiv)
        assert_frame_equal(df, original)

        # GH 16717
        # if dtypes provided is empty, the resulting DataFrame
        # should be the same as the original DataFrame
        dt7 = dtype_class({})
        # Compare the frame produced by THIS cast; reusing the ``equiv``
        # from the previous section would leave the empty mapping untested.
        equiv = df.astype(dt7)
        assert_frame_equal(df, equiv)
        assert_frame_equal(df, original)

    def test_astype_duplicate_col(self):
        """``astype`` on a frame with two columns labelled 'a': casting the
        whole frame, and casting by the duplicated label."""
        first_a = Series([1, 2, 3, 4, 5], name='a')
        only_b = Series([0.1, 0.2, 0.4, 0.6, 0.8], name='b')
        second_a = Series([0, 1, 2, 3, 4], name='a')
        df = concat([first_a, only_b, second_a], axis=1)

        # every column to str
        result = df.astype(str)
        first_a_str = Series(['1', '2', '3', '4', '5'], dtype='str',
                             name='a')
        only_b_str = Series(['0.1', '0.2', '0.4', '0.6', '0.8'],
                            dtype=str, name='b')
        second_a_str = Series(['0', '1', '2', '3', '4'], dtype='str',
                              name='a')
        expected = concat([first_a_str, only_b_str, second_a_str], axis=1)
        assert_frame_equal(result, expected)

        # only label 'a': both 'a' columns change, 'b' stays numeric
        result = df.astype({'a': 'str'})
        expected = concat([first_a_str, only_b, second_a_str], axis=1)
        assert_frame_equal(result, expected)

    @pytest.mark.parametrize('dtype', [
        'category',
        CategoricalDtype(),
        CategoricalDtype(ordered=True),
        CategoricalDtype(ordered=False),
        CategoricalDtype(categories=list('abcdef')),
        CategoricalDtype(categories=list('edba'), ordered=False),
        CategoricalDtype(categories=list('edcb'), ordered=True)
    ],
                             ids=repr)
    def test_astype_categorical(self, dtype):
        """Casting a whole frame to a categorical dtype matches building each
        column as a ``Categorical`` with that dtype."""
        # GH 18099
        raw = {'A': list('abbc'), 'B': list('bccd'), 'C': list('cdde')}
        result = DataFrame(raw).astype(dtype)
        expected = DataFrame({
            label: Categorical(values, dtype=dtype)
            for label, values in raw.items()
        })
        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("cls", [
        pd.api.types.CategoricalDtype, pd.api.types.DatetimeTZDtype,
        pd.api.types.IntervalDtype
    ])
    def test_astype_categoricaldtype_class_raises(self, cls):
        """Passing a dtype *class* instead of an instance raises TypeError,
        for both the frame-level and the column-level ``astype``."""
        df = DataFrame({"A": ['a', 'a', 'b', 'c']})
        xpr = "Expected an instance of {}".format(cls.__name__)

        attempts = (
            lambda: df.astype({"A": cls}),
            lambda: df['A'].astype(cls),
        )
        for attempt in attempts:
            with pytest.raises(TypeError, match=xpr):
                attempt()

    @pytest.mark.parametrize("dtype", ['Int64', 'Int32', 'Int16'])
    def test_astype_extension_dtypes(self, dtype):
        """Casting a two-column float frame to an extension integer dtype,
        starting from all-float and from partially converted columns."""
        # GH 22578
        def _float_frame():
            return pd.DataFrame([[1., 2.], [3., 4.], [5., 6.]],
                                columns=['a', 'b'])

        df = _float_frame()
        both_converted = pd.DataFrame({
            'a': integer_array([1, 3, 5], dtype=dtype),
            'b': integer_array([2, 4, 6], dtype=dtype)
        })
        tm.assert_frame_equal(df.astype(dtype), both_converted)
        tm.assert_frame_equal(df.astype('int64').astype(dtype),
                              both_converted)
        # and back to float64 again
        tm.assert_frame_equal(df.astype(dtype).astype('float64'), df)

        # convert only column 'b' first, then the whole frame
        df = _float_frame()
        df['b'] = df['b'].astype(dtype)
        one_converted = pd.DataFrame({
            'a': [1., 3., 5.],
            'b': integer_array([2, 4, 6], dtype=dtype)
        })
        tm.assert_frame_equal(df, one_converted)

        tm.assert_frame_equal(df.astype(dtype), both_converted)
        tm.assert_frame_equal(df.astype('int64').astype(dtype),
                              both_converted)

    @pytest.mark.parametrize("dtype", ['Int64', 'Int32', 'Int16'])
    def test_astype_extension_dtypes_1d(self, dtype):
        """Single-column variant of the extension integer dtype cast."""
        # GH 22578
        def _float_frame():
            return pd.DataFrame({'a': [1., 2., 3.]})

        df = _float_frame()
        converted = pd.DataFrame({'a': integer_array([1, 2, 3], dtype=dtype)})
        tm.assert_frame_equal(df.astype(dtype), converted)
        tm.assert_frame_equal(df.astype('int64').astype(dtype), converted)

        # convert the column in place first, then cast the frame again
        df = _float_frame()
        df['a'] = df['a'].astype(dtype)
        in_place = pd.DataFrame({'a': integer_array([1, 2, 3], dtype=dtype)})
        tm.assert_frame_equal(df, in_place)

        tm.assert_frame_equal(df.astype(dtype), converted)
        tm.assert_frame_equal(df.astype('int64').astype(dtype), converted)

    @pytest.mark.parametrize("dtype", ['category', 'Int64'])
    def test_astype_extension_dtypes_duplicate_col(self, dtype):
        """Casting a frame with two columns both named 'a' to an extension
        dtype equals casting each column on its own."""
        # GH 24704
        columns = [
            Series([0, np.nan, 4], name='a'),
            Series([np.nan, 3, 5], name='a'),
        ]
        df = concat(columns, axis=1)

        result = df.astype(dtype)
        expected = concat([column.astype(dtype) for column in columns],
                          axis=1)
        assert_frame_equal(result, expected)

    @pytest.mark.parametrize('dtype', [{
        100: 'float64',
        200: 'uint64'
    }, 'category', 'float64'])
    def test_astype_column_metadata(self, dtype):
        """``astype`` leaves the columns index (values and name) intact."""
        # GH 19920
        labels = pd.UInt64Index([100, 200, 300], name='foo')
        source = DataFrame(np.arange(15).reshape(5, 3), columns=labels)
        casted = source.astype(dtype)
        tm.assert_index_equal(casted.columns, labels)

    @pytest.mark.parametrize("dtype", ["M8", "m8"])
    @pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
    def test_astype_from_datetimelike_to_objectt(self, dtype, unit):
        """Casting datetime64/timedelta64 data of any unit to object yields
        object columns whose first value matches the pandas scalar."""
        # tests astype to object dtype
        # gh-19223 / gh-12425
        unit_dtype = "{}[{}]".format(dtype, unit)
        values = np.array([[1, 2, 3]], dtype=unit_dtype)
        result = DataFrame(values).astype(object)
        assert (result.dtypes == object).all()

        # 'M8' is datetime64, otherwise the data is timedelta64
        if unit_dtype.startswith('M8'):
            to_scalar = pd.to_datetime
        else:
            to_scalar = pd.to_timedelta
        assert result.iloc[0, 0] == to_scalar(1, unit=unit)

    @pytest.mark.parametrize("arr_dtype", [np.int64, np.float64])
    @pytest.mark.parametrize("dtype", ["M8", "m8"])
    @pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
    def test_astype_to_datetimelike_unit(self, arr_dtype, dtype, unit):
        """Cast numeric data to datetime64/timedelta64 of every unit.

        gh-19223 / gh-12425: the frame-level cast is compared with a frame
        built from numpy's own ``astype`` of the same values.
        """
        target = "{}[{}]".format(dtype, unit)
        values = np.array([[1, 2, 3]], dtype=arr_dtype)

        result = DataFrame(values).astype(target)
        expected = DataFrame(values.astype(target))

        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
    def test_astype_to_datetime_unit(self, unit):
        """Cast datetime64 data of every unit to that same unit (gh-19223)."""
        target = "M8[{}]".format(unit)
        values = np.array([[1, 2, 3]], dtype=target)

        result = DataFrame(values).astype(target)
        expected = DataFrame(values.astype(target))

        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("unit", ['ns'])
    def test_astype_to_timedelta_unit_ns(self, unit):
        """Preserve the timedelta conversion for the 'ns' unit (gh-19223)."""
        target = "m8[{}]".format(unit)
        values = np.array([[1, 2, 3]], dtype=target)

        result = DataFrame(values).astype(target)
        expected = DataFrame(values.astype(target))

        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("unit", ['us', 'ms', 's', 'h', 'm', 'D'])
    def test_astype_to_timedelta_unit(self, unit):
        """Cast timedelta data to a non-ns unit; a float frame is expected.

        gh-19223
        """
        target = "m8[{}]".format(unit)
        df = DataFrame(np.array([[1, 2, 3]], dtype=target))
        result = df.astype(target)

        # coerce to float
        as_float = df.values.astype(target).astype(float)
        expected = DataFrame(as_float)

        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize("unit", ['ns', 'us', 'ms', 's', 'h', 'm', 'D'])
    def test_astype_to_incorrect_datetimelike(self, unit):
        """Casting between datetime64 and timedelta64 raises TypeError.

        gh-19224: both directions (M -> m and m -> M) are exercised.
        """
        datetime_dtype = "M8[{}]".format(unit)
        timedelta_dtype = "m8[{}]".format(unit)

        # datetime -> timedelta
        msg = (r"cannot astype a datetimelike from \[datetime64\[ns\]\] to"
               r" \[timedelta64\[{}\]\]").format(unit)
        datetime_frame = DataFrame(
            np.array([[1, 2, 3]], dtype=datetime_dtype))
        with pytest.raises(TypeError, match=msg):
            datetime_frame.astype(timedelta_dtype)

        # timedelta -> datetime
        msg = (r"cannot astype a timedelta from \[timedelta64\[ns\]\] to"
               r" \[datetime64\[{}\]\]").format(unit)
        timedelta_frame = DataFrame(
            np.array([[1, 2, 3]], dtype=timedelta_dtype))
        with pytest.raises(TypeError, match=msg):
            timedelta_frame.astype(datetime_dtype)

    def test_timedeltas(self):
        """Check dtype counts as datetime, timedelta and int columns grow."""
        dates = Series(date_range('2012-1-1', periods=3, freq='D'))
        deltas = Series([timedelta(days=n) for n in range(3)])
        df = DataFrame(dict(A=dates, B=deltas))

        # one datetime column and one timedelta column to start with
        expected = Series({
            'datetime64[ns]': 1,
            'timedelta64[ns]': 1
        }).sort_index()
        result = df.get_dtype_counts().sort_index()
        assert_series_equal(result, expected)

        # the sum of columns 'A' and 'B' is counted as a second datetime column
        df['C'] = df['A'] + df['B']
        expected = Series({
            'datetime64[ns]': 2,
            'timedelta64[ns]': 1
        }).sort_values()
        result = df.get_dtype_counts().sort_values()
        assert_series_equal(result, expected)

        # mixed int types
        df['D'] = 1
        expected = Series({
            'datetime64[ns]': 2,
            'timedelta64[ns]': 1,
            'int64': 1
        }).sort_values()
        result = df.get_dtype_counts().sort_values()
        assert_series_equal(result, expected)

    def test_arg_for_errors_in_astype(self):
        """Validate the ``errors`` argument of ``astype`` (issue #14878)."""
        frame = DataFrame([1, 2, 3])

        # ``errors=True`` is expected to be rejected with a ValueError
        with pytest.raises(ValueError):
            frame.astype(np.float64, errors=True)

        # ``errors='ignore'`` is expected to run without raising
        frame.astype(np.int8, errors='ignore')

    def test_arg_for_errors_in_astype_dictlist(self):
        """``errors='ignore'`` is applied per column of a dtype dict.

        GH-25905: column 'a' is expected as floats, while column 'b'
        (which contains '16.5%') and column 'c' are expected unchanged.
        """
        source_rows = [
            {'a': '1', 'b': '16.5%', 'c': 'test'},
            {'a': '2.2', 'b': '15.3', 'c': 'another_test'},
        ]
        expected_rows = [
            {'a': 1.0, 'b': '16.5%', 'c': 'test'},
            {'a': 2.2, 'b': '15.3', 'c': 'another_test'},
        ]
        df = pd.DataFrame(source_rows)
        expected = pd.DataFrame(expected_rows)
        type_dict = {'a': 'float64', 'b': 'float64', 'c': 'object'}

        result = df.astype(dtype=type_dict, errors='ignore')

        tm.assert_frame_equal(result, expected)

    @pytest.mark.parametrize('input_vals', [
        ([1, 2]),
        (['1', '2']),
        (list(pd.date_range('1/1/2011', periods=2, freq='H'))),
        (list(pd.date_range('1/1/2011', periods=2, freq='H',
                            tz='US/Eastern'))),
        ([pd.Interval(left=0, right=5)]),
    ])
    def test_constructor_list_str(self, input_vals, string_dtype):
        """Construct with a string dtype and compare against ``astype``.

        GH 16605: data elements should be converted to strings when dtype
        is str, 'str', or 'U'.
        """
        expected = DataFrame({'A': input_vals}).astype({'A': string_dtype})
        result = DataFrame({'A': input_vals}, dtype=string_dtype)
        assert_frame_equal(result, expected)

    def test_constructor_list_str_na(self, string_dtype):
        """A ``None`` entry is expected to stay ``None`` under a string dtype."""
        expected = DataFrame({"A": ['1.0', '2.0', None]}, dtype=object)
        result = DataFrame({"A": [1.0, 2.0, None]}, dtype=string_dtype)
        assert_frame_equal(result, expected)

    @pytest.mark.parametrize(
        "data, expected",
        [
            # empty
            (DataFrame(), True),
            # multi-same
            (DataFrame({"A": [1, 2], "B": [1, 2]}), True),
            # multi-object
            (DataFrame({"A": np.array([1, 2], dtype=object),
                        "B": np.array(["a", "b"], dtype=object)}), True),
            # multi-extension
            (DataFrame({"A": pd.Categorical(['a', 'b']),
                        "B": pd.Categorical(['a', 'b'])}), True),
            # differ types
            (DataFrame({"A": [1, 2], "B": [1., 2.]}), False),
            # differ sizes
            (DataFrame({"A": np.array([1, 2], dtype=np.int32),
                        "B": np.array([1, 2], dtype=np.int64)}), False),
            # multi-extension differ
            (DataFrame({"A": pd.Categorical(['a', 'b']),
                        "B": pd.Categorical(['b', 'c'])}), False),
        ])
    def test_is_homogeneous_type(self, data, expected):
        """``_is_homogeneous_type`` must be exactly the expected boolean."""
        is_homogeneous = data._is_homogeneous_type
        assert is_homogeneous is expected

    def test_asarray_homogenous(self):
        df = pd.DataFrame({
            "A": pd.Categorical([1, 2]),
            "B": pd.Categorical([1, 2])
        })
        result = np.asarray(df)
        # may change from object in the future
        expected = np.array([[1, 1], [2, 2]], dtype='object')
        tm.assert_numpy_array_equal(result, expected)
示例#8
0
 def test_unordered_same(self, ordered):
     c1 = CategoricalDtype(['a', 'b'], ordered=ordered)
     c2 = CategoricalDtype(['b', 'a'], ordered=ordered)
     assert hash(c1) == hash(c2)
示例#9
0
 def test_from_categorical_dtype_categories(self):
     c1 = Categorical([1, 2], categories=[1, 2, 3], ordered=True)
     # override categories
     result = CategoricalDtype._from_categorical_dtype(
         c1, categories=[2, 3])
     assert result == CategoricalDtype([2, 3], ordered=True)
示例#10
0
 def test_hash_vs_equality(self):
     dtype = self.dtype
     dtype2 = CategoricalDtype()
     assert dtype == dtype2
     assert dtype2 == dtype
     assert hash(dtype) == hash(dtype2)
示例#11
0
 def test_same_categories_different_order(self):
     c1 = CategoricalDtype(['a', 'b'], ordered=True)
     c2 = CategoricalDtype(['b', 'a'], ordered=True)
     assert c1 is not c2
示例#12
0
 def test_non_unique_invalid(self):
     """Duplicate categories are expected to raise ValueError."""
     duplicated = [1, 2, 1]
     with pytest.raises(ValueError):
         CategoricalDtype(duplicated)
示例#13
0
 def test_nan_invalid(self):
     """A NaN among the categories is expected to raise ValueError."""
     with_nan = [1, 2, np.nan]
     with pytest.raises(ValueError):
         CategoricalDtype(with_nan)
示例#14
0
 def test_order_hashes_different(self, v1, v2):
     c1 = CategoricalDtype(v1, ordered=False)
     c2 = CategoricalDtype(v2, ordered=True)
     c3 = CategoricalDtype(v1, ordered=None)
     assert c1 is not c2
     assert c1 is not c3
示例#15
0
 def test_equal_but_different(self, ordered):
     c1 = CategoricalDtype([1, 2, 3])
     c2 = CategoricalDtype([1., 2., 3.])
     assert c1 is not c2
     assert c1 != c2
示例#16
0
 def test_categories(self):
     """Categories are exposed as an Index and ``ordered`` stays ``None``."""
     labels = ['a', 'b', 'c']
     result = CategoricalDtype(labels)
     tm.assert_index_equal(result.categories, pd.Index(labels))
     assert result.ordered is None
示例#17
0
 def test_from_values_or_dtype_raises(self, values, categories,
                                      ordered, dtype):
     """Conflicting arguments must raise ValueError with a fixed message."""
     msg = "Cannot specify `categories` or `ordered` together with `dtype`."
     args = (values, categories, ordered, dtype)
     with pytest.raises(ValueError, match=msg):
         CategoricalDtype._from_values_or_dtype(*args)
示例#18
0
 def test_categorical_equality_strings(self, categories, ordered, other):
     c1 = CategoricalDtype(categories, ordered)
     result = c1 == other
     expected = other == 'category'
     assert result is expected
示例#19
0
 def test_update_dtype_errors(self, bad_dtype):
     """``_update_dtype`` must reject anything that is not a CategoricalDtype.

     Parameters
     ----------
     bad_dtype : object
         Value passed to ``_update_dtype``; a ValueError whose message
         matches ``msg`` is expected.
     """
     dtype = CategoricalDtype(list('abc'), False)
     msg = 'a CategoricalDtype must be passed to perform an update, '
     # pytest.raises(match=...) replaces the deprecated
     # tm.assert_raises_regex helper; both search the message for ``msg``.
     with pytest.raises(ValueError, match=msg):
         dtype._update_dtype(bad_dtype)
示例#20
0
    def test_invalid_raises(self):
        """Invalid ``ordered`` or ``categories`` arguments raise TypeError."""
        # a non-boolean ``ordered`` value
        ordered_error = pytest.raises(TypeError, match='ordered')
        with ordered_error:
            CategoricalDtype(['a', 'b'], ordered='foo')

        # a plain string passed as ``categories``
        categories_error = pytest.raises(
            TypeError, match="'categories' must be list-like")
        with categories_error:
            CategoricalDtype('category')
示例#21
0
 def test_construction_from_string(self):
     """'category' builds a dtype equal to ``self.dtype``; 'foo' raises."""
     built = CategoricalDtype.construct_from_string('category')
     assert is_dtype_equal(self.dtype, built)
     # an unrecognised string must raise TypeError
     with pytest.raises(TypeError):
         CategoricalDtype.construct_from_string('foo')
示例#22
0
 def test_mixed(self):
     a = CategoricalDtype(['a', 'b', 1, 2])
     b = CategoricalDtype(['a', 'b', '1', '2'])
     assert hash(a) != hash(b)
 def test_set_dtype_nans(self):
     c = Categorical(["a", "b", np.nan])
     result = c._set_dtype(CategoricalDtype(["a", "c"]))
     tm.assert_numpy_array_equal(result.codes, np.array([0, -1, -1], dtype="int8"))
示例#24
0
 def test_from_categorical_dtype_identity(self):
     c1 = Categorical([1, 2], categories=[1, 2, 3], ordered=True)
     # Identity test for no changes
     c2 = CategoricalDtype._from_categorical_dtype(c1)
     assert c2 is c1
示例#25
0
@pytest.mark.parametrize(
    "dtype", [CategoricalDtype, IntervalDtype, DatetimeTZDtype, PeriodDtype])
def test_registry(dtype):
    """Each parametrized dtype class must be listed in ``registry.dtypes``."""
    registered = registry.dtypes
    assert dtype in registered


@pytest.mark.parametrize(
    "dtype, expected",
    [
        ("int64", None),
        ("interval", IntervalDtype()),
        ("interval[int64]", IntervalDtype()),
        ("interval[datetime64[ns]]", IntervalDtype("datetime64[ns]")),
        ("period[D]", PeriodDtype("D")),
        ("category", CategoricalDtype()),
        ("datetime64[ns, US/Eastern]", DatetimeTZDtype("ns", "US/Eastern")),
    ],
)
def test_registry_find(dtype, expected):
    """``registry.find`` must return the expected value for each string."""
    found = registry.find(dtype)
    assert found == expected


@pytest.mark.parametrize(
    "dtype, expected",
    [
        (str, False),
        (int, False),
        (bool, True),
        (np.bool, True),
        (np.array(["a", "b"]), False),
示例#26
0
 def test_from_categorical_dtype_categories(self):
     c1 = Categorical([1, 2], categories=[1, 2, 3], ordered=True)
     # override categories
     result = CategoricalDtype._from_categorical_dtype(c1,
                                                       categories=[2, 3])
     assert result == CategoricalDtype([2, 3], ordered=True)
示例#27
0
 def test_from_values_or_dtype_raises(self, values, categories, ordered,
                                      dtype):
     """Conflicting arguments must raise ValueError with a fixed message."""
     msg = "Cannot specify `categories` or `ordered` together with `dtype`."
     expect_error = pytest.raises(ValueError, match=msg)
     with expect_error:
         CategoricalDtype._from_values_or_dtype(
             values, categories, ordered, dtype)
示例#28
0
 def test_from_categorical_dtype_ordered(self):
     c1 = Categorical([1, 2], categories=[1, 2, 3], ordered=True)
     # override ordered
     result = CategoricalDtype._from_categorical_dtype(c1, ordered=False)
     assert result == CategoricalDtype([1, 2, 3], ordered=False)
示例#29
0
 def test_is_dtype(self, dtype):
     assert CategoricalDtype.is_dtype(dtype)
     assert CategoricalDtype.is_dtype("category")
     assert CategoricalDtype.is_dtype(CategoricalDtype())
     assert not CategoricalDtype.is_dtype("foo")
     assert not CategoricalDtype.is_dtype(np.float64)
示例#30
0
 def test_str_vs_repr(self, ordered):
     """``str`` is 'category'; ``repr`` must match the expected pattern."""
     dtype = CategoricalDtype(['a', 'b'], ordered=ordered)
     assert str(dtype) == 'category'
     # Py2 will have unicode prefixes
     template = r"CategoricalDtype\(categories=\[.*\], ordered={ordered}\)"
     pattern = template.format(ordered=ordered)
     assert re.match(pattern, repr(dtype))
示例#31
0
 def test_construction_from_string(self):
     """'category' builds a dtype equal to ``self.dtype``; 'foo' raises."""
     built = CategoricalDtype.construct_from_string('category')
     assert is_dtype_equal(self.dtype, built)

     # an unrecognised string must raise TypeError with this message
     msg = "cannot construct a CategoricalDtype"
     with pytest.raises(TypeError, match=msg):
         CategoricalDtype.construct_from_string('foo')
示例#32
0
 def test_equality(self):
     assert is_dtype_equal(self.dtype, 'category')
     assert is_dtype_equal(self.dtype, CategoricalDtype())
     assert not is_dtype_equal(self.dtype, 'foo')
示例#33
0
 def test_from_values_or_dtype(
         self, values, categories, ordered, dtype, expected):
     result = CategoricalDtype._from_values_or_dtype(values, categories,
                                                     ordered, dtype)
     assert result == expected
示例#34
0
 def test_categorical_categories(self):
     # GH17884
     c1 = CategoricalDtype(Categorical(['a', 'b']))
     tm.assert_index_equal(c1.categories, pd.Index(['a', 'b']))
     c1 = CategoricalDtype(CategoricalIndex(['a', 'b']))
     tm.assert_index_equal(c1.categories, pd.Index(['a', 'b']))
示例#35
0
 def test_update_dtype_errors(self, bad_dtype):
     """``update_dtype`` must raise ValueError for ``bad_dtype``."""
     base = CategoricalDtype(list('abc'), False)
     msg = 'a CategoricalDtype must be passed to perform an update, '
     expect_error = pytest.raises(ValueError, match=msg)
     with expect_error:
         base.update_dtype(bad_dtype)
示例#36
0
 def test_update_dtype_errors(self, bad_dtype):
     """Passing ``bad_dtype`` to ``update_dtype`` must raise ValueError."""
     msg = 'a CategoricalDtype must be passed to perform an update, '
     original = CategoricalDtype(list('abc'), False)
     with pytest.raises(ValueError, match=msg):
         original.update_dtype(bad_dtype)
示例#37
0
 def test_construction_from_string(self):
     """Check ``construct_from_string`` for a valid and an invalid name."""
     from_name = CategoricalDtype.construct_from_string('category')
     assert is_dtype_equal(self.dtype, from_name)

     msg = "cannot construct a CategoricalDtype"
     expect_error = pytest.raises(TypeError, match=msg)
     with expect_error:
         CategoricalDtype.construct_from_string('foo')
示例#38
0
 def test_constructor_invalid(self):
     """A plain string passed as ``categories`` must raise TypeError."""
     msg = "Parameter 'categories' must be list-like"
     expect_error = pytest.raises(TypeError, match=msg)
     with expect_error:
         CategoricalDtype("category")
示例#39
0
 def test_from_categorical_dtype_identity(self):
     c1 = Categorical([1, 2], categories=[1, 2, 3], ordered=True)
     # Identity test for no changes
     c2 = CategoricalDtype._from_categorical_dtype(c1)
     assert c2 is c1
示例#40
0
    assert not com.is_offsetlike(np.array([pd.DateOffset(), pd.Timestamp(0)]))


@pytest.mark.parametrize('input_param,result', [
    (int, np.dtype(int)),
    ('int32', np.dtype('int32')),
    (float, np.dtype(float)),
    ('float64', np.dtype('float64')),
    (np.dtype('float64'), np.dtype('float64')),
    (str, np.dtype(str)),
    (pd.Series([1, 2], dtype=np.dtype('int16')), np.dtype('int16')),
    (pd.Series(['a', 'b']), np.dtype(object)),
    (pd.Index([1, 2]), np.dtype('int64')),
    (pd.Index(['a', 'b']), np.dtype(object)),
    ('category', 'category'),
    (pd.Categorical(['a', 'b']).dtype, CategoricalDtype(['a', 'b'])),
    (pd.Categorical(['a', 'b']), CategoricalDtype(['a', 'b'])),
    (pd.CategoricalIndex(['a', 'b']).dtype, CategoricalDtype(['a', 'b'])),
    (pd.CategoricalIndex(['a', 'b']), CategoricalDtype(['a', 'b'])),
    (CategoricalDtype(), CategoricalDtype()),
    (CategoricalDtype(['a', 'b']), CategoricalDtype()),
    (pd.DatetimeIndex([1, 2]), np.dtype('=M8[ns]')),
    (pd.DatetimeIndex([1, 2]).dtype, np.dtype('=M8[ns]')),
    ('<M8[ns]', np.dtype('<M8[ns]')),
    ('datetime64[ns, Europe/London]', DatetimeTZDtype('ns', 'Europe/London')),
    (pd.SparseSeries([1, 2], dtype='int32'), SparseDtype('int32')),
    (pd.SparseSeries([1, 2], dtype='int32').dtype, SparseDtype('int32')),
    (PeriodDtype(freq='D'), PeriodDtype(freq='D')),
    ('period[D]', PeriodDtype(freq='D')),
    (IntervalDtype(), IntervalDtype()),
])
示例#41
0
 def test_from_categorical_dtype_both(self):
     c1 = Categorical([1, 2], categories=[1, 2, 3], ordered=True)
     # override ordered
     result = CategoricalDtype._from_categorical_dtype(
         c1, categories=[1, 2], ordered=False)
     assert result == CategoricalDtype([1, 2], ordered=False)
示例#42
0
 def test_categorical_dtype(self):
     assert com.pandas_dtype('category') == CategoricalDtype()
示例#43
0
 def test_is_dtype(self):
     assert CategoricalDtype.is_dtype(self.dtype)
     assert CategoricalDtype.is_dtype('category')
     assert CategoricalDtype.is_dtype(CategoricalDtype())
     assert not CategoricalDtype.is_dtype('foo')
     assert not CategoricalDtype.is_dtype(np.float64)
示例#44
0
 def test_basic(self, categories, ordered):
     c1 = CategoricalDtype(categories, ordered=ordered)
     tm.assert_index_equal(c1.categories, pd.Index(categories))
     assert c1.ordered is ordered