Example #1
 def test_datetimetz_dtype(self):
     for dtype in ['datetime64[ns, US/Eastern]',
                   'datetime64[ns, Asia/Tokyo]',
                   'datetime64[ns, UTC]']:
         self.assertIs(pandas_dtype(dtype), DatetimeTZDtype(dtype))
         self.assertEqual(pandas_dtype(dtype), DatetimeTZDtype(dtype))
         self.assertEqual(pandas_dtype(dtype), dtype)
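For reference, a tz-aware dtype string maps to a cached DatetimeTZDtype that also compares equal to the original string. A minimal sketch, assuming pandas_dtype is importable from pandas.api.types (a newer layout than the test above uses):

from pandas.api.types import pandas_dtype

dt = pandas_dtype('datetime64[ns, UTC]')
print(type(dt).__name__)            # DatetimeTZDtype
print(dt == 'datetime64[ns, UTC]')  # True: the dtype compares equal to its string form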
Example #2
def test_pandas_dtype():

    assert pandas_dtype('datetime64[ns, US/Eastern]') == DatetimeTZDtype(
        'datetime64[ns, US/Eastern]')
    assert pandas_dtype('category') == CategoricalDtype()
    for dtype in ['M8[ns]', 'm8[ns]', 'object', 'float64', 'int64']:
        assert pandas_dtype(dtype) == np.dtype(dtype)
Example #3
 def test_period_dtype(self):
     for dtype in [
             'period[D]', 'period[3M]', 'period[U]', 'Period[D]',
             'Period[3M]', 'Period[U]'
     ]:
         self.assertIs(pandas_dtype(dtype), PeriodDtype(dtype))
         self.assertEqual(pandas_dtype(dtype), PeriodDtype(dtype))
         self.assertEqual(pandas_dtype(dtype), dtype)
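Period strings behave the same way, in either capitalization. A minimal sketch under the same import assumption:

from pandas.api.types import pandas_dtype

pd_dtype = pandas_dtype('Period[3M]')
print(pd_dtype)                  # period[3M]
print(pd_dtype == 'period[3M]')  # True
print(pd_dtype.freq)             # a 3-month frequency/offset object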
Example #4
File: period.py  Project: zebartol/pandas
    def __new__(cls, data=None, ordinal=None, freq=None, start=None, end=None,
                periods=None, copy=False, name=None, tz=None, dtype=None,
                **kwargs):

        if periods is not None:
            if is_float(periods):
                periods = int(periods)
            elif not is_integer(periods):
                raise ValueError('Periods must be a number, got %s' %
                                 str(periods))

        if name is None and hasattr(data, 'name'):
            name = data.name

        if dtype is not None:
            dtype = pandas_dtype(dtype)
            if not is_period_dtype(dtype):
                raise ValueError('dtype must be PeriodDtype')
            if freq is None:
                freq = dtype.freq
            elif freq != dtype.freq:
                msg = 'specified freq and dtype are different'
                raise IncompatibleFrequency(msg)

        if data is None:
            if ordinal is not None:
                data = np.asarray(ordinal, dtype=np.int64)
            else:
                data, freq = cls._generate_range(start, end, periods,
                                                 freq, kwargs)
        else:
            ordinal, freq = cls._from_arraylike(data, freq, tz)
            data = np.array(ordinal, dtype=np.int64, copy=copy)

        return cls._simple_new(data, name=name, freq=freq)
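In this constructor a period dtype can stand in for freq, and a conflicting freq raises IncompatibleFrequency. A hedged usage sketch (the dtype keyword matches the signature above; behavior in current pandas is equivalent):

import pandas as pd

idx = pd.PeriodIndex(['2017-01', '2017-02'], dtype='period[M]')  # freq taken from the dtype
print(idx.freq)
# pd.PeriodIndex(['2017-01'], freq='D', dtype='period[M]') would raise IncompatibleFrequency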
Example #5
    def __new__(cls, subtype=None):
        """
        Parameters
        ----------
        subtype : the dtype of the Interval
        """

        if isinstance(subtype, IntervalDtype):
            return subtype
        elif subtype is None or (isinstance(subtype, compat.string_types)
                                 and subtype == 'interval'):
            subtype = None
        else:
            if isinstance(subtype, compat.string_types):
                m = cls._match.search(subtype)
                if m is not None:
                    subtype = m.group('subtype')

            from pandas.types.common import pandas_dtype
            try:
                subtype = pandas_dtype(subtype)
            except TypeError:
                raise ValueError("could not construct IntervalDtype")

        try:
            return cls._cache[str(subtype)]
        except KeyError:
            u = object.__new__(cls)
            u.subtype = subtype
            cls._cache[str(subtype)] = u
            return u
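The cls._cache lookup at the end means constructing the same subtype twice yields the same object. A small sketch of that behavior, assuming IntervalDtype is importable from the top-level pandas namespace (newer layout than the pandas.types import used above):

from pandas import IntervalDtype

a = IntervalDtype('int64')
b = IntervalDtype('interval[int64]')  # the _match regex strips the 'interval[...]' wrapper
print(a is b)                         # True: both resolve to the same cache entry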
Example #6
def test_dtype_equal_strict():

    # we are strict on kind equality
    for dtype in [np.int8, np.int16, np.int32]:
        assert not is_dtype_equal(np.int64, dtype)

    for dtype in [np.float32]:
        assert not is_dtype_equal(np.float64, dtype)

    # strict w.r.t. PeriodDtype
    assert not is_dtype_equal(PeriodDtype('D'),
                              PeriodDtype('2D'))

    # strict w.r.t. datetime64
    assert not is_dtype_equal(
        pandas_dtype('datetime64[ns, US/Eastern]'),
        pandas_dtype('datetime64[ns, CET]'))
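A minimal standalone sketch of the strictness being tested, assuming is_dtype_equal and pandas_dtype are exposed under pandas.api.types:

import numpy as np
from pandas.api.types import is_dtype_equal, pandas_dtype

print(is_dtype_equal(np.int64, np.int32))          # False: same kind, different size
print(is_dtype_equal(pandas_dtype('datetime64[ns, UTC]'),
                     np.dtype('datetime64[ns]')))  # False: tz-aware vs naive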
Example #7
def unconvert(values, dtype, compress=None):

    as_is_ext = isinstance(values, ExtType) and values.code == 0

    if as_is_ext:
        values = values.data

    if is_categorical_dtype(dtype):
        return values

    elif is_object_dtype(dtype):
        return np.array(values, dtype=object)

    dtype = pandas_dtype(dtype).base

    if not as_is_ext:
        values = values.encode('latin1')

    if compress:
        if compress == u'zlib':
            _check_zlib()
            decompress = zlib.decompress
        elif compress == u'blosc':
            _check_blosc()
            decompress = blosc.decompress
        else:
            raise ValueError("compress must be one of 'zlib' or 'blosc'")

        try:
            return np.frombuffer(
                _move_into_mutable_buffer(decompress(values)),
                dtype=dtype,
            )
        except _BadMove as e:
            # Pull the decompressed data off of the `_BadMove` exception.
            # We don't just store this in the locals because we want to
            # minimize the risk of giving users access to a `bytes` object
            # whose data is also given to a mutable buffer.
            values = e.args[0]
            if len(values) > 1:
                # The empty string and single characters are memoized in many
                # string creating functions in the capi. This case should not
                # warn even though we need to make a copy because we are only
                # copying at most 1 byte.
                warnings.warn(
                    'copying data after decompressing; this may mean that'
                    ' decompress is caching its result',
                    PerformanceWarning,
                )
                # fall through to copying `np.fromstring`

    # Copy the string into a numpy array.
    return np.fromstring(values, dtype=dtype)
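The compress branch decompresses into a mutable buffer and reads it zero-copy with np.frombuffer, falling back to a copying read otherwise. A standalone illustration of that round trip using plain zlib and numpy (none of the pandas msgpack helpers):

import zlib
import numpy as np

raw = np.arange(4, dtype=np.int64).tobytes()
packed = zlib.compress(raw)
restored = np.frombuffer(zlib.decompress(packed), dtype=np.int64)
print(restored.tolist())  # [0, 1, 2, 3]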
Example #8
File: period.py  Project: smoofra/pandas
 def astype(self, dtype, copy=True, how='start'):
     dtype = pandas_dtype(dtype)
     if is_object_dtype(dtype):
         return self.asobject
     elif is_integer_dtype(dtype):
         return Index(self.values.astype('i8', copy=copy), name=self.name,
                      dtype='i8')
     elif is_datetime64_dtype(dtype):
         return self.to_timestamp(how=how)
     elif is_datetime64tz_dtype(dtype):
         return self.to_timestamp(how=how).tz_localize(dtype.tz)
     elif is_period_dtype(dtype):
         return self.asfreq(freq=dtype.freq)
     raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype)
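Each branch maps onto a public conversion. A short usage sketch (older pandas API; current versions spell some of these via to_timestamp/asfreq rather than astype):

import pandas as pd

pidx = pd.period_range('2017-01', periods=3, freq='M')
print(pidx.to_timestamp())  # what the is_datetime64_dtype branch returns
print(pidx.asfreq('D'))     # the is_period_dtype branch: same spans at daily frequency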
Example #9
File: numeric.py  Project: orivej/pandas
 def astype(self, dtype, copy=True):
     dtype = pandas_dtype(dtype)
     if is_float_dtype(dtype):
         values = self._values.astype(dtype, copy=copy)
     elif is_integer_dtype(dtype):
         if self.hasnans:
             raise ValueError('cannot convert float NaN to integer')
         values = self._values.astype(dtype, copy=copy)
     elif is_object_dtype(dtype):
         values = self._values.astype('object', copy=copy)
     else:
         raise TypeError('Setting %s dtype to anything other than '
                         'float64 or object is not supported' %
                         self.__class__)
     return Index(values, name=self.name, dtype=dtype)
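The integer branch guards against NaN. A sketch of the error it protects against (a float index in any recent pandas behaves the same way):

import numpy as np
import pandas as pd

idx = pd.Index([1.5, 2.5, np.nan])
try:
    idx.astype('int64')
except ValueError as err:  # 'cannot convert float NaN to integer' (wording varies by version)
    print(err)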
Example #10
 def astype(self, dtype, copy=True, how='start'):
     dtype = pandas_dtype(dtype)
     if is_object_dtype(dtype):
         return self.asobject
     elif is_integer_dtype(dtype):
         if copy:
             return self._int64index.copy()
         else:
             return self._int64index
     elif is_datetime64_dtype(dtype):
         return self.to_timestamp(how=how)
     elif is_datetime64tz_dtype(dtype):
         return self.to_timestamp(how=how).tz_localize(dtype.tz)
     elif is_period_dtype(dtype):
         return self.asfreq(freq=dtype.freq)
     raise ValueError('Cannot cast PeriodIndex to dtype %s' % dtype)
Example #11
 def test_numpy_dtype(self):
     for dtype in ['M8[ns]', 'm8[ns]', 'object', 'float64', 'int64']:
         self.assertEqual(pandas_dtype(dtype), np.dtype(dtype))
Example #12
def decode(obj):
    """
    Decoder for deserializing numpy data types.
    """

    typ = obj.get(u'typ')
    if typ is None:
        return obj
    elif typ == u'timestamp':
        freq = obj[u'freq'] if 'freq' in obj else obj[u'offset']
        return Timestamp(obj[u'value'], tz=obj[u'tz'], freq=freq)
    elif typ == u'nat':
        return NaT
    elif typ == u'period':
        return Period(ordinal=obj[u'ordinal'], freq=obj[u'freq'])
    elif typ == u'index':
        dtype = dtype_for(obj[u'dtype'])
        data = unconvert(obj[u'data'], dtype, obj.get(u'compress'))
        return globals()[obj[u'klass']](data, dtype=dtype, name=obj[u'name'])
    elif typ == u'range_index':
        return globals()[obj[u'klass']](obj[u'start'],
                                        obj[u'stop'],
                                        obj[u'step'],
                                        name=obj[u'name'])
    elif typ == u'multi_index':
        dtype = dtype_for(obj[u'dtype'])
        data = unconvert(obj[u'data'], dtype, obj.get(u'compress'))
        data = [tuple(x) for x in data]
        return globals()[obj[u'klass']].from_tuples(data, names=obj[u'names'])
    elif typ == u'period_index':
        data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
        d = dict(name=obj[u'name'], freq=obj[u'freq'])
        if _is_pandas_legacy_version:
            # legacy
            return globals()[obj[u'klass']](data, **d)
        else:
            return globals()[obj[u'klass']]._from_ordinals(data, **d)
    elif typ == u'datetime_index':
        data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
        d = dict(name=obj[u'name'], freq=obj[u'freq'], verify_integrity=False)
        result = globals()[obj[u'klass']](data, **d)
        tz = obj[u'tz']

        # reverse tz conversion
        if tz is not None:
            result = result.tz_localize('UTC').tz_convert(tz)
        return result

    elif typ == u'category':
        from_codes = globals()[obj[u'klass']].from_codes
        return from_codes(codes=obj[u'codes'],
                          categories=obj[u'categories'],
                          ordered=obj[u'ordered'])

    elif typ == u'series':
        dtype = dtype_for(obj[u'dtype'])
        pd_dtype = pandas_dtype(dtype)

        index = obj[u'index']
        result = globals()[obj[u'klass']](unconvert(obj[u'data'], dtype,
                                                    obj[u'compress']),
                                          index=index,
                                          dtype=pd_dtype,
                                          name=obj[u'name'])
        return result

    elif typ == u'block_manager':
        axes = obj[u'axes']

        def create_block(b):
            values = _safe_reshape(
                unconvert(b[u'values'], dtype_for(b[u'dtype']),
                          b[u'compress']), b[u'shape'])

            # locs handles duplicate column names, and should be used instead
            # of items; see GH 9618
            if u'locs' in b:
                placement = b[u'locs']
            else:
                placement = axes[0].get_indexer(b[u'items'])
            return make_block(values=values,
                              klass=getattr(internals, b[u'klass']),
                              placement=placement,
                              dtype=b[u'dtype'])

        blocks = [create_block(b) for b in obj[u'blocks']]
        return globals()[obj[u'klass']](BlockManager(blocks, axes))
    elif typ == u'datetime':
        return parse(obj[u'data'])
    elif typ == u'datetime64':
        return np.datetime64(parse(obj[u'data']))
    elif typ == u'date':
        return parse(obj[u'data']).date()
    elif typ == u'timedelta':
        return timedelta(*obj[u'data'])
    elif typ == u'timedelta64':
        return np.timedelta64(int(obj[u'data']))
    # elif typ == 'sparse_series':
    #    dtype = dtype_for(obj['dtype'])
    #    return globals()[obj['klass']](
    #        unconvert(obj['sp_values'], dtype, obj['compress']),
    #        sparse_index=obj['sp_index'], index=obj['index'],
    #        fill_value=obj['fill_value'], kind=obj['kind'], name=obj['name'])
    # elif typ == 'sparse_dataframe':
    #    return globals()[obj['klass']](
    #        obj['data'], columns=obj['columns'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind']
    #    )
    # elif typ == 'sparse_panel':
    #    return globals()[obj['klass']](
    #        obj['data'], items=obj['items'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind'])
    elif typ == u'block_index':
        return globals()[obj[u'klass']](obj[u'length'], obj[u'blocs'],
                                        obj[u'blengths'])
    elif typ == u'int_index':
        return globals()[obj[u'klass']](obj[u'length'], obj[u'indices'])
    elif typ == u'ndarray':
        return unconvert(obj[u'data'], np.typeDict[obj[u'dtype']],
                         obj.get(u'compress')).reshape(obj[u'shape'])
    elif typ == u'np_scalar':
        if obj.get(u'sub_typ') == u'np_complex':
            return c2f(obj[u'real'], obj[u'imag'], obj[u'dtype'])
        else:
            dtype = dtype_for(obj[u'dtype'])
            try:
                return dtype(obj[u'data'])
            except:
                return dtype.type(obj[u'data'])
    elif typ == u'np_complex':
        return complex(obj[u'real'] + u'+' + obj[u'imag'] + u'j')
    elif isinstance(obj, (dict, list, set)):
        return obj
    else:
        return obj
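decode dispatches on the 'typ' tag and rebuilds each object with ordinary pandas/numpy constructors. Two of the simpler branches reproduced standalone for illustration (hand-written inputs, not real msgpack output):

import pandas as pd

print(pd.Period(ordinal=600, freq='M'))             # the u'period' branch: 600 months after 1970-01
print(pd.Timestamp(1490195805000000000, tz='UTC'))  # the u'timestamp' branch: nanoseconds since the epoch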
Example #13
 def test_categorical_dtype(self):
     self.assertEqual(pandas_dtype('category'), CategoricalDtype())
Example #14
 def test_period_dtype(self):
     for dtype in ['period[D]', 'period[3M]', 'period[U]',
                   'Period[D]', 'Period[3M]', 'Period[U]']:
         self.assertIs(pandas_dtype(dtype), PeriodDtype(dtype))
         self.assertEqual(pandas_dtype(dtype), PeriodDtype(dtype))
         self.assertEqual(pandas_dtype(dtype), dtype)
Example #15
File: period.py  Project: wakamori/pandas
    def __new__(cls,
                data=None,
                ordinal=None,
                freq=None,
                start=None,
                end=None,
                periods=None,
                copy=False,
                name=None,
                tz=None,
                dtype=None,
                **kwargs):

        if periods is not None:
            if is_float(periods):
                periods = int(periods)
            elif not is_integer(periods):
                raise ValueError('Periods must be a number, got %s' %
                                 str(periods))

        if name is None and hasattr(data, 'name'):
            name = data.name

        if dtype is not None:
            dtype = pandas_dtype(dtype)
            if not is_period_dtype(dtype):
                raise ValueError('dtype must be PeriodDtype')
            if freq is None:
                freq = dtype.freq
            elif freq != dtype.freq:
                msg = 'specified freq and dtype are different'
                raise IncompatibleFrequency(msg)

        # coerce freq to freq object, otherwise it can be coerced elementwise
        # which is slow
        if freq:
            freq = Period._maybe_convert_freq(freq)

        if data is None:
            if ordinal is not None:
                data = np.asarray(ordinal, dtype=np.int64)
            else:
                data, freq = cls._generate_range(start, end, periods, freq,
                                                 kwargs)
            return cls._from_ordinals(data, name=name, freq=freq)

        if isinstance(data, PeriodIndex):
            if freq is None or freq == data.freq:  # no freq change
                freq = data.freq
                data = data._values
            else:
                base1, _ = _gfc(data.freq)
                base2, _ = _gfc(freq)
                data = period.period_asfreq_arr(data._values, base1, base2, 1)
            return cls._simple_new(data, name=name, freq=freq)

        # not array / index
        if not isinstance(
                data, (np.ndarray, PeriodIndex, DatetimeIndex, Int64Index)):
            if is_scalar(data) or isinstance(data, Period):
                cls._scalar_data_error(data)

            # other iterable of some kind
            if not isinstance(data, (list, tuple)):
                data = list(data)

            data = np.asarray(data)

        # datetime other than period
        if is_datetime64_dtype(data.dtype):
            data = dt64arr_to_periodarr(data, freq, tz)
            return cls._from_ordinals(data, name=name, freq=freq)

        # check not floats
        if infer_dtype(data) == 'floating' and len(data) > 0:
            raise TypeError("PeriodIndex does not allow "
                            "floating point in construction")

        # anything else, likely an array of strings or periods
        data = _ensure_object(data)
        freq = freq or period.extract_freq(data)
        data = period.extract_ordinals(data, freq)
        return cls._from_ordinals(data, name=name, freq=freq)
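This newer constructor routes datetime64 input through dt64arr_to_periodarr; the public equivalent of that branch is a plain to_period conversion. Sketch:

import pandas as pd

dti = pd.date_range('2017-01-01', periods=3, freq='D')
print(dti.to_period('D'))  # the same conversion the datetime64 branch performs via ordinals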
Example #16
    def test_datetimetz_dtype(self):
        for dtype in ['datetime64[ns, US/Eastern]',
                      'datetime64[ns, Asia/Tokyo]',
                      'datetime64[ns, UTC]']:
            self.assertIs(pandas_dtype(dtype), DatetimeTZDtype(dtype))
            self.assertEqual(pandas_dtype(dtype), DatetimeTZDtype(dtype))
            self.assertEqual(pandas_dtype(dtype), dtype)

    def test_categorical_dtype(self):
        self.assertEqual(pandas_dtype('category'), CategoricalDtype())

    def test_period_dtype(self):
        for dtype in ['period[D]', 'period[3M]', 'period[U]',
                      'Period[D]', 'Period[3M]', 'Period[U]']:
            self.assertIs(pandas_dtype(dtype), PeriodDtype(dtype))
            self.assertEqual(pandas_dtype(dtype), PeriodDtype(dtype))
            self.assertEqual(pandas_dtype(dtype), dtype)


dtypes = dict(datetime_tz=pandas_dtype('datetime64[ns, US/Eastern]'),
              datetime=pandas_dtype('datetime64[ns]'),
              timedelta=pandas_dtype('timedelta64[ns]'),
              period=PeriodDtype('D'),
              integer=np.dtype(np.int64),
              float=np.dtype(np.float64),
              object=np.dtype(np.object),
              category=pandas_dtype('category'))


@pytest.mark.parametrize('name1,dtype1',
                         list(dtypes.items()),
                         ids=lambda x: str(x))
@pytest.mark.parametrize('name2,dtype2',
                         list(dtypes.items()),
                         ids=lambda x: str(x))
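The snippet cuts off after the two parametrize decorators; for illustration only, a function they could decorate might look like the following (hand-written sketch reusing the dtypes dict above, not the original test body):

from pandas.api.types import is_dtype_equal

def test_dtype_equal(name1, dtype1, name2, dtype2):
    # every dtype equals itself; in this cross-product nothing else does
    assert is_dtype_equal(dtype1, dtype1)
    if name1 != name2:
        assert not is_dtype_equal(dtype1, dtype2)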
Example #17
def decode(obj):
    """
    Decoder for deserializing numpy data types.
    """

    typ = obj.get(u'typ')
    if typ is None:
        return obj
    elif typ == u'timestamp':
        freq = obj[u'freq'] if 'freq' in obj else obj[u'offset']
        return Timestamp(obj[u'value'], tz=obj[u'tz'], freq=freq)
    elif typ == u'nat':
        return NaT
    elif typ == u'period':
        return Period(ordinal=obj[u'ordinal'], freq=obj[u'freq'])
    elif typ == u'index':
        dtype = dtype_for(obj[u'dtype'])
        data = unconvert(obj[u'data'], dtype,
                         obj.get(u'compress'))
        return globals()[obj[u'klass']](data, dtype=dtype, name=obj[u'name'])
    elif typ == u'range_index':
        return globals()[obj[u'klass']](obj[u'start'],
                                        obj[u'stop'],
                                        obj[u'step'],
                                        name=obj[u'name'])
    elif typ == u'multi_index':
        dtype = dtype_for(obj[u'dtype'])
        data = unconvert(obj[u'data'], dtype,
                         obj.get(u'compress'))
        data = [tuple(x) for x in data]
        return globals()[obj[u'klass']].from_tuples(data, names=obj[u'names'])
    elif typ == u'period_index':
        data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
        d = dict(name=obj[u'name'], freq=obj[u'freq'])
        return globals()[obj[u'klass']](data, **d)
    elif typ == u'datetime_index':
        data = unconvert(obj[u'data'], np.int64, obj.get(u'compress'))
        d = dict(name=obj[u'name'], freq=obj[u'freq'], verify_integrity=False)
        result = globals()[obj[u'klass']](data, **d)
        tz = obj[u'tz']

        # reverse tz conversion
        if tz is not None:
            result = result.tz_localize('UTC').tz_convert(tz)
        return result

    elif typ == u'category':
        from_codes = globals()[obj[u'klass']].from_codes
        return from_codes(codes=obj[u'codes'],
                          categories=obj[u'categories'],
                          ordered=obj[u'ordered'],
                          name=obj[u'name'])

    elif typ == u'series':
        dtype = dtype_for(obj[u'dtype'])
        pd_dtype = pandas_dtype(dtype)
        np_dtype = pandas_dtype(dtype).base

        index = obj[u'index']
        result = globals()[obj[u'klass']](unconvert(obj[u'data'], dtype,
                                                    obj[u'compress']),
                                          index=index,
                                          dtype=np_dtype,
                                          name=obj[u'name'])
        tz = getattr(pd_dtype, 'tz', None)
        if tz:
            result = result.dt.tz_localize('UTC').dt.tz_convert(tz)
        return result

    elif typ == u'block_manager':
        axes = obj[u'axes']

        def create_block(b):
            values = _safe_reshape(unconvert(
                b[u'values'], dtype_for(b[u'dtype']),
                b[u'compress']), b[u'shape'])

            # locs handles duplicate column names, and should be used instead
            # of items; see GH 9618
            if u'locs' in b:
                placement = b[u'locs']
            else:
                placement = axes[0].get_indexer(b[u'items'])
            return make_block(values=values,
                              klass=getattr(internals, b[u'klass']),
                              placement=placement,
                              dtype=b[u'dtype'])

        blocks = [create_block(b) for b in obj[u'blocks']]
        return globals()[obj[u'klass']](BlockManager(blocks, axes))
    elif typ == u'datetime':
        return parse(obj[u'data'])
    elif typ == u'datetime64':
        return np.datetime64(parse(obj[u'data']))
    elif typ == u'date':
        return parse(obj[u'data']).date()
    elif typ == u'timedelta':
        return timedelta(*obj[u'data'])
    elif typ == u'timedelta64':
        return np.timedelta64(int(obj[u'data']))
    # elif typ == 'sparse_series':
    #    dtype = dtype_for(obj['dtype'])
    #    return globals()[obj['klass']](
    #        unconvert(obj['sp_values'], dtype, obj['compress']),
    #        sparse_index=obj['sp_index'], index=obj['index'],
    #        fill_value=obj['fill_value'], kind=obj['kind'], name=obj['name'])
    # elif typ == 'sparse_dataframe':
    #    return globals()[obj['klass']](
    #        obj['data'], columns=obj['columns'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind']
    #    )
    # elif typ == 'sparse_panel':
    #    return globals()[obj['klass']](
    #        obj['data'], items=obj['items'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind'])
    elif typ == u'block_index':
        return globals()[obj[u'klass']](obj[u'length'], obj[u'blocs'],
                                        obj[u'blengths'])
    elif typ == u'int_index':
        return globals()[obj[u'klass']](obj[u'length'], obj[u'indices'])
    elif typ == u'ndarray':
        return unconvert(obj[u'data'], np.typeDict[obj[u'dtype']],
                         obj.get(u'compress')).reshape(obj[u'shape'])
    elif typ == u'np_scalar':
        if obj.get(u'sub_typ') == u'np_complex':
            return c2f(obj[u'real'], obj[u'imag'], obj[u'dtype'])
        else:
            dtype = dtype_for(obj[u'dtype'])
            try:
                return dtype(obj[u'data'])
            except:
                return dtype.type(obj[u'data'])
    elif typ == u'np_complex':
        return complex(obj[u'real'] + u'+' + obj[u'imag'] + u'j')
    elif isinstance(obj, (dict, list, set)):
        return obj
    else:
        return obj
Example #18
def decode(obj):
    """
    Decoder for deserializing numpy data types.
    """

    typ = obj.get(u"typ")
    if typ is None:
        return obj
    elif typ == u"timestamp":
        freq = obj[u"freq"] if "freq" in obj else obj[u"offset"]
        return Timestamp(obj[u"value"], tz=obj[u"tz"], freq=freq)
    elif typ == u"nat":
        return NaT
    elif typ == u"period":
        return Period(ordinal=obj[u"ordinal"], freq=obj[u"freq"])
    elif typ == u"index":
        dtype = dtype_for(obj[u"dtype"])
        data = unconvert(obj[u"data"], dtype, obj.get(u"compress"))
        return globals()[obj[u"klass"]](data, dtype=dtype, name=obj[u"name"])
    elif typ == u"range_index":
        return globals()[obj[u"klass"]](obj[u"start"], obj[u"stop"], obj[u"step"], name=obj[u"name"])
    elif typ == u"multi_index":
        dtype = dtype_for(obj[u"dtype"])
        data = unconvert(obj[u"data"], dtype, obj.get(u"compress"))
        data = [tuple(x) for x in data]
        return globals()[obj[u"klass"]].from_tuples(data, names=obj[u"names"])
    elif typ == u"period_index":
        data = unconvert(obj[u"data"], np.int64, obj.get(u"compress"))
        d = dict(name=obj[u"name"], freq=obj[u"freq"])
        return globals()[obj[u"klass"]](data, **d)
    elif typ == u"datetime_index":
        data = unconvert(obj[u"data"], np.int64, obj.get(u"compress"))
        d = dict(name=obj[u"name"], freq=obj[u"freq"], verify_integrity=False)
        result = globals()[obj[u"klass"]](data, **d)
        tz = obj[u"tz"]

        # reverse tz conversion
        if tz is not None:
            result = result.tz_localize("UTC").tz_convert(tz)
        return result

    elif typ == u"category":
        from_codes = globals()[obj[u"klass"]].from_codes
        return from_codes(
            codes=obj[u"codes"], categories=obj[u"categories"], ordered=obj[u"ordered"], name=obj[u"name"]
        )

    elif typ == u"series":
        dtype = dtype_for(obj[u"dtype"])
        pd_dtype = pandas_dtype(dtype)
        np_dtype = pandas_dtype(dtype).base

        index = obj[u"index"]
        result = globals()[obj[u"klass"]](
            unconvert(obj[u"data"], dtype, obj[u"compress"]), index=index, dtype=np_dtype, name=obj[u"name"]
        )
        tz = getattr(pd_dtype, "tz", None)
        if tz:
            result = result.dt.tz_localize("UTC").dt.tz_convert(tz)
        return result

    elif typ == u"block_manager":
        axes = obj[u"axes"]

        def create_block(b):
            values = _safe_reshape(unconvert(b[u"values"], dtype_for(b[u"dtype"]), b[u"compress"]), b[u"shape"])

            # locs handles duplicate column names, and should be used instead
            # of items; see GH 9618
            if u"locs" in b:
                placement = b[u"locs"]
            else:
                placement = axes[0].get_indexer(b[u"items"])
            return make_block(
                values=values, klass=getattr(internals, b[u"klass"]), placement=placement, dtype=b[u"dtype"]
            )

        blocks = [create_block(b) for b in obj[u"blocks"]]
        return globals()[obj[u"klass"]](BlockManager(blocks, axes))
    elif typ == u"datetime":
        return parse(obj[u"data"])
    elif typ == u"datetime64":
        return np.datetime64(parse(obj[u"data"]))
    elif typ == u"date":
        return parse(obj[u"data"]).date()
    elif typ == u"timedelta":
        return timedelta(*obj[u"data"])
    elif typ == u"timedelta64":
        return np.timedelta64(int(obj[u"data"]))
    # elif typ == 'sparse_series':
    #    dtype = dtype_for(obj['dtype'])
    #    return globals()[obj['klass']](
    #        unconvert(obj['sp_values'], dtype, obj['compress']),
    #        sparse_index=obj['sp_index'], index=obj['index'],
    #        fill_value=obj['fill_value'], kind=obj['kind'], name=obj['name'])
    # elif typ == 'sparse_dataframe':
    #    return globals()[obj['klass']](
    #        obj['data'], columns=obj['columns'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind']
    #    )
    # elif typ == 'sparse_panel':
    #    return globals()[obj['klass']](
    #        obj['data'], items=obj['items'],
    #        default_fill_value=obj['default_fill_value'],
    #        default_kind=obj['default_kind'])
    elif typ == u"block_index":
        return globals()[obj[u"klass"]](obj[u"length"], obj[u"blocs"], obj[u"blengths"])
    elif typ == u"int_index":
        return globals()[obj[u"klass"]](obj[u"length"], obj[u"indices"])
    elif typ == u"ndarray":
        return unconvert(obj[u"data"], np.typeDict[obj[u"dtype"]], obj.get(u"compress")).reshape(obj[u"shape"])
    elif typ == u"np_scalar":
        if obj.get(u"sub_typ") == u"np_complex":
            return c2f(obj[u"real"], obj[u"imag"], obj[u"dtype"])
        else:
            dtype = dtype_for(obj[u"dtype"])
            try:
                return dtype(obj[u"data"])
            except:
                return dtype.type(obj[u"data"])
    elif typ == u"np_complex":
        return complex(obj[u"real"] + u"+" + obj[u"imag"] + u"j")
    elif isinstance(obj, (dict, list, set)):
        return obj
    else:
        return obj
Example #19
 def test_numpy_string_dtype(self):
     # do not parse freq-like string as period dtype
     self.assertEqual(pandas_dtype('U'), np.dtype('U'))
     self.assertEqual(pandas_dtype('S'), np.dtype('S'))