Пример #1
0
def test_downcast_conv():
    """Exercise ``com._possibly_downcast_to_dtype`` with ``'infer'``.

    Covers fractional floats, floats that are whole numbers up to a tiny
    error, and inputs supplied with several different dtypes.
    """
    # fractional data: the result must compare equal to the input
    fractional = np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995])
    downcasted = com._possibly_downcast_to_dtype(fractional, 'infer')
    assert np.array_equal(downcasted, fractional)

    # last element a hair below, then a hair above, 9: the result must
    # compare equal to the all-integer array
    whole = np.array([8, 8, 8, 8, 9])
    for tail in (8.9999999999995, 9.0000000000005):
        nearly_whole = np.array([8., 8., 8., 8., tail])
        downcasted = com._possibly_downcast_to_dtype(nearly_whole, 'infer')
        assert np.array_equal(downcasted, whole)

    # conversions: whole-number data handed over with different dtypes
    whole_pair = np.array([1, 2])
    for source_dtype in (np.float64, object, np.int64):
        source = np.array([1.0, 2.0], dtype=source_dtype)
        tm.assert_almost_equal(
            com._possibly_downcast_to_dtype(source, 'infer'), whole_pair)

    # conversions: the same with a NaN present
    with_nan = np.array([1.0, 2.0, np.nan])
    for source_dtype in (np.float64, object):
        source = np.array([1.0, 2.0, np.nan], dtype=source_dtype)
        tm.assert_almost_equal(
            com._possibly_downcast_to_dtype(source, 'infer'), with_nan)
Пример #2
0
def test_downcast_conv():
    """Test 'infer' downcasting through ``com._possibly_downcast_to_dtype``.

    First checks plain float arrays, then the same values supplied with
    different input dtypes (with and without a NaN).
    """

    def infer(data):
        # single place that performs the call under test
        return com._possibly_downcast_to_dtype(data, 'infer')

    # test downcasting

    # fractional values: output must compare equal to the input
    floats = np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995])
    assert np.array_equal(infer(floats), floats)

    # last value marginally below 9
    below = np.array([8., 8., 8., 8., 8.9999999999995])
    assert np.array_equal(infer(below), np.array([8, 8, 8, 8, 9]))

    # last value marginally above 9
    above = np.array([8., 8., 8., 8., 9.0000000000005])
    assert np.array_equal(infer(above), np.array([8, 8, 8, 8, 9]))

    # conversions

    wanted = np.array([1, 2])
    for in_dtype in [np.float64, object, np.int64]:
        got = infer(np.array([1.0, 2.0], dtype=in_dtype))
        tm.assert_almost_equal(got, wanted)

    wanted = np.array([1.0, 2.0, np.nan])
    for in_dtype in [np.float64, object]:
        got = infer(np.array([1.0, 2.0, np.nan], dtype=in_dtype))
        tm.assert_almost_equal(got, wanted)
Пример #3
0
    def downcast(self, dtypes=None):
        """
        Try to downcast the values of this block.

        Parameters
        ----------
        dtypes : False, None, 'infer' or dict, default None
            - False : downcasting is switched off; ``[self]`` is returned.
            - None : treated as 'infer' for a single block; for any other
              block nothing is done and ``[self]`` is returned.
            - 'infer' : handed to ``_possibly_downcast_to_dtype``.
            - dict : passes validation for non-single blocks but is not
              implemented; see Raises.

            For a single block the value is passed to
            ``_possibly_downcast_to_dtype`` without further validation.

        Returns
        -------
        list
            ``[self]``, a one-element list with the downcast block, or
            (non-single block) one new block per entry of ``mgr_locs``.

        Raises
        ------
        ValueError
            Non-single block and ``dtypes`` is neither 'infer' nor a dict.
        AssertionError
            Non-single block, ``dtypes`` is a dict and there is at least
            one entry in ``mgr_locs`` to process.
        """

        # turn it off completely
        if dtypes is False:
            return [self]

        values = self.values

        # single block handling
        if self._is_single_block:

            # try to cast all non-floats here
            if dtypes is None:
                dtypes = 'infer'

            nv = _possibly_downcast_to_dtype(values, dtypes)
            return [make_block(nv, ndim=self.ndim,
                               fastpath=True, placement=self.mgr_locs)]

        # ndim > 1
        if dtypes is None:
            return [self]

        if not (dtypes == 'infer' or isinstance(dtypes, dict)):
            raise ValueError("downcast must have a dictionary or 'infer' as "
                             "its argument")

        # item-by-item
        # this is expensive as it splits the blocks items-by-item
        blocks = []
        for i, rl in enumerate(self.mgr_locs):

            # Per-item dtypes from a dict are not implemented.  The error is
            # raised here, on the first item, rather than up front so that a
            # block with no items still returns an empty list.
            if dtypes != 'infer':
                raise AssertionError("dtypes as dict is not supported yet")

            nv = _possibly_downcast_to_dtype(values[i], 'infer')
            nv = _block_shape(nv, ndim=self.ndim)

            blocks.append(make_block(nv,
                                     ndim=self.ndim, fastpath=True,
                                     placement=[rl]))

        return blocks
Пример #4
0
def test_downcast_conv():
    """Check ``com._possibly_downcast_to_dtype`` with ``'infer'`` on floats."""
    # test downcasting
    fractional = np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995])
    whole = np.array([8, 8, 8, 8, 9])

    # (input, array the result has to compare equal to)
    cases = [
        # fractional data is compared against itself
        (fractional, fractional),
        # last value just under 9
        (np.array([8., 8., 8., 8., 8.9999999999995]), whole),
        # last value just over 9
        (np.array([8., 8., 8., 8., 9.0000000000005]), whole),
    ]

    for data, wanted in cases:
        got = com._possibly_downcast_to_dtype(data, 'infer')
        assert np.array_equal(got, wanted)
Пример #5
0
    def _try_cast_result(self, result, dtype=None):
        """
        Try to cast ``result`` back to ``dtype``.

        ``dtype`` defaults to this block's own dtype; the values may have
        round-tripped through object in the meantime.
        """
        target = self.dtype if dtype is None else dtype

        # integer, bool and datetime blocks skip the float special-casing
        # and go straight to the generic downcast at the bottom
        generic_only = self.is_integer or self.is_bool or self.is_datetime

        if not generic_only and self.is_float and result.dtype == self.dtype:

            # protect against a bool/object showing up here
            if isinstance(target, compat.string_types) and target == 'infer':
                return result

            # work with the scalar type rather than a dtype instance
            kind = target if isinstance(target, type) else target.type

            if issubclass(kind, np.bool_):
                if isnull(result).all():
                    return result.astype(np.bool_)

                # go through object and replace 1 / 0 with True / False
                as_object = result.astype(np.object_)
                as_object[as_object == 1] = True
                as_object[as_object == 0] = False
                return as_object

            if issubclass(kind, np.object_):
                return result.astype(np.object_)

            return result

        # may need to change the dtype here
        return _possibly_downcast_to_dtype(result, target)
Пример #6
0
def test_downcast_conv():
    """Downcasting floats with ``'infer'``: fractional vs. near-whole data."""
    # test downcasting

    # fractional input: the result has to compare equal to the input
    source = np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995])
    assert np.array_equal(
        com._possibly_downcast_to_dtype(source, 'infer'), source)

    # the final value sits just below, then just above, 9; either way the
    # result has to compare equal to the integer array
    for final in (8.9999999999995, 9.0000000000005):
        source = np.array([8., 8., 8., 8., final])
        target = np.array([8, 8, 8, 8, 9])
        outcome = com._possibly_downcast_to_dtype(source, 'infer')
        assert np.array_equal(outcome, target)
Пример #7
0
def to_numeric(arg, errors='raise', downcast=None):
    """
    Convert argument to a numeric type.

    Parameters
    ----------
    arg : scalar, list, tuple, 1-d array, Index or Series
        A scalar that is already a number is returned unchanged.
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as NaN
        - If 'ignore', then invalid parsing will return the input
    downcast : {'integer', 'signed', 'unsigned', 'float'} , default None
        If not None, and if the data has been successfully cast to a
        numerical dtype (or if the data was numeric to begin with),
        downcast that resulting data to the smallest numerical dtype
        possible according to the following rules:

        - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
        - 'unsigned': smallest unsigned int dtype (min.: np.uint8);
          only attempted when the data holds no negative value
        - 'float': smallest float dtype (min.: np.float32)

        As this behaviour is separate from the core conversion to
        numeric values, any errors raised during the downcasting
        will be surfaced regardless of the value of the 'errors' input.

        In addition, downcasting will only occur if the size
        of the resulting data's dtype is strictly larger than
        the dtype it is to be cast to, so if none of the dtypes
        checked satisfy that specification, no downcasting will be
        performed on the data.

        .. versionadded:: 0.19.0

    Returns
    -------
    ret : numeric if parsing succeeded.
        Return type depends on input.  Series if Series, Index if Index,
        scalar if scalar, otherwise ndarray

    Raises
    ------
    ValueError
        If `downcast` is not one of the accepted values.
    TypeError
        If `arg` has more than one dimension.

    Examples
    --------
    Take separate series and convert to numeric, coercing when told to

    >>> import pandas as pd
    >>> s = pd.Series(['1.0', '2', -3])
    >>> pd.to_numeric(s)
    0    1.0
    1    2.0
    2   -3.0
    dtype: float64
    >>> pd.to_numeric(s, downcast='float')
    0    1.0
    1    2.0
    2   -3.0
    dtype: float32
    >>> pd.to_numeric(s, downcast='signed')
    0    1
    1    2
    2   -3
    dtype: int8
    >>> s = pd.Series(['apple', '1.0', '2', -3])
    >>> pd.to_numeric(s, errors='ignore')
    0    apple
    1      1.0
    2        2
    3       -3
    dtype: object
    >>> pd.to_numeric(s, errors='coerce')
    0    NaN
    1    1.0
    2    2.0
    3   -3.0
    dtype: float64
    """
    if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'):
        raise ValueError('invalid downcasting method provided')

    is_series = False
    is_index = False
    is_scalar = False

    # pull out the raw values and remember how to re-wrap the result
    if isinstance(arg, pd.Series):
        is_series = True
        values = arg.values
    elif isinstance(arg, pd.Index):
        is_index = True
        # prefer the i8 view when the index provides one
        values = arg.asi8
        if values is None:
            values = arg.values
    elif isinstance(arg, (list, tuple)):
        values = np.array(arg, dtype='O')
    elif np.isscalar(arg):
        if com.is_number(arg):
            return arg
        is_scalar = True
        values = np.array([arg], dtype='O')
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a list, tuple, 1-d array, or Series')
    else:
        values = arg

    try:
        if com.is_numeric_dtype(values):
            pass
        elif com.is_datetime_or_timedelta_dtype(values):
            values = values.astype(np.int64)
        else:
            values = com._ensure_object(values)
            # anything other than 'ignore' / 'raise' coerces bad values
            coerce_numeric = errors not in ('ignore', 'raise')

            values = lib.maybe_convert_numeric(values,
                                               set(),
                                               coerce_numeric=coerce_numeric)

    except Exception:
        # deliberate best effort: for 'ignore' / 'coerce' the values are
        # left as they were when the conversion failed
        if errors == 'raise':
            raise

    # attempt downcast only if the data has been successfully converted
    # to a numerical dtype and if a downcast method has been specified
    if downcast is not None and com.is_numeric_dtype(values):
        typecodes = None

        if downcast in ('integer', 'signed'):
            typecodes = np.typecodes['Integer']
        elif downcast == 'unsigned':
            # zero is a valid unsigned value, so only negatives rule the
            # downcast out; empty data has no minimum to inspect
            if np.size(values) == 0 or np.min(values) >= 0:
                typecodes = np.typecodes['UnsignedInteger']
        elif downcast == 'float':
            typecodes = np.typecodes['Float']

            # pandas support goes only to np.float32,
            # as float dtypes smaller than that are
            # extremely rare and not well supported
            float_32_char = np.dtype(np.float32).char
            float_32_ind = typecodes.index(float_32_char)
            typecodes = typecodes[float_32_ind:]

        if typecodes is not None:
            # from smallest to largest
            for dtype in typecodes:
                if np.dtype(dtype).itemsize < values.dtype.itemsize:
                    values = com._possibly_downcast_to_dtype(values, dtype)

                    # successful conversion
                    if values.dtype == dtype:
                        break

    if is_series:
        return pd.Series(values, index=arg.index, name=arg.name)
    elif is_index:
        # because we want to coerce to numeric if possible,
        # do not use _shallow_copy_with_infer
        return Index(values, name=arg.name)
    elif is_scalar:
        return values[0]
    else:
        return values
Пример #8
0
def to_numeric(arg, errors='raise', downcast=None):
    """
    Convert argument to a numeric type.

    Parameters
    ----------
    arg : scalar, list, tuple, 1-d array, Index or Series
        A scalar that is already a number is returned unchanged.
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as NaN
        - If 'ignore', then invalid parsing will return the input
    downcast : {'integer', 'signed', 'unsigned', 'float'} , default None
        If not None, and if the data has been successfully cast to a
        numerical dtype (or if the data was numeric to begin with),
        downcast that resulting data to the smallest numerical dtype
        possible according to the following rules:

        - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
        - 'unsigned': smallest unsigned int dtype (min.: np.uint8);
          only attempted when the data holds no negative value
        - 'float': smallest float dtype (min.: np.float32)

        As this behaviour is separate from the core conversion to
        numeric values, any errors raised during the downcasting
        will be surfaced regardless of the value of the 'errors' input.

        In addition, downcasting will only occur if the size
        of the resulting data's dtype is strictly larger than
        the dtype it is to be cast to, so if none of the dtypes
        checked satisfy that specification, no downcasting will be
        performed on the data.

        .. versionadded:: 0.19.0

    Returns
    -------
    ret : numeric if parsing succeeded.
        Return type depends on input.  Series if Series, Index if Index,
        scalar if scalar, otherwise ndarray

    Raises
    ------
    ValueError
        If `downcast` is not one of the accepted values.
    TypeError
        If `arg` has more than one dimension.

    Examples
    --------
    Take separate series and convert to numeric, coercing when told to

    >>> import pandas as pd
    >>> s = pd.Series(['1.0', '2', -3])
    >>> pd.to_numeric(s)
    0    1.0
    1    2.0
    2   -3.0
    dtype: float64
    >>> pd.to_numeric(s, downcast='float')
    0    1.0
    1    2.0
    2   -3.0
    dtype: float32
    >>> pd.to_numeric(s, downcast='signed')
    0    1
    1    2
    2   -3
    dtype: int8
    >>> s = pd.Series(['apple', '1.0', '2', -3])
    >>> pd.to_numeric(s, errors='ignore')
    0    apple
    1      1.0
    2        2
    3       -3
    dtype: object
    >>> pd.to_numeric(s, errors='coerce')
    0    NaN
    1    1.0
    2    2.0
    3   -3.0
    dtype: float64
    """
    if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'):
        raise ValueError('invalid downcasting method provided')

    is_series = False
    is_index = False
    is_scalar = False

    # extract the underlying values; the flags record how to wrap them
    # again on the way out
    if isinstance(arg, pd.Series):
        is_series = True
        values = arg.values
    elif isinstance(arg, pd.Index):
        is_index = True
        # fall back to .values when the index has no i8 representation
        values = arg.asi8
        if values is None:
            values = arg.values
    elif isinstance(arg, (list, tuple)):
        values = np.array(arg, dtype='O')
    elif np.isscalar(arg):
        if com.is_number(arg):
            return arg
        is_scalar = True
        values = np.array([arg], dtype='O')
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a list, tuple, 1-d array, or Series')
    else:
        values = arg

    try:
        if com.is_numeric_dtype(values):
            pass
        elif com.is_datetime_or_timedelta_dtype(values):
            values = values.astype(np.int64)
        else:
            values = com._ensure_object(values)
            # only 'ignore' and 'raise' leave unparseable entries alone
            coerce_numeric = errors not in ('ignore', 'raise')

            values = lib.maybe_convert_numeric(values, set(),
                                               coerce_numeric=coerce_numeric)

    except Exception:
        # best effort by design: unless asked to raise, carry on with
        # whatever `values` held when the conversion failed
        if errors == 'raise':
            raise

    # attempt downcast only if the data has been successfully converted
    # to a numerical dtype and if a downcast method has been specified
    if downcast is not None and com.is_numeric_dtype(values):
        typecodes = None

        if downcast in ('integer', 'signed'):
            typecodes = np.typecodes['Integer']
        elif downcast == 'unsigned':
            # 0 fits every unsigned dtype, so the test is ">= 0"; empty
            # data is let through because np.min cannot reduce it
            if np.size(values) == 0 or np.min(values) >= 0:
                typecodes = np.typecodes['UnsignedInteger']
        elif downcast == 'float':
            typecodes = np.typecodes['Float']

            # pandas support goes only to np.float32,
            # as float dtypes smaller than that are
            # extremely rare and not well supported
            float_32_char = np.dtype(np.float32).char
            float_32_ind = typecodes.index(float_32_char)
            typecodes = typecodes[float_32_ind:]

        if typecodes is not None:
            # from smallest to largest
            for dtype in typecodes:
                if np.dtype(dtype).itemsize < values.dtype.itemsize:
                    values = com._possibly_downcast_to_dtype(
                        values, dtype)

                    # successful conversion
                    if values.dtype == dtype:
                        break

    if is_series:
        return pd.Series(values, index=arg.index, name=arg.name)
    elif is_index:
        # because we want to coerce to numeric if possible,
        # do not use _shallow_copy_with_infer
        return Index(values, name=arg.name)
    elif is_scalar:
        return values[0]
    else:
        return values