Exemplo n.º 1
0
    def test_convert_infs(self):
        # both signs of the infinity literal must yield a float64 result
        for token in ('inf', '-inf'):
            arr = np.array([token] * 3, dtype='O')
            result = lib.maybe_convert_numeric(arr, set(), False)
            self.assertTrue(result.dtype == np.float64)
Exemplo n.º 2
0
    def test_convert_infs(self):
        # both signs of the infinity literal must yield a float64 result
        for token in ("inf", "-inf"):
            arr = np.array([token] * 3, dtype="O")
            result = lib.maybe_convert_numeric(arr, set(), False)
            self.assertTrue(result.dtype == np.float64)
Exemplo n.º 3
0
    def test_convert_infs(self):
        # positive and negative infinity strings are checked the same way
        for literal in ('inf', '-inf'):
            arr = np.array([literal, literal, literal], dtype='O')
            converted = lib.maybe_convert_numeric(arr, set(), False)
            self.assertTrue(converted.dtype == np.float64)
Exemplo n.º 4
0
def test_convert_infs():
    # both signs of the infinity literal must yield a float64 result
    for token in ("inf", "-inf"):
        arr = np.array([token] * 3, dtype="O")
        result = lib.maybe_convert_numeric(arr, set(), False)
        assert result.dtype == np.float64
Exemplo n.º 5
0
    def test_convert_numeric_uint64(self):
        big = 2**63
        exp = np.array([big], dtype=np.uint64)

        # same magnitude supplied as a Python int, a string and a numpy scalar
        for item in (big, str(big), np.uint64(big)):
            arr = np.array([item], dtype=object)
            result = lib.maybe_convert_numeric(arr, set())
            tm.assert_numpy_array_equal(result, exp)
Exemplo n.º 6
0
    def test_convert_numeric_uint64(self):
        threshold = 2**63
        representations = (threshold, str(threshold), np.uint64(threshold))

        # each representation of the value must convert to the same uint64 array
        for item in representations:
            arr = np.array([item], dtype=object)
            exp = np.array([threshold], dtype=np.uint64)
            tm.assert_numpy_array_equal(
                lib.maybe_convert_numeric(arr, set()), exp)
Exemplo n.º 7
0
def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True):
    """
    Try to coerce an object-dtype array to datetimes and/or numbers.

    Parameters
    ----------
    values : ndarray
        Input array. Each conversion step only runs while ``values.dtype``
        is ``np.object_``.
    convert_dates : bool or 'coerce', default True
        If 'coerce', cast to datetime64[ns] through
        ``_possibly_cast_to_datetime`` with ``coerce=True`` and keep the
        result unless it is entirely null. Any other truthy value is
        forwarded to ``lib.maybe_convert_objects`` as ``convert_datetime``.
    convert_numeric : bool, default True
        If truthy, attempt ``lib.maybe_convert_numeric`` with
        ``coerce_numeric=True`` and keep the result unless it is entirely
        null.

    Returns
    -------
    The converted array, or ``values`` unchanged when no conversion applied.
    """

    if values.dtype == np.object_ and convert_dates:

        # we take an aggressive stance and convert to datetime64[ns]
        if convert_dates == 'coerce':
            new_values = _possibly_cast_to_datetime(values, 'M8[ns]',
                                                    coerce=True)

            # an all-null result means nothing parsed; keep the original
            if not isnull(new_values).all():
                values = new_values

        else:
            values = lib.maybe_convert_objects(values,
                                               convert_datetime=convert_dates)

    if values.dtype == np.object_ and convert_numeric:
        try:
            new_values = lib.maybe_convert_numeric(values, set(),
                                                   coerce_numeric=True)

            # an all-null result means nothing parsed; keep the original
            if not isnull(new_values).all():
                values = new_values

        except Exception:
            # numeric conversion is best-effort; Exception (rather than a
            # bare except) lets KeyboardInterrupt/SystemExit propagate
            pass

    return values
Exemplo n.º 8
0
def _soft_convert_objects(values,
                          datetime=True,
                          numeric=True,
                          timedelta=True,
                          coerce=False,
                          copy=True):
    """
    Try to coerce object-dtype data to datetimes, timedeltas and/or numbers.

    Parameters
    ----------
    values : list, tuple, scalar or array-like
        Lists and tuples are wrapped in an object ndarray; anything without
        a ``dtype`` attribute is wrapped as a one-element object ndarray.
        Input whose dtype is not object is returned without conversion.
    datetime, numeric, timedelta : bool, default True
        Which conversions to attempt.
    coerce : bool, default False
        If True, exactly one conversion flag may be set and the matching
        ``to_datetime`` / ``to_timedelta`` / ``to_numeric`` result (called
        with ``errors='coerce'``) is returned directly.
    copy : bool, default True
        Whether to copy non-object input, and the result of the numeric
        soft conversion step.

    Returns
    -------
    The converted values, or the input when no conversion applied.

    Raises
    ------
    ValueError
        If no conversion flag is set, or if more than one is set together
        with ``coerce=True``.
    """

    conversion_count = sum((datetime, numeric, timedelta))
    if conversion_count == 0:
        raise ValueError('At least one of datetime, numeric or timedelta must '
                         'be True.')
    elif conversion_count > 1 and coerce:
        raise ValueError("Only one of 'datetime', 'numeric' or "
                         "'timedelta' can be True when coerce=True.")

    if isinstance(values, (list, tuple)):
        # List or scalar
        values = np.array(values, dtype=np.object_)
    elif not hasattr(values, 'dtype'):
        values = np.array([values], dtype=np.object_)
    elif not is_object_dtype(values.dtype):
        # If not object, do not attempt conversion
        values = values.copy() if copy else values
        return values

    # If 1 flag is coerce, ensure 2 others are False
    if coerce:
        # Immediate return if coerce
        if datetime:
            from pandas import to_datetime
            return to_datetime(values, errors='coerce', box=False)
        elif timedelta:
            from pandas import to_timedelta
            return to_timedelta(values, errors='coerce', box=False)
        elif numeric:
            from pandas import to_numeric
            return to_numeric(values, errors='coerce')

    # Soft conversions
    if datetime:
        values = lib.maybe_convert_objects(values, convert_datetime=datetime)

    if timedelta and is_object_dtype(values.dtype):
        # Object check to ensure only run if previous did not convert
        values = lib.maybe_convert_objects(values, convert_timedelta=timedelta)

    if numeric and is_object_dtype(values.dtype):
        try:
            converted = lib.maybe_convert_numeric(values,
                                                  set(),
                                                  coerce_numeric=True)
            # If all NaNs, then do not-alter
            values = converted if not isnull(converted).all() else values
            values = values.copy() if copy else values
        except Exception:
            # numeric conversion is best-effort; Exception (rather than a
            # bare except) lets KeyboardInterrupt/SystemExit propagate
            pass

    return values
Exemplo n.º 9
0
    def test_convert_numeric_int64_uint64(self):
        msg = 'uint64 and negative values detected'
        big, neg = 2**63, -1
        pairs = ([big, neg],
                 [str(big), neg],
                 [str(big), str(neg)],
                 [neg, big],
                 [neg, str(big)],
                 [str(neg), str(big)])
        cases = [np.array(pair, dtype=object) for pair in pairs]

        # with coercion the mixed input is rejected ...
        for case in cases:
            with tm.assertRaisesRegexp(ValueError, msg):
                lib.maybe_convert_numeric(case, set(), coerce_numeric=True)

        # ... without it the input is expected back unchanged
        for case in cases:
            result = lib.maybe_convert_numeric(case, set())
            tm.assert_numpy_array_equal(result, case)
Exemplo n.º 10
0
def _possibly_convert_objects(values, convert_dates=True, convert_numeric=True,
                              convert_timedeltas=True, copy=True):
    """
    Try to coerce object-dtype data to datetimes, timedeltas and/or numbers.

    Parameters
    ----------
    values : list, tuple, scalar or array-like
        Lists and tuples are wrapped in an object ndarray; anything without
        a ``dtype`` attribute is wrapped as a one-element object ndarray.
    convert_dates : bool or 'coerce', default True
        If 'coerce', cast to datetime64[ns] with ``errors="coerce"`` and
        keep the result unless it is entirely null. Any other truthy value
        is forwarded to ``lib.maybe_convert_objects``.
    convert_numeric : bool, default True
        If truthy, attempt ``lib.maybe_convert_numeric`` with
        ``coerce_numeric=True`` and keep the result unless it is entirely
        null; otherwise fall back to ``lib.maybe_convert_objects``.
    convert_timedeltas : bool or 'coerce', default True
        If 'coerce', convert with ``to_timedelta(values, coerce=True)`` and
        keep the result unless it is entirely null. Any other truthy value
        is forwarded to ``lib.maybe_convert_objects``.
    copy : bool, default True
        Whether to return a copy of the final values.

    Returns
    -------
    The converted values, or the input when no conversion applied.
    """

    # if we have passed in a list or scalar
    if isinstance(values, (list, tuple)):
        values = np.array(values, dtype=np.object_)
    if not hasattr(values, "dtype"):
        values = np.array([values], dtype=np.object_)

    # convert dates
    if convert_dates and values.dtype == np.object_:

        # we take an aggressive stance and convert to datetime64[ns]
        if convert_dates == "coerce":
            new_values = _possibly_cast_to_datetime(values, "M8[ns]",
                                                    errors="coerce")

            # an all-null result means nothing parsed; keep the original
            if not isnull(new_values).all():
                values = new_values

        else:
            values = lib.maybe_convert_objects(values,
                                               convert_datetime=convert_dates)

    # convert timedeltas
    if convert_timedeltas and values.dtype == np.object_:

        if convert_timedeltas == "coerce":
            from pandas.tseries.timedeltas import to_timedelta

            new_values = to_timedelta(values, coerce=True)

            # an all-null result means nothing parsed; keep the original
            if not isnull(new_values).all():
                values = new_values

        else:
            values = lib.maybe_convert_objects(
                values, convert_timedelta=convert_timedeltas)

    # convert to numeric
    if values.dtype == np.object_:
        if convert_numeric:
            try:
                new_values = lib.maybe_convert_numeric(values, set(),
                                                       coerce_numeric=True)

                # an all-null result means nothing parsed; keep the original
                if not isnull(new_values).all():
                    values = new_values

            except Exception:
                # numeric conversion is best-effort; Exception (rather than
                # a bare except) lets KeyboardInterrupt/SystemExit propagate
                pass
        else:
            # soft-conversion
            values = lib.maybe_convert_objects(values)

    values = values.copy() if copy else values

    return values
Exemplo n.º 11
0
    def test_maybe_convert_numeric_post_floatify_nan(self):
        # see gh-13314
        # '-999.000' only matches the NA set once it has been parsed as a float
        nan_values = {-999, -999.0}
        data = np.array(['1.200', '-999.000', '4.500'], dtype=object)
        expected = np.array([1.2, np.nan, 4.5], dtype=np.float64)

        for coerce_type in (True, False):
            result = lib.maybe_convert_numeric(data, nan_values, coerce_type)
            tm.assert_numpy_array_equal(result, expected)
Exemplo n.º 12
0
    def test_maybe_convert_numeric_post_floatify_nan(self):
        # see gh-13314
        # the sentinel string must be recognised as NA after float parsing
        raw = ['1.200', '-999.000', '4.500']
        data = np.array(raw, dtype=object)
        nan_values = {-999, -999.0}
        expected = np.array([1.2, np.nan, 4.5], dtype=np.float64)

        for coerce_type in (True, False):
            tm.assert_numpy_array_equal(
                lib.maybe_convert_numeric(data, nan_values, coerce_type),
                expected)
Exemplo n.º 13
0
    def test_convert_numeric_uint64_nan(self):
        msg = 'uint64 array detected'
        big = 2**63

        # uint64-sized values next to an explicit NaN, no NA sentinels
        with_nan = ([big, np.nan], [str(big), np.nan],
                    [np.nan, big], [np.nan, str(big)])
        cases = [(np.array(items, dtype=object), set()) for items in with_nan]

        # uint64-sized values where one of them is itself an NA sentinel
        with_sentinel = ([big, big + 1], [str(big), str(big + 1)])
        cases += [(np.array(items, dtype=object), set([big]))
                  for items in with_sentinel]

        # with coercion the conversion is rejected ...
        for arr, na_values in cases:
            with tm.assertRaisesRegexp(ValueError, msg):
                lib.maybe_convert_numeric(arr, na_values,
                                          coerce_numeric=True)

        # ... without it the input is expected back unchanged
        for arr, na_values in cases:
            result = lib.maybe_convert_numeric(arr, na_values)
            tm.assert_numpy_array_equal(result, arr)
Exemplo n.º 14
0
    def test_convert_numeric_uint64_nan(self):
        msg = 'uint64 array detected'
        limit = 2**63

        def make(items, na_values):
            # pair an object array with the NA sentinels to use for it
            return np.array(items, dtype=object), na_values

        cases = [make([limit, np.nan], set()),
                 make([str(limit), np.nan], set()),
                 make([np.nan, limit], set()),
                 make([np.nan, str(limit)], set()),
                 make([limit, limit + 1], set([limit])),
                 make([str(limit), str(limit + 1)], set([limit]))]

        # coercing pass first: every case must raise
        for arr, na_values in cases:
            with tm.assertRaisesRegexp(ValueError, msg):
                lib.maybe_convert_numeric(arr, na_values,
                                          coerce_numeric=True)

        # non-coercing pass: every case must come back as given
        for arr, na_values in cases:
            tm.assert_numpy_array_equal(
                lib.maybe_convert_numeric(arr, na_values), arr)
Exemplo n.º 15
0
def to_numeric(arg, errors='raise'):
    """
    Convert argument to a numeric type.

    Parameters
    ----------
    arg : list, tuple, 1-d array, or Series
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as NaN
        - If 'ignore', then invalid parsing will return the input

    Returns
    -------
    ret : numeric if parsing succeeded.
        Return type depends on input.  Series if Series, otherwise ndarray

    Raises
    ------
    TypeError
        If ``arg`` has more than one dimension.

    Examples
    --------
    Take separate series and convert to numeric, coercing when told to

    >>> import pandas as pd
    >>> s = pd.Series(['1.0', '2', -3])
    >>> pd.to_numeric(s)
    >>> s = pd.Series(['apple', '1.0', '2', -3])
    >>> pd.to_numeric(s, errors='ignore')
    >>> pd.to_numeric(s, errors='coerce')
    """

    index = name = None
    if isinstance(arg, pd.Series):
        # remember the labels so the result can be re-wrapped as a Series
        index, name = arg.index, arg.name
    elif isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype='O')
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a list, tuple, 1-d array, or Series')

    # fall back to the unconverted input if parsing fails and is tolerated
    conv = arg
    arg = com._ensure_object(arg)

    coerce_numeric = errors not in ('ignore', 'raise')

    try:
        conv = lib.maybe_convert_numeric(arg,
                                         set(),
                                         coerce_numeric=coerce_numeric)
    except Exception:
        # Exception (rather than a bare except) so KeyboardInterrupt and
        # SystemExit are never suppressed when errors != 'raise'
        if errors == 'raise':
            raise

    if index is not None:
        return pd.Series(conv, index=index, name=name)
    else:
        return conv
Exemplo n.º 16
0
def to_numeric(arg, errors='raise'):
    """
    Convert argument to a numeric type.

    Parameters
    ----------
    arg : list, tuple, 1-d array, or Series
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as NaN
        - If 'ignore', then invalid parsing will return the input

    Returns
    -------
    ret : numeric if parsing succeeded.
        Return type depends on input.  Series if Series, otherwise ndarray

    Raises
    ------
    TypeError
        If ``arg`` has more than one dimension.

    Examples
    --------
    Take separate series and convert to numeric, coercing when told to

    >>> import pandas as pd
    >>> s = pd.Series(['1.0', '2', -3])
    >>> pd.to_numeric(s)
    >>> s = pd.Series(['apple', '1.0', '2', -3])
    >>> pd.to_numeric(s, errors='ignore')
    >>> pd.to_numeric(s, errors='coerce')
    """

    index = name = None
    if isinstance(arg, pd.Series):
        # remember the labels so the result can be re-wrapped as a Series
        index, name = arg.index, arg.name
    elif isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype='O')
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a list, tuple, 1-d array, or Series')

    # fall back to the unconverted input if parsing fails and is tolerated
    conv = arg
    arg = com._ensure_object(arg)

    coerce_numeric = errors not in ('ignore', 'raise')

    try:
        conv = lib.maybe_convert_numeric(arg,
                                         set(),
                                         coerce_numeric=coerce_numeric)
    except Exception:
        # Exception (rather than a bare except) so KeyboardInterrupt and
        # SystemExit are never suppressed when errors != 'raise'
        if errors == 'raise':
            raise

    if index is not None:
        return pd.Series(conv, index=index, name=name)
    else:
        return conv
Exemplo n.º 17
0
def _soft_convert_objects(values, datetime=True, numeric=True, timedelta=True,
                          coerce=False, copy=True):
    """
    Try to coerce object-dtype data to datetimes, timedeltas and/or numbers.

    Parameters
    ----------
    values : list, tuple, scalar or array-like
        Lists and tuples are wrapped in an object ndarray; anything without
        a ``dtype`` attribute is wrapped as a one-element object ndarray.
        Input whose dtype is not object is returned without conversion.
    datetime, numeric, timedelta : bool, default True
        Which conversions to attempt.
    coerce : bool, default False
        If True, exactly one conversion flag may be set and the matching
        ``to_datetime`` / ``to_timedelta`` / ``to_numeric`` result (called
        with ``errors='coerce'``) is returned directly.
    copy : bool, default True
        Whether to copy non-object input, and the result of the numeric
        soft conversion step.

    Returns
    -------
    The converted values, or the input when no conversion applied.

    Raises
    ------
    ValueError
        If no conversion flag is set, or if more than one is set together
        with ``coerce=True``.
    """

    conversion_count = sum((datetime, numeric, timedelta))
    if conversion_count == 0:
        raise ValueError('At least one of datetime, numeric or timedelta must '
                         'be True.')
    elif conversion_count > 1 and coerce:
        raise ValueError("Only one of 'datetime', 'numeric' or "
                         "'timedelta' can be True when coerce=True.")

    if isinstance(values, (list, tuple)):
        # List or scalar
        values = np.array(values, dtype=np.object_)
    elif not hasattr(values, 'dtype'):
        values = np.array([values], dtype=np.object_)
    elif not is_object_dtype(values.dtype):
        # If not object, do not attempt conversion
        values = values.copy() if copy else values
        return values

    # If 1 flag is coerce, ensure 2 others are False
    if coerce:
        # Immediate return if coerce
        if datetime:
            from pandas import to_datetime
            return to_datetime(values, errors='coerce', box=False)
        elif timedelta:
            from pandas import to_timedelta
            return to_timedelta(values, errors='coerce', box=False)
        elif numeric:
            from pandas import to_numeric
            return to_numeric(values, errors='coerce')

    # Soft conversions
    if datetime:
        values = lib.maybe_convert_objects(values, convert_datetime=datetime)

    if timedelta and is_object_dtype(values.dtype):
        # Object check to ensure only run if previous did not convert
        values = lib.maybe_convert_objects(values, convert_timedelta=timedelta)

    if numeric and is_object_dtype(values.dtype):
        try:
            converted = lib.maybe_convert_numeric(values, set(),
                                                  coerce_numeric=True)
            # If all NaNs, then do not-alter
            values = converted if not isnull(converted).all() else values
            values = values.copy() if copy else values
        except Exception:
            # numeric conversion is best-effort; Exception (rather than a
            # bare except) lets KeyboardInterrupt/SystemExit propagate
            pass

    return values
Exemplo n.º 18
0
    def test_maybe_convert_numeric_infinities(self):
        # see gh-13274
        msg = "Unable to parse string"
        na_values = set(["", "NULL", "nan"])
        spellings = ("inf", "inF", "iNf", "Inf", "iNF", "InF", "INf", "INF")

        pos = np.array(["inf"], dtype=np.float64)
        neg = np.array(["-inf"], dtype=np.float64)

        def convert(text, maybe_int):
            # run the converter on a single-element object array
            arr = np.array([text], dtype=object)
            return lib.maybe_convert_numeric(arr, na_values, maybe_int)

        for infinity in spellings:
            for maybe_int in (True, False):
                # bare, negated, unicode and explicitly-positive spellings
                checks = ((infinity, pos),
                          ("-" + infinity, neg),
                          (u(infinity), pos),
                          ("+" + infinity, pos))
                for text, expected in checks:
                    out = convert(text, maybe_int)
                    tm.assert_numpy_array_equal(out, expected)

                # too many characters
                with tm.assertRaisesRegexp(ValueError, msg):
                    convert("foo_" + infinity, maybe_int)
Exemplo n.º 19
0
def _convert_types(values, na_values):
    """
    Convert ``values`` to a numeric or boolean array where possible.

    Returns a ``(result, na_count)`` tuple. For input that is already
    numeric or boolean, entries found in ``na_values`` are replaced by NaN
    (integers are cast to float64 first) and counted. Otherwise a numeric
    conversion is attempted; if it raises, the objects are sanitized in
    place of conversion, and a result that is still object dtype goes
    through a boolean conversion.
    """
    if issubclass(values.dtype.type, (np.number, np.bool_)):
        na_mask = lib.ismember(values, na_values)
        n_missing = na_mask.sum()
        if n_missing > 0:
            # integers cannot hold NaN, so widen them before masking
            if com.is_integer_dtype(values):
                values = values.astype(np.float64)
            np.putmask(values, na_mask, np.nan)
        return values, n_missing

    n_missing = 0
    try:
        converted = lib.maybe_convert_numeric(values, na_values)
    except Exception:
        n_missing = lib.sanitize_objects(values, na_values)
        converted = values

    if converted.dtype == np.object_:
        converted = lib.maybe_convert_bool(values)

    return converted, n_missing
Exemplo n.º 20
0
def _convert_types(values, na_values):
    """
    Convert ``values`` to a numeric or boolean array where possible.

    Returns a ``(result, na_count)`` tuple. For input that is already
    numeric or boolean, entries found in ``na_values`` are replaced by NaN
    (integers are cast to float64 first) and counted. Otherwise a numeric
    conversion is attempted; if it raises, the objects are sanitized in
    place of conversion, and a result that is still object dtype goes
    through a boolean conversion.
    """
    already_typed = issubclass(values.dtype.type, (np.number, np.bool_))
    if already_typed:
        na_mask = lib.ismember(values, na_values)
        n_missing = na_mask.sum()
        if n_missing > 0:
            # integers cannot hold NaN, so widen them before masking
            if com.is_integer_dtype(values):
                values = values.astype(np.float64)
            np.putmask(values, na_mask, np.nan)
        return values, n_missing

    n_missing = 0
    try:
        converted = lib.maybe_convert_numeric(values, na_values, False)
    except Exception:
        n_missing = lib.sanitize_objects(values, na_values, False)
        converted = values

    if converted.dtype == np.object_:
        converted = lib.maybe_convert_bool(values)

    return converted, n_missing
Exemplo n.º 21
0
    def test_maybe_convert_numeric_infinities(self):
        # see gh-13274
        msg = "Unable to parse string"
        na_values = set(['', 'NULL', 'nan'])
        spellings = ('inf', 'inF', 'iNf', 'Inf',
                     'iNF', 'InF', 'INf', 'INF')

        pos = np.array(['inf'], dtype=np.float64)
        neg = np.array(['-inf'], dtype=np.float64)

        def convert(text, maybe_int):
            # run the converter on a single-element object array
            arr = np.array([text], dtype=object)
            return lib.maybe_convert_numeric(arr, na_values, maybe_int)

        for infinity in spellings:
            for maybe_int in (True, False):
                # bare, negated, unicode and explicitly-positive spellings
                checks = ((infinity, pos),
                          ('-' + infinity, neg),
                          (u(infinity), pos),
                          ('+' + infinity, pos))
                for text, expected in checks:
                    out = convert(text, maybe_int)
                    tm.assert_numpy_array_equal(out, expected)

                # too many characters
                with tm.assertRaisesRegexp(ValueError, msg):
                    convert('foo_' + infinity, maybe_int)
Exemplo n.º 22
0
    def test_maybe_convert_numeric_infinities(self):
        # see gh-13274
        msg = "Unable to parse string"
        na_values = set(['', 'NULL', 'nan'])
        pos = np.array(['inf'], dtype=np.float64)
        neg = np.array(['-inf'], dtype=np.float64)

        # every mixed-case spelling of the literal
        infinities = ['inf', 'inF', 'iNf', 'Inf',
                      'iNF', 'InF', 'INf', 'INF']

        for infinity in infinities:
            for maybe_int in (True, False):
                # each accepted variant paired with the array it must produce
                accepted = [(infinity, pos),
                            ('-' + infinity, neg),
                            (u(infinity), pos),
                            ('+' + infinity, pos)]
                for text, expected in accepted:
                    arr = np.array([text], dtype=object)
                    out = lib.maybe_convert_numeric(arr, na_values,
                                                    maybe_int)
                    tm.assert_numpy_array_equal(out, expected)

                # too many characters
                bad = np.array(['foo_' + infinity], dtype=object)
                with tm.assertRaisesRegexp(ValueError, msg):
                    lib.maybe_convert_numeric(bad, na_values, maybe_int)
Exemplo n.º 23
0
 def test_convert_non_hashable(self):
     # GH13324
     # make sure that we are handling non-hashables
     # dtype=object is explicit: a ragged sequence without it is rejected
     # by recent NumPy instead of silently becoming an object array
     arr = np.array([[10.0, 2], 1.0, 'apple'], dtype=object)
     result = lib.maybe_convert_numeric(arr, set(), False, True)
     tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan]))
Exemplo n.º 24
0
 def test_scientific_no_exponent(self):
     # See PR 12215
     # a mantissa followed by a bare 'E'/'e' must come back as NaN
     tokens = ["42E", "2E", "99e", "6e"]
     arr = np.array(tokens, dtype="O")
     result = lib.maybe_convert_numeric(arr, set(), False, True)
     self.assertTrue(np.isnan(result).all())
Exemplo n.º 25
0
def to_numeric(arg, errors='raise'):
    """
    Convert argument to a numeric type.

    Parameters
    ----------
    arg : list, tuple, 1-d array, or Series
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as NaN
        - If 'ignore', then invalid parsing will return the input

    Returns
    -------
    ret : numeric if parsing succeeded.
        Return type depends on input.  Series if Series, otherwise ndarray

    Raises
    ------
    TypeError
        If ``arg`` has more than one dimension.

    Examples
    --------
    Take separate series and convert to numeric, coercing when told to

    >>> import pandas as pd
    >>> s = pd.Series(['1.0', '2', -3])
    >>> pd.to_numeric(s)
    >>> s = pd.Series(['apple', '1.0', '2', -3])
    >>> pd.to_numeric(s, errors='ignore')
    >>> pd.to_numeric(s, errors='coerce')
    """
    is_series = False
    is_index = False
    is_scalar = False

    if isinstance(arg, pd.Series):
        is_series = True
        values = arg.values
    elif isinstance(arg, pd.Index):
        is_index = True
        # prefer the int64 view when the index exposes one
        values = arg.asi8
        if values is None:
            values = arg.values
    elif isinstance(arg, (list, tuple)):
        values = np.array(arg, dtype='O')
    elif np.isscalar(arg):
        # numbers need no conversion; other scalars are parsed via a
        # one-element array and unwrapped again before returning
        if com.is_number(arg):
            return arg
        is_scalar = True
        values = np.array([arg], dtype='O')
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a list, tuple, 1-d array, or Series')
    else:
        values = arg

    if com.is_numeric_dtype(values):
        pass
    elif com.is_datetime_or_timedelta_dtype(values):
        values = values.astype(np.int64)
    else:
        values = com._ensure_object(values)
        coerce_numeric = errors not in ('ignore', 'raise')

        try:
            values = lib.maybe_convert_numeric(values,
                                               set(),
                                               coerce_numeric=coerce_numeric)
        except Exception:
            # Exception (rather than a bare except) so KeyboardInterrupt
            # and SystemExit are never suppressed when errors != 'raise'
            if errors == 'raise':
                raise

    if is_series:
        return pd.Series(values, index=arg.index, name=arg.name)
    elif is_index:
        # because we want to coerce to numeric if possible,
        # do not use _shallow_copy_with_infer
        return Index(values, name=arg.name)
    elif is_scalar:
        return values[0]
    else:
        return values
Exemplo n.º 26
0
 def test_convert_non_hashable(self):
     # GH13324
     # make sure that we are handling non-hashables
     # dtype=object is explicit: a ragged sequence without it is rejected
     # by recent NumPy instead of silently becoming an object array
     arr = np.array([[10.0, 2], 1.0, 'apple'], dtype=object)
     result = lib.maybe_convert_numeric(arr, set(), False, True)
     tm.assert_numpy_array_equal(result, np.array([np.nan, 1.0, np.nan]))
Exemplo n.º 27
0
 def test_scientific_no_exponent(self):
     # See PR 12215
     # a mantissa followed by a bare 'E'/'e' must come back as NaN
     tokens = ['42E', '2E', '99e', '6e']
     arr = np.array(tokens, dtype='O')
     result = lib.maybe_convert_numeric(arr, set(), False, True)
     self.assertTrue(np.isnan(result).all())
Exemplo n.º 28
0
def to_numeric(arg, errors='raise'):
    """
    Convert argument to a numeric type.

    Parameters
    ----------
    arg : list, tuple, 1-d array, or Series
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as NaN
        - If 'ignore', then invalid parsing will return the input

    Returns
    -------
    ret : numeric if parsing succeeded.
        Return type depends on input.  Series if Series, otherwise ndarray

    Raises
    ------
    TypeError
        If ``arg`` has more than one dimension.

    Examples
    --------
    Take separate series and convert to numeric, coercing when told to

    >>> import pandas as pd
    >>> s = pd.Series(['1.0', '2', -3])
    >>> pd.to_numeric(s)
    >>> s = pd.Series(['apple', '1.0', '2', -3])
    >>> pd.to_numeric(s, errors='ignore')
    >>> pd.to_numeric(s, errors='coerce')
    """
    is_series = False
    is_index = False
    is_scalar = False

    if isinstance(arg, pd.Series):
        is_series = True
        values = arg.values
    elif isinstance(arg, pd.Index):
        is_index = True
        # prefer the int64 view when the index exposes one
        values = arg.asi8
        if values is None:
            values = arg.values
    elif isinstance(arg, (list, tuple)):
        values = np.array(arg, dtype='O')
    elif np.isscalar(arg):
        # numbers need no conversion; other scalars are parsed via a
        # one-element array and unwrapped again before returning
        if com.is_number(arg):
            return arg
        is_scalar = True
        values = np.array([arg], dtype='O')
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a list, tuple, 1-d array, or Series')
    else:
        values = arg

    if com.is_numeric_dtype(values):
        pass
    elif com.is_datetime_or_timedelta_dtype(values):
        values = values.astype(np.int64)
    else:
        values = com._ensure_object(values)
        coerce_numeric = errors not in ('ignore', 'raise')

        try:
            values = lib.maybe_convert_numeric(values, set(),
                                               coerce_numeric=coerce_numeric)
        except Exception:
            # Exception (rather than a bare except) so KeyboardInterrupt
            # and SystemExit are never suppressed when errors != 'raise'
            if errors == 'raise':
                raise

    if is_series:
        return pd.Series(values, index=arg.index, name=arg.name)
    elif is_index:
        # because we want to coerce to numeric if possible,
        # do not use _shallow_copy_with_infer
        return Index(values, name=arg.name)
    elif is_scalar:
        return values[0]
    else:
        return values
Exemplo n.º 29
0
def to_numeric(arg, errors='raise', downcast=None):
    """
    Convert argument to a numeric type.

    Parameters
    ----------
    arg : list, tuple, 1-d array, or Series
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as NaN
        - If 'ignore', then invalid parsing will return the input
    downcast : {'integer', 'signed', 'unsigned', 'float'} , default None
        If not None, and if the data has been successfully cast to a
        numerical dtype (or if the data was numeric to begin with),
        downcast that resulting data to the smallest numerical dtype
        possible according to the following rules:

        - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
        - 'unsigned': smallest unsigned int dtype (min.: np.uint8)
        - 'float': smallest float dtype (min.: np.float32)

        As this behaviour is separate from the core conversion to
        numeric values, any errors raised during the downcasting
        will be surfaced regardless of the value of the 'errors' input.

        In addition, downcasting will only occur if the size
        of the resulting data's dtype is strictly larger than
        the dtype it is to be cast to, so if none of the dtypes
        checked satisfy that specification, no downcasting will be
        performed on the data.

        .. versionadded:: 0.19.0

    Returns
    -------
    ret : numeric if parsing succeeded.
        Return type depends on input.  Series if Series, otherwise ndarray

    Raises
    ------
    ValueError
        If ``downcast`` is not one of the accepted values.
    TypeError
        If ``arg`` has more than one dimension.

    Examples
    --------
    Take separate series and convert to numeric, coercing when told to

    >>> import pandas as pd
    >>> s = pd.Series(['1.0', '2', -3])
    >>> pd.to_numeric(s)
    0    1.0
    1    2.0
    2   -3.0
    dtype: float64
    >>> pd.to_numeric(s, downcast='float')
    0    1.0
    1    2.0
    2   -3.0
    dtype: float32
    >>> pd.to_numeric(s, downcast='signed')
    0    1
    1    2
    2   -3
    dtype: int8
    >>> s = pd.Series(['apple', '1.0', '2', -3])
    >>> pd.to_numeric(s, errors='ignore')
    0    apple
    1      1.0
    2        2
    3       -3
    dtype: object
    >>> pd.to_numeric(s, errors='coerce')
    0    NaN
    1    1.0
    2    2.0
    3   -3.0
    dtype: float64
    """
    if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'):
        raise ValueError('invalid downcasting method provided')

    is_series = False
    is_index = False
    is_scalar = False

    if isinstance(arg, pd.Series):
        is_series = True
        values = arg.values
    elif isinstance(arg, pd.Index):
        is_index = True
        # prefer the int64 view when the index exposes one
        values = arg.asi8
        if values is None:
            values = arg.values
    elif isinstance(arg, (list, tuple)):
        values = np.array(arg, dtype='O')
    elif np.isscalar(arg):
        # numbers need no conversion; other scalars are parsed via a
        # one-element array and unwrapped again before returning
        if is_number(arg):
            return arg
        is_scalar = True
        values = np.array([arg], dtype='O')
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a list, tuple, 1-d array, or Series')
    else:
        values = arg

    try:
        if is_numeric_dtype(values):
            pass
        elif is_datetime_or_timedelta_dtype(values):
            values = values.astype(np.int64)
        else:
            values = _ensure_object(values)
            coerce_numeric = errors not in ('ignore', 'raise')
            values = lib.maybe_convert_numeric(values,
                                               set(),
                                               coerce_numeric=coerce_numeric)

    except Exception:
        if errors == 'raise':
            raise

    # attempt downcast only if the data has been successfully converted
    # to a numerical dtype and if a downcast method has been specified
    if downcast is not None and is_numeric_dtype(values):
        typecodes = None

        if downcast in ('integer', 'signed'):
            typecodes = np.typecodes['Integer']
        elif downcast == 'unsigned' and (not len(values) or
                                         np.min(values) >= 0):
            # zero is representable as unsigned, so the bound is inclusive;
            # empty input is allowed through because np.min would raise on it
            typecodes = np.typecodes['UnsignedInteger']
        elif downcast == 'float':
            typecodes = np.typecodes['Float']

            # pandas support goes only to np.float32,
            # as float dtypes smaller than that are
            # extremely rare and not well supported
            float_32_char = np.dtype(np.float32).char
            float_32_ind = typecodes.index(float_32_char)
            typecodes = typecodes[float_32_ind:]

        if typecodes is not None:
            # from smallest to largest
            for dtype in typecodes:
                if np.dtype(dtype).itemsize < values.dtype.itemsize:
                    values = _possibly_downcast_to_dtype(values, dtype)

                    # successful conversion
                    if values.dtype == dtype:
                        break

    if is_series:
        return pd.Series(values, index=arg.index, name=arg.name)
    elif is_index:
        # because we want to coerce to numeric if possible,
        # do not use _shallow_copy_with_infer
        return Index(values, name=arg.name)
    elif is_scalar:
        return values[0]
    else:
        return values
Exemplo n.º 30
0
def to_numeric(arg, errors='raise', downcast=None):
    """
    Convert argument to a numeric type.

    Parameters
    ----------
    arg : list, tuple, 1-d array, or Series
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as NaN
        - If 'ignore', then invalid parsing will return the input
    downcast : {'integer', 'signed', 'unsigned', 'float'} , default None
        If not None, and if the data has been successfully cast to a
        numerical dtype (or if the data was numeric to begin with),
        downcast that resulting data to the smallest numerical dtype
        possible according to the following rules:

        - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
        - 'unsigned': smallest unsigned int dtype (min.: np.uint8); only
          attempted when the data is empty or its minimum is >= 0
        - 'float': smallest float dtype (min.: np.float32)

        As this behaviour is separate from the core conversion to
        numeric values, any errors raised during the downcasting
        will be surfaced regardless of the value of the 'errors' input.

        In addition, downcasting will only occur if the size
        of the resulting data's dtype is strictly larger than
        the dtype it is to be cast to, so if none of the dtypes
        checked satisfy that specification, no downcasting will be
        performed on the data.

        .. versionadded:: 0.19.0

    Returns
    -------
    ret : numeric if parsing succeeded.
        Return type depends on input.  Series if Series, Index if Index,
        a scalar if a scalar was passed, otherwise ndarray

    Raises
    ------
    ValueError
        If `downcast` is not one of the accepted values.
    TypeError
        If `arg` has more than one dimension.

    Examples
    --------
    Take separate series and convert to numeric, coercing when told to

    >>> import pandas as pd
    >>> s = pd.Series(['1.0', '2', -3])
    >>> pd.to_numeric(s)
    0    1.0
    1    2.0
    2   -3.0
    dtype: float64
    >>> pd.to_numeric(s, downcast='float')
    0    1.0
    1    2.0
    2   -3.0
    dtype: float32
    >>> pd.to_numeric(s, downcast='signed')
    0    1
    1    2
    2   -3
    dtype: int8
    >>> s = pd.Series(['apple', '1.0', '2', -3])
    >>> pd.to_numeric(s, errors='ignore')
    0    apple
    1      1.0
    2        2
    3       -3
    dtype: object
    >>> pd.to_numeric(s, errors='coerce')
    0    NaN
    1    1.0
    2    2.0
    3   -3.0
    dtype: float64
    """
    if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'):
        raise ValueError('invalid downcasting method provided')

    # remember what kind of container came in so that the same kind
    # can be handed back at the end
    is_series = False
    is_index = False
    is_scalar = False

    if isinstance(arg, pd.Series):
        is_series = True
        values = arg.values
    elif isinstance(arg, pd.Index):
        is_index = True
        # prefer the i8 view when the index exposes one
        values = arg.asi8
        if values is None:
            values = arg.values
    elif isinstance(arg, (list, tuple)):
        values = np.array(arg, dtype='O')
    elif np.isscalar(arg):
        # numeric scalars need no conversion at all
        if is_number(arg):
            return arg
        is_scalar = True
        values = np.array([arg], dtype='O')
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a list, tuple, 1-d array, or Series')
    else:
        values = arg

    try:
        if is_numeric_dtype(values):
            pass
        elif is_datetime_or_timedelta_dtype(values):
            values = values.astype(np.int64)
        else:
            values = _ensure_object(values)
            # anything other than 'ignore'/'raise' asks the parser to coerce
            coerce_numeric = errors not in ('ignore', 'raise')
            values = lib.maybe_convert_numeric(values, set(),
                                               coerce_numeric=coerce_numeric)

    except Exception:
        # for 'ignore' and 'coerce' the data is returned as it stands
        if errors == 'raise':
            raise

    # attempt downcast only if the data has been successfully converted
    # to a numerical dtype and if a downcast method has been specified
    if downcast is not None and is_numeric_dtype(values):
        typecodes = None

        if downcast in ('integer', 'signed'):
            typecodes = np.typecodes['Integer']
        elif (downcast == 'unsigned' and
              (np.size(values) == 0 or np.min(values) >= 0)):
            # zero is representable by every unsigned dtype, so only
            # strictly negative data rules out an unsigned downcast;
            # empty data is checked first because np.min raises on it
            typecodes = np.typecodes['UnsignedInteger']
        elif downcast == 'float':
            typecodes = np.typecodes['Float']

            # pandas support goes only to np.float32,
            # as float dtypes smaller than that are
            # extremely rare and not well supported
            float_32_char = np.dtype(np.float32).char
            float_32_ind = typecodes.index(float_32_char)
            typecodes = typecodes[float_32_ind:]

        if typecodes is not None:
            # from smallest to largest
            for dtype in typecodes:
                if np.dtype(dtype).itemsize < values.dtype.itemsize:
                    values = _possibly_downcast_to_dtype(
                        values, dtype)

                    # successful conversion
                    if values.dtype == dtype:
                        break

    if is_series:
        return pd.Series(values, index=arg.index, name=arg.name)
    elif is_index:
        # because we want to coerce to numeric if possible,
        # do not use _shallow_copy_with_infer
        return Index(values, name=arg.name)
    elif is_scalar:
        return values[0]
    else:
        return values
Exemplo n.º 31
0
def _possibly_convert_objects(values,
                              convert_dates=True,
                              convert_numeric=True,
                              convert_timedeltas=True,
                              copy=True):
    """
    If we have an object dtype, try to coerce dates and/or numbers.

    Each conversion stage runs only while ``values`` still has object
    dtype, so a stage that succeeds short-circuits the later ones.

    Parameters
    ----------
    values : list, tuple, scalar or array-like with a ``dtype``
        Lists and tuples are turned into an object ndarray; anything
        without a ``dtype`` attribute is wrapped in a one-element
        object ndarray.
    convert_dates : bool or 'coerce', default True
        If 'coerce', force a cast to ``M8[ns]`` and keep the result
        unless it is entirely null. Any other truthy value is forwarded
        to ``lib.maybe_convert_objects`` as ``convert_datetime``.
    convert_numeric : bool, default True
        If truthy, attempt a coercing numeric conversion and keep the
        result unless it is entirely null. If falsy, fall back to the
        soft conversion of ``lib.maybe_convert_objects``.
    convert_timedeltas : bool or 'coerce', default True
        If 'coerce', convert through ``to_timedelta`` and keep the result
        unless it is entirely null. Any other truthy value is forwarded
        to ``lib.maybe_convert_objects`` as ``convert_timedelta``.
    copy : bool, default True
        If truthy, return ``values.copy()`` rather than ``values`` itself.

    Returns
    -------
    The (possibly converted) values.
    """

    # if we have passed in a list or scalar
    if isinstance(values, (list, tuple)):
        values = np.array(values, dtype=np.object_)
    if not hasattr(values, 'dtype'):
        values = np.array([values], dtype=np.object_)

    # convert dates
    if convert_dates and values.dtype == np.object_:

        # we take an aggressive stance and convert to datetime64[ns]
        if convert_dates == 'coerce':
            new_values = _possibly_cast_to_datetime(values,
                                                    'M8[ns]',
                                                    errors='coerce')

            # if we are all nans then leave me alone
            if not isnull(new_values).all():
                values = new_values

        else:
            values = lib.maybe_convert_objects(values,
                                               convert_datetime=convert_dates)

    # convert timedeltas
    if convert_timedeltas and values.dtype == np.object_:

        if convert_timedeltas == 'coerce':
            # imported here rather than at module level
            # (presumably to avoid a circular import -- confirm)
            from pandas.tseries.timedeltas import to_timedelta
            new_values = to_timedelta(values, coerce=True)

            # if we are all nans then leave me alone
            if not isnull(new_values).all():
                values = new_values

        else:
            values = lib.maybe_convert_objects(
                values, convert_timedelta=convert_timedeltas)

    # convert to numeric
    if values.dtype == np.object_:
        if convert_numeric:
            try:
                new_values = lib.maybe_convert_numeric(values,
                                                       set(),
                                                       coerce_numeric=True)

                # if we are all nans then leave me alone
                if not isnull(new_values).all():
                    values = new_values

            except Exception:
                # best-effort conversion: a failure leaves the values
                # untouched, but KeyboardInterrupt / SystemExit must
                # still propagate, hence no bare ``except``
                pass
        else:
            # soft-conversion
            values = lib.maybe_convert_objects(values)

    values = values.copy() if copy else values

    return values
Exemplo n.º 32
0
 def test_scientific_no_exponent(self):
     # See PR 12215
     # Each string ends in a bare 'E'/'e' with no exponent digits after it.
     # NOTE(review): the trailing positional True is presumably
     # coerce_numeric -- confirm against maybe_convert_numeric's signature.
     malformed = np.array(['42E', '2E', '99e', '6e'], dtype='O')
     converted = lib.maybe_convert_numeric(malformed, set(), False, True)

     # every entry is expected to come back as NaN
     every_entry_nan = np.all(np.isnan(converted))
     self.assertTrue(every_entry_nan)