Exemplo n.º 1
0
def _isfinite(values):
    """Return a boolean mask flagging the entries of ``values`` that are
    NOT finite.

    Datetime/timedelta input is delegated to ``isna``.  Complex, float,
    integer and boolean input is handed to ``np.isfinite`` directly; any
    other input is cast to ``float64`` first.
    """
    if is_datetime_or_timedelta_dtype(values):
        return isna(values)

    natively_checkable = (is_complex_dtype(values) or is_float_dtype(values)
                          or is_integer_dtype(values)
                          or is_bool_dtype(values))
    candidate = values if natively_checkable else values.astype('float64')
    return ~np.isfinite(candidate)
Exemplo n.º 2
0
def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
                isfinite=False, copy=True, mask=None, compute_mask=True):
    """Prepare ``values`` for a nan-aware reduction.

    Returns the tuple ``(values, mask, dtype, dtype_max, fill_value)``.

    When ``skipna`` is set, the masked positions of ``values`` are
    overwritten with the fill value (upcasting when the dtype cannot hold
    it).  ``copy=True`` forces a copy of ``values`` before that happens.
    A mask is only computed when none was passed in and either ``skipna``
    or ``compute_mask`` is set.
    """
    # skipping NA always needs a mask, whatever the caller requested
    want_mask = True if skipna else compute_mask

    if is_datetime64tz_dtype(values):
        # com.values_from_object returns M8[ns] dtype instead of tz-aware,
        #  so this case must be handled separately from the rest
        dtype = values.dtype
        values = getattr(values, "_values", values)
    else:
        values = com.values_from_object(values)
        dtype = values.dtype

    if want_mask and mask is None:
        mask = _isfinite(values) if isfinite else isna(values)

    # changing timedelta64/datetime64 to int64 needs to happen after
    #  finding `mask` above
    if is_datetime_or_timedelta_dtype(values) or is_datetime64tz_dtype(values):
        values = getattr(values, "asi8", values).view(np.int64)

    dtype_ok = _na_ok_dtype(dtype)

    # get our fill value (in case we need to provide an alternative
    # dtype for it)
    fill_value = _get_fill_value(dtype, fill_value=fill_value,
                                 fill_value_typ=fill_value_typ)

    if copy:
        values = values.copy()

    if skipna:
        if dtype_ok:
            np.putmask(values, mask, fill_value)
        else:
            # promote if needed
            values, _ = maybe_upcast_putmask(values, mask, fill_value)

    # return a platform independent precision dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.int64
    elif is_float_dtype(dtype):
        dtype_max = np.float64
    else:
        dtype_max = dtype

    return values, mask, dtype, dtype_max, fill_value
Exemplo n.º 3
0
def test_is_datetime_or_timedelta_dtype():
    """Exercise ``is_datetime_or_timedelta_dtype`` with inputs it must
    reject and inputs it must accept."""
    rejected = [
        int,
        str,
        pd.Series([1, 2]),
        np.array(['a', 'b']),
        DatetimeTZDtype("ns", "US/Eastern"),
    ]
    for candidate in rejected:
        assert not com.is_datetime_or_timedelta_dtype(candidate)

    accepted = [
        np.datetime64,
        np.timedelta64,
        np.array([], dtype=np.timedelta64),
        np.array([], dtype=np.datetime64),
    ]
    for candidate in accepted:
        assert com.is_datetime_or_timedelta_dtype(candidate)
Exemplo n.º 4
0
def test_is_datetime_or_timedelta_dtype():
    """Check which inputs ``is_datetime_or_timedelta_dtype`` recognises."""
    check = com.is_datetime_or_timedelta_dtype

    for not_datetimelike in (int, str, pd.Series([1, 2]),
                             np.array(['a', 'b'])):
        assert not check(not_datetimelike)

    # TODO(jreback), this is slightly suspect
    tz_aware = DatetimeTZDtype("ns", "US/Eastern")
    assert not check(tz_aware)

    for datetimelike in (np.datetime64,
                         np.timedelta64,
                         np.array([], dtype=np.timedelta64),
                         np.array([], dtype=np.datetime64)):
        assert check(datetimelike)
Exemplo n.º 5
0
def _bn_ok_dtype(dt, name):
    """Return True when bottleneck may be used for reduction ``name`` on
    data of dtype ``dt``, False otherwise."""
    # Bottleneck chokes on datetime64; object dtype is excluded as well
    if is_object_dtype(dt) or is_datetime_or_timedelta_dtype(dt):
        return False

    # bottleneck does not properly upcast during the sum
    # so narrow dtypes can overflow
    overflow_prone_sum = name == 'nansum' and dt.itemsize < 8
    return not overflow_prone_sum
Exemplo n.º 6
0
def _get_next_label(label):
    """Return the smallest value strictly greater than ``label``.

    The step depends on the kind of ``label``:

    - ``Timestamp`` / ``Timedelta`` / datetime-like dtypes: one nanosecond
    - integer dtypes: ``1``
    - float dtypes: the next representable float towards +infinity

    Raises
    ------
    TypeError
        If ``label`` is of none of the kinds above.
    """
    dtype = getattr(label, 'dtype', type(label))
    if isinstance(label, (Timestamp, Timedelta)):
        dtype = 'datetime64'
    if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype):
        return label + np.timedelta64(1, 'ns')
    elif is_integer_dtype(dtype):
        return label + 1
    elif is_float_dtype(dtype):
        # use np.inf: the np.infty alias was removed in NumPy 2.0
        return np.nextafter(label, np.inf)
    else:
        raise TypeError('cannot determine next label for type {typ!r}'
                        .format(typ=type(label)))
Exemplo n.º 7
0
def _get_prev_label(label):
    """Return the largest value strictly smaller than ``label``.

    The step depends on the kind of ``label``:

    - ``Timestamp`` / ``Timedelta`` / datetime-like dtypes (tz-aware
      included, mirroring ``_get_next_label``): one nanosecond
    - integer dtypes: ``1``
    - float dtypes: the next representable float towards -infinity

    Raises
    ------
    TypeError
        If ``label`` is of none of the kinds above.
    """
    dtype = getattr(label, 'dtype', type(label))
    if isinstance(label, (Timestamp, Timedelta)):
        dtype = 'datetime64'
    if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype):
        return label - np.timedelta64(1, 'ns')
    elif is_integer_dtype(dtype):
        return label - 1
    elif is_float_dtype(dtype):
        # use np.inf: the np.infty alias was removed in NumPy 2.0
        return np.nextafter(label, -np.inf)
    else:
        # message previously said "next", copied from _get_next_label
        raise TypeError('cannot determine previous label for type {typ!r}'
                        .format(typ=type(label)))
Exemplo n.º 8
0
 def astype(self, dtype, copy=True):
     """Cast to ``dtype``.

     Object, string (non-categorical) and integer targets are handled
     here; datetime<->timedelta and any float target raise ``TypeError``;
     every other target is deferred to the parent implementation.
     """
     if is_object_dtype(dtype):
         return self._box_values_as_index()

     if is_string_dtype(dtype) and not is_categorical_dtype(dtype):
         return Index(self.format(), name=self.name, dtype=object)

     if is_integer_dtype(dtype):
         as_i8 = self.values.astype('i8', copy=copy)
         return Index(as_i8, name=self.name, dtype='i8')

     # disallow conversion between datetime/timedelta,
     # and conversions for any datetimelike to float
     other_datetimelike = (is_datetime_or_timedelta_dtype(dtype) and
                           not is_dtype_equal(self.dtype, dtype))
     if other_datetimelike or is_float_dtype(dtype):
         raise TypeError('Cannot cast {name} to dtype {dtype}'.format(
             name=type(self).__name__, dtype=dtype))

     return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy)
Exemplo n.º 9
0
def _bn_ok_dtype(dt, name):
    """Return True when bottleneck may be used for reduction ``name`` on
    data of dtype ``dt``, False otherwise."""
    # Bottleneck chokes on datetime64; object dtype is excluded as well
    if is_object_dtype(dt) or is_datetime_or_timedelta_dtype(dt):
        return False

    # GH 15507
    # bottleneck does not properly upcast during the sum
    # so can overflow

    # GH 9422
    # further we also want to preserve NaN when all elements
    # are NaN, unlike bottleneck/numpy which consider this
    # to be 0
    return name not in ['nansum', 'nanprod']
Exemplo n.º 10
0
 def astype(self, dtype, copy=True):
     """Cast to ``dtype``.

     Object, string (non-categorical) and integer targets are handled
     here; datetime<->timedelta and any float target raise ``TypeError``;
     every other target is deferred to the parent implementation.
     """
     if is_object_dtype(dtype):
         return self._box_values_as_index()

     wants_plain_string = (is_string_dtype(dtype)
                           and not is_categorical_dtype(dtype))
     if wants_plain_string:
         return Index(self.format(), name=self.name, dtype=object)

     if is_integer_dtype(dtype):
         # TODO(DatetimeArray): use self._values here.
         # Can't use ._values currently, because that returns a
         # DatetimeIndex, which throws us in an infinite loop.
         int_values = self.values.astype('i8', copy=copy)
         return Index(int_values, name=self.name, dtype='i8')

     # disallow conversion between datetime/timedelta,
     # and conversions for any datetimelike to float
     crosses_datetimelike = (is_datetime_or_timedelta_dtype(dtype) and
                             not is_dtype_equal(self.dtype, dtype))
     if crosses_datetimelike or is_float_dtype(dtype):
         template = 'Cannot cast {name} to dtype {dtype}'
         raise TypeError(template.format(name=type(self).__name__,
                                         dtype=dtype))

     return super(DatetimeIndexOpsMixin, self).astype(dtype, copy=copy)
Exemplo n.º 11
0
def _convert_bin_to_datelike_type(bins, dtype):
    """
    Convert bins to a DatetimeIndex or TimedeltaIndex if the original dtype
    is datelike.

    Parameters
    ----------
    bins : list-like of bins
    dtype : dtype of data

    Returns
    -------
    bins : Array-like of bins, DatetimeIndex or TimedeltaIndex if dtype is
           datelike; otherwise the input is returned unchanged
    """
    is_datelike = (is_datetime64tz_dtype(dtype)
                   or is_datetime_or_timedelta_dtype(dtype))
    if not is_datelike:
        return bins

    # rebuild the index from the int64 representation with the data's dtype
    return Index(bins.astype(np.int64), dtype=dtype)
Exemplo n.º 12
0
    def astype(self, dtype, copy=True):
        """Cast this datetime-like array to ``dtype``.

        ``copy`` is only consulted for integer targets whose dtype already
        matches the underlying data.  Casting between datetime and
        timedelta, or to any float dtype, raises ``TypeError``.
        """
        # TODO: Figure out something better here...
        # We have DatetimeLikeArrayMixin ->
        #     super(...), which ends up being... DatetimeIndexOpsMixin?
        # this is complicated.
        # need a pandas_astype(arr, dtype).
        from pandas import Categorical

        dtype = pandas_dtype(dtype)

        if is_object_dtype(dtype):
            return np.asarray(self, dtype=object)

        if is_string_dtype(dtype) and not is_categorical_dtype(dtype):
            return self._format_native_types()

        if is_integer_dtype(dtype):
            data = self._data
            if data.dtype != dtype:
                # int32 vs. int64
                return data.astype(dtype)
            return data.copy() if copy else data

        # disallow conversion between datetime/timedelta,
        # and conversions for any datetimelike to float
        crosses_datetimelike = (is_datetime_or_timedelta_dtype(dtype) and
                                not is_dtype_equal(self.dtype, dtype))
        if crosses_datetimelike or is_float_dtype(dtype):
            raise TypeError('Cannot cast {name} to dtype {dtype}'.format(
                name=type(self).__name__, dtype=dtype))

        if is_categorical_dtype(dtype):
            return Categorical(self, dtype=dtype)

        if is_period_dtype(dtype):
            return self.asfreq(dtype.freq)

        return np.asarray(self, dtype=dtype)
Exemplo n.º 13
0
def test_maybe_promote_any_numpy_dtype_with_na(any_numpy_dtype_reduced,
                                               fill_value, box):
    """Check promotion of each numpy dtype against a missing-value marker.

    The test works in three steps: it xfails combinations listed as known
    shortcomings, derives the expected dtype and expected fill values for
    the scalar and array cases, then hands everything to ``_check_promote``.

    ``any_numpy_dtype_reduced``, ``fill_value`` and ``box`` are injected by
    pytest; their definitions are outside this function.
    """
    dtype = np.dtype(any_numpy_dtype_reduced)
    boxed, box_dtype = box  # read from parametrized fixture

    # known shortcomings: these combinations are marked as expected failures
    if (dtype == bytes and not boxed and fill_value is not None
            and fill_value is not NaT):
        pytest.xfail("does not upcast to object")
    elif dtype == "uint64" and not boxed and fill_value == iNaT:
        pytest.xfail("does not upcast correctly")
    # below: opinionated that iNaT should be interpreted as missing value
    elif (not boxed and (is_float_dtype(dtype) or is_complex_dtype(dtype))
          and fill_value == iNaT):
        pytest.xfail("does not cast to missing value marker correctly")
    elif (is_string_dtype(dtype)
          or dtype == bool) and not boxed and fill_value == iNaT:
        pytest.xfail("does not cast to missing value marker correctly")

    # expected dtype and scalar fill value; the branch order matters because
    # an earlier branch shadows every later one that would also match
    if is_integer_dtype(dtype) and dtype == "uint64" and fill_value == iNaT:
        # uint64 + negative int casts to object; iNaT is considered as missing
        expected_dtype = np.dtype(object)
        exp_val_for_scalar = np.nan
    elif is_integer_dtype(dtype) and fill_value == iNaT:
        # other integer + iNaT casts to int64
        expected_dtype = np.int64
        exp_val_for_scalar = iNaT
    elif is_integer_dtype(dtype) and fill_value is not NaT:
        # integer + other missing value (np.nan / None) casts to float
        expected_dtype = np.float64
        exp_val_for_scalar = np.nan
    elif is_object_dtype(dtype) and (fill_value == iNaT or fill_value is NaT):
        # inserting into object does not cast the value
        # but *does* cast None to np.nan
        expected_dtype = np.dtype(object)
        exp_val_for_scalar = fill_value
    elif is_datetime_or_timedelta_dtype(dtype):
        # datetime / timedelta cast all missing values to iNaT
        expected_dtype = dtype
        exp_val_for_scalar = iNaT
    elif fill_value is NaT:
        # NaT upcasts everything that's not datetime/timedelta to object
        expected_dtype = np.dtype(object)
        exp_val_for_scalar = NaT
    elif is_float_dtype(dtype) or is_complex_dtype(dtype):
        # float / complex + missing value (!= NaT) stays the same
        expected_dtype = dtype
        exp_val_for_scalar = np.nan
    else:
        # all other cases cast to object, and use np.nan as missing value
        expected_dtype = np.dtype(object)
        exp_val_for_scalar = np.nan

    # array case has same expected_dtype; but returns corresponding na-marker
    if is_integer_dtype(expected_dtype):
        # integers cannot hold NaNs; maybe_promote_with_array returns None
        exp_val_for_array = None
    elif is_datetime_or_timedelta_dtype(expected_dtype):
        exp_val_for_array = iNaT
    else:  # expected_dtype = float / complex / object
        exp_val_for_array = np.nan

    # the actual assertions live in the shared helper
    _check_promote(
        dtype,
        fill_value,
        boxed,
        box_dtype,
        expected_dtype,
        exp_val_for_scalar,
        exp_val_for_array,
    )
Exemplo n.º 14
0
def ndarray_to_mgr(values, index, columns, dtype: DtypeObj | None, copy: bool,
                   typ: str) -> Manager:
    """
    Build a Manager from array-like ``values``.

    Parameters
    ----------
    values : ndarray, list, Series, Index or ExtensionArray
    index, columns : axis labels or None
        For a Series input, a missing ``index`` is taken from the Series and
        missing ``columns`` from its name (when it has one).  The final axes
        are resolved by ``_get_axes`` from the shape of the prepared values.
    dtype : DtypeObj or None
        When given and different from the dtype of the prepared values, the
        values are cast to it.
    copy : bool
        Forwarded to the helpers that prepare and cast ``values``.
    typ : str
        ``"array"`` yields an ``ArrayManager``; otherwise the result comes
        from ``create_block_manager_from_blocks``.  On the extension-dtype
        early return ``typ`` is forwarded to ``arrays_to_mgr``.

    Returns
    -------
    Manager
    """
    # used in DataFrame.__init__
    # input must be a ndarray, list, Series, Index, ExtensionArray

    if isinstance(values, ABCSeries):
        if columns is None:
            if values.name is not None:
                columns = Index([values.name])
        if index is None:
            index = values.index
        else:
            values = values.reindex(index)

        # zero len case (GH #2234)
        if not len(values) and columns is not None and len(columns):
            values = np.empty((0, 1), dtype=object)

    vdtype = getattr(values, "dtype", None)
    if is_1d_only_ea_dtype(vdtype) or isinstance(dtype, ExtensionDtype):
        # GH#19157

        if isinstance(values, np.ndarray) and values.ndim > 1:
            # GH#12513 a EA dtype passed with a 2D array, split into
            #  multiple EAs that view the values
            values = [values[:, n] for n in range(values.shape[1])]
        else:
            values = [values]

        if columns is None:
            columns = Index(range(len(values)))

        # early return: the per-column arrays are handed to arrays_to_mgr
        return arrays_to_mgr(values,
                             columns,
                             index,
                             columns,
                             dtype=dtype,
                             typ=typ)

    if is_extension_array_dtype(vdtype) and not is_1d_only_ea_dtype(vdtype):
        # i.e. Datetime64TZ
        values = extract_array(values, extract_numpy=True)
        if copy:
            values = values.copy()
        if values.ndim == 1:
            values = values.reshape(-1, 1)

    else:
        # by definition an array here
        # the dtypes will be coerced to a single dtype
        values = _prep_ndarray(values, copy=copy)

    if dtype is not None and not is_dtype_equal(values.dtype, dtype):
        # cast on the flattened data, then restore the original shape
        shape = values.shape
        flat = values.ravel()

        if not is_integer_dtype(dtype):
            # TODO: skipping integer_dtype is needed to keep the tests passing,
            #  not clear it is correct
            # Note: we really only need _try_cast, but keeping to exposed funcs
            values = sanitize_array(flat,
                                    None,
                                    dtype=dtype,
                                    copy=copy,
                                    raise_cast_failure=True)
        else:
            try:
                values = construct_1d_ndarray_preserving_na(flat,
                                                            dtype=dtype,
                                                            copy=False)
            except IntCastingNaNError:
                # following Series, we ignore the dtype and retain floating
                # values instead of casting nans to meaningless ints
                pass

        values = values.reshape(shape)

    # _prep_ndarray ensures that values.ndim == 2 at this point
    index, columns = _get_axes(values.shape[0],
                               values.shape[1],
                               index=index,
                               columns=columns)

    _check_values_indices_shape_match(values, index, columns)

    if typ == "array":

        # str-typed values are stored as object
        if issubclass(values.dtype.type, str):
            values = np.array(values, dtype=object)

        if dtype is None and is_object_dtype(values.dtype):
            # per-column inference on a copy of each column
            arrays = [
                ensure_wrapped_if_datetimelike(
                    maybe_infer_to_datetimelike(values[:, i].copy()))
                for i in range(values.shape[1])
            ]
        else:
            if is_datetime_or_timedelta_dtype(values.dtype):
                values = ensure_wrapped_if_datetimelike(values)
            arrays = [values[:, i].copy() for i in range(values.shape[1])]

        return ArrayManager(arrays, [index, columns], verify_integrity=False)

    # from here on `values` is transposed: iterating it yields one entry per
    # column of the frame
    values = values.T

    # if we don't have a dtype specified, then try to convert objects
    # on the entire block; this is to convert if we have datetimelike's
    # embedded in an object type
    if dtype is None and is_object_dtype(values.dtype):

        if values.ndim == 2 and values.shape[0] != 1:
            # transpose and separate blocks

            dtlike_vals = [maybe_infer_to_datetimelike(row) for row in values]
            dvals_list = [ensure_block_shape(dval, 2) for dval in dtlike_vals]

            # TODO: What about re-joining object columns?
            block_values = [
                new_block(dvals_list[n], placement=n, ndim=2)
                for n in range(len(dvals_list))
            ]

        else:
            datelike_vals = maybe_infer_to_datetimelike(values)
            nb = new_block(datelike_vals,
                           placement=slice(len(columns)),
                           ndim=2)
            block_values = [nb]
    else:
        nb = new_block(values, placement=slice(len(columns)), ndim=2)
        block_values = [nb]

    # no columns means no blocks at all
    if len(columns) == 0:
        block_values = []

    return create_block_manager_from_blocks(block_values, [columns, index])
Exemplo n.º 15
0
    def _metric_aggs(
        self,
        query_compiler,
        func,
        field_types=None,
        numeric_only=None,
        keep_original_dtype=False,
    ):
        """
        Run the metric aggregation ``func`` over a set of fields with a
        single search request (``size=0``) and collect the per-field values.

        Parameters
        ----------
        query_compiler:
            Provides the tasks to resolve, the field mappings, the client
            and the index pattern used for the search.
        func:
            Aggregation passed to ``Query.metric_aggs`` for every field.
        field_types: str, default None
            if `aggregatable` use only field_names whose fields in Elasticsearch are aggregatable.
            Otherwise the fields come from ``metric_source_fields`` (see `numeric_only`).
        numeric_only: bool, default None
            Only consulted when `field_types` is not `aggregatable`.
            If truthy, ``metric_source_fields(include_bool=True)`` selects the fields;
            otherwise timestamp fields are requested as well (``include_timestamp=True``).
        keep_original_dtype : bool, default False
            if `True` the output values should keep the same domain as the input values, i.e. booleans should be booleans

        Returns
        -------
        pandas.Series
            Series containing results of `func` applied to the field_name(s)

        Raises
        ------
        NotImplementedError
            If the resolved tasks set a size.
        """
        query_params, post_processing = self._resolve_tasks(query_compiler)

        size = self._size(query_params, post_processing)
        if size is not None:
            raise NotImplementedError(
                f"Can not count field matches if size is set {size}")

        body = Query(query_params["query"])

        # maps field name -> aggregated value
        results = {}

        # some metrics aggs (including cardinality) work on all aggregatable fields
        # therefore we include an optional all parameter on operations
        # that call _metric_aggs
        if field_types == "aggregatable":
            aggregatable_field_names = (
                query_compiler._mappings.aggregatable_field_names())

            for field in aggregatable_field_names.keys():
                body.metric_aggs(field, func, field)

            response = query_compiler._client.search(
                index=query_compiler._index_pattern,
                size=0,
                body=body.to_search_body())

            # Results are of the form
            # "aggregations" : {
            #   "customer_full_name.keyword" : {
            #     "value" : 10
            #   }
            # }

            # map aggregatable (e.g. x.keyword) to field_name
            for key, value in aggregatable_field_names.items():
                results[value] = response["aggregations"][key]["value"]
        else:
            if numeric_only:
                (
                    pd_dtypes,
                    source_fields,
                    date_formats,
                ) = query_compiler._mappings.metric_source_fields(
                    include_bool=True)
            else:
                # The only non-numerics we support are bool and timestamps currently
                # strings are not supported by metric aggs in ES
                # TODO - sum isn't supported for Timestamp in pandas - although ES does attempt to do it
                (
                    pd_dtypes,
                    source_fields,
                    date_formats,
                ) = query_compiler._mappings.metric_source_fields(
                    include_bool=True, include_timestamp=True)

            for field in source_fields:
                body.metric_aggs(field, func, field)

            response = query_compiler._client.search(
                index=query_compiler._index_pattern,
                size=0,
                body=body.to_search_body())

            # Results are of the form
            # "aggregations" : {
            #   "AvgTicketPrice" : {
            #     "value" : 628.2536888148849
            #   },
            #   "timestamp": {
            #     "value": 1.5165624455644382E12,
            #     "value_as_string": "2018-01-21T19:20:45.564Z"
            #   }
            # }
            for pd_dtype, field, date_format in zip(pd_dtypes, source_fields,
                                                    date_formats):
                if is_datetime_or_timedelta_dtype(pd_dtype):
                    # dates are parsed from the string form using the
                    # field's date format
                    results[field] = elasticsearch_date_to_pandas_date(
                        response["aggregations"][field]["value_as_string"],
                        date_format)
                elif keep_original_dtype:
                    # cast back through the pandas dtype's scalar type
                    results[field] = pd_dtype.type(
                        response["aggregations"][field]["value"])
                else:
                    results[field] = response["aggregations"][field]["value"]

        # Return single value if this is a series
        # if len(numeric_source_fields) == 1:
        #    return np.float64(results[numeric_source_fields[0]])
        s = pd.Series(data=results, index=results.keys())

        return s
Exemplo n.º 16
0
def ndarray_to_mgr(values, index, columns, dtype: DtypeObj | None, copy: bool,
                   typ: str) -> Manager:
    """
    Build a Manager from array-like ``values``.

    Parameters
    ----------
    values : ndarray, list, Series, Index or ExtensionArray
    index, columns : axis labels or None
        For a Series input, a missing ``index`` is taken from the Series and
        missing ``columns`` from its name (when it has one).  The final axes
        are resolved by ``_get_axes`` from the shape of the prepared values.
    dtype : DtypeObj or None
        When given and different from the dtype of the prepared values, the
        values are cast to it through ``sanitize_array``.
    copy : bool
        Forwarded to the helpers that prepare and cast ``values``.
    typ : str
        ``"array"`` yields an ``ArrayManager``; otherwise the result comes
        from ``create_block_manager_from_blocks``.  On the extension-dtype
        early return ``typ`` is forwarded to ``arrays_to_mgr``.

    Returns
    -------
    Manager
    """
    # used in DataFrame.__init__
    # input must be a ndarray, list, Series, Index, ExtensionArray

    if isinstance(values, ABCSeries):
        if columns is None:
            if values.name is not None:
                columns = Index([values.name])
        if index is None:
            index = values.index
        else:
            values = values.reindex(index)

        # zero len case (GH #2234)
        if not len(values) and columns is not None and len(columns):
            values = np.empty((0, 1), dtype=object)

    vdtype = getattr(values, "dtype", None)
    if is_1d_only_ea_dtype(vdtype) or isinstance(dtype, ExtensionDtype):
        # GH#19157

        if isinstance(values, np.ndarray) and values.ndim > 1:
            # GH#12513 a EA dtype passed with a 2D array, split into
            #  multiple EAs that view the values
            values = [values[:, n] for n in range(values.shape[1])]
        else:
            values = [values]

        if columns is None:
            columns = Index(range(len(values)))
        else:
            columns = ensure_index(columns)

        # early return: the per-column arrays are handed to arrays_to_mgr
        return arrays_to_mgr(values, columns, index, dtype=dtype, typ=typ)

    elif is_extension_array_dtype(vdtype) and not is_1d_only_ea_dtype(vdtype):
        # i.e. Datetime64TZ
        values = extract_array(values, extract_numpy=True)
        if copy:
            values = values.copy()
        if values.ndim == 1:
            values = values.reshape(-1, 1)

    else:
        # by definition an array here
        # the dtypes will be coerced to a single dtype
        values = _prep_ndarray(values, copy=copy)

    if dtype is not None and not is_dtype_equal(values.dtype, dtype):
        # cast on the flattened data, then restore the original shape
        shape = values.shape
        flat = values.ravel()

        # GH#40110 see similar check inside sanitize_array
        # (cast failures are not raised for float values -> integer dtype)
        rcf = not (is_integer_dtype(dtype) and values.dtype.kind == "f")

        values = sanitize_array(flat,
                                None,
                                dtype=dtype,
                                copy=copy,
                                raise_cast_failure=rcf)

        values = values.reshape(shape)

    # _prep_ndarray ensures that values.ndim == 2 at this point
    index, columns = _get_axes(values.shape[0],
                               values.shape[1],
                               index=index,
                               columns=columns)

    _check_values_indices_shape_match(values, index, columns)

    if typ == "array":

        # str-typed values are stored as object
        if issubclass(values.dtype.type, str):
            values = np.array(values, dtype=object)

        if dtype is None and is_object_dtype(values.dtype):
            # per-column inference
            arrays = [
                ensure_wrapped_if_datetimelike(
                    maybe_infer_to_datetimelike(values[:, i]))
                for i in range(values.shape[1])
            ]
        else:
            if is_datetime_or_timedelta_dtype(values.dtype):
                values = ensure_wrapped_if_datetimelike(values)
            arrays = [values[:, i] for i in range(values.shape[1])]

        return ArrayManager(arrays, [index, columns], verify_integrity=False)

    # from here on `values` is transposed: iterating it yields one entry per
    # column of the frame
    values = values.T

    # if we don't have a dtype specified, then try to convert objects
    # on the entire block; this is to convert if we have datetimelike's
    # embedded in an object type
    if dtype is None and is_object_dtype(values.dtype):
        obj_columns = list(values)
        maybe_datetime = [maybe_infer_to_datetimelike(x) for x in obj_columns]
        # don't convert (and copy) the objects if no type inference occurs
        if any(x is not y for x, y in zip(obj_columns, maybe_datetime)):
            # at least one column was converted: one block per column
            dvals_list = [
                ensure_block_shape(dval, 2) for dval in maybe_datetime
            ]
            block_values = [
                new_block_2d(dvals_list[n], placement=BlockPlacement(n))
                for n in range(len(dvals_list))
            ]
        else:
            # nothing was inferred: keep everything in a single block
            bp = BlockPlacement(slice(len(columns)))
            nb = new_block_2d(values, placement=bp)
            block_values = [nb]
    else:
        bp = BlockPlacement(slice(len(columns)))
        nb = new_block_2d(values, placement=bp)
        block_values = [nb]

    # no columns means no blocks at all
    if len(columns) == 0:
        block_values = []

    return create_block_manager_from_blocks(block_values, [columns, index],
                                            verify_integrity=False)
Exemplo n.º 17
0
def _view_if_needed(values):
    """Return an ``int64`` view of datetime64/timedelta64 ``values``; any
    other input is returned untouched."""
    needs_int_view = is_datetime_or_timedelta_dtype(values)
    return values.view(np.int64) if needs_int_view else values
Exemplo n.º 18
0
def to_numeric(arg, errors='raise', downcast=None):
    """
    Convert argument to a numeric type.

    The default return dtype is `float64` or `int64`
    depending on the data supplied. Use the `downcast` parameter
    to obtain other dtypes.

    Parameters
    ----------
    arg : scalar, list, tuple, 1-d array, Index, or Series
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as NaN
        - If 'ignore', then invalid parsing will return the input
    downcast : {'integer', 'signed', 'unsigned', 'float'} , default None
        If not None, and if the data has been successfully cast to a
        numerical dtype (or if the data was numeric to begin with),
        downcast that resulting data to the smallest numerical dtype
        possible according to the following rules:

        - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
        - 'unsigned': smallest unsigned int dtype (min.: np.uint8)
        - 'float': smallest float dtype (min.: np.float32)

        As this behaviour is separate from the core conversion to
        numeric values, any errors raised during the downcasting
        will be surfaced regardless of the value of the 'errors' input.

        In addition, downcasting will only occur if the size
        of the resulting data's dtype is strictly larger than
        the dtype it is to be cast to, so if none of the dtypes
        checked satisfy that specification, no downcasting will be
        performed on the data.

        .. versionadded:: 0.19.0

    Returns
    -------
    ret : numeric if parsing succeeded.
        Return type depends on input.  Series if Series, Index if Index,
        a scalar if a scalar was passed, otherwise ndarray

    Raises
    ------
    ValueError
        If `downcast` is not one of the accepted values.
    TypeError
        If `arg` has more than one dimension.

    Examples
    --------
    Take separate series and convert to numeric, coercing when told to

    >>> s = pd.Series(['1.0', '2', -3])
    >>> pd.to_numeric(s)
    0    1.0
    1    2.0
    2   -3.0
    dtype: float64
    >>> pd.to_numeric(s, downcast='float')
    0    1.0
    1    2.0
    2   -3.0
    dtype: float32
    >>> pd.to_numeric(s, downcast='signed')
    0    1
    1    2
    2   -3
    dtype: int8
    >>> s = pd.Series(['apple', '1.0', '2', -3])
    >>> pd.to_numeric(s, errors='ignore')
    0    apple
    1      1.0
    2        2
    3       -3
    dtype: object
    >>> pd.to_numeric(s, errors='coerce')
    0    NaN
    1    1.0
    2    2.0
    3   -3.0
    dtype: float64

    See Also
    --------
    pandas.DataFrame.astype : Cast argument to a specified dtype.
    pandas.to_datetime : Convert argument to datetime.
    pandas.to_timedelta : Convert argument to timedelta.
    numpy.ndarray.astype : Cast a numpy array to a specified type.
    """
    if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'):
        raise ValueError('invalid downcasting method provided')

    # remember what was passed in so the result can be re-wrapped at the end
    is_series = False
    is_index = False
    is_scalars = False

    if isinstance(arg, ABCSeries):
        is_series = True
        values = arg.values
    elif isinstance(arg, ABCIndexClass):
        is_index = True
        # prefer the integer representation when the index offers one
        values = arg.asi8
        if values is None:
            values = arg.values
    elif isinstance(arg, (list, tuple)):
        values = np.array(arg, dtype='O')
    elif is_scalar(arg):
        # decimals and plain numbers are returned right away
        if is_decimal(arg):
            return float(arg)
        if is_number(arg):
            return arg
        is_scalars = True
        values = np.array([arg], dtype='O')
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a list, tuple, 1-d array, or Series')
    else:
        values = arg

    try:
        if is_numeric_dtype(values):
            pass
        elif is_datetime_or_timedelta_dtype(values):
            values = values.astype(np.int64)
        else:
            values = ensure_object(values)
            coerce_numeric = errors not in ('ignore', 'raise')
            values = lib.maybe_convert_numeric(values,
                                               set(),
                                               coerce_numeric=coerce_numeric)

    except Exception:
        # with any other `errors` value the failure is swallowed on purpose
        # and `values` keeps whatever it held when the conversion failed
        if errors == 'raise':
            raise

    # attempt downcast only if the data has been successfully converted
    # to a numerical dtype and if a downcast method has been specified
    if downcast is not None and is_numeric_dtype(values):
        typecodes = None

        if downcast in ('integer', 'signed'):
            typecodes = np.typecodes['Integer']
        elif downcast == 'unsigned' and (not len(values)
                                         or np.min(values) >= 0):
            # np.min raises on zero-length input, so empty data is treated
            # as trivially non-negative
            typecodes = np.typecodes['UnsignedInteger']
        elif downcast == 'float':
            typecodes = np.typecodes['Float']

            # pandas support goes only to np.float32,
            # as float dtypes smaller than that are
            # extremely rare and not well supported
            float_32_char = np.dtype(np.float32).char
            float_32_ind = typecodes.index(float_32_char)
            typecodes = typecodes[float_32_ind:]

        if typecodes is not None:
            # from smallest to largest
            for dtype in typecodes:
                if np.dtype(dtype).itemsize <= values.dtype.itemsize:
                    values = maybe_downcast_to_dtype(values, dtype)

                    # successful conversion
                    if values.dtype == dtype:
                        break

    if is_series:
        return pd.Series(values, index=arg.index, name=arg.name)
    elif is_index:
        # because we want to coerce to numeric if possible,
        # do not use _shallow_copy_with_infer
        return pd.Index(values, name=arg.name)
    elif is_scalars:
        return values[0]
    else:
        return values
Exemplo n.º 19
0
def maybe_promote(dtype, fill_value=np.nan):
    """
    Find the minimal dtype that can hold both the given dtype and fill_value.

    Parameters
    ----------
    dtype : np.dtype or ExtensionDtype
    fill_value : scalar, default np.nan
        A non-scalar is only accepted when ``dtype`` is object dtype.

    Returns
    -------
    dtype
        Upcasted from dtype argument if necessary.
    fill_value
        Upcasted from fill_value argument if necessary.

    Raises
    ------
    ValueError
        If ``fill_value`` is not a scalar and ``dtype`` is not object dtype.
    """
    if not is_scalar(fill_value) and not is_object_dtype(dtype):
        # with object dtype there is nothing to promote, and the user can
        #  pass pretty much any weird fill_value they like
        raise ValueError("fill_value must be a scalar")

    # if we passed an array here, determine the fill value by dtype
    if isinstance(fill_value, np.ndarray):
        if issubclass(fill_value.dtype.type, (np.datetime64, np.timedelta64)):
            fill_value = fill_value.dtype.type("NaT", "ns")
        else:

            # we need to change to object type as our
            # fill_value is of object type
            if fill_value.dtype == np.object_:
                dtype = np.dtype(np.object_)
            fill_value = np.nan

        if dtype == np.object_ or dtype.kind in ["U", "S"]:
            # We treat string-like dtypes as object, and _always_ fill
            #  with np.nan
            fill_value = np.nan
            dtype = np.dtype(np.object_)

    # returns tuple of (dtype, fill_value)
    if issubclass(dtype.type, np.datetime64):
        if isinstance(fill_value, datetime) and fill_value.tzinfo is not None:
            # Trying to insert tzaware into tznaive, have to cast to object
            dtype = np.dtype(np.object_)
        elif is_integer(fill_value) or (is_float(fill_value)
                                        and not isna(fill_value)):
            # plain (non-NaN) numbers are not treated as datetimes
            dtype = np.dtype(np.object_)
        else:
            try:
                fill_value = tslibs.Timestamp(fill_value).to_datetime64()
            except (TypeError, ValueError):
                # not convertible to a Timestamp; fall back to object
                dtype = np.dtype(np.object_)
    elif issubclass(dtype.type, np.timedelta64):
        if (is_integer(fill_value)
                or (is_float(fill_value) and not np.isnan(fill_value))
                or isinstance(fill_value, str)):
            # TODO: What about str that can be a timedelta?
            dtype = np.dtype(np.object_)
        else:
            try:
                fv = tslibs.Timedelta(fill_value)
            except ValueError:
                dtype = np.dtype(np.object_)
            else:
                if fv is NaT:
                    # NaT has no `to_timedelta64` method
                    fill_value = np.timedelta64("NaT", "ns")
                else:
                    fill_value = fv.to_timedelta64()
    elif is_datetime64tz_dtype(dtype):
        if isna(fill_value):
            fill_value = NaT
        elif not isinstance(fill_value, datetime):
            dtype = np.dtype(np.object_)
        elif fill_value.tzinfo is None:
            # tz-naive value cannot live in a tz-aware dtype
            dtype = np.dtype(np.object_)
        elif not tz_compare(fill_value.tzinfo, dtype.tz):
            # TODO: sure we want to cast here?
            dtype = np.dtype(np.object_)

    elif is_extension_array_dtype(dtype) and isna(fill_value):
        fill_value = dtype.na_value

    elif is_float(fill_value):
        if issubclass(dtype.type, np.bool_):
            dtype = np.dtype(np.object_)

        elif issubclass(dtype.type, np.integer):
            dtype = np.dtype(np.float64)

        elif dtype.kind == "f":
            mst = np.min_scalar_type(fill_value)
            if mst > dtype:
                # e.g. mst is np.float64 and dtype is np.float32
                dtype = mst

        elif dtype.kind == "c":
            mst = np.min_scalar_type(fill_value)
            dtype = np.promote_types(dtype, mst)

    elif is_bool(fill_value):
        if not issubclass(dtype.type, np.bool_):
            dtype = np.dtype(np.object_)

    elif is_integer(fill_value):
        if issubclass(dtype.type, np.bool_):
            dtype = np.dtype(np.object_)

        elif issubclass(dtype.type, np.integer):
            if not np.can_cast(fill_value, dtype):
                # upcast to prevent overflow
                mst = np.min_scalar_type(fill_value)
                dtype = np.promote_types(dtype, mst)
                if dtype.kind == "f":
                    # Case where we disagree with numpy
                    dtype = np.dtype(np.object_)

    elif is_complex(fill_value):
        if issubclass(dtype.type, np.bool_):
            dtype = np.dtype(np.object_)

        elif issubclass(dtype.type, (np.integer, np.floating)):
            mst = np.min_scalar_type(fill_value)
            dtype = np.promote_types(dtype, mst)

        elif dtype.kind == "c":
            mst = np.min_scalar_type(fill_value)
            if mst > dtype:
                # e.g. mst is np.complex128 and dtype is np.complex64
                dtype = mst

    elif fill_value is None:
        if is_float_dtype(dtype) or is_complex_dtype(dtype):
            fill_value = np.nan
        elif is_integer_dtype(dtype):
            # Return an np.dtype instance (not the bare scalar type) so the
            #  result is consistent with every other branch.
            dtype = np.dtype(np.float64)
            fill_value = np.nan
        elif is_datetime_or_timedelta_dtype(dtype):
            fill_value = dtype.type("NaT", "ns")
        else:
            dtype = np.dtype(np.object_)
            fill_value = np.nan
    else:
        dtype = np.dtype(np.object_)

    # in case we have a string that looked like a number
    if is_extension_array_dtype(dtype):
        pass
    elif issubclass(np.dtype(dtype).type, (bytes, str)):
        dtype = np.dtype(np.object_)

    fill_value = _ensure_dtype_type(fill_value, dtype)
    return dtype, fill_value
Exemplo n.º 20
0
def to_numeric(arg, errors='raise', downcast=None):
    """
    Convert argument to a numeric type.

    The default return dtype is `float64` or `int64`
    depending on the data supplied. Use the `downcast` parameter
    to obtain other dtypes.

    Parameters
    ----------
    arg : scalar, list, tuple, 1-d array, Index, or Series
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception
        - If 'coerce', then invalid parsing will be set as NaN
        - If 'ignore', then invalid parsing will return the input
    downcast : {'integer', 'signed', 'unsigned', 'float'} , default None
        If not None, and if the data has been successfully cast to a
        numerical dtype (or if the data was numeric to begin with),
        downcast that resulting data to the smallest numerical dtype
        possible according to the following rules:

        - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
        - 'unsigned': smallest unsigned int dtype (min.: np.uint8)
        - 'float': smallest float dtype (min.: np.float32)

        As this behaviour is separate from the core conversion to
        numeric values, any errors raised during the downcasting
        will be surfaced regardless of the value of the 'errors' input.

        In addition, a candidate dtype is only tried if its item size
        does not exceed the item size of the data's current dtype, so
        if none of the dtypes checked satisfy that specification, no
        downcasting will be performed on the data.

        .. versionadded:: 0.19.0

    Returns
    -------
    ret : numeric if parsing succeeded.
        Return type depends on input.  Series if Series, Index if Index,
        a scalar if a scalar was passed, otherwise ndarray.

    Raises
    ------
    ValueError
        If `downcast` is not one of the accepted values.
    TypeError
        If `arg` has more than one dimension.

    Examples
    --------
    Take separate series and convert to numeric, coercing when told to

    >>> s = pd.Series(['1.0', '2', -3])
    >>> pd.to_numeric(s)
    0    1.0
    1    2.0
    2   -3.0
    dtype: float64
    >>> pd.to_numeric(s, downcast='float')
    0    1.0
    1    2.0
    2   -3.0
    dtype: float32
    >>> pd.to_numeric(s, downcast='signed')
    0    1
    1    2
    2   -3
    dtype: int8
    >>> s = pd.Series(['apple', '1.0', '2', -3])
    >>> pd.to_numeric(s, errors='ignore')
    0    apple
    1      1.0
    2        2
    3       -3
    dtype: object
    >>> pd.to_numeric(s, errors='coerce')
    0    NaN
    1    1.0
    2    2.0
    3   -3.0
    dtype: float64

    See Also
    --------
    pandas.DataFrame.astype : Cast argument to a specified dtype.
    pandas.to_datetime : Convert argument to datetime.
    pandas.to_timedelta : Convert argument to timedelta.
    numpy.ndarray.astype : Cast a numpy array to a specified type.
    """
    if downcast not in (None, 'integer', 'signed', 'unsigned', 'float'):
        raise ValueError('invalid downcasting method provided')

    is_series = False
    is_index = False
    is_scalars = False

    if isinstance(arg, ABCSeries):
        is_series = True
        values = arg.values
    elif isinstance(arg, ABCIndexClass):
        is_index = True
        # prefer the integer representation when the index exposes one
        values = arg.asi8
        if values is None:
            values = arg.values
    elif isinstance(arg, (list, tuple)):
        values = np.array(arg, dtype='O')
    elif is_scalar(arg):
        if is_decimal(arg):
            return float(arg)
        if is_number(arg):
            return arg
        # wrap the scalar so it can go through the array code path;
        # it is unwrapped again before returning
        is_scalars = True
        values = np.array([arg], dtype='O')
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a list, tuple, 1-d array, or Series')
    else:
        values = arg

    try:
        if is_numeric_dtype(values):
            pass
        elif is_datetime_or_timedelta_dtype(values):
            values = values.astype(np.int64)
        else:
            values = ensure_object(values)
            coerce_numeric = errors not in ('ignore', 'raise')
            values = lib.maybe_convert_numeric(values, set(),
                                               coerce_numeric=coerce_numeric)

    except Exception:
        # any conversion failure is swallowed unless the caller asked
        # for errors to be raised
        if errors == 'raise':
            raise

    # attempt downcast only if the data has been successfully converted
    # to a numerical dtype and if a downcast method has been specified
    if downcast is not None and is_numeric_dtype(values):
        typecodes = None

        if downcast in ('integer', 'signed'):
            typecodes = np.typecodes['Integer']
        elif downcast == 'unsigned' and (not len(values)
                                         or np.min(values) >= 0):
            # np.min raises on zero-size input, so empty data is treated
            # as trivially non-negative
            typecodes = np.typecodes['UnsignedInteger']
        elif downcast == 'float':
            typecodes = np.typecodes['Float']

            # pandas support goes only to np.float32,
            # as float dtypes smaller than that are
            # extremely rare and not well supported
            float_32_char = np.dtype(np.float32).char
            float_32_ind = typecodes.index(float_32_char)
            typecodes = typecodes[float_32_ind:]

        if typecodes is not None:
            # from smallest to largest
            for dtype in typecodes:
                if np.dtype(dtype).itemsize <= values.dtype.itemsize:
                    values = maybe_downcast_to_dtype(values, dtype)

                    # successful conversion
                    if values.dtype == dtype:
                        break

    if is_series:
        return pd.Series(values, index=arg.index, name=arg.name)
    elif is_index:
        # because we want to coerce to numeric if possible,
        # do not use _shallow_copy_with_infer
        return pd.Index(values, name=arg.name)
    elif is_scalars:
        return values[0]
    else:
        return values
Exemplo n.º 21
0
def _get_values(
    values: np.ndarray,
    skipna: bool,
    fill_value: Any = None,
    fill_value_typ: Optional[str] = None,
    mask: Optional[np.ndarray] = None,
) -> Tuple[np.ndarray, Optional[np.ndarray], np.dtype, np.dtype, Any]:
    """
    Resolve the values view, mask, dtype, dtype_max and fill_value.

    The values array is copied (and the masked positions filled) only when
    ``skipna`` is True and both a mask and a fill value end up being
    available.

    Parameters
    ----------
    values : ndarray
        Input array for which a mask may be computed.
    skipna : bool
        Whether NaNs should be skipped.
    fill_value : Any
        Value used to fill NaNs.
    fill_value_typ : str
        Set to '+inf' or '-inf' to handle dtype-specific infinities.
    mask : Optional[np.ndarray]
        Precomputed nan-mask, if known.

    Returns
    -------
    values : ndarray
        Possibly a filled copy of the input values.
    mask : Optional[ndarray[bool]]
        Mask for values, if one was supplied or computed.
    dtype : dtype
        dtype of the values before any int64 view was taken.
    dtype_max : dtype
        Platform independent dtype.
    fill_value : Any
        The fill value that was used.
    """
    # _get_values is only called from within nanops, always with a scalar
    #  fill_value; the maybe_upcast_putmask call below depends on that.
    assert is_scalar(fill_value)

    mask = _maybe_get_mask(values, skipna, mask)

    if not is_datetime64tz_dtype(values):
        values = lib.values_from_object(values)
        dtype = values.dtype
    else:
        # lib.values_from_object would hand back M8[ns] rather than the
        #  tz-aware dtype, so tz-aware input is unwrapped separately
        dtype = values.dtype
        values = getattr(values, "_values", values)

    if is_datetime_or_timedelta_dtype(values) or is_datetime64tz_dtype(values):
        # the switch from timedelta64/datetime64 to int64 has to come after
        #  the mask has been determined above
        i8_source = getattr(values, "asi8", values)
        values = i8_source.view(np.int64)

    dtype_ok = _na_ok_dtype(dtype)

    # resolve the fill value (an alternative dtype may be needed for it)
    fill_value = _get_fill_value(
        dtype, fill_value=fill_value, fill_value_typ=fill_value_typ
    )

    needs_fill = (mask is not None) and (fill_value is not None)

    if skipna and needs_fill:
        values = values.copy()
        if not dtype_ok:
            # dtype cannot hold the fill value as-is: promote if needed
            values, _ = maybe_upcast_putmask(values, mask, fill_value)
        else:
            np.putmask(values, mask, fill_value)

    # pick a platform independent precision dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.int64
    elif is_float_dtype(dtype):
        dtype_max = np.float64
    else:
        dtype_max = dtype

    return values, mask, dtype, dtype_max, fill_value
Exemplo n.º 22
0
def _get_values(values, skipna, fill_value=None, fill_value_typ=None,
                isfinite=False, copy=True, mask=None):
    """
    Return the values view, mask, dtype, dtype_max and fill_value.

    A mask is computed when none is passed in (via ``_isfinite`` if
    ``isfinite`` is set, otherwise via ``isna``).  With ``skipna`` the masked
    positions are overwritten with the resolved fill value.  ``copy=True``
    forces a copy of the values before anything is written.
    """
    if is_datetime64tz_dtype(values):
        # com.values_from_object would give back M8[ns] rather than the
        #  tz-aware dtype, hence the separate handling
        dtype = values.dtype
        values = getattr(values, "_values", values)
    else:
        values = com.values_from_object(values)
        dtype = values.dtype

    if mask is None:
        mask = _isfinite(values) if isfinite else isna(values)

    if is_datetime_or_timedelta_dtype(values) or is_datetime64tz_dtype(values):
        # the int64 view of timedelta64/datetime64 data must only be taken
        #  once `mask` has been determined
        i8_source = getattr(values, "asi8", values)
        values = i8_source.view(np.int64)

    dtype_ok = _na_ok_dtype(dtype)

    # resolve the fill value (an alternative dtype may be needed for it)
    fill_value = _get_fill_value(
        dtype, fill_value=fill_value, fill_value_typ=fill_value_typ
    )

    # the copy is taken regardless of skipna, and before any fill is written
    if copy:
        values = values.copy()

    if skipna:
        if not dtype_ok:
            # promote if needed
            values, _ = maybe_upcast_putmask(values, mask, fill_value)
        else:
            np.putmask(values, mask, fill_value)

    # pick a platform independent precision dtype
    if is_integer_dtype(dtype) or is_bool_dtype(dtype):
        dtype_max = np.int64
    elif is_float_dtype(dtype):
        dtype_max = np.float64
    else:
        dtype_max = dtype

    return values, mask, dtype, dtype_max, fill_value
Exemplo n.º 23
0
def to_numeric(arg, errors="raise", downcast=None):
    """
    Convert argument to a numeric type.

    By default the result dtype is `float64` or `int64`, depending on the
    data supplied; pass `downcast` to obtain other dtypes.

    Note that precision loss may occur when really large numbers are passed
    in. Due to the internal limitations of `ndarray`, numbers smaller than
    `-9223372036854775808` (np.iinfo(np.int64).min) or larger than
    `18446744073709551615` (np.iinfo(np.uint64).max) will very likely be
    converted to float so that they can be stored in an `ndarray`. The same
    caveat applies to `Series`, since it internally leverages `ndarray`.

    Parameters
    ----------
    arg : scalar, list, tuple, 1-d array, or Series
        Argument to be converted.
    errors : {'ignore', 'raise', 'coerce'}, default 'raise'
        - If 'raise', then invalid parsing will raise an exception.
        - If 'coerce', then invalid parsing will be set as NaN.
        - If 'ignore', then invalid parsing will return the input.
    downcast : {'integer', 'signed', 'unsigned', 'float'}, default None
        If not None, and if the data has been successfully cast to a
        numerical dtype (or was numeric to begin with), downcast the
        resulting data to the smallest numerical dtype possible according
        to the following rules:

        - 'integer' or 'signed': smallest signed int dtype (min.: np.int8)
        - 'unsigned': smallest unsigned int dtype (min.: np.uint8)
        - 'float': smallest float dtype (min.: np.float32)

        Downcasting is separate from the core conversion to numeric
        values, so any error raised while downcasting is surfaced
        regardless of the value of the 'errors' input.

        A candidate dtype is only tried when its item size does not exceed
        the item size of the data's current dtype; if no candidate
        qualifies, the data is left as it is.

    Returns
    -------
    ret
        Numeric if parsing succeeded.
        Return type depends on input.  Series if Series, otherwise ndarray.

    See Also
    --------
    DataFrame.astype : Cast argument to a specified dtype.
    to_datetime : Convert argument to datetime.
    to_timedelta : Convert argument to timedelta.
    numpy.ndarray.astype : Cast a numpy array to a specified type.
    DataFrame.convert_dtypes : Convert dtypes.

    Examples
    --------
    Take separate series and convert to numeric, coercing when told to

    >>> s = pd.Series(['1.0', '2', -3])
    >>> pd.to_numeric(s)
    0    1.0
    1    2.0
    2   -3.0
    dtype: float64
    >>> pd.to_numeric(s, downcast='float')
    0    1.0
    1    2.0
    2   -3.0
    dtype: float32
    >>> pd.to_numeric(s, downcast='signed')
    0    1
    1    2
    2   -3
    dtype: int8
    >>> s = pd.Series(['apple', '1.0', '2', -3])
    >>> pd.to_numeric(s, errors='ignore')
    0    apple
    1      1.0
    2        2
    3       -3
    dtype: object
    >>> pd.to_numeric(s, errors='coerce')
    0    NaN
    1    1.0
    2    2.0
    3   -3.0
    dtype: float64
    """
    if downcast not in (None, "integer", "signed", "unsigned", "float"):
        raise ValueError("invalid downcasting method provided")

    if errors not in ("ignore", "raise", "coerce"):
        raise ValueError("invalid error value specified")

    # --- normalise the input to an array-like ---------------------------
    is_series = isinstance(arg, ABCSeries)
    is_index = not is_series and isinstance(arg, ABCIndexClass)
    is_scalars = False

    if is_series:
        values = arg.values
    elif is_index:
        values = arg.asi8 if needs_i8_conversion(arg.dtype) else arg.values
    elif isinstance(arg, (list, tuple)):
        values = np.array(arg, dtype="O")
    elif is_scalar(arg):
        if is_decimal(arg):
            return float(arg)
        if is_number(arg):
            return arg
        # wrap the scalar in a length-1 object array; unwrapped on return
        is_scalars = True
        values = np.array([arg], dtype="O")
    elif getattr(arg, "ndim", 1) > 1:
        raise TypeError("arg must be a list, tuple, 1-d array, or Series")
    else:
        values = arg

    # --- core conversion ------------------------------------------------
    values_dtype = getattr(values, "dtype", None)
    if not is_numeric_dtype(values_dtype):
        if is_datetime_or_timedelta_dtype(values_dtype):
            values = values.astype(np.int64)
        else:
            values = ensure_object(values)
            # `errors` was validated above, so anything other than
            # 'ignore'/'raise' is 'coerce'
            coerce_numeric = errors == "coerce"
            try:
                values = lib.maybe_convert_numeric(
                    values, set(), coerce_numeric=coerce_numeric
                )
            except (ValueError, TypeError):
                if errors == "raise":
                    raise

    # --- optional downcast ----------------------------------------------
    # only attempted when a downcast method was given and the data ended
    # up with a numerical dtype
    if downcast is not None and is_numeric_dtype(values.dtype):
        typecodes = None

        if downcast in ("integer", "signed"):
            typecodes = np.typecodes["Integer"]
        elif downcast == "unsigned" and (not len(values) or np.min(values) >= 0):
            typecodes = np.typecodes["UnsignedInteger"]
        elif downcast == "float":
            all_floats = np.typecodes["Float"]

            # pandas support goes only to np.float32,
            # as float dtypes smaller than that are
            # extremely rare and not well supported
            first_supported = all_floats.index(np.dtype(np.float32).char)
            typecodes = all_floats[first_supported:]

        if typecodes is not None:
            # candidates are tried from smallest to largest
            for code in typecodes:
                candidate = np.dtype(code)
                if candidate.itemsize > values.dtype.itemsize:
                    continue
                values = maybe_downcast_numeric(values, candidate)

                # stop at the first candidate that took effect
                if values.dtype == candidate:
                    break

    # --- re-wrap to match the input kind --------------------------------
    if is_series:
        return arg._constructor(values, index=arg.index, name=arg.name)
    if is_index:
        # because we want to coerce to numeric if possible,
        # do not use _shallow_copy
        return pd.Index(values, name=arg.name)
    if is_scalars:
        return values[0]
    return values
Exemplo n.º 24
0
 def is_timestamp(self) -> bool:
     """Return whether ``self.pd_dtype`` is a datetime or timedelta dtype."""
     dtype = self.pd_dtype
     return is_datetime_or_timedelta_dtype(dtype)
Exemplo n.º 25
0
def _resample_time(trj: TrajaDataFrame, step_time: Union[float, int]):
    """Resample ``trj`` at ``step_time`` and average ``x`` and ``y`` per bin.

    Args:
        trj: Trajectory whose index must have a datetime or timedelta dtype.
        step_time: Resampling rule, forwarded unchanged to ``trj.resample``.

    Returns:
        The result of aggregating the resampled ``x`` and ``y`` columns with
        their mean.

    Raises:
        TypeError: If the index of ``trj`` is neither datetime nor timedelta.
    """
    if not is_datetime_or_timedelta_dtype(trj.index):
        # a wrong index dtype is a type error, not a generic failure
        raise TypeError(f"{trj.index.dtype} is not datetime or timedelta.")
    # string names select pandas' built-in mean reduction for each column
    return trj.resample(step_time).agg({"x": "mean", "y": "mean"})
Exemplo n.º 26
0
def _view_if_needed(values):
    """Return an int64 view of datetime/timedelta ``values``, else ``values``."""
    needs_i8_view = is_datetime_or_timedelta_dtype(values)
    return values.view(np.int64) if needs_i8_view else values