def objects_to_td64ns(data, unit="ns", errors="raise"):
    """
    Convert an object-dtyped or string-dtyped array into a
    timedelta64[ns]-dtyped array.

    Parameters
    ----------
    data : ndarray or Index
    unit : str, default "ns"
        The timedelta unit to treat integers as multiples of.
    errors : {"raise", "coerce", "ignore"}, default "raise"
        How to handle elements that cannot be converted to timedelta64[ns].
        See ``pandas.to_timedelta`` for details.

    Returns
    -------
    numpy.ndarray : timedelta64[ns] array converted from data

    Raises
    ------
    ValueError : Data cannot be converted to timedelta64[ns].

    Notes
    -----
    Unlike `pandas.to_timedelta`, if setting `errors=ignore` will not cause
    errors to be ignored; they are caught and subsequently ignored at a
    higher level.
    """
    # Coerce Index (and string dtypes) to an object-dtype ndarray,
    # avoiding a copy when one is not needed.
    as_objects = np.array(data, dtype=np.object_, copy=False)
    converted = array_to_timedelta64(as_objects, unit=unit, errors=errors)
    # The parsed values are i8 under the hood; reinterpret as m8[ns].
    return converted.view('timedelta64[ns]')
def _convert_listlike(arg, unit='ns', box=True, errors='raise', name=None):
    """Convert a list of objects to a timedelta index object."""
    # Materialize plain sequences (and anything lacking a dtype) as an
    # object ndarray so the dtype checks below apply uniformly.
    if isinstance(arg, (list, tuple)) or not hasattr(arg, 'dtype'):
        arg = np.array(list(arg), dtype='O')

    if is_timedelta64_dtype(arg):
        # Already timedelta-like: normalize to nanosecond resolution.
        result = arg.astype('timedelta64[ns]')
    elif is_integer_dtype(arg):
        # Integers are multiples of `unit`; convert to ns without an
        # extra copy.
        result = arg.astype('timedelta64[{unit}]'.format(unit=unit)).astype(
            'timedelta64[ns]', copy=False)
    else:
        try:
            result = array_to_timedelta64(ensure_object(arg),
                                          unit=unit, errors=errors)
            result = result.astype('timedelta64[ns]', copy=False)
        except ValueError:
            # errors='raise' must propagate.  errors='coerce' is not
            # expected to raise at all (parsing failures coerce to NaT),
            # so if an exception does surface here we re-raise rather
            # than mask a bug.  Only errors='ignore' hands back the
            # original input untouched.
            if errors != 'ignore':
                raise
            return arg

    if box:
        from pandas import TimedeltaIndex
        result = TimedeltaIndex(result, unit='ns', name=name)
    return result
def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
            periods=None, closed=None, dtype=None, copy=False,
            name=None, verify_integrity=True):
    # Construct a TimedeltaIndex from `data`, or generate a range from
    # start/end/periods/freq when `data` is None.

    # Fast path: an existing TimedeltaIndex with no freq/name override can
    # be returned directly (copied or shallow-copied), skipping validation.
    if isinstance(data, TimedeltaIndex) and freq is None and name is None:
        if copy:
            return data.copy()
        else:
            return data._shallow_copy()

    freq, freq_infer = dtl.maybe_infer_freq(freq)

    if data is None:
        # TODO: Remove this block and associated kwargs; GH#20535
        result = cls._generate_range(start, end, periods, freq,
                                     closed=closed)
        result.name = name
        return result

    if unit is not None:
        # Interpret numeric input as multiples of `unit` up front.
        data = to_timedelta(data, unit=unit, box=False)

    if is_scalar(data):
        raise ValueError('TimedeltaIndex() must be called with a '
                         'collection of some kind, {data} was passed'
                         .format(data=repr(data)))

    # convert if not already
    if getattr(data, 'dtype', None) != _TD_DTYPE:
        data = to_timedelta(data, unit=unit, box=False)
    elif copy:
        # Already m8[ns]; honor the copy request explicitly.
        data = np.array(data, copy=True)

    # Normalize anything remaining (object dtype, non-nano units,
    # integer views) to m8[ns].
    data = np.array(data, copy=False)
    if data.dtype == np.object_:
        data = array_to_timedelta64(data)
    if data.dtype != _TD_DTYPE:
        if is_timedelta64_dtype(data):
            # non-nano unit
            # TODO: watch out for overflows
            data = data.astype(_TD_DTYPE)
        else:
            data = ensure_int64(data).view(_TD_DTYPE)

    assert data.dtype == 'm8[ns]', data.dtype

    subarr = cls._simple_new(data, name=name, freq=freq)

    # check that we are matching freqs
    if verify_integrity and len(subarr) > 0:
        if freq is not None and not freq_infer:
            cls._validate_frequency(subarr, freq)

    if freq_infer:
        inferred = subarr.inferred_freq
        if inferred:
            subarr.freq = to_offset(inferred)

    return subarr
def _simple_new(cls, values, name=None, freq=None, **kwargs):
    """Fast-path constructor: wrap `values` with minimal coercion and
    no frequency validation."""
    arr = np.array(values, copy=False)
    if arr.dtype == np.object_:
        arr = array_to_timedelta64(arr)
    if arr.dtype != _TD_DTYPE:
        # Treat anything non-m8[ns] as i8 nanoseconds and reinterpret.
        arr = _ensure_int64(arr).view(_TD_DTYPE)

    result = object.__new__(cls)
    result._data = arr
    result.name = name
    result.freq = freq
    result._reset_identity()
    return result
def __new__(cls, values, freq=None):
    # Resolve the explicit freq (or mark it for inference).
    freq, freq_infer = dtl.maybe_infer_freq(freq)

    data = np.array(values, copy=False)
    if data.dtype == np.object_:
        data = array_to_timedelta64(data)

    result = cls._simple_new(data, freq=freq)
    if freq_infer:
        # No explicit freq was given: adopt whatever can be inferred.
        result.freq = to_offset(result.inferred_freq)
    return result
def _simple_new(cls, values, freq=None, **kwargs):
    """Fast-path constructor: coerce `values` to m8[ns] and wrap,
    skipping validation."""
    arr = np.array(values, copy=False)
    if arr.dtype == np.object_:
        arr = array_to_timedelta64(arr)
    if arr.dtype != _TD_DTYPE:
        if is_timedelta64_dtype(arr):
            # non-nano unit
            arr = arr.astype(_TD_DTYPE)
        else:
            # Assume i8 nanoseconds; reinterpret without copying.
            arr = _ensure_int64(arr).view(_TD_DTYPE)

    result = object.__new__(cls)
    result._data = arr
    result._freq = freq
    return result
def __new__(cls, values, freq=None):
    # Resolve the explicit freq (or mark it for inference).
    freq, freq_infer = dtl.maybe_infer_freq(freq)

    data = np.array(values, copy=False)
    if data.dtype == np.object_:
        data = array_to_timedelta64(data)

    result = cls._simple_new(data, freq=freq)
    if freq_infer:
        # Only set a freq when one can actually be inferred.
        inferred = result.inferred_freq
        if inferred:
            result.freq = to_offset(inferred)
    return result
def _simple_new(cls, values, freq=None, **kwargs):
    """Fast-path constructor: coerce `values` to m8[ns] and wrap,
    skipping validation."""
    arr = np.array(values, copy=False)
    if arr.dtype == np.object_:
        arr = array_to_timedelta64(arr)
    if arr.dtype != _TD_DTYPE:
        if is_timedelta64_dtype(arr):
            # non-nano unit
            arr = arr.astype(_TD_DTYPE)
        else:
            # Assume i8 nanoseconds; reinterpret without copying.
            arr = ensure_int64(arr).view(_TD_DTYPE)

    result = object.__new__(cls)
    result._data = arr
    result._freq = freq
    return result
def astype_nansafe(
    arr: np.ndarray, dtype: DtypeObj, copy: bool = True, skipna: bool = False
) -> ArrayLike:
    """
    Cast the elements of an array to a given dtype a nan-safe manner.

    Parameters
    ----------
    arr : ndarray
    dtype : np.dtype or ExtensionDtype
    copy : bool, default True
        If False, a view will be attempted but may fail, if
        e.g. the item sizes don't align.
    skipna: bool, default False
        Whether or not we should skip NaN when casting as a string-type.

    Raises
    ------
    ValueError
        The dtype was a datetime64/timedelta64 dtype, but it had no unit.
    """
    # We get here with 0-dim from sparse
    arr = np.atleast_1d(arr)

    # dispatch on extension dtype if needed
    if isinstance(dtype, ExtensionDtype):
        return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy)

    elif not isinstance(dtype, np.dtype):  # pragma: no cover
        raise ValueError("dtype must be np.dtype or ExtensionDtype")

    # datetime64/timedelta64 -> str/object: wrap in the datetimelike array
    # first so elements render as Timestamp/Timedelta rather than raw m8/M8.
    if arr.dtype.kind in ["m", "M"] and (
        issubclass(dtype.type, str) or dtype == _dtype_obj
    ):
        from pandas.core.construction import ensure_wrapped_if_datetimelike

        arr = ensure_wrapped_if_datetimelike(arr)
        return arr.astype(dtype, copy=copy)

    if issubclass(dtype.type, str):
        # ensure_string_array works on 1-D input; flatten and restore shape.
        shape = arr.shape
        if arr.ndim > 1:
            arr = arr.ravel()
        return lib.ensure_string_array(
            arr, skipna=skipna, convert_na_value=False
        ).reshape(shape)

    elif is_datetime64_dtype(arr.dtype):
        if dtype == np.int64:
            # NaT has no integer representation; refuse rather than
            # silently produce iNaT.
            if isna(arr).any():
                raise ValueError("Cannot convert NaT values to integer")
            return arr.view(dtype)

        # allow frequency conversions
        if dtype.kind == "M":
            return arr.astype(dtype)

        raise TypeError(f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]")

    elif is_timedelta64_dtype(arr.dtype):
        if dtype == np.int64:
            # Same NaT guard as the datetime64 branch above.
            if isna(arr).any():
                raise ValueError("Cannot convert NaT values to integer")
            return arr.view(dtype)

        elif dtype.kind == "m":
            return astype_td64_unit_conversion(arr, dtype, copy=copy)

        raise TypeError(f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]")

    elif np.issubdtype(arr.dtype, np.floating) and is_integer_dtype(dtype):
        # float -> int needs its own NaN check.
        return _astype_float_to_int_nansafe(arr, dtype, copy)

    elif is_object_dtype(arr.dtype):
        # if we have a datetime/timedelta array of objects
        # then coerce to a proper dtype and recall astype_nansafe

        if is_datetime64_dtype(dtype):
            from pandas import to_datetime

            return astype_nansafe(
                to_datetime(arr.ravel()).values.reshape(arr.shape),
                dtype,
                copy=copy,
            )
        elif is_timedelta64_dtype(dtype):
            # bc we know arr.dtype == object, this is equivalent to
            # `np.asarray(to_timedelta(arr))`, but using a lower-level API that
            # does not require a circular import.
            return array_to_timedelta64(arr).view("m8[ns]").astype(dtype, copy=False)

    if dtype.name in ("datetime64", "timedelta64"):
        msg = (
            f"The '{dtype.name}' dtype has no unit. Please pass in "
            f"'{dtype.name}[ns]' instead."
        )
        raise ValueError(msg)

    if copy or is_object_dtype(arr.dtype) or is_object_dtype(dtype):
        # Explicit copy, or required since NumPy can't view from / to object.
        return arr.astype(dtype, copy=True)

    return arr.astype(dtype, copy=copy)
def test_array_to_timedelta64_string_with_unit_2d_raises(self):
    # check the 'unit is not None and errors != "coerce"' path
    # in array_to_timedelta64 raises correctly with 2D values
    arr = np.array([["1", 2], [3, "4"]], dtype=object)
    msg = "unit must not be specified"
    with pytest.raises(ValueError, match=msg):
        array_to_timedelta64(arr, unit="s")
def __new__(cls, data=None, unit=None, freq=None, start=None, end=None,
            periods=None, closed=None, dtype=None, copy=False,
            name=None, verify_integrity=True):
    # Construct a TimedeltaIndex from `data`, or generate a range from
    # start/end/periods/freq when `data` is None.

    # Fast path: an existing TimedeltaIndex with no freq/name override can
    # be returned directly (copied or shallow-copied), skipping validation.
    if isinstance(data, TimedeltaIndex) and freq is None and name is None:
        if copy:
            return data.copy()
        else:
            return data._shallow_copy()

    freq, freq_infer = dtl.maybe_infer_freq(freq)

    if data is None:
        # TODO: Remove this block and associated kwargs; GH#20535
        result = cls._generate_range(start, end, periods, freq,
                                     closed=closed)
        result.name = name
        return result

    if unit is not None:
        # Interpret numeric input as multiples of `unit` up front.
        data = to_timedelta(data, unit=unit, box=False)

    if is_scalar(data):
        raise ValueError(
            'TimedeltaIndex() must be called with a '
            'collection of some kind, {data} was passed'.format(
                data=repr(data)))

    # convert if not already
    if getattr(data, 'dtype', None) != _TD_DTYPE:
        data = to_timedelta(data, unit=unit, box=False)
    elif copy:
        # Already m8[ns]; honor the copy request explicitly.
        data = np.array(data, copy=True)

    # Normalize anything remaining (object dtype, non-nano units,
    # integer views) to m8[ns].
    data = np.array(data, copy=False)
    if data.dtype == np.object_:
        data = array_to_timedelta64(data)
    if data.dtype != _TD_DTYPE:
        if is_timedelta64_dtype(data):
            # non-nano unit
            # TODO: watch out for overflows
            data = data.astype(_TD_DTYPE)
        else:
            data = ensure_int64(data).view(_TD_DTYPE)

    assert data.dtype == 'm8[ns]', data.dtype

    subarr = cls._simple_new(data, name=name, freq=freq)

    # check that we are matching freqs
    if verify_integrity and len(subarr) > 0:
        if freq is not None and not freq_infer:
            cls._validate_frequency(subarr, freq)

    if freq_infer:
        inferred = subarr.inferred_freq
        if inferred:
            subarr.freq = to_offset(inferred)

    return subarr