Пример #1
0
def test_is_datetime64_dtype():
    """Check ``com.is_datetime64_dtype`` on non-datetime and datetime inputs."""
    # Inputs that must not be reported as datetime64.
    rejected = (object, [1, 2, 3], np.array([], dtype=int))
    for candidate in rejected:
        assert not com.is_datetime64_dtype(candidate)

    # Inputs that must be reported as datetime64.
    accepted = (np.datetime64, np.array([], dtype=np.datetime64))
    for candidate in accepted:
        assert com.is_datetime64_dtype(candidate)
Пример #2
0
    def test_is_datetime_dtypes(self):
        """Exercise the datetime64 dtype predicates on naive and tz-aware data."""
        naive = pd.date_range('20130101', periods=3)
        aware = pd.date_range('20130101', periods=3, tz='US/Eastern')

        # Each predicate is applied to the same four inputs, in this order.
        inputs = ('datetime64', 'datetime64[ns]', naive, aware)
        expectations = [
            (is_datetime64_dtype, (True, True, True, False)),
            (is_datetime64_ns_dtype, (False, True, True, True)),
            (is_datetime64_any_dtype, (True, True, True, True)),
            (is_datetime64tz_dtype, (False, False, False, True)),
        ]
        for check, wanted in expectations:
            for arg, expected in zip(inputs, wanted):
                assert bool(check(arg)) == expected

        # tz-aware dtype strings: everything except the naive predicate
        # should accept them.
        for tz in ('US/Eastern', 'UTC'):
            dtype = 'datetime64[ns, {}]'.format(tz)
            assert not is_datetime64_dtype(dtype)
            for check in (is_datetime64tz_dtype, is_datetime64_ns_dtype,
                          is_datetime64_any_dtype):
                assert check(dtype)
Пример #3
0
 def test_compat(self):
     """The tz-aware dtype (object and string form) satisfies the tz/any/ns
     predicates but not the naive datetime64 predicate."""
     tz_string = 'datetime64[ns, US/Eastern]'
     for check in (is_datetime64tz_dtype, is_datetime64_any_dtype,
                   is_datetime64_ns_dtype):
         self.assertTrue(check(self.dtype))
         self.assertTrue(check(tz_string))

     for candidate in (self.dtype, tz_string):
         self.assertFalse(is_datetime64_dtype(candidate))
Пример #4
0
 def test_compat(self):
     """The tz-aware dtype (object and string form) satisfies the tz/any/ns
     predicates but not the naive datetime64 predicate."""
     tz_string = 'datetime64[ns, US/Eastern]'
     for check in (is_datetime64tz_dtype, is_datetime64_any_dtype,
                   is_datetime64_ns_dtype):
         assert check(self.dtype)
         assert check(tz_string)

     for candidate in (self.dtype, tz_string):
         assert not is_datetime64_dtype(candidate)
Пример #5
0
def pad_1d(values, limit=None, mask=None, dtype=None):
    """
    Dispatch to the dtype-specific 1-D pad routine and return the values.

    Parameters
    ----------
    values : array whose dtype selects the routine
    limit : forwarded to the selected routine
    mask : optional precomputed null mask; ``isna(values)`` when omitted
    dtype : optional dtype override; defaults to ``values.dtype``

    Returns
    -------
    The filled values (integer input is first passed through
    ``ensure_float64``, so the returned object may differ from the input).

    Raises
    ------
    ValueError
        If no routine is available for the dtype.
    """
    dtype = values.dtype if dtype is None else dtype

    # NB: the datetime check must stay ahead of the integer check.
    if is_float_dtype(values):
        routine_name = 'pad_inplace_{name}'.format(name=dtype.name)
        filler = getattr(algos, routine_name, None)
    elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
        filler = _pad_1d_datetime
    elif is_integer_dtype(values):
        values = ensure_float64(values)
        filler = algos.pad_inplace_float64
    elif values.dtype == np.object_:
        filler = algos.pad_inplace_object
    elif is_timedelta64_dtype(values):
        # NaTs are treated identically to datetime64, so we can dispatch
        #  to that implementation
        filler = _pad_1d_datetime
    else:
        filler = None

    if filler is None:
        raise ValueError('Invalid dtype for pad_1d [{name}]'
                         .format(name=dtype.name))

    null_mask = isna(values) if mask is None else mask
    filler(values, null_mask.view(np.uint8), limit=limit)
    return values
Пример #6
0
def backfill_1d(values, limit=None, mask=None, dtype=None):
    """
    Dispatch to the dtype-specific 1-D backfill routine and return the values.

    Parameters
    ----------
    values : array whose dtype selects the routine
    limit : forwarded to the selected routine
    mask : optional precomputed null mask; ``isna(values)`` when omitted
    dtype : optional dtype override; defaults to ``values.dtype``

    Returns
    -------
    The filled values (integer input is first passed through
    ``ensure_float64``, so the returned object may differ from the input).

    Raises
    ------
    ValueError
        If no routine is available for the dtype.
    """
    if dtype is None:
        dtype = values.dtype
    _method = None
    if is_float_dtype(values):
        name = 'backfill_inplace_{name}'.format(name=dtype.name)
        _method = getattr(algos, name, None)
    elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
        _method = _backfill_1d_datetime
    elif is_integer_dtype(values):
        values = ensure_float64(values)
        _method = algos.backfill_inplace_float64
    elif values.dtype == np.object_:
        _method = algos.backfill_inplace_object
    elif is_timedelta64_dtype(values):
        # Mirror pad_1d: NaTs are treated identically to datetime64, so
        #  timedelta64 can dispatch to the datetime implementation instead
        #  of being rejected as an invalid dtype.
        _method = _backfill_1d_datetime

    if _method is None:
        raise ValueError('Invalid dtype for backfill_1d [{name}]'
                         .format(name=dtype.name))

    if mask is None:
        mask = isna(values)
    mask = mask.view(np.uint8)

    _method(values, mask, limit=limit)
    return values
Пример #7
0
    def __new__(cls, data):
        """
        Pick and return the concrete properties object for ``data``.

        This class is not instantiated itself; based on the dtype of the
        Series (or of its categories, for categorical data) a datetime,
        timedelta or period properties object is returned instead.

        Raises
        ------
        TypeError
            If ``data`` is not a Series.
        AttributeError
            If the values are not datetimelike (also raised when any of the
            dtype checks or constructors fails).
        """
        from pandas import Series

        if not isinstance(data, Series):
            raise TypeError("cannot convert an object of type {0} to a "
                            "datetimelike index".format(type(data)))

        # For categoricals, inspect the categories but remember the original.
        orig = None
        if is_categorical_dtype(data):
            orig = data
            data = Series(orig.values.categories,
                          name=orig.name,
                          copy=False)

        try:
            dtype = data.dtype
            if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
                return DatetimeProperties(data, orig)
            if is_timedelta64_dtype(dtype):
                return TimedeltaProperties(data, orig)
            if is_period_arraylike(data):
                return PeriodProperties(data, orig)
            if is_datetime_arraylike(data):
                return DatetimeProperties(data, orig)
        except Exception:
            pass  # we raise an attribute error anyway

        raise AttributeError("Can only use .dt accessor with datetimelike "
                             "values")
Пример #8
0
def pad_2d(values, limit=None, mask=None, dtype=None):
    """
    Dispatch to the dtype-specific 2-D pad routine and return the values.

    Parameters
    ----------
    values : array whose dtype selects the routine
    limit : forwarded to the selected routine
    mask : optional precomputed null mask; ``isna(values)`` when omitted
    dtype : optional dtype override; defaults to ``values.dtype``

    Raises
    ------
    ValueError
        If no routine is available for the dtype.
    """
    dtype = values.dtype if dtype is None else dtype

    # NB: the datetime check must stay ahead of the integer check.
    if is_float_dtype(values):
        routine_name = 'pad_2d_inplace_{name}'.format(name=dtype.name)
        filler = getattr(algos, routine_name, None)
    elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
        filler = _pad_2d_datetime
    elif is_integer_dtype(values):
        values = ensure_float64(values)
        filler = algos.pad_2d_inplace_float64
    elif values.dtype == np.object_:
        filler = algos.pad_2d_inplace_object
    else:
        filler = None

    if filler is None:
        raise ValueError('Invalid dtype for pad_2d [{name}]'
                         .format(name=dtype.name))

    null_mask = isna(values) if mask is None else mask
    null_mask = null_mask.view(np.uint8)

    # Skip the routine entirely when any dimension has length zero.
    if np.all(values.shape):
        filler(values, null_mask, limit=limit)
    return values
Пример #9
0
    def wrapper(left, right, name=name, na_op=na_op):
        """
        Apply the enclosing arithmetic op to ``left`` and ``right``.

        Defers to DataFrame (returns ``NotImplemented``), routes datetime64
        and timedelta64 data through the matching Index class, and otherwise
        evaluates the op on the underlying values via ``safe_na_op``.
        """
        if isinstance(right, ABCDataFrame):
            return NotImplemented

        left, right = _align_method_SERIES(left, right)
        res_name = _get_series_op_result_name(left, right)

        # Datetime-like data is handled by the corresponding Index class.
        if is_datetime64_dtype(left) or is_datetime64tz_dtype(left):
            index_cls = pd.DatetimeIndex
        elif is_timedelta64_dtype(left):
            index_cls = pd.TimedeltaIndex
        else:
            index_cls = None

        if index_cls is not None:
            result = dispatch_to_index_op(op, left, right, index_cls)
            return construct_result(left, result,
                                    index=left.index, name=res_name,
                                    dtype=result.dtype)

        lvalues = left.values
        if isinstance(right, ABCSeries):
            rvalues = getattr(right, 'values', right)
        else:
            rvalues = right

        result = safe_na_op(lvalues, rvalues)
        return construct_result(left, result,
                                index=left.index, name=res_name, dtype=None)
Пример #10
0
    def wrapper(left, right, name=name, na_op=na_op):
        """
        Apply the enclosing arithmetic op to ``left`` and ``right``.

        Defers to DataFrame (returns ``NotImplemented``), routes datetime64
        and timedelta64 data through the matching Index class, rejects
        categorical data with ``TypeError``, and otherwise evaluates the op
        on the underlying values via ``safe_na_op``.
        """
        if isinstance(right, ABCDataFrame):
            return NotImplemented

        left, right = _align_method_SERIES(left, right)
        res_name = _get_series_op_result_name(left, right)

        # Datetime-like data is handled by the corresponding Index class.
        if is_datetime64_dtype(left) or is_datetime64tz_dtype(left):
            index_cls = pd.DatetimeIndex
        elif is_timedelta64_dtype(left):
            index_cls = pd.TimedeltaIndex
        else:
            index_cls = None

        if index_cls is not None:
            result = dispatch_to_index_op(op, left, right, index_cls)
            return construct_result(left, result,
                                    index=left.index, name=res_name,
                                    dtype=result.dtype)

        if is_categorical_dtype(left):
            raise TypeError("{typ} cannot perform the operation "
                            "{op}".format(typ=type(left).__name__, op=str_rep))

        lvalues = left.values
        rvalues = right.values if isinstance(right, ABCSeries) else right

        result = safe_na_op(lvalues, rvalues)
        return construct_result(left, result,
                                index=left.index, name=res_name, dtype=None)
Пример #11
0
    def __sub__(self, other):
        """
        Subtract ``other`` from this datetime-like array.

        The operand is first unwrapped from a zero-dim array, then dispatched
        on its type (scalars) or dtype (array-likes) to the matching helper
        method.  The order of the checks matters (see inline comments).

        Returns
        -------
        The helper's result; a timedelta64 ndarray result is wrapped in
        ``TimedeltaArrayMixin``.  ``NotImplemented`` is returned for
        Series/DataFrame, extension-array dtypes and unrecognised operands.

        Raises
        ------
        TypeError
            For an Index that matched none of the earlier dtype checks, or
            for a float-dtype operand.
        """
        other = lib.item_from_zerodim(other)
        if isinstance(other, (ABCSeries, ABCDataFrame)):
            # Defer to the Series/DataFrame implementation.
            return NotImplemented

        # scalar others
        elif other is NaT:
            result = self._sub_nat()
        elif isinstance(other, (Tick, timedelta, np.timedelta64)):
            result = self._add_delta(-other)
        elif isinstance(other, DateOffset):
            # specifically _not_ a Tick
            result = self._add_offset(-other)
        elif isinstance(other, (datetime, np.datetime64)):
            result = self._sub_datetimelike_scalar(other)
        elif lib.is_integer(other):
            # This check must come after the check for np.timedelta64
            # as is_integer returns True for these
            maybe_integer_op_deprecated(self)
            result = self._time_shift(-other)

        elif isinstance(other, Period):
            result = self._sub_period(other)

        # array-like others
        elif is_timedelta64_dtype(other):
            # TimedeltaIndex, ndarray[timedelta64]
            result = self._add_delta(-other)
        elif is_offsetlike(other):
            # Array/Index of DateOffset objects
            result = self._addsub_offset_array(other, operator.sub)
        elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
            # DatetimeIndex, ndarray[datetime64]
            result = self._sub_datetime_arraylike(other)
        elif is_period_dtype(other):
            # PeriodIndex
            result = self._sub_period_array(other)
        elif is_integer_dtype(other):
            maybe_integer_op_deprecated(self)
            result = self._addsub_int_array(other, operator.sub)
        elif isinstance(other, ABCIndexClass):
            # Any Index that did not match one of the dtype checks above.
            raise TypeError("cannot subtract {cls} and {typ}"
                            .format(cls=type(self).__name__,
                                    typ=type(other).__name__))
        elif is_float_dtype(other):
            # Explicitly catch invalid dtypes
            raise TypeError("cannot subtract {dtype}-dtype from {cls}"
                            .format(dtype=other.dtype,
                                    cls=type(self).__name__))
        elif is_extension_array_dtype(other):
            # Categorical op will raise; defer explicitly
            return NotImplemented
        else:  # pragma: no cover
            return NotImplemented

        # Wrap raw timedelta64 ndarrays in the array class before returning.
        if is_timedelta64_dtype(result) and isinstance(result, np.ndarray):
            from pandas.core.arrays import TimedeltaArrayMixin
            # TODO: infer freq?
            return TimedeltaArrayMixin(result)
        return result
Пример #12
0
 def __rsub__(self, other):
     """Reflected subtraction: compute ``other - self``."""
     needs_flip = is_datetime64_dtype(other) and is_timedelta64_dtype(self)
     if not needs_flip:
         # General case: negate the forward subtraction.
         return -(self - other)

     # ndarray[datetime64] cannot be subtracted from self, so
     # we need to wrap in DatetimeIndex and flip the operation
     from pandas import DatetimeIndex
     return DatetimeIndex(other) - self
Пример #13
0
def _convert_bin_to_numeric_type(bins, dtype):
    """
    Convert datetime/timedelta bins to their int64 representation.

    Bins are returned unchanged when ``dtype`` is neither timedelta64 nor
    datetime64.

    Parameters
    ----------
    bins : list-like of bins
    dtype : dtype of data

    Raises
    ------
    ValueError if bins are not of a compat dtype to dtype
    """
    inferred = infer_dtype(bins)

    if is_timedelta64_dtype(dtype):
        if inferred not in ('timedelta', 'timedelta64'):
            raise ValueError("bins must be of timedelta64 dtype")
        return to_timedelta(bins).view(np.int64)

    if is_datetime64_dtype(dtype):
        if inferred not in ('datetime', 'datetime64'):
            raise ValueError("bins must be of datetime64 dtype")
        return to_datetime(bins).view(np.int64)

    return bins
Пример #14
0
def _format_labels(bins, precision, right=True,
                   include_lowest=False, dtype=None):
    """
    Build the interval labels for ``bins`` according to ``dtype``.

    Breaks are converted with ``Timestamp``/``Timedelta`` for datetime-like
    dtypes and rounded to the inferred precision otherwise.  When intervals
    are right-closed and ``include_lowest`` is set, the left edge of the
    first interval is moved down by one step (1ns, or ``10 ** -precision``).
    """
    closed = 'right' if right else 'left'

    if is_datetime64_dtype(dtype):
        formatter = Timestamp

        def adjust(x):
            return x - Timedelta('1ns')
    elif is_timedelta64_dtype(dtype):
        formatter = Timedelta

        def adjust(x):
            return x - Timedelta('1ns')
    else:
        precision = _infer_precision(precision, bins)

        def formatter(x):
            return _round_frac(x, precision)

        def adjust(x):
            return x - 10 ** (-precision)

    labels = IntervalIndex.from_breaks([formatter(b) for b in bins],
                                       closed=closed)

    if not (right and include_lowest):
        return labels

    # we will adjust the left hand side by precision to
    # account that we are all right closed
    first = labels[0]
    lowered = adjust(first.left)
    head = IntervalIndex.from_intervals(
        [Interval(lowered, first.right, closed='right')])
    return head.append(labels[1:])
Пример #15
0
def backfill_2d(values, limit=None, mask=None, dtype=None):
    """
    Dispatch to the dtype-specific 2-D backfill routine and return the values.

    Parameters
    ----------
    values : array whose dtype selects the routine
    limit : forwarded to the selected routine
    mask : optional precomputed null mask; ``isnull(values)`` when omitted
    dtype : optional dtype override; defaults to ``values.dtype``

    Raises
    ------
    ValueError
        If no routine is available for the dtype.
    """
    dtype = values.dtype if dtype is None else dtype

    # NB: the datetime check must stay ahead of the integer check.
    if is_float_dtype(values):
        filler = getattr(algos, 'backfill_2d_inplace_%s' % dtype.name, None)
    elif is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
        filler = _backfill_2d_datetime
    elif is_integer_dtype(values):
        values = _ensure_float64(values)
        filler = algos.backfill_2d_inplace_float64
    elif values.dtype == np.object_:
        filler = algos.backfill_2d_inplace_object
    else:
        filler = None

    if filler is None:
        raise ValueError('Invalid dtype for backfill_2d [%s]' % dtype.name)

    null_mask = isnull(values) if mask is None else mask
    null_mask = null_mask.view(np.uint8)

    # Skip the routine entirely when any dimension has length zero.
    if np.all(values.shape):
        filler(values, null_mask, limit=limit)
    return values
Пример #16
0
def _wrap_results(result, dtype, fill_value=None):
    """
    Convert a reduction result back to ``dtype`` when it is datetime-like.

    ndarray results are re-viewed as ``dtype``; scalar results become a
    ``Timestamp`` or ``Timedelta`` (a scalar equal to ``fill_value`` is
    replaced by NaN first).  For any other dtype ``result`` is returned
    unchanged.

    Raises
    ------
    ValueError
        If a scalar timedelta result exceeds ``_int64_max`` in magnitude.
    """
    is_array = isinstance(result, np.ndarray)

    if is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype):
        if fill_value is None:
            # GH#24293
            fill_value = iNaT
        if is_array:
            return result.view(dtype)

        tz = getattr(dtype, 'tz', None)
        assert not isna(fill_value), "Expected non-null fill_value"
        if result == fill_value:
            result = np.nan
        return tslibs.Timestamp(result, tz=tz)

    if is_timedelta64_dtype(dtype):
        if is_array:
            return result.astype('i8').view(dtype)

        if result == fill_value:
            result = np.nan

        # raise if we have a timedelta64[ns] which is too large
        if np.fabs(result) > _int64_max:
            raise ValueError("overflow in timedelta operation")

        return tslibs.Timedelta(result, unit='ns')

    return result
Пример #17
0
        def __sub__(self, other):
            """
            Subtract ``other`` from this datetime-like index.

            The operand is first unwrapped from a zero-dim array, then
            dispatched on its type (scalars) or dtype (array-likes) to the
            matching helper.  The order of the checks matters (see inline
            comments).

            Returns
            -------
            An Index named via ``ops.get_op_result_name``; non-Index helper
            results are wrapped with ``Index(result)``.  ``NotImplemented``
            is returned for Series/DataFrame, unrecognised operands, or when
            the helper itself returns ``NotImplemented``.

            Raises
            ------
            TypeError
                For an Index that matched none of the earlier dtype checks,
                or for a float-dtype operand.
            NullFrequencyError
                For an integer-dtype operand when ``self.freq`` is None.
            """
            from pandas import Index

            other = lib.item_from_zerodim(other)
            if isinstance(other, (ABCSeries, ABCDataFrame)):
                # Defer to the Series/DataFrame implementation.
                return NotImplemented

            # scalar others
            elif other is NaT:
                result = self._sub_nat()
            elif isinstance(other, (Tick, timedelta, np.timedelta64)):
                result = self._add_delta(-other)
            elif isinstance(other, DateOffset):
                # specifically _not_ a Tick
                result = self._add_offset(-other)
            elif isinstance(other, (datetime, np.datetime64)):
                result = self._sub_datelike(other)
            elif is_integer(other):
                # This check must come after the check for np.timedelta64
                # as is_integer returns True for these
                result = self.shift(-other)
            elif isinstance(other, Period):
                result = self._sub_period(other)

            # array-like others
            elif is_timedelta64_dtype(other):
                # TimedeltaIndex, ndarray[timedelta64]
                result = self._add_delta(-other)
            elif is_offsetlike(other):
                # Array/Index of DateOffset objects
                result = self._addsub_offset_array(other, operator.sub)
            elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
                # DatetimeIndex, ndarray[datetime64]
                result = self._sub_datelike(other)
            elif isinstance(other, Index):
                # Any Index that did not match one of the dtype checks above.
                raise TypeError("cannot subtract {cls} and {typ}"
                                .format(cls=type(self).__name__,
                                        typ=type(other).__name__))
            elif is_integer_dtype(other) and self.freq is None:
                # GH#19123
                raise NullFrequencyError("Cannot shift with no freq")

            elif is_float_dtype(other):
                # Explicitly catch invalid dtypes
                raise TypeError("cannot subtract {dtype}-dtype from {cls}"
                                .format(dtype=other.dtype,
                                        cls=type(self).__name__))
            else:  # pragma: no cover
                return NotImplemented

            # Helpers may themselves decline the operation.
            if result is NotImplemented:
                return NotImplemented
            elif not isinstance(result, Index):
                # Index.__new__ will choose appropriate subclass for dtype
                result = Index(result)
            res_name = ops.get_op_result_name(self, other)
            result.name = res_name
            return result
Пример #18
0
def test_numpy_array_all_dtypes(any_numpy_dtype):
    """``Series.array`` returns the array class matching the numpy dtype."""
    ser = pd.Series(dtype=any_numpy_dtype)
    result = ser.array

    # Work out which array class is expected, then check once.
    if is_datetime64_dtype(any_numpy_dtype):
        expected_type = DatetimeArray
    elif is_timedelta64_dtype(any_numpy_dtype):
        expected_type = TimedeltaArray
    else:
        expected_type = PandasArray
    assert isinstance(result, expected_type)
Пример #19
0
    def __add__(self, other):
        """
        Add ``other`` to this datetime-like array.

        The operand is first unwrapped from a zero-dim array, then dispatched
        on its type (scalars) or dtype (array-likes) to the matching helper
        method.  The order of the checks matters (see inline comments).

        Returns
        -------
        The helper's result; a timedelta64 ndarray result is wrapped in
        ``TimedeltaArrayMixin``.  ``NotImplemented`` is returned for
        Series/DataFrame, period dtypes, extension-array dtypes and
        unrecognised operands.

        Raises
        ------
        TypeError
            For a float-dtype operand.
        """
        other = lib.item_from_zerodim(other)
        if isinstance(other, (ABCSeries, ABCDataFrame)):
            # Defer to the Series/DataFrame implementation.
            return NotImplemented

        # scalar others
        elif other is NaT:
            result = self._add_nat()
        elif isinstance(other, (Tick, timedelta, np.timedelta64)):
            result = self._add_delta(other)
        elif isinstance(other, DateOffset):
            # specifically _not_ a Tick
            result = self._add_offset(other)
        elif isinstance(other, (datetime, np.datetime64)):
            result = self._add_datetimelike_scalar(other)
        elif lib.is_integer(other):
            # This check must come after the check for np.timedelta64
            # as is_integer returns True for these
            maybe_integer_op_deprecated(self)
            result = self._time_shift(other)

        # array-like others
        elif is_timedelta64_dtype(other):
            # TimedeltaIndex, ndarray[timedelta64]
            result = self._add_delta(other)
        elif is_offsetlike(other):
            # Array/Index of DateOffset objects
            result = self._addsub_offset_array(other, operator.add)
        elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
            # DatetimeIndex, ndarray[datetime64]
            # NOTE: returned directly, bypassing the wrapping step below.
            return self._add_datetime_arraylike(other)
        elif is_integer_dtype(other):
            maybe_integer_op_deprecated(self)
            result = self._addsub_int_array(other, operator.add)
        elif is_float_dtype(other):
            # Explicitly catch invalid dtypes
            raise TypeError("cannot add {dtype}-dtype to {cls}"
                            .format(dtype=other.dtype,
                                    cls=type(self).__name__))
        elif is_period_dtype(other):
            # if self is a TimedeltaArray and other is a PeriodArray with
            #  a timedelta-like (i.e. Tick) freq, this operation is valid.
            #  Defer to the PeriodArray implementation.
            # In remaining cases, this will end up raising TypeError.
            return NotImplemented
        elif is_extension_array_dtype(other):
            # Categorical op will raise; defer explicitly
            return NotImplemented
        else:  # pragma: no cover
            return NotImplemented

        # Wrap raw timedelta64 ndarrays in the array class before returning.
        if is_timedelta64_dtype(result) and isinstance(result, np.ndarray):
            from pandas.core.arrays import TimedeltaArrayMixin
            # TODO: infer freq?
            return TimedeltaArrayMixin(result)
        return result
Пример #20
0
def _unpickle_array(bytes):
    """Read an array from ``bytes`` via ``read_array``, normalising any
    datetime64 result to ``_NS_DTYPE``."""
    arr = read_array(BytesIO(bytes))

    # All datetimes should be stored as M8[ns].  When unpickling with
    # numpy1.6, it will read these as M8[us].  So this ensures all
    # datetime64 types are read as M8[ns]
    return arr.view(_NS_DTYPE) if is_datetime64_dtype(arr) else arr
Пример #21
0
    def __new__(cls, values, freq=None, **kwargs):
        """
        Build an instance from period-dtype or datetime64-dtype ``values``.

        Period input contributes its own ``freq`` and ``asi8`` values;
        datetime64 input is converted with ``dt64arr_to_periodarr``.  Any
        other input is passed to ``cls._simple_new`` unchanged.

        Raises
        ------
        IncompatibleFrequency
            If ``freq`` is given and differs from the freq of period input.
        """
        if is_period_dtype(values):
            # PeriodArray, PeriodIndex
            freq_conflict = freq is not None and values.freq != freq
            if freq_conflict:
                raise IncompatibleFrequency(freq, values.freq)
            freq, values = values.freq, values.asi8
        elif is_datetime64_dtype(values):
            # TODO: what if it has tz?
            values = dt64arr_to_periodarr(values, freq)

        return cls._simple_new(values, freq=freq, **kwargs)
Пример #22
0
def maybe_to_datetimelike(data, copy=False):
    """
    Return a DelegatedClass of a Series that is datetimelike
    (e.g. datetime64[ns], timedelta64[ns] dtype or a Series of Periods).

    For categorical data the dtype of the categories decides which
    properties class is used, and the original Series is passed along as
    ``orig``.

    Parameters
    ----------
    data : Series
    copy : boolean, default False
           copy the input data

    Returns
    -------
    DelegatedClass

    Raises
    ------
    TypeError
        If ``data`` is not a Series or is not datetimelike.
    """
    from pandas import Series

    if not isinstance(data, Series):
        raise TypeError("cannot convert an object of type {0} to a "
                        "datetimelike index".format(type(data)))

    # Capture index and name before ``data`` is possibly rebound below.
    index = data.index
    name = data.name
    orig = data if is_categorical_dtype(data) else None
    if orig is not None:
        data = orig.values.categories

    if is_datetime64_dtype(data.dtype):
        return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'),
                                  index, name=name, orig=orig)
    elif is_datetime64tz_dtype(data.dtype):
        # Use the Series name captured above rather than ``data.name``:
        # for categorical input ``data`` now refers to the categories, so
        # its name is not the name of the original Series.
        return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer',
                                                ambiguous='infer'),
                                  index, name=name, orig=orig)
    elif is_timedelta64_dtype(data.dtype):
        return TimedeltaProperties(TimedeltaIndex(data, copy=copy,
                                                  freq='infer'), index,
                                   name=name, orig=orig)
    else:
        if is_period_arraylike(data):
            return PeriodProperties(PeriodIndex(data, copy=copy), index,
                                    name=name, orig=orig)
        if is_datetime_arraylike(data):
            return DatetimeProperties(DatetimeIndex(data, copy=copy,
                                                    freq='infer'), index,
                                      name=name, orig=orig)

    raise TypeError("cannot convert an object of type {0} to a "
                    "datetimelike index".format(type(data)))
Пример #23
0
def infer_freq(index, warn=True):
    """
    Infer the most likely frequency given the input index.

    Parameters
    ----------
    index : DatetimeIndex or TimedeltaIndex
      if passed a Series will use the values of the series (NOT THE INDEX)
    warn : boolean, default True
      forwarded to the frequency inferer

    Returns
    -------
    str or None
        None if no discernible frequency

    Raises
    ------
    TypeError
        If the input is period-like, a numeric index, or a Series whose
        values are not datetime64, timedelta64 or object dtype.
    ValueError
        Per the original documentation, if there are less than three
        values (not raised directly in this function).
    """
    import pandas as pd

    if isinstance(index, ABCSeries):
        values = index._values
        convertible = (is_datetime64_dtype(values) or
                       is_timedelta64_dtype(values) or
                       values.dtype == object)
        if not convertible:
            raise TypeError("cannot infer freq from a non-convertible dtype "
                            "on a Series of {dtype}".format(dtype=index.dtype))
        index = values

    if is_period_arraylike(index):
        raise TypeError("PeriodIndex given. Check the `freq` attribute "
                        "instead of using infer_freq.")
    if is_timedelta64_dtype(index):
        # Allow TimedeltaIndex and TimedeltaArray
        return _TimedeltaFrequencyInferer(index, warn=warn).get_freq()

    non_datetime_index = (isinstance(index, pd.Index) and
                          not isinstance(index, pd.DatetimeIndex))
    if non_datetime_index:
        if isinstance(index, (pd.Int64Index, pd.Float64Index)):
            raise TypeError("cannot infer freq from a non-convertible index "
                            "type {type}".format(type=type(index)))
        index = index.values

    if not isinstance(index, pd.DatetimeIndex):
        try:
            index = pd.DatetimeIndex(index)
        except AmbiguousTimeError:
            # Retry from the integer representation.
            index = pd.DatetimeIndex(index.asi8)

    return _FrequencyInferer(index, warn=warn).get_freq()
Пример #24
0
        def __add__(self, other):
            """
            Add ``other`` to this datetime-like index.

            The operand is first unwrapped from a zero-dim array, then
            dispatched on its type (scalars) or dtype (array-likes) to the
            matching helper.  The order of the checks matters (see inline
            comments).

            Returns
            -------
            An Index named via ``ops.get_op_result_name``; non-Index helper
            results are wrapped with ``Index(result)``.  ``NotImplemented``
            is returned for Series/DataFrame, categorical dtypes,
            unrecognised operands, or when the helper itself returns
            ``NotImplemented``.

            Raises
            ------
            TypeError
                For a float-dtype or period-dtype operand.
            """
            other = lib.item_from_zerodim(other)
            if isinstance(other, (ABCSeries, ABCDataFrame)):
                # Defer to the Series/DataFrame implementation.
                return NotImplemented

            # scalar others
            elif other is NaT:
                result = self._add_nat()
            elif isinstance(other, (Tick, timedelta, np.timedelta64)):
                result = self._add_delta(other)
            elif isinstance(other, DateOffset):
                # specifically _not_ a Tick
                result = self._add_offset(other)
            elif isinstance(other, (datetime, np.datetime64)):
                result = self._add_datelike(other)
            elif is_integer(other):
                # This check must come after the check for np.timedelta64
                # as is_integer returns True for these
                result = self.shift(other)

            # array-like others
            elif is_timedelta64_dtype(other):
                # TimedeltaIndex, ndarray[timedelta64]
                result = self._add_delta(other)
            elif is_offsetlike(other):
                # Array/Index of DateOffset objects
                result = self._addsub_offset_array(other, operator.add)
            elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
                # DatetimeIndex, ndarray[datetime64]
                # NOTE: returned directly, bypassing the wrapping step below.
                return self._add_datelike(other)
            elif is_integer_dtype(other):
                result = self._addsub_int_array(other, operator.add)
            elif is_float_dtype(other) or is_period_dtype(other):
                # Explicitly catch invalid dtypes
                raise TypeError("cannot add {dtype}-dtype to {cls}"
                                .format(dtype=other.dtype,
                                        cls=type(self).__name__))
            elif is_categorical_dtype(other):
                # Categorical op will raise; defer explicitly
                return NotImplemented
            else:  # pragma: no cover
                return NotImplemented

            # Helpers may themselves decline the operation.
            if result is NotImplemented:
                return NotImplemented
            elif not isinstance(result, Index):
                # Index.__new__ will choose appropriate subclass for dtype
                result = Index(result)
            res_name = ops.get_op_result_name(self, other)
            result.name = res_name
            return result
Пример #25
0
    def wrapper(self, other):
        """
        Compare this datetime array with ``other`` using the enclosing op.

        Datetime scalars and strings are converted with ``_to_m8`` and
        compared via the ``DatetimeLikeArrayMixin`` method named ``opname``.
        Lists are converted to ``type(self)``; array-likes are compared
        either elementwise as objects or via the mixin method.  Other
        scalars, unsupported containers and non-datetime dtypes are handed
        to ``ops.invalid_comparison``.

        Positions that are null in ``other`` or NaT in ``self`` are set to
        ``nat_result``.
        """
        meth = getattr(dtl.DatetimeLikeArrayMixin, opname)

        if isinstance(other, (datetime, np.datetime64, compat.string_types)):
            if isinstance(other, (datetime, np.datetime64)):
                # GH#18435 strings get a pass from tzawareness compat
                self._assert_tzawareness_compat(other)

            try:
                other = _to_m8(other, tz=self.tz)
            except ValueError:
                # string that cannot be parsed to Timestamp
                return ops.invalid_comparison(self, other, op)

            result = meth(self, other)
            if isna(other):
                # Comparing against a null scalar: every position gets
                # the NaT result.
                result.fill(nat_result)
        elif lib.is_scalar(other):
            return ops.invalid_comparison(self, other, op)
        else:
            if isinstance(other, list):
                # FIXME: This can break for object-dtype with mixed types
                other = type(self)(other)
            elif not isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries)):
                # Following Timestamp convention, __eq__ is all-False
                # and __ne__ is all True, others raise TypeError.
                return ops.invalid_comparison(self, other, op)

            if is_object_dtype(other):
                result = op(self.astype('O'), np.array(other))
            elif not (is_datetime64_dtype(other) or
                      is_datetime64tz_dtype(other)):
                # e.g. is_timedelta64_dtype(other)
                return ops.invalid_comparison(self, other, op)
            else:
                self._assert_tzawareness_compat(other)
                result = meth(self, np.asarray(other))

            result = com.values_from_object(result)

            # Make sure to pass an array to result[...]; indexing with
            # Series breaks with older version of numpy
            o_mask = np.array(isna(other))
            if o_mask.any():
                result[o_mask] = nat_result

        # Positions where self is NaT also get the NaT result.
        if self.hasnans:
            result[self._isnan] = nat_result

        return result
Пример #26
0
 def __rsub__(self, other):
     """
     Reflected subtraction: compute ``other - self``.

     Raises ``TypeError`` when ``self`` is datetime-like and ``other`` has
     a dtype that is not datetime-like.
     """
     if is_datetime64_dtype(other) and is_timedelta64_dtype(self):
         # ndarray[datetime64] cannot be subtracted from self, so
         # we need to wrap in DatetimeIndex and flip the operation
         from pandas import DatetimeIndex
         return DatetimeIndex(other) - self

     if is_datetime64_any_dtype(self):
         if hasattr(other, 'dtype') and not is_datetime64_any_dtype(other):
             # GH#19959 datetime - datetime is well-defined as timedelta,
             # but any other type - datetime is not well-defined.
             raise TypeError("cannot subtract {cls} from {typ}"
                             .format(cls=type(self).__name__,
                                     typ=type(other).__name__))

     # General case: negate the forward subtraction.
     return -(self - other)
Пример #27
0
def nanmean(values, axis=None, skipna=True, mask=None):
    """
    Compute the mean of the elements along an axis, ignoring NaNs.

    Parameters
    ----------
    values : ndarray
    axis : int, optional
    skipna : bool, default True
    mask : ndarray[bool], optional
        nan-mask if known

    Returns
    -------
    result : float
        Unless input is a float array, in which case use the same
        precision as the input array.

    Examples
    --------
    >>> import pandas.core.nanops as nanops
    >>> s = pd.Series([1, 2, np.nan])
    >>> nanops.nanmean(s)
    1.5
    """
    values, mask, dtype, dtype_max, _ = _get_values(
        values, skipna, 0, mask=mask)

    # Choose the accumulator dtypes for the sum and for the count.
    if (is_integer_dtype(dtype) or is_timedelta64_dtype(dtype) or
            is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype)):
        sum_dtype, count_dtype = np.float64, np.float64
    elif is_float_dtype(dtype):
        # Float input keeps its own precision for both quantities.
        sum_dtype, count_dtype = dtype, dtype
    else:
        sum_dtype, count_dtype = dtype_max, np.float64

    n_valid = _get_counts(mask, axis, dtype=count_dtype)
    total = _ensure_numeric(values.sum(axis, dtype=sum_dtype))

    array_result = axis is not None and getattr(total, 'ndim', False)
    if not array_result:
        # Scalar reduction: guard against dividing by a zero count.
        mean = total / n_valid if n_valid > 0 else np.nan
        return _wrap_results(mean, dtype)

    with np.errstate(all="ignore"):
        # suppress division by zero warnings
        mean = total / n_valid
    no_observations = n_valid == 0
    if no_observations.any():
        mean[no_observations] = np.nan

    return _wrap_results(mean, dtype)
Пример #28
0
def _cast_values_for_fillna(values, dtype):
    """
    Cast values to a dtype that algos.pad and algos.backfill can handle.
    """
    # TODO: for int-dtypes we make a copy, but for everything else this
    #  alters the values in-place.  Is this intentional?

    if (is_datetime64_dtype(dtype) or is_datetime64tz_dtype(dtype) or
            is_timedelta64_dtype(dtype)):
        values = values.view(np.int64)

    elif is_integer_dtype(values):
        # NB: this check needs to come after the datetime64 check above
        values = ensure_float64(values)

    return values
Пример #29
0
 def astype(self, dtype, copy=True, how='start'):
     """
     Cast to the requested dtype.

     Parameters
     ----------
     dtype : dtype-like
         Target dtype; normalised through ``pandas_dtype``.
     copy : bool, default True
         Only consulted for integer targets: whether to return a copy of
         ``self._int64index`` or that object itself.
     how : str, default 'start'
         Forwarded to ``to_timestamp`` for datetime64 targets.

     Raises
     ------
     ValueError
         If the target dtype is not object, integer, datetime64 (naive or
         tz-aware) or period.
     """
     target = pandas_dtype(dtype)

     if is_object_dtype(target):
         return self.asobject

     if is_integer_dtype(target):
         ints = self._int64index
         return ints.copy() if copy else ints

     if is_datetime64_dtype(target):
         return self.to_timestamp(how=how)

     if is_datetime64tz_dtype(target):
         # Build naive timestamps first, then attach the requested zone.
         stamps = self.to_timestamp(how=how)
         return stamps.tz_localize(target.tz)

     if is_period_dtype(target):
         return self.asfreq(freq=target.freq)

     raise ValueError('Cannot cast PeriodIndex to dtype %s' % target)
Пример #30
0
def _coerce_to_type(x):
    """
    if the passed data is of datetime/timedelta type,
    this method converts it to integer so that cut method can
    handle it
    """
    dtype = None

    if is_timedelta64_dtype(x):
        x = to_timedelta(x).view(np.int64)
        dtype = np.timedelta64
    elif is_datetime64_dtype(x):
        x = to_datetime(x).view(np.int64)
        dtype = np.datetime64

    return x, dtype
Пример #31
0
def astype_nansafe(
    arr: np.ndarray, dtype: DtypeObj, copy: bool = True, skipna: bool = False
) -> ArrayLike:
    """
    Cast the elements of an array to a given dtype in a nan-safe manner.

    Parameters
    ----------
    arr : ndarray
    dtype : np.dtype or ExtensionDtype
    copy : bool, default True
        If False, a view will be attempted but may fail, if
        e.g. the item sizes don't align.
    skipna: bool, default False
        Whether or not we should skip NaN when casting as a string-type.

    Raises
    ------
    ValueError
        The dtype was a datetime64/timedelta64 dtype, but it had no unit;
        or datetime64/timedelta64 data containing NaT was cast to int64;
        or ``dtype`` is neither an np.dtype nor an ExtensionDtype.
    TypeError
        datetime64 or timedelta64 data was cast to a dtype other than
        int64 or a dtype of the same kind (string and object targets are
        handled earlier and do not raise).
    """

    # We get here with 0-dim from sparse
    arr = np.atleast_1d(arr)

    # dispatch on extension dtype if needed
    if isinstance(dtype, ExtensionDtype):
        return dtype.construct_array_type()._from_sequence(arr, dtype=dtype, copy=copy)

    elif not isinstance(dtype, np.dtype):  # pragma: no cover
        raise ValueError("dtype must be np.dtype or ExtensionDtype")

    # datetime64/timedelta64 -> str or object: wrap the ndarray and delegate
    # to the wrapper's own astype.
    if arr.dtype.kind in ["m", "M"] and (
        issubclass(dtype.type, str) or dtype == _dtype_obj
    ):
        from pandas.core.construction import ensure_wrapped_if_datetimelike

        arr = ensure_wrapped_if_datetimelike(arr)
        return arr.astype(dtype, copy=copy)

    if issubclass(dtype.type, str):
        # The array is flattened before conversion when it has more than one
        # dimension; the original shape is restored afterwards.
        shape = arr.shape
        if arr.ndim > 1:
            arr = arr.ravel()
        return lib.ensure_string_array(
            arr, skipna=skipna, convert_na_value=False
        ).reshape(shape)

    elif is_datetime64_dtype(arr.dtype):
        if dtype == np.int64:
            # int64 has no representation for NaT, so refuse instead of
            # exposing the sentinel value as a regular integer.
            if isna(arr).any():
                raise ValueError("Cannot convert NaT values to integer")
            return arr.view(dtype)

        # allow frequency conversions
        if dtype.kind == "M":
            return arr.astype(dtype)

        raise TypeError(f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]")

    elif is_timedelta64_dtype(arr.dtype):
        if dtype == np.int64:
            # Same NaT guard as in the datetime64 branch above.
            if isna(arr).any():
                raise ValueError("Cannot convert NaT values to integer")
            return arr.view(dtype)

        elif dtype.kind == "m":
            return astype_td64_unit_conversion(arr, dtype, copy=copy)

        raise TypeError(f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]")

    elif np.issubdtype(arr.dtype, np.floating) and is_integer_dtype(dtype):
        return _astype_float_to_int_nansafe(arr, dtype, copy)

    elif is_object_dtype(arr.dtype):

        # if we have a datetime/timedelta array of objects
        # then coerce to a proper dtype and recall astype_nansafe

        if is_datetime64_dtype(dtype):
            from pandas import to_datetime

            return astype_nansafe(
                to_datetime(arr.ravel()).values.reshape(arr.shape),
                dtype,
                copy=copy,
            )
        elif is_timedelta64_dtype(dtype):
            # bc we know arr.dtype == object, this is equivalent to
            #  `np.asarray(to_timedelta(arr))`, but using a lower-level API that
            #  does not require a circular import.
            return array_to_timedelta64(arr).view("m8[ns]").astype(dtype, copy=False)

        # Object arrays with any other target fall through to the generic
        # handling below.

    # Reached only by combinations not returned/raised above; a unit-less
    # datetime64/timedelta64 target is rejected here.
    if dtype.name in ("datetime64", "timedelta64"):
        msg = (
            f"The '{dtype.name}' dtype has no unit. Please pass in "
            f"'{dtype.name}[ns]' instead."
        )
        raise ValueError(msg)

    if copy or is_object_dtype(arr.dtype) or is_object_dtype(dtype):
        # Explicit copy, or required since NumPy can't view from / to object.
        return arr.astype(dtype, copy=True)

    return arr.astype(dtype, copy=copy)
Пример #32
0
def hash_array(vals, encoding='utf8', hash_key=None, categorize=True):
    """
    Given a 1d array, return an array of deterministic integers.

    .. versionadded:: 0.19.2

    Parameters
    ----------
    vals : ndarray, Categorical
    encoding : string, default 'utf8'
        encoding for data & key when strings
    hash_key : string key to encode, default to _default_hash_key
    categorize : bool, default True
        Whether to first categorize object arrays before hashing. This is more
        efficient when the array contains duplicate values.

        .. versionadded:: 0.20.0

    Returns
    -------
    1d uint64 numpy array of hash values, same length as the vals

    Raises
    ------
    TypeError
        If ``vals`` has no ``dtype`` attribute.
    """
    if not hasattr(vals, 'dtype'):
        raise TypeError("must pass a ndarray-like")

    hash_key = _default_hash_key if hash_key is None else hash_key

    # Categoricals: hash the categories, then remap the codes to the hash
    # values. This has to precede the complex check because numpy chokes
    # when asked whether a categorical is a subdtype of complex.
    if is_categorical_dtype(vals.dtype):
        return _hash_categorical(vals, encoding, hash_key)

    # Everything below works on 64-bit values, so split the 128-bit complex
    # case into its two halves up front.
    if np.issubdtype(vals.dtype, np.complex128):
        real_part = hash_array(vals.real)
        imag_part = hash_array(vals.imag)
        return real_part + 23 * imag_part

    # Turn whatever array this is into unsigned 64-bit ints, if possible.
    if is_bool_array(vals):
        vals = vals.astype('u8')
    elif is_datetime64_dtype(vals) or is_timedelta64_dtype(vals):
        vals = vals.view('i8').astype('u8', copy=False)
    elif is_numeric_dtype(vals) and vals.dtype.itemsize <= 8:
        width = vals.dtype.itemsize
        vals = vals.view('u{}'.format(width)).astype('u8')
    elif categorize:
        # With repeated values it is MUCH faster to categorize object
        # dtypes, then hash and rename the categories. Callers may skip
        # this when the values are known/likely to be unique.
        from pandas import factorize, Categorical, Index
        codes, categories = factorize(vals, sort=False)
        cat = Categorical(codes,
                          Index(categories),
                          ordered=False,
                          fastpath=True)
        return _hash_categorical(cat, encoding, hash_key)
    else:
        try:
            vals = hashing.hash_object_array(vals, hash_key, encoding)
        except TypeError:
            # we have mixed types
            as_str_objects = vals.astype(str).astype(object)
            vals = hashing.hash_object_array(
                as_str_objects, hash_key, encoding)

    # Redistribute these 64-bit ints within the space of 64-bit ints.
    mixing_rounds = (
        (30, 0xbf58476d1ce4e5b9),
        (27, 0x94d049bb133111eb),
    )
    for shift, multiplier in mixing_rounds:
        vals ^= vals >> shift
        vals *= np.uint64(multiplier)
    vals ^= vals >> 31
    return vals
Пример #33
0
def maybe_to_datetimelike(data, copy=False):
    """
    Return a DelegatedClass of a Series that is datetimelike
    (e.g. datetime64[ns], timedelta64[ns] dtype or a Series of Periods).

    Parameters
    ----------
    data : Series
    copy : boolean, default False
        copy the input data

    Returns
    -------
    DelegatedClass
        ``DatetimeProperties``, ``TimedeltaProperties`` or
        ``PeriodProperties``, depending on the dtype of the data.

    Raises
    ------
    TypeError
        If ``data`` is not a Series, or if its values are not datetimelike.
    """
    from pandas import Series

    if not isinstance(data, Series):
        raise TypeError("cannot convert an object of type {0} to a "
                        "datetimelike index".format(type(data)))

    # Capture the Series' own index and name before ``data`` is possibly
    # rebound to the categories below.
    index = data.index
    name = data.name
    orig = data if is_categorical_dtype(data) else None
    if orig is not None:
        # For categorical data the dtype dispatch runs on the categories.
        data = orig.values.categories

    if is_datetime64_dtype(data.dtype):
        return DatetimeProperties(DatetimeIndex(data, copy=copy, freq='infer'),
                                  index,
                                  name=name,
                                  orig=orig)
    elif is_datetime64tz_dtype(data.dtype):
        # Pass the captured Series name, like every other branch. Using
        # ``data.name`` here would pick up the name of the categories
        # instead of the Series for categorical input.
        return DatetimeProperties(DatetimeIndex(data,
                                                copy=copy,
                                                freq='infer',
                                                ambiguous='infer'),
                                  index,
                                  name=name,
                                  orig=orig)
    elif is_timedelta64_dtype(data.dtype):
        return TimedeltaProperties(TimedeltaIndex(data,
                                                  copy=copy,
                                                  freq='infer'),
                                   index,
                                   name=name,
                                   orig=orig)
    else:
        if is_period_arraylike(data):
            return PeriodProperties(PeriodIndex(data, copy=copy),
                                    index,
                                    name=name,
                                    orig=orig)
        if is_datetime_arraylike(data):
            return DatetimeProperties(DatetimeIndex(data,
                                                    copy=copy,
                                                    freq='infer'),
                                      index,
                                      name=name,
                                      orig=orig)

    raise TypeError("cannot convert an object of type {0} to a "
                    "datetimelike index".format(type(data)))
Пример #34
0
def maybe_cast_to_datetime(value, dtype, errors: str = "raise"):
    """
    Try to cast the array/value to a datetimelike dtype, converting float
    nan to iNaT.

    Parameters
    ----------
    value : scalar or array-like
    dtype : str, dtype object or None
        Target dtype. A string is converted with ``np.dtype``. When None,
        non-nanosecond datetime64/timedelta64 ndarrays are converted to
        nanosecond resolution and other eligible input is passed through
        ``maybe_infer_to_datetimelike``.
    errors : str, default "raise"
        Forwarded to ``to_datetime`` / ``to_timedelta``.

    Returns
    -------
    The converted value, or ``value`` unchanged when no conversion applies.

    Raises
    ------
    ValueError
        The requested datetime64/timedelta64 dtype has no unit.
    TypeError
        The requested datetime64/timedelta64 dtype fails the ``<=``
        comparison against the nanosecond dtype, or datetime64 data was
        asked to be cast to a dtype that is neither datetimelike nor object.
    OutOfBoundsDatetime
        Re-raised from the underlying conversion.
    """
    from pandas.core.tools.timedeltas import to_timedelta
    from pandas.core.tools.datetimes import to_datetime

    if dtype is not None:
        if isinstance(dtype, str):
            dtype = np.dtype(dtype)

        # Classify the requested dtype once; the flags are reused below.
        is_datetime64 = is_datetime64_dtype(dtype)
        is_datetime64tz = is_datetime64tz_dtype(dtype)
        is_timedelta64 = is_timedelta64_dtype(dtype)

        if is_datetime64 or is_datetime64tz or is_timedelta64:

            # Force the dtype if needed.
            msg = (f"The '{dtype.name}' dtype has no unit. "
                   f"Please pass in '{dtype.name}[ns]' instead.")

            if is_datetime64 and not is_dtype_equal(dtype, _NS_DTYPE):

                # pandas supports dtype whose granularity is less than [ns]
                # e.g., [ps], [fs], [as]
                if dtype <= np.dtype("M8[ns]"):
                    if dtype.name == "datetime64":
                        raise ValueError(msg)
                    dtype = _NS_DTYPE
                else:
                    raise TypeError(
                        f"cannot convert datetimelike to dtype [{dtype}]")
            elif is_datetime64tz:

                # our NaT doesn't support tz's
                # this will coerce to DatetimeIndex with
                # a matching dtype below
                if is_scalar(value) and isna(value):
                    value = [value]

            elif is_timedelta64 and not is_dtype_equal(dtype, _TD_DTYPE):

                # pandas supports dtype whose granularity is less than [ns]
                # e.g., [ps], [fs], [as]
                if dtype <= np.dtype("m8[ns]"):
                    if dtype.name == "timedelta64":
                        raise ValueError(msg)
                    dtype = _TD_DTYPE
                else:
                    raise TypeError(
                        f"cannot convert timedeltalike to dtype [{dtype}]")

            if is_scalar(value):
                # Null-like scalars are normalised to iNaT; any other scalar
                # is left untouched.
                if value == iNaT or isna(value):
                    value = iNaT
            else:
                value = np.array(value, copy=False)

                # have a scalar array-like (e.g. NaT)
                if value.ndim == 0:
                    value = iNaT

                # we have an array of datetime or timedeltas & nulls
                elif np.prod(
                        value.shape) or not is_dtype_equal(value.dtype, dtype):
                    try:
                        if is_datetime64:
                            value = to_datetime(value, errors=errors)
                            # GH 25843: Remove tz information since the dtype
                            # didn't specify one
                            if value.tz is not None:
                                value = value.tz_localize(None)
                            value = value._values
                        elif is_datetime64tz:
                            # The string check can be removed once issue #13712
                            # is solved. String data that is passed with a
                            # datetime64tz is assumed to be naive which should
                            # be localized to the timezone.
                            is_dt_string = is_string_dtype(value)
                            value = to_datetime(value, errors=errors).array
                            if is_dt_string:
                                # Strings here are naive, so directly localize
                                value = value.tz_localize(dtype.tz)
                            else:
                                # Numeric values are UTC at this point,
                                # so localize and convert
                                value = value.tz_localize("UTC").tz_convert(
                                    dtype.tz)
                        elif is_timedelta64:
                            value = to_timedelta(value, errors=errors)._values
                    except OutOfBoundsDatetime:
                        # Out-of-bounds input is always surfaced to the caller.
                        raise
                    except (AttributeError, ValueError, TypeError):
                        # Best effort: on these failures the value is left as
                        # it was when the failing call was made.
                        pass

        # coerce datetimelike to object
        elif is_datetime64_dtype(value) and not is_datetime64_dtype(dtype):
            if is_object_dtype(dtype):
                # Normalise to nanosecond resolution before converting the
                # int64 view with ``ints_to_pydatetime``.
                if value.dtype != _NS_DTYPE:
                    value = value.astype(_NS_DTYPE)
                ints = np.asarray(value).view("i8")
                return tslib.ints_to_pydatetime(ints)

            # we have a non-castable dtype that was passed
            raise TypeError(f"Cannot cast datetime64 to {dtype}")

    else:

        is_array = isinstance(value, np.ndarray)

        # catch a datetime/timedelta that is not of ns variety
        # and no coercion specified
        if is_array and value.dtype.kind in ["M", "m"]:
            dtype = value.dtype

            if dtype.kind == "M" and dtype != _NS_DTYPE:
                value = tslibs.conversion.ensure_datetime64ns(value)

            elif dtype.kind == "m" and dtype != _TD_DTYPE:
                value = to_timedelta(value)

        # only do this if we have an array and the dtype of the array is not
        # setup already we are not an integer/object, so don't bother with this
        # conversion
        elif not (is_array and not (issubclass(value.dtype.type, np.integer)
                                    or value.dtype == np.object_)):
            value = maybe_infer_to_datetimelike(value)

    return value
Пример #35
0
    def __sub__(self, other):
        """
        Subtract ``other`` from ``self``.

        Dispatches on the type/dtype of ``other``: scalars are handled
        first, then array-likes. The order of the checks matters (see the
        inline notes).

        Returns
        -------
        The result of the matching helper; a timedelta64 ndarray result is
        wrapped in ``TimedeltaArrayMixin``. ``NotImplemented`` is returned
        for Series/DataFrame operands, extension-array dtypes and anything
        unrecognised.

        Raises
        ------
        TypeError
            If ``other`` is an Index not matched by an earlier branch, or is
            float-dtype.
        """
        # Unbox zero-dimensional arrays so they take the scalar paths below.
        other = lib.item_from_zerodim(other)
        if isinstance(other, (ABCSeries, ABCDataFrame)):
            return NotImplemented

        # scalar others
        elif other is NaT:
            result = self._sub_nat()
        elif isinstance(other, (Tick, timedelta, np.timedelta64)):
            result = self._add_delta(-other)
        elif isinstance(other, DateOffset):
            # specifically _not_ a Tick
            result = self._add_offset(-other)
        elif isinstance(other, (datetime, np.datetime64)):
            result = self._sub_datetimelike_scalar(other)
        elif lib.is_integer(other):
            # This check must come after the check for np.timedelta64
            # as is_integer returns True for these
            if not is_period_dtype(self):
                maybe_integer_op_deprecated(self)
            result = self._time_shift(-other)

        elif isinstance(other, Period):
            result = self._sub_period(other)

        # array-like others
        elif is_timedelta64_dtype(other):
            # TimedeltaIndex, ndarray[timedelta64]
            result = self._add_delta(-other)
        elif is_offsetlike(other):
            # Array/Index of DateOffset objects
            result = self._addsub_offset_array(other, operator.sub)
        elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
            # DatetimeIndex, ndarray[datetime64]
            result = self._sub_datetime_arraylike(other)
        elif is_period_dtype(other):
            # PeriodIndex
            result = self._sub_period_array(other)
        elif is_integer_dtype(other):
            if not is_period_dtype(self):
                maybe_integer_op_deprecated(self)
            result = self._addsub_int_array(other, operator.sub)
        elif isinstance(other, ABCIndexClass):
            # Any Index that did not match one of the dtype checks above.
            raise TypeError("cannot subtract {cls} and {typ}"
                            .format(cls=type(self).__name__,
                                    typ=type(other).__name__))
        elif is_float_dtype(other):
            # Explicitly catch invalid dtypes
            raise TypeError("cannot subtract {dtype}-dtype from {cls}"
                            .format(dtype=other.dtype,
                                    cls=type(self).__name__))
        elif is_extension_array_dtype(other):
            # Categorical op will raise; defer explicitly
            return NotImplemented
        else:  # pragma: no cover
            return NotImplemented

        # Raw timedelta64 ndarrays are wrapped before being returned.
        if is_timedelta64_dtype(result) and isinstance(result, np.ndarray):
            from pandas.core.arrays import TimedeltaArrayMixin
            # TODO: infer freq?
            return TimedeltaArrayMixin(result)
        return result
Пример #36
0
def astype_nansafe(arr: np.ndarray,
                   dtype: DtypeObj,
                   copy: bool = True,
                   skipna: bool = False) -> ArrayLike:
    """
    Cast the elements of an array to a given dtype in a nan-safe manner.

    Parameters
    ----------
    arr : ndarray
    dtype : np.dtype or ExtensionDtype
    copy : bool, default True
        If False, a view will be attempted but may fail, if
        e.g. the item sizes don't align.
    skipna: bool, default False
        Whether or not we should skip NaN when casting as a string-type.

    Raises
    ------
    ValueError
        The dtype was a datetime64/timedelta64 dtype, but it had no unit;
        or datetime64/timedelta64 data containing NaT was cast to int64;
        or ``dtype`` is neither an np.dtype nor an ExtensionDtype.
    TypeError
        datetime64 or timedelta64 data was cast to a dtype other than
        int64 or a dtype of the same kind (string and object targets are
        handled earlier and do not raise).

    Warns
    -----
    FutureWarning
        When datetime64 or timedelta64 data is cast to int64.
    """
    # Multi-dimensional input: convert the flattened array recursively and
    # restore the original shape afterwards.
    if arr.ndim > 1:
        flat = arr.ravel()
        result = astype_nansafe(flat, dtype, copy=copy, skipna=skipna)
        # error: Item "ExtensionArray" of "Union[ExtensionArray, ndarray]" has no
        # attribute "reshape"
        return result.reshape(arr.shape)  # type: ignore[union-attr]

    # We get here with 0-dim from sparse
    arr = np.atleast_1d(arr)

    # dispatch on extension dtype if needed
    if isinstance(dtype, ExtensionDtype):
        return dtype.construct_array_type()._from_sequence(arr,
                                                           dtype=dtype,
                                                           copy=copy)

    elif not isinstance(dtype, np.dtype):  # pragma: no cover
        raise ValueError("dtype must be np.dtype or ExtensionDtype")

    # datetime64/timedelta64 -> str or object: wrap the ndarray and delegate
    # to the wrapper's own astype.
    if arr.dtype.kind in ["m", "M"] and (issubclass(dtype.type, str)
                                         or dtype == _dtype_obj):
        from pandas.core.construction import ensure_wrapped_if_datetimelike

        arr = ensure_wrapped_if_datetimelike(arr)
        return arr.astype(dtype, copy=copy)

    if issubclass(dtype.type, str):
        return lib.ensure_string_array(arr,
                                       skipna=skipna,
                                       convert_na_value=False)

    elif is_datetime64_dtype(arr.dtype):
        if dtype == np.int64:
            warnings.warn(
                f"casting {arr.dtype} values to int64 with .astype(...) "
                "is deprecated and will raise in a future version. "
                "Use .view(...) instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            # int64 has no representation for NaT, so refuse instead of
            # exposing the sentinel value as a regular integer.
            if isna(arr).any():
                raise ValueError("Cannot convert NaT values to integer")
            return arr.view(dtype)

        # allow frequency conversions
        if dtype.kind == "M":
            return arr.astype(dtype)

        raise TypeError(
            f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]")

    elif is_timedelta64_dtype(arr.dtype):
        if dtype == np.int64:
            warnings.warn(
                f"casting {arr.dtype} values to int64 with .astype(...) "
                "is deprecated and will raise in a future version. "
                "Use .view(...) instead.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
            # Same NaT guard as in the datetime64 branch above.
            if isna(arr).any():
                raise ValueError("Cannot convert NaT values to integer")
            return arr.view(dtype)

        elif dtype.kind == "m":
            return astype_td64_unit_conversion(arr, dtype, copy=copy)

        raise TypeError(
            f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]")

    elif np.issubdtype(arr.dtype, np.floating) and np.issubdtype(
            dtype, np.integer):
        return _astype_float_to_int_nansafe(arr, dtype, copy)

    elif is_object_dtype(arr.dtype):

        # work around NumPy brokenness, #1987
        if np.issubdtype(dtype.type, np.integer):
            return lib.astype_intsafe(arr, dtype)

        # if we have a datetime/timedelta array of objects
        # then coerce to a proper dtype and recall astype_nansafe

        elif is_datetime64_dtype(dtype):
            from pandas import to_datetime

            return astype_nansafe(
                to_datetime(arr).values,
                dtype,
                copy=copy,
            )
        elif is_timedelta64_dtype(dtype):
            from pandas import to_timedelta

            return astype_nansafe(to_timedelta(arr)._values, dtype, copy=copy)

        # Object arrays with any other target fall through to the generic
        # handling below.

    # Reached only by combinations not returned/raised above; a unit-less
    # datetime64/timedelta64 target is rejected here.
    if dtype.name in ("datetime64", "timedelta64"):
        msg = (f"The '{dtype.name}' dtype has no unit. Please pass in "
               f"'{dtype.name}[ns]' instead.")
        raise ValueError(msg)

    if copy or is_object_dtype(arr.dtype) or is_object_dtype(dtype):
        # Explicit copy, or required since NumPy can't view from / to object.
        return arr.astype(dtype, copy=True)

    return arr.astype(dtype, copy=copy)
Пример #37
0
 def test_compat(self, dtype):
     """
     Neither ``is_datetime64_ns_dtype`` nor ``is_datetime64_dtype`` may
     accept the ``dtype`` argument or the string "period[D]".
     """
     for check in (is_datetime64_ns_dtype, is_datetime64_dtype):
         for candidate in (dtype, "period[D]"):
             assert not check(candidate)
Пример #38
0
 def test_compat(self):
     """
     Neither ``is_datetime64_ns_dtype`` nor ``is_datetime64_dtype`` may
     accept ``self.dtype`` or the string 'period[D]'.
     """
     for check in (is_datetime64_ns_dtype, is_datetime64_dtype):
         for candidate in (self.dtype, 'period[D]'):
             assert not check(candidate)
Пример #39
0
 def test_compat(self):
     """
     Neither ``is_datetime64_ns_dtype`` nor ``is_datetime64_dtype`` may
     accept ``self.dtype`` or the string 'period[D]'.
     """
     for check in (is_datetime64_ns_dtype, is_datetime64_dtype):
         for candidate in (self.dtype, 'period[D]'):
             self.assertFalse(check(candidate))
Пример #40
0
def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
    """
    Convert a sequence to a numpy array with dtype ``timedelta64[ns]``.

    Parameters
    ----------
    data : list-like
    copy : bool, default False
    unit : str, default "ns"
        The timedelta unit to treat integers as multiples of.
    errors : {"raise", "coerce", "ignore"}, default "raise"
        How to handle elements that cannot be converted to timedelta64[ns].
        See ``pandas.to_timedelta`` for details.

    Returns
    -------
    converted : numpy.ndarray
        The sequence converted to a numpy array with dtype ``timedelta64[ns]``.
    inferred_freq : Tick or None
        The inferred frequency of the sequence.

    Raises
    ------
    ValueError : Data cannot be converted to timedelta64[ns], or the data is
        not 1-dimensional.
    TypeError : The dtype of the data is not one of the supported kinds.

    Notes
    -----
    Unlike `pandas.to_timedelta`, setting ``errors=ignore`` will not cause
    errors to be ignored; they are caught and subsequently ignored at a
    higher level.
    """
    inferred_freq = None
    unit = parse_timedelta_unit(unit)

    # Unwrap whatever we have into a np.ndarray
    if not hasattr(data, "dtype"):
        # e.g. list, tuple
        if np.ndim(data) == 0:
            # i.e. generator
            data = list(data)
        data = np.array(data, copy=False)
    elif isinstance(data, ABCSeries):
        data = data._values
    elif isinstance(data, (ABCTimedeltaIndex, TimedeltaArray)):
        # Carry the existing freq over as the inferred frequency.
        inferred_freq = data.freq
        data = data._data

    # Convert whatever we have into timedelta64[ns] dtype
    if is_object_dtype(data.dtype) or is_string_dtype(data.dtype):
        # no need to make a copy, need to convert if string-dtyped
        data = objects_to_td64ns(data, unit=unit, errors=errors)
        copy = False

    elif is_integer_dtype(data.dtype):
        # treat as multiples of the given unit
        data, copy_made = ints_to_td64ns(data, unit=unit)
        # Skip the final copy if the conversion already produced one.
        copy = copy and not copy_made

    elif is_float_dtype(data.dtype):
        # cast the unit, multiply base/frac separately
        # to avoid precision issues from float -> int
        mask = np.isnan(data)
        m, p = precision_from_unit(unit)
        base = data.astype(np.int64)
        frac = data - base
        if p:
            frac = np.round(frac, p)
        data = (base * m + (frac * m).astype(np.int64)).view("timedelta64[ns]")
        # Positions that were NaN in the input become NaT.
        data[mask] = iNaT
        copy = False

    elif is_timedelta64_dtype(data.dtype):
        if data.dtype != _TD_DTYPE:
            # non-nano unit
            # TODO: watch out for overflows
            data = data.astype(_TD_DTYPE)
            copy = False

    elif is_datetime64_dtype(data):
        # GH#23539
        warnings.warn(
            "Passing datetime64-dtype data to TimedeltaIndex is "
            "deprecated, will raise a TypeError in a future "
            "version",
            FutureWarning,
            stacklevel=4,
        )
        data = ensure_int64(data).view(_TD_DTYPE)

    else:
        raise TypeError(
            "dtype {dtype} cannot be converted to timedelta64[ns]".format(
                dtype=data.dtype))

    # Honour the (possibly downgraded) copy request.
    data = np.array(data, copy=copy)
    if data.ndim != 1:
        raise ValueError("Only 1-dimensional input arrays are supported.")

    assert data.dtype == "m8[ns]", data
    return data, inferred_freq
Пример #41
0
    def __new__(cls,
                data=None,
                ordinal=None,
                freq=None,
                start=None,
                end=None,
                periods=None,
                tz=None,
                dtype=None,
                copy=False,
                name=None,
                **fields):
        """
        Construct a new instance from one of several kinds of input.

        Depending on the arguments, the data comes from ``ordinal``, from a
        generated range (``start``/``end``/``periods``/``freq``/``fields``),
        from an existing PeriodIndex, from datetime64 data, or from an
        iterable that is converted to an object array and parsed.

        Raises
        ------
        TypeError
            If an unexpected keyword is passed in ``fields``, if ``periods``
            is neither a float nor an integer, or if non-empty floating
            point data is passed.
        ValueError
            If ``dtype`` is given and is not a period dtype.
        IncompatibleFrequency
            If both ``freq`` and ``dtype`` are given and their frequencies
            differ.
        """

        # Only these keyword names are accepted through ``**fields``.
        valid_field_set = {
            'year', 'month', 'day', 'quarter', 'hour', 'minute', 'second'
        }

        if not set(fields).issubset(valid_field_set):
            raise TypeError(
                '__new__() got an unexpected keyword argument {}'.format(
                    list(set(fields) - valid_field_set)[0]))

        if periods is not None:
            if is_float(periods):
                periods = int(periods)
            elif not is_integer(periods):
                msg = 'periods must be a number, got {periods}'
                raise TypeError(msg.format(periods=periods))

        # Inherit the name from the data when none was given explicitly.
        if name is None and hasattr(data, 'name'):
            name = data.name

        if dtype is not None:
            dtype = pandas_dtype(dtype)
            if not is_period_dtype(dtype):
                raise ValueError('dtype must be PeriodDtype')
            if freq is None:
                freq = dtype.freq
            elif freq != dtype.freq:
                msg = 'specified freq and dtype are different'
                raise IncompatibleFrequency(msg)

        # coerce freq to freq object, otherwise it can be coerced elementwise
        # which is slow
        if freq:
            freq = Period._maybe_convert_freq(freq)

        if data is None:
            # No data: use the given ordinals, or generate a range.
            if ordinal is not None:
                data = np.asarray(ordinal, dtype=np.int64)
            else:
                data, freq = cls._generate_range(start, end, periods, freq,
                                                 fields)
            return cls._from_ordinals(data, name=name, freq=freq)

        if isinstance(data, PeriodIndex):
            if freq is None or freq == data.freq:  # no freq change
                freq = data.freq
                data = data._ndarray_values
            else:
                # Convert the underlying values from the old to the new freq.
                base1, _ = _gfc(data.freq)
                base2, _ = _gfc(freq)
                data = period.period_asfreq_arr(data._ndarray_values, base1,
                                                base2, 1)
            return cls._simple_new(data, name=name, freq=freq)

        # not array / index
        if not isinstance(
                data, (np.ndarray, PeriodIndex, DatetimeIndex, Int64Index)):
            if is_scalar(data) or isinstance(data, Period):
                cls._scalar_data_error(data)

            # other iterable of some kind
            if not isinstance(data, (list, tuple)):
                data = list(data)

            data = np.asarray(data)

        # datetime other than period
        if is_datetime64_dtype(data.dtype):
            data = dt64arr_to_periodarr(data, freq, tz)
            return cls._from_ordinals(data, name=name, freq=freq)

        # check not floats
        if infer_dtype(data) == 'floating' and len(data) > 0:
            raise TypeError("PeriodIndex does not allow "
                            "floating point in construction")

        # anything else, likely an array of strings or periods
        data = ensure_object(data)
        # Fall back to a frequency extracted from the data when none is set.
        freq = freq or period.extract_freq(data)
        data = period.extract_ordinals(data, freq)
        return cls._from_ordinals(data, name=name, freq=freq)
Пример #42
0
def _get_empty_dtype_and_na(join_units):
    """
    Work out the dtype and N/A filler used when concatenating join units.

    Every unit that has a block contributes its dtype to an "upcast class"
    bucket.  The buckets that are present then decide the result by a fixed
    precedence: object, bool, category, datetimetz, datetime, timedelta and
    finally the numeric common type.  Units flagged ``is_na`` only take part
    in the decision when there are no non-null units at all.

    Parameters
    ----------
    join_units : list
        Join units exposing ``block``, ``dtype`` and ``is_na``.

    Returns
    -------
    dtype
    na
        The fill value; may be None which means there was no casting
        involved.

    Raises
    ------
    AssertionError
        If none of the rules below yields a dtype.
    """
    # A lone unit without a block is treated as all-NaN float data.
    if len(join_units) == 1 and join_units[0].block is None:
        return np.float64, np.nan

    if _is_uniform_reindex(join_units):
        # FIXME: integrate property
        first_block = join_units[0].block
        return first_block.dtype, first_block.fill_value

    # Units without a block contribute no dtype (kept as None placeholders).
    dtypes = [
        None if unit.block is None else unit.dtype for unit in join_units
    ]
    has_none_blocks = any(unit.block is None for unit in join_units)

    upcast_classes = defaultdict(list)
    null_upcast_classes = defaultdict(list)
    for dtype, unit in zip(dtypes, join_units):
        if dtype is None:
            continue

        # The order of these checks matters: the first match wins.
        if is_categorical_dtype(dtype):
            kind = "category"
        elif is_datetime64tz_dtype(dtype):
            kind = "datetimetz"
        elif issubclass(dtype.type, np.bool_):
            kind = "bool"
        elif issubclass(dtype.type, np.object_):
            kind = "object"
        elif is_datetime64_dtype(dtype):
            kind = "datetime"
        elif is_timedelta64_dtype(dtype):
            kind = "timedelta"
        elif is_sparse(dtype):
            kind = dtype.subtype.name
        elif is_extension_array_dtype(dtype):
            kind = "object"
        elif is_float_dtype(dtype) or is_numeric_dtype(dtype):
            kind = dtype.name
        else:
            kind = "float"

        # Null blocks should not influence upcast class selection, unless
        # there are only null blocks, when same upcasting rules must be
        # applied to null upcast classes.
        bucket = null_upcast_classes if unit.is_na else upcast_classes
        bucket[kind].append(dtype)

    if not upcast_classes:
        upcast_classes = null_upcast_classes

    # TODO: de-duplicate with maybe_promote?
    # Pick the result by precedence of the upcast classes that are present.
    if "object" in upcast_classes:
        return np.dtype(np.object_), np.nan
    if "bool" in upcast_classes:
        # A missing block forces object so that NaN can be represented.
        if has_none_blocks:
            return np.dtype(np.object_), np.nan
        return np.dtype(np.bool_), None
    if "category" in upcast_classes:
        return np.dtype(np.object_), np.nan
    if "datetimetz" in upcast_classes:
        # GH-25014. We use NaT instead of iNaT, since this eventually
        # ends up in DatetimeArray.take, which does not allow iNaT.
        tz_dtypes = upcast_classes["datetimetz"]
        return tz_dtypes[0], tslibs.NaT
    if "datetime" in upcast_classes:
        return np.dtype("M8[ns]"), tslibs.iNaT
    if "timedelta" in upcast_classes:
        return np.dtype("m8[ns]"), np.timedelta64("NaT", "ns")

    # Only numeric-like class names remain; let numpy find a common type.
    try:
        common = np.find_common_type(upcast_classes, [])
    except TypeError:
        # At least one is an ExtensionArray
        return np.dtype(np.object_), np.nan

    if is_float_dtype(common):
        return common, common.type(np.nan)
    if is_numeric_dtype(common):
        if has_none_blocks:
            return np.float64, np.nan
        return common, None

    raise AssertionError("invalid dtype determination in get_concat_dtype")
Пример #43
0
def _convert_listlike_datetimes(arg,
                                box,
                                format,
                                name=None,
                                tz=None,
                                unit=None,
                                errors=None,
                                infer_datetime_format=None,
                                dayfirst=None,
                                yearfirst=None,
                                exact=None):
    """
    Helper function for to_datetime. Performs the conversions of 1D listlike
    of dates

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        date to be parsed
    box : boolean
        True boxes result as an Index-like, False returns an ndarray
    format : string or None
        format handed to ``array_strptime``; cannot be combined with
        ``unit``. May be replaced by a guessed format when
        ``infer_datetime_format`` is set, and is dropped again when it is
        recognised as ISO8601 (a faster path exists for that case)
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : string
        None or string of the frequency of the passed data
    errors : string
        error handling behaviors from to_datetime, 'raise', 'coerce', 'ignore'
    infer_datetime_format : boolean
        inferring format behavior from to_datetime
    dayfirst : boolean
        dayfirst parsing behavior from to_datetime
    yearfirst : boolean
        yearfirst parsing behavior from to_datetime
    exact : boolean
        exact format matching behavior from to_datetime

    Returns
    -------
    ndarray of parsed dates
        Returns:

        - Index-like if box=True
        - ndarray of Timestamps if box=False

    Raises
    ------
    ValueError
        If both ``format`` and ``unit`` are given, if the input cannot be
        converted with the '%Y%m%d' shortcut, or if parsing fails and the
        final ``datetime_to_datetime64`` fallback fails as well.
    TypeError
        If ``arg`` has more than one dimension.
    """
    from pandas import DatetimeIndex
    if isinstance(arg, (list, tuple)):
        # object dtype keeps the elements as they were passed in
        arg = np.array(arg, dtype='O')

    # these are shortcutable
    if is_datetime64tz_dtype(arg):
        if not isinstance(arg, DatetimeIndex):
            return DatetimeIndex(arg, tz=tz, name=name)
        if tz == 'utc':
            arg = arg.tz_convert(None).tz_localize(tz)
        return arg

    elif is_datetime64_ns_dtype(arg):
        if box and not isinstance(arg, DatetimeIndex):
            try:
                return DatetimeIndex(arg, tz=tz, name=name)
            except ValueError:
                # boxing failed; fall through and return the input unboxed
                pass

        return arg

    elif unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        # unwrap Series/Index to the underlying values when available
        arg = getattr(arg, 'values', arg)
        result = tslib.array_with_unit_to_datetime(arg, unit, errors=errors)
        if box:
            if errors == 'ignore':
                # with errors='ignore' the result is boxed as a plain Index
                from pandas import Index
                return Index(result, name=name)

            return DatetimeIndex(result, tz=tz, name=name)
        return result
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a string, datetime, list, tuple, '
                        '1-d array, or Series')

    arg = ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes process slower in this
        # special case
        format_is_iso8601 = _format_is_iso(format)
        if format_is_iso8601:
            # an explicitly passed ISO format must still be enforced;
            # an inferred one need not be
            require_iso8601 = not infer_datetime_format
            format = None

    try:
        result = None

        if format is not None:
            # shortcut formatting here
            if format == '%Y%m%d':
                try:
                    result = _attempt_YYYYMMDD(arg, errors=errors)
                except (ValueError, TypeError, tslibs.OutOfBoundsDatetime):
                    raise ValueError("cannot convert the input to "
                                     "'%Y%m%d' date format")

            # fallback
            if result is None:
                try:
                    result, timezones = array_strptime(arg,
                                                       format,
                                                       exact=exact,
                                                       errors=errors)
                    if '%Z' in format or '%z' in format:
                        # timezone directives need dedicated handling of
                        # the parsed timezones
                        return _return_parsed_timezone_results(
                            result, timezones, box, tz, name)
                except tslibs.OutOfBoundsDatetime:
                    if errors == 'raise':
                        raise
                    # otherwise hand back the (object) input unchanged
                    result = arg
                except ValueError:
                    # if format was inferred, try falling back
                    # to array_to_datetime - terminate here
                    # for specified formats
                    if not infer_datetime_format:
                        if errors == 'raise':
                            raise
                        result = arg

        # general parser: used when there is no format, or when an inferred
        # format failed above and left result as None
        if result is None and (format is None or infer_datetime_format):
            result, tz_parsed = tslib.array_to_datetime(
                arg,
                errors=errors,
                utc=tz == 'utc',
                dayfirst=dayfirst,
                yearfirst=yearfirst,
                require_iso8601=require_iso8601)
            if tz_parsed is not None:
                if box:
                    # We can take a shortcut since the datetime64 numpy array
                    # is in UTC
                    return DatetimeIndex._simple_new(result,
                                                     name=name,
                                                     tz=tz_parsed)
                else:
                    # Convert the datetime64 numpy array to an numpy array
                    # of datetime objects
                    result = [
                        Timestamp(ts, tz=tz_parsed).to_pydatetime()
                        for ts in result
                    ]
                    return np.array(result, dtype=object)

        if box:
            # Ensure we return an Index in all cases where box=True
            if is_datetime64_dtype(result):
                return DatetimeIndex(result, tz=tz, name=name)
            elif is_object_dtype(result):
                # e.g. an Index of datetime objects
                from pandas import Index
                return Index(result, name=name)
        return result

    except ValueError as e:
        # last resort: try converting datetime objects directly; if that
        # fails too, surface the original parsing error rather than the
        # fallback's
        try:
            values, tz = conversion.datetime_to_datetime64(arg)
            return DatetimeIndex._simple_new(values, name=name, tz=tz)
        except (ValueError, TypeError):
            raise e
Пример #44
0
def sequence_to_td64ns(data, copy=False, unit="ns", errors="raise"):
    """
    Convert a list-like into a ``timedelta64[ns]`` numpy array.

    Parameters
    ----------
    data : list-like
        The values to convert.
    copy : bool, default False
        Whether the returned array must be a copy. Conversions that already
        produce a new array switch this off internally to avoid a second
        copy.
    unit : str, default "ns"
        The timedelta unit to treat integers as multiples of.
    errors : {"raise", "coerce", "ignore"}, default "raise"
        How to handle elements that cannot be converted to timedelta64[ns].
        See ``pandas.to_timedelta`` for details.

    Returns
    -------
    converted : numpy.ndarray
        The sequence converted to a numpy array with dtype ``timedelta64[ns]``.
    inferred_freq : Tick or None
        The ``freq`` carried by a timedelta index/array input, else None.

    Raises
    ------
    ValueError : Data cannot be converted to timedelta64[ns].
    TypeError : The dtype of ``data`` is not supported.

    Notes
    -----
    Unlike `pandas.to_timedelta`, setting ``errors=ignore`` here will not
    cause errors to be ignored; they are caught and subsequently ignored at
    a higher level.
    """
    unit = parse_timedelta_unit(unit)
    inferred_freq = None

    # Unwrap whatever we have into a np.ndarray
    if hasattr(data, 'dtype'):
        if isinstance(data, ABCSeries):
            data = data._values
        elif isinstance(data, (ABCTimedeltaIndex, TimedeltaArrayMixin)):
            # keep the frequency before dropping down to the raw values
            inferred_freq = data.freq
            data = data._data
    else:
        # e.g. list, tuple
        if np.ndim(data) == 0:
            # i.e. generator
            data = list(data)
        data = np.array(data, copy=False)

    # Convert whatever we have into timedelta64[ns] dtype
    if is_object_dtype(data) or is_string_dtype(data):
        # conversion builds a new array, so no further copy is needed
        data = objects_to_td64ns(data, unit=unit, errors=errors)
        copy = False

    elif is_integer_dtype(data):
        # integers are multiples of the given unit
        data, already_copied = ints_to_td64ns(data, unit=unit)
        copy = copy and not already_copied

    elif is_float_dtype(data):
        # floats are multiples of the given unit as well.  Fractional
        #  nanoseconds left after scaling are truncated (i.e. NOT rounded)
        nan_mask = np.isnan(data)
        ns_per_unit = np.timedelta64(1, unit) / np.timedelta64(1, 'ns')
        nanos = (ns_per_unit * data).astype(np.int64)
        data = nanos.view('timedelta64[ns]')
        data[nan_mask] = iNaT
        copy = False

    elif is_timedelta64_dtype(data):
        if data.dtype != _TD_DTYPE:
            # non-nano unit
            # TODO: watch out for overflows
            data = data.astype(_TD_DTYPE)
            copy = False

    elif is_datetime64_dtype(data):
        # GH#23539
        deprecation_msg = ("Passing datetime64-dtype data to TimedeltaIndex "
                           "is deprecated, will raise a TypeError in a "
                           "future version")
        warnings.warn(deprecation_msg, FutureWarning, stacklevel=4)
        data = ensure_int64(data).view(_TD_DTYPE)

    else:
        unsupported = "dtype {dtype} cannot be converted to timedelta64[ns]"
        raise TypeError(unsupported.format(dtype=data.dtype))

    data = np.array(data, copy=copy)
    assert data.dtype == 'm8[ns]', data
    return data, inferred_freq
Пример #45
0
def period_array(
    data: Union[Sequence[Optional[Period]], AnyArrayLike],
    freq: Optional[Union[str, Tick]] = None,
    copy: bool = False,
) -> PeriodArray:
    """
    Construct a new PeriodArray from a sequence of Period scalars.

    Parameters
    ----------
    data : Sequence of Period objects
        A sequence of Period objects. These are required to all have
        the same ``freq.`` Missing values can be indicated by ``None``
        or ``pandas.NaT``.
    freq : str, Tick, or Offset
        The frequency of every element of the array. This can be specified
        to avoid inferring the `freq` from `data`.
    copy : bool, default False
        Whether to ensure a copy of the data is made. This is forwarded to
        ``PeriodArray`` when `data` already has a period dtype; the other
        inputs are converted through ``PeriodArray._from_datetime64`` or
        ``PeriodArray._from_sequence``, to which `copy` is not passed.

    Returns
    -------
    PeriodArray

    Raises
    ------
    TypeError
        If `data` is a non-empty floating point array.

    See Also
    --------
    PeriodArray
    pandas.PeriodIndex

    Examples
    --------
    >>> period_array([pd.Period('2017', freq='A'),
    ...               pd.Period('2018', freq='A')])
    <PeriodArray>
    ['2017', '2018']
    Length: 2, dtype: period[A-DEC]

    >>> period_array([pd.Period('2017', freq='A'),
    ...               pd.Period('2018', freq='A'),
    ...               pd.NaT])
    <PeriodArray>
    ['2017', '2018', 'NaT']
    Length: 3, dtype: period[A-DEC]

    Integers that look like years are handled

    >>> period_array([2000, 2001, 2002], freq='D')
    <PeriodArray>
    ['2000-01-01', '2001-01-01', '2002-01-01']
    Length: 3, dtype: period[D]

    Datetime-like strings may also be passed

    >>> period_array(['2000-Q1', '2000-Q2', '2000-Q3', '2000-Q4'], freq='Q')
    <PeriodArray>
    ['2000Q1', '2000Q2', '2000Q3', '2000Q4']
    Length: 4, dtype: period[Q-DEC]
    """
    data_dtype = getattr(data, "dtype", None)

    if is_datetime64_dtype(data_dtype):
        return PeriodArray._from_datetime64(data, freq)
    if is_period_dtype(data_dtype):
        # Honour ``copy`` here: without it the result could share its
        # ordinals with the input even when a copy was requested.
        return PeriodArray(data, freq=freq, copy=copy)

    # other iterable of some kind
    if not isinstance(data, (np.ndarray, list, tuple, ABCSeries)):
        data = list(data)

    data = np.asarray(data)

    dtype: Optional[PeriodDtype] = PeriodDtype(freq) if freq else None

    # Floats are rejected; an empty float array is let through.
    if is_float_dtype(data) and len(data) > 0:
        raise TypeError(
            "PeriodIndex does not allow floating point in construction")

    data = ensure_object(data)

    return PeriodArray._from_sequence(data, dtype=dtype)
Пример #46
0
def infer_freq(index, warn: bool = True) -> Optional[str]:
    """
    Infer the most likely frequency given the input index. If the frequency is
    uncertain, a warning will be printed.

    Parameters
    ----------
    index : DatetimeIndex or TimedeltaIndex
      If passed a Series will use the values of the series (NOT THE INDEX).
    warn : bool, default True
        Passed through to the frequency inferer.

    Returns
    -------
    str or None
        None if no discernible frequency.

    Raises
    ------
    TypeError
        If the index is not datetime-like.
    ValueError
        If there are fewer than three values (not raised directly here;
        presumably comes from the frequency inferer).
    """
    import pandas as pd

    if isinstance(index, ABCSeries):
        series_values = index._values
        convertible = (
            is_datetime64_dtype(series_values)
            or is_timedelta64_dtype(series_values)
            or series_values.dtype == object
        )
        if not convertible:
            raise TypeError(
                "cannot infer freq from a non-convertible dtype "
                f"on a Series of {index.dtype}"
            )
        index = series_values

    if hasattr(index, "dtype"):
        if is_period_dtype(index.dtype):
            raise TypeError(
                "PeriodIndex given. Check the `freq` attribute "
                "instead of using infer_freq."
            )
        if is_timedelta64_dtype(index.dtype):
            # Allow TimedeltaIndex and TimedeltaArray
            return _TimedeltaFrequencyInferer(index, warn=warn).get_freq()

    is_other_index = isinstance(index, pd.Index) and not isinstance(
        index, pd.DatetimeIndex
    )
    if is_other_index:
        if isinstance(index, (pd.Int64Index, pd.Float64Index)):
            raise TypeError(
                f"cannot infer freq from a non-convertible index type {type(index)}"
            )
        # any other Index flavour: continue with its underlying values
        index = index._values

    # Everything left is coerced to a DatetimeIndex before inference.
    if not isinstance(index, pd.DatetimeIndex):
        index = pd.DatetimeIndex(index)

    return _FrequencyInferer(index, warn=warn).get_freq()
Пример #47
0
def get_empty_dtype_and_na(join_units):
    """
    Decide the dtype and N/A filler to use when concatenating join units.

    The dtype of each unit that has a block is sorted into an "upcast class"
    bucket, and the buckets present select the result by a fixed precedence
    (object, bool, category, datetimetz, datetime, timedelta, numeric common
    type).  Units flagged ``is_na`` are only considered when no non-null
    unit exists.

    Parameters
    ----------
    join_units : list
        Join units exposing ``block``, ``dtype`` and ``is_na``.

    Returns
    -------
    dtype
    na
        The fill value; may be None which means there was no casting
        involved.

    Raises
    ------
    AssertionError
        If none of the rules below yields a dtype.
    """
    # A lone unit without a block is treated as all-NaN float data.
    if len(join_units) == 1 and join_units[0].block is None:
        return np.float64, np.nan

    if is_uniform_reindex(join_units):
        # XXX: integrate property
        first_block = join_units[0].block
        return first_block.dtype, first_block.fill_value

    # Units without a block contribute no dtype (kept as None placeholders).
    dtypes = [
        None if unit.block is None else unit.dtype for unit in join_units
    ]
    has_none_blocks = any(unit.block is None for unit in join_units)

    upcast_classes = defaultdict(list)
    null_upcast_classes = defaultdict(list)
    for dtype, unit in zip(dtypes, join_units):
        if dtype is None:
            continue

        # The order of these checks matters: the first match wins.
        if is_categorical_dtype(dtype):
            kind = 'category'
        elif is_datetimetz(dtype):
            kind = 'datetimetz'
        elif issubclass(dtype.type, np.bool_):
            kind = 'bool'
        elif issubclass(dtype.type, np.object_):
            kind = 'object'
        elif is_datetime64_dtype(dtype):
            kind = 'datetime'
        elif is_timedelta64_dtype(dtype):
            kind = 'timedelta'
        elif is_sparse(dtype):
            kind = dtype.subtype.name
        elif is_float_dtype(dtype) or is_numeric_dtype(dtype):
            kind = dtype.name
        else:
            kind = 'float'

        # Null blocks should not influence upcast class selection, unless
        # there are only null blocks, when same upcasting rules must be
        # applied to null upcast classes.
        bucket = null_upcast_classes if unit.is_na else upcast_classes
        bucket[kind].append(dtype)

    if not upcast_classes:
        upcast_classes = null_upcast_classes

    # Pick the result by precedence of the upcast classes that are present.
    if 'object' in upcast_classes:
        return np.dtype(np.object_), np.nan
    if 'bool' in upcast_classes:
        # A missing block forces object so that NaN can be represented.
        if has_none_blocks:
            return np.dtype(np.object_), np.nan
        return np.dtype(np.bool_), None
    if 'category' in upcast_classes:
        return np.dtype(np.object_), np.nan
    if 'datetimetz' in upcast_classes:
        tz_dtypes = upcast_classes['datetimetz']
        return tz_dtypes[0], tslibs.iNaT
    if 'datetime' in upcast_classes:
        return np.dtype('M8[ns]'), tslibs.iNaT
    if 'timedelta' in upcast_classes:
        return np.dtype('m8[ns]'), tslibs.iNaT

    # Only numeric-like class names remain; let numpy find a common type.
    try:
        common = np.find_common_type(upcast_classes, [])
    except TypeError:
        # At least one is an ExtensionArray
        return np.dtype(np.object_), np.nan

    if is_float_dtype(common):
        return common, common.type(np.nan)
    if is_numeric_dtype(common):
        if has_none_blocks:
            return np.float64, np.nan
        return common, None

    raise AssertionError("invalid dtype determination in get_concat_dtype")
Пример #48
0
def astype_nansafe(arr, dtype, copy: bool = True, skipna: bool = False):
    """
    Cast the elements of an array to a given dtype in a nan-safe manner.

    Parameters
    ----------
    arr : ndarray
    dtype : np.dtype
        Target dtype; anything that is not already an ``np.dtype`` is
        resolved through ``pandas_dtype`` (extension dtypes are dispatched
        to their array type first).
    copy : bool, default True
        If False, a view will be attempted but may fail, if
        e.g. the item sizes don't align.
    skipna: bool, default False
        Whether or not we should skip NaN when casting as a string-type.

    Raises
    ------
    ValueError
        The dtype was a datetime64/timedelta64 dtype, but it had no unit;
        or NaT / non-finite values were asked to become integers.
    TypeError
        A datetime64 or timedelta64 array was cast to an unsupported dtype.
    """

    # dispatch on extension dtype if needed
    if is_extension_array_dtype(dtype):
        array_type = dtype.construct_array_type()
        return array_type._from_sequence(arr, dtype=dtype, copy=copy)

    if not isinstance(dtype, np.dtype):
        dtype = pandas_dtype(dtype)

    if issubclass(dtype.type, str):
        as_text = lib.astype_str(arr.ravel(), skipna=skipna)
        return as_text.reshape(arr.shape)

    elif is_datetime64_dtype(arr):
        if is_object_dtype(dtype):
            return tslib.ints_to_pydatetime(arr.view(np.int64))
        if dtype == np.int64:
            if isna(arr).any():
                raise ValueError("Cannot convert NaT values to integer")
            return arr.view(dtype)

        # allow frequency conversions
        if dtype.kind == "M":
            return arr.astype(dtype)

        raise TypeError(
            f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]")

    elif is_timedelta64_dtype(arr):
        if is_object_dtype(dtype):
            return tslibs.ints_to_pytimedelta(arr.view(np.int64))
        if dtype == np.int64:
            if isna(arr).any():
                raise ValueError("Cannot convert NaT values to integer")
            return arr.view(dtype)

        if dtype in [_INT64_DTYPE, _TD_DTYPE]:
            if dtype == _TD_DTYPE:
                return arr.astype(_TD_DTYPE, copy=copy)
        elif dtype.kind == "m":
            # allow frequency conversions
            # we return a float here!
            nat_mask = isna(arr)
            as_float = arr.astype(dtype).astype(np.float64)
            as_float[nat_mask] = np.nan
            return as_float

        raise TypeError(
            f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]")

    elif np.issubdtype(arr.dtype, np.floating) and np.issubdtype(
            dtype, np.integer):

        # only a validity check; the cast itself happens at the bottom
        all_finite = np.isfinite(arr).all()
        if not all_finite:
            raise ValueError(
                "Cannot convert non-finite values (NA or inf) to integer")

    elif is_object_dtype(arr):

        # work around NumPy brokenness, #1987
        if np.issubdtype(dtype.type, np.integer):
            flat_ints = lib.astype_intsafe(arr.ravel(), dtype)
            return flat_ints.reshape(arr.shape)

        # if we have a datetime/timedelta array of objects
        # then coerce to a proper dtype and recall astype_nansafe

        elif is_datetime64_dtype(dtype):
            from pandas import to_datetime

            coerced = to_datetime(arr).values
            return astype_nansafe(coerced, dtype, copy=copy)
        elif is_timedelta64_dtype(dtype):
            from pandas import to_timedelta

            coerced = to_timedelta(arr).values
            return astype_nansafe(coerced, dtype, copy=copy)

    if dtype.name in ("datetime64", "timedelta64"):
        raise ValueError(
            f"The '{dtype.name}' dtype has no unit. Please pass in "
            f"'{dtype.name}[ns]' instead.")

    # Explicit copy, or required since NumPy can't view from / to object.
    must_copy = copy or is_object_dtype(arr) or is_object_dtype(dtype)
    if must_copy:
        return arr.astype(dtype, copy=True)

    return arr.view(dtype)
Пример #49
0
    def wrapper(self, other, axis=None):
        # Comparison of a Series against `other`: validate the operands,
        # dispatch to the implementation matching the dtype of `self`, and
        # wrap the resulting values in a new Series on the same index.
        # `op` and `na_op` are taken from the enclosing scope.

        # Validate the axis parameter (the axis number itself is not used)
        if axis is not None:
            self._get_axis_number(axis)

        res_name = get_op_result_name(self, other)
        other = lib.item_from_zerodim(other)

        if isinstance(other, list):
            # TODO: same for tuples?
            other = np.asarray(other)

        if isinstance(other, ABCDataFrame):  # pragma: no cover
            # Defer to DataFrame implementation; fail early
            return NotImplemented

        if isinstance(other, ABCSeries) and not self._indexed_same(other):
            raise ValueError("Can only compare identically-labeled Series objects")

        length_mismatch = (
            is_list_like(other)
            and len(other) != len(self)
            and not isinstance(other, (set, frozenset))
        )
        if length_mismatch:
            raise ValueError("Lengths must match")

        # TODO: make this treatment consistent across ops and classes.
        #  We are not catching all listlikes here (e.g. frozenset, tuple)
        #  The ambiguous case is object-dtype.  See GH#27803
        if isinstance(other, (np.ndarray, ABCIndexClass, ABCSeries)) and len(
            self
        ) != len(other):
            raise ValueError("Lengths must match to compare")

        if is_categorical_dtype(self):
            # Dispatch to Categorical implementation; CategoricalIndex
            # behavior is non-canonical GH#19513
            res_values = dispatch_to_extension_op(op, self, other)

        elif is_datetime64_dtype(self) or is_datetime64tz_dtype(self):
            # Dispatch to DatetimeIndex to ensure identical
            # Series/Index behavior
            from pandas.core.arrays import DatetimeArray

            res_values = dispatch_to_extension_op(op, DatetimeArray(self), other)

        elif is_timedelta64_dtype(self):
            from pandas.core.arrays import TimedeltaArray

            res_values = dispatch_to_extension_op(op, TimedeltaArray(self), other)

        elif is_extension_array_dtype(self) or (
            is_extension_array_dtype(other) and not is_scalar(other)
        ):
            # Note: the `not is_scalar(other)` condition rules out
            #  e.g. other == "category"
            res_values = dispatch_to_extension_op(op, self, other)

        elif is_scalar(other) and isna(other):
            # numpy does not like comparisons vs None: only `!=` is
            # all-True against a missing scalar, everything else all-False
            constant = np.ones if op is operator.ne else np.zeros
            res_values = constant(len(self), dtype=bool)

        else:
            left = extract_array(self, extract_numpy=True)
            right = extract_array(other, extract_numpy=True)

            with np.errstate(all="ignore"):
                res_values = na_op(left, right)
            if is_scalar(res_values):
                raise TypeError(
                    "Could not compare {typ} type with Series".format(typ=type(other))
                )

        result = self._constructor(res_values, index=self.index)

        # TODO: shouldn't we be applying finalize whenever
        #  not isinstance(other, ABCSeries)?
        # `other` is inspected only now, i.e. after a list has been turned
        # into an ndarray above.
        if isinstance(other, (np.ndarray, ABCIndexClass)):
            result = result.__finalize__(self)

        # rename is needed in case res_name is None and result.name
        #  is not.
        return result.rename(res_name)
Пример #50
0
    def __init__(
        self,
        index,
        grouper=None,
        obj=None,
        name=None,
        level=None,
        sort=True,
        observed=False,
        in_axis=False,
    ):
        """
        Set up a single grouping and normalise ``grouper``.

        Parameters
        ----------
        index : Index
            Index the grouping refers to; used for level lookups and, for
            groupers that are not array-like, mapped through the grouper.
        grouper : object, optional
            What to group by. Handled below: Series/Index, MultiIndex,
            ``Grouper``, list/tuple, categorical data, another ``Grouping``,
            or anything accepted by ``index.map``.
        obj : object, optional
            Object being grouped; indexed by ``name`` when no grouper is
            given.
        name : object, optional
            Name of the grouping. Falls back to the grouper's name, the
            level name, or the ``Grouper`` result index name.
        level : int or level name, optional
            Index level to group by.
        sort : bool, default True
            Stored on the instance; used when recoding a categorical
            grouper and when ordering the observed codes.
        observed : bool, default False
            For a categorical grouper, restrict the group index to the
            codes that actually occur.
        in_axis : bool, default False
            Stored on the instance as-is.

        Raises
        ------
        AssertionError
            If a named ``level`` is not in ``index.names``, or if mapping
            the grouper over the index yields a result whose length does
            not match the index.
        ValueError
            If the grouper is not 1-dimensional.
        """

        self.name = name
        self.level = level
        self.grouper = _convert_grouper(index, grouper)
        self.all_grouper = None
        self.index = index
        self.sort = sort
        self.obj = obj
        self.observed = observed
        self.in_axis = in_axis

        # right place for this?
        if isinstance(grouper, (Series, Index)) and name is None:
            self.name = grouper.name

        if isinstance(grouper, MultiIndex):
            self.grouper = grouper.values

        # we have a single grouper which may be a myriad of things,
        # some of which are dependent on the passing in level

        if level is not None:
            if not isinstance(level, int):
                # translate a level name into its position
                if level not in index.names:
                    raise AssertionError("Level {} not in index".format(level))
                level = index.names.index(level)

            if self.name is None:
                self.name = index.names[level]

            self.grouper, self._labels, self._group_index = index._get_grouper_for_level(  # noqa: E501
                self.grouper, level)

        # a passed Grouper like, directly get the grouper in the same way
        # as single grouper groupby, use the group_info to get labels
        elif isinstance(self.grouper, Grouper):
            # get the new grouper; we already have disambiguated
            # what key/level refer to exactly, don't need to
            # check again as we have by this point converted these
            # to an actual value (rather than a pd.Grouper)
            _, grouper, _ = self.grouper._get_grouper(self.obj, validate=False)
            if self.name is None:
                self.name = grouper.result_index.name
            self.obj = self.grouper.obj
            self.grouper = grouper._get_grouper()

        else:
            if self.grouper is None and self.name is not None:
                # no grouper given: look the values up on obj by name
                self.grouper = self.obj[self.name]

            elif isinstance(self.grouper, (list, tuple)):
                self.grouper = com.asarray_tuplesafe(self.grouper)

            # a passed Categorical
            elif is_categorical_dtype(self.grouper):

                self.grouper, self.all_grouper = recode_for_groupby(
                    self.grouper, self.sort, observed)
                categories = self.grouper.categories

                # we make a CategoricalIndex out of the cat grouper
                # preserving the categories / ordered attributes
                self._labels = self.grouper.codes
                if observed:
                    # keep only the codes that occur, dropping the -1 code
                    codes = algorithms.unique1d(self.grouper.codes)
                    codes = codes[codes != -1]
                    if sort or self.grouper.ordered:
                        codes = np.sort(codes)
                else:
                    # one group per category, whether it occurs or not
                    codes = np.arange(len(categories))

                self._group_index = CategoricalIndex(
                    Categorical.from_codes(codes=codes,
                                           categories=categories,
                                           ordered=self.grouper.ordered))

            # we are done
            if isinstance(self.grouper, Grouping):
                self.grouper = self.grouper.grouper

            # no level passed
            elif not isinstance(self.grouper,
                                (Series, Index, ExtensionArray, np.ndarray)):
                if getattr(self.grouper, "ndim", 1) != 1:
                    t = self.name or str(type(self.grouper))
                    raise ValueError(
                        "Grouper for '{}' not 1-dimensional".format(t))
                # anything else is applied to the index via Index.map
                self.grouper = self.index.map(self.grouper)
                if not (hasattr(self.grouper, "__len__")
                        and len(self.grouper) == len(self.index)):
                    errmsg = ("Grouper result violates len(labels) == "
                              "len(data)\nresult: %s" %
                              pprint_thing(self.grouper))
                    self.grouper = None  # Try for sanity
                    raise AssertionError(errmsg)

        # if we have a date/time-like grouper, make sure that we have
        # Timestamps like
        if getattr(self.grouper, "dtype", None) is not None:
            if is_datetime64_dtype(self.grouper):
                self.grouper = self.grouper.astype("datetime64[ns]")
            elif is_timedelta64_dtype(self.grouper):

                self.grouper = self.grouper.astype("timedelta64[ns]")
Пример #51
0
def _convert_listlike_datetimes(arg,
                                box,
                                format,
                                name=None,
                                tz=None,
                                unit=None,
                                errors=None,
                                infer_datetime_format=None,
                                dayfirst=None,
                                yearfirst=None,
                                exact=None):
    """
    Helper function for to_datetime. Performs the conversions of 1D listlike
    of dates

    Parameters
    ----------
    arg : list, tuple, ndarray, Series, Index
        date to be parsed
    box : boolean
        True boxes result as an Index-like, False returns an ndarray
    format : string or None
        strftime-style format used to parse ``arg``; must be None when
        ``unit`` is given
    name : object
        None or string for the Index name
    tz : object
        None or 'utc'
    unit : string
        None or string of the frequency of the passed data
    errors : string
        error handling behaviors from to_datetime, 'raise', 'coerce', 'ignore'
    infer_datetime_format : boolean
        inferring format behavior from to_datetime
    dayfirst : boolean
        dayfirst parsing behavior from to_datetime
    yearfirst : boolean
        yearfirst parsing behavior from to_datetime
    exact : boolean
        exact format matching behavior from to_datetime

    Returns
    -------
    Index-like or ndarray
        - Index-like if box=True
        - ndarray of Timestamps if box=False

    Raises
    ------
    ValueError
        If both ``format`` and ``unit`` are specified, or if parsing with
        ``format`` fails and the fallback conversion of datetime objects
        fails as well.
    TypeError
        If ``arg`` has more than one dimension.
    """
    from pandas import DatetimeIndex
    from pandas.core.arrays import DatetimeArray
    from pandas.core.arrays.datetimes import (maybe_convert_dtype,
                                              objects_to_datetime64ns)

    # lists/tuples are normalized to an object-dtype ndarray up front
    if isinstance(arg, (list, tuple)):
        arg = np.array(arg, dtype='O')

    # these are shortcutable
    if is_datetime64tz_dtype(arg):
        if not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            return DatetimeIndex(arg, tz=tz, name=name)
        if tz == 'utc':
            arg = arg.tz_convert(None).tz_localize(tz)
        return arg

    elif is_datetime64_ns_dtype(arg):
        if box and not isinstance(arg, (DatetimeArray, DatetimeIndex)):
            try:
                return DatetimeIndex(arg, tz=tz, name=name)
            except ValueError:
                # boxing failed; fall through and return the input unchanged
                pass

        return arg

    elif unit is not None:
        if format is not None:
            raise ValueError("cannot specify both format and unit")
        # use the underlying values when ``arg`` exposes a ``values`` attribute
        arg = getattr(arg, 'values', arg)
        result, tz_parsed = tslib.array_with_unit_to_datetime(arg,
                                                              unit,
                                                              errors=errors)
        if box:
            if errors == 'ignore':
                from pandas import Index
                result = Index(result, name=name)
            else:
                result = DatetimeIndex(result, name=name)
            # GH 23758: We may still need to localize the result with tz
            # GH 25546: Apply tz_parsed first (from arg), then tz (from caller)
            # result will be naive but in UTC
            try:
                result = result.tz_localize('UTC').tz_convert(tz_parsed)
            except AttributeError:
                # Regular Index from 'ignore' path
                return result
            if tz is not None:
                if result.tz is None:
                    result = result.tz_localize(tz)
                else:
                    result = result.tz_convert(tz)
        return result
    elif getattr(arg, 'ndim', 1) > 1:
        raise TypeError('arg must be a string, datetime, list, tuple, '
                        '1-d array, or Series')

    # warn if passing timedelta64, raise for PeriodDtype
    # NB: this must come after unit transformation
    orig_arg = arg
    arg, _ = maybe_convert_dtype(arg, copy=False)

    arg = ensure_object(arg)
    require_iso8601 = False

    if infer_datetime_format and format is None:
        format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

    if format is not None:
        # There is a special fast-path for iso8601 formatted
        # datetime strings, so in those cases don't use the inferred
        # format because this path makes process slower in this
        # special case
        format_is_iso8601 = _format_is_iso(format)
        if format_is_iso8601:
            require_iso8601 = not infer_datetime_format
            format = None

    tz_parsed = None
    result = None

    if format is not None:
        try:
            # shortcut formatting here
            if format == '%Y%m%d':
                try:
                    # pass orig_arg as float-dtype may have been converted to
                    # datetime64[ns]
                    orig_arg = ensure_object(orig_arg)
                    result = _attempt_YYYYMMDD(orig_arg, errors=errors)
                except (ValueError, TypeError, tslibs.OutOfBoundsDatetime):
                    raise ValueError("cannot convert the input to "
                                     "'%Y%m%d' date format")

            # fallback
            if result is None:
                try:
                    result, timezones = array_strptime(arg,
                                                       format,
                                                       exact=exact,
                                                       errors=errors)
                    if '%Z' in format or '%z' in format:
                        return _return_parsed_timezone_results(
                            result, timezones, box, tz, name)
                except tslibs.OutOfBoundsDatetime:
                    if errors == 'raise':
                        raise
                    elif errors == 'coerce':
                        # build an all-NaT datetime64[ns] array of arg's shape
                        result = np.empty(arg.shape, dtype='M8[ns]')
                        iresult = result.view('i8')
                        iresult.fill(tslibs.iNaT)
                    else:
                        result = arg
                except ValueError:
                    # if format was inferred, try falling back
                    # to array_to_datetime - terminate here
                    # for specified formats
                    if not infer_datetime_format:
                        if errors == 'raise':
                            raise
                        elif errors == 'coerce':
                            # build an all-NaT datetime64[ns] array
                            result = np.empty(arg.shape, dtype='M8[ns]')
                            iresult = result.view('i8')
                            iresult.fill(tslibs.iNaT)
                        else:
                            result = arg
        except ValueError as e:
            # Fallback to try to convert datetime objects if timezone-aware
            #  datetime objects are found without passing `utc=True`
            try:
                values, tz = conversion.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, name=name, tz=tz)
            except (ValueError, TypeError):
                # the fallback failed too: surface the original parsing error
                raise e

    if result is None:
        # reached only when no format-based parse produced a result
        assert format is None or infer_datetime_format
        utc = tz == 'utc'
        result, tz_parsed = objects_to_datetime64ns(
            arg,
            dayfirst=dayfirst,
            yearfirst=yearfirst,
            utc=utc,
            errors=errors,
            require_iso8601=require_iso8601,
            allow_object=True)

    if tz_parsed is not None:
        if box:
            # We can take a shortcut since the datetime64 numpy array
            # is in UTC
            return DatetimeIndex._simple_new(result, name=name, tz=tz_parsed)
        else:
            # Convert the datetime64 numpy array to an numpy array
            # of datetime objects
            result = [
                Timestamp(ts, tz=tz_parsed).to_pydatetime() for ts in result
            ]
            return np.array(result, dtype=object)

    if box:
        # Ensure we return an Index in all cases where box=True
        if is_datetime64_dtype(result):
            return DatetimeIndex(result, tz=tz, name=name)
        elif is_object_dtype(result):
            # e.g. an Index of datetime objects
            from pandas import Index
            return Index(result, name=name)
    return result
Пример #52
0
def infer_freq(index, warn: bool = True) -> str | None:
    """
    Guess the frequency string that best describes a datetime-like input.

    Parameters
    ----------
    index : DatetimeIndex or TimedeltaIndex
      When a Series is given, its values are inspected (NOT ITS INDEX).
    warn : bool, default True
      .. deprecated:: 1.5.0

    Returns
    -------
    str or None
        None when no frequency can be discerned.

    Raises
    ------
    TypeError
        If the input is not datetime-like; this includes period dtype,
        for which the ``freq`` attribute should be consulted instead.
    ValueError
        If there are fewer than three values.

    Examples
    --------
    >>> idx = pd.date_range(start='2020/12/01', end='2020/12/30', periods=30)
    >>> pd.infer_freq(idx)
    'D'
    """
    from pandas.core.api import (
        DatetimeIndex,
        Float64Index,
        Index,
        Int64Index,
    )

    # A Series contributes its underlying values, provided their dtype is
    # datetime64, timedelta64 or object.
    if isinstance(index, ABCSeries):
        series_values = index._values
        convertible = (
            is_datetime64_dtype(series_values)
            or is_timedelta64_dtype(series_values)
            or series_values.dtype == object
        )
        if not convertible:
            raise TypeError("cannot infer freq from a non-convertible dtype "
                            f"on a Series of {index.dtype}")
        index = series_values

    # dtype-based dispatch only applies to inputs that carry a dtype
    if hasattr(index, "dtype"):
        if is_period_dtype(index.dtype):
            raise TypeError("PeriodIndex given. Check the `freq` attribute "
                            "instead of using infer_freq.")
        if is_timedelta64_dtype(index.dtype):
            # Allow TimedeltaIndex and TimedeltaArray
            return _TimedeltaFrequencyInferer(index, warn=warn).get_freq()

    # Any other Index flavour is unwrapped to its values, except the numeric
    # index types which are rejected outright.
    if isinstance(index, Index) and not isinstance(index, DatetimeIndex):
        if isinstance(index, (Int64Index, Float64Index)):
            raise TypeError(
                f"cannot infer freq from a non-convertible index type {type(index)}"
            )
        index = index._values

    if not isinstance(index, DatetimeIndex):
        index = DatetimeIndex(index)

    return _FrequencyInferer(index, warn=warn).get_freq()
Пример #53
0
def astype_array_safe(values: ArrayLike,
                      dtype,
                      copy: bool = False,
                      errors: IgnoreRaise = "raise") -> ArrayLike:
    """
    Cast an ndarray or ExtensionArray to ``dtype``.

    This is the implementation behind DataFrame/Series.astype and carries
    the pandas-specific casting logic (NaN-safety, converting str to object).

    Parameters
    ----------
    values : ndarray or ExtensionArray
    dtype : str, dtype convertible
    copy : bool, default False
        copy if indicated
    errors : str, {'raise', 'ignore'}, default 'raise'
        - ``raise`` : allow exceptions to be raised
        - ``ignore`` : suppress exceptions. On error return original object

    Returns
    -------
    ndarray or ExtensionArray

    Raises
    ------
    ValueError
        If ``errors`` is neither 'raise' nor 'ignore'.
    TypeError
        If ``dtype`` is an ExtensionDtype class rather than an instance.
    """
    errors_legal_values = ("raise", "ignore")
    if errors not in errors_legal_values:
        raise ValueError(
            "Expected value of kwarg 'errors' to be one of "
            f"{list(errors_legal_values)}. Supplied value is '{errors}'")

    if inspect.isclass(dtype) and issubclass(dtype, ExtensionDtype):
        raise TypeError(f"Expected an instance of {dtype.__name__}, "
                        "but got the class instead. Try instantiating 'dtype'.")

    target = pandas_dtype(dtype)
    if isinstance(target, PandasDtype):
        # Ensure we don't end up with a PandasArray
        target = target.numpy_dtype

    # The isinstance(np.dtype) + kind check stands in for is_datetime64_dtype
    # on the target, which pyright would otherwise complain about.
    is_m8_unit_conversion = (
        is_datetime64_dtype(values.dtype)
        and isinstance(target, np.dtype)
        and target.kind == "M"
        and not is_unitless(target)
        and not is_dtype_equal(target, values.dtype)
    )
    if is_m8_unit_conversion:
        # unit conversion, we would re-cast to nanosecond, so this is
        #  effectively just a copy (regardless of copy kwd)
        # TODO(2.0): remove special-case
        return values.copy()

    try:
        return astype_array(values, target, copy=copy)
    except (ValueError, TypeError):
        # e.g. astype_nansafe can fail on object-dtype of strings
        #  trying to convert to float
        if errors != "ignore":
            raise
        return values
Пример #54
0
        def __sub__(self, other):
            # Subtraction dispatcher: picks the private helper matching the
            # type/dtype of ``other``.  The order of the checks below is
            # significant and must not be rearranged.
            from pandas import Index

            other = lib.item_from_zerodim(other)

            # Series/DataFrame operands: defer to their reflected method
            if isinstance(other, (ABCSeries, ABCDataFrame)):
                return NotImplemented

            # ---- scalar operands ----
            if other is NaT:
                diff = self._sub_nat()
            elif isinstance(other, (Tick, timedelta, np.timedelta64)):
                diff = self._add_delta(-other)
            elif isinstance(other, DateOffset):
                # a DateOffset that is specifically _not_ a Tick
                diff = self._add_offset(-other)
            elif isinstance(other, (datetime, np.datetime64)):
                diff = self._sub_datelike(other)
            elif is_integer(other):
                # Must stay below the np.timedelta64 check, because
                # is_integer returns True for those as well
                diff = self.shift(-other)
            elif isinstance(other, Period):
                diff = self._sub_period(other)

            # ---- array-like operands ----
            elif is_timedelta64_dtype(other):
                # TimedeltaIndex, ndarray[timedelta64]
                diff = self._add_delta(-other)
            elif is_offsetlike(other):
                # Array/Index of DateOffset objects
                diff = self._addsub_offset_array(other, operator.sub)
            elif is_datetime64_dtype(other) or is_datetime64tz_dtype(other):
                # DatetimeIndex, ndarray[datetime64]
                diff = self._sub_datelike(other)
            elif is_period_dtype(other):
                # PeriodIndex
                diff = self._sub_period_array(other)
            elif is_integer_dtype(other):
                diff = self._addsub_int_array(other, operator.sub)
            elif isinstance(other, Index):
                raise TypeError("cannot subtract {cls} and {typ}".format(
                    cls=type(self).__name__, typ=type(other).__name__))
            elif is_float_dtype(other):
                # Explicitly catch invalid dtypes
                raise TypeError(
                    "cannot subtract {dtype}-dtype from {cls}".format(
                        dtype=other.dtype, cls=type(self).__name__))
            elif is_categorical_dtype(other):
                # Categorical op will raise; defer explicitly
                return NotImplemented
            else:  # pragma: no cover
                return NotImplemented

            # A helper may itself decline the operation
            if diff is NotImplemented:
                return NotImplemented

            if not isinstance(diff, Index):
                # Index.__new__ will choose appropriate subclass for dtype
                diff = Index(diff)
            diff.name = ops.get_op_result_name(self, other)
            return diff
Пример #55
0
    def wrapper(left, right):
        # Applies the enclosing scope's ``op`` (with ``na_op`` as the generic
        # fallback) to ``left`` and ``right``, dispatching on their dtypes.
        # Every dtype-specific branch below either raises or returns.
        if isinstance(right, ABCDataFrame):
            return NotImplemented

        left, right = _align_method_SERIES(left, right)
        res_name = get_op_result_name(left, right)
        right = maybe_upcast_for_op(right, left.shape)

        def _finish(values):
            # Wrap raw values using left's index and the computed result name.
            # No dtype is passed so that the Series constructor does inference
            # in the case where ``values`` has object-dtype.
            return construct_result(left,
                                    values,
                                    index=left.index,
                                    name=res_name)

        if is_categorical_dtype(left):
            raise TypeError("{typ} cannot perform the operation "
                            "{op}".format(typ=type(left).__name__, op=str_rep))

        if is_datetime64_dtype(left) or is_datetime64tz_dtype(left):
            from pandas.core.arrays import DatetimeArray

            return _finish(
                dispatch_to_extension_op(op, DatetimeArray(left), right))

        if is_extension_array_dtype(left) or (is_extension_array_dtype(right)
                                              and not is_scalar(right)):
            # GH#22378 disallow scalar to exclude e.g. "category", "Int64"
            return _finish(dispatch_to_extension_op(op, left, right))

        if is_timedelta64_dtype(left):
            from pandas.core.arrays import TimedeltaArray

            return _finish(
                dispatch_to_extension_op(op, TimedeltaArray(left), right))

        if is_timedelta64_dtype(right):
            # We should only get here with non-scalar values for right
            #  upcast by maybe_upcast_for_op
            assert not isinstance(right, (np.timedelta64, np.ndarray))
            return _finish(op(left._values, right))

        if isinstance(right, (ABCDatetimeArray, ABCDatetimeIndex)):
            return _finish(op(left._values, right))

        # Generic path: operate on the raw values of both operands
        lvalues = left.values
        if isinstance(right, (ABCSeries, ABCIndexClass)):
            rvalues = right._values
        else:
            rvalues = right

        with np.errstate(all="ignore"):
            raw = na_op(lvalues, rvalues)
        return construct_result(left,
                                raw,
                                index=left.index,
                                name=res_name,
                                dtype=None)
Пример #56
0
    def _convert_listlike(arg, box, format, name=None, tz=tz):
        """
        Convert a 1-D list-like of dates on behalf of the enclosing function.

        Besides its own parameters this helper reads ``utc``, ``unit``,
        ``errors``, ``infer_datetime_format``, ``dayfirst``, ``yearfirst``
        and ``exact`` from the enclosing scope.

        Parameters
        ----------
        arg : list, tuple, ndarray, Series, Index
            Values to parse; lists and tuples are first converted to an
            object-dtype ndarray.
        box : boolean
            If True, datetime64 results are wrapped in a DatetimeIndex.
        format : string or None
            strftime-style format; must be None when ``unit`` is set.
        name : object, default None
            Name given to any Index that is returned.
        tz : object
            Timezone handed to DatetimeIndex; defaults to the enclosing
            scope's ``tz``.

        Returns
        -------
        DatetimeIndex, Index or array-like
            Input that already has a datetime64 dtype may be returned
            unchanged.

        Raises
        ------
        ValueError
            If both ``format`` and ``unit`` are given, or if parsing fails
            and the fallback conversion of datetime objects fails as well.
        TypeError
            If ``arg`` has more than one dimension.
        """
        if isinstance(arg, (list, tuple)):
            arg = np.array(arg, dtype='O')

        # these are shortcutable
        if is_datetime64tz_dtype(arg):
            if not isinstance(arg, DatetimeIndex):
                return DatetimeIndex(arg, tz=tz, name=name)
            if utc:
                arg = arg.tz_convert(None).tz_localize('UTC')
            return arg

        elif is_datetime64_ns_dtype(arg):
            if box and not isinstance(arg, DatetimeIndex):
                try:
                    return DatetimeIndex(arg, tz=tz, name=name)
                except ValueError:
                    # boxing failed; return the input unchanged below
                    pass

            return arg

        elif unit is not None:
            if format is not None:
                raise ValueError("cannot specify both format and unit")
            arg = getattr(arg, 'values', arg)
            result = tslib.array_with_unit_to_datetime(arg,
                                                       unit,
                                                       errors=errors)
            if box:
                if errors == 'ignore':
                    from pandas import Index
                    # propagate ``name`` like every other boxed return does
                    return Index(result, name=name)

                return DatetimeIndex(result, tz=tz, name=name)
            return result
        elif getattr(arg, 'ndim', 1) > 1:
            raise TypeError('arg must be a string, datetime, list, tuple, '
                            '1-d array, or Series')

        arg = _ensure_object(arg)
        require_iso8601 = False

        if infer_datetime_format and format is None:
            format = _guess_datetime_format_for_array(arg, dayfirst=dayfirst)

        if format is not None:
            # There is a special fast-path for iso8601 formatted
            # datetime strings, so in those cases don't use the inferred
            # format because this path makes process slower in this
            # special case
            format_is_iso8601 = _format_is_iso(format)
            if format_is_iso8601:
                require_iso8601 = not infer_datetime_format
                format = None

        try:
            result = None

            if format is not None:
                # shortcut formatting here
                if format == '%Y%m%d':
                    try:
                        result = _attempt_YYYYMMDD(arg, errors=errors)
                    except Exception:
                        # ``Exception`` rather than a bare ``except`` so that
                        # KeyboardInterrupt/SystemExit are not converted into
                        # a ValueError
                        raise ValueError("cannot convert the input to "
                                         "'%Y%m%d' date format")

                # fallback
                if result is None:
                    try:
                        result = array_strptime(arg,
                                                format,
                                                exact=exact,
                                                errors=errors)
                    except tslib.OutOfBoundsDatetime:
                        if errors == 'raise':
                            raise
                        result = arg
                    except ValueError:
                        # if format was inferred, try falling back
                        # to array_to_datetime - terminate here
                        # for specified formats
                        if not infer_datetime_format:
                            if errors == 'raise':
                                raise
                            result = arg

            if result is None and (format is None or infer_datetime_format):
                result = tslib.array_to_datetime(
                    arg,
                    errors=errors,
                    utc=utc,
                    dayfirst=dayfirst,
                    yearfirst=yearfirst,
                    require_iso8601=require_iso8601)

            if is_datetime64_dtype(result) and box:
                result = DatetimeIndex(result, tz=tz, name=name)
            return result

        except ValueError as e:
            # last resort: try converting datetime objects directly; if that
            # fails too, surface the original parsing error
            try:
                values, tz = tslib.datetime_to_datetime64(arg)
                return DatetimeIndex._simple_new(values, name=name, tz=tz)
            except (ValueError, TypeError):
                raise e