示例#1
0
import re

import pytest

from pandas._libs.tslibs import Timedelta, offsets, to_offset


@pytest.mark.parametrize(
    "freq_input,expected",
    [
        (to_offset("10us"), offsets.Micro(10)),
        (offsets.Hour(), offsets.Hour()),
        ("2h30min", offsets.Minute(150)),
        ("2h 30min", offsets.Minute(150)),
        ("2h30min15s", offsets.Second(150 * 60 + 15)),
        ("2h 60min", offsets.Hour(3)),
        ("2h 20.5min", offsets.Second(8430)),
        ("1.5min", offsets.Second(90)),
        ("0.5S", offsets.Milli(500)),
        ("15l500u", offsets.Micro(15500)),
        ("10s75L", offsets.Milli(10075)),
        ("1s0.25ms", offsets.Micro(1000250)),
        ("1s0.25L", offsets.Micro(1000250)),
        ("2800N", offsets.Nano(2800)),
        ("2SM", offsets.SemiMonthEnd(2)),
        ("2SM-16", offsets.SemiMonthEnd(2, day_of_month=16)),
        ("2SMS-14", offsets.SemiMonthBegin(2, day_of_month=14)),
        ("2SMS-15", offsets.SemiMonthBegin(2)),
    ],
)
def test_to_offset(freq_input, expected):
def get_freq_code(freqstr: str) -> int:
    """Return the ``_period_dtype_code`` of the offset parsed from ``freqstr``.

    ``freqstr`` is handed to ``to_offset``; the resulting offset object's
    ``_period_dtype_code`` attribute is returned unchanged.
    """
    return to_offset(freqstr)._period_dtype_code
示例#3
0
 def _parsed_string_to_bounds(self, reso, parsed: Timedelta):
     # reso is unused, included to match signature of DTI/PI
     lbound = parsed.round(parsed.resolution_string)
     rbound = lbound + to_offset(parsed.resolution_string) - Timedelta(
         1, "ns")
     return lbound, rbound
示例#4
0
def test_to_offset_no_evaluate():
    """``to_offset`` must raise ``TypeError`` for a tuple of empty strings.

    The expected message pattern is the string form of that same tuple.
    """
    bad_freq = ("", "")
    with pytest.raises(TypeError, match=str(bad_freq)):
        to_offset(bad_freq)
示例#5
0
def _get_period_alias(freq) -> Optional[str]:
    """Look up the period alias for ``freq``.

    ``freq`` is converted with ``to_offset``; the offset's ``rule_code`` is
    then passed to ``get_period_alias`` and that result is returned as-is.
    """
    offset = to_offset(freq)
    return get_period_alias(offset.rule_code)
示例#6
0
def test_to_offset_pd_timedelta(kwargs, expected):
    # see gh-9064
    td = Timedelta(**kwargs)
    result = to_offset(td)
    assert result == expected
示例#7
0
def test_to_offset(freq_input, expected):
    result = to_offset(freq_input)
    assert result == expected
示例#8
0
    def __truediv__(self, other):
        """
        Implement ``self / other``.

        The branches below are tried in this order:

        1. ``other`` is an instance of ``self._recognized_scalars``: it is
           converted to ``Timedelta``.  If that yields ``NaT`` a float64
           array of NaN with ``self.shape`` is returned; otherwise the
           result of ``self._ndarray / other`` is returned.
        2. ``other`` is any other scalar (assumed numeric): the underlying
           array is divided and re-wrapped with ``_simple_new``; when
           ``self.freq`` is set, the frequency is scaled by the same divisor.
        3. ``other`` is array-like: objects without a ``dtype`` attribute are
           first converted with ``np.array``; the length must equal
           ``len(self)``.  Dispatch then depends on ``other.dtype``
           (timedelta64, object, or anything else).

        Raises
        ------
        ValueError
            If ``other`` is array-like and ``len(other) != len(self)``.
        """
        # timedelta / X is well-defined for timedelta-like or numeric X

        if isinstance(other, self._recognized_scalars):
            other = Timedelta(other)
            # mypy assumes that __new__ returns an instance of the class
            # github.com/python/mypy/issues/1020
            if cast("Timedelta | NaTType", other) is NaT:
                # specifically timedelta64-NaT
                # Return an all-NaN float64 array shaped like self.
                result = np.empty(self.shape, dtype=np.float64)
                result.fill(np.nan)
                return result

            # otherwise, dispatch to Timedelta implementation
            return self._ndarray / other

        elif lib.is_scalar(other):
            # assume it is numeric
            result = self._ndarray / other
            freq = None
            if self.freq is not None:
                # Tick division is not implemented, so operate on Timedelta
                # (self.freq.delta), then convert the quotient back to an
                # offset with to_offset.
                freq = self.freq.delta / other
                freq = to_offset(freq)
            return type(self)._simple_new(result, dtype=result.dtype, freq=freq)

        # From here on ``other`` is treated as array-like.
        if not hasattr(other, "dtype"):
            # e.g. list, tuple
            other = np.array(other)

        if len(other) != len(self):
            raise ValueError("Cannot divide vectors with unequal lengths")

        elif is_timedelta64_dtype(other.dtype):
            # let numpy handle it
            return self._ndarray / other

        elif is_object_dtype(other.dtype):
            # We operate on raveled arrays to avoid problems in inference
            #  on NaT
            # TODO: tests with non-nano
            srav = self.ravel()
            orav = other.ravel()
            # Element-by-element division, then restore the original shape.
            result_list = [srav[n] / orav[n] for n in range(len(srav))]
            result = np.array(result_list).reshape(self.shape)

            # We need to do dtype inference in order to keep DataFrame ops
            #  behavior consistent with Series behavior
            inferred = lib.infer_dtype(result, skipna=False)
            if inferred == "timedelta":
                # Rebuild an array of this type from the flattened values.
                flat = result.ravel()
                result = type(self)._from_sequence(flat).reshape(result.shape)
            elif inferred == "floating":
                result = result.astype(float)
            elif inferred == "datetime":
                # GH#39750 this occurs when result is all-NaT, in which case
                #  we want to interpret these NaTs as td64.
                #  We construct an all-td64NaT result.
                # error: Incompatible types in assignment (expression has type
                # "TimedeltaArray", variable has type "ndarray[Any,
                # dtype[floating[_64Bit]]]")
                result = self * np.nan  # type: ignore[assignment]

            # Any other inferred kind is returned as the reshaped array
            # built above, without further conversion.
            return result

        else:
            # Remaining dtypes: divide the underlying array and wrap the
            # result with _simple_new using the result's own dtype.
            result = self._ndarray / other
            return type(self)._simple_new(result, dtype=result.dtype)
示例#9
0
def _get_period_alias(freq: timedelta | BaseOffset | str) -> str | None:
    """Return the period alias that ``get_period_alias`` gives for ``freq``.

    ``freq`` is first turned into an offset with ``to_offset``; only that
    offset's ``rule_code`` is used for the lookup.
    """
    rule_code = to_offset(freq).rule_code
    alias = get_period_alias(rule_code)
    return alias
示例#10
0
def test_to_offset_no_evaluate():
    """``to_offset`` must raise ``ValueError`` for a tuple of empty strings."""
    bad_freq = ("", "")
    expected_msg = "Could not evaluate"
    with pytest.raises(ValueError, match=expected_msg):
        to_offset(bad_freq)
示例#11
0
    def tradeDayOffset(self,
                       today,
                       n,
                       freq='1d',
                       incl_on_offset_today=False,
                       **kwargs):
        """
        Shift date(s) by ``n`` steps of ``freq`` on the trading calendar.

        Translated from the original description: if ``n`` is positive,
        return the n-th trading day counting forward from ``today``; a
        negative ``n`` counts in the opposite direction.

        NOTE(review): the original docstring also described ``n == 0``
        ("return the trading day one ``freq`` back from ``today``") and gave
        two ``n=0`` examples, but the code below raises ``ValueError`` when
        ``n == 0``.  Those statements do not match the implementation and
        should be confirmed or dropped.

        Original note on ``incl_on_offset_today``:
            * ``True``: when ``today`` lies on the offset, shifting starts
              from ``today`` itself; when it does not, shifting starts from
              ``today +- offset``.
            * ``False``: shifting always starts from ``today +- offset``.

        Parameters
        ----------
        today : date-like or non-string iterable of date-likes
            Passed to ``pd.to_datetime``.  A single value is wrapped in a
            one-element ``DatetimeIndex`` internally.
        n : int
            Number of steps; must not be zero.
        freq : str, default '1d'
            Frequency alias.  It is upper-cased before being handed to
            ``to_offset``; the suffix checks (``'S'`` and ``'d'``) are made
            on the string as given and are case-sensitive.
        incl_on_offset_today : bool, default False
            See the note above.
        **kwargs
            Accepted but not used in this method.

        Returns
        -------
        The shifted dates.  When ``today`` was not a non-string iterable,
        only the first element is returned.

        Raises
        ------
        ValueError
            If ``n == 0``.

        Examples
        --------
        Examples carried over from the original docstring (not verified
        here).  Given that 2017-08-18 is a trading day and 2017-08-20 is
        not:

            tradeDayOffset('2017-08-18', 1, freq='1d', incl_on_offset_today=False) -> 2017-08-21

            tradeDayOffset('2017-08-18', 1, freq='1d', incl_on_offset_today=True) -> 2017-08-18

            tradeDayOffset('2017-08-18', 2, freq='1d', incl_on_offset_today=True) -> 2017-08-21

            tradeDayOffset('2017-08-18', -1, freq='1d', incl_on_offset_today=True) -> 2017-08-18

            tradeDayOffset('2017-08-18', -2, freq='1d', incl_on_offset_today=True) -> 2017-08-17

        The original also listed these ``n=0`` cases, which the current code
        rejects with ``ValueError``:

            tradeDayOffset('2017-08-18', 0, freq='1d', incl_on_offset_today=False) -> 2017-08-18

            tradeDayOffset('2017-08-20', 0, freq='1d', incl_on_offset_today=True) -> 2017-08-18
        """
        if n == 0:
            raise ValueError(
                "absolute value of parameter 'n' must be positive!")

        # Normalise the input to a DatetimeIndex so scalar and iterable
        # inputs share one code path; the scalar case is unwrapped at the end.
        if is_non_string_iterable(today):
            days = pd.to_datetime(today)
        else:
            days = pd.DatetimeIndex([pd.to_datetime(today)])
        raw_days = days.copy()

        move_forward = n > 0
        # presumably an 'S'-suffixed alias denotes a period-start offset
        # (e.g. 'MS') — TODO confirm against callers
        begin_offset = freq.endswith('S')
        offset = to_offset(freq.upper())

        # First snap the dates onto the offset
        # (bday_chn_ashare is defined outside this method; presumably the
        # China A-share business-day offset — verify)
        if begin_offset:
            days = days + offset - offset
            bdays = days - bday_chn_ashare + bday_chn_ashare
        else:
            days = days - offset + offset
            bdays = days + bday_chn_ashare - bday_chn_ashare

        # td is the per-date number of steps still to apply: comparing the
        # original dates with the snapped trading days decides whether the
        # snapping already consumed one step.
        if move_forward:
            if incl_on_offset_today:
                td = np.where(raw_days <= bdays, n - 1, n)
            else:
                td = np.where(raw_days < bdays, n - 1, n)
        else:
            if incl_on_offset_today:
                td = np.where(raw_days < bdays, n, n + 1)
            else:
                td = np.where(raw_days <= bdays, n, n + 1)

        # A freq ending in lower-case 'd' steps by bday_chn_ashare; any
        # other freq steps by the parsed offset.  Dates whose td is 0 are
        # left where they are.
        if freq.endswith('d'):
            days = pd.DatetimeIndex(
                np.where(td != 0, days + bday_chn_ashare * td,
                         days).astype('datetime64[D]'))
        else:
            days = pd.DatetimeIndex(
                np.where(td != 0, days + offset * td,
                         days).astype('datetime64[D]'))

        # Final add/subtract round trip with bday_chn_ashare; the order
        # depends on whether freq was treated as a period-start alias.
        if not begin_offset:
            days = days + bday_chn_ashare - bday_chn_ashare
        else:
            days = days - bday_chn_ashare + bday_chn_ashare

        # Unwrap the one-element index created for scalar input.
        if not is_non_string_iterable(today):
            days = days[0]

        return days
示例#12
0
def test_get_to_timestamp_base(freqstr, exp_freqstr):
    """Check ``get_to_timestamp_base`` on period dtype codes.

    Both aliases are converted with ``to_offset`` and reduced to their
    ``_period_dtype_code``; mapping the first code through
    ``get_to_timestamp_base`` must give the second code.
    """
    source_code, expected_code = (
        to_offset(alias)._period_dtype_code for alias in (freqstr, exp_freqstr)
    )
    assert get_to_timestamp_base(source_code) == expected_code
示例#13
0
def get_freq_code(freqstr: str) -> int:
    """Parse ``freqstr`` with ``to_offset`` and return its ``_period_dtype_code``."""
    # mypy reports: "BaseOffset" has no attribute "_period_dtype_code"
    return to_offset(freqstr)._period_dtype_code  # type: ignore[attr-defined]
示例#14
0
 def test_extract_ordinals_raises(self):
     """``extract_ordinals`` must raise ``TypeError`` for a non-object array.

     Per the original note, the point is to get a ``TypeError`` rather than
     a segfault.
     """
     daily = to_offset("D")
     non_object_values = np.arange(5)
     expected_msg = "values must be object-dtype"
     with pytest.raises(TypeError, match=expected_msg):
         extract_ordinals(non_object_values, daily)
示例#15
0
def test_to_offset_whitespace(freqstr, expected):
    result = to_offset(freqstr)
    assert result == expected
示例#16
0
def _get_period_alias(freq) -> str | None:
    """Return ``get_period_alias`` applied to the rule code of ``freq``.

    ``freq`` is converted with ``to_offset`` and the offset's ``rule_code``
    is what gets looked up.
    """
    return get_period_alias(to_offset(freq).rule_code)
示例#17
0
def test_to_offset_leading_plus(freqstr, expected):
    result = to_offset(freqstr)
    assert result.n == expected
示例#18
0
import re

import pytest

from pandas._libs.tslibs import Timedelta, offsets, to_offset


@pytest.mark.parametrize(
    "freq_input,expected",
    [
        (to_offset("10us"), offsets.Micro(10)),
        (offsets.Hour(), offsets.Hour()),
        ("2h30min", offsets.Minute(150)),
        ("2h 30min", offsets.Minute(150)),
        ("2h30min15s", offsets.Second(150 * 60 + 15)),
        ("2h 60min", offsets.Hour(3)),
        ("2h 20.5min", offsets.Second(8430)),
        ("1.5min", offsets.Second(90)),
        ("0.5S", offsets.Milli(500)),
        ("15l500u", offsets.Micro(15500)),
        ("10s75L", offsets.Milli(10075)),
        ("1s0.25ms", offsets.Micro(1000250)),
        ("1s0.25L", offsets.Micro(1000250)),
        ("2800N", offsets.Nano(2800)),
        ("2SM", offsets.SemiMonthEnd(2)),
        ("2SM-16", offsets.SemiMonthEnd(2, day_of_month=16)),
        ("2SMS-14", offsets.SemiMonthBegin(2, day_of_month=14)),
        ("2SMS-15", offsets.SemiMonthBegin(2)),
    ],
)
def test_to_offset(freq_input, expected):
示例#19
0
def test_anchored_shortcuts(shortcut, expected):
    result = to_offset(shortcut)
    assert result == expected
示例#20
0
def test_resolution_bumping(args, expected):
    # see gh-14378
    off = to_offset(str(args[0]) + args[1])
    assert off.n == expected[0]
    assert off._prefix == expected[1]
示例#21
0
def test_to_offset_negative(freqstr, expected):
    result = to_offset(freqstr)
    assert result.n == expected
示例#22
0
def test_cat(args):
    """``to_offset`` must raise ``ValueError`` for the string built from ``args``.

    The string is ``str(args[0])`` followed by ``args[1]``; the error
    message must match "Invalid frequency".
    """
    freqstr = str(args[0]) + args[1]
    with pytest.raises(ValueError, match="Invalid frequency"):
        to_offset(freqstr)
示例#23
0
def test_to_offset_tuple_unsupported():
    """``to_offset`` must raise ``TypeError`` when given the tuple ``(5, "T")``."""
    tuple_freq = (5, "T")
    expected_msg = "pass as a string instead"
    with pytest.raises(TypeError, match=expected_msg):
        to_offset(tuple_freq)
示例#24
0
def test_compatibility(freqstr, expected):
    ts_np = np.datetime64("2021-01-01T08:00:00.00")
    do = to_offset(freqstr)
    assert ts_np + do == np.datetime64(expected)
示例#25
0
def interval_range(start=None,
                   end=None,
                   periods=None,
                   freq=None,
                   name=None,
                   closed="right"):
    """
    Return a fixed frequency IntervalIndex.

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals.
    end : numeric or datetime-like, default None
        Right bound for generating intervals.
    periods : int, default None
        Number of periods to generate.
    freq : numeric, str, or DateOffset, default None
        The length of each interval. Must be consistent with the type of start
        and end, e.g. 2 for numeric, or '5H' for datetime-like.  Default is 1
        for numeric and 'D' for datetime-like.
    name : str, default None
        Name of the resulting IntervalIndex.
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Returns
    -------
    IntervalIndex

    Raises
    ------
    ValueError
        If, after the default ``freq`` has been filled in, the number of
        specified values among ``start``, ``end``, ``periods`` and ``freq``
        is not exactly three; if ``start`` or ``end`` is not a valid
        endpoint; or if a non-numeric ``freq`` cannot be converted to a
        DateOffset.
    TypeError
        If ``periods`` is neither None, an integer, nor a float; or if
        ``start``, ``end`` and ``freq`` are not type compatible.

    See Also
    --------
    IntervalIndex : An Index of intervals that are all closed on the same side.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``IntervalIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end``, inclusively.

    To learn more about datetime-like frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    Numeric ``start`` and ``end`` are supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
                  closed='right', dtype='interval[int64]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]],
                  closed='right', dtype='interval[datetime64[ns]]')

    The ``freq`` parameter specifies the frequency between the left and right
    endpoints of the individual intervals within the ``IntervalIndex``.  For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
                  closed='right', dtype='interval[float64]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]],
                  closed='right', dtype='interval[datetime64[ns]]')

    Specify ``start``, ``end``, and ``periods``; the frequency is generated
    automatically (linearly spaced).

    >>> pd.interval_range(start=0, end=6, periods=4)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
              closed='right',
              dtype='interval[float64]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],
                  closed='both', dtype='interval[int64]')
    """
    start = maybe_box_datetimelike(start)
    end = maybe_box_datetimelike(end)
    # The first endpoint that was supplied decides between the numeric and
    # the datetime-like/timedelta-like code paths below.
    endpoint = start if start is not None else end

    # Fill in the default freq only when freq is missing and at least one
    # of periods/start/end is missing as well.
    if freq is None and com.any_none(periods, start, end):
        freq = 1 if is_number(endpoint) else "D"

    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError("Of the four parameters: start, end, periods, and "
                         "freq, exactly three must be specified")

    if not _is_valid_endpoint(start):
        raise ValueError(
            f"start must be numeric or datetime-like, got {start}")
    elif not _is_valid_endpoint(end):
        raise ValueError(f"end must be numeric or datetime-like, got {end}")

    # Float periods are truncated to int; anything else that is neither an
    # integer nor None is rejected.
    if is_float(periods):
        periods = int(periods)
    elif not is_integer(periods) and periods is not None:
        raise TypeError(f"periods must be a number, got {periods}")

    # A non-numeric freq must be convertible to a DateOffset.
    if freq is not None and not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError as err:
            raise ValueError(
                f"freq must be numeric or convertible to DateOffset, got {freq}"
            ) from err

    # verify type compatibility
    if not all([
            _is_type_compatible(start, end),
            _is_type_compatible(start, freq),
            _is_type_compatible(end, freq),
    ]):
        raise TypeError("start, end, freq need to be type compatible")

    # +1 to convert interval count to breaks count (n breaks = n-1 intervals)
    if periods is not None:
        periods += 1

    if is_number(endpoint):
        # force consistency between start/end/freq (lower end if freq skips it)
        if com.all_not_none(start, end, freq):
            end -= (end - start) % freq

        # compute the period/start/end if unspecified (at most one)
        if periods is None:
            periods = int((end - start) // freq) + 1
        elif start is None:
            start = end - (periods - 1) * freq
        elif end is None:
            end = start + (periods - 1) * freq

        breaks = np.linspace(start, end, periods)
        if all(is_integer(x) for x in com.not_none(start, end, freq)):
            # np.linspace always produces float output
            breaks = maybe_downcast_to_dtype(breaks, "int64")
    else:
        # delegate to the appropriate range function
        if isinstance(endpoint, Timestamp):
            breaks = date_range(start=start,
                                end=end,
                                periods=periods,
                                freq=freq)
        else:
            breaks = timedelta_range(start=start,
                                     end=end,
                                     periods=periods,
                                     freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)
示例#26
0
 def get_freq_code(freqstr):
     """Return the ``_period_dtype_code`` of the offset parsed from ``freqstr``."""
     offset = to_offset(freqstr)
     return offset._period_dtype_code